def download_repositories(self, language, search_phrase='', number_of_repositories=10, number_of_files_for_repo=5):
    """
    Downloads found repositories.

    The repositories info is fetched first; then one daemon thread per repository is
    created, all threads are started, and finally they are joined in order. Exceptions
    raised inside the threads are passed back to the calling thread through a queue and
    handled there after each join.

    :param language: repository language
    :param search_phrase: phrase to be searched. If no search_phrase is given, search is done with language only.
    :param number_of_repositories: max number of repositories to be found
    :param number_of_files_for_repo: max number of files for each repository
    :raises SystemExit: (exit status 1) when fetching the repositories info fails for any
        reason, or when a download thread reports a ConnectionError or an unexpected
        exception. A TimeoutError reported by a download thread is only logged/printed
        and the remaining threads are still joined.
    """
    query = RESTUtils.create_search_query(searched_phrase=search_phrase, language=language)
    try:
        repos = RepositoryDownloader.get_repositories_info(query, number_of_repositories)
    except ConnectionError as err:
        logging.error('Network problem while downloading repositories info. '
                      'Error message: {0}'.format(str(err)))
        print('Network problem. Repositories cannot be downloaded.')
        sys.exit(1)
    except TimeoutError as err:
        logging.error('Timeout while downloading repositories info. '
                      'Error message: {0}'.format(str(err)))
        print('Timeout problem. Repository download time was exceeded.')
        sys.exit(1)
    except Exception as err:
        logging.error('Unknown exception while downloading repositories info. '
                      'Error message: {0}'.format(str(err)))
        print('Unknown exception while downloading repositories info.')
        print('Error message: {0}'.format(str(err)))
        sys.exit(1)
    else:
        logging.info('Repositories info downloaded.')

    # Worker threads cannot propagate exceptions to the caller directly, so each
    # failure is handed over through this queue and examined after join().
    exception_bucket = queue.Queue()

    def thread_target(repo, download_directory_path, file_extensions, files_per_repo):
        # exception_bucket is captured from the enclosing scope (closure).
        try:
            repo.download_repository(download_directory_path, file_extensions, files_per_repo)
        except Exception as err:
            # Same precedence as the handling in the join loop below:
            # ConnectionError first, then TimeoutError, then anything else.
            if isinstance(err, ConnectionError):
                problem = 'Network problem'
            elif isinstance(err, TimeoutError):
                problem = 'TimeoutError'
            else:
                problem = 'Unknown exception'
            logging.error('{0} while downloading repository content. '
                          'Error message: {1}'.format(problem, str(err)))
            exception_bucket.put(err)

    threads = [
        threading.Thread(target=thread_target,
                         args=(repo, self.downloadDirectoryPath, self.fileExtensions,
                               number_of_files_for_repo),
                         daemon=True)
        for repo in repos
    ]

    for t, repo in zip(threads, repos):
        t.start()
        started_msg = 'Thread for {name} {url} started.'.format(name=repo.full_name, url=repo.html_url)
        print(started_msg)
        logging.info(started_msg)
    print('All threads for downloading repositories started')
    logging.info('All threads for downloading repositories started')

    for t, repo in zip(threads, repos):
        t.join()
        # At most one pending failure is handled per join. The failure taken from
        # the queue may come from any thread that has already failed, not
        # necessarily from the thread that was just joined.
        try:
            err = exception_bucket.get(block=False)
        except queue.Empty:
            err = None

        if err is not None:
            if isinstance(err, ConnectionError):
                logging.error('Network problem while downloading repository content. '
                              'Error message: {0}'.format(str(err)))
                print('Network problem. Repository content cannot be downloaded. Program ends')
                sys.exit(1)
            elif isinstance(err, TimeoutError):
                # A timeout is not fatal: report it and keep joining the rest.
                logging.error('TimeoutError while downloading repository content. '
                              'Error message: {0}'.format(str(err)))
                print('TimeoutError. Repository content cannot be downloaded. Program skips.')
            else:
                logging.error('Unknown exception while downloading repository content. '
                              'Error message: {0}'.format(str(err)))
                print('Unknown exception while downloading repository content.')
                print('Error message: {0}'.format(str(err)))
                sys.exit(1)

        joined_msg = 'Thread for {name} {url} joined.'.format(name=repo.full_name, url=repo.html_url)
        print(joined_msg)
        logging.info(joined_msg)
    print('All threads for downloading repositories joined')
    logging.info('All threads for downloading repositories joined')