コード例 #1
0
    def download_repositories(self, language, search_phrase='',
                              number_of_repositories=10, number_of_files_for_repo=5):
        """
        Search for repositories and download each one in its own thread.

        The search query is built with ``RESTUtils.create_search_query`` and the
        repositories are obtained from ``RepositoryDownloader.get_repositories_info``.
        For every repository a daemon thread calls ``repo.download_repository`` with
        ``self.downloadDirectoryPath``, ``self.fileExtensions`` and
        ``number_of_files_for_repo``. Progress is printed to stdout and logged.

        Failure handling:

        * Any exception raised while fetching the repositories info is logged,
          reported on stdout and ends the program with exit status 1.
        * Exceptions raised inside a download thread are logged in that thread and
          handed to the calling thread through a queue. After each join one pending
          failure (if any) is examined: a ``TimeoutError`` is reported and skipped,
          anything else ends the program with exit status 1.

        :param language: repository language
        :param search_phrase: phrase to be searched. If no search_phrase is given, search is done with language only.
        :param number_of_repositories: max number of repositories to be found
        :param number_of_files_for_repo: max number of files for each repository
        :raises SystemExit: on any failure other than a timeout of a single repository download
        """
        query = RESTUtils.create_search_query(searched_phrase=search_phrase, language=language)
        try:
            repos = RepositoryDownloader.get_repositories_info(query, number_of_repositories)
        except ConnectionError as err:
            logging.error('Network problem while downloading repositories info. '
                          'Error message: %s', err)
            print('Network problem. Repositories cannot be downloaded.')
            sys.exit(1)
        except TimeoutError as err:
            logging.error('Timeout while downloading repositories info. '
                          'Error message: %s', err)
            print('Timeout problem. Repository download time was exceeded.')
            sys.exit(1)
        except Exception as err:
            # Last-resort boundary: keep the traceback in the log, because str(err)
            # alone does not identify the exception type or where it came from.
            logging.error('Unknown exception while downloading repositories info. '
                          'Error message: %s', err, exc_info=True)
            print('Unknown exception while downloading repositories info.')
            print('Error message: {0}'.format(str(err)))
            sys.exit(1)
        else:
            logging.info('Repositories info downloaded.')

        # An exception cannot propagate out of a thread, so workers hand the
        # exception object over to the calling thread through this queue.
        failures = queue.Queue()

        def download_one(repo, download_directory_path, file_extensions, files_per_repo):
            """Download one repository; log any failure and queue it for the caller."""
            try:
                repo.download_repository(download_directory_path, file_extensions, files_per_repo)
            except ConnectionError as err:
                logging.error('Network problem while downloading repository content. '
                              'Error message: %s', err)
                failures.put(err)
            except TimeoutError as err:
                logging.error('TimeoutError while downloading repository content. '
                              'Error message: %s', err)
                failures.put(err)
            except Exception as err:
                logging.error('Unknown exception while downloading repository content. '
                              'Error message: %s', err, exc_info=True)
                failures.put(err)

        # Pair every thread with its repository in a single pass, so ``repos`` is
        # iterated exactly once (it may be a one-shot iterable).
        workers = [
            (threading.Thread(target=download_one,
                              args=(repo,
                                    self.downloadDirectoryPath,
                                    self.fileExtensions,
                                    number_of_files_for_repo),
                              daemon=True),
             repo)
            for repo in repos
        ]

        for worker, repo in workers:
            worker.start()
            started = 'Thread for {name} {url} started.'.format(name=repo.full_name, url=repo.html_url)
            print(started)
            logging.info(started)
        print('All threads for downloading repositories started')
        logging.info('All threads for downloading repositories started')

        for worker, repo in workers:
            worker.join()
            # The queue is shared by all workers, so the failure taken here was not
            # necessarily raised by the thread that has just been joined.
            try:
                failure = failures.get(block=False)
            except queue.Empty:
                failure = None

            # Same precedence as the worker's handlers: connection, timeout, other.
            if isinstance(failure, ConnectionError):
                logging.error('Network problem while downloading repository content. '
                              'Error message: %s', failure)
                print('Network problem. Repository content cannot be downloaded. Program ends')
                sys.exit(1)
            elif isinstance(failure, TimeoutError):
                # A timeout only affects one repository; keep going with the others.
                logging.error('TimeoutError while downloading repository content. '
                              'Error message: %s', failure)
                print('TimeoutError. Repository content cannot be downloaded. Program skips.')
            elif failure is not None:
                logging.error('Unknown exception while downloading repository content. '
                              'Error message: %s', failure)
                print('Unknown exception while downloading repository content.')
                print('Error message: {0}'.format(str(failure)))
                sys.exit(1)

            joined = 'Thread for {name} {url} joined.'.format(name=repo.full_name, url=repo.html_url)
            print(joined)
            logging.info(joined)
        print('All threads for downloading repositories joined')
        logging.info('All threads for downloading repositories joined')