Beispiel #1
0
    def retrieve_files_in_repo(self, repo_name):
        """Retrieve the expected files of one repository and save the result.

        The repository is skipped (with a log line) when it is already
        flagged as retrieved or when it has no contents URL. Otherwise its
        contents URL is traversed through ``self._retriever``; every file
        accepted by ``Repository.expects_file`` is fetched and added to the
        repository. If at least one file was added, ``find_packages`` is
        run. The repository is then flagged as retrieved and committed,
        whether or not any file was added.

        Args:
            repo_name: Name handed to the ``Repository`` constructor.

        Raises:
            AssertionError: If ``self.is_running()`` returns a false value
                while files are being traversed.
        """
        repo = Repository(repo_name)

        def log_repo(label):
            # All repository status lines share the same format string.
            self._logger.info(
                self._repo_fmt.format(label=label,
                                      full_name=repo.name,
                                      id=repo.id,
                                      url=repo.url))

        # Skip if repository was done already
        if repo.retrieved:
            log_repo('Already done:')
            return

        # Skip if repository has no contents URL
        if not repo.contents_url:
            log_repo('No contents URL found:')
            return

        # Do retrieving contents from GitHub
        log_repo('Retrieving:')

        added = False
        for entry in self._retriever.traverse(repo.contents_url):
            # Explicit raise instead of ``assert`` so the stop check is not
            # stripped when Python runs with -O; the exception type is kept
            # so existing handlers continue to work.
            if not self.is_running():
                raise AssertionError(
                    'is_running() returned False during retrieval')

            if not Repository.expects_file(entry.path):
                self._logger.info('  (-) %s' % entry.path)
                continue

            self._logger.info('  (+) %s' % entry.path)
            self._retriever.retrieve_content(entry)
            repo.add_file(entry.path, entry.decoded_content)
            added = True

        if added:
            # Find packages if files found
            self._logger.info('  --> Finding packages...')
            repo.find_packages()
        else:
            # Do nothing if no file found
            self._logger.info('  --> No expected files found.')

        # Save repository; it is marked retrieved even when nothing was
        # added, so the early "Already done" check skips it next time.
        self._logger.info('  --> Saving repository...')
        repo.set_retrieved(True)
        repo.commit_changes()
Beispiel #2
0
    def search_repos_in_slice(self, time_slice):
        """Search one time slice and register every repository not yet known.

        Starts a search on ``self._search`` restricted to ``time_slice`` and
        walks its results. Repositories for which ``Repository.exists`` is
        true are only logged. Each other result is stored as a new
        ``Repository`` (id, URL and contents URL copied from the search
        result) and, when ``self._repos`` is not ``None``, its full name is
        put on that queue.

        Args:
            time_slice: Value passed as the ``created`` argument of the
                search; also interpolated into the log message.

        Raises:
            AssertionError: If ``self.is_running()`` returns a false value
                while results are being traversed.
        """
        self._logger.info('Searching time slice: %s' % time_slice)
        self._search.search(created=time_slice)

        for found in self._search.traverse():
            # Explicit raise instead of ``assert`` so the stop check is not
            # stripped when Python runs with -O; the exception type is kept
            # so existing handlers continue to work.
            if not self.is_running():
                raise AssertionError(
                    'is_running() returned False during search')

            # NOTE(review): the format call receives every instance
            # attribute of the result as a keyword argument; presumably
            # these include full_name, id and url -- confirm against the
            # definition of self._repo_fmt.
            if Repository.exists(found.full_name):
                self._logger.info(
                    self._repo_fmt.format(label='Existed', **found.__dict__))
                continue
            self._logger.info(
                self._repo_fmt.format(label='Found', **found.__dict__))

            # Newly create repo in database
            record = Repository(found.full_name)
            record.set_id(found.id)
            record.set_url(found.url)
            record.set_contents_url(found.contents_url)
            record.commit_changes()

            # Queue repository for later retrieving
            if self._repos is not None:
                self._repos.put(found.full_name)