Exemple #1
0
    def get_diff_commits_origin_raw(self, ocean_backend):
        """List the commit hashes held in the raw index that the repository lacks.

        The raw index is queried for every item whose ``origin`` matches the
        anonymized origin of the Perceval backend. If the repository cannot be
        read (it is empty, it is not available locally, or listing its commits
        fails for any other reason) the problem is only logged and the
        repository is treated as having no commits, so every hash found in the
        raw index is returned.

        :param ocean_backend: Ocean backend

        :returns: list of commit hashes, in no particular order
        """
        origin = anonymize_url(self.perceval_backend.origin)
        origin_filter = {'name': 'origin', 'value': [origin]}

        # Stays empty unless the whole commit listing succeeds.
        repo_hashes = set()
        try:
            repo = GitRepository(self.perceval_backend.uri, self.perceval_backend.gitpath)
            repo_hashes = set(repo.rev_list())
        except EmptyRepositoryError:
            logger.warning("No commits retrieved from {}, repo is empty".format(origin))
        except RepositoryError:
            logger.warning("No commits retrieved from {}, repo doesn't exist locally".format(origin))
        except Exception as err:
            logger.error("[git] No commits retrieved from {}, "
                         "git rev-list command failed: {}".format(origin, err))

        indexed_items = ocean_backend.fetch(ignore_incremental=True, _filter=origin_filter)
        indexed_hashes = {item['data']['commit'] for item in indexed_items}

        return list(indexed_hashes - repo_hashes)
Exemple #2
0
    def update_items(self, ocean_backend, enrich_backend):
        """Delete stale commits from the raw and enriched indexes and refresh branch info.

        The hashes currently listed by the repository are compared with the
        hashes stored in the raw index for this origin. Every hash found only
        in the index is removed from both the raw and the enriched index, in
        batches of at most ``MAX_BULK_UPDATE_SIZE`` hashes. Afterwards the
        branch information of the enriched index is deleted and added again.

        If the repository cannot be opened or its commits cannot be listed,
        the error is logged and the method returns without touching the
        indexes.

        :param ocean_backend: backend giving access to the raw index
        :param enrich_backend: backend giving access to the enriched index
        """
        origin = self.perceval_backend.origin
        fltr = {
            'name': 'origin',
            'value': [origin]
        }

        logger.debug("[update-items] Checking commits for %s.", origin)

        # Build the repository handle inside the ``try``: creating it can fail
        # as well (e.g. when the repository is not available locally) and that
        # must be logged and skipped just like a failing rev-list.
        try:
            git_repo = GitRepository(self.perceval_backend.uri, self.perceval_backend.gitpath)
            current_hashes = set(git_repo.rev_list())
        except Exception as e:
            # ``git_repo`` may be unbound here, so report the backend URI.
            logger.error("Something went wrong with %s, %s",
                         self.perceval_backend.uri, e, exc_info=True)
            return

        raw_hashes = {item['data']['commit']
                      for item in ocean_backend.fetch(ignore_incremental=True, _filter=fltr)}

        hashes_to_delete = list(raw_hashes.difference(current_hashes))

        def remove_batch(batch):
            # delete documents from the raw index
            self.remove_commits(batch, ocean_backend.elastic.index_url,
                                'data.commit', origin)
            # delete documents from the enriched index
            self.remove_commits(batch, enrich_backend.elastic.index_url,
                                'hash', origin)

        to_process = []
        for _hash in hashes_to_delete:
            to_process.append(_hash)

            if len(to_process) == MAX_BULK_UPDATE_SIZE:
                remove_batch(to_process)
                # start a fresh list instead of clearing the one handed over
                to_process = []

        # flush the last, partially filled batch
        if to_process:
            remove_batch(to_process)

        logger.debug("[update-items] %s commits deleted from %s with origin %s.",
                     len(hashes_to_delete), ocean_backend.elastic.anonymize_url(ocean_backend.elastic.index_url),
                     origin)
        logger.debug("[update-items] %s commits deleted from %s with origin %s.",
                     len(hashes_to_delete), enrich_backend.elastic.anonymize_url(enrich_backend.elastic.index_url),
                     origin)

        # update branch info
        self.delete_commit_branches(enrich_backend)
        self.add_commit_branches(git_repo, enrich_backend)
    def update_items(self, ocean_backend, enrich_backend):
        """Drop from the raw and enriched indexes the commits the repository no longer lists.

        The hashes listed by the repository are compared with the hashes
        stored in the raw index for this origin; hashes present only in the
        index are removed from both indexes in batches of at most
        ``MAX_BULK_UPDATE_SIZE``. If the repository is empty, cannot be opened
        or its commits cannot be listed, a message is logged and the method
        returns early without deleting anything.

        :param ocean_backend: backend giving access to the raw index
        :param enrich_backend: backend giving access to the enriched index
        """
        origin = self.perceval_backend.origin
        origin_filter = {'name': 'origin', 'value': [origin]}

        logger.debug("[git] update-items Checking commits for {}.".format(origin))

        try:
            git_repo = GitRepository(self.perceval_backend.uri,
                                     self.perceval_backend.gitpath)
            repo_hashes = set(git_repo.rev_list())
        except EmptyRepositoryError:
            logger.warning(
                "[git] Skip updating branch info for repo {}, "
                "repo is empty".format(origin))
            return
        except RepositoryError:
            logger.warning(
                "[git] Skip updating branch info for repo {}, "
                "repo doesn't exist locally".format(origin))
            return
        except Exception as err:
            logger.error(
                "[git] Skip updating branch info for repo {}, "
                "git rev-list command failed: {}".format(origin, err))
            return

        indexed_items = ocean_backend.fetch(ignore_incremental=True,
                                            _filter=origin_filter)
        indexed_hashes = {item['data']['commit'] for item in indexed_items}

        stale_hashes = list(indexed_hashes - repo_hashes)

        def purge(batch):
            # raw index first, then the enriched one
            self.remove_commits(batch, ocean_backend.elastic.index_url,
                                'data.commit', origin)
            self.remove_commits(batch, enrich_backend.elastic.index_url,
                                'hash', origin)

        batch = []
        for commit_hash in stale_hashes:
            batch.append(commit_hash)
            if len(batch) == MAX_BULK_UPDATE_SIZE:
                purge(batch)
                batch = []

        # whatever is left over after the last full batch
        if batch:
            purge(batch)

        deleted_msg = "[git] update-items {} commits deleted from {} with origin {}."
        raw_elastic = ocean_backend.elastic
        logger.debug(deleted_msg.format(
            len(stale_hashes),
            raw_elastic.anonymize_url(raw_elastic.index_url),
            origin))
        enriched_elastic = enrich_backend.elastic
        logger.debug(deleted_msg.format(
            len(stale_hashes),
            enriched_elastic.anonymize_url(enriched_elastic.index_url),
            origin))