Exemple #1
0
    def get_diff_commits_origin_raw(self, ocean_backend):
        """Compute the commit hashes stored in the raw index but absent from the repo.

        The hashes listed by `rev_list` on the local repository are compared with
        the `data.commit` values of the raw items fetched for this origin. When the
        local hashes cannot be read (empty repository, repository error or any
        other failure) the problem is logged and the local set stays empty.

        :param ocean_backend: Ocean backend

        :returns: list of hashes present in the raw index and not in the repository
        """
        origin = anonymize_url(self.perceval_backend.origin)
        origin_filter = {'name': 'origin', 'value': [origin]}

        repo_commits = []
        try:
            repo = GitRepository(self.perceval_backend.uri, self.perceval_backend.gitpath)
            repo_commits = list(repo.rev_list())
        except EmptyRepositoryError:
            logger.warning("No commits retrieved from {}, repo is empty".format(origin))
        except RepositoryError:
            logger.warning("No commits retrieved from {}, repo doesn't exist locally".format(origin))
        except Exception as e:
            logger.error("[git] No commits retrieved from {}, "
                         "git rev-list command failed: {}".format(origin, e))

        repo_hashes = set(repo_commits)
        indexed_hashes = {
            item['data']['commit']
            for item in ocean_backend.fetch(ignore_incremental=True, _filter=origin_filter)
        }

        # Whatever is indexed but unknown to the repository has to go
        return list(indexed_hashes - repo_hashes)
Exemple #2
0
    def update_items(self, ocean_backend, enrich_backend):
        """Delete from the raw and enriched indexes the commits missing in the repo.

        The hashes returned by `rev_list` are compared with the `data.commit`
        values of the raw items of this origin; the hashes found only in the raw
        index are removed from both indexes in batches of `MAX_BULK_UPDATE_SIZE`.
        Afterwards the branch information of the enriched index is rebuilt.
        Nothing is deleted when the hashes cannot be listed: the error is logged
        and the method returns.

        :param ocean_backend: the ocean backend
        :param enrich_backend: the enrich backend
        """
        origin = self.perceval_backend.origin

        def _purge(batch):
            # delete documents from the raw index
            self.remove_commits(batch, ocean_backend.elastic.index_url,
                                'data.commit', origin)
            # delete documents from the enriched index
            self.remove_commits(batch, enrich_backend.elastic.index_url,
                                'hash', origin)

        origin_filter = {'name': 'origin', 'value': [origin]}

        logger.debug("[update-items] Checking commits for %s.", origin)

        git_repo = GitRepository(self.perceval_backend.uri, self.perceval_backend.gitpath)

        try:
            repo_hashes = set(git_repo.rev_list())
        except Exception as e:
            logger.error("Something went wrong with %s, %s", git_repo.uri, e, exc_info=True)
            return

        indexed_hashes = {
            item['data']['commit']
            for item in ocean_backend.fetch(ignore_incremental=True, _filter=origin_filter)
        }
        hashes_to_delete = list(indexed_hashes - repo_hashes)

        batch = []
        for commit_hash in hashes_to_delete:
            batch.append(commit_hash)

            # Flush only once the batch is full
            if len(batch) == MAX_BULK_UPDATE_SIZE:
                _purge(batch)
                batch = []

        # Flush the last, partially filled batch
        if batch:
            _purge(batch)

        for backend in (ocean_backend, enrich_backend):
            logger.debug("[update-items] %s commits deleted from %s with origin %s.",
                         len(hashes_to_delete), backend.elastic.anonymize_url(backend.elastic.index_url),
                         origin)

        # update branch info
        self.delete_commit_branches(enrich_backend)
        self.add_commit_branches(git_repo, enrich_backend)
Exemple #3
0
    def test_count_objects_invalid_output(self):
        """Test if an exception is raised when count_objects output is invalid"""

        new_path = os.path.join(self.tmp_path, 'newgit')
        repo = GitRepository.clone(self.git_path, new_path)

        # Remove the clone even when one of the checks fails
        try:
            # Check missing value
            expected = ("unable to parse 'count-objects' output;"
                        " reason: 'in-pack' entry not found")

            with unittest.mock.patch('perceval.backends.core.git.GitRepository._exec') as mock_exec:
                mock_exec.return_value = b'count: 69\n:sze: 900\n'

                with self.assertRaises(RepositoryError) as e:
                    _ = repo.count_objects()

            self.assertEqual(str(e.exception), expected)

            # Check invalid output; only the exception type is verified here
            with unittest.mock.patch('perceval.backends.core.git.GitRepository._exec') as mock_exec:
                mock_exec.return_value = b'invalid value'

                with self.assertRaises(RepositoryError) as e:
                    _ = repo.count_objects()
        finally:
            shutil.rmtree(new_path)
Exemple #4
0
    def enrich_git_branches(self, ocean_backend, enrich_backend):
        """Refresh the branch information of the commits in the enriched index.

        For every URL listed under the "git" entry of `self.prjs_map`, the branch
        information is first deleted and then added again for that repository.

        :param ocean_backend: the ocean backend
        :param enrich_backend: the enrich backend
        """
        def _index_name():
            # Anonymized URL of the enriched index, used only in log messages
            return self.elastic.anonymize_url(enrich_backend.elastic.index_url)

        logger.debug("[git] study git-branches start")
        for data_source in self.prjs_map:
            if data_source != "git":
                continue

            for repo_url in self.prjs_map[data_source]:
                args = GitCommand(repo_url).parsed_args
                git_repo = GitRepository(args.uri, args.gitpath)

                logger.debug("[git] study git-branches delete branch info for repo {} in index {}".format(
                             git_repo.uri, _index_name()))
                self.delete_commit_branches(git_repo, enrich_backend)

                logger.debug("[git] study git-branches add branch info for repo {} in index {}".format(
                             git_repo.uri, _index_name()))
                self.add_commit_branches(git_repo, enrich_backend)

                logger.debug("[git] study git-branches repo {} in index {} processed".format(
                             git_repo.uri, _index_name()))

        logger.debug("[git] study git-branches end")
Exemple #5
0
    def test_not_existing_repo_on_init(self):
        """Test if init fails when the repository does not exist"""
        import re

        # assertRaisesRegex treats the expected text as a regular expression;
        # escape it so the temporary path is always matched literally.
        expected = re.escape("git repository '%s' does not exist" % self.tmp_path)

        with self.assertRaisesRegex(RepositoryError, expected):
            _ = GitRepository('http://example.org', self.tmp_path)
Exemple #6
0
    def test_log_from_date(self):
        """Test if commits are returned from the given date"""

        new_path = os.path.join(self.tmp_path, 'newgit')
        repo = GitRepository.clone(self.git_path, new_path)

        # Remove the clone even when an assertion fails
        try:
            from_date = datetime.datetime(2014, 2, 11, 22, 7, 49)
            gitlog = list(repo.log(from_date=from_date))

            self.assertEqual(len(gitlog), 36)
            self.assertEqual(gitlog[0][:14], "commit ce8e0b8")

            # With a timezone-aware date (offset of -36000 seconds) the log
            # is expected to contain no lines at all
            from_date = datetime.datetime(2014, 2, 11, 22, 7, 49,
                                          tzinfo=dateutil.tz.tzoffset(None, -36000))
            gitlog = list(repo.log(from_date=from_date))

            self.assertEqual(gitlog, [])
        finally:
            shutil.rmtree(new_path)
Exemple #7
0
    def test_clone_existing_directory(self):
        """Test if it raises an exception when it tries to clone into an existing directory"""
        import re

        # assertRaisesRegex treats the expected text as a regular expression;
        # escape it so the temporary path is always matched literally.
        expected = re.escape("git command - fatal: destination path '%s' already exists"
                             % self.tmp_path)

        with self.assertRaisesRegex(RepositoryError, expected):
            _ = GitRepository.clone(self.git_path, self.tmp_path)
Exemple #8
0
    def test_init(self):
        """Check that the constructor stores the URI and the directory path"""

        uri = 'http://example.git'
        repo = GitRepository(uri, self.git_path)

        self.assertIsInstance(repo, GitRepository)
        self.assertEqual(repo.uri, uri)
        self.assertEqual(repo.dirpath, self.git_path)
Exemple #9
0
    def enrich_git_branches(self, ocean_backend, enrich_backend, run_month_days=[7, 14, 21, 28]):
        """Refresh the branch information of the commits in the enriched index.

        The study only runs when the current UTC day of the month is listed in
        `run_month_days` (by default 7, 14, 21 and 28); on any other day it logs
        the schedule and returns. The example below shows how to activate the
        study and override the days by modifying the setup.cfg.

        ```
        [git]
        raw_index = git_raw
        enriched_index = git_enriched
        ...
        studies = [enrich_git_branches]

        [enrich_git_branches]
        run_month_days = [5, 22]
        ```

        A failure while adding the branch information of one repository is
        logged and the study moves on to the next repository.

        :param ocean_backend: the ocean backend
        :param enrich_backend: the enrich backend
        :param run_month_days: days of the month to run this study
        """
        def _index_name():
            # Anonymized URL of the enriched index, used only in log messages
            return anonymize_url(enrich_backend.elastic.index_url)

        logger.debug("[git] study git-branches start")
        today = datetime_utcnow().day
        # The values are converted into a new list of ints; the default list
        # itself is never modified.
        scheduled_days = [int(value) for value in run_month_days]
        if today not in scheduled_days:
            logger.debug("[git] study git-branches will execute only the days {} of each month".format(scheduled_days))
            logger.debug("[git] study git-branches end")
            return

        for data_source in self.prjs_map:
            if data_source != "git":
                continue

            for repo_url in self.prjs_map[data_source]:
                args = GitCommand(repo_url).parsed_args
                git_repo = GitRepository(args.uri, args.gitpath)

                logger.debug("[git] study git-branches delete branch info for repo {} in index {}".format(
                             git_repo.uri, _index_name()))
                self.delete_commit_branches(git_repo, enrich_backend)

                logger.debug("[git] study git-branches add branch info for repo {} in index {}".format(
                             git_repo.uri, _index_name()))
                try:
                    self.add_commit_branches(git_repo, enrich_backend)
                except Exception as e:
                    logger.error("[git] study git-branches failed on repo {}, due to {}".format(git_repo.uri, e))
                else:
                    logger.debug("[git] study git-branches repo {} in index {} processed".format(
                                 git_repo.uri, _index_name()))

        logger.debug("[git] study git-branches end")
Exemple #10
0
    def test_git_parser_from_iter(self):
        """Check that the static method parses the log produced by a repository"""

        repo = GitRepository(self.git_path, self.git_path)
        hashes = [entry['commit']
                  for entry in Git.parse_git_log_from_iter(repo.log())]

        self.assertListEqual(hashes, [
            'bc57a9209f096a130dcc5ba7089a8663f758a703',
            '87783129c3f00d2c81a3a8e585eb86a47e39891a',
            '7debcf8a2f57f86663809c58b5c07a398be7674c',
            'c0d66f92a95e31c77be08dc9d0f11a16715d1885',
            'c6ba8f7a1058db3e6b4bc6f1090e932b107605fb',
            '589bb080f059834829a2a5955bebfd7c2baa110a',
            'ce8e0b86a1e9877f42fe9453ede418519115f367',
            '51a3b654f252210572297f47597b31527c475fb8',
            '456a68ee1407a77f3e804a30dff245bb6c6b872f',
        ])
Exemple #11
0
    def test_is_detached(self):
        """Test if a repository is in detached state or not"""

        new_path = os.path.join(self.tmp_path, 'newgit')

        # Each source repository is cloned into the same path, so the clone
        # must be removed after every check, even when the assertion fails.
        cases = ((self.git_path, False),
                 (self.git_detached_path, True))

        for source_path, expected in cases:
            repo = GitRepository.clone(source_path, new_path)
            try:
                self.assertEqual(repo.is_detached(), expected)
            finally:
                shutil.rmtree(new_path)
Exemple #12
0
    def test_is_empty(self):
        """Test if a repository is empty or not"""

        new_path = os.path.join(self.tmp_path, 'newgit')

        # Each source repository is cloned into the same path, so the clone
        # must be removed after every check, even when the assertion fails.
        cases = ((self.git_path, False),
                 (self.git_empty_path, True))

        for source_path, expected in cases:
            repo = GitRepository.clone(source_path, new_path)
            try:
                self.assertEqual(repo.is_empty(), expected)
            finally:
                shutil.rmtree(new_path)
Exemple #13
0
    def test_pull(self):
        """Test if the repository is updated to 'origin' status"""

        def count_commits():
            """Get the number of commits counting the entries on the log"""

            cmd = ['git', 'log', '--oneline']
            gitlog = subprocess.check_output(cmd,
                                             stderr=subprocess.STDOUT,
                                             cwd=new_path,
                                             env={
                                                 'LANG': 'C',
                                                 'PAGER': ''
                                             })
            return len(gitlog.strip(b'\n').split(b'\n'))

        new_path = os.path.join(self.tmp_path, 'newgit')
        new_file = os.path.join(new_path, 'newfile')

        repo = GitRepository.clone(self.git_path, new_path)

        # Remove the clone even when a git command or an assertion fails
        try:
            # Count the number of commits before adding a new one
            self.assertEqual(count_commits(), 9)

            # Create a new file and commit it to the repository
            with open(new_file, 'w') as f:
                f.write("Testing pull method")

            cmd = ['git', 'add', new_file]
            subprocess.check_output(cmd,
                                    stderr=subprocess.STDOUT,
                                    cwd=new_path,
                                    env={'LANG': 'C'})

            cmd = [
                'git', '-c', 'user.name="mock"', '-c',
                'user.email="*****@*****.**"', 'commit', '-m', 'Testing pull'
            ]
            subprocess.check_output(cmd,
                                    stderr=subprocess.STDOUT,
                                    cwd=new_path,
                                    env={'LANG': 'C'})

            # Count the number of commits after adding the new one
            self.assertEqual(count_commits(), 10)

            # Update the repository to its original status
            repo.pull()

            # The number of commits should be back to its original value
            self.assertEqual(count_commits(), 9)
        finally:
            shutil.rmtree(new_path)
Exemple #14
0
    def test_count_objects(self):
        """Test if it gets the number of objects in a repository"""

        new_path = os.path.join(self.tmp_path, 'newgit')
        repo = GitRepository.clone(self.git_path, new_path)

        # Remove the clone even when the assertion fails
        try:
            self.assertEqual(repo.count_objects(), 42)
        finally:
            shutil.rmtree(new_path)
Exemple #15
0
    def test_pull_empty_repository(self):
        """Test if an exception is raised when the repository is empty"""

        new_path = os.path.join(self.tmp_path, 'newgit')
        repo = GitRepository.clone(self.git_empty_path, new_path)

        # Remove the clone even when the expected exception is not raised
        try:
            with self.assertRaises(EmptyRepositoryError):
                repo.pull()
        finally:
            shutil.rmtree(new_path)
Exemple #16
0
    def test_clone_error(self):
        """Test if it raises an exception when an error occurs cloning a repository"""
        import re

        # Clone a non-git repository
        new_path = os.path.join(self.tmp_path, 'newgit')

        # assertRaisesRegex treats the expected text as a regular expression;
        # escape it so the temporary path is always matched literally.
        expected = re.escape("git command - fatal: repository '%s' does not exist"
                             % self.tmp_path)

        with self.assertRaisesRegex(RepositoryError, expected):
            _ = GitRepository.clone(self.tmp_path, new_path)
Exemple #17
0
    def test_log_from_empty_repository(self):
        """Test if an exception is raised when the repository is empty"""

        new_path = os.path.join(self.tmp_path, 'newgit')
        repo = GitRepository.clone(self.git_empty_path, new_path)

        # Remove the clone even when the expected exception is not raised
        try:
            gitlog = repo.log()

            # The exception is expected while the log is being consumed
            with self.assertRaises(EmptyRepositoryError):
                _ = list(gitlog)
        finally:
            shutil.rmtree(new_path)
Exemple #18
0
    def test_log_empty(self):
        """Test if no line is returned when the log is empty"""

        new_path = os.path.join(self.tmp_path, 'newgit')
        repo = GitRepository.clone(self.git_path, new_path)

        # Remove the clone even when the assertion fails
        try:
            from_date = datetime.datetime(2020, 1, 1, 1, 1, 1)
            gitlog = list(repo.log(from_date=from_date))

            self.assertListEqual(gitlog, [])
        finally:
            shutil.rmtree(new_path)
Exemple #19
0
    def test_log(self):
        """Test log command"""

        new_path = os.path.join(self.tmp_path, 'newgit')
        repo = GitRepository.clone(self.git_path, new_path)

        # Remove the clone even when an assertion fails
        try:
            gitlog = list(repo.log())

            self.assertEqual(len(gitlog), 108)
            self.assertEqual(gitlog[0][:14], "commit bc57a92")
        finally:
            shutil.rmtree(new_path)
Exemple #20
0
    def test_not_git(self):
        """Test if a supposed git repo is not a git repo"""
        import re

        # A plain directory that is not a git repository
        new_path = os.path.join(self.tmp_path, 'falsegit')
        os.makedirs(new_path, exist_ok=True)

        # Remove the directory even when the expected exception is not raised
        try:
            # assertRaisesRegex treats the expected text as a regular expression;
            # escape it so the temporary path is always matched literally.
            expected = re.escape("git repository '%s' does not exist" % new_path)

            with self.assertRaisesRegex(RepositoryError, expected):
                _ = GitRepository(uri="", dirpath=new_path)
        finally:
            shutil.rmtree(new_path)
Exemple #21
0
    def test_clone(self):
        """Test if a git repository is cloned"""

        new_path = os.path.join(self.tmp_path, 'newgit')
        repo = GitRepository.clone(self.git_path, new_path)

        # Remove the clone even when an assertion fails
        try:
            self.assertIsInstance(repo, GitRepository)
            self.assertEqual(repo.uri, self.git_path)
            self.assertEqual(repo.dirpath, new_path)
            self.assertTrue(os.path.exists(new_path))
            self.assertTrue(os.path.exists(os.path.join(new_path, '.git')))
        finally:
            shutil.rmtree(new_path)
Exemple #22
0
    def enrich_git_branches(self, ocean_backend, enrich_backend):
        """Refresh the branch information of the commits in the enriched index.

        For every URL listed under the "git" entry of `self.prjs_map`, the branch
        information is first deleted and then added again for that repository.

        :param ocean_backend: the ocean backend
        :param enrich_backend: the enrich backend
        """
        for data_source in self.prjs_map:
            if data_source != "git":
                continue

            for repo_url in self.prjs_map[data_source]:
                args = GitCommand(repo_url).parsed_args
                git_repo = GitRepository(args.uri, args.gitpath)

                # Drop the stale branch data before writing the fresh one
                self.delete_commit_branches(git_repo, enrich_backend)
                self.add_commit_branches(git_repo, enrich_backend)
    def update_items(self, ocean_backend, enrich_backend):
        """Delete from the raw and enriched indexes the commits missing in the repo.

        The hashes returned by `rev_list` are compared with the `data.commit`
        values of the raw items of this origin; the hashes found only in the raw
        index are removed from both indexes in batches of `MAX_BULK_UPDATE_SIZE`.
        When the repository is empty, raises a repository error or the hashes
        cannot be listed, the problem is logged and nothing is deleted.

        :param ocean_backend: the ocean backend
        :param enrich_backend: the enrich backend
        """
        origin = self.perceval_backend.origin

        def _purge(batch):
            # delete documents from the raw index
            self.remove_commits(batch, ocean_backend.elastic.index_url,
                                'data.commit', origin)
            # delete documents from the enriched index
            self.remove_commits(batch, enrich_backend.elastic.index_url,
                                'hash', origin)

        origin_filter = {'name': 'origin', 'value': [origin]}

        logger.debug("[git] update-items Checking commits for {}.".format(origin))

        try:
            git_repo = GitRepository(self.perceval_backend.uri,
                                     self.perceval_backend.gitpath)
            repo_hashes = set(git_repo.rev_list())
        except EmptyRepositoryError:
            logger.warning("[git] Skip updating branch info for repo {}, "
                           "repo is empty".format(origin))
            return
        except RepositoryError:
            logger.warning("[git] Skip updating branch info for repo {}, "
                           "repo doesn't exist locally".format(origin))
            return
        except Exception as e:
            logger.error("[git] Skip updating branch info for repo {}, "
                         "git rev-list command failed: {}".format(origin, e))
            return

        indexed_hashes = {
            item['data']['commit']
            for item in ocean_backend.fetch(ignore_incremental=True,
                                            _filter=origin_filter)
        }
        hashes_to_delete = list(indexed_hashes - repo_hashes)

        batch = []
        for commit_hash in hashes_to_delete:
            batch.append(commit_hash)

            # Flush only once the batch is full
            if len(batch) == MAX_BULK_UPDATE_SIZE:
                _purge(batch)
                batch = []

        # Flush the last, partially filled batch
        if batch:
            _purge(batch)

        for backend in (ocean_backend, enrich_backend):
            logger.debug(
                "[git] update-items {} commits deleted from {} with origin {}.".
                format(
                    len(hashes_to_delete),
                    backend.elastic.anonymize_url(backend.elastic.index_url),
                    origin))