Ejemplo n.º 1
0
    def test_email_mapping(self):
        # Parse a git-style author line first so the shared mapping learns the name,
        # then check that an svn-style header resolved through the same mapping
        # reports that name.
        mapping = Contributor.Mapping()
        git_line = 'Author: Jonathan Bedard <*****@*****.**>'
        svn_line = 'r266751 | [email protected] | 2020-09-08 14:33:42 -0700 (Tue, 08 Sep 2020) | 10 lines'

        Contributor.from_scm_log(git_line, mapping)
        resolved = Contributor.from_scm_log(svn_line, mapping)

        self.assertEqual(resolved.name, 'Jonathan Bedard')
        self.assertEqual(resolved.emails, ['*****@*****.**'])
Ejemplo n.º 2
0
    def test_author_mapping(self):
        # Two author lines parsed against one mapping are expected to yield a
        # single contributor whose email list has two entries.
        mapping = Contributor.Mapping()
        Contributor.from_scm_log('Author: Jonathan Bedard <*****@*****.**>', mapping)
        merged = Contributor.from_scm_log('Author: Jonathan Bedard <*****@*****.**>', mapping)

        expected_emails = ['*****@*****.**', '*****@*****.**']
        self.assertEqual(merged.name, 'Jonathan Bedard')
        self.assertEqual(merged.emails, expected_emails)
Ejemplo n.º 3
0
    def test_git_svn_log(self):
        # The address in this author line carries a trailing '@<uuid>' suffix;
        # the parsed contributor is expected to report a plain email.
        author_line = 'Author: Jonathan Bedard <[email protected]@268f45cc-cd09-0410-ab3c-d52691b4dbfc>'
        parsed = Contributor.from_scm_log(author_line)

        self.assertEqual(parsed.name, 'Jonathan Bedard')
        self.assertEqual(parsed.emails, ['*****@*****.**'])
Ejemplo n.º 4
0
    def test_short_svn_log(self):
        # With no mapping supplied, an svn-style header only provides an address,
        # so the same value is expected for both the name and the email.
        header = 'r266751 | [email protected] | 2020-09-08 14:33:42 -0700 (Tue, 08 Sep 2020) | 1 line'
        parsed = Contributor.from_scm_log(header)

        self.assertEqual(parsed.name, '*****@*****.**')
        self.assertEqual(parsed.emails, ['*****@*****.**'])
Ejemplo n.º 5
0
    def test_json_encode(self):
        # Serialize a Commit with Commit.Encoder, load the JSON back into plain
        # Python objects and compare against the expected dictionary.
        author = Contributor.from_scm_log('Author: Jonathan Bedard <*****@*****.**>')

        expected = {
            'revision': 1,
            'hash': 'c3bd784f8b88bd03f64467ddd3304ed8be28acbe',
            'branch': 'main',
            'identifier': '1@main',
            'timestamp': 1000,
            'order': 0,
            'author': {
                'name': 'Jonathan Bedard',
                'emails': ['*****@*****.**'],
            },
            'message': 'Message',
        }

        commit = Commit(
            revision=1,
            hash='c3bd784f8b88bd03f64467ddd3304ed8be28acbe',
            identifier='1@main',
            timestamp=1000,
            author=author,
            message='Message',
        )
        encoded = json.dumps(commit, cls=Commit.Encoder)

        self.assertDictEqual(expected, json.loads(encoded))
Ejemplo n.º 6
0
    def _args_from_content(self, content, include_log=True):
        """
        Extract Commit keyword arguments from the text of one git log entry.

        Only the first four lines are searched for 'Author' and 'CommitDate'
        headers. Lines from index 5 onward form the message, each with its first
        four characters removed. The revision is taken from the last
        GIT_SVN_REVISION match found in that message.

        :param content: Text of a single log entry.
        :param include_log: When False, 'message' is None in the result.
        :return: dict with 'revision', 'author', 'timestamp' and 'message'.
        """
        lines = content.splitlines()
        author = None
        timestamp = None

        for header in lines[:4]:
            fields = header.split(': ')
            key = fields[0]

            if key == 'Author':
                author = Contributor.from_scm_log(header.lstrip(), self.contributors)
                continue
            if key != 'CommitDate':
                continue

            # The last space-separated token is the UTC offset (sign, HH, MM).
            offset = header.split(' ')[-1]
            stamp = fields[1].lstrip()[:-len(offset)]
            date = datetime.strptime(stamp, '%a %b %d %H:%M:%S %Y ')

            delta = timedelta(hours=int(offset[1:3]), minutes=int(offset[3:5]))
            if offset[0] == '-':
                date = date + delta
            else:
                date = date - delta
            timestamp = int(calendar.timegm(date.timetuple())) - time.timezone

        message = ''.join(body[4:] + '\n' for body in lines[5:])
        matches = self.GIT_SVN_REVISION.findall(message)

        if matches:
            revision = int(matches[-1].split('@')[0])
        else:
            revision = None

        return {
            'revision': revision,
            'author': author,
            'timestamp': timestamp,
            'message': message.rstrip() if include_log else None,
        }
Ejemplo n.º 7
0
    def test_contributor(self):
        # A Commit should expose an equal author whether it was given the
        # Contributor itself or the value produced by Contributor.Encoder.
        author = Contributor.from_scm_log('Author: Jonathan Bedard <*****@*****.**>')

        from_object = Commit(revision=1, identifier=1, author=author)
        self.assertEqual(from_object.author, author)

        encoded_author = Contributor.Encoder().default(author)
        from_encoded = Commit(revision=1, identifier=1, author=encoded_author)
        self.assertEqual(from_encoded.author, author)
Ejemplo n.º 8
0
    def test_json_decode(self):
        # Round trip: encode a Commit to JSON, rebuild a Commit from the decoded
        # dictionary, and expect the two commits to compare equal.
        author = Contributor.from_scm_log('Author: Jonathan Bedard <*****@*****.**>')

        original = Commit(
            revision=1,
            hash='c3bd784f8b88bd03f64467ddd3304ed8be28acbe',
            identifier='1@main',
            timestamp=1000,
            author=Contributor.Encoder().default(author),
            message='Message',
        )

        serialized = json.dumps(original, cls=Commit.Encoder)
        restored = Commit(**json.loads(serialized))

        self.assertEqual(original, restored)
Ejemplo n.º 9
0
    def test_from_json_str(self):
        contributor = Contributor.from_scm_log(
            'Author: [email protected] <*****@*****.**>')
        self.assertEqual(
            Commit.from_json('''{
    "revision": 1,
    "hash": "c3bd784f8b88bd03f64467ddd3304ed8be28acbe",
    "identifier": "1@main",
    "timestamp": 1000,
    "author": "*****@*****.**",
    "message": "Message"
}'''),
            Commit(revision=1,
                   hash='c3bd784f8b88bd03f64467ddd3304ed8be28acbe',
                   identifier='1@main',
                   timestamp=1000,
                   author=Contributor.Encoder().default(contributor),
                   message='Message'),
        )
Ejemplo n.º 10
0
    def _args_from_content(self, content, include_log=True):
        """
        Extract Commit keyword arguments from the text of one svn log entry.

        The first line of `content` must match Contributor.SVN_AUTHOR_RE or
        Contributor.SVN_AUTHOR_Q_RE. Its 'revision' and 'date' groups supply the
        revision and timestamp, and the line itself is passed to
        Contributor.from_scm_log() to resolve the author. The message is
        everything from the third line onward, with trailing whitespace removed.

        :param content: Text of a single log entry.
        :param include_log: When False, 'message' is None in the result.
        :return: dict with 'revision', 'timestamp', 'author' and 'message', or an
            empty dict when `content` has no lines or its first line is not a
            recognized header.
        """
        lines = content.splitlines()
        if not lines:
            # Empty input is just another unparseable entry; report it the same
            # way as a non-matching header instead of raising IndexError.
            return {}

        leading = lines[0]
        match = Contributor.SVN_AUTHOR_RE.match(leading) or Contributor.SVN_AUTHOR_Q_RE.match(leading)
        if not match:
            return {}

        # Everything after the second space of the 'date' group is treated as the
        # UTC offset (sign, HH, MM); the part before it is the local date and time.
        raw_date = match.group('date')
        tz_diff = raw_date.split(' ', 2)[-1]
        date = datetime.strptime(raw_date[:-len(tz_diff)], '%Y-%m-%d %H:%M:%S ')

        # Shift the local time to UTC: add the offset for '-' zones, subtract otherwise.
        date += timedelta(
            hours=int(tz_diff[1:3]),
            minutes=int(tz_diff[3:5]),
        ) * (1 if tz_diff[0] == '-' else -1)

        return dict(
            revision=int(match.group('revision')),
            timestamp=int(calendar.timegm(date.timetuple())),
            author=Contributor.from_scm_log(leading, self.contributors),
            message='\n'.join(lines[2:]).rstrip() if include_log else None,
        )
Ejemplo n.º 11
0
 def test_git_no_author(self):
     # An author line whose bracketed part is 'devnull' should produce no contributor.
     author_line = 'Author: Automated Checkin <devnull>'
     self.assertIsNone(Contributor.from_scm_log(author_line))
Ejemplo n.º 12
0
    def commit(self, hash=None, revision=None, identifier=None, branch=None, tag=None, include_log=True):
        """
        Build a Commit for this Subversion checkout.

        The commit is selected by exactly one of `identifier`, `revision`, or a
        `branch`/`tag` name. When none is given, `self.branch` is used.

        :param hash: Not supported here; any truthy value raises ValueError.
        :param revision: Revision, parsed with Commit._parse_revision(). Cannot be
            combined with `identifier`, `branch` or `tag`.
        :param identifier: Identifier, parsed with Commit._parse_identifier(); it may
            carry its own branch and branch point. Cannot be combined with
            `revision` or `tag`.
        :param branch: Branch name. Must agree with any branch embedded in `identifier`.
        :param tag: Tag name. Cannot be combined with `branch`.
        :param include_log: When True, the log entry for the revision is fetched to
            obtain the author line and message. When False, or when that fetch is
            not usable, the author comes from 'Last Changed Author' and the
            message is None.
        :return: Commit with revision, branch, identifier, branch_point, timestamp,
            author and message.
        :raises ValueError: On conflicting arguments or a mismatched branch point.
        :raises self.Exception: When the identifier, branch or tag cannot be resolved.
        """
        if hash:
            raise ValueError('SVN does not support Git hashes')

        parsed_branch_point = None
        if identifier is not None:
            if revision:
                raise ValueError('Cannot define both revision and identifier')
            if tag:
                raise ValueError('Cannot define both tag and identifier')

            parsed_branch_point, identifier, parsed_branch = Commit._parse_identifier(identifier, do_assert=True)
            if parsed_branch:
                if branch and branch != parsed_branch:
                    raise ValueError(
                        "Caller passed both 'branch' and 'identifier', but specified different branches ({} and {})".format(
                            branch, parsed_branch,
                        ),
                    )
                branch = parsed_branch
            branch = branch or self.branch

            if branch == self.default_branch and parsed_branch_point:
                raise self.Exception('Cannot provide a branch point for a commit on the default branch')

            # Refresh the revision cache when this branch has nothing cached or the
            # identifier lies beyond what is cached. The default branch is refreshed
            # first when a different branch was requested.
            if not self._metadata_cache.get(branch, []) or identifier >= len(self._metadata_cache.get(branch, [])):
                if branch != self.default_branch:
                    self._cache_revisions(branch=self.default_branch)
                self._cache_revisions(branch=branch)
            if identifier > len(self._metadata_cache.get(branch, [])):
                raise self.Exception('Identifier {} cannot be found on the specified branch in the current checkout'.format(identifier))

            # A non-positive identifier on a non-default branch is re-expressed as an
            # index into the default branch: the branch's commit count plus the identifier.
            if identifier <= 0:
                if branch == self.default_branch:
                    raise self.Exception('Illegal negative identifier on the default branch')
                identifier = self._commit_count(branch=branch) + identifier
                if identifier < 0:
                    raise self.Exception('Identifier does not exist on the specified branch')

                branch = self.default_branch

            revision = self._metadata_cache[branch][identifier]
            info = self.info(cached=True, branch=branch, revision=revision)
            # Re-derive the branch from the revision itself, then make sure that
            # branch's cache covers the identifier.
            branch = self._branch_for(revision)
            if not self._metadata_cache.get(branch, []) or identifier >= len(self._metadata_cache.get(branch, [])):
                self._cache_revisions(branch=branch)

        elif revision:
            if branch:
                raise ValueError('Cannot define both branch and revision')
            if tag:
                raise ValueError('Cannot define both tag and revision')
            revision = Commit._parse_revision(revision, do_assert=True)
            branch = self._branch_for(revision)
            info = self.info(cached=True, revision=revision)

        else:
            if branch and tag:
                raise ValueError('Cannot define both branch and tag')

            # A tag takes the place of a branch; otherwise fall back to the current branch.
            branch = None if tag else branch or self.branch
            info = self.info(tag=tag) if tag else self.info(branch=branch)
            if not info:
                raise self.Exception("'{}' is not a recognized {}".format(
                    tag or branch,
                    'tag' if tag else 'branch',
                ))
            revision = int(info['Last Changed Rev'])
            if branch != self.default_branch:
                branch = self._branch_for(revision)

        # 'Last Changed Date' is '<date> <time> <offset> (<text>)'. Drop the
        # parenthesised part, split off the offset and shift the parsed local time
        # to UTC (add the offset for '-' zones, subtract it otherwise).
        date = info['Last Changed Date'].split(' (')[0]
        tz_diff = date.split(' ')[-1]
        date = datetime.strptime(date[:-len(tz_diff)], '%Y-%m-%d %H:%M:%S ')
        date += timedelta(
            hours=int(tz_diff[1:3]),
            minutes=int(tz_diff[3:5]),
        ) * (1 if tz_diff[0] == '-' else -1)

        # No usable identifier yet (None or 0): compute it from the revision,
        # refreshing the caches first when they do not cover this revision.
        if not identifier:
            if branch != self.default_branch and revision > self._metadata_cache.get(self.default_branch, [0])[-1]:
                self._cache_revisions(branch=self.default_branch)
            if revision not in self._metadata_cache.get(branch, []):
                self._cache_revisions(branch=branch)
            identifier = self._commit_count(revision=revision, branch=branch)

        branch_point = None if branch == self.default_branch else self._commit_count(branch=branch)
        if branch_point and parsed_branch_point and branch_point != parsed_branch_point:
            raise ValueError("Provided 'branch_point' does not match branch point of specified branch")

        # The default branch and any name containing '/' are used as-is under the
        # repository root; other names are looked up under 'branches/'.
        if branch == self.default_branch or '/' in branch:
            branch_arg = '^/{}'.format(branch)
        else:
            branch_arg = '^/branches/{}'.format(branch)

        log = run(
            [self.executable(), 'log', '-l', '1', '-r', str(revision), branch_arg], cwd=self.root_path,
            capture_output=True, encoding='utf-8',
        ) if include_log else None
        split_log = log.stdout.splitlines() if log else []
        # NOTE(review): this condition uses 'or', so a zero return code with fewer
        # than two output lines would still reach split_log[1] below and raise
        # IndexError -- confirm whether 'and' was intended.
        if log and (not log.returncode or len(split_log) >= 3):
            author_line = split_log[1]
            # Prefer a line matching SVN_PATCH_FROM_RE among the next few lines
            # over the log header when resolving the author.
            for line in split_log[2:8]:
                if Contributor.SVN_PATCH_FROM_RE.match(line):
                    author_line = line
                    break

            author = Contributor.from_scm_log(author_line, self.contributors)
            message = '\n'.join(split_log[3:-1])
        else:
            if include_log:
                self.log('Failed to connect to remote, cannot compute commit message')
            # NOTE(review): if 'Last Changed Author' is missing, email is None and
            # the "'@' in email" test below raises TypeError -- confirm info always has it.
            email = info.get('Last Changed Author')
            author = self.contributors.create(email, email) if '@' in email else self.contributors.create(email)
            message = None

        return Commit(
            revision=int(revision),
            branch=branch,
            identifier=identifier,
            branch_point=branch_point,
            timestamp=int(calendar.timegm(date.timetuple())),
            author=author,
            message=message,
        )
Ejemplo n.º 13
0
    def commit(self,
               hash=None,
               revision=None,
               identifier=None,
               branch=None,
               tag=None,
               include_log=True,
               include_identifier=True):
        """
        Build a Commit for this git checkout.

        The commit is selected by one of `revision`, `identifier`, `branch`, `tag`
        or `hash`. When none is given, 'HEAD' is used.

        :param hash: Commit hash, parsed with Commit._parse_hash().
        :param revision: Subversion revision; only accepted when `self.is_svn` is
            true. Cannot be combined with `hash` or `identifier`.
        :param identifier: Identifier, parsed with Commit._parse_identifier(); it may
            carry its own branch and branch point. Cannot be combined with
            `revision`, `hash` or `tag`.
        :param branch: Branch name. Must agree with any branch embedded in `identifier`.
        :param tag: Tag name. Cannot be combined with `branch` or `hash`.
        :param include_log: When False, the log is requested with '--format=short'
            and the returned message is None.
        :param include_identifier: When False, the identifier is not computed and
            both identifier and branch_point are None in the result.
        :return: Commit with repository_id, hash, revision, identifier, branch_point,
            branch, timestamp, order, author and message.
        :raises ValueError: On conflicting arguments or a mismatched branch point.
        :raises self.Exception: When a git invocation fails or the requested commit
            cannot be resolved.
        """
        # Only git-svn checkouts can convert revisions to fully qualified commits
        if revision and not self.is_svn:
            raise self.Exception(
                'This git checkout does not support SVN revisions')

        # Determine the hash for a provided Subversion revision
        elif revision:
            if hash:
                raise ValueError('Cannot define both hash and revision')

            revision = Commit._parse_revision(revision, do_assert=True)
            revision_log = run(
                [self.executable(), 'svn', 'find-rev', 'r{}'.format(revision)],
                cwd=self.root_path,
                capture_output=True,
                encoding='utf-8',
                timeout=3,
            )
            if revision_log.returncode:
                raise self.Exception(
                    "Failed to retrieve commit information for 'r{}'".format(
                        revision))
            hash = revision_log.stdout.rstrip()
            if not hash:
                raise self.Exception("Failed to find 'r{}'".format(revision))

        default_branch = self.default_branch
        parsed_branch_point = None
        log_format = ['-1'] if include_log else ['-1', '--format=short']

        # Determine the `git log` output and branch for a given identifier
        if identifier is not None:
            if revision:
                raise ValueError('Cannot define both revision and identifier')
            if hash:
                raise ValueError('Cannot define both hash and identifier')
            if tag:
                raise ValueError('Cannot define both tag and identifier')

            parsed_branch_point, identifier, parsed_branch = Commit._parse_identifier(
                identifier, do_assert=True)
            if parsed_branch:
                if branch and branch != parsed_branch:
                    raise ValueError(
                        "Caller passed both 'branch' and 'identifier', but specified different branches ({} and {})"
                        .format(
                            branch,
                            parsed_branch,
                        ), )
                branch = parsed_branch

            baseline = branch or 'HEAD'
            is_default = baseline == default_branch
            if baseline == 'HEAD':
                is_default = default_branch in self._branches_for(baseline)

            if is_default and parsed_branch_point:
                raise self.Exception(
                    'Cannot provide a branch point for a commit on the default branch'
                )

            # Count every commit on the default branch, or only those past the
            # default branch when the baseline is some other branch.
            base_count = self._commit_count(
                baseline if is_default else '{}..{}'.
                format(default_branch, baseline))

            if identifier > base_count:
                raise self.Exception(
                    'Identifier {} cannot be found on the specified branch in the current checkout'
                    .format(identifier))
            # Walk back from the tip by the distance between the tip's count and the identifier.
            log = run(
                [
                    self.executable(), 'log', '{}~{}'.format(
                        branch or 'HEAD', base_count - identifier)
                ] + log_format,
                cwd=self.root_path,
                capture_output=True,
                encoding='utf-8',
            )
            if log.returncode:
                raise self.Exception(
                    "Failed to retrieve commit information for 'i{}@{}'".
                    format(identifier, branch or 'HEAD'))

            # Negative identifiers are actually commits on the default branch, we will need to re-compute the identifier
            if identifier < 0 and is_default:
                raise self.Exception(
                    'Illegal negative identifier on the default branch')
            if identifier < 0:
                identifier = None

        # Determine the `git log` output for a given branch or tag
        elif branch or tag:
            if hash:
                raise ValueError('Cannot define both tag/branch and hash')
            if branch and tag:
                raise ValueError('Cannot define both tag and branch')

            log = run([self.executable(), 'log', branch or tag] + log_format,
                      cwd=self.root_path,
                      capture_output=True,
                      encoding='utf-8')
            if log.returncode:
                raise self.Exception(
                    "Failed to retrieve commit information for '{}'".format(
                        branch or tag))

        # Determine the `git log` output for a given hash
        else:
            hash = Commit._parse_hash(hash, do_assert=True)
            log = run([self.executable(), 'log', hash or 'HEAD'] + log_format,
                      cwd=self.root_path,
                      capture_output=True,
                      encoding='utf-8')
            if log.returncode:
                raise self.Exception(
                    "Failed to retrieve commit information for '{}'".format(
                        hash or 'HEAD'))

        # Fully define the hash from the `git log` output
        match = self.GIT_COMMIT.match(log.stdout.splitlines()[0])
        if not match:
            raise self.Exception('Invalid commit hash in git log')
        hash = match.group('hash')

        # A commit is often on multiple branches, the canonical branch is the one with the highest priority
        branch = self.prioritize_branches(self._branches_for(hash))

        # Compute the identifier if the function did not receive one and we were asked to
        if not identifier and include_identifier:
            identifier = self._commit_count(
                hash if branch ==
                default_branch else '{}..{}'.format(default_branch, hash))

        # Only compute the branch point if we're on something other than the default branch
        branch_point = None if not include_identifier or branch == default_branch else self._commit_count(
            hash) - identifier
        if branch_point and parsed_branch_point and branch_point != parsed_branch_point:
            raise ValueError(
                "Provided 'branch_point' does not match branch point of specified branch"
            )

        # Check the commit log for a git-svn revision
        logcontent = '\n'.join(line[4:]
                               for line in log.stdout.splitlines()[4:])
        matches = self.GIT_SVN_REVISION.findall(logcontent)
        revision = int(matches[-1].split('@')[0]) if matches else None

        # We only care about when a commit was committed
        commit_time = run(
            [self.executable(), 'show', '-s', '--format=%ct', hash],
            cwd=self.root_path,
            capture_output=True,
            encoding='utf-8',
        )
        if commit_time.returncode:
            raise self.Exception(
                'Failed to retrieve commit time for {}'.format(hash))
        timestamp = int(commit_time.stdout.lstrip())

        # Comparing commits in different repositories involves comparing timestamps. This is problematic because in git,
        # it's possible for a series of commits to share a commit time. To handle this case, we assign each commit a
        # zero-indexed "order" within its timestamp.
        order = 0
        while not identifier or order + 1 < identifier + (branch_point or 0):
            commit_time = run(
                [
                    self.executable(), 'show', '-s', '--format=%ct',
                    '{}~{}'.format(hash, order + 1)
                ],
                cwd=self.root_path,
                capture_output=True,
                encoding='utf-8',
            )
            # Stop at the first ancestor that cannot be read or has a different commit time.
            if commit_time.returncode:
                break
            if int(commit_time.stdout.lstrip()) != timestamp:
                break
            order += 1

        return Commit(
            repository_id=self.id,
            hash=hash,
            revision=revision,
            identifier=identifier if include_identifier else None,
            branch_point=branch_point,
            branch=branch,
            timestamp=timestamp,
            order=order,
            author=Contributor.from_scm_log(log.stdout.splitlines()[1],
                                            self.contributors),
            message=logcontent if include_log else None,
        )
Ejemplo n.º 14
0
    def commit(self,
               hash=None,
               revision=None,
               identifier=None,
               branch=None,
               tag=None,
               include_log=True):
        """
        Build a Commit for this git checkout.

        The commit is selected by one of `revision`, `identifier`, `branch`, `tag`
        or `hash`. When none is given, 'HEAD' is used.

        :param hash: Commit hash, parsed with Commit._parse_hash().
        :param revision: Subversion revision; only accepted when `self.is_svn` is
            true. Cannot be combined with `hash` or `identifier`.
        :param identifier: Identifier, parsed with Commit._parse_identifier(); it may
            carry its own branch and branch point. Cannot be combined with
            `revision`, `hash` or `tag`.
        :param branch: Branch name. Must agree with any branch embedded in `identifier`.
        :param tag: Tag name. Cannot be combined with `branch` or `hash`.
        :param include_log: When False, the log is requested with '--format=short'
            and the returned message is None.
        :return: Commit with hash, revision, identifier, branch_point, branch,
            timestamp, author and message.
        :raises ValueError: On conflicting arguments or a mismatched branch point.
        :raises self.Exception: When a git invocation fails or the requested commit
            cannot be resolved.
        """
        # Revisions are only meaningful when this checkout reports itself as git-svn.
        if revision and not self.is_svn:
            raise self.Exception(
                'This git checkout does not support SVN revisions')
        # Translate the Subversion revision into a hash.
        elif revision:
            if hash:
                raise ValueError('Cannot define both hash and revision')

            revision = Commit._parse_revision(revision, do_assert=True)
            revision_log = run(
                [self.executable(), 'svn', 'find-rev', 'r{}'.format(revision)],
                cwd=self.root_path,
                capture_output=True,
                encoding='utf-8',
                timeout=3,
            )
            if revision_log.returncode:
                raise self.Exception(
                    "Failed to retrieve commit information for 'r{}'".format(
                        revision))
            hash = revision_log.stdout.rstrip()
            if not hash:
                raise self.Exception("Failed to find 'r{}'".format(revision))

        default_branch = self.default_branch
        parsed_branch_point = None
        log_format = ['-1'] if include_log else ['-1', '--format=short']

        # Case 1: resolve the log entry and branch from an identifier.
        if identifier is not None:
            if revision:
                raise ValueError('Cannot define both revision and identifier')
            if hash:
                raise ValueError('Cannot define both hash and identifier')
            if tag:
                raise ValueError('Cannot define both tag and identifier')

            parsed_branch_point, identifier, parsed_branch = Commit._parse_identifier(
                identifier, do_assert=True)
            if parsed_branch:
                if branch and branch != parsed_branch:
                    raise ValueError(
                        "Caller passed both 'branch' and 'identifier', but specified different branches ({} and {})"
                        .format(
                            branch,
                            parsed_branch,
                        ), )
                branch = parsed_branch

            baseline = branch or 'HEAD'
            is_default = baseline == default_branch
            if baseline == 'HEAD':
                is_default = default_branch in self._branches_for(baseline)

            if is_default and parsed_branch_point:
                raise self.Exception(
                    'Cannot provide a branch point for a commit on the default branch'
                )

            # Count every commit on the default branch, or only those past the
            # default branch when the baseline is some other branch.
            base_count = self._commit_count(
                baseline if is_default else '{}..{}'.
                format(default_branch, baseline))

            if identifier > base_count:
                raise self.Exception(
                    'Identifier {} cannot be found on the specified branch in the current checkout'
                    .format(identifier))
            # Walk back from the tip by the distance between the tip's count and the identifier.
            log = run(
                [
                    self.executable(), 'log', '{}~{}'.format(
                        branch or 'HEAD', base_count - identifier)
                ] + log_format,
                cwd=self.root_path,
                capture_output=True,
                encoding='utf-8',
            )
            if log.returncode:
                raise self.Exception(
                    "Failed to retrieve commit information for 'i{}@{}'".
                    format(identifier, branch or 'HEAD'))

            # Negative identifiers are actually commits on the default branch, we will need to re-compute the identifier
            if identifier < 0 and is_default:
                raise self.Exception(
                    'Illegal negative identifier on the default branch')
            if identifier < 0:
                identifier = None

        # Case 2: resolve the log entry from a branch or tag name.
        elif branch or tag:
            if hash:
                raise ValueError('Cannot define both tag/branch and hash')
            if branch and tag:
                raise ValueError('Cannot define both tag and branch')

            log = run([self.executable(), 'log', branch or tag] + log_format,
                      cwd=self.root_path,
                      capture_output=True,
                      encoding='utf-8')
            if log.returncode:
                raise self.Exception(
                    "Failed to retrieve commit information for '{}'".format(
                        branch or tag))

        # Case 3: resolve the log entry from a hash, defaulting to HEAD.
        else:
            hash = Commit._parse_hash(hash, do_assert=True)
            log = run([self.executable(), 'log', hash or 'HEAD'] + log_format,
                      cwd=self.root_path,
                      capture_output=True,
                      encoding='utf-8')
            if log.returncode:
                raise self.Exception(
                    "Failed to retrieve commit information for '{}'".format(
                        hash or 'HEAD'))

        # The first line of the log output supplies the full hash.
        match = self.GIT_COMMIT.match(log.stdout.splitlines()[0])
        if not match:
            raise self.Exception('Invalid commit hash in git log')
        hash = match.group('hash')

        # Pick one branch out of all the branches that contain this hash.
        branch = self.prioritize_branches(self._branches_for(hash))

        # Compute the identifier when none was supplied (or it was reset above).
        if not identifier:
            identifier = self._commit_count(
                hash if branch ==
                default_branch else '{}..{}'.format(default_branch, hash))
        # Off the default branch, the branch point is the total count minus the identifier.
        branch_point = None if branch == default_branch else self._commit_count(
            hash) - identifier
        if branch_point and parsed_branch_point and branch_point != parsed_branch_point:
            raise ValueError(
                "Provided 'branch_point' does not match branch point of specified branch"
            )

        # Take the revision from the first GIT_SVN_REVISION match in the raw log output.
        match = self.GIT_SVN_REVISION.search(log.stdout)
        revision = int(match.group('revision')) if match else None

        # '%ct' prints the commit time, which becomes the timestamp.
        commit_time = run(
            [self.executable(), 'show', '-s', '--format=%ct', hash],
            cwd=self.root_path,
            capture_output=True,
            encoding='utf-8',
        )
        if commit_time.returncode:
            raise self.Exception(
                'Failed to retrieve commit time for {}'.format(hash))

        # The author comes from the second log line; the message is every line from
        # index 4 onward with its first four characters removed.
        return Commit(
            hash=hash,
            revision=revision,
            identifier=identifier,
            branch_point=branch_point,
            branch=branch,
            timestamp=int(commit_time.stdout.lstrip()),
            author=Contributor.from_scm_log(log.stdout.splitlines()[1],
                                            self.contributors),
            message='\n'.join(line[4:] for line in log.stdout.splitlines()[4:])
            if include_log else None,
        )
Ejemplo n.º 15
0
 def test_invalid_log(self):
     # A line without a recognized prefix such as 'Author:' should be rejected with ValueError.
     bad_line = 'Jonathan Bedard <*****@*****.**>'
     self.assertRaises(ValueError, Contributor.from_scm_log, bad_line)
Ejemplo n.º 16
0
    def test_svn_patch_by_log(self):
        # A 'Patch by <name> <email> on <date>' line should yield that name and email.
        patch_line = 'Patch by Jonathan Bedard <*****@*****.**> on 2020-09-10'
        parsed = Contributor.from_scm_log(patch_line)

        self.assertEqual(parsed.name, 'Jonathan Bedard')
        self.assertEqual(parsed.emails, ['*****@*****.**'])
Ejemplo n.º 17
0
    def test_git_log(self):
        # A plain 'Author: <name> <email>' line should yield that name and email.
        author_line = 'Author: Jonathan Bedard <*****@*****.**>'
        parsed = Contributor.from_scm_log(author_line)

        self.assertEqual(parsed.name, 'Jonathan Bedard')
        self.assertEqual(parsed.emails, ['*****@*****.**'])
Ejemplo n.º 18
0
 def test_git_svn_no_author(self):
     # The '(no author)' placeholder with a '@<uuid>' suffix should produce no contributor.
     author_line = 'Author: (no author) <(no author)@268f45cc-cd09-0410-ab3c-d52691b4dbfc>'
     self.assertIsNone(Contributor.from_scm_log(author_line))