예제 #1
0
def test_git_clone():
    """Exercise git_clone against a remote repo, with and without reflect.

    The first call runs after the local checkout has been removed; the
    second call runs against the checkout left behind by the first and is
    expected to finish faster. With reflect=False the call is expected to
    return the local path; with reflect=True it is expected to return an
    object exposing that path as ``.path``.
    """
    # FIXME: THIS ONE IS CAUSING SOME INTERESTING PROBLEMS?
    from metrique.utils import git_clone, safestr, remove_file
    uri = 'https://github.com/kejbaly2/tornadohttp.git'
    local_path = os.path.join(cache_dir, safestr(uri))
    # start from a clean slate so the first call cannot reuse a checkout
    remove_file(local_path, force=True)

    try:
        _t = time()
        repo = git_clone(uri, pull=False, reflect=False, cache_dir=cache_dir)
        # take the timing right after the call so it covers the clone only
        not_cached = time() - _t
        assert repo == local_path

        _t = time()
        repo = git_clone(uri, pull=False, reflect=True, cache_dir=cache_dir)
        cached = time() - _t

        assert repo.path == local_path
        assert cached < not_cached

        git_clone(uri, pull=True, reflect=False, cache_dir=cache_dir)
    finally:
        # always drop the checkout, even when an assertion or clone fails,
        # so a failed run does not leave state behind in cache_dir
        remove_file(local_path, force=True)
예제 #2
0
def test_git_clone():
    """Exercise git_clone against a remote repo, with and without reflect.

    The first call runs after the local checkout has been removed; the
    second call runs against the checkout left behind by the first and is
    expected to finish faster. With reflect=False the call is expected to
    return the local path; with reflect=True it is expected to return an
    object exposing that path as ``.path``.
    """
    # FIXME: THIS ONE IS CAUSING SOME INTERESTING PROBLEMS?
    from metrique.utils import git_clone, safestr, remove_file
    uri = 'https://github.com/kejbaly2/tornadohttp.git'
    local_path = os.path.join(cache_dir, safestr(uri))
    # start from a clean slate so the first call cannot reuse a checkout
    remove_file(local_path, force=True)

    try:
        _t = time()
        repo = git_clone(uri, pull=False, reflect=False, cache_dir=cache_dir)
        # take the timing right after the call so it covers the clone only
        not_cached = time() - _t
        assert repo == local_path

        _t = time()
        repo = git_clone(uri, pull=False, reflect=True, cache_dir=cache_dir)
        cached = time() - _t

        assert repo.path == local_path
        assert cached < not_cached

        git_clone(uri, pull=True, reflect=False, cache_dir=cache_dir)
    finally:
        # always drop the checkout, even when an assertion or clone fails,
        # so a failed run does not leave state behind in cache_dir
        remove_file(local_path, force=True)
예제 #3
0
    def get_objects(self, uri, pull=True, **kwargs):
        '''
        Walk through repo commits to generate a list of repo commit
        objects.

        Each object has the following properties:
            * repo uri
            * general commit info
            * files added, removed fnames
            * lines added, removed
            * acked_by
            * signed_off_by
            * resolves
            * related

        The generated objects are appended to ``self.objects`` and the
        clone handle is kept on ``self.repo``.

        :param uri: repository uri, handed to git_clone
        :param pull: forwarded to git_clone
        :param kwargs: forwarded to the parent class' get_objects
        :returns: the result of the parent class' get_objects
        '''
        self.repo = repo = git_clone(uri, pull=pull, reflect=True)
        # get a full list of all commit SHAs in the repo (all branches)
        cmd = 'git rev-list --all'
        output = sys_call(cmd, cwd=repo.path)
        repo_shas = set(x.strip() for x in output.split('\n') if x)
        logger.debug("Total Commits: %s", len(repo_shas))

        cmd = 'git --no-pager log --all --format=sha:%H --numstat'
        # run inside the clone, like rev-list above; without cwd the log
        # would be taken from the process' current working directory
        output = sys_call(cmd, cwd=repo.path)
        all_logs = re.sub('\n+', '\n', output)
        # split on line-leading markers only, so a path that happens to
        # contain 'sha:' does not cut a commit record in half
        chunks = re.split(r'(?m)^sha:', all_logs)
        c_logs = [x for x in [s.strip() for s in chunks] if x]

        _end = None  # once was true, always is true...
        objs = []
        for c_log in c_logs:
            # first line is the sha, the rest are the numstat lines
            sha, _, all_changes = c_log.partition('\n')
            # NOTE: a failing lookup propagates to the caller; no
            # per-commit error object is recorded
            c = repo.get_object(sha)
            # FIXME: not normalizing to UTC
            _start = ts2dt(c.commit_time)

            # and some basic stuff...
            obj = dict(_oid=sha,
                       _start=_start,
                       _end=_end,
                       repo_uri=uri,
                       tree=c.tree,
                       parents=c.parents,
                       author=c.author,
                       committer=c.committer,
                       author_time=c.author_time,
                       message=c.message,
                       mergetag=c.mergetag,
                       extra=c.extra)

            # FIXME: sql doesn't nest well..
            files = {}
            for _file in all_changes.split('\n'):
                _file = _file.strip()
                if not _file:
                    continue
                # numstat line: <added> TAB <removed> TAB <path>; bound
                # the split so everything after the second tab is the path
                added, removed, fname = _file.split('\t', 2)
                # '-' stands in for a line count git does not report
                # (binary files)
                added = 0 if added == '-' else int(added)
                removed = 0 if removed == '-' else int(removed)
                files[fname] = {'added': added, 'removed': removed}
            obj['files'] = files

            # file +/- totals
            obj['added'] = sum(v['added'] for v in files.values())
            obj['removed'] = sum(v['removed'] for v in files.values())

            # extract interesting bits from the message
            obj['acked_by'] = acked_by_re.findall(c.message)
            obj['signed_off_by'] = signed_off_by_re.findall(c.message)
            obj['resolves'] = resolves_re.findall(c.message)
            obj['related'] = related_re.findall(c.message)
            objs.append(obj)
        self.objects.extend(objs)

        return super(Commit, self).get_objects(**kwargs)
예제 #4
0
파일: commit.py 프로젝트: kejbaly2/metrique
    def get_objects(self, uri, pull=True, **kwargs):
        """
        Walk through repo commits to generate a list of repo commit
        objects.

        Each object has the following properties:
            * repo uri
            * general commit info
            * files added, removed fnames
            * lines added, removed
            * acked_by
            * signed_off_by
            * resolves
            * related

        The generated objects are appended to ``self.objects`` and the
        clone handle is kept on ``self.repo``.

        :param uri: repository uri, handed to git_clone
        :param pull: forwarded to git_clone
        :param kwargs: forwarded to the parent class' get_objects
        :returns: the result of the parent class' get_objects
        """
        self.repo = repo = git_clone(uri, pull=pull, reflect=True)
        # get a full list of all commit SHAs in the repo (all branches)
        cmd = "git rev-list --all"
        output = sys_call(cmd, cwd=repo.path)
        repo_shas = set(x.strip() for x in output.split("\n") if x)
        logger.debug("Total Commits: %s", len(repo_shas))

        cmd = "git --no-pager log --all --format=sha:%H --numstat"
        # run inside the clone, like rev-list above; without cwd the log
        # would be taken from the process' current working directory
        output = sys_call(cmd, cwd=repo.path)
        all_logs = re.sub("\n+", "\n", output)
        # split on line-leading markers only, so a path that happens to
        # contain "sha:" does not cut a commit record in half
        chunks = re.split(r"(?m)^sha:", all_logs)
        c_logs = [x for x in [s.strip() for s in chunks] if x]

        _end = None  # once was true, always is true...
        objs = []
        for c_log in c_logs:
            # first line is the sha, the rest are the numstat lines
            sha, _, all_changes = c_log.partition("\n")
            # NOTE: a failing lookup propagates to the caller; no
            # per-commit error object is recorded
            c = repo.get_object(sha)
            # FIXME: not normalizing to UTC
            _start = ts2dt(c.commit_time)

            # and some basic stuff...
            obj = dict(
                _oid=sha,
                _start=_start,
                _end=_end,
                repo_uri=uri,
                tree=c.tree,
                parents=c.parents,
                author=c.author,
                committer=c.committer,
                author_time=c.author_time,
                message=c.message,
                mergetag=c.mergetag,
                extra=c.extra,
            )

            # FIXME: sql doesn't nest well..
            files = {}
            for _file in all_changes.split("\n"):
                _file = _file.strip()
                if not _file:
                    continue
                # numstat line: <added> TAB <removed> TAB <path>; bound
                # the split so everything after the second tab is the path
                added, removed, fname = _file.split("\t", 2)
                # "-" stands in for a line count git does not report
                # (binary files)
                added = 0 if added == "-" else int(added)
                removed = 0 if removed == "-" else int(removed)
                files[fname] = {"added": added, "removed": removed}
            obj["files"] = files

            # file +/- totals
            obj["added"] = sum(v["added"] for v in files.values())
            obj["removed"] = sum(v["removed"] for v in files.values())

            # extract interesting bits from the message
            obj["acked_by"] = acked_by_re.findall(c.message)
            obj["signed_off_by"] = signed_off_by_re.findall(c.message)
            obj["resolves"] = resolves_re.findall(c.message)
            obj["related"] = related_re.findall(c.message)
            objs.append(obj)
        self.objects.extend(objs)

        return super(Commit, self).get_objects(**kwargs)