Example #1
0
    def test_parse_commits(self):
        """Parse the whole repository and spot-check the commit at index 1."""
        commits = list(parse_commits(self.repo_path))
        assert len(commits) == self.total_commits

        # Expected attribute values for the commit at index 1, checked in
        # declaration order.
        expected_fields = (
            ('hash_val', 'ed4dd8e797db7d6c1ce23980c24d94228d66b1d6'),
            ('author', 'tbonza'),
            ('author_email', '*****@*****.**'),
            ('author_timestamp', '2019-02-26T09:55:26-05:00'),
            ('committer', 'tbonza'),
            ('committer_email', '*****@*****.**'),
            ('committer_timestamp', '2019-02-26T09:55:26-05:00'),
        )
        second = commits[1]
        for field_name, wanted in expected_fields:
            assert getattr(second, field_name) == wanted
Example #2
0
    def test_new_parse_commits(self):
        """ Parse from last commit """
        start_hash = '35d8e493ef66bd8c01c15a519c15d9a6d31cb2f4'
        commits = list(parse_commits(self.repo_path, start_hash))

        # Starting from a known hash is expected to return one commit fewer
        # than parsing the whole repository.
        assert len(commits) == self.total_commits - 1

        # Expected attribute values for the commit at index 1, checked in
        # declaration order.
        expected_fields = (
            ('hash_val', 'ed4dd8e797db7d6c1ce23980c24d94228d66b1d6'),
            ('author', 'tbonza'),
            ('author_email', '*****@*****.**'),
            ('author_timestamp', '2019-02-26T09:55:26-05:00'),
            ('committer', 'tbonza'),
            ('committer_email', '*****@*****.**'),
            ('committer_timestamp', '2019-02-26T09:55:26-05:00'),
        )
        second = commits[1]
        for field_name, wanted in expected_fields:
            assert getattr(second, field_name) == wanted
Example #3
0
def repo_to_objects(owner: str, project: str, repopath: str, last_commit=""):
    """ Retrieve objects from last commit if exists

    This function is a generator so we can specify a buffer size
    when making commits to the database. Otherwise, the I/O would
    slow things way down.

    For each (message, commit) pair this yields, in order, an ``Info``,
    a ``Meta``, an ``Author`` and a ``Contrib`` (``contrib_id`` 0 for the
    committer), followed by one extra ``Contrib`` per ``Co-authored-by``
    entry found anywhere in the message body (``contrib_id`` 1, 2, ...).
    After all commits, one ``CommitFile`` is yielded per parsed file
    record, numbered from 0.

    :param owner: stored as ``Meta.owner_name``
        (presumably the git user, e.g. 'tbonza' -- confirm with caller)
    :param project: stored as ``Meta.project_name``
        (presumably the git repo name, e.g. 'EDS19' -- confirm with caller)
    :param repopath: path handed to the ``parse_*`` helpers
    :param last_commit: string of git commit hash last stored in database;
        when empty (or ``None``) the helpers are called without ``chash``
    :return: generator of populated model database objects
    :rtype: sqlalchemy database objects
    """
    # Only forward ``chash`` when a previously stored commit is known, so
    # the helpers fall back to their own default otherwise.
    parse_kwargs = {"chash": last_commit} if last_commit else {}

    cmts = parse_commits(repopath, **parse_kwargs)
    msgs = parse_messages(repopath, **parse_kwargs)
    fobjs = parse_committed_files(repopath, **parse_kwargs)

    # map objects to database objects

    coauthor_re = re.compile(r"Co-authored-by\:(.*?)<(.*?)>")
    for msg, cmt in zip(msgs, cmts):
        # Parse each timestamp once and reuse it for every row that needs it.
        committed_at = datetime.fromisoformat(cmt.committer_timestamp)
        message_at = datetime.fromisoformat(msg.timestamp)

        # Month is intentionally not zero-padded (e.g. "2019-2") to keep
        # the stored ``yearmo`` format unchanged.
        yearmo = f"{committed_at.year}-{committed_at.month}"

        msg_item = Info(commit_hash=msg.hash_val,
                        subject=msg.subject,
                        message=msg.message_body,
                        created=message_at)

        meta_item = Meta(commit_hash=msg.hash_val,
                         owner_name=owner,
                         project_name=project,
                         yearmo=yearmo)

        # NOTE(review): ``authored`` is filled from the *committer*
        # timestamp, as in the original code; ``cmt.author_timestamp``
        # may have been intended -- confirm before changing stored data.
        author_item = Author(commit_hash=cmt.hash_val,
                             name=cmt.author,
                             email=cmt.author_email,
                             authored=committed_at)

        contrib_item = Contrib(contrib_id=0,
                               commit_hash=cmt.hash_val,
                               name=cmt.committer,
                               email=cmt.committer_email,
                               contributed=committed_at)

        # All four rows are built before the first yield so a failing
        # constructor never leaves a commit half-emitted.
        yield msg_item
        yield meta_item
        yield author_item
        yield contrib_item

        # some commits will have multiple contributors
        #
        # ``findall`` scans the whole body. The previous ``re.match`` guard
        # only matched at position 0, so trailers placed after the message
        # text were never picked up.
        coauthors = coauthor_re.findall(msg.message_body)
        for contrib_id, (name, email) in enumerate(coauthors, start=1):
            yield Contrib(contrib_id=contrib_id,
                          commit_hash=msg.hash_val,
                          name=name.strip(),
                          email=email.strip(),
                          contributed=message_at)

    for file_id, fobj in enumerate(fobjs):
        yield CommitFile(file_id=file_id,
                         commit_hash=fobj.hash_val,
                         modified_file=fobj.file_path,
                         lines_added=make_digit(fobj.added, "fobj.added"),
                         lines_subtracted=make_digit(
                             fobj.deleted, "fobj.deleted"))