Ejemplo n.º 1
0
def commit_tree_to_frame(commit):
    """
    Build a data frame describing the tree of a single commit.

    :param commit: object exposing ``tree`` and ``hexsha`` attributes
        (presumably a GitPython ``Commit`` -- confirm against callers)
    :return: data frame built from the records gathered by
        ``_add_subtree``, with a ``hexsha`` column holding the commit's
        hash and the ``child_type`` column passed through
        ``su.categorize``
    """
    records = []
    # _add_subtree is expected to append to ``records`` in place, starting
    # from the commit's root tree labelled '.'.
    _add_subtree(records, commit.tree, '.')
    # Every row gets the same hash so frames of several commits can later
    # be told apart.
    frame = pd.DataFrame(records).assign(hexsha=commit.hexsha)
    child_types = frame['child_type']
    frame['child_type'] = su.categorize(child_types)
    return frame
Ejemplo n.º 2
0
def commits_to_frame(commits):
    """
    Convert commits into a data frame with one row per commit.

    The attributes listed in ``commit_attrs`` are read from every commit
    via ``su.objs_to_dicts``; dotted attribute paths become column names
    with the dots replaced by underscores (``author.name`` ->
    ``author_name``).

    :param commits: iterable of commit objects exposing the attributes in
        ``commit_attrs`` (presumably GitPython ``Commit`` objects --
        confirm against callers)
    :return: data frame sorted by ``committed_datetime`` in descending
        order, with a fresh 0..n-1 index
    """
    commit_attrs = (
        'hexsha', 'name_rev', 'size',
        'author.name', 'author.email',
        'authored_datetime', 'author_tz_offset',
        'committer.name', 'committer.email',
        'committed_datetime', 'committer_tz_offset',
        'encoding', 'message',
        'stats.total.files', 'stats.total.lines',
        'stats.total.insertions', 'stats.total.deletions',
        'stats.files')
    column_names = {attr: attr.replace('.', '_') for attr in commit_attrs}
    commit_frame = su.dicts_to_dataframe(list(
        su.objs_to_dicts(commits, commit_attrs)))
    commit_frame = commit_frame.rename(columns=column_names)
    # Keep only what follows the first space of ``name_rev``.  ``n`` must be
    # passed by keyword: it is keyword-only in pandas >= 2.0, where the
    # positional form raises TypeError.
    commit_frame['name_rev'] = commit_frame['name_rev'].str.split(
        ' ', n=1).str[-1]
    categorical_cols = (
        'name_rev', 'author_name', 'author_email',
        'committer_name', 'committer_email', 'encoding')
    for c in categorical_cols:
        commit_frame[c] = su.categorize(commit_frame[c])
    for c in ('authored_datetime', 'committed_datetime'):
        commit_frame[c] = commit_frame[c].astype('datetime64[ns]')
    # Escape newlines so each message stays on one line; regex=False makes
    # the literal replacement explicit regardless of the pandas default.
    commit_frame['message'] = commit_frame['message'].str.replace(
        '\n', '\\n', regex=False)
    commit_frame = commit_frame.sort_values(
        'committed_datetime', ascending=False).reset_index(drop=True)
    return commit_frame
Ejemplo n.º 3
0
def commit_trees_to_frame(commits):
    """
    Concatenate the tree frames of several commits into one data frame.

    :param commits: iterable of commits, each accepted by
        ``commit_tree_to_frame``
    :return: the concatenated frame with the ``hexsha``, ``tree``,
        ``child`` and ``child_type`` columns passed through
        ``su.categorize`` and the index renumbered from 0
    """
    per_commit_frames = (commit_tree_to_frame(commit) for commit in commits)
    combined: pd.DataFrame = pd.concat(per_commit_frames)
    # Categorize after concatenation so each column is converted once over
    # the values of all commits.
    for column in ('hexsha', 'tree', 'child', 'child_type'):
        combined[column] = su.categorize(combined[column])
    # Each per-commit frame carries its own index; replace the repeated
    # labels with a single running one.
    return combined.reset_index(drop=True)
Ejemplo n.º 4
0
def commits_to_actor_frame(commits):
    """
    Build a frame of distinct actors (authors and committers) of commits.

    :param commits: commits handed to ``extract_actors`` once for the
        ``author`` role and once for the ``committer`` role
    :return: data frame keyed by ``name`` and ``email`` (both passed
        through ``su.categorize``) with integer ``author_commits`` and
        ``committer_commits`` columns; those count columns are expected to
        be produced by ``extract_actors`` -- confirm there
    """
    key_columns = ('name', 'email')
    count_columns = ('author_commits', 'committer_commits')
    # Outer join keeps actors that appear in only one of the two roles.
    merged = pd.merge(
        extract_actors(commits, 'author', key_columns),
        extract_actors(commits, 'committer', key_columns),
        on=key_columns, how='outer')
    merged = merged.drop_duplicates()
    merged = merged.reset_index(drop=True)
    # Values missing after the outer join become 0 so the count columns
    # can be cast to int below.
    merged = merged.fillna(0)
    for column in key_columns:
        merged[column] = su.categorize(merged[column])
    for column in count_columns:
        merged[column] = merged[column].astype('int')
    return merged
Ejemplo n.º 5
0
Archivo: actor.py Proyecto: eamd/saapy
def connect_actors(actor_frame, connectivity_sets, connectivity_column):
    """
    Add a column naming the connectivity set each actor row belongs to.

    The mapping ``set id -> members`` is inverted into ``member -> set id``
    and stored, after ``su.categorize``, in ``connectivity_column``.  The
    frame is modified in place and also returned.

    :param actor_frame: data frame of actors; members of the sets are
        presumably labels of its index -- confirm against callers
    :param connectivity_sets: dict mapping a set identifier to an iterable
        of members; if a member is listed in several sets, the set
        iterated last wins
    :param connectivity_column: name of the column to create or overwrite
    :return: the same ``actor_frame`` object, with the new column

    Examples:

    same_actors = {
        'ccason': [3, 14, 15], 'clipka': [4, 5, 13],
        'wfpokorny': [11, 17], 'anshuarya': [0],
        'bentsm': [1], 'cbarton': [2], 'dbodor': [6],
        'jlecher': [7], 'jgrimbert': [8], 'nalvarez': [9],
        'selvik': [10], 'wverhelst': [12], 'gryken': [16],
        'github': [18]}
    actor_frame = connect_actors(actor_frame, same_actors, 'actor_id')
    """
    set_id_by_member = {
        member: set_id
        for set_id, members in connectivity_sets.items()
        for member in members}
    labels = pd.Series(set_id_by_member)
    actor_frame[connectivity_column] = su.categorize(labels)
    return actor_frame