Esempio n. 1
0
    # Minimum absolute net line change (|insertions - deletions|) above which
    # a change is flagged as a refactor.
    threshold = 5
    repo = Repository(
        working_dir='git://github.com/CamDavidsonPilon/lifelines.git')
    fch = repo.file_change_history(limit=None, extensions=['py'])

    # Create the derived columns up front so the column order stays fixed.
    fch['file_owner'] = ''
    fch['refactor'] = 0
    # Integer-divide the index by the number of nanoseconds in one day.
    # NOTE(review): assumes the index is a nanosecond-resolution DatetimeIndex,
    # which would make this "whole days since the epoch" - confirm.
    fch['timestamp'] = fch.index.astype(np.int64) // (24 * 3600 * 10**9)
    fch['observed'] = False
    # Move the index into an ordinary column so every row gets its own integer
    # label. The former bare ``fch.reindex()`` call only copied the frame, so
    # it has been dropped.
    fch = fch.reset_index()

    # add in the file owner and whether or not each item is a refactor
    # ``DataFrame.set_value`` was removed in pandas 1.0, so whole columns are
    # assigned instead of writing cell by cell.
    fch['file_owner'] = [
        repo.file_owner(rev, filename)
        for rev, filename in zip(fch['rev'], fch['filename'])
    ]
    fch['refactor'] = (
        (fch['insertions'] - fch['deletions']).abs() > threshold
    ).astype(np.int64)

    # add in the time since column
    # For each change, look for later changes to the same file that were
    # flagged as a refactor.
    fch['time_until_refactor'] = 0
    for idx, row in fch.iterrows():
        ts = None
        # Rows with a strictly greater timestamp, refactor == 1 and the same
        # filename as the current row.
        chunk = fch[(fch['timestamp'] > row.timestamp) & (fch['refactor'] == 1)
                    & (fch['filename'] == row.filename)]
        if chunk.shape[0] > 0:
            # A later refactor exists: keep its earliest timestamp and mark
            # this row as observed.
            ts = chunk['timestamp'].min()
            # NOTE(review): DataFrame.set_value was deprecated in pandas 0.21
            # and removed in 1.0; ``fch.at[idx, 'observed'] = True`` is the
            # replacement - confirm the pandas version in use.
            fch.set_value(idx, 'observed', True)
        else:
Esempio n. 2
0
if __name__ == '__main__':
    # Minimum absolute net line change (|insertions - deletions|) above which
    # a change is flagged as a refactor.
    threshold = 100
    repo = Repository(working_dir='git://github.com/scikit-learn/scikit-learn.git', verbose=True)
    fch = repo.file_change_history(limit=None, extensions=['py'])

    # Create the derived columns up front so the column order stays fixed.
    fch['file_owner'] = ''
    fch['refactor'] = 0
    # Integer-divide the index by the number of nanoseconds in one day.
    # NOTE(review): assumes the index is a nanosecond-resolution DatetimeIndex,
    # which would make this "whole days since the epoch" - confirm.
    fch['timestamp'] = fch.index.astype(np.int64) // (24 * 3600 * 10**9)
    fch['observed'] = False
    # Move the index into an ordinary column so every row gets its own integer
    # label. The former bare ``fch.reindex()`` call only copied the frame, so
    # it has been dropped.
    fch = fch.reset_index()

    # add in the file owner and whether or not each item is a refactor
    # ``DataFrame.set_value`` was removed in pandas 1.0, so whole columns are
    # assigned instead of writing cell by cell.
    fch['file_owner'] = [
        repo.file_owner(rev, filename)
        for rev, filename in zip(fch['rev'], fch['filename'])
    ]
    fch['refactor'] = (
        (fch['insertions'] - fch['deletions']).abs() > threshold
    ).astype(np.int64)

    # add in the time since column
    # For each change, look for later changes to the same file that were
    # flagged as a refactor.
    fch['time_until_refactor'] = 0
    for idx, row in fch.iterrows():
        ts = None
        # Rows with a strictly greater timestamp, refactor == 1 and the same
        # filename as the current row.
        chunk = fch[(fch['timestamp'] > row.timestamp) & (fch['refactor'] == 1) & (fch['filename'] == row.filename)]
        if chunk.shape[0] > 0:
            # A later refactor exists: keep its earliest timestamp and mark
            # this row as observed.
            ts = chunk['timestamp'].min()
            # NOTE(review): DataFrame.set_value was deprecated in pandas 0.21
            # and removed in 1.0; ``fch.at[idx, 'observed'] = True`` is the
            # replacement - confirm the pandas version in use.
            fch.set_value(idx, 'observed', True)
        else:
            # No later refactor for this file: fall back to the latest
            # timestamp in the whole history; 'observed' keeps its False
            # default. The code that consumes ``ts`` is outside this excerpt.
            ts = fch['timestamp'].max()
Esempio n. 3
0
import lifelines
import matplotlib.pyplot as plt

# Minimum absolute net line change (|insertions - deletions|) above which a
# change is flagged as a refactor.
threshold = 20
repo = Repository(working_dir='git://github.com/ogr3/race-management-system.git', verbose=True)
fch = repo.file_change_history(limit=100000, extensions=['py', 'pyx', 'h', 'c', 'cpp', 'java', 'xml'])
# Create the derived columns up front so the column order stays fixed.
fch['file_owner'] = ''
fch['refactor'] = 0
# Integer-divide the index by the number of nanoseconds in one day.
# NOTE(review): assumes the index is a nanosecond-resolution DatetimeIndex,
# which would make this "whole days since the epoch" - confirm.
fch['timestamp'] = fch.index.astype(np.int64) // (24 * 3600 * 10**9)
fch['observed'] = False
# Move the index into an ordinary column so every row gets its own integer
# label. The former bare ``fch.reindex()`` call only copied the frame, so it
# has been dropped.
fch = fch.reset_index()

# add in the file owner and whether or not each item is a refactor
# ``DataFrame.set_value`` was removed in pandas 1.0, so whole columns are
# assigned instead of writing cell by cell.
fch['file_owner'] = [
    repo.file_owner(rev, filename, committer=True)
    for rev, filename in zip(fch['rev'], fch['filename'])
]
fch['refactor'] = (
    (fch['insertions'] - fch['deletions']).abs() > threshold
).astype(np.int64)

# add in the time since column
# For each change, look for later changes to the same file that were flagged
# as a refactor.
fch['time_until_refactor'] = 0
for idx, row in fch.iterrows():
    ts = None
    # Rows with a strictly greater timestamp, refactor == 1 and the same
    # filename as the current row.
    chunk = fch[(fch['timestamp'] > row.timestamp) & (fch['refactor'] == 1) & (fch['filename'] == row.filename)]
    if chunk.shape[0] > 0:
        # A later refactor exists: keep its earliest timestamp and mark this
        # row as observed.
        ts = chunk['timestamp'].min()
        # NOTE(review): DataFrame.set_value was deprecated in pandas 0.21 and
        # removed in 1.0; ``fch.at[idx, 'observed'] = True`` is the
        # replacement - confirm the pandas version in use.
        fch.set_value(idx, 'observed', True)
    else:
        # No later refactor for this file: fall back to the latest timestamp
        # in the whole history; 'observed' keeps its False default. The code
        # that consumes ``ts`` is outside this excerpt.
        ts = fch['timestamp'].max()