# Build a per-file change history for the lifelines repository and record,
# for every change, whether a later "refactor" of the same file exists.

# Net line delta (|insertions - deletions|) above which a change counts as a
# refactor.
threshold = 5

repo = Repository(
    working_dir='git://github.com/CamDavidsonPilon/lifelines.git')
fch = repo.file_change_history(limit=None, extensions=['py'])

# Placeholder columns; both are filled in below. Creating them here keeps the
# column order stable.
fch['file_owner'] = ''
fch['refactor'] = 0
# Integer-divide the index by the number of nanoseconds in one day.
# NOTE(review): assumes the history is indexed by nanosecond-resolution
# datetimes, which would make this "days since epoch" -- confirm.
fch['timestamp'] = fch.index.astype(np.int64) // (24 * 3600 * 10**9)
fch['observed'] = False
fch = fch.reindex()
fch = fch.reset_index()

# add in the file owner and whether or not each item is a refactor
fch['file_owner'] = [
    repo.file_owner(rev, filename)
    for rev, filename in zip(fch['rev'], fch['filename'])
]
net_delta = (fch['insertions'] - fch['deletions']).abs()
fch['refactor'] = (net_delta > threshold).astype(np.int64)

# add in the time since column
fch['time_until_refactor'] = 0
# Neither the refactor rows nor the latest timestamp change inside the loop,
# so compute them once up front.
refactors = fch[fch['refactor'] == 1]
last_timestamp = fch['timestamp'].max()
for idx, row in fch.iterrows():
    # Refactors of the same file that happened strictly after this change.
    later = refactors[(refactors['timestamp'] > row.timestamp)
                      & (refactors['filename'] == row.filename)]
    if not later.empty:
        ts = later['timestamp'].min()
        # DataFrame.set_value was removed in pandas 1.0; .at is the
        # supported scalar setter.
        fch.at[idx, 'observed'] = True
    else:
        # NOTE(review): the original fragment was cut off right after this
        # `else:`. Falling back to the latest timestamp in the history is an
        # assumption -- confirm against the full script.
        ts = last_timestamp
    # NOTE(review): `ts` is not consumed in the visible fragment; presumably
    # the full script goes on to fill 'time_until_refactor' from it.
if __name__ == '__main__':
    # Build a per-file change history for the scikit-learn repository and
    # record, for every change, whether a later "refactor" of the same file
    # exists.

    # Net line delta (|insertions - deletions|) above which a change counts
    # as a refactor.
    threshold = 100

    repo = Repository(
        working_dir='git://github.com/scikit-learn/scikit-learn.git',
        verbose=True)
    fch = repo.file_change_history(limit=None, extensions=['py'])

    # Placeholder columns; both are filled in below. Creating them here
    # keeps the column order stable.
    fch['file_owner'] = ''
    fch['refactor'] = 0
    # Integer-divide the index by the number of nanoseconds in one day.
    # NOTE(review): assumes the history is indexed by nanosecond-resolution
    # datetimes, which would make this "days since epoch" -- confirm.
    fch['timestamp'] = fch.index.astype(np.int64) // (24 * 3600 * 10**9)
    fch['observed'] = False
    fch = fch.reindex()
    fch = fch.reset_index()

    # add in the file owner and whether or not each item is a refactor
    fch['file_owner'] = [
        repo.file_owner(rev, filename)
        for rev, filename in zip(fch['rev'], fch['filename'])
    ]
    net_delta = (fch['insertions'] - fch['deletions']).abs()
    fch['refactor'] = (net_delta > threshold).astype(np.int64)

    # add in the time since column
    fch['time_until_refactor'] = 0
    # Neither the refactor rows nor the latest timestamp change inside the
    # loop, so compute them once up front.
    refactors = fch[fch['refactor'] == 1]
    last_timestamp = fch['timestamp'].max()
    for idx, row in fch.iterrows():
        # Refactors of the same file that happened strictly after this
        # change.
        later = refactors[(refactors['timestamp'] > row.timestamp)
                          & (refactors['filename'] == row.filename)]
        if not later.empty:
            ts = later['timestamp'].min()
            # DataFrame.set_value was removed in pandas 1.0; .at is the
            # supported scalar setter.
            fch.at[idx, 'observed'] = True
        else:
            # No later refactor of this file: fall back to the latest
            # timestamp seen anywhere in the history.
            ts = last_timestamp
        # NOTE(review): `ts` is not consumed in the visible fragment;
        # presumably the full script goes on to fill 'time_until_refactor'
        # from it.
import lifelines
import matplotlib.pyplot as plt

# Build a per-file change history for the race-management-system repository
# and record, for every change, whether a later "refactor" of the same file
# exists.

# Net line delta (|insertions - deletions|) above which a change counts as a
# refactor.
threshold = 20

repo = Repository(
    working_dir='git://github.com/ogr3/race-management-system.git',
    verbose=True)
fch = repo.file_change_history(
    limit=100000,
    extensions=['py', 'pyx', 'h', 'c', 'cpp', 'java', 'xml'])

# Placeholder columns; both are filled in below. Creating them here keeps the
# column order stable.
fch['file_owner'] = ''
fch['refactor'] = 0
# Integer-divide the index by the number of nanoseconds in one day.
# NOTE(review): assumes the history is indexed by nanosecond-resolution
# datetimes, which would make this "days since epoch" -- confirm.
fch['timestamp'] = fch.index.astype(np.int64) // (24 * 3600 * 10**9)
fch['observed'] = False
fch = fch.reindex()
fch = fch.reset_index()

# add in the file owner and whether or not each item is a refactor
fch['file_owner'] = [
    repo.file_owner(rev, filename, committer=True)
    for rev, filename in zip(fch['rev'], fch['filename'])
]
net_delta = (fch['insertions'] - fch['deletions']).abs()
fch['refactor'] = (net_delta > threshold).astype(np.int64)

# add in the time since column
fch['time_until_refactor'] = 0
# Neither the refactor rows nor the latest timestamp change inside the loop,
# so compute them once up front.
refactors = fch[fch['refactor'] == 1]
last_timestamp = fch['timestamp'].max()
for idx, row in fch.iterrows():
    # Refactors of the same file that happened strictly after this change.
    later = refactors[(refactors['timestamp'] > row.timestamp)
                      & (refactors['filename'] == row.filename)]
    if not later.empty:
        ts = later['timestamp'].min()
        # DataFrame.set_value was removed in pandas 1.0; .at is the
        # supported scalar setter.
        fch.at[idx, 'observed'] = True
    else:
        # No later refactor of this file: fall back to the latest timestamp
        # seen anywhere in the history.
        ts = last_timestamp
    # NOTE(review): `ts` is not consumed in the visible fragment; presumably
    # the full script goes on to fill 'time_until_refactor' from it.