class TestRemoteProperties(unittest.TestCase):
    """
    Property checks on a Repository created from a remote URL.

    The tests point at the git-pandas repository on GitHub. That is probably
    not ideal: mocking the git layer, or keeping a known static repository in
    this directory, would be a sturdier basis.
    """

    def setUp(self):
        # Repository receives a git:// URL rather than a local path.
        remote_url = 'git://github.com/wdm0006/git-pandas.git'
        self.repo = Repository(working_dir=remote_url, verbose=True)

    def tearDown(self):
        # The finalizer is invoked explicitly after every test.
        self.repo.__del__()

    def test_repo_name(self):
        self.assertEqual(self.repo.repo_name, 'git-pandas')

    def test_branches(self):
        found = list(self.repo.branches()['branch'].values)
        for expected in ('master', 'gh-pages'):
            self.assertIn(expected, found)

    def test_tags(self):
        found = list(self.repo.tags()['tag'].values)
        for expected in ('0.0.1', '0.0.2'):
            self.assertIn(expected, found)

    def test_is_bare(self):
        self.assertFalse(self.repo.is_bare())
def repository(path):
    """
    Print a series of example reports for the repository at ``path``.

    In order: whether the repository is bare, the first five commit-history
    rows (``*.py`` passed as include glob), the distinct committers, summed
    lines/insertions/deletions per committer, the first fifty file-change
    rows, the distinct file extensions, and summed insertions/deletions per
    extension. ``docs/*``, ``tests/*`` and ``Data/*`` are passed as ignore
    globs to both history calls.

    :param path: handed straight to ``Repository``
    :return: None
    """
    skipped = ['docs/*', 'tests/*', 'Data/*']
    repo = Repository(path)

    # is it bare?
    print('\nRepo bare?')
    print(repo.is_bare())
    print('\n')

    # commit history
    history = repo.commit_history(
        'HEAD', limit=None, include_globs=['*.py'], ignore_globs=skipped)
    print(history.head(5))

    # distinct committers, one per line
    print('\nCommiters:')
    committers = set(history['committer'].values)
    print(''.join(str(name) + '\n' for name in committers))
    print('\n')

    # everyone's contributions
    contribution_cols = ['committer', 'lines', 'insertions', 'deletions']
    per_committer = history.reindex(columns=contribution_cols).groupby(['committer'])
    per_committer = per_committer.agg(
        {'lines': np.sum, 'insertions': np.sum, 'deletions': np.sum})
    print(per_committer)

    # file change history, tagged with the text after the last dot of the name
    changes = repo.file_change_history('HEAD', limit=None, ignore_globs=skipped)
    changes['ext'] = changes['filename'].map(lambda name: name.rsplit('.', 1)[-1])
    print(changes.head(50))

    # distinct extensions, one per line
    print('\nExtensions Found:')
    extensions = set(changes['ext'].values)
    print(''.join(str(ext) + '\n' for ext in extensions))
    print('\n')

    # totals per extension
    per_ext = changes.reindex(columns=['ext', 'insertions', 'deletions']).groupby(['ext'])
    per_ext = per_ext.agg({'insertions': np.sum, 'deletions': np.sum})
    print(per_ext)
def author_minded(working_dir, frequency=None):
    """
    Summarise the commit history of a repository per author.

    One row is produced per distinct value of the ``author`` column of
    ``Repository(working_dir).commit_history()``, with these columns:

    * ``first`` / ``last``: smallest / largest index value of the author's commits
    * ``line_changes``: the author's share of the summed ``lines`` column
    * ``commits``: the author's share of the number of commits
    * ``median_commit_frequency``: median gap between consecutive commits, in whole days
    * ``max_dry_stretch``: largest gap between consecutive commits, in whole days
    * ``max_dayly_commit_run``: length of the longest run of consecutive,
      identical whole-day gaps that are ``<= frequency``; 0 when no gap qualifies

    The three gap columns are NaN for an author with a single commit.

    :param working_dir: passed to ``Repository`` as ``working_dir``
    :param frequency: largest whole-day gap that still counts towards a commit
        run; defaults to ``timedelta64(0, 'D')``
    :return: DataFrame indexed by author, in order of first appearance
    """
    from itertools import groupby

    # numpy is used through its namespace so that the builtin max/min stay usable
    import numpy as np
    from pandas import DataFrame
    from gitpandas import Repository

    if frequency is None:
        frequency = np.timedelta64(0, 'D')

    repo = Repository(working_dir=working_dir)
    commits = repo.commit_history()

    # An ordered list instead of a set: the same sequence drives the loop and
    # the index, and recent pandas versions refuse a set as DataFrame index.
    authors = list(commits.author.unique())
    tot_lines = float(commits.lines.sum())
    n_commits = float(len(commits))

    result = {
        'first': [],
        'last': [],
        'line_changes': [],
        'commits': [],
        'median_commit_frequency': [],
        'max_dry_stretch': [],
        'max_dayly_commit_run': [],
    }

    for author in authors:
        specific = commits[commits.author == author]
        result['first'].append(specific.index.min())
        result['last'].append(specific.index.max())
        result['line_changes'].append(specific.lines.sum() / tot_lines)
        result['commits'].append(len(specific) / n_commits)

        # gaps between neighbouring commits, taken over the reversed index
        gaps = np.diff(specific.index[::-1])
        if len(gaps) == 0:
            result['median_commit_frequency'].append(np.nan)
            result['max_dry_stretch'].append(np.nan)
            result['max_dayly_commit_run'].append(np.nan)
            continue

        result['median_commit_frequency'].append(
            np.median(gaps).astype('timedelta64[D]'))
        result['max_dry_stretch'].append(
            np.max(gaps).astype('timedelta64[D]'))

        # groupby() bundles consecutive identical whole-day gaps
        runs = [
            len(list(run))
            for gap, run in groupby(gaps.astype('timedelta64[D]'))
            if gap <= frequency
        ]
        # without the guard an author whose gaps all exceed `frequency`
        # made max() fail on an empty list
        result['max_dayly_commit_run'].append(max(runs) if runs else 0)

    return DataFrame(result, index=authors)
def main(repo, out_dir, clobber_output, verbose):
    """
    Write contributor data and citation data as JSON files.

    :param repo: repository location; a value starting with ``git@`` is first
        handed to ``Repository`` and replaced by that object's ``git_dir``
    :param out_dir: output folder; ``None`` selects ``OUT_SUBFOLDER`` below
        the current working directory
    :param clobber_output: forwarded to ``make_output_folder`` as ``overwrite``
    :param verbose: when truthy, the root logger level is set to 10
    :return: None
    """
    import logging
    from gitpandas import Repository

    if verbose:
        logging.getLogger().setLevel(logging.DEBUG)

    if repo.startswith("git@"):
        logging.info("Cloning repo %s", repo)
        repository = Repository(working_dir=repo)
        repo = repository.git_dir
        logging.info("Repo located at %s", repo)

    if out_dir is None:
        out_dir = os.path.join(os.getcwd(), OUT_SUBFOLDER)

    verify_local_repo_location(repo)
    # computed as in the original flow; nothing below reads it
    repo_name = os.path.basename(repo)
    make_output_folder(out_dir, overwrite=clobber_output)

    contributor_data = author_minded(repo)
    citation_data = pmc_data('SPSS')

    contributor_path = os.path.join(out_dir, 'contributor_data.json')
    logging.info("output path: %s", contributor_path)
    contributor_data.to_json(contributor_path, date_format='iso')

    citation_path = os.path.join(out_dir, 'citation_data.json')
    citation_data['citations'].to_json(citation_path)
def author_minded(working_dir, frequency=None):
    """
    Summarise the commit history of a repository per author.

    One row is produced per distinct value of the ``author`` column of
    ``Repository(working_dir).commit_history()``, with these columns:

    * ``first`` / ``last``: smallest / largest index value of the author's commits
    * ``line_changes``: the author's share of the summed ``lines`` column
    * ``commits``: the author's share of the number of commits
    * ``median_commit_frequency``: median gap between consecutive commits, in whole days
    * ``max_dry_stretch``: largest gap between consecutive commits, in whole days
    * ``max_dayly_commit_run``: length of the longest run of consecutive,
      identical whole-day gaps that are ``<= frequency``; 0 when no gap qualifies

    The three gap columns are NaN for an author with a single commit.

    :param working_dir: passed to ``Repository`` as ``working_dir``
    :param frequency: largest whole-day gap that still counts towards a commit
        run; defaults to ``timedelta64(0, 'D')``
    :return: DataFrame indexed by author, in order of first appearance
    """
    from itertools import groupby

    # numpy is used through its namespace so that the builtin max/min stay usable
    import numpy as np
    from pandas import DataFrame
    from gitpandas import Repository

    if frequency is None:
        frequency = np.timedelta64(0, 'D')

    repo = Repository(working_dir=working_dir)
    commits = repo.commit_history()

    # An ordered list instead of a set: the same sequence drives the loop and
    # the index, and recent pandas versions refuse a set as DataFrame index.
    authors = list(commits.author.unique())
    tot_lines = float(commits.lines.sum())
    n_commits = float(len(commits))

    result = {
        'first': [],
        'last': [],
        'line_changes': [],
        'commits': [],
        'median_commit_frequency': [],
        'max_dry_stretch': [],
        'max_dayly_commit_run': [],
    }

    for author in authors:
        specific = commits[commits.author == author]
        result['first'].append(specific.index.min())
        result['last'].append(specific.index.max())
        result['line_changes'].append(specific.lines.sum() / tot_lines)
        result['commits'].append(len(specific) / n_commits)

        # gaps between neighbouring commits, taken over the reversed index
        gaps = np.diff(specific.index[::-1])
        if len(gaps) == 0:
            result['median_commit_frequency'].append(np.nan)
            result['max_dry_stretch'].append(np.nan)
            result['max_dayly_commit_run'].append(np.nan)
            continue

        result['median_commit_frequency'].append(
            np.median(gaps).astype('timedelta64[D]'))
        result['max_dry_stretch'].append(
            np.max(gaps).astype('timedelta64[D]'))

        # groupby() bundles consecutive identical whole-day gaps
        runs = [
            len(list(run))
            for gap, run in groupby(gaps.astype('timedelta64[D]'))
            if gap <= frequency
        ]
        # without the guard an author whose gaps all exceed `frequency`
        # made max() fail on an empty list
        result['max_dayly_commit_run'].append(max(runs) if runs else 0)

    return DataFrame(result, index=authors)
def setUp(self):
    """
    Build a fresh repository with six commits in ``repos/repository1`` next to this file.

    :return:
    """
    here = str(os.path.dirname(os.path.abspath(__file__)))
    project_dir = here + os.sep + 'repos'
    repo_dir = here + os.sep + 'repos' + os.sep + 'repository1'

    # start from an empty project directory
    if os.path.exists(project_dir):
        shutil.rmtree(project_dir)
    os.makedirs(project_dir)

    if not os.path.exists(repo_dir):
        os.makedirs(repo_dir)

    # create an empty repo (but not bare)
    grepo = git.Repo.init(repo_dir)

    # first commit: a README
    readme_path = repo_dir + os.sep + 'README.md'
    with open(readme_path, 'w') as handle:
        handle.write('Sample README for a sample project\n')
    grepo.git.add('README.md')
    grepo.git.commit(m='first commit')

    # five further commits, each adding one python file
    for idx in range(5):
        source_path = repo_dir + os.sep + 'file_%d.py' % (idx, )
        with open(source_path, 'w') as handle:
            handle.write('import sys\nimport os\n')

        # pause before every commit
        time.sleep(2.0)
        grepo.git.add(all=True)
        grepo.git.commit(m='adding file_%d.py' % (idx, ))

    self.repo = Repository(working_dir=repo_dir, verbose=True)
def main():
    """
    Run each fetch step against the repository at ``../nixpkgs``, logging before every step.

    :return: None
    """
    repo = Repository(working_dir=os.path.abspath('../nixpkgs'))

    logger.info('fetching commit history')
    commit_history = fetch_commit_history(repo)

    # the hours estimate is derived from the commit history
    logger.info('fetching hours estimate')
    hours_estimate = fetch_hours_estimate(repo, commit_history)

    logger.info('fetching file change history')
    file_change_history = fetch_file_change_history(repo)

    # the change rate is derived from the file change history
    logger.info('fetching file change rate')
    file_change_rate = fetch_file_change_rate(repo, file_change_history)

    logger.info('fetching cumulative blame')
    cumulative_blame = fetch_cumulative_blame(repo)

    logger.info('fetching bus factor')
    bus_factor = fetch_bus_factor(repo)

    logger.info('fetching file owner')
    file_owner = fetch_file_owner(repo)

    logger.info('fetching punch card')
    punch_card = fetch_punch_card(repo)
def setUp(self):
    """
    Build a fresh repository with six commits in ``repos/repository1`` next to this file.

    :return:
    """
    here = str(os.path.dirname(os.path.abspath(__file__)))
    project_dir = here + os.sep + 'repos'
    repo_dir = here + os.sep + 'repos' + os.sep + 'repository1'

    # start from an empty project directory
    if os.path.exists(project_dir):
        shutil.rmtree(project_dir)
    os.makedirs(project_dir)

    if not os.path.exists(repo_dir):
        os.makedirs(repo_dir)

    # create an empty repo (but not bare)
    grepo = git.Repo.init(repo_dir)

    # first commit: a README
    readme_path = repo_dir + os.sep + 'README.md'
    with open(readme_path, 'w') as handle:
        handle.write('Sample README for a sample project\n')
    grepo.git.add('README.md')
    grepo.git.commit(m='first commit')

    # five further commits, each adding one python file
    for idx in range(5):
        source_path = repo_dir + os.sep + 'file_%d.py' % (idx, )
        with open(source_path, 'w') as handle:
            handle.write('import sys\nimport os\n')

        # pause before every commit
        time.sleep(2.0)
        grepo.git.add(all=True)
        grepo.git.commit(m='adding file_%d.py' % (idx, ))

    self.repo = Repository(working_dir=repo_dir, verbose=True)
from gitpandas import Repository

__author__ = 'willmcginnis'

if __name__ == '__main__':
    # Blame for the 'py' extension of lifelines, requested with by='file'
    # and committer=False; the repository is given by its GitHub URL.
    remote_url = 'git://github.com/CamDavidsonPilon/lifelines.git'
    repo = Repository(working_dir=remote_url, verbose=True)

    shared_blame = repo.blame(extensions=['py'], committer=False, by='file')
    print(shared_blame)
import os

from gitpandas import Repository

__author__ = 'willmcginnis'

if __name__ == '__main__':
    # File change rates (coverage requested) for the 'py' extension of the
    # repository at ../../git-pandas, resolved against the current directory.
    checkout = os.path.abspath('../../git-pandas')
    repo = Repository(working_dir=checkout)

    fc = repo.file_change_rates(extensions=['py'], coverage=True)
    print(fc)
from gitpandas import Repository
import numpy as np
import lifelines
import matplotlib.pyplot as plt

__author__ = 'willmcginnis'

if __name__ == '__main__':
    # a change is flagged as a refactor when |insertions - deletions| exceeds this
    threshold = 5

    repo = Repository(
        working_dir='git://github.com/CamDavidsonPilon/lifelines.git')
    fch = repo.file_change_history(limit=None, extensions=['py'])

    fch['file_owner'] = ''
    fch['refactor'] = 0
    # index cast to int64 and floor-divided by the number of nanoseconds in a
    # day (assumes a nanosecond-resolution datetime index -- TODO confirm)
    fch['timestamp'] = fch.index.astype(np.int64) // (24 * 3600 * 10**9)
    fch['observed'] = False
    fch = fch.reindex()
    fch = fch.reset_index()

    # add in the file owner and whether or not each item is a refactor
    for idx, row in fch.iterrows():
        # DataFrame.set_value no longer exists in pandas >= 1.0;
        # .at is the label-based scalar setter that replaces it
        fch.at[idx, 'file_owner'] = repo.file_owner(row.rev, row.filename)
        is_refactor = abs(row.insertions - row.deletions) > threshold
        fch.at[idx, 'refactor'] = 1 if is_refactor else 0

    # add in the time since column
    fch['time_until_refactor'] = 0
from gitpandas import Repository
import numpy as np
import lifelines
import matplotlib.pyplot as plt

plt.style.use('ggplot')

__author__ = 'willmcginnis'

if __name__ == '__main__':
    # a change is flagged as a refactor when |insertions - deletions| exceeds this
    threshold = 100

    repo = Repository(
        working_dir='git://github.com/scikit-learn/scikit-learn.git',
        verbose=True)
    fch = repo.file_change_history(limit=None, include_globs=['*.py'])

    fch['file_owner'] = ''
    fch['refactor'] = 0
    # index cast to int64 and floor-divided by the number of nanoseconds in a
    # day (assumes a nanosecond-resolution datetime index -- TODO confirm)
    fch['timestamp'] = fch.index.astype(np.int64) // (24 * 3600 * 10**9)
    fch['observed'] = False
    fch = fch.reindex()
    fch = fch.reset_index()

    # add in the file owner and whether or not each item is a refactor
    for idx, row in fch.iterrows():
        # DataFrame.set_value no longer exists in pandas >= 1.0;
        # .at is the label-based scalar setter that replaces it
        fch.at[idx, 'file_owner'] = repo.file_owner(row.rev, row.filename)
        is_refactor = abs(row.insertions - row.deletions) > threshold
        fch.at[idx, 'refactor'] = 1 if is_refactor else 0
"""
Example: blame, committer count and bus factor for flask.

The repository is handed to ``Repository`` as a git:// URL on GitHub.
"""

import os

from pandas import merge

from gitpandas import ProjectDirectory, Repository

__author__ = "willmcginnis"

if __name__ == "__main__":
    remote_url = "git://github.com/mitsuhiko/flask.git"
    flask_repo = Repository(working_dir=remote_url)

    # do some blaming
    flask_blame = flask_repo.blame(extensions=["py"])

    # count the distinct committers found in the history of master
    flask_ch = flask_repo.commit_history("master", limit=None, extensions=["py"])
    committers = set(flask_ch["committer"].values)
    print("\tflask committers: %d" % (len(committers)))

    print("\tflask bus count:")
    print(flask_repo.bus_factor(extensions=["py"]))
class TestLocalProperties(unittest.TestCase):
    """
    Checks Repository against a small repository that is rebuilt on disk for every test.
    """

    def setUp(self):
        """
        Build a fresh repository with six commits in ``repos/repository1`` next to this file.

        :return:
        """
        here = str(os.path.dirname(os.path.abspath(__file__)))
        project_dir = here + os.sep + 'repos'
        repo_dir = here + os.sep + 'repos' + os.sep + 'repository1'

        # start from an empty project directory
        if os.path.exists(project_dir):
            shutil.rmtree(project_dir)
        os.makedirs(project_dir)

        if not os.path.exists(repo_dir):
            os.makedirs(repo_dir)

        # create an empty repo (but not bare)
        grepo = git.Repo.init(repo_dir)

        # first commit: a README
        readme_path = repo_dir + os.sep + 'README.md'
        with open(readme_path, 'w') as handle:
            handle.write('Sample README for a sample project\n')
        grepo.git.add('README.md')
        grepo.git.commit(m='first commit')

        # five further commits, each adding one python file
        for idx in range(5):
            source_path = repo_dir + os.sep + 'file_%d.py' % (idx, )
            with open(source_path, 'w') as handle:
                handle.write('import sys\nimport os\n')

            # pause before every commit
            time.sleep(2.0)
            grepo.git.add(all=True)
            grepo.git.commit(m='adding file_%d.py' % (idx, ))

        self.repo = Repository(working_dir=repo_dir, verbose=True)

    def tearDown(self):
        # finalize the Repository first, then remove everything setUp created
        self.repo.__del__()
        here = str(os.path.dirname(os.path.abspath(__file__)))
        shutil.rmtree(here + os.sep + 'repos')

    def test_repo_name(self):
        self.assertEqual(self.repo._repo_name(), 'repository1')

    def test_branches(self):
        found = list(self.repo.branches()['branch'].values)
        self.assertIn('master', found)

    def test_tags(self):
        found = list(self.repo.tags()['tag'].values)
        self.assertEqual(len(found), 0)

    def test_is_bare(self):
        self.assertFalse(self.repo.is_bare())

    def test_commit_history(self):
        # commit history: everything, python only, limited, and by age
        history = self.repo.commit_history(branch='master')
        self.assertEqual(history.shape[0], 6)

        py_history = self.repo.commit_history(branch='master', extensions=['py'])
        self.assertEqual(py_history.shape[0], 5)

        limited_history = self.repo.commit_history(branch='master', limit=3)
        self.assertEqual(limited_history.shape[0], 3)

        recent_history = self.repo.commit_history(branch='master', days=5)
        self.assertEqual(recent_history.shape[0], 6)

        # file change history with the same kinds of filters
        changes = self.repo.file_change_history(branch='master')
        self.assertEqual(changes.shape[0], 6)

        py_changes = self.repo.file_change_history(branch='master', extensions=['py'])
        self.assertEqual(py_changes.shape[0], 5)

        limited_changes = self.repo.file_change_history(branch='master', limit=3)
        self.assertEqual(limited_changes.shape[0], 3)

        # file change rates
        rates = self.repo.file_change_rates(branch='master')
        self.assertEqual(rates.shape[0], 6)
        self.assertEqual(rates['unique_committers'].sum(), 6)
        self.assertEqual(rates['net_change'].sum(), 11)

        # this repo is known to have no coverage
        self.assertFalse(self.repo.has_coverage())

        # this repo is known to have a single committer
        bus_factor = self.repo.bus_factor(by='repository')
        self.assertEqual(bus_factor['bus factor'].values[0], 1)

        # blame, plain and cumulative
        blame = self.repo.blame(extensions=['py'])
        self.assertEqual(blame['loc'].sum(), 10)
        self.assertEqual(blame.shape[0], 1)

        cumulative = self.repo.cumulative_blame()
        self.assertEqual(cumulative.shape[0], 6)
        first_column = cumulative.columns.values[0]
        self.assertEqual(cumulative[first_column].sum(), 36)

        # revs with a datapoint count, with a limit, and without arguments
        revs = self.repo.revs(num_datapoints=2)
        self.assertEqual(revs.shape[0], 2)

        revs = self.repo.revs(limit=2)
        self.assertEqual(revs.shape[0], 2)

        revs = self.repo.revs()
        self.assertEqual(revs.shape[0], 6)
"""
Example: blame, committer count and bus factor for flask.

The repository is handed to ``Repository`` as a git:// URL on GitHub.
"""

from gitpandas import Repository

__author__ = 'willmcginnis'

if __name__ == '__main__':
    remote_url = 'git://github.com/mitsuhiko/flask.git'
    flask_repo = Repository(working_dir=remote_url)

    # do some blaming
    flask_blame = flask_repo.blame(extensions=['py'])

    # count the distinct committers found in the history of master
    flask_ch = flask_repo.commit_history('master', limit=None, extensions=['py'])
    committers = set(flask_ch['committer'].values)
    print('\tflask committers: %d' % (len(committers)))

    print('\tflask bus count:')
    print(flask_repo.bus_factor(extensions=['py']))
import os

from gitpandas import Repository

__author__ = 'willmcginnis'

if __name__ == '__main__':
    # File change rates (coverage requested) for '*.py' in the repository at
    # ../../git-pandas, resolved against the current directory.
    checkout = os.path.abspath('../../git-pandas')
    repo = Repository(working_dir=checkout)

    fc = repo.file_change_rates(include_globs=['*.py'], coverage=True)
    print(fc)
from gitpandas import Repository
import numpy as np
import lifelines
import matplotlib.pyplot as plt

plt.style.use('ggplot')

__author__ = 'willmcginnis'

if __name__ == '__main__':
    # a change is flagged as a refactor when |insertions - deletions| exceeds this
    threshold = 100

    repo = Repository(working_dir='git://github.com/scikit-learn/scikit-learn.git', verbose=True)
    fch = repo.file_change_history(limit=None, include_globs=['*.py'])

    fch['file_owner'] = ''
    fch['refactor'] = 0
    # index cast to int64 and floor-divided by the number of nanoseconds in a
    # day (assumes a nanosecond-resolution datetime index -- TODO confirm)
    fch['timestamp'] = fch.index.astype(np.int64) // (24 * 3600 * 10**9)
    fch['observed'] = False
    fch = fch.reindex()
    fch = fch.reset_index()

    # add in the file owner and whether or not each item is a refactor
    for idx, row in fch.iterrows():
        # DataFrame.set_value no longer exists in pandas >= 1.0;
        # .at is the label-based scalar setter that replaces it
        fch.at[idx, 'file_owner'] = repo.file_owner(row.rev, row.filename)
        is_refactor = abs(row.insertions - row.deletions) > threshold
        fch.at[idx, 'refactor'] = 1 if is_refactor else 0

    # add in the time since column
    fch['time_until_refactor'] = 0
from gitpandas import Repository

__author__ = 'willmcginnis'

if __name__ == '__main__':
    # Blame for '*.py' in lifelines, requested with by='file' and
    # committer=False; the repository is given by its GitHub URL.
    remote_url = 'git://github.com/CamDavidsonPilon/lifelines.git'
    repo = Repository(working_dir=remote_url, verbose=True)

    shared_blame = repo.blame(include_globs=['*.py'], committer=False, by='file')
    print(shared_blame)
from gitpandas import Repository
import numpy as np
import lifelines
import matplotlib.pyplot as plt

# a change is flagged as a refactor when |insertions - deletions| exceeds this
threshold = 20

repo = Repository(working_dir='git://github.com/ogr3/race-management-system.git', verbose=True)
fch = repo.file_change_history(limit=100000, extensions=['py', 'pyx', 'h', 'c', 'cpp', 'java', 'xml'])

fch['file_owner'] = ''
fch['refactor'] = 0
# index cast to int64 and floor-divided by the number of nanoseconds in a
# day (assumes a nanosecond-resolution datetime index -- TODO confirm)
fch['timestamp'] = fch.index.astype(np.int64) // (24 * 3600 * 10**9)
fch['observed'] = False
fch = fch.reindex()
fch = fch.reset_index()

# add in the file owner and whether or not each item is a refactor
for idx, row in fch.iterrows():
    # DataFrame.set_value no longer exists in pandas >= 1.0;
    # .at is the label-based scalar setter that replaces it
    fch.at[idx, 'file_owner'] = repo.file_owner(row.rev, row.filename, committer=True)
    is_refactor = abs(row.insertions - row.deletions) > threshold
    fch.at[idx, 'refactor'] = 1 if is_refactor else 0

# add in the time since column
fch['time_until_refactor'] = 0
for idx, row in fch.iterrows():
    ts = None
    # later refactors of the same file
    chunk = fch[(fch['timestamp'] > row.timestamp) & (fch['refactor'] == 1) & (fch['filename'] == row.filename)]
    if chunk.shape[0] > 0:
        # the earliest later refactor; the row counts as observed
        ts = chunk['timestamp'].min()
        fch.at[idx, 'observed'] = True
import os

from gitpandas import Repository
from definitions import GIT_PANDAS_DIR

__author__ = 'willmcginnis'

if __name__ == '__main__':
    # File change rates (coverage requested) for '*.py' in the repository
    # located at GIT_PANDAS_DIR.
    repo = Repository(working_dir=GIT_PANDAS_DIR)

    fc = repo.file_change_rates(include_globs=['*.py'], coverage=True)
    print(fc)
"""
Example: blame, committer count and bus factor for flask.

The repository is handed to ``Repository`` as a git:// URL on GitHub.
"""

import os

from pandas import merge

from gitpandas import ProjectDirectory, Repository

__author__ = 'willmcginnis'

if __name__ == '__main__':
    remote_url = 'git://github.com/mitsuhiko/flask.git'
    flask_repo = Repository(working_dir=remote_url)

    # do some blaming
    flask_blame = flask_repo.blame(extensions=['py'])

    # count the distinct committers found in the history of master
    flask_ch = flask_repo.commit_history('master', limit=None, extensions=['py'])
    committers = set(flask_ch['committer'].values)
    print('\tflask committers: %d' % (len(committers)))

    print('\tflask bus count:')
    print(flask_repo.bus_factor(extensions=['py']))
def setUp(self):
    """
    Create the Repository under test from the git-pandas GitHub URL.
    """
    remote_url = 'git://github.com/wdm0006/git-pandas.git'
    self.repo = Repository(working_dir=remote_url, verbose=True)
class TestLocalProperties(unittest.TestCase):
    """
    Checks Repository against a small repository that is rebuilt on disk for every test.
    """

    def setUp(self):
        """
        Build a fresh repository with six commits in ``repos/repository1`` next to this file.

        :return:
        """
        here = str(os.path.dirname(os.path.abspath(__file__)))
        project_dir = here + os.sep + 'repos'
        repo_dir = here + os.sep + 'repos' + os.sep + 'repository1'

        # start from an empty project directory
        if os.path.exists(project_dir):
            shutil.rmtree(project_dir)
        os.makedirs(project_dir)

        if not os.path.exists(repo_dir):
            os.makedirs(repo_dir)

        # create an empty repo (but not bare)
        grepo = git.Repo.init(repo_dir)

        # first commit: a README
        readme_path = repo_dir + os.sep + 'README.md'
        with open(readme_path, 'w') as handle:
            handle.write('Sample README for a sample project\n')
        grepo.git.add('README.md')
        grepo.git.commit(m='first commit')

        # five further commits, each adding one python file
        for idx in range(5):
            source_path = repo_dir + os.sep + 'file_%d.py' % (idx, )
            with open(source_path, 'w') as handle:
                handle.write('import sys\nimport os\n')

            # pause before every commit
            time.sleep(2.0)
            grepo.git.add(all=True)
            grepo.git.commit(m='adding file_%d.py' % (idx, ))

        self.repo = Repository(working_dir=repo_dir, verbose=True)

    def tearDown(self):
        # finalize the Repository first, then remove everything setUp created
        self.repo.__del__()
        here = str(os.path.dirname(os.path.abspath(__file__)))
        shutil.rmtree(here + os.sep + 'repos')

    def test_repo_name(self):
        self.assertEqual(self.repo.repo_name, 'repository1')

    def test_branches(self):
        found = list(self.repo.branches()['branch'].values)
        self.assertIn('master', found)

    def test_tags(self):
        found = list(self.repo.tags()['tag'].values)
        self.assertEqual(len(found), 0)

    def test_is_bare(self):
        self.assertFalse(self.repo.is_bare())

    def test_commit_history(self):
        # commit history: everything, python only, limited, and by age
        history = self.repo.commit_history(branch='master')
        self.assertEqual(history.shape[0], 6)

        py_history = self.repo.commit_history(branch='master', extensions=['py'])
        self.assertEqual(py_history.shape[0], 5)

        limited_history = self.repo.commit_history(branch='master', limit=3)
        self.assertEqual(limited_history.shape[0], 3)

        recent_history = self.repo.commit_history(branch='master', days=5)
        self.assertEqual(recent_history.shape[0], 6)

        # file change history with the same kinds of filters
        changes = self.repo.file_change_history(branch='master')
        self.assertEqual(changes.shape[0], 6)

        py_changes = self.repo.file_change_history(branch='master', extensions=['py'])
        self.assertEqual(py_changes.shape[0], 5)

        limited_changes = self.repo.file_change_history(branch='master', limit=3)
        self.assertEqual(limited_changes.shape[0], 3)

        # file change rates
        rates = self.repo.file_change_rates(branch='master')
        self.assertEqual(rates.shape[0], 6)
        self.assertEqual(rates['unique_committers'].sum(), 6)
        self.assertEqual(rates['net_change'].sum(), 11)

        # this repo is known to have no coverage
        self.assertFalse(self.repo.has_coverage())

        # this repo is known to have a single committer
        bus_factor = self.repo.bus_factor(by='repository')
        self.assertEqual(bus_factor['bus factor'].values[0], 1)

        # blame, plain and cumulative
        blame = self.repo.blame(extensions=['py'])
        self.assertEqual(blame['loc'].sum(), 10)
        self.assertEqual(blame.shape[0], 1)

        cumulative = self.repo.cumulative_blame()
        self.assertEqual(cumulative.shape[0], 6)
        first_column = cumulative.columns.values[0]
        self.assertEqual(cumulative[first_column].sum(), 36)

        # revs with a datapoint count, with a limit, and without arguments
        revs = self.repo.revs(num_datapoints=2)
        self.assertEqual(revs.shape[0], 2)

        revs = self.repo.revs(limit=2)
        self.assertEqual(revs.shape[0], 2)

        revs = self.repo.revs()
        self.assertEqual(revs.shape[0], 6)
from gitpandas import Repository
import numpy as np
import lifelines
import matplotlib.pyplot as plt
import pandas as pd

plt.style.use('ggplot')

__author__ = 'willmcginnis'

if __name__ == '__main__':
    # a change is flagged as a refactor when |insertions - deletions| exceeds this
    threshold = 100

    repo = Repository(working_dir='git://github.com/scikit-learn/scikit-learn.git', verbose=True)
    fch = repo.file_change_history(limit=None, extensions=['py'])

    fch['file_owner'] = ''
    fch['refactor'] = 0
    # index cast to int64 and floor-divided by the number of nanoseconds in a
    # day (assumes a nanosecond-resolution datetime index -- TODO confirm)
    fch['timestamp'] = fch.index.astype(np.int64) // (24 * 3600 * 10**9)
    fch['observed'] = False
    fch = fch.reindex()
    fch = fch.reset_index()

    # add in the file owner and whether or not each item is a refactor
    for idx, row in fch.iterrows():
        # DataFrame.set_value no longer exists in pandas >= 1.0;
        # .at is the label-based scalar setter that replaces it
        fch.at[idx, 'file_owner'] = repo.file_owner(row.rev, row.filename)
        is_refactor = abs(row.insertions - row.deletions) > threshold
        fch.at[idx, 'refactor'] = 1 if is_refactor else 0

    # add in the time since column
import matplotlib.pyplot as plt
import os
import json
from gitpandas import Repository, ProjectDirectory
import matplotlib

matplotlib.style.use('ggplot')

__author__ = 'willmcginnis'

if __name__ == '__main__':
    # Stacked area plot of the cumulative blame on master of the repository
    # at ../../git-pandas ('py' extension, 'docs' passed as ignore_dir).
    checkout = os.path.abspath('../../git-pandas')
    g = Repository(working_dir=checkout, verbose=True)

    b = g.cumulative_blame(
        branch='master',
        extensions=['py'],
        ignore_dir=['docs'],
        limit=None,
        skip=None,
    )

    ax = b.plot(kind='area', stacked=True)
    plt.title('Cumulative Blame')
    plt.xlabel('date')
    plt.ylabel('LOC')

    # narrow the axes to 80% of their width and anchor the legend to their right
    box = ax.get_position()
    narrowed = [box.x0, box.y0, box.width * 0.8, box.height]
    ax.set_position(narrowed)
    ax.legend(loc='center left', bbox_to_anchor=(1, 0.5))

    plt.show()
from gitpandas import Repository
import time
from definitions import GIT_PANDAS_DIR

__author__ = 'willmcginnis'

if __name__ == '__main__':
    # Times cumulative_blame against parallel_cumulative_blame (4 workers)
    # on the same branch and include globs.
    g = Repository(working_dir=GIT_PANDAS_DIR)
    globs = ['*.py', '*.html', '*.sql', '*.md']

    # plain run
    st = time.time()
    blame = g.cumulative_blame(branch='master', include_globs=globs, limit=None, skip=None)
    print(blame.head())
    print(time.time() - st)

    # parallel run
    st = time.time()
    blame = g.parallel_cumulative_blame(
        branch='master', include_globs=globs, limit=None, skip=None, workers=4)
    print(blame.head())
    print(time.time() - st)
"""
Example: blame, committer count and bus factor for flask.

The repository is handed to ``Repository`` as a git:// URL on GitHub.
"""

from gitpandas import Repository

__author__ = 'willmcginnis'

if __name__ == '__main__':
    remote_url = 'git://github.com/mitsuhiko/flask.git'
    flask_repo = Repository(working_dir=remote_url)

    # do some blaming
    flask_blame = flask_repo.blame(include_globs=['*.py'])

    # count the distinct committers found in the history of master
    flask_ch = flask_repo.commit_history('master', limit=None, include_globs=['*.py'])
    committers = set(flask_ch['committer'].values)
    print('\tflask committers: %d' % (len(committers)))

    print('\tflask bus count:')
    print(flask_repo.bus_factor(include_globs=['*.py']))
from gitpandas import Repository

__author__ = 'willmcginnis'

if __name__ == '__main__':
    # Blame for the 'py' extension of lifelines, requested with by='file'
    # and committer=False; the repository is given by its GitHub URL.
    remote_url = 'git://github.com/CamDavidsonPilon/lifelines.git'
    repo = Repository(working_dir=remote_url, verbose=True)

    shared_blame = repo.blame(extensions=['py'], committer=False, by='file')
    print(shared_blame)
def setUp(self):
    """
    Create the Repository under test from the git-pandas GitHub URL.
    """
    remote_url = 'git://github.com/wdm0006/git-pandas.git'
    self.repo = Repository(working_dir=remote_url, verbose=True)
import matplotlib.pyplot as plt
import os
import json
from gitpandas import Repository, ProjectDirectory
import matplotlib

matplotlib.style.use("ggplot")

__author__ = "willmcginnis"

if __name__ == "__main__":
    # Stacked area plot of the cumulative blame on master of the repository
    # at ../../git-pandas ("py" extension, "docs" passed as ignore_dir).
    checkout = os.path.abspath("../../git-pandas")
    g = Repository(working_dir=checkout, verbose=True)

    b = g.cumulative_blame(
        branch="master",
        extensions=["py"],
        ignore_dir=["docs"],
        limit=None,
        skip=None,
    )

    ax = b.plot(kind="area", stacked=True)
    plt.title("Cumulative Blame")
    plt.xlabel("date")
    plt.ylabel("LOC")

    # narrow the axes to 80% of their width and anchor the legend to their right
    box = ax.get_position()
    narrowed = [box.x0, box.y0, box.width * 0.8, box.height]
    ax.set_position(narrowed)
    ax.legend(loc="center left", bbox_to_anchor=(1, 0.5))

    plt.show()
"""
Example: blame, committer count and bus factor for flask.

The repository is handed to ``Repository`` as a git:// URL on GitHub.
"""

from gitpandas import Repository

__author__ = 'willmcginnis'

if __name__ == '__main__':
    remote_url = 'git://github.com/mitsuhiko/flask.git'
    flask_repo = Repository(working_dir=remote_url)

    # do some blaming
    flask_blame = flask_repo.blame(include_globs=['*.py'])

    # count the distinct committers found in the history of master
    flask_ch = flask_repo.commit_history(
        'master', limit=None, include_globs=['*.py'])
    committers = set(flask_ch['committer'].values)
    print('\tflask committers: %d' % (len(committers)))

    print('\tflask bus count:')
    print(flask_repo.bus_factor(include_globs=['*.py']))