Esempio n. 1
0
def repository(path):
    """
    Build an example ``Repository`` for ``path`` and print a tour of its data.

    Printed, in order: whether the repository is bare, the first rows of the
    commit history, the distinct committers, per-committer totals, the first
    rows of the file change history, the distinct file extensions and
    per-extension totals.

    :param path: handed unchanged to ``Repository``
    :return: None, everything is printed
    """
    import os  # only needed here, for extension parsing

    # globs left out of both history queries
    ignore_dirs = [
        'docs/*',
        'tests/*',
        'Data/*'
    ]
    r = Repository(path)

    # is it bare?
    print('\nRepo bare?')
    print(r.is_bare())
    print('\n')

    # get the commit history
    ch = r.commit_history('HEAD', limit=None, include_globs=['*.py'], ignore_globs=ignore_dirs)
    print(ch.head(5))

    # get the list of committers (sorted so the listing is stable between runs)
    print('\nCommiters:')
    committers = sorted({str(x) for x in ch['committer'].values})
    print(''.join(name + '\n' for name in committers))
    print('\n')

    # print out everyone's contributions
    attr = ch.reindex(columns=['committer', 'lines', 'insertions', 'deletions']).groupby(['committer'])
    attr = attr.agg({
        'lines': 'sum',
        'insertions': 'sum',
        'deletions': 'sum'
    })
    print(attr)

    # get the file change history
    fh = r.file_change_history('HEAD', limit=None, ignore_globs=ignore_dirs)
    # splitext only inspects the last path component, so 'Makefile' or
    # 'conf.d/README' give '' instead of being reported as an extension
    fh['ext'] = fh['filename'].map(lambda x: os.path.splitext(x)[1].lstrip('.'))
    print(fh.head(50))

    # print out unique extensions (sorted for the same reason as above)
    print('\nExtensions Found:')
    extensions = sorted({str(x) for x in fh['ext'].values})
    print(''.join(ext + '\n' for ext in extensions))
    print('\n')

    # agg by extension
    etns = fh.reindex(columns=['ext', 'insertions', 'deletions']).groupby(['ext'])
    etns = etns.agg({
        'insertions': 'sum',
        'deletions': 'sum'
    })
    print(etns)
Esempio n. 2
0
def repository(path):
    """
    Build an example ``Repository`` for ``path`` and print a tour of its data.

    Printed, in order: whether the repository is bare, the first rows of the
    commit history, the distinct committers, per-committer totals, the first
    rows of the file change history, the distinct file extensions and
    per-extension totals.

    :param path: handed unchanged to ``Repository``
    :return: None, everything is printed
    """
    import os  # only needed here, for extension parsing

    # globs left out of both history queries
    ignore_dirs = [
        'docs/*',
        'tests/*',
        'Data/*'
    ]
    r = Repository(path)

    # is it bare?
    print('\nRepo bare?')
    print(r.is_bare())
    print('\n')

    # get the commit history
    ch = r.commit_history('HEAD', limit=None, include_globs=['*.py'], ignore_globs=ignore_dirs)
    print(ch.head(5))

    # get the list of committers (sorted so the listing is stable between runs)
    print('\nCommiters:')
    committers = sorted({str(x) for x in ch['committer'].values})
    print(''.join(name + '\n' for name in committers))
    print('\n')

    # print out everyone's contributions
    attr = ch.reindex(columns=['committer', 'lines', 'insertions', 'deletions']).groupby(['committer'])
    attr = attr.agg({
        'lines': 'sum',
        'insertions': 'sum',
        'deletions': 'sum'
    })
    print(attr)

    # get the file change history
    fh = r.file_change_history('HEAD', limit=None, ignore_globs=ignore_dirs)
    # splitext only inspects the last path component, so 'Makefile' or
    # 'conf.d/README' give '' instead of being reported as an extension
    fh['ext'] = fh['filename'].map(lambda x: os.path.splitext(x)[1].lstrip('.'))
    print(fh.head(50))

    # print out unique extensions (sorted for the same reason as above)
    print('\nExtensions Found:')
    extensions = sorted({str(x) for x in fh['ext'].values})
    print(''.join(ext + '\n' for ext in extensions))
    print('\n')

    # agg by extension
    etns = fh.reindex(columns=['ext', 'insertions', 'deletions']).groupby(['ext'])
    etns = etns.agg({
        'insertions': 'sum',
        'deletions': 'sum'
    })
    print(etns)
Esempio n. 3
0
from gitpandas import Repository
import numpy as np
import lifelines
import matplotlib.pyplot as plt
import pandas as pd
plt.style.use('ggplot')

__author__ = 'willmcginnis'


if __name__ == '__main__':
    # Load the python file-change history of the repository and annotate every
    # change with its file owner and a 0/1 refactor flag.

    # a change is flagged as a refactor when |insertions - deletions| exceeds this
    threshold = 100
    # NOTE(review): GitHub has stopped serving the unauthenticated git://
    # protocol -- confirm this URL still clones before relying on it
    repo = Repository(working_dir='git://github.com/scikit-learn/scikit-learn.git', verbose=True)
    fch = repo.file_change_history(limit=None, extensions=['py'])

    fch['file_owner'] = ''
    fch['refactor'] = 0
    # presumably the index is a nanosecond datetime index, which would make
    # this whole days since the epoch -- TODO confirm
    fch['timestamp'] = fch.index.astype(np.int64) // (24 * 3600 * 10**9)
    fch['observed'] = False
    fch = fch.reset_index()

    # add in the file owner and whether or not each item is a refactor
    # (plain column assignment; DataFrame.set_value no longer exists in pandas >= 1.0)
    fch['file_owner'] = [
        repo.file_owner(rev, filename)
        for rev, filename in zip(fch['rev'], fch['filename'])
    ]
    fch['refactor'] = ((fch['insertions'] - fch['deletions']).abs() > threshold).astype(int)

    # add in the time since column
Esempio n. 4
0
class TestLocalProperties(unittest.TestCase):
    """
    Checks ``Repository`` against a small local git repository that is rebuilt
    from scratch before every test and removed afterwards.
    """
    def setUp(self):
        """
        Create ``repos/repository1`` next to this file, commit a README and
        five two-line python files on ``master`` (six commits in total) and
        wrap the result in a ``Repository``.

        :return:
        """
        self.project_dir = os.path.join(
            os.path.dirname(os.path.abspath(__file__)), 'repos')
        repo_dir = os.path.join(self.project_dir, 'repository1')

        # start clean in case an earlier run left the directory behind
        if os.path.exists(self.project_dir):
            shutil.rmtree(self.project_dir)

        # creates the project directory and the repository directory in one go
        os.makedirs(repo_dir)

        # create an empty repo (but not bare)
        grepo = git.Repo.init(repo_dir)

        # the tests below look for 'master'; point the still-unborn HEAD at it
        # so the outcome does not depend on git's init.defaultBranch setting
        grepo.git.symbolic_ref('HEAD', 'refs/heads/master')

        # add a file
        with open(os.path.join(repo_dir, 'README.md'), 'w') as f:
            f.write('Sample README for a sample project\n')

        # commit it
        grepo.git.add('README.md')
        grepo.git.commit(m='first commit')

        # now add some other files:
        for idx in range(5):
            with open(os.path.join(repo_dir, 'file_%d.py' % (idx, )), 'w') as f:
                f.write('import sys\nimport os\n')

            # presumably spaces the commits apart in time -- TODO confirm
            time.sleep(2.0)
            grepo.git.add(all=True)
            grepo.git.commit(m='adding file_%d.py' % (idx, ))

        self.repo = Repository(working_dir=repo_dir, verbose=True)

    def tearDown(self):
        """
        Release the ``Repository`` and delete the directory tree built in setUp.

        :return:
        """
        self.repo.__del__()
        shutil.rmtree(self.project_dir)

    def test_repo_name(self):
        self.assertEqual(self.repo._repo_name(), 'repository1')

    def test_branches(self):
        branches = list(self.repo.branches()['branch'].values)
        self.assertIn('master', branches)

    def test_tags(self):
        tags = list(self.repo.tags()['tag'].values)
        self.assertEqual(len(tags), 0)

    def test_is_bare(self):
        self.assertFalse(self.repo.is_bare())

    def test_commit_history(self):
        """
        Walk through the history, change-rate, coverage, bus-factor, blame and
        revs queries against the six commits made in setUp.

        :return:
        """
        ch = self.repo.commit_history(branch='master')
        self.assertEqual(ch.shape[0], 6)

        ch2 = self.repo.commit_history(branch='master', extensions=['py'])
        self.assertEqual(ch2.shape[0], 5)

        ch3 = self.repo.commit_history(branch='master', limit=3)
        self.assertEqual(ch3.shape[0], 3)

        ch4 = self.repo.commit_history(branch='master', days=5)
        self.assertEqual(ch4.shape[0], 6)

        fch = self.repo.file_change_history(branch='master')
        self.assertEqual(fch.shape[0], 6)

        fch2 = self.repo.file_change_history(branch='master',
                                             extensions=['py'])
        self.assertEqual(fch2.shape[0], 5)

        fch3 = self.repo.file_change_history(branch='master', limit=3)
        self.assertEqual(fch3.shape[0], 3)

        fcr = self.repo.file_change_rates(branch='master')
        self.assertEqual(fcr.shape[0], 6)
        self.assertEqual(fcr['unique_committers'].sum(), 6)
        self.assertEqual(fcr['net_change'].sum(), 11)

        # we know this repo doesnt have coverage
        self.assertFalse(self.repo.has_coverage())

        # we know this repo only has one committer
        self.assertEqual(
            self.repo.bus_factor(by='repository')['bus factor'].values[0], 1)

        # lets do some blaming
        blame = self.repo.blame(extensions=['py'])
        self.assertEqual(blame['loc'].sum(), 10)
        self.assertEqual(blame.shape[0], 1)

        cblame = self.repo.cumulative_blame()
        self.assertEqual(cblame.shape[0], 6)
        self.assertEqual(cblame[cblame.columns.values[0]].sum(), 36)

        revs = self.repo.revs(num_datapoints=2)
        self.assertEqual(revs.shape[0], 2)
        revs = self.repo.revs(limit=2)
        self.assertEqual(revs.shape[0], 2)
        revs = self.repo.revs()
        self.assertEqual(revs.shape[0], 6)
Esempio n. 5
0
from gitpandas import Repository
import numpy as np
import lifelines
import matplotlib.pyplot as plt

__author__ = 'willmcginnis'

if __name__ == '__main__':
    # Load the python file-change history of the repository and annotate every
    # change with its file owner and a 0/1 refactor flag.

    # a change is flagged as a refactor when |insertions - deletions| exceeds this
    threshold = 5
    # NOTE(review): GitHub has stopped serving the unauthenticated git://
    # protocol -- confirm this URL still clones before relying on it
    repo = Repository(
        working_dir='git://github.com/CamDavidsonPilon/lifelines.git')
    fch = repo.file_change_history(limit=None, extensions=['py'])

    fch['file_owner'] = ''
    fch['refactor'] = 0
    # presumably the index is a nanosecond datetime index, which would make
    # this whole days since the epoch -- TODO confirm
    fch['timestamp'] = fch.index.astype(np.int64) // (24 * 3600 * 10**9)
    fch['observed'] = False
    fch = fch.reset_index()

    # add in the file owner and whether or not each item is a refactor
    # (plain column assignment; DataFrame.set_value no longer exists in pandas >= 1.0)
    fch['file_owner'] = [
        repo.file_owner(rev, filename)
        for rev, filename in zip(fch['rev'], fch['filename'])
    ]
    fch['refactor'] = ((fch['insertions'] - fch['deletions']).abs() > threshold).astype(int)

    # add in the time since column
    fch['time_until_refactor'] = 0
Esempio n. 6
0
from gitpandas import Repository
import numpy as np
import lifelines
import matplotlib.pyplot as plt
plt.style.use('ggplot')

__author__ = 'willmcginnis'

if __name__ == '__main__':
    # Load the '*.py' file-change history of the repository and annotate every
    # change with its file owner and a 0/1 refactor flag.

    # a change is flagged as a refactor when |insertions - deletions| exceeds this
    threshold = 100
    # NOTE(review): GitHub has stopped serving the unauthenticated git://
    # protocol -- confirm this URL still clones before relying on it
    repo = Repository(
        working_dir='git://github.com/scikit-learn/scikit-learn.git',
        verbose=True)
    fch = repo.file_change_history(limit=None, include_globs=['*.py'])

    fch['file_owner'] = ''
    fch['refactor'] = 0
    # presumably the index is a nanosecond datetime index, which would make
    # this whole days since the epoch -- TODO confirm
    fch['timestamp'] = fch.index.astype(np.int64) // (24 * 3600 * 10**9)
    fch['observed'] = False
    fch = fch.reset_index()

    # add in the file owner and whether or not each item is a refactor
    # (plain column assignment; DataFrame.set_value no longer exists in pandas >= 1.0)
    fch['file_owner'] = [
        repo.file_owner(rev, filename)
        for rev, filename in zip(fch['rev'], fch['filename'])
    ]
    fch['refactor'] = ((fch['insertions'] - fch['deletions']).abs() > threshold).astype(int)
Esempio n. 7
0
class TestLocalProperties(unittest.TestCase):
    """
    Checks ``Repository`` against a small local git repository that is rebuilt
    from scratch before every test and removed afterwards.
    """

    def setUp(self):
        """
        Create ``repos/repository1`` next to this file, commit a README and
        five two-line python files on ``master`` (six commits in total) and
        wrap the result in a ``Repository``.

        :return:
        """
        self.project_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'repos')
        repo_dir = os.path.join(self.project_dir, 'repository1')

        # start clean in case an earlier run left the directory behind
        if os.path.exists(self.project_dir):
            shutil.rmtree(self.project_dir)

        # creates the project directory and the repository directory in one go
        os.makedirs(repo_dir)

        # create an empty repo (but not bare)
        grepo = git.Repo.init(repo_dir)

        # the tests below look for 'master'; point the still-unborn HEAD at it
        # so the outcome does not depend on git's init.defaultBranch setting
        grepo.git.symbolic_ref('HEAD', 'refs/heads/master')

        # add a file
        with open(os.path.join(repo_dir, 'README.md'), 'w') as f:
            f.write('Sample README for a sample project\n')

        # commit it
        grepo.git.add('README.md')
        grepo.git.commit(m='first commit')

        # now add some other files:
        for idx in range(5):
            with open(os.path.join(repo_dir, 'file_%d.py' % (idx, )), 'w') as f:
                f.write('import sys\nimport os\n')

            # presumably spaces the commits apart in time -- TODO confirm
            time.sleep(2.0)
            grepo.git.add(all=True)
            grepo.git.commit(m='adding file_%d.py' % (idx, ))

        self.repo = Repository(working_dir=repo_dir, verbose=True)

    def tearDown(self):
        """
        Release the ``Repository`` and delete the directory tree built in setUp.

        :return:
        """
        self.repo.__del__()
        shutil.rmtree(self.project_dir)

    def test_repo_name(self):
        self.assertEqual(self.repo.repo_name, 'repository1')

    def test_branches(self):
        branches = list(self.repo.branches()['branch'].values)
        self.assertIn('master', branches)

    def test_tags(self):
        tags = list(self.repo.tags()['tag'].values)
        self.assertEqual(len(tags), 0)

    def test_is_bare(self):
        self.assertFalse(self.repo.is_bare())

    def test_commit_history(self):
        """
        Walk through the history, change-rate, coverage, bus-factor, blame and
        revs queries against the six commits made in setUp.

        :return:
        """
        ch = self.repo.commit_history(branch='master')
        self.assertEqual(ch.shape[0], 6)

        ch2 = self.repo.commit_history(branch='master', extensions=['py'])
        self.assertEqual(ch2.shape[0], 5)

        ch3 = self.repo.commit_history(branch='master', limit=3)
        self.assertEqual(ch3.shape[0], 3)

        ch4 = self.repo.commit_history(branch='master', days=5)
        self.assertEqual(ch4.shape[0], 6)

        fch = self.repo.file_change_history(branch='master')
        self.assertEqual(fch.shape[0], 6)

        fch2 = self.repo.file_change_history(branch='master', extensions=['py'])
        self.assertEqual(fch2.shape[0], 5)

        fch3 = self.repo.file_change_history(branch='master', limit=3)
        self.assertEqual(fch3.shape[0], 3)

        fcr = self.repo.file_change_rates(branch='master')
        self.assertEqual(fcr.shape[0], 6)
        self.assertEqual(fcr['unique_committers'].sum(), 6)
        self.assertEqual(fcr['net_change'].sum(), 11)

        # we know this repo doesnt have coverage
        self.assertFalse(self.repo.has_coverage())

        # we know this repo only has one committer
        self.assertEqual(self.repo.bus_factor(by='repository')['bus factor'].values[0], 1)

        # lets do some blaming
        blame = self.repo.blame(extensions=['py'])
        self.assertEqual(blame['loc'].sum(), 10)
        self.assertEqual(blame.shape[0], 1)

        cblame = self.repo.cumulative_blame()
        self.assertEqual(cblame.shape[0], 6)
        self.assertEqual(cblame[cblame.columns.values[0]].sum(), 36)

        revs = self.repo.revs(num_datapoints=2)
        self.assertEqual(revs.shape[0], 2)
        revs = self.repo.revs(limit=2)
        self.assertEqual(revs.shape[0], 2)
        revs = self.repo.revs()
        self.assertEqual(revs.shape[0], 6)
Esempio n. 8
0
from gitpandas import Repository
import numpy as np
import lifelines
import matplotlib.pyplot as plt

# Load the file-change history of the repository for a fixed set of
# extensions, annotate every change with its file owner and a 0/1 refactor
# flag, then mark the changes that are followed by a later refactor of the
# same file.

# a change is flagged as a refactor when |insertions - deletions| exceeds this
threshold = 20
# NOTE(review): GitHub has stopped serving the unauthenticated git://
# protocol -- confirm this URL still clones before relying on it
repo = Repository(working_dir='git://github.com/ogr3/race-management-system.git', verbose=True)
fch = repo.file_change_history(limit=100000, extensions=['py', 'pyx', 'h', 'c', 'cpp', 'java', 'xml'])
fch['file_owner'] = ''
fch['refactor'] = 0
# presumably the index is a nanosecond datetime index, which would make
# this whole days since the epoch -- TODO confirm
fch['timestamp'] = fch.index.astype(np.int64) // (24 * 3600 * 10**9)
fch['observed'] = False
fch = fch.reset_index()

# add in the file owner and whether or not each item is a refactor
# (plain column assignment; DataFrame.set_value no longer exists in pandas >= 1.0)
fch['file_owner'] = [
    repo.file_owner(rev, filename, committer=True)
    for rev, filename in zip(fch['rev'], fch['filename'])
]
fch['refactor'] = ((fch['insertions'] - fch['deletions']).abs() > threshold).astype(int)

# add in the time since column
fch['time_until_refactor'] = 0
# the refactor flag is not modified inside the loop, so filter on it once
refactors = fch[fch['refactor'] == 1]
for idx, row in fch.iterrows():
    ts = None
    # later refactors of the same file
    chunk = refactors[(refactors['timestamp'] > row.timestamp) & (refactors['filename'] == row.filename)]
    if chunk.shape[0] > 0:
        # earliest of those refactors; not consumed by the code visible here
        ts = chunk['timestamp'].min()
        fch.at[idx, 'observed'] = True
Esempio n. 9
0
from gitpandas import Repository
import numpy as np
import lifelines
import matplotlib.pyplot as plt
plt.style.use('ggplot')

__author__ = 'willmcginnis'


if __name__ == '__main__':
    # Load the '*.py' file-change history of the repository and annotate every
    # change with its file owner and a 0/1 refactor flag.

    # a change is flagged as a refactor when |insertions - deletions| exceeds this
    threshold = 100
    # NOTE(review): GitHub has stopped serving the unauthenticated git://
    # protocol -- confirm this URL still clones before relying on it
    repo = Repository(working_dir='git://github.com/scikit-learn/scikit-learn.git', verbose=True)
    fch = repo.file_change_history(limit=None, include_globs=['*.py'])

    fch['file_owner'] = ''
    fch['refactor'] = 0
    # presumably the index is a nanosecond datetime index, which would make
    # this whole days since the epoch -- TODO confirm
    fch['timestamp'] = fch.index.astype(np.int64) // (24 * 3600 * 10**9)
    fch['observed'] = False
    fch = fch.reset_index()

    # add in the file owner and whether or not each item is a refactor
    # (plain column assignment; DataFrame.set_value no longer exists in pandas >= 1.0)
    fch['file_owner'] = [
        repo.file_owner(rev, filename)
        for rev, filename in zip(fch['rev'], fch['filename'])
    ]
    fch['refactor'] = ((fch['insertions'] - fch['deletions']).abs() > threshold).astype(int)

    # add in the time since column
    fch['time_until_refactor'] = 0