Esempio n. 1
0
class TestRemoteProperties(unittest.TestCase):
    """
    Checks basic properties of a Repository that is constructed from the remote git-pandas URL.

    Relying on a live remote is fragile; mocking the git layer, or keeping a known static repo
    next to these tests, would be a better long-term setup.

    """

    def setUp(self):
        # NOTE(review): GitHub has retired the unauthenticated git:// protocol - confirm this URL still works.
        remote_url = 'git://github.com/wdm0006/git-pandas.git'
        self.repo = Repository(working_dir=remote_url, verbose=True)

    def tearDown(self):
        # invoke the repository's __del__ directly instead of waiting for garbage collection
        self.repo.__del__()

    def test_repo_name(self):
        name = self.repo.repo_name
        self.assertEqual(name, 'git-pandas')

    def test_branches(self):
        found = list(self.repo.branches()['branch'].values)
        for expected in ('master', 'gh-pages'):
            self.assertIn(expected, found)

    def test_tags(self):
        found = list(self.repo.tags()['tag'].values)
        for expected in ('0.0.1', '0.0.2'):
            self.assertIn(expected, found)

    def test_is_bare(self):
        bare = self.repo.is_bare()
        self.assertFalse(bare)
Esempio n. 2
0
class TestRemoteProperties(unittest.TestCase):
    """
    Checks basic properties of a Repository that is constructed from the remote git-pandas URL.

    Relying on a live remote is fragile; mocking the git layer, or keeping a known static repo
    next to these tests, would be a better long-term setup.

    """

    def setUp(self):
        # NOTE(review): GitHub has retired the unauthenticated git:// protocol - confirm this URL still works.
        remote_url = 'git://github.com/wdm0006/git-pandas.git'
        self.repo = Repository(working_dir=remote_url, verbose=True)

    def tearDown(self):
        # invoke the repository's __del__ directly instead of waiting for garbage collection
        self.repo.__del__()

    def test_repo_name(self):
        name = self.repo.repo_name
        self.assertEqual(name, 'git-pandas')

    def test_branches(self):
        found = list(self.repo.branches()['branch'].values)
        for expected in ('master', 'gh-pages'):
            self.assertIn(expected, found)

    def test_tags(self):
        found = list(self.repo.tags()['tag'].values)
        for expected in ('0.0.1', '0.0.2'):
            self.assertIn(expected, found)

    def test_is_bare(self):
        bare = self.repo.is_bare()
        self.assertFalse(bare)
Esempio n. 3
0
def repository(path):
    """
    Print a walkthrough of a repository: bare flag, commit history, committers,
    per-committer totals, file change history and per-extension totals.

    :param path: handed straight to ``Repository``
    :return: None, everything is written to stdout
    """
    excluded_globs = ['docs/*', 'tests/*', 'Data/*']
    repo = Repository(path)

    # bare or not
    print('\nRepo bare?')
    print(repo.is_bare())
    print('\n')

    # commit history for *.py, skipping the excluded globs
    commits = repo.commit_history('HEAD', limit=None, include_globs=['*.py'], ignore_globs=excluded_globs)
    print(commits.head(5))

    # distinct committers, one per line
    print('\nCommiters:')
    committers = set(commits['committer'].values)
    print(''.join(str(name) + '\n' for name in committers))
    print('\n')

    # summed lines / insertions / deletions per committer
    committer_cols = ['committer', 'lines', 'insertions', 'deletions']
    by_committer = commits.reindex(columns=committer_cols).groupby(['committer'])
    print(by_committer.agg({col: np.sum for col in ('lines', 'insertions', 'deletions')}))

    # file change history, tagged with whatever follows the last dot of the filename
    changes = repo.file_change_history('HEAD', limit=None, ignore_globs=excluded_globs)
    changes['ext'] = changes['filename'].map(lambda name: name.rsplit('.', 1)[-1])
    print(changes.head(50))

    # distinct extensions, one per line
    print('\nExtensions Found:')
    extensions = set(changes['ext'].values)
    print(''.join(str(ext) + '\n' for ext in extensions))
    print('\n')

    # summed insertions / deletions per extension
    by_extension = changes.reindex(columns=['ext', 'insertions', 'deletions']).groupby(['ext'])
    print(by_extension.agg({col: np.sum for col in ('insertions', 'deletions')}))
Esempio n. 4
0
def repository(path):
    """
    Print a walkthrough of a repository: bare flag, commit history, committers,
    per-committer totals, file change history and per-extension totals.

    :param path: handed straight to ``Repository``
    :return: None, everything is written to stdout
    """
    excluded_globs = ['docs/*', 'tests/*', 'Data/*']
    repo = Repository(path)

    # bare or not
    print('\nRepo bare?')
    print(repo.is_bare())
    print('\n')

    # commit history for *.py, skipping the excluded globs
    commits = repo.commit_history('HEAD', limit=None, include_globs=['*.py'], ignore_globs=excluded_globs)
    print(commits.head(5))

    # distinct committers, one per line
    print('\nCommiters:')
    committers = set(commits['committer'].values)
    print(''.join(str(name) + '\n' for name in committers))
    print('\n')

    # summed lines / insertions / deletions per committer
    committer_cols = ['committer', 'lines', 'insertions', 'deletions']
    by_committer = commits.reindex(columns=committer_cols).groupby(['committer'])
    print(by_committer.agg({col: np.sum for col in ('lines', 'insertions', 'deletions')}))

    # file change history, tagged with whatever follows the last dot of the filename
    changes = repo.file_change_history('HEAD', limit=None, ignore_globs=excluded_globs)
    changes['ext'] = changes['filename'].map(lambda name: name.rsplit('.', 1)[-1])
    print(changes.head(50))

    # distinct extensions, one per line
    print('\nExtensions Found:')
    extensions = set(changes['ext'].values)
    print(''.join(str(ext) + '\n' for ext in extensions))
    print('\n')

    # summed insertions / deletions per extension
    by_extension = changes.reindex(columns=['ext', 'insertions', 'deletions']).groupby(['ext'])
    print(by_extension.agg({col: np.sum for col in ('insertions', 'deletions')}))
Esempio n. 5
0
def author_minded(working_dir, frequency=None):
    """
    Summarise commit activity per author for one repository.

    For every distinct author in the commit history the result holds:

    * ``first`` / ``last``: smallest and largest index value of the author's commits
    * ``line_changes``: the author's summed ``lines`` divided by the total over all commits
    * ``commits``: the author's share of the total number of commits
    * ``median_commit_frequency``: median gap between consecutive commits, in days
    * ``max_dry_stretch``: largest gap between consecutive commits, in days
    * ``max_dayly_commit_run``: longest run of consecutive, equal day-gaps that are
      ``<= frequency``; 0 when no gap qualifies

    Authors with a single commit get ``nan`` for the three gap-based columns.

    :param working_dir: forwarded to ``Repository(working_dir=...)``
    :param frequency: ``numpy.timedelta64`` threshold for a gap to count towards a run,
        defaults to zero days
    :return: DataFrame indexed by author with the columns listed above
    """
    # numpy is imported as a module so its min/max do not shadow the builtins
    import numpy as np
    from pandas import DataFrame
    from gitpandas import Repository
    from itertools import groupby

    if frequency is None:
        frequency = np.timedelta64(0, 'D')

    repo = Repository(working_dir=working_dir)
    commits = repo.commit_history()
    authors = set(commits.author)

    tot_lines = float(commits.lines.sum())
    result = {
        'first': [],
        'last': [],
        'line_changes': [],
        'commits': [],
        'median_commit_frequency': [],
        'max_dry_stretch': [],
        'max_dayly_commit_run': []
    }
    for author in authors:
        specific = commits[commits.author == author]
        result['first'].append(specific.index.min())
        result['last'].append(specific.index.max())
        result['line_changes'].append(specific.lines.sum() / tot_lines)
        result['commits'].append(len(specific) / float(len(commits)))

        # gaps between consecutive commits, taken over the reversed index
        deriv = np.diff(specific.index[::-1])
        if len(deriv) == 0:
            result['median_commit_frequency'].append(np.nan)
            result['max_dry_stretch'].append(np.nan)
            result['max_dayly_commit_run'].append(np.nan)
        else:
            result['median_commit_frequency'].append(
                np.median(deriv).astype('timedelta64[D]'))
            result['max_dry_stretch'].append(
                np.max(deriv).astype('timedelta64[D]'))
            run_lengths = [
                len(list(u))
                for k, u in groupby(deriv.astype('timedelta64[D]'))
                if k <= frequency
            ]
            # an author whose gaps all exceed `frequency` has no runs at all;
            # taking max() of the empty list used to raise ValueError here
            result['max_dayly_commit_run'].append(
                max(run_lengths) if run_lengths else 0)

    return DataFrame(result, index=authors)
Esempio n. 6
0
def main(repo, out_dir, clobber_output, verbose):
    """
    Collect contributor and citation data for a repository and dump both as JSON.

    :param repo: repository location; a value starting with ``git@`` is first passed to
        ``Repository`` and then replaced by that object's ``git_dir``
    :param out_dir: output folder, ``None`` means ``OUT_SUBFOLDER`` under the current directory
    :param clobber_output: forwarded to ``make_output_folder`` as ``overwrite``
    :param verbose: when truthy the root logger is switched to DEBUG
    :return: None
    """
    import logging
    from gitpandas import Repository

    if verbose:
        logging.getLogger().setLevel(logging.DEBUG)

    if repo.startswith("git@"):
        logging.info("Cloning repo %s", repo)
        repository = Repository(working_dir=repo)
        repo = repository.git_dir
        logging.info("Repo located at %s", repo)

    if out_dir is None:
        out_dir = os.path.join(os.getcwd(), OUT_SUBFOLDER)

    verify_local_repo_location(repo)
    repo_name = os.path.basename(repo)
    make_output_folder(out_dir, overwrite=clobber_output)

    contributor_data = author_minded(repo)
    citation_data = pmc_data('SPSS')

    contributors_path = os.path.join(out_dir, 'contributor_data.json')
    logging.info("output path: %s", contributors_path)
    contributor_data.to_json(contributors_path, date_format='iso')

    citations_path = os.path.join(out_dir, 'citation_data.json')
    citation_data['citations'].to_json(citations_path)
Esempio n. 7
0
def author_minded(working_dir, frequency=None):
    """
    Summarise commit activity per author for one repository.

    For every distinct author in the commit history the result holds:

    * ``first`` / ``last``: smallest and largest index value of the author's commits
    * ``line_changes``: the author's summed ``lines`` divided by the total over all commits
    * ``commits``: the author's share of the total number of commits
    * ``median_commit_frequency``: median gap between consecutive commits, in days
    * ``max_dry_stretch``: largest gap between consecutive commits, in days
    * ``max_dayly_commit_run``: longest run of consecutive, equal day-gaps that are
      ``<= frequency``; 0 when no gap qualifies

    Authors with a single commit get ``nan`` for the three gap-based columns.

    :param working_dir: forwarded to ``Repository(working_dir=...)``
    :param frequency: ``numpy.timedelta64`` threshold for a gap to count towards a run,
        defaults to zero days
    :return: DataFrame indexed by author with the columns listed above
    """
    # numpy is imported as a module so its min/max do not shadow the builtins
    import numpy as np
    from pandas import DataFrame
    from gitpandas import Repository
    from itertools import groupby

    if frequency is None:
        frequency = np.timedelta64(0, 'D')

    repo = Repository(working_dir=working_dir)
    commits = repo.commit_history()
    authors = set(commits.author)

    tot_lines = float(commits.lines.sum())
    result = {'first': [], 'last': [], 'line_changes': [], 'commits': [],
              'median_commit_frequency': [], 'max_dry_stretch': [],
              'max_dayly_commit_run': []}
    for author in authors:
        specific = commits[commits.author == author]
        result['first'].append(specific.index.min())
        result['last'].append(specific.index.max())
        result['line_changes'].append(specific.lines.sum() / tot_lines)
        result['commits'].append(len(specific) / float(len(commits)))

        # gaps between consecutive commits, taken over the reversed index
        deriv = np.diff(specific.index[::-1])
        if len(deriv) == 0:
            result['median_commit_frequency'].append(np.nan)
            result['max_dry_stretch'].append(np.nan)
            result['max_dayly_commit_run'].append(np.nan)
        else:
            result['median_commit_frequency'].append(np.median(deriv).astype('timedelta64[D]'))
            result['max_dry_stretch'].append(np.max(deriv).astype('timedelta64[D]'))
            run_lengths = [
                len(list(u)) for k, u in groupby(deriv.astype('timedelta64[D]'))
                if k <= frequency
            ]
            # an author whose gaps all exceed `frequency` has no runs at all;
            # taking max() of the empty list used to raise ValueError here
            result['max_dayly_commit_run'].append(max(run_lengths) if run_lengths else 0)

    return DataFrame(result, index=authors)
Esempio n. 8
0
    def setUp(self):
        """
        Create a scratch git repository at ``repos/repository1`` beside this file.

        The repository gets one README commit followed by five commits that each add a
        ``file_<n>.py``; it is then wrapped in ``self.repo``.

        :return:
        """
        here = str(os.path.dirname(os.path.abspath(__file__)))
        project_dir = here + os.sep + 'repos'
        repo_dir = project_dir + os.sep + 'repository1'

        # wipe whatever an earlier run left behind, then recreate the folders
        if os.path.exists(project_dir):
            shutil.rmtree(project_dir)
        os.makedirs(project_dir)
        if not os.path.exists(repo_dir):
            os.makedirs(repo_dir)

        # an empty, non-bare repository
        grepo = git.Repo.init(repo_dir)

        # first commit: the README
        readme_path = repo_dir + os.sep + 'README.md'
        with open(readme_path, 'w') as readme:
            readme.write('Sample README for a sample project\n')
        grepo.git.add('README.md')
        grepo.git.commit(m='first commit')

        # five more commits, one new python file each
        for idx in range(5):
            source_path = repo_dir + os.sep + 'file_%d.py' % (idx, )
            with open(source_path, 'w') as source:
                source.write('import sys\nimport os\n')

            # presumably spaces out the commit timestamps - TODO confirm
            time.sleep(2.0)
            grepo.git.add(all=True)
            grepo.git.commit(m='adding file_%d.py' % (idx, ))

        self.repo = Repository(working_dir=repo_dir, verbose=True)
Esempio n. 9
0
def main():
    """
    Run every fetch step in order against the repository at ``../nixpkgs``,
    logging a message before each one.
    """
    repo = Repository(working_dir=os.path.abspath('../nixpkgs'))

    logger.info('fetching commit history')
    commit_history = fetch_commit_history(repo)

    # the hours estimate is derived from the commit history fetched above
    logger.info('fetching hours estimate')
    hours_estimate = fetch_hours_estimate(repo, commit_history)

    logger.info('fetching file change history')
    file_changes = fetch_file_change_history(repo)

    # the change rate is derived from the file change history fetched above
    logger.info('fetching file change rate')
    file_change_rate = fetch_file_change_rate(repo, file_changes)

    logger.info('fetching cumulative blame')
    cumulative_blame = fetch_cumulative_blame(repo)

    logger.info('fetching bus factor')
    bus_factor = fetch_bus_factor(repo)

    logger.info('fetching file owner')
    file_owner = fetch_file_owner(repo)

    logger.info('fetching punch card')
    punch_card = fetch_punch_card(repo)
Esempio n. 10
0
    def setUp(self):
        """
        Create a scratch git repository at ``repos/repository1`` beside this file.

        The repository gets one README commit followed by five commits that each add a
        ``file_<n>.py``; it is then wrapped in ``self.repo``.

        :return:
        """
        here = str(os.path.dirname(os.path.abspath(__file__)))
        project_dir = here + os.sep + 'repos'
        repo_dir = project_dir + os.sep + 'repository1'

        # wipe whatever an earlier run left behind, then recreate the folders
        if os.path.exists(project_dir):
            shutil.rmtree(project_dir)
        os.makedirs(project_dir)
        if not os.path.exists(repo_dir):
            os.makedirs(repo_dir)

        # an empty, non-bare repository
        grepo = git.Repo.init(repo_dir)

        # first commit: the README
        readme_path = repo_dir + os.sep + 'README.md'
        with open(readme_path, 'w') as readme:
            readme.write('Sample README for a sample project\n')
        grepo.git.add('README.md')
        grepo.git.commit(m='first commit')

        # five more commits, one new python file each
        for idx in range(5):
            source_path = repo_dir + os.sep + 'file_%d.py' % (idx, )
            with open(source_path, 'w') as source:
                source.write('import sys\nimport os\n')

            # presumably spaces out the commit timestamps - TODO confirm
            time.sleep(2.0)
            grepo.git.add(all=True)
            grepo.git.commit(m='adding file_%d.py' % (idx, ))

        self.repo = Repository(working_dir=repo_dir, verbose=True)
Esempio n. 11
0
from gitpandas import Repository

__author__ = 'willmcginnis'

if __name__ == '__main__':
    # print the blame table for lifelines' python files, requested with by='file'
    lifelines_url = 'git://github.com/CamDavidsonPilon/lifelines.git'
    lifelines_repo = Repository(working_dir=lifelines_url, verbose=True)
    blame_by_file = lifelines_repo.blame(extensions=['py'], committer=False, by='file')

    print(blame_by_file)
Esempio n. 12
0
import os
from gitpandas import Repository

__author__ = 'willmcginnis'

if __name__ == '__main__':
    # print file change rates (with coverage requested) for the sibling git-pandas checkout
    checkout = os.path.abspath('../../git-pandas')
    git_pandas = Repository(working_dir=checkout)
    change_rates = git_pandas.file_change_rates(extensions=['py'], coverage=True)
    print(change_rates)
Esempio n. 13
0
from gitpandas import Repository
import numpy as np
import lifelines
import matplotlib.pyplot as plt

__author__ = 'willmcginnis'

if __name__ == '__main__':
    # Builds a per-change table for lifelines' python files carrying the file owner,
    # a refactor flag and a day-resolution timestamp.

    # a change is flagged as a refactor when |insertions - deletions| exceeds this value
    threshold = 5
    repo = Repository(
        working_dir='git://github.com/CamDavidsonPilon/lifelines.git')
    fch = repo.file_change_history(limit=None, extensions=['py'])

    fch['file_owner'] = ''
    fch['refactor'] = 0
    # 24 * 3600 * 10**9 is nanoseconds per day; assumes the index is datetime-like so that
    # its int64 form is nanoseconds - TODO confirm
    fch['timestamp'] = fch.index.astype(np.int64) // (24 * 3600 * 10**9)
    fch['observed'] = False
    fch = fch.reindex()
    fch = fch.reset_index()

    # add in the file owner and whether or not each item is a refactor
    # (DataFrame.set_value was removed in pandas 1.0; .at is the scalar setter)
    for idx, row in fch.iterrows():
        fch.at[idx, 'file_owner'] = repo.file_owner(row.rev, row.filename)
        if abs(row.insertions - row.deletions) > threshold:
            fch.at[idx, 'refactor'] = 1
        else:
            fch.at[idx, 'refactor'] = 0

    # add in the time since column
    fch['time_until_refactor'] = 0
Esempio n. 14
0
from gitpandas import Repository
import numpy as np
import lifelines
import matplotlib.pyplot as plt
plt.style.use('ggplot')

__author__ = 'willmcginnis'

if __name__ == '__main__':
    # Builds a per-change table for scikit-learn's python files carrying the file owner,
    # a refactor flag and a day-resolution timestamp.

    # a change is flagged as a refactor when |insertions - deletions| exceeds this value
    threshold = 100
    repo = Repository(
        working_dir='git://github.com/scikit-learn/scikit-learn.git',
        verbose=True)
    fch = repo.file_change_history(limit=None, include_globs=['*.py'])

    fch['file_owner'] = ''
    fch['refactor'] = 0
    # 24 * 3600 * 10**9 is nanoseconds per day; assumes the index is datetime-like so that
    # its int64 form is nanoseconds - TODO confirm
    fch['timestamp'] = fch.index.astype(np.int64) // (24 * 3600 * 10**9)
    fch['observed'] = False
    fch = fch.reindex()
    fch = fch.reset_index()

    # add in the file owner and whether or not each item is a refactor
    # (DataFrame.set_value was removed in pandas 1.0; .at is the scalar setter)
    for idx, row in fch.iterrows():
        fch.at[idx, 'file_owner'] = repo.file_owner(row.rev, row.filename)
        if abs(row.insertions - row.deletions) > threshold:
            fch.at[idx, 'refactor'] = 1
        else:
            fch.at[idx, 'refactor'] = 0
Esempio n. 15
0
"""
Assumes that GitPython and pandas are in the same directory as this repo, and nothing else is in that directory.
"""

import os
from pandas import merge
from gitpandas import ProjectDirectory, Repository

__author__ = "willmcginnis"


if __name__ == "__main__":
    # report the committer count and bus factor for flask's python files
    repo = Repository(working_dir="git://github.com/mitsuhiko/flask.git")

    # do some blaming
    blame = repo.blame(extensions=["py"])

    # distinct committers found in master's history
    history = repo.commit_history("master", limit=None, extensions=["py"])
    committers = set(history["committer"].values)

    print("\tflask committers: %d" % (len(committers)))
    print("\tflask bus count:")
    bus_factor = repo.bus_factor(extensions=["py"])
    print(bus_factor)
Esempio n. 16
0
class TestLocalProperties(unittest.TestCase):
    """
    Exercises Repository against a small git repository that setUp creates on disk.

    """
    def setUp(self):
        """
        Create a scratch git repository at ``repos/repository1`` beside this file.

        The repository gets one README commit followed by five commits that each add a
        ``file_<n>.py``; it is then wrapped in ``self.repo``.

        :return:
        """
        here = str(os.path.dirname(os.path.abspath(__file__)))
        project_dir = here + os.sep + 'repos'
        repo_dir = project_dir + os.sep + 'repository1'

        # wipe whatever an earlier run left behind, then recreate the folders
        if os.path.exists(project_dir):
            shutil.rmtree(project_dir)
        os.makedirs(project_dir)
        if not os.path.exists(repo_dir):
            os.makedirs(repo_dir)

        # an empty, non-bare repository
        grepo = git.Repo.init(repo_dir)

        # first commit: the README
        readme_path = repo_dir + os.sep + 'README.md'
        with open(readme_path, 'w') as readme:
            readme.write('Sample README for a sample project\n')
        grepo.git.add('README.md')
        grepo.git.commit(m='first commit')

        # five more commits, one new python file each
        for idx in range(5):
            source_path = repo_dir + os.sep + 'file_%d.py' % (idx, )
            with open(source_path, 'w') as source:
                source.write('import sys\nimport os\n')

            # presumably spaces out the commit timestamps - TODO confirm
            time.sleep(2.0)
            grepo.git.add(all=True)
            grepo.git.commit(m='adding file_%d.py' % (idx, ))

        self.repo = Repository(working_dir=repo_dir, verbose=True)

    def tearDown(self):
        # call the repository's __del__ directly, then remove the scratch tree
        self.repo.__del__()
        here = str(os.path.dirname(os.path.abspath(__file__)))
        shutil.rmtree(here + os.sep + 'repos')

    def test_repo_name(self):
        name = self.repo._repo_name()
        self.assertEqual(name, 'repository1')

    def test_branches(self):
        found = list(self.repo.branches()['branch'].values)
        self.assertIn('master', found)

    def test_tags(self):
        found = list(self.repo.tags()['tag'].values)
        self.assertEqual(len(found), 0)

    def test_is_bare(self):
        bare = self.repo.is_bare()
        self.assertFalse(bare)

    def test_commit_history(self):
        repo = self.repo

        # commit history: setUp made 6 commits, 5 of which add python files
        commit_cases = [
            ({}, 6),
            ({'extensions': ['py']}, 5),
            ({'limit': 3}, 3),
            ({'days': 5}, 6),
        ]
        for kwargs, expected_rows in commit_cases:
            frame = repo.commit_history(branch='master', **kwargs)
            self.assertEqual(frame.shape[0], expected_rows)

        # file change history
        change_cases = [
            ({}, 6),
            ({'extensions': ['py']}, 5),
            ({'limit': 3}, 3),
        ]
        for kwargs, expected_rows in change_cases:
            frame = repo.file_change_history(branch='master', **kwargs)
            self.assertEqual(frame.shape[0], expected_rows)

        # file change rates
        rates = repo.file_change_rates(branch='master')
        self.assertEqual(rates.shape[0], 6)
        self.assertEqual(rates['unique_committers'].sum(), 6)
        self.assertEqual(rates['net_change'].sum(), 11)

        # we know this repo doesnt have coverage
        self.assertFalse(repo.has_coverage())

        # we know this repo only has one committer
        bus_factor = repo.bus_factor(by='repository')
        self.assertEqual(bus_factor['bus factor'].values[0], 1)

        # lets do some blaming
        blame = repo.blame(extensions=['py'])
        self.assertEqual(blame['loc'].sum(), 10)
        self.assertEqual(blame.shape[0], 1)

        cumulative = repo.cumulative_blame()
        self.assertEqual(cumulative.shape[0], 6)
        first_column = cumulative.columns.values[0]
        self.assertEqual(cumulative[first_column].sum(), 36)

        # revs
        rev_cases = [
            ({'num_datapoints': 2}, 2),
            ({'limit': 2}, 2),
            ({}, 6),
        ]
        for kwargs, expected_rows in rev_cases:
            frame = repo.revs(**kwargs)
            self.assertEqual(frame.shape[0], expected_rows)
Esempio n. 17
0
"""
Assumes that GitPython and pandas are in the same directory as this repo, and nothing else is in that directory.
"""

from gitpandas import Repository

__author__ = 'willmcginnis'


if __name__ == '__main__':
    # report the committer count and bus factor for flask's python files
    repo = Repository(working_dir='git://github.com/mitsuhiko/flask.git')

    # do some blaming
    blame = repo.blame(extensions=['py'])

    # distinct committers found in master's history
    history = repo.commit_history('master', limit=None, extensions=['py'])
    committers = set(history['committer'].values)

    print('\tflask committers: %d' % (len(committers)))
    print('\tflask bus count:')
    bus_factor = repo.bus_factor(extensions=['py'])
    print(bus_factor)
Esempio n. 18
0
import os
from gitpandas import Repository

__author__ = 'willmcginnis'


if __name__ == '__main__':
    # print file change rates (with coverage requested) for the sibling git-pandas checkout
    checkout = os.path.abspath('../../git-pandas')
    git_pandas = Repository(working_dir=checkout)
    change_rates = git_pandas.file_change_rates(include_globs=['*.py'], coverage=True)
    print(change_rates)
Esempio n. 19
0
from gitpandas import Repository
import numpy as np
import lifelines
import matplotlib.pyplot as plt
plt.style.use('ggplot')

__author__ = 'willmcginnis'


if __name__ == '__main__':
    # Builds a per-change table for scikit-learn's python files carrying the file owner,
    # a refactor flag and a day-resolution timestamp.

    # a change is flagged as a refactor when |insertions - deletions| exceeds this value
    threshold = 100
    repo = Repository(working_dir='git://github.com/scikit-learn/scikit-learn.git', verbose=True)
    fch = repo.file_change_history(limit=None, include_globs=['*.py'])

    fch['file_owner'] = ''
    fch['refactor'] = 0
    # 24 * 3600 * 10**9 is nanoseconds per day; assumes the index is datetime-like so that
    # its int64 form is nanoseconds - TODO confirm
    fch['timestamp'] = fch.index.astype(np.int64) // (24 * 3600 * 10**9)
    fch['observed'] = False
    fch = fch.reindex()
    fch = fch.reset_index()

    # add in the file owner and whether or not each item is a refactor
    # (DataFrame.set_value was removed in pandas 1.0; .at is the scalar setter)
    for idx, row in fch.iterrows():
        fch.at[idx, 'file_owner'] = repo.file_owner(row.rev, row.filename)
        if abs(row.insertions - row.deletions) > threshold:
            fch.at[idx, 'refactor'] = 1
        else:
            fch.at[idx, 'refactor'] = 0

    # add in the time since column
    fch['time_until_refactor'] = 0
Esempio n. 20
0
from gitpandas import Repository

__author__ = 'willmcginnis'


if __name__ == '__main__':
    # print the blame table for lifelines' python files, requested with by='file'
    lifelines_url = 'git://github.com/CamDavidsonPilon/lifelines.git'
    lifelines_repo = Repository(working_dir=lifelines_url, verbose=True)
    blame_by_file = lifelines_repo.blame(include_globs=['*.py'], committer=False, by='file')

    print(blame_by_file)
Esempio n. 21
0
from gitpandas import Repository
import numpy as np
import lifelines
import matplotlib.pyplot as plt

# Builds a per-change table for the race-management-system repository carrying the file
# owner, a refactor flag, a day-resolution timestamp and an "observed" marker.

# a change is flagged as a refactor when |insertions - deletions| exceeds this value
threshold = 20
repo = Repository(working_dir='git://github.com/ogr3/race-management-system.git', verbose=True)
fch = repo.file_change_history(limit=100000, extensions=['py', 'pyx', 'h', 'c', 'cpp', 'java', 'xml'])
fch['file_owner'] = ''
fch['refactor'] = 0
# 24 * 3600 * 10**9 is nanoseconds per day; assumes the index is datetime-like so that
# its int64 form is nanoseconds - TODO confirm
fch['timestamp'] = fch.index.astype(np.int64) // (24 * 3600 * 10**9)
fch['observed'] = False
fch = fch.reindex()
fch = fch.reset_index()

# add in the file owner and whether or not each item is a refactor
# (DataFrame.set_value was removed in pandas 1.0; .at is the scalar setter)
for idx, row in fch.iterrows():
    fch.at[idx, 'file_owner'] = repo.file_owner(row.rev, row.filename, committer=True)
    if abs(row.insertions - row.deletions) > threshold:
        fch.at[idx, 'refactor'] = 1
    else:
        fch.at[idx, 'refactor'] = 0

# add in the time since column
fch['time_until_refactor'] = 0
for idx, row in fch.iterrows():
    ts = None
    # later refactor-flagged changes to the same file
    chunk = fch[(fch['timestamp'] > row.timestamp) & (fch['refactor'] == 1) & (fch['filename'] == row.filename)]
    if chunk.shape[0] > 0:
        # earliest such refactor; the row is marked as observed
        ts = chunk['timestamp'].min()
        fch.at[idx, 'observed'] = True
Esempio n. 22
0
import os
from gitpandas import Repository

from definitions import GIT_PANDAS_DIR

__author__ = 'willmcginnis'


if __name__ == '__main__':
    # print file change rates (with coverage requested) for the checkout at GIT_PANDAS_DIR
    git_pandas = Repository(working_dir=GIT_PANDAS_DIR)
    change_rates = git_pandas.file_change_rates(include_globs=['*.py'], coverage=True)
    print(change_rates)
Esempio n. 23
0
import os
from gitpandas import Repository

__author__ = 'willmcginnis'


if __name__ == '__main__':
    # print file change rates (with coverage requested) for the sibling git-pandas checkout
    checkout = os.path.abspath('../../git-pandas')
    git_pandas = Repository(working_dir=checkout)
    change_rates = git_pandas.file_change_rates(extensions=['py'], coverage=True)
    print(change_rates)
Esempio n. 24
0
"""
Assumes that GitPython and pandas are in the same directory as this repo, and nothing else is in that directory.
"""

import os
from pandas import merge
from gitpandas import ProjectDirectory, Repository

__author__ = 'willmcginnis'

if __name__ == '__main__':
    # report the committer count and bus factor for flask's python files
    repo = Repository(working_dir='git://github.com/mitsuhiko/flask.git')

    # do some blaming
    blame = repo.blame(extensions=['py'])

    # distinct committers found in master's history
    history = repo.commit_history('master', limit=None, extensions=['py'])
    committers = set(history['committer'].values)

    print('\tflask committers: %d' % (len(committers)))
    print('\tflask bus count:')
    bus_factor = repo.bus_factor(extensions=['py'])
    print(bus_factor)
Esempio n. 25
0
 def setUp(self):
     """Create the Repository under test from the remote git-pandas URL."""
     remote_url = 'git://github.com/wdm0006/git-pandas.git'
     self.repo = Repository(working_dir=remote_url, verbose=True)
Esempio n. 26
0
class TestLocalProperties(unittest.TestCase):
    """
    Exercises Repository against a small git repository that setUp creates on disk.

    """

    def setUp(self):
        """
        Create a scratch git repository at ``repos/repository1`` beside this file.

        The repository gets one README commit followed by five commits that each add a
        ``file_<n>.py``; it is then wrapped in ``self.repo``.

        :return:
        """
        here = str(os.path.dirname(os.path.abspath(__file__)))
        project_dir = here + os.sep + 'repos'
        repo_dir = project_dir + os.sep + 'repository1'

        # wipe whatever an earlier run left behind, then recreate the folders
        if os.path.exists(project_dir):
            shutil.rmtree(project_dir)
        os.makedirs(project_dir)
        if not os.path.exists(repo_dir):
            os.makedirs(repo_dir)

        # an empty, non-bare repository
        grepo = git.Repo.init(repo_dir)

        # first commit: the README
        readme_path = repo_dir + os.sep + 'README.md'
        with open(readme_path, 'w') as readme:
            readme.write('Sample README for a sample project\n')
        grepo.git.add('README.md')
        grepo.git.commit(m='first commit')

        # five more commits, one new python file each
        for idx in range(5):
            source_path = repo_dir + os.sep + 'file_%d.py' % (idx, )
            with open(source_path, 'w') as source:
                source.write('import sys\nimport os\n')

            # presumably spaces out the commit timestamps - TODO confirm
            time.sleep(2.0)
            grepo.git.add(all=True)
            grepo.git.commit(m='adding file_%d.py' % (idx, ))

        self.repo = Repository(working_dir=repo_dir, verbose=True)

    def tearDown(self):
        # call the repository's __del__ directly, then remove the scratch tree
        self.repo.__del__()
        here = str(os.path.dirname(os.path.abspath(__file__)))
        shutil.rmtree(here + os.sep + 'repos')

    def test_repo_name(self):
        name = self.repo.repo_name
        self.assertEqual(name, 'repository1')

    def test_branches(self):
        found = list(self.repo.branches()['branch'].values)
        self.assertIn('master', found)

    def test_tags(self):
        found = list(self.repo.tags()['tag'].values)
        self.assertEqual(len(found), 0)

    def test_is_bare(self):
        bare = self.repo.is_bare()
        self.assertFalse(bare)

    def test_commit_history(self):
        repo = self.repo

        # commit history: setUp made 6 commits, 5 of which add python files
        commit_cases = [
            ({}, 6),
            ({'extensions': ['py']}, 5),
            ({'limit': 3}, 3),
            ({'days': 5}, 6),
        ]
        for kwargs, expected_rows in commit_cases:
            frame = repo.commit_history(branch='master', **kwargs)
            self.assertEqual(frame.shape[0], expected_rows)

        # file change history
        change_cases = [
            ({}, 6),
            ({'extensions': ['py']}, 5),
            ({'limit': 3}, 3),
        ]
        for kwargs, expected_rows in change_cases:
            frame = repo.file_change_history(branch='master', **kwargs)
            self.assertEqual(frame.shape[0], expected_rows)

        # file change rates
        rates = repo.file_change_rates(branch='master')
        self.assertEqual(rates.shape[0], 6)
        self.assertEqual(rates['unique_committers'].sum(), 6)
        self.assertEqual(rates['net_change'].sum(), 11)

        # we know this repo doesnt have coverage
        self.assertFalse(repo.has_coverage())

        # we know this repo only has one committer
        bus_factor = repo.bus_factor(by='repository')
        self.assertEqual(bus_factor['bus factor'].values[0], 1)

        # lets do some blaming
        blame = repo.blame(extensions=['py'])
        self.assertEqual(blame['loc'].sum(), 10)
        self.assertEqual(blame.shape[0], 1)

        cumulative = repo.cumulative_blame()
        self.assertEqual(cumulative.shape[0], 6)
        first_column = cumulative.columns.values[0]
        self.assertEqual(cumulative[first_column].sum(), 36)

        # revs
        rev_cases = [
            ({'num_datapoints': 2}, 2),
            ({'limit': 2}, 2),
            ({}, 6),
        ]
        for kwargs, expected_rows in rev_cases:
            frame = repo.revs(**kwargs)
            self.assertEqual(frame.shape[0], expected_rows)
Esempio n. 27
0
from gitpandas import Repository
import numpy as np
import lifelines
import matplotlib.pyplot as plt
import pandas as pd
plt.style.use('ggplot')

__author__ = 'willmcginnis'


if __name__ == '__main__':
    # Builds a per-change table for scikit-learn's python files carrying the file owner,
    # a refactor flag and a day-resolution timestamp.

    # a change is flagged as a refactor when |insertions - deletions| exceeds this value
    threshold = 100
    repo = Repository(working_dir='git://github.com/scikit-learn/scikit-learn.git', verbose=True)
    fch = repo.file_change_history(limit=None, extensions=['py'])

    fch['file_owner'] = ''
    fch['refactor'] = 0
    # 24 * 3600 * 10**9 is nanoseconds per day; assumes the index is datetime-like so that
    # its int64 form is nanoseconds - TODO confirm
    fch['timestamp'] = fch.index.astype(np.int64) // (24 * 3600 * 10**9)
    fch['observed'] = False
    fch = fch.reindex()
    fch = fch.reset_index()

    # add in the file owner and whether or not each item is a refactor
    # (DataFrame.set_value was removed in pandas 1.0; .at is the scalar setter)
    for idx, row in fch.iterrows():
        fch.at[idx, 'file_owner'] = repo.file_owner(row.rev, row.filename)
        if abs(row.insertions - row.deletions) > threshold:
            fch.at[idx, 'refactor'] = 1
        else:
            fch.at[idx, 'refactor'] = 0

    # add in the time since column
import matplotlib.pyplot as plt
import os
import json
from gitpandas import Repository, ProjectDirectory
import matplotlib
matplotlib.style.use('ggplot')

__author__ = 'willmcginnis'

if __name__ == '__main__':
    # plot the cumulative blame of the sibling git-pandas checkout as a stacked area chart
    checkout = os.path.abspath('../../git-pandas')
    repo = Repository(working_dir=checkout, verbose=True)

    blame_options = dict(branch='master', extensions=['py'], ignore_dir=['docs'], limit=None, skip=None)
    blame = repo.cumulative_blame(**blame_options)

    axes = blame.plot(kind='area', stacked=True)
    plt.title('Cumulative Blame')
    plt.xlabel('date')
    plt.ylabel('LOC')

    # shrink the axes to 80% of their width and anchor the legend at their right edge
    bounds = axes.get_position()
    axes.set_position([bounds.x0, bounds.y0, bounds.width * 0.8, bounds.height])
    axes.legend(loc='center left', bbox_to_anchor=(1, 0.5))
    plt.show()
Esempio n. 29
0
from gitpandas import Repository
import time

from definitions import GIT_PANDAS_DIR

__author__ = 'willmcginnis'


if __name__ == '__main__':
    # time cumulative_blame against parallel_cumulative_blame on the same inputs
    repo = Repository(working_dir=GIT_PANDAS_DIR)
    patterns = ('*.py', '*.html', '*.sql', '*.md')

    # plain call; each call receives its own fresh list of globs
    started = time.time()
    blame = repo.cumulative_blame(branch='master', include_globs=list(patterns), limit=None, skip=None)
    print(blame.head())
    print(time.time() - started)

    # parallel call with 4 workers
    started = time.time()
    blame = repo.parallel_cumulative_blame(
        branch='master', include_globs=list(patterns), limit=None, skip=None, workers=4)
    print(blame.head())
    print(time.time() - started)
Esempio n. 30
0
"""
Assumes that GitPython and pandas are in the same directory as this repo, and nothing else is in that directory.
"""

from gitpandas import Repository

__author__ = 'willmcginnis'

if __name__ == '__main__':
    # report the committer count and bus factor for flask's python files
    repo = Repository(working_dir='git://github.com/mitsuhiko/flask.git')

    # do some blaming
    blame = repo.blame(include_globs=['*.py'])

    # distinct committers found in master's history
    history = repo.commit_history('master', limit=None, include_globs=['*.py'])
    committers = set(history['committer'].values)

    print('\tflask committers: %d' % (len(committers)))
    print('\tflask bus count:')
    bus_factor = repo.bus_factor(include_globs=['*.py'])
    print(bus_factor)
Esempio n. 31
0
from gitpandas import Repository

__author__ = 'willmcginnis'


if __name__ == '__main__':
    # print the blame table for lifelines' python files, requested with by='file'
    lifelines_url = 'git://github.com/CamDavidsonPilon/lifelines.git'
    lifelines_repo = Repository(working_dir=lifelines_url, verbose=True)
    blame_by_file = lifelines_repo.blame(extensions=['py'], committer=False, by='file')

    print(blame_by_file)
Esempio n. 32
0
 def setUp(self):
     """Create the Repository under test from the remote git-pandas URL."""
     remote_url = 'git://github.com/wdm0006/git-pandas.git'
     self.repo = Repository(working_dir=remote_url, verbose=True)
import matplotlib.pyplot as plt
import os
import json
from gitpandas import Repository, ProjectDirectory
import matplotlib

matplotlib.style.use("ggplot")

__author__ = "willmcginnis"


if __name__ == "__main__":
    # plot the cumulative blame of the sibling git-pandas checkout as a stacked area chart
    checkout = os.path.abspath("../../git-pandas")
    repo = Repository(working_dir=checkout, verbose=True)

    blame_options = dict(branch="master", extensions=["py"], ignore_dir=["docs"], limit=None, skip=None)
    blame = repo.cumulative_blame(**blame_options)

    axes = blame.plot(kind="area", stacked=True)
    plt.title("Cumulative Blame")
    plt.xlabel("date")
    plt.ylabel("LOC")

    # shrink the axes to 80% of their width and anchor the legend at their right edge
    bounds = axes.get_position()
    axes.set_position([bounds.x0, bounds.y0, bounds.width * 0.8, bounds.height])
    axes.legend(loc="center left", bbox_to_anchor=(1, 0.5))
    plt.show()
Esempio n. 34
0
"""
Assumes that GitPython and pandas are in the same directory as this repo, and nothing else is in that directory.
"""

from gitpandas import Repository

__author__ = 'willmcginnis'

if __name__ == '__main__':
    # report the committer count and bus factor for flask's python files
    repo = Repository(working_dir='git://github.com/mitsuhiko/flask.git')

    # do some blaming
    blame = repo.blame(include_globs=['*.py'])

    # distinct committers found in master's history
    history = repo.commit_history('master', limit=None, include_globs=['*.py'])
    committers = set(history['committer'].values)

    print('\tflask committers: %d' % (len(committers)))
    print('\tflask bus count:')
    bus_factor = repo.bus_factor(include_globs=['*.py'])
    print(bus_factor)