Esempi in Python per Repository, esempi in Python per gitpandas.Repository

Esempio n. 1

0

Mostra file

class TestRemoteProperties(unittest.TestCase):
    """
    Property checks against a ``Repository`` constructed from the remote git-pandas URL.

    The fixture depends on a live remote; mocking the git layer or shipping a known
    static repository alongside the tests would be more robust.

    """

    def setUp(self):
        # NOTE(review): needs network access and a host that still serves the
        # git:// scheme -- confirm this URL can still be cloned.
        remote_url = 'git://github.com/wdm0006/git-pandas.git'
        self.repo = Repository(working_dir=remote_url, verbose=True)

    def tearDown(self):
        # Run the repository's finalizer explicitly after every test.
        self.repo.__del__()

    def test_repo_name(self):
        expected = 'git-pandas'
        self.assertEqual(self.repo.repo_name, expected)

    def test_branches(self):
        branch_frame = self.repo.branches()
        found = list(branch_frame['branch'].values)
        for name in ('master', 'gh-pages'):
            self.assertIn(name, found)

    def test_tags(self):
        tag_frame = self.repo.tags()
        found = list(tag_frame['tag'].values)
        for name in ('0.0.1', '0.0.2'):
            self.assertIn(name, found)

    def test_is_bare(self):
        bare = self.repo.is_bare()
        self.assertFalse(bare)

Esempio n. 2

0

Mostra file

File: test_properties.py Progetto: rvisio/git-pandas

class TestRemoteProperties(unittest.TestCase):
    """
    Property checks against a ``Repository`` constructed from the remote git-pandas URL.

    The fixture depends on a live remote; mocking the git layer or shipping a known
    static repository alongside the tests would be more robust.

    """

    def setUp(self):
        # NOTE(review): needs network access and a host that still serves the
        # git:// scheme -- confirm this URL can still be cloned.
        remote_url = 'git://github.com/wdm0006/git-pandas.git'
        self.repo = Repository(working_dir=remote_url, verbose=True)

    def tearDown(self):
        # Run the repository's finalizer explicitly after every test.
        self.repo.__del__()

    def test_repo_name(self):
        expected = 'git-pandas'
        self.assertEqual(self.repo.repo_name, expected)

    def test_branches(self):
        branch_frame = self.repo.branches()
        found = list(branch_frame['branch'].values)
        for name in ('master', 'gh-pages'):
            self.assertIn(name, found)

    def test_tags(self):
        tag_frame = self.repo.tags()
        found = list(tag_frame['tag'].values)
        for name in ('0.0.1', '0.0.2'):
            self.assertIn(name, found)

    def test_is_bare(self):
        bare = self.repo.is_bare()
        self.assertFalse(bare)

Esempio n. 3

0

Mostra file

File: commit_history.py Progetto: rlugojr/git-pandas

def repository(path):
    """
    Print a summary of the repository found at ``path``.

    Printed in order: whether the repo is bare, the first rows of the commit
    history (queried with ``include_globs=['*.py']``), the distinct committers,
    per-committer sums of lines/insertions/deletions, the first rows of the
    file change history, the distinct file extensions, and per-extension sums
    of insertions/deletions.

    :param path: passed straight to ``Repository``
    :return: None
    """
    # globs excluded from both history queries
    excluded = [
        'docs/*',
        'tests/*',
        'Data/*'
    ]
    repo = Repository(path)

    # bare check
    print('\nRepo bare?')
    print(repo.is_bare())
    print('\n')

    # commit history
    commits = repo.commit_history('HEAD', limit=None, include_globs=['*.py'], ignore_globs=excluded)
    print(commits.head(5))

    # one committer per line
    print('\nCommiters:')
    committers = set(commits['committer'].values)
    print(''.join(str(name) + '\n' for name in committers))
    print('\n')

    # per-committer totals
    committer_totals = {
        'lines': np.sum,
        'insertions': np.sum,
        'deletions': np.sum
    }
    per_committer = commits.reindex(columns=['committer', 'lines', 'insertions', 'deletions'])
    per_committer = per_committer.groupby(['committer']).agg(committer_totals)
    print(per_committer)

    # file change history, tagged with the text after the last dot of each filename
    changes = repo.file_change_history('HEAD', limit=None, ignore_globs=excluded)
    changes['ext'] = changes['filename'].map(lambda name: name.split('.')[-1])
    print(changes.head(50))

    # one extension per line
    print('\nExtensions Found:')
    extensions = set(changes['ext'].values)
    print(''.join(str(ext) + '\n' for ext in extensions))
    print('\n')

    # per-extension totals
    extension_totals = {
        'insertions': np.sum,
        'deletions': np.sum
    }
    per_extension = changes.reindex(columns=['ext', 'insertions', 'deletions'])
    per_extension = per_extension.groupby(['ext']).agg(extension_totals)
    print(per_extension)

Esempio n. 4

0

Mostra file

File: commit_history.py Progetto: wdm0006/git-pandas

def repository(path):
    """
    Print a summary of the repository found at ``path``.

    Printed in order: whether the repo is bare, the first rows of the commit
    history (queried with ``include_globs=['*.py']``), the distinct committers,
    per-committer sums of lines/insertions/deletions, the first rows of the
    file change history, the distinct file extensions, and per-extension sums
    of insertions/deletions.

    :param path: passed straight to ``Repository``
    :return: None
    """
    # globs excluded from both history queries
    excluded = [
        'docs/*',
        'tests/*',
        'Data/*'
    ]
    repo = Repository(path)

    # bare check
    print('\nRepo bare?')
    print(repo.is_bare())
    print('\n')

    # commit history
    commits = repo.commit_history('HEAD', limit=None, include_globs=['*.py'], ignore_globs=excluded)
    print(commits.head(5))

    # one committer per line
    print('\nCommiters:')
    committers = set(commits['committer'].values)
    print(''.join(str(name) + '\n' for name in committers))
    print('\n')

    # per-committer totals
    committer_totals = {
        'lines': np.sum,
        'insertions': np.sum,
        'deletions': np.sum
    }
    per_committer = commits.reindex(columns=['committer', 'lines', 'insertions', 'deletions'])
    per_committer = per_committer.groupby(['committer']).agg(committer_totals)
    print(per_committer)

    # file change history, tagged with the text after the last dot of each filename
    changes = repo.file_change_history('HEAD', limit=None, ignore_globs=excluded)
    changes['ext'] = changes['filename'].map(lambda name: name.split('.')[-1])
    print(changes.head(50))

    # one extension per line
    print('\nExtensions Found:')
    extensions = set(changes['ext'].values)
    print(''.join(str(ext) + '\n' for ext in extensions))
    print('\n')

    # per-extension totals
    extension_totals = {
        'insertions': np.sum,
        'deletions': np.sum
    }
    per_extension = changes.reindex(columns=['ext', 'insertions', 'deletions'])
    per_extension = per_extension.groupby(['ext']).agg(extension_totals)
    print(per_extension)

Esempio n. 5

0

Mostra file

def author_minded(working_dir, frequency=None):
    """Build a per-author activity summary for a git repository.

    Args:
        working_dir: Passed to ``gitpandas.Repository`` as ``working_dir``.
        frequency: ``numpy.timedelta64`` upper bound a day-gap must not exceed
            to count towards a commit run. Defaults to zero days.

    Returns:
        pandas.DataFrame indexed by author with the columns:

        * ``first`` / ``last``: smallest and largest commit-history index
          value for the author (presumably the commit date -- confirm against
          ``Repository.commit_history``).
        * ``line_changes``: the author's share of the summed ``lines`` column.
        * ``commits``: the author's share of all commit-history rows.
        * ``median_commit_frequency`` / ``max_dry_stretch``: median and largest
          gap between the author's consecutive index values, in whole days;
          NaN when the author has a single row.
        * ``max_dayly_commit_run``: length of the longest run of consecutive,
          equal day-gaps that do not exceed ``frequency``; 0 when no gap
          qualifies and NaN when the author has a single row.
    """
    from itertools import groupby

    import numpy as np
    from gitpandas import Repository
    from pandas import DataFrame

    if frequency is None:
        frequency = np.timedelta64(0, 'D')

    repo = Repository(working_dir=working_dir)
    commits = repo.commit_history()
    # A list rather than a set: the same sequence drives both the loop and the
    # index, and recent pandas versions reject a set as an index.
    authors = list(commits.author.unique())

    tot_lines = float(commits.lines.sum())
    result = {
        'first': [],
        'last': [],
        'line_changes': [],
        'commits': [],
        'median_commit_frequency': [],
        'max_dry_stretch': [],
        'max_dayly_commit_run': []
    }
    for author in authors:
        specific = commits[commits.author == author]
        result['first'].append(specific.index.min())
        result['last'].append(specific.index.max())
        result['line_changes'].append(specific.lines.sum() / tot_lines)
        result['commits'].append(len(specific) / float(len(commits)))

        # gaps between consecutive index values, taken over the reversed index
        deriv = np.diff(specific.index[::-1])
        if len(deriv) == 0:
            # a single row has no gaps to measure
            result['median_commit_frequency'].append(np.nan)
            result['max_dry_stretch'].append(np.nan)
            result['max_dayly_commit_run'].append(np.nan)
            continue

        result['median_commit_frequency'].append(
            np.median(deriv).astype('timedelta64[D]'))
        result['max_dry_stretch'].append(
            np.max(deriv).astype('timedelta64[D]'))

        # groupby() collapses consecutive equal day-gaps into one run each
        run_lengths = [
            len(list(run))
            for gap, run in groupby(deriv.astype('timedelta64[D]'))
            if gap <= frequency
        ]
        # max() of an empty list raises; report 0 when no gap qualifies
        result['max_dayly_commit_run'].append(
            max(run_lengths) if run_lengths else 0)

    return DataFrame(result, index=authors)

Esempio n. 6

0

Mostra file

File: commit_opener.py Progetto: lbillingham/commit_opener

def main(repo, out_dir, clobber_output, verbose):
    """
    Write contributor and citation data for ``repo`` as JSON files.

    :param repo: repository location; a value starting with ``git@`` is handed
        to ``Repository`` first and replaced by that object's ``git_dir``
    :param out_dir: output folder; ``None`` selects ``OUT_SUBFOLDER`` under the
        current working directory
    :param clobber_output: forwarded to ``make_output_folder`` as ``overwrite``
    :param verbose: when truthy, the root logger level is set to DEBUG
    :return: None
    """
    import logging
    from gitpandas import Repository

    if verbose:
        logging.getLogger().setLevel(logging.DEBUG)

    if repo.startswith("git@"):
        logging.info("Cloning repo %s" % repo)
        # Keep the object bound to a local for the rest of the function
        # (presumably its clone is cleaned up once it is collected -- confirm).
        repository = Repository(working_dir=repo)
        repo = repository.git_dir
        logging.info("Repo located at %s" % repo)

    if out_dir is None:
        out_dir = os.path.join(os.getcwd(), OUT_SUBFOLDER)

    verify_local_repo_location(repo)
    repo_name = os.path.basename(repo)  # not used further in this function
    make_output_folder(out_dir, overwrite=clobber_output)

    contributor_data = author_minded(repo)
    # NOTE(review): the query term is hard-coded -- confirm this is intended.
    citation_data = pmc_data('SPSS')

    contributor_path = os.path.join(out_dir, 'contributor_data.json')
    logging.info("output path: %s" % contributor_path)
    contributor_data.to_json(contributor_path, date_format='iso')

    citation_path = os.path.join(out_dir, 'citation_data.json')
    citation_data['citations'].to_json(citation_path)

Esempio n. 7

0

Mostra file

File: tree_scrape.py Progetto: lbillingham/commit_opener

def author_minded(working_dir, frequency=None):
    """Build a per-author activity summary for a git repository.

    Args:
        working_dir: Passed to ``gitpandas.Repository`` as ``working_dir``.
        frequency: ``numpy.timedelta64`` upper bound a day-gap must not exceed
            to count towards a commit run. Defaults to zero days.

    Returns:
        pandas.DataFrame indexed by author with the columns:

        * ``first`` / ``last``: smallest and largest commit-history index
          value for the author (presumably the commit date -- confirm against
          ``Repository.commit_history``).
        * ``line_changes``: the author's share of the summed ``lines`` column.
        * ``commits``: the author's share of all commit-history rows.
        * ``median_commit_frequency`` / ``max_dry_stretch``: median and largest
          gap between the author's consecutive index values, in whole days;
          NaN when the author has a single row.
        * ``max_dayly_commit_run``: length of the longest run of consecutive,
          equal day-gaps that do not exceed ``frequency``; 0 when no gap
          qualifies and NaN when the author has a single row.
    """
    from itertools import groupby

    import numpy as np
    from gitpandas import Repository
    from pandas import DataFrame

    if frequency is None:
        frequency = np.timedelta64(0, 'D')

    repo = Repository(working_dir=working_dir)
    commits = repo.commit_history()
    # A list rather than a set: the same sequence drives both the loop and the
    # index, and recent pandas versions reject a set as an index.
    authors = list(commits.author.unique())

    tot_lines = float(commits.lines.sum())
    result = {'first': [], 'last': [], 'line_changes': [], 'commits': [],
              'median_commit_frequency': [], 'max_dry_stretch': [],
              'max_dayly_commit_run': []}
    for author in authors:
        specific = commits[commits.author == author]
        result['first'].append(specific.index.min())
        result['last'].append(specific.index.max())
        result['line_changes'].append(specific.lines.sum() / tot_lines)
        result['commits'].append(len(specific) / float(len(commits)))

        # gaps between consecutive index values, taken over the reversed index
        deriv = np.diff(specific.index[::-1])
        if len(deriv) == 0:
            # a single row has no gaps to measure
            result['median_commit_frequency'].append(np.nan)
            result['max_dry_stretch'].append(np.nan)
            result['max_dayly_commit_run'].append(np.nan)
            continue

        result['median_commit_frequency'].append(np.median(deriv).astype('timedelta64[D]'))
        result['max_dry_stretch'].append(np.max(deriv).astype('timedelta64[D]'))

        # groupby() collapses consecutive equal day-gaps into one run each
        run_lengths = [
            len(list(run)) for gap, run in groupby(deriv.astype('timedelta64[D]'))
            if gap <= frequency
        ]
        # max() of an empty list raises; report 0 when no gap qualifies
        result['max_dayly_commit_run'].append(max(run_lengths) if run_lengths else 0)

    return DataFrame(result, index=authors)

Esempio n. 8

0

Mostra file

File: test_properties.py Progetto: willingc/git-pandas

    def setUp(self):
        """
        Create ``repos/repository1`` next to this file, fill it with six commits
        and wrap it in a ``Repository`` stored on ``self.repo``.

        :return:
        """
        here = str(os.path.dirname(os.path.abspath(__file__)))
        project_dir = here + os.sep + 'repos'
        repo_dir = project_dir + os.sep + 'repository1'

        # always start from an empty ``repos`` folder
        if os.path.exists(project_dir):
            shutil.rmtree(project_dir)
        os.makedirs(project_dir)

        if not os.path.exists(repo_dir):
            os.makedirs(repo_dir)

        # create an empty repo (but not bare)
        grepo = git.Repo.init(repo_dir)

        # first commit: a README
        readme_path = repo_dir + os.sep + 'README.md'
        with open(readme_path, 'w') as readme:
            readme.write('Sample README for a sample project\n')
        grepo.git.add('README.md')
        grepo.git.commit(m='first commit')

        # five more commits, one new python file each
        for idx in range(5):
            source_path = repo_dir + os.sep + 'file_%d.py' % (idx, )
            with open(source_path, 'w') as source:
                source.write('import sys\nimport os\n')

            # NOTE(review): presumably spaces out the commit timestamps -- confirm
            time.sleep(2.0)
            grepo.git.add(all=True)
            grepo.git.commit(m='adding file_%d.py' % (idx, ))

        self.repo = Repository(working_dir=repo_dir, verbose=True)

Esempio n. 9

0

Mostra file

File: __init__.py Progetto: mayflower/github-stats-miner

def main():
    """Run each fetch step against the ``../nixpkgs`` checkout, logging before every step."""
    repo = Repository(working_dir=os.path.abspath('../nixpkgs'))

    logger.info('fetching commit history')
    commit_history = fetch_commit_history(repo)

    # the hours estimate is derived from the commit history fetched above
    logger.info('fetching hours estimate')
    hours_estimate = fetch_hours_estimate(repo, commit_history)

    logger.info('fetching file change history')
    file_change_history = fetch_file_change_history(repo)

    # the change rate is derived from the file change history fetched above
    logger.info('fetching file change rate')
    file_change_rate = fetch_file_change_rate(repo, file_change_history)

    logger.info('fetching cumulative blame')
    cumulative_blame = fetch_cumulative_blame(repo)

    logger.info('fetching bus factor')
    bus_factor = fetch_bus_factor(repo)

    logger.info('fetching file owner')
    file_owner = fetch_file_owner(repo)

    logger.info('fetching punch card')
    punch_card = fetch_punch_card(repo)

Esempio n. 10

0

Mostra file

File: test_properties.py Progetto: rvisio/git-pandas

    def setUp(self):
        """
        Create ``repos/repository1`` next to this file, fill it with six commits
        and wrap it in a ``Repository`` stored on ``self.repo``.

        :return:
        """
        here = str(os.path.dirname(os.path.abspath(__file__)))
        project_dir = here + os.sep + 'repos'
        repo_dir = project_dir + os.sep + 'repository1'

        # always start from an empty ``repos`` folder
        if os.path.exists(project_dir):
            shutil.rmtree(project_dir)
        os.makedirs(project_dir)

        if not os.path.exists(repo_dir):
            os.makedirs(repo_dir)

        # create an empty repo (but not bare)
        grepo = git.Repo.init(repo_dir)

        # first commit: a README
        readme_path = repo_dir + os.sep + 'README.md'
        with open(readme_path, 'w') as readme:
            readme.write('Sample README for a sample project\n')
        grepo.git.add('README.md')
        grepo.git.commit(m='first commit')

        # five more commits, one new python file each
        for idx in range(5):
            source_path = repo_dir + os.sep + 'file_%d.py' % (idx, )
            with open(source_path, 'w') as source:
                source.write('import sys\nimport os\n')

            # NOTE(review): presumably spaces out the commit timestamps -- confirm
            time.sleep(2.0)
            grepo.git.add(all=True)
            grepo.git.commit(m='adding file_%d.py' % (idx, ))

        self.repo = Repository(working_dir=repo_dir, verbose=True)

Esempio n. 11

0

Mostra file

File: cloud_repo.py Progetto: willingc/git-pandas

from gitpandas import Repository

__author__ = 'willmcginnis'

if __name__ == '__main__':
    # Construct the repository object directly from a remote URL.
    # NOTE(review): needs a host that still serves the git:// scheme -- confirm.
    remote_url = 'git://github.com/CamDavidsonPilon/lifelines.git'
    repo = Repository(working_dir=remote_url, verbose=True)

    # Options forwarded to ``blame``.
    blame_options = {'extensions': ['py'], 'committer': False, 'by': 'file'}
    shared_blame = repo.blame(**blame_options)

    print(shared_blame)

Esempio n. 12

0

Mostra file

import os
from gitpandas import Repository

__author__ = 'willmcginnis'

if __name__ == '__main__':
    # The checkout is resolved relative to the current working directory.
    checkout = os.path.abspath('../../git-pandas')
    repo = Repository(working_dir=checkout)

    # Options forwarded to ``file_change_rates``.
    rate_options = {'extensions': ['py'], 'coverage': True}
    fc = repo.file_change_rates(**rate_options)
    print(fc)

Esempio n. 13

0

Mostra file

File: lifeline.py Progetto: samalallover/git-pandas

from gitpandas import Repository
import numpy as np
import lifelines
import matplotlib.pyplot as plt

__author__ = 'willmcginnis'

if __name__ == '__main__':
    # A change is flagged as a refactor when |insertions - deletions| exceeds this.
    threshold = 5
    repo = Repository(
        working_dir='git://github.com/CamDavidsonPilon/lifelines.git')
    fch = repo.file_change_history(limit=None, extensions=['py'])

    fch['file_owner'] = ''
    fch['refactor'] = 0
    # assumes a nanosecond-resolution datetime index, giving whole days -- TODO confirm
    fch['timestamp'] = fch.index.astype(np.int64) // (24 * 3600 * 10**9)
    fch['observed'] = False
    # move the index into a column so rows are addressed by position
    fch = fch.reset_index()

    # add in the file owner and whether or not each item is a refactor
    # (``.at`` replaces ``DataFrame.set_value``, which pandas no longer provides)
    for idx, row in fch.iterrows():
        fch.at[idx, 'file_owner'] = repo.file_owner(row.rev, row.filename)
        is_refactor = abs(row.insertions - row.deletions) > threshold
        fch.at[idx, 'refactor'] = 1 if is_refactor else 0

    # add in the time since column
    fch['time_until_refactor'] = 0

Esempio n. 14

0

Mostra file

File: lifeline.py Progetto: wdm0006/git-pandas

from gitpandas import Repository
import numpy as np
import lifelines
import matplotlib.pyplot as plt
plt.style.use('ggplot')

__author__ = 'willmcginnis'

if __name__ == '__main__':
    # A change is flagged as a refactor when |insertions - deletions| exceeds this.
    threshold = 100
    repo = Repository(
        working_dir='git://github.com/scikit-learn/scikit-learn.git',
        verbose=True)
    fch = repo.file_change_history(limit=None, include_globs=['*.py'])

    fch['file_owner'] = ''
    fch['refactor'] = 0
    # assumes a nanosecond-resolution datetime index, giving whole days -- TODO confirm
    fch['timestamp'] = fch.index.astype(np.int64) // (24 * 3600 * 10**9)
    fch['observed'] = False
    # move the index into a column so rows are addressed by position
    fch = fch.reset_index()

    # add in the file owner and whether or not each item is a refactor
    # (``.at`` replaces ``DataFrame.set_value``, which pandas no longer provides)
    for idx, row in fch.iterrows():
        fch.at[idx, 'file_owner'] = repo.file_owner(row.rev, row.filename)
        is_refactor = abs(row.insertions - row.deletions) > threshold
        fch.at[idx, 'refactor'] = 1 if is_refactor else 0

Esempio n. 15

0

Mostra file

File: bus_analysis.py Progetto: samalallover/git-pandas

"""
Assumes that GitPython and pandas are in the same directory as this repo, and nothing else is in that directory.
"""

import os
from pandas import merge
from gitpandas import ProjectDirectory, Repository

__author__ = "willmcginnis"


if __name__ == "__main__":
    # NOTE(review): needs a host that still serves the git:// scheme -- confirm.
    remote_url = "git://github.com/mitsuhiko/flask.git"
    flask_repo = Repository(working_dir=remote_url)

    # blame the python sources (the result is not used further in this script)
    flask_blame = flask_repo.blame(extensions=["py"])

    # commit history of master, used to count the distinct committers
    flask_ch = flask_repo.commit_history("master", limit=None, extensions=["py"])
    committers = set(flask_ch["committer"].values)

    print("\tflask committers: %d" % (len(committers)))
    print("\tflask bus count:")
    bus_factor = flask_repo.bus_factor(extensions=["py"])
    print(bus_factor)

Esempio n. 16

0

Mostra file

File: test_properties.py Progetto: willingc/git-pandas

class TestLocalProperties(unittest.TestCase):
    """
    Exercises ``Repository`` against a small git repository created on disk in ``setUp``.

    """
    def setUp(self):
        """
        Create ``repos/repository1`` next to this file, fill it with six commits
        and wrap it in a ``Repository`` stored on ``self.repo``.

        :return:
        """
        here = str(os.path.dirname(os.path.abspath(__file__)))
        project_dir = here + os.sep + 'repos'
        repo_dir = project_dir + os.sep + 'repository1'

        # always start from an empty ``repos`` folder
        if os.path.exists(project_dir):
            shutil.rmtree(project_dir)
        os.makedirs(project_dir)

        if not os.path.exists(repo_dir):
            os.makedirs(repo_dir)

        # create an empty repo (but not bare)
        grepo = git.Repo.init(repo_dir)

        # first commit: a README
        readme_path = repo_dir + os.sep + 'README.md'
        with open(readme_path, 'w') as readme:
            readme.write('Sample README for a sample project\n')
        grepo.git.add('README.md')
        grepo.git.commit(m='first commit')

        # five more commits, one new python file each
        for idx in range(5):
            source_path = repo_dir + os.sep + 'file_%d.py' % (idx, )
            with open(source_path, 'w') as source:
                source.write('import sys\nimport os\n')

            # NOTE(review): presumably spaces out the commit timestamps -- confirm
            time.sleep(2.0)
            grepo.git.add(all=True)
            grepo.git.commit(m='adding file_%d.py' % (idx, ))

        self.repo = Repository(working_dir=repo_dir, verbose=True)

    def tearDown(self):
        # finalize the repository object, then remove the whole ``repos`` folder
        self.repo.__del__()
        here = str(os.path.dirname(os.path.abspath(__file__)))
        shutil.rmtree(here + os.sep + 'repos')

    def test_repo_name(self):
        name = self.repo._repo_name()
        self.assertEqual(name, 'repository1')

    def test_branches(self):
        branch_frame = self.repo.branches()
        found = list(branch_frame['branch'].values)
        self.assertIn('master', found)

    def test_tags(self):
        tag_frame = self.repo.tags()
        found = list(tag_frame['tag'].values)
        self.assertEqual(len(found), 0)

    def test_is_bare(self):
        bare = self.repo.is_bare()
        self.assertFalse(bare)

    def test_commit_history(self):
        repo = self.repo

        # commit history: six commits in total, five of them adding python files
        self.assertEqual(repo.commit_history(branch='master').shape[0], 6)
        self.assertEqual(
            repo.commit_history(branch='master', extensions=['py']).shape[0], 5)
        self.assertEqual(
            repo.commit_history(branch='master', limit=3).shape[0], 3)
        self.assertEqual(
            repo.commit_history(branch='master', days=5).shape[0], 6)

        # file change history mirrors the commit history counts
        self.assertEqual(repo.file_change_history(branch='master').shape[0], 6)
        self.assertEqual(
            repo.file_change_history(branch='master',
                                     extensions=['py']).shape[0], 5)
        self.assertEqual(
            repo.file_change_history(branch='master', limit=3).shape[0], 3)

        rates = repo.file_change_rates(branch='master')
        self.assertEqual(rates.shape[0], 6)
        self.assertEqual(rates['unique_committers'].sum(), 6)
        self.assertEqual(rates['net_change'].sum(), 11)

        # we know this repo doesnt have coverage
        self.assertFalse(repo.has_coverage())

        # we know this repo only has one committer
        bus = repo.bus_factor(by='repository')
        self.assertEqual(bus['bus factor'].values[0], 1)

        # lets do some blaming
        py_blame = repo.blame(extensions=['py'])
        self.assertEqual(py_blame['loc'].sum(), 10)
        self.assertEqual(py_blame.shape[0], 1)

        cumulative = repo.cumulative_blame()
        self.assertEqual(cumulative.shape[0], 6)
        first_column = cumulative.columns.values[0]
        self.assertEqual(cumulative[first_column].sum(), 36)

        # revs under different limits, checked in the same order as before
        rev_cases = (
            ({'num_datapoints': 2}, 2),
            ({'limit': 2}, 2),
            ({}, 6),
        )
        for kwargs, expected_rows in rev_cases:
            self.assertEqual(repo.revs(**kwargs).shape[0], expected_rows)

Esempio n. 17

0

Mostra file

File: bus_analysis.py Progetto: rvisio/git-pandas

"""
Assumes that GitPython and pandas are in the same directory as this repo, and nothing else is in that directory.
"""

from gitpandas import Repository

__author__ = 'willmcginnis'


if __name__ == '__main__':
    # NOTE(review): needs a host that still serves the git:// scheme -- confirm.
    remote_url = 'git://github.com/mitsuhiko/flask.git'
    flask_repo = Repository(working_dir=remote_url)

    # blame the python sources (the result is not used further in this script)
    flask_blame = flask_repo.blame(extensions=['py'])

    # commit history of master, used to count the distinct committers
    flask_ch = flask_repo.commit_history('master', limit=None, extensions=['py'])
    committers = set(flask_ch['committer'].values)

    print('\tflask committers: %d' % (len(committers)))
    print('\tflask bus count:')
    bus_factor = flask_repo.bus_factor(extensions=['py'])
    print(bus_factor)

Esempio n. 18

0

Mostra file

File: file_change_rates.py Progetto: rlugojr/git-pandas

import os
from gitpandas import Repository

__author__ = 'willmcginnis'


if __name__ == '__main__':
    # The checkout is resolved relative to the current working directory.
    checkout = os.path.abspath('../../git-pandas')
    repo = Repository(working_dir=checkout)

    # Options forwarded to ``file_change_rates``.
    rate_options = {'include_globs': ['*.py'], 'coverage': True}
    fc = repo.file_change_rates(**rate_options)
    print(fc)

Esempio n. 19

0

Mostra file

File: lifeline.py Progetto: rlugojr/git-pandas

from gitpandas import Repository
import numpy as np
import lifelines
import matplotlib.pyplot as plt
plt.style.use('ggplot')

__author__ = 'willmcginnis'


if __name__ == '__main__':
    # A change is flagged as a refactor when |insertions - deletions| exceeds this.
    threshold = 100
    repo = Repository(working_dir='git://github.com/scikit-learn/scikit-learn.git', verbose=True)
    fch = repo.file_change_history(limit=None, include_globs=['*.py'])

    fch['file_owner'] = ''
    fch['refactor'] = 0
    # assumes a nanosecond-resolution datetime index, giving whole days -- TODO confirm
    fch['timestamp'] = fch.index.astype(np.int64) // (24 * 3600 * 10**9)
    fch['observed'] = False
    # move the index into a column so rows are addressed by position
    fch = fch.reset_index()

    # add in the file owner and whether or not each item is a refactor
    # (``.at`` replaces ``DataFrame.set_value``, which pandas no longer provides)
    for idx, row in fch.iterrows():
        fch.at[idx, 'file_owner'] = repo.file_owner(row.rev, row.filename)
        is_refactor = abs(row.insertions - row.deletions) > threshold
        fch.at[idx, 'refactor'] = 1 if is_refactor else 0

    # add in the time since column
    fch['time_until_refactor'] = 0

Esempio n. 20

0

Mostra file

File: cloud_repo.py Progetto: rlugojr/git-pandas

from gitpandas import Repository

__author__ = 'willmcginnis'


if __name__ == '__main__':
    # Construct the repository object directly from a remote URL.
    # NOTE(review): needs a host that still serves the git:// scheme -- confirm.
    remote_url = 'git://github.com/CamDavidsonPilon/lifelines.git'
    repo = Repository(working_dir=remote_url, verbose=True)

    # Options forwarded to ``blame``.
    blame_options = {'include_globs': ['*.py'], 'committer': False, 'by': 'file'}
    shared_blame = repo.blame(**blame_options)

    print(shared_blame)

Esempio n. 21

0

Mostra file

File: gitpandatest.py Progetto: seppaleinen/gitanalyze

from gitpandas import Repository
import numpy as np
import lifelines
import matplotlib.pyplot as plt

# A change is flagged as a refactor when |insertions - deletions| exceeds this.
threshold = 20
repo = Repository(working_dir='git://github.com/ogr3/race-management-system.git', verbose=True)
fch = repo.file_change_history(limit=100000, extensions=['py', 'pyx', 'h', 'c', 'cpp', 'java', 'xml'])
fch['file_owner'] = ''
fch['refactor'] = 0
# assumes a nanosecond-resolution datetime index, giving whole days -- TODO confirm
fch['timestamp'] = fch.index.astype(np.int64) // (24 * 3600 * 10**9)
fch['observed'] = False
# move the index into a column so rows are addressed by position
fch = fch.reset_index()

# add in the file owner and whether or not each item is a refactor
# (``.at`` replaces ``DataFrame.set_value``, which pandas no longer provides)
for idx, row in fch.iterrows():
    fch.at[idx, 'file_owner'] = repo.file_owner(row.rev, row.filename, committer=True)
    is_refactor = abs(row.insertions - row.deletions) > threshold
    fch.at[idx, 'refactor'] = 1 if is_refactor else 0

# add in the time since column
fch['time_until_refactor'] = 0
for idx, row in fch.iterrows():
    ts = None
    # later refactor rows for the same file
    later_refactors = (
        (fch['timestamp'] > row.timestamp)
        & (fch['refactor'] == 1)
        & (fch['filename'] == row.filename)
    )
    chunk = fch[later_refactors]
    if chunk.shape[0] > 0:
        # earliest later refactor; ``ts`` is not consumed in the visible code
        ts = chunk['timestamp'].min()
        fch.at[idx, 'observed'] = True

Esempio n. 22

0

Mostra file

import os
from gitpandas import Repository

from definitions import GIT_PANDAS_DIR

__author__ = 'willmcginnis'


if __name__ == '__main__':
    repo = Repository(working_dir=GIT_PANDAS_DIR)

    # Options forwarded to ``file_change_rates``.
    rate_options = {'include_globs': ['*.py'], 'coverage': True}
    fc = repo.file_change_rates(**rate_options)
    print(fc)

Esempio n. 23

0

Mostra file

File: file_change_rates.py Progetto: Cophy08/git-pandas

import os
from gitpandas import Repository

__author__ = 'willmcginnis'


if __name__ == '__main__':
    # The checkout is resolved relative to the current working directory.
    checkout = os.path.abspath('../../git-pandas')
    repo = Repository(working_dir=checkout)

    # Options forwarded to ``file_change_rates``.
    rate_options = {'extensions': ['py'], 'coverage': True}
    fc = repo.file_change_rates(**rate_options)
    print(fc)

Esempio n. 24

0

Mostra file

File: bus_analysis.py Progetto: willingc/git-pandas

"""
Assumes that GitPython and pandas are in the same directory as this repo, and nothing else is in that directory.
"""

import os
from pandas import merge
from gitpandas import ProjectDirectory, Repository

__author__ = 'willmcginnis'

if __name__ == '__main__':
    # NOTE(review): needs a host that still serves the git:// scheme -- confirm.
    remote_url = 'git://github.com/mitsuhiko/flask.git'
    flask_repo = Repository(working_dir=remote_url)

    # blame the python sources (the result is not used further in this script)
    flask_blame = flask_repo.blame(extensions=['py'])

    # commit history of master, used to count the distinct committers
    flask_ch = flask_repo.commit_history('master', limit=None, extensions=['py'])
    committers = set(flask_ch['committer'].values)

    print('\tflask committers: %d' % (len(committers)))
    print('\tflask bus count:')
    bus_factor = flask_repo.bus_factor(extensions=['py'])
    print(bus_factor)

Esempio n. 25

0

Mostra file

File: test_properties.py Progetto: willingc/git-pandas

 def setUp(self):
     """Store a ``Repository`` for the remote git-pandas URL on ``self.repo``."""
     remote_url = 'git://github.com/wdm0006/git-pandas.git'
     self.repo = Repository(working_dir=remote_url, verbose=True)

Esempio n. 26

0

Mostra file

File: test_properties.py Progetto: rvisio/git-pandas

class TestLocalProperties(unittest.TestCase):
    """
    Exercises ``Repository`` against a small git repository created on disk in ``setUp``.

    """

    def setUp(self):
        """
        Create ``repos/repository1`` next to this file, fill it with six commits
        and wrap it in a ``Repository`` stored on ``self.repo``.

        :return:
        """
        here = str(os.path.dirname(os.path.abspath(__file__)))
        project_dir = here + os.sep + 'repos'
        repo_dir = project_dir + os.sep + 'repository1'

        # always start from an empty ``repos`` folder
        if os.path.exists(project_dir):
            shutil.rmtree(project_dir)
        os.makedirs(project_dir)

        if not os.path.exists(repo_dir):
            os.makedirs(repo_dir)

        # create an empty repo (but not bare)
        grepo = git.Repo.init(repo_dir)

        # first commit: a README
        readme_path = repo_dir + os.sep + 'README.md'
        with open(readme_path, 'w') as readme:
            readme.write('Sample README for a sample project\n')
        grepo.git.add('README.md')
        grepo.git.commit(m='first commit')

        # five more commits, one new python file each
        for idx in range(5):
            source_path = repo_dir + os.sep + 'file_%d.py' % (idx, )
            with open(source_path, 'w') as source:
                source.write('import sys\nimport os\n')

            # NOTE(review): presumably spaces out the commit timestamps -- confirm
            time.sleep(2.0)
            grepo.git.add(all=True)
            grepo.git.commit(m='adding file_%d.py' % (idx, ))

        self.repo = Repository(working_dir=repo_dir, verbose=True)

    def tearDown(self):
        # finalize the repository object, then remove the whole ``repos`` folder
        self.repo.__del__()
        here = str(os.path.dirname(os.path.abspath(__file__)))
        shutil.rmtree(here + os.sep + 'repos')

    def test_repo_name(self):
        name = self.repo.repo_name
        self.assertEqual(name, 'repository1')

    def test_branches(self):
        branch_frame = self.repo.branches()
        found = list(branch_frame['branch'].values)
        self.assertIn('master', found)

    def test_tags(self):
        tag_frame = self.repo.tags()
        found = list(tag_frame['tag'].values)
        self.assertEqual(len(found), 0)

    def test_is_bare(self):
        bare = self.repo.is_bare()
        self.assertFalse(bare)

    def test_commit_history(self):
        repo = self.repo

        # commit history: six commits in total, five of them adding python files
        self.assertEqual(repo.commit_history(branch='master').shape[0], 6)
        self.assertEqual(repo.commit_history(branch='master', extensions=['py']).shape[0], 5)
        self.assertEqual(repo.commit_history(branch='master', limit=3).shape[0], 3)
        self.assertEqual(repo.commit_history(branch='master', days=5).shape[0], 6)

        # file change history mirrors the commit history counts
        self.assertEqual(repo.file_change_history(branch='master').shape[0], 6)
        self.assertEqual(repo.file_change_history(branch='master', extensions=['py']).shape[0], 5)
        self.assertEqual(repo.file_change_history(branch='master', limit=3).shape[0], 3)

        rates = repo.file_change_rates(branch='master')
        self.assertEqual(rates.shape[0], 6)
        self.assertEqual(rates['unique_committers'].sum(), 6)
        self.assertEqual(rates['net_change'].sum(), 11)

        # we know this repo doesnt have coverage
        self.assertFalse(repo.has_coverage())

        # we know this repo only has one committer
        bus = repo.bus_factor(by='repository')
        self.assertEqual(bus['bus factor'].values[0], 1)

        # lets do some blaming
        py_blame = repo.blame(extensions=['py'])
        self.assertEqual(py_blame['loc'].sum(), 10)
        self.assertEqual(py_blame.shape[0], 1)

        cumulative = repo.cumulative_blame()
        self.assertEqual(cumulative.shape[0], 6)
        first_column = cumulative.columns.values[0]
        self.assertEqual(cumulative[first_column].sum(), 36)

        # revs under different limits, checked in the same order as before
        rev_cases = (
            ({'num_datapoints': 2}, 2),
            ({'limit': 2}, 2),
            ({}, 6),
        )
        for kwargs, expected_rows in rev_cases:
            self.assertEqual(repo.revs(**kwargs).shape[0], expected_rows)

Esempio n. 27

0

Mostra file

File: lifeline.py Progetto: Cophy08/git-pandas

from gitpandas import Repository
import numpy as np
import lifelines
import matplotlib.pyplot as plt
import pandas as pd
plt.style.use('ggplot')

__author__ = 'willmcginnis'


if __name__ == '__main__':
    # A change is flagged as a refactor when |insertions - deletions| exceeds this.
    threshold = 100
    repo = Repository(working_dir='git://github.com/scikit-learn/scikit-learn.git', verbose=True)
    fch = repo.file_change_history(limit=None, extensions=['py'])

    fch['file_owner'] = ''
    fch['refactor'] = 0
    # assumes a nanosecond-resolution datetime index, giving whole days -- TODO confirm
    fch['timestamp'] = fch.index.astype(np.int64) // (24 * 3600 * 10**9)
    fch['observed'] = False
    # move the index into a column so rows are addressed by position
    fch = fch.reset_index()

    # add in the file owner and whether or not each item is a refactor
    # (``.at`` replaces ``DataFrame.set_value``, which pandas no longer provides)
    for idx, row in fch.iterrows():
        fch.at[idx, 'file_owner'] = repo.file_owner(row.rev, row.filename)
        is_refactor = abs(row.insertions - row.deletions) > threshold
        fch.at[idx, 'refactor'] = 1 if is_refactor else 0

    # add in the time since column

Esempio n. 28

0

Mostra file

File: cumulative_repo_blame.py Progetto: samalallover/git-pandas

import matplotlib.pyplot as plt
import os
import json
from gitpandas import Repository, ProjectDirectory
import matplotlib
matplotlib.style.use('ggplot')

__author__ = 'willmcginnis'

if __name__ == '__main__':
    # The checkout is resolved relative to the current working directory.
    checkout = os.path.abspath('../../git-pandas')
    g = Repository(working_dir=checkout, verbose=True)

    b = g.cumulative_blame(
        branch='master',
        extensions=['py'],
        ignore_dir=['docs'],
        limit=None,
        skip=None,
    )

    # stacked area chart of the blame frame
    ax = b.plot(kind='area', stacked=True)
    plt.title('Cumulative Blame')
    plt.xlabel('date')
    plt.ylabel('LOC')

    # shrink the axes to 80% of their width and anchor the legend to the right of them
    frame = ax.get_position()
    shrunk_width = frame.width * 0.8
    ax.set_position([frame.x0, frame.y0, shrunk_width, frame.height])
    ax.legend(loc='center left', bbox_to_anchor=(1, 0.5))
    plt.show()

Esempio n. 29

0

Mostra file

File: parallel_blame.py Progetto: wdm0006/git-pandas

from gitpandas import Repository
import time

from definitions import GIT_PANDAS_DIR

__author__ = 'willmcginnis'


if __name__ == '__main__':
    g = Repository(working_dir=GIT_PANDAS_DIR)
    globs = ['*.py', '*.html', '*.sql', '*.md']

    # timed run of cumulative_blame
    st = time.time()
    blame = g.cumulative_blame(
        branch='master', include_globs=list(globs), limit=None, skip=None)
    print(blame.head())
    elapsed = time.time() - st
    print(elapsed)

    # timed run of parallel_cumulative_blame with four workers
    st = time.time()
    blame = g.parallel_cumulative_blame(
        branch='master', include_globs=list(globs), limit=None, skip=None, workers=4)
    print(blame.head())
    elapsed = time.time() - st
    print(elapsed)

Esempio n. 30

0

Mostra file

File: bus_analysis.py Progetto: wdm0006/git-pandas

"""
Assumes that GitPython and pandas are in the same directory as this repo, and nothing else is in that directory.
"""

from gitpandas import Repository

__author__ = 'willmcginnis'

if __name__ == '__main__':
    # NOTE(review): needs a host that still serves the git:// scheme -- confirm.
    remote_url = 'git://github.com/mitsuhiko/flask.git'
    flask_repo = Repository(working_dir=remote_url)

    # blame the python sources (the result is not used further in this script)
    flask_blame = flask_repo.blame(include_globs=['*.py'])

    # commit history of master, used to count the distinct committers
    flask_ch = flask_repo.commit_history('master', limit=None, include_globs=['*.py'])
    committers = set(flask_ch['committer'].values)

    print('\tflask committers: %d' % (len(committers)))
    print('\tflask bus count:')
    bus_factor = flask_repo.bus_factor(include_globs=['*.py'])
    print(bus_factor)

Esempio n. 31

0

Mostra file

File: cloud_repo.py Progetto: Cophy08/git-pandas

from gitpandas import Repository

__author__ = 'willmcginnis'


if __name__ == '__main__':
    # Construct the repository object directly from a remote URL.
    # NOTE(review): needs a host that still serves the git:// scheme -- confirm.
    remote_url = 'git://github.com/CamDavidsonPilon/lifelines.git'
    repo = Repository(working_dir=remote_url, verbose=True)

    # Options forwarded to ``blame``.
    blame_options = {'extensions': ['py'], 'committer': False, 'by': 'file'}
    shared_blame = repo.blame(**blame_options)

    print(shared_blame)

Esempio n. 32

0

Mostra file

File: test_properties.py Progetto: rvisio/git-pandas

 def setUp(self):
     """Store a ``Repository`` for the remote git-pandas URL on ``self.repo``."""
     remote_url = 'git://github.com/wdm0006/git-pandas.git'
     self.repo = Repository(working_dir=remote_url, verbose=True)

Esempio n. 33

0

Mostra file

File: cumulative_repo_blame.py Progetto: samalallover/git-pandas

import matplotlib.pyplot as plt
import os
import json
from gitpandas import Repository, ProjectDirectory
import matplotlib

matplotlib.style.use("ggplot")

__author__ = "willmcginnis"


if __name__ == "__main__":
    # The checkout is resolved relative to the current working directory.
    checkout = os.path.abspath("../../git-pandas")
    g = Repository(working_dir=checkout, verbose=True)

    b = g.cumulative_blame(
        branch="master",
        extensions=["py"],
        ignore_dir=["docs"],
        limit=None,
        skip=None,
    )

    # stacked area chart of the blame frame
    ax = b.plot(kind="area", stacked=True)
    plt.title("Cumulative Blame")
    plt.xlabel("date")
    plt.ylabel("LOC")

    # shrink the axes to 80% of their width and anchor the legend to the right of them
    frame = ax.get_position()
    shrunk_width = frame.width * 0.8
    ax.set_position([frame.x0, frame.y0, shrunk_width, frame.height])
    ax.legend(loc="center left", bbox_to_anchor=(1, 0.5))
    plt.show()

Esempio n. 34

0

Mostra file

File: bus_analysis.py Progetto: wdm0006/git-pandas

"""
Assumes that GitPython and pandas are in the same directory as this repo, and nothing else is in that directory.
"""

from gitpandas import Repository

__author__ = 'willmcginnis'

if __name__ == '__main__':
    # NOTE(review): needs a host that still serves the git:// scheme -- confirm.
    remote_url = 'git://github.com/mitsuhiko/flask.git'
    flask_repo = Repository(working_dir=remote_url)

    # blame the python sources (the result is not used further in this script)
    flask_blame = flask_repo.blame(include_globs=['*.py'])

    # commit history of master, used to count the distinct committers
    flask_ch = flask_repo.commit_history('master', limit=None, include_globs=['*.py'])
    committers = set(flask_ch['committer'].values)

    print('\tflask committers: %d' % (len(committers)))
    print('\tflask bus count:')
    bus_factor = flask_repo.bus_factor(include_globs=['*.py'])
    print(bus_factor)