Example no. 1
    def test_fetch(self):
        """Test whether commits are fetched from a Git repository"""

        new_path = os.path.join(self.tmp_path, 'newgit')

        git = Git(self.git_path, new_path)
        commits = [commit for commit in git.fetch()]

        expected = [('bc57a9209f096a130dcc5ba7089a8663f758a703', 1344965413.0),
                    ('87783129c3f00d2c81a3a8e585eb86a47e39891a', 1344965535.0),
                    ('7debcf8a2f57f86663809c58b5c07a398be7674c', 1344965607.0),
                    ('c0d66f92a95e31c77be08dc9d0f11a16715d1885', 1344965702.0),
                    ('c6ba8f7a1058db3e6b4bc6f1090e932b107605fb', 1344966351.0),
                    ('589bb080f059834829a2a5955bebfd7c2baa110a', 1344967441.0),
                    ('ce8e0b86a1e9877f42fe9453ede418519115f367', 1392185269.0),
                    ('51a3b654f252210572297f47597b31527c475fb8', 1392185366.0),
                    ('456a68ee1407a77f3e804a30dff245bb6c6b872f', 1392185439.0)]

        self.assertEqual(len(commits), len(expected))

        for x in range(len(commits)):
            expected_uuid = uuid(self.git_path, expected[x][0])
            commit = commits[x]
            self.assertEqual(commit['data']['commit'], expected[x][0])
            self.assertEqual(commit['origin'], self.git_path)
            self.assertEqual(commit['uuid'], expected_uuid)
            self.assertEqual(commit['updated_on'], expected[x][1])
            self.assertEqual(commit['category'], 'commit')
            self.assertEqual(commit['tag'], self.git_path)

        shutil.rmtree(new_path)
Example no. 2
def commit_counter(own, repo_name, d1, df, k):
    # URL for the git repo to analyze
    # repo_url = 'https://github.com/kmn5409/INFO1601.git'
    print("Owner\t\tRepository")
    print(own, "\t", repo_name)
    repo_url = 'https://github.com/' + own + '/' + repo_name + '.git'
    # directory for letting Perceval clone the git repo
    repo_dir = '/tmp/' + repo_name + '.git'

    # create a Git object, pointing to repo_url, using repo_dir for cloning
    repo = Git(uri=repo_url, gitpath=repo_dir)
    count = 0
    # fetch all commits as an iterator and count the recent ones
    mon = [
        "Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct",
        "Nov", "Dec"
    ]
    for commit in repo.fetch():
        temp = commit['data']['CommitDate'].split(" ")
        day = int(temp[2])
        month = temp[1]
        month_num = mon.index(month) + 1
        year = int(temp[4])
        if isrecent(d1, day, month_num, year):
            count += 1
    print("Number of commits: ", count)
    df['Number of commits'][k] = count
    return count
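The month lookup above can also be avoided by letting datetime parse the whole CommitDate string, using the same format that Example no. 6 below applies to AuthorDate. A minimal sketch under that assumption (the function name is made up for illustration; isrecent is assumed to be the same helper used by commit_counter):

from datetime import datetime

from perceval.backends.core.git import Git


def commit_counter_strptime(own, repo_name, d1):
    """Variant of commit_counter that parses CommitDate with strptime."""
    repo_url = 'https://github.com/' + own + '/' + repo_name + '.git'
    repo_dir = '/tmp/' + repo_name + '.git'
    repo = Git(uri=repo_url, gitpath=repo_dir)
    count = 0
    for commit in repo.fetch():
        # CommitDate looks like "Tue Aug 14 14:30:13 2012 -0300"
        when = datetime.strptime(commit['data']['CommitDate'],
                                 '%a %b %d %H:%M:%S %Y %z')
        # isrecent is assumed to behave as in the original example
        if isrecent(d1, when.day, when.month, when.year):
            count += 1
    return count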
Example no. 3
    def list_committers(self, url, directory):
        """Return the committers of a git repository, ordered by commit and
        including repeated entries, so that contributions per gender can be
        counted."""
        repo = Git(uri=url, gitpath=directory)
        list_committers = []
        for user in repo.fetch():
            committer = self.removeMail(user['data']['Author'])
            list_committers.append(committer)
        return list_committers
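removeMail is not defined in this snippet; a hypothetical helper, assuming Perceval's Author field has the usual "Name <email>" shape, could strip the address like this:

    def removeMail(self, author):
        # "Jane Doe <jane@example.org>" -> "Jane Doe"
        # hypothetical implementation; the real removeMail may differ
        return author.split(' <')[0].strip()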
Example no. 4
    def test_fetch_empty_log(self):
        """Test whether it parsers an empty log"""

        new_path = os.path.join(self.tmp_path, 'newgit')

        from_date = datetime.datetime(2020, 1, 1, 1, 1, 1)
        git = Git(self.git_path, new_path)
        commits = [commit for commit in git.fetch(from_date=from_date)]

        self.assertListEqual(commits, [])
Example no. 5
    def test_fetch_from_empty_repository(self):
        """Test whether it parses from empty repository"""

        new_path = os.path.join(self.tmp_path, 'newgit')

        git = Git(self.git_empty_path, new_path)
        commits = [commit for commit in git.fetch()]

        self.assertListEqual(commits, [])

        shutil.rmtree(new_path)
Example no. 6
def run(args):
    """
    """  
    repo_url = args['git_repository']
    period_length = args['period']
    threshold = args['threshold']
    active_days = True

    # directory for letting Perceval clone the git repo
    # TODO: this path is specific to Linux-like operating systems; it should be made portable
    repo_dir = '/tmp/' + repo_url.split('/')[-1] + '.git'

    first_commit = datetime.now(timezone.utc)
    authorDict = defaultdict(list)

    repo = Git(uri=repo_url, gitpath=repo_dir)

    for commit in repo.fetch():
        commitdate = datetime.strptime(commit['data']['AuthorDate'], '%a %b %d %H:%M:%S %Y %z')
        if commitdate < first_commit:
            first_commit = commitdate
        authorDict[commit['data']['Author']].append(commitdate)
    logging.info("Authors found: " + str(len(authorDict)))

    simplemerge(authorDict)
    logging.info("Authors after merge: " + str(len(authorDict)))
    
    author_count = author_counting(authorDict, period_length, active_days)
#    print(author_count)
    (effort_periods, full_time_periods, non_full_time_periods) = project_period_effort(author_count, threshold, period_length)
    maxeffort_periods = project_period_maxeffort(author_count, period_length)

    # Printing results
    print()
    print("CONFIGURATIONS:")
    print("  Length of period (in months):", period_length)
    print("  Threshold t (in commits in a period):", threshold)
    print()
    print("RESULTS:")
    print("  First commit date:", first_commit, "--", round((datetime.now(timezone.utc)-first_commit).days/30, 2) , "months ago")
    print("  Maximum possible development effort (in person-months):", sum(maxeffort_periods.values()))
    print()
    print(pretty_print_period(period_length, first_commit, ["FT", "Non-FT", "Effort"], full_time_periods, non_full_time_periods, effort_periods))
    print(" " * 8, "FT: Full-time developers")
    print()
    print("  ---> Estimated development effort (in person-months):", round(sum(effort_periods.values()), 2))
    print()
    print("For more information, visit http://github.com/gregoriorobles/git2effort")
    print()
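run() expects its parameters in a dictionary, as the first lines of the function show; a hypothetical call, with illustrative values for the period length (in months) and the commit threshold (both values are assumptions):

args = {
    'git_repository': 'http://github.com/grimoirelab/perceval.git',  # any git repo URL
    'period': 6,      # length of each period, in months (assumed value)
    'threshold': 75,  # commits per period to count a developer as full-time (assumed value)
}
run(args)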
Example no. 7
    def test_get_elastic_items_filter(self):
        """Test whether the elastic method works properly with filter"""

        perceval_backend = Git('/tmp/perceval_mc84igfc/gittest', '/tmp/foo')
        elastic = ElasticSearch(self.es_con, self.target_index, GitOcean.mapping)

        # Load items
        items = json.loads(read_file('data/git.json'))
        ocean = GitOcean(perceval_backend)
        ocean.elastic = elastic
        ocean.feed_items(items)

        filter = {
            "name": "uuid",
            "value": [
                "43f217b2f678a5691fdbc5c6c5302243e79e5a90",
                "00ee6902e34b309cd05706c26e3e195a62492f60"
            ]
        }

        eitems = ElasticItems(perceval_backend)
        eitems.elastic = elastic
        r_json = eitems.get_elastic_items(_filter=filter)
        hits = r_json['hits']['hits']

        self.assertEqual(len(hits), 2)
        self.assertEqual(hits[0]['_source']['uuid'], "43f217b2f678a5691fdbc5c6c5302243e79e5a90")
        self.assertEqual(hits[1]['_source']['uuid'], "00ee6902e34b309cd05706c26e3e195a62492f60")
Example no. 8
def get_commits(username, reponame, commits, config):
    """
    TODO: Add docstring. See: https://realpython.com/documenting-python-code/
    TODO: Implement recursion argument, default to False.

    Parameters
    ==========

    `username` : str, required
    `reponame` : str, required
    `commits` : list, required
 
    Raises
    ======

    Nothing: Perceval's RepositoryError is caught and logged as a warning.
    """
    
    repo_URL = 'https://github.com/' + username + '/' + reponame

    # Check whether the export dir exists and, if not, create it.
    # TODO: this snippet is used three times; it should become a helper function.
    local_dir = os.path.join(config["data_dir"],'grimoire_dumps')
    if not os.path.isdir(local_dir):
        os.makedirs(local_dir)
    data_dump_path = os.path.join(local_dir, username + '-' + reponame)

    git = Git(repo_URL, data_dump_path)
    
    # `fetch()` gets commits from all branches by default.
    # It returns an iterator of dictionaries, where the `data` key in each
    # dictionary contains the actual metadata for each commit.
    # The other keys hold metadata about the Perceval `fetch()` operation.
    try:
        repo_fetched = [commit for commit in git.fetch()]

        # issue 33 (very ugly) band aid: delete *.pack files once downloaded by perceval
        shutil.rmtree(os.path.join(data_dump_path, 'objects','pack'), ignore_errors=True)
        
        # Keep just commit `data`
        for commit_data in repo_fetched:
            commits.append(commit_data["data"])
    except RepositoryError as repo_error:
        logging.warning("Error with this repository: %s/%s (%s)", username, reponame, repo_error)
Example no. 9
    def test_initialization(self):
        """Test whether attributes are initializated"""

        git = Git('http://example.com', self.git_path, tag='test')

        self.assertEqual(git.uri, 'http://example.com')
        self.assertEqual(git.gitpath, self.git_path)
        self.assertEqual(git.origin, 'http://example.com')
        self.assertEqual(git.tag, 'test')

        # When tag is empty or None it will be set to
        # the value in uri
        git = Git('http://example.com', self.git_path)
        self.assertEqual(git.origin, 'http://example.com')
        self.assertEqual(git.tag, 'http://example.com')

        git = Git('http://example.com', self.git_path, tag='')
        self.assertEqual(git.origin, 'http://example.com')
        self.assertEqual(git.tag, 'http://example.com')
Example no. 10
    def test_git_encoding_error(self):
        """Test if encoding errors are escaped when a git log is parsed"""

        commits = Git.parse_git_log_from_file("data/git/git_bad_encoding.txt")
        result = [commit for commit in commits]

        self.assertEqual(len(result), 1)

        commit = result[0]
        self.assertEqual(commit['commit'], 'cb24e4f2f7b2a7f3450bfb15d1cbaa97371e93fb')
        self.assertEqual(commit['message'], 'Calling \udc93Open Type\udc94 (CTRL+SHIFT+T) after startup - performance improvement.')
Example no. 11
    def test_git_cr_error(self):
        """Test if mislocated carriage return chars do not break lines

        In some commit messages, carriage return characters (\r) are found
        in unexpected places. They should not be mistaken for end-of-line markers.

        Before fixing, this test raises an exception:
        "perceval.errors.ParseError: commit expected on line 10"

        """
        commits = Git.parse_git_log_from_file("data/git/git_bad_cr.txt")
        result = [commit for commit in commits]
        self.assertEqual(len(result), 1)
Example no. 12
    def test_get_elastic_items_error(self):
        """Test whether a message is logged if an error occurs when getting items from an index"""

        items = json.loads(read_file('data/git.json'))
        perceval_backend = Git('/tmp/perceval_mc84igfc/gittest', '/tmp/foo')
        elastic = ElasticSearch(self.es_con, self.target_index, GitOcean.mapping)
        elastic.bulk_upload(items, field_id="uuid")

        # Load items
        eitems = ElasticItems(perceval_backend)
        eitems.elastic = elastic

        with self.assertLogs(logger, level='DEBUG') as cm:
            r_json = eitems.get_elastic_items()
            self.assertIsNone(r_json)
            self.assertRegex(cm.output[-1], 'DEBUG:grimoire_elk.elastic_items:No results found from*')
Example no. 13
    def test_git_parser(self):
        """Test if the static method parses a git log file"""

        commits = Git.parse_git_log_from_file("data/git/git_log.txt")
        result = [commit['commit'] for commit in commits]

        expected = ['456a68ee1407a77f3e804a30dff245bb6c6b872f',
                    '51a3b654f252210572297f47597b31527c475fb8',
                    'ce8e0b86a1e9877f42fe9453ede418519115f367',
                    '589bb080f059834829a2a5955bebfd7c2baa110a',
                    'c6ba8f7a1058db3e6b4bc6f1090e932b107605fb',
                    'c0d66f92a95e31c77be08dc9d0f11a16715d1885',
                    '7debcf8a2f57f86663809c58b5c07a398be7674c',
                    '87783129c3f00d2c81a3a8e585eb86a47e39891a',
                    'bc57a9209f096a130dcc5ba7089a8663f758a703']

        self.assertListEqual(result, expected)
Example no. 14
    def test_fetch_filter_raw(self):
        """Test whether the fetch with filter raw properly works"""

        perceval_backend = Git('/tmp/perceval_mc84igfc/gittest', '/tmp/foo')
        elastic = ElasticSearch(self.es_con, self.target_index, GitOcean.mapping)

        # Load items
        items = json.loads(read_file('data/git.json'))
        ocean = GitOcean(perceval_backend)
        ocean.elastic = elastic
        ocean.feed_items(items)

        eitems = ElasticItems(perceval_backend)
        eitems.set_filter_raw("data.commit:87783129c3f00d2c81a3a8e585eb86a47e39891a")
        eitems.elastic = elastic
        items = [ei for ei in eitems.fetch()]
        self.assertEqual(len(items), 1)
Example no. 15
    def test_fetch(self):
        """Test whether the fetch method properly works"""

        perceval_backend = Git('/tmp/perceval_mc84igfc/gittest', '/tmp/foo')
        elastic = ElasticSearch(self.es_con, self.target_index, GitOcean.mapping)

        # Load items
        items = json.loads(read_file('data/git.json'))
        ocean = GitOcean(perceval_backend)
        ocean.elastic = elastic
        ocean.feed_items(items)

        eitems = ElasticItems(perceval_backend)
        eitems.scroll_size = 2
        eitems.elastic = elastic

        items = [ei for ei in eitems.fetch()]
        self.assertEqual(len(items), 9)
Example no. 16
    def test_get_elastic_items(self):
        """Test whether the elastic method works properly"""

        perceval_backend = Git('/tmp/perceval_mc84igfc/gittest', '/tmp/foo')
        elastic = ElasticSearch(self.es_con, self.target_index, GitOcean.mapping)

        # Load items
        items = json.loads(read_file('data/git.json'))
        ocean = GitOcean(perceval_backend)
        ocean.elastic = elastic
        ocean.feed_items(items)

        eitems = ElasticItems(perceval_backend)
        eitems.elastic = elastic
        r_json = eitems.get_elastic_items()

        total = r_json['hits']['total']
        total = total['value'] if isinstance(total, dict) else total
        self.assertEqual(total, 9)
Example no. 17
    def test_fetch_since_date(self):
        """Test whether commits are fetched from a Git repository since the given date"""

        new_path = os.path.join(self.tmp_path, 'newgit')

        from_date = datetime.datetime(2014, 2, 11, 22, 7, 49)
        git = Git(self.git_path, new_path)
        commits = [commit for commit in git.fetch(from_date=from_date)]

        expected = [('ce8e0b86a1e9877f42fe9453ede418519115f367', 1392185269.0),
                    ('51a3b654f252210572297f47597b31527c475fb8', 1392185366.0),
                    ('456a68ee1407a77f3e804a30dff245bb6c6b872f', 1392185439.0)]

        self.assertEqual(len(commits), len(expected))

        for x in range(len(commits)):
            expected_uuid = uuid(self.git_path, expected[x][0])
            commit = commits[x]
            self.assertEqual(commit['data']['commit'], expected[x][0])
            self.assertEqual(commit['origin'], self.git_path)
            self.assertEqual(commit['uuid'], expected_uuid)
            self.assertEqual(commit['updated_on'], expected[x][1])
            self.assertEqual(commit['category'], 'commit')
            self.assertEqual(commit['tag'], self.git_path)

        # Test it using a datetime that includes the timezone
        from_date = datetime.datetime(2012,
                                      8,
                                      14,
                                      14,
                                      30,
                                      00,
                                      tzinfo=dateutil.tz.tzoffset(
                                          None, -36000))
        git = Git(self.git_path, new_path)
        commits = [commit for commit in git.fetch(from_date=from_date)]

        self.assertEqual(len(commits), len(expected))

        for x in range(len(commits)):
            expected_uuid = uuid(self.git_path, expected[x][0])
            commit = commits[x]
            self.assertEqual(commit['data']['commit'], expected[x][0])
            self.assertEqual(commit['origin'], self.git_path)
            self.assertEqual(commit['uuid'], expected_uuid)
            self.assertEqual(commit['updated_on'], expected[x][1])
            self.assertEqual(commit['category'], 'commit')
            self.assertEqual(commit['tag'], self.git_path)

        shutil.rmtree(new_path)
Example no. 18
    def test_fetch_no_results(self):
        """Test whether a message is logged when no results are found"""

        perceval_backend = Git('/tmp/perceval_mc84igfc/gittest-not_found', '/tmp/foo')
        elastic = ElasticSearch(self.es_con, self.target_index, GitOcean.mapping)

        # Load items
        items = json.loads(read_file('data/git.json'))
        ocean = GitOcean(perceval_backend)
        ocean.elastic = elastic
        ocean.feed_items(items)

        eitems = ElasticItems(perceval_backend)
        eitems.elastic = elastic

        with self.assertLogs(logger, level='DEBUG') as cm:
            items = [ei for ei in eitems.fetch()]
            self.assertEqual(len(items), 0)
            self.assertRegex(cm.output[-2], 'DEBUG:grimoire_elk.elastic_items:No results found.*')
            self.assertRegex(cm.output[-1], 'DEBUG:grimoire_elk.elastic_items:Releasing scroll_id=*')
Example no. 19
    def test_fetch_from_date(self):
        """Test whether the fetch method with from_date properly works"""

        perceval_backend = Git('/tmp/perceval_mc84igfc/gittest', '/tmp/foo')
        elastic = ElasticSearch(self.es_con, self.target_index, GitOcean.mapping)

        # Load items
        items = json.loads(read_file('data/git.json'))
        ocean = GitOcean(perceval_backend)
        ocean.elastic = elastic
        ocean.feed_items(items)

        # Fetch total items
        eitems = ElasticItems(perceval_backend)
        eitems.elastic = elastic
        items = [ei for ei in eitems.fetch()]
        self.assertEqual(len(items), 9)

        # Fetch with from date
        from_date = str_to_datetime("2018-02-09T08:33:22.699+00:00")
        eitems = ElasticItems(perceval_backend, from_date=from_date)
        eitems.elastic = elastic
        items = [ei for ei in eitems.fetch()]
        self.assertEqual(len(items), 2)
Example no. 20
#! /usr/bin/env python3
# Count commits

import argparse

from perceval.backends.core.git import Git

# Read command line arguments
parser = argparse.ArgumentParser(description="Count commits in a git repo")
parser.add_argument("repo", help="Repository url")
parser.add_argument("dir", help="Directory for cloning the repository")
parser.add_argument("--print", action='store_true', help="Print hashes")
args = parser.parse_args()

# Create a Git object and count commits
repo = Git(uri=args.repo, gitpath=args.dir)
count = 0
for commit in repo.fetch():
    if args.print:
        print(commit['data']['commit'])
    count += 1
print("Number of commmits: %d." % count)
Example no. 21
def git_repos():
    users = []  # authors of every fetched commit (repeated entries allowed)
    test = []   # formatted summary lines, one per author

    repo_dir = '/perceval.tests'  # target directory for cloning

    # target URLs: the amfoss repositories to analyze
    repo_urls = [
        'https://github.com/amfoss/vidyaratna.git',
        'https://github.com/amfoss/cms.git',
        'https://github.com/amfoss/TempleApp.git',
        'https://github.com/amfoss/website.git',
        'https://github.com/amfoss/WebApp.git',
        'https://github.com/amfoss/cms-mobile.git',
        'https://github.com/amfoss/Praveshan.git',
        'https://github.com/amfoss/bot.git',
        'https://github.com/amfoss/tasks.git',
        'https://github.com/amfoss/star-me.git',
        'https://github.com/amfoss/amdec-website.git',
        'https://github.com/amfoss/Wiki.git',
        'https://github.com/amfoss/GitLit.git',
        'https://github.com/amfoss/Qujini.git',
        'https://github.com/amfoss/attendance-tracker.git',
        'https://github.com/amfoss/events.git',
        'https://github.com/amfoss/Hack4Amrita.git',
        'https://github.com/amfoss/master-syllabus.git',
        'https://github.com/amfoss/test-repo.git',
        'https://github.com/amfoss/webspace.git',
        'https://github.com/amfoss/internal-hackathon.git',
        'https://github.com/amfoss/foss-meetups.git',
        'https://github.com/amfoss/automated-scripts.git',
        'https://github.com/amfoss/fosswebsite.git',
        'https://github.com/amfoss/fosster.git',
        'https://github.com/amfoss/Foss-talks.git',
        'https://github.com/amfoss/cybergurukulam.git',
        'https://github.com/amfoss/kdeconf.git',
        'https://github.com/amfoss/android-workshop-summer-2018.git',
        'https://github.com/amfoss/App.git',
        'https://github.com/amfoss/Workshops.git',
        'https://github.com/amfoss/Wikimedia_Hackathon_Amrita_University.git',
        'https://github.com/amfoss/website_old.git',
    ]

    for repo_url in repo_urls:
        repo = Git(uri=repo_url, gitpath=repo_dir)  # Git backend for this repository
        # repo.fetch() yields every commit in the repository
        for commit in repo.fetch():
            users.append(commit['data']['Author'])  # collect the author of each commit

    counts = Counter(users)
    for u in counts.keys():
        a = "Commits:" + str(counts.get(u)) + "\t User: " + u + "\n"
        test.append(a)  # append one summary line per author

    return test
Example no. 22
def getCommits(user_owner, repo_name):
    repo = Git(f"https://github.com/{user_owner}/{repo_name}.git",
               f"https://github.com/{user_owner}/{repo_name}.git")
    commits = repo.fetch()
    return commits
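Note that this example passes the repository URL as both uri and gitpath. In the other snippets in this collection, gitpath is a local directory used for cloning; a variant following that convention (the /tmp path is an assumption):

def getCommits(user_owner, repo_name):
    repo_url = f"https://github.com/{user_owner}/{repo_name}.git"
    repo_dir = f"/tmp/{user_owner}-{repo_name}.git"  # assumed local clone path
    repo = Git(repo_url, repo_dir)
    return repo.fetch()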
Example no. 23
##
## You should have received a copy of the GNU General Public License
## along with this program; if not, write to the Free Software
## Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
##
## Authors:
##   Jesus M. Gonzalez-Barahona <*****@*****.**>
##

from perceval.backends.core.git import Git
import elasticsearch

# Url for the git repo to analyze
repo_url = 'http://github.com/grimoirelab/perceval.git'
# Directory for letting Perceval clone the git repo
repo_dir = '/tmp/perceval.git'
# ElasticSearch instance (url)
es = elasticsearch.Elasticsearch(['http://localhost:9200/'])

# Create the 'commits' index in ElasticSearch
es.indices.create('commits')
# Create a Git object, pointing to repo_url, using repo_dir for cloning
repo = Git(uri=repo_url, gitpath=repo_dir)
# Fetch all commits as an iterator, and iterate it uploading to ElasticSearch
for commit in repo.fetch():
    # Create the object (dictionary) to upload to ElasticSearch
    summary = {'hash': commit['data']['commit']}
    print(summary)
    # Upload the object to ElasticSearch
    es.index(index='commits', doc_type='summary', body=summary)
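The indices.create call above raises an error if the 'commits' index already exists; a small guard, assuming the same elasticsearch-py client used in this script, would be:

# Create the 'commits' index only if it does not exist yet
if not es.indices.exists('commits'):
    es.indices.create('commits')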
Example no. 24
def main(args):
    github_key = args.github_token
    list_jsons = os.listdir(os.path.abspath(args.output_path))
    repo_set = set()
    with open(args.urls_file, 'r') as url_file:
        os.chdir(os.path.abspath(args.output_path))
        for line in url_file:
            if line in ['\n', '\r\n']:
                continue
            try:
                url = line.split('/')
                if not url:
                    continue
                repo = "%s/%s" % (url[3], url[4])
            except IndexError:
                print("url:" + line)
                logger.error("Error in repo (line) " + line + "\r\n")
                continue

            repo_set.add(repo)

    for repo in sorted(repo_set):

        repo_split = repo.split('/')
        outfile_name = "%s_%s.json" % (repo_split[0], repo_split[1])
        outfile_path = "%s/%s" % (args.output_path, outfile_name)

        if outfile_name in list_jsons:
            logger.info("Already downloaded: %s " % outfile_name)
            continue
        if "framework" in outfile_name:
            logger.info("Skipping <framework> repository")
            continue

        api_url = "https://api.github.com/repos/" + str(repo) + "?access_token=" + github_key
        logger.info("Checking metadata for repo %s" % api_url)
        try:
            response = urllib.request.urlopen(api_url)
        except urllib.error.HTTPError:
            logger.error("HTTP 404: Not found: %s" % repo)
            continue

        try:
            json_data = response.read().decode('utf-8')
            dicc_out = json.loads(json_data)
        except ValueError:
            logger.warning("Error in response (ValueError)")
            continue


        if 'message' in dicc_out:
            result = dicc_out['message']
        elif dicc_out == {}:
            result = 'False'
        else:
            result = dicc_out['private']

        if result == 'Not Found':
            logger.error("Not found: %s" % repo)
        elif result is True:
            # the GitHub API returns 'private' as a boolean, not the string 'True'
            logger.error("Private: %s" % repo)
        else:
            repo_url = "https://github.com/%s" % repo + ".git"

            logger.info('Executing Perceval with repo: %s' % repo)
            logger.debug('Repo stats. Size: %s KB' % dicc_out["size"])
            gitpath = '%s/%s' % (os.path.abspath(args.perceval_path), repo)
            git = Git(uri=repo_url, gitpath=gitpath)
            try:
                commits = [commit for commit in git.fetch()]
            except Exception as e:
                logger.warning("Failure while fetching commits. Repo: %s" % repo)
                logger.error(e)
                continue
            logger.info('Exporting results to JSON...')
            with open(outfile_path, "w", encoding='utf-8') as jfile:
                json.dump(commits, jfile, indent=4, sort_keys=True)
            logger.info('Exported to %s' % outfile_path)
            if not args.cache_mode_on:
                remove_dir(gitpath)
Example no. 25
# This is also assuming you have installed perceval onto your computer

import datetime
import pytz
from perceval.backends.core.git import Git
from perceval.backends.core.pipermail import PipermailList
from grimoirelab.toolkit.datetime import datetime_utcnow
from grimoirelab.toolkit.datetime import str_to_datetime
from grimoirelab.toolkit.datetime import datetime_to_utc

# Url for the git repo to analyze
git_repo_url = 'https://github.com/mozilla/labs-vcap-tests.git'
# Directory for letting Perceval clone the git repo
git_repo_dir = '/tmp/perceval.git'
# Create a Git object, pointing to repo_url, using repo_dir for cloning
repo = Git(uri=git_repo_url, gitpath=git_repo_dir)
print("Starting 1")
'''
Use the Git object to print information about the repository; fetching
will also create the directory /tmp/perceval.git.
Other fields available in commit['data'] are, for example:
	commit: aaa7a9209f096aaaadccaaa7089aaaa3f758a703
	Author:     John Smith <*****@*****.**>
	AuthorDate: Tue Aug 14 14:30:13 2012 -0300
	Commit:     John Smith <*****@*****.**>
	CommitDate: Tue Aug 14 14:30:13 2012 -0300
'''

for commit in repo.fetch():
    #print("ugh")
    print(commit['data']['Author'])
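The same loop can aggregate commits per author instead of just printing each one; a short sketch using collections.Counter (as Example no. 21 does):

from collections import Counter

# Re-fetch the commits and count how many each author made
author_counts = Counter(commit['data']['Author'] for commit in repo.fetch())
for author, n_commits in author_counts.most_common():
    print(n_commits, author)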
Example no. 26
def analyze_git(es_write):

    #INDEX = 'git_gecko'
    #PROJECT = 'gecko'
    #git = Git("https://github.com/mozilla/gecko-dev.git", "../gecko_all_commits_final_version_no_cm_options_nobrowser_nochrome_notoolkit.log")

    #INDEX = 'git_webkit'
    #PROJECT = 'webkit'
    #git = Git("https://github.com/WebKit/webkit.git", "../webkit_final_log_no_mc_options.log")

    INDEX = "git_blink"
    PROJECT = "blink"
    git = Git("https://chromium.googlesource.com/chromium",
              "../blink_final_log_no_cm_options.log")

    commits = []
    cont = 1
    uniq_id = 1
    first = True
    docs = []

    all_files = pandas.DataFrame()

    es_write.indices.delete(INDEX, ignore=[400, 404])
    es_write.indices.create(INDEX, body=MAPPING_GIT)

    for item in git.fetch():
        commits.append(item)

        if cont % 15000 == 0:
            git_events = events.Git(commits)
            events_df = git_events.eventize(1)

            # Add flags if found
            message_log = MessageLogFlag(events_df)
            events_df = message_log.enrich('message')

            splitemail = SplitEmail(events_df)
            events_df = splitemail.enrich("owner")

            # Code for webkit
            # If there's a bot committing code, then we need to use the values flag
            if PROJECT == 'webkit':
                ## Fix values in the owner column
                events_df.loc[events_df["email"] == '*****@*****.**',
                              "owner"] = events_df["values"]
                # Re-do this analysis to calculate the right email and user
                splitemail = SplitEmail(events_df)
                events_df = splitemail.enrich("owner")

            # Code for Blink
            # If there's a flag, then we need to update the owner
            if PROJECT == 'blink':
                events_df.loc[(events_df["values"] == '') ^ True,
                              "owner"] = events_df["values"]
                splitemail = SplitEmail(events_df)
                events_df = splitemail.enrich("owner")

            splitdomain = SplitEmailDomain(events_df)
            events_df = splitdomain.enrich("email")
            #events_df.drop("message", axis=1, inplace=True)

            # Add project information
            events_df["project"] = PROJECT

            test = events_df.to_dict("index")

            docs = []
            for i in test.keys():
                header = {
                    "_index": INDEX,
                    "_type": "item",
                    "_id": int(uniq_id),
                    "_source": test[i]
                }
                docs.append(header)
                uniq_id = uniq_id + 1

            helpers.bulk(es_write, docs)

            commits = []
        cont = cont + 1

    helpers.bulk(es_write, docs)
Example no. 27
from perceval.backends.core.git import Git

url = 'http://github.com/abhiandthetruth/JournalJar'

dir = './temp/Saarthi'

repo = Git(uri=url, gitpath=dir)

for commit in repo.fetch():
    print(commit)
Example no. 28
with open(out_file, 'w') as f:
    f.write(json.dumps(commits, sort_keys=True, indent=4))

with open(out_file) as str_data:
    print(str_data)
    json_data = json.load(str_data)

#
# getting data via perceval in Python
#

url = 'https://github.com/chaoss/grimoirelab-toolkit'
local_path = './chaoss-grimoirelab-toolkit'
output_file = './grimoirelab-git.json'

git = Git(url, local_path)

commits = [commit for commit in git.fetch()]

dumped = json.dumps(commits, sort_keys=True, indent=4)

# save the Perceval docs to a file
with open(output_file, 'w') as f:
    f.write(dumped)

# load the Perceval docs from a file
with open(output_file, 'r') as f:
    content = f.read()
    commits = json.loads(content)

for c in commits:
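    # The loop body is not included in this snippet; printing each hash and
    # author, as other examples in this collection do, is one plausible body.
    print(c['data']['commit'], c['data']['Author'])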
Example no. 29
    def test_get_field_date(self):
        """Test whether the field date is correctly returned"""

        perceval_backend = Git('http://example.com', '/tmp/foo')
        eitems = ElasticOcean(perceval_backend)
        self.assertEqual(eitems.get_field_date(), 'metadata__updated_on')
Example no. 30
#! /usr/bin/env python3
from flake8.api import legacy as flake8
from graal.graal import GraalRepository
from perceval.backends.core.git import Git
import random

repo_url = input("Enter url: ")
repo_dir = input("Enter dir: ")
worktree_path = input("Enter worktree path: ")
# Git object, pointing to repo_url and repo_dir for cloning
ggit = Git(uri=repo_url, gitpath=repo_dir)

# clone the repository (if it doesn't exist locally)
ggit.fetch_items(category='commit')

commits = list(ggit.fetch())
# hash of random commit
commit = random.choice(commits)
_hash = commit['data']['commit']
print(_hash)
# or input the hash of certain commit
# _hash = input("Enter hash: ")

gral_repo = GraalRepository(uri=repo_url, dirpath=repo_dir)

gral_repo.worktree(worktree_path)
# checkout the commit
gral_repo.checkout(_hash)
style_guide = flake8.get_style_guide()
files = worktree_path
# generate report by flake8
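# check_files() runs flake8 on the given paths and returns a report object;
# this completion is an assumption, since the original snippet stops above.
report = style_guide.check_files([files])
print("flake8 violations found:", report.total_errors)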
Example no. 31
def analyze_git(es_write):

    #INDEX = 'git_gecko'
    #PROJECT = 'gecko'
    #git = Git("https://github.com/mozilla/gecko-dev.git", "../gecko_all_commits_final_version_no_cm_options_nobrowser_nochrome_notoolkit.log")

    #INDEX = 'git_webkit'
    #PROJECT = 'webkit'
    #git = Git("https://github.com/WebKit/webkit.git", "../webkit_final_log_no_mc_options.log")

    INDEX = "git_blink"
    PROJECT = "blink"
    git = Git("https://chromium.googlesource.com/chromium", "../blink_final_log_no_cm_options.log")

    commits = []
    cont = 1
    uniq_id = 1
    first = True
    docs = []

    all_files = pandas.DataFrame()

    es_write.indices.delete(INDEX, ignore=[400, 404])
    es_write.indices.create(INDEX, body=MAPPING_GIT)

    for item in git.fetch():
        commits.append(item)

        if cont % 15000 == 0:
            git_events = events.Git(commits)
            events_df = git_events.eventize(1)

            # Add flags if found
            message_log = MessageLogFlag(events_df)
            events_df = message_log.enrich('message')

            splitemail = SplitEmail(events_df)
            events_df = splitemail.enrich("owner")

            # Code for webkit
            # If there's a bot committing code, then we need to use the values flag
            if PROJECT == 'webkit':
                ## Fix values in the owner column
                events_df.loc[events_df["email"]=='*****@*****.**', "owner"] = events_df["values"]
                # Re-do this analysis to calculate the right email and user
                splitemail = SplitEmail(events_df)
                events_df = splitemail.enrich("owner")

            # Code for Blink
            # If there's a flag, then we need to update the owner
            if PROJECT == 'blink':
                events_df.loc[(events_df["values"]=='') ^ True, "owner"] = events_df["values"]
                splitemail = SplitEmail(events_df)
                events_df = splitemail.enrich("owner")

            splitdomain = SplitEmailDomain(events_df)
            events_df = splitdomain.enrich("email")
            #events_df.drop("message", axis=1, inplace=True)

            # Add project information
            events_df["project"] = PROJECT

            test = events_df.to_dict("index")

            docs = []
            for i in test.keys():
                header = {
                      "_index": INDEX,
                      "_type": "item",
                      "_id": int(uniq_id),
                      "_source": test[i]
                      }
                docs.append(header)
                uniq_id = uniq_id + 1

            helpers.bulk(es_write, docs)

            commits = []
        cont = cont + 1

    helpers.bulk(es_write, docs)