def test_fetch(self):
    """Test whether commits are fetched from a Git repository"""

    new_path = os.path.join(self.tmp_path, 'newgit')

    backend = Git(self.git_path, new_path)
    fetched = list(backend.fetch())

    expected = [('bc57a9209f096a130dcc5ba7089a8663f758a703', 1344965413.0),
                ('87783129c3f00d2c81a3a8e585eb86a47e39891a', 1344965535.0),
                ('7debcf8a2f57f86663809c58b5c07a398be7674c', 1344965607.0),
                ('c0d66f92a95e31c77be08dc9d0f11a16715d1885', 1344965702.0),
                ('c6ba8f7a1058db3e6b4bc6f1090e932b107605fb', 1344966351.0),
                ('589bb080f059834829a2a5955bebfd7c2baa110a', 1344967441.0),
                ('ce8e0b86a1e9877f42fe9453ede418519115f367', 1392185269.0),
                ('51a3b654f252210572297f47597b31527c475fb8', 1392185366.0),
                ('456a68ee1407a77f3e804a30dff245bb6c6b872f', 1392185439.0)]

    self.assertEqual(len(fetched), len(expected))

    # Each fetched item must match the expected (hash, date) pair in order.
    for commit, (exp_hash, exp_date) in zip(fetched, expected):
        self.assertEqual(commit['data']['commit'], exp_hash)
        self.assertEqual(commit['origin'], self.git_path)
        self.assertEqual(commit['uuid'], uuid(self.git_path, exp_hash))
        self.assertEqual(commit['updated_on'], exp_date)
        self.assertEqual(commit['category'], 'commit')
        self.assertEqual(commit['tag'], self.git_path)

    shutil.rmtree(new_path)
def commit_counter(own, repo_url, d1, df, k):
    """Count commits in github.com/<own>/<repo_url> made since date d1.

    Parameters:
        own      -- repository owner (GitHub user/org name)
        repo_url -- bare repository name (expanded to a full URL here)
        d1       -- reference date passed to isrecent()
        df       -- DataFrame; the count is stored in df['Number of commits'][k]
        k        -- row index into df

    Returns the number of recent commits.
    """
    print("Owner\t\tRepository")
    print(own, "\t", repo_url)

    # BUG FIX: the clone directory must be derived from the bare repo name
    # BEFORE repo_url is expanded to a full URL.  The original appended the
    # full 'https://github.com/...' URL to '/tmp/', producing an invalid
    # path such as '/tmp/https://github.com/owner/repo.git.git'.
    repo_dir = '/tmp/' + repo_url + '.git'
    repo_url = 'https://github.com/' + own + '/' + repo_url + '.git'

    # create a Git object, pointing to repo_url, using repo_dir for cloning
    repo = Git(uri=repo_url, gitpath=repo_dir)

    mon = ["Jan", "Feb", "Mar", "Apr", "May", "Jun",
           "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"]

    count = 0
    # fetch all commits as an iterator, counting the recent ones
    for commit in repo.fetch():
        temp = commit['data']['CommitDate'].split(" ")
        day = int(temp[2])
        # list.index raises ValueError on a malformed month abbreviation
        # instead of silently reusing the previous month (original bug:
        # month_num kept its stale value when no month matched).
        month_num = mon.index(temp[1]) + 1
        year = int(temp[4])
        if isrecent(d1, day, month_num, year):
            count += 1

    print("Number of commits: ", count)
    df['Number of commits'][k] = count
    return count
def list_committers(self, url, directory):
    """Return the author strings of a git repository, ordered by commit.

    Duplicated authors are kept on purpose so that gender contributions
    can be counted per commit.  Mail addresses are stripped with
    self.removeMail().
    """
    repo = Git(uri=url, gitpath=directory)
    return [self.removeMail(item['data']['Author']) for item in repo.fetch()]
def test_fetch_empty_log(self):
    """Test whether it parsers an empty log"""

    clone_path = os.path.join(self.tmp_path, 'newgit')
    # A from_date far past the last commit yields an empty log.
    since = datetime.datetime(2020, 1, 1, 1, 1, 1)

    backend = Git(self.git_path, clone_path)
    fetched = list(backend.fetch(from_date=since))

    self.assertListEqual(fetched, [])
def test_fetch_from_empty_repository(self):
    """Test whether it parses from empty repository"""

    clone_path = os.path.join(self.tmp_path, 'newgit')

    backend = Git(self.git_empty_path, clone_path)
    self.assertListEqual(list(backend.fetch()), [])

    shutil.rmtree(clone_path)
def run(args):
    """Estimate development effort for a git repository and print a report.

    args: dict with keys 'git_repository' (clone URL), 'period' (period
    length in months) and 'threshold' (commits per period for an author
    to be considered full-time).  Results are printed to stdout.
    """
    repo_url = args['git_repository']
    period_length = args['period']
    threshold = args['threshold']
    active_days = True

    # directory for letting Perceval clone the git repo
    # TODO: this is Linux-operating system specific. Should change
    repo_dir = '/tmp/' + repo_url.split('/')[-1] + '.git'

    # Earliest author date seen so far; start at "now" and move backwards.
    first_commit = datetime.now(timezone.utc)
    # Maps author string -> list of that author's commit datetimes.
    authorDict = defaultdict(list)

    repo = Git(uri=repo_url, gitpath=repo_dir)
    for commit in repo.fetch():
        commitdate = datetime.strptime(commit['data']['AuthorDate'],
                                       '%a %b %d %H:%M:%S %Y %z')
        if commitdate < first_commit:
            first_commit = commitdate
        authorDict[commit['data']['Author']].append(commitdate)

    logging.info("Authors found: " + str(len(authorDict)))
    # Merge author identities that appear to be the same person (in place).
    simplemerge(authorDict)
    logging.info("Authors after merge: " + str(len(authorDict)))

    # Per-period commit counts per author, then effort estimations.
    author_count = author_counting(authorDict, period_length, active_days)
    # print(author_count)
    (effort_periods, full_time_periods, non_full_time_periods) = \
        project_period_effort(author_count, threshold, period_length)
    maxeffort_periods = project_period_maxeffort(author_count, period_length)

    # Printing results
    print()
    print("CONFIGURATIONS:")
    print(" Length of period (in months):", period_length)
    print(" Threshold t (in commits in a period):", threshold)
    print()
    print("RESULTS:")
    print(" First commit date:", first_commit, "--",
          round((datetime.now(timezone.utc) - first_commit).days / 30, 2),
          "months ago")
    print(" Maximum possible development effort (in person-months):",
          sum(maxeffort_periods.values()))
    print()
    print(pretty_print_period(period_length, first_commit,
                              ["FT", "Non-FT", "Effort"],
                              full_time_periods, non_full_time_periods,
                              effort_periods))
    print(" " * 8, "FT: Full-time developers")
    print()
    print(" ---> Estimated development effort (in person-months):",
          round(sum(effort_periods.values()), 2))
    print()
    print("For more information, visit http://github.com/gregoriorobles/git2effort")
    print()
def test_get_elastic_items_filter(self):
    """Test whether the elastic method works properly with filter"""

    perceval_backend = Git('/tmp/perceval_mc84igfc/gittest', '/tmp/foo')
    elastic = ElasticSearch(self.es_con, self.target_index, GitOcean.mapping)

    # Load items
    items = json.loads(read_file('data/git.json'))
    ocean = GitOcean(perceval_backend)
    ocean.elastic = elastic
    ocean.feed_items(items)

    # FIX: local name 'filter' shadowed the builtin of the same name.
    uuid_filter = {
        "name": "uuid",
        "value": ["43f217b2f678a5691fdbc5c6c5302243e79e5a90",
                  "00ee6902e34b309cd05706c26e3e195a62492f60"]
    }
    eitems = ElasticItems(perceval_backend)
    eitems.elastic = elastic
    r_json = eitems.get_elastic_items(_filter=uuid_filter)
    hits = r_json['hits']['hits']

    self.assertEqual(len(hits), 2)
    self.assertEqual(hits[0]['_source']['uuid'],
                     "43f217b2f678a5691fdbc5c6c5302243e79e5a90")
    self.assertEqual(hits[1]['_source']['uuid'],
                     "00ee6902e34b309cd05706c26e3e195a62492f60")
def get_commits(username, reponame, commits, config):
    """Fetch all commits of github.com/<username>/<reponame> with perceval
    and append each commit's `data` dict to *commits* (mutated in place).

    Parameters
    ==========
    `username` : str, required — repository owner
    `reponame` : str, required — repository name
    `commits`  : list, required — accumulator, extended in place
    `config`   : dict, required — must contain "data_dir"

    Repository errors are logged and swallowed (best-effort behaviour).
    TODO: Implement recursion argument, default to False.
    """
    repo_URL = 'https://github.com/' + username + '/' + reponame

    # Ensure the export dir exists (exist_ok avoids the check/create race).
    # TODO: this is a code snippet we use three times, we should make a
    # function out of it
    local_dir = os.path.join(config["data_dir"], 'grimoire_dumps')
    os.makedirs(local_dir, exist_ok=True)

    data_dump_path = os.path.join(local_dir, username + '-' + reponame)
    git = Git(repo_URL, data_dump_path)

    # `fetch()` gets commits from all branches by default.  It returns
    # dictionaries whose `data` key holds the actual commit metadata;
    # everything else is perceval bookkeeping.
    try:
        repo_fetched = [commit for commit in git.fetch()]
        # issue 33 (very ugly) band aid: delete *.pack files once
        # downloaded by perceval
        shutil.rmtree(os.path.join(data_dump_path, 'objects', 'pack'),
                      ignore_errors=True)
        # Keep just commit `data`
        for commit_data in repo_fetched:
            commits.append(commit_data["data"])
    except RepositoryError:
        # BUG FIX: logging.warning() takes no 'file' keyword — the original
        # `logging.warning(..., file=stderr)` raised TypeError whenever a
        # repository failed.  Use lazy %-formatting instead.
        logging.warning("Error with this repository: %s/%s",
                        username, reponame)
def test_initialization(self):
    """Test whether attributes are initializated"""

    backend = Git('http://example.com', self.git_path, tag='test')

    self.assertEqual(backend.uri, 'http://example.com')
    self.assertEqual(backend.gitpath, self.git_path)
    self.assertEqual(backend.origin, 'http://example.com')
    self.assertEqual(backend.tag, 'test')

    # When tag is empty or None it must fall back to the value in uri
    backend = Git('http://example.com', self.git_path)
    self.assertEqual(backend.origin, 'http://example.com')
    self.assertEqual(backend.tag, 'http://example.com')

    backend = Git('http://example.com', self.git_path, tag='')
    self.assertEqual(backend.origin, 'http://example.com')
    self.assertEqual(backend.tag, 'http://example.com')
def test_git_encoding_error(self):
    """Test if encoding errors are escaped when a git log is parsed"""

    parsed = list(Git.parse_git_log_from_file("data/git/git_bad_encoding.txt"))
    self.assertEqual(len(parsed), 1)

    entry = parsed[0]
    self.assertEqual(entry['commit'],
                     'cb24e4f2f7b2a7f3450bfb15d1cbaa97371e93fb')
    # Undecodable bytes must survive as surrogate escapes in the message.
    self.assertEqual(entry['message'],
                     'Calling \udc93Open Type\udc94 (CTRL+SHIFT+T) after startup - performance improvement.')
def test_git_cr_error(self):
    """Test if mislocated carriage return chars do not break lines

    In some commit messages, carriage return characters (\r) are found
    in weird places. They should not be misconsidered as end of line.

    Before fixing, this test raises an exception:
    "perceval.errors.ParseError: commit expected on line 10"
    """
    parsed = list(Git.parse_git_log_from_file("data/git/git_bad_cr.txt"))
    self.assertEqual(len(parsed), 1)
def test_get_elastic_items_error(self):
    """Test whether a message is logged if an error occurs when getting items from an index"""

    raw_items = json.loads(read_file('data/git.json'))
    backend = Git('/tmp/perceval_mc84igfc/gittest', '/tmp/foo')
    es = ElasticSearch(self.es_con, self.target_index, GitOcean.mapping)
    es.bulk_upload(raw_items, field_id="uuid")

    # Load items
    eitems = ElasticItems(backend)
    eitems.elastic = es

    with self.assertLogs(logger, level='DEBUG') as cm:
        self.assertIsNone(eitems.get_elastic_items())
        self.assertRegex(cm.output[-1],
                         'DEBUG:grimoire_elk.elastic_items:No results found from*')
def test_git_parser(self):
    """Test if the static method parses a git log file"""

    expected = ['456a68ee1407a77f3e804a30dff245bb6c6b872f',
                '51a3b654f252210572297f47597b31527c475fb8',
                'ce8e0b86a1e9877f42fe9453ede418519115f367',
                '589bb080f059834829a2a5955bebfd7c2baa110a',
                'c6ba8f7a1058db3e6b4bc6f1090e932b107605fb',
                'c0d66f92a95e31c77be08dc9d0f11a16715d1885',
                '7debcf8a2f57f86663809c58b5c07a398be7674c',
                '87783129c3f00d2c81a3a8e585eb86a47e39891a',
                'bc57a9209f096a130dcc5ba7089a8663f758a703']

    hashes = [entry['commit']
              for entry in Git.parse_git_log_from_file("data/git/git_log.txt")]

    self.assertListEqual(hashes, expected)
def test_fetch_filter_raw(self):
    """Test whether the fetch with filter raw properly works"""

    backend = Git('/tmp/perceval_mc84igfc/gittest', '/tmp/foo')
    es = ElasticSearch(self.es_con, self.target_index, GitOcean.mapping)

    # Feed the raw index
    ocean = GitOcean(backend)
    ocean.elastic = es
    ocean.feed_items(json.loads(read_file('data/git.json')))

    eitems = ElasticItems(backend)
    eitems.set_filter_raw("data.commit:87783129c3f00d2c81a3a8e585eb86a47e39891a")
    eitems.elastic = es

    fetched = list(eitems.fetch())
    self.assertEqual(len(fetched), 1)
def test_fetch(self):
    """Test whether the fetch method properly works"""

    backend = Git('/tmp/perceval_mc84igfc/gittest', '/tmp/foo')
    es = ElasticSearch(self.es_con, self.target_index, GitOcean.mapping)

    # Feed the raw index
    ocean = GitOcean(backend)
    ocean.elastic = es
    ocean.feed_items(json.loads(read_file('data/git.json')))

    # A small scroll size forces pagination across several scroll pages.
    eitems = ElasticItems(backend)
    eitems.scroll_size = 2
    eitems.elastic = es

    fetched = list(eitems.fetch())
    self.assertEqual(len(fetched), 9)
def test_get_elastic_items(self):
    """Test whether the elastic method works properly"""

    backend = Git('/tmp/perceval_mc84igfc/gittest', '/tmp/foo')
    es = ElasticSearch(self.es_con, self.target_index, GitOcean.mapping)

    # Feed the raw index
    ocean = GitOcean(backend)
    ocean.elastic = es
    ocean.feed_items(json.loads(read_file('data/git.json')))

    eitems = ElasticItems(backend)
    eitems.elastic = es

    response = eitems.get_elastic_items()
    # ES7 returns {'value': n, ...}; older versions return the bare int.
    total = response['hits']['total']
    if isinstance(total, dict):
        total = total['value']
    self.assertEqual(total, 9)
def test_fetch_since_date(self):
    """Test whether commits are fetched from a Git repository since the given date"""

    new_path = os.path.join(self.tmp_path, 'newgit')

    expected = [('ce8e0b86a1e9877f42fe9453ede418519115f367', 1392185269.0),
                ('51a3b654f252210572297f47597b31527c475fb8', 1392185366.0),
                ('456a68ee1407a77f3e804a30dff245bb6c6b872f', 1392185439.0)]

    def check_commits(commits):
        # Shared assertions; this loop was previously duplicated verbatim
        # for both from_date variants.
        self.assertEqual(len(commits), len(expected))
        for commit, (exp_hash, exp_date) in zip(commits, expected):
            self.assertEqual(commit['data']['commit'], exp_hash)
            self.assertEqual(commit['origin'], self.git_path)
            self.assertEqual(commit['uuid'], uuid(self.git_path, exp_hash))
            self.assertEqual(commit['updated_on'], exp_date)
            self.assertEqual(commit['category'], 'commit')
            self.assertEqual(commit['tag'], self.git_path)

    from_date = datetime.datetime(2014, 2, 11, 22, 7, 49)
    git = Git(self.git_path, new_path)
    check_commits(list(git.fetch(from_date=from_date)))

    # Test it using a datetime that includes the timezone
    from_date = datetime.datetime(2012, 8, 14, 14, 30, 00,
                                  tzinfo=dateutil.tz.tzoffset(None, -36000))
    git = Git(self.git_path, new_path)
    check_commits(list(git.fetch(from_date=from_date)))

    shutil.rmtree(new_path)
def test_fetch_no_results(self):
    """Test whether a message is logged when no results are found"""

    # The backend origin deliberately does not match any fed item.
    backend = Git('/tmp/perceval_mc84igfc/gittest-not_found', '/tmp/foo')
    es = ElasticSearch(self.es_con, self.target_index, GitOcean.mapping)

    # Feed the raw index
    ocean = GitOcean(backend)
    ocean.elastic = es
    ocean.feed_items(json.loads(read_file('data/git.json')))

    eitems = ElasticItems(backend)
    eitems.elastic = es

    with self.assertLogs(logger, level='DEBUG') as cm:
        fetched = [item for item in eitems.fetch()]
        self.assertEqual(len(fetched), 0)
        self.assertRegex(cm.output[-2],
                         'DEBUG:grimoire_elk.elastic_items:No results found.*')
        self.assertRegex(cm.output[-1],
                         'DEBUG:grimoire_elk.elastic_items:Releasing scroll_id=*')
def test_fetch_from_date(self):
    """Test whether the fetch method with from_date properly works"""

    backend = Git('/tmp/perceval_mc84igfc/gittest', '/tmp/foo')
    es = ElasticSearch(self.es_con, self.target_index, GitOcean.mapping)

    # Feed the raw index
    ocean = GitOcean(backend)
    ocean.elastic = es
    ocean.feed_items(json.loads(read_file('data/git.json')))

    # Without from_date every stored item is returned
    eitems = ElasticItems(backend)
    eitems.elastic = es
    self.assertEqual(len(list(eitems.fetch())), 9)

    # With from_date only the items updated afterwards are returned
    since = str_to_datetime("2018-02-09T08:33:22.699+00:00")
    eitems = ElasticItems(backend, from_date=since)
    eitems.elastic = es
    self.assertEqual(len(list(eitems.fetch())), 2)
#! /usr/bin/env python3

# Count commits in a git repository, cloning it with Perceval.

import argparse

from perceval.backends.core.git import Git

# Read command line arguments
parser = argparse.ArgumentParser(description="Count commits in a git repo")
parser.add_argument("repo", help="Repository url")
parser.add_argument("dir", help="Directory for cloning the repository")
parser.add_argument("--print", action='store_true', help="Print hashes")
args = parser.parse_args()

# Create a Git object and count commits
repo = Git(uri=args.repo, gitpath=args.dir)
count = 0
for commit in repo.fetch():
    if args.print:
        print(commit['data']['commit'])
    count += 1

# FIX: user-facing message was misspelled ("commmits").
print("Number of commits: %d." % count)
def git_repos():
    """Aggregate commit counts per author across the amfoss repositories.

    Clones/fetches every repository listed below with perceval, collects
    the Author field of each commit, and returns a list of strings of the
    form "Commits:<n>\t User: <author>\n", one per distinct author.
    """
    # FIX: the original body was ~30 verbatim copy-pasted clone/fetch
    # stanzas; they are replaced by a data-driven loop over the repo names.
    repo_names = [
        'vidyaratna', 'cms', 'TempleApp', 'website', 'WebApp', 'cms-mobile',
        'Praveshan', 'bot', 'tasks', 'star-me', 'amdec-website', 'Wiki',
        'GitLit', 'Qujini', 'attendance-tracker', 'events', 'Hack4Amrita',
        'master-syllabus', 'test-repo', 'webspace', 'internal-hackathon',
        'foss-meetups', 'automated-scripts', 'fosswebsite', 'fosster',
        'Foss-talks', 'cybergurukulam', 'kdeconf',
        'android-workshop-summer-2018', 'App', 'Workshops',
        'Wikimedia_Hackathon_Amrita_University', 'website_old',
    ]
    repo_dir = '/perceval.tests'  # target directory shared by all clones

    users = []  # one entry per commit, duplicates kept for counting
    for name in repo_names:
        repo_url = 'https://github.com/amfoss/' + name + '.git'
        repo = Git(uri=repo_url, gitpath=repo_dir)
        for commit in repo.fetch():
            users.append(commit['data']['Author'])

    # FIX: Counter(users) was rebuilt on every iteration of the final loop;
    # compute it once.
    counts = Counter(users)

    test = []
    for u in counts.keys():
        # NOTE(review): the original concatenation was corrupted in the dump
        # ('"\t User: "******"\n"'); reconstructed as inserting the author
        # string — confirm against the project history.
        a = "Commits:" + str(counts.get(u)) + "\t User: " + u + "\n"
        test.append(a)  # appending all the fetched commit summaries
    return test
def getCommits(user_owner, repo_name):
    """Return a perceval commit iterator for github.com/<user_owner>/<repo_name>.

    BUG FIX: Git()'s second argument is the *local* path used for cloning;
    the original passed the remote URL twice, so perceval tried to use a
    directory literally named 'https:/github.com/...'.  The clone now goes
    under the system temporary directory.
    """
    import os
    import tempfile

    repo_url = f"https://github.com/{user_owner}/{repo_name}.git"
    gitpath = os.path.join(tempfile.gettempdir(),
                           f"{user_owner}-{repo_name}.git")
    repo = Git(repo_url, gitpath)
    return repo.fetch()
## ## You should have received a copy of the GNU General Public License ## along with this program; if not, write to the Free Software ## Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ## ## Authors: ## Jesus M. Gonzalez-Barahona <*****@*****.**> ## from perceval.backends.core.git import Git import elasticsearch # Url for the git repo to analyze repo_url = 'http://github.com/grimoirelab/perceval.git' # Directory for letting Perceval clone the git repo repo_dir = '/tmp/perceval.git' # ElasticSearch instance (url) es = elasticsearch.Elasticsearch(['http://localhost:9200/']) # Create the 'commits' index in ElasticSearch es.indices.create('commits') # Create a Git object, pointing to repo_url, using repo_dir for cloning repo = Git(uri=repo_url, gitpath=repo_dir) # Fetch all commits as an iteratoir, and iterate it uploading to ElasticSearch for commit in repo.fetch(): # Create the object (dictionary) to upload to ElasticSearch summary = {'hash': commit['data']['commit']} print(summary) # Upload the object to ElasticSearch es.index(index='commits', doc_type='summary', body=summary)
def main(args):
    """Dump commit metadata for every GitHub repo listed in args.urls_file.

    For each repository an <owner>_<repo>.json file is written into
    args.output_path; repositories already dumped there are skipped.
    Expects args: github_token, urls_file, output_path, perceval_path,
    cache_mode_on.
    """
    github_key = args.github_token
    list_jsons = os.listdir(os.path.abspath(args.output_path))

    # Collect the unique "owner/name" pairs from the URL file.
    repo_set = set()
    with open(args.urls_file, 'r') as url_file:
        os.chdir(os.path.abspath(args.output_path))
        for line in url_file:
            if line in ['\n', '\r\n']:
                continue
            try:
                url = line.split('/')
                if not url:
                    continue
                # assumes URLs shaped like https://github.com/<owner>/<repo>/...
                repo = "%s/%s" % (url[3], url[4])
            except IndexError:
                print("url:" + line)
                logger.error("Error in repo (line) " + line + "\r\n")
                continue
            repo_set.add(repo)

    for repo in sorted(repo_set):
        repo_split = repo.split('/')
        outfile_name = "%s_%s.json" % (repo_split[0], repo_split[1])
        outfile_path = "%s/%s" % (args.output_path, outfile_name)

        if outfile_name in list_jsons:
            logger.info("Already downloaded: %s " % outfile_name)
            continue
        if "framework" in outfile_name:
            logger.info("Skipping <framework> repository")
            continue

        # NOTE(review): '?access_token=' query authentication is deprecated
        # by GitHub — confirm it still works or move the token to a header.
        api_url = "https://api.github.com/repos/" + str(repo) + "?access_token=" + github_key
        logger.info("Checking metadata for repo %s" % api_url)
        try:
            response = urllib.request.urlopen(api_url)
        except urllib.error.HTTPError:
            logger.error("HTTP 404: Not found: %s" % repo)
            continue
        try:
            json_data = response.read().decode('utf-8')
            dicc_out = json.loads(json_data)
        except ValueError:
            logger.warning("Error in response (ValueError)")
            continue

        # Decide whether the repo is reachable: GitHub error message,
        # empty payload, or the 'private' flag.
        if 'message' in dicc_out:
            result = dicc_out['message']
        elif dicc_out == {}:
            result = 'False'
        else:
            result = dicc_out['private']

        if result == 'Not Found':
            logger.error("Not found: %s" % repo)
        elif result == 'True':
            logger.error("Private: %s" % repo)
        else:
            repo_url = "https://github.com/%s" % repo + ".git"
            logger.info('Executing Perceval with repo: %s' % repo)
            logger.debug('Repo stats. Size: %s KB' % dicc_out["size"])
            gitpath = '%s/%s' % (os.path.abspath(args.perceval_path), repo)
            git = Git(uri=repo_url, gitpath=gitpath)
            try:
                commits = [commit for commit in git.fetch()]
            except Exception as e:
                # NOTE(review): this message was split across lines in the
                # dump; reconstructed as a single-line warning.
                logger.warning("Failure while fetching commits. Repo: %s" % repo)
                logger.error(e)
                continue
            logger.info('Exporting results to JSON...')
            with open(outfile_path, "w", encoding='utf-8') as jfile:
                json.dump(commits, jfile, indent=4, sort_keys=True)
            logger.info('Exported to %s' % outfile_path)
            # In non-cache mode the local clone is removed after the dump.
            if not args.cache_mode_on:
                remove_dir(gitpath)
# This is also assuming you have installed perceval onto your computer import datetime import pytz from perceval.backends.core.git import Git from perceval.backends.core.pipermail import PipermailList from grimoirelab.toolkit.datetime import datetime_utcnow from grimoirelab.toolkit.datetime import str_to_datetime from grimoirelab.toolkit.datetime import datetime_to_utc # Url for the git repo to analyze git_repo_url = 'https://github.com/mozilla/labs-vcap-tests.git' # Directory for letting Perceval clone the git repo git_repo_dir = '/tmp/perceval.git' # Create a Git object, pointing to repo_url, using repo_dir for cloning repo = Git(uri=git_repo_url, gitpath=git_repo_dir) print("Starting 1") ''' Uses the git object to print information about the repository, this will then create the directory /tmp/perceval.git other parameters you can use are: commit: aaa7a9209f096aaaadccaaa7089aaaa3f758a703 Author: John Smith <*****@*****.**> AuthorDate: Tue Aug 14 14:30:13 2012 -0300 Commit: John Smith <*****@*****.**> CommitDate: Tue Aug 14 14:30:13 2012 -0300 ''' for commit in repo.fetch(): #print("ugh") print(commit['data']['Author'])
def analyze_git(es_write):
    """Index enriched git commit events for one project into Elasticsearch.

    Commits are read with Perceval's Git backend, eventized and enriched in
    batches of 15000, and bulk-indexed into INDEX. Earlier gecko/webkit
    configurations are kept commented for reference.

    :param es_write: Elasticsearch client used for index management and
        bulk writes.
    """
    #INDEX = 'git_gecko'
    #PROJECT = 'gecko'
    #git = Git("https://github.com/mozilla/gecko-dev.git", "../gecko_all_commits_final_version_no_cm_options_nobrowser_nochrome_notoolkit.log")
    #INDEX = 'git_webkit'
    #PROJECT = 'webkit'
    #git = Git("https://github.com/WebKit/webkit.git", "../webkit_final_log_no_mc_options.log")
    INDEX = "git_blink"
    PROJECT = "blink"
    git = Git("https://chromium.googlesource.com/chromium",
              "../blink_final_log_no_cm_options.log")

    # Recreate the index from scratch.
    es_write.indices.delete(INDEX, ignore=[400, 404])
    es_write.indices.create(INDEX, body=MAPPING_GIT)

    def _flush(batch, next_id):
        """Eventize/enrich one batch of commits, bulk-index it, and return
        the next free document _id."""
        git_events = events.Git(batch)
        events_df = git_events.eventize(1)

        # Add flags if found
        events_df = MessageLogFlag(events_df).enrich('message')
        events_df = SplitEmail(events_df).enrich("owner")

        # Code for webkit: a bot commits code under this address, so take
        # the owner from the values flag, then re-split to recompute the
        # email and user columns.
        if PROJECT == 'webkit':
            events_df.loc[events_df["email"] == '*****@*****.**', "owner"] = events_df["values"]
            events_df = SplitEmail(events_df).enrich("owner")

        # Code for Blink: when a flag is present, its value overrides the
        # owner.
        if PROJECT == 'blink':
            events_df.loc[(events_df["values"] == '') ^ True, "owner"] = events_df["values"]
            events_df = SplitEmail(events_df).enrich("owner")

        events_df = SplitEmailDomain(events_df).enrich("email")

        # Add project information
        events_df["project"] = PROJECT

        rows = events_df.to_dict("index")
        docs = []
        for row in rows.values():
            docs.append({"_index": INDEX,
                         "_type": "item",
                         "_id": int(next_id),
                         "_source": row})
            next_id += 1
        helpers.bulk(es_write, docs)
        return next_id

    commits = []
    cont = 1
    uniq_id = 1
    for item in git.fetch():
        commits.append(item)
        if cont % 15000 == 0:
            uniq_id = _flush(commits, uniq_id)
            commits = []
        cont = cont + 1

    # BUGFIX: the original trailing helpers.bulk(es_write, docs) re-indexed
    # the previous batch's docs and silently dropped the commits collected
    # since the last 15000-flush. Flush the partial batch exactly once.
    if commits:
        _flush(commits, uniq_id)
from perceval.backends.core.git import Git url = 'http://github.com/abhiandthetruth/JournalJar' dir = './temp/Saarthi' repo = Git(uri=url, gitpath=dir) for commit in repo.fetch(): print(commit)
with open(out_file, 'w') as f: f.write(json.dumps(commits, sort_keys=True, indent=4)) with open(out_file) as str_data: print(str_data) json_data = json.load(str_data) # # getting data via perceval in Python # url = 'https://github.com/chaoss/grimoirelab-toolkit' local_path = './chaoss-grimoirelab-toolkit' output_file = './grimoirelab-git.json' git = Git(url, local_path) commits = [commit for commit in git.fetch()] dumped = json.dumps(commits, sort_keys=True, indent=4) # save the Perceval docs to a file with open(output_file, 'w') as f: f.write(dumped) # load the Perceval docs from a file with open(output_file, 'r') as f: content = f.read() commits = json.loads(content) for c in commits:
def test_get_field_date(self): """Test whether the field date is correctly returned""" perceval_backend = Git('http://example.com', '/tmp/foo') eitems = ElasticOcean(perceval_backend) self.assertEqual(eitems.get_field_date(), 'metadata__updated_on')
#! /usr/bin/env python3 from flake8.api import legacy as flake8 from graal.graal import GraalRepository from perceval.backends.core.git import Git import random repo_url = input("Enter url: ") repo_dir = input("Enter dir: ") worktree_path = input("Enter worktree path: ") # Git object, pointing to repo_url and repo_dir for cloning ggit = Git(uri=repo_url , gitpath=repo_dir) # clone the repository (if it doesn't exist locally) ggit.fetch_items(category='commit') commits = list(ggit.fetch()) # hash of random commit commit = random.choice(commits) _hash = commit['data']['commit'] print(_hash) # or input the hash of certain commit # _hash = input("Enter hash: ") gral_repo = GraalRepository(uri=repo_url, dirpath=repo_dir) gral_repo.worktree(worktree_path) # checkout the commit gral_repo.checkout(_hash) style_guide = flake8.get_style_guide() files = worktree_path # generate report by flake8
def analyze_git(es_write):
    """Index enriched git commit events for one project into Elasticsearch.

    Commits are fetched with Perceval's Git backend, eventized and enriched
    in batches of 15000, and bulk-indexed into INDEX. Earlier gecko/webkit
    configurations are kept commented for reference.

    :param es_write: Elasticsearch client used for index management and
        bulk writes.
    """
    #INDEX = 'git_gecko'
    #PROJECT = 'gecko'
    #git = Git("https://github.com/mozilla/gecko-dev.git", "../gecko_all_commits_final_version_no_cm_options_nobrowser_nochrome_notoolkit.log")
    #INDEX = 'git_webkit'
    #PROJECT = 'webkit'
    #git = Git("https://github.com/WebKit/webkit.git", "../webkit_final_log_no_mc_options.log")
    INDEX = "git_blink"
    PROJECT = "blink"
    git = Git("https://chromium.googlesource.com/chromium", "../blink_final_log_no_cm_options.log")
    commits = []
    cont = 1
    uniq_id = 1
    # NOTE(review): `first`, the initial `docs`, and `all_files` are never
    # used below — apparently dead state.
    first = True
    docs = []
    all_files = pandas.DataFrame()
    # Recreate the index from scratch.
    es_write.indices.delete(INDEX, ignore=[400, 404])
    es_write.indices.create(INDEX, body=MAPPING_GIT)
    for item in git.fetch():
        commits.append(item)
        # Flush a batch every 15000 commits.
        if cont % 15000 == 0:
            git_events = events.Git(commits)
            events_df = git_events.eventize(1)
            # Add flags if found
            message_log = MessageLogFlag(events_df)
            events_df = message_log.enrich('message')
            splitemail = SplitEmail(events_df)
            events_df = splitemail.enrich("owner")
            # Code for webkit
            # If there's a bot committing code, then we need to use the values flag
            if PROJECT == 'webkit':
                ## Fix values in the owner column
                events_df.loc[events_df["email"]=='*****@*****.**', "owner"] = events_df["values"]
                # Re-do this analysis to calculate the right email and user
                splitemail = SplitEmail(events_df)
                events_df = splitemail.enrich("owner")
            # Code for Blink
            # If there's a flag, then we need to update the owner
            if PROJECT == 'blink':
                events_df.loc[(events_df["values"]=='') ^ True, "owner"] = events_df["values"]
                splitemail = SplitEmail(events_df)
                events_df = splitemail.enrich("owner")
            splitdomain = SplitEmailDomain(events_df)
            events_df = splitdomain.enrich("email")
            #events_df.drop("message", axis=1, inplace=True)
            # Add project information
            events_df["project"] = PROJECT
            test = events_df.to_dict("index")
            docs = []
            # Wrap each event row as an ES bulk action with a unique _id.
            for i in test.keys():
                header = {
                    "_index": INDEX,
                    "_type": "item",
                    "_id": int(uniq_id),
                    "_source": test[i]
                }
                docs.append(header)
                uniq_id = uniq_id + 1
            helpers.bulk(es_write, docs)
            commits = []
        cont = cont + 1
    # NOTE(review): this re-sends the docs of the LAST flushed batch, while
    # commits collected since that flush are never enriched or indexed —
    # the trailing partial batch appears to be lost. Confirm and fix.
    helpers.bulk(es_write, docs)