def _flatten(self, item):
    """
    Turn a raw commit fetched by Perceval into a flat dictionary.

    The result is a list holding a single flat dictionary with the
    fields needed for computing metrics, or an empty list when the
    commit should not be considered (none of its files passes every
    condition in self.is_code).

    This method overrides CommitGit._flatten and does not discard
    commits based on the dates in date_range. For this metric,
    date_range delimits the period in which new contributors are
    looked for (compared to all contributors), whereas in general
    date_range decides which commits are analysed at all.

    :param item: raw item fetched by Perceval (dictionary)
    :returns: list with a single flat dictionary, or an empty list
    """
    data = item['data']
    authored_on = str_to_date(data['AuthorDate'])

    # Paths of the touched files accepted by every is_code condition.
    code_files = [
        entry['file']
        for entry in data['files']
        if all(rule.check(entry['file']) for rule in self.is_code)
    ]
    if not code_files:
        return []

    return [{
        'repo': item['origin'],
        'hash': data['commit'],
        'author': data['Author'],
        'category': "commit",
        'created_date': authored_on,
        'committer': data['Commit'],
        'commit_date': str_to_date(data['CommitDate']),
        'files_no': len(data['files']),
        'refs': data['refs'],
        'parents': data['parents'],
        'files': data['files'],
        # number of file entries that carry an 'action' key
        'files_action': sum(1 for entry in data['files'] if 'action' in entry),
        'merge': 'Merge' in data,
    }]
def _flatten(self, item):
    """
    Turn a raw commit fetched by Perceval into a flat dictionary.

    The result is a list holding a single flat dictionary with the
    fields needed for computing metrics. An empty list is returned
    when the commit should not be considered: its author date falls
    outside [self.since, self.until], or none of its files passes
    every condition in self.is_code.

    :param item: raw item fetched by Perceval (dictionary)
    :returns: list with a single flat dictionary, or an empty list
    """
    data = item['data']
    authored_on = str_to_date(data['AuthorDate'])

    # A bound that is unset (falsy) does not restrict the range.
    too_early = self.since and (self.since > authored_on)
    if too_early or (self.until and (self.until < authored_on)):
        return []

    # Paths of the touched files accepted by every is_code condition.
    code_files = [
        entry['file']
        for entry in data['files']
        if all(rule.check(entry['file']) for rule in self.is_code)
    ]
    if not code_files:
        return []

    return [{
        'repo': item['origin'],
        'hash': data['commit'],
        'author': data['Author'],
        'category': "commit",
        'created_date': authored_on,
        'committer': data['Commit'],
        'commit_date': str_to_date(data['CommitDate']),
        'files_no': len(data['files']),
        'refs': data['refs'],
        'parents': data['parents'],
        'files': data['files'],
        # number of file entries that carry an 'action' key
        'files_action': sum(1 for entry in data['files'] if 'action' in entry),
        'merge': 'Merge' in data,
    }]
def _flatten(self, item):
    """
    Turn a raw issue fetched by Perceval into a flat dictionary.

    The result is a list holding a single flat dictionary with the
    fields needed for computing metrics. An empty list is returned
    when the issue should not be considered: it was created outside
    [self.since, self.until], or its state is not 'open'.

    :param item: raw item fetched by Perceval (dictionary)
    :returns: list with a single flat dictionary, or an empty list
    """
    data = item['data']
    opened_on = str_to_date(data['created_at'])

    # A bound that is unset (falsy) does not restrict the range.
    too_early = self.since and (self.since > opened_on)
    if too_early or (self.until and (self.until < opened_on)):
        return []

    record = {
        'repo': item['origin'],
        'hash': data['id'],
        'category': "issue",
        'author': data['user']['login'],
        'created_date': opened_on,
        'current_status': data['state'],
    }

    if record['current_status'] == 'open':
        # age in whole days, measured against the current local time
        record['open_issue_age'] = (datetime.now() - record['created_date']).days
        return [record]
    return []
def _update_with_reopened_items(self, df):
    """
    Add reopened items as new items to the data frame df.

    Every row that has at least one 'closed' or 'reopened' event is
    removed and replaced by its constituent items (one per
    open-close cycle), which are appended to the data frame. Rows
    without such events are kept unchanged. The index of the
    returned frame is always renumbered from 0.

    :param df: A pandas DataFrame, containing items obtained from
        Perceval. Each row needs the 'events_data' and 'created_date'
        columns.
    :returns df: A modified pandas DataFrame.
    """
    # Local import: DataFrame.append() was removed in pandas 2.0, so
    # pandas.concat() is used instead and needs the module in scope.
    import pandas as pd

    reopened_items = []
    new_items = []

    for index, item in df.iterrows():
        events = [event for event in item['events_data']
                  if event['event'] in ('closed', 'reopened')]
        if not events:
            continue

        # the first closing event gives us our first item
        new_items.append(self._add_item(item, item['created_date'], 'closed'))

        # for every reopen-close pair, create another item; a trailing
        # 'reopened' event with no later close is still open
        last = len(events) - 1
        for i in range(1, len(events), 2):
            if events[i]['event'] != 'reopened':
                continue
            status = 'open' if i == last else 'closed'
            new_items.append(
                self._add_item(item, str_to_date(events[i]['created_at']), status))

        reopened_items.append(index)

    # remove the items that we split into constituent items
    df = df.drop(reopened_items)

    # presumably _add_item returns a row-like record (dict or Series);
    # verify against its definition
    frames = [df]
    if new_items:
        frames.append(pd.DataFrame(new_items))
    return pd.concat(frames, ignore_index=True)
def test_str_to_date_issue(self):
    """
    Test whether str_to_date correctly converts an issue's date
    string to a datetime object.

    The expected value is the parsed timestamp reduced to its
    calendar day (time of day dropped).
    """
    raw = "2013-10-20T01:56:25Z"
    day_format = "%Y-%m-%d"

    parsed = datetime.strptime(raw, "%Y-%m-%dT%H:%M:%SZ")
    # round-trip through the day-only format to drop the time of day
    expected = datetime.strptime(parsed.strftime(day_format), day_format)

    self.assertEqual(expected, str_to_date(raw))
def test_str_to_date_commit(self):
    """
    Test whether str_to_date correctly converts a commit's date
    string to a datetime object.

    The expected value is the parsed timestamp reduced to its
    calendar day (time of day and UTC offset dropped).
    """
    raw = "Tue Aug 18 18:08:27 2015 +0200"
    day_format = "%Y-%m-%d"

    parsed = datetime.strptime(raw, "%a %b %d %H:%M:%S %Y %z")
    # round-trip through the day-only format to drop time and offset
    expected = datetime.strptime(parsed.strftime(day_format), day_format)

    self.assertEqual(expected, str_to_date(raw))
def test_compute(self):
    """
    Test the compute method of an OpenIssueAgeGitHub object with
    default parameters.

    The expected value is the mean age, in whole days, of every item
    in self.items whose state is 'open'.
    """
    metric = OpenIssueAgeGitHub(self.items)

    # manually calculate the age of all open issues in days
    ages = []
    for item in self.items:
        if item['data']['state'] != 'open':
            continue
        opened_on = str_to_date(item['data']['created_at'])
        ages.append((datetime.now() - opened_on).days)

    expected_mean = sum(ages) / len(ages)

    self.assertEqual(expected_mean, metric.compute())