def sort_bug_reports_by_commit_date(bug_reports):
    """Return commit SHAs ordered by their commit date, earliest first.

    Parameters
    ----------
    bug_reports : dict | OrderedDict
        Mapping whose values hold the commit metadata under
        ``['commit']['metadata']``, with at least the 'sha' and 'date'
        string fields.

    Returns
    -------
    list
        The 'sha' values (with any 'commit ' text removed and surrounding
        whitespace stripped), sorted by the value `convert_commit_date`
        returns for the matching 'date' field.  Entries with equal dates
        keep the iteration order of `bug_reports`.
    """
    dated_shas = []
    for key in tqdm(bug_reports):
        metadata = bug_reports[key]['commit']['metadata']
        sha = metadata['sha'].replace('commit ', '').strip()
        raw_date = metadata['date'].replace('Date:', '').strip()
        dated_shas.append((sha, convert_commit_date(raw_date)))

    # Sort on the date only; the sort is stable, so ties keep input order.
    dated_shas.sort(key=itemgetter(1))
    return [sha for sha, _ in dated_shas]
def fix_commit_metadata(data):
    """Clean up commit metadata and derive 'timestamp' from 'date'

    An entry whose commit metadata already has a 'timestamp' key is
    assumed to be fixed already; it is left untouched and counted as
    skipped.  Every other entry has its metadata passed through
    `trim_commit_info`, and then gets a 'timestamp' field computed
    from its 'date' field.

    Parameters
    ----------
    data : dict | OrderedDict
        The combined bug report and repository information
        from the JSON file.

    Returns
    -------
    dict | OrderedDict
        The same `data` object, changed and augmented.

    Side effects
    ------------
    Changes its input.  Prints the number of skipped entries to stderr.
    """
    skipped = 0
    for key in tqdm(data):
        metadata = data[key]['commit']['metadata']
        if 'timestamp' in metadata:
            skipped += 1
            continue

        trim_commit_info(metadata)
        # 'date' is read only after trimming, so the cleaned value is parsed
        commit_datetime = convert_commit_date(metadata['date'])
        metadata['timestamp'] = datetime_to_timestamp(commit_datetime)

    summary = ('%d / %d skipped: had already "timestamp" field in commit metadata'
               % (skipped, len(data)))
    print(summary, file=sys.stderr)
    return data