def print_to_disk(issues, results_folder):
    """
    Print issues to file "issues-github.list" in the results folder.

    Duplicate rows are removed; the order of first occurrence is kept.

    :param issues: the issues to dump
    :param results_folder: the folder where to place "issues-github.list" output file
    """

    # construct path to output file
    output_file = os.path.join(results_folder, "issues-github.list")
    log.info("Dumping output in file '{}'...".format(output_file))

    # construct lines of output: one row per issue event,
    # list-valued fields serialized as JSON
    lines = []
    for issue in issues:
        for event in issue["eventsList"]:
            lines.append((
                issue["number"],
                issue["title"],
                json.dumps(issue["type"]),
                issue["state_new"],
                json.dumps(issue["resolution"]),
                issue["created_at"],
                issue["closed_at"],
                json.dumps([]),  # components
                event["event"],
                event["user"]["name"],
                event["user"]["email"],
                event["created_at"],
                event["event_info_1"],
                json.dumps(event["event_info_2"])))

    # remove duplicate lines, keeping the order of first occurrence.
    # (The previous implementation used sorted(set(lines), key=lines.index),
    # which is O(n^2); this single pass is O(n) with identical output.)
    seen = set()
    unique_lines = []
    for line in lines:
        if line not in seen:
            seen.add(line)
            unique_lines.append(line)

    # write to output file
    csv_writer.write_to_csv(output_file, unique_lines)
def merge_user_with_user_from_csv(user, persons):
    """
    Merge list of given users with list of already known users.

    :param user: list of users to be merged
    :param persons: contains maps of names/usernames to persons from JIRA
                    (incl. e-mail addresses), see function "load_csv"
    :return: list of merged users
    """

    new_user = dict()
    name_utf8 = unicode(user["name"]).encode("utf-8")
    username_utf8 = unicode(user["username"].lower()).encode("utf-8")

    if username_utf8 in persons["by_username"]:
        # username is known: take authoritative name/e-mail from the csv data
        person = persons["by_username"][username_utf8]
        new_user["username"] = username_utf8
        new_user["name"] = unicode(person[0]).encode("utf-8")
        new_user["email"] = unicode(person[1]).encode("utf-8")
    elif name_utf8 in persons["by_name"]:
        # fall back to a lookup by (real) name
        person = persons["by_name"][name_utf8]
        new_user["username"] = username_utf8
        new_user["name"] = unicode(person[0]).encode("utf-8")
        new_user["email"] = unicode(person[1]).encode("utf-8")
    else:
        # unknown user: keep the given data and warn
        new_user["username"] = username_utf8
        new_user["name"] = name_utf8
        new_user["email"] = unicode(user["email"]).encode("utf-8")
        log.warning("User not in csv-file: " + str(user))

    # NOTE(review): this log statement was garbled in the source
    # ('"current User: "******"' is not valid Python); restored to
    # log both the original and the merged user.
    log.info("current User: " + str(user) + ", new user: " + str(new_user))

    return new_user
def print_to_disk(issues, results_folder):
    """
    Print issues to file "issues.list" in result folder.
    This format is outdated but still used by the network library.
    TODO When the network library is updated, this method can be overwritten by "print_to_disk_new".

    :param issues: the issues to dump
    :param results_folder: the folder where to place "issues.list" output file
    """

    # target file for the dump
    output_file = os.path.join(results_folder, "issues.list")
    log.info("Dumping output in file '{}'...".format(output_file))

    # one output row per (issue, event) pair
    lines = [
        (issue["number"],
         issue["state"],
         issue["created_at"],
         issue["closed_at"],
         issue["isPullRequest"],
         event["user"]["name"],
         event["user"]["email"],
         event["created_at"],
         # an empty ref_target has no name to dump
         "" if event["ref_target"] == "" else event["ref_target"]["name"],
         event["event"])
        for issue in issues
        for event in issue["eventsList"]
    ]

    # dump the rows
    csv_writer.write_to_csv(output_file, lines)
def print_to_disk(issues, results_folder):
    """
    Print issues to file "issues-jira.list" in result folder

    :param issues: the issues to dump
    :param results_folder: the folder where to place "issues-jira.list" output file
    """

    # target file for the dump
    output_file = os.path.join(results_folder, "issues-jira.list")
    log.info("Dumping output in file '{}'...".format(output_file))

    rows = []
    for issue in issues:
        log.info("Current issue '{}'".format(issue["externalId"]))

        # one row for the issue creation itself ...
        rows.append((issue["author"]["name"],
                     issue["author"]["email"],
                     issue["externalId"],
                     issue["creationDate"],
                     issue["externalId"],
                     issue["type"]))

        # ... and one row per comment on the issue
        for comment in issue["comments"]:
            rows.append((comment["author"]["name"],
                         comment["author"]["email"],
                         comment["id"],
                         comment["changeDate"],
                         issue["externalId"],
                         "comment"))

    # append the rows to the output file
    csv_writer.write_to_csv(output_file, rows, append=True)
def load_xml(source_folder):
    """Load issues from disk.

    :param source_folder: the folder where to find .xml-files
    :return: the loaded issue data
    """

    issue_data = list()

    # consider only plain files directly inside the source folder
    for entry in os.listdir(source_folder):
        srcfile = os.path.join(source_folder, entry)
        if not os.path.isfile(srcfile):
            continue

        log.devinfo("Loading issues from file '{}'...".format(srcfile))

        # check if file exists and exit early if not
        if not os.path.exists(srcfile):
            log.info("Issue file '{}' does not exist! Exiting early...".format(srcfile))
            sys.exit(-1)

        issue_data.append(parse(srcfile))

    return issue_data
def run():
    """Parse the command line, load the configuration and process GitHub issue data."""

    # get all needed paths and argument for the method call.
    parser = argparse.ArgumentParser(prog='codeface-extraction-issues-github',
                                     description='Codeface extraction')
    parser.add_argument('-c', '--config', help="Codeface configuration file",
                        default='codeface.conf')
    parser.add_argument('-p', '--project', help="Project configuration file",
                        required=True)
    parser.add_argument('resdir', help="Directory to store analysis results in")

    # parse arguments
    args = parser.parse_args(sys.argv[1:])
    codeface_conf = os.path.abspath(args.config)
    project_conf = os.path.abspath(args.project)

    # create configuration
    conf = Configuration.load(codeface_conf, project_conf)

    # get source and results folders
    srcdir = os.path.abspath(os.path.join(args.resdir, conf['repo'] + "_issues"))
    resdir = os.path.abspath(os.path.join(args.resdir, conf['project'], conf["tagging"]))

    # run processing of issue data: load the issues, re-format them,
    # merge and re-format their events, enrich user data with the
    # Codeface database, and finally dump the result to disk
    issues = load(srcdir)
    issues = reformat_issues(issues)
    issues = merge_issue_events(issues)
    issues = reformat_events(issues)
    issues = insert_user_data(issues, conf)
    print_to_disk(issues, resdir)

    log.info("Github issue processing complete!")
def print_to_disk_extr(issues, results_folder):
    """
    Print issues to file "issues.list" in result folder

    :param issues: the issues to dump
    :param results_folder: the folder where to place "issues.list" output file
    """

    # construct path to output file
    output_file = os.path.join(results_folder, "issues.list")
    log.info("Dumping output in file '{}'...".format(output_file))

    def event_row(issue, author, date, event_name):
        # build one csv row in the legacy "issues.list" format
        return (issue["externalId"],
                issue["state"],
                issue["creationDate"],
                issue["resolveDate"],
                False,  # value of is.pull.request
                author["name"],
                author["email"],
                date,
                "",  # ref.name
                event_name)

    lines = []
    for issue in issues:
        log.info("Current issue '{}'".format(issue["externalId"]))
        # the issue author "opens" the issue and is also treated as its first commenter
        lines.append(event_row(issue, issue["author"], issue["creationDate"], "open"))
        lines.append(event_row(issue, issue["author"], issue["creationDate"], "commented"))
        # every comment becomes a "commented" event
        for comment in issue["comments"]:
            lines.append(event_row(issue, comment["author"], comment["changeDate"], "commented"))

    # write to output file
    csv_writer.write_to_csv(output_file, lines, append=True)
def run_extraction(conf, resdir, extract_commit_messages, extract_impl, extract_on_range_level):
    """
    Runs the extraction process for the list of given parameters.

    :param conf: the Codeface configuration object
    :param resdir: the Codeface results dir, where output files are written
    """

    log.info("%s: Extracting data" % conf["project"])

    # initialize database manager with given configuration
    dbm = DBManager(conf)

    # get all types of extractions, both project-level and range-level
    project_extractions, range_extractions = extractions.get_extractions(
        dbm, conf, resdir, csv_writer, extract_commit_messages, extract_impl,
        extract_on_range_level)

    # run project-level extractions
    for extraction in project_extractions:
        extraction.run()

    # run range-level extractions (only if explicitely enabled)
    if extract_on_range_level:
        # check if list of revisions in database is the same as in the config file
        revs = conf["revisions"]
        db_revisions = extractions.RevisionExtraction(dbm, conf, resdir, csv_writer).get_list()

        if not revs:
            log.info("No list of revisions found in configuration file, using the list from the DB instead!")
            # set list of revisions as stored in the database
            revs = db_revisions
        elif set(revs) != set(db_revisions):
            log.error("List of revisions in configuration file do not match the list stored in the DB! Stopping now.")
            sys.exit(1)
        else:
            log.info("List of revisions in configuration file and DB match.")

        # extract data for each release range of this project
        for i in range(len(revs) - 1):
            start_rev, end_rev = revs[i], revs[i + 1]
            range_number = i + 1
            log.info("%s: Extracting data for range %s [version '%s']" %
                     (conf["project"], range_number, end_rev))
            for extraction in range_extractions:
                extraction.run(range_number, start_rev, end_rev)

    log.info("Extraction complete!")
def parse(mbox_name, results_folder, include_filepath, files_as_artifacts, reindex, append_result):
    """Parse the given mbox file with the commit information from the results folder.

    :param mbox_name: the mbox file to search in
    :param results_folder: the results folder for index and commit information
    :param include_filepath: indicator whether to use the 'file name' part of the artifact into account
    :param files_as_artifacts: indicator whether to search for files (base names) as artifacts
    :param reindex: force reindexing if True
    :param append_result: flag whether to append the results for the current mbox file to the output file
    """

    # load mbox file
    mbox = mailbox.mbox(mbox_name)

    # create schema for text search;
    # the analyzer splits by whitespace, commas, colons, and quotation marks
    analyzer = StandardAnalyzer(expression=r"[^\s,:\"']+")
    schema = Schema(messageID=ID(stored=True), content=TEXT(analyzer=analyzer))

    # create/load index (initialize if necessary)
    ix = __get_index(mbox, mbox_name, results_folder, schema, reindex)

    # extract artifacts from results folder
    artifacts = __get_artifacts(results_folder, files_as_artifacts)

    # parallelize execution call for the text search
    log.info("Start parsing...")
    num_cores = multiprocessing.cpu_count()
    csv_data = Parallel(n_jobs=num_cores - 1)(
        delayed(__parse_execute)(commit, schema, ix, include_filepath)
        for commit in artifacts)
    log.info("Parsing finished.")

    # re-arrange results: flatten the per-artifact row lists and
    # prepend the csv header unless we append to an existing file
    result = [] if append_result else [('file', 'artifact', 'messageID')]
    result.extend(row for entry in csv_data for row in entry)

    # determine output file name from the search configuration
    filename = "mboxparsing"
    if files_as_artifacts:
        filename += "_file"
    filename += "_filepath.list" if include_filepath else ".list"
    output_file = os.path.join(results_folder, filename)

    # Writes found hits to file.
    log.info("Writing results to file {}.".format(output_file))
    csv_writer.write_to_csv(output_file, result, append=append_result)

    log.info("Parsing mbox file complete!")
def run():
    """Parse the command line, load the configuration and process GitHub issue data."""

    # get all needed paths and argument for the method call.
    parser = argparse.ArgumentParser(prog='codeface-extraction-issues-github',
                                     description='Codeface extraction')
    parser.add_argument('-c', '--config', help="Codeface configuration file",
                        default='codeface.conf')
    parser.add_argument('-p', '--project', help="Project configuration file",
                        required=True)
    parser.add_argument('resdir', help="Directory to store analysis results in")

    # parse arguments
    args = parser.parse_args(sys.argv[1:])
    codeface_conf, project_conf = map(os.path.abspath, (args.config, args.project))

    # create configuration
    conf = Configuration.load(codeface_conf, project_conf)

    # get source and results folders
    srcdir = os.path.abspath(
        os.path.join(args.resdir, conf['repo'] + "_issues"))
    resdir = os.path.abspath(
        os.path.join(args.resdir, conf['project'], conf["tagging"]))

    # run processing of issue data:
    # 1) load the list of issues
    issues = load(srcdir)
    # 2) re-format the issues
    issues = reformat_issues(issues)
    # 3) merges all issue events into one list
    issues = merge_issue_events(issues)
    # 4) re-format the eventsList of the issues
    issues = reformat_events(issues)
    # 5) update user data with Codeface database
    issues = insert_user_data(issues, conf)
    # 6) dump result to disk, in both the legacy and the new format
    print_to_disk(issues, resdir)
    print_to_disk_new(issues, resdir)

    log.info("Github issue processing complete!")
def clear_result_files(results_folder):
    """
    Creates an empty csv file for every result file.

    :param results_folder: the folder where to save the result files
    """

    log.info("Clear result files ...")

    # all result files this extraction writes, relative to the results folder
    # NOTE(review): "issue-jira.list" (singular) looks inconsistent with the
    # other file names — confirm it is intended.
    file_names = ["issues-jira.list",
                  "bugs-jira.list",
                  "issue-jira.list",
                  "issues-jira-gephi-edges.csv",
                  "issues-jira-gephi-nodes.csv"]

    # truncate (or create) each file so later appends start from scratch
    for file_name in file_names:
        with open(os.path.join(results_folder, file_name), "w+"):
            pass
def load_xml(source_folder, xml_file):
    """
    Load issues from disk.

    :param source_folder: the folder where to .xml-file is in
    :param xml_file: the given xml-file
    :return: the loaded issue data, or None if the file could not be parsed
    """

    srcfile = os.path.join(source_folder, xml_file)
    log.devinfo("Loading issues from file '{}'...".format(srcfile))

    try:
        # parse the xml-file
        return parse(srcfile)
    except Exception as e:
        # best-effort: skip unreadable/broken files, but log why they failed
        # (previously logged at info level and without the exception message)
        log.warning("Issue file '{}' couldn't be opened because of a {}: {}".format(
            srcfile, e.__class__.__name__, e))
        return None
def merge_user_with_user_from_csv(user, persons):
    """
    merges list of given users with list of already known users

    :param user: list of users to be merged
    :param persons: list of persons from JIRA (incl. e-mail addresses)
    :return: list of merged users
    """

    new_user = dict()
    username = user["username"].lower()

    if username in persons:
        # known user: take name and e-mail from the authoritative csv data
        person = persons[username]
        new_user["username"] = unicode(username).encode("utf-8")
        new_user["name"] = unicode(person[0]).encode("utf-8")
        new_user["email"] = unicode(person[1]).encode("utf-8")
    else:
        # unknown user: keep the given data and warn
        new_user = user
        log.warning("User not in csv-file: " + str(user))

    # NOTE(review): this log statement was garbled in the source
    # ('"current User: "******"' is not valid Python); restored to
    # log both the original and the merged user.
    log.info("current User: " + str(user) + ", new user: " + str(new_user))

    return new_user
def print_to_disk_gephi(issues, results_folder):
    """
    Print issues to file "issues-jira-gephi-nodes.csv" and
    "issues-jira-gephi-edges.csv" in result folder.
    The files can be used to build dynamic networks in Gephi.

    :param issues: the issues to dump
    :param results_folder: the folder where to place the two output file
    """

    # construct paths to output files
    output_file_nodes = os.path.join(results_folder, "issues-jira-gephi-nodes.csv")
    output_file_edges = os.path.join(results_folder, "issues-jira-gephi-edges.csv")
    log.info("Dumping output in file '{}'...".format(output_file_nodes))
    log.info("Dumping output in file '{}'...".format(output_file_edges))

    # construct lines of output, starting with the csv headers
    node_lines = [("Id", "Type")]
    edge_lines = [("Source", "Target", "Timestamp", "Edgetype")]

    for issue in issues:
        # the issue and its author become nodes, connected by an edge
        node_lines.append((issue["externalId"], "Issue"))
        node_lines.append((issue["author"]["name"], "Person"))
        edge_lines.append((issue["author"]["name"], issue["externalId"],
                           issue["creationDate"], "Person-Issue"))

        for comment in issue["comments"]:
            node_lines.append((comment["id"], "Comment"))
            node_lines.append((comment["author"]["name"], "Person"))
            edge_lines.append((issue["externalId"], comment["id"],
                               comment["changeDate"], "Issue-Comment"))
            # bug fix: the timestamp here was the literal list ["changeDate"]
            # instead of the comment's change date
            edge_lines.append((comment["author"]["name"], comment["id"],
                               comment["changeDate"], "Person-Comment"))

    # write to output files
    csv_writer.write_to_csv(output_file_edges, edge_lines, append=True)
    csv_writer.write_to_csv(output_file_nodes, node_lines, append=True)
def print_to_disk(issues, results_folder):
    """Print issues to file 'issues.list' in result folder

    :param issues: the issues to dump
    :param results_folder: the folder where to place 'issues.list' output file
    """

    # target file for the dump
    output_file = os.path.join(results_folder, "issues.list")
    log.info("Dumping output in file '{}'...".format(output_file))

    # build one row per issue event
    rows = []
    for issue in issues:
        for event in issue["eventsList"]:
            # an empty ref_target has no name to dump
            ref_target = event["ref_target"]
            ref_name = "" if ref_target == "" else ref_target["name"]
            rows.append((issue["number"],
                         issue["state"],
                         issue["created_at"],
                         issue["closed_at"],
                         issue["isPullRequest"],
                         event["user"]["name"],
                         event["user"]["email"],
                         event["created_at"],
                         ref_name,
                         event["event"]))

    # write to output file
    csv_writer.write_to_csv(output_file, rows)
def run(self, start_revision=None, end_revision=None):
    """
    Runs the extraction.

    :param start_revision: start of an release range (for range-level extractions)
    :param end_revision: end of an release range (for range-level extractions)
    """

    # generic extractions handle every artifact type of the current tagging;
    # non-generic ones only handle the first
    artifacts = self._tagging2artifacts[self.tagging]
    if not self.is_generic_extraction():
        artifacts = artifacts[:1]

    for entity_type in artifacts:
        out_file = self._get_out_file(start_revision, end_revision, entity_type)
        log.info("%s: %s to %s" % (self.project, self.__class__.__name__, out_file))

        # query the database, post-process the rows, and export them
        result = self._run_sql(end_revision, entity_type)
        lines = self._reduce_result(result)
        self._write_export_file(lines, out_file)
def print_to_disk(issues, results_folder):
    """
    Print issues to file "issues-github.list" in result folder.
    This format is outdated but still used by the network library.
    TODO When the network library is updated, this method can be overwritten by "print_to_disk_new".

    NOTE(review): the original docstring claimed "issues.list", but the code
    writes "issues-github.list" — documented here as the code behaves.

    :param issues: the issues to dump
    :param results_folder: the folder where to place the output file
    """

    # construct path to output file
    output_file = os.path.join(results_folder, "issues-github.list")
    log.info("Dumping output in file '{}'...".format(output_file))

    # one row per issue event; list-valued fields are serialized as JSON
    lines = [
        (issue["number"],
         issue["title"],
         json.dumps(issue["type"]),
         issue["state_new"],
         json.dumps(issue["resolution"]),
         issue["created_at"],
         issue["closed_at"],
         json.dumps([]),  # components
         event["event"],
         event["user"]["name"],
         event["user"]["email"],
         event["created_at"],
         event["event_info_1"],
         json.dumps(event["event_info_2"]))
        for issue in issues
        for event in issue["eventsList"]
    ]

    # write to output file
    csv_writer.write_to_csv(output_file, lines)
def perform_data_backup(results_path, results_path_backup):
    """
    Copy the existing .list files of a certain directory (also recursively) to a separate
    backup folder. If the backup folder already exists, no files are copied, i.e., no
    backup is performed.

    :param results_path: the results dir, from which the data should be backuped
    :param results_path_backup: the results dir where the backup should be written to
    """

    # an existing backup folder means a backup was already made: never overwrite it
    if path.exists(results_path_backup):
        log.info("Backup folder already exists. No backup is to be performed.")
        return

    for filepath, dirnames, filenames in walk(results_path):
        # mirror every .list file into the backup folder, keeping its relative path
        relative_dir = filepath[len(results_path) + 1:]
        for filename in filenames:
            if not filename.endswith(".list"):
                continue
            current_file = path.join(filepath, filename)
            backup_file = path.join(results_path_backup, relative_dir, filename)
            backup_dir = path.dirname(backup_file)
            if not path.exists(backup_dir):
                makedirs(backup_dir)
            log.info("Backup %s to %s" % (current_file, backup_file))
            copy(current_file, backup_file)
def print_to_disk_new(issues, results_folder):
    """
    Print issues to file "new_format.list" in result folder.
    This file has a consistent format to the "bugs-jira.list" file.
    TODO When the network library is updated, this is the format which shall be used.

    NOTE(review): the original docstring said "issues_new.list" while the code
    writes "new_format.list" — confirm which file name is intended.

    :param issues: the issues to dump
    :param results_folder: the folder where to place the output file
    """

    # construct path to output file
    output_file = os.path.join(results_folder, "new_format.list")
    log.info("Dumping output in file '{}'...".format(output_file))

    # one row per issue event
    lines = [
        (issue["number"],
         issue["title"],
         issue["type"],
         issue["state_new"],
         issue["resolution"],
         issue["created_at"],
         issue["closed_at"],
         [],  # components
         event["event"],
         event["user"]["name"],
         event["user"]["email"],
         event["created_at"],
         event["event_info_1"],
         event["event_info_2"])
        for issue in issues
        for event in issue["eventsList"]
    ]

    # write to output file
    csv_writer.write_to_csv(output_file, lines)
def run_extraction(conf, resdir):
    """
    Runs the extraction process for the list of given parameters.

    :param conf: the Codeface configuration object
    :param resdir: the Codeface results dir, where output files are written
    """

    log.info("%s: Extracting data" % conf["project"])

    # initialize database manager with given configuration
    dbm = DBManager(conf)

    # get all types of extractions, both project-level and range-level
    project_extractions, range_extractions = extractions.get_extractions(dbm, conf, resdir)

    # run project-level extractions
    for extraction in project_extractions:
        extraction.run()

    # check if list of revisions in database is the same as in the config file
    revs = conf["revisions"]
    db_revisions = extractions.RevisionExtraction(dbm, conf, resdir).get_list()

    if not revs:
        log.info("No list of revisions found in configuration file, using the list from the DB instead!")
        # set list of revisions as stored in the database
        revs = db_revisions
    elif set(revs) != set(db_revisions):
        log.error("List of revisions in configuration file do not match the list stored in the DB! Stopping now.")
        sys.exit(1)
    else:
        log.info("List of revisions in configuration file and DB match.")

    # run range-level extractions for all release ranges of this project
    for i in range(len(revs) - 1):
        start_rev, end_rev = revs[i], revs[i + 1]
        log.info("%s: Extracting data for version '%s'" % (conf["project"], end_rev))
        for extraction in range_extractions:
            extraction.run(start_rev, end_rev)
def parse_xml(issue_data, persons, skip_history):
    """
    Parse issues from the xml-data

    :param issue_data: list of xml-files
    :param persons: list of persons from JIRA (incl. e-mail addresses)
    :param skip_history: flag if the history will be loaded in a different method
    :return: list of parsed issues
    """

    log.info("Parse jira issues...")
    issues = list()
    issuelist = issue_data.getElementsByTagName("item")

    # re-process all issues
    log.debug("Number of issues:" + str(len(issuelist)))
    for issue_x in issuelist:

        # temporary container for references
        comments = list()
        issue = dict()
        components = []

        # parse values form xml and add them to the issue
        key = issue_x.getElementsByTagName("key")[0]
        issue["id"] = key.attributes["id"].value
        issue["externalId"] = key.firstChild.data

        created = issue_x.getElementsByTagName("created")[0]
        createDate = created.firstChild.data
        issue["creationDate"] = format_time(createDate)

        # a missing <resolved> element means the issue is still open
        resolved = issue_x.getElementsByTagName("resolved")
        issue["resolveDate"] = ""
        if (len(resolved) > 0) and (resolved[0] is not None):
            resolveDate = resolved[0].firstChild.data
            issue["resolveDate"] = format_time(resolveDate)

        title = issue_x.getElementsByTagName("title")[0]
        issue["title"] = title.firstChild.data

        link = issue_x.getElementsByTagName("link")[0]
        issue["url"] = link.firstChild.data

        # renamed from 'type' to avoid shadowing the builtin
        type_elem = issue_x.getElementsByTagName("type")[0]
        issue["type"] = type_elem.firstChild.data
        # TODO new consistent format with GitHub issues. Not supported by the network library yet
        issue["type_new"] = ["issue", str(type_elem.firstChild.data.lower())]

        status = issue_x.getElementsByTagName("status")[0]
        issue["state"] = status.firstChild.data
        # TODO new consistent format with GitHub issues. Not supported by the network library yet
        issue["state_new"] = status.firstChild.data.lower()

        project = issue_x.getElementsByTagName("project")[0]
        issue["projectId"] = project.attributes["id"].value

        resolution = issue_x.getElementsByTagName("resolution")[0]
        issue["resolution"] = resolution.firstChild.data
        # new consistent format with GitHub issues. Not supported by the network library yet
        issue["resolution_new"] = [str(resolution.firstChild.data.lower())]
        # consistency to default GitHub labels
        if issue["resolution"] == "Won't Fix":
            issue["resolution_new"] = ["wontfix"]
        # consistency to default GitHub labels
        if issue["resolution"] == "Won't Do":
            issue["resolution_new"] = ["wontdo"]

        for component in issue_x.getElementsByTagName("component"):
            components.append(str(component.firstChild.data))
        issue["components"] = components

        # if links are not loaded via api, they are added as a history event with less information
        if skip_history:
            issue["history"] = []
            for ref in issue_x.getElementsByTagName("issuelinktype"):
                history = dict()
                history["event"] = "add_link"
                history["author"] = create_user("", "", "")
                history["date"] = ""
                history["event_info_1"] = ref.getElementsByTagName("issuekey")[0].firstChild.data
                history["event_info_2"] = "issue"
                issue["history"].append(history)

        reporter = issue_x.getElementsByTagName("reporter")[0]
        user = create_user(reporter.firstChild.data, reporter.attributes["username"].value, "")
        issue["author"] = merge_user_with_user_from_csv(user, persons)

        issue["title"] = issue_x.getElementsByTagName("title")[0].firstChild.data

        # add comments / issue_changes to the issue
        for comment_x in issue_x.getElementsByTagName("comment"):
            comment = dict()
            comment["id"] = comment_x.attributes["id"].value
            user = create_user("", comment_x.attributes["author"].value, "")
            comment["author"] = merge_user_with_user_from_csv(user, persons)
            comment["state_on_creation"] = issue["state"]  # can get updated if history is retrieved
            comment["resolution_on_creation"] = issue["resolution"]  # can get updated if history is retrieved
            created = comment_x.attributes["created"].value
            comment["changeDate"] = format_time(created)
            comment["text"] = comment_x.firstChild.data
            comment["issueId"] = issue["id"]
            comments.append(comment)
        issue["comments"] = comments

        # add relations to the issue.
        # bug fix: previously a single dict per link type was mutated and appended
        # repeatedly, so all appended relations aliased the same object and ended
        # up carrying the id of the last issue key. Build a fresh dict per key.
        # NOTE(review): hasAttribute() checks XML *attributes*, while
        # "inwardlinks"/"outwardlinks" appear to be child elements, and
        # getElementsByTagName() returns a NodeList (which has no
        # getElementsByTagName itself) — these branches look dead; confirm
        # against real JIRA export data before relying on them.
        relations = list()
        for rel in issue_x.getElementsByTagName("issuelinktype"):
            relation_name = rel.getElementsByTagName("name")[0].firstChild.data
            if rel.hasAttribute("inwardlinks"):
                left = rel.getElementsByTagName("inwardlinks")
                issuekeys = left.getElementsByTagName("issuekey")
                for key in issuekeys:
                    relations.append({"relation": relation_name,
                                      "type": "inward",
                                      "id": key.firstChild.data})
            if rel.hasAttribute("outwardlinks"):
                right = rel.getElementsByTagName("outwardlinks")
                issuekeys = right.getElementsByTagName("issuekey")
                for key in issuekeys:
                    relations.append({"relation": relation_name,
                                      "type": "outward",
                                      "id": key.firstChild.data})
        issue["relations"] = relations

        issues.append(issue)

    log.debug("number of issues after parse_xml: '{}'".format(len(issues)))
    return issues
def merge_issue_events(issue_data):
    """
    All issue events are merged together in the eventsList. This simplifies processing in later steps.

    Each issue gets a synthetic "created" event; related issues, related
    commits, and comments are rewritten into the common event format and
    merged into one chronologically sorted eventsList.

    :param issue_data: the issue data from which the events shall be merged
    :return: the issue data with merged eventsList
    """

    log.info("Merge issue events ...")

    for issue in issue_data:

        # temporary container for references
        comments = dict()

        # adds creation event to eventsList
        created_event = dict()
        created_event["user"] = issue["user"]
        created_event["created_at"] = issue["created_at"]
        created_event["event"] = "created"
        created_event["event_info_1"] = "open"
        created_event["event_info_2"] = []
        issue["eventsList"].append(created_event)
        issue["state_new"] = "open"

        # the format of every related issue is adjusted to the event format
        for rel_issue in issue["relatedIssues"]:
            rel_issue["created_at"] = format_time(rel_issue["referenced_at"])
            rel_issue["event"] = "add_link"
            rel_issue["event_info_1"] = rel_issue["number"]
            rel_issue["event_info_2"] = "issue"
            rel_issue["ref_target"] = ""

        # the format of every related commit is adjusted to the event format
        for rel_commit in issue["relatedCommits"]:
            # if the related commit has no time, it is a commit in the pull-request
            if rel_commit["referenced_at"] is None:
                rel_commit["user"] = create_user("", "", "")
                rel_commit["created_at"] = ""
                rel_commit["event"] = "has_commit"
                rel_commit["event_info_1"] = rel_commit["commit_id"]
                rel_commit["event_info_2"] = ""
                rel_commit["ref_target"] = ""
            # else it is a commit the issue/ pull-request refers to
            else:
                rel_commit["created_at"] = format_time(
                    rel_commit["referenced_at"])
                rel_commit["event"] = "add_link"
                rel_commit["event_info_1"] = rel_commit["commit_id"]
                rel_commit["event_info_2"] = "commit"
                rel_commit["ref_target"] = ""

        # the format of every comment is adjusted to the event format
        for comment in issue["commentsList"]:
            comment["event"] = "commented"
            comment["ref_target"] = ""
            comment["created_at"] = format_time(comment["referenced_at"])
            if "event_info_1" not in comment:
                comment["event_info_1"] = ""
            if "event_info_2" not in comment:
                comment["event_info_2"] = ""
            # cache comment by date to resolve/re-arrange references later
            # (assumes at most one comment per timestamp — later comments with
            # the same formatted date overwrite earlier ones in this cache)
            comments[comment["created_at"]] = comment

        # the format of every event is adjusted
        for event in issue["eventsList"]:
            event["ref_target"] = ""
            event["created_at"] = format_time(event["created_at"])
            if "event_info_1" not in event:
                event["event_info_1"] = ""
            if "event_info_2" not in event:
                event["event_info_2"] = ""

            # if event collides with a comment
            if event["created_at"] in comments:
                comment = comments[event["created_at"]]
                # if someone gets mentioned or subscribed by someone else in a comment,
                # re-write the reference
                if (event["event"] == "mentioned" or event["event"] == "subscribed") and \
                        comment["event"] == "commented":
                    event["ref_target"] = event["user"]
                    event["user"] = comment["user"]

        # merge events, relatedCommits, relatedIssues and comment lists
        issue["eventsList"] = issue["commentsList"] + issue[
            "eventsList"] + issue["relatedIssues"] + issue["relatedCommits"]

        # remove events without user
        issue["eventsList"] = [
            event for event in issue["eventsList"]
            if not (event["user"] is None or event["ref_target"] is None)
        ]

        # sorts eventsList by time
        issue["eventsList"] = sorted(issue["eventsList"],
                                     key=lambda k: k["created_at"])

    return issue_data
def reformat_events(issue_data):
    """ Re-format event information dependent on the event type.

    "closed"/"reopened" events become "state_updated" events, label events are
    mapped to issue types/resolutions (generating additional "type_updated"/
    "resolution_updated" events), and comments are annotated with the issue
    state/resolution at the time they were written.

    :param issue_data: the data of all issues that shall be re-formatted
    :return: the issue data with updated event information
    """
    log.info("Update event information ...")

    for issue in issue_data:
        # re-format information of every event in the eventsList of an issue
        # NOTE(review): events are appended to issue["eventsList"] while it is
        # being iterated; the appended synthetic events are visited later in
        # the same loop but match no branch, so they pass through unchanged
        for event in issue["eventsList"]:

            if event["event"] == "closed":
                event["event"] = "state_updated"
                event["event_info_1"] = "closed"  # new state
                event["event_info_2"] = "open"  # old state
                issue["state_new"] = "closed"

            elif event["event"] == "reopened":
                event["event"] = "state_updated"
                event["event_info_1"] = "open"  # new state
                event["event_info_2"] = "closed"  # old state
                issue["state_new"] = "reopened"

            elif event["event"] == "labeled":
                label = event["label"]["name"].lower()
                event["event_info_1"] = label

                # if the label is in this list, it also is a type of the issue
                if label in known_types:
                    issue["type"].append(str(label))

                    # create an event for type updates and add it to the eventsList
                    type_event = dict()
                    type_event["user"] = event["user"]
                    type_event["created_at"] = event["created_at"]
                    type_event["event"] = "type_updated"
                    type_event["event_info_1"] = label
                    type_event["event_info_2"] = ""
                    type_event["ref_target"] = ""
                    issue["eventsList"].append(type_event)

                # if the label is in this list, it also is a resolution of the issue
                elif label in known_resolutions:
                    issue["resolution"].append(str(label))

                    # create an event for resolution updates and add it to the eventsList
                    resolution_event = dict()
                    resolution_event["user"] = event["user"]
                    resolution_event["created_at"] = event["created_at"]
                    resolution_event["event"] = "resolution_updated"
                    resolution_event["event_info_1"] = label
                    resolution_event["event_info_2"] = ""
                    resolution_event["ref_target"] = ""
                    issue["eventsList"].append(resolution_event)

            elif event["event"] == "unlabeled":
                label = event["label"]["name"].lower()
                event["event_info_1"] = label

                # if the label is in this list, it also is a type of the issue
                # NOTE(review): list.remove raises ValueError if the label was
                # never added before (unlabeled without a matching labeled
                # event) — confirm GitHub data guarantees pairing
                if label in known_types:
                    issue["type"].remove(str(label))

                    # create an event for type updates and add it to the eventsList
                    type_event = dict()
                    type_event["user"] = event["user"]
                    type_event["created_at"] = event["created_at"]
                    type_event["event"] = "type_updated"
                    type_event["event_info_1"] = ""
                    type_event["event_info_2"] = label
                    type_event["ref_target"] = ""
                    issue["eventsList"].append(type_event)

                # if the label is in this list, it also is a resolution of the issue
                elif label in known_resolutions:
                    issue["resolution"].remove(str(label))

                    # create an event for resolution updates and add it to the eventsList
                    resolution_event = dict()
                    resolution_event["user"] = event["user"]
                    resolution_event["created_at"] = event["created_at"]
                    resolution_event["event"] = "resolution_updated"
                    resolution_event["event_info_1"] = ""
                    resolution_event["event_info_2"] = label
                    resolution_event["ref_target"] = ""
                    issue["eventsList"].append(resolution_event)

            elif event["event"] == "commented":
                # "state_new" and "resolution" of the issue give the information about
                # the state and the resolution of the issue when the comment was
                # written, because the eventsList is sorted by time
                event["event_info_1"] = issue["state_new"]
                event["event_info_2"] = issue["resolution"]

    return issue_data
def load_issue_via_api(issues, persons, url):
    """ For each issue in the list, add its change history via the JIRA API.

    Mutates each issue in place: adds the key "history" (list of normalized
    history events) and annotates each comment with "state_on_creation" and
    "resolution_on_creation". Respects the global request limit "max_requests"
    by sleeping roughly 24 hours once it is exceeded.

    :param issues: list of issues
    :param persons: list of persons from JIRA (incl. e-mail addresses), see function "load_csv"
    :param url: the project url
    """
    log.info("Load issue information via api...")
    jira_project = JIRA(url)
    global jira_request_counter

    for issue in issues:

        # if the number of JIRA requests has reached the request limit, wait 24 hours
        if jira_request_counter > max_requests:
            log.info(
                "More than " + str(max_requests) +
                " JIRA requests have already been sent. Wait for 24 hours...")
            # 60 * 60 * 24 = 86400; sleeps 86500, i.e. with a small safety margin
            sleep(86500)
            log.info("Reset JIRA request counter and proceed...")
            jira_request_counter = 0

        try:
            # send JIRA request for current issue and increase request counter
            jira_request_counter += 1
            log.info("JIRA request counter: " + str(jira_request_counter))
            api_issue = jira_project.issue(issue["externalId"],
                                           expand="changelog")
            changelog = api_issue.changelog
        except JIRAError:
            # proceed without history information for this issue
            log.warn("JIRA Error: Changelog cannot be extracted for issue " +
                     issue["externalId"] + ". History omitted!")
            changelog = None

        histories = list()

        # add the issue creation time with the default state to a list;
        # the list is needed to find out the state the issue had when a comment was written
        state_changes = [[issue["creationDate"], "open"]]
        # add the issue creation time with the default resolution to a list;
        # the list is needed to find out the resolution the issue had when a comment was written
        resolution_changes = [[issue["creationDate"], "unresolved"]]

        # only consider history changes if we were able to extract the changelog for the current issue
        if changelog is not None:

            # history changes get visited in time order from oldest to newest
            for change in changelog.histories:

                # default values for state and resolution
                old_state, new_state, old_resolution, new_resolution = "open", "open", "unresolved", "unresolved"

                # all changes in the issue changelog are checked if they contain useful information
                for item in change.items:

                    # state_updated event gets created and added to the issue history
                    if item.field == "status":
                        if item.fromString is not None:
                            old_state = item.fromString.lower()
                        if item.toString is not None:
                            new_state = item.toString.lower()
                        history = dict()
                        history["event"] = "state_updated"
                        history["event_info_1"] = new_state
                        history["event_info_2"] = old_state
                        # some changelog entries have no author attribute
                        if hasattr(change, "author"):
                            user = create_user(change.author.displayName,
                                               change.author.name, "")
                        else:
                            log.warn("No author for history: " +
                                     str(change.id) + " created at " +
                                     str(change.created))
                            user = create_user("", "", "")
                        history["author"] = merge_user_with_user_from_csv(
                            user, persons)
                        history["date"] = format_time(change.created)
                        histories.append(history)
                        state_changes.append([history["date"], new_state])

                    # resolution_updated event gets created and added to the issue history
                    elif item.field == "resolution":
                        if item.fromString is not None:
                            old_resolution = item.fromString.lower()
                        if item.toString is not None:
                            new_resolution = item.toString.lower()
                        history = dict()
                        history["event"] = "resolution_updated"
                        history["event_info_1"] = new_resolution
                        history["event_info_2"] = old_resolution
                        if hasattr(change, "author"):
                            user = create_user(change.author.displayName,
                                               change.author.name, "")
                        else:
                            log.warn("No author for history: " +
                                     str(change.id) + " created at " +
                                     str(change.created))
                            user = create_user("", "", "")
                        history["author"] = merge_user_with_user_from_csv(
                            user, persons)
                        history["date"] = format_time(change.created)
                        histories.append(history)
                        resolution_changes.append(
                            [history["date"], new_resolution])

                    # assigned event gets created and added to the issue history
                    # NOTE(review): unlike above, no hasattr(change, "author")
                    # guard here — confirm assignee changes always carry an author
                    elif item.field == "assignee":
                        history = dict()
                        history["event"] = "assigned"
                        user = create_user(change.author.displayName,
                                           change.author.name, "")
                        history["author"] = merge_user_with_user_from_csv(
                            user, persons)
                        assignee = create_user(item.toString, item.to, "")
                        assigned_user = merge_user_with_user_from_csv(
                            assignee, persons)
                        history["event_info_1"] = assigned_user["name"]
                        history["event_info_2"] = assigned_user["email"]
                        history["date"] = format_time(change.created)
                        histories.append(history)

                    elif item.field == "Link":
                        # add_link event gets created and added to the issue history
                        if item.toString is not None:
                            history = dict()
                            history["event"] = "add_link"
                            user = create_user(change.author.displayName,
                                               change.author.name, "")
                            history["author"] = merge_user_with_user_from_csv(
                                user, persons)
                            # the api returns a text; the issue id is at the
                            # end of the text and gets extracted
                            history["event_info_1"] = item.toString.split()[-1]
                            history["event_info_2"] = "issue"
                            history["date"] = format_time(change.created)
                            histories.append(history)
                        # remove_link event gets created and added to the issue history
                        if item.fromString is not None:
                            history = dict()
                            history["event"] = "remove_link"
                            user = create_user(change.author.displayName,
                                               change.author.name, "")
                            history["author"] = merge_user_with_user_from_csv(
                                user, persons)
                            # the api returns a text; the issue id is at the
                            # end of the text and gets extracted
                            history["event_info_1"] = item.fromString.split(
                            )[-1]
                            history["event_info_2"] = "issue"
                            history["date"] = format_time(change.created)
                            histories.append(history)

        # state and resolution change lists get sorted by time
        state_changes.sort(key=lambda x: x[0])
        resolution_changes.sort(key=lambda x: x[0])

        for comment in issue["comments"]:
            # determine the state the issue had when the comment was written
            # (last change strictly before the comment wins)
            # NOTE(review): if a comment's changeDate is not later than the
            # creation date, "state_on_creation" is never set — verify callers
            # tolerate the missing key
            for state in state_changes:
                if comment["changeDate"] > state[0]:
                    comment["state_on_creation"] = state[1]
            # determine the resolution the issue had when the comment was written
            for resolution in resolution_changes:
                if comment["changeDate"] > resolution[0]:
                    comment["resolution_on_creation"] = [str(resolution[1])]

        issue["history"] = histories
def print_to_disk_bugs(issues, results_folder):
    """ Extract bug issues and print them to file "bugs-jira.list" in the results folder.

    This method prints in a new format which is consistent to the format of
    "print_to_disk_new" in "issue_processing.py".
    TODO When the network library is updated this format shall be used in all
    print to disk methods.

    :param issues: the issues from which the bugs are extracted
    :param results_folder: the folder where to place "bugs-jira.list" output file
    """

    # construct path to output file
    output_file = os.path.join(results_folder, "bugs-jira.list")
    log.info("Dumping output in file '{}'...".format(output_file))

    # collect one row per event of every bug issue
    rows = []
    for issue in issues:
        log.info("Current issue '{}'".format(issue["externalId"]))

        # skip all issues that are not of type bug
        if "bug" not in issue["type_new"]:
            continue

        # the issue-level columns are identical for every row of this issue
        issue_columns = (issue["externalId"],
                         issue["title"],
                         issue["type_new"],
                         issue["state_new"],
                         issue["resolution_new"],
                         issue["creationDate"],
                         issue["resolveDate"],
                         issue["components"])

        # row for the creation event
        rows.append(issue_columns + ("created",  ## event.name
                                     issue["author"]["name"],
                                     issue["author"]["email"],
                                     issue["creationDate"],
                                     "open",  ## default state when created
                                     ["unresolved"]  ## default resolution when created
                                     ))

        # additional commented row for the issue description
        rows.append(issue_columns + ("commented",
                                     issue["author"]["name"],
                                     issue["author"]["email"],
                                     issue["creationDate"],
                                     "open",  ## default state when created
                                     ["unresolved"]  ## default resolution when created
                                     ))

        # one row per comment, annotated with the state/resolution at writing time
        for comment in issue["comments"]:
            rows.append(issue_columns + ("commented",
                                         comment["author"]["name"],
                                         comment["author"]["email"],
                                         comment["changeDate"],
                                         comment["state_on_creation"],
                                         comment["resolution_on_creation"]))

        # one row per history event
        for history in issue["history"]:
            rows.append(issue_columns + (history["event"],
                                         history["author"]["name"],
                                         history["author"]["email"],
                                         history["date"],
                                         history["event_info_1"],
                                         history["event_info_2"]))

    # write to output file
    csv_writer.write_to_csv(output_file, rows, append=True)
def run():
    """ Entry point: parse command-line arguments and run the full JIRA issue
    processing pipeline.

    For every issue XML file in the source directory the pipeline
    1) loads the issues, 2) re-formats them, 3) optionally retrieves history
    information via the JIRA API, 4) syncs users with the Codeface database,
    and 5)/8) dumps the results and the bug subset to disk.
    """
    # get all needed paths and arguments for the method call.
    parser = argparse.ArgumentParser(prog="codeface-extraction-issues-jira",
                                     description="Codeface extraction")
    parser.add_argument("-c",
                        "--config",
                        help="Codeface configuration file",
                        default="codeface.conf")
    parser.add_argument("-p",
                        "--project",
                        help="Project configuration file",
                        required=True)
    parser.add_argument("resdir", help="Directory to store analysis results in")
    parser.add_argument(
        "-s",
        "--skip_history",
        # FIX: the two concatenated string parts previously rendered as
        # "...JIRAserver..." — a space was missing between them
        help=
        "Skip methods that retrieve additional history information from the configured JIRA "
        + "server. This decreases the runtime and shuts off the external connection",
        action="store_true")

    # parse arguments
    args = parser.parse_args(sys.argv[1:])
    __codeface_conf, __project_conf = map(os.path.abspath,
                                          (args.config, args.project))

    # create configuration
    __conf = Configuration.load(__codeface_conf, __project_conf)

    # get source and results folders
    __srcdir = os.path.abspath(
        os.path.join(args.resdir, __conf["repo"] + "_proximity", "conway",
                     "issues_xml"))
    __resdir = os.path.abspath(
        os.path.join(args.resdir, __conf["project"], __conf["tagging"]))
    __srcdir_csv = os.path.abspath(
        os.path.join(args.resdir, __conf["repo"] + "_proximity", "conway"))

    # load the list of persons
    persons = load_csv(__srcdir_csv)

    # load the xml-file list
    file_list = [
        f for f in os.listdir(__srcdir)
        if os.path.isfile(os.path.join(__srcdir, f))
    ]

    # creates empty result files
    clear_result_files(__resdir)

    # list for malformed or missing xml-files
    incorrect_files = []

    # processes every xml-file
    for current_file in file_list:
        # 1) load the list of issues
        issues = load_xml(__srcdir, current_file)
        # if an error occurred while loading the xml-file, remember it and go on
        if issues is None:
            incorrect_files.append(current_file)
            continue
        # 2) re-format the issues
        issues = parse_xml(issues, persons, args.skip_history)
        # 3) load issue information via api
        if not args.skip_history:
            load_issue_via_api(issues, persons, __conf["issueTrackerURL"])
        # 4) update user data with Codeface database
        # ATTENTION: As the database update is performed for every iteration in this for loop, but the current issue
        # data is appended to the results file immediately, the database updates from the later iterations are not
        # respected in the previously dumped issues from the previous iterations. However, as we don't get email
        # data from JIRA, this is currently not a problem, as no names will change in the database if we don't
        # provide emails. If JIRA will provide email data in the future, this implementation needs to be adjusted
        # in such a way that users in issue data of all iterations are updated in the end and dumped afterwards,
        # instead of dumping the intermediate issue data immediately.
        issues = insert_user_data(issues, __conf)
        # 5) dump result to disk
        print_to_disk(issues, __resdir)
        # # 6) export for Gephi
        # print_to_disk_gephi(issues, __resdir)
        # # 7) export for jira issue extraction to use them in dev-network-growth
        # print_to_disk_extr(issues, __resdir)
        # 8) dump bug issues to disk
        print_to_disk_bugs(issues, __resdir)

    log.info("Jira issue processing complete!")
    log.info("In total, " + str(jira_request_counter) +
             " requests have been sent to Jira.")
    if incorrect_files:
        # FIX: message previously read "Following files where malformed or not existing:: "
        log.info("Following files were malformed or not existing: " +
                 str(incorrect_files))
def run():
    """ Entry point: parse command-line arguments and run the JIRA issue
    processing pipeline (load XML, re-format, fetch history via API, sync
    users with the Codeface database, and dump all result files).

    NOTE(review): this is a second definition of run(); if both definitions
    live in the same module, this one shadows the earlier one — confirm intent.
    """
    # get all needed paths and arguments for the method call.
    parser = argparse.ArgumentParser(prog="codeface-extraction-issues-jira", description="Codeface extraction")
    parser.add_argument("-c", "--config", help="Codeface configuration file", default="codeface.conf")
    parser.add_argument("-p", "--project", help="Project configuration file", required=True)
    parser.add_argument("resdir", help="Directory to store analysis results in")
    # NOTE(review): the concatenated help string renders as "...JIRAserver..."
    # (missing space between the two parts)
    parser.add_argument("-s", "--skip_history",
                        help="Skip methods that retrieve additional history information from the configured JIRA" +
                             "server. This decreases the runtime and shuts off the external connection",
                        action="store_true")

    # parse arguments
    args = parser.parse_args(sys.argv[1:])
    __codeface_conf, __project_conf = map(os.path.abspath, (args.config, args.project))

    # create configuration
    __conf = Configuration.load(__codeface_conf, __project_conf)

    # get source and results folders
    __srcdir = os.path.abspath(os.path.join(args.resdir, __conf["repo"] + "_proximity", "conway", "issues_xml"))
    __resdir = os.path.abspath(os.path.join(args.resdir, __conf["project"], __conf["tagging"]))
    __srcdir_csv = os.path.abspath(os.path.join(args.resdir, __conf["repo"] + "_proximity", "conway"))
    # get person folder
    # __psrcdir = os.path.abspath(os.path.join(args.resdir, __conf["repo"] + "_proximity", "conway"))

    # load the list of persons
    persons = load_csv(__srcdir_csv)

    # load the xml-file list
    file_list = [f for f in os.listdir(__srcdir) if os.path.isfile(os.path.join(__srcdir, f))]

    # creates empty result files
    clear_result_files(__resdir)

    # list for malformed or missing xml-files
    incorrect_files = []

    # processes every xml-file
    for current_file in file_list:
        # 1) load the list of issues
        issues = load_xml(__srcdir, current_file)
        # if an error occurred while loading the xml-file, remember it and go on
        if issues is None:
            incorrect_files.append(current_file)
            continue
        # 2) re-format the issues
        issues = parse_xml(issues, persons, args.skip_history)
        # 3) load issue information via api
        if not args.skip_history:
            load_issue_via_api(issues, persons, __conf["issueTrackerURL"])
        # 4) update user data with Codeface database
        # (original note said "maybe not necessary" — left in as a review hint)
        issues = insert_user_data(issues, __conf)
        # 5) dump result to disk
        print_to_disk(issues, __resdir)
        # 6) export for Gephi
        print_to_disk_gephi(issues, __resdir)
        # 7) export for jira issue extraction to use them in dev-network-growth
        print_to_disk_extr(issues, __resdir)
        # 8) dump bug issues to disk
        print_to_disk_bugs(issues, __resdir)

    log.info("Jira issue processing complete!")
    if incorrect_files:
        log.info("Following files where malformed or not existing:: " + str(incorrect_files))
def insert_user_data(issues, conf):
    """ Insert user data into the database and update the issue data.

    For the author of every issue and every event (plus reference targets),
    the user is resolved through the Codeface ID service and replaced by the
    canonical person data (name, e-mail, id) from the database.

    :param issues: the issues to retrieve user data from
    :param conf: the project configuration
    :return: the updated issue data
    """
    log.info("Syncing users with ID service...")

    # create buffer for users to avoid repeated DB round trips
    user_buffer = dict()
    # open database connection
    dbm = DBManager(conf)
    # open ID-service connection
    idservice = idManager(dbm, conf)

    def get_user_string(name, email):
        # build the "name <email>" string the ID service expects;
        # fall back to the bare name if no e-mail is available
        if not email or email is None:
            return "{name}".format(name=name)
            # return "{name} <{name}@default.com>".format(name=name) # for debugging only
        else:
            return "{name} <{email}>".format(name=name, email=email)

    def get_or_update_user(user, buffer_db=user_buffer):
        # fix encoding for name and e-mail address; fall back to the
        # username if no name is available
        if user["name"] is not None:
            name = unicode(user["name"]).encode("utf-8")
        else:
            name = unicode(user["username"]).encode("utf-8")
        mail = unicode(user["email"]).encode("utf-8")

        # construct string for ID service and send query
        user_string = get_user_string(name, mail)

        # check buffer to reduce amount of DB queries
        if user_string in buffer_db:
            log.devinfo("Returning user '{}' from buffer.".format(user_string))
            return buffer_db[user_string]

        # get person information from ID service
        log.devinfo("Passing user '{}' to ID service.".format(user_string))
        idx = idservice.getPersonID(user_string)

        # update user data with person information from DB
        person = idservice.getPersonFromDB(idx)
        user["email"] = person["email1"]  # column "email1"
        user["name"] = person["name"]  # column "name"
        user["id"] = person["id"]  # column "id"

        # add user information to buffer
        # user_string = get_user_string(user["name"], user["email"]) # update
        buffer_db[user_string] = user

        return user

    for issue in issues:
        # check database for issue author
        issue["user"] = get_or_update_user(issue["user"])

        # check database for event authors
        for event in issue["eventsList"]:
            # get the event user from the DB
            event["user"] = get_or_update_user(event["user"])

            # get the reference-target user from the DB if needed and mirror
            # its name/e-mail into the event info columns
            if event["ref_target"] != "":
                event["ref_target"] = get_or_update_user(event["ref_target"])
                event["event_info_1"] = event["ref_target"]["name"]
                event["event_info_2"] = event["ref_target"]["email"]

    return issues
def insert_user_data(issues, conf):
    """ Insert user data into the database and update the issue data.

    Resolves the author of every issue and every event (plus reference
    targets) through the Codeface ID service and replaces it with the
    canonical person data (name, e-mail, id) from the database.

    NOTE(review): this is a second definition of insert_user_data; if both
    definitions live in the same module, this one shadows the earlier one.

    :param issues: the issues to retrieve user data from
    :param conf: the project configuration
    :return: the updated issue data
    """
    log.info("Syncing users with ID service...")

    # create buffer for users to avoid repeated DB round trips
    user_buffer = dict()
    # open database connection
    dbm = DBManager(conf)
    # open ID-service connection
    idservice = idManager(dbm, conf)

    def get_user_string(name, email):
        # build the "name <email>" string the ID service expects;
        # fall back to the bare name if no e-mail is available
        if not email or email is None:
            return "{name}".format(name=name)
            # return "{name} <{name}@default.com>".format(name=name) # for debugging only
        else:
            return "{name} <{email}>".format(name=name, email=email)

    def get_or_update_user(user, buffer_db=user_buffer):
        # fix encoding for name and e-mail address; fall back to the
        # username if no name is available
        if user["name"] is not None:
            name = unicode(user["name"]).encode("utf-8")
        else:
            name = unicode(user["username"]).encode("utf-8")
        mail = unicode(user["email"]).encode("utf-8")

        # construct string for ID service and send query
        user_string = get_user_string(name, mail)

        # check buffer to reduce amount of DB queries
        if user_string in buffer_db:
            log.devinfo("Returning user '{}' from buffer.".format(user_string))
            return buffer_db[user_string]

        # get person information from ID service
        log.devinfo("Passing user '{}' to ID service.".format(user_string))
        idx = idservice.getPersonID(user_string)

        # update user data with person information from DB
        person = idservice.getPersonFromDB(idx)
        user["email"] = person["email1"]  # column 'email1'
        user["name"] = person["name"]  # column 'name'
        user["id"] = person["id"]  # column 'id'

        # add user information to buffer
        # user_string = get_user_string(user["name"], user["email"]) # update
        buffer_db[user_string] = user

        return user

    for issue in issues:
        # check database for issue author
        issue["user"] = get_or_update_user(issue["user"])

        # check database for event authors
        for event in issue["eventsList"]:
            # get the event user from the DB
            event["user"] = get_or_update_user(event["user"])

            # get the reference-target user from the DB if needed
            if event["ref_target"] != "":
                event["ref_target"] = get_or_update_user(event["ref_target"])

    return issues
def insert_user_data(issues, conf):
    """ Insert user data into database and update issue data.

    Two-phase sync with the Codeface ID service: first, all occurring users
    (issue authors, comment authors, history authors, assignees) are pushed to
    the ID service and replaced by their person ids; second — after all
    database updates have been performed — the ids are resolved back to the
    canonical person data (name, e-mail, id).

    :param issues: the issues to retrieve user data from
    :param conf: the project configuration
    :return: the updated issue data
    """
    log.info("Syncing users with ID service...")

    # create buffer for users (key: user id)
    user_buffer = dict()
    # create buffer for user ids (key: user string)
    user_id_buffer = dict()
    # open database connection
    dbm = DBManager(conf)
    # open ID-service connection
    idservice = idManager(dbm, conf)

    def get_user_string(name, email):
        # build the "name <email>" string the ID service expects;
        # fall back to the bare name if no e-mail is available
        if not email or email is None:
            return "{name}".format(name=name)
            # return "{name} <{name}@default.com>".format(name=name) # for debugging only
        else:
            return "{name} <{email}>".format(name=name, email=email)

    def get_id_and_update_user(user, buffer_db_ids=user_id_buffer):
        # fix encoding for name and e-mail address; fall back to the
        # username if the name is missing or empty
        if user["name"] is not None and user["name"] != "":
            name = unicode(user["name"]).encode("utf-8")
        else:
            name = unicode(user["username"]).encode("utf-8")
        mail = unicode(user["email"]).encode("utf-8")  # empty

        # construct string for ID service and send query
        user_string = get_user_string(name, mail)

        # check buffer to reduce amount of DB queries
        if user_string in buffer_db_ids:
            log.devinfo(
                "Returning person id for user '{}' from buffer.".format(
                    user_string))
            return buffer_db_ids[user_string]

        # get person information from ID service
        log.devinfo("Passing user '{}' to ID service.".format(user_string))
        idx = idservice.getPersonID(user_string)

        # add user information to buffer
        # user_string = get_user_string(user["name"], user["email"]) # update
        buffer_db_ids[user_string] = idx

        return idx

    def get_user_from_id(idx, buffer_db=user_buffer):
        # check whether user information is in buffer to reduce amount of DB queries
        if idx in buffer_db:
            log.devinfo("Returning user '{}' from buffer.".format(idx))
            return buffer_db[idx]

        # get person information from ID service
        log.devinfo("Passing user id '{}' to ID service.".format(idx))
        person = idservice.getPersonFromDB(idx)
        user = dict()
        user["email"] = person["email1"]  # column "email1"
        user["name"] = person["name"]  # column "name"
        user["id"] = person["id"]  # column "id"

        # add user information to buffer
        buffer_db[idx] = user

        return user

    # phase 1: check and update database for all occurring users
    # (authors are temporarily replaced by their person ids)
    for issue in issues:
        # check database for issue author
        issue["author"] = get_id_and_update_user(issue["author"])

        # check database for comment authors
        for comment in issue["comments"]:
            comment["author"] = get_id_and_update_user(comment["author"])

        # check database for event authors in the history
        for event in issue["history"]:
            event["author"] = get_id_and_update_user(event["author"])

            # check database for target user if needed
            if event["event"] == "assigned":
                assigned_user = get_id_and_update_user(
                    create_user(event["event_info_1"], "",
                                event["event_info_2"]))
                event["event_info_1"] = assigned_user

    # phase 2: get all users after database updates having been performed
    for issue in issues:
        # get issue author
        issue["author"] = get_user_from_id(issue["author"])

        # get comment authors
        for comment in issue["comments"]:
            comment["author"] = get_user_from_id(comment["author"])

        # get event authors for non-comment events
        for event in issue["history"]:
            event["author"] = get_user_from_id(event["author"])

            # get target user if needed
            if event["event"] == "assigned":
                assigned_user = get_user_from_id(event["event_info_1"])
                event["event_info_1"] = assigned_user["name"]
                event["event_info_2"] = assigned_user["email"]

    log.debug("number of issues after insert_user_data: '{}'".format(
        len(issues)))

    return issues
def print_to_disk(issues, results_folder):
    """ Print issues to file "issues-jira.list" in the results folder.

    Emits one CSV row per event (creation, an additional commented event for
    the description, comments, and history events); list-valued columns are
    serialized as JSON.

    :param issues: the issues to dump
    :param results_folder: the folder where to place "issues-jira.list" output file
    """

    # construct path to output file
    output_file = os.path.join(results_folder, "issues-jira.list")
    log.info("Dumping output in file '{}'...".format(output_file))

    # collect one row per event of every issue
    rows = []
    for issue in issues:
        log.info("Current issue '{}'".format(issue["externalId"]))

        # the issue-level columns are identical for every row of this issue
        issue_columns = (issue["externalId"],
                         issue["title"],
                         json.dumps(issue["type_list"]),
                         issue["state_new"],
                         json.dumps(issue["resolution_list"]),
                         issue["creationDate"],
                         issue["resolveDate"],
                         json.dumps(issue["components"]))

        # row for the creation event
        rows.append(issue_columns + ("created",  ## event.name
                                     issue["author"]["name"],
                                     issue["author"]["email"],
                                     issue["creationDate"],
                                     "open",  ## default state when created
                                     json.dumps(["unresolved"])  ## default resolution when created
                                     ))

        # additional commented row for the issue description
        rows.append(issue_columns + ("commented",
                                     issue["author"]["name"],
                                     issue["author"]["email"],
                                     issue["creationDate"],
                                     "open",  ## default state when created
                                     json.dumps(["unresolved"])  ## default resolution when created
                                     ))

        # one row per comment, annotated with the state/resolution at writing time
        for comment in issue["comments"]:
            rows.append(issue_columns + ("commented",
                                         comment["author"]["name"],
                                         comment["author"]["email"],
                                         comment["changeDate"],
                                         comment["state_on_creation"],
                                         json.dumps(comment["resolution_on_creation"])))

        # one row per history event
        for history in issue["history"]:
            rows.append(issue_columns + (history["event"],
                                         history["author"]["name"],
                                         history["author"]["email"],
                                         history["date"],
                                         history["event_info_1"],
                                         json.dumps(history["event_info_2"])))

    # write to output file
    csv_writer.write_to_csv(output_file, rows, append=True)
def print_to_disk_bugs(issues, results_folder):
    """ Extract bug issues and print them to file "bugs-jira.list" in the results folder.

    This method prints in a format which is consistent to the format of
    "print_to_disk" in "issue_processing.py". List-valued columns are
    serialized as JSON.

    :param issues: the issues from which the bugs are extracted
    :param results_folder: the folder where to place "bugs-jira.list" output file
    """

    # construct path to output file
    output_file = os.path.join(results_folder, "bugs-jira.list")
    log.info("Dumping output in file '{}'...".format(output_file))

    # collect one row per event of every bug issue
    rows = []
    for issue in issues:
        log.info("Current issue '{}'".format(issue["externalId"]))

        # skip all issues that are not of type bug
        if "bug" not in issue["type_list"]:
            continue

        # the issue-level columns are identical for every row of this issue
        issue_columns = (issue["externalId"],
                         issue["title"],
                         json.dumps(issue["type_list"]),
                         issue["state_new"],
                         json.dumps(issue["resolution_list"]),
                         issue["creationDate"],
                         issue["resolveDate"],
                         json.dumps(issue["components"]))

        # row for the creation event
        rows.append(issue_columns + ("created",  ## event.name
                                     issue["author"]["name"],
                                     issue["author"]["email"],
                                     issue["creationDate"],
                                     "open",  ## default state when created
                                     json.dumps(["unresolved"])  ## default resolution when created
                                     ))

        # additional commented row for the issue description
        rows.append(issue_columns + ("commented",
                                     issue["author"]["name"],
                                     issue["author"]["email"],
                                     issue["creationDate"],
                                     "open",  ## default state when created
                                     json.dumps(["unresolved"])  ## default resolution when created
                                     ))

        # one row per comment, annotated with the state/resolution at writing time
        for comment in issue["comments"]:
            rows.append(issue_columns + ("commented",
                                         comment["author"]["name"],
                                         comment["author"]["email"],
                                         comment["changeDate"],
                                         comment["state_on_creation"],
                                         json.dumps(comment["resolution_on_creation"])))

        # one row per history event
        for history in issue["history"]:
            rows.append(issue_columns + (history["event"],
                                         history["author"]["name"],
                                         history["author"]["email"],
                                         history["date"],
                                         history["event_info_1"],
                                         json.dumps(history["event_info_2"])))

    # write to output file
    csv_writer.write_to_csv(output_file, rows, append=True)
def run_anonymization(conf, resdir):
    """
    Runs the anonymization process for the given parameters, that is, replaces names,
    e-mail addresses, message ids, and issue titles with pseudonymized contents in all
    .list files in resdir. Writes the anonymized .list files to another directory
    (resdir + "_anonymized").

    :param conf: the Codeface configuration object
    :param resdir: the Codeface results dir, where result files are read from
    """

    authors_list = "authors.list"
    commits_list = "commits.list"
    emails_list = "emails.list"
    issues_github_list = "issues-github.list"
    issues_jira_list = "issues-jira.list"
    bugs_jira_list = "bugs-jira.list"
    bots_list = "bots.list"
    gender_list = "gender.list"
    revisions_list = "revisions.list"  # not to be anonymized, only to be copied to the "anonymized" directory

    # When looking at elements originating from json lists, we need to consider quotation marks around the string
    quot_m = "\""

    data_path = path.join(resdir, conf["project"], conf["tagging"])
    anonymize_path = path.join((resdir + "_anonymized"), conf["project"], conf["tagging"])

    if not path.exists(anonymize_path):
        log.info("Create directory %s", anonymize_path)
        makedirs(anonymize_path)

    log.info("%s: Anonymize authors." % conf["project"])

    # create dictionaries to store mappings from authors to anonymized authors and titles to anonymized titles
    author_to_anonymized_author = dict()
    author_to_anonymized_author_gender = dict()
    i = 0
    i_gender = 0
    title_to_anonymized_title = dict()
    k = 0

    def anonymize_authors(author_data, i, author_to_anonymized_author, name_only=False):
        """
        Helper function to anonymize author data (i.e., data from the authors.list file).

        :param author_data: the author data to be anonymized (must have been read via "csv_writer.read_from_csv")
        :param i: counter for anonymized developer names (i.e., its current start value which has not been used yet)
        :param author_to_anonymized_author: dictionary in which to lookup and store mappings from (name, e-mail)
                                            pairs to anonymized (name, e-mail) pairs for the developers
        :param name_only: whether also the name (without e-mail) should be used as key for the dictionary
                          "author_to_anonymized_author". This is necessary if there might be lookups using
                          auto-generated and, therefore, different e-mail addresses for the same name.
        :return: the anonymized "author_data", the current value of "i" (which has not been used yet),
                 and the updated dictionary "author_to_anonymized_author"
        """
        for author in author_data:
            orig_author = author[1]
            orig_email = author[2]

            # Don't anonymize the deleted user as this one might be needed for filtering (but add it to the dictionary)
            if orig_author == "Deleted user" and orig_email == "*****@*****.**":
                if not (orig_author, orig_email) in author_to_anonymized_author:
                    author_to_anonymized_author[(orig_author, orig_email)] = (orig_author, orig_email)
            else:
                # check whether (name, e-mail) pair isn't already present in the dictionary
                if not (orig_author, orig_email) in author_to_anonymized_author:

                    # check if just the name (without e-mail address) isn't already present in the dictionary
                    if not orig_author in author_to_anonymized_author:

                        # if the author has an empty name, only anonymize their e-mail address
                        if not author[1] == "":
                            author[1] = ("developer" + str(i))
                        author[2] = ("mail" + str(i) + "@dev.org")

                        # add new entry to dictionary (using (name, e-mail) pair as key)
                        author_to_anonymized_author[(orig_author, orig_email)] = (author[1], author[2])

                        # if we allow name-only entries, also add an additional entry to dictionary
                        if name_only:
                            author_to_anonymized_author[orig_author] = (author[1], author[2])

                        # increment counter as we have generated a new anonymized developer id
                        i += 1
                    else:
                        # as just the name (without e-mail address) is present in the dictionary, make a lookup
                        # for the name only and add a new entry to the dictionary using (name, e-mail) pair
                        author_new = author_to_anonymized_author[orig_author]
                        author_to_anonymized_author[(orig_author, orig_email)] = (author_new[0], author_new[1])
                        author[1] = author_new[0]
                        author[2] = author_new[1]
                else:
                    # as the (name, e-mail) pair is present in the dictionary, just make a lookup for the pair
                    author_new = author_to_anonymized_author[(orig_author, orig_email)]
                    author[1] = author_new[0]
                    author[2] = author_new[1]

        return author_data, i, author_to_anonymized_author

    def write_output(f, data, message="Write anonymized data to %s ..."):
        """
        Helper function to write "data" to the path of "f" mirrored into the anonymized directory,
        creating the parent directory of the output file if necessary.
        (Fixes a former bug where "makedirs" was called on the output file path itself.)
        """
        output_path = f.replace(data_path, anonymize_path)
        if not path.exists(path.dirname(output_path)):
            makedirs(path.dirname(output_path))
        log.info(message, output_path)
        csv_writer.write_to_csv(output_path, data)

    def anonymize_issue_events(event_data, k):
        """
        Helper function to anonymize issue/bug event rows (issues-github.list, issues-jira.list,
        bugs-jira.list all share the same column layout): author (columns 9/10), person referenced
        in event info 1/2 (columns 12/13), and issue title (column 1).
        (Using one shared helper also fixes a former copy-paste bug in the bugs-jira handling,
        which tested the wrong loop variable in the event-info lookup.)

        :param event_data: the event rows to be anonymized in place
        :param k: counter for anonymized issue titles (current unused start value)
        :return: the anonymized "event_data" and the current value of "k" (which has not been used yet)
        """
        for event in event_data:
            # anonymize author
            new_author = author_to_anonymized_author[(event[9], event[10])]
            event[9] = new_author[0]
            event[10] = new_author[1]

            # anonymize person in event info 1/2; column 13 originates from a json list,
            # so the surrounding quotation marks are stripped for lookup and re-added afterwards
            if (event[12], event[13][1:-1]) in author_to_anonymized_author:
                new_person = author_to_anonymized_author[(event[12], event[13][1:-1])]
                event[12] = new_person[0]
                event[13] = quot_m + new_person[1] + quot_m

            # anonymize issue title (re-using the pseudonym for already-seen titles)
            if event[1] in title_to_anonymized_title:
                event[1] = title_to_anonymized_title[event[1]]
            else:
                new_title = ("issue-title-" + str(k))
                title_to_anonymized_title[event[1]] = new_title
                event[1] = new_title
                k += 1

        return event_data, k

    # Check for all files in the result directory of the project whether they need to be anonymized
    for filepath, dirnames, filenames in walk(data_path):

        # (1) Anonymize authors lists
        if authors_list in filenames:
            f = path.join(filepath, authors_list)
            log.info("Anonymize %s ...", f)
            author_data = csv_writer.read_from_csv(f)
            author_data_gender = csv_writer.read_from_csv(f)

            # check if tagging is "feature"
            if conf["tagging"] == "feature":
                # as tagging is "feature", we need to check for the proximity data to keep anonymized ids
                # consistent over both feature and proximity data

                # if corresponding proximity data exists, read authors from proximity data and use them for
                # anonymization to make anonymized proximity data and feature data consistent
                f_proximity = f.replace("feature", "proximity")
                if path.isfile(f_proximity):
                    log.info("Read authors from %s and anonymize them (without dumping to file).", f_proximity)
                    author_data_proximity = csv_writer.read_from_csv(f_proximity)

                    # anonymize authors from proximity data (but just add them to our dictionary, to be used
                    # below for the actual anonymization of the feature data)
                    author_data_proximity, i, author_to_anonymized_author = \
                        anonymize_authors(author_data_proximity, i, author_to_anonymized_author, name_only=True)

            # anonymize authors
            author_data, i, author_to_anonymized_author = \
                anonymize_authors(author_data, i, author_to_anonymized_author)
            author_data_gender, i_gender, author_to_anonymized_author_gender = \
                anonymize_authors(author_data_gender, i_gender, author_to_anonymized_author_gender, name_only=True)

            write_output(f, author_data)

        # (2) Anonymize commits lists
        if commits_list in filenames:
            f = path.join(filepath, commits_list)
            log.info("Anonymize %s ...", f)
            commit_data = csv_writer.read_from_csv(f)

            for commit in commit_data:
                # anonymize author
                new_author = author_to_anonymized_author[(commit[2], commit[3])]
                commit[2] = new_author[0]
                commit[3] = new_author[1]

                # anonymize committer
                new_committer = author_to_anonymized_author[(commit[5], commit[6])]
                commit[5] = new_committer[0]
                commit[6] = new_committer[1]

            write_output(f, commit_data)

        # (3) Anonymize emails lists
        if emails_list in filenames:
            f = path.join(filepath, emails_list)
            log.info("Anonymize %s ...", f)
            email_data = csv_writer.read_from_csv(f)

            j = 0
            for email in email_data:
                # anonymize author
                new_author = author_to_anonymized_author[(email[0], email[1])]
                email[0] = new_author[0]
                email[1] = new_author[1]

                # anonymize message id
                email[2] = ("<message" + str(j) + "@message.dev.org>")
                j += 1

            write_output(f, email_data)

        # (4) Anonymize issues lists (github)
        if issues_github_list in filenames:
            f = path.join(filepath, issues_github_list)
            log.info("Anonymize %s ...", f)
            issue_data = csv_writer.read_from_csv(f)
            issue_data, k = anonymize_issue_events(issue_data, k)
            write_output(f, issue_data)

        # (5) Anonymize issues lists (jira)
        if issues_jira_list in filenames:
            f = path.join(filepath, issues_jira_list)
            log.info("Anonymize %s ...", f)
            issue_data = csv_writer.read_from_csv(f)
            issue_data, k = anonymize_issue_events(issue_data, k)
            write_output(f, issue_data)

        # (6) Anonymize bugs lists (jira)
        if bugs_jira_list in filenames:
            f = path.join(filepath, bugs_jira_list)
            log.info("Anonymize %s ...", f)
            bug_data = csv_writer.read_from_csv(f)
            bug_data, k = anonymize_issue_events(bug_data, k)
            write_output(f, bug_data)

        # (7) Anonymize bots list
        if bots_list in filenames:
            f = path.join(filepath, bots_list)
            log.info("Anonymize %s ...", f)
            bot_data = csv_writer.read_from_csv(f)

            for bot in bot_data:
                new_person = author_to_anonymized_author[(bot[0], bot[1])]
                bot[0] = new_person[0]
                bot[1] = new_person[1]

            write_output(f, bot_data)

        # (8) Anonymize gender list
        if gender_list in filenames:
            f = path.join(filepath, gender_list)
            log.info("Anonymize %s ...", f)
            gender_data = csv_writer.read_from_csv(f)

            gender_data_new = []
            for author in gender_data:
                # only keep authors for which a name-only mapping exists in the gender dictionary
                if author[0] in author_to_anonymized_author_gender.keys():
                    new_person = author_to_anonymized_author_gender[author[0]]
                    author[0] = new_person[0]
                    gender_data_new.append(author)

            write_output(f, gender_data_new)

        # (9) Copy revisions list (not anonymized, only mirrored to the output directory)
        if revisions_list in filenames:
            f = path.join(filepath, revisions_list)
            log.info("Copy %s ...", f)
            revision_data = csv_writer.read_from_csv(f)
            write_output(f, revision_data, message="Copy revision data to %s ...")

    log.info("Anonymization complete!")
def merge_issue_events(issue_data):
    """
    All issue events are merged together in the eventsList. This simplifies processing in later steps.

    For each issue: a synthetic "created" event is added, then relatedIssues, relatedCommits, and
    commentsList entries are rewritten in place to the common event format (keys "event",
    "created_at", "event_info_1", "event_info_2", "ref_target"), and finally all four lists are
    concatenated into "eventsList", filtered, and sorted by "created_at".

    :param issue_data: the issue data from which the events shall be merged
    :return: the issue data with merged eventsList
    """

    log.info("Merge issue events ...")

    for issue in issue_data:
        # temporary container for references: maps formatted comment timestamp -> comment,
        # used below to resolve mentioned/subscribed events that collide with a comment
        # NOTE(review): if two comments share the same timestamp, the later one overwrites
        # the earlier in this cache — presumably timestamps are unique per issue; verify
        comments = dict()

        # adds creation event to eventsList ("open"/empty resolution are the defaults at creation)
        created_event = dict()
        created_event["user"] = issue["user"]
        created_event["created_at"] = issue["created_at"]
        created_event["event"] = "created"
        created_event["event_info_1"] = "open"
        created_event["event_info_2"] = []
        issue["eventsList"].append(created_event)
        issue["state_new"] = "open"

        # the format of every related issue is adjusted to the event format
        for rel_issue in issue["relatedIssues"]:
            rel_issue["created_at"] = format_time(rel_issue["referenced_at"])
            rel_issue["event"] = "add_link"
            rel_issue["event_info_1"] = rel_issue["number"]
            rel_issue["event_info_2"] = "issue"
            rel_issue["ref_target"] = ""

        # the format of every related commit is adjusted to the event format
        for rel_commit in issue["relatedCommits"]:
            # if the related commit has no time, it is a commit in the pull-request;
            # an empty placeholder user is attached so the row survives the user filter below
            if rel_commit["referenced_at"] is None:
                rel_commit["user"] = create_user("", "", "")
                rel_commit["created_at"] = ""
                rel_commit["event"] = "has_commit"
                rel_commit["event_info_1"] = rel_commit["commit_id"]
                rel_commit["event_info_2"] = ""
                rel_commit["ref_target"] = ""
            # else it is a commit the issue/ pull-request refers to
            else:
                rel_commit["created_at"] = format_time(rel_commit["referenced_at"])
                rel_commit["event"] = "add_link"
                rel_commit["event_info_1"] = rel_commit["commit_id"]
                rel_commit["event_info_2"] = "commit"
                rel_commit["ref_target"] = ""

        # the format of every comment is adjusted to the event format
        for comment in issue["commentsList"]:
            comment["event"] = "commented"
            comment["ref_target"] = ""
            comment["created_at"] = format_time(comment["referenced_at"])
            if "event_info_1" not in comment:
                comment["event_info_1"] = ""
            if "event_info_2" not in comment:
                comment["event_info_2"] = ""
            # cache comment by date to resolve/re-arrange references later
            comments[comment["created_at"]] = comment

        # the format of every event is adjusted
        for event in issue["eventsList"]:
            event["ref_target"] = ""
            event["created_at"] = format_time(event["created_at"])
            if "event_info_1" not in event:
                event["event_info_1"] = ""
            if "event_info_2" not in event:
                event["event_info_2"] = ""

            # if event collides with a comment (identical formatted timestamp)
            if event["created_at"] in comments:
                comment = comments[event["created_at"]]

                # if someone gets mentioned or subscribed by someone else in a comment,
                # re-write the reference: the comment author becomes the acting user and
                # the originally recorded user becomes the reference target
                if (event["event"] == "mentioned" or event["event"] == "subscribed") and \
                        comment["event"] == "commented":
                    event["ref_target"] = event["user"]
                    event["user"] = comment["user"]

        # merge events, relatedCommits, relatedIssues and comment lists
        issue["eventsList"] = issue["commentsList"] + issue["eventsList"] + issue["relatedIssues"] + issue[
            "relatedCommits"]

        # remove events without user (or without reference target, which marks invalid entries)
        issue["eventsList"] = [event for event in issue["eventsList"]
                               if not (event["user"] is None or event["ref_target"] is None)]

        # sorts eventsList by time
        issue["eventsList"] = sorted(issue["eventsList"], key=lambda k: k["created_at"])

    return issue_data
def _label_update_event(user, date, event_name, event_info_1, event_info_2):
    """
    Create a synthetic "type_updated"/"resolution_updated" event for a (un)labeling action.

    :param user: the user of the triggering labeled/unlabeled event
    :param date: the "created_at" timestamp of the triggering event
    :param event_name: "type_updated" or "resolution_updated"
    :param event_info_1: the added label (empty string for removals)
    :param event_info_2: the removed label (empty string for additions)
    :return: the synthetic event dict in the common event format
    """
    update_event = dict()
    update_event["user"] = user
    update_event["created_at"] = date
    update_event["event"] = event_name
    update_event["event_info_1"] = event_info_1
    update_event["event_info_2"] = event_info_2
    update_event["ref_target"] = ""
    return update_event


def reformat_events(issue_data):
    """
    Re-format event information dependent on the event type.

    "closed"/"reopened" events become "state_updated" events, "labeled"/"unlabeled"
    events maintain the issue's type/resolution lists and emit synthetic
    "type_updated"/"resolution_updated" events, and "commented" events are annotated
    with the state and resolution the issue had when the comment was written.

    :param issue_data: the data of all issues that shall be re-formatted
    :return: the issue data with updated event information
    """

    log.info("Update event information ...")

    for issue in issue_data:

        # collect synthetic events separately and append them after the loop:
        # the original code appended to "eventsList" while iterating over it,
        # which only worked because the appended events match no branch below
        new_events = []

        # re-format information of every event in the eventsList of an issue
        for event in issue["eventsList"]:

            if event["event"] == "closed":
                event["event"] = "state_updated"
                event["event_info_1"] = "closed"  # new state
                event["event_info_2"] = "open"  # old state
                issue["state_new"] = "closed"

            elif event["event"] == "reopened":
                event["event"] = "state_updated"
                event["event_info_1"] = "open"  # new state
                event["event_info_2"] = "closed"  # old state
                issue["state_new"] = "reopened"

            elif event["event"] == "labeled":
                label = event["label"]["name"].lower()
                event["event_info_1"] = label

                # if the label is in this list, it also is a type of the issue
                if label in known_types:
                    issue["type"].append(str(label))
                    # creates an event for type updates and adds it to the eventsList
                    new_events.append(_label_update_event(
                        event["user"], event["created_at"], "type_updated", label, ""))

                # if the label is in this list, it also is a resolution of the issue
                elif label in known_resolutions:
                    issue["resolution"].append(str(label))
                    # creates an event for resolution updates and adds it to the eventsList
                    new_events.append(_label_update_event(
                        event["user"], event["created_at"], "resolution_updated", label, ""))

            elif event["event"] == "unlabeled":
                label = event["label"]["name"].lower()
                event["event_info_1"] = label

                # if the label is in this list, it also is a type of the issue
                if label in known_types:
                    # guard the removal: the label may never have been recorded as added
                    # (e.g., the labeled event lies outside the analyzed data);
                    # the unguarded list.remove raised ValueError in that case
                    if str(label) in issue["type"]:
                        issue["type"].remove(str(label))
                    # creates an event for type updates and adds it to the eventsList
                    new_events.append(_label_update_event(
                        event["user"], event["created_at"], "type_updated", "", label))

                # if the label is in this list, it also is a resolution of the issue
                elif label in known_resolutions:
                    if str(label) in issue["resolution"]:
                        issue["resolution"].remove(str(label))
                    # creates an event for resolution updates and adds it to the eventsList
                    new_events.append(_label_update_event(
                        event["user"], event["created_at"], "resolution_updated", "", label))

            elif event["event"] == "commented":
                # "state_new" and "resolution" of the issue give the information about the state and
                # the resolution of the issue when the comment was written, because the eventsList
                # is sorted by time
                event["event_info_1"] = issue["state_new"]
                event["event_info_2"] = str(issue["resolution"])

        # append the synthetic events (same final position as the original append-at-end behavior)
        issue["eventsList"].extend(new_events)

    return issue_data
def load_issue_via_api(issues, persons, url):
    """
    For each issue in the list the history is added via the api.

    The JIRA changelog of every issue is fetched and translated into history events
    ("state_updated", "resolution_updated", "assigned", "add_link", "remove_link").
    In addition, each comment of the issue is annotated with the state and resolution
    the issue had at the time the comment was written.

    :param issues: list of issues
    :param persons: list of persons from JIRA (incl. e-mail addresses)
    :param url: the project url
    """

    log.info("Load issue information via api...")
    jira_project = JIRA(url)

    for issue in issues:
        # fetch the issue incl. its changelog from the JIRA REST api
        api_issue = jira_project.issue(issue["externalId"], expand="changelog")
        changelog = api_issue.changelog
        histories = list()

        # adds the issue creation time with the default state to a list
        # list is needed to find out the state the issue had when a comment was written
        state_changes = [[issue["creationDate"], "open"]]

        # adds the issue creation time with the default resolution to a list
        # list is needed to find out the resolution the issue had when a comment was written
        resolution_changes = [[issue["creationDate"], "unresolved"]]

        # history changes get visited in time order from oldest to newest
        for change in changelog.histories:

            # default values for state and resolution (used when from/to strings are missing)
            old_state, new_state, old_resolution, new_resolution = "open", "open", "unresolved", "unresolved"

            # all changes in the issue changelog are checked if they contain an useful information
            for item in change.items:

                # state_updated event gets created and added to the issue history
                if item.field == "status":
                    if item.fromString is not None:
                        old_state = item.fromString.lower()
                    if item.toString is not None:
                        new_state = item.toString.lower()
                    history = dict()
                    history["event"] = "state_updated"
                    history["event_info_1"] = new_state
                    history["event_info_2"] = old_state
                    # NOTE(review): the JIRA username is used for both name and username here,
                    # the real name/e-mail is resolved from the persons csv if available
                    user = create_user(change.author.name, change.author.name, "")
                    history["author"] = merge_user_with_user_from_csv(user, persons)
                    history["date"] = format_time(change.created)
                    histories.append(history)
                    state_changes.append([history["date"], new_state])

                # resolution_updated event gets created and added to the issue history
                elif item.field == "resolution":
                    if item.fromString is not None:
                        old_resolution = item.fromString.lower()
                    if item.toString is not None:
                        new_resolution = item.toString.lower()
                    history = dict()
                    history["event"] = "resolution_updated"
                    history["event_info_1"] = new_resolution
                    history["event_info_2"] = old_resolution
                    user = create_user(change.author.name, change.author.name, "")
                    history["author"] = merge_user_with_user_from_csv(user, persons)
                    history["date"] = format_time(change.created)
                    histories.append(history)
                    resolution_changes.append([history["date"], new_resolution])

                # assigned event gets created and added to the issue history
                elif item.field == "assignee":
                    history = dict()
                    history["event"] = "assigned"
                    user = create_user(change.author.name, change.author.name, "")
                    history["author"] = merge_user_with_user_from_csv(user, persons)
                    assignee = create_user(item.toString, item.toString, "")
                    assigned_user = merge_user_with_user_from_csv(assignee, persons)
                    history["event_info_1"] = assigned_user["name"]
                    history["event_info_2"] = assigned_user["email"]
                    history["date"] = format_time(change.created)
                    histories.append(history)

                elif item.field == "Link":
                    # add_link event gets created and added to the issue history
                    if item.toString is not None:
                        history = dict()
                        history["event"] = "add_link"
                        user = create_user(change.author.name, change.author.name, "")
                        history["author"] = merge_user_with_user_from_csv(user, persons)
                        # api returns a text. The issue id is at the end of the text and gets extracted
                        history["event_info_1"] = item.toString.split()[-1]
                        history["event_info_2"] = "issue"
                        history["date"] = format_time(change.created)
                        histories.append(history)

                    # remove_link event gets created and added to the issue history
                    if item.fromString is not None:
                        history = dict()
                        history["event"] = "remove_link"
                        user = create_user(change.author.name, change.author.name, "")
                        history["author"] = merge_user_with_user_from_csv(user, persons)
                        # api returns a text. The issue id is at the end of the text and gets extracted
                        history["event_info_1"] = item.fromString.split()[-1]
                        history["event_info_2"] = "issue"
                        history["date"] = format_time(change.created)
                        histories.append(history)

        # state and resolution change lists get sorted by time
        # NOTE(review): comparisons below are lexicographic string comparisons —
        # assumes format_time yields timestamps whose string order equals time order; confirm
        state_changes.sort(key=lambda x: x[0])
        resolution_changes.sort(key=lambda x: x[0])

        for comment in issue["comments"]:
            # the state the issue had when the comment was written is searched out
            # (the last change strictly before the comment date wins, since the list is sorted)
            for state in state_changes:
                if comment["changeDate"] > state[0]:
                    comment["state_on_creation"] = state[1]

            # the resolution the issue had when the comment was written is searched out
            for resolution in resolution_changes:
                if comment["changeDate"] > resolution[0]:
                    comment["resolution_on_creation"] = [str(resolution[1])]

        issue["history"] = histories