def annotate(self, result):
    """Merge the special-reference features found in *result* into it.

    The "title" and "text" entries of ``result`` (a missing entry is scanned
    as an empty string) are passed to
    ``git_comment_conventions.find_special``, which fills a features dict.
    Each entry under the "issues" feature gets its "parts" copied into a
    list, extended with "rev" when ``result["provenance"]`` is
    "issue_crossref" (otherwise with ""), and has the "%OWNER%" and
    "%PROJECT%" placeholders replaced by ``self.owner`` and
    ``self.project``. The features are then merged into ``result`` in place;
    nothing is returned.
    """
    found = {}
    for field in ("title", "text"):
        git_comment_conventions.find_special(found, result.get(field, ""))

    for ref in found.get("issues", ()):
        # Work on a private list copy so the placeholders can be overwritten.
        parts = ref["parts"] = list(ref["parts"])

        if result["provenance"] == "issue_crossref":
            direction = "rev"
        else:
            direction = ""
        parts.append(direction)

        if parts[0] == "%OWNER%":
            parts[0] = self.owner
        if parts[1] == "%PROJECT%":
            parts[1] = self.project

    result.update(found)
def query_all(owner, project, issue):
    """Dump the discussion around one issue to CSV and collect its issue links.

    Collects pull-request records (comments, history, commit comments) for
    every id returned by ``get_pr_ids`` and issue records (title, events,
    comments, inbound references) for every id returned by
    ``get_issue_ids``, sorts them by time, and writes one row per record to
    ``samples/repo_<owner>_<project>_issue<issue>.csv``. Each row holds the
    record's ``fields`` followed by the JSON-encoded features that
    ``git_comment_conventions.find_special`` extracts from its title and text.

    Args:
        owner: Repository owner; also substituted for "%OWNER%" placeholders.
        project: Repository name; also substituted for "%PROJECT%".
        issue: Issue identifier. It is concatenated into the output file
            name, so it must be a string.

    Returns:
        A set of tuples, one per issue reference found in the records. Each
        tuple is the reference's "parts" with placeholders resolved and a
        trailing "rev" (record provenance is "issue_crossref") or "".
    """
    print("----------Extracting discussion on  %s"
          % (pp_dot(owner, project, issue),))

    fields = ["rectype", "issueid", "project_owner", "project_name", "actor",
              "time", "text", "action", "title", "provenance"]
    feature_tags = ["plus_1", "urls", "issues", "userref", "code"]
    csv_path = ("samples/repo_" + owner + "_" + project
                + "_issue" + issue + ".csv")

    def as_utc(moment):
        # Tag a timestamp as UTC; records without a time stay None.
        return moment.replace(tzinfo=pytz.utc) if moment is not None else None

    other_issues = set()

    # The context manager guarantees the CSV is flushed and closed, even if
    # one of the queries below raises. "wb" is the mode the Python 2 csv
    # module expects.
    with open(csv_path, "wb") as csv_out:
        csvw = csv.writer(csv_out)
        csvw.writerow(tuple(fields + feature_tags))

        results = []
        for iid in get_pr_ids(owner, project, issue):
            print("PR id  %s" % (iid,))
            results.extend(get_pull_request_comments(owner, project, issue, iid))
            results.extend(get_pull_request_history(owner, project, issue, iid))
            results.extend(get_pull_request_commit_comments(owner, project, issue, iid))
            ##results.extend(get_pr_related_hour_ga(owner, project, issue, iid));

        for iid in get_issue_ids(owner, project, issue):
            print("Issue id  %s" % (iid,))
            results.extend(get_issue_title(owner, project, issue, iid))
            results.extend(get_issue_events(owner, project, issue, iid))
            results.extend(get_issue_comments(owner, project, issue, iid))
            results.extend(get_inbound_issue_comment_references(owner, project, issue, iid))
            results.extend(get_inbound_issue_title_references(owner, project, issue, iid))
            results.extend(get_inbound_pull_request_references(owner, project, issue, iid))
            ##results.extend(get_issue_comments_ga(owner, project, issue, iid));
            ##results.extend(get_issue_title_ga(owner, project, issue, iid));
            ##results.extend(get_issue_related_hour_ga(owner, project, issue, iid));

        # Records without a timestamp sort as if they happened at the Unix
        # epoch. The epoch is built explicitly in UTC: fromtimestamp(0) would
        # yield *local* time, which must not be relabelled as UTC.
        epoch = datetime.datetime(1970, 1, 1, tzinfo=pytz.utc)
        results.sort(
            key=lambda r: as_utc(r["time"]) if r["time"] is not None else epoch)

        for result in results:
            result["time"] = as_utc(result["time"])

            features = {}
            git_comment_conventions.find_special(features, result["title"])
            git_comment_conventions.find_special(features, result["text"])

            for ref in features.get("issues", ()):
                # Copy to a list so placeholders can be replaced in place.
                parts = ref["parts"] = list(ref["parts"])
                parts.append(
                    "rev" if result["provenance"] == "issue_crossref" else "")
                if parts[0] == "%OWNER%":
                    parts[0] = owner
                if parts[1] == "%PROJECT%":
                    parts[1] = project
                print("MKLINK: in %s %s %s with provenance %s adding %s"
                      % (owner, project, issue, result["provenance"], parts))
                other_issues.add(tuple(parts))

            csvw.writerow(tuple(
                [unicode(result[f]).encode("utf-8") for f in fields]
                + [json.dumps(features.get(k, "")) for k in feature_tags]))

    print("--->Issues referenced:  %s" % (other_issues,))
    return other_issues
def make_pr_comment_references_table(csvfile, limit=None):
    """Write one CSV row per issue reference found in pull request comments.

    Reads through all pull request comments, extracts issue references from
    each comment body, and writes enough data to ``csvfile`` to eventually
    merge into a database table.

    Args:
        csvfile: Object with a ``writerow`` method (e.g. a ``csv.writer``)
            that receives one 12-element list per reference.
        limit: Passed through unchanged to ``dbutil.execute_at_once``.

    Raises:
        Exception: If a row's ``full_name`` does not match ``PROJ_NAME_PAT``.
            Any exception raised while handling a row is printed, its
            traceback is written to stderr, and it is then re-raised.
    """
    query = (
        "select body, full_name, pull_requests.base_repo_id repo_id, "
        "pull_requests.id prDbId, pullreq_id issueHumanId, "
        "pull_request_comments.comment_id pr_comment_id "
        "from pull_request_comments "
        "left join pull_requests "
        "on pull_request_comments.pull_request_id=pull_requests.id "
        "join project_stats "
        "on pull_requests.base_repo_id=project_stats.project_id"
    )
    cur = dbutil.execute_at_once(query, limit=limit)

    for rowcount, row in enumerate(cur, 1):
        # Progress indicator for long runs.
        if rowcount % 1000 == 0:
            print(rowcount)
        try:
            match = PROJ_NAME_PAT.match(row["full_name"])
            if match is None:
                raise Exception(
                    "Cannot identify owner and project name of " + str(row))
            owner = match.group(1)
            projectname = match.group(2)

            features = {}
            #git_comment_conventions.find_special(features, row["title"])
            git_comment_conventions.find_special(features, row["body"])

            for ref in features.get("issues", ()):
                refown, refprj, refnum = ref["parts"][0], ref["parts"][1], ref["parts"][2]
                refstyle = ref["refstyle"]
                if refown == "%OWNER%":
                    refown = owner
                if refprj == "%PROJECT%":
                    refprj = projectname
                # The referenced repo id is only known when the reference
                # points back into the commenting project itself.
                if refown == owner and refprj == projectname:
                    refrepoid = row["repo_id"]
                else:
                    refrepoid = "NULL"
                csvfile.writerow([
                    owner + "/" + projectname,
                    row["repo_id"],
                    row["issueHumanId"],
                    "NULL",
                    "NULL",
                    row["prDbId"],
                    row["pr_comment_id"],
                    refstyle,
                    refown + "/" + refprj,
                    refrepoid,
                    refnum,
                    "NULL",
                ])
        except Exception as e:
            print(e)
            traceback.print_exc(file=sys.stderr)
            # Bare raise keeps the original traceback; "raise e" would reset
            # it to this line under Python 2.
            raise