예제 #1
0
 def annotate(self, result):
     """Attach convention-based features found in a result's title and text.

     Scans ``result["title"]`` and ``result["text"]`` (each treated as ""
     when the key is absent) with ``git_comment_conventions.find_special``.
     Every entry under the "issues" feature gets its ``parts`` turned into a
     list, extended with "rev" when the result's provenance is
     "issue_crossref" (otherwise ""), and has the ``%OWNER%`` /
     ``%PROJECT%`` placeholders in its first two slots replaced by
     ``self.owner`` / ``self.project``.  The features are finally merged
     into ``result`` in place; nothing is returned.
     """
     found = {}
     for field in ("title", "text"):
         git_comment_conventions.find_special(found, result.get(field, ""))

     for ref in found.get("issues", ()):
         parts = list(ref["parts"])
         ref["parts"] = parts
         # Trailing slot: marks references that arrived via a cross-reference.
         if result["provenance"] == "issue_crossref":
             parts.append("rev")
         else:
             parts.append("")
         if parts[0] == "%OWNER%":
             parts[0] = self.owner
         if parts[1] == "%PROJECT%":
             parts[1] = self.project

     result.update(found)
예제 #2
0
def query_all(owner, project,issue):
    print "----------Extracting discussion on ", pp_dot(owner,project,issue)
    csvw = csv.writer(open("samples/repo_" + owner + "_" + project + "_issue" + issue + ".csv", "wb"));
    fields = ["rectype", "issueid", "project_owner",
                     "project_name", "actor",
                     "time", "text", "action", "title", "provenance"]
    feature_tags = ["plus_1", "urls", "issues", "userref", "code"]
    
    csvw.writerow(tuple(fields+feature_tags))
    
    results = [];

    for iid in get_pr_ids(owner, project, issue):
        print "PR id ", iid
        results.extend(get_pull_request_comments(owner, project, issue, iid));
        results.extend(get_pull_request_history(owner, project, issue, iid));
        results.extend(get_pull_request_commit_comments(owner, project, issue, iid));
        ##results.extend(get_pr_related_hour_ga(owner, project, issue, iid));

    for iid in get_issue_ids(owner, project, issue):
        print "Issue id ", iid
        results.extend(get_issue_title(owner, project, issue, iid));
        results.extend(get_issue_events(owner, project, issue, iid));
        results.extend(get_issue_comments(owner, project, issue, iid));
        results.extend(get_inbound_issue_comment_references(owner, project, issue, iid))
        results.extend(get_inbound_issue_title_references(owner, project, issue, iid))
        results.extend(get_inbound_pull_request_references(owner, project, issue, iid))
        ##results.extend(get_issue_comments_ga(owner, project, issue, iid));
        ##results.extend(get_issue_title_ga(owner, project, issue, iid));
        ##results.extend(get_issue_related_hour_ga(owner, project, issue, iid));


    epoch = datetime.datetime.fromtimestamp(0).replace(tzinfo=pytz.utc)
    results.sort(key=lambda r: r["time"].replace(tzinfo=pytz.utc) if r["time"] is not None else epoch )
    otherIssues = set()
    for result in results:
        result["time"] = result["time"].replace(tzinfo=pytz.utc) if result['time'] is not None else None
        features = {}
        git_comment_conventions.find_special(features, result["title"])    
        git_comment_conventions.find_special(features, result["text"])    
        if "issues" in features:
            for i in features["issues"]:
                i["parts"] = list(i["parts"])
                i["parts"].append("rev" if result["provenance"] == "issue_crossref" else "")
                if i["parts"][0] == "%OWNER%": i["parts"][0] = owner
                if i["parts"][1] == "%PROJECT%": i["parts"][1] = project
                print "MKLINK: in", owner, project,issue,"with provenance",result["provenance"],"adding", i["parts"]
                otherIssues.add(tuple(i["parts"]))
        csvw.writerow( tuple([unicode(result[f]).encode("utf-8") for f in fields] + [json.dumps(features.get(k,"")) for k in feature_tags]))
    print "--->Issues referenced: ", otherIssues
    return otherIssues
예제 #3
0
def make_pr_comment_references_table(csvfile, limit=None):
    """Write one CSV row per issue reference found in pull request comments.

    Reads every pull request comment body (joined with its pull request and
    the project's full name) through ``dbutil.execute_at_once`` and scans it
    with ``git_comment_conventions.find_special``.  For each reference under
    the "issues" feature one row is written, enough data to eventually merge
    into a database table:

        owner/project, repo_id, issueHumanId, "NULL", "NULL", prDbId,
        pr_comment_id, refstyle, refowner/refproject, refrepoid, refnum,
        "NULL"

    ``%OWNER%`` / ``%PROJECT%`` placeholders in a reference are resolved to
    the commenting project.  ``refrepoid`` is the row's ``repo_id`` when the
    reference points back at that same project, otherwise "NULL".

    Args:
        csvfile: object with a ``writerow(list)`` method (e.g. a csv writer).
        limit: forwarded unchanged to ``dbutil.execute_at_once``.

    Raises:
        Exception: when a row's ``full_name`` does not match
            ``PROJ_NAME_PAT``.  Any error raised while handling a row is
            printed, its traceback is written to stderr, and it is re-raised.
    """
    cur = dbutil.execute_at_once("""select body, full_name, pull_requests.base_repo_id repo_id, pull_requests.id prDbId, pullreq_id issueHumanId,
                   pull_request_comments.comment_id pr_comment_id  
                   from pull_request_comments 
                   left join pull_requests on pull_request_comments.pull_request_id=pull_requests.id
                   join project_stats on pull_requests.base_repo_id=project_stats.project_id""", limit=limit)
    rowcount = 0
    for row in cur:
        rowcount += 1
        # Progress indicator: one line every 1000 rows.
        if rowcount % 1000 == 0:
            print(rowcount)
        try:
            match = PROJ_NAME_PAT.match(row["full_name"])
            if match is None:
                raise Exception("Cannot identify owner and project name of " + str(row))
            owner = match.group(1)
            projectname = match.group(2)

            features = {}
            # The query selects no title column, so only the body is scanned.
            git_comment_conventions.find_special(features, row["body"])
            for ref in features.get("issues", ()):
                refown = ref["parts"][0]
                refprj = ref["parts"][1]
                refnum = ref["parts"][2]
                refstyle = ref["refstyle"]
                if refown == "%OWNER%":
                    refown = owner
                if refprj == "%PROJECT%":
                    refprj = projectname
                same_project = refown == owner and refprj == projectname
                refrepoid = row["repo_id"] if same_project else "NULL"
                csvfile.writerow([owner + "/" + projectname, row["repo_id"], row["issueHumanId"],
                                  "NULL", "NULL", row["prDbId"], row["pr_comment_id"], refstyle,
                                  refown + "/" + refprj, refrepoid, refnum, "NULL"])
        except Exception as e:
            print(e)
            traceback.print_exc(file=sys.stderr)
            # Bare raise keeps the original traceback; "raise e" would
            # restart it from this line under Python 2.
            raise