def process_commit(commit_id, resDirName, out_f, out_src_dir, f):
    """Try to extract a before/after pair for a commit touching one Java file.

    The commit is accepted only when `git diff --name-only` between the
    commit and its first parent lists exactly one ``.java``/``.Java`` path
    whose name does not contain ``test``/``Test``.  Two fresh copies of
    ``orig_src_dir`` are then made (``src1_dir`` switched to the parent
    revision, ``src2_dir`` to the commit itself) and handed to
    ``build_pair``.

    Args:
        commit_id: Revision identifier; ``commit_id + "^"`` is used as the
            parent revision.
        resDirName: Not used by this function.
        out_f: Not used by this function.
        out_src_dir: Directory prefix for the ``b_<commit>.po`` and
            ``a_<commit>.po`` paths passed to ``build_pair``.
        f: Open, writable log file; the commit id is appended and flushed
            after a successful extraction.

    Side effects:
        Calls ``commit_failure(commit_id)`` or ``commit_success(commit_id)``,
        increments the module-level ``collect_cnt`` on success, and closes
        ``f`` and calls ``exit(0)`` once ``collect_limit`` (when non-zero)
        has been reached.
    """
    global collect_cnt

    def is_java(path):
        return path.endswith((".java", ".Java"))

    def is_test(path):
        return "test" in path or "Test" in path

    print("Processing commit: " + commit_id)

    # Check for the right files.
    # universal_newlines=True makes communicate() hand back text on both
    # Python 2 and Python 3; without it Python 3 returns bytes and the
    # str-based split below raises TypeError.
    p = subprocess.Popen(
        ["git", "diff", "--name-only", commit_id, commit_id + "^"],
        stdout=subprocess.PIPE,
        cwd=orig_src_dir,
        universal_newlines=True)
    out, _ = p.communicate()

    # A real list (not filter()) so that len() and indexing also work on
    # Python 3, where filter() is a lazy iterator.
    src_files = [
        path for path in out.split("\n")
        if is_java(path) and not is_test(path)
    ]
    if not src_files:
        print("No source files changed!")
        commit_failure(commit_id)
        return
    if len(src_files) > 1:
        print("Too many files changed!")
        commit_failure(commit_id)
        return
    src_file = src_files[0]
    print("src file: " + src_file)

    # Start from two pristine copies of the original checkout.
    for work_dir in (src1_dir, src2_dir):
        system("rm -rf " + work_dir)
        system("cp -rf " + orig_src_dir + " " + work_dir)
    repo1 = create_repo_handler(src1_dir, "git")
    repo2 = create_repo_handler(src2_dir, "git")
    repo1.switch_to_rev(commit_id + "^")
    repo2.switch_to_rev(commit_id)

    # This all came from crawler.py, and is slightly mysterious
    tmp1f = "/tmp/__rewritebefore.java"
    tmp2f = "/tmp/__rewriteafter.java"
    tmp1f_backup = "/tmp/__backupbefore.java"
    tmp2f_backup = "/tmp/__backupafter.java"
    before_path = src1_dir + "/" + src_file
    after_path = src2_dir + "/" + src_file

    def restore_originals():
        # Undo the in-place swap performed when rewrite_ret == 1.
        print("Restore back!")
        system("cp " + tmp1f_backup + " " + before_path)
        system("cp " + tmp2f_backup + " " + after_path)

    if rewrite:
        system("rm -rf " + tmp1f + " " + tmp2f + " " +
               tmp1f_backup + " " + tmp2f_backup)
        rewrite_ret = rewrite_pair(src1_dir, src2_dir, src_file, tmp1f, tmp2f)
        print("Rewrite RET: " + str(rewrite_ret))
        if rewrite_ret == 1:
            # Keep the untouched sources aside, then build from the
            # rewritten versions.
            print("Rewrite and store backup!")
            system("cp " + before_path + " " + tmp1f_backup)
            system("cp " + after_path + " " + tmp2f_backup)
            system("cp " + tmp1f + " " + before_path)
            system("cp " + tmp2f + " " + after_path)
    else:
        rewrite_ret = 0

    # rewrite_ret == 2 is treated as a failure without attempting a build
    # (presumably "rewrite failed" -- confirm against rewrite_pair).
    if rewrite_ret == 2 or not build_pair(
            src1_dir, src2_dir, src_file,
            out_src_dir + "/b_" + commit_id + ".po",
            out_src_dir + "/a_" + commit_id + ".po"):
        print("Cannot extract pair " + commit_id + "^")
        if rewrite_ret == 1:
            restore_originals()
        commit_failure(commit_id)
        return

    if rewrite_ret == 1:
        restore_originals()
    commit_success(commit_id)

    f.write(commit_id + '\n')
    f.flush()
    collect_cnt += 1
    if collect_limit != 0 and collect_cnt >= collect_limit:
        print("Already collected enough revisions, going to terminate!")
        f.close()
        exit(0)
def analyze_repo(repo_dir, outlogf, outdir, repo_type, fix_only, year_limit, revrec = None):
    """Walk a repository's revisions and extract before/after pairs.

    For every revision whose Java diff (after dropping paths containing
    ``test``/``Test``) touches exactly one file, two scratch copies of the
    repository under ``__tmp`` are switched to the parent and the revision
    respectively and handed to ``build_pair``.  Each revision for which
    that succeeds is appended to the log file.

    Args:
        repo_dir: Path of the repository to analyze.
        outlogf: Path of the log file; it is truncated and receives one
            successfully processed revision per line.
        outdir: Output directory; it is deleted and recreated, and is used
            as the prefix of the ``b_<rev>.po`` / ``a_<rev>.po`` paths
            passed to ``build_pair``.
        repo_type: Repository type forwarded to ``create_repo_handler``.
        fix_only: Forwarded to ``repo.get_revs`` when ``revrec`` is None.
        year_limit: Forwarded to ``repo.get_revs`` when ``revrec`` is None.
        revrec: Optional iterable of ``(rev, parent_rev, _)`` triples to
            process instead of querying the repository.

    Side effects:
        Increments the module-level ``collect_cnt`` per extracted revision
        and calls ``exit(0)`` once ``collect_limit`` (when non-zero) is
        reached.  The loop is abandoned after more than 50 consecutive
        extraction failures.  The log file is closed and ``__tmp`` removed
        on every way out, including when a helper raises.
    """
    global collect_cnt

    # NOTE: every print below takes a single pre-formatted argument so the
    # output is byte-identical whether it runs as a Python 2 print
    # statement or as the Python 3 print function.
    repo = create_repo_handler(repo_dir, repo_type)
    if revrec is None:
        revs = repo.get_revs(fix_only, year_limit)
    else:
        print("Going for existing revisions:")
        print(revrec)
        revs = revrec

    tmp_dir = "__tmp"
    tmp_repo1 = tmp_dir + "/src1"
    tmp_repo2 = tmp_dir + "/src2"
    tmp1f = "/tmp/__rewritebefore.java"
    tmp2f = "/tmp/__rewriteafter.java"
    tmp1f_backup = "/tmp/__backupbefore.java"
    tmp2f_backup = "/tmp/__backupafter.java"

    def restore_originals(src_file):
        # Undo the in-place swap performed when rewrite_ret == 1.
        print("Restore back!")
        system("cp " + tmp1f_backup + " " + tmp_repo1 + "/" + src_file)
        system("cp " + tmp2f_backup + " " + tmp_repo2 + "/" + src_file)

    system("rm -rf " + tmp_dir)
    system("mkdir " + tmp_dir)
    try:
        system("cp -rf " + repo_dir + " " + tmp_repo1)
        system("cp -rf " + repo_dir + " " + tmp_repo2)
        system("rm -rf " + outdir)
        system("mkdir " + outdir)
        repo1 = create_repo_handler(tmp_repo1, repo_type)
        repo2 = create_repo_handler(tmp_repo2, repo_type)

        with open(outlogf, "w") as f:
            cnt = 0  # consecutive extraction failures
            for rev, parent_rev, _ in revs:
                print("Processing rev:  %s" % (rev,))
                # FIXME: This only works for git
                if parent_rev == "":
                    parent_rev = rev + "^1"

                diff_res = repo.get_diff_for_java(parent_rev, rev)
                realsrcf = [
                    srcf for srcf in diff_res.keys()
                    if "test" not in srcf and "Test" not in srcf
                ]
                if not realsrcf:
                    print("No source file changed!")
                    continue
                if len(realsrcf) > 1:
                    print("Too many file modified!")
                    continue
                src_file = realsrcf[0]
                print("src file:  %s" % (src_file,))
                print("diff size:  %s" % (diff_res[src_file][0],))

                repo1.switch_to_rev(parent_rev)
                repo2.switch_to_rev(rev)

                system("rm -rf " + tmp1f + " " + tmp2f + " " +
                       tmp1f_backup + " " + tmp2f_backup)
                rewrite_ret = rewrite_pair(
                    tmp_repo1, tmp_repo2, src_file, tmp1f, tmp2f)
                print("Rewrite RET: " + str(rewrite_ret))
                if rewrite_ret == 1:
                    # Keep the untouched sources aside, then build from
                    # the rewritten versions.
                    print("Rewrite and store backup!")
                    system("cp " + tmp_repo1 + "/" + src_file + " " + tmp1f_backup)
                    system("cp " + tmp_repo2 + "/" + src_file + " " + tmp2f_backup)
                    system("cp " + tmp1f + " " + tmp_repo1 + "/" + src_file)
                    system("cp " + tmp2f + " " + tmp_repo2 + "/" + src_file)

                # rewrite_ret == 2 is treated as a failure without
                # attempting a build (presumably "rewrite failed" --
                # confirm against rewrite_pair).
                if rewrite_ret == 2 or not build_pair(
                        tmp_repo1, tmp_repo2, src_file,
                        outdir + "/b_" + rev + ".po",
                        outdir + "/a_" + rev + ".po"):
                    print("Cannot extract pair " + parent_rev)
                    if rewrite_ret == 1:
                        restore_originals(src_file)
                    cnt += 1
                    if cnt > 50:
                        print("Not being able to extract for more than 50 revs in a row, ABORT this project!")
                        break
                    continue

                if rewrite_ret == 1:
                    restore_originals(src_file)
                cnt = 0
                f.write("%s\n" % (rev,))
                f.flush()
                collect_cnt += 1
                if collect_limit != 0 and collect_cnt >= collect_limit:
                    print("Already collected enough revisions, going to terminate!")
                    # SystemExit unwinds through the `with` and `finally`
                    # below, which close the log and remove the scratch
                    # directory before the process terminates.
                    exit(0)
    finally:
        system("rm -rf " + tmp_dir)
repo = None; revs = res[rurl]; for rev in revs: if rev in diable: continue; if not created and not nomsg: created = True; system("rm -rf __tmp"); if (github_user != ""): ret = system("git clone https://" + github_user + ":" + github_passwd + "@github.com/" + unmarshal_reponame(rurl) + " __tmp"); else: ret = system("git clone https://github.com/" + unmarshal_reponame(rurl) + " __tmp"); if (ret != 0): print "Cannot fetch the repo, give up the repo!"; break; repo = create_repo_handler("__tmp", "git"); if nomsg: commit_msg = ""; else: commit_msg = "\n".join(repo.get_commit_log(rev)) res2.append((rurl, rev, commit_msg)); print res2; db = MySQLdb.connect(user = user, host = dbhost, passwd = passwd, db = "genesis"); for (rurl, rev, commit_msg) in res2: c = db.cursor(); s1 = rurl[1:]; idx = s1.find("_"); accname = s1[0:idx]; reponame = s1[idx+1:];
elif o == "--depdir": deps_dir = a elif o == "--o-revs": out_rev_file = a elif o == "--i-revs": in_rev_file = a elif o == "--sid": sid = int(a) elif o == "--eid": eid = int(a) elif o == "--dump-source": dump_dir = a repo_dir = args[0] repo_type = args[1] repo = create_repo_handler(repo_dir, repo_type) build_cmd = args[2] rev_result_file = args[3] if in_rev_file == "": revs = repo.get_revs(fix_only, year_limit) else: fin = open(in_rev_file, "r") lines = fin.readlines() revs = [] for line in lines: tokens = line.strip().split() if (len(tokens) == 1): revs.append((tokens[0], repo.get_parent_rev(tokens[0]), "")) else: revs.append((tokens[0], tokens[1], ""))
accname = row[1] reponame = row[2] repourl = row[3] c2 = db.cursor() c2.execute( """SELECT id, github_prepatch_rev, github_postpatch_rev FROM patch WHERE app_id = %s""", (idx, )) if (c2.rowcount > 0): try: system("rm -rf __tmp") tmpdir = "__tmp" if (github_user != ""): tmprepourl = repourl[ 0:8] + github_user + ":" + github_passwd + "@" + repourl[8:] system("git clone " + tmprepourl + " " + tmpdir) repo = create_repo_handler(tmpdir, "git") except: print "give up on this repo, try next" continue for row2 in c2: revid = row2[2] try: lines = repo.get_commit_log(revid) except: print "give up this repo, break out" break nullFix = False for line in lines: if (line.find("null") != -1 or line.find("Null") != -1) and \ (line.find("deref") != -1 or line.find("Deref") != -1 or \ line.find("pointer") != -1 or line.find("Pointer") != -1 or \