def process_commit(commit_id, resDirName, out_f, out_src_dir, f):
    """Try to extract a before/after pair for one commit and record the outcome.

    The commit is only processed further when ``git diff --name-only`` between
    it and its parent lists exactly one Java file whose path does not contain
    "test"/"Test".  On success the commit id is appended to ``f`` and
    ``commit_success`` is called; on any rejection ``commit_failure`` is called.

    Args:
        commit_id: Revision to process; ``commit_id + "^"`` is used as parent.
        resDirName: Not used by this function (kept for interface stability).
        out_f: Not used by this function (kept for interface stability).
        out_src_dir: Directory that receives ``b_<commit_id>.po`` and
            ``a_<commit_id>.po`` from ``build_pair``.
        f: Open writable file object; one commit id per line is written to it.

    Reads the module-level names ``orig_src_dir``, ``src1_dir``, ``src2_dir``,
    ``rewrite`` and ``collect_limit`` and increments ``collect_cnt``.  When
    ``collect_limit`` is non-zero and reached, ``f`` is closed and the process
    terminates through ``exit(0)``.
    """
    global collect_cnt

    print("Processing commit: " + commit_id)

    # universal_newlines=True makes communicate() return text (not bytes) on
    # Python 3 as well, so the split below works on either interpreter.
    p = subprocess.Popen(
        ["git", "diff", "--name-only", commit_id, commit_id + "^"],
        stdout=subprocess.PIPE,
        universal_newlines=True,
        cwd=orig_src_dir)
    out, _ = p.communicate()

    def is_java(path):
        return path.endswith((".java", ".Java"))

    def is_test(path):
        return "test" in path or "Test" in path

    # A real list (not filter()) so that len()/indexing also work on Python 3.
    src_files = [
        path for path in out.split("\n")
        if is_java(path) and not is_test(path)
    ]
    if not src_files:
        print("No source files changed!")
        commit_failure(commit_id)
        return
    if len(src_files) > 1:
        print("Too many files changed!")
        commit_failure(commit_id)
        return
    src_file = src_files[0]
    print("src file: " + src_file)

    # Fresh copies of the original tree: src1 = parent revision, src2 = commit.
    # (The former check_buildable() gate on both trees is disabled.)
    system("rm -rf " + src1_dir)
    system("cp -rf " + orig_src_dir + " " + src1_dir)
    system("rm -rf " + src2_dir)
    system("cp -rf " + orig_src_dir + " " + src2_dir)
    repo1 = create_repo_handler(src1_dir, "git")
    repo2 = create_repo_handler(src2_dir, "git")
    repo1.switch_to_rev(commit_id + "^")
    repo2.switch_to_rev(commit_id)

    # This all came from crawler.py, and is slightly mysterious
    tmp1f = "/tmp/__rewritebefore.java"
    tmp2f = "/tmp/__rewriteafter.java"
    tmp1f_backup = "/tmp/__backupbefore.java"
    tmp2f_backup = "/tmp/__backupafter.java"

    def restore_backups():
        # Put the un-rewritten sources back into both working trees.
        print("Restore back!")
        system("cp " + tmp1f_backup + " " + src1_dir + "/" + src_file)
        system("cp " + tmp2f_backup + " " + src2_dir + "/" + src_file)

    if rewrite:
        system("rm -rf " + tmp1f + " " + tmp2f + " " + tmp1f_backup + " " +
               tmp2f_backup)
        rewrite_ret = rewrite_pair(src1_dir, src2_dir, src_file, tmp1f, tmp2f)
        print("Rewrite RET: " + str(rewrite_ret))
        if rewrite_ret == 1:
            # 1: rewritten files were produced; swap them in, keeping backups.
            print("Rewrite and store backup!")
            system("cp " + src1_dir + "/" + src_file + " " + tmp1f_backup)
            system("cp " + src2_dir + "/" + src_file + " " + tmp2f_backup)
            system("cp " + tmp1f + " " + src1_dir + "/" + src_file)
            system("cp " + tmp2f + " " + src2_dir + "/" + src_file)
    else:
        rewrite_ret = 0

    # rewrite_ret == 2 is treated as a hard failure; build_pair is then skipped.
    extracted = rewrite_ret != 2 and build_pair(
        src1_dir, src2_dir, src_file,
        out_src_dir + "/b_" + commit_id + ".po",
        out_src_dir + "/a_" + commit_id + ".po")

    if not extracted:
        print("Cannot extract pair " + commit_id + "^")
        if rewrite_ret == 1:
            restore_backups()
        commit_failure(commit_id)
        return

    if rewrite_ret == 1:
        restore_backups()

    commit_success(commit_id)

    f.write(commit_id + '\n')
    f.flush()
    collect_cnt += 1
    if collect_limit != 0 and collect_cnt >= collect_limit:
        print("Already collected enough revisions, going to terminate!")
        f.close()
        exit(0)
Beispiel #2
0
def analyze_repo(repo_dir, outlogf, outdir, repo_type, fix_only, year_limit, revrec=None):
    """Walk a repository's revisions and extract a before/after pair for each.

    For every revision that changes exactly one Java file whose path does not
    contain "test"/"Test", the parent and the revision are checked out into two
    scratch copies, optionally rewritten via ``rewrite_pair``, and handed to
    ``build_pair``.  Each successfully extracted revision is written to
    ``outlogf`` on its own line.

    Args:
        repo_dir: Path of the repository to analyse; copied into ``__tmp``.
        outlogf: Path of the log file to (over)write with extracted revisions.
        outdir: Output directory; it is deleted and recreated, then receives
            ``b_<rev>.po`` / ``a_<rev>.po``.
        repo_type: Passed through to ``create_repo_handler``.
        fix_only: Passed through to ``repo.get_revs``.
        year_limit: Passed through to ``repo.get_revs``.
        revrec: Optional iterable of 3-tuples ``(rev, parent_rev, _)`` to use
            instead of ``repo.get_revs``; an empty ``parent_rev`` falls back to
            ``rev + "^1"``.

    Increments the module-level ``collect_cnt`` and reads ``collect_limit``;
    when the limit is non-zero and reached the process terminates through
    ``exit(0)``.  The loop also stops after more than 50 consecutive
    extraction failures.  The log file is closed and ``__tmp`` removed on
    every exit path, including exceptions.
    """
    global collect_cnt

    repo = create_repo_handler(repo_dir, repo_type)
    if revrec is None:
        revs = repo.get_revs(fix_only, year_limit)
    else:
        print("Going for existing revisions:")
        print(revrec)
        revs = revrec

    tmp_dir = "__tmp"
    system("rm -rf " + tmp_dir)
    system("mkdir " + tmp_dir)
    tmp_repo1 = tmp_dir + "/src1"
    tmp_repo2 = tmp_dir + "/src2"
    system("cp -rf " + repo_dir + " " + tmp_repo1)
    system("cp -rf " + repo_dir + " " + tmp_repo2)
    system("rm -rf " + outdir)
    system("mkdir " + outdir)

    repo1 = create_repo_handler(tmp_repo1, repo_type)
    repo2 = create_repo_handler(tmp_repo2, repo_type)

    # Scratch files shared by all iterations.
    tmp1f = "/tmp/__rewritebefore.java"
    tmp2f = "/tmp/__rewriteafter.java"
    tmp1f_backup = "/tmp/__backupbefore.java"
    tmp2f_backup = "/tmp/__backupafter.java"

    def restore_backups(src_file):
        # Put the un-rewritten sources back into both scratch repositories.
        print("Restore back!")
        system("cp " + tmp1f_backup + " " + tmp_repo1 + "/" + src_file)
        system("cp " + tmp2f_backup + " " + tmp_repo2 + "/" + src_file)

    cnt = 0  # consecutive extraction failures
    f = open(outlogf, "w")
    try:
        for rev, parent_rev, _ in revs:
            # Double space reproduces the former `print "...: ", rev` output.
            print("Processing rev:  %s" % (rev,))
            # FIXME: This only works for git
            if parent_rev == "":
                parent_rev = rev + "^1"
            diff_res = repo.get_diff_for_java(parent_rev, rev)
            realsrcf = [
                srcf for srcf in diff_res.keys()
                if "test" not in srcf and "Test" not in srcf
            ]
            if not realsrcf:
                print("No source file changed!")
                continue
            if len(realsrcf) > 1:
                print("Too many file modified!")
                continue
            src_file = realsrcf[0]
            print("src file:  %s" % (src_file,))
            print("diff size:  %s" % (diff_res[src_file][0],))

            repo1.switch_to_rev(parent_rev)
            repo2.switch_to_rev(rev)

            system("rm -rf " + tmp1f + " " + tmp2f + " " + tmp1f_backup + " " + tmp2f_backup)
            rewrite_ret = rewrite_pair(tmp_repo1, tmp_repo2, src_file, tmp1f, tmp2f)
            print("Rewrite RET: " + str(rewrite_ret))
            if rewrite_ret == 1:
                # 1: rewritten files were produced; swap them in, keeping backups.
                print("Rewrite and store backup!")
                system("cp " + tmp_repo1 + "/" + src_file + " " + tmp1f_backup)
                system("cp " + tmp_repo2 + "/" + src_file + " " + tmp2f_backup)
                system("cp " + tmp1f + " " + tmp_repo1 + "/" + src_file)
                system("cp " + tmp2f + " " + tmp_repo2 + "/" + src_file)

            # rewrite_ret == 2 is a hard failure; build_pair is then skipped.
            extracted = rewrite_ret != 2 and build_pair(
                tmp_repo1, tmp_repo2, src_file,
                outdir + "/b_" + rev + ".po",
                outdir + "/a_" + rev + ".po")

            if not extracted:
                print("Cannot extract pair " + parent_rev)
                if rewrite_ret == 1:
                    restore_backups(src_file)
                cnt += 1
                if cnt > 50:
                    print("Not being able to extract for more than 50 revs in a row, ABORT this project!")
                    break
                continue

            if rewrite_ret == 1:
                restore_backups(src_file)
            cnt = 0
            f.write("%s\n" % (rev,))
            f.flush()
            collect_cnt += 1
            if collect_limit != 0 and collect_cnt >= collect_limit:
                print("Already collected enough revisions, going to terminate!")
                # SystemExit propagates through the finally clause below, which
                # closes the log and removes the scratch directory first.
                exit(0)
    finally:
        f.close()
        system("rm -rf " + tmp_dir)
    # NOTE(review): these lines sit directly after analyze_repo's body at the
    # same indentation but use names that are not defined anywhere in view
    # (res, rurl, diable, created, nomsg, res2) -- this looks like the interior
    # of a per-repository loop from a different script; confirm before use.
    #
    # For every revision recorded for `rurl`, append (rurl, rev, commit_msg)
    # to `res2`, cloning the repository into __tmp on first need.
    repo = None;
    revs = res[rurl];
    for rev in revs:
        # Skip revisions listed in `diable` (presumably a "disabled" set -- verify).
        if rev in diable:
            continue;
        # Clone lazily: only once per repo, and only when messages are wanted.
        if not created and not nomsg:
            created = True;
            system("rm -rf __tmp");
            # NOTE(review): the credentials are embedded in the command line
            # handed to the shell.
            if (github_user != ""):
                ret = system("git clone https://" + github_user + ":" + github_passwd + "@github.com/" + unmarshal_reponame(rurl) + " __tmp");
            else:
                ret = system("git clone https://github.com/" + unmarshal_reponame(rurl) + " __tmp");
            # Non-zero exit status from the clone: abandon this repository.
            if (ret != 0):
                print "Cannot fetch the repo, give up the repo!";
                break;
            repo = create_repo_handler("__tmp", "git");
        if nomsg:
            commit_msg = "";
        else:
            # get_commit_log returns an iterable of strings, joined line by line.
            commit_msg = "\n".join(repo.get_commit_log(rev))
        res2.append((rurl, rev, commit_msg));

# Dump the collected (rurl, rev, commit_msg) triples, then walk them with a
# MySQL connection to the "genesis" database.
# NOTE(review): the loop body is cut off after `reponame` in this view; the
# cursor `c` is created but its use is not visible here.
print res2;

db = MySQLdb.connect(user = user, host = dbhost, passwd = passwd, db = "genesis");
for (rurl, rev, commit_msg) in res2:
    c = db.cursor();
    # Drop the first character of rurl, then split at the first "_":
    # text before it is the account name, text after it the repository name.
    # NOTE(review): if no "_" is present, find() returns -1, so accname loses
    # its last character and reponame becomes the whole of s1.
    s1 = rurl[1:];
    idx = s1.find("_");
    accname = s1[0:idx];
    reponame = s1[idx+1:];
Beispiel #4
0
        elif o == "--depdir":
            deps_dir = a
        elif o == "--o-revs":
            out_rev_file = a
        elif o == "--i-revs":
            in_rev_file = a
        elif o == "--sid":
            sid = int(a)
        elif o == "--eid":
            eid = int(a)
        elif o == "--dump-source":
            dump_dir = a

    # Positional arguments: repository path, repository type, build command,
    # and the result file for revisions.
    repo_dir = args[0]
    repo_type = args[1]
    repo = create_repo_handler(repo_dir, repo_type)
    build_cmd = args[2]
    rev_result_file = args[3]

    # Build `revs` as a list of (rev, parent_rev, "") tuples, either from the
    # repository itself or from the file given with --i-revs.
    if in_rev_file == "":
        revs = repo.get_revs(fix_only, year_limit)
    else:
        # NOTE(review): `fin` is not closed in the visible code.
        fin = open(in_rev_file, "r")
        lines = fin.readlines()
        revs = []
        for line in lines:
            # Each line is whitespace-separated: "<rev>" or "<rev> <parent_rev> ...".
            # NOTE(review): a blank line yields no tokens and tokens[0] raises
            # IndexError in the else branch.
            tokens = line.strip().split()
            if (len(tokens) == 1):
                # Only the revision is given; ask the repo for its parent.
                revs.append((tokens[0], repo.get_parent_rev(tokens[0]), ""))
            else:
                revs.append((tokens[0], tokens[1], ""))
 # NOTE(review): fragment -- the enclosing loop (the `continue` below needs
 # one) and the definitions of `row`, `idx` and `db` are outside this view,
 # and the final condition is cut off mid-expression.
 #
 # For one application row: fetch its patches, clone its repository into
 # __tmp and scan each patch's commit log for null-dereference wording.
 accname = row[1]
 reponame = row[2]
 repourl = row[3]
 c2 = db.cursor()
 c2.execute(
     """SELECT id, github_prepatch_rev, github_postpatch_rev FROM patch WHERE app_id = %s""",
     (idx, ))
 if (c2.rowcount > 0):
     try:
         system("rm -rf __tmp")
         tmpdir = "__tmp"
         # Splice "user:password@" in after the first 8 characters of the URL
         # (presumably the "https://" prefix -- verify against the data).
         # NOTE(review): when github_user is "", tmprepourl is not assigned
         # here; the next line then raises NameError or reuses a value left
         # over from an earlier iteration.
         if (github_user != ""):
             tmprepourl = repourl[
                 0:8] + github_user + ":" + github_passwd + "@" + repourl[8:]
         system("git clone " + tmprepourl + " " + tmpdir)
         repo = create_repo_handler(tmpdir, "git")
     except:
         # NOTE(review): bare except -- hides every error, including the
         # NameError described above.
         print "give up on this repo, try next"
         continue
     for row2 in c2:
         # Third selected column: github_postpatch_rev.
         revid = row2[2]
         try:
             lines = repo.get_commit_log(revid)
         except:
             print "give up this repo, break out"
             break
         nullFix = False
         for line in lines:
             if (line.find("null") != -1 or line.find("Null") != -1) and \
                 (line.find("deref") != -1 or line.find("Deref") != -1 or \
                 line.find("pointer") != -1 or line.find("Pointer") != -1 or \