def dumpFile(self, fileName, sha, destination):
    """Write the content of ``fileName`` at commit ``sha`` into ``destination``.

    Runs ``git show <sha>:<fileName>`` from inside ``self.repo_path`` and
    stores git's standard output in ``destination``.  Because the working
    directory is ``self.repo_path`` while the file is opened, a relative
    ``destination`` is resolved against the repository, as before.

    Git is invoked with an argument list instead of a shell command line,
    so file names and destinations containing spaces or shell
    metacharacters are taken literally.

    Nothing is returned and nothing is raised: any exception, and any
    non-zero git exit status, is reported on stdout.  As with the previous
    shell redirection, ``destination`` is created even when git fails.
    """
    # Local import: keeps this block independent of the module's import list.
    import subprocess

    with cd(self.repo_path):
        try:
            with open(destination, 'wb') as out_file:
                exit_code = subprocess.call(
                    ['git', 'show', sha + ":" + fileName], stdout=out_file)
        except Exception as e:
            # Best-effort contract of the original: report and carry on.
            print("%s %s %s %s" % (fileName, sha, destination, e))
            return
        if exit_code != 0:
            # os.system() used to swallow this case silently.
            print("%s %s %s git show exited with status %s"
                  % (fileName, sha, destination, exit_code))
def dumpSnapshotsByInterval(srcPath, destPath, ss_interval_len, commitDateMin, commitDateMax):
    """Create dated snapshot copies of the repository at ``srcPath``.

    Snapshot dates start at ``commitDateMin`` plus one day and advance by
    ``ss_interval_len * 30`` days while they do not exceed
    ``commitDateMax``; a final snapshot is always taken for
    ``commitDateMax`` itself.

    For each date the snapshot directory is ``destPath/<str(date)>``.  When
    that directory does not exist yet, ``srcPath`` is copied into it with
    ``Util.copy_dir`` and, from inside the copy, a ``git checkout`` of the
    commit selected by ``git rev-list -n 1 --no-merges --before=<date>`` on
    the active branch is handed to ``gitDumpSnapshot``.  Directories that
    already exist are left untouched.

    Raises:
        ValueError: if ``ss_interval_len`` is not positive.  Such a value
            never advances the date and previously looped forever.
    """
    if ss_interval_len <= 0:
        raise ValueError(
            "ss_interval_len must be positive, got %r" % (ss_interval_len,))

    print("%s %s %s %s" % (srcPath, destPath, commitDateMin, commitDateMax))

    repo = Repo(srcPath)
    branch = repo.active_branch
    print(branch)

    project_name = getProjName(srcPath)
    print(project_name)

    # Collect every snapshot date up front; the end date is always included
    # last, exactly as the former trailing copy of the loop body did.
    step = timedelta(days=ss_interval_len * 30)
    snapshot_dates = []
    start_date = commitDateMin + timedelta(days=1)
    while start_date <= commitDateMax:
        snapshot_dates.append(start_date)
        start_date = start_date + step
    snapshot_dates.append(commitDateMax)

    for snapshot_date in snapshot_dates:
        snapshot = destPath + os.sep + str(snapshot_date)
        print(snapshot)
        if not os.path.isdir(snapshot):
            Util.copy_dir(srcPath, snapshot)
            git_command = "git checkout `git rev-list -n 1 --no-merges --before=\"" + str(
                snapshot_date) + "\" " + str(branch) + "`"
            with cd(snapshot):
                gitDumpSnapshot(git_command)
def dumpPatches(self, bug_only=True):
    """Write one ``git diff`` patch file per qualifying changed file.

    Patches are written below ``<self.project>/patches`` (created through
    ``Util.create_dir``).  For every commit in ``self.sha2commit`` and every
    change in ``co.changes`` the diff ``git diff -U10 -w <sha>^ <sha> --
    <file>`` is run from inside ``self.project``; on success its output is
    stored in a file named from the project name, the commit's author
    date, the changed file name (path separators replaced by
    ``self.configInfo.SEP``) and the commit sha, joined by
    ``self.configInfo.SEP`` and suffixed with ``.patch``.

    A change is skipped when its deletion or insertion count exceeds
    ``DEL_MAX`` / ``ADD_MAX`` or equals ``DEL_MIN`` / ``ADD_MIN``.

    Args:
        bug_only: when true (default) only commits whose ``isbug`` flag is
            set are dumped; when false every commit is dumped.
    """
    patch_folder = os.path.join(self.project, 'patches')
    Util.create_dir(patch_folder)

    # Loop-invariant: the last path component of the project directory.
    _, project_name = ntpath.split(self.project)

    for co in self.sha2commit.values():
        # FIX: the old test was ``bug_only == False or co.isbug == False``,
        # which skipped *every* commit when bug_only was False.  The
        # ``== False`` comparison on isbug is kept on purpose so that
        # non-boolean flag values are treated exactly as before.
        if bug_only and co.isbug == False:
            continue

        for ch in co.changes:
            insertion, deletion, file_name, _language = ch.get()
            added = int(insertion)
            removed = int(deletion)

            if removed > DEL_MAX or added > ADD_MAX:
                continue
            if removed == DEL_MIN or added == ADD_MIN:
                continue

            sep = self.configInfo.SEP
            fname = file_name.replace(os.sep, sep)
            patch_name = project_name \
                + sep + co.getAuthorDate() \
                + sep + fname \
                + sep + co.sha \
                + '.patch'
            # Relative on purpose: resolved inside ``cd(self.project)`` below.
            patch_path = os.path.join('patches', patch_name)

            out_str = ('git diff -U10 -w %s^ %s -- %s' %
                       (co.sha, co.sha, file_name))

            with cd(self.project):
                print(out_str)
                exitcode, out, err = Util.runCmd(out_str)
                if exitcode != 0:
                    print("%s %s" % ("!!Error Git Diff", err))
                else:
                    with open(patch_path, 'w') as patch_file:
                        patch_file.write("%s" % out)
def dumpSnapshotsByInterval(srcPath, destPath, ss_interval_len, commitDateMin, commitDateMax):
    """Dump periodic snapshots of the repository at ``srcPath``.

    The first snapshot date is ``commitDateMin`` plus one day; subsequent
    dates are spaced ``ss_interval_len * 30`` days apart for as long as
    they do not pass ``commitDateMax``.  One more snapshot is always made
    for ``commitDateMax`` itself.

    Each snapshot lives in ``destPath/<str(date)>``.  If that directory is
    missing, ``srcPath`` is copied there via ``Util.copy_dir`` and a
    ``git checkout`` of the commit returned by ``git rev-list -n 1
    --no-merges --before=<date>`` on the active branch is passed to
    ``gitDumpSnapshot`` from inside the copy.  Existing directories are
    skipped.

    Raises:
        ValueError: if ``ss_interval_len`` is zero or negative, which would
            otherwise keep the date from advancing and never terminate.
    """
    if ss_interval_len <= 0:
        raise ValueError(
            "ss_interval_len must be positive, got %r" % (ss_interval_len,))

    print("%s %s %s %s" % (srcPath, destPath, commitDateMin, commitDateMax))

    branch = Repo(srcPath).active_branch
    print(branch)
    print(getProjName(srcPath))

    def _snapshot_at(when):
        # Create (if absent) and check out the snapshot for one date.
        snapshot = destPath + os.sep + str(when)
        print(snapshot)
        if os.path.isdir(snapshot):
            return
        Util.copy_dir(srcPath, snapshot)
        git_command = "git checkout `git rev-list -n 1 --no-merges --before=\"" \
            + str(when) + "\" " + str(branch) + "`"
        with cd(snapshot):
            gitDumpSnapshot(git_command)

    stride = timedelta(days=ss_interval_len * 30)
    cursor = commitDateMin + timedelta(days=1)
    while cursor <= commitDateMax:
        _snapshot_at(cursor)
        cursor = cursor + stride

    # The end of the range always gets its own snapshot.
    _snapshot_at(commitDateMax)
def main():
    """Print the cross-entropy commands for every project in a corpus.

    Command-line arguments (all required):
        1. top-level corpus directory; each entry in it is one project
        2. output directory (passed as ``-d``)
        3. cache minimum order (passed as ``-c``)
        4. cache backoff weight (passed as ``-b``)

    For each entry of the corpus directory two
    ``get_cross_entropy_thread.py`` command lines are printed: a forward
    one and a ``--reverse`` one.  The commands are only printed; the calls
    that would execute them are disabled in this function.

    Exits through ``sys.exit`` when arguments are missing or the corpus
    directory is not a directory.
    """
    print("Run the run.py for all git corpus inside to directory")

    if len(sys.argv) < 2:
        print("Pass a top level corpus directory")
        sys.exit()

    # FIX: argv[2..4] are read below, but only argv[1] used to be checked,
    # so a short command line died with an IndexError traceback.  Exit
    # status stays non-zero for that case, as it was with the traceback.
    if len(sys.argv) < 5:
        print("Usage: %s <corpus_dir> <output_dir> <cache_min_order> "
              "<cache_backoff_weight>" % sys.argv[0])
        sys.exit(1)

    proj_dir = sys.argv[1]
    print("%s %s" % ("corpus directory :", proj_dir))

    if not os.path.isdir(proj_dir):
        print("Not a valid directory, please pass a top level corpus directory")
        sys.exit()

    output_dir = sys.argv[2]
    print("%s %s" % ("output directory :", output_dir))

    cache_min_order = sys.argv[3]
    cache_backoff_weight = sys.argv[4]

    curr_path = os.path.dirname(os.path.realpath(__file__))

    projects = os.listdir(proj_dir)

    for prj in projects:
        print(" ".join(("echo \"============= ", prj, " =============\"")))
        p = proj_dir + os.sep + prj

        run_command_plm = "nohup python get_cross_entropy_thread.py -p " + p \
            + " -d " + output_dir + " --snapshot --append " \
            + " -b " + cache_backoff_weight + " -c " + cache_min_order \
            + " > " + prj + ".out 2> " + prj + ".err &"

        run_command_slm = "python get_cross_entropy_thread.py -p " + p \
            + " -d " + output_dir + " --snapshot --reverse --append " \
            + " -b " + cache_backoff_weight + " -c " + cache_min_order \
            + " > " + prj + "_r.out 2> " + prj + "_r.err &"

        with cd(curr_path):
            # Execution is intentionally disabled; the commands are only shown.
            print(run_command_plm)
            print(run_command_slm)

    print("run4all done!!")
def resetSnapShots(projPath):
    """Run ``git clean -f`` inside every immediate sub-directory of ``projPath``.

    ``projPath`` and each visited sub-directory are printed.  Entries of
    ``projPath`` that are not directories are ignored.
    """
    print(projPath)

    # Lazily build the candidate paths so each one is tested right before use.
    candidates = (os.path.join(projPath, entry) for entry in os.listdir(projPath))
    for snap_dir in candidates:
        if not os.path.isdir(snap_dir):
            continue
        print(snap_dir)
        with cd(snap_dir):
            os.system("git clean -f")
def dumpSnapshotsBySha(srcPath, destPath, shaList):
    """Create one snapshot directory per entry of ``shaList``.

    The first three items of each entry are read as bug id, commit id and
    a value whose ``.date()`` is used for naming.  The snapshot directory
    is ``destPath/<date>__<bug id>``.  When it does not exist yet,
    ``srcPath`` is copied there with ``Util.copy_dir`` and, inside the
    copy, these commands are run through ``Util.runCmd`` in order:
    ``git reset --hard``, ``git checkout -f <commit id>``,
    ``git clean -df``, ``git reset --hard``.  Existing directories are
    left as they are.
    """
    for entry in shaList:
        bug_id = entry[0]
        commit_id = entry[1]
        sha_date = entry[2]

        day_label = "%s" % (sha_date.date())
        snapshot = os.path.join(destPath, "__".join((day_label, bug_id)))

        if not os.path.isdir(snapshot):
            print("%s %s" % (">>>>>>>>>> ", snapshot))
            Util.copy_dir(srcPath, snapshot)

            git_command = "git checkout -f " + commit_id
            print(git_command)

            steps = ("git reset --hard", git_command, "git clean -df", "git reset --hard")
            with cd(snapshot):
                for step in steps:
                    Util.runCmd(step)
def dumpSnapshotsBySha(srcPath, destPath, shaList):
    """Check out a separate copy of the repository for each listed commit.

    Every element of ``shaList`` is indexed for its bug id (0), commit id
    (1) and a value providing ``.date()`` (2).  A directory named
    ``<date>__<bug id>`` under ``destPath`` is the snapshot location; if
    it is already a directory the element is skipped.  Otherwise
    ``srcPath`` is copied into it via ``Util.copy_dir`` and, from inside
    the copy, ``Util.runCmd`` runs a hard reset, a forced checkout of the
    commit, ``git clean -df`` and another hard reset.
    """
    for record in shaList:
        bug_id, commit_id, sha_date = record[0], record[1], record[2]

        date_part = "%s" % (sha_date.date())
        folder = ('__').join((date_part, bug_id))
        snapshot = os.path.join(destPath, folder)

        # Guard clause: a snapshot that is already on disk is not rebuilt.
        if os.path.isdir(snapshot):
            continue

        print("%s %s" % (">>>>>>>>>> ", snapshot))
        Util.copy_dir(srcPath, snapshot)

        git_command = "git checkout -f " + commit_id
        print(git_command)

        with cd(snapshot):
            Util.runCmd("git reset --hard")
            Util.runCmd(git_command)
            Util.runCmd("git clean -df")
            Util.runCmd("git reset --hard")