Esempio n. 1
0
  def dumpFile(self, fileName, sha, destination):
    """Write the contents of ``fileName`` at revision ``sha`` to ``destination``.

    Runs ``git show <sha>:<fileName>`` from inside ``self.repo_path`` and
    stores its standard output in ``destination``.  A relative
    ``destination`` is resolved against the repository directory because
    the file is opened while that directory is current.

    The call is best-effort: problems are printed, never raised.  As with a
    shell redirection, ``destination`` is created (possibly empty) even
    when ``git show`` fails.

    Args:
      fileName: path of the file inside the repository.
      sha: commit identifier to read the file from.
      destination: path of the file to write.
    """
    # Function-scope import so this method does not rely on the module header.
    import subprocess

    with cd(self.repo_path):
      try:
        # An argument list (no shell) keeps file names that contain spaces
        # or shell metacharacters from being split or interpreted.
        with open(destination, 'wb') as out_file:
          exit_code = subprocess.call(['git', 'show', sha + ":" + fileName],
                                      stdout=out_file)
        if exit_code != 0:
          # A failing git command does not raise; check its status explicitly.
          print("%s %s %s git show exited with status %s"
                % (fileName, sha, destination, exit_code))
      except Exception as e:
        # Deliberately broad: dumping a single file must not abort the caller.
        print("%s %s %s %s" % (fileName, sha, destination, e))
Esempio n. 2
0
def dumpSnapshotsByInterval(srcPath, destPath, ss_interval_len, commitDateMin,
                            commitDateMax):
    """Create dated working-copy snapshots of a git repository.

    Snapshot dates start one day after ``commitDateMin`` and advance in
    steps of ``ss_interval_len * 30`` days while they do not exceed
    ``commitDateMax``; ``commitDateMax`` itself is always added as the last
    snapshot.  For every date whose directory ``destPath/<date>`` does not
    exist yet, ``srcPath`` is copied there with ``Util.copy_dir`` and a
    ``git checkout`` of the latest non-merge commit before that date on the
    active branch is handed to ``gitDumpSnapshot`` from inside the copy.

    Args:
        srcPath: path of the source git repository.
        destPath: directory that receives one sub-directory per snapshot.
        ss_interval_len: positive interval length, in units of 30 days.
        commitDateMin: earliest commit date (date/datetime-like).
        commitDateMax: latest commit date (date/datetime-like).

    Raises:
        ValueError: if ``ss_interval_len`` is not positive; the date loop
            would otherwise never reach ``commitDateMax``.
    """
    if ss_interval_len <= 0:
        raise ValueError(
            "ss_interval_len must be positive, got %r" % (ss_interval_len,))

    print("%s %s %s %s" % (srcPath, destPath, commitDateMin, commitDateMax))

    repo = Repo(srcPath)
    branch = repo.active_branch

    print(branch)

    project_name = getProjName(srcPath)

    print(project_name)

    # Collect every snapshot date first so the periodic snapshots and the
    # final one at commitDateMax share a single code path.
    step = timedelta(days=ss_interval_len * 30)
    snapshot_dates = []
    current = commitDateMin + timedelta(days=1)
    while current <= commitDateMax:
        snapshot_dates.append(current)
        current = current + step
    snapshot_dates.append(commitDateMax)

    for snap_date in snapshot_dates:
        snapshot = destPath + os.sep + str(snap_date)
        print(snapshot)

        # An existing directory is taken as an already-created snapshot.
        if os.path.isdir(snapshot):
            continue

        Util.copy_dir(srcPath, snapshot)
        git_command = ("git checkout `git rev-list -n 1 --no-merges --before=\""
                       + str(snap_date) + "\" " + str(branch) + "`")
        with cd(snapshot):
            gitDumpSnapshot(git_command)
Esempio n. 3
0
    def dumpPatches(self, bug_only=True):
        """Write one ``git diff`` patch file per qualifying file change.

        Creates ``<self.project>/patches`` through ``Util.create_dir`` and,
        for every change of every selected commit in ``self.sha2commit``,
        runs ``git diff -U10 -w <sha>^ <sha> -- <file>`` inside
        ``self.project``.  On success the diff output is stored as
        ``patches/<project><SEP><author date><SEP><file><SEP><sha>.patch``
        where ``SEP`` is ``self.configInfo.SEP`` and path separators in the
        file name are replaced by ``SEP``.

        Changes are skipped when their insertion/deletion count exceeds
        ``ADD_MAX``/``DEL_MAX`` or equals ``ADD_MIN``/``DEL_MIN``.

        Args:
            bug_only: when true (the default) only commits whose ``isbug``
                is not ``False`` are processed; when false every commit is
                processed.
        """
        patch_folder = os.path.join(self.project, 'patches')
        Util.create_dir(patch_folder)

        # Loop-invariant values, looked up once.
        sep = self.configInfo.SEP
        _, project_name = ntpath.split(self.project)

        for co in self.sha2commit.values():
            # Filter on the bug flag only when the caller asked for it; a
            # false bug_only must not discard every commit.  The explicit
            # comparison is kept because isbug's type is not visible here.
            if bug_only and co.isbug == False:
                continue

            for ch in co.changes:
                insertion, deletion, file_name, _language = ch.get()

                if int(deletion) > DEL_MAX or int(insertion) > ADD_MAX:
                    continue
                if int(deletion) == DEL_MIN or int(insertion) == ADD_MIN:
                    continue

                fname = file_name.replace(os.sep, sep)
                patch_name = (project_name
                              + sep + co.getAuthorDate()
                              + sep + fname
                              + sep + co.sha
                              + '.patch')

                # Relative path: the file is opened inside cd(self.project).
                patch_path = os.path.join('patches', patch_name)

                out_str = ('git diff -U10 -w %s^ %s -- %s' %
                           (co.sha, co.sha, file_name))

                with cd(self.project):
                    print(out_str)
                    exitcode, out, err = Util.runCmd(out_str)
                    if exitcode != 0:
                        print("!!Error Git Diff %s" % (err,))
                    else:
                        with open(patch_path, 'w') as patch_file:
                            patch_file.write("%s" % out)
Esempio n. 4
0
def dumpSnapshotsByInterval(srcPath, destPath, ss_interval_len,
         commitDateMin, commitDateMax):
    """Dump periodic snapshots of a git repository into ``destPath``.

    Starting one day after ``commitDateMin`` and stepping by
    ``ss_interval_len * 30`` days up to ``commitDateMax``, then once more
    for ``commitDateMax`` itself, a directory ``destPath/<date>`` is
    created for each date unless it already exists.  Each new directory is
    a copy of ``srcPath`` (``Util.copy_dir``) in which ``gitDumpSnapshot``
    receives a ``git checkout`` of the last non-merge commit before the
    date on the repository's active branch.

    Args:
        srcPath: path of the source git repository.
        destPath: directory that receives the snapshot directories.
        ss_interval_len: positive number of 30-day units between snapshots.
        commitDateMin: earliest commit date (date/datetime-like).
        commitDateMax: latest commit date (date/datetime-like).

    Raises:
        ValueError: if ``ss_interval_len`` is zero or negative, because the
            stepping loop could then never pass ``commitDateMax``.
    """
    if ss_interval_len <= 0:
        raise ValueError(
            "ss_interval_len must be positive, got %r" % (ss_interval_len,))

    print("%s %s %s %s" % (srcPath, destPath, commitDateMin, commitDateMax))

    repo = Repo(srcPath)
    branch = repo.active_branch

    print(branch)

    project_name = getProjName(srcPath)

    print(project_name)

    def dump_snapshot(snap_date):
        # Create the snapshot directory for one date unless it exists.
        snapshot = destPath + os.sep + str(snap_date)
        print(snapshot)
        if os.path.isdir(snapshot):
            return
        Util.copy_dir(srcPath, snapshot)
        git_command = (
            'git checkout `git rev-list -n 1 --no-merges --before="%s" %s`'
            % (snap_date, branch))
        with cd(snapshot):
            gitDumpSnapshot(git_command)

    step = timedelta(days=ss_interval_len * 30)
    start_date = commitDateMin + timedelta(days=1)
    while start_date <= commitDateMax:
        dump_snapshot(start_date)
        start_date = start_date + step

    # Always finish with a snapshot at the upper bound itself.
    dump_snapshot(commitDateMax)
Esempio n. 5
0
def main():
    """Print the cross-entropy commands for every project in a corpus.

    Command line: ``<corpus_dir> <output_dir> <cache_min_order>
    <cache_backoff_weight>``.  For each entry of ``corpus_dir`` two
    ``get_cross_entropy_thread.py`` command lines (forward and
    ``--reverse``) are built and printed.  The commands are only printed:
    the ``os.system`` calls that would execute them are disabled.

    Exits with status 1 when arguments are missing or ``corpus_dir`` is not
    a directory.
    """
    print("Run the run.py for all git corpus inside to directory")

    # argv[1]..argv[4] are all read below, so require every one of them
    # up front instead of failing later with an IndexError.
    if len(sys.argv) < 5:
        print("Pass a top level corpus directory")
        print("usage: %s <corpus_dir> <output_dir> <cache_min_order> "
              "<cache_backoff_weight>" % (sys.argv[0],))
        sys.exit(1)

    proj_dir = sys.argv[1]
    print("corpus directory : %s" % (proj_dir,))

    if not os.path.isdir(proj_dir):
        print("Not a valid directory, please pass a top level corpus directory")
        sys.exit(1)

    output_dir = sys.argv[2]
    print("output directory : %s" % (output_dir,))

    cache_min_order = sys.argv[3]
    cache_backoff_weight = sys.argv[4]
    #lm_setting = sys.argv[5]

    curr_path = os.path.dirname(os.path.realpath(__file__))
    projects = os.listdir(proj_dir)

    # Option tail shared by the forward and the reverse command.
    cache_options = " -b " + cache_backoff_weight + " -c " + cache_min_order

    for prj in projects:
        print('echo "=============  %s  ============="' % (prj,))
        p = proj_dir + os.sep + prj

        run_command_plm = "nohup python get_cross_entropy_thread.py -p " + p \
            + " -d " + output_dir + " --snapshot --append " \
            + cache_options \
            + " > " + prj + ".out 2> " + prj + ".err &"

        run_command_slm = "python get_cross_entropy_thread.py -p " + p \
            + " -d " + output_dir + " --snapshot --reverse --append " \
            + cache_options \
            + " > " + prj + "_r.out 2> " + prj + "_r.err &"

        with cd(curr_path):
            print(run_command_plm)
            #os.system(run_command_plm)
            print(run_command_slm)
            #os.system(run_command_slm)

    print("run4all done!!")
Esempio n. 6
0
def resetSnapShots(projPath):

    print projPath

    #repo = Repo(projPath)
    #branch = repo.active_branch

    #print branch
    #project_name = getProjName(projPath)
    snapshots = os.listdir(projPath)
    
    for s in snapshots:
        snap_dir = os.path.join(projPath,s)
        if os.path.isdir(snap_dir):
	   print snap_dir
           with cd(snap_dir):
#             os.system("git reset --hard HEAD")
             os.system("git clean -f")
Esempio n. 7
0
def dumpSnapshotsBySha(srcPath, destPath, shaList):
    #print srcPath, destPath
    #print len(shaList)
  
    for comp_sha in shaList:

        #print comp_sha
        bug_id, commit_id, sha_date = comp_sha[0],comp_sha[1],comp_sha[2]

        sha_date_str = "%s" % (sha_date.date())
        dir_name = ('__').join((sha_date_str,bug_id))
        #print dir_name
        snapshot = os.path.join(destPath, dir_name)
        
        if not os.path.isdir(snapshot):
            print ">>>>>>>>>> ", snapshot
            Util.copy_dir(srcPath,snapshot)
            git_command = "git checkout -f " + commit_id
            print git_command
            with cd(snapshot):
                Util.runCmd("git reset --hard")
                Util.runCmd(git_command)
                Util.runCmd("git clean -df")
                Util.runCmd("git reset --hard")
Esempio n. 8
0
def dumpSnapshotsBySha(srcPath, destPath, shaList):
    #print srcPath, destPath
    #print len(shaList)

    for comp_sha in shaList:

        #print comp_sha
        bug_id, commit_id, sha_date = comp_sha[0], comp_sha[1], comp_sha[2]

        sha_date_str = "%s" % (sha_date.date())
        dir_name = ('__').join((sha_date_str, bug_id))
        #print dir_name
        snapshot = os.path.join(destPath, dir_name)

        if not os.path.isdir(snapshot):
            print ">>>>>>>>>> ", snapshot
            Util.copy_dir(srcPath, snapshot)
            git_command = "git checkout -f " + commit_id
            print git_command
            with cd(snapshot):
                Util.runCmd("git reset --hard")
                Util.runCmd(git_command)
                Util.runCmd("git clean -df")
                Util.runCmd("git reset --hard")