def dump2db(self, bug_only):
    """Write one row per changed file of every known commit to the database.

    Does nothing when ``self.configInfo.DATABASE`` is exactly ``False``.
    Otherwise a ``dumpLogs`` handle is opened, the file-changes table is
    created, and for each change of each commit in ``self.sha2commit`` a
    comma-joined record is passed to ``dumpFileChanges``.

    Args:
        bug_only: Accepted for interface compatibility; not read by this
            method.

    Returns:
        None.
    """
    # Identity test kept on purpose: only a literal False disables the dump.
    if self.configInfo.DATABASE is False:
        return

    dl = dumpLogs(self.dbPass, self.configInfo)
    try:
        dl.createFileChangesTable()

        # Only the commit objects are needed, so iterate the values;
        # .values() exists on both Python 2 and Python 3 dicts.
        for co in self.sha2commit.values():
            for ch in co.changes:
                insertion, deletion, file_name, language = ch.get()

                # A file counts as a test file when its name contains "test".
                is_test = "True" if "test" in file_name else "False"

                fields = (
                    toStr(co.project), toStr(co.sha),
                    toStr(language), toStr(file_name), toStr(is_test),
                    toStr(co.committer), toStr(co.commit_date),
                    toStr(co.author), toStr(co.author_date),
                    toStr(co.isbug), toStr(insertion), toStr(deletion),
                )
                dl.dumpFileChanges(','.join(fields))
    finally:
        # Release the handle even when an insert fails part-way through.
        dl.close()
def processLog(self, config = ""):
    """Parse the git log at ``self.log_file`` and emit every commit found.

    The log is read line by line. A line for which ``self.isSha`` returns a
    truthy value starts a new ``Sha`` record, which is appended to
    ``self.shas``. Lines accepted by ``self.isAuthor`` / ``self.isDate`` are
    consumed. Lines before the first ``diff --git`` of a commit are
    accumulated as the log message. Inside a diff, file-header lines create
    patch objects and all remaining lines go to ``self.processPatch``.

    Once the whole log is parsed, each collected sha is written to the
    database (``config_info.DATABASE``), else to the CSV files under
    ``../Results`` (``config_info.CSV``), else printed.

    Args:
        config: Unused. The original code only replaced an empty value with
            ``self.config_info.CONFIG`` and never read it afterwards; the
            parameter is kept so existing callers keep working.

    Returns:
        The ``datetime`` taken right after parsing finished (before output).
    """
    signal.signal(signal.SIGALRM, timeout)

    project_dir = os.path.split(self.log_file)[0].rstrip(os.sep)
    self.project_name = os.path.basename(project_dir)
    print("---------- %s ------------\n" % (self.project_name))

    settings = self.config_info
    dl = None
    csv_files = []  # every CSV handle opened below, for cleanup

    try:
        if settings.DATABASE:
            dl = dumpLogs(self.dbPass, settings)

        if settings.CSV:
            if not os.path.isdir("../Results"):
                os.mkdir("../Results")

            inf1 = open("../Results/" + str(self.project_name) + "ChangeSummary.csv", 'w')
            csv_files.append(inf1)
            fPtrChangeSummary = open("../Results/" + "ChangeSummary.csv", 'w')
            csv_files.append(fPtrChangeSummary)
            inf1.write("project,sha,author,commit_date,is_bug\n")

            inf2 = open("../Results/" + str(self.project_name) + "PatchSummary.csv", 'w')
            csv_files.append(inf2)
            fPtrPatchSummary = open("../Results/" + "PatchSummary.csv", 'w')
            csv_files.append(fPtrPatchSummary)

            # One "Adds" and one "Dels" column per included keyword.
            # TODO: This is C specific, Why is this C specific?
            mockChunk = logChunk("", "C", settings)
            lst = mockChunk.readKeywords([])
            listToDict = {}
            for keyword in [k[0] for k in lst if k[1] == INCLUDED]:
                listToDict[str(keyword) + " Adds"] = 0
                listToDict[str(keyword) + " Dels"] = 0

            inf2.write("project, sha, language, file_name, is_test, method_name,total_add,total_del,%s\n" % ",".join(sorted(listToDict.keys())))

        shaObj = None
        patchObj = None
        is_diff = False
        log_mssg = ""
        is_no_prev_ver = False
        curLogChunk = logChunk("", "C", settings)

        # The context manager closes the log file, which was never closed before.
        with codecs.open(self.log_file, "r", "iso-8859-1") as inf:
            for l in inf:
                try:
                    # Cancel any alarm still pending from the previous line
                    # (presumably armed while processing a patch line -- confirm).
                    signal.alarm(0)
                    sha = self.isSha(l)
                    line = l

                    if sha:
                        shaObj = Sha(self.project_name, sha)
                        # This will become very memory intensive in large git logs.
                        self.shas.append(shaObj)
                        is_diff = False
                        log_mssg = ""
                        continue
                    if self.isAuthor(line, shaObj):
                        continue
                    if self.isDate(line, shaObj):
                        continue

                    fullLine = line
                    line = line.rstrip()

                    if line.startswith('diff --git '):
                        shaObj.setLog(log_mssg)
                        is_diff = True
                        is_no_prev_ver = False
                        continue

                    if not is_diff:
                        # Still in the commit message: keep non-blank lines.
                        if line.strip():
                            log_mssg += line + "\t"
                        continue

                    # From here on the line belongs to a diff.
                    if line.startswith("--- a/"):
                        debug_label = "New diff with previous version: "
                        make_patch = self.createPatch
                    elif line == '--- /dev/null':
                        # earlier file was empty
                        is_no_prev_ver = True
                        continue
                    elif line == '+++ /dev/null':
                        # next file version was empty
                        continue
                    elif is_no_prev_ver and line.startswith("+++ b/"):
                        debug_label = "New diff with no previous version: "
                        make_patch = self.createPatchWithNoPrevVersion
                    else:
                        # Then we reached a content line.
                        self.processPatch(fullLine, patchObj, curLogChunk)
                        continue

                    # A new file patch starts: finish the previous one first.
                    if patchObj is not None:
                        if curLogChunk.header != "":
                            if settings.DEBUG:
                                print(debug_label + line)
                                print("HEADER: " + curLogChunk.header)
                            self.processLastChunk(patchObj, curLogChunk)

                        if settings.DEBUG:
                            print("Resetting.")
                        curLogChunk.reset()
                        curLogChunk.setLang("." + self.cur_lang)  # DOUBLE CHECK ME!

                    patchObj = make_patch(line)
                    shaObj.patches.append(patchObj)

                except TimeExceededError.TimeExceededError:
                    print("Line Timed out, moving to next.")
                    continue

        # Clear timeouts.
        signal.alarm(0)

        # Make sure to get the last patch in the file!
        if curLogChunk.header != "":
            if settings.DEBUG:
                print("Last Patch: " + line)
                print("HEADER: " + curLogChunk.header)
            self.processLastChunk(patchObj, curLogChunk)

        parseFinish = datetime.now()

        if self.shas:  # If the log wasn't empty...
            if settings.DATABASE:
                # NOTE(review): the result of this lookup was never used; the
                # call is kept in case it validates the config -- confirm.
                Config(settings.CONFIG).ConfigSectionMap("Keywords")
                full_title = dumpLogs.getFullTitleString(curLogChunk.getEmptyKeywordDict())

                dl.createSummaryTable()
                if full_title != "":
                    dl.createMethodChangesTable(full_title)

            for s in self.shas:
                if s is None:
                    continue
                if settings.DATABASE:
                    s.dumpSha(dl)
                elif settings.CSV:
                    s.shaToCsv(inf1, inf2, fPtrChangeSummary, fPtrPatchSummary)
                else:
                    s.printSha()
    finally:
        # Release output handles on success and on failure alike.
        if dl is not None:
            print("Closing Time.")
            dl.close()
        for handle in csv_files:
            handle.close()

    print("Sha's processed:")
    print(len(self.shas))

    return parseFinish
def processLog(self):
    """Parse the git log at ``self.log_file`` and dump every commit via ``dumpLogs``.

    A line for which ``self.isSha`` returns a truthy value starts a new
    ``Sha`` record, which is appended to ``self.shas``. Lines accepted by
    ``self.isAuthor`` / ``self.isDate`` are consumed. Lines before the first
    ``diff --git`` of a commit form the log message. Inside a diff,
    file-header lines create patch objects and all other lines are handed to
    ``self.processPatch``. After parsing, every collected sha is passed to
    ``dumpSha`` and the number of shas is printed.

    Returns:
        None.
    """
    project_dir = os.path.split(self.log_file)[0].rstrip(os.sep)
    self.project_name = os.path.basename(project_dir)
    print("---------- %s ------------\n" % (self.project_name))

    dl = dumpLogs()
    try:
        shaObj = None
        patchObj = None
        is_diff = False
        log_mssg = ""
        is_no_prev_ver = False

        # The context manager closes the log file even if parsing fails.
        with codecs.open(self.log_file, "r", "iso-8859-1") as inf:
            for l in inf:
                sha = self.isSha(l)
                line = l

                if sha:
                    shaObj = Sha(self.project_name, sha)
                    self.shas.append(shaObj)
                    is_diff = False
                    log_mssg = ""
                    continue
                if self.isAuthor(line, shaObj):
                    continue
                if self.isDate(line, shaObj):
                    continue

                # NOTE(review): strip() also removes leading whitespace, so
                # diff context lines lose their leading space before they
                # reach processPatch -- confirm this is intended.
                line = line.strip()

                if line.startswith('diff --git '):
                    shaObj.setLog(log_mssg)
                    is_diff = True
                    is_no_prev_ver = False
                    continue

                if not is_diff:
                    # Still in the commit message: keep non-blank lines.
                    if line.strip():
                        log_mssg += line + "\t"
                    continue

                if line.startswith("--- a/"):
                    patchObj = self.createPatch(line)
                    shaObj.patches.append(patchObj)
                elif line == '--- /dev/null':
                    # earlier file was empty
                    is_no_prev_ver = True
                elif line == '+++ /dev/null':
                    # next file version was empty
                    continue
                elif is_no_prev_ver and line.startswith("+++ b/"):
                    patchObj = self.createPatchWithNoPrevVersion(line)
                    shaObj.patches.append(patchObj)
                else:
                    self.processPatch(line, patchObj)

        # Every patch is attached to its sha at creation time. The former
        # extra append of patchObj after the loop duplicated the last patch
        # (or appended None when no patch was seen), so it is gone.

        for s in self.shas:
            if s is not None:
                s.dumpSha(dl)
    finally:
        dl.close()

    # Function-call form prints the same on Python 2 and Python 3.
    print(len(self.shas))
def processLog(self, config=""):
    """Parse the git log at ``self.log_file`` and emit every commit found.

    The log is read line by line. A line for which ``self.isSha`` returns a
    truthy value starts a new ``Sha`` record, which is appended to
    ``self.shas``. Lines accepted by ``self.isAuthor`` / ``self.isDate`` are
    consumed. Lines before the first ``diff --git`` of a commit are
    accumulated as the log message. Inside a diff, file-header lines create
    patch objects and all remaining lines go to ``self.processPatch``.

    Once the whole log is parsed, each collected sha is written to the
    database (``config_info.DATABASE``), else to the CSV files under
    ``../Results`` (``config_info.CSV``), else printed.

    Args:
        config: Unused. The original code only replaced an empty value with
            ``self.config_info.CONFIG`` and never read it afterwards; the
            parameter is kept so existing callers keep working.

    Returns:
        The ``datetime`` taken right after parsing finished (before output).
    """
    signal.signal(signal.SIGALRM, timeout)

    project_dir = os.path.split(self.log_file)[0].rstrip(os.sep)
    self.project_name = os.path.basename(project_dir)
    print("---------- %s ------------\n" % (self.project_name))

    settings = self.config_info
    dl = None
    csv_files = []  # every CSV handle opened below, for cleanup

    try:
        if settings.DATABASE:
            dl = dumpLogs(self.dbPass, settings)

        if settings.CSV:
            if not os.path.isdir("../Results"):
                os.mkdir("../Results")

            inf1 = open(
                "../Results/" + str(self.project_name) + "ChangeSummary.csv",
                'w')
            csv_files.append(inf1)
            fPtrChangeSummary = open("../Results/" + "ChangeSummary.csv", 'w')
            csv_files.append(fPtrChangeSummary)
            inf1.write("project,sha,author,author_email,commit_date,is_bug\n")

            inf2 = open(
                "../Results/" + str(self.project_name) + "PatchSummary.csv",
                'w')
            csv_files.append(inf2)
            fPtrPatchSummary = open("../Results/" + "PatchSummary.csv", 'w')
            csv_files.append(fPtrPatchSummary)

            # One quoted "adds" and one "dels" column per included keyword.
            # TODO: This is C specific, Why is this C specific?
            mockChunk = logChunk("", "C", settings)
            lst = mockChunk.readKeywords([])
            listToDict = {}
            for keyword in [k[0] for k in lst if k[1] == INCLUDED]:
                listToDict["\"" + str(keyword) + "\" adds"] = 0
                listToDict["\"" + str(keyword) + "\" dels"] = 0

            inf2.write(
                "project, sha, language, file_name, is_test, method_name,total_add,total_del,%s\n"
                % ",".join(sorted(listToDict.keys())))

        shaObj = None
        patchObj = None
        is_diff = False
        log_mssg = ""
        is_no_prev_ver = False
        curLogChunk = logChunk("", "C", settings)

        # The context manager closes the log file, which was never closed before.
        with codecs.open(self.log_file, "r", "iso-8859-1") as inf:
            for l in inf:
                try:
                    # Cancel any alarm still pending from the previous line
                    # (presumably armed while processing a patch line -- confirm).
                    signal.alarm(0)
                    sha = self.isSha(l)
                    line = l

                    if sha:
                        shaObj = Sha(self.project_name, sha)
                        # This will become very memory intensive in large git logs.
                        self.shas.append(shaObj)
                        is_diff = False
                        log_mssg = ""
                        continue
                    if self.isAuthor(line, shaObj):
                        continue
                    if self.isDate(line, shaObj):
                        continue

                    fullLine = line
                    line = line.rstrip()

                    if line.startswith('diff --git '):
                        shaObj.setLog(log_mssg)
                        is_diff = True
                        is_no_prev_ver = False
                        continue

                    if not is_diff:
                        # Still in the commit message: keep non-blank lines.
                        if line.strip():
                            log_mssg += line + "\t"
                        continue

                    # From here on the line belongs to a diff.
                    if line.startswith("--- a/"):
                        debug_label = "New diff with previous version: "
                        make_patch = self.createPatch
                    elif line == '--- /dev/null':
                        # earlier file was empty
                        is_no_prev_ver = True
                        continue
                    elif line == '+++ /dev/null':
                        # next file version was empty
                        continue
                    elif is_no_prev_ver and line.startswith("+++ b/"):
                        debug_label = "New diff with no previous version: "
                        make_patch = self.createPatchWithNoPrevVersion
                    else:
                        # Then we reached a content line.
                        self.processPatch(fullLine, patchObj, curLogChunk)
                        continue

                    # A new file patch starts: finish the previous one first.
                    if patchObj is not None:
                        if curLogChunk.header != "":
                            if settings.DEBUG:
                                print(debug_label + line)
                                print("HEADER: " + curLogChunk.header)
                            self.processLastChunk(patchObj, curLogChunk)

                        if settings.DEBUG:
                            print("Resetting.")
                        curLogChunk.reset()
                        curLogChunk.setLang("." + self.cur_lang)  # DOUBLE CHECK ME!

                    patchObj = make_patch(line)
                    shaObj.patches.append(patchObj)

                except TimeExceededError.TimeExceededError:
                    print("Line Timed out, moving to next.")
                    continue

        # Clear timeouts.
        signal.alarm(0)

        # Make sure to get the last patch in the file!
        if curLogChunk.header != "":
            if settings.DEBUG:
                print("Last Patch: " + line)
                print("HEADER: " + curLogChunk.header)
            self.processLastChunk(patchObj, curLogChunk)

        parseFinish = datetime.now()

        if self.shas:  # If the log wasn't empty...
            if settings.DATABASE:
                # NOTE(review): the result of this lookup was never used; the
                # call is kept in case it validates the config -- confirm.
                Config(settings.CONFIG).ConfigSectionMap("Keywords")
                full_title = dumpLogs.getFullTitleString(
                    curLogChunk.getEmptyKeywordDict())

                dl.createSummaryTable()
                if full_title != "":
                    dl.createMethodChangesTable(full_title)

            for s in self.shas:
                if s is None:
                    continue
                if settings.DATABASE:
                    s.dumpSha(dl)
                elif settings.CSV:
                    s.shaToCsv(inf1, inf2, fPtrChangeSummary,
                               fPtrPatchSummary)
                else:
                    s.printSha()
    finally:
        # Release output handles on success and on failure alike.
        if dl is not None:
            print("Closing Time.")
            dl.close()
        for handle in csv_files:
            handle.close()

    print("Sha's processed:")
    print(len(self.shas))

    return parseFinish
import sys
def processLog(self, config = Util.CONFIG):
    """Parse the git log at ``self.log_file``, emitting each commit as it completes.

    A line for which ``self.isSha`` returns a truthy value starts a new
    ``Sha`` record; the previous record, if any, is emitted at that point.
    Lines accepted by ``self.isAuthor`` / ``self.isDate`` are consumed. Lines
    before the first ``diff --git`` of a commit form the log message. Inside
    a diff, file-header lines create patch objects and all other lines are
    handed to ``self.processPatch``. The last record is emitted after the
    loop.

    Output is driven by the ``Util`` flags: ``DATABASE`` dumps to the
    database, ``CSV`` writes the summary files under ``../Results``.

    Args:
        config: Passed to the ``logChunk`` used while parsing; defaults to
            ``Util.CONFIG``.

    Returns:
        None.
    """
    project_dir = os.path.split(self.log_file)[0].rstrip(os.sep)
    self.project_name = os.path.basename(project_dir)
    print("---------- %s ------------\n" % (self.project_name))

    if Util.DATABASE == 1:
        dl = dumpLogs()

    if Util.CSV == 1:
        if not os.path.isdir("../Results"):
            os.mkdir("../Results")

        inf1 = open("../Results/" + str(self.project_name) + "ChangeSummary.csv", 'w')
        fPtrChangeSummary = open("../Results/" + "ChangeSummary.csv", 'a')
        inf1.write("project,sha,author,commit_date,is_bug\n")

        inf2 = open("../Results/" + str(self.project_name) + "PatchSummary.csv", 'w')
        fPtrPatchSummary = open("../Results/" + "PatchSummary.csv", 'a')

        # One "Adds" and one "Dels" column per keyword; readKeywords fills
        # lst in place in this version.
        lst = []
        listToDict = {}
        mockChunk = logChunk("", "C")
        mockChunk.readKeywords(lst)
        for keyword in [sub_list[0] for sub_list in lst]:
            listToDict[str(keyword) + " Adds"] = 0
            listToDict[str(keyword) + " Dels"] = 0

        inf2.write("project, sha, language, file_name, is_test,isExceptionPatch, method_name,total_add,total_del,%s\n" % ",".join(listToDict.keys()))

    shaObj = None
    patchObj = None
    is_diff = False
    log_mssg = ""
    is_no_prev_ver = False
    curLogChunk = logChunk("", "C", config)

    # The context manager closes the log file, which was never closed before.
    with codecs.open(self.log_file, "r", "iso-8859-1") as inf:
        for l in inf:
            sha = self.isSha(l)
            line = l

            if sha:
                # A new commit begins: emit the one just completed.
                if shaObj is not None:
                    if Util.DATABASE:
                        shaObj.dumpSha(dl)
                    else:
                        shaObj.printSha()
                    # NOTE(review): CSV export is treated as independent of
                    # the database/print choice above -- confirm against the
                    # original indentation.
                    if Util.CSV:
                        shaObj.shaToCsv(inf1, inf2, fPtrChangeSummary, fPtrPatchSummary)

                shaObj = Sha(self.project_name, sha)
                if Util.DEBUGLITE:  # Save for testing.
                    # This will become very memory intensive in large git logs.
                    self.shas.append(shaObj)

                is_diff = False
                log_mssg = ""
                continue
            if self.isAuthor(line, shaObj):
                continue
            if self.isDate(line, shaObj):
                continue

            fullLine = line
            line = line.strip()

            if line.startswith('diff --git '):
                shaObj.setLog(log_mssg)
                is_diff = True
                is_no_prev_ver = False
                continue

            if not is_diff:
                # Still in the commit message: keep non-blank lines.
                if line.strip():
                    log_mssg += line + "\t"
                continue

            # From here on the line belongs to a diff.
            if line.startswith("--- a/"):
                debug_label = "New diff with previous version: "
                make_patch = self.createPatch
            elif line == '--- /dev/null':
                # earlier file was empty
                is_no_prev_ver = True
                continue
            elif line == '+++ /dev/null':
                # next file version was empty
                continue
            elif is_no_prev_ver and line.startswith("+++ b/"):
                debug_label = "New diff with no previous version: "
                make_patch = self.createPatchWithNoPrevVersion
            else:
                # Then we reached a content line.
                self.processPatch(fullLine, patchObj, curLogChunk)
                continue

            # A new file patch starts: finish the previous one first.
            if patchObj is not None:
                if curLogChunk.header != "":
                    if Util.DEBUG == 1:
                        print(debug_label + line)
                        print("HEADER: " + curLogChunk.header)
                    self.processLastChunk(patchObj, curLogChunk)

                if Util.DEBUG == 1:
                    print("Resetting.")
                curLogChunk.reset()
                curLogChunk.setLang("." + self.cur_lang)  # DOUBLE CHECK ME!

            patchObj = make_patch(line)
            shaObj.patches.append(patchObj)

    # Make sure to get the last patch in the file!
    if curLogChunk.header != "":
        if Util.DEBUG == 1:
            print("Last Patch: " + line)
            print("HEADER: " + curLogChunk.header)
        self.processLastChunk(patchObj, curLogChunk)

    # Write out last sha.
    if shaObj is not None and Util.DATABASE:
        if Util.DEBUGLITE:
            print("Writing to db.")
        shaObj.dumpSha(dl)

    if Util.DATABASE == 1:
        print("Closing Time.")
        dl.close()

    if Util.CSV == 1:
        # shaObj is None when the log held no commits; the unguarded calls
        # used to raise AttributeError in that case.
        if shaObj is not None:
            shaObj.printSha()
            shaObj.shaToCsv(inf1, inf2, fPtrChangeSummary, fPtrPatchSummary)
        inf1.close()
        inf2.close()
        fPtrChangeSummary.close()
        fPtrPatchSummary.close()
    # NOTE(review): when neither DATABASE nor CSV is set, earlier shas are
    # printed inside the loop but the final one is not -- confirm intent.