def parseLineForKeywords(self, line, lineType, keywords, keywordDict, blockContext = []):
    """
    Record which included keywords a changed line contributes to.

    line         -- text of the changed line; it is first passed through
                    self.removeExcludedKeywords.
    lineType     -- must be ADD or REMOVE (asserted).
    keywords     -- sequence of keyword entries; entry[0] is the keyword
                    text and entry[1] is compared against INCLUDED.
    keywordDict  -- counts dictionary handed to incrementDict under the
                    keys "<keyword> Adds" / "<keyword> Dels".
    blockContext -- block keywords surrounding the line. When it equals [],
                    the keywords are matched against the line text itself;
                    otherwise the counting is delegated to
                    incrementBlockContext. The default list is never
                    mutated here.

    Returns keywordDict, or the result of incrementBlockContext in the
    block case.
    """
    # Open question from the original author: how do we handle block
    # statements where only the internal part was modified?
    assert lineType in (ADD, REMOVE)

    line = self.removeExcludedKeywords(line, keywords)

    # Longest keywords first, in case one keyword is contained inside
    # another (e.g. ut_a inside ut_ad).
    keywords = sorted(keywords, key=lambda entry: len(entry[0]), reverse=True)

    if Util.DEBUG:
        try:
            print("LINE TO PARSE FOR KEYWORD:" + line)
        except:
            print("LINE TO PARSE FOR KEYWORD:" + unicode(line, 'utf-8', errors='ignore'))

    includedKeywords = [entry for entry in keywords if entry[1] == INCLUDED]

    if blockContext != []:
        # Inside a block: sum over all of the block keywords instead.
        return self.incrementBlockContext(keywordDict, lineType, includedKeywords, blockContext)

    remaining = line
    for entry in includedKeywords:
        (matchedText, matched) = self.keywordMatch(entry[0], remaining)
        if not matched:
            continue

        # Drop the matched text so a later keyword cannot count it again.
        remaining = remaining.replace(matchedText, "")

        if lineType == ADD:
            incrementDict(str(matchedText) + " Adds", keywordDict, 1)
        elif lineType == REMOVE:
            incrementDict(str(matchedText) + " Dels", keywordDict, 1)
        else:
            # Only reachable when the assertion at the top is disabled.
            # Original author's note: this case is probably not handled
            # correctly for blocks.
            print("Unmodified")
            assert(0)

    return keywordDict
def incrementBlockContext(self, keywordDict, lineType, includedKeywords, blockContext):
    """
    Count one add or delete for every keyword in blockContext.

    keywordDict      -- counts dictionary handed to incrementDict under the
                        keys "<keyword> Adds" / "<keyword> Dels".
    lineType         -- ADD selects the " Adds" key, REMOVE the " Dels"
                        key; any other value counts nothing.
    includedKeywords -- keyword entries; entry[0] is the keyword text,
                        entry[1] is expected to be INCLUDED and entry[2]
                        to be BLOCK for any entry named in blockContext.
    blockContext     -- iterable of block keyword texts to count.

    Each block keyword must equal entry[0] of some included keyword;
    otherwise "Invalid block keyword." is printed and an assertion fails.

    Returns keywordDict.
    """
    for blockKeyword in blockContext:
        # Validate against the first included keyword with the same text.
        for entry in includedKeywords:
            if blockKeyword == entry[0]:
                assert(entry[1] == INCLUDED and entry[2] == BLOCK)
                break
        else:
            # The search finished without finding a matching keyword.
            print("Invalid block keyword.")
            assert(False)

        if lineType == ADD:
            incrementDict(str(blockKeyword) + " Adds", keywordDict, 1)
        elif lineType == REMOVE:
            incrementDict(str(blockKeyword) + " Dels", keywordDict, 1)

    return keywordDict