def makeTermStr(term, byteStart, byteEnd):
    """Build one fixed-width record line for a term and its byte range.

    The line is the concatenation of:
      * ``term`` formatted by ``helperFunctions.makeFixedLengthSpace`` to
        ``constants.termSize``,
      * ``constants.space``,
      * ``byteStart`` formatted by ``helperFunctions.makeFixedLengthStr`` to
        ``constants.byteLen``,
      * ``constants.space``,
      * ``byteEnd`` formatted the same way,
      * ``constants.newLine``.

    Returns the assembled string.
    """
    # Operands are evaluated left to right, matching the field order above.
    return (
        helperFunctions.makeFixedLengthSpace(term, constants.termSize)
        + constants.space
        + helperFunctions.makeFixedLengthStr(byteStart, constants.byteLen)
        + constants.space
        + helperFunctions.makeFixedLengthStr(byteEnd, constants.byteLen)
        + constants.newLine
    )
def writetoFile(filename, d):
    """Write ranked results for every query in ``d`` to ``filename``.

    ``d`` maps a query number to an iterable of result entries. For each
    entry ``rs`` one line is written in the form::

        <query_num> Q0 <rs[0] via makeFixedLengthStr(.., 4)> <rank> <rs[1]> Exp

    where ``rank`` starts at 1 for every query and increases by one per
    entry. The file is opened in "w" mode, so existing content is replaced.

    Returns None.
    """
    # The context manager guarantees the handle is closed even if formatting
    # or writing a row raises part-way through.
    with open(filename, "w") as out:
        for query_num, result in d.items():
            for rank, rs in enumerate(result, start=1):
                out.write(
                    str(query_num)
                    + " Q0 "
                    + helperFunctions.makeFixedLengthStr(rs[0], 4)
                    + " "
                    + str(rank)
                    + " "
                    + str(rs[1])
                    + " Exp\n"
                )
def getDocStuff(dDocProps):
    """Tokenise, filter and stem the text fields of one document.

    The fields looked up in ``dDocProps`` are ``constants.T``,
    ``constants.W`` and ``constants.A`` (in that order). Every field that is
    present is also passed to ``putinDPLace`` with the tag "1", "2" or "3"
    respectively.

    Processing steps:
      1. Flatten the present fields into a single list of lines.
      2. Replace every character outside ``[a-zA-Z0-9]`` with a space,
         lower-case the line and split it on whitespace.
      3. Keep a copy of the raw word list, then pass the words through
         ``helperFunctions.remStopWords``.
      4. Stem every remaining word with ``PorterStemmer``.

    Side effect: the number of stemmed words is added to
    ``constants.allDocsLen``.

    Returns a three-element list:
      ``[counts_line, stemmed_words, raw_text]`` where ``counts_line`` holds
      the stemmed-word count and the distinct stemmed-word count (both
      formatted to ``constants.docWordCntLen``) separated by
      ``constants.space`` and terminated by ``constants.newLine``, and
      ``raw_text`` is the unfiltered, unstemmed words joined by single
      spaces.
    """
    # Collect the fields that exist, registering each with its tag as we go.
    field_lists = []
    for tag, key in (("1", constants.T), ("2", constants.W), ("3", constants.A)):
        if key in dDocProps:
            field_lists.append(dDocProps[key])
            putinDPLace(tag, dDocProps[key])

    lines = [line for field in field_lists for line in field]

    # Compile once; the same pattern is applied to every line.
    non_alnum = re.compile('[^a-zA-Z0-9]')
    raw_words = []
    for line in lines:
        raw_words.extend(non_alnum.sub(' ', line).lower().split())

    # Snapshot before stop-word removal, in case the helper edits in place.
    unfiltered = copy.deepcopy(raw_words)
    kept = helperFunctions.remStopWords(raw_words)

    stemmer = PorterStemmer()
    stemmed = [stemmer.stem(word, 0, len(word) - 1) for word in kept]

    distinct_count = len(set(stemmed))
    total_count = len(stemmed)
    constants.allDocsLen += total_count

    counts_line = (
        helperFunctions.makeFixedLengthStr(total_count, constants.docWordCntLen)
        + constants.space
        + helperFunctions.makeFixedLengthStr(distinct_count, constants.docWordCntLen)
        + constants.newLine
    )
    return [counts_line, stemmed, " ".join(unfiltered)]
def makeTermStr(byteStart, byteEnd):
    """Build one fixed-width record line holding a byte range.

    Both ``byteStart`` and ``byteEnd`` are formatted by
    ``helperFunctions.makeFixedLengthStr`` with a length of 8, separated by
    ``constants.space`` and terminated by ``constants.newLine``.

    Returns the assembled string.

    NOTE(review): a three-argument function with this same name appears
    earlier in this source. If both live in the same module, this later
    definition replaces the earlier one -- confirm that is intended.
    """
    return (
        helperFunctions.makeFixedLengthStr(byteStart, 8)
        + constants.space
        + helperFunctions.makeFixedLengthStr(byteEnd, 8)
        + constants.newLine
    )