def setQueryDict(qID, dDocProps):
    """Normalise a query's text and store its stemmed words in ``queryDict``.

    Text is collected from the ``constants.N``, ``constants.W`` and
    ``constants.A`` entries of ``dDocProps`` (in that order; missing entries
    are skipped). Every line has its non-alphanumeric characters replaced by
    spaces, is lower-cased and split on whitespace. The resulting words are
    filtered through ``helperFunctions.remStopWords`` and stemmed with the
    Porter stemmer.

    Args:
        qID: Key under which the stemmed word list is stored.
        dDocProps: Mapping from field identifier to an iterable of text lines.

    Side effects:
        Assigns the list of stemmed words to ``queryDict[qID]``.
    """
    global queryDict

    # Gather the lines of every field that is present, preserving N, W, A order.
    lines = []
    for field in (constants.N, constants.W, constants.A):
        if field in dDocProps:
            lines.extend(dDocProps[field])

    # Anything that is not a letter or digit acts as a word separator.
    tokens = []
    for line in lines:
        tokens.extend(re.sub('[^a-zA-Z0-9]', ' ', line).lower().split())

    kept = helperFunctions.remStopWords(tokens)

    stemmer = porter.PorterStemmer()
    # This stemmer API takes the word plus the inclusive start/end indices.
    queryDict[qID] = [stemmer.stem(token, 0, len(token) - 1) for token in kept]
def putinDPLace(place, list1):
    """Append the ``place`` marker to the ``dPlace`` entry of each indexed word.

    Every string in ``list1`` is split on whitespace, filtered through
    ``helperFunctions.remStopWords`` and stemmed with ``stemList``. For each
    resulting word, ``place`` is appended to that word's existing value in the
    module-level ``dPlace`` mapping; a word seen for the first time starts
    from the empty string.

    Args:
        place: Marker string appended to each word's entry.
        list1: Iterable of text lines to process.

    Side effects:
        Mutates ``dPlace`` in place; a word occurring several times gets the
        marker appended once per occurrence.
    """
    global dPlace

    for line in list1:
        for word in stemList(helperFunctions.remStopWords(line.split())):
            # Always start from this word's own entry. Carrying a value over
            # from a previously processed word would give a first-time word
            # the markers that belong to that other word.
            dPlace[word] = dPlace.get(word, "") + place
def setUserQueryDict(Query):
    """Normalise a free-text query and store its stemmed words under key ``0``.

    Non-alphanumeric characters in ``Query`` are replaced by spaces, the text
    is lower-cased and split on whitespace, the words are filtered through
    ``helperFunctions.remStopWords`` and each remaining word is stemmed with
    the Porter stemmer.

    Args:
        Query: The raw query string.

    Side effects:
        Assigns the list of stemmed words to ``queryDict[0]``.
    """
    global queryDict

    # Anything that is not a letter or digit acts as a word separator.
    tokens = re.sub('[^a-zA-Z0-9]', ' ', Query).lower().split()
    kept = helperFunctions.remStopWords(tokens)

    stemmer = porter.PorterStemmer()
    # This stemmer API takes the word plus the inclusive start/end indices.
    queryDict[0] = [stemmer.stem(token, 0, len(token) - 1) for token in kept]
def getDocStuff(dDocProps):
    """Tokenise one document and build its word-count header.

    The ``constants.T``, ``constants.W`` and ``constants.A`` entries of
    ``dDocProps`` are processed in that order (missing entries are skipped).
    Each present entry is passed to ``putinDPLace`` with the marker ``"1"``,
    ``"2"`` or ``"3"`` respectively, and its lines are added to the document
    text. Lines are normalised (non-alphanumerics become spaces, lower-cased,
    split on whitespace), filtered through ``helperFunctions.remStopWords``
    and stemmed with ``PorterStemmer``.

    Args:
        dDocProps: Mapping from field identifier to an iterable of text lines.

    Returns:
        A three-element list:
        ``[header, stemmed_words, raw_text]`` where ``header`` is the total and
        unique stemmed-word counts, each formatted by
        ``helperFunctions.makeFixedLengthStr`` with ``constants.docWordCntLen``,
        joined by ``constants.space`` and terminated by ``constants.newLine``;
        ``stemmed_words`` is the list of stemmed words; and ``raw_text`` is the
        normalised words (before stop-word removal) joined by single spaces.

    Side effects:
        Calls ``putinDPLace`` for each present field and adds the number of
        stemmed words to ``constants.allDocsLen``.
    """
    field_markers = (
        (constants.T, "1"),
        (constants.W, "2"),
        (constants.A, "3"),
    )

    present_sections = []
    for field, marker in field_markers:
        if field in dDocProps:
            present_sections.append(dDocProps[field])
            putinDPLace(marker, dDocProps[field])

    lines = []
    for section in present_sections:
        lines.extend(section)

    # Anything that is not a letter or digit acts as a word separator.
    tokens = []
    for line in lines:
        tokens.extend(re.sub('[^a-zA-Z0-9]', ' ', line).lower().split())

    # Capture the unfiltered text before the stop-word pass touches the list.
    raw_text = " ".join(tokens)

    kept = helperFunctions.remStopWords(tokens)

    stemmer = PorterStemmer()
    # This stemmer API takes the word plus the inclusive start/end indices.
    stemmed = [stemmer.stem(token, 0, len(token) - 1) for token in kept]

    unique_count = len(set(stemmed))
    total_count = len(stemmed)
    constants.allDocsLen += total_count

    header = (
        helperFunctions.makeFixedLengthStr(total_count, constants.docWordCntLen)
        + constants.space
        + helperFunctions.makeFixedLengthStr(unique_count, constants.docWordCntLen)
        + constants.newLine
    )
    return [header, stemmed, raw_text]
def getDocStuff(dDocProps):
    """Tokenise one document and build its word-count summary string.

    Only the ``T``, ``W`` and ``A`` entries of ``dDocProps`` contribute text
    (in that order; missing entries are skipped). Lines are split on
    whitespace as-is, with no case folding or punctuation stripping, then
    filtered through ``helperFunctions.remStopWords`` and stemmed with
    ``PorterStemmer``.

    Args:
        dDocProps: Mapping from field identifier to an iterable of text lines.

    Returns:
        A two-element list ``[summary, stemmed_words]`` where ``summary`` is
        the total and unique stemmed-word counts, each formatted by
        ``makeFixedLengthStr(..., 6)`` and separated by one space, and
        ``stemmed_words`` is the list of stemmed words.
    """
    lines = []
    for field in (T, W, A):
        if field in dDocProps:
            lines.extend(dDocProps[field])

    tokens = []
    for line in lines:
        tokens.extend(line.split())

    kept = helperFunctions.remStopWords(tokens)

    stemmer = PorterStemmer()
    # This stemmer API takes the word plus the inclusive start/end indices.
    stemmed = [stemmer.stem(token, 0, len(token) - 1) for token in kept]

    unique_count = len(set(stemmed))
    summary = (
        makeFixedLengthStr(len(stemmed), 6)
        + " "
        + makeFixedLengthStr(unique_count, 6)
    )
    return [summary, stemmed]