Esempio n. 1
0
def setQueryDict(qID, dDocProps):
    """Store the stemmed terms of one query in the global ``queryDict``.

    The text lines found in *dDocProps* under ``constants.N``,
    ``constants.W`` and ``constants.A`` (in that order; absent keys are
    skipped) are gathered. Every character outside ``[a-zA-Z0-9]`` is
    replaced by a space, the lines are lower-cased and split on whitespace,
    and the tokens are passed through ``helperFunctions.remStopWords``.
    Each remaining token is stemmed with ``porter.PorterStemmer``.

    The resulting list is stored as ``queryDict[qID]``; nothing is returned.
    """
    global queryDict

    # Gather the raw lines field by field, preserving the N, W, A order.
    raw_lines = []
    for field in (constants.N, constants.W, constants.A):
        if field in dDocProps:
            raw_lines.extend(dDocProps[field])

    # Normalise each line to lower-case alphanumeric tokens.
    tokens = []
    for line in raw_lines:
        tokens.extend(re.sub('[^a-zA-Z0-9]', ' ', line).lower().split())

    kept = helperFunctions.remStopWords(tokens)

    # stem() takes the word plus the inclusive start/end indices to stem.
    stemmer = porter.PorterStemmer()
    queryDict[qID] = [stemmer.stem(tok, 0, len(tok) - 1) for tok in kept]
Esempio n. 2
0
def putinDPLace(place, list1):
    """Append *place* to the ``dPlace`` entry of every indexed word in *list1*.

    Each string in *list1* is split on whitespace, passed through
    ``helperFunctions.remStopWords`` and then through ``stemList``. For every
    word that comes out, *place* is concatenated onto that word's current
    value in the global ``dPlace`` mapping; a word with no entry yet starts
    from the empty string. A word that occurs several times has *place*
    appended once per occurrence.

    Args:
        place: String tag to record (callers in this file pass "1", "2", "3").
        list1: Iterable of text lines.
    """
    global dPlace
    for text in list1:
        for word in stemList(helperFunctions.remStopWords(text.split())):
            # Always start from this word's own entry. The earlier version
            # kept one shared accumulator across words, so a word seen for
            # the first time inherited the previous word's place string.
            dPlace[word] = dPlace.get(word, "") + place
Esempio n. 3
0
def setUserQueryDict(Query):
    """Store the stemmed terms of a free-text query as ``queryDict[0]``.

    Every character of *Query* outside ``[a-zA-Z0-9]`` is replaced by a
    space; the result is lower-cased and split on whitespace. The tokens are
    passed through ``helperFunctions.remStopWords`` and each remaining token
    is stemmed with ``porter.PorterStemmer``.

    The resulting list is written to the global ``queryDict`` under key 0;
    nothing is returned.
    """
    global queryDict

    cleaned = re.sub('[^a-zA-Z0-9]', ' ', Query)
    tokens = helperFunctions.remStopWords(cleaned.lower().split())

    # stem() takes the word plus the inclusive start/end indices to stem.
    stemmer = porter.PorterStemmer()
    queryDict[0] = [stemmer.stem(tok, 0, len(tok) - 1) for tok in tokens]
Esempio n. 4
0
def getDocStuff(dDocProps):
    """Build the word statistics, stems and cleaned text for one document.

    The fields ``constants.T``, ``constants.W`` and ``constants.A`` of
    *dDocProps* are processed in that order (absent keys are skipped). Each
    present field is also handed to ``putinDPLace`` with the tag "1", "2" or
    "3" respectively. The collected lines are reduced to lower-case
    alphanumeric tokens, passed through ``helperFunctions.remStopWords`` and
    stemmed with ``PorterStemmer``.

    Side effect: ``constants.allDocsLen`` is increased by the number of
    stemmed words.

    Returns:
        A three-element list:
        ``[header, stems, text]`` where *header* is the stemmed-word count and
        the distinct-stem count, each formatted by
        ``helperFunctions.makeFixedLengthStr`` with width
        ``constants.docWordCntLen``, separated by ``constants.space`` and
        terminated by ``constants.newLine``; *stems* is the list of stemmed
        words; *text* is the space-joined tokens as they were before being
        passed to ``remStopWords``.
    """
    # (place tag, field key) pairs in processing order.
    sections = (("1", constants.T), ("2", constants.W), ("3", constants.A))

    lines = []
    for tag, key in sections:
        if key in dDocProps:
            putinDPLace(tag, dDocProps[key])
            lines.extend(dDocProps[key])

    # Normalise each line to lower-case alphanumeric tokens.
    raw_words = []
    for line in lines:
        raw_words.extend(re.sub('[^a-zA-Z0-9]', ' ', line).lower().split())

    # Snapshot the tokens before they are handed to remStopWords.
    snapshot = copy.deepcopy(raw_words)
    content_words = helperFunctions.remStopWords(raw_words)

    # stem() takes the word plus the inclusive start/end indices to stem.
    stemmer = PorterStemmer()
    stems = [stemmer.stem(w, 0, len(w) - 1) for w in content_words]

    total = len(stems)
    distinct = len(set(stems))
    constants.allDocsLen = constants.allDocsLen + total

    total_field = helperFunctions.makeFixedLengthStr(total, constants.docWordCntLen)
    distinct_field = helperFunctions.makeFixedLengthStr(distinct, constants.docWordCntLen)
    header = total_field + constants.space + distinct_field + constants.newLine

    return [header, stems, " ".join(snapshot)]
Esempio n. 5
0
def getDocStuff(dDocProps):
    """Build the word-count header and stemmed word list for one document.

    The lines stored in *dDocProps* under the module-level keys ``T``, ``W``
    and ``A`` (in that order; absent keys are skipped) are split on
    whitespace, passed through ``helperFunctions.remStopWords`` and stemmed
    with ``PorterStemmer``. The ``B`` and ``N`` fields are not included.

    Returns:
        A two-element list ``[header, stems]`` where *header* is the
        stemmed-word count and the distinct-stem count, each formatted by
        ``makeFixedLengthStr`` with width 6 and separated by one space, and
        *stems* is the list of stemmed words.
    """
    # Gather the raw lines field by field, preserving the T, W, A order.
    lines = []
    for key in (T, W, A):
        if key in dDocProps:
            lines.extend(dDocProps[key])

    # Plain whitespace tokenisation: no case folding or punctuation removal.
    words = []
    for line in lines:
        words.extend(line.split())

    words = helperFunctions.remStopWords(words)

    # stem() takes the word plus the inclusive start/end indices to stem.
    stemmer = PorterStemmer()
    stems = [stemmer.stem(w, 0, len(w) - 1) for w in words]

    total_field = makeFixedLengthStr(len(stems), 6)
    distinct_field = makeFixedLengthStr(len(set(stems)), 6)
    return [total_field + " " + distinct_field, stems]