Ejemplo n.º 1
0
def makeTermStr(term,byteStart,byteEnd):
    """Build one record line: term field, start offset, end offset.

    The term is formatted by ``helperFunctions.makeFixedLengthSpace`` using
    ``constants.termSize``; both byte offsets are formatted by
    ``helperFunctions.makeFixedLengthStr`` using ``constants.byteLen``.
    The three fields are separated by ``constants.space`` and the record is
    terminated by ``constants.newLine``.

    NOTE(review): the helpers presumably pad/truncate to a fixed width so
    that records can be located by offset -- confirm in helperFunctions.
    """
    termField = helperFunctions.makeFixedLengthSpace(term, constants.termSize)
    startField = helperFunctions.makeFixedLengthStr(byteStart, constants.byteLen)
    endField = helperFunctions.makeFixedLengthStr(byteEnd, constants.byteLen)
    return (
        termField
        + constants.space
        + startField
        + constants.space
        + endField
        + constants.newLine
    )
Ejemplo n.º 2
0
def writetoFile(filename,d):
    """Write the results of every query in ``d`` to ``filename``.

    ``d`` maps a query number to an iterable of result entries. For each
    entry ``rs`` one line is written in the form::

        <query_num> Q0 <makeFixedLengthStr(rs[0], 4)> <rank> <rs[1]> Exp

    where ``rank`` starts at 1 and restarts for every query.

    NOTE(review): ``rs[0]`` / ``rs[1]`` are presumably a document id and its
    score -- confirm against the caller.

    The file is opened in text write mode (truncating any existing content)
    and is closed even if formatting or writing a line raises.
    """
    # The context manager guarantees the handle is released on any exception.
    with open(filename, "w") as out:
        for query_num, result in d.items():
            # enumerate(start=1) gives the per-query 1-based rank.
            for rank, rs in enumerate(result, start=1):
                out.write(
                    str(query_num) + " Q0 "
                    + helperFunctions.makeFixedLengthStr(rs[0], 4)
                    + " " + str(rank) + " " + str(rs[1])
                    + " Exp\n"
                )
Ejemplo n.º 3
0
def getDocStuff(dDocProps):
    """Tokenise, filter and stem the text fields of one document.

    The fields stored under ``constants.T``, ``constants.W`` and
    ``constants.A`` (when present in ``dDocProps``) are each handed to
    ``putinDPLace`` with the tags "1", "2" and "3" respectively, then their
    lines are concatenated in that order. Every non-alphanumeric character
    is replaced by a space, the text is lower-cased and split on whitespace.

    Side effect: ``constants.allDocsLen`` is increased by the number of
    stemmed words of this document.

    Returns a three-element list:
      [0] a record string holding the stemmed-word count and the
          unique-stemmed-word count, each formatted with
          ``helperFunctions.makeFixedLengthStr`` and ``constants.docWordCntLen``,
          separated by ``constants.space`` and ended by ``constants.newLine``;
      [1] the list of stemmed words (after ``helperFunctions.remStopWords``);
      [2] the unfiltered, unstemmed tokens joined by single spaces.
    """
    fieldLists = []

    def collectField(tag, key):
        # Remember the field's lines and register them via putinDPLace.
        if key in dDocProps:
            fieldLines = dDocProps[key]
            fieldLists.append(fieldLines)
            putinDPLace(tag, fieldLines)

    collectField("1", constants.T)
    collectField("2", constants.W)
    collectField("3", constants.A)

    # Flatten only after all putinDPLace calls have run.
    allLines = [line for fieldLines in fieldLists for line in fieldLines]

    tokens = [
        token
        for line in allLines
        for token in re.sub('[^a-zA-Z0-9]', ' ', line).lower().split()
    ]

    # Snapshot the raw tokens before filtering; remStopWords may presumably
    # modify its argument in place -- TODO confirm.
    rawTokens = copy.deepcopy(tokens)
    keptTokens = helperFunctions.remStopWords(tokens)

    stemmer = PorterStemmer()
    stemmedWords = [stemmer.stem(tok, 0, len(tok) - 1) for tok in keptTokens]

    totalCount = len(stemmedWords)
    uniqueCount = len(set(stemmedWords))
    constants.allDocsLen += totalCount

    countsRecord = (
        helperFunctions.makeFixedLengthStr(totalCount, constants.docWordCntLen)
        + constants.space
        + helperFunctions.makeFixedLengthStr(uniqueCount, constants.docWordCntLen)
        + constants.newLine
    )

    return [countsRecord, stemmedWords, " ".join(rawTokens)]
Ejemplo n.º 4
0
def makeTermStr(byteStart,byteEnd):
    """Build one record line holding a start and an end byte offset.

    Each offset is formatted by ``helperFunctions.makeFixedLengthStr`` with a
    length argument of 8; the two fields are separated by ``constants.space``
    and the record is terminated by ``constants.newLine``.
    """
    startField = helperFunctions.makeFixedLengthStr(byteStart, 8)
    endField = helperFunctions.makeFixedLengthStr(byteEnd, 8)
    return startField + constants.space + endField + constants.newLine