Beispiel #1
0
# Maps each document id ('D<n>') to the path of the file it was built from.
DOCS_PATH = dict()


# Directory holding the documents to index, taken from the first
# command-line argument.
directory = sys.argv[1]

# Inverted index: token -> [0, (doc_id, value), (doc_id, value), ...].
# The leading 0 is a slot created with every new token entry; it is not
# updated anywhere in this section.
H = dict()

# List the directory once so that N and the loop below see the same entries.
entries = os.listdir(directory)
# N counts every directory entry, including those skipped by the suffix filter.
N = len(entries)

# Only files with one of these suffixes are indexed.
INDEXABLE_SUFFIXES = (".txt", ".html", ".story")

for i, filename in enumerate(entries):
    if not filename.endswith(INDEXABLE_SUFFIXES):
        continue

    path = os.path.join(directory, filename)
    # NOTE: i is the zero-based position of the file among all directory
    # entries, not a running count of indexed documents.
    print('Documentos Indexados: ', i)
    doc = Document(i+1, filename, directory)
    vector = doc.to_vector()
    doc_id = 'D'+str(doc.getID())

    # Append one posting per token; setdefault creates the [0] header the
    # first time a token is seen.
    has_tokens = False
    for token in vector:
        has_tokens = True
        H.setdefault(token, [0]).append((doc_id, vector[token]))

    # Register the document once, and only when it contributed at least one
    # token, so documents with an empty vector stay unregistered.
    if has_tokens:
        DOCS_PATH[doc_id] = path
        DOC_REFERENCE[doc_id] = 0
print('ÍNDICE INVERTIDO')