Exemple #1
0
def exec():
    """Index every regular file in ``../data`` and print the query results.

    The query is everything on the command line after the script name.
    Each file is wrapped in a ``Document`` and added to a fresh
    ``InvertedIndex``; the value returned by ``InvertedIndex.querry`` is
    sorted in ascending order and printed one entry per line.

    NOTE: the function name shadows the built-in ``exec``; it is kept as is
    because callers refer to it by this name.
    """
    terms = sys.argv[1:]
    index = InvertedIndex()

    # List the directory completely before building any document;
    # entries that are not regular files (e.g. sub-directories) are skipped.
    names = []
    for entry in listdir('../data'):
        if isfile(join('../data', entry)):
            names.append(entry)

    for name in names:
        index.add_document(Document('../data/' + name))

    for row in sorted(index.querry(terms)):
        print(row)
def main():
    """Run the semantic-space demo on the sample data in ``./dataA``.

    Reads the documents, adds each one to an ``InvertedIndex``, builds a
    ``SemanticSpace`` over that index, and prints two reports:

    1. the cosine score between the query "Human Computer Interaction" and
       each of the first 9 documents;
    2. the cosine score between every pair of the first 12 terms.

    All scores are printed with three decimal places.
    """
    # The report loops below cover a fixed number of documents and terms.
    doc_count = 9
    term_count = 12

    # Load the sample data that was used in class.
    corpus = read_data("./dataA")

    # Build the (crude, linked-list based) inverted index document by document.
    index = InvertedIndex()
    for doc in corpus:
        index.add_document(doc)

    # Semantic space over the index, then the query folded into that space.
    space = SemanticSpace(index)
    query_vec = space.create_query_vector("Human Computer Interaction")

    # Report 1: query-versus-document similarities (should match the
    # results shown in class).
    print(
        "Showing similarities between our query and documents in semantic space"
    )
    for doc_idx in range(doc_count):
        label = index.docs[doc_idx] + ": "
        print(label + format(space.cosine_with_doc(query_vec, doc_idx), '0.3f'))

    print()

    # Report 2: cosine score for every ordered pair of terms.
    print("Showing similarity between all terms with one another")
    for row in range(term_count):
        print("Similarity with " + index.terms[row][0])
        for col in range(term_count):
            # Term name right-aligned in a 10-character column.
            name = format(index.terms[col][0], ' >10')
            score = format(space.cosine_with_term(row, col), '0.3f')
            print(name + ": " + score)
        print()
 def add_document(self):
     """Add a document to a new index and check the index's type.

     Builds a ``Document`` from ``'test_file'``, adds it to a freshly
     created ``InvertedIndex``, then asserts that the index is an
     ``InvertedIndex`` instance.
     """
     sample = Document('test_file')
     index = InvertedIndex()
     index.add_document(sample)
     self.assertIsInstance(index, InvertedIndex)