def exec():
    """Build an inverted index from every file in ../data and print the
    documents matching the command-line query, in sorted order.

    NOTE(review): the function name shadows the builtin ``exec`` —
    consider renaming if callers permit.
    """
    # Query terms come straight from the command line.
    query_terms = sys.argv[1:]

    index = InvertedIndex()
    data_dir = '../data'
    for entry in listdir(data_dir):
        # Skip subdirectories; only regular files are indexed.
        if isfile(join(data_dir, entry)):
            index.add_document(Document(data_dir + '/' + entry))

    # "querry" is the method name defined on InvertedIndex — kept as-is.
    for result in sorted(index.querry(query_terms)):
        print(result)
def main():
    """Replicate the in-class LSA example: index the sample data, build a
    semantic space via SVD, then print query/document and term/term
    cosine similarities.
    """
    # Read in the sample data we used in class.
    documents = read_data("./dataA")

    # Build a crude inverted index using a simple (inefficient!) linked
    # list class, then add every document to it.
    inv_ind = InvertedIndex()
    for doc in documents:
        inv_ind.add_document(doc)

    # Create our semantic space / SVD computation and fold in the query.
    space = SemanticSpace(inv_ind)
    query_vec = space.create_query_vector("Human Computer Interaction")

    # Similarity between each document and the query vector — results
    # should match those of class.
    print(
        "Showing similarities between our query and documents in semantic space"
    )
    # NOTE(review): 9 documents hard-coded to the class dataset — confirm.
    for doc_idx in range(9):
        print(f"{inv_ind.docs[doc_idx]}: {space.cosine_with_doc(query_vec, doc_idx):0.3f}")
    print()

    # Pairwise cosine similarity of every pair of terms in the space.
    print("Showing similarity between all terms with one another")
    # NOTE(review): 12 terms hard-coded to the class dataset — confirm.
    for i in range(12):
        print("Similarity with " + inv_ind.terms[i][0])
        for j in range(12):
            print(f"{inv_ind.terms[j][0]: >10}: {space.cosine_with_term(i, j):0.3f}")
        print()
def add_document(self):
    """Verify that adding a document leaves the index an InvertedIndex.

    NOTE(review): the name lacks the ``test_`` prefix, so unittest
    discovery will skip this method — confirm whether that is intended.
    """
    doc = Document('test_file')
    index = InvertedIndex()
    index.add_document(doc)
    self.assertIsInstance(index, InvertedIndex)