def test_save_load(self):
    """Round-trip the index through save()/load(), then re-run a query test."""
    with io.BytesIO() as buffer:
        self.sim_index.save(buffer)
        # Rewind so load() starts reading at the first byte save() wrote.
        buffer.seek(0)
        restored_index = MemorySimIndex.load(buffer)

    # Swap the reloaded index in for the fixture's index and confirm that
    # the simple-scorer query test still passes against it.
    self.sim_index = restored_index
    self.test_query_simple_scorer()
def sample_sim_index():
    """Demonstrate MemorySimIndex: build, inspect, query, save and reload.

    Indexes four university homepage URLs, prints the postings list for
    'university' and the documents containing both 'university' and
    'california', runs the query "stanford university" under the
    'simple_count' and 'tfidf' scorers, then writes the index to
    ``myindex.idx`` in the current directory and reads it back.

    All results are printed to stdout; nothing is returned.
    """
    index_path = "myindex.idx"

    # Create an in-memory index and query it
    print()
    print("Creating in-memory index of university homepages")
    sim_index = MemorySimIndex()
    sim_index.index_urls('http://www.stanford.edu/',
                         'http://www.berkeley.edu',
                         'http://www.ucla.edu',
                         'http://www.mit.edu')

    print("Postings list for 'university':")
    pprint(sim_index.postings_list('university'))
    print("Pages containing terms 'university' and 'california'")
    pprint(list(sim_index.docnames_with_terms('university', 'california')))

    # Issue some similarity queries
    print()
    print("Similarity search for query 'stanford university' (simple scorer)")
    sim_index.set_query_scorer('simple_count')
    pprint(list(sim_index.query("stanford university")))

    print()
    print("Similarity search for query 'stanford university' (tf.idf scorer)")
    sim_index.set_query_scorer('tfidf')
    pprint(list(sim_index.query("stanford university")))

    # Save the index to disk, then load it back in.
    # Binary mode is required: save()/load() operate on a byte stream (they
    # are exercised with io.BytesIO elsewhere), so a text-mode file would
    # raise TypeError on Python 3 and could mangle the data on Windows.
    print()
    print("Saving index to disk")
    with open(index_path, "wb") as index_file:
        sim_index.save(index_file)

    print()
    print("Loading index from disk")
    with open(index_path, "rb") as index_file:
        sim_index2 = MemorySimIndex.load(index_file)

    print()
    print("Pages containing terms 'university' and 'california' in loaded index")
    pprint(list(sim_index2.docnames_with_terms('university', 'california')))
def sample_sim_index():
    """Walk through the MemorySimIndex API end to end, printing each result.

    Steps performed:
      1. Index four university homepage URLs into a new MemorySimIndex.
      2. Print the postings list for 'university' and the document names
         containing both 'university' and 'california'.
      3. Run the query "stanford university" with the 'simple_count'
         scorer and again with the 'tfidf' scorer.
      4. Save the index to ``myindex.idx`` (current directory), load it
         into a second index, and repeat the two-term lookup on the copy.

    Returns nothing; all output goes to stdout.
    """
    index_path = "myindex.idx"

    # Create an in-memory index and query it
    print()
    print("Creating in-memory index of university homepages")
    sim_index = MemorySimIndex()
    sim_index.index_urls('http://www.stanford.edu/',
                         'http://www.berkeley.edu',
                         'http://www.ucla.edu',
                         'http://www.mit.edu')

    print("Postings list for 'university':")
    pprint(sim_index.postings_list('university'))
    print("Pages containing terms 'university' and 'california'")
    pprint(list(sim_index.docnames_with_terms('university', 'california')))

    # Issue some similarity queries
    print()
    print("Similarity search for query 'stanford university' (simple scorer)")
    sim_index.set_query_scorer('simple_count')
    pprint(list(sim_index.query("stanford university")))

    print()
    print("Similarity search for query 'stanford university' (tf.idf scorer)")
    sim_index.set_query_scorer('tfidf')
    pprint(list(sim_index.query("stanford university")))

    # Save the index to disk, then load it back in.
    # The file must be opened in binary mode: save()/load() work on byte
    # streams (they are exercised with io.BytesIO elsewhere), so text mode
    # would fail with TypeError on Python 3 and risk newline translation
    # corrupting the data on Windows.
    print()
    print("Saving index to disk")
    with open(index_path, "wb") as index_file:
        sim_index.save(index_file)

    print()
    print("Loading index from disk")
    with open(index_path, "rb") as index_file:
        sim_index2 = MemorySimIndex.load(index_file)

    print()
    print(
        "Pages containing terms 'university' and 'california' in loaded index")
    pprint(list(sim_index2.docnames_with_terms('university', 'california')))