class TestMySearchEngine(unittest.TestCase):
    """Exercise ``SearchEngine.search`` against the ``idealdict`` fixture."""

    def setUp(self):
        # Fresh engine per test, built with the name "database" and preloaded
        # with the reference data from idealdict.
        self.x = SearchEngine("database")
        store = self.x.database
        store.update(idealdict)

    def tearDown(self):
        # Drop the engine, then delete every file in the current working
        # directory whose name starts with "database.".
        # NOTE(review): presumably these are files the engine creates on
        # disk -- confirm against SearchEngine.
        del self.x
        leftovers = [
            name for name in os.listdir('.') if name.startswith('database.')
        ]
        for name in leftovers:
            os.remove(name)

    def test_searchengine_type(self):
        # An ordinary string query must produce a dict.
        self.assertIsInstance(self.x.search("round"), dict)

    def test_MyError_type_number(self):
        # A non-string query (here an int) must be rejected with ValueError.
        self.assertRaises(ValueError, self.x.search, 15)

    def test_empty_string(self):
        # The empty query yields an empty dict.
        hits = self.x.search('')
        self.assertIsInstance(hits, dict)
        self.assertEqual(hits, {})

    def test_search_by_token(self):
        # Searching for a known token returns exactly its fixture entry.
        hits = self.x.search('The')
        self.assertIsInstance(hits, dict)
        self.assertEqual(hits, idealdict['The'])
# Map each parsed command-line option onto the file path it configures.
for x, y in opts:
    if x == '-d':
        dictionary_file = y
    elif x == '-p':
        postings_file = y
    elif x == '-q':
        query_file = y
    elif x == '-o':
        results_file = y
    else:
        raise AssertionError('unhandled option')

# All four paths are mandatory: print the usage line and exit with status 2
# when any of them was not supplied.
if (
    dictionary_file is None
    or postings_file is None
    or query_file is None
    or results_file is None
):
    print(f'usage: {sys.argv[0]} -d dictionary-file -p postings-file -q file-of-queries -o output-file-of-results')
    sys.exit(2)

# The document file name is fixed; it is not configurable from the command line.
document_file = 'document.txt'
dictionary = load_dictionary(dictionary_file)
documents = load_documents(document_file)
search_engine = SearchEngine(dictionary, documents, postings_file)
query, relevant_doc_ids = read_query(query_file)

# Mode 'w' truncates the results file and starts writing at offset 0, so no
# explicit seek is needed before the first write.
with open(results_file, 'w') as f:
    try:
        result = search_engine.search(query, relevant_doc_ids)
        # Results go out as a single space-separated line.
        f.write(' '.join(str(i) for i in result) + '\n')
    except ParseError as e:
        # A query that cannot be parsed is reported in the results file
        # instead of aborting the run.
        f.write(f'parse error encountered: {e}')
import re
import signal

from searchengine import SearchEngine
from tokenstore import TokenStore


# TODO: reformat command line interface
def command_line(arg=None):
    """Prompt on stdin until an acceptable search query has been entered.

    The typed line is lower-cased. When the caller supplied no ``arg`` and
    the line contains ``" --"``, the text after the first ``" --"`` is taken
    as the option and removed from the query. Every character other than
    ASCII letters, digits and spaces is then stripped from the query. A
    cleaned query longer than 32 characters is rejected with a message and
    the user is prompted again.

    Args:
        arg: Option to return with the query. When truthy, options typed by
            the user are not parsed out of the input.

    Returns:
        A ``(words, option)`` tuple: the cleaned query split on whitespace,
        and the option (``arg`` itself when nothing was parsed).
    """
    while True:
        raw_query = input("Please type in what do you want to search: ").lower()
        # Start every attempt from the caller-supplied value so that an option
        # typed on a rejected (too long) attempt does not leak into the next.
        option = arg
        if " --" in raw_query and not option:
            # maxsplit=1: input containing several " --" must not blow up the
            # two-name unpacking; everything after the first one is the option.
            raw_query, option = raw_query.split(" --", 1)
        query = re.sub(r"[^A-Za-z0-9 ]+", "", raw_query)
        if len(query) > 32:
            print("the query is too long, please shorten the search word")
        else:
            return query.split(), option


def _exit_on_interrupt(_signum, _frame):
    """SIGINT handler: leave the program with exit status 0."""
    # Raise SystemExit directly instead of calling exit(), which is a helper
    # injected by the site module and is not guaranteed to exist.
    raise SystemExit(0)


if __name__ == '__main__':
    store = TokenStore()
    search_engine = SearchEngine(store)
    signal.signal(signal.SIGINT, _exit_on_interrupt)
    # Interactive loop: runs until the process is interrupted.
    while True:
        search_queries, arg = command_line()
        if arg == "all":
            # The "--all" option passes False as the second positional argument.
            search_engine.search(search_queries, False)
        else:
            search_engine.search(search_queries)