def search():
    """Render the search page; on POST, run the query and show the results.

    GET renders the empty form; POST reads the term from the form, searches
    the lower-cased term, and renders the same template with the hit list
    and its size.
    """
    if request.method != 'POST':
        # Plain GET: just show the empty search form.
        return render_template("search.html")
    term = request.form['searchTermInput']
    hits = SearchEngine().search(term.lower())
    return render_template("search.html", resultList=hits, listSize=len(hits))
# Derive a numeric document id from the CSV file name (e.g. ".../12.csv" -> 12).
filename = int(os.path.split(file)[1].split(".csv")[0])
# Bulk-load the generated snippet documents into Elasticsearch.
helpers.bulk(es, genData(snippets, filename))
# Interactive query REPL: reads "<type>, <query>" lines until EOF (Ctrl+D,
# which makes input() raise EOFError and ends the loop).
while True:
    print("\n\n0. Standard query")
    print("1. Allows positional indexing")
    print("2. Allows wildcard terms")
    print("3. Allows both wildcards and positional indexing")
    print("Type a query and mention the type of query. Ex - \"0, Standard query\" ")
    print("Ctrl + D to exit")
    # NOTE(review): input is split on ',' into exactly two parts, so a query
    # containing a comma raises ValueError here — assumes comma-free queries.
    queryType, query = [w.strip() for w in input().split(',')]
    start = time.time()
    res = engine.search(query, int(queryType))
    end = time.time()
    # Elasticsearch-style response envelope: wall-clock time, hit count, hits.
    modified_output = {"took": end - start, "total": len(res), "hits": []}
    for doc_q in res:
        for doc in doc_q:
            # doc[0] presumably holds (file_id, row_number) — TODO confirm
            # against engine.search's return format.
            res_file = os.path.join(dataPath, str(doc[0][0]) + ".csv")
            with open(res_file) as fd:
                reader = csv.DictReader(fd)
                row_no = 0
                # Linear scan to the target row; the "- 2" offset presumably
                # accounts for the header line plus 1-based row numbering —
                # verify against how the indexer numbered rows.
                for row in reader:
                    if row_no == doc[0][1] - 2:
                        current_row = row
                        break
                    row_no += 1
                # NOTE(review): current_row is not used within this visible
                # span — the snippet appears truncated; presumably it is
                # appended to modified_output["hits"] further down.
# Smoke-test the search engine: print the top-5 results for a few queries.
from searchEngine import SearchEngine

se = SearchEngine("index")
for query in ("cristina lopes", "machine learning", "ACM",
              "master of software engineering"):
    print(se.search(query, 5))

# Debug helper (disabled): load a partial index pickle and dump its entries.
# import pickle
#
# file = open("pIndex1.pkl", "rb")
# d = pickle.load(file)
# file.close()
#
# print(len(d))
# for k, v in d.items():
#     print(k, ": ", v)

# Design notes:
# - Linked list or set of postings? A set requires implementing __eq__ etc.
#   and __hash__.
# - Use stemming to cut down on the number of entries in the indices.
# - Merging strategy: keep an index for every letter, create a partial index,
#   then go through that sorted index and load each letter into memory.
class Test(unittest.TestCase):
    """Unit tests for SearchEngine search/searchQuery and ContextWindow.

    setUp writes small text fixtures to the working directory, indexes three
    of them into 'TestDatabase', and opens a SearchEngine over that database;
    tearDown deletes the engine and every test*/TestDatabase* file.
    """

    def setUp(self):
        # Fixture files: test0-2 are indexed; test/testtest/testtesttest and
        # testSentence are used directly by the ContextWindow tests below.
        with open('test0.txt', 'w') as f:
            f.write('All we need is,\n all we need is,\n all we need is')
        with open('test1.txt', 'w') as f:
            f.write('Blood, blood,\n blood')
        with open('test2.txt', 'w') as f:
            f.write('All we need is, all we need is,\n all we need is')
        with open('test.txt', 'w') as f:
            f.write('All we need is, all we need is, all we need is')
        with open('testtest.txt', 'w') as f:
            f.write('Blood, blood, blood')
        with open('testtesttest.txt', 'w') as f:
            f.write('All we need is, all we need is,\n all we need is')
        with open('testSentence.txt', 'w') as f:
            f.write(
                'What do we need? All we need is blood. Pain pain pain pain')
        indexer = Indexator('TestDatabase')
        indexer.indexize('test0.txt')
        indexer.indexize('test1.txt')
        indexer.indexize('test2.txt')
        self.searchEngine = SearchEngine("TestDatabase")

    # unittests for search

    def test_input_type_number(self):
        # Non-string queries must raise ValueError.
        with self.assertRaises(ValueError):
            self.searchEngine.search(13)

    def test_input_type_not_exists(self):
        # A term absent from the index yields an empty result dict.
        self.assertEqual(self.searchEngine.search('вискас'), {})

    def test_we(self):
        # Expected Position(start, end, line) hits for 'we' in the two
        # indexed files that contain it.
        expected = {
            'test0.txt': [
                indexator.Position(4, 6, 1),
                indexator.Position(5, 7, 2),
                indexator.Position(5, 7, 3)
            ],
            'test2.txt': [
                indexator.Position(4, 6, 1),
                indexator.Position(20, 22, 1),
                indexator.Position(5, 7, 2)
            ]
        }
        self.assertEqual(self.searchEngine.search('we'), expected)

    def test_blood(self):
        expected = {
            'test1.txt': [indexator.Position(7, 12, 1),
                          indexator.Position(1, 6, 2)]
        }
        self.assertEqual(self.searchEngine.search("blood"), expected)

    # unittests for searchQuery

    def test__query_input_type_number(self):
        with self.assertRaises(ValueError):
            self.searchEngine.searchQuery(13)

    def test_query_input_type_not_exists(self):
        self.assertEqual(self.searchEngine.searchQuery('вискас'), {})

    def test_we_is(self):
        # Multi-word query: positions for both 'we' and 'is' merged per file.
        expected = {
            'test0.txt': [
                indexator.Position(4, 6, 1),
                indexator.Position(5, 7, 2),
                indexator.Position(5, 7, 3),
                indexator.Position(12, 14, 1),
                indexator.Position(13, 15, 2),
                indexator.Position(13, 15, 3)
            ],
            'test2.txt': [
                indexator.Position(4, 6, 1),
                indexator.Position(20, 22, 1),
                indexator.Position(5, 7, 2),
                indexator.Position(12, 14, 1),
                indexator.Position(28, 30, 1),
                indexator.Position(13, 15, 2)
            ]
        }
        self.assertEqual(self.searchEngine.searchQuery('we is'), expected)

    def test_need(self):
        expected = {
            'test0.txt': [
                indexator.Position(7, 11, 1),
                indexator.Position(8, 12, 2),
                indexator.Position(8, 12, 3)
            ],
            'test2.txt': [
                indexator.Position(7, 11, 1),
                indexator.Position(23, 27, 1),
                indexator.Position(8, 12, 2)
            ]
        }
        self.assertEqual(self.searchEngine.searchQuery('need'), expected)

    # unittests for contexts

    def test_context(self):
        # A window of 2 words on each side of the token at chars 20-22.
        pos = indexator.Position(20, 22, 1)
        context = searchEngine.ContextWindow.makeWindowGreatAgain(
            2, 'test.txt', pos)
        self.assertEqual(context.string, "is, all we need is")

    def test_context_line_not_exists(self):
        # Line 2 does not exist in test.txt (single-line fixture).
        pos = indexator.Position(20, 22, 2)
        with self.assertRaises(ValueError):
            searchEngine.ContextWindow.makeWindowGreatAgain(2, 'test.txt',
                                                            pos)

    def test_context_large_size(self):
        # Window larger than the line clamps to the whole line.
        pos = indexator.Position(20, 22, 1)
        context = searchEngine.ContextWindow.makeWindowGreatAgain(
            8, 'test.txt', pos)
        self.assertEqual(context.string,
                         "All we need is, all we need is, all we need is")

    def test_context_zero_size(self):
        # Zero-size window is just the token itself.
        pos = indexator.Position(20, 22, 1)
        context = searchEngine.ContextWindow.makeWindowGreatAgain(
            0, 'test.txt', pos)
        self.assertEqual(context.string, "we")

    def test_context_two_windows(self):
        # Overlapping windows on the same line merge into a single window.
        poss = [indexator.Position(20, 22, 1), indexator.Position(32, 35, 1)]
        contexts = [
            searchEngine.ContextWindow.makeWindowGreatAgain(
                2, 'test.txt', poss[0]),
            searchEngine.ContextWindow.makeWindowGreatAgain(
                2, 'test.txt', poss[1])
        ]
        contextUnion = searchEngine.ContextWindow().unionWindows(contexts)
        targetTokensPositions = [
            indexator.Position(20, 22, 1),
            indexator.Position(32, 35, 1)
        ]
        # initWithData args presumably: (line text, target positions,
        # window end, window start, window string, filename, line no) —
        # TODO confirm against ContextWindow.initWithData's signature.
        expected = searchEngine.ContextWindow.initWithData(
            "All we need is, all we need is, all we need is",
            targetTokensPositions, 43, 12, "is, all we need is, all we need",
            "test.txt", 1)
        expectedList = []
        expectedList.append(expected)
        self.assertEqual(contextUnion, expectedList)

    def test_context_many_windows(self):
        # Windows from several files: merge overlaps within a file, keep
        # separate windows across files/lines.
        poss = [
            indexator.Position(20, 22, 1),
            indexator.Position(32, 35, 1),
            indexator.Position(7, 12, 1),
            indexator.Position(20, 22, 1),
            indexator.Position(28, 30, 1),
            indexator.Position(1, 4, 2)
        ]
        contexts = [
            searchEngine.ContextWindow.makeWindowGreatAgain(
                2, 'test.txt', poss[0]),
            searchEngine.ContextWindow.makeWindowGreatAgain(
                2, 'test.txt', poss[1]),
            searchEngine.ContextWindow.makeWindowGreatAgain(
                1, 'testtest.txt', poss[2]),
            searchEngine.ContextWindow.makeWindowGreatAgain(
                8, 'testtesttest.txt', poss[3]),
            searchEngine.ContextWindow.makeWindowGreatAgain(
                2, 'testtesttest.txt', poss[4]),
            searchEngine.ContextWindow.makeWindowGreatAgain(
                2, 'testtesttest.txt', poss[5])
        ]
        contextUnion = searchEngine.ContextWindow().unionWindows(contexts)
        targetTokensPositions1 = [
            indexator.Position(20, 22, 1),
            indexator.Position(32, 35, 1)
        ]
        expected1 = searchEngine.ContextWindow.initWithData(
            "All we need is, all we need is, all we need is",
            targetTokensPositions1, 43, 12,
            "is, all we need is, all we need", "test.txt", 1)
        targetTokensPositions2 = [indexator.Position(7, 12, 1)]
        expected2 = searchEngine.ContextWindow.initWithData(
            "Blood, blood, blood", targetTokensPositions2, 19, 0,
            "Blood, blood, blood", "testtest.txt", 1)
        targetTokensPositions3 = [
            indexator.Position(20, 22, 1),
            indexator.Position(28, 30, 1)
        ]
        expected3 = searchEngine.ContextWindow.initWithData(
            "All we need is, all we need is,\n", targetTokensPositions3, 30,
            0, "All we need is, all we need is", "testtesttest.txt", 1)
        targetTokensPositions4 = [indexator.Position(1, 4, 2)]
        expected4 = searchEngine.ContextWindow.initWithData(
            " all we need is", targetTokensPositions4, 12, 1, "all we need",
            "testtesttest.txt", 2)
        expectedList = []
        expectedList.append(expected1)
        expectedList.append(expected2)
        expectedList.append(expected3)
        expectedList.append(expected4)
        self.assertEqual(contextUnion, expectedList)

    def test_context_expand_to_sentence(self):
        # expandToSentence widens the window to full sentence boundaries.
        pos = indexator.Position(24, 28, 1)
        context = searchEngine.ContextWindow.makeWindowGreatAgain(
            1, 'testSentence.txt', pos)
        context.expandToSentence()
        targetTokensPositions = [indexator.Position(24, 28, 1)]
        expected = searchEngine.ContextWindow.initWithData(
            "What do we need? All we need is blood. Pain pain pain pain",
            targetTokensPositions, 38, 17, "All we need is blood.",
            "testSentence.txt", 1)
        self.assertEqual(context, expected)

    def test_context_expand_to_sentence_two_tokens(self):
        # Two adjacent tokens: union first, then expand to the sentence.
        poss = [indexator.Position(21, 23, 1), indexator.Position(24, 28, 1)]
        contexts = [
            searchEngine.ContextWindow.makeWindowGreatAgain(
                1, 'testSentence.txt', poss[0]),
            searchEngine.ContextWindow.makeWindowGreatAgain(
                1, 'testSentence.txt', poss[1])
        ]
        contextUnion = searchEngine.ContextWindow().unionWindows(contexts)
        contextUnion[0].expandToSentence()
        context = contextUnion[0]
        targetTokensPositions = [
            indexator.Position(21, 23, 1),
            indexator.Position(24, 28, 1)
        ]
        expected = searchEngine.ContextWindow.initWithData(
            "What do we need? All we need is blood. Pain pain pain pain",
            targetTokensPositions, 38, 17, "All we need is blood.",
            "testSentence.txt", 1)
        self.assertEqual(context, expected)

    # def test_query_context(self):
    #     expected = {
    #         'test.txt': [
    #             indexator.Position(4, 6, 1),
    #             indexator.Position(5, 7, 2),
    #             indexator.Position(5, 7, 3),
    #             indexator.Position(12, 14, 1),
    #             indexator.Position(13, 15, 2),
    #             indexator.Position(13, 15, 3)],
    #         'test2.txt': [
    #             indexator.Position(4, 6, 1),
    #             indexator.Position(20, 22, 1),
    #             indexator.Position(5, 7, 2),
    #             indexator.Position(12, 14, 1),
    #             indexator.Position(28, 30, 1),
    #             indexator.Position(13, 15, 2)]}
    #     print(searchEngine.ContextWindow.makeWindowGreatAgain(
    #         3, 'test0.txt', indexator.Position(12, 14, 1),))
    #     self.assertEqual(self.searchEngine.searchQueryWindow('blood pain', 3), expected)

    def tearDown(self):
        # NOTE(review): calling __del__ directly is unusual — presumably it
        # releases the engine's database files so they can be removed below;
        # an explicit close() method would be clearer. Confirm before changing.
        self.searchEngine.__del__()
        # Remove the database files and every fixture written by setUp.
        files = os.listdir(path=".")
        for file in files:
            if file.startswith('TestDatabase'):
                os.remove(file)
            if file.startswith('test'):
                os.remove(file)