def test_blood(self):
    """search("blood") must report the two expected positions in test1.txt."""
    occurrences = [
        indexator.Position(7, 12, 1),
        indexator.Position(1, 6, 2),
    ]
    found = self.searchEngine.search("blood")
    self.assertEqual(found, {'test1.txt': occurrences})
def test_context_expand_to_sentence(self):
    """A size-1 window in testSentence.txt expands to "All we need is blood."."""
    target = indexator.Position(24, 28, 1)
    window = searchEngine.ContextWindow.makeWindowGreatAgain(
        1, 'testSentence.txt', target)
    window.expandToSentence()

    # Reference window built directly from the expected field values.
    wanted = searchEngine.ContextWindow.initWithData(
        "What do we need? All we need is blood. Pain pain pain pain",
        [indexator.Position(24, 28, 1)],
        38,
        17,
        "All we need is blood.",
        "testSentence.txt",
        1)
    self.assertEqual(window, wanted)
def test_context_expand_to_sentence_two_tokens(self):
    """Two united size-1 windows expand to the single enclosing sentence."""
    targets = [indexator.Position(21, 23, 1), indexator.Position(24, 28, 1)]
    windows = [
        searchEngine.ContextWindow.makeWindowGreatAgain(
            1, 'testSentence.txt', target)
        for target in targets
    ]

    # Only the first element of the union is expanded and compared.
    merged = searchEngine.ContextWindow().unionWindows(windows)
    first = merged[0]
    first.expandToSentence()

    wanted = searchEngine.ContextWindow.initWithData(
        "What do we need? All we need is blood. Pain pain pain pain",
        [indexator.Position(21, 23, 1), indexator.Position(24, 28, 1)],
        38,
        17,
        "All we need is blood.",
        "testSentence.txt",
        1)
    self.assertEqual(first, wanted)
def test_context_two_windows(self):
    """Two size-2 windows on line 1 of test.txt are expected to unite into one."""
    targets = [indexator.Position(20, 22, 1), indexator.Position(32, 35, 1)]
    windows = [
        searchEngine.ContextWindow.makeWindowGreatAgain(2, 'test.txt', target)
        for target in targets
    ]
    merged = searchEngine.ContextWindow().unionWindows(windows)

    wanted = searchEngine.ContextWindow.initWithData(
        "All we need is, all we need is, all we need is",
        [indexator.Position(20, 22, 1), indexator.Position(32, 35, 1)],
        43,
        12,
        "is, all we need is, all we need",
        "test.txt",
        1)
    # unionWindows returns a list, so compare against a one-element list.
    self.assertEqual(merged, [wanted])
def test_need(self):
    """searchQuery('need') must report the expected positions in two files."""
    in_test0 = [
        indexator.Position(7, 11, 1),
        indexator.Position(8, 12, 2),
        indexator.Position(8, 12, 3),
    ]
    in_test2 = [
        indexator.Position(7, 11, 1),
        indexator.Position(23, 27, 1),
        indexator.Position(8, 12, 2),
    ]
    wanted = {'test0.txt': in_test0, 'test2.txt': in_test2}
    found = self.searchEngine.searchQuery('need')
    self.assertEqual(found, wanted)
def test_we(self):
    """search('we') must report the expected positions in two files."""
    in_test0 = [
        indexator.Position(4, 6, 1),
        indexator.Position(5, 7, 2),
        indexator.Position(5, 7, 3),
    ]
    in_test2 = [
        indexator.Position(4, 6, 1),
        indexator.Position(20, 22, 1),
        indexator.Position(5, 7, 2),
    ]
    wanted = {'test0.txt': in_test0, 'test2.txt': in_test2}
    found = self.searchEngine.search('we')
    self.assertEqual(found, wanted)
def makeWindowGreatAgain(size, filename, position):
    """
    Method for creating a context window using the source file and
    token position

    The window holds the target token plus up to `size` word tokens
    (category 'alpha' or 'digit') on each side, including the non-word
    tokens that sit between those words and the target. Collection on
    either side stops at the first token containing a newline.
    @param size: size of the context window (number of word tokens
        taken on each side of the target token)
    @param filename: path to the source file
    @param position: position of the target token
    @return: ContextWindow with filename, lineNumber, lineString,
        targetToken, targetTokensPositions, leftBoundary, rightBoundary
        and string filled in
    @raise ValueError: if the file has no line number position.posLine
    """

    def takeWords(rawTokens):
        """Join token strings up to and including the size-th word token."""
        taken = []
        pending = []  # non-word tokens are held back until a word follows
        words = 0
        for rawToken in rawTokens:
            if words >= size or '\n' in rawToken.string:
                break
            pending.append(rawToken.string)
            if rawToken.category in ('alpha', 'digit'):
                # A word confirms the separators buffered before it.
                taken.extend(pending)
                pending = []
                words += 1
        return "".join(taken)

    contextWindow = ContextWindow()
    contextWindow.filename = filename
    contextWindow.lineNumber = position.posLine

    # Line numbers are 1-based; stop reading as soon as the line is found.
    lineString = None
    with open(filename, 'r') as f:
        for lineNumber, line in enumerate(f, start=1):
            if lineNumber == position.posLine:
                lineString = line
                break
    if lineString is None:
        raise ValueError(
            "line %s does not exist in %s" % (position.posLine, filename))
    contextWindow.lineString = lineString

    contextWindow.targetToken = lineString[position.posBegin:position.posEnd]
    contextWindow.targetTokensPositions = [
        indexator.Position(position.posBegin, position.posEnd,
                           position.posLine)
    ]

    right = takeWords(tokenizer.Tokenizer().genclasstokenize(
        lineString[position.posEnd:]))
    contextWindow.rightBoundary = position.posEnd + len(right)

    # The left side is tokenized back-to-front. Slice first and reverse
    # afterwards: lineString[posBegin - 1::-1] would wrap around to the
    # end of the line when posBegin is 0 and leak trailing text into the
    # left context.
    reversedPrefix = lineString[:position.posBegin][::-1]
    left = takeWords(tokenizer.Tokenizer().genclasstokenize(
        reversedPrefix))[::-1]
    contextWindow.leftBoundary = position.posBegin - len(left)

    contextWindow.string = left + contextWindow.targetToken + right
    return contextWindow
i += 1 rawTokensLeft = tokenizer.Tokenizer().genclasstokenize(lineString[19::-1]) i = 0 for rawToken in rawTokensLeft: if i < 3: left = rawToken.string[::-1] + left leftArray.append(rawToken) if (rawToken.category == 'alpha') or (rawToken.category == 'digit'): i += 1 # print(targetToken) # print(right) # print(left) # print(left + targetToken + right) pos2 = indexator.Position(20, 22, 1) pos1 = indexator.Position(23, 27, 1) context1 = ContextWindow.makeWindowGreatAgain(5, 'test.txt', pos1) context2 = ContextWindow.makeWindowGreatAgain(3, 'test.txt', pos2) contexts = [] contexts.append(context1) contexts.append(context2) unionContexts = ContextWindow().unionWindows(contexts) # context1.expandToSentence() # print(context1.string) # string = "all we need is, all we need is" print(unionContexts[0].markTarget()) # pos1 = indexator.Position(20, 22, 0) # pos2 = indexator.Position(32, 35, 0)
def test_we_is(self):
    """searchQuery('we is') must report the expected positions per file."""
    in_test0 = [
        indexator.Position(4, 6, 1),
        indexator.Position(5, 7, 2),
        indexator.Position(5, 7, 3),
        indexator.Position(12, 14, 1),
        indexator.Position(13, 15, 2),
        indexator.Position(13, 15, 3),
    ]
    in_test2 = [
        indexator.Position(4, 6, 1),
        indexator.Position(20, 22, 1),
        indexator.Position(5, 7, 2),
        indexator.Position(12, 14, 1),
        indexator.Position(28, 30, 1),
        indexator.Position(13, 15, 2),
    ]
    wanted = {'test0.txt': in_test0, 'test2.txt': in_test2}
    found = self.searchEngine.searchQuery('we is')
    self.assertEqual(found, wanted)
def test_context_many_windows(self):
    """Six windows over three files are expected to unite into four."""
    # (window size, source file, target position) for every input window.
    requests = [
        (2, 'test.txt', indexator.Position(20, 22, 1)),
        (2, 'test.txt', indexator.Position(32, 35, 1)),
        (1, 'testtest.txt', indexator.Position(7, 12, 1)),
        (8, 'testtesttest.txt', indexator.Position(20, 22, 1)),
        (2, 'testtesttest.txt', indexator.Position(28, 30, 1)),
        (2, 'testtesttest.txt', indexator.Position(1, 4, 2)),
    ]
    windows = [
        searchEngine.ContextWindow.makeWindowGreatAgain(size, path, target)
        for size, path, target in requests
    ]
    merged = searchEngine.ContextWindow().unionWindows(windows)

    build = searchEngine.ContextWindow.initWithData
    wanted = [
        build(
            "All we need is, all we need is, all we need is",
            [indexator.Position(20, 22, 1), indexator.Position(32, 35, 1)],
            43, 12, "is, all we need is, all we need", "test.txt", 1),
        build(
            "Blood, blood, blood",
            [indexator.Position(7, 12, 1)],
            19, 0, "Blood, blood, blood", "testtest.txt", 1),
        build(
            "All we need is, all we need is,\n",
            [indexator.Position(20, 22, 1), indexator.Position(28, 30, 1)],
            30, 0, "All we need is, all we need is", "testtesttest.txt", 1),
        build(
            " all we need is",
            [indexator.Position(1, 4, 2)],
            12, 1, "all we need", "testtesttest.txt", 2),
    ]
    self.assertEqual(merged, wanted)
def test_context_zero_size(self):
    """With size 0 the window string is just the target token "we"."""
    target = indexator.Position(20, 22, 1)
    window = searchEngine.ContextWindow.makeWindowGreatAgain(
        0, 'test.txt', target)
    shown = window.string
    self.assertEqual(shown, "we")
def test_context_large_size(self):
    """With size 8 the window string is expected to be the full phrase."""
    target = indexator.Position(20, 22, 1)
    window = searchEngine.ContextWindow.makeWindowGreatAgain(
        8, 'test.txt', target)
    shown = window.string
    self.assertEqual(
        shown, "All we need is, all we need is, all we need is")
def test_context_line_not_exists(self):
    """Requesting line 2 of test.txt is expected to raise ValueError."""
    target = indexator.Position(20, 22, 2)
    # Callable form of assertRaises: the factory is invoked with the
    # remaining arguments and must raise ValueError.
    self.assertRaises(
        ValueError,
        searchEngine.ContextWindow.makeWindowGreatAgain,
        2, 'test.txt', target)