def test_blood(self):
     """Searching for "blood" must return the two positions listed for test1.txt."""
     bloodPositions = [
         indexator.Position(7, 12, 1),
         indexator.Position(1, 6, 2),
     ]
     found = self.searchEngine.search("blood")
     self.assertEqual(found, {'test1.txt': bloodPositions})
 def test_context_expand_to_sentence(self):
     """A size-1 window, once expanded, must equal the window for "All we need is blood."."""
     window = searchEngine.ContextWindow.makeWindowGreatAgain(
         1, 'testSentence.txt', indexator.Position(24, 28, 1))
     window.expandToSentence()

     # Reference window built directly from the expected field values.
     sentenceWindow = searchEngine.ContextWindow.initWithData(
         "What do we need? All we need is blood. Pain pain pain pain",
         [indexator.Position(24, 28, 1)],
         38,
         17,
         "All we need is blood.",
         "testSentence.txt",
         1)
     self.assertEqual(window, sentenceWindow)
 def test_context_expand_to_sentence_two_tokens(self):
     """Two size-1 windows are united, and the first united window is expanded to its sentence."""
     tokenPositions = [
         indexator.Position(21, 23, 1),
         indexator.Position(24, 28, 1),
     ]
     windows = [
         searchEngine.ContextWindow.makeWindowGreatAgain(
             1, 'testSentence.txt', tokenPosition)
         for tokenPosition in tokenPositions
     ]
     merged = searchEngine.ContextWindow().unionWindows(windows)
     merged[0].expandToSentence()
     actual = merged[0]

     # The united window is expected to keep both target positions.
     sentenceWindow = searchEngine.ContextWindow.initWithData(
         "What do we need? All we need is blood. Pain pain pain pain",
         [indexator.Position(21, 23, 1),
          indexator.Position(24, 28, 1)],
         38,
         17,
         "All we need is blood.",
         "testSentence.txt",
         1)
     self.assertEqual(actual, sentenceWindow)
 def test_context_two_windows(self):
     """Uniting two size-2 windows on the same line must give a single combined window."""
     tokenPositions = [
         indexator.Position(20, 22, 1),
         indexator.Position(32, 35, 1),
     ]
     windows = [
         searchEngine.ContextWindow.makeWindowGreatAgain(
             2, 'test.txt', tokenPosition)
         for tokenPosition in tokenPositions
     ]
     merged = searchEngine.ContextWindow().unionWindows(windows)

     combinedWindow = searchEngine.ContextWindow.initWithData(
         "All we need is, all we need is, all we need is",
         [indexator.Position(20, 22, 1),
          indexator.Position(32, 35, 1)],
         43,
         12,
         "is, all we need is, all we need",
         "test.txt",
         1)
     self.assertEqual(merged, [combinedWindow])
 def test_need(self):
     """The query 'need' must return the listed positions in test0.txt and test2.txt."""
     inTest0 = [
         indexator.Position(7, 11, 1),
         indexator.Position(8, 12, 2),
         indexator.Position(8, 12, 3),
     ]
     inTest2 = [
         indexator.Position(7, 11, 1),
         indexator.Position(23, 27, 1),
         indexator.Position(8, 12, 2),
     ]
     found = self.searchEngine.searchQuery('need')
     self.assertEqual(found, {'test0.txt': inTest0, 'test2.txt': inTest2})
 def test_we(self):
     """Searching for 'we' must return the listed positions in test0.txt and test2.txt."""
     inTest0 = [
         indexator.Position(4, 6, 1),
         indexator.Position(5, 7, 2),
         indexator.Position(5, 7, 3),
     ]
     inTest2 = [
         indexator.Position(4, 6, 1),
         indexator.Position(20, 22, 1),
         indexator.Position(5, 7, 2),
     ]
     found = self.searchEngine.search('we')
     self.assertEqual(found, {'test0.txt': inTest0, 'test2.txt': inTest2})
Beispiel #7
0
    def makeWindowGreatAgain(size, filename, position):
        """
        Method for creating a context window using the source file and token position

        The window is the target token plus up to `size` alpha/digit tokens on
        each side, together with the other tokens lying between them. On either
        side, collection stops at the first token that contains a newline.
        @param size: size of the context window (number of alpha/digit tokens
            collected on each side of the target token)
        @param filename: path to the source file
        @param position: position of the target token; its posBegin, posEnd and
            posLine (1-based line number) attributes are read
        @return: a ContextWindow with filename, lineNumber, lineString,
            targetToken, targetTokensPositions, leftBoundary, rightBoundary
            and string filled in
        @raise ValueError: if the file has no line with number position.posLine
        """

        def collectContext(rawTokens):
            """Concatenate the leading tokens of rawTokens that fit the window."""
            accepted = []
            pending = []  # buffer against wrong tokens (kept only once a word follows)
            wordCount = 0
            for rawToken in rawTokens:
                if wordCount >= size or '\n' in rawToken.string:
                    break
                pending.append(rawToken.string)
                if rawToken.category in ('alpha', 'digit'):
                    # An alpha/digit token confirms everything buffered so far.
                    accepted.extend(pending)
                    pending = []
                    wordCount += 1
            return "".join(accepted)

        lineString = None
        with open(filename, 'r') as f:
            for lineNumber, line in enumerate(f, start=1):
                if lineNumber == position.posLine:
                    lineString = line
                    break  # the requested line is found, no need to read further
        if lineString is None:
            raise ValueError("line {} not found in file '{}'".format(
                position.posLine, filename))

        contextWindow = ContextWindow()
        contextWindow.filename = filename
        contextWindow.lineNumber = position.posLine
        contextWindow.lineString = lineString
        contextWindow.targetToken = lineString[
            position.posBegin:position.posEnd]
        contextWindow.targetTokensPositions = [
            indexator.Position(position.posBegin, position.posEnd,
                               position.posLine)
        ]

        right = collectContext(tokenizer.Tokenizer().genclasstokenize(
            lineString[position.posEnd:]))

        if position.posBegin > 0:
            # The left context is tokenized on the reversed text so that tokens
            # come out nearest-first; the collected text is reversed back.
            left = collectContext(tokenizer.Tokenizer().genclasstokenize(
                lineString[position.posBegin - 1::-1]))[::-1]
        else:
            # With posBegin == 0 the slice above would start at index -1 and
            # yield the whole line reversed; there is no left context here.
            left = ""

        contextWindow.rightBoundary = position.posEnd + len(right)
        contextWindow.leftBoundary = position.posBegin - len(left)
        contextWindow.string = left + contextWindow.targetToken + right
        return contextWindow
Beispiel #8
0
                i += 1
    rawTokensLeft = tokenizer.Tokenizer().genclasstokenize(lineString[19::-1])
    i = 0
    for rawToken in rawTokensLeft:
        if i < 3:
            left = rawToken.string[::-1] + left
            leftArray.append(rawToken)
            if (rawToken.category == 'alpha') or (rawToken.category
                                                  == 'digit'):
                i += 1
    # print(targetToken)
    # print(right)
    # print(left)
    # print(left + targetToken + right)

    pos2 = indexator.Position(20, 22, 1)
    pos1 = indexator.Position(23, 27, 1)
    context1 = ContextWindow.makeWindowGreatAgain(5, 'test.txt', pos1)
    context2 = ContextWindow.makeWindowGreatAgain(3, 'test.txt', pos2)
    contexts = []
    contexts.append(context1)
    contexts.append(context2)
    unionContexts = ContextWindow().unionWindows(contexts)
    # context1.expandToSentence()
    # print(context1.string)

    # string = "all we need is, all we need is"
    print(unionContexts[0].markTarget())

    # pos1 = indexator.Position(20, 22, 0)
    # pos2 = indexator.Position(32, 35, 0)
 def test_we_is(self):
     """The query 'we is' must return the listed positions in test0.txt and test2.txt."""
     inTest0 = [
         indexator.Position(4, 6, 1),
         indexator.Position(5, 7, 2),
         indexator.Position(5, 7, 3),
         indexator.Position(12, 14, 1),
         indexator.Position(13, 15, 2),
         indexator.Position(13, 15, 3),
     ]
     inTest2 = [
         indexator.Position(4, 6, 1),
         indexator.Position(20, 22, 1),
         indexator.Position(5, 7, 2),
         indexator.Position(12, 14, 1),
         indexator.Position(28, 30, 1),
         indexator.Position(13, 15, 2),
     ]
     found = self.searchEngine.searchQuery('we is')
     self.assertEqual(found, {'test0.txt': inTest0, 'test2.txt': inTest2})
    def test_context_many_windows(self):
        """Six windows over three files must be united into the four expected windows."""
        # (window size, source file, target position) for every input window.
        windowSpecs = [
            (2, 'test.txt', indexator.Position(20, 22, 1)),
            (2, 'test.txt', indexator.Position(32, 35, 1)),
            (1, 'testtest.txt', indexator.Position(7, 12, 1)),
            (8, 'testtesttest.txt', indexator.Position(20, 22, 1)),
            (2, 'testtesttest.txt', indexator.Position(28, 30, 1)),
            (2, 'testtesttest.txt', indexator.Position(1, 4, 2)),
        ]
        windows = []
        for windowSize, sourceFile, tokenPosition in windowSpecs:
            windows.append(
                searchEngine.ContextWindow.makeWindowGreatAgain(
                    windowSize, sourceFile, tokenPosition))
        merged = searchEngine.ContextWindow().unionWindows(windows)

        firstWindow = searchEngine.ContextWindow.initWithData(
            "All we need is, all we need is, all we need is",
            [indexator.Position(20, 22, 1),
             indexator.Position(32, 35, 1)],
            43, 12, "is, all we need is, all we need",
            "test.txt", 1)

        secondWindow = searchEngine.ContextWindow.initWithData(
            "Blood, blood, blood",
            [indexator.Position(7, 12, 1)],
            19, 0, "Blood, blood, blood",
            "testtest.txt", 1)

        thirdWindow = searchEngine.ContextWindow.initWithData(
            "All we need is, all we need is,\n",
            [indexator.Position(20, 22, 1),
             indexator.Position(28, 30, 1)],
            30, 0, "All we need is, all we need is",
            "testtesttest.txt", 1)

        fourthWindow = searchEngine.ContextWindow.initWithData(
            " all we need is",
            [indexator.Position(1, 4, 2)],
            12, 1, "all we need",
            "testtesttest.txt", 2)

        self.assertEqual(
            merged,
            [firstWindow, secondWindow, thirdWindow, fourthWindow])
 def test_context_zero_size(self):
     """A window of size 0 must contain only the target token itself."""
     window = searchEngine.ContextWindow.makeWindowGreatAgain(
         0, 'test.txt', indexator.Position(20, 22, 1))
     self.assertEqual(window.string, "we")
 def test_context_large_size(self):
     """A size-8 window around the token at 20-22 must give the full expected line text."""
     window = searchEngine.ContextWindow.makeWindowGreatAgain(
         8, 'test.txt', indexator.Position(20, 22, 1))
     wholeLine = "All we need is, all we need is, all we need is"
     self.assertEqual(window.string, wholeLine)
 def test_context_line_not_exists(self):
     """Building a window for line 2 of test.txt must raise ValueError."""
     missingLinePosition = indexator.Position(20, 22, 2)
     self.assertRaises(
         ValueError,
         searchEngine.ContextWindow.makeWindowGreatAgain,
         2, 'test.txt', missingLinePosition)