コード例 #1
0
 def test_context_window_search_sentence_extension_acc(self):
     """Check sentence-context search for "only" across two indexed files.

     The first call (arguments 3, -10) must return one window per file;
     the second call (arguments 1, 8) must return an empty dict.
     """
     # Context managers close the fixture files even if a write fails.
     with open("text.txt", 'w') as testfile:
         testfile.write("There are only fluffy kittens! Only kittens")
     self.testindexer.index_with_lines("text.txt")
     with open("text2.txt", 'w') as testfile2:
         testfile2.write("only kittens and puppies.")
     self.testindexer.index_with_lines("text2.txt")
     testsearch = search_engine.SearchEngine('database')
     windowsdict = testsearch.several_tokens_search_with_sentence_context_acc(
         "only", 3, -10)
     expectedwindowresult = {
         "text.txt": [
             search_engine.ContextWindow(
                 "There are only fluffy kittens! Only kittens",
                 [indexer.Position_with_lines(10, 14, 0)],
                 search_engine.WindowPosition(0, 30, 0, "text.txt"))
         ],
         "text2.txt": [
             search_engine.ContextWindow(
                 "only kittens and puppies.",
                 [indexer.Position_with_lines(0, 4, 0)],
                 search_engine.WindowPosition(0, 25, 0, "text2.txt"))
         ]
     }
     self.assertEqual(windowsdict, expectedwindowresult)
     windowsdict = testsearch.several_tokens_search_with_sentence_context_acc(
         "only", 1, 8)
     expectedwindowresult = {}
     self.assertEqual(windowsdict, expectedwindowresult)
コード例 #2
0
 def test_search_one_token_one_file(self):
     """Index one file, then check the stored database and a "only" lookup.

     The whole shelve database is compared against the expected
     token -> file -> positions mapping, and search_by_token("only") must
     return a dict with the single position of "only" in text.txt.
     """
     # Context manager closes the fixture file even if the write fails.
     with open("text.txt", 'w') as testfile:
         testfile.write("There are only kittens!")
     self.testindexer.index_with_lines("text.txt")
     testsearch = search_engine.SearchEngine('database')
     expectedresult = {
         "There": {
             "text.txt": [indexer.Position_with_lines(0, 5, 0)]
         },
         "are": {
             "text.txt": [indexer.Position_with_lines(6, 9, 0)]
         },
         "only": {
             "text.txt": [indexer.Position_with_lines(10, 14, 0)]
         },
         "kittens": {
             "text.txt": [indexer.Position_with_lines(15, 22, 0)]
         }
     }
     # Copy the shelf into a plain dict and close the handle right away
     # so the test does not leak an open database file.
     with shelve.open('database') as database:
         resulteddictionary = dict(database)
     self.assertEqual(resulteddictionary, expectedresult)
     searchresulteddictionary = testsearch.search_by_token("only")
     expectedsearchresult = {
         "text.txt": [indexer.Position_with_lines(10, 14, 0)]
     }
     self.assertIsInstance(searchresulteddictionary, dict)
     self.assertEqual(searchresulteddictionary, expectedsearchresult)
コード例 #3
0
 def test_several_tokens_search_acc(self):
     """Check several_tokens_search_acc for "only kittens" on two files.

     Three pairs of numeric arguments are tried: (0, 0) must give an empty
     dict, (1, 0) only the text.txt positions and (2, 1) only the text2.txt
     positions.
     NOTE(review): the numbers presumably are a document limit and offset;
     confirm against SearchEngine.
     """
     # Context managers close the fixture files even if a write fails.
     with open("text.txt", 'w') as testfile:
         testfile.write("There are only kittens!")
     with open("text2.txt", 'w') as testfile2:
         testfile2.write("only kittens and puppies...")
     self.testindexer.index_with_lines("text2.txt")
     self.testindexer.index_with_lines("text.txt")
     testsearch = search_engine.SearchEngine('database')
     searchresulteddictionary = testsearch.several_tokens_search_acc(
         "only kittens", 0, 0)
     expectedsearchresult = {}
     self.assertEqual(searchresulteddictionary, expectedsearchresult)
     searchresulteddictionary = testsearch.several_tokens_search_acc(
         "only kittens", 1, 0)
     expectedsearchresult = {
         "text.txt": [
             indexer.Position_with_lines(10, 14, 0),
             indexer.Position_with_lines(15, 22, 0)
         ]
     }
     self.assertEqual(searchresulteddictionary, expectedsearchresult)
     searchresulteddictionary = testsearch.several_tokens_search_acc(
         "only kittens", 2, 1)
     expectedsearchresult = {
         "text2.txt": [
             indexer.Position_with_lines(0, 4, 0),
             indexer.Position_with_lines(5, 12, 0)
         ]
     }
     self.assertEqual(searchresulteddictionary, expectedsearchresult)
コード例 #4
0
 def test_context_window_search_several_tokens_several_files_3_3(self):
     """Check customizable-context search for "only kittens" with context 3, 3.

     Both files are indexed; each file is expected to yield one window
     that holds both matched positions.
     """
     # Context managers close the fixture files even if a write fails.
     with open("text.txt", 'w') as testfile:
         testfile.write("There are only fluffy kittens!")
     with open("text2.txt", 'w') as testfile2:
         testfile2.write("only kittens and puppies...")
     self.testindexer.index_with_lines("text2.txt")
     self.testindexer.index_with_lines("text.txt")
     testsearch = search_engine.SearchEngine('database')
     # context '3,3'
     windowsdict = testsearch.several_tokens_search_with_customizable_context(
         "only kittens", 3, 3)
     expectedwindowresult = {
         "text.txt": [
             search_engine.ContextWindow(
                 "There are only fluffy kittens", [
                     indexer.Position_with_lines(10, 14, 0),
                     indexer.Position_with_lines(22, 29, 0)
                 ], search_engine.WindowPosition(0, 29, 0, "text.txt"))
         ],
         "text2.txt": [
             search_engine.ContextWindow(
                 "only kittens and puppies", [
                     indexer.Position_with_lines(0, 4, 0),
                     indexer.Position_with_lines(5, 12, 0)
                 ], search_engine.WindowPosition(0, 24, 0, "text2.txt"))
         ]
     }
     self.assertEqual(expectedwindowresult, windowsdict)
コード例 #5
0
 def test_position_generator(self):
     """Check that position_generator merges several lists in sorted order.

     Two integer cases and one Position_with_lines case are checked; the
     expected position order is by line first, then by word start.
     """
     # Create an empty text.txt; the assertions below only use the literal
     # lists. The context manager closes the file even if the write fails.
     with open("text.txt", 'w') as testfile:
         testfile.write("")
     testsearch = search_engine.SearchEngine('database')
     lists1 = [[1, 2, 3, 4, 6], [9, 5, 10, 31]]
     list1result = list(testsearch.position_generator(lists1))
     expectedlist1 = [1, 2, 3, 4, 5, 6, 9, 10, 31]
     self.assertEqual(list1result, expectedlist1)
     lists2 = [[-5, 9, 0, 20], [1, 15]]
     list2result = list(testsearch.position_generator(lists2))
     expectedlist2 = [-5, 0, 1, 9, 15, 20]
     self.assertEqual(list2result, expectedlist2)
     lists3 = [
         [
             indexer.Position_with_lines(6, 9, 0),
             indexer.Position_with_lines(2, 4, 1)
         ],
         [
             indexer.Position_with_lines(0, 2, 1),
             indexer.Position_with_lines(4, 10, 0)
         ]
     ]
     expectedlist3 = [
         indexer.Position_with_lines(4, 10, 0),
         indexer.Position_with_lines(6, 9, 0),
         indexer.Position_with_lines(0, 2, 1),
         indexer.Position_with_lines(2, 4, 1)
     ]
     list3result = list(testsearch.position_generator(lists3))
     self.assertEqual(list3result, expectedlist3)
コード例 #6
0
 def test_input_two_same_words(self):
     """Index "sun sun" and check both occurrences are stored under "sun"."""
     # Context manager closes the fixture file even if the write fails.
     with open("text.txt", 'w') as testfile:
         testfile.write("sun sun")
     expectedresult = {
         "sun": {
             "text.txt": [
                 indexer.Position_with_lines(0, 3, 0),
                 indexer.Position_with_lines(4, 7, 0)
             ]
         }
     }
     self.testindexer.index_with_lines("text.txt")
     # Copy the shelf into a plain dict and close the handle right away
     # so the test does not leak an open database file.
     with shelve.open('database') as database:
         resulteddictionary = dict(database)
     self.assertEqual(resulteddictionary, expectedresult)
コード例 #7
0
 def test_search_one_token_several_files(self):
     """Check that search_by_token("only") reports positions in both files."""
     # Context managers close the fixture files even if a write fails.
     with open("text.txt", 'w') as testfile:
         testfile.write("There are only kittens!")
     with open("text2.txt", 'w') as testfile2:
         testfile2.write("only...")
     self.testindexer.index_with_lines("text.txt")
     self.testindexer.index_with_lines("text2.txt")
     testsearch = search_engine.SearchEngine('database')
     searchresulteddictionary = testsearch.search_by_token("only")
     expectedsearchresult = {
         "text.txt": [indexer.Position_with_lines(10, 14, 0)],
         "text2.txt": [indexer.Position_with_lines(0, 4, 0)]
     }
     self.assertIsInstance(searchresulteddictionary, dict)
     self.assertEqual(searchresulteddictionary, expectedsearchresult)
コード例 #8
0
    def test_context_window_search_sentence_extension(self):
        """Check sentence-context search on one file for three queries.

        The queries "only", "only fluffy" and "kittens" are each compared
        against the expected single context window for text.txt.
        """
        # Context manager closes the fixture file even if the write fails.
        with open("text.txt", 'w') as testfile:
            testfile.write("There are only fluffy kittens! Only kittens")
        self.testindexer.index_with_lines("text.txt")

        testsearch = search_engine.SearchEngine('database')

        windowsdict = testsearch.several_tokens_search_with_sentence_context(
            "only")
        expectedwindowresult = {
            "text.txt": [
                search_engine.ContextWindow(
                    "There are only fluffy kittens! Only kittens",
                    [indexer.Position_with_lines(10, 14, 0)],
                    search_engine.WindowPosition(0, 30, 0, "text.txt"))
            ]
        }
        self.assertEqual(windowsdict, expectedwindowresult)

        windowsdict = testsearch.several_tokens_search_with_sentence_context(
            "only fluffy")
        expectedwindowresult = {
            "text.txt": [
                search_engine.ContextWindow(
                    "There are only fluffy kittens!", [
                        indexer.Position_with_lines(10, 14, 0),
                        indexer.Position_with_lines(15, 21, 0)
                    ], search_engine.WindowPosition(0, 30, 0, "text.txt"))
            ]
        }
        self.assertEqual(windowsdict, expectedwindowresult)

        windowsdict = testsearch.several_tokens_search_with_sentence_context(
            "kittens")
        expectedwindowresult = {
            "text.txt": [
                search_engine.ContextWindow(
                    "There are only fluffy kittens! Only kittens", [
                        indexer.Position_with_lines(22, 29, 0),
                        indexer.Position_with_lines(36, 43, 0)
                    ], search_engine.WindowPosition(0, 43, 0, "text.txt"))
            ]
        }
        self.assertEqual(windowsdict, expectedwindowresult)
コード例 #9
0
    def test_several_tokens_search_gen(self):
        """Check several_tokens_search_gen for empty, unknown and real queries.

        The queries "" and "?" must return an empty dict. For "kittens" the
        per-file results are materialized with list() and compared with the
        expected positions; the argument pairs (1, 3) and (-5, 0) must return
        an empty dict.
        """
        # Context managers close the fixture files even if a write fails.
        with open("text.txt", 'w') as testfile:
            testfile.write("There are only fluffy kittens kittens")
        self.testindexer.index_with_lines("text.txt")
        with open("text2.txt", 'w') as testfile2:
            testfile2.write("only kittens and puppies...")
        self.testindexer.index_with_lines("text2.txt")
        testsearch = search_engine.SearchEngine('database')
        searchresult1 = testsearch.several_tokens_search_gen("", 1, 0)
        self.assertEqual(searchresult1, {})

        searchresult2 = testsearch.several_tokens_search_gen("?", 2, 0)
        self.assertEqual(searchresult2, {})

        searchresult3 = testsearch.several_tokens_search_gen("kittens", 2, 0)
        expectedsearchresult3 = {
            "text.txt": [
                indexer.Position_with_lines(22, 29, 0),
                indexer.Position_with_lines(30, 37, 0)
            ],
            "text2.txt": [indexer.Position_with_lines(5, 12, 0)]
        }
        # NOTE(review): this loop only checks files present in the result,
        # so a result missing an expected file (or an empty one) still passes.
        for file in searchresult3:
            self.assertEqual(list(searchresult3[file]),
                             expectedsearchresult3[file])

        searchresult4 = testsearch.several_tokens_search_gen("kittens", 1, 0)
        expectedsearchresult4 = {
            "text.txt": [
                indexer.Position_with_lines(22, 29, 0),
                indexer.Position_with_lines(30, 37, 0)
            ]
        }
        # Same caveat as above: only files present in the result are checked.
        for file in searchresult4:
            self.assertEqual(list(searchresult4[file]),
                             expectedsearchresult4[file])

        searchresult5 = testsearch.several_tokens_search_gen("kittens", 1, 3)
        self.assertEqual(searchresult5, {})

        searchresult6 = testsearch.several_tokens_search_gen("kittens", -5, 0)
        self.assertEqual(searchresult6, {})
コード例 #10
0
 def get_context_window_one_position_one_file(cls, tokenposition, doc, line, leftcontext, rightcontext):
     """
     Construct a context window of customizable size around one token.

     The text on each side of the token is tokenized and scanned outwards
     from the token: the left part (up to the token's end) is reversed
     first, the right part (from the token's start) is scanned as is.
     If a side has no further tokens, or its context size is 0, the window
     boundary on that side is the token's own boundary. A negative context
     size never matches a token index, so the window then extends to the
     outermost token on that side.
     @param tokenposition: position of the token; its wordbeg, wordend and
     line attributes are read
     @param doc: name of the document to work with
     @param line: text of the line that contains the token
     @param leftcontext: number of words from the left side of the token
     to be added to the context window
     @param rightcontext: number of words from the right side of the token
     to be added to the context window
     @return: context window (instance of cls) for ONE position in ONE
     document
     """
     t = Tokenizator()
     lineno = tokenposition.line

     def context_reach(text, width):
         # Return the offset in `text` just past the farthest context
         # token, or None when the window stays on the token itself.
         tokens = list(t.generate_alpha_and_digits(text))
         lastindex = len(tokens) - 1
         for i, token in enumerate(tokens):
             if i == 0:
                 # `text` starts on the searched token, so the first
                 # token found only counts as context size 0.
                 if width == 0:
                     return None
                 continue
             if i == width or i == lastindex:
                 # token.position is the position of the token's first
                 # symbol inside `text`
                 return token.position + len(token.word)
         return None

     # left context: offsets in the reversed text are distances counted
     # backwards from the token's end
     leftreach = context_reach(line[:tokenposition.wordend][::-1], leftcontext)
     if leftreach is None:
         leftstart = tokenposition.wordbeg
     else:
         leftstart = tokenposition.wordend - leftreach

     # right context: offsets are relative to the token's start
     rightreach = context_reach(line[tokenposition.wordbeg:], rightcontext)
     if rightreach is None:
         rightend = tokenposition.wordend
     else:
         rightend = tokenposition.wordbeg + rightreach

     mycontextwindow = cls(line, [indexer.Position_with_lines(
         tokenposition.wordbeg, tokenposition.wordend, tokenposition.line)],
                         WindowPosition(leftstart, rightend, lineno, doc))
     return mycontextwindow
コード例 #11
0
 def test_several_tokens_one_file(self):
     """Check several_tokens_search on one file for two queries.

     "only kittens" must return both positions in text.txt, while
     "only kittens and" must return an empty dict.
     """
     # Context manager closes the fixture file even if the write fails.
     with open("text.txt", 'w') as testfile:
         testfile.write("There are only kittens!")
     self.testindexer.index_with_lines("text.txt")
     testsearch = search_engine.SearchEngine('database')
     searchresulteddictionary = testsearch.several_tokens_search(
         "only kittens")
     expectedsearchresult = {
         "text.txt": [
             indexer.Position_with_lines(10, 14, 0),
             indexer.Position_with_lines(15, 22, 0)
         ]
     }
     self.assertEqual(searchresulteddictionary, expectedsearchresult)
     searchresulteddictionary = testsearch.several_tokens_search(
         "only kittens and")
     expectedsearchresult = {}
     self.assertEqual(searchresulteddictionary, expectedsearchresult)
コード例 #12
0
 def test_context_window_search_several_tokens_several_files_0_0(self):
     """Check customizable-context search for "only kittens" with context 0, 0.

     Each matched token is expected to get its own window whose boundaries
     equal the token's boundaries.
     """
     # Context manager closes the fixture file even if the write fails.
     with open("text.txt", 'w') as testfile:
         testfile.write("There are only fluffy kittens!")
     self.testindexer.index_with_lines("text.txt")
     testsearch = search_engine.SearchEngine('database')
     # context '0,0'
     windowsdict = testsearch.several_tokens_search_with_customizable_context(
         "only kittens", 0, 0)
     expectedwindowresult = {
         'text.txt': [
             search_engine.ContextWindow(
                 "There are only fluffy kittens!",
                 [indexer.Position_with_lines(10, 14, 0)],
                 search_engine.WindowPosition(10, 14, 0, "text.txt")),
             search_engine.ContextWindow(
                 "There are only fluffy kittens!",
                 [indexer.Position_with_lines(22, 29, 0)],
                 search_engine.WindowPosition(22, 29, 0, "text.txt"))
         ]
     }
     self.assertEqual(expectedwindowresult, windowsdict)
コード例 #13
0
 def test_input_sentence(self):
     """Index a two-line text and check the stored positions of every word.

     "sentence" occurs on both lines, so it is expected with one position
     on line 0 and one on line 1.
     """
     # Context manager closes the fixture file even if the write fails.
     with open("text.txt", 'w') as testfile:
         testfile.write("This is a sentence \nsentence.")
     expectedresult = {
         "This": {
             "text.txt": [indexer.Position_with_lines(0, 4, 0)]
         },
         "is": {
             "text.txt": [indexer.Position_with_lines(5, 7, 0)]
         },
         "a": {
             "text.txt": [indexer.Position_with_lines(8, 9, 0)]
         },
         "sentence": {
             "text.txt": [
                 indexer.Position_with_lines(10, 18, 0),
                 indexer.Position_with_lines(0, 8, 1)
             ]
         }
     }
     self.testindexer.index_with_lines("text.txt")
     # Copy the shelf into a plain dict and close the handle right away
     # so the test does not leak an open database file.
     with shelve.open('database') as database:
         resulteddictionary = dict(database)
     self.assertEqual(resulteddictionary, expectedresult)
コード例 #14
0
 def test_context_window_context_window_one_position_one_file(self):
     """Check single-position windows and the matching search with context 2, 1.

     Windows are first built directly through
     ContextWindow.get_context_window_one_position_one_file for one position
     in each file, then the same windows are expected from
     several_tokens_search_with_customizable_context("only", 2, 1).
     """
     # Context managers close the fixture files even if a write fails.
     with open("text.txt", 'w') as testfile:
         testfile.write("There are only kittens!")
     with open("text2.txt", 'w') as testfile2:
         testfile2.write("only...")
     self.testindexer.index_with_lines("text.txt")
     self.testindexer.index_with_lines("text2.txt")
     testsearch = search_engine.SearchEngine('database')
     window1 = search_engine.ContextWindow.get_context_window_one_position_one_file(
         indexer.Position_with_lines(0, 4, 0), "text2.txt", "only...", 2, 1)
     window2 = search_engine.ContextWindow.get_context_window_one_position_one_file(
         indexer.Position_with_lines(10, 14, 0), "text.txt",
         "There are only kittens!", 2, 1)
     expectedwindow1 = search_engine.ContextWindow(
         "only", [indexer.Position_with_lines(0, 4, 0)],
         search_engine.WindowPosition(0, 4, 0, "text2.txt"))
     expectedwindow2 = search_engine.ContextWindow(
         "There are only kittens", [indexer.Position_with_lines(10, 14, 0)],
         search_engine.WindowPosition(0, 22, 0, "text.txt"))
     self.assertEqual(expectedwindow1, window1)
     self.assertEqual(expectedwindow2, window2)
     windowsdict = testsearch.several_tokens_search_with_customizable_context(
         "only", 2, 1)
     expectedwindowresult = {
         "text.txt": [
             search_engine.ContextWindow(
                 "There are only kittens",
                 [indexer.Position_with_lines(10, 14, 0)],
                 search_engine.WindowPosition(0, 22, 0, "text.txt"))
         ],
         "text2.txt": [
             search_engine.ContextWindow(
                 "only", [indexer.Position_with_lines(0, 4, 0)],
                 search_engine.WindowPosition(0, 4, 0, "text2.txt"))
         ]
     }
     self.assertEqual(expectedwindowresult, windowsdict)
コード例 #15
0
 def test_not_equal(self):
     """Two positions with different word boundaries must compare unequal."""
     first, second = (
         indexer.Position_with_lines(2, 5, 1),
         indexer.Position_with_lines(4, 6, 1),
     )
     self.assertNotEqual(first, second)
コード例 #16
0
 def test_equal(self):
     """Two positions built from the same values must compare equal."""
     first, second = (
         indexer.Position_with_lines(1, 6, 3),
         indexer.Position_with_lines(1, 6, 3),
     )
     self.assertEqual(first, second)