Esempio n. 1
0
 def context_generator(self, filename, position_generator, win_size):
     """
     Lazily produce one context window per position of a file.

     @param filename: name of the file the positions refer to (must be a str)
     @param position_generator: iterable that yields positions
     @param win_size: window size handed to Context_Window.get_window
         (must be an int)
     @return: a generator of the values returned by
         Context_Window.get_window, one per position
     @raise TypeError: if filename is not a str or win_size is not an int;
         because this is a generator the check only runs once iteration starts
     """
     arguments_ok = isinstance(filename, str) and isinstance(win_size, int)
     if not arguments_ok:
         raise TypeError('Input has an unappropriate type!')
     for position in position_generator:
         yield Context_Window.get_window(filename, position, win_size)
 def test_extend_window_rus_one(self):
     """Extend a size-1 window around the first word of a Russian sentence.

     The expected window spans offsets 0..41, i.e. the whole 41-character
     text.
     """
     filename = 'test_extend_window_rus.txt'
     text = 'Пьер с грустью слышал над собою насмешки.'
     self.indexator = Indexer('database')
     # The context manager closes the handle even if the write fails.
     with open(filename, 'w') as test_file_one:
         test_file_one.write(text)
     # Delete the fixture however the test ends; it used to be left on disk.
     self.addCleanup(os.remove, filename)
     self.indexator.get_index_with_line(filename)
     # Drop the indexer before a SearchEngine is opened on the same index.
     del self.indexator
     self.search = SearchEngine('database')
     window = windows.Context_Window.get_window(
         filename, Position_Plus(0, 0, 4), 1)
     window.extend_window()
     extended_window = Context_Window(
         text, [Position_Plus(0, 0, 4)], 0, 41)
     self.assertEqual(window, extended_window)
 def test_extend_window(self):
     """Extend a size-1 window around 'Zakharova' in an English sentence.

     The expected window spans offsets 0..30, i.e. the whole 30-character
     text.
     """
     filename = 'test_extend_window.txt'
     text = 'Alina Zakharova is a student!!'
     self.indexator = Indexer('database')
     # The context manager closes the handle even if the write fails.
     with open(filename, 'w') as test_file_one:
         test_file_one.write(text)
     # Registered cleanup removes the fixture even when an assertion fails;
     # a trailing os.remove would be skipped in that case.
     self.addCleanup(os.remove, filename)
     self.indexator.get_index_with_line(filename)
     # Drop the indexer before a SearchEngine is opened on the same index.
     del self.indexator
     self.search = SearchEngine('database')
     window = windows.Context_Window.get_window(
         filename, Position_Plus(0, 6, 15), 1)
     window.extend_window()
     extended_window = Context_Window(
         text, [Position_Plus(0, 6, 15)], 0, 30)
     self.assertEqual(window, extended_window)
 def test_extend_window_rus_two(self):
     """Extend a size-1 window around offsets 34..38 of a two-sentence text.

     The expected window spans offsets 0..119.
     """
     filename = 'test_extend_window_rus.txt'
     text = (
         'С разных сторон виднелись пожары. Пьер тогда еще не понимал значения сожженной Москвы и с ужасом смотрел на эти пожары.'
     )
     self.indexator = Indexer('database')
     # The context manager closes the handle even if the write fails.
     with open(filename, 'w') as test_file_one:
         test_file_one.write(text)
     # Delete the fixture however the test ends; it used to be left on disk.
     self.addCleanup(os.remove, filename)
     self.indexator.get_index_with_line(filename)
     # Drop the indexer before a SearchEngine is opened on the same index.
     del self.indexator
     self.search = SearchEngine('database')
     window = windows.Context_Window.get_window(
         filename, Position_Plus(0, 34, 38), 1)
     window.extend_window()
     extended_window = Context_Window(
         text, [Position_Plus(0, 34, 38)], 0, 119)
     self.assertEqual(window, extended_window)
 def test_extend_window_rus(self):
     """Extend a size-1 window around offsets 28..36 of a four-sentence text.

     The expected window spans offsets 22..55.
     """
     filename = 'test_extend_window_rus.txt'
     text = (
         'Прогать очень сложно! Алина Захарова студент лингвист!! Аня любит немецкий. В Петербурге идет дождь.'
     )
     self.indexator = Indexer('database')
     # The context manager closes the handle even if the write fails.
     with open(filename, 'w') as test_file_one:
         test_file_one.write(text)
     # Delete the fixture however the test ends; it used to be left on disk.
     self.addCleanup(os.remove, filename)
     self.indexator.get_index_with_line(filename)
     # Drop the indexer before a SearchEngine is opened on the same index.
     del self.indexator
     self.search = SearchEngine('database')
     window = windows.Context_Window.get_window(
         filename, Position_Plus(0, 28, 36), 1)
     window.extend_window()
     extended_window = Context_Window(
         text, [Position_Plus(0, 28, 36)], 22, 55)
     self.assertEqual(window, extended_window)
Esempio n. 6
0
    def unite_all(self, dictionary, win_size):
        '''
        Build a context window for every position and merge crossing neighbours.

        @param dictionary: dict mapping a filename to an indexable sequence
            of positions
        @param win_size: a size of a context window
        @return: a dict mapping each filename to its list of context windows,
            in which adjacent windows that cross have been merged into one
        @raise TypeError: if dictionary is not a dict
        '''
        if not isinstance(dictionary, dict):
            raise TypeError('Input has an unappropriate type!')
        output_dict = {}
        for key, pos_array in dictionary.items():
            # Each file gets its own list, so windows of different files
            # never share storage.
            win_array = output_dict.setdefault(key, [])
            for num, pos in enumerate(pos_array):
                # Skip a position equal to its predecessor. The num > 0 guard
                # is required: index -1 would wrap around to the last element,
                # and a single-element list would compare the element with
                # itself and drop it as a "duplicate", leaving no windows.
                if num > 0 and pos == pos_array[num - 1]:
                    continue
                win_array.append(Context_Window.get_window(key, pos, win_size))

        # Merge crossing neighbours file by file.
        for win_array in output_dict.values():
            # The index must restart for every file; a single counter shared
            # by all files would skip the leading windows of later files.
            i = 0
            while i < len(win_array) - 1:
                if win_array[i].is_crossed(win_array[i + 1]):
                    # Fold the neighbour into the current window, then drop
                    # the neighbour by index (list.remove would search by
                    # equality and could hit a different element). The index
                    # stays put so the merged window is compared with its
                    # new neighbour.
                    win_array[i].get_united_window(win_array[i + 1])
                    del win_array[i + 1]
                else:
                    i += 1

        return output_dict
 def test_get_window_begin(self):
     """A size-1 window around the first word (offsets 0..5).

     The expected window spans offsets 0..15.
     """
     filename = 'test_window_three.txt'
     text = 'Alina Zakharova is a student'
     self.indexator = Indexer('database')
     # The context manager closes the handle even if the write fails.
     with open(filename, 'w') as test_file_one:
         test_file_one.write(text)
     # Registered cleanup removes the fixture even when an assertion fails;
     # a trailing os.remove would be skipped in that case.
     self.addCleanup(os.remove, filename)
     self.indexator.get_index_with_line(filename)
     # Drop the indexer before a SearchEngine is opened on the same index.
     del self.indexator
     self.search = SearchEngine('database')
     result = windows.Context_Window.get_window(
         filename, Position_Plus(0, 0, 5), 1)
     # Build the expected window directly instead of creating it from
     # placeholder strings and overwriting every attribute afterwards.
     self.win = Context_Window(text, [Position_Plus(0, 0, 5)], 0, 15)
     # Field-by-field checks first, so a failure names the differing field.
     self.assertEqual(result.string, self.win.string)
     self.assertEqual(result.positions, self.win.positions)
     self.assertEqual(result.win_start, self.win.win_start)
     self.assertEqual(result.win_end, self.win.win_end)
     self.assertEqual(result, self.win)
 def test_get_window_simple_plus(self):
     """A size-2 window around 'is' (offsets 23..25).

     The expected window spans offsets 7..36.
     """
     filename = 'test_window_two.txt'
     text = 'Little Alina Zakharova is a linguist student)))'
     self.indexator = Indexer('database')
     # The context manager closes the handle even if the write fails.
     with open(filename, 'w') as test_file_one:
         test_file_one.write(text)
     # Registered cleanup removes the fixture even when an assertion fails;
     # a trailing os.remove would be skipped in that case.
     self.addCleanup(os.remove, filename)
     self.indexator.get_index_with_line(filename)
     # Drop the indexer before a SearchEngine is opened on the same index.
     del self.indexator
     self.search = SearchEngine('database')
     result = windows.Context_Window.get_window(
         filename, Position_Plus(0, 23, 25), 2)
     # Build the expected window directly instead of creating it from
     # placeholder strings and overwriting every attribute afterwards.
     self.win = Context_Window(text, [Position_Plus(0, 23, 25)], 7, 36)
     # Field-by-field checks first, so a failure names the differing field.
     self.assertEqual(result.string, self.win.string)
     self.assertEqual(result.positions, self.win.positions)
     self.assertEqual(result.win_start, self.win.win_start)
     self.assertEqual(result.win_end, self.win.win_end)
     self.assertEqual(result, self.win)
 def setUp(self):
     """Create the shared window fixture and lift the assertion diff limit."""
     # maxDiff = None makes unittest print complete diffs on failure.
     self.maxDiff = None
     sentence = 'The girl named Alina Zakharova is a student'
     fixture_positions = [Position_Plus(0, 4, 20), Position_Plus(0, 9, 30)]
     self.window = Context_Window(sentence, fixture_positions, 8, 20)
class TestMyCode(unittest.TestCase):
    """Tests for Context_Window: creation, crossing, union, extension, highlight.

    Each file-based test writes a small text file, indexes it into the
    'database' index and then requests a window around a given position with
    Context_Window.get_window.
    """

    def setUp(self):
        """Create the shared window fixture and lift the assertion diff limit."""
        # maxDiff = None makes unittest print complete diffs on failure.
        self.maxDiff = None
        self.window = Context_Window(
            'The girl named Alina Zakharova is a student',
            [Position_Plus(0, 4, 20),
             Position_Plus(0, 9, 30)], 8, 20)

    def tearDown(self):
        """Drop the search engine and delete the index files from the cwd."""
        if hasattr(self, 'search'):
            del self.search
        for name in os.listdir(path="."):
            # Matches both a plain 'database' file and 'database.*' files.
            if name == 'database' or name.startswith('database.'):
                os.remove(name)

    def _index_text(self, filename, text):
        """Write text to filename, index it and open a SearchEngine on the index.

        The file is registered for removal with addCleanup, so it is deleted
        even when the calling test fails.

        NOTE(review): the file is written with the platform default encoding,
        as before; confirm it matches the encoding the indexer reads with.
        """
        self.indexator = Indexer('database')
        # The context manager closes the handle even if the write fails.
        with open(filename, 'w') as test_file:
            test_file.write(text)
        self.addCleanup(os.remove, filename)
        self.indexator.get_index_with_line(filename)
        # Drop the indexer before a SearchEngine is opened on the same index.
        del self.indexator
        self.search = SearchEngine('database')

    def _assert_same_window(self, result, expected):
        """Compare two windows field by field, then as whole objects."""
        # Field checks come first so a failure names the differing field.
        self.assertEqual(result.string, expected.string)
        self.assertEqual(result.positions, expected.positions)
        self.assertEqual(result.win_start, expected.win_start)
        self.assertEqual(result.win_end, expected.win_end)
        self.assertEqual(result, expected)

    def test_get_window_error(self):
        """get_window called with (12, '12') must raise TypeError."""
        with self.assertRaises(TypeError):
            self.window.get_window(12, '12')

    def test_get_window_simple(self):
        """A size-1 window around offsets 16..18 spans 6..20."""
        filename = 'test_window_one.txt'
        text = 'Alina Zakharova is a student)))'
        self._index_text(filename, text)
        result = windows.Context_Window.get_window(
            filename, Position_Plus(0, 16, 18), 1)
        self.win = Context_Window(text, [Position_Plus(0, 16, 18)], 6, 20)
        self._assert_same_window(result, self.win)

    def test_get_window_simple_plus(self):
        """A size-2 window around offsets 23..25 spans 7..36."""
        filename = 'test_window_two.txt'
        text = 'Little Alina Zakharova is a linguist student)))'
        self._index_text(filename, text)
        result = windows.Context_Window.get_window(
            filename, Position_Plus(0, 23, 25), 2)
        self.win = Context_Window(text, [Position_Plus(0, 23, 25)], 7, 36)
        self._assert_same_window(result, self.win)

    def test_get_window_begin(self):
        """A size-1 window around the first word (0..5) spans 0..15."""
        filename = 'test_window_three.txt'
        text = 'Alina Zakharova is a student'
        self._index_text(filename, text)
        result = windows.Context_Window.get_window(
            filename, Position_Plus(0, 0, 5), 1)
        self.win = Context_Window(text, [Position_Plus(0, 0, 5)], 0, 15)
        self._assert_same_window(result, self.win)

    def test_get_window_end(self):
        """A size-3 window around the last word (21..28) spans 6..28."""
        filename = 'test_window_four.txt'
        text = 'Alina Zakharova is a student'
        self._index_text(filename, text)
        result = windows.Context_Window.get_window(
            filename, Position_Plus(0, 21, 28), 3)
        self.win = Context_Window(text, [Position_Plus(0, 21, 28)], 6, 28)
        self._assert_same_window(result, self.win)

    def test_myError_str_not_found(self):
        """A position whose first field is 3 makes get_window raise TypeError.

        NOTE(review): the first field is presumably a line number that does
        not exist in the one-line file; confirm against Position_Plus.
        """
        filename = 'test_window_five.txt'
        self._index_text(filename, 'Alina Zakharova is a student')
        with self.assertRaises(TypeError):
            windows.Context_Window.get_window(
                filename, Position_Plus(3, 21, 28), 3)

    def test_united_type_error(self):
        """get_united_window called with (12, 'window)))') must raise TypeError."""
        with self.assertRaises(TypeError):
            self.window.get_united_window(12, 'window)))')

    def test_crossed_type_error(self):
        """is_crossed called with (12, 'window)))') must raise TypeError."""
        with self.assertRaises(TypeError):
            self.window.is_crossed(12, 'window)))')

    def test_united_window(self):
        """Uniting two size-1 windows updates the first one in place.

        Only string, win_start and win_end of the result are checked.
        """
        filename = 'test_united_window.txt'
        text = 'The girl named Alina Zakharova is a student'
        self._index_text(filename, text)
        window_A = windows.Context_Window.get_window(
            filename, Position_Plus(0, 4, 20), 1)
        window_B = windows.Context_Window.get_window(
            filename, Position_Plus(0, 9, 30), 1)
        window_A.get_united_window(window_B)
        self.win = windows.Context_Window(
            text,
            [Position_Plus(0, 4, 20),
             Position_Plus(0, 9, 30)], 9, 20)
        self.assertEqual(window_A.string, self.win.string)
        self.assertEqual(window_A.win_start, self.win.win_start)
        self.assertEqual(window_A.win_end, self.win.win_end)

    def test_is_crossed(self):
        """Size-1 windows around offsets 15..20 and 8..14 are reported as crossed."""
        filename = 'test_crossed_window.txt'
        self._index_text(filename,
                         'The girl named Alina Zakharova is a student')
        window_A = windows.Context_Window.get_window(
            filename, Position_Plus(0, 15, 20), 1)
        window_B = windows.Context_Window.get_window(
            filename, Position_Plus(0, 8, 14), 1)
        crossed_AB = window_A.is_crossed(window_B)
        self.assertEqual(True, crossed_AB)

    def test_not_crossed(self):
        """Size-1 windows around offsets 31..33 and 8..14 are not crossed."""
        filename = 'test_not_crossed_window.txt'
        self._index_text(filename,
                         'The girl named Alina Zakharova is a student')
        window_A = windows.Context_Window.get_window(
            filename, Position_Plus(0, 31, 33), 1)
        window_B = windows.Context_Window.get_window(
            filename, Position_Plus(0, 8, 14), 1)
        crossed_AB = window_A.is_crossed(window_B)
        self.assertEqual(False, crossed_AB)

    def test_extend_window(self):
        """Extending a size-1 window around 6..15 yields the window 0..30."""
        filename = 'test_extend_window.txt'
        text = 'Alina Zakharova is a student!!'
        self._index_text(filename, text)
        window = windows.Context_Window.get_window(
            filename, Position_Plus(0, 6, 15), 1)
        window.extend_window()
        extended_window = Context_Window(
            text, [Position_Plus(0, 6, 15)], 0, 30)
        self.assertEqual(window, extended_window)

    def test_extend_window_two_words(self):
        """Uniting two windows and extending the result yields the window 0..30."""
        filename = 'test_extend_window.txt'
        text = 'Alina Zakharova is a student!!'
        self._index_text(filename, text)
        window_one = windows.Context_Window.get_window(
            filename, Position_Plus(0, 6, 15), 1)
        window_two = windows.Context_Window.get_window(
            filename, Position_Plus(0, 0, 5), 1)
        window_one.get_united_window(window_two)
        window_one.extend_window()
        extended_window = Context_Window(
            text,
            [Position_Plus(0, 6, 15),
             Position_Plus(0, 0, 5)], 0, 30)
        self.assertEqual(window_one, extended_window)

    def test_extend_window_rus(self):
        """Extending a window around 28..36 of a Russian text yields 22..55."""
        filename = 'test_extend_window_rus.txt'
        text = (
            'Прогать очень сложно! Алина Захарова студент лингвист!! Аня любит немецкий. В Петербурге идет дождь.'
        )
        self._index_text(filename, text)
        window = windows.Context_Window.get_window(
            filename, Position_Plus(0, 28, 36), 1)
        window.extend_window()
        extended_window = Context_Window(
            text, [Position_Plus(0, 28, 36)], 22, 55)
        self.assertEqual(window, extended_window)

    def test_extend_window_rus_one(self):
        """Extending a window around 0..4 of a Russian sentence yields 0..41."""
        filename = 'test_extend_window_rus.txt'
        text = 'Пьер с грустью слышал над собою насмешки.'
        self._index_text(filename, text)
        window = windows.Context_Window.get_window(
            filename, Position_Plus(0, 0, 4), 1)
        window.extend_window()
        extended_window = Context_Window(
            text, [Position_Plus(0, 0, 4)], 0, 41)
        self.assertEqual(window, extended_window)

    def test_extend_window_rus_two(self):
        """Extending a window around 34..38 of a Russian text yields 0..119."""
        filename = 'test_extend_window_rus.txt'
        text = (
            'С разных сторон виднелись пожары. Пьер тогда еще не понимал значения сожженной Москвы и с ужасом смотрел на эти пожары.'
        )
        self._index_text(filename, text)
        window = windows.Context_Window.get_window(
            filename, Position_Plus(0, 34, 38), 1)
        window.extend_window()
        extended_window = Context_Window(
            text, [Position_Plus(0, 34, 38)], 0, 119)
        self.assertEqual(window, extended_window)

    def test_already_extended_window(self):
        """Creating a size-2 window around offsets 16..18 must not raise.

        NOTE(review): this test makes no assertion about the window itself.
        """
        filename = 'test_already_extended_window.txt'
        self._index_text(filename, 'Alina Zakharova is a student!!')
        windows.Context_Window.get_window(
            filename, Position_Plus(0, 16, 18), 2)

    def test_highlight_window_one(self):
        """highlight_window wraps the word at 6..15 in <b> tags."""
        filename = 'test_highlight_window.txt'
        self._index_text(filename, 'Alina Zakharova is a student')
        window = windows.Context_Window.get_window(
            filename, Position_Plus(0, 6, 15), 1)
        result = window.highlight_window()
        output_string = 'Alina <b>Zakharova</b> is'
        self.assertEqual(result, output_string)