Beispiel #1
0
 def test_highlight(self):
     """Check that DfViewer wraps the flagged word in a coloured span."""
     frame = Separator.separate_string('Мама мыла раму')
     # Flag only the row whose word_id equals 1.
     frame['highlight'] = frame.word_id == 1
     viewer = DfViewer(as_html_object=False, highlight_column='highlight')
     expected = 'Мама <span style="background-color:#ffdddd;">мыла</span> раму'
     rendered = viewer.convert(frame)
     self.assertEqual(expected, rendered)
Beispiel #2
0
 def test_separation(self):
     """Check word offsets and lengths for a string with mixed punctuation."""
     source = '«Какой-нибудь»   текст —  с знаками… И еще словами!.. Вот так.'
     expected_offsets = [
         0, 1, 13, 17, 23, 26, 28, 35, 37, 39, 43, 50, 54, 58, 61,
     ]
     expected_lengths = [1, 12, 1, 5, 1, 1, 7, 1, 1, 3, 7, 3, 3, 3, 1]

     frame = Separator.separate_string(source)

     self.assertListEqual(list(frame.word_offset), expected_offsets)
     self.assertListEqual(list(frame.word_length), expected_lengths)
Beispiel #3
0
 @classmethod
 def setUpClass(cls) -> None:
     """Build the featurizers and their outputs once for the whole test case.

     Stores on the class:
       * ``analyzer`` - a ``SlovnetFeaturizer`` instance;
       * ``context_featurizer`` - a ``SlovnetContextFeaturizer`` instance;
       * ``result`` - output of ``analyzer.featurize`` on the separated text;
       * ``context_result`` - output of ``context_featurizer.featurize``
         applied to ``result``.

     unittest invokes this hook on the class itself, so it must be a
     classmethod; without the decorator the call fails with a missing
     ``cls`` argument.
     """
     super(SlovnetFeaturizersTestCase, cls).setUpClass()
     cls.analyzer = SlovnetFeaturizer()
     cls.context_featurizer = SlovnetContextFeaturizer()
     # NOTE(review): `text` is not defined in this method - presumably a
     # module-level fixture; confirm it exists in the enclosing module.
     df = Separator.separate_string(text)
     # Shift every id column by 100 before featurizing.
     for column in ('word_id', 'sentence_id', 'paragraph_id'):
         df[column] += 100
     cls.result = cls.analyzer.featurize(df)
     cls.context_result = cls.context_featurizer.featurize(cls.result)
Beispiel #4
0
 def test_pymorphy(self):
     """Check index, normal forms and columns of PyMorphyFeaturizer output."""
     expected_index = [
         100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110,
     ]
     expected_normal_forms = [
         'он', 'подойти', 'к', 'дверь', '.', 'за', 'она', 'никто', 'не',
         'быть', '.',
     ]
     expected_columns = [
         'normal_form', 'alternatives', 'score', 'delta_score', 'POS',
         'animacy', 'gender', 'number', 'case', 'aspect', 'transitivity',
         'person', 'tense', 'mood', 'voice', 'involvement',
     ]

     frame = Separator.separate_string(text)
     # Shift word ids by 100; the expected index above starts at 100.
     frame.word_id += 100
     featurized = PyMorphyFeaturizer().featurize(frame)

     self.assertListEqual(expected_index, list(featurized.index))
     self.assertListEqual(expected_normal_forms,
                          list(featurized.normal_form))
     self.assertListEqual(expected_columns, list(featurized.columns))
Beispiel #5
0
    def test_usage_of_provided_pymorphy_column(self):
        """Check RepetitionsAlgorithm against bundles with and without a pymorphy frame.

        Three runs over the same two-word text:
          1. no pymorphy frame supplied - every repetition_status is truthy;
          2. a pymorphy frame giving both words the same normal form -
             not every repetition_status is truthy;
          3. a pymorphy frame with two different normal forms and a
             different flag set - not every repetition_status is truthy.
        """
        base = Separator.separate_string("окно открыто")
        base['check_requested'] = True

        # Run 1: bundle without a pymorphy frame.
        plain = base.copy()
        algorithm = RepetitionsAlgorithm(50, False, True, False)
        algorithm.run_on_bundle(DataBundle(src=plain))
        self.assertTrue(plain.repetition_status.all())

        # Run 2: both words share one normal form in the supplied frame.
        same_form = base.copy()
        morph = same_form[['word_id']].copy()
        morph['normal_form'] = 'окно'
        morph = morph.set_index('word_id')
        algorithm = RepetitionsAlgorithm(50, False, True, False)
        algorithm.run_on_bundle(DataBundle(src=same_form, pymorphy=morph))
        self.assertFalse(same_form.repetition_status.all())

        # Run 3: distinct normal forms, algorithm built with other flags.
        distinct_forms = base.copy()
        morph = distinct_forms[['word_id']].copy()
        morph['normal_form'] = ['двуединый', 'единообразие']
        morph = morph.set_index('word_id')
        algorithm = RepetitionsAlgorithm(50, False, False, True)
        algorithm.run_on_bundle(
            DataBundle(src=distinct_forms, pymorphy=morph))
        self.assertFalse(distinct_forms.repetition_status.all())
Beispiel #6
0
 def test_viewer(self):
     """Check that separating a string and converting it back gives the same text."""
     original = 'Мама мыла раму'
     viewer = DfViewer(as_html_object=False)
     frame = Separator.separate_string(original)
     restored = viewer.convert(frame)
     self.assertEqual(original, restored)
Beispiel #7
0
 def test_separation_columns(self):
     """Check that the separated frame has exactly the Separator.COLUMNS columns, in order."""
     source = '«Какой-нибудь»   текст —  с знаками… И еще словами!.. Вот так.'
     frame = Separator.separate_string(source)
     actual_columns = list(frame.columns)
     self.assertListEqual(Separator.COLUMNS, actual_columns)
Beispiel #8
0
 def test_separator_types(self):
     """Check the word_type assigned to each token of a mixed-script string."""
     # chr(8242) is inserted in the middle of the second word.
     inner_char = chr(8242)
     source = 'Слово сло' + inner_char + 'во! Qwe - йцу ' + "it's"
     expected_types = [
         'ru', 'ru', 'punct', 'unk', 'punct', 'ru', 'unk', 'unk', 'unk',
     ]
     frame = Separator.separate_string(source)
     self.assertListEqual(expected_types, list(frame.word_type))
Beispiel #9
0
 def test_separation_string_with_nl(self):
     """Check the paragraph_id values for a string containing a newline."""
     frame = Separator.separate_string('Строка\nВторая строка')
     paragraph_ids = list(frame.paragraph_id)
     self.assertListEqual([0, 1, 1], paragraph_ids)