Esempio n. 1
0
    def test_update(self):
        """Check ``update_separation`` against a from-scratch separation.

        The non-empty lines of the module-level ``text`` are reordered and
        mixed with newly inserted paragraphs.  Every column of the fresh
        separation must be reproduced by the incrementally updated frame,
        and the ``updated`` / ``original_*`` columns of the updated frame
        must hold the values pinned below.
        """
        source_pars = (
            Query.en(text.split('\n')).where(lambda line: line != '').to_list()
        )
        source_df = Separator.separate_paragraphs(source_pars)

        # For each paragraph of the edited text: the index of the source
        # paragraph it was taken from, or None for a newly inserted one.
        origins = [0, 1, None, 4, 3, None, None, 6, None]
        inserted = iter([
            'Нулевая. Вставка',
            'Первая. Вставка',
            'Вторая. Вставка',
            'Третья. Вставка',
        ])
        edited_pars = [
            next(inserted) if origin is None else source_pars[origin]
            for origin in origins
        ]

        fresh_df = Separator.separate_paragraphs(edited_pars)
        updated_df = Separator.update_separation(
            source_df, edited_pars, origins)

        for column in fresh_df.columns:
            self.assertListEqual(
                list(fresh_df[column]), list(updated_df[column]))

        # NOTE(review): process-wide pandas display option; looks like a
        # debugging leftover -- confirm before removing.
        pd.options.display.width = None

        # The expected lists are laid out three values per line; judging by
        # the expected original_paragraph_id values, each line lines up with
        # one paragraph of the edited text.  Rows expected to be flagged as
        # updated are also expected to carry -1 in every original_* column.
        expected_updated = [
            False, False, False,
            False, False, False,
            True, True, True,
            False, False, False,
            False, False, False,
            True, True, True,
            True, True, True,
            False, False, False,
            True, True, True,
        ]
        self.assertListEqual(expected_updated, list(updated_df.updated))

        expected_word_ids = [
            0, 1, 2,
            3, 4, 5,
            -1, -1, -1,
            12, 13, 14,
            9, 10, 11,
            -1, -1, -1,
            -1, -1, -1,
            18, 19, 20,
            -1, -1, -1,
        ]
        self.assertListEqual(
            expected_word_ids, list(updated_df.original_word_id))

        expected_sentence_ids = [
            0, 0, 1,
            2, 2, 3,
            -1, -1, -1,
            8, 8, 9,
            6, 6, 7,
            -1, -1, -1,
            -1, -1, -1,
            12, 12, 13,
            -1, -1, -1,
        ]
        self.assertListEqual(
            expected_sentence_ids, list(updated_df.original_sentence_id))

        expected_paragraph_ids = [
            0, 0, 0,
            1, 1, 1,
            -1, -1, -1,
            4, 4, 4,
            3, 3, 3,
            -1, -1, -1,
            -1, -1, -1,
            6, 6, 6,
            -1, -1, -1,
        ]
        self.assertListEqual(
            expected_paragraph_ids, list(updated_df.original_paragraph_id))
Esempio n. 2
0
 def test_separation_multi(self):
     """Check the ids assigned when separating two paragraphs.

     The first paragraph is expected to yield two sentences and the second
     one a single sentence; word ids are expected to run consecutively
     across both paragraphs.
     """
     paragraphs = ['первое предожение. Второе.', 'Второй параграф']
     separated = Separator.separate_paragraphs(paragraphs)

     expected_sentence_ids = [0, 0, 0, 1, 1, 2, 2]
     expected_word_ids = [0, 1, 2, 3, 4, 5, 6]
     expected_paragraph_ids = [0, 0, 0, 0, 0, 1, 1]

     self.assertListEqual(
         expected_sentence_ids, list(separated.sentence_id))
     self.assertListEqual(expected_word_ids, list(separated.word_id))
     self.assertListEqual(
         expected_paragraph_ids, list(separated.paragraph_id))
    def test_update_enrich_pymorphy(self):
        """Check ``update_enrich`` against a full pymorphy enrichment.

        A bundle built from the original text is enriched in full.  The text
        is then edited (two paragraphs swapped, one inserted), the separation
        is updated incrementally and enriched through ``update_enrich``.  The
        resulting ``pymorphy`` frame is compared, column by column via
        ``self.check``, with the frame obtained by separating and enriching
        the edited text from scratch.
        """
        enricher = PyMorphyFeaturizer().as_enricher()

        def features_by_word(bundle):
            # Flatten the index and order rows by word_id before comparing.
            return bundle.pymorphy.reset_index().sort_values('word_id')

        # Baseline: the original text, separated and fully enriched.
        base_pars = (
            Query.en(text.split('\n')).where(lambda line: line != '').to_list()
        )
        base_df = Separator.separate_paragraphs(base_pars)
        base_bundle = DataBundle(src=base_df)
        enricher.enrich(base_bundle)

        edited_pars = [
            base_pars[1],
            base_pars[0],
            'Вставленное предложение.',
            base_pars[2],
        ]

        # Under test: incremental separation followed by incremental enrich.
        updated_df = Separator.update_separation(
            base_df, edited_pars, [1, 0, None, 2])
        updated_bundle = DataBundle(src=updated_df)
        enricher.update_enrich(base_bundle, updated_bundle)

        # Control: the edited text processed from scratch.
        control_df = Separator.separate_paragraphs(edited_pars)
        control_bundle = DataBundle(src=control_df)
        enricher.enrich(control_bundle)

        updated_features = features_by_word(updated_bundle)
        control_features = features_by_word(control_bundle)

        for column in control_features.columns:
            self.check(control_features, updated_features, column)