Ejemplos de LineSplitParams.line_breaks en Python

Lenguaje de programación: Python

Namespace/Package Name: lexnlp.utils.lines_processing.line_processor

Clase / Tipo: LineSplitParams

Método / Función: line_breaks

Ejemplos en hotexamples.com: 10

Python LineSplitParams.line_breaks - 10 ejemplos encontrados. Estos son los ejemplos en Python del mundo real mejor valorados de lexnlp.utils.lines_processing.line_processor.LineSplitParams.line_breaks extraídos de proyectos de código abierto. Puedes valorar ejemplos para ayudarnos a mejorar la calidad de los ejemplos.

Métodos usados con frecuencia

Mostrar / Ocultar

LineSplitParams(12)

line_breaks(10)

abbr_ignore_case(8)

abbreviations(8)

Ejemplo n.º 1

Mostrar archivo

Archivo: copyrights.py Proyecto: leiqi/lexpredict-lexnlp

 def init_parser():
     """Install a line processor on ``CopyrightDeParser``.

     The processor splits on newlines and on '.', ';', '!' and '?', and is
     given the abbreviation set from ``DeLanguageTokens`` with
     case-insensitive abbreviation matching switched on.
     """
     de_params = LineSplitParams()
     de_params.line_breaks = {'\n', '.', ';', '!', '?'}
     de_params.abbreviations = DeLanguageTokens.abbreviations
     de_params.abbr_ignore_case = True
     processor = LineProcessor(line_split_params=de_params)
     # Stored on the class itself, so the processor is shared class-wide.
     CopyrightDeParser.line_processor = processor

Ejemplo n.º 2

Mostrar archivo

 def test_line_processor_phrases_de(self):
     """Split a German legal paragraph on newlines, periods and semicolons."""
     text = """
     (2) Vermögenswerte im Sinne dieses Gesetzes sind bebaute und unbebaute Grundstücke sowie rechtlich selbständige Gebäude und Baulichkeiten (im folgenden Grundstücke und Gebäude genannt), Nutzungsrechte und dingliche Rechte an Grundstücken oder Gebäuden, bewegliche Sachen sowie gewerbliche Schutzrechte, Urheberrechte und verwandte Schutzrechte. Vermögenswerte im Sinne dieses Gesetzes sind auch Kontoguthaben und sonstige auf Geldzahlungen gerichtete Forderungen sowie Eigentum/Beteiligungen an Unternehmen oder an Betriebsstätten/Zweigniederlassungen von Unternehmen mit Sitz außerhalb der Deutschen Demokratischen Republik.
     """
     params = LineSplitParams()
     params.line_breaks = {'\n', '.', ';'}
     splitter = LineProcessor(line_split_params=params)
     pieces = list(splitter.split_text_on_line_with_endings(text))
     # The expected count of 3 includes one extra entry for an empty line.
     assert len(pieces) == 3

Ejemplo n.º 3

Mostrar archivo

 def test_de_linebreaks(self):
     """Text with an embedded newline yields one piece when '\\n' is not a break."""
     params = LineSplitParams()
     # Note that '\n' is deliberately absent from the break characters.
     params.line_breaks = {'.', ';', '!', '?'}
     params.abbreviations = {'nr.', 'abs.', 'no.', 'act.', 'inc.', 'p.'}
     params.abbr_ignore_case = True
     splitter = LineProcessor(line_split_params=params)
     sample = 'Nach der Allgemeine\nGebührenverordnung'
     pieces = list(splitter.split_text_on_line_with_endings(sample))
     self.assertEqual(1, len(pieces))

Ejemplo n.º 4

Mostrar archivo

    def test_line_processor_phrases(self):
        text = """
Once upon a midnight dreary

While I pounded, weak and weary. Over many a quaint and curious volume of forgotten lore,
While I nodded, nearly napping; suddenly there came a tapping,
As of some one gently rapping, rapping at my chamber door."""
        ptrs = LineSplitParams()
        ptrs.line_breaks = {'\n', '.', ';'}
        proc = LineProcessor(line_split_params=ptrs)
        lines = [line for line in proc.split_text_on_line_with_endings(text)]
        assert len(lines) == 6

Ejemplo n.º 5

Mostrar archivo

Archivo: definitions.py Proyecto: tjxnor/lexpredict-lexnlp

def make_es_definitions_parser():
    """Build a ``UniversalDefinitionsParser`` set up with the Spanish matchers.

    Returns:
        The parser, constructed from three matcher functions and line-split
        parameters that use ``EsLanguageTokens.abbreviations``.
    """
    es_params = LineSplitParams()
    es_params.line_breaks = {'\n', '.', ';', '!', '?'}
    es_params.abbreviations = EsLanguageTokens.abbreviations
    es_params.abbr_ignore_case = True

    matchers = [
        CommonDefinitionPatterns.match_es_def_by_semicolon,
        SpanishParsingMethods.match_es_def_by_hereafter,
        SpanishParsingMethods.match_es_def_by_reffered,
    ]
    return UniversalDefinitionsParser(matchers, es_params)

Ejemplo n.º 6

Mostrar archivo

Archivo: definitions.py Proyecto: tjxnor/lexpredict-lexnlp

def make_de_definitions_parser():
    """Build a ``UniversalDefinitionsParser`` set up with the German matchers.

    Returns:
        The parser, with ``prohibited_words`` set to the union of
        ``DeLanguageTokens.articles`` and ``DeLanguageTokens.conjunctions``.
    """
    de_params = LineSplitParams()
    de_params.line_breaks = {'\n', '.', ';', '!', '?'}
    de_params.abbreviations = DeLanguageTokens.abbreviations
    de_params.abbr_ignore_case = True

    matchers = [
        CommonDefinitionPatterns.match_es_def_by_semicolon,
        DeutscheParsingMethods.match_ist_jeder,
        DeutscheParsingMethods.match_im_sinne,
    ]

    de_parser = UniversalDefinitionsParser(matchers, de_params)
    stop_words = DeLanguageTokens.articles + DeLanguageTokens.conjunctions
    de_parser.prohibited_words = set(stop_words)
    return de_parser

Ejemplo n.º 7

Mostrar archivo

    def test_line_processor_phrases_abbr(self):
        """Registering 'nr.' as an abbreviation reduces the piece count from 3 to 2."""
        sample = 'Articolul saisprezece (16) nr. 2. Textul:'
        params = LineSplitParams()
        params.line_breaks = {'\n', '.', ';'}

        # No abbreviations configured yet: three pieces are expected.
        plain = LineProcessor(line_split_params=params)
        plain_pieces = list(plain.split_text_on_line_with_endings(sample))
        assert len(plain_pieces) == 3

        # Same parameters plus an abbreviation list: two pieces are expected.
        params.abbreviations = {'nr.', 'abs.'}
        params.abbr_ignore_case = True
        abbr_aware = LineProcessor(line_split_params=params)
        abbr_pieces = list(abbr_aware.split_text_on_line_with_endings(sample))
        assert len(abbr_pieces) == 2

Ejemplo n.º 8

Mostrar archivo

    def test_de_abbrs(self):
        """Check splitting around the 'a.D.' abbreviation in two spellings."""
        params = LineSplitParams()
        params.line_breaks = {'.', ';', '!', '?'}
        params.abbreviations = {'nr.', 'abs.', 'no.', 'act.', 'a.D.'}
        params.abbr_ignore_case = True
        splitter = LineProcessor(line_split_params=params)

        # Spelled exactly as registered: the text must stay in one piece.
        as_registered = '1000 a.D. und drang'
        pieces = list(splitter.split_text_on_line_with_endings(as_registered))
        self.assertEqual(1, len(pieces))

        # Spelled with different casing: the test expects more than one piece.
        other_casing = '1000 A.d. und drang'
        pieces = list(splitter.split_text_on_line_with_endings(other_casing))
        self.assertGreater(len(pieces), 1)

Ejemplo n.º 9

Mostrar archivo

def make_es_copyrights_parser():
    """Build a ``CopyrightParser`` driven by the Spanish matching methods.

    Returns:
        The parser, with ``prohibited_words`` set to the union of
        ``EsLanguageTokens.articles`` and ``EsLanguageTokens.conjunctions``.
    """
    es_params = LineSplitParams()
    es_params.line_breaks = {'\n', '.', ';', '!', '?'}
    es_params.abbreviations = EsLanguageTokens.abbreviations
    es_params.abbr_ignore_case = True

    es_methods = SpanishCopyrightParsingMethods()
    matchers = [es_methods.match_word_c_years, es_methods.match_c_years_word]

    es_parser = CopyrightParser(matchers, es_params)
    stop_words = EsLanguageTokens.articles + EsLanguageTokens.conjunctions
    es_parser.prohibited_words = set(stop_words)
    return es_parser

Ejemplo n.º 10

Mostrar archivo

    def __init__(self, gesetze_df: pd.DataFrame, verordnungen_df: pd.DataFrame,
                 concept_df: pd.DataFrame):
        """Create three ``DataframeEntityParser`` instances sharing one line processor.

        Args:
            gesetze_df: Data frame handed to ``self.gesetze_parser``; parsed
                on the 'Kurztitel', 'Titel' and 'Abkürzung' columns.
            verordnungen_df: Data frame handed to ``self.verordnungen_parser``;
                parsed on the same three columns.
            concept_df: Data frame handed to ``self.concept_parser``; parsed
                on column 'b', with columns 'b' and 'a' listed as result
                columns.
        """
        self.locale = ''

        split_params = LineSplitParams()
        split_params.line_breaks = {'.', ';', '!', '?'}
        split_params.abbreviations = DeLanguageTokens.abbreviations
        split_params.abbr_ignore_case = True
        proc = LineProcessor(line_split_params=split_params)

        # Configuration shared by the two law / regulation parsers.
        law_parse_columns = ('Kurztitel', 'Titel', 'Abkürzung')
        law_result_columns = {'Titel': 'External Reference Normalized'}
        law_entity = {
            'External Reference Type': 'Laws and Rules',
            'External Reference Source': 'BaFin',
            'External Reference Issuing Country': 'Germany'
        }

        self.gesetze_parser = DataframeEntityParser(
            gesetze_df,
            law_parse_columns,
            result_columns=law_result_columns,
            preformed_entity=law_entity,
            line_processor=proc)

        self.verordnungen_parser = DataframeEntityParser(
            verordnungen_df,
            law_parse_columns,
            result_columns=law_result_columns,
            preformed_entity=law_entity,
            line_processor=proc)

        # The concept parser takes 'External Reference Type' from column 'a'
        # instead of a fixed value, so that key is left out of its entity.
        # Build a NEW dict rather than popping from ``law_entity``: that
        # object was already passed to the two parsers above, and popping
        # from it would strip the key from them too if they keep a reference
        # (NOTE(review): confirm whether DataframeEntityParser copies it).
        concept_parse_columns = ('b', )
        concept_result_columns = {
            'b': 'External Reference Normalized',
            'a': 'External Reference Type'
        }
        concept_entity = {
            key: value
            for key, value in law_entity.items()
            if key != 'External Reference Type'
        }

        self.concept_parser = DataframeEntityParser(
            concept_df,
            concept_parse_columns,
            result_columns=concept_result_columns,
            preformed_entity=concept_entity,
            line_processor=proc)