Esempi in Python per LineSplitParams.line_breaks

Linguaggio di programmazione: Python

Spazio dei nomi/nome del pacchetto: lexnlp.utils.lines_processing.line_processor

Classe/tipologia: LineSplitParams

Metodo/funzione: line_breaks

Esempi su hotexamples.com: 10

LineSplitParams.line_breaks in Python: 10 esempi trovati. Questi sono i migliori esempi reali in Python per lexnlp.utils.lines_processing.line_processor.LineSplitParams.line_breaks, estratti da progetti open source. Puoi valutarli per aiutarci a migliorare la qualità dei nostri esempi.

Metodi utilizzati di frequente

Mostra / Nascondi

LineSplitParams(12)

line_breaks(10)

abbr_ignore_case(8)

abbreviations(8)

Esempio n. 1

Mostra file

File: copyrights.py Progetto: leiqi/lexpredict-lexnlp

 def init_parser():
     """Attach a freshly configured LineProcessor to CopyrightDeParser.

     The processor is built from a LineSplitParams whose line_breaks are
     newline, '.', ';', '!' and '?', whose abbreviations come from
     DeLanguageTokens, and whose abbr_ignore_case flag is enabled. The
     result is stored on the class attribute
     ``CopyrightDeParser.line_processor``; nothing is returned.
     """
     params = LineSplitParams()
     params.line_breaks = {
         '\n',
         '.',
         ';',
         '!',
         '?',
     }
     params.abbreviations = DeLanguageTokens.abbreviations
     params.abbr_ignore_case = True

     processor = LineProcessor(line_split_params=params)
     CopyrightDeParser.line_processor = processor

Esempio n. 2

Mostra file

 def test_line_processor_phrases_de(self):
     """Split a German legal paragraph on newline, '.' and ';'.

     The sample is wrapped in leading and trailing newlines and contains
     two sentences; the test expects three pieces from
     ``split_text_on_line_with_endings``.
     """
     text = """
     (2) Vermögenswerte im Sinne dieses Gesetzes sind bebaute und unbebaute Grundstücke sowie rechtlich selbständige Gebäude und Baulichkeiten (im folgenden Grundstücke und Gebäude genannt), Nutzungsrechte und dingliche Rechte an Grundstücken oder Gebäuden, bewegliche Sachen sowie gewerbliche Schutzrechte, Urheberrechte und verwandte Schutzrechte. Vermögenswerte im Sinne dieses Gesetzes sind auch Kontoguthaben und sonstige auf Geldzahlungen gerichtete Forderungen sowie Eigentum/Beteiligungen an Unternehmen oder an Betriebsstätten/Zweigniederlassungen von Unternehmen mit Sitz außerhalb der Deutschen Demokratischen Republik.
     """
     params = LineSplitParams()
     params.line_breaks = {'\n', '.', ';'}
     processor = LineProcessor(line_split_params=params)

     # Materialize the result so it can be counted.
     pieces = list(processor.split_text_on_line_with_endings(text))

     # Two sentences plus one extra piece for an empty line.
     assert len(pieces) == 3

Esempio n. 3

Mostra file

 def test_de_linebreaks(self):
     """Verify that a newline alone does not split the text.

     line_breaks here holds only '.', ';', '!' and '?' (no newline), so
     the sample, whose only separator is a newline, is expected to come
     back as a single piece.
     """
     params = LineSplitParams()
     params.line_breaks = {'.', ';', '!', '?'}
     params.abbreviations = {'nr.', 'abs.', 'no.', 'act.', 'inc.', 'p.'}
     params.abbr_ignore_case = True
     processor = LineProcessor(line_split_params=params)

     sample = 'Nach der Allgemeine\nGebührenverordnung'
     pieces = list(processor.split_text_on_line_with_endings(sample))

     self.assertEqual(1, len(pieces))

Esempio n. 4

Mostra file

    def test_line_processor_phrases(self):
        text = """
Once upon a midnight dreary

While I pounded, weak and weary. Over many a quaint and curious volume of forgotten lore,
While I nodded, nearly napping; suddenly there came a tapping,
As of some one gently rapping, rapping at my chamber door."""
        ptrs = LineSplitParams()
        ptrs.line_breaks = {'\n', '.', ';'}
        proc = LineProcessor(line_split_params=ptrs)
        lines = [line for line in proc.split_text_on_line_with_endings(text)]
        assert len(lines) == 6

Esempio n. 5

Mostra file

File: definitions.py Progetto: tjxnor/lexpredict-lexnlp

def make_es_definitions_parser():
    """Build and return a UniversalDefinitionsParser for Spanish text.

    The parser receives three matching functions (one from
    CommonDefinitionPatterns, two from SpanishParsingMethods) and a
    LineSplitParams configured with newline, '.', ';', '!' and '?' as
    line_breaks, the abbreviations from EsLanguageTokens, and
    abbr_ignore_case enabled.
    """
    params = LineSplitParams()
    params.line_breaks = {
        '\n',
        '.',
        ';',
        '!',
        '?',
    }
    params.abbreviations = EsLanguageTokens.abbreviations
    params.abbr_ignore_case = True

    matchers = [
        CommonDefinitionPatterns.match_es_def_by_semicolon,
        SpanishParsingMethods.match_es_def_by_hereafter,
        SpanishParsingMethods.match_es_def_by_reffered,
    ]
    return UniversalDefinitionsParser(matchers, params)

Esempio n. 6

Mostra file

File: definitions.py Progetto: tjxnor/lexpredict-lexnlp

def make_de_definitions_parser():
    """Build and return a UniversalDefinitionsParser using German resources.

    The parser receives three matching functions (one from
    CommonDefinitionPatterns, two from DeutscheParsingMethods) and a
    LineSplitParams configured with newline, '.', ';', '!' and '?' as
    line_breaks, the abbreviations from DeLanguageTokens, and
    abbr_ignore_case enabled. Its ``prohibited_words`` attribute is set
    to the distinct DeLanguageTokens articles and conjunctions.
    """
    params = LineSplitParams()
    params.line_breaks = {
        '\n',
        '.',
        ';',
        '!',
        '?',
    }
    params.abbreviations = DeLanguageTokens.abbreviations
    params.abbr_ignore_case = True

    matchers = [
        CommonDefinitionPatterns.match_es_def_by_semicolon,
        DeutscheParsingMethods.match_ist_jeder,
        DeutscheParsingMethods.match_im_sinne,
    ]

    de_parser = UniversalDefinitionsParser(matchers, params)
    # Concatenate both word collections, then deduplicate into a set.
    de_parser.prohibited_words = set(
        DeLanguageTokens.articles + DeLanguageTokens.conjunctions)
    return de_parser

Esempio n. 7

Mostra file

    def test_line_processor_phrases_abbr(self):
        """Compare piece counts with and without abbreviations configured.

        With only line_breaks set, the sample is expected to yield three
        pieces. After 'nr.' and 'abs.' are registered as abbreviations
        (abbr_ignore_case enabled), a new processor built from the same
        params is expected to yield two.
        """
        text = 'Articolul saisprezece (16) nr. 2. Textul:'
        params = LineSplitParams()
        params.line_breaks = {'\n', '.', ';'}

        # First pass: no abbreviations have been assigned on params yet.
        plain_processor = LineProcessor(line_split_params=params)
        plain_pieces = list(
            plain_processor.split_text_on_line_with_endings(text))
        assert len(plain_pieces) == 3

        # Second pass: same params object, now with abbreviations, fed to
        # a new processor.
        params.abbreviations = {'nr.', 'abs.'}
        params.abbr_ignore_case = True
        abbr_processor = LineProcessor(line_split_params=params)
        abbr_pieces = list(
            abbr_processor.split_text_on_line_with_endings(text))
        assert len(abbr_pieces) == 2

Esempio n. 8

Mostra file

    def test_de_abbrs(self):
        """Check abbreviation handling for 'a.D.' in two spellings.

        The text with 'a.D.' spelled exactly as registered is expected to
        stay in one piece; the re-cased 'A.d.' variant is expected to be
        split into more than one piece.
        """
        params = LineSplitParams()
        params.line_breaks = {'.', ';', '!', '?'}
        params.abbreviations = {'nr.', 'abs.', 'no.', 'act.', 'a.D.'}
        params.abbr_ignore_case = True
        processor = LineProcessor(line_split_params=params)

        exact_case = list(
            processor.split_text_on_line_with_endings('1000 a.D. und drang'))
        self.assertEqual(1, len(exact_case))

        # NOTE(review): abbr_ignore_case is True, yet the re-cased spelling
        # is expected to split. The matching rule lives in LineProcessor,
        # which is not visible here -- confirm this is intended.
        other_case = list(
            processor.split_text_on_line_with_endings('1000 A.d. und drang'))
        self.assertGreater(len(other_case), 1)

Esempio n. 9

Mostra file

def make_es_copyrights_parser():
    """Build and return a CopyrightParser using Spanish resources.

    The parser receives two bound matching methods from a new
    SpanishCopyrightParsingMethods instance and a LineSplitParams
    configured with newline, '.', ';', '!' and '?' as line_breaks, the
    abbreviations from EsLanguageTokens, and abbr_ignore_case enabled.
    Its ``prohibited_words`` attribute is set to the distinct
    EsLanguageTokens articles and conjunctions.
    """
    params = LineSplitParams()
    params.line_breaks = {
        '\n',
        '.',
        ';',
        '!',
        '?',
    }
    params.abbreviations = EsLanguageTokens.abbreviations
    params.abbr_ignore_case = True

    es_methods = SpanishCopyrightParsingMethods()
    matchers = [
        es_methods.match_word_c_years,
        es_methods.match_c_years_word,
    ]

    parser = CopyrightParser(matchers, params)
    # Concatenate both word collections, then deduplicate into a set.
    parser.prohibited_words = set(
        EsLanguageTokens.articles + EsLanguageTokens.conjunctions)
    return parser

Esempio n. 10

Mostra file

    def __init__(self, gesetze_df: pd.DataFrame, verordnungen_df: pd.DataFrame,
                 concept_df: pd.DataFrame):
        """Create three DataframeEntityParser instances sharing one LineProcessor.

        Args:
            gesetze_df: dataframe handed to ``self.gesetze_parser``; parsed
                on the 'Kurztitel', 'Titel' and 'Abkürzung' columns.
            verordnungen_df: dataframe handed to
                ``self.verordnungen_parser``; parsed on the same columns.
            concept_df: dataframe handed to ``self.concept_parser``; parsed
                on column 'b', with 'a' supplying 'External Reference Type'.
        """
        self.locale = ''
        parse_columns = ('Kurztitel', 'Titel', 'Abkürzung')
        dependent_columns = {'Titel': 'External Reference Normalized'}
        preformed_entity = {
            'External Reference Type': 'Laws and Rules',
            'External Reference Source': 'BaFin',
            'External Reference Issuing Country': 'Germany'
        }
        split_params = LineSplitParams()
        split_params.line_breaks = {'.', ';', '!', '?'}
        split_params.abbreviations = DeLanguageTokens.abbreviations
        split_params.abbr_ignore_case = True
        proc = LineProcessor(line_split_params=split_params)

        self.gesetze_parser = DataframeEntityParser(
            gesetze_df,
            parse_columns,
            result_columns=dependent_columns,
            preformed_entity=preformed_entity,
            line_processor=proc)

        self.verordnungen_parser = DataframeEntityParser(
            verordnungen_df,
            parse_columns,
            result_columns=dependent_columns,
            preformed_entity=preformed_entity,
            line_processor=proc)

        # The concept parser takes its reference type from column 'a', so
        # the fixed 'External Reference Type' must not be part of its
        # preformed entity. Build a separate dict instead of popping the key
        # from the shared one: the two parsers above were handed that same
        # dict object and, if they keep a reference to it (not visible
        # here -- TODO confirm), an in-place pop would strip the type from
        # their entities as well.
        concept_parse_columns = ('b', )
        concept_dependent_columns = {
            'b': 'External Reference Normalized',
            'a': 'External Reference Type'
        }
        concept_preformed_entity = {
            key: value
            for key, value in preformed_entity.items()
            if key != 'External Reference Type'
        }

        self.concept_parser = DataframeEntityParser(
            concept_df,
            concept_parse_columns,
            result_columns=concept_dependent_columns,
            preformed_entity=concept_preformed_entity,
            line_processor=proc)