Ejemplo n.º 1
0
def setup_de_parser():
    """Build a ``UniversalCourtsParser`` configured with the German settings.

    The configuration uses ``DeLanguageTokens`` for conjunctions and
    abbreviations, the German-labelled court dictionary columns, and the
    ``de_courts.csv`` file located relative to this module.

    Returns:
        The ``UniversalCourtsParser`` created from those parameters.
    """
    def make_final_e_optional(text):
        # Replace a trailing 'e' with the regex fragment '[e]?'.
        # NOTE(review): presumably lets a keyword match with or without
        # its final 'e' -- confirm against the parser's keyword handling.
        return re.sub('e$', '[e]?', text)

    params = ParserInitParams()
    params.key_word_preproc_func = make_final_e_optional
    # Compiled without flags, so this pattern is case-sensitive.
    params.court_pattern_checker = re.compile('gericht')

    # Line splitting: fixed punctuation plus the German conjunctions.
    splitter = LineSplitParams()
    punctuation = {'\n', '.', ';', ','}
    splitter.line_breaks = punctuation | set(DeLanguageTokens.conjunctions)
    splitter.abbreviations = DeLanguageTokens.abbreviations
    splitter.abbr_ignore_case = True
    params.split_ptrs = splitter

    # Logical field name -> column header used for that field.
    params.column_names = {
        'type': 'Court Type (de-DE)',
        'name': 'Court Name (de-DE)',
        'jurisdiction': 'Jurisdiction',
        'alias': 'Alias (de-DE)',
    }

    module_dir = os.path.dirname(__file__)
    params.dataframe_paths = [
        os.path.join(module_dir, "../../config/de/de_courts.csv"),
    ]
    return UniversalCourtsParser(params)
Ejemplo n.º 2
0
def setup_es_parser():
    """Build a ``UniversalCourtsParser`` configured with the Spanish settings.

    The configuration reads ``es_courts.csv`` under ``lexnlp_base_path`` and
    uses ``EsLanguageTokens`` for conjunctions and abbreviations.

    Returns:
        The ``UniversalCourtsParser`` created from those parameters.
    """
    params = ParserInitParams()
    courts_csv = os.path.join(lexnlp_base_path, 'lexnlp/config/es/es_courts.csv')
    params.dataframe_paths = [courts_csv]

    # Line splitting: fixed punctuation plus the Spanish conjunctions.
    splitter = LineSplitParams()
    punctuation = {'\n', '.', ';', ','}
    splitter.line_breaks = punctuation | set(EsLanguageTokens.conjunctions)
    splitter.abbreviations = EsLanguageTokens.abbreviations
    splitter.abbr_ignore_case = True
    params.split_ptrs = splitter

    # Case-insensitive pattern for the word 'tribunal'.
    params.court_pattern_checker = re.compile('tribunal', re.IGNORECASE)
    return UniversalCourtsParser(params)
Ejemplo n.º 3
0
def setup_en_parser():
    """Build a ``UniversalCourtsParser`` configured with the English settings.

    Four court dictionaries (US state, US, CA and AU CSV files) are
    referenced by paths relative to this module's directory, and
    ``EnLanguageTokens`` supplies conjunctions and abbreviations.

    Returns:
        The ``UniversalCourtsParser`` created from those parameters.
    """
    module_dir = os.path.dirname(__file__)
    relative_csv_paths = (
        "../../config/en/us_state_courts.csv",
        "../../config/en/us_courts.csv",
        "../../config/en/ca_courts.csv",
        "../../config/en/au_courts.csv",
    )

    params = ParserInitParams()
    params.dataframe_paths = [os.path.join(module_dir, rel)
                              for rel in relative_csv_paths]

    # Line splitting: fixed punctuation plus the English conjunctions.
    splitter = LineSplitParams()
    punctuation = {'\n', '.', ';', ','}
    splitter.line_breaks = punctuation | set(EnLanguageTokens.conjunctions)
    splitter.abbreviations = EnLanguageTokens.abbreviations
    splitter.abbr_ignore_case = True
    params.split_ptrs = splitter

    # Case-insensitive pattern for the word 'court'.
    params.court_pattern_checker = re.compile('court', re.IGNORECASE)
    return UniversalCourtsParser(params)
    def make_en_parser(self):
        """Build a ``UniversalCourtsParser`` fed by the hosted US courts CSV.

        The only dictionary source is the URL of ``us_courts.csv`` in the
        lexpredict-legal-dictionary repository (tag 1.0.2);
        ``EnLanguageTokens`` supplies conjunctions and abbreviations.

        Returns:
            The ``UniversalCourtsParser`` created from those parameters.
        """
        courts_csv_url = "https://raw.githubusercontent.com/LexPredict/lexpredict-legal-dictionary/1.0.2/en/legal/us_courts.csv"

        params = ParserInitParams()
        # Case-insensitive pattern for the word 'court'.
        params.court_pattern_checker = re.compile('court', re.IGNORECASE)
        params.dataframe_paths = [courts_csv_url]

        # Line splitting: fixed punctuation plus the English conjunctions.
        splitter = LineSplitParams()
        punctuation = {'\n', '.', ';', ','}
        splitter.line_breaks = punctuation | set(EnLanguageTokens.conjunctions)
        splitter.abbreviations = EnLanguageTokens.abbreviations
        splitter.abbr_ignore_case = True
        params.split_ptrs = splitter

        return UniversalCourtsParser(params)
Ejemplo n.º 5
0
def setup_en_parser():
    """Build a ``UniversalCourtsParser`` configured with the English settings.

    Four court dictionaries (US state, US, CA and AU CSV files) are read
    from the ``lexnlp/config/en`` directory under ``lexnlp_base_path``, and
    ``EnLanguageTokens`` supplies conjunctions and abbreviations.

    Returns:
        The ``UniversalCourtsParser`` created from those parameters.
    """
    params = ParserInitParams()
    config_dir = os.path.join(lexnlp_base_path, 'lexnlp/config/en')
    csv_names = ('us_state_courts.csv',
                 'us_courts.csv',
                 'ca_courts.csv',
                 'au_courts.csv')
    params.dataframe_paths = [os.path.join(config_dir, csv_name)
                              for csv_name in csv_names]

    # Line splitting: fixed punctuation plus the English conjunctions.
    splitter = LineSplitParams()
    punctuation = {'\n', '.', ';', ','}
    splitter.line_breaks = punctuation | set(EnLanguageTokens.conjunctions)
    splitter.abbreviations = EnLanguageTokens.abbreviations
    splitter.abbr_ignore_case = True
    params.split_ptrs = splitter

    # Case-insensitive pattern for the word 'court'.
    params.court_pattern_checker = re.compile('court', re.IGNORECASE)
    return UniversalCourtsParser(params)