def make_en_parser(self):
    """Build a ``UniversalCourtsParser`` configured from a remote US courts CSV.

    The only dataframe source is a URL pointing at ``us_courts.csv`` in the
    LexPredict legal dictionary repository (path segment ``1.0.2``), not a
    local file. ``self`` is not read by this method.

    Returns:
        The ``UniversalCourtsParser`` built from the assembled
        ``ParserInitParams``.
    """
    # Line-splitting setup: punctuation plus the English conjunction tokens
    # are assigned as line breaks; abbreviations are matched ignoring case.
    splitting = LineSplitParams()
    splitting.line_breaks = {'\n', '.', ';', ','} | set(EnLanguageTokens.conjunctions)
    splitting.abbreviations = EnLanguageTokens.abbreviations
    splitting.abbr_ignore_case = True

    courts_csv_url = "https://raw.githubusercontent.com/LexPredict/lexpredict-legal-dictionary/1.0.2/en/legal/us_courts.csv"

    init_params = ParserInitParams()
    init_params.court_pattern_checker = re.compile('court', re.IGNORECASE)
    init_params.dataframe_paths = [courts_csv_url]
    init_params.split_ptrs = splitting

    return UniversalCourtsParser(init_params)
def setup_en_parser():
    """Build a ``UniversalCourtsParser`` from the bundled English court CSVs.

    The four CSV files (US state, US, CA and AU courts) are resolved under
    ``<lexnlp_base_path>/lexnlp/config/en``.

    Returns:
        The ``UniversalCourtsParser`` built from the assembled
        ``ParserInitParams``.
    """
    config_dir = os.path.join(lexnlp_base_path, 'lexnlp/config/en')
    csv_names = ('us_state_courts.csv', 'us_courts.csv',
                 'ca_courts.csv', 'au_courts.csv')

    # Line-splitting setup: punctuation plus the English conjunction tokens
    # are assigned as line breaks; abbreviations are matched ignoring case.
    splitting = LineSplitParams()
    splitting.line_breaks = {'\n', '.', ';', ','} | set(EnLanguageTokens.conjunctions)
    splitting.abbreviations = EnLanguageTokens.abbreviations
    splitting.abbr_ignore_case = True

    init_params = ParserInitParams()
    init_params.dataframe_paths = [os.path.join(config_dir, name) for name in csv_names]
    init_params.split_ptrs = splitting
    init_params.court_pattern_checker = re.compile('court', re.IGNORECASE)

    return UniversalCourtsParser(init_params)
def setup_en_parser():
    """Build a ``UniversalCourtsParser`` from the English court CSVs.

    The CSV locations are expressed relative to the directory containing this
    module (``../../config/en/...``); the paths are joined but not normalised.

    Returns:
        The ``UniversalCourtsParser`` built from the assembled
        ``ParserInitParams``.
    """
    relative_csvs = (
        "../../config/en/us_state_courts.csv",
        "../../config/en/us_courts.csv",
        "../../config/en/ca_courts.csv",
        "../../config/en/au_courts.csv",
    )

    # Line-splitting setup: punctuation plus the English conjunction tokens
    # are assigned as line breaks; abbreviations are matched ignoring case.
    splitting = LineSplitParams()
    splitting.line_breaks = {'\n', '.', ';', ','} | set(EnLanguageTokens.conjunctions)
    splitting.abbreviations = EnLanguageTokens.abbreviations
    splitting.abbr_ignore_case = True

    init_params = ParserInitParams()
    init_params.dataframe_paths = [
        os.path.join(os.path.dirname(__file__), rel) for rel in relative_csvs
    ]
    init_params.split_ptrs = splitting
    init_params.court_pattern_checker = re.compile('court', re.IGNORECASE)

    return UniversalCourtsParser(init_params)
def setup_de_parser():
    """Build a ``UniversalCourtsParser`` configured for the German courts CSV.

    The dataframe source is ``../../config/de/de_courts.csv`` relative to the
    directory containing this module, and the German column headers are
    mapped onto the ``type`` / ``name`` / ``jurisdiction`` / ``alias`` keys.

    Returns:
        The ``UniversalCourtsParser`` built from the assembled
        ``ParserInitParams``.
    """

    def _optional_trailing_e(text):
        """Replace an 'e' at the end of *text* with the regex fragment '[e]?'."""
        return re.sub('e$', '[e]?', text)

    # Line-splitting setup: punctuation plus the German conjunction tokens
    # are assigned as line breaks; abbreviations are matched ignoring case.
    splitting = LineSplitParams()
    splitting.line_breaks = {'\n', '.', ';', ','} | set(DeLanguageTokens.conjunctions)
    splitting.abbreviations = DeLanguageTokens.abbreviations
    splitting.abbr_ignore_case = True

    init_params = ParserInitParams()
    init_params.key_word_preproc_func = _optional_trailing_e
    # NOTE(review): this pattern is case-sensitive (no re.IGNORECASE flag);
    # confirm that is intended for capitalised forms such as "Gericht".
    init_params.court_pattern_checker = re.compile('gericht')
    init_params.split_ptrs = splitting
    init_params.column_names = {
        'type': 'Court Type (de-DE)',
        'name': 'Court Name (de-DE)',
        'jurisdiction': 'Jurisdiction',
        'alias': 'Alias (de-DE)',
    }
    init_params.dataframe_paths = [
        os.path.join(os.path.dirname(__file__), "../../config/de/de_courts.csv")
    ]

    return UniversalCourtsParser(init_params)
def init_parser():
    """Install a ``LineProcessor`` on ``CopyrightDeParser.line_processor``.

    The processor is created with ``LineSplitParams`` whose line breaks are
    newline, '.', ';', '!' and '?', and whose abbreviations come from
    ``DeLanguageTokens`` (matched ignoring case). The class attribute is
    overwritten on every call; nothing is returned.
    """
    de_split = LineSplitParams()
    de_split.abbr_ignore_case = True
    de_split.abbreviations = DeLanguageTokens.abbreviations
    de_split.line_breaks = {'\n', '.', ';', '!', '?'}

    processor = LineProcessor(line_split_params=de_split)
    CopyrightDeParser.line_processor = processor