class LawsParser:
    """Locate German law references in text using three dataframe-backed parsers.

    One parser is built per input dataframe (Gesetze, Verordnungen, concepts);
    ``parse`` runs all three and wraps every hit in a ``LawAnnotation``.
    """

    def __init__(self,
                 gesetze_df: pd.DataFrame,
                 verordnungen_df: pd.DataFrame,
                 concept_df: pd.DataFrame):
        """Build the three entity parsers.

        Args:
            gesetze_df: dataframe searched via its 'Kurztitel', 'Titel' and
                'Abkürzung' columns.
            verordnungen_df: dataframe searched via the same three columns.
            concept_df: dataframe searched via its 'b' column; column 'a' is
                mapped to 'External Reference Type'.
        """
        # Updated on every parse() call; empty until then.
        self.locale = ''

        law_parse_columns = ('Kurztitel', 'Titel', 'Abkürzung')
        law_result_columns = {'Titel': 'External Reference Normalized'}

        # Fields common to every parser's preformed entity.
        source_fields = {
            'External Reference Source': 'BaFin',
            'External Reference Issuing Country': 'Germany'
        }

        def law_entity() -> dict:
            # A fresh dict per parser: DataframeEntityParser may keep a
            # reference to what it is given (not visible from here), so the
            # parsers must not share one mutable object.
            entity = {'External Reference Type': 'Laws and Rules'}
            entity.update(source_fields)
            return entity

        self.gesetze_parser = DataframeEntityParser(
            gesetze_df, law_parse_columns, law_result_columns, law_entity())
        self.verordnungen_parser = DataframeEntityParser(
            verordnungen_df, law_parse_columns, law_result_columns, law_entity())

        # Concepts take their type from column 'a', so the fixed
        # 'External Reference Type' entry is left out for this parser only.
        concept_parse_columns = ('b', )
        concept_result_columns = {
            'b': 'External Reference Normalized',
            'a': 'External Reference Type'
        }
        self.concept_parser = DataframeEntityParser(
            concept_df, concept_parse_columns, concept_result_columns,
            dict(source_fields))

    def parse(self, text: str, locale: str = None) -> List[LawAnnotation]:
        """Return a ``LawAnnotation`` for every entity the parsers find in ``text``.

        Args:
            text: the text to search.
            locale: locale stored on each annotation; 'de' is used when this
                is None or empty.

        Returns:
            Annotations in parser order: Gesetze, then Verordnungen, then
            concepts.

        Side effects:
            Stores the effective locale in ``self.locale``.
        """
        self.locale = locale if locale else 'de'

        entities = []
        for parser in (self.gesetze_parser,
                       self.verordnungen_parser,
                       self.concept_parser):
            entities.extend(parser.get_entity_list(text))

        annotations = []  # type: List[LawAnnotation]
        for entity in entities:
            coords = (entity.pop('location_start'), entity.pop('location_end'))
            # Kept in its own name so the ``text`` argument is not rebound.
            matched = entity.pop('source')
            annotations.append(LawAnnotation(name=matched,
                                             coords=coords,
                                             text=matched,
                                             locale=self.locale))
        return annotations
def __init__(self,
             gesetze_df: pd.DataFrame,
             verordnungen_df: pd.DataFrame,
             concept_df: pd.DataFrame):
    """Build the Gesetze, Verordnungen and concept entity parsers.

    Args:
        gesetze_df: dataframe searched via its 'Kurztitel', 'Titel' and
            'Abkürzung' columns.
        verordnungen_df: dataframe searched via the same three columns.
        concept_df: dataframe searched via its 'b' column; column 'a' is
            mapped to 'External Reference Type'.
    """
    self.locale = ''

    law_parse_columns = ('Kurztitel', 'Titel', 'Abkürzung')
    law_result_columns = {'Titel': 'External Reference Normalized'}

    # Fields common to every parser's preformed entity.
    source_fields = {
        'External Reference Source': 'BaFin',
        'External Reference Issuing Country': 'Germany'
    }

    def law_entity() -> dict:
        # A fresh dict per parser: DataframeEntityParser may keep a reference
        # to what it is given (not visible from here), so the parsers must
        # not share one mutable object.
        entity = {'External Reference Type': 'Laws and Rules'}
        entity.update(source_fields)
        return entity

    self.gesetze_parser = DataframeEntityParser(
        gesetze_df, law_parse_columns, law_result_columns, law_entity())
    self.verordnungen_parser = DataframeEntityParser(
        verordnungen_df, law_parse_columns, law_result_columns, law_entity())

    # Concepts take their type from column 'a', so the fixed
    # 'External Reference Type' entry is left out for this parser only.
    concept_parse_columns = ('b', )
    concept_result_columns = {
        'b': 'External Reference Normalized',
        'a': 'External Reference Type'
    }
    self.concept_parser = DataframeEntityParser(
        concept_df, concept_parse_columns, concept_result_columns,
        dict(source_fields))
def get_geoentities(self,
                    text: str,
                    config: pd.DataFrame,
                    parse_columns: Union[List[str], Tuple[str]] = None,
                    result_columns: Union[dict, None] = None,
                    preformed_entity: Union[dict, None] = None,
                    priority_sort_column: Union[str, None] = None,
                    priority_sort_ascending: bool = True,
                    cell_values_separator: Union[str, None] = ';',
                    unique_column_values: bool = True) -> Generator:
    """Yield the entities a ``DataframeEntityParser`` over ``config`` finds in ``text``.

    A new parser is constructed on each call. Every argument except ``text``
    is forwarded to the ``DataframeEntityParser`` constructor under the same
    name (``config`` is passed as ``dataframe``).

    Args:
        text: the text handed to the parser's ``get_entities``.
        config: dataframe the parser is built from.
        parse_columns: columns to parse; when None or empty,
            ``self.default_selecting_columns`` is used instead.
        result_columns: forwarded unchanged.
        preformed_entity: forwarded unchanged.
        priority_sort_column: forwarded unchanged.
        priority_sort_ascending: forwarded unchanged.
        cell_values_separator: forwarded unchanged.
        unique_column_values: forwarded unchanged.

    Yields:
        Each item produced by ``DataframeEntityParser.get_entities(text)``.
    """
    if not parse_columns:
        parse_columns = self.default_selecting_columns

    entity_parser = DataframeEntityParser(
        dataframe=config,
        parse_columns=parse_columns,
        result_columns=result_columns,
        preformed_entity=preformed_entity,
        priority_sort_column=priority_sort_column,
        priority_sort_ascending=priority_sort_ascending,
        cell_values_separator=cell_values_separator,
        unique_column_values=unique_column_values)
    yield from entity_parser.get_entities(text)
def __init__(self,
             gesetze_df: pd.DataFrame,
             verordnungen_df: pd.DataFrame,
             concept_df: pd.DataFrame):
    """Build the Gesetze, Verordnungen and concept entity parsers.

    All three parsers share one ``LineProcessor`` configured with German
    abbreviations.

    Args:
        gesetze_df: dataframe searched via its 'Kurztitel', 'Titel' and
            'Abkürzung' columns.
        verordnungen_df: dataframe searched via the same three columns.
        concept_df: dataframe searched via its 'b' column; column 'a' is
            mapped to 'External Reference Type'.
    """
    self.locale = ''

    law_parse_columns = ('Kurztitel', 'Titel', 'Abkürzung')
    law_result_columns = {'Titel': 'External Reference Normalized'}

    # Fields common to every parser's preformed entity.
    source_fields = {
        'External Reference Source': 'BaFin',
        'External Reference Issuing Country': 'Germany'
    }

    def law_entity() -> dict:
        # A fresh dict per parser: DataframeEntityParser may keep a reference
        # to what it is given (not visible from here), so the parsers must
        # not share one mutable object.
        entity = {'External Reference Type': 'Laws and Rules'}
        entity.update(source_fields)
        return entity

    # Line-splitting configuration handed to every parser.
    split_params = LineSplitParams()
    split_params.line_breaks = {'.', ';', '!', '?'}
    split_params.abbreviations = DeLanguageTokens.abbreviations
    split_params.abbr_ignore_case = True
    proc = LineProcessor(line_split_params=split_params)

    self.gesetze_parser = DataframeEntityParser(
        gesetze_df,
        law_parse_columns,
        result_columns=law_result_columns,
        preformed_entity=law_entity(),
        line_processor=proc)

    self.verordnungen_parser = DataframeEntityParser(
        verordnungen_df,
        law_parse_columns,
        result_columns=law_result_columns,
        preformed_entity=law_entity(),
        line_processor=proc)

    # Concepts take their type from column 'a', so the fixed
    # 'External Reference Type' entry is left out for this parser only.
    concept_parse_columns = ('b', )
    concept_result_columns = {
        'b': 'External Reference Normalized',
        'a': 'External Reference Type'
    }
    self.concept_parser = DataframeEntityParser(
        concept_df,
        concept_parse_columns,
        result_columns=concept_result_columns,
        preformed_entity=dict(source_fields),
        line_processor=proc)
def get_entries(self, text: str, columns=None):
    """Return, as a list, the entities found in ``text``.

    Args:
        text: the text handed to the parser's ``get_entities``.
        columns: columns to parse; when None or empty,
            ``self.default_columns`` is used instead.

    Returns:
        A list of everything ``DataframeEntityParser.get_entities`` yields.
    """
    if not columns:
        columns = self.default_columns
    # NOTE(review): ``entity_df`` is neither a parameter nor an attribute of
    # ``self``; it must be supplied by an enclosing or module scope that is
    # not visible here - confirm it is defined where this method lives.
    return list(
        DataframeEntityParser(dataframe=entity_df,
                              parse_columns=columns).get_entities(text))
class LawsParser:
    """Locate German law references in text using three dataframe-backed parsers.

    One parser is built per input dataframe (Gesetze, Verordnungen, concepts),
    all sharing a single ``LineProcessor``; ``parse`` runs all three and wraps
    every hit in a ``LawAnnotation``.
    """

    def __init__(self,
                 gesetze_df: pd.DataFrame,
                 verordnungen_df: pd.DataFrame,
                 concept_df: pd.DataFrame):
        """Build the three entity parsers.

        Args:
            gesetze_df: dataframe searched via its 'Kurztitel', 'Titel' and
                'Abkürzung' columns.
            verordnungen_df: dataframe searched via the same three columns.
            concept_df: dataframe searched via its 'b' column; column 'a' is
                mapped to 'External Reference Type'.
        """
        # Updated on every parse() call; empty until then.
        self.locale = ''

        law_parse_columns = ('Kurztitel', 'Titel', 'Abkürzung')
        law_result_columns = {'Titel': 'External Reference Normalized'}

        # Fields common to every parser's preformed entity.
        source_fields = {
            'External Reference Source': 'BaFin',
            'External Reference Issuing Country': 'Germany'
        }

        def law_entity() -> dict:
            # A fresh dict per parser: DataframeEntityParser may keep a
            # reference to what it is given (not visible from here), so the
            # parsers must not share one mutable object.
            entity = {'External Reference Type': 'Laws and Rules'}
            entity.update(source_fields)
            return entity

        # Line-splitting configuration handed to every parser.
        split_params = LineSplitParams()
        split_params.line_breaks = {'.', ';', '!', '?'}
        split_params.abbreviations = DeLanguageTokens.abbreviations
        split_params.abbr_ignore_case = True
        proc = LineProcessor(line_split_params=split_params)

        self.gesetze_parser = DataframeEntityParser(
            gesetze_df,
            law_parse_columns,
            result_columns=law_result_columns,
            preformed_entity=law_entity(),
            line_processor=proc)

        self.verordnungen_parser = DataframeEntityParser(
            verordnungen_df,
            law_parse_columns,
            result_columns=law_result_columns,
            preformed_entity=law_entity(),
            line_processor=proc)

        # Concepts take their type from column 'a', so the fixed
        # 'External Reference Type' entry is left out for this parser only.
        concept_parse_columns = ('b', )
        concept_result_columns = {
            'b': 'External Reference Normalized',
            'a': 'External Reference Type'
        }
        self.concept_parser = DataframeEntityParser(
            concept_df,
            concept_parse_columns,
            result_columns=concept_result_columns,
            preformed_entity=dict(source_fields),
            line_processor=proc)

    def parse(self, text: str, locale: str = None) -> List[LawAnnotation]:
        """Return a ``LawAnnotation`` for every entity the parsers find in ``text``.

        Args:
            text: the text to search.
            locale: locale stored on each annotation; 'de' is used when this
                is None or empty.

        Returns:
            Annotations in parser order: Gesetze, then Verordnungen, then
            concepts.

        Side effects:
            Stores the effective locale in ``self.locale``.
        """
        self.locale = locale if locale else 'de'

        entities = []
        for parser in (self.gesetze_parser,
                       self.verordnungen_parser,
                       self.concept_parser):
            entities.extend(parser.get_entity_list(text))

        annotations = []  # type: List[LawAnnotation]
        for entity in entities:
            coords = (entity.pop('location_start'), entity.pop('location_end'))
            # Kept in its own name so the ``text`` argument is not rebound.
            matched = entity.pop('source')
            annotations.append(LawAnnotation(name=matched,
                                             coords=coords,
                                             text=matched,
                                             locale=self.locale))
        return annotations