Example #1
0
class LawsParser:
    """Find law references in text using three dataframe-backed parsers.

    Three ``DataframeEntityParser`` instances are built: one over the
    "Gesetze" dataframe, one over the "Verordnungen" dataframe and one over
    the concept dataframe. ``parse`` runs all three over the text and turns
    every hit into a ``LawAnnotation``.
    """

    def __init__(self, gesetze_df: pd.DataFrame, verordnungen_df: pd.DataFrame,
                 concept_df: pd.DataFrame):
        """Build the three underlying entity parsers.

        Args:
            gesetze_df: dataframe searched on the 'Kurztitel', 'Titel' and
                'Abkürzung' columns.
            verordnungen_df: dataframe searched on the same three columns.
            concept_df: dataframe searched on column 'b'; column 'a' is
                mapped to 'External Reference Type'.
        """
        # Locale used by the most recent parse() call.
        self.locale = ''

        law_parse_columns = ('Kurztitel', 'Titel', 'Abkürzung')
        law_result_columns = {'Titel': 'External Reference Normalized'}

        # Fields common to every parser's pre-filled entity.
        common_fields = {
            'External Reference Source': 'BaFin',
            'External Reference Issuing Country': 'Germany'
        }
        # Laws and regulations additionally carry a fixed reference type.
        law_entity = {'External Reference Type': 'Laws and Rules'}
        law_entity.update(common_fields)

        # Every parser receives its OWN dict. Previously a single dict was
        # handed to all three parsers and mutated (pop) in between, so the
        # first two parsers could lose 'External Reference Type' if
        # DataframeEntityParser keeps a reference instead of copying
        # (NOTE(review): confirm against DataframeEntityParser).
        self.gesetze_parser = DataframeEntityParser(gesetze_df,
                                                    law_parse_columns,
                                                    law_result_columns,
                                                    dict(law_entity))

        self.verordnungen_parser = DataframeEntityParser(verordnungen_df,
                                                         law_parse_columns,
                                                         law_result_columns,
                                                         dict(law_entity))

        # For concepts the reference type comes from column 'a', so the
        # pre-filled entity carries only the common fields.
        concept_parse_columns = ('b', )
        concept_result_columns = {
            'b': 'External Reference Normalized',
            'a': 'External Reference Type'
        }

        self.concept_parser = DataframeEntityParser(concept_df,
                                                    concept_parse_columns,
                                                    concept_result_columns,
                                                    dict(common_fields))

    def parse(self, text: str, locale: str = None) -> List[LawAnnotation]:
        """Return a ``LawAnnotation`` for every entity found in ``text``.

        Args:
            text: the text to search.
            locale: locale stored on each annotation; 'de' is used when the
                argument is ``None`` or empty.

        Returns:
            Annotations in parser order: Gesetze hits first, then
            Verordnungen hits, then concept hits.

        Raises:
            KeyError: if a found entity lacks 'location_start',
                'location_end' or 'source'.
        """
        active_locale = locale if locale else 'de'
        # Kept on the instance for callers that read it after parsing.
        self.locale = active_locale

        parsers = (self.gesetze_parser,
                   self.verordnungen_parser,
                   self.concept_parser)

        annotations = []  # type: List[LawAnnotation]
        for parser in parsers:
            for entity in parser.get_entity_list(text):
                # Read (rather than pop) so the dicts returned by the parser
                # are left untouched, and use a separate name so the `text`
                # argument is never overwritten.
                coords = (entity['location_start'], entity['location_end'])
                matched_text = entity['source']
                annotations.append(LawAnnotation(name=matched_text,
                                                 coords=coords,
                                                 text=matched_text,
                                                 locale=active_locale))
        return annotations
Example #2
0
class LawsParser:
    """Find law references in text using three dataframe-backed parsers.

    Three ``DataframeEntityParser`` instances are built (Gesetze,
    Verordnungen and concepts), all sharing one ``LineProcessor`` configured
    with German abbreviations. ``parse`` runs all three over the text and
    turns every hit into a ``LawAnnotation``.
    """

    def __init__(self, gesetze_df: pd.DataFrame, verordnungen_df: pd.DataFrame,
                 concept_df: pd.DataFrame):
        """Build the line processor and the three underlying entity parsers.

        Args:
            gesetze_df: dataframe searched on the 'Kurztitel', 'Titel' and
                'Abkürzung' columns.
            verordnungen_df: dataframe searched on the same three columns.
            concept_df: dataframe searched on column 'b'; column 'a' is
                mapped to 'External Reference Type'.
        """
        # Locale used by the most recent parse() call.
        self.locale = ''

        law_parse_columns = ('Kurztitel', 'Titel', 'Abkürzung')
        law_result_columns = {'Titel': 'External Reference Normalized'}

        # Fields common to every parser's pre-filled entity.
        common_fields = {
            'External Reference Source': 'BaFin',
            'External Reference Issuing Country': 'Germany'
        }
        # Laws and regulations additionally carry a fixed reference type.
        law_entity = {'External Reference Type': 'Laws and Rules'}
        law_entity.update(common_fields)

        # Line splitting: break on sentence punctuation, with the German
        # abbreviation list matched case-insensitively.
        split_params = LineSplitParams()
        split_params.line_breaks = {'.', ';', '!', '?'}
        split_params.abbreviations = DeLanguageTokens.abbreviations
        split_params.abbr_ignore_case = True
        line_processor = LineProcessor(line_split_params=split_params)

        # Every parser receives its OWN dict. Previously a single dict was
        # handed to all three parsers and mutated (pop) in between, so the
        # first two parsers could lose 'External Reference Type' if
        # DataframeEntityParser keeps a reference instead of copying
        # (NOTE(review): confirm against DataframeEntityParser).
        self.gesetze_parser = DataframeEntityParser(
            gesetze_df,
            law_parse_columns,
            result_columns=law_result_columns,
            preformed_entity=dict(law_entity),
            line_processor=line_processor)

        self.verordnungen_parser = DataframeEntityParser(
            verordnungen_df,
            law_parse_columns,
            result_columns=law_result_columns,
            preformed_entity=dict(law_entity),
            line_processor=line_processor)

        # For concepts the reference type comes from column 'a', so the
        # pre-filled entity carries only the common fields.
        concept_parse_columns = ('b', )
        concept_result_columns = {
            'b': 'External Reference Normalized',
            'a': 'External Reference Type'
        }

        self.concept_parser = DataframeEntityParser(
            concept_df,
            concept_parse_columns,
            result_columns=concept_result_columns,
            preformed_entity=dict(common_fields),
            line_processor=line_processor)

    def parse(self, text: str, locale: str = None) -> List[LawAnnotation]:
        """Return a ``LawAnnotation`` for every entity found in ``text``.

        Args:
            text: the text to search.
            locale: locale stored on each annotation; 'de' is used when the
                argument is ``None`` or empty.

        Returns:
            Annotations in parser order: Gesetze hits first, then
            Verordnungen hits, then concept hits.

        Raises:
            KeyError: if a found entity lacks 'location_start',
                'location_end' or 'source'.
        """
        active_locale = locale if locale else 'de'
        # Kept on the instance for callers that read it after parsing.
        self.locale = active_locale

        parsers = (self.gesetze_parser,
                   self.verordnungen_parser,
                   self.concept_parser)

        annotations = []  # type: List[LawAnnotation]
        for parser in parsers:
            for entity in parser.get_entity_list(text):
                # Read (rather than pop) so the dicts returned by the parser
                # are left untouched, and use a separate name so the `text`
                # argument is never overwritten.
                coords = (entity['location_start'], entity['location_end'])
                matched_text = entity['source']
                annotations.append(LawAnnotation(name=matched_text,
                                                 coords=coords,
                                                 text=matched_text,
                                                 locale=active_locale))
        return annotations