Esempio n. 1
0
    def parse(self, log: ProcessLogger, text, text_unit_id, text_unit_lang,
              document_initial_load: bool = False, **kwargs) -> ParseResults:
        """
        Find geo entities in ``text`` and build usage records for a text unit.

        Entity/alias pairs are obtained from ``geoentities.get_geoentities``
        using the cached geo config and the ``SIMPLE_LOCATOR_TOKENIZATION``
        app variable; occurrences are then tallied per entity id and per
        alias id.

        :param log: part of the signature; not referenced by this method.
        :param text: text to search for geo entities.
        :param text_unit_id: id stored on every produced usage record.
        :param text_unit_lang: language passed as the single item of
            ``text_languages``.
        :param document_initial_load: part of the signature; not referenced
            by this method.
        :param kwargs: ``priority`` (default ``True``) is forwarded to
            ``get_geoentities``; other keys are ignored here.
        :return: ``ParseResults`` mapping ``GeoEntityUsage`` and
            ``GeoAliasUsage`` to lists with one record per distinct id
            (``count`` is the number of occurrences of that id), or ``None``
            when no entity was found. Aliases with a falsy id are skipped.
        """
        from collections import Counter

        priority = kwargs.get('priority', True)
        geo_config = dict_data_cache.get_geo_config()
        # Function-scope import.
        # NOTE(review): presumably avoids a circular import - confirm.
        from apps.extract.app_vars import SIMPLE_LOCATOR_TOKENIZATION
        simple_norm = SIMPLE_LOCATOR_TOKENIZATION.val
        entity_alias_pairs = list(geoentities.get_geoentities(text,
                                                              geo_config,
                                                              text_languages=[text_unit_lang],
                                                              priority=priority,
                                                              simplified_normalization=simple_norm))

        # One pass per id kind instead of list.count() for every unique id.
        entity_counts = Counter(entity.id for entity, _alias in entity_alias_pairs)
        if not entity_counts:
            return None

        alias_counts = Counter(alias.alias_id for _entity, alias in entity_alias_pairs)

        # Counter keeps first-seen order, so records follow the order in
        # which ids were first encountered in the text.
        return ParseResults({
            GeoEntityUsage: [GeoEntityUsage(text_unit_id=text_unit_id,
                                            entity_id=entity_id,
                                            count=count)
                             for entity_id, count in entity_counts.items()],
            GeoAliasUsage: [GeoAliasUsage(text_unit_id=text_unit_id,
                                          alias_id=alias_id,
                                          count=count)
                            for alias_id, count in alias_counts.items() if alias_id]})
    def parse(self,
              log: ProcessLogger,
              text,
              text_unit_id,
              text_unit_lang,
              document_initial_load: bool = False,
              **kwargs) -> ParseResults:
        """
        Find geo entities in ``text`` and build usage records for a text unit.

        Entity/alias pairs are obtained from ``geoentities.get_geoentities``
        using the cached geo config; ids are extracted with
        ``dict_entities.get_entity_id`` / ``dict_entities.get_alias_id`` and
        occurrences are tallied per entity id and per alias id.

        :param log: part of the signature; not referenced by this method.
        :param text: text to search for geo entities.
        :param text_unit_id: id stored on every produced usage record.
        :param text_unit_lang: language passed as the single item of
            ``text_languages``.
        :param document_initial_load: part of the signature; not referenced
            by this method.
        :param kwargs: ``priority`` (default ``True``) is forwarded to
            ``get_geoentities``; other keys are ignored here.
        :return: ``ParseResults`` mapping ``GeoEntityUsage`` and
            ``GeoAliasUsage`` to lists with one record per distinct id
            (``count`` is the number of occurrences of that id), or ``None``
            when no entity was found. Aliases with a falsy id are skipped.
        """
        from collections import Counter

        priority = kwargs.get('priority', True)
        geo_config = dict_data_cache.get_geo_config()
        entity_alias_pairs = list(
            geoentities.get_geoentities(text,
                                        geo_config,
                                        text_languages=[text_unit_lang],
                                        priority=priority))

        # One pass per id kind instead of list.count() for every unique id.
        entity_counts = Counter(
            dict_entities.get_entity_id(entity)
            for entity, _alias in entity_alias_pairs
        )
        if not entity_counts:
            return None

        alias_counts = Counter(
            dict_entities.get_alias_id(alias)
            for _entity, alias in entity_alias_pairs
        )

        # Counter keeps first-seen order, so records follow the order in
        # which ids were first encountered in the text.
        return ParseResults({
            GeoEntityUsage: [
                GeoEntityUsage(text_unit_id=text_unit_id,
                               entity_id=entity_id,
                               count=count)
                for entity_id, count in entity_counts.items()
            ],
            GeoAliasUsage: [
                GeoAliasUsage(text_unit_id=text_unit_id,
                              alias_id=alias_id,
                              count=count)
                for alias_id, count in alias_counts.items() if alias_id
            ]
        })