def parse(self,
          log: ProcessLogger,
          text,
          text_unit_id,
          text_unit_lang,
          document_initial_load: bool = False,
          **kwargs) -> ParseResults:
    """
    Find geo entities mentioned in ``text`` and build usage records for them.

    The text is passed to ``geoentities.get_geoentities`` together with the
    cached geo config; every hit is an ``(entity, alias)`` pair. Hits are
    grouped by entity id and by alias id, and one usage object is created per
    distinct id with ``count`` set to the number of hits for that id.

    :param log: not used by this method.
    :param text: text to search for geo entities.
    :param text_unit_id: id stored on every produced usage object.
    :param text_unit_lang: language passed to the locator as the single item
        of ``text_languages``.
    :param document_initial_load: not used by this method.
    :param kwargs: only ``priority`` is read (defaults to ``True``); it is
        forwarded to ``geoentities.get_geoentities``.
    :return: ``ParseResults`` mapping ``GeoEntityUsage`` and ``GeoAliasUsage``
        to lists of new, unsaved instances, or ``None`` when no entity was
        found.
    """
    from collections import Counter

    priority = kwargs.get('priority', True)
    geo_config = dict_data_cache.get_geo_config()
    # Kept as a call-time import, exactly as before.
    # NOTE(review): presumably this avoids a circular import - confirm.
    from apps.extract.app_vars import SIMPLE_LOCATOR_TOKENIZATION
    simple_norm = SIMPLE_LOCATOR_TOKENIZATION.val

    # Materialize the locator output: it is traversed twice below.
    entity_alias_pairs = list(
        geoentities.get_geoentities(text,
                                    geo_config,
                                    text_languages=[text_unit_lang],
                                    priority=priority,
                                    simplified_normalization=simple_norm))

    entity_ids = [entity.id for entity, _alias in entity_alias_pairs]
    if not entity_ids:
        # Nothing located: keep the historical "no result" value.
        return None

    alias_ids = [alias.alias_id for _entity, alias in entity_alias_pairs]

    # Tally every id in a single pass instead of calling list.count() once
    # per unique id, which was quadratic in the number of hits.
    entity_counts = Counter(entity_ids)
    alias_counts = Counter(alias_ids)

    # Iterate over set(...) (not the Counter) so the order of the produced
    # records is the same as it was with the previous implementation.
    entity_usages = [
        GeoEntityUsage(text_unit_id=text_unit_id,
                       entity_id=idd,
                       count=entity_counts[idd])
        for idd in set(entity_ids)
    ]
    # Falsy alias ids (e.g. None) are skipped.
    alias_usages = [
        GeoAliasUsage(text_unit_id=text_unit_id,
                      alias_id=idd,
                      count=alias_counts[idd])
        for idd in set(alias_ids) if idd
    ]

    return ParseResults({
        GeoEntityUsage: entity_usages,
        GeoAliasUsage: alias_usages,
    })
def parse(self,
          log: ProcessLogger,
          text,
          text_unit_id,
          text_unit_lang,
          document_initial_load: bool = False,
          **kwargs) -> ParseResults:
    """
    Find geo entities mentioned in ``text`` and build usage records for them.

    The text is passed to ``geoentities.get_geoentities`` together with the
    cached geo config; every hit is an ``(entity, alias)`` pair. Ids are
    extracted with ``dict_entities.get_entity_id`` / ``get_alias_id``, and
    one usage object is created per distinct id with ``count`` set to the
    number of hits for that id.

    :param log: not used by this method.
    :param text: text to search for geo entities.
    :param text_unit_id: id stored on every produced usage object.
    :param text_unit_lang: language passed to the locator as the single item
        of ``text_languages``.
    :param document_initial_load: not used by this method.
    :param kwargs: only ``priority`` is read (defaults to ``True``); it is
        forwarded to ``geoentities.get_geoentities``.
    :return: ``ParseResults`` mapping ``GeoEntityUsage`` and ``GeoAliasUsage``
        to lists of new, unsaved instances, or ``None`` when no entity was
        found.
    """
    from collections import Counter

    priority = kwargs.get('priority', True)
    geo_config = dict_data_cache.get_geo_config()

    # Materialize the locator output: it is traversed twice below.
    entity_alias_pairs = list(
        geoentities.get_geoentities(text,
                                    geo_config,
                                    text_languages=[text_unit_lang],
                                    priority=priority))

    entity_ids = [
        dict_entities.get_entity_id(entity)
        for entity, _alias in entity_alias_pairs
    ]
    if not entity_ids:
        # Nothing located: keep the historical "no result" value.
        return None

    alias_ids = [
        dict_entities.get_alias_id(alias)
        for _entity, alias in entity_alias_pairs
    ]

    # Tally every id in a single pass instead of calling list.count() once
    # per unique id, which was quadratic in the number of hits.
    entity_counts = Counter(entity_ids)
    alias_counts = Counter(alias_ids)

    # Iterate over set(...) (not the Counter) so the order of the produced
    # records is the same as it was with the previous implementation.
    entity_usages = [
        GeoEntityUsage(text_unit_id=text_unit_id,
                       entity_id=idd,
                       count=entity_counts[idd])
        for idd in set(entity_ids)
    ]
    # Falsy alias ids (e.g. None) are skipped.
    alias_usages = [
        GeoAliasUsage(text_unit_id=text_unit_id,
                      alias_id=idd,
                      count=alias_counts[idd])
        for idd in set(alias_ids) if idd
    ]

    return ParseResults({
        GeoEntityUsage: entity_usages,
        GeoAliasUsage: alias_usages,
    })