def parse(self,
          log: ProcessLogger,
          text,
          text_unit_id,
          text_unit_lang,
          document_initial_load: bool = False,
          **kwargs) -> ParseResults:
    """Locate court mentions in ``text`` and tally them per court id.

    :param log: accepted for interface compatibility; not used here.
    :param text: text passed to ``courts.get_courts``.
    :param text_unit_id: value stored in every produced ``CourtUsage``.
    :param text_unit_lang: passed to the locator as the only entry of
        ``text_languages``.
    :param document_initial_load: accepted for interface compatibility;
        not used here.
    :param kwargs: ignored.
    :return: ``ParseResults`` mapping ``CourtUsage`` to a list with one
        ``CourtUsage`` per distinct court id (in first-seen order), or
        ``None`` when no court is found.
    """
    # Function-scope import keeps this block self-contained.
    from collections import Counter

    court_config = dict_data_cache.get_court_config()

    # Single pass over the matches; replaces calling list.count() once per
    # unique id, which rescanned the whole list each time.
    court_counts = Counter(
        dict_entities.get_entity_id(match[0])
        for match in courts.get_courts(text,
                                       court_config_list=court_config,
                                       text_languages=[text_unit_lang])
    )

    if not court_counts:
        # Same as before: nothing found means no ParseResults object.
        return None

    return ParseResults({
        CourtUsage: [
            CourtUsage(text_unit_id=text_unit_id,
                       court_id=court_id,
                       count=count)
            for court_id, count in court_counts.items()
        ]
    })
def parse(self, text, text_unit_id, text_unit_lang, **kwargs) -> ParseResults:
    """Locate geo entities in ``text`` and tally entity and alias usages.

    :param text: text passed to ``geoentities.get_geoentities``.
    :param text_unit_id: value stored in every produced usage object.
    :param text_unit_lang: passed to the locator as the only entry of
        ``text_languages``.
    :param kwargs: may carry ``priority`` (defaults to ``True``), which is
        forwarded to ``geoentities.get_geoentities``.
    :return: ``ParseResults`` with a ``GeoEntityUsage`` list (one item per
        distinct entity id) and a ``GeoAliasUsage`` list (one item per
        distinct, truthy alias id), both in first-seen order; ``None`` when
        nothing is found.
    """
    # Function-scope import keeps this block self-contained.
    from collections import Counter

    priority = kwargs.get('priority', True)
    geo_config = dict_data_cache.get_geo_config()

    # Materialized because the pairs are traversed twice (entities, aliases).
    entity_alias_pairs = list(
        geoentities.get_geoentities(text,
                                    geo_config,
                                    text_languages=[text_unit_lang],
                                    priority=priority))

    # Single-pass tallies replace list.count() per unique id, which
    # rescanned the whole id list for every distinct value.
    entity_counts = Counter(
        dict_entities.get_entity_id(entity)
        for entity, _alias in entity_alias_pairs
    )

    if not entity_counts:
        # Same as before: nothing found means no ParseResults object.
        return None

    alias_counts = Counter(
        dict_entities.get_alias_id(alias)
        for _entity, alias in entity_alias_pairs
    )

    return ParseResults({
        GeoEntityUsage: [
            GeoEntityUsage(text_unit_id=text_unit_id,
                           entity_id=entity_id,
                           count=count)
            for entity_id, count in entity_counts.items()
        ],
        GeoAliasUsage: [
            GeoAliasUsage(text_unit_id=text_unit_id,
                          alias_id=alias_id,
                          count=count)
            for alias_id, count in alias_counts.items()
            if alias_id  # falsy alias ids are skipped, as in the original
        ]
    })
def test_get_entity_id():
    """Check that get_entity_id yields 1 for the tuple (1, 'name', [])."""
    sample_entity = (1, 'name', [])
    expected_id = 1
    actual_id = get_entity_id(sample_entity)
    assert_equals(expected_id, actual_id)