Beispiel #1
0
def load_entities_dict():
    """Load geo entities and their aliases from the CSV test fixtures.

    Reads ``geoentities.csv`` and ``geoaliases.csv`` from the parent directory
    of ``lexnlp_tests.this_test_data_path()``.

    Every entity row (columns ``id``, ``name``, ``priority``) is passed to
    ``entity_config``; an empty ``priority`` cell is treated as 0.  Every
    alias row (columns ``entity_id``, ``alias``, ``locale``, ``type``) is
    attached to its entity through ``add_aliases_to_entity``.  Alias rows
    whose ``entity_id`` matches no loaded entity are skipped.

    Returns:
        The ``dict.values()`` view over the created entities, keyed
        internally by the ``id`` column (kept as the string read from CSV).
    """
    # Both fixtures live in the same directory, so resolve it once.
    data_dir = os.path.dirname(lexnlp_tests.this_test_data_path())
    entities_fn = os.path.join(data_dir, 'geoentities.csv')
    aliases_fn = os.path.join(data_dir, 'geoaliases.csv')

    entities = {}

    # newline='' leaves line-ending handling to the csv module, as its
    # documentation requires; otherwise line breaks inside quoted fields
    # may be misinterpreted.
    with open(entities_fn, 'r', encoding='utf8', newline='') as f:
        for row in csv.DictReader(f):
            entities[row['id']] = entity_config(
                row['id'],
                row['name'],
                int(row['priority']) if row['priority'] else 0,
                name_is_alias=True)

    with open(aliases_fn, 'r', encoding='utf8', newline='') as f:
        for row in csv.DictReader(f):
            entity = entities.get(row['entity_id'])
            if entity:
                # The last argument flags the alias as an abbreviation:
                # true for any 'iso*' type and for the exact type
                # 'abbreviation'.
                add_aliases_to_entity(
                    entity, row['alias'], row['locale'],
                    row['type'].startswith('iso')
                    or row['type'] == 'abbreviation')
    return entities.values()
def cache_geo_config():
    """Build the geo entity configuration from the database and cache it.

    Creates one entity per ``GeoEntity`` row via
    ``dict_entities.entity_config`` (a falsy priority is stored as 0), then
    attaches every ``GeoAlias`` row to its entity via
    ``dict_entities.add_aliases_to_entity``.  The resulting list of entities
    is stored with ``DbCache.put_to_db`` under ``CACHE_KEY_GEO_CONFIG``.

    Returns nothing; the result is only written to the cache.
    """
    geo_config = {}
    for name, pk, priority in GeoEntity.objects.values_list('name', 'pk', 'priority'):
        geo_config[pk] = dict_entities.entity_config(pk, name, priority or 0, name_is_alias=True)

    for alias_id, alias_text, alias_type, entity_id, alias_lang \
            in GeoAlias.objects.values_list('pk', 'alias', 'type', 'entity', 'locale'):
        # Use .get() so that an alias whose entity was not loaded above (for
        # example one created between the two queries) is skipped by the
        # guard below instead of raising KeyError.
        entity = geo_config.get(entity_id)
        if entity:
            # Alias types starting with 'iso' or 'abbrev' count as abbreviations.
            is_abbrev = alias_type.startswith(('iso', 'abbrev'))
            dict_entities.add_aliases_to_entity(entity,
                                                aliases_csv=alias_text,
                                                language=alias_lang,
                                                is_abbreviation=is_abbrev,
                                                alias_id=alias_id)
    res = list(geo_config.values())
    DbCache.put_to_db(CACHE_KEY_GEO_CONFIG, res)
Beispiel #3
0
def load_entities_dict_by_path(entities_fn: str, aliases_fn: str):
    """Load entities and their aliases from two CSV files.

    Args:
        entities_fn: Path to a UTF-8 CSV file with the columns ``id``,
            ``name`` and ``priority``.  An empty ``priority`` cell is
            treated as 0.
        aliases_fn: Path to a UTF-8 CSV file with the columns ``entity_id``,
            ``alias``, ``locale`` and ``type``.

    Every entity row is passed to ``entity_config``; every alias row is
    attached to its entity through ``add_aliases_to_entity``.  Alias rows
    whose ``entity_id`` matches no loaded entity are skipped.

    Returns:
        The ``dict.values()`` view over the created entities, keyed
        internally by the ``id`` column (kept as the string read from CSV).
    """
    import csv

    entities = {}

    # newline='' leaves line-ending handling to the csv module, as its
    # documentation requires; otherwise line breaks inside quoted fields
    # may be misinterpreted.
    with open(entities_fn, 'r', encoding='utf8', newline='') as f:
        for row in csv.DictReader(f):
            priority = int(row['priority']) if row['priority'] else 0
            entities[row['id']] = entity_config(row['id'], row['name'], priority,
                                                name_is_alias=True)

    with open(aliases_fn, 'r', encoding='utf8', newline='') as f:
        for row in csv.DictReader(f):
            entity = entities.get(row['entity_id'])
            if entity:
                # The last argument flags the alias as an abbreviation:
                # true for any 'iso*' type and for the exact type
                # 'abbreviation'.
                add_aliases_to_entity(entity,
                                      row['alias'],
                                      row['locale'],
                                      row['type'].startswith('iso') or row['type'] == 'abbreviation')

    return entities.values()