예제 #1
0
파일: leads.py 프로젝트: wcyn/aleph
def generate_leads(entity_id):
    """Rebuild the duplicate-candidate leads for a single entity.

    Existing leads for ``entity_id`` are always deleted first. When the
    entity can be loaded and carries a ``collection_id``, similar entities
    are queried (query state created with ``limit=100``), each hit is scored
    with ``entity_distance`` and indexed as a lead, together with the stored
    judgement for that match (``0`` when there is none).
    """
    # Purge unconditionally, so leads also disappear for entities that
    # have been deleted in the meantime.
    delete_entity_leads(entity_id)

    entity = load_entity(entity_id)
    # Nothing to generate when the entity could not be loaded or does not
    # belong to a collection; both cases are skipped silently.
    if entity is None or not entity.get('collection_id'):
        return

    log.debug("Generating leads for [%(id)s]: %(name)s", entity)
    authz = Authz(override=True)
    judgements = EntityIdentity.judgements_by_entity(entity_id)
    matches = similar_entities(entity, QueryState({}, authz, limit=100))

    # Fields taken over verbatim from the matched entity.
    copied_fields = ('schema', 'schemata', 'collection_id', 'dataset',
                     'roles')
    for candidate in matches.get('results', []):
        score = entity_distance(entity, candidate)
        log.debug(" -[%.2f]-> %s", score, candidate.get('name'))
        candidate_id = candidate.get('id')
        # TODO: implement some cut-off
        lead = {
            'entity_id': entity.get('id'),
            'entity_collection_id': entity.get('collection_id'),
            'score': score,
            'judgement': judgements.get(candidate_id, 0),
            'match_id': candidate_id,
        }
        lead.update((field, candidate.get(field)) for field in copied_fields)
        index_lead(lead)
예제 #2
0
파일: leads.py 프로젝트: wcyn/aleph
def update_lead(entity, match, judgement, judge=None):
    """Record a judgement for an entity/match pair and re-index the lead.

    The judgement is stored through ``EntityIdentity.save`` and the database
    session is committed. Afterwards the lead document is rebuilt with a
    freshly computed ``entity_distance`` score and passed to ``index_lead``.
    """
    EntityIdentity.save(
        entity.get('id'),
        match.get('id'),
        judgement,
        judge=judge,
    )
    db.session.commit()

    lead = {
        'entity_id': entity.get('id'),
        'entity_collection_id': entity.get('collection_id'),
        'score': entity_distance(entity, match),
        'judgement': judgement,
        'match_id': match.get('id'),
    }
    # The remaining fields are copied unchanged from the matched entity.
    for field in ('schema', 'schemata', 'collection_id', 'dataset', 'roles'):
        lead[field] = match.get(field)
    index_lead(lead)
예제 #3
0
파일: entities.py 프로젝트: wcyn/aleph
def fetch_entity(entity_id):
    """Load an entity from the ES index and from the database.

    Returns a ``(data, obj)`` tuple: ``obj`` is whatever ``Entity.by_id``
    returned (possibly ``None``) and ``data`` is the dict form of the entity,
    or ``None`` when neither source knows it. When ``data`` is not ``None``
    it carries an ``ids`` key.
    """
    data = load_entity(entity_id)
    record = Entity.by_id(entity_id)

    if record is None:
        # Index-only entity: it can only be identified with itself.
        if data is not None:
            data['ids'] = [data.get('id')]
        return data, record

    if data is None:
        # Not indexed yet: build the index document from the database row.
        data = finalize_index(record.to_index(), record.schema)
    else:
        # Values from the database object overwrite the indexed ones.
        data.update(record.to_dict())
    data['ids'] = EntityIdentity.entity_ids(entity_id)
    return data, record
예제 #4
0
파일: entities.py 프로젝트: wcyn/aleph
def combined_entity(entity):
    """Build a combined view of an entity and all entities mapped to it.

    An entity without an ``id`` is returned unchanged (same object).
    Otherwise ``entity['ids']`` is filled from ``EntityIdentity`` when it is
    missing (this mutates the argument), and a shallow copy of the entity is
    merged, via ``merge_data``, with every other mapped entity that can be
    loaded.
    """
    if 'id' not in entity:
        return entity

    own_id = entity['id']
    if 'ids' not in entity:
        entity['ids'] = EntityIdentity.entity_ids(own_id)

    merged = dict(entity)
    # The entity's own id is part of the mapping; there is nothing to merge
    # for it.
    foreign_ids = (other_id for other_id in entity['ids']
                   if other_id != own_id)
    for other_id in foreign_ids:
        other = load_entity(other_id)
        if other is not None:
            merged = merge_data(merged, other)
    return merged