Beispiel #1
0
    def canonicalize(cls):
        """Write out canonical_uids based on entity mappings.

        Works in two passes over the current project's rows:

        1. Reset: every entity's ``canonical_uid`` is set to its own ``uid``
           and every link's source/target canonical uid to its own
           source/target uid.
        2. Cluster: for each collection of uids returned by
           ``cls.generate_clusters()``, ``max(uids)`` is chosen as the
           canonical uid and written to all member entities and to every
           link end point that references a member.
        """
        entities = session.query(Entity)
        entities = entities.filter(Entity.project == project.name)
        links = session.query(Link)
        links = links.filter(Link.project == project.name)

        # Reset pass. Both link columns are reset in a single UPDATE; there
        # is no need to walk the same set of link rows twice.
        entities.update({Entity.canonical_uid: Entity.uid},
                        synchronize_session='fetch')
        links.update({Link.source_canonical_uid: Link.source_uid,
                      Link.target_canonical_uid: Link.target_uid},
                     synchronize_session='fetch')

        clusters = cls.generate_clusters()
        project.log.info("Canonicalize: %d clusters", len(clusters))
        for uids in clusters:
            canonical_uid = max(uids)
            members = entities.filter(Entity.uid.in_(uids))
            members.update({Entity.canonical_uid: canonical_uid},
                           synchronize_session='fetch')

            # Source and target end points match different rows, so they
            # have to stay separate statements here.
            outgoing = links.filter(Link.source_uid.in_(uids))
            outgoing.update({Link.source_canonical_uid: canonical_uid},
                            synchronize_session='fetch')
            incoming = links.filter(Link.target_uid.in_(uids))
            incoming.update({Link.target_canonical_uid: canonical_uid},
                            synchronize_session='fetch')
Beispiel #2
0
 def find_judgements(cls, judgement):
     """Yield the uid pair of every row carrying the given judgement.

     Only rows of the current project are considered. Each
     (left_uid, right_uid) pair is passed through ``cls.sort_uids``
     before it is yielded.
     """
     pairs = session.query(cls.left_uid, cls.right_uid).filter(
         cls.project == project.name,
         cls.judgement == judgement,
     )
     for left_uid, right_uid in pairs:
         yield cls.sort_uids(left_uid, right_uid)
Beispiel #3
0
 def cleanup(cls):
     """Delete the project's generated rows that are still undecided.

     Rows that are decided, or that were not generated, are left alone.
     """
     stale = session.query(cls).filter(
         cls.project == project.name,
         cls.decided == False,  # noqa
         cls.generated == True,  # noqa
     )
     stale.delete(synchronize_session='fetch')
Beispiel #4
0
 def find_by_result(cls, query_uid=None, match_uid=None):
     """Return a query over the current project's rows of ``cls``.

     The query is narrowed to one (query_uid, match_uid) pair only when
     BOTH arguments are given; if either one is None, neither filter is
     applied and all rows of the project are matched.
     NOTE(review): confirm that ignoring a single supplied uid is intended.
     """
     rows = session.query(cls).filter(cls.project == project.name)
     if query_uid is None or match_uid is None:
         return rows
     return rows.filter(cls.query_uid == query_uid,
                        cls.match_uid == match_uid)
Beispiel #5
0
 def get(cls, uida, uidb):
     """Load a mapping by its end points.

     The two uids are run through ``cls.sort_uids`` first, which decides
     which one is matched against ``left_uid`` and which against
     ``right_uid``. Returns the first matching row of the current project,
     or None when there is none.
     """
     left, right = cls.sort_uids(uida, uidb)
     matches = session.query(cls).filter(
         cls.project == project.name,
         cls.left_uid == left,
         cls.right_uid == right,
     )
     return matches.first()
Beispiel #6
0
 def update(self, normalized, latitude, longitude):
     """Write the normalized form and coordinates for this object's slug.

     Updates every ``Address`` row of the current project whose slug
     equals ``self.slug``.
     """
     values = {
         Address.normalized: normalized,
         Address.latitude: latitude,
         Address.longitude: longitude,
     }
     rows = session.query(Address).filter(
         Address.project == project.name,
         Address.slug == self.slug,
     )
     rows.update(values, synchronize_session='fetch')
Beispiel #7
0
def entities():
    """Render one page of the current project's active entities.

    Query-string arguments:
        q: optional text; when non-empty, only entities whose ``name``
            entry in ``Entity.data`` contains it (case-insensitive) are
            listed.
        offset: index of the first row to show; defaults to 0.

    Renders ``entities.html`` with up to 50 entities plus paging context
    (``total``, ``has_prev``, ``has_next``, ``next``, ``prev``,
    ``text_query``).
    """
    text_query = request.args.get('q', '').strip()
    # The offset comes straight from the URL: fall back to the first page
    # on garbage and never hand a negative OFFSET to the database.
    try:
        offset = max(0, int(request.args.get('offset', '0')))
    except ValueError:
        offset = 0
    limit = 50
    q = session.query(Entity)
    q = q.filter(Entity.project == project.name)
    q = q.filter(Entity.active == True)  # noqa
    if text_query:
        q = q.filter(Entity.data['name'].astext.ilike('%' + text_query + '%'))
    total = q.count()
    context = {
        'total': total,
        'has_prev': offset > 0,
        # Strictly greater: when total == offset + limit the current page
        # already ends on the last row and a next page would be empty.
        'has_next': total > (offset + limit),
        'next': offset + limit,
        'prev': max(0, offset - limit),
        'text_query': text_query,
    }
    q = q.offset(offset).limit(limit)
    return render_template('entities.html', entities=q, **context)
Beispiel #8
0
def entity(uid):
    """Render ``entity.html`` for one entity and the mappings touching it.

    The project's mappings that have the entity on either side are
    ordered by descending score and handed to the template in two
    sections, 'Undecided' and 'Decided', together with the result of
    ``Mapping.get_decisions()``.
    """
    ent = Entity.get(uid)
    touches_entity = or_(Mapping.left_uid == ent.uid,
                         Mapping.right_uid == ent.uid)
    mappings = (session.query(Mapping)
                .filter(Mapping.project == project.name)
                .filter(touches_entity)
                .order_by(Mapping.score.desc()))
    decisions = Mapping.get_decisions()
    sections = (
        ('Undecided', mappings.filter(Mapping.decided == False)),  # noqa
        ('Decided', mappings.filter(Mapping.decided == True)),  # noqa
    )
    return render_template('entity.html',
                           entity=ent,
                           sections=sections,
                           decisions=decisions)
Beispiel #9
0
def review_entity_get(offset=None):
    """Jump to the next entity that needs disambiguation.

    Sums the scores of the project's undecided mappings per uid, counting
    a mapping for both its left and its right uid, and redirects to the
    active entity ranked first by: ``Entity.tasked`` descending, then
    total score descending, then random order. When no such entity exists
    the browser is redirected to the entity listing instead.

    ``offset`` is accepted but not used by this function.
    """
    def undecided_scores(uid_column):
        # Per-uid sum of undecided mapping scores for one side of a mapping.
        side = session.query(uid_column.label('uid'),
                             func.sum(Mapping.score).label('num'))
        side = side.filter(Mapping.project == project.name)
        side = side.filter(Mapping.decided == False)  # noqa
        return side.group_by(uid_column)

    qa = undecided_scores(Mapping.left_uid)
    # The right-hand query must be grouped from its own base; deriving it
    # from ``qa`` would drop all right-side uids from the ranking.
    qb = undecided_scores(Mapping.right_uid)
    # UNION ALL, not UNION: a uid whose left and right sums happen to be
    # equal must contribute both rows to the total below.
    sq = qa.union_all(qb).subquery()
    q = session.query(sq.c.uid, func.sum(sq.c.num))
    q = q.join(Entity, Entity.uid == sq.c.uid)
    q = q.filter(Entity.active == True)  # noqa
    q = q.group_by(sq.c.uid, Entity.tasked)
    q = q.order_by(Entity.tasked.desc())
    q = q.order_by(func.sum(sq.c.num).desc())
    q = q.order_by(func.random())
    # One round trip: scalar() yields the first column of the first row,
    # or None when the query matched nothing.
    uid = q.limit(1).scalar()
    if uid is None:
        return redirect(url_for('.entities'))
    return redirect(url_for('.entity', uid=uid))
Beispiel #10
0
 def iter_composite(cls, origins=(), tasked=None):
     """Yield one ``CompositeEntity`` per canonical uid.

     A canonical uid is selected when at least one active entity of the
     current project carrying it passes the optional filters:

     origins: collection of origin values; when non-empty only entities
         whose ``origin`` is among them select a canonical uid.
     tasked: when not None, only entities whose ``tasked`` equals it
         select a canonical uid.

     For every selected canonical uid, ALL active entities of the project
     sharing it are loaded (not just the ones that passed the filters) and
     wrapped together in a single ``CompositeEntity``.
     """
     from itertools import groupby

     sq = session.query(cls.canonical_uid.distinct())
     sq = sq.filter(cls.project == project.name)
     sq = sq.filter(cls.active == True)  # noqa
     if origins:
         sq = sq.filter(cls.origin.in_(origins))
     if tasked is not None:
         sq = sq.filter(cls.tasked == tasked)
     q = session.query(cls)
     q = q.filter(cls.project == project.name)
     q = q.filter(cls.active == True)  # noqa
     q = q.filter(cls.canonical_uid.in_(sq))
     # Ordering by canonical uid makes members of one group consecutive,
     # which is what groupby() relies on.
     q = q.order_by(cls.canonical_uid.asc())
     for _, members in groupby(q, key=lambda entity: entity.canonical_uid):
         yield CompositeEntity(list(members))
Beispiel #11
0
 def find_undecided(cls, limit=10, offset=0):
     """Return candidates for manual matching.

     Walks the project's rows that are undecided and have no judgement,
     highest score first, starting at ``offset``. A row is deleted via its
     ``delete()`` method instead of being returned when its
     (left_uid, right_uid) pair is contained in ``cls.get_decided()`` or
     when its ``left`` or ``right`` is None. Collection stops once
     ``limit`` rows have been gathered; the session is committed before
     the list is returned.
     """
     decided = cls.get_decided()
     candidates = (session.query(cls)
                   .filter(cls.project == project.name)
                   .filter(cls.decided == False)  # noqa
                   .filter(cls.judgement == None)  # noqa
                   .order_by(cls.score.desc())
                   .offset(offset))
     mappings = []
     for mapping in candidates.yield_per(limit):
         pair = (mapping.left_uid, mapping.right_uid)
         if pair in decided or mapping.left is None \
                 or mapping.right is None:
             mapping.delete()
         else:
             mappings.append(mapping)
             if len(mappings) == limit:
                 break
     session.commit()
     return mappings
Beispiel #12
0
 def find_by_decision(cls, decided):
     """Return a query for the project's rows with the given decided flag.

     The result is an unevaluated query over ``cls``, restricted to rows
     whose ``decided`` column equals *decided*.
     """
     return session.query(cls).filter(
         cls.project == project.name,
         cls.decided == decided,  # noqa
     )
Beispiel #13
0
 def find_by_origins(cls, origins):
     """Return a query for the project's rows, optionally limited by origin.

     When *origins* is empty every row of the current project matches;
     otherwise only rows whose ``origin`` is one of *origins*.
     """
     rows = session.query(cls).filter(cls.project == project.name)
     if len(origins) == 0:
         return rows
     return rows.filter(cls.origin.in_(origins))
Beispiel #14
0
 def find(cls):
     """Return a query for all rows of ``cls`` in the current project."""
     return session.query(cls).filter(cls.project == project.name)