def matchall():
    """Run the matcher again over every stored frame.

    Each reference returned by ``Frame.all()`` is loaded through
    ``load_frame``; references whose frame cannot be loaded (``None``)
    are skipped, all others are handed to the matcher.
    """
    from datawire.store import load_frame
    from datawire.model import Frame
    from datawire.processing.matching import match as run_match

    for ref in Frame.all():
        loaded = load_frame(ref.urn)
        # Only frames that are actually available can be matched.
        if loaded is not None:
            run_match(loaded)
def get(urn):
    """Return the stored frame identified by ``urn`` as a JSON response.

    The response carries the frame's backend location, its hash as the
    ``ETag`` and a public ``Cache-Control`` policy.

    Raises:
        NotFound: if ``load_frame`` returns ``None`` for ``urn``.
    """
    # TODO: authz checks.
    data = load_frame(urn)
    if data is None:
        raise NotFound('Frame: %s' % urn)
    headers = {
        'X-Backend-Location': frame_url(urn),
        # NOTE(review): HTTP entity-tags are quoted strings; confirm whether
        # data['hash'] already includes the quotes.
        'ETag': data['hash'],
        # Cache-Control directives are comma-separated and max-age takes
        # '='; the previous 'public; max-age: 8460000' was not parseable
        # as a max-age directive by caches.
        'Cache-Control': 'public, max-age=8460000',
    }
    return jsonify(data, headers=headers)
def backsearch(entity, step=5000):
    """Match ``entity`` against already stored frames, newest first.

    Frames are walked in pages of ``step`` rows, ordered by
    ``Frame.submitted_at`` descending, for offsets below
    ``BACKSEARCH_LIMIT``. Every loadable frame is passed to ``match``
    together with ``entity.pattern`` and ``[entity.id]``; the database
    session is committed after each frame that produced matches.

    The search stops when a page is empty, when the accumulated number of
    matches reaches ``BACKSEARCH_FIND``, or when the offset range is
    exhausted.

    Args:
        entity: object providing ``pattern``, ``text`` and ``id``.
        step: page size used for the frame query.
    """
    # TODO: Check if the string is already tracked, use existing results.
    found_count = 0
    pattern = entity.pattern
    q = Frame.all().order_by(Frame.submitted_at.desc())
    for offset in range(0, BACKSEARCH_LIMIT, step):
        log.info("Backsearch [%s] at %s (found: %s)",
                 entity.text, offset, found_count)
        # Build the page query once; it serves both the emptiness check
        # and the iteration below.
        page = q.limit(step).offset(offset)
        if page.count() == 0:
            return
        for frame_obj in page:
            frame = load_frame(frame_obj.urn)
            if frame is None:
                # The frame body is not available; skip it rather than
                # handing None to the matcher.
                continue
            matches = match(frame, pattern, [entity.id])
            match_count = len(matches)
            found_count += match_count
            if match_count:
                db.session.commit()
            if found_count >= BACKSEARCH_FIND:
                return
def match(urn):
    """Run entity matching on the single frame identified by ``urn``.

    The frame is fetched with ``load_frame`` and passed unchanged to the
    matcher from ``datawire.processing.matching``.
    """
    from datawire.store import load_frame
    # Aliased so the imported matcher does not reuse this function's name.
    from datawire.processing.matching import match as run_match

    run_match(load_frame(urn))