def reindex(step=5000):
    """Push every stored frame through ``index()``, one page at a time.

    Frames are read ordered by ``submitted_at`` descending, ``step`` rows
    per page, until a page turns out to be empty.

    The search index is not deleted beforehand; a delete call that used to
    sit at the top of this function is disabled.

    :param step: number of frames fetched per page.
    """
    newest_first = Frame.all().order_by(Frame.submitted_at.desc())
    offset = 0
    while True:
        log.info("Re-indexing at %s", offset)
        page = newest_first.limit(step).offset(offset)
        # An empty page means we have walked past the last frame.
        if page.count() == 0:
            return
        for frame_obj in page:
            index(frame_obj)
        offset += step
def matchall():
    """Run ``match()`` again on every frame that can still be loaded.

    Each frame reference is resolved through ``load_frame``; references for
    which it returns ``None`` are skipped.
    """
    # Imported locally, as in the original (presumably to avoid an import
    # cycle -- TODO confirm).
    from datawire.store import load_frame
    from datawire.model import Frame
    from datawire.processing.matching import match

    # Lazy generator: each frame is loaded right before it is matched.
    loaded = (load_frame(frame_ref.urn) for frame_ref in Frame.all())
    for frame in loaded:
        if frame is not None:
            match(frame)
def backsearch(entity, step=5000):
    """Match a single entity against already stored frames, newest first.

    Frames are paged ``step`` rows at a time, ordered by ``submitted_at``
    descending. The search stops when any of these holds:

    * a page comes back empty (no more frames),
    * the offset reaches ``BACKSEARCH_LIMIT``,
    * at least ``BACKSEARCH_FIND`` matches have been found.

    The database session is committed after every frame that produced at
    least one match.

    :param entity: entity to search for; its ``pattern``, ``text`` and
        ``id`` attributes are used.
    :param step: number of frames fetched per page.
    """
    # TODO: Check if the string is already tracked, use existing results.
    found_count = 0
    pattern = entity.pattern
    q = Frame.all().order_by(Frame.submitted_at.desc())
    for offset in range(0, BACKSEARCH_LIMIT, step):
        log.info("Backsearch [%s] at %s (found: %s)",
                 entity.text, offset, found_count)
        page = q.limit(step).offset(offset)
        # An empty page means we have walked past the last frame.
        if page.count() == 0:
            return
        for frame_obj in page:
            frame = load_frame(frame_obj.urn)
            if frame is None:
                # load_frame() can return None (matchall() guards against
                # it as well); there is nothing to match in that case.
                continue
            matches = match(frame, pattern, [entity.id])
            new_matches = len(matches)
            if new_matches:
                found_count += new_matches
                # Commit per matching frame so results found so far are
                # persisted even if the search stops early.
                db.session.commit()
            if found_count >= BACKSEARCH_FIND:
                return
def user_index(id):
    """List frames matching a user's entities, most recent action first.

    The frame URNs come from an Elasticsearch query; the frame objects are
    then fetched from the database and put back into the search result
    order.

    Filtering:

    * if the request has one or more ``entity`` arguments, a frame must
      carry *all* of them (``and`` filter);
    * otherwise a frame must carry *any* entity whose ``user_id`` equals
      ``id`` (``or`` filter).

    :param id: user id; passed to ``require.user_id`` first (presumably an
        authorization check -- TODO confirm).
    :returns: whatever ``query_pager`` returns for the ordered frame list,
        with ``count`` set to the total hit count reported by the search.
    """
    require.user_id(id)

    page_size = get_limit()
    page_start = get_offset()

    # The same dict object is embedded in the query below and filled in
    # afterwards, once we know which kind of filter applies.
    entity_filter = {}
    esq = {
        "query": {
            "filtered": {
                "query": {"match_all": {}},
                "filter": entity_filter,
            }
        },
        "sort": [{"action_at": {"order": "desc"}}],
        "size": page_size,
        "from": page_start,
        "facets": {"entities": {"terms": {"field": "entities"}}},
    }

    requested = request.args.getlist('entity')
    if requested:
        entity_filter['and'] = [
            {"term": {"entities": entity_id}} for entity_id in requested
        ]
    else:
        owned = Entity.all().filter(Entity.user_id == id)
        entity_filter['or'] = [
            {"term": {"entities": entity.id}} for entity in owned
        ]

    res = elastic.search_raw(esq, elastic_index, 'frame')
    hits = res['hits']
    frame_urns = [hit['_id'] for hit in hits['hits']]

    # The database query gives no ordering guarantee, so index the rows by
    # URN and rebuild the list in search-result order.
    by_urn = {}
    for frame in Frame.all().filter(Frame.urn.in_(frame_urns)):
        by_urn[frame.urn] = frame
    # NOTE(review): a URN present in the search index but missing from the
    # database yields a None entry here -- confirm query_pager copes.
    frames = [by_urn.get(urn) for urn in frame_urns]

    return query_pager(frames, 'frames.user_index',
                       count=hits['total'],
                       paginate=False, id=id)
def index():
    """Return a pager over all frames, ordered by ``action_at`` descending."""
    newest_first = Frame.all().order_by(Frame.action_at.desc())
    return query_pager(newest_first, 'frames.index')