コード例 #1
0
def reindex(step=5000):
    """Re-index every frame, newest first, in pages of ``step`` rows.

    Frames are read from ``Frame.all()`` ordered by ``submitted_at``
    descending, one limit/offset window at a time, and each frame is
    passed to ``index``. The function returns as soon as a window comes
    back empty.

    :param step: number of frames fetched per window.
    """
    # Dropping the index beforehand is currently disabled:
    #elastic.indices.delete_index("datawire")
    q = Frame.all().order_by(Frame.submitted_at.desc())
    for offset in count(0, step):
        log.info("Re-indexing at %s", offset)
        # Fetch the window once and test it for emptiness, rather than
        # calling ``.count()`` and then running the same query again.
        page = list(q.limit(step).offset(offset))
        if not page:
            return
        for frame_obj in page:
            index(frame_obj)
コード例 #2
0
def matchall():
    """Re-do all matching.

    Walks every ``Frame`` record, loads the corresponding frame through
    ``load_frame`` and hands it to ``match``. Records for which
    ``load_frame`` returns ``None`` are skipped.
    """
    from datawire.store import load_frame
    from datawire.model import Frame
    from datawire.processing.matching import match
    for record in Frame.all():
        loaded = load_frame(record.urn)
        if loaded is not None:
            match(loaded)
コード例 #3
0
ファイル: matching.py プロジェクト: backgroundcheck/datawi.re
def backsearch(entity, step=5000):
    """Match ``entity`` against already stored frames, newest first.

    Frames are paged through in ``submitted_at`` descending order using
    limit/offset windows of ``step`` rows, with offsets bounded by
    ``BACKSEARCH_LIMIT``. The scan ends early when a window is empty or
    once at least ``BACKSEARCH_FIND`` matches have been accumulated.
    The database session is committed after every frame that produced
    matches.

    :param entity: object providing ``pattern``, ``id`` and ``text``.
    :param step: number of frames fetched per window.
    """
    # TODO: Check if the string is already tracked, use existing results.
    found_count = 0
    pattern = entity.pattern
    q = Frame.all().order_by(Frame.submitted_at.desc())
    for offset in range(0, BACKSEARCH_LIMIT, step):
        log.info("Backsearch [%s] at %s (found: %s)", entity.text, offset,
                 found_count)
        # Fetch the window once and test it for emptiness, rather than
        # calling ``.count()`` and then running the same query again.
        page = list(q.limit(step).offset(offset))
        if not page:
            return
        for frame_obj in page:
            frame = load_frame(frame_obj.urn)
            if frame is None:
                # load_frame may return None for a frame that cannot be
                # loaded; there is nothing to match against in that case.
                continue
            matches = match(frame, pattern, [entity.id])
            hits = len(matches)
            found_count += hits
            if hits:
                db.session.commit()
            if found_count >= BACKSEARCH_FIND:
                return
コード例 #4
0
def backsearch(entity, step=5000):
    """Search previously stored frames for occurrences of ``entity``.

    The frames are visited newest first (``submitted_at`` descending) in
    limit/offset windows of ``step`` rows; offsets never reach
    ``BACKSEARCH_LIMIT``. Scanning stops when a window yields no rows or
    when the running match total reaches ``BACKSEARCH_FIND``. Whenever a
    frame produces matches the database session is committed.

    :param entity: object providing ``pattern``, ``id`` and ``text``.
    :param step: number of frames fetched per window.
    """
    # TODO: Check if the string is already tracked, use existing results.
    found_count = 0
    pattern = entity.pattern
    q = Frame.all().order_by(Frame.submitted_at.desc())
    for offset in range(0, BACKSEARCH_LIMIT, step):
        log.info("Backsearch [%s] at %s (found: %s)", entity.text, offset,
                 found_count)
        # One query per window: materialise it and check for emptiness
        # instead of issuing ``.count()`` followed by the same query.
        window = list(q.limit(step).offset(offset))
        if not window:
            return
        for frame_obj in window:
            frame = load_frame(frame_obj.urn)
            if frame is None:
                # load_frame may return None when the frame cannot be
                # loaded; skip it rather than passing None to match().
                continue
            matches = match(frame, pattern, [entity.id])
            hits = len(matches)
            found_count += hits
            if hits:
                db.session.commit()
            if found_count >= BACKSEARCH_FIND:
                return
コード例 #5
0
def user_index(id):
    """List frames that mention entities, most recent ``action_at`` first.

    A search request is built and sent through ``elastic.search_raw``.
    When the request carries one or more ``entity`` arguments, a frame
    must contain all of them (``and`` filter); otherwise a frame must
    contain at least one entity belonging to user ``id`` (``or`` filter).
    The hit ids are then resolved to ``Frame`` rows by URN and handed to
    ``query_pager`` in the order the search returned them.

    :param id: user id; checked with ``require.user_id`` (presumably an
        authorization check -- confirm) and used to select the user's
        entities.
    :returns: whatever ``query_pager`` returns for the resolved frames.
    """
    require.user_id(id)

    esq = {
        "query": {
            "filtered": {
                "query": {"match_all": {}}, "filter": {}
            }
        },
        "sort": [{"action_at": {"order": "desc"}}],
        "size": get_limit(),
        "from": get_offset(),
        "facets": {"entities": {
            "terms": {"field": "entities"}}
        }
    }

    entity_filter = esq['query']['filtered']['filter']
    filters = request.args.getlist('entity')
    if filters:
        entity_filter['and'] = [
            {"term": {"entities": entity_id}} for entity_id in filters
        ]
    else:
        # NOTE(review): this list is empty when the user has no entities;
        # confirm how the search backend treats an empty "or" filter.
        entity_filter['or'] = [
            {"term": {"entities": entity.id}}
            for entity in Entity.all().filter(Entity.user_id == id)
        ]

    res = elastic.search_raw(esq, elastic_index, 'frame')
    frame_urns = [r['_id'] for r in res['hits']['hits']]
    if frame_urns:
        q = Frame.all().filter(Frame.urn.in_(frame_urns))
        frames_by_urn = {f.urn: f for f in q}
    else:
        # No hits: skip the database round trip with an empty IN list.
        frames_by_urn = {}
    # Keep the search ordering. A hit without a matching Frame row yields
    # a None entry here, as before.
    frames = [frames_by_urn.get(urn) for urn in frame_urns]
    return query_pager(frames, 'frames.user_index',
                       count=res['hits']['total'],
                       paginate=False,
                       id=id)
コード例 #6
0
def index():
    """Pass all frames, ordered by ``action_at`` descending, to ``query_pager``."""
    newest_first = Frame.all().order_by(Frame.action_at.desc())
    return query_pager(newest_first, 'frames.index')