Exemple #1
0
def get_results(query, limit):
    sources = {}
    for i, row in enumerate(scan_iter(query)):
        if i >= limit:
            return
        data = {
            'file_url': url_for('documents_api.file',
                                document_id=row.get('_id'))
        }
        for name, value in row.get('_source').items():
            if name == 'source_id':
                if value not in sources:
                    source = Source.by_id(value)
                    if source is None:
                        sources[value] = '[Deleted source %s]' % value
                    else:
                        sources[value] = source.label
                value = sources[value]
                name = 'source'
            if name not in FIELDS:
                continue
            if isinstance(value, (list, tuple, set)):
                value = ', '.join(value)
            data[name] = value
        yield data
Exemple #2
0
def alert_query(alert):
    """Execute the query and return a set of results."""
    q = text_query(alert.query_text)
    q = authz_sources_filter(q)
    if alert.entity_id:
        q = filter_query(q, [('entities.uuid', alert.entity_id)], OR_FIELDS)
    if alert.notified_at:
        q = add_filter(q, {"range": {"created_at": {"gt": alert.notified_at}}})
    q = {'query': q, 'size': 150}

    result, hits, output = execute_basic(TYPE_DOCUMENT, q)
    sub_queries = []
    sources = {}
    for doc in hits.get('hits', []):
        document = doc.get('_source')
        document['id'] = int(doc.get('_id'))
        source_id = document['source_id']
        if source_id not in sources:
            sources[source_id] = Source.by_id(source_id)
        if sources[source_id] is None:
            continue
        document['source'] = sources[source_id]
        document['records'] = {'results': [], 'total': 0}

        sq = records_query(document['id'], alert.to_query(), size=1)
        if sq is not None:
            sub_queries.append(json.dumps({}))
            sub_queries.append(json.dumps(sq))
        output['results'].append(document)

    run_sub_queries(output, sub_queries)
    return output
Exemple #3
0
def get_results(query, limit):
    sources = {}
    for i, row in enumerate(scan_iter(query)):
        if i >= limit:
            return
        data = {
            'file_url': url_for('documents_api.file',
                                document_id=row.get('_id'))
        }
        for name, value in row.get('_source').items():
            if name == 'source_id':
                if value not in sources:
                    source = Source.by_id(value)
                    if source is None:
                        sources[value] = '[Deleted source %s]' % value
                    else:
                        sources[value] = source.label
                value = sources[value]
                name = 'source'
            if name not in FIELDS:
                continue
            if isinstance(value, (list, tuple, set)):
                value = ', '.join(value)
            data[name] = value
        yield data
Exemple #4
0
def update(id):
    authz.require(authz.source_write(id))
    source = obj_or_404(Source.by_id(id))
    source.update(request_data())
    db.session.add(source)
    db.session.commit()
    return view(id)
Exemple #5
0
def update(id):
    authz.require(authz.source_write(id))
    source = obj_or_404(Source.by_id(id))
    source.update(request_data())
    db.session.add(source)
    db.session.commit()
    return view(id)
Exemple #6
0
def format_results(query):
    sources = {}
    entities = {}
    results = []
    for row in raw_iter(query):
        src = row.get('_source')
        data = {}
        for name, value in src.items():
            if isinstance(value, dict) or name in SKIP_FIELDS:
                continue
            if name == 'entities':
                load_ids = []
                for entity_id in value:
                    if entity_id not in entities:
                        load_ids.append(entity_id)
                if len(load_ids):
                    for id, ent in Entity.by_id_set(load_ids).items():
                        entities[id] = ent.name

                value = ', '.join([entities.get(e) for e in value
                                   if entities.get(e) is not None])
            if isinstance(value, (list, tuple, set)):
                value = ', '.join(value)
            if name == 'source_id':
                # WARNING: don't to one query per row
                if value not in sources:
                    source = Source.by_id(value)
                    if source is None:
                        sources[value] = '[Deleted source %s]' % value
                    else:
                        sources[value] = source.label
                value = sources[value]
            data[name] = value
            results.append(data)
    return results
Exemple #7
0
 def test_delete_source(self):
     source = Source.by_id(1000)
     res = self.client.get('/api/1/query?q="mention fruit"')
     assert res.json['total'] == 1, res.json
     delete_source(source.id)
     optimize_search()
     res = self.client.get('/api/1/query?q="mention fruit"')
     assert res.json['total'] == 0, res.json
Exemple #8
0
 def test_delete_source(self):
     source = Source.by_id(1000)
     res = self.client.get('/api/1/query?q="mention fruit"')
     assert res.json['total'] == 1, res.json
     delete_source(source.id)
     optimize_search()
     res = self.client.get('/api/1/query?q="mention fruit"')
     assert res.json['total'] == 0, res.json
Exemple #9
0
def alert_query(alert):
    """Execute the query and return a set of results."""
    q = text_query(alert.query_text)
    q = authz_sources_filter(q)
    if alert.entity_id:
        q = filter_query(q, [('entities.uuid', alert.entity_id)], OR_FIELDS)
    if alert.notified_at:
        q = add_filter(q, {
            "range": {
                "created_at": {
                    "gt": alert.notified_at
                }
            }
        })
    q = {
        'query': q,
        'size': 150
    }

    result, hits, output = execute_basic(TYPE_DOCUMENT, q)
    sub_queries = []
    sources = {}
    for doc in hits.get('hits', []):
        document = doc.get('_source')
        document['id'] = int(doc.get('_id'))
        source_id = document['source_id']
        if source_id not in sources:
            sources[source_id] = Source.by_id(source_id)
        if sources[source_id] is None:
            continue
        document['source'] = sources[source_id]
        document['records'] = {'results': [], 'total': 0}

        sq = records_query(document['id'], alert.to_query(), size=1)
        if sq is not None:
            sub_queries.append(json.dumps({}))
            sub_queries.append(json.dumps(sq))
        output['results'].append(document)

    run_sub_queries(output, sub_queries)
    return output
Exemple #10
0
def process(id):
    authz.require(authz.source_write(id))
    source = obj_or_404(Source.by_id(id))
    analyze_source.delay(source.id)
    return jsonify({'status': 'ok'})
Exemple #11
0
def view(id):
    authz.require(authz.source_read(id))
    source = obj_or_404(Source.by_id(id))
    return jsonify(source)
Exemple #12
0
def process(id):
    authz.require(authz.source_write(id))
    source = obj_or_404(Source.by_id(id))
    analyze_source.delay(source.id)
    return jsonify({'status': 'ok'})
Exemple #13
0
def view(id):
    authz.require(authz.source_read(id))
    source = obj_or_404(Source.by_id(id))
    return jsonify(source)
Exemple #14
0
def view(id):
    authz.require(authz.source_read(id))
    source = obj_or_404(Source.by_id(id))
    etag_cache_keygen(source)
    return jsonify(source)
Exemple #15
0
def get_source(sid):
    if sid not in SRCS:
        src = Source.by_id(sid)
        SRCS[sid] = src and src.label or ''
    return SRCS[sid]