Esempio n. 1
0
def format_results(query):
    """Flatten the hits returned for ``query`` into a list of plain dicts.

    For every row yielded by ``raw_iter(query)`` the ``_source`` mapping is
    copied field by field with these adjustments:

    * fields listed in ``SKIP_FIELDS`` and fields whose value is a dict are
      dropped;
    * ``entities`` (an iterable of entity ids) becomes a ', '-joined string
      of entity names; ids that cannot be resolved are left out;
    * any other list/tuple/set value is joined with ', ';
    * ``source_id`` is replaced by the label of the source, or by a
      '[Deleted source <id>]' placeholder when ``Source.by_id`` returns None.

    Returns a list holding exactly one dict per row.
    """
    sources = {}   # source_id -> label, so each source is fetched only once
    entities = {}  # entity id -> name, shared across all rows
    results = []
    for row in raw_iter(query):
        src = row.get('_source')
        data = {}
        for name, value in src.items():
            if isinstance(value, dict) or name in SKIP_FIELDS:
                continue
            if name == 'entities':
                # Only look up the entities that are not cached yet.
                load_ids = [e for e in value if e not in entities]
                if load_ids:
                    loaded = Entity.by_id_set(load_ids)
                    for entity_id, ent in loaded.items():
                        entities[entity_id] = ent.name

                value = ', '.join([entities.get(e) for e in value
                                   if entities.get(e) is not None])
            if isinstance(value, (list, tuple, set)):
                value = ', '.join(value)
            if name == 'source_id':
                # WARNING: don't do one query per row
                if value not in sources:
                    source = Source.by_id(value)
                    if source is None:
                        sources[value] = '[Deleted source %s]' % value
                    else:
                        sources[value] = source.label
                value = sources[value]
            data[name] = value
        # Append once per row. Appending inside the field loop added the
        # same dict once per kept field and skipped rows with no kept field.
        results.append(data)
    return results
Esempio n. 2
0
def query_doc_ids(query):
    """Lazily yield the ``_id`` of every row ``raw_iter`` returns for ``query``.

    The query is wrapped in a request body with an empty ``_source`` list.
    """
    body = {'query': query, '_source': []}
    for hit in raw_iter(body):
        yield hit.get('_id')
Esempio n. 3
0
def refresh(selectors):
    """Schedule ``process_package`` once per distinct matching package.

    The selectors are queried in slices of ``BUNCH`` items. Each document
    returned contributes a ``(collection, id)`` pair taken from its
    ``_source``; ``process_package.delay`` is called the first time a pair is
    seen and skipped for repeats.
    """
    selectors = list(selectors)
    queued = set()
    for offset in xrange(0, len(selectors), BUNCH):
        bunch = selectors[offset:offset + BUNCH]
        for doc in raw_iter(entity_query(bunch)):
            key = (doc['_source']['collection'], doc['_source']['id'])
            if key in queued:
                continue
            process_package.delay(*key)
            queued.add(key)
Esempio n. 4
0
def export():
    """Send the documents matching the current request as an Excel file.

    The ``attribute`` request arguments select the exported columns. Those
    found in ``CORE_FIELDS`` are added to the query's ``_source`` fields by
    name; any other attribute adds the ``attributes`` field instead.
    """
    attributes = request.args.getlist('attribute')
    query = document_query(request.args, lists=authz.authz_lists('read'),
                           sources=authz.authz_sources('read'))

    # Extend the requested source fields without introducing duplicates.
    wanted = set(query['_source'])
    for attribute in attributes:
        field = attribute if attribute in CORE_FIELDS else 'attributes'
        wanted.add(field)
    query['_source'] = list(wanted)

    rows = (process_row(hit, attributes) for hit in raw_iter(query))
    workbook = make_excel(rows, attributes)
    return send_file(workbook, mimetype=XLSX_MIME, as_attachment=True,
                     attachment_filename='export.xlsx')
Esempio n. 5
0
def export():
    """Build an Excel export of the documents matched by the request.

    Every ``attribute`` request argument becomes a column. Attributes in
    ``CORE_FIELDS`` are requested from the index directly, all others are
    covered by requesting the ``attributes`` field.
    """
    attributes = request.args.getlist('attribute')
    query = document_query(
        request.args,
        lists=authz.authz_lists('read'),
        sources=authz.authz_sources('read'))

    # A set keeps the list of source fields free of duplicates.
    source_fields = set(query['_source'])
    source_fields.update(
        name if name in CORE_FIELDS else 'attributes'
        for name in attributes)
    query['_source'] = list(source_fields)

    rows = (process_row(row, attributes) for row in raw_iter(query))
    return send_file(
        make_excel(rows, attributes),
        mimetype=XLSX_MIME,
        as_attachment=True,
        attachment_filename='export.xlsx')
Esempio n. 6
0
def generate_graph(args):
    """Build a graph linking entities that occur in the same document.

    Every entity found in a document becomes a node (keyed by its
    ``entity_id``, with ``label`` and ``category`` attributes), and each
    pair of distinct entities within one document gets an edge of weight 1.
    The multigraph is then passed through ``multigraph_to_weighted`` and the
    result of ``paginate_graph`` is returned.
    """
    query = documents_query(
        args,
        fields=['id', 'collection', 'entities.entity_id', 'entities.name',
                'entities.category'],
        facets=False)

    graph = nx.MultiGraph()
    for doc in raw_iter(query):
        # Distinct entity ids seen in this document.
        doc_entity_ids = set()
        for entity in doc.get('_source').get('entities', []):
            entity_id = entity.get('entity_id')
            if not graph.has_node(entity_id):
                graph.add_node(entity_id,
                               label=entity.get('name'),
                               category=entity.get('category'))
            doc_entity_ids.add(entity_id)
        for left, right in combinations(doc_entity_ids, 2):
            graph.add_edge(left, right, weight=1)

    return paginate_graph(multigraph_to_weighted(graph))
Esempio n. 7
0
def generate_graph(args):
    """Build an entity co-occurrence graph for the readable documents.

    The query is restricted to the sources and lists the current user may
    read. Every entity in a document becomes a node (keyed by its ``id``,
    with ``label`` and ``category`` attributes) and each pair of distinct
    entities within one document gets an edge of weight 1. The multigraph is
    passed through ``multigraph_to_weighted`` before ``paginate_graph``
    produces the return value.
    """
    fields = ['id', 'collection', 'entities.id', 'entities.label',
              'entities.category']
    query = document_query(args, fields=fields,
                           sources=authz.authz_sources('read'),
                           lists=authz.authz_lists('read'),
                           facets=False)

    graph = nx.MultiGraph()
    for doc in raw_iter(query):
        # Distinct entity ids seen in this document.
        members = set()
        for entity in doc.get('_source').get('entities', []):
            node_id = entity.get('id')
            if not graph.has_node(node_id):
                graph.add_node(node_id, label=entity.get('label'),
                               category=entity.get('category'))
            members.add(node_id)
        for pair in combinations(members, 2):
            graph.add_edge(pair[0], pair[1], weight=1)

    weighted = multigraph_to_weighted(graph)
    return paginate_graph(weighted)