def format_results(query):
    """Flatten the hits returned for ``query`` into a list of plain dicts.

    Each hit's ``_source`` mapping is converted into one dict:

    * dict-valued fields and fields named in ``SKIP_FIELDS`` are dropped;
    * ``entities`` (an iterable of entity ids) becomes a ``', '``-joined
      string of entity names, omitting ids for which no name is known;
    * any other list, tuple or set value is joined with ``', '``;
    * ``source_id`` is replaced by the source's label, or by a
      ``'[Deleted source <id>]'`` placeholder when ``Source.by_id``
      returns ``None``.

    Entity names and source labels are cached for the duration of the call.
    Misses are cached as well, so an id that resolves to nothing is looked
    up once instead of once per row.

    :param query: query handed unchanged to ``raw_iter``.
    :returns: list with one dict per hit, in iteration order.
    """
    source_labels = {}
    entity_names = {}
    results = []
    for row in raw_iter(query):
        src = row.get('_source')
        data = {}
        for name, value in src.items():
            if isinstance(value, dict) or name in SKIP_FIELDS:
                continue
            if name == 'entities':
                to_load = []
                for entity_id in value:
                    if entity_id not in entity_names:
                        # Reserve the slot before querying: this de-duplicates
                        # ids repeated within the row and leaves a ``None``
                        # marker behind for ids the lookup does not return.
                        entity_names[entity_id] = None
                        to_load.append(entity_id)
                if to_load:
                    loaded = Entity.by_id_set(to_load)
                    for loaded_id, entity in loaded.items():
                        entity_names[loaded_id] = entity.name
                known = [entity_names.get(e) for e in value]
                value = ', '.join([n for n in known if n is not None])
            if isinstance(value, (list, tuple, set)):
                value = ', '.join(value)
            if name == 'source_id':
                # WARNING: don't do one query per row
                if value not in source_labels:
                    source = Source.by_id(value)
                    if source is None:
                        source_labels[value] = '[Deleted source %s]' % value
                    else:
                        source_labels[value] = source.label
                value = source_labels[value]
            data[name] = value
        results.append(data)
    return results
def query_doc_ids(query):
    """Yield the ``_id`` of every hit ``raw_iter`` produces for ``query``.

    The given query is wrapped in a request body whose ``_source`` list is
    empty (presumably so that no document fields are transferred -- confirm
    against ``raw_iter``). Hits without an ``_id`` key yield ``None``.
    """
    body = {'query': query, '_source': []}
    for hit in raw_iter(body):
        yield hit.get('_id')
def refresh(selectors):
    """Queue ``process_package`` once per distinct package matching ``selectors``.

    The selectors are queried in slices of ``BUNCH`` items. Each returned
    document identifies a package by the ``collection`` and ``id`` values of
    its ``_source``; a package already queued during this call is skipped.
    """
    queued = set()
    pending = list(selectors)
    for start in xrange(0, len(pending), BUNCH):
        batch = pending[start:start + BUNCH]
        for doc in raw_iter(entity_query(batch)):
            key = (doc['_source']['collection'], doc['_source']['id'])
            if key in queued:
                continue
            process_package.delay(*key)
            # Only remember the package after it was handed off successfully.
            queued.add(key)
def export():
    """Send the documents matching the current request as ``export.xlsx``.

    The query is built from the request arguments, restricted to the lists
    and sources returned by ``authz`` for ``'read'``. Every requested
    ``attribute`` that is in ``CORE_FIELDS`` is added to the query's
    ``_source`` fields; any other attribute adds the ``'attributes'`` field
    instead. The rows are produced lazily by ``process_row`` and passed to
    ``make_excel``.
    """
    attributes = request.args.getlist('attribute')
    query = document_query(request.args,
                           lists=authz.authz_lists('read'),
                           sources=authz.authz_sources('read'))

    # Work on a set so a field requested more than once is listed only once.
    wanted = set(query['_source'])
    for requested in attributes:
        wanted.add(requested if requested in CORE_FIELDS else 'attributes')
    query['_source'] = list(wanted)

    rows = (process_row(hit, attributes) for hit in raw_iter(query))
    workbook = make_excel(rows, attributes)
    return send_file(workbook,
                     mimetype=XLSX_MIME,
                     as_attachment=True,
                     attachment_filename='export.xlsx')
def generate_graph(args):
    """Build an entity co-occurrence graph from the documents matching ``args``.

    Each entity of a matching document becomes a node keyed by its
    ``entity_id``, carrying ``label`` (the entity's ``name``) and
    ``category`` attributes. Every pair of distinct entity ids found in the
    same document is connected by one edge of weight 1. The multigraph is
    then passed through ``multigraph_to_weighted`` and ``paginate_graph``,
    whose result is returned.
    """
    fields = [
        'id',
        'collection',
        'entities.entity_id',
        'entities.name',
        'entities.category',
    ]
    query = documents_query(args, fields=fields, facets=False)
    graph = nx.MultiGraph()
    for doc in raw_iter(query):
        doc_nodes = set()
        for entity in doc.get('_source').get('entities', []):
            node = entity.get('entity_id')
            if not graph.has_node(node):
                # First sighting of this entity: attributes are set only once.
                graph.add_node(node,
                               label=entity.get('name'),
                               category=entity.get('category'))
            doc_nodes.add(node)
        for src, dst in combinations(doc_nodes, 2):
            graph.add_edge(src, dst, weight=1)
    return paginate_graph(multigraph_to_weighted(graph))
def generate_graph(args):
    """Build an entity co-occurrence graph from the readable documents matching ``args``.

    The document query is restricted to the sources and lists returned by
    ``authz`` for ``'read'``. Each entity of a matching document becomes a
    node keyed by its ``id``, carrying ``label`` and ``category``
    attributes. Every pair of distinct entity ids found in the same document
    is connected by one edge of weight 1. The multigraph is then passed
    through ``multigraph_to_weighted`` and ``paginate_graph``, whose result
    is returned.
    """
    fields = [
        'id',
        'collection',
        'entities.id',
        'entities.label',
        'entities.category',
    ]
    query = document_query(args,
                           fields=fields,
                           sources=authz.authz_sources('read'),
                           lists=authz.authz_lists('read'),
                           facets=False)
    graph = nx.MultiGraph()
    for doc in raw_iter(query):
        doc_nodes = set()
        for entity in doc.get('_source').get('entities', []):
            node = entity.get('id')
            if not graph.has_node(node):
                # First sighting of this entity: attributes are set only once.
                graph.add_node(node,
                               label=entity.get('label'),
                               category=entity.get('category'))
            doc_nodes.add(node)
        for src, dst in combinations(doc_nodes, 2):
            graph.add_edge(src, dst, weight=1)
    return paginate_graph(multigraph_to_weighted(graph))