def get_results(query, limit):
    """Yield one flat dict per scanned row, stopping after ``limit`` rows.

    Every yielded dict has a ``file_url`` entry built from the row's ``_id``
    and one entry per ``_source`` field whose name is in ``FIELDS``. The
    ``collection_id`` field is renamed to ``collections`` and its value is
    replaced by the sorted, comma-separated labels of the referenced
    collections; the renamed field is still subject to the ``FIELDS`` filter.
    List, tuple and set values are joined with ``', '``.

    :param query: query handed unchanged to ``scan_iter``.
    :param limit: maximum number of rows to yield.
    """
    # Label cache keyed by collection id, so that each collection is looked
    # up through Collection.by_id at most once per call.
    collections = {}
    for i, row in enumerate(scan_iter(query)):
        if i >= limit:
            return
        data = {
            'file_url': url_for('documents_api.file',
                                document_id=row.get('_id'))
        }
        for name, value in row.get('_source').items():
            if name == 'collection_id':
                colls = []
                for coll in value:
                    if coll not in collections:
                        source = Collection.by_id(coll)
                        if source is None:
                            # Name the single missing id. Formatting with
                            # `value` would print the whole id list (and
                            # fail outright for a multi-element tuple).
                            collections[coll] = \
                                '[Deleted collection %s]' % (coll,)
                        else:
                            collections[coll] = source.label
                    colls.append(collections[coll])
                value = ', '.join(sorted(colls))
                name = 'collections'
            if name not in FIELDS:
                continue
            if isinstance(value, (list, tuple, set)):
                value = ', '.join(value)
            data[name] = value
        yield data
def get_results(query, limit):
    """Generate export records for the rows matched by ``query``.

    At most ``limit`` records are produced. A record always contains
    ``file_url`` (derived from the row's ``_id``); every ``_source`` field
    listed in ``FIELDS`` is copied in as well. ``source_id`` is exposed as
    ``source`` with the source's label as value, and sequence-like values
    (list, tuple, set) are flattened into a ``', '``-separated string.

    :param query: query forwarded to ``scan_iter``.
    :param limit: number of rows after which generation stops.
    """
    # Remembers the label resolved for each source id seen so far.
    label_by_id = {}
    for index, hit in enumerate(scan_iter(query)):
        if index >= limit:
            return
        file_url = url_for('documents_api.file', document_id=hit.get('_id'))
        record = {'file_url': file_url}
        for field, content in hit.get('_source').items():
            if field == 'source_id':
                if content not in label_by_id:
                    found = Source.by_id(content)
                    label_by_id[content] = (
                        '[Deleted source %s]' % content
                        if found is None
                        else found.label
                    )
                # Swap the raw id for its label under the public name.
                field, content = 'source', label_by_id[content]
            if field in FIELDS:
                if isinstance(content, (list, tuple, set)):
                    content = ', '.join(content)
                record[field] = content
        yield record
def generate_graph(args):
    """Build an entity co-occurrence graph for the documents matching ``args``.

    Each entity found on a scanned document becomes a node (keyed by its
    ``uuid``, with ``label`` and ``schema`` attributes taken from the first
    occurrence). Every pair of distinct entities on the same document gets
    one edge of weight 1 in a multigraph, which is then passed through
    ``multigraph_to_weighted`` and ``paginate_graph``.

    :param args: request arguments forwarded to ``documents_query``.
    :return: whatever ``paginate_graph`` returns for the resulting graph.
    """
    wanted_fields = ['id', 'collection', 'entities.uuid', 'entities.name',
                     'entities.$schema']
    full_query = documents_query(args, fields=wanted_fields, facets=False)
    # Only the 'query' part of the generated body is used for the scan.
    scan_query = {'query': full_query['query']}
    multi = nx.MultiGraph()
    for hit in scan_iter(scan_query):
        node_ids = set()
        for item in hit.get('_source').get('entities', []):
            node_id = item.get('uuid')
            if not multi.has_node(node_id):
                multi.add_node(node_id,
                               label=item.get('name'),
                               schema=item.get('$schema'))
            node_ids.add(node_id)
        for left, right in combinations(node_ids, 2):
            multi.add_edge(left, right, weight=1)
    weighted = multigraph_to_weighted(multi)
    return paginate_graph(weighted)
def generate_graph(args):
    """Return the paginated entity graph for documents selected by ``args``.

    Documents are scanned with the query produced by ``documents_query``.
    Entities become nodes identified by ``uuid`` (attributes ``label`` and
    ``schema`` are set only when the node is first added), and each pair of
    distinct entities sharing a document contributes a weight-1 edge. The
    multigraph is collapsed by ``multigraph_to_weighted`` before being
    handed to ``paginate_graph``.

    :param args: request arguments forwarded to ``documents_query``.
    :return: the result of ``paginate_graph``.
    """
    entity_fields = [
        'id',
        'collection',
        'entities.uuid',
        'entities.name',
        'entities.$schema',
    ]
    built = documents_query(args, fields=entity_fields, facets=False)
    # Keep nothing but the 'query' clause for scanning.
    search = {'query': built['query']}
    co_occurrence = nx.MultiGraph()
    for document in scan_iter(search):
        source = document.get('_source')
        present = set()
        for ent in source.get('entities', []):
            key = ent.get('uuid')
            already_known = co_occurrence.has_node(key)
            if not already_known:
                co_occurrence.add_node(key, label=ent.get('name'),
                                       schema=ent.get('$schema'))
            present.add(key)
        for pair in combinations(present, 2):
            src, dst = pair
            co_occurrence.add_edge(src, dst, weight=1)
    return paginate_graph(multigraph_to_weighted(co_occurrence))
def analyze_source(source_id):
    """Dispatch ``analyze_document`` for each row matching ``source_id``.

    :param source_id: value matched against the ``source_id`` term.
    """
    # '_source': False -- only each row's '_id' is read below.
    search = {
        'query': {'term': {'source_id': source_id}},
        '_source': False,
    }
    for hit in scan_iter(search):
        document_id = hit.get('_id')
        analyze_document.delay(document_id)
def analyze_collection(collection_id):
    """Dispatch ``analyze_document`` for each row matching ``collection_id``.

    :param collection_id: value matched against the ``collection_id`` term.
    """
    term_filter = {'term': {'collection_id': collection_id}}
    # '_source': False -- only each row's '_id' is read below.
    search = {'query': term_filter, '_source': False}
    for hit in scan_iter(search):
        document_id = hit.get('_id')
        analyze_document.delay(document_id)
def analyze_documents(collection_id):
    """Dispatch ``analyze_document_id`` for each matching document row.

    The scan is issued with ``TYPE_DOCUMENT`` as the second argument to
    ``scan_iter``.

    :param collection_id: value matched against the ``collection_id`` term.
    """
    # '_source': False -- only each row's '_id' is read below.
    search = {
        'query': {'term': {'collection_id': collection_id}},
        '_source': False,
    }
    for hit in scan_iter(search, TYPE_DOCUMENT):
        document_id = hit.get('_id')
        analyze_document_id.delay(document_id)
def query_doc_ids(query):
    """Yield the ``_id`` of every row that ``scan_iter`` returns for ``query``.

    :param query: query clause, wrapped under the ``'query'`` key of the
        body passed to ``scan_iter``.
    """
    # '_source': False -- only each row's '_id' is read below.
    body = {'query': query, '_source': False}
    for hit in scan_iter(body):
        doc_id = hit.get('_id')
        yield doc_id
def analyze_collection(collection_id):
    """Dispatch ``analyze_document_id`` for each row in the collection.

    :param collection_id: value matched against the ``collection_id`` term.
    """
    # '_source': False -- only each row's '_id' is read below.
    search = {
        'query': {'term': {'collection_id': collection_id}},
        '_source': False,
    }
    for hit in scan_iter(search):
        document_id = hit.get('_id')
        analyze_document_id.delay(document_id)
def analyze_collection(collection_id):
    """Dispatch ``analyze_document_id`` for each row matching the collection.

    :param collection_id: value matched against the ``collection_id`` term.
    """
    term_filter = {"term": {"collection_id": collection_id}}
    # "_source": False -- only each row's "_id" is read below.
    search = {"query": term_filter, "_source": False}
    for hit in scan_iter(search):
        document_id = hit.get("_id")
        analyze_document_id.delay(document_id)