def export_entities(request, result, format):
    assert format in (FORMAT_CSV, FORMAT_EXCEL)
    entities = []
    # Queue all collections up front so the resolver can fetch them in a
    # single batch:
    for entity in result.results:
        resolver.queue(result, Collection, entity.get('collection_id'))
        entities.append(model.get_proxy(entity))
    resolver.resolve(result)
    zip_archive = zipstream.ZipFile()
    if format == FORMAT_EXCEL:
        workbook = get_workbook()
        for entity in entities:
            collection_id = entity.context.get('collection_id')
            collection = resolver.get(result, Collection, collection_id)
            export_entity_excel(workbook, collection, entity)
            write_document(zip_archive, collection, entity)
        content = io.BytesIO(get_workbook_content(workbook))
        zip_archive.write_iter('export.xlsx', content)
    elif format == FORMAT_CSV:
        handlers = {}
        for entity in entities:
            collection_id = entity.context.get('collection_id')
            collection = resolver.get(result, Collection, collection_id)
            export_entity_csv(handlers, collection, entity)
            write_document(zip_archive, collection, entity)
        # Each handler key becomes its own CSV file inside the zip archive:
        for key in handlers:
            content = handlers[key]
            content.seek(0)
            content = io.BytesIO(content.read().encode())
            zip_archive.write_iter(key + '.csv', content)
    for chunk in zip_archive:
        yield chunk
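# A minimal usage sketch, assuming a Flask context: the generator above can
# be streamed straight into a zip download response. The view wiring and the
# 'export.zip' filename are illustrative, not part of the original code.
def export_download(request, result, format):
    from flask import Response

    response = Response(
        export_entities(request, result, format),
        mimetype='application/zip',
    )
    response.headers['Content-Disposition'] = 'attachment; filename=export.zip'
    return response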
def export_entities(export_id, result):
    from aleph.logic import resolver

    export_dir = ensure_path(mkdtemp(prefix="aleph.export."))
    try:
        entities = []
        stub = types.SimpleNamespace(result=result)
        for entity in result["results"]:
            resolver.queue(stub, Collection, entity.get("collection_id"))
            entities.append(model.get_proxy(entity))
        resolver.resolve(stub)
        file_path = export_dir.joinpath("query-export.zip")
        zf = zipfile.ZipFile(file_path, "w")
        exporter = ExcelExporter(None, extra=EXTRA_HEADERS)
        for entity in entities:
            collection_id = entity.context.get("collection_id")
            collection = resolver.get(stub, Collection, collection_id)
            extra = [entity_url(entity.id), collection.get("label")]
            exporter.write(entity, extra=extra)
            write_document(export_dir, zf, collection, entity)
        content = exporter.get_bytesio().getvalue()
        zf.writestr("Export.xlsx", content)
        zf.close()
        complete_export(export_id, file_path)
    except Exception:
        log.exception("Failed to process export [%s]", export_id)
        export = Export.by_id(export_id)
        export.set_status(status=Export.STATUS_FAILED)
        db.session.commit()
    finally:
        shutil.rmtree(export_dir)
def render_notification(stub, notification):
    """Generate a text version of the notification, suitable for use
    in an email or text message."""
    from aleph.logic import resolver

    for name, clazz, value in notification.iterparams():
        resolver.queue(stub, clazz, value)
    resolver.resolve(stub)
    plain = str(notification.event.template)
    html = str(notification.event.template)
    for name, clazz, value in notification.iterparams():
        data = resolver.get(stub, clazz, value)
        if data is None:
            return
        link, title = None, None
        if clazz == Role:
            title = data.get('label')
        elif clazz == Alert:
            title = data.get('query')
        elif clazz == Collection:
            title = data.get('label')
            link = collection_url(value)
        elif clazz == Entity:
            title = data.get('name')
            link = entity_url(value)
        template = '{{%s}}' % name
        html = html.replace(template, html_link(title, link))
        plain = plain.replace(template, "'%s'" % title)
        if name == notification.event.link_to:
            plain = '%s (%s)' % (plain, link)
    return {'plain': plain, 'html': html}
def statistics():
    """Get a summary of the data accessible to the current user."""
    enable_cache()
    collections = request.authz.collections(request.authz.READ)
    for collection_id in collections:
        resolver.queue(request, Collection, collection_id)
    resolver.resolve(request)
    # Summarise stats. This is meant for display, so the counting is a bit
    # inconsistent between counting all collections, and source collections
    # only.
    schemata = defaultdict(int)
    countries = defaultdict(int)
    categories = defaultdict(int)
    for collection_id in collections:
        data = resolver.get(request, Collection, collection_id)
        if data is None or data.get('casefile'):
            continue
        categories[data.get('category')] += 1
        for schema, count in data.get('schemata', {}).items():
            schemata[schema] += count
        for country in data.get('countries', []):
            countries[country] += 1
    return jsonify({
        'collections': len(collections),
        'schemata': dict(schemata),
        'countries': dict(countries),
        'categories': dict(categories),
        'things': sum(schemata.values()),
    })
def _iter_match_batch(stub, sheet, batch):
    matchable = [s.name for s in model if s.matchable]
    entities = set()
    for match in batch:
        entities.add(match.get("entity_id"))
        entities.add(match.get("match_id"))
        resolver.queue(stub, Collection, match.get("match_collection_id"))
    resolver.resolve(stub)
    entities = entities_by_ids(list(entities), schemata=matchable)
    entities = {e.get("id"): e for e in entities}
    for obj in batch:
        entity = entities.get(str(obj.get("entity_id")))
        match = entities.get(str(obj.get("match_id")))
        collection_id = obj.get("match_collection_id")
        collection = resolver.get(stub, Collection, collection_id)
        if entity is None or match is None or collection is None:
            continue
        eproxy = model.get_proxy(entity)
        mproxy = model.get_proxy(match)
        sheet.append(
            [
                obj.get("score"),
                eproxy.caption,
                _format_date(eproxy),
                _format_country(eproxy),
                collection.get("label"),
                mproxy.caption,
                _format_date(mproxy),
                _format_country(mproxy),
                entity_url(eproxy.id),
                entity_url(mproxy.id),
            ]
        )
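# A hedged sketch of the worksheet this helper appends to; openpyxl is
# assumed, and the header labels are illustrative, mirroring the ten-column
# rows appended above.
def _make_match_sheet():
    from openpyxl import Workbook

    workbook = Workbook()
    sheet = workbook.active
    sheet.append([
        "Score",
        "Entity Name", "Entity Date", "Entity Countries",
        "Candidate Collection",
        "Candidate Name", "Candidate Date", "Candidate Countries",
        "Entity Link", "Candidate Link",
    ])
    return workbook, sheet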
def resolve(self, clazz, key, serializer=None):
    if self.reference:
        return
    data = resolver.get(request, clazz, key)
    if data is not None and serializer is not None:
        serializer = serializer(reference=True)
        data = serializer.serialize(data)
    return data
def get_profile(entityset_id, authz=None):
    """A profile is an entityset that describes a single party. The idea is
    to cache profile metadata for the API, and to generate a merged view of
    all the entities the current user has access to."""
    if entityset_id is None:
        return
    key = cache.object_key(EntitySet, entityset_id)
    data = cache.get_complex(key)
    stub = Stub()
    if data is None:
        entityset = get_entityset(entityset_id)
        if entityset is None:
            return
        data = entityset.to_dict()
        data["items"] = []
        for item in entityset.items():
            data["items"].append(item.to_dict())
        cache.set_complex(key, data, expires=cache.EXPIRE)

    # Filter the subset of items the current user can access:
    if authz is not None:
        items = [
            i for i in data["items"] if authz.can(i["collection_id"], authz.READ)
        ]
        data["items"] = items

    # Load the constituent entities for the profile and generate a
    # combined proxy with all of the given properties:
    for item in data["items"]:
        if Judgement(item["judgement"]) == Judgement.POSITIVE:
            resolver.queue(stub, Entity, item.get("entity_id"))
    resolver.resolve(stub)
    merged = None
    data["proxies"] = []
    for item in data["items"]:
        item["entity"] = resolver.get(stub, Entity, item.get("entity_id"))
        if item["entity"] is not None:
            proxy = model.get_proxy(item["entity"])
            proxy.context = {}
            data["proxies"].append(proxy)
            if merged is None:
                merged = proxy.clone()
                merged.context["entities"] = [proxy.id]
            else:
                merged.merge(proxy)
                merged.context["entities"].append(proxy.id)

    if merged is None:
        merged = model.make_entity(Entity.LEGAL_ENTITY)

    # Polish it a bit:
    merged.id = data.get("id")
    merged = name_entity(merged)
    data["merged"] = merged
    data["label"] = merged.caption
    data["shallow"] = False
    return data
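# A hedged usage sketch for get_profile(); the entityset id is illustrative.
# The returned dict carries the access-filtered items, one proxy per visible
# entity, and a single merged proxy suitable for display.
def print_profile_summary(authz):
    profile = get_profile("deadbeef", authz=authz)
    if profile is not None:
        print(profile["label"])         # caption of the merged proxy
        print(len(profile["proxies"]))  # entities the user could access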
def statistics():
    """Get a summary of the data accessible to the current user."""
    enable_cache()
    collections = request.authz.collections(request.authz.READ)
    for collection_id in collections:
        resolver.queue(request, Collection, collection_id)
    for role_id in request.authz.roles:
        resolver.queue(request, Role, role_id)
    resolver.resolve(request)
    # Summarise stats. This is meant for display, so the counting is a bit
    # inconsistent between counting all collections, and source collections
    # only.
    schemata = defaultdict(int)
    countries = defaultdict(int)
    categories = defaultdict(int)
    for collection_id in collections:
        data = resolver.get(request, Collection, collection_id)
        if data is None or data.get('casefile'):
            continue
        categories[data.get('category')] += 1
        for schema, count in data.get('schemata', {}).items():
            schemata[schema] += count
        for country in data.get('countries', []):
            countries[country] += 1
    # Add the user's group roles to the home page:
    groups = []
    for role_id in request.authz.roles:
        data = resolver.get(request, Role, role_id)
        if data is None or data.get('type') != Role.GROUP:
            continue
        groups.append(RoleSerializer().serialize(data))
    return jsonify({
        'collections': len(collections),
        'schemata': dict(schemata),
        'countries': dict(countries),
        'categories': dict(categories),
        'groups': groups,
        'things': sum(schemata.values()),
    })
def resolve(self):
    for id_ in self.queued:
        node_id = registry.entity.node_id_safe(id_)
        node = self.nodes.get(node_id)
        schema = None if node is None else node.schema
        resolver.queue(self, Entity, id_, schema=schema)
    resolver.resolve(self)
    for id_ in self.queued:
        entity = resolver.get(self, Entity, id_)
        if entity is not None:
            self.add(model.get_proxy(entity))
def render_notification(stub, notification):
    """Generate a text version of the notification, suitable for use
    in an email or text message."""
    from aleph.logic import resolver

    notification = unpack_result(notification)
    event = Events.get(notification.get("event"))
    if event is None:
        return
    for name, clazz, value in _iter_params(notification, event):
        resolver.queue(stub, clazz, value)
    resolver.resolve(stub)
    plain = str(event.template)
    html = str(event.template)
    for name, clazz, value in _iter_params(notification, event):
        data = resolver.get(stub, clazz, value)
        if data is None:
            return
        link, title = None, None
        if clazz == Role:
            title = data.get("label")
        elif clazz == Alert:
            title = data.get("query")
        elif clazz == Collection:
            title = data.get("label")
            link = collection_url(value)
        elif clazz == Entity:
            proxy = model.get_proxy(data)
            title = proxy.caption
            link = entity_url(value)
        elif clazz == EntitySet:
            title = data.label
            link = entityset_url(data.id)
        elif clazz == Export:
            title = data.get("label")
            link = archive_url(
                data.get("content_hash"),
                file_name=data.get("file_name"),
                mime_type=data.get("mime_type"),
            )
            # The authenticated download endpoint supersedes the direct
            # archive URL above:
            link = url_for("exports_api.download", export_id=data.get("id"))
        template = "{{%s}}" % name
        html = html.replace(template, html_link(title, link))
        plain = plain.replace(template, "'%s'" % title)
        if name == event.link_to:
            plain = "%s (%s)" % (plain, link)
    return {"plain": plain, "html": html}
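# A hedged sketch of how the renderer above might feed an email digest.
# `hits` stands in for raw notification results from the search index, and
# `send_plain_and_html` is a hypothetical mail helper, not part of aleph.
def render_for_email(stub, hits):
    for hit in hits:
        rendered = render_notification(stub, hit)
        if rendered is None:
            continue  # a referenced object was deleted or is inaccessible
        send_plain_and_html(rendered["plain"], rendered["html"])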
def statistics():
    """Get a summary of the data accessible to the current user.
    ---
    get:
      summary: System-wide user statistics.
      description: >
        Get a summary of the data accessible to the current user.
      responses:
        '200':
          description: OK
          content:
            application/json:
              schema:
                type: object
      tags:
      - System
    """
    enable_cache()
    collections = request.authz.collections(request.authz.READ)
    for collection_id in collections:
        resolver.queue(request, Collection, collection_id)
    resolver.resolve(request)
    # Summarise stats. This is meant for display, so the counting is a bit
    # inconsistent between counting all collections, and source collections
    # only.
    schemata = defaultdict(int)
    countries = defaultdict(int)
    categories = defaultdict(int)
    for collection_id in collections:
        data = resolver.get(request, Collection, collection_id)
        if data is None or data.get('casefile'):
            continue
        categories[data.get('category')] += 1
        things = get_collection_things(collection_id)
        for schema, count in things.items():
            schemata[schema] += count
        for country in data.get('countries', []):
            countries[country] += 1
    return jsonify({
        'collections': len(collections),
        'schemata': dict(schemata),
        'countries': dict(countries),
        'categories': dict(categories),
        'things': sum(schemata.values()),
    })
def export_entities(request, result):
    entities = []
    for entity in result.results:
        resolver.queue(result, Collection, entity.get('collection_id'))
        entities.append(model.get_proxy(entity))
    resolver.resolve(result)
    zip_archive = zipstream.ZipFile()
    exporter = ExcelExporter(None, extra=EXTRA_HEADERS)
    for entity in entities:
        collection_id = entity.context.get('collection_id')
        collection = resolver.get(result, Collection, collection_id)
        extra = [entity_url(entity.id), collection.get('label')]
        exporter.write(entity, extra=extra)
        write_document(zip_archive, collection, entity)
    content = exporter.get_bytesio()
    zip_archive.write_iter('Export.xlsx', content)
    for chunk in zip_archive:
        yield chunk
def render_notification(stub, notification):
    """Generate a text version of the notification, suitable for use
    in an email or text message."""
    from aleph.logic import resolver

    notification = unpack_result(notification)
    event = Events.get(notification.get('event'))
    if event is None:
        return
    for name, clazz, value in _iter_params(notification, event):
        resolver.queue(stub, clazz, value)
    resolver.resolve(stub)
    plain = str(event.template)
    html = str(event.template)
    for name, clazz, value in _iter_params(notification, event):
        data = resolver.get(stub, clazz, value)
        if data is None:
            return
        link, title = None, None
        if clazz == Role:
            title = data.get('label')
        elif clazz == Alert:
            title = data.get('query')
        elif clazz == Collection:
            title = data.get('label')
            link = collection_url(value)
        elif clazz == Entity:
            proxy = model.get_proxy(data)
            title = proxy.caption
            link = entity_url(value)
        elif clazz == Diagram:
            title = data.label
            link = diagram_url(data.id)
        template = '{{%s}}' % name
        html = html.replace(template, html_link(title, link))
        plain = plain.replace(template, "'%s'" % title)
        if name == event.link_to:
            plain = '%s (%s)' % (plain, link)
    return {'plain': plain, 'html': html}
def export_entities(request, result):
    entities = []
    for entity in result.results:
        resolver.queue(result, Collection, entity.get('collection_id'))
        entities.append(model.get_proxy(entity))
    resolver.resolve(result)
    zip_archive = zipstream.ZipFile()
    workbook = get_workbook()
    for entity in entities:
        collection_id = entity.context.get('collection_id')
        collection = resolver.get(result, Collection, collection_id)
        fields = {
            'url': entity_url(entity.id),
            'collection': collection.get('label'),
        }
        write_entity_excel(workbook, entity,
                           extra_fields=fields,
                           extra_headers=EXTRA_HEADERS)
        write_document(zip_archive, collection, entity)
    content = io.BytesIO(get_workbook_content(workbook))
    zip_archive.write_iter('export.xlsx', content)
    for chunk in zip_archive:
        yield chunk
def resolve(self, clazz, key, serializer=None):
    data = resolver.get(request, clazz, key)
    if data is not None and serializer is not None:
        serializer = serializer(nested=True)
        data = serializer.serialize(data)
    return data
def update(self, result, key):
    collection = resolver.get(self.parser, Collection, key)
    if collection is not None:
        result["label"] = collection.get("label")
        result["category"] = collection.get("category")
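# A minimal sketch of the queue/resolve/get pattern shared by the snippets
# above. Any object can carry the resolver state (the API views use the
# Flask `request`); the collection id 5 is illustrative.
def fetch_collection_label():
    import types
    from aleph.logic import resolver
    from aleph.model import Collection

    stub = types.SimpleNamespace()
    resolver.queue(stub, Collection, 5)        # 1) register the lookup
    resolver.resolve(stub)                     # 2) fetch all queued keys in one batch
    data = resolver.get(stub, Collection, 5)   # 3) read the resolved value
    return None if data is None else data.get("label")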