def _iter_match_batch(batch, authz):
    matchable = [s.name for s in model if s.matchable]
    entities = set()
    for match in batch:
        entities.add(match.entity_id)
        entities.add(match.match_id)
    entities = entities_by_ids(list(entities), schemata=matchable)
    entities = {e.get('id'): e for e in entities}
    for obj in batch:
        if not authz.can(obj.match_collection_id, authz.READ):
            continue
        entity = entities.get(str(obj.entity_id))
        match = entities.get(str(obj.match_id))
        collection = get_collection(obj.match_collection_id)
        if entity is None or match is None or collection is None:
            continue
        eproxy = model.get_proxy(entity)
        mproxy = model.get_proxy(match)
        yield (
            int(obj.score * 100),
            eproxy.caption,
            _format_date(eproxy),
            _format_country(eproxy),
            collection.get('label'),
            mproxy.caption,
            _format_date(mproxy),
            _format_country(mproxy),
            entity_url(eproxy.id),
            entity_url(mproxy.id),
        )

def _iter_match_batch(stub, sheet, batch):
    matchable = [s.name for s in model if s.matchable]
    entities = set()
    for match in batch:
        entities.add(match.get("entity_id"))
        entities.add(match.get("match_id"))
        resolver.queue(stub, Collection, match.get("match_collection_id"))
    resolver.resolve(stub)
    entities = entities_by_ids(list(entities), schemata=matchable)
    entities = {e.get("id"): e for e in entities}
    for obj in batch:
        entity = entities.get(str(obj.get("entity_id")))
        match = entities.get(str(obj.get("match_id")))
        collection_id = obj.get("match_collection_id")
        collection = resolver.get(stub, Collection, collection_id)
        if entity is None or match is None or collection is None:
            continue
        eproxy = model.get_proxy(entity)
        mproxy = model.get_proxy(match)
        sheet.append(
            [
                obj.get("score"),
                eproxy.caption,
                _format_date(eproxy),
                _format_country(eproxy),
                collection.get("label"),
                mproxy.caption,
                _format_date(mproxy),
                _format_country(mproxy),
                entity_url(eproxy.id),
                entity_url(mproxy.id),
            ]
        )

def _iter_match_batch(batch, authz):
    entities = set()
    collections = set()
    for match in batch:
        entities.add(match.entity_id)
        entities.add(match.match_id)
        collections.add(match.match_collection_id)
    collections = Collection.all_by_ids(collections, authz=authz)
    collections = {c.id: c.label for c in collections}
    entities = iter_entities_by_ids(list(entities), authz=authz)
    entities = {e.get('id'): e for e in entities}
    for obj in batch:
        entity = entities.get(str(obj.entity_id))
        match = entities.get(str(obj.match_id))
        collection = collections.get(obj.match_collection_id)
        if entity is None or match is None or collection is None:
            continue
        eproxy = model.get_proxy(entity)
        mproxy = model.get_proxy(match)
        yield (
            int(obj.score * 100),
            eproxy.caption,
            _format_date(eproxy),
            _format_country(eproxy),
            collection,
            mproxy.caption,
            _format_date(mproxy),
            _format_country(mproxy),
            entity_url(eproxy.id),
            entity_url(mproxy.id),
        )

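# A minimal, hedged sketch of consuming the row tuples yielded by the
# generator variants of _iter_match_batch above. The header labels are
# illustrative assumptions, not taken from the codebase; standard library
# only, so it runs against any iterable of equally shaped tuples.
import csv
import io

MATCH_HEADERS = [
    "score", "entity", "entity_date", "entity_country", "collection",
    "match", "match_date", "match_country", "entity_url", "match_url",
]

def write_match_csv(rows):
    """Serialize (score, caption, ...) row tuples into CSV text."""
    out = io.StringIO()
    writer = csv.writer(out)
    writer.writerow(MATCH_HEADERS)
    for row in rows:
        writer.writerow(row)
    return out.getvalue()

# e.g. write_match_csv(_iter_match_batch(batch, authz))
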
def reconcile_index(collection=None):
    domain = settings.APP_UI_URL.strip("/")
    label = settings.APP_TITLE
    suggest_query = []
    schemata = list(model)
    if collection is not None:
        label = "%s (%s)" % (collection.get("label"), label)
        suggest_query.append(("filter:collection_id", collection.get("id")))
        things = get_collection_things(collection.get("id"))
        schemata = [model.get(s) for s in things.keys()]
    return jsonify(
        {
            "name": label,
            "identifierSpace": "http://rdf.freebase.com/ns/type.object.id",
            "schemaSpace": "http://rdf.freebase.com/ns/type.object.id",
            "view": {"url": entity_url("{{id}}")},
            "preview": {"url": entity_url("{{id}}"), "width": 800, "height": 400},
            "suggest": {
                "entity": {
                    "service_url": domain,
                    "service_path": url_for(
                        "reconcile_api.suggest_entity",
                        _query=suggest_query,
                        _authz=request.authz,
                        _relative=True,
                    ),
                },
                "type": {
                    "service_url": domain,
                    "service_path": url_for(
                        "reconcile_api.suggest_type", _relative=True
                    ),
                },
                "property": {
                    "service_url": domain,
                    "service_path": url_for(
                        "reconcile_api.suggest_property", _relative=True
                    ),
                },
            },
            "defaultTypes": [get_freebase_type(s) for s in schemata if s.matchable],
        }
    )

def reconcile_index(collection=None):
    domain = settings.APP_UI_URL.strip('/')
    label = settings.APP_TITLE
    suggest_query = []
    schemata = list(model)
    if collection is not None:
        label = '%s (%s)' % (collection.get('label'), label)
        suggest_query.append(('filter:collection_id', collection.get('id')))
        things = get_collection_things(collection.get('id'))
        schemata = [model.get(s) for s in things.keys()]
    return jsonify({
        'name': label,
        'identifierSpace': 'http://rdf.freebase.com/ns/type.object.id',
        'schemaSpace': 'http://rdf.freebase.com/ns/type.object.id',
        'view': {'url': entity_url('{{id}}')},
        'preview': {
            'url': entity_url('{{id}}'),
            'width': 800,
            'height': 400
        },
        'suggest': {
            'entity': {
                'service_url': domain,
                'service_path': url_for('reconcile_api.suggest_entity',
                                        _query=suggest_query,
                                        _authorize=True,
                                        _relative=True)
            },
            'type': {
                'service_url': domain,
                'service_path': url_for('reconcile_api.suggest_type',
                                        _relative=True)
            },
            'property': {
                'service_url': domain,
                'service_path': url_for('reconcile_api.suggest_property',
                                        _relative=True)
            }
        },
        'defaultTypes': [get_freebase_type(s) for s in schemata if s.matchable]
    })

def render_notification(stub, notification):
    """Generate a text version of the notification, suitable for use
    in an email or text message."""
    from aleph.logic import resolver

    for name, clazz, value in notification.iterparams():
        resolver.queue(stub, clazz, value)
    resolver.resolve(stub)
    plain = str(notification.event.template)
    html = str(notification.event.template)
    for name, clazz, value in notification.iterparams():
        data = resolver.get(stub, clazz, value)
        if data is None:
            return
        link, title = None, None
        if clazz == Role:
            title = data.get('label')
        elif clazz == Alert:
            title = data.get('query')
        elif clazz == Collection:
            title = data.get('label')
            link = collection_url(value)
        elif clazz == Entity:
            title = data.get('name')
            link = entity_url(value)
        template = '{{%s}}' % name
        html = html.replace(template, html_link(title, link))
        plain = plain.replace(template, "'%s'" % title)
        if name == notification.event.link_to:
            plain = '%s (%s)' % (plain, link)
    return {'plain': plain, 'html': html}

def generate_sitemap(collection_id):
    """Generate entries for a collection-based sitemap.xml file."""
    # cf. https://www.sitemaps.org/protocol.html
    query = {
        'query': {
            'bool': {
                'filter': [
                    {'term': {'collection_id': collection_id}},
                    {'term': {'schemata': Entity.THING}},
                    authz_query(Authz.from_role(None))
                ]
            }
        },
        '_source': {'includes': ['schemata', 'updated_at']}
    }
    scanner = scan(es, index=entities_index(), query=query)
    # strictly, the limit for sitemap.xml is 50,000
    for res in islice(scanner, 49500):
        source = res.get('_source', {})
        updated_at = source.get('updated_at', '').split('T', 1)[0]
        if Document.SCHEMA in source.get('schemata', []):
            url = document_url(res.get('_id'))
        else:
            url = entity_url(res.get('_id'))
        yield (url, updated_at)

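# generate_sitemap above streams results with the elasticsearch-py `scan`
# helper. A minimal, hedged sketch of that pattern against an assumed local
# cluster; the index name and field names are illustrative only.
from elasticsearch import Elasticsearch
from elasticsearch.helpers import scan

es = Elasticsearch(["http://localhost:9200"])
query = {
    "query": {"term": {"collection_id": 5}},
    "_source": {"includes": ["updated_at"]},
}
# scan() wraps the scroll API and yields every hit, not just one page
for res in scan(es, index="aleph-entities", query=query):
    print(res["_id"], res["_source"].get("updated_at"))
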
def entity_links(self, data, pk, schemata):
    return {
        'self': url_for('entities_api.view', id=pk),
        'references': url_for('entities_api.references', id=pk),
        'tags': url_for('entities_api.tags', id=pk),
        'ui': entity_url(pk)
    }

def export_entities(export_id):
    export = Export.by_id(export_id)
    log.info("Export entities [%r]...", export)
    export_dir = ensure_path(mkdtemp(prefix="aleph.export."))
    collections = {}
    try:
        filters = [export.meta.get("query", {"match_none": {}})]
        file_path = export_dir.joinpath("query-export.zip")
        with ZipFile(file_path, mode="w") as zf:
            excel_path = export_dir.joinpath(EXCEL_FILE)
            exporter = ExcelExporter(excel_path, extra=EXTRA_HEADERS)
            for entity in iter_proxies(filters=filters):
                collection_id = entity.context.get("collection_id")
                if collection_id not in collections:
                    collections[collection_id] = get_collection(collection_id)
                collection = collections[collection_id]
                if collection is None:
                    continue
                extra = [entity_url(entity.id), collection.get("label")]
                exporter.write(entity, extra=extra)
                write_document(export_dir, zf, collection, entity)
                if file_path.stat().st_size >= Export.MAX_FILE_SIZE:
                    log.warning("Export too large: %r", export)
                    break
            exporter.finalize()
            zf.write(excel_path, arcname=EXCEL_FILE)
        complete_export(export_id, file_path)
    except Exception:
        log.exception("Failed to process export [%s]", export_id)
        export = Export.by_id(export_id)
        export.set_status(status=Status.FAILED)
        db.session.commit()
    finally:
        shutil.rmtree(export_dir)

def export_entities(export_id, result):
    from aleph.logic import resolver

    export_dir = ensure_path(mkdtemp(prefix="aleph.export."))
    try:
        entities = []
        stub = types.SimpleNamespace(result=result)
        for entity in result["results"]:
            resolver.queue(stub, Collection, entity.get("collection_id"))
            entities.append(model.get_proxy(entity))
        resolver.resolve(stub)
        file_path = export_dir.joinpath("query-export.zip")
        zf = zipfile.ZipFile(file_path, "w")
        exporter = ExcelExporter(None, extra=EXTRA_HEADERS)
        for entity in entities:
            collection_id = entity.context.get("collection_id")
            collection = resolver.get(stub, Collection, collection_id)
            extra = [entity_url(entity.id), collection.get("label")]
            exporter.write(entity, extra=extra)
            write_document(export_dir, zf, collection, entity)
        content = exporter.get_bytesio().getvalue()
        zf.writestr("Export.xlsx", content)
        zf.close()
        complete_export(export_id, file_path)
    except Exception:
        log.exception("Failed to process export [%s]", export_id)
        export = Export.by_id(export_id)
        export.set_status(status=Export.STATUS_FAILED)
        db.session.commit()
    finally:
        shutil.rmtree(export_dir)

def _serialize(self, obj): pk = obj.get("id") collection_id = obj.pop("collection_id", None) obj["collection"] = self.resolve( Collection, collection_id, CollectionSerializer ) proxy = model.get_proxy(obj) properties = obj.get("properties", {}) for prop in proxy.iterprops(): if prop.type != registry.entity: continue values = ensure_list(properties.get(prop.name)) properties[prop.name] = [] for value in values: entity = self.resolve(Entity, value, EntitySerializer) properties[prop.name].append(entity or value) links = { "self": url_for("entities_api.view", entity_id=pk), "references": url_for("entities_api.references", entity_id=pk), "tags": url_for("entities_api.tags", entity_id=pk), "ui": entity_url(pk), } if proxy.schema.is_a(Document.SCHEMA): content_hash = first(properties.get("contentHash")) if content_hash: name = entity_filename(proxy) mime = first(properties.get("mimeType")) links["file"] = archive_url( content_hash, file_name=name, mime_type=mime, expire=request.authz.expire, ) pdf_hash = first(properties.get("pdfHash")) if pdf_hash: name = entity_filename(proxy, extension="pdf") links["pdf"] = archive_url( pdf_hash, file_name=name, mime_type=PDF, expire=request.authz.expire, ) csv_hash = first(properties.get("csvHash")) if csv_hash: name = entity_filename(proxy, extension="csv") links["csv"] = archive_url( csv_hash, file_name=name, mime_type=CSV, expire=request.authz.expire, ) obj["links"] = links obj["latinized"] = transliterate_values(proxy) obj["writeable"] = check_write_entity(obj, request.authz) obj["shallow"] = obj.get("shallow", True) return obj
def _serialize(self, obj):
    pk = obj.get('id')
    obj['id'] = str(pk)
    authz = request.authz
    collection_id = obj.pop('collection_id', None)
    obj['collection'] = self.resolve(Collection, collection_id,
                                     CollectionSerializer)
    schema = model.get(obj.get('schema'))
    if schema is None:
        return None
    obj['schemata'] = schema.names
    properties = obj.get('properties', {})
    for prop in schema.properties.values():
        if prop.type != registry.entity:
            continue
        values = ensure_list(properties.get(prop.name))
        properties[prop.name] = []
        for value in values:
            entity = self.resolve(Entity, value, EntitySerializer)
            properties[prop.name].append(entity)
    links = {
        'self': url_for('entities_api.view', entity_id=pk),
        'references': url_for('entities_api.references', entity_id=pk),
        'tags': url_for('entities_api.tags', entity_id=pk),
        'ui': entity_url(pk)
    }
    if schema.is_a(Document.SCHEMA):
        links['content'] = url_for('entities_api.content', entity_id=pk)
        file_name = first(properties.get('fileName'))
        content_hash = first(properties.get('contentHash'))
        if content_hash:
            mime_type = first(properties.get('mimeType'))
            name = safe_filename(file_name, default=pk)
            links['file'] = archive_url(request.authz.id, content_hash,
                                        file_name=name,
                                        mime_type=mime_type)
        pdf_hash = first(properties.get('pdfHash'))
        if pdf_hash:
            name = safe_filename(file_name, default=pk, extension='.pdf')
            links['pdf'] = archive_url(request.authz.id, pdf_hash,
                                       file_name=name, mime_type=PDF)
        csv_hash = first(properties.get('csvHash'))
        if csv_hash:
            name = safe_filename(file_name, default=pk, extension='.csv')
            links['csv'] = archive_url(request.authz.id, csv_hash,
                                       file_name=name, mime_type=CSV)
    obj['links'] = links
    obj['writeable'] = authz.can(collection_id, authz.WRITE)
    obj.pop('_index', None)
    return self._clean_response(obj)

def export_entity_excel(workbook, collection, entity):
    fields = {
        'url': entity_url(entity.id),
        'collection': collection.get('label'),
        'collection_url': collection_url(collection.get('id'))
    }
    write_entity_excel(workbook, entity,
                       extra_fields=fields,
                       extra_headers=EXTRA_HEADERS)

def export_entity_csv(handlers, collection, entity):
    fh = handlers.get(entity.schema.plural)
    if fh is None:
        handlers[entity.schema.plural] = fh = io.StringIO()
        write_headers(fh, entity.schema, extra_headers=EXTRA_HEADERS)
    write_entity_csv(fh, entity, extra_fields={
        'url': entity_url(entity.id),
        'collection': collection.get('label'),
        'collection_url': collection_url(collection.get('id'))
    })

def _serialize(self, obj):
    pk = obj.get('id')
    collection_id = obj.pop('collection_id', None)
    obj['collection'] = self.resolve(Collection, collection_id,
                                     CollectionSerializer)
    proxy = model.get_proxy(obj)
    obj['schemata'] = proxy.schema.names
    properties = obj.get('properties', {})
    for prop in proxy.iterprops():
        if prop.type != registry.entity:
            continue
        values = ensure_list(properties.get(prop.name))
        properties[prop.name] = []
        for value in values:
            entity = self.resolve(Entity, value, EntitySerializer)
            properties[prop.name].append(entity or value)
    links = {
        'self': url_for('entities_api.view', entity_id=pk),
        'references': url_for('entities_api.references', entity_id=pk),
        'tags': url_for('entities_api.tags', entity_id=pk),
        'ui': entity_url(pk)
    }
    if proxy.schema.is_a(Document.SCHEMA):
        links['content'] = url_for('entities_api.content', entity_id=pk)
        content_hash = first(properties.get('contentHash'))
        if content_hash:
            name = entity_filename(proxy)
            mime_type = first(properties.get('mimeType'))
            links['file'] = archive_url(request.authz.id, content_hash,
                                        file_name=name,
                                        mime_type=mime_type)
        pdf_hash = first(properties.get('pdfHash'))
        if pdf_hash:
            name = entity_filename(proxy, extension='pdf')
            links['pdf'] = archive_url(request.authz.id, pdf_hash,
                                       file_name=name, mime_type=PDF)
        csv_hash = first(properties.get('csvHash'))
        if csv_hash:
            name = entity_filename(proxy, extension='csv')
            links['csv'] = archive_url(request.authz.id, csv_hash,
                                       file_name=name, mime_type=CSV)
    obj['links'] = links
    write = request.authz.WRITE
    obj['writeable'] = request.authz.can(collection_id, write)
    return obj

def reconcile_index(collection=None):
    domain = settings.APP_UI_URL.strip('/')
    label = settings.APP_TITLE
    suggest_query = []
    schemata = list(model)
    if collection is not None:
        label = '%s (%s)' % (collection.get('label'), label)
        suggest_query.append(('filter:collection_id', collection.get('id')))
        schemata = [model.get(s) for s in collection.get('schemata').keys()]
    return jsonify({
        'name': label,
        'identifierSpace': 'http://rdf.freebase.com/ns/type.object.id',
        'schemaSpace': 'http://rdf.freebase.com/ns/type.object.id',
        'view': {'url': entity_url('{{id}}')},
        'preview': {
            'url': entity_url('{{id}}'),
            'width': 800,
            'height': 400
        },
        'suggest': {
            'entity': {
                'service_url': domain,
                'service_path': url_for('reconcile_api.suggest_entity',
                                        _query=suggest_query,
                                        _authorize=True,
                                        _relative=True)
            },
            'type': {
                'service_url': domain,
                'service_path': url_for('reconcile_api.suggest_type',
                                        _relative=True)
            },
            'property': {
                'service_url': domain,
                'service_path': url_for('reconcile_api.suggest_property',
                                        _relative=True)
            }
        },
        'defaultTypes': [get_freebase_type(s) for s in schemata if s.matchable]
    })

def reconcile_index():
    domain = settings.APP_UI_URL.strip('/')
    api_key = None
    if request.authz.logged_in:
        role = Role.by_id(request.authz.id)
        api_key = role.api_key
    meta = {
        'name': settings.APP_TITLE,
        'identifierSpace': 'http://rdf.freebase.com/ns/type.object.id',
        'schemaSpace': 'http://rdf.freebase.com/ns/type.object.id',
        'view': {'url': entity_url('{{id}}')},
        'preview': {
            'url': entity_url('{{id}}') + '?api_key=%s' % api_key,
            'width': 800,
            'height': 400
        },
        'suggest': {
            'entity': {
                'service_url': domain,
                'service_path': url_for('reconcile_api.suggest_entity',
                                        api_key=api_key)
            },
            'type': {
                'service_url': domain,
                'service_path': url_for('reconcile_api.suggest_type')
            },
            'property': {
                'service_url': domain,
                'service_path': url_for('reconcile_api.suggest_property')
            }
        },
        'defaultTypes': [{
            'id': 'Entity',
            'name': 'Persons and Companies'
        }]
    }
    return jsonify(meta)

def reconcile_index():
    domain = settings.APP_UI_URL.strip('/')
    meta = {
        'name': settings.APP_TITLE,
        'identifierSpace': 'http://rdf.freebase.com/ns/type.object.id',
        'schemaSpace': 'http://rdf.freebase.com/ns/type.object.id',
        'view': {'url': entity_url('{{id}}')},
        'preview': {
            'url': entity_url('{{id}}'),
            'width': 800,
            'height': 400
        },
        'suggest': {
            'entity': {
                'service_url': domain,
                'service_path': url_for('reconcile_api.suggest_entity',
                                        _authorize=True)
            },
            'type': {
                'service_url': domain,
                'service_path': url_for('reconcile_api.suggest_type')
            },
            'property': {
                'service_url': domain,
                'service_path': url_for('reconcile_api.suggest_property')
            }
        },
        'defaultTypes': [{
            'id': Entity.THING,
            'name': model.get(Entity.THING).label
        }]
    }
    return jsonify(meta)

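# The reconcile_index variants above all return a service manifest in the
# shape used by the OpenRefine reconciliation protocol (the Freebase
# identifierSpace/schemaSpace values mark the legacy dialect). A hedged
# client-side sketch that fetches such a manifest with the standard
# library; the endpoint URL is an assumption for illustration only.
import json
from urllib.request import urlopen

def fetch_reconcile_manifest(url='https://aleph.example.org/api/2/reconcile'):
    """A GET with no queries attached returns the service manifest."""
    with urlopen(url) as res:
        return json.load(res)

# e.g. fetch_reconcile_manifest()['defaultTypes']
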
def entity_matches(result):
    for doc in result.get('hits').get('hits'):
        entity = unpack_result(doc)
        proxy = model.get_proxy(entity)
        yield {
            'id': proxy.id,
            'name': proxy.caption,
            'n:type': get_freebase_type(proxy.schema),
            'type': [get_freebase_type(proxy.schema)],
            'r:score': doc.get('_score'),
            'uri': entity_url(proxy.id, _relative=True),
            'match': False
        }

def entity_matches(result):
    for doc in result.get("hits").get("hits"):
        entity = unpack_result(doc)
        proxy = model.get_proxy(entity)
        yield {
            "id": proxy.id,
            "name": proxy.caption,
            "n:type": get_freebase_type(proxy.schema),
            "type": [get_freebase_type(proxy.schema)],
            "r:score": doc.get("_score"),
            "uri": entity_url(proxy.id, _relative=True),
            "match": False,
        }

def render_notification(stub, notification):
    """Generate a text version of the notification, suitable for use
    in an email or text message."""
    from aleph.logic import resolver

    notification = unpack_result(notification)
    event = Events.get(notification.get("event"))
    if event is None:
        return
    for name, clazz, value in _iter_params(notification, event):
        resolver.queue(stub, clazz, value)
    resolver.resolve(stub)
    plain = str(event.template)
    html = str(event.template)
    for name, clazz, value in _iter_params(notification, event):
        data = resolver.get(stub, clazz, value)
        if data is None:
            return
        link, title = None, None
        if clazz == Role:
            title = data.get("label")
        elif clazz == Alert:
            title = data.get("query")
        elif clazz == Collection:
            title = data.get("label")
            link = collection_url(value)
        elif clazz == Entity:
            proxy = model.get_proxy(data)
            title = proxy.caption
            link = entity_url(value)
        elif clazz == EntitySet:
            title = data.label
            link = entityset_url(data.id)
        elif clazz == Export:
            title = data.get("label")
            link = url_for("exports_api.download", export_id=data.get("id"))
        template = "{{%s}}" % name
        html = html.replace(template, html_link(title, link))
        plain = plain.replace(template, "'%s'" % title)
        if name == event.link_to:
            plain = "%s (%s)" % (plain, link)
    return {"plain": plain, "html": html}

def generate_sitemap(collection_id):
    """Generate entries for a collection-based sitemap.xml file."""
    # cf. https://www.sitemaps.org/protocol.html
    entities = iter_entities(authz=Authz.from_role(None),
                             collection_id=collection_id,
                             schemata=[Entity.THING],
                             includes=['schemata', 'updated_at'])
    # strictly, the limit for sitemap.xml is 50,000
    for entity in islice(entities, 49500):
        updated_at = entity.get('updated_at', '').split('T', 1)[0]
        if Document.SCHEMA in entity.get('schemata', []):
            url = document_url(entity.get('id'))
        else:
            url = entity_url(entity.get('id'))
        yield (url, updated_at)

def _serialize(self, obj):
    pk = obj.get('id')
    authz = request.authz
    collection_id = obj.pop('collection_id', None)
    obj['collection'] = self.resolve(Collection, collection_id,
                                     CollectionSerializer)
    schema = model.get(obj.get('schema'))
    if schema is None:
        return None
    obj['schemata'] = schema.names
    properties = obj.get('properties', {})
    for prop in schema.properties.values():
        if prop.type != registry.entity:
            continue
        values = ensure_list(properties.get(prop.name))
        properties[prop.name] = []
        for value in values:
            entity = self.resolve(Entity, value, EntitySerializer)
            properties[prop.name].append(entity)
    links = {
        'self': url_for('entities_api.view', entity_id=pk),
        'references': url_for('entities_api.references', entity_id=pk),
        'tags': url_for('entities_api.tags', entity_id=pk),
        'ui': entity_url(pk)
    }
    if schema.is_a(Document.SCHEMA):
        links['content'] = url_for('entities_api.content', entity_id=pk)
        for content_hash in ensure_list(properties.get('contentHash')):
            links['file'] = url_for('documents_api.file',
                                    document_id=pk, _authorize=True)
        for pdf_hash in ensure_list(properties.get('pdfHash')):
            links['pdf'] = url_for('documents_api.pdf',
                                   document_id=pk, _authorize=True)
    obj['links'] = links
    obj['writeable'] = authz.can(collection_id, authz.WRITE)
    if obj.get('bulk'):
        obj['writeable'] = False
    obj.pop('_index', None)
    return self._clean_response(obj)

def export_entities(request, result):
    entities = []
    for entity in result.results:
        resolver.queue(result, Collection, entity.get('collection_id'))
        entities.append(model.get_proxy(entity))
    resolver.resolve(result)
    zip_archive = zipstream.ZipFile()
    exporter = ExcelExporter(None, extra=EXTRA_HEADERS)
    for entity in entities:
        collection_id = entity.context.get('collection_id')
        collection = resolver.get(result, Collection, collection_id)
        extra = [entity_url(entity.id), collection.get('label')]
        exporter.write(entity, extra=extra)
        write_document(zip_archive, collection, entity)
    content = exporter.get_bytesio()
    zip_archive.write_iter('Export.xlsx', content)
    for chunk in zip_archive:
        yield chunk

def export_entities(export_id):
    export = Export.by_id(export_id)
    log.info("Export entities [%r]...", export)
    export_dir = ensure_path(mkdtemp(prefix="aleph.export."))
    collections = {}
    try:
        filters = [export.meta.get("query", {"match_none": {}})]
        file_path = export_dir.joinpath("export.zip")
        with ZipFile(file_path, mode="w") as zf:
            excel_name = safe_filename(export.label, extension="xlsx")
            excel_path = export_dir.joinpath(excel_name)
            exporter = ExcelExporter(excel_path, extra=EXTRA_HEADERS)
            for idx, entity in enumerate(iter_proxies(filters=filters)):
                collection_id = entity.context.get("collection_id")
                if collection_id not in collections:
                    collections[collection_id] = get_collection(collection_id)
                collection = collections[collection_id]
                if collection is None:
                    continue
                extra = [entity_url(entity.id), collection.get("label")]
                exporter.write(entity, extra=extra)
                write_document(export_dir, zf, collection, entity)
                if file_path.stat().st_size >= settings.EXPORT_MAX_SIZE:
                    concern = "total size of the"
                    zf.writestr("EXPORT_TOO_LARGE.txt", WARNING % concern)
                    break
                if idx >= settings.EXPORT_MAX_RESULTS:
                    concern = "number of"
                    zf.writestr("EXPORT_TOO_LARGE.txt", WARNING % concern)
                    break
            exporter.finalize()
            zf.write(excel_path, arcname=excel_name)
        file_name = "Export: %s" % export.label
        file_name = safe_filename(file_name, extension="zip")
        complete_export(export_id, file_path, file_name)
    except Exception:
        log.exception("Failed to process export [%s]", export_id)
        export = Export.by_id(export_id)
        export.set_status(status=Status.FAILED)
        db.session.commit()
    finally:
        shutil.rmtree(export_dir)

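# A minimal, self-contained sketch of the capping pattern used by the
# export_entities variants above: stop streaming rows into a zip archive
# once it grows past a size or row budget, leaving a marker file behind.
# All names and limits here are illustrative; standard library only.
import zipfile
from pathlib import Path

MAX_SIZE = 50 * 1024 * 1024  # assumed 50 MiB budget
MAX_ROWS = 100_000           # assumed row budget

def capped_zip_export(rows, file_path: Path):
    """Write (name, payload) pairs into a zip until a budget is hit."""
    with zipfile.ZipFile(file_path, mode="w") as zf:
        for idx, (name, payload) in enumerate(rows):
            zf.writestr(name, payload)
            # check after each entry, mirroring the pattern above
            if file_path.stat().st_size >= MAX_SIZE or idx >= MAX_ROWS:
                zf.writestr("EXPORT_TOO_LARGE.txt", "Export truncated.")
                break
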
def generate_sitemap(collection_id):
    """Generate entries for a collection-based sitemap.xml file."""
    # cf. https://www.sitemaps.org/protocol.html
    document = model.get(Document.SCHEMA)
    entities = iter_entities(authz=Authz.from_role(None),
                             collection_id=collection_id,
                             schemata=[Entity.THING],
                             includes=['schema', 'updated_at'])
    # strictly, the limit for sitemap.xml is 50,000
    for entity in islice(entities, 49500):
        updated_at = entity.get('updated_at', '').split('T', 1)[0]
        updated_at = max(settings.SITEMAP_FLOOR, updated_at)
        schema = model.get(entity.get('schema'))
        if schema is None:
            continue
        if schema.is_a(document):
            url = document_url(entity.get('id'))
        else:
            url = entity_url(entity.get('id'))
        yield (url, updated_at)

def reconcile_op(query):
    """Reconcile operation for a single query."""
    parser = SearchQueryParser({
        'limit': query.get('limit', '5'),
        'strict': 'false'
    }, request.authz)
    name = query.get('query', '')
    schema = query.get('type') or Entity.THING
    entity = {
        'id': 'fake',
        'names': [name],
        'fingerprints': [fingerprints.generate(name)],
        'schemata': ensure_list(schema),
        'schema': schema
    }
    for p in query.get('properties', []):
        entity[p.get('pid')] = ensure_list(p.get('v'))
    query = SimilarEntitiesQuery(parser, entity=entity)
    matches = []
    for doc in query.search().get('hits').get('hits'):
        source = doc.get('_source')
        match = {
            'id': doc.get('_id'),
            'name': source.get('name'),
            'score': min(100, doc.get('_score') * 10),
            'uri': entity_url(doc.get('_id')),
            'match': source.get('name') == name
        }
        for type_ in get_freebase_types():
            if source['schema'] == type_['id']:
                match['type'] = [type_]
        matches.append(match)
    log.info("Reconciled: %r -> %d matches", name, len(matches))
    return {
        'result': matches,
        'num': len(matches)
    }

def render_notification(stub, notification):
    """Generate a text version of the notification, suitable for use
    in an email or text message."""
    from aleph.logic import resolver

    notification = unpack_result(notification)
    event = Events.get(notification.get('event'))
    if event is None:
        return
    for name, clazz, value in _iter_params(notification, event):
        resolver.queue(stub, clazz, value)
    resolver.resolve(stub)
    plain = str(event.template)
    html = str(event.template)
    for name, clazz, value in _iter_params(notification, event):
        data = resolver.get(stub, clazz, value)
        if data is None:
            return
        link, title = None, None
        if clazz == Role:
            title = data.get('label')
        elif clazz == Alert:
            title = data.get('query')
        elif clazz == Collection:
            title = data.get('label')
            link = collection_url(value)
        elif clazz == Entity:
            proxy = model.get_proxy(data)
            title = proxy.caption
            link = entity_url(value)
        elif clazz == Diagram:
            title = data.label
            link = diagram_url(data.id)
        template = '{{%s}}' % name
        html = html.replace(template, html_link(title, link))
        plain = plain.replace(template, "'%s'" % title)
        if name == event.link_to:
            plain = '%s (%s)' % (plain, link)
    return {'plain': plain, 'html': html}

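# The render_notification variants above all share one core step:
# substitute {{param}} placeholders in an event template with resolved
# titles and links. A self-contained sketch of just that substitution;
# the template text and parameter values below are invented examples.
def render_template(template, params):
    """params maps placeholder name -> (title, link)."""
    plain = str(template)
    html = str(template)
    for name, (title, link) in params.items():
        marker = '{{%s}}' % name
        plain = plain.replace(marker, "'%s'" % title)
        anchor = '<a href="%s">%s</a>' % (link, title) if link else title
        html = html.replace(marker, anchor)
    return {'plain': plain, 'html': html}

# e.g. render_template('{{actor}} shared {{collection}}',
#                      {'actor': ('Alice', None),
#                       'collection': ('Leaks', 'https://example.org/c/1')})
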
def resolve_id(object_id, clazz):
    """From an object ID and class type, generate a human-readable
    label and a link that can be rendered into the notification.
    """
    if clazz == Role:
        role = Role.by_id(object_id)
        return role.name, None
    elif clazz == Alert:
        alert = Alert.by_id(object_id)
        return alert.query, None
    elif clazz == Collection:
        collection = Collection.by_id(object_id)
        if collection is not None:
            return collection.label, collection_url(object_id)
    elif clazz in [Document, Entity]:
        entity = get_entity(object_id)
        if entity is not None:
            if Document.SCHEMA in entity.get('schemata'):
                title = entity.get('title', entity.get('file_name'))
                return title, document_url(object_id)
            else:
                return entity.get('name'), entity_url(object_id)
    return None, None

def sitemap(id):
    """Generate entries for a collection-based sitemap.xml file."""
    # cf. https://www.sitemaps.org/protocol.html
    collection = get_db_collection(id, request.authz.READ)
    document = model.get(Document.SCHEMA)
    entries = []
    for entity in get_sitemap_entities(id):
        updated_at = entity.get('updated_at', '').split('T', 1)[0]
        updated_at = max(settings.SITEMAP_FLOOR, updated_at)
        schema = model.get(entity.get('schema'))
        if schema is None:
            continue
        if schema.is_a(document):
            url = document_url(entity.get('id'))
        else:
            url = entity_url(entity.get('id'))
        entries.append((url, updated_at))
    url = collection_url(collection_id=collection.id)
    updated_at = collection.updated_at.date().isoformat()
    return render_xml('sitemap.xml',
                      url=url,
                      updated_at=updated_at,
                      entries=entries)

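# A hedged sketch of what rendering the (url, updated_at) entries into
# sitemap.xml amounts to. The functions above delegate to a template via
# render_xml, so this stdlib version is illustrative only.
from xml.sax.saxutils import escape

def render_sitemap(entries):
    """Render (url, lastmod) pairs as a sitemaps.org urlset document."""
    lines = ['<?xml version="1.0" encoding="UTF-8"?>',
             '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">']
    for url, updated_at in entries:
        lines.append('  <url><loc>%s</loc><lastmod>%s</lastmod></url>'
                     % (escape(url), escape(updated_at)))
    lines.append('</urlset>')
    return '\n'.join(lines)
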
def reconcile_op(query):
    """Reconcile operation for a single query."""
    parser = SearchQueryParser({
        'limit': query.get('limit', '5'),
        'strict': 'false'
    }, request.authz)
    name = query.get('query', '')
    schema = query.get('type') or Entity.THING
    proxy = model.make_entity(schema)
    proxy.add('name', query.get('query', ''))
    for p in query.get('properties', []):
        proxy.add(p.get('pid'), p.get('v'), quiet=True)
    query = MatchQuery(parser, entity=proxy)
    matches = []
    for doc in query.search().get('hits').get('hits'):
        entity = unpack_result(doc)
        if entity is None:
            continue
        entity = model.get_proxy(entity)
        score = math.ceil(compare(model, proxy, entity) * 100)
        match = {
            'id': entity.id,
            'name': entity.caption,
            'score': score,
            'uri': entity_url(entity.id),
            'match': False
        }
        for type_ in get_freebase_types():
            if entity.schema.name == type_['id']:
                match['type'] = [type_]
        matches.append(match)
    log.info("Reconciled: %r -> %d matches", name, len(matches))
    return {'result': matches, 'num': len(matches)}

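# Both reconcile_op variants reduce to: build a followthemoney proxy from
# the incoming query, then score candidates against it. A standalone sketch
# of that scoring core, assuming a followthemoney version that still ships
# followthemoney.compare (the same call signature used above); the sample
# names are invented.
import math

from followthemoney import model
from followthemoney.compare import compare

query = model.make_entity('Person')
query.add('name', 'John Smith')
candidate = model.make_entity('Person')
candidate.add('name', 'Jon Smith')
# scale the 0..1 similarity to the 0..100 reconciliation score
print(math.ceil(compare(model, query, candidate) * 100))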