def get_proxy_context(self):
    """Metadata to be added to each generated entity."""
    return {
        'created_at': iso_text(self.created_at),
        'updated_at': iso_text(self.updated_at),
        'role_id': self.role_id,
    }
def get_proxy_context(self):
    """Metadata to be added to each generated entity."""
    return {
        "created_at": iso_text(self.created_at),
        "updated_at": iso_text(self.updated_at),
        "role_id": self.role_id,
        "mutable": True,
    }
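# Assumed shape of the iso_text() helper used throughout this section. This is
# not the actual aleph implementation, only what its call sites imply: turn a
# datetime into an ISO 8601 string, and pass None through unchanged.
from datetime import datetime
from typing import Optional


def iso_text_sketch(date: Optional[datetime]) -> Optional[str]:
    if date is None:
        return None
    return date.isoformat()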
def to_proxy(self):
    return model.get_proxy({
        'id': self.id,
        'schema': self.schema,
        'properties': self.data,
        'created_at': iso_text(self.created_at),
        'updated_at': iso_text(self.updated_at),
        'role_id': self.role_id,
        'mutable': True,
    })
def to_proxy(self):
    data = {
        "id": self.id,
        "schema": self.schema,
        "properties": self.data,
        "created_at": iso_text(self.created_at),
        "updated_at": iso_text(self.updated_at),
        "role_id": self.role_id,
        "mutable": True,
    }
    return model.get_proxy(data, cleaned=False)
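# Standalone sketch of what model.get_proxy() turns the payload above into
# (followthemoney only; the id, properties and context values are made up).
# Keys that are not part of the entity payload itself end up in proxy.context.
from followthemoney import model

_proxy = model.get_proxy({
    "id": "deadbeef",
    "schema": "Person",
    "properties": {"name": ["Jane Doe"]},
    "created_at": "2020-01-01T00:00:00",
    "role_id": 5,
    "mutable": True,
}, cleaned=False)
assert _proxy.schema.name == "Person"
assert _proxy.context.get("mutable") is True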
def to_proxy(self, ns=None):
    ns = ns or self.collection.ns
    proxy = model.get_proxy({
        "id": ns.sign(self.id),
        "schema": self.model,
        "properties": {},
        "created_at": iso_text(self.created_at),
        "updated_at": iso_text(self.updated_at),
        "role_id": self.role_id,
        "mutable": False,
    })
    meta = dict(self.meta)
    headers = meta.pop("headers", None)
    if is_mapping(headers):
        headers = {slugify(k, sep="_"): v for k, v in headers.items()}
        proxy.set("headers", registry.json.pack(headers), quiet=True)
    else:
        headers = {}
    proxy.set("contentHash", self.content_hash)
    proxy.set("parent", ns.sign(self.parent_id))
    proxy.set("ancestors", [ns.sign(a) for a in self.ancestors])
    proxy.set("crawler", meta.get("crawler"))
    proxy.set("sourceUrl", meta.get("source_url"))
    proxy.set("title", meta.get("title"))
    proxy.set("fileName", meta.get("file_name"))
    if not proxy.has("fileName"):
        disposition = headers.get("content_disposition")
        if disposition is not None:
            _, attrs = cgi.parse_header(disposition)
            proxy.set("fileName", attrs.get("filename"))
    proxy.set("mimeType", meta.get("mime_type"))
    if not proxy.has("mimeType"):
        proxy.set("mimeType", headers.get("content_type"))
    proxy.set("language", meta.get("languages"))
    proxy.set("country", meta.get("countries"))
    proxy.set("keywords", meta.get("keywords"))
    proxy.set("authoredAt", meta.get("authored_at"))
    proxy.set("modifiedAt", meta.get("modified_at"))
    proxy.set("publishedAt", meta.get("published_at"))
    proxy.set("retrievedAt", meta.get("retrieved_at"))
    proxy.set("sourceUrl", meta.get("source_url"))
    return proxy
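# Minimal sketch of the id signing used above (followthemoney only; the
# namespace key and document id are made up). Namespace.sign() appends an
# HMAC of the id, so ids from different collections cannot collide.
from followthemoney.namespace import Namespace

_ns = Namespace("collection-signing-key")
_signed = _ns.sign("doc-1")
assert _signed.startswith("doc-1.")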
def format_proxy(proxy, collection):
    """Apply final denormalisations to the index."""
    data = proxy.to_full_dict()
    data['schemata'] = list(proxy.schema.names)

    names = ensure_list(data.get('names'))
    fps = set([fingerprints.generate(name) for name in names])
    fps.update(names)
    data['fingerprints'] = [fp for fp in fps if fp is not None]

    # Slight hack: a magic property in followthemoney that gets taken out
    # of the properties and added straight to the index text.
    properties = data.get('properties')
    text = properties.pop('indexText', [])
    text.extend(fps)
    data['text'] = text

    # integer casting
    numeric = {}
    for prop in proxy.iterprops():
        if prop.type in NUMERIC_TYPES:
            values = proxy.get(prop)
            numeric[prop.name] = _numeric_values(prop.type, values)
    # also cast group field for dates
    numeric['dates'] = _numeric_values(registry.date, data.get('dates'))
    data['numeric'] = numeric

    # Context data - from aleph system, not followthemoney.
    now = iso_text(datetime.utcnow())
    data['created_at'] = min(ensure_list(data.get('created_at')), default=now)
    data['updated_at'] = min(ensure_list(data.get('updated_at')), default=now)
    # FIXME: Can there ever really be multiple role_ids?
    data['role_id'] = first(data.get('role_id'))
    data['mutable'] = max(ensure_list(data.get('mutable')), default=False)
    data['origin'] = ensure_list(data.get('origin'))
    data['collection_id'] = collection.id
    # log.info("%s", pformat(data))
    entity_id = data.pop('id')
    return {
        '_id': entity_id,
        '_index': entities_write_index(data.get('schema')),
        '_source': data,
    }
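# Indexing sketch, not aleph's actual code: the dicts returned by
# format_proxy() already carry _id, _index and _source, so they can be fed
# straight to the Elasticsearch bulk helper. `es`, `proxies` and `collection`
# are assumed to be provided by the caller.
from elasticsearch.helpers import bulk


def index_proxies_sketch(es, proxies, collection):
    actions = (format_proxy(proxy, collection) for proxy in proxies)
    return bulk(es, actions)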