def dispatch_task(self, collection, task):
    stage = task.stage
    payload = task.payload
    sync = task.context.get("sync", False)
    if stage.stage == OP_INDEX:
        index_many(stage, collection, sync=sync, **payload)
    if stage.stage == OP_LOAD_MAPPING:
        load_mapping(stage, collection, **payload)
    if stage.stage == OP_FLUSH_MAPPING:
        flush_mapping(stage, collection, sync=sync, **payload)
    if stage.stage == OP_REINGEST:
        reingest_collection(collection, job_id=stage.job.id, **payload)
    if stage.stage == OP_REINDEX:
        reindex_collection(collection, sync=sync, **payload)
    if stage.stage == OP_XREF:
        xref_collection(stage, collection)
    if stage.stage == OP_XREF_ITEM:
        xref_item(stage, collection, **payload)
    log.info("Task [%s]: %s (done)", task.job.dataset, stage.stage)
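# For context, a minimal sketch of the producing side of this queue: the
# "sync" flag that dispatch_task reads from task.context is set when the
# task is enqueued. queue_task below is an assumption modeled on aleph's
# aleph.queues.queue_task helper; its exact signature may differ, and
# queue_reindex is a hypothetical wrapper added for illustration.
def queue_reindex(collection, sync=False):
    # Enqueue a reindex task whose context carries the sync flag through
    # to dispatch_task above.
    queue_task(collection, OP_REINDEX, context={"sync": sync})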
def handle(self, task):
    stage = task.stage
    payload = task.payload
    collection = Collection.by_foreign_id(task.job.dataset.name)
    if collection is None:
        log.error("Collection not found: %s", task.job.dataset)
        return
    sync = task.context.get('sync', False)
    if stage.stage == OP_INDEX:
        index_aggregate(stage, collection, sync=sync, **payload)
    if stage.stage == OP_BULKLOAD:
        bulk_load(stage, collection, payload)
    if stage.stage == OP_PROCESS:
        process_collection(stage, collection, sync=sync, **payload)
    if stage.stage == OP_XREF:
        xref_collection(stage, collection, **payload)
    if stage.stage == OP_XREF_ITEM:
        xref_item(stage, collection, **payload)
    log.info("Task [%s]: %s (done)", task.job.dataset, stage.stage)
def handle(self, task):
    stage = task.stage
    payload = task.payload
    collection = Collection.by_foreign_id(task.job.dataset.name)
    if collection is None:
        log.error("Collection not found: %s", task.job.dataset)
        return
    sync = task.context.get('sync', False)
    if stage.stage == OP_INDEX:
        index_aggregate(stage, collection, sync=sync, **payload)
    if stage.stage == OP_LOAD_MAPPING:
        load_mapping(stage, collection, **payload)
    if stage.stage == OP_FLUSH_MAPPING:
        flush_mapping(stage, collection, sync=sync, **payload)
    if stage.stage == OP_PROCESS:
        if payload.pop('reset', False):
            reset_collection(collection, sync=True)
        process_collection(stage, collection, sync=sync, **payload)
    if stage.stage == OP_XREF:
        xref_collection(stage, collection)
    if stage.stage == OP_XREF_ITEM:
        xref_item(stage, collection, **payload)
    log.info("Task [%s]: %s (done)", task.job.dataset, stage.stage)
def handle(self, task):
    stage = task.stage
    payload = task.payload
    collection = Collection.by_foreign_id(task.job.dataset.name)
    if collection is None:
        log.error("Collection not found: %s", task.job.dataset)
        return
    sync = task.context.get('sync', False)
    if stage.stage == OP_INDEX:
        index_many(stage, collection, sync=sync, **payload)
    if stage.stage == OP_LOAD_MAPPING:
        load_mapping(stage, collection, **payload)
    if stage.stage == OP_FLUSH_MAPPING:
        flush_mapping(stage, collection, sync=sync, **payload)
    if stage.stage == OP_REINGEST:
        reingest_collection(collection, job_id=stage.job.id, **payload)
    if stage.stage == OP_REINDEX:
        reindex_collection(collection, sync=sync, **payload)
    if stage.stage == OP_XREF:
        xref_collection(stage, collection)
    if stage.stage == OP_XREF_ITEM:
        xref_item(stage, collection, **payload)
    log.info("Task [%s]: %s (done)", task.job.dataset, stage.stage)
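# The handlers above repeat the same if-chain as operations accumulate. A
# minimal sketch of an alternative using a dispatch table: the _HANDLERS
# dict, the uniform handler signature, and the dispatch() helper are
# hypothetical names added for illustration, while the OP_* constants and
# the functions they call are the ones these methods already use.
_HANDLERS = {
    OP_INDEX: lambda stage, collection, payload, sync: index_many(
        stage, collection, sync=sync, **payload
    ),
    OP_REINDEX: lambda stage, collection, payload, sync: reindex_collection(
        collection, sync=sync, **payload
    ),
    OP_XREF: lambda stage, collection, payload, sync: xref_collection(
        stage, collection
    ),
    # ... one entry per remaining OP_* constant
}

def dispatch(stage, collection, payload, sync):
    # Look up the handler for this stage instead of testing each OP_* in turn.
    handler = _HANDLERS.get(stage.stage)
    if handler is None:
        log.warning("No handler for stage: %s", stage.stage)
        return
    handler(stage, collection, payload, sync)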
def expand_entity(entity):
    """Transform an entity into a set of statements. This can
    accept either an entity object or an entity ID."""
    if not is_mapping(entity):
        entity = get_entity(entity)
    if entity is None:
        return
    proxy = model.get_proxy(entity)
    yield from proxy.links

    # TODO: factor out inference
    thing = model.get(Entity.THING)
    if proxy.schema.is_a(thing):
        sameAs = thing.get("sameAs")
        for (score, _, other) in xref_item(proxy):
            yield Link(proxy.node, sameAs, other.id,
                       weight=score, inferred=True)
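# A usage sketch for expand_entity. It accepts either a full entity dict
# or a bare entity ID; the dict shape below, with made-up ID and values,
# is an assumption about what get_entity returns.
entity = {
    "id": "a1b2c3",
    "schema": "Person",
    "properties": {"name": ["Jane Doe"]},
}
for link in expand_entity(entity):
    # Links derived from the entity's own properties come first; sameAs
    # links inferred from xref matches follow, weighted by match score.
    print(link)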