def process_collection(stage, collection, ingest=True, reset=False, sync=False):
    """Trigger a full re-parse of all documents and re-build the search
    index from the aggregator.

    A ``reset`` wipes the collection first and forces ``ingest`` on.
    With ``ingest`` every aggregated entity is re-ingested under the
    current job; otherwise an indexing task is queued instead.
    """
    # Resetting implies everything has to be ingested again.
    ingest = ingest or reset
    if reset:
        reset_collection(collection, sync=True)
    aggregator = get_aggregator(collection)
    try:
        bulk_writer = aggregator.bulk()
        # Feed the database entities into the aggregator, reporting
        # progress on the stage as each one is written.
        for entity in _collection_proxies(collection):
            bulk_writer.put(entity, fragment='db')
            stage.report_finished(1)
        bulk_writer.flush()
        if not ingest:
            # No re-ingest requested: only rebuild the search index.
            queue_task(collection, OP_INDEX,
                       job_id=stage.job.id,
                       context={'sync': sync})
        else:
            for entity in aggregator:
                ingest_entity(collection, entity, job_id=stage.job.id)
    finally:
        aggregator.close()
def update(foreign_id=None, index=False, process=False, reset=False):
    """Re-index all the collections and entities.

    When ``foreign_id`` is given, only that collection is handled;
    otherwise every collection (including deleted ones) is visited.
    """
    update_roles()
    collections = Collection.all(deleted=True)
    if foreign_id is not None:
        collections = [get_collection(foreign_id)]
    for collection in collections:
        if reset:
            reset_collection(collection, sync=True)
        refresh_collection(collection.id)
        index_collection(collection)
        # Deleted collections are refreshed and indexed above, but are
        # never queued for further processing.
        if collection.deleted_at is not None:
            continue
        if index or process:
            # ``ingest`` controls whether documents get re-parsed as
            # part of the processing task.
            queue_task(collection, OP_PROCESS,
                       payload={'ingest': process})
def process_collection(collection, ingest=True, reset=False):
    """Trigger a full re-parse of all documents and re-build the search
    index from the aggregator.

    With ``ingest`` each entity is re-ingested as it is aggregated and
    the call waits for ingest to finish; otherwise the aggregated
    entities are only re-indexed.
    """
    if reset:
        reset_collection(collection)
    aggregator = get_aggregator(collection)
    try:
        bulk_writer = aggregator.bulk()
        for entity in _collection_proxies(collection):
            bulk_writer.put(entity, fragment='db')
            if ingest:
                ingest_entity(collection, entity)
        bulk_writer.flush()
        if not ingest:
            index_entities(collection, aggregator)
        else:
            # Block until the ingest pipeline has drained.
            ingest_wait(collection)
    finally:
        aggregator.close()
def handle(self, task):
    """Execute a queued *task* by dispatching on its processing stage."""
    stage = task.stage
    payload = task.payload
    dataset = task.job.dataset
    collection = Collection.by_foreign_id(dataset.name)
    # Guard clause: nothing to do for an unknown dataset.
    if collection is None:
        log.error("Collection not found: %s", dataset)
        return
    sync = task.context.get('sync', False)
    op = stage.stage
    if op == OP_INDEX:
        index_aggregate(stage, collection, sync=sync, **payload)
    if op == OP_BULKLOAD:
        bulk_load(stage, collection, payload)
    if op == OP_PROCESS:
        # An explicit reset wipes the collection before re-processing.
        if payload.pop('reset', False):
            reset_collection(collection, sync=True)
        process_collection(stage, collection, sync=sync, **payload)
    if op == OP_XREF:
        xref_collection(stage, collection, **payload)
    if op == OP_XREF_ITEM:
        xref_item(stage, collection, **payload)
    log.info("Task [%s]: %s (done)", dataset, stage.stage)
def reset(foreign_id, sync=False):
    """Clear the search index and entity cache of a collection.

    Args:
        foreign_id: foreign ID of the collection to reset.
        sync: when True, request that the reset take effect
            synchronously.
    """
    collection = get_collection(foreign_id)
    # Bug fix: the caller-supplied ``sync`` flag was previously ignored
    # (hard-coded to ``sync=False``); forward it to the reset call.
    reset_collection(collection, sync=sync)