def handle_task(queue, payload, context):
    """Execute one queued collection operation, requeueing it on failure.

    Looks up the collection named by ``queue.dataset``, dispatches on
    ``queue.operation``, and always acknowledges the task at the end.
    On any error the task is re-queued up to ``settings.QUEUE_RETRY``
    times before the exception propagates.
    """
    op = queue.operation
    log.info("Task [%s]: %s (begin)", queue.dataset, op)
    try:
        coll = Collection.by_foreign_id(queue.dataset)
        if coll is None:
            log.error("Collection not found: %s", queue.dataset)
            return
        # Operations are distinct values, so an elif chain is equivalent
        # to the independent checks it replaces.
        if op == OP_INDEX:
            index_aggregate(queue, coll)
        elif op == OP_BULKLOAD:
            bulk_load(queue, coll, payload)
        elif op == OP_PROCESS:
            process_collection(coll, **payload)
        elif op == OP_XREF:
            xref_collection(queue, coll, **payload)
        log.info("Task [%s]: %s (done)", queue.dataset, op)
    except (SystemExit, KeyboardInterrupt, Exception):
        # Shutdown signals are caught on purpose: the task is put back on
        # the queue before the exception is re-raised.
        attempts = int(context.get('retries', 0))
        if attempts < settings.QUEUE_RETRY:
            log.info("Queueing failed task for re-try...")
            context['retries'] = attempts + 1
            queue.queue_task(payload, context)
        raise
    finally:
        # Ack the task whether it succeeded, failed, or was re-queued.
        queue.task_done()
def load_fixtures(self):
    """Populate the database and search index with the standard test
    fixtures: an admin user, one private and one public collection,
    an entity in each, plus sample entities loaded from an ijson file.

    The statement order matters: permissions are granted and committed
    before the collections are processed/indexed.
    """
    self.admin = self.create_user(foreign_id='admin', is_admin=True)
    # Private collection, owned by the admin user.
    self.private_coll = self.create_collection(
        foreign_id='test_private', label="Private Collection",
        category='grey', casefile=False, creator=self.admin)
    self._banana = Entity.create({
        'schema': 'Person',
        'properties': {
            'name': ['Banana'],
        }
    }, self.private_coll)
    # Grant the system user access to the private collection.
    # NOTE(review): the two booleans are presumably (read, write) —
    # confirm against Permission.grant's signature.
    user = Role.by_foreign_id(Role.SYSTEM_USER)
    Permission.grant(self.private_coll, user, True, False)
    # Public collection, visible to guests via SYSTEM_GUEST below.
    self.public_coll = self.create_collection(
        foreign_id='test_public', label="Public Collection",
        category='news', casefile=False, creator=self.admin)
    self._kwazulu = Entity.create({
        'schema': 'Company',
        'properties': {
            'name': ['KwaZulu'],
            'alias': ['kwazulu']
        }
    }, self.public_coll)
    visitor = Role.by_foreign_id(Role.SYSTEM_GUEST)
    Permission.grant(self.public_coll, visitor, True, False)
    db.session.commit()
    # Rebuild the public collection from a clean aggregator state.
    drop_aggregator(self.public_coll)
    stage = get_stage(self.public_coll, OP_PROCESS)
    process_collection(stage, self.public_coll, ingest=False, sync=True)
    # For the private collection, write sample entities into the
    # aggregator and index each one synchronously as it is added.
    aggregator = get_aggregator(self.private_coll)
    aggregator.delete()
    stage = get_stage(self.private_coll, OP_PROCESS)
    for sample in read_entities(self.get_fixture_path('samples.ijson')):
        aggregator.put(sample, fragment='sample')
        index_aggregate(stage, self.private_coll,
                        entity_id=sample.id, sync=True)
    aggregator.close()
    process_collection(stage, self.private_coll, ingest=False, sync=True)
def handle(self, task):
    """Dispatch a queued *task* to the handler for its stage.

    Resolves the task's dataset to a collection, then routes to the
    index / bulkload / process / xref / xref-item operation. Unknown
    stages fall through and only the completion message is logged.
    """
    stage = task.stage
    payload = task.payload
    dataset = task.job.dataset
    collection = Collection.by_foreign_id(dataset.name)
    if collection is None:
        log.error("Collection not found: %s", dataset)
        return
    sync = task.context.get('sync', False)
    op = stage.stage
    # The stage value matches at most one branch, so elif is equivalent.
    if op == OP_INDEX:
        index_aggregate(stage, collection, sync=sync, **payload)
    elif op == OP_BULKLOAD:
        bulk_load(stage, collection, payload)
    elif op == OP_PROCESS:
        process_collection(stage, collection, sync=sync, **payload)
    elif op == OP_XREF:
        xref_collection(stage, collection, **payload)
    elif op == OP_XREF_ITEM:
        xref_item(stage, collection, **payload)
    log.info("Task [%s]: %s (done)", dataset, op)
def handle(self, task):
    """Route a queued *task* to the collection operation for its stage.

    Supports indexing, mapping load/flush, (re-)processing and
    cross-referencing. A ``reset`` flag inside an OP_PROCESS payload
    wipes the collection synchronously before processing.
    """
    stage = task.stage
    payload = task.payload
    dataset = task.job.dataset
    collection = Collection.by_foreign_id(dataset.name)
    if collection is None:
        log.error("Collection not found: %s", dataset)
        return
    sync = task.context.get('sync', False)
    op = stage.stage
    # Each stage value matches at most one branch; elif is equivalent
    # to the original run of independent ifs.
    if op == OP_INDEX:
        index_aggregate(stage, collection, sync=sync, **payload)
    elif op == OP_LOAD_MAPPING:
        load_mapping(stage, collection, **payload)
    elif op == OP_FLUSH_MAPPING:
        flush_mapping(stage, collection, sync=sync, **payload)
    elif op == OP_PROCESS:
        # Pop 'reset' so it is not forwarded to process_collection.
        if payload.pop('reset', False):
            reset_collection(collection, sync=True)
        process_collection(stage, collection, sync=sync, **payload)
    elif op == OP_XREF:
        xref_collection(stage, collection)
    elif op == OP_XREF_ITEM:
        xref_item(stage, collection, **payload)
    log.info("Task [%s]: %s (done)", dataset, op)
def reindex(foreign_id, sync=False):
    """Re-index the contents of the collection named by *foreign_id*.

    Runs ``index_aggregate`` on the collection's OP_PROCESS stage;
    *sync* is passed through to control synchronous indexing.
    """
    coll = get_collection(foreign_id)
    index_aggregate(get_stage(coll, OP_PROCESS), coll, sync=sync)
def reindex(foreign_id):
    """Index all the aggregator contents for a collection, then refresh
    the collection's own metadata."""
    coll = get_collection(foreign_id)
    index_aggregate(get_stage(coll, OP_PROCESS), coll)
    update_collection(coll)