def handle(self, task):
    """Ingest the entity carried by ``task`` and dispatch follow-up work.

    A ``Manager`` is built from the task's stage and context, the task
    payload is converted with ``model.get_proxy`` and passed to
    ``manager.ingest_entity``. Afterwards ``manager.emitted`` is handed to
    ``self.dispatch_next`` together with the task.

    The manager is closed even when building the proxy or ingesting it
    raises; in that case the exception propagates and nothing is
    dispatched.
    """
    manager = Manager(task.stage, task.context)
    try:
        entity = model.get_proxy(task.payload)
        log.debug("Ingest: %r", entity)
        manager.ingest_entity(entity)
    finally:
        # Always close the manager, mirroring the success path, so a
        # failed ingest does not leave it open.
        manager.close()
    self.dispatch_next(task, manager.emitted)
def _ingest(self, dataset, task):
    """Run the ingest for a single task and return what the manager emitted.

    The manager is created from ``dataset`` plus the task's stage and
    context; the task payload is converted with ``model.get_proxy`` and
    ingested. ``close()`` is called on the manager whether or not the
    ingest raises.
    """
    ingest_manager = Manager(dataset, task.stage, task.context)
    proxy = model.get_proxy(task.payload)
    log.debug("Ingest: %r", proxy)
    try:
        ingest_manager.ingest_entity(proxy)
    finally:
        # Runs on success and on failure alike.
        ingest_manager.close()
    return ingest_manager.emitted
def _ingest_path(db, conn, dataset, path, languages=None):
    """Queue a file, or crawl a directory, for ingest.

    A job is created for ``dataset`` on ``conn`` and its ``OP_INGEST``
    stage is used to build a ``Manager``. ``path`` is normalised with
    ``ensure_path``; when that yields ``None`` nothing is queued.

    * For a file, a ``Document`` entity is created, the file is stored via
      ``manager.store`` and the returned checksum is used both as
      ``contentHash`` and as the basis of the entity id; the entity is
      then queued.
    * For a directory, ``DirectoryIngestor.crawl`` is invoked.

    ``languages`` ends up under the ``'languages'`` key of the context
    passed to the manager. Omitting it (or passing ``None``) yields a
    fresh empty list per call instead of a list shared between calls.

    The manager is closed even if storing, queueing or crawling raises.
    """
    if languages is None:
        # Avoid a shared mutable default: each call gets its own list.
        languages = []
    context = {'languages': languages}
    job = Job.create(conn, dataset)
    stage = job.get_stage(OP_INGEST)
    manager = Manager(db, stage, context)
    try:
        path = ensure_path(path)
        if path is not None:
            if path.is_file():
                entity = manager.make_entity('Document')
                checksum = manager.store(path)
                entity.set('contentHash', checksum)
                entity.make_id(checksum)
                entity.set('fileName', path.name)
                manager.queue_entity(entity)
            if path.is_dir():
                DirectoryIngestor.crawl(manager, path)
    finally:
        manager.close()
def handle_task(cls, queue, payload, context):
    """Ingest one queued payload, retrying and logging on failure.

    ``queue.task_done()`` is called up front, before any work happens.
    The payload is converted with ``model.get_proxy`` and ingested through
    a ``Manager`` built from ``queue`` and ``context``; on success
    ``cls.handle_done(queue)`` is called.

    On ``KeyboardInterrupt``, ``SystemExit`` or ``RuntimeError`` the task
    is passed to ``cls.handle_retry``, ``cls.handle_done`` is called and
    the exception is re-raised. Any other ``Exception`` is handled the
    same way but logged instead of re-raised.

    The manager is closed on every path once it has been constructed, so
    a failing ingest no longer leaves it open.
    """
    queue.task_done()
    try:
        manager = Manager(queue, context)
        try:
            entity = model.get_proxy(payload)
            log.debug("Ingest: %r", entity)
            manager.ingest_entity(entity)
        finally:
            # Close before the retry/done bookkeeping in the handlers
            # below runs, on success and on failure alike.
            manager.close()
        cls.handle_done(queue)
    except (KeyboardInterrupt, SystemExit, RuntimeError):
        cls.handle_retry(queue, payload, context)
        cls.handle_done(queue)
        raise
    except Exception:
        cls.handle_retry(queue, payload, context)
        cls.handle_done(queue)
        log.exception("Processing failed.")
def ingest(path, dataset, languages=None):
    """Queue a set of files for ingest.

    A ``ServiceQueue`` for the ``OP_INGEST`` operation on ``dataset`` is
    opened on the connection returned by ``get_redis()`` and wrapped in a
    ``Manager``. ``path`` is normalised with ``ensure_path``; when that
    yields ``None`` nothing is queued.

    * For a file, a ``Document`` entity is created, the file is stored via
      ``manager.store`` and the returned checksum is used both as
      ``contentHash`` and as the basis of the entity id; the entity is
      then queued.
    * For a directory, ``DirectoryIngestor.crawl`` is invoked.

    ``languages`` is passed through unchanged under the ``'languages'``
    key of the manager context.

    The manager is closed even if storing, queueing or crawling raises.
    """
    context = {'languages': languages}
    conn = get_redis()
    queue = ServiceQueue(conn, ServiceQueue.OP_INGEST, dataset)
    manager = Manager(queue, context)
    try:
        path = ensure_path(path)
        if path is not None:
            if path.is_file():
                entity = manager.make_entity('Document')
                checksum = manager.store(path)
                entity.set('contentHash', checksum)
                entity.make_id(checksum)
                entity.set('fileName', path.name)
                manager.queue_entity(entity)
            if path.is_dir():
                DirectoryIngestor.crawl(manager, path)
    finally:
        manager.close()
def _ingest_path(db, conn, dataset, path, languages=None):
    """Queue a file, or crawl a directory, for ingest.

    A job is created for ``dataset`` on ``conn`` and its ``OP_INGEST``
    stage is used to build a ``Manager``. ``path`` is normalised with
    ``ensure_path``; when that yields ``None`` nothing is queued.

    * For a file, a ``Document`` entity is created, the file is stored via
      ``manager.store`` and the returned checksum is used both as
      ``contentHash`` and as the basis of the entity id; the entity is
      logged at info level and then queued.
    * For a directory, ``DirectoryIngestor.crawl`` is invoked.

    ``languages`` ends up under the ``"languages"`` key of the context
    passed to the manager. Omitting it (or passing ``None``) yields a
    fresh empty list per call instead of a list shared between calls.

    The manager is closed even if storing, queueing or crawling raises.
    """
    if languages is None:
        # Avoid a shared mutable default: each call gets its own list.
        languages = []
    context = {"languages": languages}
    job = Job.create(conn, dataset)
    stage = job.get_stage(OP_INGEST)
    manager = Manager(db, stage, context)
    try:
        path = ensure_path(path)
        if path is not None:
            if path.is_file():
                entity = manager.make_entity("Document")
                checksum = manager.store(path)
                entity.set("contentHash", checksum)
                entity.make_id(checksum)
                entity.set("fileName", path.name)
                log.info("Queue: %r", entity.to_dict())
                manager.queue_entity(entity)
            if path.is_dir():
                DirectoryIngestor.crawl(manager, path)
    finally:
        manager.close()