def test_run(self):
     """Exercise sync(), retry(), signal handling and run() on a CountingWorker."""
     redis = get_fakeredis()
     stage_name = "lala"
     worker = CountingWorker(conn=redis, stages=[stage_name])

     # No task has been queued yet, so the counter is expected to stay at zero.
     worker.sync()
     assert worker.test_done == 0, worker.test_done

     # Queueing alone leaves the job unfinished and the counter untouched.
     job = Job.create(redis, "test")
     queued = job.get_stage(stage_name).queue({}, {})
     assert not job.is_done()
     assert worker.test_done == 0, worker.test_done

     # A sync pass is expected to complete the single queued task.
     worker.sync()
     assert worker.test_done == 1, worker.test_done
     assert job.is_done()

     # After retry() the job is unfinished again until the next sync pass;
     # the counter is asserted to remain at one afterwards.
     worker.retry(queued)
     assert not job.is_done()
     worker.sync()
     assert job.is_done()
     assert worker.exit_code == 0, worker.exit_code
     assert worker.test_done == 1, worker.test_done

     # The signal number passed in is expected to become the exit code ...
     worker._handle_signal(5, None)
     assert worker.exit_code == 5, worker.exit_code

     # ... and a non-blocking run is expected to bring it back to zero.
     worker.retry(queued)
     worker.run(blocking=False)
     assert job.is_done()
     assert worker.exit_code == 0, worker.exit_code

     # Same retry/run cycle again with num_threads set to None.
     worker.num_threads = None
     worker.retry(queued)
     worker.run(blocking=False)
     assert job.is_done()
     assert worker.exit_code == 0, worker.exit_code
 def test_job_queue(self):
     """Follow one task through the pending, running and finished counters."""
     job = Job.create(self.conn, self.dataset)
     ingest = job.get_stage(Stage.INGEST)

     # Before anything is queued the stage counters are zero and the job
     # reports itself as done.
     counts = ingest.get_status()
     assert counts['pending'] == 0
     assert counts['finished'] == 0
     assert job.is_done()

     # Queue a single task: one pending, nothing running or finished.
     ingest.queue({'test': 'foo'}, {})
     counts = job.get_status()
     assert counts['pending'] == 1
     assert counts['finished'] == 0
     assert counts['running'] == 0
     assert not job.is_done()

     # Fetch the task back via the class-level getter and verify its origin
     # and payload.
     fetched = Stage.get_task(self.conn, Stage.INGEST, timeout=None)
     assert fetched.job.dataset.name == job.dataset.name
     assert fetched.payload['test'] == 'foo'

     # Once fetched, the task is counted as running rather than pending.
     counts = job.get_status()
     assert counts['pending'] == 0
     assert counts['running'] == 1
     assert counts['finished'] == 0
     assert not job.is_done()

     # Marking the task done moves it to finished and completes the job.
     fetched.done()
     counts = job.get_status()
     assert counts['pending'] == 0
     assert counts['running'] == 0
     assert counts['finished'] == 1
     assert job.is_done()
Exemple #3
0
 def test_job_queue(self):
     """Check the status counters while one task is queued, taken and done."""
     job = Job.create(self.conn, self.dataset)
     ingest_stage = job.get_stage("ingest")

     # Initial state: empty stage, job considered done.
     snapshot = ingest_stage.get_status()
     assert snapshot["pending"] == 0
     assert snapshot["finished"] == 0
     assert job.is_done()

     # After queueing, exactly one task is pending.
     ingest_stage.queue({"test": "foo"}, {})
     snapshot = job.get_status()
     assert snapshot["pending"] == 1
     assert snapshot["finished"] == 0
     assert snapshot["running"] == 0
     assert not job.is_done()

     # Taking the task off the "ingest" stage returns the queued payload for
     # the same dataset.
     picked = Stage.get_task(self.conn, "ingest", timeout=None)
     assert picked.job.dataset.name == job.dataset.name
     assert picked.payload["test"] == "foo"

     # The task now counts as running.
     snapshot = job.get_status()
     assert snapshot["pending"] == 0
     assert snapshot["running"] == 1
     assert snapshot["finished"] == 0
     assert not job.is_done()

     # Completing the task bumps the finished counter and finishes the job.
     picked.done()
     snapshot = job.get_status()
     assert snapshot["pending"] == 0
     assert snapshot["running"] == 0
     assert snapshot["finished"] == 1
     assert job.is_done()
 def test_fake_finished(self):
     """report_finished(500) must show up in both stage and dataset status."""
     job = Job.create(self.conn, self.dataset)
     ingest = job.get_stage(Stage.INGEST)

     counts = ingest.get_status()
     assert counts['finished'] == 0

     # Report 500 finished tasks without ever queueing any.
     ingest.report_finished(500)
     counts = ingest.get_status()
     assert counts['finished'] == 500

     # The dataset-level status must carry the same figure; the status dict
     # doubles as the assertion message.
     dataset_counts = job.dataset.get_status()
     assert dataset_counts['finished'] == 500, dataset_counts
Exemple #5
0
 def test_fake_finished(self):
     """A reported finished count is visible on the stage and on the dataset."""
     job = Job.create(self.conn, self.dataset)
     ingest_stage = job.get_stage("ingest")

     before = ingest_stage.get_status()
     assert before["finished"] == 0

     # No task is queued here; the count is reported directly.
     ingest_stage.report_finished(500)
     after = ingest_stage.get_status()
     assert after["finished"] == 500

     # Same figure expected from the dataset; dump the dict on failure.
     dataset_status = job.dataset.get_status()
     assert dataset_status["finished"] == 500, dataset_status
 def test_active_dataset_status(self):
     """Queued tasks make the dataset active; cancelling empties the overview."""
     job = Job.create(self.conn, self.dataset)
     ingest = job.get_stage(Stage.INGEST)
     for payload in ({'test': 'foo'}, {'test': 'bar'}):
         ingest.queue(payload, {})

     # One active dataset, listed under 'test_1', with both tasks pending.
     overview = Dataset.get_active_dataset_status(self.conn)
     assert len(overview['datasets']) == 1
     assert overview['total'] == 1
     assert overview['datasets']['test_1']['pending'] == 2

     # After cancelling the dataset the overview must be empty again.
     job.dataset.cancel()
     overview = Dataset.get_active_dataset_status(self.conn)
     assert overview['datasets'] == {}
     assert overview['total'] == 0
Exemple #7
0
 def test_active_dataset_status(self):
     """The active-dataset overview tracks queued work and clears on cancel."""
     job = Job.create(self.conn, self.dataset)
     ingest_stage = job.get_stage("ingest")
     for payload in ({"test": "foo"}, {"test": "bar"}):
         ingest_stage.queue(payload, {})

     # Two queued tasks: a single dataset entry ("test_1") with two pending.
     active = Dataset.get_active_dataset_status(self.conn)
     assert len(active["datasets"]) == 1
     assert active["total"] == 1
     assert active["datasets"]["test_1"]["pending"] == 2

     # Cancelling the dataset removes it from the overview.
     job.dataset.cancel()
     active = Dataset.get_active_dataset_status(self.conn)
     assert active["datasets"] == {}
     assert active["total"] == 0
 def test_fetch_multiple_task(self):
     """get_tasks() yields the queued tasks as Task objects, in queue order."""
     job = Job.create(self.conn, self.dataset)
     ingest = job.get_stage(Stage.INGEST)
     ingest.queue({'test': 'foo'}, {})
     ingest.queue({'test': 'bar'}, {})

     counts = job.get_status()
     assert counts['pending'] == 2

     # Ask for up to five tasks; only the two queued ones come back.
     fetched = list(ingest.get_tasks(limit=5))
     assert len(fetched) == 2
     assert all(isinstance(item, Task) for item in fetched)

     # Payloads come back in the order in which they were queued.
     first, second = fetched
     assert first.payload == {'test': 'foo'}
     assert second.payload == {'test': 'bar'}

     job.dataset.cancel()
Exemple #9
0
 def test_fetch_multiple_task(self):
     """Fetching with a limit above the queue size returns every queued task."""
     job = Job.create(self.conn, self.dataset)
     ingest_stage = job.get_stage("ingest")
     ingest_stage.queue({"test": "foo"}, {})
     ingest_stage.queue({"test": "bar"}, {})

     job_status = job.get_status()
     assert job_status["pending"] == 2

     # limit=5 exceeds the two queued tasks, so exactly two are returned.
     batch = list(ingest_stage.get_tasks(limit=5))
     assert len(batch) == 2
     assert all(isinstance(entry, Task) for entry in batch)

     # The payloads are expected in queueing order.
     assert batch[0].payload == {"test": "foo"}
     assert batch[1].payload == {"test": "bar"}

     job.dataset.cancel()
    def test_queue_clear(self):
        """Pending tasks vanish after dataset.cancel() and after job.remove()."""
        job = Job.create(self.conn, self.dataset)
        ingest = job.get_stage(Stage.INGEST)

        # Round one: cancel the whole dataset.
        ingest.queue({'test': 'foo'}, {})
        counts = ingest.get_status()
        assert counts['pending'] == 1
        job.dataset.cancel()
        counts = ingest.get_status()
        assert counts['pending'] == 0

        # Round two: remove just the job.
        ingest.queue({'test': 'foo'}, {})
        counts = ingest.get_status()
        assert counts['pending'] == 1
        job.remove()
        counts = ingest.get_status()
        assert counts['pending'] == 0
Exemple #11
0
def _ingest_path(db, conn, dataset, path, languages=None):
    """Queue the file or directory at ``path`` for ingest into ``dataset``.

    A job is created for ``dataset`` on ``conn`` and its ``OP_INGEST`` stage
    is handed to a ``Manager`` together with ``db``. A regular file is stored
    through the manager and queued as a ``Document`` entity whose id is
    derived from the checksum returned by ``manager.store``; a directory is
    handed to ``DirectoryIngestor.crawl``. Nothing is queued when
    ``ensure_path`` returns ``None``.

    Args:
        db: passed through to ``Manager`` unchanged.
        conn: connection passed to ``Job.create``.
        dataset: dataset identifier passed to ``Job.create``.
        path: location to ingest; normalised with ``ensure_path``.
        languages: value stored in the manager context under the
            ``'languages'`` key. When omitted (or ``None``) a fresh empty
            list is used for each call.
    """
    # A list literal as default would be one object shared by every call, and
    # the context dict keeps a reference to it; build a new list instead.
    if languages is None:
        languages = []
    context = {'languages': languages}
    job = Job.create(conn, dataset)
    stage = job.get_stage(OP_INGEST)
    manager = Manager(db, stage, context)
    try:
        path = ensure_path(path)
        if path is not None:
            if path.is_file():
                entity = manager.make_entity('Document')
                checksum = manager.store(path)
                entity.set('contentHash', checksum)
                entity.make_id(checksum)
                entity.set('fileName', path.name)
                manager.queue_entity(entity)
            if path.is_dir():
                DirectoryIngestor.crawl(manager, path)
    finally:
        # Close the manager even when storing or crawling raises.
        manager.close()
Exemple #12
0
 def setUp(self):
     """Point the settings at local test backends and build ``self.manager``."""
     # Force tests to use fake configuration
     service_settings.REDIS_URL = None
     service_settings.ARCHIVE_TYPE = 'file'
     service_settings.ARCHIVE_PATH = mkdtemp()
     balkhash_settings.BACKEND = 'LEVELDB'
     balkhash_settings.LEVELDB_PATH = mkdtemp()

     # Ingest stage of a 'test' job on a fake redis connection.
     ingest_stage = Stage(Job.create(get_fakeredis(), 'test'), Stage.INGEST)

     manager = Manager(ingest_stage, {})
     self.manager = manager

     # Swap the manager's emit/queue methods for the emit_entity and
     # queue_entity functions in scope, bound to this instance, and give it
     # an empty ``entities`` list.
     manager.entities = []
     manager.emit_entity = types.MethodType(emit_entity, manager)
     manager.queue_entity = types.MethodType(queue_entity, manager)

     # The test case and the manager share one archive object.
     archive = init_archive()
     self.archive = archive
     manager._archive = archive
Exemple #13
0
    def test_queue_clear(self):
        """Both dataset.cancel() and job.remove() must drop pending tasks."""
        job = Job.create(self.conn, self.dataset)
        ingest_stage = job.get_stage("ingest")

        # First pass: one task queued, then the dataset is cancelled.
        ingest_stage.queue({"test": "foo"}, {})
        snapshot = ingest_stage.get_status()
        assert snapshot["pending"] == 1
        job.dataset.cancel()
        snapshot = ingest_stage.get_status()
        assert snapshot["pending"] == 0

        # Second pass: one task queued, then the job itself is removed.
        ingest_stage.queue({"test": "foo"}, {})
        snapshot = ingest_stage.get_status()
        assert snapshot["pending"] == 1
        job.remove()
        snapshot = ingest_stage.get_status()
        assert snapshot["pending"] == 0
Exemple #14
0
 def setUp(self):
     """Switch the settings to test values and build ``self.manager``."""
     # Force tests to use fake configuration
     ingestors_settings.TESTING = True
     service_settings.REDIS_URL = None
     service_settings.ARCHIVE_TYPE = 'file'
     service_settings.ARCHIVE_PATH = mkdtemp()
     ftmstore_settings.DATABASE_URI = 'sqlite://'

     # A 'test' job on a fake redis connection, plus the dataset looked up
     # under the job's dataset name for the ingest operation.
     job = Job.create(get_fakeredis(), 'test')
     ingest_stage = Stage(job, OP_INGEST)
     store = get_dataset(job.dataset.name, OP_INGEST)

     manager = Manager(store, ingest_stage, {})
     self.manager = manager

     # Swap the manager's emit/queue methods for the emit_entity and
     # queue_entity functions in scope, bound to this instance, and give it
     # an empty ``entities`` list.
     manager.entities = []
     manager.emit_entity = types.MethodType(emit_entity, manager)
     manager.queue_entity = types.MethodType(queue_entity, manager)

     # The test case and the manager share one archive object.
     archive = init_archive()
     self.archive = archive
     manager._archive = archive
Exemple #15
0
def _ingest_path(db, conn, dataset, path, languages=None):
    """Queue the file or directory at ``path`` for ingest into ``dataset``.

    A job is created for ``dataset`` on ``conn`` and its ``OP_INGEST`` stage
    is handed to a ``Manager`` together with ``db``. A regular file is stored
    through the manager, logged, and queued as a ``Document`` entity whose id
    is derived from the checksum returned by ``manager.store``; a directory
    is handed to ``DirectoryIngestor.crawl``. Nothing is queued when
    ``ensure_path`` returns ``None``.

    Args:
        db: passed through to ``Manager`` unchanged.
        conn: connection passed to ``Job.create``.
        dataset: dataset identifier passed to ``Job.create``.
        path: location to ingest; normalised with ``ensure_path``.
        languages: value stored in the manager context under the
            ``"languages"`` key. When omitted (or ``None``) a fresh empty
            list is used for each call.
    """
    # A list literal as default would be one object shared by every call, and
    # the context dict keeps a reference to it; build a new list instead.
    if languages is None:
        languages = []
    context = {"languages": languages}
    job = Job.create(conn, dataset)
    stage = job.get_stage(OP_INGEST)
    manager = Manager(db, stage, context)
    try:
        path = ensure_path(path)
        if path is not None:
            if path.is_file():
                entity = manager.make_entity("Document")
                checksum = manager.store(path)
                entity.set("contentHash", checksum)
                entity.make_id(checksum)
                entity.set("fileName", path.name)
                log.info("Queue: %r", entity.to_dict())
                manager.queue_entity(entity)
            if path.is_dir():
                DirectoryIngestor.crawl(manager, path)
    finally:
        # Close the manager even when storing or crawling raises.
        manager.close()
Exemple #16
0
def ingest(path, dataset, languages=None):
    """Queue a set of files for ingest.

    A job is created for ``dataset`` on the connection returned by
    ``get_redis`` and its ``Stage.INGEST`` stage is handed to a ``Manager``.
    A regular file is stored through the manager and queued as a ``Document``
    entity whose id is derived from the checksum returned by
    ``manager.store``; a directory is handed to ``DirectoryIngestor.crawl``.
    Nothing is queued when ``ensure_path`` returns ``None``.

    Args:
        path: location to ingest; normalised with ``ensure_path``.
        dataset: dataset identifier passed to ``Job.create``.
        languages: value stored in the manager context under the
            ``'languages'`` key; passed through as given.
    """
    context = {'languages': languages}
    conn = get_redis()
    job = Job.create(conn, dataset)
    stage = job.get_stage(Stage.INGEST)
    manager = Manager(stage, context)
    try:
        path = ensure_path(path)
        if path is not None:
            if path.is_file():
                entity = manager.make_entity('Document')
                checksum = manager.store(path)
                entity.set('contentHash', checksum)
                entity.make_id(checksum)
                entity.set('fileName', path.name)
                manager.queue_entity(entity)
            if path.is_dir():
                DirectoryIngestor.crawl(manager, path)
    finally:
        # Close the manager even when storing or crawling raises.
        manager.close()
Exemple #17
0
 def setUp(self):
     """Store a connection, dataset/operation names and a job on the instance."""
     self.conn = get_redis()
     self.dataset = "my-dataset"
     self.operation = "OP_FOO"
     # The job is created from the connection and dataset name set above.
     self.job = Job.create(self.conn, self.dataset)