def test_tasks(app, db, halt_workflow, sample_record_filename):
    """Test the ``submit_results`` task end to end.

    Covers:

    * an empty ``results_uri`` raises ``CrawlerInvalidResultsPath``,
    * a valid URI for a job that was never created raises
      ``CrawlerJobNotExistError``,
    * a successful submission stores logs/results on the job and spawns a
      workflow object linked back to the crawl job,
    * submitting crawler-side errors raises ``CrawlerJobError`` and flips
      the job status to ``ERROR``.

    Fix: the original repeated the ``CrawlerInvalidResultsPath`` block
    twice verbatim (copy-paste duplicate); the duplicate is removed.
    """
    job_id = uuid.uuid4().hex  # random job identifier

    with app.app_context():
        # An empty results URI is rejected up front.
        with pytest.raises(CrawlerInvalidResultsPath):
            submit_results(job_id, results_uri="", errors=None, log_file=None)

        # A valid URI for an unknown job id is also rejected.
        with pytest.raises(CrawlerJobNotExistError):
            submit_results(
                job_id,
                results_uri=sample_record_filename,
                errors=None,
                log_file=None,
            )

        CrawlerJob.create(
            job_id=job_id,
            spider="Test",
            workflow=halt_workflow.__name__,
            logs=None,
            results=None,
        )
        db.session.commit()

    with app.app_context():
        job = CrawlerJob.get_by_job(job_id)
        assert job
        assert str(job.status)
        assert job.status == JobStatus.PENDING

        # A successful submission records logs and results on the job ...
        submit_results(
            job_id=job_id,
            results_uri=sample_record_filename,
            errors=None,
            log_file="/foo/bar",
        )

        job = CrawlerJob.get_by_job(job_id)
        assert job.logs == "/foo/bar"
        assert job.results == sample_record_filename

        # ... and creates a workflow object pointing back at the crawl job.
        workflow = WorkflowObject.get(1)
        assert workflow
        assert workflow.extra_data['crawler_job_id'] == job_id
        crawler_results_path = workflow.extra_data['crawler_results_path']
        assert crawler_results_path == urlparse(sample_record_filename).path

        # Reporting errors raises and marks the job as ERROR.
        with pytest.raises(CrawlerJobError):
            submit_results(
                job_id,
                results_uri=sample_record_filename,
                errors=["Some error"],
                log_file=None,
            )

        job = CrawlerJob.get_by_job(job_id)
        assert job.status == JobStatus.ERROR
def schedule_and_wait_crawl(max_wait, *args, **kwargs):
    """Schedule an inspire-crawler task and poll until it reaches a terminal state.

    :param max_wait: maximum number of seconds to wait before giving up.
    :return: ``True`` if the job finished successfully, ``False`` otherwise.
    """
    job_id = schedule_crawl(*args, **kwargs)
    log('Crawler job scheduled.', job_id=job_id)
    job = CrawlerJob.get_by_job(job_id)

    poll_interval = current_app.config.get('CLI_HARVEST_SLEEP_TIME', 0.5)
    polls = 0
    terminal_states = (JobStatus.ERROR, JobStatus.FINISHED)

    # Poll the DB row until the job terminates or the time budget runs out.
    while job.status not in terminal_states:
        if polls * poll_interval > max_wait:
            log('Timeout reached, skip waiting for job.', logging.ERROR,
                job_id=job_id, job_status=job.status)
            break
        sleep(poll_interval)
        polls += 1
        db.session.refresh(job)

    if job.status in terminal_states:
        log('Job finished.', job_id=job_id, job_status=job.status)

    return job.status == JobStatus.FINISHED
def test_receivers(app, db, sample_record):
    """Test receivers."""
    # NOTE(review): two sibling functions in this chunk share this name;
    # if they live in the same module, only the last definition is
    # collected by pytest — confirm intended module layout.
    job_id = uuid.uuid4().hex
    responses.add(
        responses.POST,
        "http://localhost:6800/schedule.json",
        body=json.dumps({"jobid": job_id, "status": "ok"}),
        status=200
    )

    mock_record = MagicMock()
    type(mock_record).raw = PropertyMock(return_value=sample_record)

    with app.app_context():
        # Without spider/workflow kwargs the receiver does nothing.
        result = receive_oaiharvest_job(
            request=None, records=[mock_record], name=""
        )
        assert result is None

        # With them, a crawler job is scheduled and persisted.
        receive_oaiharvest_job(
            request=None,
            records=[mock_record],
            name="",
            spider="Test",
            workflow="test",
        )
        assert CrawlerJob.get_by_job(job_id)
def test_receivers(app, db, sample_record_string):
    """Test receivers."""
    # NOTE(review): this redefines ``test_receivers``; if it shares a
    # module with the other variants, earlier ones are shadowed — verify.
    job_id = uuid.uuid4().hex
    responses.add(
        responses.POST,
        "http://localhost:6800/schedule.json",
        body=json.dumps({"jobid": job_id, "status": "ok"}),
        status=200,
    )

    mock_record = MagicMock()
    type(mock_record).raw = PropertyMock(return_value=sample_record_string)

    with app.app_context():
        # Missing spider/workflow kwargs: the receiver is a no-op.
        outcome = receive_oaiharvest_job(
            request=None, records=[mock_record], name=""
        )
        assert outcome is None

        # Full kwargs: a crawler job gets created.
        receive_oaiharvest_job(
            request=None,
            records=[mock_record],
            name="",
            spider="Test",
            workflow="test",
        )
        assert CrawlerJob.get_by_job(job_id)
def test_receivers(app, db, sample_record_string):
    """Test receivers (requests_mock variant)."""
    # NOTE(review): this redefines ``test_receivers``; if it shares a
    # module with the other variants, earlier ones are shadowed — verify.
    with requests_mock.Mocker() as requests_mocker:
        job_id = uuid.uuid4().hex
        requests_mocker.register_uri(
            'POST',
            'http://localhost:6800/schedule.json',
            json={'jobid': job_id, 'status': 'ok'},
        )

        mock_record = MagicMock()
        type(mock_record).raw = PropertyMock(return_value=sample_record_string)

        with app.app_context():
            # No spider/workflow supplied: receiver returns None, no job.
            outcome = receive_oaiharvest_job(
                request=None, records=[mock_record], name=""
            )
            assert outcome is None

            # Spider/workflow supplied: a crawler job is scheduled.
            receive_oaiharvest_job(
                request=None,
                records=[mock_record],
                name='',
                spider='Test',
                workflow='test',
            )
            assert CrawlerJob.get_by_job(job_id)