def test_tasks(app, db, halt_workflow, sample_record_filename):
    """Test the ``submit_results`` task against a ``CrawlerJob``.

    The test walks through four phases:

    1. an empty ``results_uri`` is rejected with
       ``CrawlerInvalidResultsPath``;
    2. a valid ``results_uri`` for an unknown job id is rejected with
       ``CrawlerJobNotExistError``;
    3. once the job exists, submitting results stores the log and results
       locations on the job and creates a workflow object carrying the
       crawler metadata;
    4. submitting results together with errors raises ``CrawlerJobError``
       and leaves the job in ``JobStatus.ERROR``.
    """
    job_id = uuid.uuid4().hex  # random id: no CrawlerJob exists for it yet
    with app.app_context():
        # Phase 1: empty results path. (The original test repeated this
        # exact check twice; the verbatim duplicate has been dropped.)
        with pytest.raises(CrawlerInvalidResultsPath):
            submit_results(job_id, results_uri="", errors=None, log_file=None)

        # Phase 2: the path is fine but the job has not been created.
        with pytest.raises(CrawlerJobNotExistError):
            submit_results(
                job_id, results_uri=sample_record_filename,
                errors=None, log_file=None
            )

        CrawlerJob.create(
            job_id=job_id,
            spider="Test",
            workflow=halt_workflow.__name__,
            logs=None,
            results=None,
        )
        db.session.commit()

    with app.app_context():
        job = CrawlerJob.get_by_job(job_id)

        assert job
        assert str(job.status)
        assert job.status == JobStatus.PENDING

        # Phase 3: successful submission for the existing job.
        submit_results(
            job_id=job_id,
            results_uri=sample_record_filename,
            errors=None,
            log_file="/foo/bar"
        )

        job = CrawlerJob.get_by_job(job_id)
        assert job.logs == "/foo/bar"
        assert job.results == sample_record_filename

        # NOTE(review): id 1 presumably relies on this being the first
        # workflow object in a fresh database -- confirm with the fixtures.
        workflow = WorkflowObject.get(1)
        assert workflow
        assert workflow.extra_data['crawler_job_id'] == job_id
        crawler_results_path = workflow.extra_data['crawler_results_path']
        assert crawler_results_path == urlparse(sample_record_filename).path

        # Phase 4: reported crawl errors must surface and flag the job.
        with pytest.raises(CrawlerJobError):
            submit_results(
                job_id, results_uri=sample_record_filename,
                errors=["Some error"], log_file=None
            )

        job = CrawlerJob.get_by_job(job_id)
        assert job.status == JobStatus.ERROR
Exemple #2
0
def schedule_and_wait_crawl(max_wait, *args, **kwargs):
    """
    Schedule a crawl via ``schedule_crawl`` and poll until the job ends.

    The job is re-read from the database every ``CLI_HARVEST_SLEEP_TIME``
    seconds (application config, 0.5 when unset). Polling stops when the job
    reaches ``JobStatus.ERROR`` or ``JobStatus.FINISHED``, or when the
    accumulated sleep time exceeds ``max_wait``.

    :param max_wait: time budget for polling, in the same unit as the sleep
        interval
    :param args: positional arguments forwarded to ``schedule_crawl``
    :param kwargs: keyword arguments forwarded to ``schedule_crawl``
    :return: if the job finished successfully
    """
    job_id = schedule_crawl(*args, **kwargs)
    log('Crawler job scheduled.', job_id=job_id)
    job = CrawlerJob.get_by_job(job_id)

    poll_interval = current_app.config.get('CLI_HARVEST_SLEEP_TIME', 0.5)
    terminal_states = (JobStatus.ERROR, JobStatus.FINISHED)
    polls_done = 0

    while True:
        if job.status in terminal_states:
            log('Job finished.', job_id=job_id, job_status=job.status)
            break

        if polls_done * poll_interval > max_wait:
            log(
                'Timeout reached, skip waiting for job.',
                logging.ERROR,
                job_id=job_id,
                job_status=job.status,
            )
            break

        sleep(poll_interval)
        polls_done += 1

        # Pick up status changes written by the crawler in the meantime.
        db.session.refresh(job)

    return job.status == JobStatus.FINISHED
@responses.activate
def test_receivers(app, db, sample_record):
    """Test the ``receive_oaiharvest_job`` receiver.

    The scheduler endpoint is mocked with ``responses``; the
    ``responses.activate`` decorator is required for the response registered
    below to actually be served, otherwise the POST would go to the real
    host.

    Checks that the receiver returns ``None`` when called without ``spider``
    and ``workflow``, and that calling it with both makes a ``CrawlerJob``
    with the mocked job id retrievable.
    """
    job_id = uuid.uuid4().hex
    responses.add(
        responses.POST, "http://localhost:6800/schedule.json",
        body=json.dumps({"jobid": job_id, "status": "ok"}),
        status=200
    )

    # Record stand-in whose ``raw`` property yields the sample record.
    mock_record = MagicMock()
    prop_mock = PropertyMock(return_value=sample_record)
    type(mock_record).raw = prop_mock
    with app.app_context():
        assert receive_oaiharvest_job(
            request=None, records=[mock_record], name=""
        ) is None

        receive_oaiharvest_job(
            request=None,
            records=[mock_record],
            name="",
            spider="Test",
            workflow="test"
        )
        job = CrawlerJob.get_by_job(job_id)

        assert job
@responses.activate
def test_receivers(app, db, sample_record_string):
    """Test the ``receive_oaiharvest_job`` receiver.

    ``responses.activate`` switches on the ``responses`` mock for the
    duration of the test; without it the response registered below is never
    served and the scheduling POST would reach the real host.

    Checks that the receiver returns ``None`` when called without ``spider``
    and ``workflow``, and that calling it with both makes a ``CrawlerJob``
    with the mocked job id retrievable.
    """
    job_id = uuid.uuid4().hex
    responses.add(responses.POST,
                  "http://localhost:6800/schedule.json",
                  body=json.dumps({
                      "jobid": job_id,
                      "status": "ok"
                  }),
                  status=200)

    # Record stand-in whose ``raw`` property yields the sample record string.
    mock_record = MagicMock()
    prop_mock = PropertyMock(return_value=sample_record_string)
    type(mock_record).raw = prop_mock
    with app.app_context():
        assert receive_oaiharvest_job(
            request=None, records=[mock_record], name="") is None

        receive_oaiharvest_job(request=None,
                               records=[mock_record],
                               name="",
                               spider="Test",
                               workflow="test")
        job = CrawlerJob.get_by_job(job_id)

        assert job
Exemple #5
0
def schedule_and_wait_crawl(max_wait, *args, **kwargs):
    """
    Schedule a crawl with ``schedule_crawl`` and wait for the job to end.

    While the job is neither in ``JobStatus.ERROR`` nor in
    ``JobStatus.FINISHED``, the function sleeps for
    ``CLI_HARVEST_SLEEP_TIME`` seconds (application config, default 0.5) and
    refreshes the job from the database. It gives up once the accumulated
    sleep time is greater than ``max_wait``.

    :param max_wait: time budget for waiting, in the same unit as the sleep
        interval
    :param args: positional arguments passed through to ``schedule_crawl``
    :param kwargs: keyword arguments passed through to ``schedule_crawl``
    :return: if the job finished successfully
    """
    job_id = schedule_crawl(*args, **kwargs)
    log('Crawler job scheduled.', job_id=job_id)
    job = CrawlerJob.get_by_job(job_id)

    interval = current_app.config.get('CLI_HARVEST_SLEEP_TIME', 0.5)
    end_states = (JobStatus.ERROR, JobStatus.FINISHED)

    naps = 0
    # Keep polling while the job is still running and the budget allows it.
    while job.status not in end_states and not (naps * interval > max_wait):
        sleep(interval)
        naps += 1
        db.session.refresh(job)

    if job.status in end_states:
        log('Job finished.', job_id=job_id, job_status=job.status)
    else:
        # The loop can only have stopped because the budget ran out.
        log(
            'Timeout reached, skip waiting for job.',
            logging.ERROR,
            job_id=job_id,
            job_status=job.status,
        )

    return job.status == JobStatus.FINISHED
Exemple #6
0
def test_receivers(app, db, sample_record_string):
    """Test the ``receive_oaiharvest_job`` receiver with ``requests_mock``.

    The scheduling POST is answered by a mocked JSON reply carrying a random
    job id. The receiver must return ``None`` when called without ``spider``
    and ``workflow``; when both are given, a ``CrawlerJob`` with that id must
    be retrievable afterwards.
    """
    with requests_mock.Mocker() as requests_mocker:
        job_id = uuid.uuid4().hex

        scheduler_reply = {'jobid': job_id, 'status': 'ok'}
        requests_mocker.register_uri(
            'POST',
            'http://localhost:6800/schedule.json',
            json=scheduler_reply,
        )

        # Record stand-in whose ``raw`` property yields the sample string.
        mock_record = MagicMock()
        type(mock_record).raw = PropertyMock(
            return_value=sample_record_string,
        )

        with app.app_context():
            outcome = receive_oaiharvest_job(
                request=None,
                records=[mock_record],
                name="",
            )
            assert outcome is None

            receive_oaiharvest_job(
                request=None,
                records=[mock_record],
                name='',
                spider='Test',
                workflow='test',
            )

            assert CrawlerJob.get_by_job(job_id)