Esempio n. 1
0
def test_report_hunter_timeout(bson_library, mongo_host, status, time_later,
                               should_timeout, test_db_name, test_lib_name):
    job_id = str(uuid.uuid4())
    report_name = str(uuid.uuid4())

    serializer = PyMongoNotebookResultSerializer(
        database_name=test_db_name,
        mongo_host=mongo_host,
        result_collection_name=test_lib_name)
    start_time = time_now = datetime.datetime(2018, 1, 12, 2, 30)
    with freezegun.freeze_time(time_now):
        serializer.save_check_stub(job_id, report_name, status=status)
        _report_hunter(
            Serializer.PYMONGO.value,
            mongo_host=mongo_host,
            database_name=test_db_name,
            result_collection_name=test_lib_name,
            run_once=True,
        )
        expected = NotebookResultPending(
            job_id=job_id,
            report_name=report_name,
            report_title=report_name,
            status=status,
            update_time=time_now,
            job_start_time=start_time,
        )
        assert get_report_cache(report_name, job_id) == expected

    time_now += time_later
    with freezegun.freeze_time(time_now):
        _report_hunter(
            Serializer.PYMONGO.value,
            mongo_host=mongo_host,
            database_name=test_db_name,
            result_collection_name=test_lib_name,
            run_once=True,
        )

        if should_timeout:
            mins = (time_later.total_seconds() / 60) - 1
            expected = NotebookResultError(
                job_id=job_id,
                report_name=report_name,
                report_title=report_name,
                status=JobStatus.TIMEOUT,
                update_time=time_now,
                job_start_time=start_time,
                error_info=
                "This request timed out while being submitted to run. "
                "Please try again! "
                "Timed out after {:.0f} minutes 0 seconds.".format(mins),
            )
        else:
            # expected does not change
            pass
        assert get_report_cache(report_name, job_id) == expected
Esempio n. 2
0
def test_report_hunter_with_status_change(bson_library, mongo_host,
                                          test_db_name, test_lib_name):
    initialise_base_dirs()
    serializer = PyMongoNotebookResultSerializer(
        database_name=test_db_name,
        mongo_host=mongo_host,
        result_collection_name=test_lib_name)

    job_id = str(uuid.uuid4())
    report_name = str(uuid.uuid4())
    with freezegun.freeze_time(datetime.datetime(2018, 1, 12, 2, 30)):
        serializer.save_check_stub(job_id, report_name)
        _report_hunter(
            Serializer.PYMONGO.value,
            mongo_host=mongo_host,
            database_name=test_db_name,
            result_collection_name=test_lib_name,
            run_once=True,
        )
        expected = NotebookResultPending(
            job_id=job_id,
            report_name=report_name,
            report_title=report_name,
            update_time=datetime.datetime(2018, 1, 12, 2, 30),
            job_start_time=datetime.datetime(2018, 1, 12, 2, 30),
        )
        assert get_report_cache(report_name, job_id) == expected

    with freezegun.freeze_time(datetime.datetime(2018, 1, 12, 2, 32)):
        serializer.update_check_status(job_id,
                                       JobStatus.CANCELLED,
                                       error_info="This was cancelled!")
        _report_hunter(
            Serializer.PYMONGO.value,
            mongo_host=mongo_host,
            database_name=test_db_name,
            result_collection_name=test_lib_name,
            run_once=True,
        )

        expected = NotebookResultError(
            job_id=job_id,
            report_name=report_name,
            report_title=report_name,
            status=JobStatus.CANCELLED,
            update_time=datetime.datetime(2018, 1, 12, 2, 32),
            job_start_time=datetime.datetime(2018, 1, 12, 2, 30),
            error_info="This was cancelled!",
        )
        assert get_report_cache(report_name, job_id) == expected
Esempio n. 3
0
def test_report_hunter_with_one(bson_library, mongo_host, test_db_name,
                                test_lib_name):
    serializer = PyMongoNotebookResultSerializer(
        database_name=test_db_name,
        mongo_host=mongo_host,
        result_collection_name=test_lib_name)

    job_id = str(uuid.uuid4())
    report_name = str(uuid.uuid4())
    serializer.save_check_stub(job_id, report_name)
    _report_hunter(
        Serializer.PYMONGO.value,
        mongo_host=mongo_host,
        database_name=test_db_name,
        result_collection_name=test_lib_name,
        run_once=True,
    )
    expected = NotebookResultPending(
        job_id=job_id,
        report_name=report_name,
        report_title=report_name,
        update_time=datetime.datetime(2018, 1, 12),
        job_start_time=datetime.datetime(2018, 1, 12),
    )
    assert get_report_cache(report_name, job_id) == expected
Esempio n. 4
0
def _get_job_results(
    job_id: str,
    report_name: str,
    serializer: MongoResultSerializer,
    retrying: Optional[bool] = False,
    ignore_cache: Optional[bool] = False,
) -> constants.NotebookResultBase:
    report_name = convert_report_name_url_to_path(report_name)
    current_result = get_report_cache(report_name, job_id)
    if current_result and not ignore_cache:
        logger.info("Fetched result from cache.")
        notebook_result = current_result
    else:
        notebook_result = serializer.get_check_result(job_id)
        set_report_cache(report_name, job_id, notebook_result)

    if not notebook_result:
        err_info = "Job results not found for report name={} / job id={}. " "Did you use an invalid job ID?".format(
            report_name, job_id)
        return constants.NotebookResultError(job_id,
                                             error_info=err_info,
                                             report_name=report_name,
                                             job_start_time=dt.now())
    if isinstance(notebook_result, str):
        if not retrying:
            return _get_job_results(job_id,
                                    report_name,
                                    serializer,
                                    retrying=True)
        raise NotebookRunException(
            "An unexpected string was found as a result. Please run your request again."
        )

    return notebook_result
Esempio n. 5
0
def _report_hunter(serializer_cls: str, run_once: bool = False, timeout: int = 5, **serializer_kwargs):
    """
    This is a function designed to run in a thread alongside the webapp. It updates the cache which the
    web app reads from and performs some admin on pending/running jobs. The function terminates either when
    run_once is set to True, or the "NOTEBOOKER_APP_STOPPING" environment variable is set.
    :param serializer_cls:
        The name of the serialiser (as acquired from Serializer.SERIALIZERNAME.value)
    :param run_once:
        Whether to infinitely run this function or not.
    :param timeout:
        The time in seconds that we cache results.
    :param serializer_kwargs:
        Any kwargs which are required for a Serializer to be initialised successfully.
    """
    serializer = get_serializer_from_cls(serializer_cls, **serializer_kwargs)
    last_query = None
    while not os.getenv("NOTEBOOKER_APP_STOPPING"):
        try:
            ct = 0
            # Now, get all pending requests and check they haven't timed out...
            all_pending = serializer.get_all_results(
                mongo_filter={"status": {"$in": [JobStatus.SUBMITTED.value, JobStatus.PENDING.value]}}
            )
            now = datetime.datetime.now()
            cutoff = {
                JobStatus.SUBMITTED: now - datetime.timedelta(minutes=SUBMISSION_TIMEOUT),
                JobStatus.PENDING: now - datetime.timedelta(minutes=RUNNING_TIMEOUT),
            }
            for result in all_pending:
                this_cutoff = cutoff.get(result.status)
                if result.job_start_time <= this_cutoff:
                    delta_seconds = (now - this_cutoff).total_seconds()
                    serializer.update_check_status(
                        result.job_id,
                        JobStatus.TIMEOUT,
                        error_info="This request timed out while being submitted to run. "
                        "Please try again! Timed out after {:.0f} minutes "
                        "{:.0f} seconds.".format(delta_seconds / 60, delta_seconds % 60),
                    )
            # Finally, check we have the latest updates
            _last_query = datetime.datetime.now() - datetime.timedelta(minutes=1)
            query_results = serializer.get_all_results(since=last_query)
            for result in query_results:
                ct += 1
                existing = get_report_cache(result.report_name, result.job_id)
                if not existing or result.status != existing.status:  # Only update the cache when the status changes
                    set_report_cache(result.report_name, result.job_id, result, timeout=timeout)
                    logger.info(
                        "Report-hunter found a change for {} (status: {}->{})".format(
                            result.job_id, existing.status if existing else None, result.status
                        )
                    )
            logger.info("Found {} updates since {}.".format(ct, last_query))
            last_query = _last_query
        except Exception as e:
            logger.exception(str(e))
        if run_once:
            break
        time.sleep(10)
    logger.info("Report-hunting thread successfully killed.")
Esempio n. 6
0
def test_report_hunter_timeout(bson_library, status, time_later,
                               should_timeout, webapp_config):
    job_id = str(uuid.uuid4())
    report_name = str(uuid.uuid4())

    serializer = initialize_serializer_from_config(webapp_config)
    start_time = time_now = datetime.datetime(2018, 1, 12, 2, 30)
    with freezegun.freeze_time(time_now):
        serializer.save_check_stub(job_id, report_name, status=status)
        _report_hunter(webapp_config=webapp_config, run_once=True)
        expected = NotebookResultPending(
            job_id=job_id,
            report_name=report_name,
            report_title=report_name,
            status=status,
            update_time=time_now,
            job_start_time=start_time,
        )
        assert get_report_cache(report_name,
                                job_id,
                                cache_dir=webapp_config.CACHE_DIR) == expected

    time_now += time_later
    with freezegun.freeze_time(time_now):
        _report_hunter(webapp_config=webapp_config, run_once=True)

        if should_timeout:
            mins = (time_later.total_seconds() / 60) - 1
            expected = NotebookResultError(
                job_id=job_id,
                report_name=report_name,
                report_title=report_name,
                status=JobStatus.TIMEOUT,
                update_time=time_now,
                job_start_time=start_time,
                error_info=
                "This request timed out while being submitted to run. "
                "Please try again! "
                "Timed out after {:.0f} minutes 0 seconds.".format(mins),
            )
        else:
            # expected does not change
            pass
        assert get_report_cache(report_name,
                                job_id,
                                cache_dir=webapp_config.CACHE_DIR) == expected
Esempio n. 7
0
def test_report_hunter_with_status_change(bson_library, webapp_config):
    initialise_base_dirs(webapp_config=webapp_config)
    serializer = initialize_serializer_from_config(webapp_config)

    job_id = str(uuid.uuid4())
    report_name = str(uuid.uuid4())
    with freezegun.freeze_time(datetime.datetime(2018, 1, 12, 2, 30)):
        serializer.save_check_stub(job_id, report_name)
        _report_hunter(webapp_config=webapp_config, run_once=True)
        expected = NotebookResultPending(
            job_id=job_id,
            report_name=report_name,
            report_title=report_name,
            update_time=datetime.datetime(2018, 1, 12, 2, 30),
            job_start_time=datetime.datetime(2018, 1, 12, 2, 30),
        )
        assert get_report_cache(report_name,
                                job_id,
                                cache_dir=webapp_config.CACHE_DIR) == expected

    with freezegun.freeze_time(datetime.datetime(2018, 1, 12, 2, 32)):
        serializer.update_check_status(job_id,
                                       JobStatus.CANCELLED,
                                       error_info="This was cancelled!")
        _report_hunter(webapp_config=webapp_config, run_once=True)

        expected = NotebookResultError(
            job_id=job_id,
            report_name=report_name,
            report_title=report_name,
            status=JobStatus.CANCELLED,
            update_time=datetime.datetime(2018, 1, 12, 2, 32),
            job_start_time=datetime.datetime(2018, 1, 12, 2, 30),
            error_info="This was cancelled!",
        )
        assert get_report_cache(report_name,
                                job_id,
                                cache_dir=webapp_config.CACHE_DIR) == expected
Esempio n. 8
0
def test_report_hunter_with_one(bson_library, webapp_config):
    serializer = initialize_serializer_from_config(webapp_config)

    job_id = str(uuid.uuid4())
    report_name = str(uuid.uuid4())
    serializer.save_check_stub(job_id, report_name)
    _report_hunter(webapp_config=webapp_config, run_once=True)
    expected = NotebookResultPending(
        job_id=job_id,
        report_name=report_name,
        report_title=report_name,
        update_time=datetime.datetime(2018, 1, 12),
        job_start_time=datetime.datetime(2018, 1, 12),
    )
    assert get_report_cache(report_name,
                            job_id,
                            cache_dir=webapp_config.CACHE_DIR) == expected
Esempio n. 9
0
def test_report_hunter_pending_to_done(bson_library, mongo_host, test_db_name,
                                       test_lib_name):
    job_id = str(uuid.uuid4())
    report_name = str(uuid.uuid4())
    serializer = PyMongoNotebookResultSerializer(
        database_name=test_db_name,
        mongo_host=mongo_host,
        result_collection_name=test_lib_name)

    with freezegun.freeze_time(datetime.datetime(2018, 1, 12, 2, 30)):
        serializer.save_check_stub(job_id,
                                   report_name,
                                   status=JobStatus.SUBMITTED)
        _report_hunter(
            Serializer.PYMONGO.value,
            mongo_host=mongo_host,
            database_name=test_db_name,
            result_collection_name=test_lib_name,
            run_once=True,
        )
        expected = NotebookResultPending(
            job_id=job_id,
            report_name=report_name,
            report_title=report_name,
            status=JobStatus.SUBMITTED,
            update_time=datetime.datetime(2018, 1, 12, 2, 30),
            job_start_time=datetime.datetime(2018, 1, 12, 2, 30),
        )
        assert get_report_cache(report_name, job_id) == expected

    with freezegun.freeze_time(datetime.datetime(2018, 1, 12, 2, 32)):
        serializer.update_check_status(job_id, JobStatus.PENDING)
        _report_hunter(
            Serializer.PYMONGO.value,
            mongo_host=mongo_host,
            database_name=test_db_name,
            result_collection_name=test_lib_name,
            run_once=True,
        )

        expected = NotebookResultPending(
            job_id=job_id,
            report_name=report_name,
            report_title=report_name,
            status=JobStatus.PENDING,
            update_time=datetime.datetime(2018, 1, 12, 2, 32),
            job_start_time=datetime.datetime(2018, 1, 12, 2, 30),
        )
        assert get_report_cache(report_name, job_id) == expected

    with freezegun.freeze_time(datetime.datetime(2018, 1, 12, 2, 37)):
        serializer.update_check_status(
            job_id,
            JobStatus.DONE,
            raw_html_resources={"outputs": {}},
            job_finish_time=datetime.datetime.now(),
            pdf="",
            raw_ipynb_json="[]",
            raw_html="",
        )
        _report_hunter(
            Serializer.PYMONGO.value,
            mongo_host=mongo_host,
            database_name=test_db_name,
            result_collection_name=test_lib_name,
            run_once=True,
        )

        expected = NotebookResultComplete(
            job_id=job_id,
            report_name=report_name,
            report_title=report_name,
            status=JobStatus.DONE,
            update_time=datetime.datetime(2018, 1, 12, 2, 37),
            job_start_time=datetime.datetime(2018, 1, 12, 2, 30),
            job_finish_time=datetime.datetime(2018, 1, 12, 2, 37),
            raw_html="",
            raw_html_resources={"outputs": {}},
            raw_ipynb_json="[]",
        )
        assert get_report_cache(report_name, job_id) == expected
Esempio n. 10
0
def test_report_hunter_pending_to_done(bson_library, webapp_config):
    job_id = str(uuid.uuid4())
    report_name = str(uuid.uuid4())
    serializer = initialize_serializer_from_config(webapp_config)

    with freezegun.freeze_time(datetime.datetime(2018, 1, 12, 2, 30)):
        serializer.save_check_stub(job_id,
                                   report_name,
                                   status=JobStatus.SUBMITTED)
        _report_hunter(webapp_config=webapp_config, run_once=True)
        expected = NotebookResultPending(
            job_id=job_id,
            report_name=report_name,
            report_title=report_name,
            status=JobStatus.SUBMITTED,
            update_time=datetime.datetime(2018, 1, 12, 2, 30),
            job_start_time=datetime.datetime(2018, 1, 12, 2, 30),
        )
        assert get_report_cache(report_name,
                                job_id,
                                cache_dir=webapp_config.CACHE_DIR) == expected

    with freezegun.freeze_time(datetime.datetime(2018, 1, 12, 2, 32)):
        serializer.update_check_status(job_id, JobStatus.PENDING)
        _report_hunter(webapp_config=webapp_config, run_once=True)

        expected = NotebookResultPending(
            job_id=job_id,
            report_name=report_name,
            report_title=report_name,
            status=JobStatus.PENDING,
            update_time=datetime.datetime(2018, 1, 12, 2, 32),
            job_start_time=datetime.datetime(2018, 1, 12, 2, 30),
        )
        assert get_report_cache(report_name,
                                job_id,
                                cache_dir=webapp_config.CACHE_DIR) == expected

    with freezegun.freeze_time(datetime.datetime(2018, 1, 12, 2, 37)):
        serializer.update_check_status(
            job_id,
            JobStatus.DONE,
            raw_html_resources={"outputs": {}},
            job_finish_time=datetime.datetime.now(),
            pdf="",
            raw_ipynb_json="[]",
            raw_html="",
        )
        _report_hunter(webapp_config=webapp_config, run_once=True)

        expected = NotebookResultComplete(
            job_id=job_id,
            report_name=report_name,
            report_title=report_name,
            status=JobStatus.DONE,
            update_time=datetime.datetime(2018, 1, 12, 2, 37),
            job_start_time=datetime.datetime(2018, 1, 12, 2, 30),
            job_finish_time=datetime.datetime(2018, 1, 12, 2, 37),
            raw_html="",
            raw_html_resources={"outputs": {}},
            raw_ipynb_json="[]",
        )
        assert get_report_cache(report_name,
                                job_id,
                                cache_dir=webapp_config.CACHE_DIR) == expected