def test_report_hunter_timeout(bson_library, mongo_host, status, time_later, should_timeout, test_db_name, test_lib_name):
    """A job left in a pre-run status should flip to TIMEOUT once its deadline has passed."""
    job_id = str(uuid.uuid4())
    report_name = str(uuid.uuid4())
    serializer = PyMongoNotebookResultSerializer(
        database_name=test_db_name,
        mongo_host=mongo_host,
        result_collection_name=test_lib_name,
    )

    def hunt_once():
        # One synchronous sweep of the report hunter against the test database.
        _report_hunter(
            Serializer.PYMONGO.value,
            mongo_host=mongo_host,
            database_name=test_db_name,
            result_collection_name=test_lib_name,
            run_once=True,
        )

    start_time = datetime.datetime(2018, 1, 12, 2, 30)
    with freezegun.freeze_time(start_time):
        serializer.save_check_stub(job_id, report_name, status=status)
        hunt_once()
        expected = NotebookResultPending(
            job_id=job_id,
            report_name=report_name,
            report_title=report_name,
            status=status,
            update_time=start_time,
            job_start_time=start_time,
        )
        assert get_report_cache(report_name, job_id) == expected

    later = start_time + time_later
    with freezegun.freeze_time(later):
        hunt_once()
        if should_timeout:
            timeout_minutes = (time_later.total_seconds() / 60) - 1
            expected = NotebookResultError(
                job_id=job_id,
                report_name=report_name,
                report_title=report_name,
                status=JobStatus.TIMEOUT,
                update_time=later,
                job_start_time=start_time,
                error_info="This request timed out while being submitted to run. "
                "Please try again! "
                "Timed out after {:.0f} minutes 0 seconds.".format(timeout_minutes),
            )
        # Without a timeout the cached result is unchanged from the first pass.
        assert get_report_cache(report_name, job_id) == expected
def test_report_hunter_with_status_change(bson_library, mongo_host, test_db_name, test_lib_name):
    """A cancelled job should surface in the cache as a NotebookResultError carrying its error info."""
    initialise_base_dirs()
    serializer = PyMongoNotebookResultSerializer(
        database_name=test_db_name,
        mongo_host=mongo_host,
        result_collection_name=test_lib_name,
    )
    job_id = str(uuid.uuid4())
    report_name = str(uuid.uuid4())
    t_start = datetime.datetime(2018, 1, 12, 2, 30)
    t_cancel = datetime.datetime(2018, 1, 12, 2, 32)

    def hunt_once():
        # One synchronous sweep of the report hunter against the test database.
        _report_hunter(
            Serializer.PYMONGO.value,
            mongo_host=mongo_host,
            database_name=test_db_name,
            result_collection_name=test_lib_name,
            run_once=True,
        )

    with freezegun.freeze_time(t_start):
        serializer.save_check_stub(job_id, report_name)
        hunt_once()
        assert get_report_cache(report_name, job_id) == NotebookResultPending(
            job_id=job_id,
            report_name=report_name,
            report_title=report_name,
            update_time=t_start,
            job_start_time=t_start,
        )

    with freezegun.freeze_time(t_cancel):
        serializer.update_check_status(job_id, JobStatus.CANCELLED, error_info="This was cancelled!")
        hunt_once()
        assert get_report_cache(report_name, job_id) == NotebookResultError(
            job_id=job_id,
            report_name=report_name,
            report_title=report_name,
            status=JobStatus.CANCELLED,
            update_time=t_cancel,
            job_start_time=t_start,
            error_info="This was cancelled!",
        )
def test_report_hunter_with_one(bson_library, mongo_host, test_db_name, test_lib_name):
    """A freshly-stubbed job should appear in the cache as pending after one hunter pass."""
    serializer = PyMongoNotebookResultSerializer(
        database_name=test_db_name,
        mongo_host=mongo_host,
        result_collection_name=test_lib_name,
    )
    job_id, report_name = str(uuid.uuid4()), str(uuid.uuid4())
    serializer.save_check_stub(job_id, report_name)
    _report_hunter(
        Serializer.PYMONGO.value,
        mongo_host=mongo_host,
        database_name=test_db_name,
        result_collection_name=test_lib_name,
        run_once=True,
    )
    # NOTE(review): no freeze_time here, so the expected timestamp presumably
    # comes from a time-freezing fixture — confirm against conftest.
    stub_time = datetime.datetime(2018, 1, 12)
    assert get_report_cache(report_name, job_id) == NotebookResultPending(
        job_id=job_id,
        report_name=report_name,
        report_title=report_name,
        update_time=stub_time,
        job_start_time=stub_time,
    )
def _get_job_results(
    job_id: str,
    report_name: str,
    serializer: MongoResultSerializer,
    retrying: Optional[bool] = False,
    ignore_cache: Optional[bool] = False,
) -> constants.NotebookResultBase:
    """Fetch the result of a job, preferring the local report cache over the serializer.

    :param job_id: The ID of the job whose result is being fetched.
    :param report_name: The (URL-form) report name; converted to path form internally.
    :param serializer: The result serializer used on a cache miss.
    :param retrying: Internal flag — True on the single retry after an unexpected string result.
    :param ignore_cache: If True, always fetch from the serializer, bypassing the cache.
    :return: The notebook result, or a NotebookResultError if no result was found.
    :raises NotebookRunException: If an unexpected string result is seen twice in a row.
    """
    report_name = convert_report_name_url_to_path(report_name)
    current_result = get_report_cache(report_name, job_id)
    if current_result and not ignore_cache:
        logger.info("Fetched result from cache.")
        notebook_result = current_result
    else:
        notebook_result = serializer.get_check_result(job_id)
        # Only cache genuine results. The original code cached None / unexpected
        # strings too, which both wasted cache writes and made the retry below
        # pointless (the retry would just read the bad value back from cache).
        if notebook_result and not isinstance(notebook_result, str):
            set_report_cache(report_name, job_id, notebook_result)
    if not notebook_result:
        err_info = "Job results not found for report name={} / job id={}. " "Did you use an invalid job ID?".format(
            report_name, job_id
        )
        return constants.NotebookResultError(
            job_id, error_info=err_info, report_name=report_name, job_start_time=dt.now()
        )
    if isinstance(notebook_result, str):
        if not retrying:
            # Retry once, preserving the caller's cache preference.
            return _get_job_results(job_id, report_name, serializer, retrying=True, ignore_cache=ignore_cache)
        raise NotebookRunException("An unexpected string was found as a result. Please run your request again.")
    return notebook_result
def _report_hunter(serializer_cls: str, run_once: bool = False, timeout: int = 5, **serializer_kwargs):
    """
    This is a function designed to run in a thread alongside the webapp. It updates the cache which the
    web app reads from and performs some admin on pending/running jobs. The function terminates either when
    run_once is set to True, or the "NOTEBOOKER_APP_STOPPING" environment variable is set.

    :param serializer_cls: The name of the serialiser (as acquired from Serializer.SERIALIZERNAME.value)
    :param run_once: Whether to infinitely run this function or not.
    :param timeout: The time in seconds that we cache results.
    :param serializer_kwargs: Any kwargs which are required for a Serializer to be initialised successfully.
    """
    serializer = get_serializer_from_cls(serializer_cls, **serializer_kwargs)
    last_query = None
    while not os.getenv("NOTEBOOKER_APP_STOPPING"):
        try:
            ct = 0
            # First, get all pending requests and check they haven't timed out...
            all_pending = serializer.get_all_results(
                mongo_filter={"status": {"$in": [JobStatus.SUBMITTED.value, JobStatus.PENDING.value]}}
            )
            now = datetime.datetime.now()
            cutoff = {
                JobStatus.SUBMITTED: now - datetime.timedelta(minutes=SUBMISSION_TIMEOUT),
                JobStatus.PENDING: now - datetime.timedelta(minutes=RUNNING_TIMEOUT),
            }
            for result in all_pending:
                this_cutoff = cutoff.get(result.status)
                # Guard against a status with no configured cutoff; comparing against
                # None would raise a TypeError and abort the whole sweep.
                if this_cutoff is not None and result.job_start_time <= this_cutoff:
                    delta_seconds = (now - this_cutoff).total_seconds()
                    serializer.update_check_status(
                        result.job_id,
                        JobStatus.TIMEOUT,
                        # Single-line message (no embedded newline) — this is the exact
                        # text the timeout tests in this file assert against.
                        error_info="This request timed out while being submitted to run. "
                        "Please try again! Timed out after {:.0f} minutes "
                        "{:.0f} seconds.".format(delta_seconds / 60, delta_seconds % 60),
                    )
            # Finally, check we have the latest updates.
            # Snapshot the next query window *before* querying so overlapping updates
            # are never missed; only commit it once the sweep succeeds.
            _last_query = datetime.datetime.now() - datetime.timedelta(minutes=1)
            query_results = serializer.get_all_results(since=last_query)
            for result in query_results:
                ct += 1
                existing = get_report_cache(result.report_name, result.job_id)
                if not existing or result.status != existing.status:  # Only update the cache when the status changes
                    set_report_cache(result.report_name, result.job_id, result, timeout=timeout)
                    logger.info(
                        "Report-hunter found a change for {} (status: {}->{})".format(
                            result.job_id, existing.status if existing else None, result.status
                        )
                    )
            logger.info("Found {} updates since {}.".format(ct, last_query))
            last_query = _last_query
        except Exception as e:
            logger.exception(str(e))
        if run_once:
            break
        time.sleep(10)
    logger.info("Report-hunting thread successfully killed.")
def test_report_hunter_timeout(bson_library, status, time_later, should_timeout, webapp_config):
    """A job stuck in a pre-run status should flip to TIMEOUT once its deadline has passed."""
    job_id = str(uuid.uuid4())
    report_name = str(uuid.uuid4())
    serializer = initialize_serializer_from_config(webapp_config)

    start_time = datetime.datetime(2018, 1, 12, 2, 30)
    with freezegun.freeze_time(start_time):
        serializer.save_check_stub(job_id, report_name, status=status)
        _report_hunter(webapp_config=webapp_config, run_once=True)
        expected = NotebookResultPending(
            job_id=job_id,
            report_name=report_name,
            report_title=report_name,
            status=status,
            update_time=start_time,
            job_start_time=start_time,
        )
        assert get_report_cache(report_name, job_id, cache_dir=webapp_config.CACHE_DIR) == expected

    later = start_time + time_later
    with freezegun.freeze_time(later):
        _report_hunter(webapp_config=webapp_config, run_once=True)
        if should_timeout:
            timeout_minutes = (time_later.total_seconds() / 60) - 1
            expected = NotebookResultError(
                job_id=job_id,
                report_name=report_name,
                report_title=report_name,
                status=JobStatus.TIMEOUT,
                update_time=later,
                job_start_time=start_time,
                error_info="This request timed out while being submitted to run. "
                "Please try again! "
                "Timed out after {:.0f} minutes 0 seconds.".format(timeout_minutes),
            )
        # Without a timeout the cached result is unchanged from the first pass.
        assert get_report_cache(report_name, job_id, cache_dir=webapp_config.CACHE_DIR) == expected
def test_report_hunter_with_status_change(bson_library, webapp_config):
    """A cancelled job should surface in the cache as a NotebookResultError carrying its error info."""
    initialise_base_dirs(webapp_config=webapp_config)
    serializer = initialize_serializer_from_config(webapp_config)
    job_id = str(uuid.uuid4())
    report_name = str(uuid.uuid4())
    t_start = datetime.datetime(2018, 1, 12, 2, 30)
    t_cancel = datetime.datetime(2018, 1, 12, 2, 32)

    with freezegun.freeze_time(t_start):
        serializer.save_check_stub(job_id, report_name)
        _report_hunter(webapp_config=webapp_config, run_once=True)
        assert get_report_cache(report_name, job_id, cache_dir=webapp_config.CACHE_DIR) == NotebookResultPending(
            job_id=job_id,
            report_name=report_name,
            report_title=report_name,
            update_time=t_start,
            job_start_time=t_start,
        )

    with freezegun.freeze_time(t_cancel):
        serializer.update_check_status(job_id, JobStatus.CANCELLED, error_info="This was cancelled!")
        _report_hunter(webapp_config=webapp_config, run_once=True)
        assert get_report_cache(report_name, job_id, cache_dir=webapp_config.CACHE_DIR) == NotebookResultError(
            job_id=job_id,
            report_name=report_name,
            report_title=report_name,
            status=JobStatus.CANCELLED,
            update_time=t_cancel,
            job_start_time=t_start,
            error_info="This was cancelled!",
        )
def test_report_hunter_with_one(bson_library, webapp_config):
    """A freshly-stubbed job should appear in the cache as pending after one hunter pass."""
    serializer = initialize_serializer_from_config(webapp_config)
    job_id, report_name = str(uuid.uuid4()), str(uuid.uuid4())
    serializer.save_check_stub(job_id, report_name)
    _report_hunter(webapp_config=webapp_config, run_once=True)
    # NOTE(review): no freeze_time here, so the expected timestamp presumably
    # comes from a time-freezing fixture — confirm against conftest.
    stub_time = datetime.datetime(2018, 1, 12)
    assert get_report_cache(report_name, job_id, cache_dir=webapp_config.CACHE_DIR) == NotebookResultPending(
        job_id=job_id,
        report_name=report_name,
        report_title=report_name,
        update_time=stub_time,
        job_start_time=stub_time,
    )
def test_report_hunter_pending_to_done(bson_library, mongo_host, test_db_name, test_lib_name):
    """Walk a job through SUBMITTED -> PENDING -> DONE and verify each cache snapshot."""
    job_id = str(uuid.uuid4())
    report_name = str(uuid.uuid4())
    serializer = PyMongoNotebookResultSerializer(
        database_name=test_db_name,
        mongo_host=mongo_host,
        result_collection_name=test_lib_name,
    )

    def hunt_once():
        # One synchronous sweep of the report hunter against the test database.
        _report_hunter(
            Serializer.PYMONGO.value,
            mongo_host=mongo_host,
            database_name=test_db_name,
            result_collection_name=test_lib_name,
            run_once=True,
        )

    t_submitted = datetime.datetime(2018, 1, 12, 2, 30)
    t_pending = datetime.datetime(2018, 1, 12, 2, 32)
    t_done = datetime.datetime(2018, 1, 12, 2, 37)

    with freezegun.freeze_time(t_submitted):
        serializer.save_check_stub(job_id, report_name, status=JobStatus.SUBMITTED)
        hunt_once()
        assert get_report_cache(report_name, job_id) == NotebookResultPending(
            job_id=job_id,
            report_name=report_name,
            report_title=report_name,
            status=JobStatus.SUBMITTED,
            update_time=t_submitted,
            job_start_time=t_submitted,
        )

    with freezegun.freeze_time(t_pending):
        serializer.update_check_status(job_id, JobStatus.PENDING)
        hunt_once()
        assert get_report_cache(report_name, job_id) == NotebookResultPending(
            job_id=job_id,
            report_name=report_name,
            report_title=report_name,
            status=JobStatus.PENDING,
            update_time=t_pending,
            job_start_time=t_submitted,
        )

    with freezegun.freeze_time(t_done):
        serializer.update_check_status(
            job_id,
            JobStatus.DONE,
            raw_html_resources={"outputs": {}},
            job_finish_time=datetime.datetime.now(),
            pdf="",
            raw_ipynb_json="[]",
            raw_html="",
        )
        hunt_once()
        assert get_report_cache(report_name, job_id) == NotebookResultComplete(
            job_id=job_id,
            report_name=report_name,
            report_title=report_name,
            status=JobStatus.DONE,
            update_time=t_done,
            job_start_time=t_submitted,
            job_finish_time=t_done,
            raw_html="",
            raw_html_resources={"outputs": {}},
            raw_ipynb_json="[]",
        )
def test_report_hunter_pending_to_done(bson_library, webapp_config):
    """Walk a job through SUBMITTED -> PENDING -> DONE and verify each cache snapshot."""
    job_id = str(uuid.uuid4())
    report_name = str(uuid.uuid4())
    serializer = initialize_serializer_from_config(webapp_config)

    t_submitted = datetime.datetime(2018, 1, 12, 2, 30)
    t_pending = datetime.datetime(2018, 1, 12, 2, 32)
    t_done = datetime.datetime(2018, 1, 12, 2, 37)

    with freezegun.freeze_time(t_submitted):
        serializer.save_check_stub(job_id, report_name, status=JobStatus.SUBMITTED)
        _report_hunter(webapp_config=webapp_config, run_once=True)
        assert get_report_cache(report_name, job_id, cache_dir=webapp_config.CACHE_DIR) == NotebookResultPending(
            job_id=job_id,
            report_name=report_name,
            report_title=report_name,
            status=JobStatus.SUBMITTED,
            update_time=t_submitted,
            job_start_time=t_submitted,
        )

    with freezegun.freeze_time(t_pending):
        serializer.update_check_status(job_id, JobStatus.PENDING)
        _report_hunter(webapp_config=webapp_config, run_once=True)
        assert get_report_cache(report_name, job_id, cache_dir=webapp_config.CACHE_DIR) == NotebookResultPending(
            job_id=job_id,
            report_name=report_name,
            report_title=report_name,
            status=JobStatus.PENDING,
            update_time=t_pending,
            job_start_time=t_submitted,
        )

    with freezegun.freeze_time(t_done):
        serializer.update_check_status(
            job_id,
            JobStatus.DONE,
            raw_html_resources={"outputs": {}},
            job_finish_time=datetime.datetime.now(),
            pdf="",
            raw_ipynb_json="[]",
            raw_html="",
        )
        _report_hunter(webapp_config=webapp_config, run_once=True)
        assert get_report_cache(report_name, job_id, cache_dir=webapp_config.CACHE_DIR) == NotebookResultComplete(
            job_id=job_id,
            report_name=report_name,
            report_title=report_name,
            status=JobStatus.DONE,
            update_time=t_done,
            job_start_time=t_submitted,
            job_finish_time=t_done,
            raw_html="",
            raw_html_resources={"outputs": {}},
            raw_ipynb_json="[]",
        )