def test_dispatcher_worker_retry(session_toy):
    """Check that a 'retry' worker returns its submission to 'new'.

    A dispatcher is built with ``CondaEnvWorker`` workers, submissions are
    fetched and workers launched. One ``(worker, (id, name))`` entry is taken
    off the processing queue, the worker's status is forced to ``'retry'``
    and the entry is put back. Once the processing queue has been drained
    with ``collect_result``, the submission name must be listed among the
    ``'new'`` submissions of the ``iris_test`` event.

    Parameters
    ----------
    session_toy : fixture
        Database session handed to the dispatcher and to
        ``get_submissions``.
    """
    config = read_config(database_config_template())
    event_config = read_config(ramp_config_template())
    dispatcher = Dispatcher(
        config=config, event_config=event_config, worker=CondaEnvWorker,
        n_workers=10, hunger_policy='exit'
    )

    dispatcher.fetch_from_db(session_toy)
    dispatcher.launch_workers(session_toy)

    # Get one worker and set status to 'retry'
    worker, (submission_id, submission_name) = (
        dispatcher._processing_worker_queue.get()
    )
    # plain assignment: the attribute name is a constant, so setattr() adds
    # nothing here
    worker.status = 'retry'
    assert worker.status == 'retry'

    # Add back to queue
    dispatcher._processing_worker_queue.put_nowait(
        (worker, (submission_id, submission_name))
    )

    # keep collecting until no worker is left in the processing queue
    while not dispatcher._processing_worker_queue.empty():
        dispatcher.collect_result(session_toy)

    submissions = get_submissions(session_toy, 'iris_test', 'new')
    assert submission_name in [sub[1] for sub in submissions]
def test_unit_test_dispatcher(session_toy):
    """Drive the dispatcher step by step on the ``iris_test`` event.

    The test walks through ``fetch_from_db``, ``launch_workers``,
    ``collect_result`` and ``update_database_results`` and, after each
    stage, checks the dispatcher queues, the number of submissions in each
    state and finally the leaderboard fields of the event.

    Parameters
    ----------
    session_toy : fixture
        Database session passed to every dispatcher and query call.
    """
    # make sure that the size of the list is bigger than the number of
    # submissions
    db_config = read_config(database_config_template())
    ramp_config = read_config(ramp_config_template())
    dispatcher = Dispatcher(
        config=db_config,
        event_config=ramp_config,
        worker=CondaEnvWorker,
        n_workers=100,
        hunger_policy='exit',
    )

    # a freshly built dispatcher must not hold anything in its queues
    initial_queues = (
        dispatcher._awaiting_worker_queue,
        dispatcher._processing_worker_queue,
        dispatcher._processed_submission_queue,
    )
    for queue in initial_queues:
        assert queue.empty()

    # every 'new' submission should end up queued, except that (per the
    # original note) the starting kit of each user is left out: hence `- 2`
    new_before_fetch = get_submissions(session_toy, 'iris_test', 'new')
    dispatcher.fetch_from_db(session_toy)
    expected_queued = len(new_before_fetch) - 2
    assert dispatcher._awaiting_worker_queue.qsize() == expected_queued
    sent_to_training = get_submissions(
        session_toy, 'iris_test', 'sent_to_training'
    )
    assert len(sent_to_training) == 6

    # start the training and wait until every worker has been collected
    dispatcher.launch_workers(session_toy)
    while not dispatcher._processing_worker_queue.empty():
        dispatcher.collect_result(session_toy)

    # expected number of submissions per state once training is over
    expected_counts = (
        ('new', 2),
        ('training_error', 2),
        ('tested', 4),
    )
    for state, expected in expected_counts:
        assert len(get_submissions(session_toy, 'iris_test', state)) == expected

    dispatcher.update_database_results(session_toy)
    assert dispatcher._processed_submission_queue.empty()

    # leaderboards: all filled in, except the 'new' one which must be None
    event = get_event(session_toy, 'iris_test')
    for field in (
        'private_leaderboard_html',
        'public_leaderboard_html_with_links',
        'public_leaderboard_html_no_links',
        'failed_leaderboard_html',
    ):
        assert getattr(event, field)
    assert event.new_leaderboard_html is None
    for field in (
        'public_competition_leaderboard_html',
        'private_competition_leaderboard_html',
    ):
        assert getattr(event, field)
def test_info_on_training_error(test_launch_ec2_instances, upload_submission,
                                launch_train, is_spot_terminated,
                                training_finished, training_successful,
                                get_log_content, check_instance_status,
                                download_log, session_toy_aws, caplog):
    """Check that a training error on an AWS worker reaches the submission.

    Workers are launched with every AWS call returning a "healthy" value,
    then training is reported as finished but unsuccessful with a known log
    content. After one ``collect_result`` call, no worker may remain in the
    processing queue, all previously training submissions must be in the
    ``'training_error'`` state, and the log content must appear in the
    ``error_msg`` of the first of them.

    Parameters
    ----------
    test_launch_ec2_instances, upload_submission, launch_train, \
    is_spot_terminated, training_finished, training_successful, \
    get_log_content, check_instance_status, download_log
        Objects whose ``return_value`` is configured below; presumably mocks
        injected by patch decorators that are not visible in this chunk
        (TODO confirm against the decorators above this function).
    session_toy_aws : fixture
        Database session passed to the dispatcher and to the query helpers.
    caplog : fixture
        Accepted but not used in the body.
    """
    # make sure that the Python error from the solution is passed to the
    # dispatcher
    # everything should be mocked as correct output from AWS instances
    # on setting up the instance and loading the submission

    # mock dummy AWS instance
    class DummyInstance:
        id = 1

    test_launch_ec2_instances.return_value = (DummyInstance(),), 0
    upload_submission.return_value = 0
    launch_train.return_value = 0
    is_spot_terminated.return_value = 0
    training_finished.return_value = False
    download_log.return_value = 0

    config = read_config(database_config_template())
    event_config = read_config(ramp_aws_config_template())

    dispatcher = Dispatcher(
        config=config, event_config=event_config, worker=AWSWorker,
        n_workers=10, hunger_policy='exit'
    )
    dispatcher.fetch_from_db(session_toy_aws)
    dispatcher.launch_workers(session_toy_aws)
    num_running_workers = dispatcher._processing_worker_queue.qsize()

    submissions = get_submissions(
        session_toy_aws, 'iris_aws_test', 'training'
    )
    # first field of each returned entry is used as the submission id below
    ids = [sub[0] for sub in submissions]
    assert len(submissions) > 1
    assert num_running_workers == len(ids)

    dispatcher.time_between_collection = 0
    training_successful.return_value = False

    # now we will end the submission with training error
    training_finished.return_value = True
    training_error_msg = 'Python error here'
    get_log_content.return_value = training_error_msg
    check_instance_status.return_value = 'finished'

    dispatcher.collect_result(session_toy_aws)

    # the workers which we were using should have been torn down
    num_running_workers = dispatcher._processing_worker_queue.qsize()
    assert num_running_workers == 0

    submissions = get_submissions(
        session_toy_aws, 'iris_aws_test', 'training_error'
    )
    assert len(submissions) == len(ids)

    submission = get_submission_by_id(session_toy_aws, submissions[0][0])
    assert training_error_msg in submission.error_msg