def dispatcher(config, event_config, verbose):
    """Launch the RAMP dispatcher using settings from the event config.

    The RAMP dispatcher is in charge of starting RAMP workers, collecting
    results from them, and updating the database.

    Parameters
    ----------
    config :
        Forwarded unchanged to ``Dispatcher`` as ``config``.
    event_config :
        Read with ``read_config`` to pick the worker type and the optional
        ``dispatcher`` section; also forwarded unchanged to ``Dispatcher``.
    verbose :
        Falsy leaves logging unconfigured, ``1`` selects ``INFO`` and any
        other truthy value selects ``DEBUG``.
    """
    if verbose:
        logging.basicConfig(
            format='%(asctime)s - %(levelname)s - %(name)s - %(message)s',
            level=logging.INFO if verbose == 1 else logging.DEBUG,
            datefmt='%Y:%m:%d %H:%M:%S',
        )

    event_settings = read_config(event_config)
    worker_name = event_settings['worker']['worker_type']
    worker_cls = available_workers[worker_name]

    # The ``dispatcher`` section is optional: every option has a fallback.
    if 'dispatcher' in event_settings:
        dispatcher_settings = event_settings['dispatcher']
    else:
        dispatcher_settings = {}

    Dispatcher(
        config=config,
        event_config=event_config,
        worker=worker_cls,
        n_workers=dispatcher_settings.get('n_workers', -1),
        n_threads=dispatcher_settings.get('n_threads', None),
        hunger_policy=dispatcher_settings.get('hunger_policy', 'sleep'),
        time_between_collection=dispatcher_settings.get(
            'time_between_collection', 1),
    ).launch()
def test_error_handling_worker_setup_error(session_toy, caplog):
    """Check that an exception raised by ``worker.setup`` is handled.

    A mock worker class whose ``setup`` always raises is handed to the
    dispatcher. After launching, six submissions are expected in the
    ``'checking_error'`` state and the exception message must be logged.
    """
    # mock worker: construction succeeds, setup always fails
    class Worker_mock():
        def __init__(self, *args, **kwargs):
            self.state = None

        def setup(self):
            raise Exception('Test error')

        def teardown(self):
            pass

    config = read_config(database_config_template())
    event_config = read_config(ramp_config_template())

    # The dispatcher receives the class and builds its own instances, so no
    # stand-alone instance is created here (the previous one was never used
    # and only had an attribute assigned to it, which asserted nothing).
    dispatcher = Dispatcher(config=config,
                            event_config=event_config,
                            worker=Worker_mock, n_workers=-1,
                            hunger_policy='exit')
    dispatcher.launch()

    submissions = get_submissions(session_toy,
                                  event_config['ramp']['event_name'],
                                  'checking_error')
    assert len(submissions) == 6
    assert 'Test error' in caplog.text
def test_deploy_ramp_event(session_scope_function):
    """Deploy the template event, add submissions and run the dispatcher.

    After the run exactly one submission is expected to be in the
    ``'training_error'`` state.
    """
    db_settings = read_config(database_config_template())
    event_config_path = ramp_config_template()
    event_settings = read_config(event_config_path)
    ramp_settings = generate_ramp_config(event_settings)
    deploy_ramp_event(database_config_template(), ramp_config_template())

    # simulate users being added, signing up for the event and submitting
    # the starting kit
    with session_scope(db_settings['sqlalchemy']) as session:
        add_users(session)
        sign_up_team(session, ramp_settings['event_name'], 'test_user')
        submit_starting_kits(
            session,
            ramp_settings['event_name'],
            'test_user',
            ramp_settings['ramp_kit_submissions_dir'],
        )

    # run the dispatcher on the event which is in the dataset
    event_dispatcher = Dispatcher(
        config=db_settings,
        event_config=event_settings,
        worker=CondaEnvWorker,
        n_workers=-1,
        hunger_policy='exit',
    )
    event_dispatcher.launch()

    # the iris kit contains a submission which should fail for a user
    with session_scope(db_settings['sqlalchemy']) as session:
        failed = get_submissions(
            session, event_settings['ramp']['event_name'], 'training_error'
        )
        assert len(failed) == 1
def dispatcher(config, event_config, n_workers, n_threads, hunger_policy,
               verbose):
    """Launch the RAMP dispatcher with explicitly supplied options.

    The RAMP dispatcher is in charge of starting RAMP workers, collecting
    results from them, and updating the database.

    Parameters
    ----------
    config :
        Forwarded unchanged to ``Dispatcher`` as ``config``.
    event_config :
        Read with ``read_config`` to pick the worker type; also forwarded
        unchanged to ``Dispatcher``.
    n_workers, n_threads, hunger_policy :
        Forwarded unchanged to ``Dispatcher``.
    verbose :
        Falsy leaves logging unconfigured, ``1`` selects ``INFO`` and any
        other truthy value selects ``DEBUG``.
    """
    if verbose:
        logging.basicConfig(
            format='%(asctime)s - %(levelname)s - %(name)s - %(message)s',
            level=logging.INFO if verbose == 1 else logging.DEBUG,
            datefmt='%Y:%m:%d %H:%M:%S',
        )

    event_settings = read_config(event_config)
    worker_name = event_settings['worker']['worker_type']
    worker_cls = available_workers[worker_name]

    Dispatcher(
        config=config,
        event_config=event_config,
        worker=worker_cls,
        n_workers=n_workers,
        n_threads=n_threads,
        hunger_policy=hunger_policy,
    ).launch()
def test_dispatcher_worker_retry(session_toy):
    """Check that a worker flagged ``'retry'`` sends its submission back.

    One processing worker is pulled from the queue, marked ``'retry'`` and
    re-queued; once results are collected its submission must be listed
    among the ``'new'`` submissions again.
    """
    db_config = read_config(database_config_template())
    event_config = read_config(ramp_config_template())
    dispatcher = Dispatcher(
        config=db_config,
        event_config=event_config,
        worker=CondaEnvWorker,
        n_workers=10,
        hunger_policy='exit',
    )

    dispatcher.fetch_from_db(session_toy)
    dispatcher.launch_workers(session_toy)

    # take one worker out of the processing queue and flag it for retry
    worker, (submission_id, submission_name) = \
        dispatcher._processing_worker_queue.get()
    worker.status = 'retry'
    assert worker.status == 'retry'

    # put it back so that the collection loop below picks it up
    queue_entry = (worker, (submission_id, submission_name))
    dispatcher._processing_worker_queue.put_nowait(queue_entry)

    while not dispatcher._processing_worker_queue.empty():
        dispatcher.collect_result(session_toy)

    new_submissions = get_submissions(session_toy, 'iris_test', 'new')
    assert any(sub[1] == submission_name for sub in new_submissions)
def test_integration_dispatcher(session_toy):
    """Run the dispatcher end to end on the toy database.

    Two submissions are expected to end in the ``'training_error'`` state.
    """
    config = read_config(database_config_template())
    event_config = read_config(ramp_config_template())
    # NOTE: the keyword is ``n_workers`` (plural), as in every other
    # ``Dispatcher`` construction in this file; ``n_worker`` was a typo.
    dispatcher = Dispatcher(config=config,
                            event_config=event_config,
                            worker=CondaEnvWorker, n_workers=-1,
                            hunger_policy='exit')
    dispatcher.launch()

    # the iris kit contains a submission which should fail for each user
    submission = get_submissions(session_toy,
                                 event_config['ramp']['event_name'],
                                 'training_error')
    assert len(submission) == 2
def test_unit_test_dispatcher(session_toy):
    """Drive the dispatcher step by step and check queues and DB states."""
    # n_workers is deliberately larger than the number of submissions
    db_config = read_config(database_config_template())
    event_config = read_config(ramp_config_template())
    dispatcher = Dispatcher(
        config=db_config,
        event_config=event_config,
        worker=CondaEnvWorker,
        n_workers=100,
        hunger_policy='exit',
    )

    # every queue starts out empty
    for queue_name in ('_awaiting_worker_queue',
                       '_processing_worker_queue',
                       '_processed_submission_queue'):
        assert getattr(dispatcher, queue_name).empty()

    # all submissions get queued
    new_submissions = get_submissions(session_toy, 'iris_test', 'new')
    dispatcher.fetch_from_db(session_toy)
    # the starting kit of each user is not counted, hence the "- 2"
    n_awaiting = dispatcher._awaiting_worker_queue.qsize()
    assert n_awaiting == len(new_submissions) - 2
    sent_submissions = get_submissions(
        session_toy, 'iris_test', 'sent_to_training'
    )
    assert len(sent_submissions) == 6

    # start the training
    dispatcher.launch_workers(session_toy)

    # collect until the training is finished
    while not dispatcher._processing_worker_queue.empty():
        dispatcher.collect_result(session_toy)

    for state, expected_count in (('new', 2),
                                  ('training_error', 2),
                                  ('tested', 4)):
        in_state = get_submissions(session_toy, 'iris_test', state)
        assert len(in_state) == expected_count

    dispatcher.update_database_results(session_toy)
    assert dispatcher._processed_submission_queue.empty()

    event = get_event(session_toy, 'iris_test')
    for html_attr in ('private_leaderboard_html',
                      'public_leaderboard_html_with_links',
                      'public_leaderboard_html_no_links',
                      'failed_leaderboard_html'):
        assert getattr(event, html_attr)
    assert event.new_leaderboard_html is None
    for html_attr in ('public_competition_leaderboard_html',
                      'private_competition_leaderboard_html'):
        assert getattr(event, html_attr)
def test_dispatcher_timeout(session_toy):
    """Run the dispatcher with the worker timeout overridden to 1."""
    db_config = read_config(database_config_template())
    event_config = read_config(ramp_config_template())
    dispatcher = Dispatcher(
        config=db_config,
        event_config=event_config,
        worker=CondaEnvWorker,
        n_workers=-1,
        hunger_policy='exit',
    )

    # override the timeout of the worker
    dispatcher._worker_config["timeout"] = 1
    dispatcher.launch()

    failed = get_submissions(
        session_toy, event_config['ramp']['event_name'], 'training_error'
    )
    # NOTE(review): the original comment expected at least 3 failures
    # (2 for errors and 1 for timeout) while only a lower bound of 2 is
    # asserted - confirm which of the two is intended.
    assert len(failed) >= 2
def test_dispatcher_error():
    """Check that a non-numeric ``n_threads`` makes ``Dispatcher`` raise."""
    db_config = read_config(database_config_template())
    event_config = read_config(ramp_config_template())

    # a value which is not a number must be rejected with a TypeError
    expected_message = "The parameter 'n_threads' should be a positive integer"
    with pytest.raises(TypeError, match=expected_message):
        Dispatcher(
            config=db_config,
            event_config=event_config,
            worker=CondaEnvWorker,
            n_workers=100,
            n_threads='whatever',
            hunger_policy='exit',
        )
def test_export_leaderboard_to_dataframe(session_toy_db,
                                         event_name, expected_size):
    """Check the size of the full-information leaderboard of an event.

    The dispatcher is launched first (the original note says this runs
    ``iris_test`` when an earlier test has not already done so), then the
    number of leaderboard rows is compared with ``expected_size`` and with
    the number of ``'scored'`` submissions of ``event_name``.
    """
    db_config = read_config(database_config_template())
    event_config = read_config(ramp_config_template())
    Dispatcher(
        db_config, event_config, n_workers=-1, hunger_policy='exit'
    ).launch()
    session_toy_db.commit()

    leaderboard = get_leaderboard_all_info(session_toy_db, event_name)
    # only submissions belonging to event_name are expected
    n_rows = leaderboard.shape[0]
    assert n_rows == expected_size

    scored_query = session_toy_db.query(Submission)
    scored_query = scored_query.filter(Event.name == event_name)
    scored_query = scored_query.filter(Event.id == EventTeam.event_id)
    scored_query = scored_query.filter(
        EventTeam.id == Submission.event_team_id)
    scored_query = scored_query.filter(Submission.state == 'scored')
    scored_submissions = scored_query.all()
    assert len(scored_submissions) == leaderboard.shape[0]
def test_dispatcher_num_threads(n_threads):
    """Check how ``Dispatcher`` handles the ``*_NUM_THREADS`` variables.

    With ``n_threads=None`` the dispatcher must leave ``OMP_NUM_THREADS``,
    ``MKL_NUM_THREADS`` and ``OPENBLAS_NUM_THREADS`` untouched; otherwise
    each of them must be set to ``str(n_threads)``.
    """
    libraries = ('OMP', 'MKL', 'OPENBLAS')
    env_names = [lib + "_NUM_THREADS" for lib in libraries]
    config = read_config(database_config_template())
    event_config = read_config(ramp_config_template())

    # Snapshot the variables before building the dispatcher. The previous
    # check used ``getattr(os.environ, name, None)``, which looks up an
    # attribute on the mapping object and therefore always returned None.
    # Comparing against the snapshot also keeps the check valid when the
    # variables are already set by the surrounding environment.
    env_before = {name: os.environ.get(name) for name in env_names}

    dispatcher = Dispatcher(config=config,
                            event_config=event_config,
                            worker=CondaEnvWorker, n_workers=100,
                            n_threads=n_threads,
                            hunger_policy='exit')

    if n_threads is None:
        # by default the environment is not modified
        assert dispatcher.n_threads is n_threads
        for name in env_names:
            assert os.environ.get(name) == env_before[name]
    else:
        assert dispatcher.n_threads == n_threads
        for name in env_names:
            assert os.environ[name] == str(n_threads)
def test_dispatcher_aws_not_launching(session_toy_aws, caplog):
    """Check that nothing is trained when the AWS instance cannot launch.

    According to the original notes, the test configuration makes the
    launch fail with an authentication error, after which the worker
    should tear down.
    """
    db_config = read_config(database_config_template())
    aws_event_config = read_config(ramp_aws_config_template())
    dispatcher = Dispatcher(
        config=db_config,
        event_config=aws_event_config,
        worker=AWSWorker,
        n_workers=10,
        hunger_policy='exit',
    )

    dispatcher.fetch_from_db(session_toy_aws)
    new_before = get_submissions(session_toy_aws, 'iris_aws_test', 'new')

    dispatcher.launch_workers(session_toy_aws)
    assert 'AuthFailure' in caplog.text
    # training should not have started
    assert 'training' not in caplog.text
    assert dispatcher._processing_worker_queue.qsize() == 0

    # all the submissions are still in the 'new' state
    new_after = get_submissions(session_toy_aws, 'iris_aws_test', 'new')
    assert len(new_before) == len(new_after)
def test_info_on_training_error(test_launch_ec2_instances, upload_submission,
                                launch_train, is_spot_terminated,
                                training_finished, training_successful,
                                get_log_content, check_instance_status,
                                download_log, session_toy_aws, caplog):
    """Check that a training error message reaches the submission record.

    All AWS interactions are stubbed through the mock arguments so that
    instance set-up and submission upload look successful; the training is
    then reported as finished but unsuccessful.
    """
    # dummy AWS instance returned by the mocked launcher
    class DummyInstance:
        id = 1

    test_launch_ec2_instances.return_value = (DummyInstance(), ), 0
    upload_submission.return_value = 0
    launch_train.return_value = 0
    is_spot_terminated.return_value = 0
    training_finished.return_value = False
    download_log.return_value = 0

    db_config = read_config(database_config_template())
    aws_event_config = read_config(ramp_aws_config_template())
    dispatcher = Dispatcher(
        config=db_config,
        event_config=aws_event_config,
        worker=AWSWorker,
        n_workers=10,
        hunger_policy='exit',
    )
    dispatcher.fetch_from_db(session_toy_aws)
    dispatcher.launch_workers(session_toy_aws)
    n_running = dispatcher._processing_worker_queue.qsize()

    training_submissions = get_submissions(
        session_toy_aws, 'iris_aws_test', 'training'
    )
    training_ids = [sub[0] for sub in training_submissions]
    assert len(training_submissions) > 1
    assert n_running == len(training_ids)

    dispatcher.time_between_collection = 0
    training_successful.return_value = False

    # now end the submissions with a training error
    training_finished.return_value = True
    training_error_msg = 'Python error here'
    get_log_content.return_value = training_error_msg
    check_instance_status.return_value = 'finished'

    dispatcher.collect_result(session_toy_aws)

    # the workers which were in use should have been torn down
    n_running = dispatcher._processing_worker_queue.qsize()
    assert n_running == 0

    errored_submissions = get_submissions(
        session_toy_aws, 'iris_aws_test', 'training_error'
    )
    assert len(errored_submissions) == len(training_ids)

    first_errored = get_submission_by_id(
        session_toy_aws, errored_submissions[0][0]
    )
    assert training_error_msg in first_errored.error_msg
def test_update_leaderboard_functions(session_toy_function):
    """Check the leaderboard HTML fields before and after each update call.

    Before the dispatcher runs only the ``new`` leaderboards are filled.
    After running it and calling the update helpers, the scored and failed
    leaderboards are filled and the ``new`` ones are ``None``.
    """
    event_name = 'iris_test'
    user_name = 'test_user'

    # attribute names checked on the event and on each event team
    event_html_attrs = (
        'private_leaderboard_html',
        'public_leaderboard_html_with_links',
        'public_leaderboard_html_no_links',
        'failed_leaderboard_html',
        'public_competition_leaderboard_html',
        'private_competition_leaderboard_html',
    )
    team_html_attrs = ('leaderboard_html', 'failed_leaderboard_html')

    # --- state before any submission has been processed ---
    for leaderboard_type in ('public', 'private', 'failed',
                             'public competition', 'private competition'):
        assert get_leaderboard(
            session_toy_function, leaderboard_type, event_name
        ) is None
    assert get_leaderboard(session_toy_function, 'new', event_name)

    event = get_event(session_toy_function, event_name)
    for attr in event_html_attrs:
        assert getattr(event, attr) is None
    assert event.new_leaderboard_html

    event_team = get_event_team_by_name(
        session_toy_function, event_name, user_name
    )
    for attr in team_html_attrs:
        assert getattr(event_team, attr) is None
    assert event_team.new_leaderboard_html

    team_query = session_toy_function.query(EventTeam).filter_by(event=event)
    for team in team_query.all():
        for attr in team_html_attrs:
            assert getattr(team, attr) is None
        assert team.new_leaderboard_html

    # --- run the dispatcher to process the different submissions ---
    db_config = read_config(database_config_template())
    event_config = read_config(ramp_config_template())
    Dispatcher(
        db_config, event_config, n_workers=-1, hunger_policy='exit'
    ).launch()
    session_toy_function.commit()

    # --- event-level leaderboards ---
    update_leaderboards(session_toy_function, event_name)
    event = get_event(session_toy_function, event_name)
    for attr in event_html_attrs:
        assert getattr(event, attr)
    assert event.new_leaderboard_html is None

    # --- leaderboards of a single user ---
    update_user_leaderboards(session_toy_function, event_name, user_name)
    event_team = get_event_team_by_name(
        session_toy_function, event_name, user_name
    )
    for attr in team_html_attrs:
        assert getattr(event_team, attr)
    assert event_team.new_leaderboard_html is None

    # --- leaderboards of all users ---
    update_all_user_leaderboards(session_toy_function, event_name)
    team_query = session_toy_function.query(EventTeam).filter_by(event=event)
    for team in team_query.all():
        for attr in team_html_attrs:
            assert getattr(team, attr)
        assert team.new_leaderboard_html is None
def test_get_leaderboard(session_toy_db):
    """Check row counts and column headers of every leaderboard type.

    The ``new`` leaderboard is inspected first, then the dispatcher is run
    and the failed, public, private and competition leaderboards are
    checked, both for the whole event and for ``test_user`` alone.
    """
    def _squash(html):
        # Collapse every run of whitespace into one space so that the header
        # checks below do not depend on how the HTML is laid out.
        return ' '.join(html.split())

    def _header(columns):
        # Expected sequence of <th> cells, separated by single spaces.
        return ' '.join('<th>{}</th>'.format(col) for col in columns)

    leaderboard_new = get_leaderboard(session_toy_db, 'new', 'iris_test')
    assert leaderboard_new.count('<tr>') == 6
    leaderboard_new = get_leaderboard(session_toy_db, 'new', 'iris_test',
                                      'test_user')
    assert leaderboard_new.count('<tr>') == 3

    # run the dispatcher to process the different submissions
    config = read_config(database_config_template())
    event_config = read_config(ramp_config_template())
    dispatcher = Dispatcher(
        config, event_config, n_workers=-1, hunger_policy='exit'
    )
    dispatcher.launch()
    session_toy_db.commit()

    assert get_leaderboard(session_toy_db, 'new', 'iris_test') is None

    # the iris dataset has a single submission which is failing
    leaderboard_failed = get_leaderboard(session_toy_db, 'failed',
                                         'iris_test')
    assert leaderboard_failed.count('<tr>') == 2
    leaderboard_failed = get_leaderboard(session_toy_db, 'failed',
                                         'iris_test', 'test_user')
    assert leaderboard_failed.count('<tr>') == 1

    # the remaining submissions should be successful
    leaderboard_public = get_leaderboard(session_toy_db, 'public',
                                         'iris_test')
    assert leaderboard_public.count('<tr>') == 4
    leaderboard_public = get_leaderboard(session_toy_db, 'public',
                                         'iris_test', 'test_user')
    assert leaderboard_public.count('<tr>') == 2

    leaderboard_private = get_leaderboard(session_toy_db, 'private',
                                          'iris_test')
    assert leaderboard_private.count('<tr>') == 4
    leaderboard_private = get_leaderboard(session_toy_db, 'private',
                                          'iris_test', 'test_user')
    assert leaderboard_private.count('<tr>') == 2

    # the competition leaderboard will have the best solution for each user
    competition_public = get_leaderboard(session_toy_db,
                                         'public competition', 'iris_test')
    assert competition_public.count('<tr>') == 2
    competition_private = get_leaderboard(session_toy_db,
                                          'private competition', 'iris_test')
    assert competition_private.count('<tr>') == 2

    # check the difference between the public and private leaderboard
    assert leaderboard_private.count('<td>') > leaderboard_public.count('<td>')
    for private_term in ['bag', 'mean', 'std', 'private']:
        assert private_term not in leaderboard_public
        assert private_term in leaderboard_private

    # check the column names in each leaderboard
    metrics = ('acc', 'error', 'nll', 'f1_70')
    trailing_columns = (
        'contributivity', 'historical contributivity', 'train time [s]',
        'test time [s]', 'max RAM [MB]', 'submitted at (UTC)',
    )
    private_columns = (
        ('team', 'submission')
        + tuple(
            '{} {} {}'.format(stat, split, metric)
            for stat in ('bag', 'mean', 'std')
            for split in ('public', 'private')
            for metric in metrics
        )
        + trailing_columns
    )
    public_columns = ('team', 'submission') + metrics + trailing_columns
    failed_columns = ('team', 'submission', 'submitted at (UTC)', 'error')

    assert _header(private_columns) in _squash(leaderboard_private)
    assert _header(public_columns) in _squash(leaderboard_public)
    assert _header(failed_columns) in _squash(leaderboard_failed)

    # check the same for the competition leaderboard
    competition_public_columns = (
        'rank', 'team', 'submission', 'acc', 'train time [s]',
        'test time [s]', 'submitted at (UTC)',
    )
    competition_private_columns = (
        'rank', 'move', 'team', 'submission', 'acc', 'train time [s]',
        'test time [s]', 'submitted at (UTC)',
    )
    assert (_header(competition_public_columns)
            in _squash(competition_public))
    assert (_header(competition_private_columns)
            in _squash(competition_private))