Beispiel #1
0
def dispatcher(config, event_config, verbose):
    """Start a RAMP dispatcher configured from an event configuration.

    The RAMP dispatcher starts the RAMP workers, gathers the results they
    produce and updates the database.

    Parameters
    ----------
    config :
        Forwarded unchanged to ``Dispatcher``.
    event_config :
        Read with ``read_config`` to find the worker type and the optional
        ``'dispatcher'`` section; also forwarded unchanged to ``Dispatcher``.
    verbose :
        Falsy: logging is left unconfigured. ``1``: INFO level. Any other
        truthy value: DEBUG level.
    """
    if verbose:
        logging.basicConfig(
            format='%(asctime)s - %(levelname)s - %(name)s - %(message)s',
            level=logging.INFO if verbose == 1 else logging.DEBUG,
            datefmt='%Y:%m:%d %H:%M:%S'
        )

    parsed_event_config = read_config(event_config)
    worker_name = parsed_event_config['worker']['worker_type']
    worker_cls = available_workers[worker_name]

    # The 'dispatcher' section is optional; without it every option below
    # falls back to its default.
    if 'dispatcher' in parsed_event_config:
        options = parsed_event_config['dispatcher']
    else:
        options = {}

    Dispatcher(
        config=config,
        event_config=event_config,
        worker=worker_cls,
        n_workers=options.get('n_workers', -1),
        n_threads=options.get('n_threads', None),
        hunger_policy=options.get('hunger_policy', 'sleep'),
        time_between_collection=options.get('time_between_collection', 1),
    ).launch()
def test_error_handling_worker_setup_error(session_toy, caplog):
    """Check that an exception raised by ``worker.setup`` is dealt with.

    A mock worker class whose ``setup`` always raises is handed to the
    dispatcher. After ``launch`` the test expects six submissions in the
    'checking_error' state and the exception message in the captured log.
    """
    class Worker_mock():
        """Minimal worker whose ``setup`` step always fails."""

        def __init__(self, *args, **kwargs):
            self.state = None

        def setup(self):
            raise Exception('Test error')

        def teardown(self):
            pass

    config = read_config(database_config_template())
    event_config = read_config(ramp_config_template())

    # The dispatcher receives the class and builds its own instances, so no
    # instance has to be created here.
    dispatcher = Dispatcher(config=config,
                            event_config=event_config,
                            worker=Worker_mock,
                            n_workers=-1,
                            hunger_policy='exit')

    dispatcher.launch()
    submissions = get_submissions(session_toy,
                                  event_config['ramp']['event_name'],
                                  'checking_error')
    assert len(submissions) == 6
    assert 'Test error' in caplog.text
Beispiel #3
0
def test_deploy_ramp_event(session_scope_function):
    """Deploy an event, populate it and run the dispatcher on it.

    After the dispatcher has run, exactly one submission is expected in the
    'training_error' state.
    """
    database_config = read_config(database_config_template())
    event_config_filename = ramp_config_template()
    event_config = read_config(event_config_filename)
    ramp_config = generate_ramp_config(event_config)
    deploy_ramp_event(database_config_template(), ramp_config_template())

    db_settings = database_config['sqlalchemy']

    # Simulate users being added, signing up for the event and submitting
    # the starting kit.
    with session_scope(db_settings) as session:
        add_users(session)
        sign_up_team(session, ramp_config['event_name'], 'test_user')
        submit_starting_kits(
            session,
            ramp_config['event_name'],
            'test_user',
            ramp_config['ramp_kit_submissions_dir'],
        )

    # Process every submission of the freshly deployed event.
    Dispatcher(
        config=database_config,
        event_config=event_config,
        worker=CondaEnvWorker,
        n_workers=-1,
        hunger_policy='exit',
    ).launch()

    # The iris kit ships one submission that is expected to fail for a user.
    with session_scope(db_settings) as session:
        failed = get_submissions(
            session, event_config['ramp']['event_name'], 'training_error'
        )
        assert len(failed) == 1
Beispiel #4
0
def dispatcher(config, event_config, n_workers, n_threads, hunger_policy,
               verbose):
    """Start a RAMP dispatcher with explicitly supplied options.

    The RAMP dispatcher starts the RAMP workers, gathers the results they
    produce and updates the database.

    Parameters
    ----------
    config, n_workers, n_threads, hunger_policy :
        Forwarded unchanged to ``Dispatcher``.
    event_config :
        Read with ``read_config`` to find the worker type; also forwarded
        unchanged to ``Dispatcher``.
    verbose :
        Falsy: logging is left unconfigured. ``1``: INFO level. Any other
        truthy value: DEBUG level.
    """
    if verbose:
        logging.basicConfig(
            format='%(asctime)s - %(levelname)s - %(name)s - %(message)s',
            level=logging.INFO if verbose == 1 else logging.DEBUG,
            datefmt='%Y:%m:%d %H:%M:%S',
        )

    worker_name = read_config(event_config)['worker']['worker_type']
    worker_cls = available_workers[worker_name]

    Dispatcher(
        config=config,
        event_config=event_config,
        worker=worker_cls,
        n_workers=n_workers,
        n_threads=n_threads,
        hunger_policy=hunger_policy,
    ).launch()
def test_dispatcher_worker_retry(session_toy):
    """Check that a worker flagged 'retry' puts its submission back to 'new'."""
    dispatcher = Dispatcher(
        config=read_config(database_config_template()),
        event_config=read_config(ramp_config_template()),
        worker=CondaEnvWorker,
        n_workers=10,
        hunger_policy='exit',
    )

    dispatcher.fetch_from_db(session_toy)
    dispatcher.launch_workers(session_toy)

    # Take one worker out of the processing queue and flag it for a retry.
    queued_item = dispatcher._processing_worker_queue.get()
    worker, (submission_id, submission_name) = queued_item
    worker.status = 'retry'
    assert worker.status == 'retry'
    # Hand the worker back to the dispatcher.
    dispatcher._processing_worker_queue.put_nowait(
        (worker, (submission_id, submission_name))
    )

    # Drain the processing queue.
    while not dispatcher._processing_worker_queue.empty():
        dispatcher.collect_result(session_toy)

    new_submissions = get_submissions(session_toy, 'iris_test', 'new')
    new_names = [entry[1] for entry in new_submissions]
    assert submission_name in new_names
Beispiel #6
0
def test_integration_dispatcher(session_toy):
    """Run the dispatcher end to end and count the failed submissions."""
    event_settings = read_config(ramp_config_template())
    # NOTE(review): this call passes ``n_worker`` whereas the other tests in
    # this file pass ``n_workers`` -- confirm which keyword the targeted
    # ``Dispatcher`` version accepts.
    Dispatcher(
        config=read_config(database_config_template()),
        event_config=event_settings,
        worker=CondaEnvWorker,
        n_worker=-1,
        hunger_policy='exit',
    ).launch()

    # The iris kit ships a submission that is expected to fail for each user.
    failed = get_submissions(
        session_toy, event_settings['ramp']['event_name'], 'training_error'
    )
    assert len(failed) == 2
def test_unit_test_dispatcher(session_toy):
    """Walk through the dispatcher steps one by one and check each of them.

    The steps are: fetch submissions from the database, launch the workers,
    collect their results and update the database results.
    """
    config = read_config(database_config_template())
    event_config = read_config(ramp_config_template())
    # The number of workers is chosen larger than the number of submissions.
    dispatcher = Dispatcher(
        config=config,
        event_config=event_config,
        worker=CondaEnvWorker,
        n_workers=100,
        hunger_policy='exit',
    )

    # Every queue starts out empty.
    for queue_name in ('_awaiting_worker_queue',
                       '_processing_worker_queue',
                       '_processed_submission_queue'):
        assert getattr(dispatcher, queue_name).empty()

    # All submissions get queued, except the starting kit of each user.
    new_submissions = get_submissions(session_toy, 'iris_test', 'new')
    dispatcher.fetch_from_db(session_toy)
    n_awaiting = dispatcher._awaiting_worker_queue.qsize()
    assert n_awaiting == len(new_submissions) - 2
    sent_submissions = get_submissions(
        session_toy, 'iris_test', 'sent_to_training'
    )
    assert len(sent_submissions) == 6

    # Start the training and wait until all of it has been collected.
    dispatcher.launch_workers(session_toy)
    while not dispatcher._processing_worker_queue.empty():
        dispatcher.collect_result(session_toy)

    expected_counts = (('new', 2), ('training_error', 2), ('tested', 4))
    for state, expected in expected_counts:
        assert len(get_submissions(session_toy, 'iris_test', state)) == expected

    dispatcher.update_database_results(session_toy)
    assert dispatcher._processed_submission_queue.empty()

    event = get_event(session_toy, 'iris_test')
    for attr in ('private_leaderboard_html',
                 'public_leaderboard_html_with_links',
                 'public_leaderboard_html_no_links',
                 'failed_leaderboard_html'):
        assert getattr(event, attr)
    assert event.new_leaderboard_html is None
    for attr in ('public_competition_leaderboard_html',
                 'private_competition_leaderboard_html'):
        assert getattr(event, attr)
def test_dispatcher_timeout(session_toy):
    """Run the dispatcher with a worker timeout of 1 and count the failures."""
    event_settings = read_config(ramp_config_template())
    dispatcher = Dispatcher(
        config=read_config(database_config_template()),
        event_config=event_settings,
        worker=CondaEnvWorker,
        n_workers=-1,
        hunger_policy='exit',
    )
    # Override the timeout of the worker.
    dispatcher._worker_config["timeout"] = 1
    dispatcher.launch()

    # NOTE(review): the original comment announced "at least 3" failing
    # submissions (2 for errors and 1 for timeout) but the assertion only
    # requires 2 -- confirm which one is intended.
    failed = get_submissions(
        session_toy, event_settings['ramp']['event_name'], 'training_error'
    )
    assert len(failed) >= 2
def test_dispatcher_error():
    """Check that a non-numeric ``n_threads`` makes ``Dispatcher`` raise."""
    dispatcher_kwargs = dict(
        config=read_config(database_config_template()),
        event_config=read_config(ramp_config_template()),
        worker=CondaEnvWorker,
        n_workers=100,
        n_threads='whatever',
        hunger_policy='exit',
    )

    # Passing something that is not a number must raise a TypeError.
    expected_msg = "The parameter 'n_threads' should be a positive integer"
    with pytest.raises(TypeError, match=expected_msg):
        Dispatcher(**dispatcher_kwargs)
def test_export_leaderboard_to_dataframe(session_toy_db, event_name,
                                         expected_size):
    """Compare the full leaderboard with the scored submissions of an event.

    The dispatcher is launched first so that pending submissions are
    processed before the leaderboard is queried.
    ``event_name`` and ``expected_size`` are presumably supplied by a
    parametrization that is not visible here.
    """
    config = read_config(database_config_template())
    event_config = read_config(ramp_config_template())
    Dispatcher(
        config, event_config, n_workers=-1, hunger_policy='exit'
    ).launch()
    session_toy_db.commit()

    leaderboard = get_leaderboard_all_info(session_toy_db, event_name)
    n_rows = leaderboard.shape[0]
    # Only submissions belonging to ``event_name`` should be listed.
    assert n_rows == expected_size

    scored_query = (
        session_toy_db.query(Submission)
        .filter(Event.name == event_name)
        .filter(Event.id == EventTeam.event_id)
        .filter(EventTeam.id == Submission.event_team_id)
        .filter(Submission.state == 'scored')
    )
    scored_submissions = scored_query.all()
    assert len(scored_submissions) == leaderboard.shape[0]
def test_dispatcher_num_threads(n_threads):
    """Check how ``n_threads`` is reflected in the ``*_NUM_THREADS`` variables.

    With ``n_threads=None`` the dispatcher must keep ``n_threads`` as ``None``
    and leave ``OMP_NUM_THREADS``, ``MKL_NUM_THREADS`` and
    ``OPENBLAS_NUM_THREADS`` unset. Otherwise each of them must equal
    ``str(n_threads)``.

    The three variables are removed before the dispatcher is built and put
    back afterwards, so the result does not depend on the caller's
    environment and nothing leaks into later tests.
    """
    env_vars = [lib + "_NUM_THREADS" for lib in ('OMP', 'MKL', 'OPENBLAS')]
    config = read_config(database_config_template())
    event_config = read_config(ramp_config_template())

    # Start from a clean slate, remembering anything that was already set.
    saved_env = {
        var: os.environ.pop(var) for var in env_vars if var in os.environ
    }
    try:
        dispatcher = Dispatcher(config=config,
                                event_config=event_config,
                                worker=CondaEnvWorker,
                                n_workers=100,
                                n_threads=n_threads,
                                hunger_policy='exit')
        if n_threads is None:
            # By default the environment must not be touched.
            assert dispatcher.n_threads is n_threads
            for var in env_vars:
                # Look the key up in the mapping: ``getattr`` on
                # ``os.environ`` inspects object attributes, so it returned
                # None whatever the environment contained.
                assert os.environ.get(var) is None
        else:
            assert dispatcher.n_threads == n_threads
            for var in env_vars:
                assert os.environ[var] == str(n_threads)
    finally:
        # Undo whatever the dispatcher exported and restore the old values.
        for var in env_vars:
            os.environ.pop(var, None)
        os.environ.update(saved_env)
def test_dispatcher_aws_not_launching(session_toy_aws, caplog):
    """Check the dispatcher state when AWS workers fail to launch.

    With the test configuration the instance is expected to fail to launch
    because of an authentication error; after the unsuccessful attempt the
    worker should be torn down and the submissions left untouched.
    """
    dispatcher = Dispatcher(
        config=read_config(database_config_template()),
        event_config=read_config(ramp_aws_config_template()),
        worker=AWSWorker,
        n_workers=10,
        hunger_policy='exit',
    )
    dispatcher.fetch_from_db(session_toy_aws)
    new_before = get_submissions(session_toy_aws, 'iris_aws_test', 'new')

    dispatcher.launch_workers(session_toy_aws)
    captured_log = caplog.text
    assert 'AuthFailure' in captured_log
    # Training should not have started.
    assert 'training' not in captured_log
    assert dispatcher._processing_worker_queue.qsize() == 0

    # All the submissions must still be in the 'new' state.
    new_after = get_submissions(session_toy_aws, 'iris_aws_test', 'new')
    assert len(new_before) == len(new_after)
def test_info_on_training_error(test_launch_ec2_instances, upload_submission,
                                launch_train, is_spot_terminated,
                                training_finished, training_successful,
                                get_log_content, check_instance_status,
                                download_log, session_toy_aws, caplog):
    """Check that a Python error raised by a submission reaches the dispatcher.

    Every AWS interaction is replaced by the mock objects received as
    arguments, configured to look like a correct instance setup followed by
    a training that finishes unsuccessfully.
    """
    class DummyInstance:
        """Stand-in for an AWS instance; only exposes an ``id``."""
        id = 1

    # Everything should be mocked as correct output from the AWS instances
    # while setting up the instance and loading the submission.
    test_launch_ec2_instances.return_value = (DummyInstance(), ), 0
    for ok_mock in (upload_submission, launch_train, is_spot_terminated):
        ok_mock.return_value = 0
    training_finished.return_value = False
    download_log.return_value = 0

    dispatcher = Dispatcher(
        config=read_config(database_config_template()),
        event_config=read_config(ramp_aws_config_template()),
        worker=AWSWorker,
        n_workers=10,
        hunger_policy='exit',
    )
    dispatcher.fetch_from_db(session_toy_aws)
    dispatcher.launch_workers(session_toy_aws)

    n_running = dispatcher._processing_worker_queue.qsize()
    in_training = get_submissions(session_toy_aws, 'iris_aws_test', 'training')
    ids = [entry[0] for entry in in_training]
    assert len(in_training) > 1
    assert n_running == len(ids)

    dispatcher.time_between_collection = 0
    training_successful.return_value = False

    # Now end the submission with a training error.
    training_error_msg = 'Python error here'
    training_finished.return_value = True
    get_log_content.return_value = training_error_msg
    check_instance_status.return_value = 'finished'

    dispatcher.collect_result(session_toy_aws)

    # The workers that were in use should have been torn down.
    assert dispatcher._processing_worker_queue.qsize() == 0

    failed = get_submissions(session_toy_aws, 'iris_aws_test',
                             'training_error')
    assert len(failed) == len(ids)

    first_failed = get_submission_by_id(session_toy_aws, failed[0][0])
    assert training_error_msg in first_failed.error_msg
Beispiel #14
0
def test_update_leaderboard_functions(session_toy_function):
    """Check the leaderboard update helpers before and after a dispatcher run.

    Before the run only the 'new' leaderboards are filled in. After the run
    and the calls to ``update_leaderboards``, ``update_user_leaderboards``
    and ``update_all_user_leaderboards`` it is the other way round.
    """
    event_name = 'iris_test'
    user_name = 'test_user'
    event_leaderboard_attrs = (
        'private_leaderboard_html',
        'public_leaderboard_html_with_links',
        'public_leaderboard_html_no_links',
        'failed_leaderboard_html',
        'public_competition_leaderboard_html',
        'private_competition_leaderboard_html',
    )

    def fetch_event_teams(current_event):
        # All the teams registered for the given event.
        return (session_toy_function.query(EventTeam)
                                    .filter_by(event=current_event)
                                    .all())

    # --- state before any submission has been processed ---
    for leaderboard_type in ['public', 'private', 'failed',
                             'public competition', 'private competition']:
        assert get_leaderboard(
            session_toy_function, leaderboard_type, event_name
        ) is None
    assert get_leaderboard(session_toy_function, 'new', event_name)

    event = get_event(session_toy_function, event_name)
    for attr in event_leaderboard_attrs:
        assert getattr(event, attr) is None
    assert event.new_leaderboard_html

    event_team = get_event_team_by_name(
        session_toy_function, event_name, user_name
    )
    assert event_team.leaderboard_html is None
    assert event_team.failed_leaderboard_html is None
    assert event_team.new_leaderboard_html

    for team in fetch_event_teams(event):
        assert team.leaderboard_html is None
        assert team.failed_leaderboard_html is None
        assert team.new_leaderboard_html

    # --- run the dispatcher to process the different submissions ---
    config = read_config(database_config_template())
    event_config = read_config(ramp_config_template())
    Dispatcher(
        config, event_config, n_workers=-1, hunger_policy='exit'
    ).launch()
    session_toy_function.commit()

    # --- state after the event-level update ---
    update_leaderboards(session_toy_function, event_name)
    event = get_event(session_toy_function, event_name)
    for attr in event_leaderboard_attrs:
        assert getattr(event, attr)
    assert event.new_leaderboard_html is None

    # --- state after the single-user update ---
    update_user_leaderboards(session_toy_function, event_name, user_name)
    event_team = get_event_team_by_name(
        session_toy_function, event_name, user_name
    )
    assert event_team.leaderboard_html
    assert event_team.failed_leaderboard_html
    assert event_team.new_leaderboard_html is None

    # --- state after the all-users update ---
    update_all_user_leaderboards(session_toy_function, event_name)
    for team in fetch_event_teams(event):
        assert team.leaderboard_html
        assert team.failed_leaderboard_html
        assert team.new_leaderboard_html is None
Beispiel #15
0
def test_get_leaderboard(session_toy_db):
    """Check the HTML returned by ``get_leaderboard`` for each leaderboard type.

    The 'new' leaderboard is inspected first, then the dispatcher is launched
    and the 'failed', 'public', 'private' and competition leaderboards are
    checked, both through the number of ``<tr>`` tags they contain and
    through the exact sequence of column headers expected in each of them.
    """
    # before the dispatcher runs, submissions show up in the 'new'
    # leaderboard, for the whole event and for a single user
    leaderboard_new = get_leaderboard(session_toy_db, 'new', 'iris_test')
    assert leaderboard_new.count('<tr>') == 6
    leaderboard_new = get_leaderboard(session_toy_db, 'new', 'iris_test',
                                      'test_user')
    assert leaderboard_new.count('<tr>') == 3

    # run the dispatcher to process the different submissions
    config = read_config(database_config_template())
    event_config = read_config(ramp_config_template())
    dispatcher = Dispatcher(
        config, event_config, n_workers=-1, hunger_policy='exit'
    )
    dispatcher.launch()
    session_toy_db.commit()

    # once everything is processed there is no 'new' leaderboard anymore
    assert get_leaderboard(session_toy_db, 'new', 'iris_test') is None
    # the iris dataset has a single submission which is failing
    leaderboard_failed = get_leaderboard(session_toy_db, 'failed', 'iris_test')
    assert leaderboard_failed.count('<tr>') == 2
    leaderboard_failed = get_leaderboard(session_toy_db, 'failed', 'iris_test',
                                         'test_user')
    assert leaderboard_failed.count('<tr>') == 1

    # the remaining submission should be successful
    leaderboard_public = get_leaderboard(session_toy_db, 'public', 'iris_test')
    assert leaderboard_public.count('<tr>') == 4
    leaderboard_public = get_leaderboard(session_toy_db, 'public', 'iris_test',
                                         'test_user')
    assert leaderboard_public.count('<tr>') == 2

    leaderboard_private = get_leaderboard(session_toy_db, 'private',
                                          'iris_test')
    assert leaderboard_private.count('<tr>') == 4
    leaderboard_private = get_leaderboard(session_toy_db, 'private',
                                          'iris_test', 'test_user')
    assert leaderboard_private.count('<tr>') == 2

    # the competition leaderboard will have the best solution for each user
    competition_public = get_leaderboard(session_toy_db, 'public competition',
                                         'iris_test')
    assert competition_public.count('<tr>') == 2
    competition_private = get_leaderboard(session_toy_db,
                                          'private competition', 'iris_test')
    assert competition_private.count('<tr>') == 2

    # check the difference between the public and private leaderboard:
    # the private one has more cells and is the only one mentioning the
    # terms below
    assert leaderboard_private.count('<td>') > leaderboard_public.count('<td>')
    for private_term in ['bag', 'mean', 'std', 'private']:
        assert private_term not in leaderboard_public
        assert private_term in leaderboard_private

    # check the column name in each leaderboard; the literals below are
    # matched as exact substrings, so their line breaks and indentation
    # matter
    assert """<th>team</th>
      <th>submission</th>
      <th>bag public acc</th>
      <th>bag public error</th>
      <th>bag public nll</th>
      <th>bag public f1_70</th>
      <th>bag private acc</th>
      <th>bag private error</th>
      <th>bag private nll</th>
      <th>bag private f1_70</th>
      <th>mean public acc</th>
      <th>mean public error</th>
      <th>mean public nll</th>
      <th>mean public f1_70</th>
      <th>mean private acc</th>
      <th>mean private error</th>
      <th>mean private nll</th>
      <th>mean private f1_70</th>
      <th>std public acc</th>
      <th>std public error</th>
      <th>std public nll</th>
      <th>std public f1_70</th>
      <th>std private acc</th>
      <th>std private error</th>
      <th>std private nll</th>
      <th>std private f1_70</th>
      <th>contributivity</th>
      <th>historical contributivity</th>
      <th>train time [s]</th>
      <th>test time [s]</th>
      <th>max RAM [MB]</th>
      <th>submitted at (UTC)</th>""" in leaderboard_private
    assert """<th>team</th>
      <th>submission</th>
      <th>acc</th>
      <th>error</th>
      <th>nll</th>
      <th>f1_70</th>
      <th>contributivity</th>
      <th>historical contributivity</th>
      <th>train time [s]</th>
      <th>test time [s]</th>
      <th>max RAM [MB]</th>
      <th>submitted at (UTC)</th>""" in leaderboard_public
    assert """<th>team</th>
      <th>submission</th>
      <th>submitted at (UTC)</th>
      <th>error</th>""" in leaderboard_failed

    # check the same for the competition leaderboard
    assert """<th>rank</th>
      <th>team</th>
      <th>submission</th>
      <th>acc</th>
      <th>train time [s]</th>
      <th>test time [s]</th>
      <th>submitted at (UTC)</th>""" in competition_public
    assert """<th>rank</th>
      <th>move</th>
      <th>team</th>
      <th>submission</th>
      <th>acc</th>
      <th>train time [s]</th>
      <th>test time [s]</th>
      <th>submitted at (UTC)</th>""" in competition_private