Example #1
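These snippets are individual test functions lifted out of a larger test module, so the module-level imports and constants they rely on are not shown. A minimal sketch of what Example #1 appears to assume is given below; the exact module paths (ramp_utils, ramp_utils.testing, ramp_engine.aws) and the HERE constant are assumptions, not part of the original listing.

import os
import shutil
import subprocess
from unittest import mock  # used by the patched helpers in the later examples

import pytest

# assumed module paths -- adjust to the actual project layout
from ramp_utils import read_config, generate_worker_config
from ramp_utils.testing import ramp_aws_config_template
from ramp_engine.aws import AWSWorker

# assumed: the directory holding the local config.yml and the test kits
HERE = os.path.dirname(__file__)
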
def test_aws_worker():
    if not os.path.isfile(os.path.join(HERE, 'config.yml')):
        pytest.skip("Only for local tests for now")

    ramp_kit_dir = os.path.join(HERE, 'kits', 'iris')

    # make sure the prediction and log dirs exist; if not, create them
    add_empty_dir(os.path.join(ramp_kit_dir, 'predictions'))
    add_empty_dir(os.path.join(ramp_kit_dir, 'logs'))

    # if prediction / log subdirectories from a previous run are still
    # there, remove them (build the full path, listdir only returns names)
    for subdir in os.listdir(os.path.join(ramp_kit_dir, 'predictions')):
        subdir_path = os.path.join(ramp_kit_dir, 'predictions', subdir)
        if os.path.isdir(subdir_path):
            shutil.rmtree(subdir_path)
    for subdir in os.listdir(os.path.join(ramp_kit_dir, 'logs')):
        subdir_path = os.path.join(ramp_kit_dir, 'logs', subdir)
        if os.path.isdir(subdir_path):
            shutil.rmtree(subdir_path)

    config = read_config(os.path.join(HERE, 'config.yml'))
    worker_config = generate_worker_config(config)
    worker = AWSWorker(worker_config, submission='starting_kit_local')
    worker.setup()
    assert worker.status == 'setup'
    worker.launch_submission()
    assert worker.status in ('running', 'finished')
    worker.collect_results()
    assert worker.status == 'collected'
    assert os.path.isdir(
        os.path.join(ramp_kit_dir, 'predictions', 'starting_kit_local',
                     'fold_0'))
    assert os.path.isfile(
        os.path.join(ramp_kit_dir, 'logs', 'starting_kit_local', 'log'))

    worker.teardown()
    assert worker.status == 'killed'
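
add_empty_dir is another helper that is not part of the listing; a hypothetical implementation with the behaviour the test relies on (create the directory only when it is missing) could look like this:

def add_empty_dir(path):
    # hypothetical helper: create the directory if it does not exist yet
    if not os.path.isdir(path):
        os.makedirs(path)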
Example #2
def test_launch_ec2_instances_put_back_into_queue(test_launch_ec2_instances,
                                                  caplog):
    '''Check that the worker status is set to retry and that the expected
    log message is emitted when the API returns no instances and a retry
    status.'''

    test_launch_ec2_instances.return_value = None, 'retry'

    # set up the AWS worker
    event_config = read_config(ramp_aws_config_template())['worker']

    worker = AWSWorker(event_config, submission='starting_kit_local')
    worker.config = event_config

    # worker should be put back into the queue
    worker.setup()
    assert worker.status == 'retry'
    assert 'Adding it back to the queue and will try again' in caplog.text
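
The test_launch_ec2_instances argument is a mock injected into the test, most likely through unittest.mock.patch; a sketch of the decorator is shown below, where the patch target 'ramp_engine.aws.api.launch_ec2_instances' is an assumption about where the patched helper lives.

# assumed patch target -- the listing only shows the injected mock
@mock.patch('ramp_engine.aws.api.launch_ec2_instances')
def test_launch_ec2_instances_put_back_into_queue(test_launch_ec2_instances,
                                                  caplog):
    ...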
Example #3
def test_restart_on_sudden_instance_termination(training_finished,
                                                launch_train, spot_terminated,
                                                caplog):
    class DummyInstance:
        id = 1
    launch_train.return_value = 0

    # set up the AWS worker
    event_config = read_config(ramp_aws_config_template())['worker']

    worker = AWSWorker(event_config, submission='starting_kit_local')
    worker.config = event_config
    worker.submission = 'dummy submissions'
    worker.instance = DummyInstance

    # simulate a submission that has not yet finished training
    training_finished.return_value = False
    spot_terminated.return_value = False

    worker.launch_submission()
    assert worker.status == 'running'
    assert caplog.text == ''

    # raise CalledProcessError when checking whether training finished
    training_finished.side_effect = subprocess.CalledProcessError(255, 'test')
    # the status check should then put the worker into 'retry'
    assert worker.status == 'retry'
    assert 'Unable to connect to the instance' in caplog.text
    assert 'Adding the submission back to the queue' in caplog.text
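
Example #3 receives three mocks, which suggests three stacked mock.patch decorators; since decorators are applied bottom-up, the lowest one maps to the first argument. The sketch below illustrates that ordering; the helper names under ramp_engine.aws.api are assumptions.

# decorators are applied bottom-up: the lowest decorator maps to the
# first mock argument (training_finished), the topmost to the last
@mock.patch('ramp_engine.aws.api.is_spot_terminated')   # -> spot_terminated
@mock.patch('ramp_engine.aws.api.launch_train')         # -> launch_train
@mock.patch('ramp_engine.aws.api._training_finished')   # -> training_finished
def test_restart_on_sudden_instance_termination(training_finished,
                                                launch_train, spot_terminated,
                                                caplog):
    ...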
Example #4
def test_aws_worker_upload_error(test_launch_ec2_instances, test_rsync,
                                 caplog):
    # mock dummy AWS instance
    class DummyInstance:
        id = 1

    test_launch_ec2_instances.return_value = (DummyInstance(),), 0
    # make the rsync call raise a CalledProcessError
    test_rsync.side_effect = subprocess.CalledProcessError(255, 'test')

    # set up the AWS worker
    event_config = read_config(ramp_aws_config_template())['worker']

    worker = AWSWorker(event_config, submission='starting_kit_local')
    worker.config = event_config

    # the CalledProcessError is raised and handled inside setup()
    worker.setup()
    assert worker.status == 'error'
    assert 'Unable to connect during log download' in caplog.text
Example #5
def test_aws_worker_launch_train_error(launch_train, caplog):
    # mock dummy AWS instance
    class DummyInstance:
        id = 1
    launch_train.side_effect = subprocess.CalledProcessError(255, 'test')

    # set up the AWS worker
    event_config = read_config(ramp_aws_config_template())['worker']

    worker = AWSWorker(event_config, submission='starting_kit_local')
    worker.config = event_config
    worker.submission = 'dummy submissions'
    worker.instance = DummyInstance

    # the CalledProcessError is raised and handled inside launch_submission()
    status = worker.launch_submission()
    assert 'test' in caplog.text
    assert 'Cannot start training of submission' in caplog.text
    assert worker.status == 'error'
    assert status == 1
Example #6
def test_aws_worker_download_log_error(superclass, test_rsync,
                                       caplog):
    # mock dummy AWS instance
    class DummyInstance:
        id = 'test'

    test_rsync.side_effect = subprocess.CalledProcessError(255, 'test')

    # set up the AWS worker
    superclass.return_value = True
    event_config = read_config(ramp_aws_config_template())['worker']

    worker = AWSWorker(event_config, submission='starting_kit_local')
    worker.config = event_config
    worker.status = 'finished'
    worker.instance = DummyInstance
    # collect_results will now raise a CalledProcessError internally
    exit_status, error_msg = worker.collect_results()
    assert 'Error occurred when downloading the logs' in caplog.text
    assert 'Trying to download the log once again' in caplog.text
    assert exit_status == 2
    assert 'test' in error_msg
    assert worker.status == 'error'
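
Examples #5 and #6 show that launch_submission returns a numeric exit status and collect_results returns an (exit_status, error_msg) pair. A minimal sketch of how a caller could consume that contract, assuming 0 means success (the logger name and the control flow are illustrative, not taken from the project):

import logging

logger = logging.getLogger('aws_worker_example')  # assumed logger name

exit_status = worker.launch_submission()
if exit_status != 0:
    logger.error('could not start training, worker status: %s', worker.status)
else:
    exit_status, error_msg = worker.collect_results()
    if exit_status != 0:
        logger.error('collecting results failed: %s', error_msg)
    worker.teardown()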