예제 #1
0
def test_restart_on_sudden_instance_termination(training_finished,
                                                launch_train, spot_terminated,
                                                caplog):
    ''' checks that a running submission ends up with the retry status and
        the matching log messages once the training-finished check starts
        raising a CalledProcessError '''
    # minimal stand-in for an AWS instance; it only carries an ``id``
    class DummyInstance:
        id = 1

    launch_train.return_value = 0

    # build the worker from the 'worker' section of the AWS config template
    full_config = read_config(ramp_aws_config_template())
    worker_config = full_config['worker']
    worker = AWSWorker(worker_config, submission='starting_kit_local')
    worker.config = worker_config
    worker.submission = 'dummy submissions'
    worker.instance = DummyInstance

    # training has not finished yet and the spot instance is not terminated
    training_finished.return_value = False
    spot_terminated.return_value = False

    worker.launch_submission()
    assert worker.status == 'running'
    assert caplog.text == ''

    # from here on the training-finished check raises CalledProcessError
    training_finished.side_effect = subprocess.CalledProcessError(255, 'test')

    # NOTE(review): nothing else is called after the side effect is set, so
    # reading ``status`` is presumably what triggers the check - keep this
    # assertion ahead of the log assertions
    assert worker.status == 'retry'
    logged = caplog.text
    for expected_message in ('Unable to connect to the instance',
                             'Adding the submission back to the queue'):
        assert expected_message in logged
예제 #2
0
def test_launch_ec2_instances_put_back_into_queue(test_launch_ec2_instances,
                                                  caplog):
    ''' verifies that the worker gets the retry status and logs the
        expected message when the mocked launch call returns no instances
        together with the status retry '''
    # the mocked launch call hands back (instances, status)
    launch_result = (None, 'retry')
    test_launch_ec2_instances.return_value = launch_result

    # build the worker from the 'worker' section of the AWS config template
    full_config = read_config(ramp_aws_config_template())
    worker_config = full_config['worker']
    worker = AWSWorker(worker_config, submission='starting_kit_local')
    worker.config = worker_config

    # setting up is expected to leave the worker waiting for another try
    worker.setup()
    assert worker.status == 'retry'
    logged = caplog.text
    assert 'Adding it back to the queue and will try again' in logged
예제 #3
0
def test_aws_worker_upload_error(test_launch_ec2_instances, test_rsync,
                                 caplog):
    ''' checks that the worker ends up with the error status and logs the
        expected message when the mocked rsync call raises a
        CalledProcessError during setup '''
    # minimal stand-in for an AWS instance; it only carries an ``id``
    class DummyInstance:
        id = 1

    # the mocked launch call hands back (instances, status)
    launched_instances = (DummyInstance(),)
    test_launch_ec2_instances.return_value = (launched_instances, 0)
    # every call to the mocked rsync raises
    test_rsync.side_effect = subprocess.CalledProcessError(255, 'test')

    # build the worker from the 'worker' section of the AWS config template
    full_config = read_config(ramp_aws_config_template())
    worker_config = full_config['worker']
    worker = AWSWorker(worker_config, submission='starting_kit_local')
    worker.config = worker_config

    # setup() is expected not to let the CalledProcessError escape
    worker.setup()
    assert worker.status == 'error'
    logged = caplog.text
    assert 'Unable to connect during log download' in logged
예제 #4
0
def test_aws_worker_launch_train_error(launch_train, caplog):
    ''' checks the return value, the worker status and the logged messages
        when the mocked launch_train raises a CalledProcessError '''
    # minimal stand-in for an AWS instance; it only carries an ``id``
    class DummyInstance:
        id = 1

    launch_train.side_effect = subprocess.CalledProcessError(255, 'test')

    # build the worker from the 'worker' section of the AWS config template
    full_config = read_config(ramp_aws_config_template())
    worker_config = full_config['worker']
    worker = AWSWorker(worker_config, submission='starting_kit_local')
    worker.config = worker_config
    worker.submission = 'dummy submissions'
    worker.instance = DummyInstance

    # the CalledProcessError is expected to be handled inside the call
    launch_result = worker.launch_submission()
    logged = caplog.text
    for expected_message in ('test', 'Cannot start training of submission'):
        assert expected_message in logged
    assert worker.status == 'error'
    assert launch_result == 1
예제 #5
0
def test_aws_worker_download_log_error(superclass, test_rsync,
                                       caplog):
    ''' checks the returned exit status and message, the worker status and
        the logged messages when the mocked rsync call raises a
        CalledProcessError while collecting the results '''
    # minimal stand-in for an AWS instance; it only carries an ``id``
    class DummyInstance:
        id = 'test'

    # every call to the mocked rsync raises
    test_rsync.side_effect = subprocess.CalledProcessError(255, 'test')

    # the patched ``superclass`` mock simply returns True
    superclass.return_value = True

    # build the worker from the 'worker' section of the AWS config template
    full_config = read_config(ramp_aws_config_template())
    worker_config = full_config['worker']
    worker = AWSWorker(worker_config, submission='starting_kit_local')
    worker.config = worker_config
    worker.status = 'finished'
    worker.instance = DummyInstance

    # the worker will now throw a CalledProcessError internally
    exit_status, error_msg = worker.collect_results()
    logged = caplog.text
    for expected_message in ('Error occurred when downloading the logs',
                             'Trying to download the log once again'):
        assert expected_message in logged
    assert exit_status == 2
    assert 'test' in error_msg
    assert worker.status == 'error'