# Example #1
def test_chainer_mnist_single_machine(docker_image, sagemaker_local_session,
                                      instance_type, tmpdir):
    """Train MNIST on a single machine in SageMaker local mode, then verify
    the expected output artifacts exist and that prediction responses match
    the request length for the default, CSV and JSON content types.
    """
    customer_script = 'single_machine_customer_script.py'
    hyperparameters = {'batch-size': 10000, 'epochs': 1}

    estimator = Chainer(entry_point=customer_script,
                        source_dir=mnist_path,
                        role=role,
                        image_name=docker_image,
                        train_instance_count=1,
                        train_instance_type=instance_type,
                        sagemaker_session=sagemaker_local_session,
                        hyperparameters=hyperparameters,
                        output_path='file://{}'.format(tmpdir))

    estimator.fit({
        'train': 'file://{}'.format(os.path.join(data_dir, 'train')),
        'test': 'file://{}'.format(os.path.join(data_dir, 'test'))
    })

    success_files = {
        'model': ['model.npz'],
        'output': [
            'success', 'data/accuracy.png', 'data/cg.dot', 'data/log',
            'data/loss.png'
        ],
    }
    test_utils.files_exist(str(tmpdir), success_files)

    request_data = np.zeros((100, 784), dtype='float32')

    test_utils.predict_and_assert_response_length(estimator, request_data,
                                                  instance_type)
    test_utils.predict_and_assert_response_length(estimator, request_data,
                                                  instance_type,
                                                  csv_serializer,
                                                  csv_deserializer, 'text/csv')

    test_arrays = [
        np.zeros((100, 784), dtype='float32'),
        np.zeros((100, 1, 28, 28), dtype='float32'),
        np.zeros((100, 28, 28), dtype='float32')
    ]

    with test_utils.local_mode_lock():
        # Bug fix: bind predictor before the try block. Previously it was
        # first assigned inside the try, so a failure in _json_predictor
        # made the finally clause raise NameError, masking the real error.
        predictor = None
        try:
            predictor = _json_predictor(estimator, instance_type)
            for array in test_arrays:
                response = predictor.predict(array)
                assert len(response) == len(array)
        finally:
            if predictor is not None:
                predictor.delete_endpoint()
# Example #2
def test_chainer_mnist_distributed(docker_image, sagemaker_local_session,
                                   instance_type, customer_script, tmpdir):
    """Run a two-node distributed MNIST training job in SageMaker local
    mode, verify the expected output artifacts, then check prediction
    responses for the default, JSON and CSV content types.
    """
    if instance_type == 'local_gpu':
        pytest.skip('Local Mode does not support distributed GPU training.')

    # pure_nccl communicator hangs when only one gpu is available.
    cluster_size = 2
    hyperparameters = {
        'sagemaker_process_slots_per_host': 1,
        'sagemaker_num_processes': cluster_size,
        'batch-size': 10000,
        'epochs': 1,
        'communicator': 'hierarchical'
    }

    output_uri = 'file://{}'.format(tmpdir)
    estimator = Chainer(entry_point=customer_script,
                        source_dir=mnist_path,
                        role=role,
                        image_name=docker_image,
                        train_instance_count=cluster_size,
                        train_instance_type=instance_type,
                        sagemaker_session=sagemaker_local_session,
                        hyperparameters=hyperparameters,
                        output_path=output_uri)

    channels = {
        channel: 'file://{}'.format(os.path.join(data_dir, channel))
        for channel in ('train', 'test')
    }
    estimator.fit(channels)

    expected_artifacts = {
        'model': ['model.npz'],
        'output': [
            'success', 'data/accuracy.png', 'data/cg.dot', 'data/log',
            'data/loss.png'
        ],
    }
    test_utils.files_exist(str(tmpdir), expected_artifacts)

    request_data = np.zeros((100, 784), dtype='float32')

    # Each round trip should return one prediction per input row.
    test_utils.predict_and_assert_response_length(estimator, request_data,
                                                  instance_type)
    test_utils.predict_and_assert_response_length(
        estimator, request_data, instance_type, json_serializer,
        json_deserializer, 'application/json')
    test_utils.predict_and_assert_response_length(
        estimator, request_data, instance_type, csv_serializer,
        csv_deserializer, 'text/csv')
# Example #3
def test_chainer_mnist_single_machine(docker_image, opt_ml, use_gpu):
    """Train MNIST on a single container via local_mode, verify the
    expected artifacts, then serve the model and check prediction
    responses for CSV, JSON and NPY payloads of varying dimensionality.
    """
    customer_script = 'single_machine_customer_script.py'
    hyperparameters = {'batch-size': 10000, 'epochs': 1}

    local_mode.train(customer_script,
                     data_dir,
                     docker_image,
                     opt_ml,
                     hyperparameters=hyperparameters,
                     source_dir=mnist_path,
                     use_gpu=use_gpu)

    files = [
        'model/model.npz', 'output/success', 'output/data/algo-1/accuracy.png',
        'output/data/algo-1/cg.dot', 'output/data/algo-1/log',
        'output/data/algo-1/loss.png'
    ]

    test_utils.files_exist(opt_ml, files)

    assert not local_mode.file_exists(opt_ml,
                                      'output/failure'), 'Failure happened'

    script_path = os.path.join(mnist_path, customer_script)

    with local_mode.serve(script_path,
                          model_dir=None,
                          image_name=docker_image,
                          opt_ml=opt_ml,
                          use_gpu=use_gpu,
                          source_dir=mnist_path):

        test_arrays = [
            np.zeros((100, 784), dtype='float32'),
            np.zeros((100, 1, 28, 28), dtype='float32'),
            np.zeros((100, 28, 28), dtype='float32')
        ]

        request_data = np.zeros((100, 784), dtype='float32')

        # CSV only supports 2-D data, so it is tested once outside the loop.
        data_as_list = request_data.tolist()

        test_utils.predict_and_assert_response_length(data_as_list, 'text/csv')

        for array in test_arrays:
            # JSON and NPY can take multidimensional (n > 2) arrays
            data_as_list = array.tolist()
            test_utils.predict_and_assert_response_length(
                data_as_list, 'application/json')
            # Bug fix: send the current array, not the loop-invariant 2-D
            # request_data -- otherwise n > 2 arrays were never exercised
            # through the NPY content type.
            test_utils.predict_and_assert_response_length(
                array, 'application/x-npy')
def test_chainer_mnist_distributed(docker_image, opt_ml, use_gpu,
                                   customer_script):
    """Train MNIST across a two-node cluster via local_mode, verify the
    expected artifacts, then serve the model and check prediction
    responses for JSON, CSV and NPY payloads.
    """
    cluster_size = 2
    # pure_nccl communicator hangs when only one gpu is available.
    hyperparameters = {
        'sagemaker_process_slots_per_host': 1,
        'sagemaker_num_processes': cluster_size,
        'batch-size': 10000,
        'epochs': 1,
        'communicator': 'hierarchical'
    }

    local_mode.train(customer_script,
                     data_dir,
                     docker_image,
                     opt_ml,
                     hyperparameters=hyperparameters,
                     cluster_size=cluster_size,
                     source_dir=mnist_path,
                     use_gpu=use_gpu)

    expected_files = [
        'model/model.npz', 'output/success', 'output/data/algo-1/accuracy.png',
        'output/data/algo-1/cg.dot', 'output/data/algo-1/log',
        'output/data/algo-1/loss.png'
    ]
    test_utils.files_exist(opt_ml, expected_files)

    assert not local_mode.file_exists(opt_ml,
                                      'output/failure'), 'Failure happened'

    script_path = os.path.join(mnist_path, customer_script)
    with local_mode.serve(script_path,
                          model_dir=None,
                          image_name=docker_image,
                          opt_ml=opt_ml):

        request_data = np.zeros((100, 784), dtype='float32')
        data_as_list = request_data.tolist()

        # Each round trip should return one prediction per input row.
        for payload, content_type in ((data_as_list, 'application/json'),
                                      (data_as_list, 'text/csv'),
                                      (request_data, 'application/x-npy')):
            test_utils.predict_and_assert_response_length(payload,
                                                          content_type)
# Example #5
def test_chainer_mnist_custom_loop(docker_image, sagemaker_local_session,
                                   instance_type, tmpdir):
    """Train MNIST with a hand-written training loop on one machine in
    SageMaker local mode, then verify output artifacts and prediction
    responses for the default, JSON and CSV content types.
    """
    customer_script = 'single_machine_custom_loop.py'
    hyperparameters = {'batch-size': 10000, 'epochs': 1}

    estimator = Chainer(entry_point=customer_script,
                        source_dir=mnist_path,
                        role=role,
                        image_name=docker_image,
                        train_instance_count=1,
                        train_instance_type=instance_type,
                        sagemaker_session=sagemaker_local_session,
                        hyperparameters=hyperparameters,
                        output_path='file://{}'.format(tmpdir))

    channels = {
        channel: 'file://{}'.format(os.path.join(data_dir, channel))
        for channel in ('train', 'test')
    }
    estimator.fit(channels)

    # The custom loop produces only the model and the success marker --
    # no extra plots or logs.
    test_utils.files_exist(str(tmpdir), {
        'model': ['model.npz'],
        'output': ['success'],
    })

    request_data = np.zeros((100, 784), dtype='float32')

    # Each round trip should return one prediction per input row.
    test_utils.predict_and_assert_response_length(estimator, request_data,
                                                  instance_type)
    test_utils.predict_and_assert_response_length(
        estimator, request_data, instance_type, json_serializer,
        json_deserializer, 'application/json')
    test_utils.predict_and_assert_response_length(
        estimator, request_data, instance_type, csv_serializer,
        csv_deserializer, 'text/csv')
def test_chainer_mnist_custom_loop(docker_image, opt_ml, use_gpu):
    """Train MNIST with a hand-written training loop via local_mode,
    check the minimal artifact set, then serve the model and verify
    prediction responses for JSON, CSV and NPY payloads.
    """
    customer_script = 'single_machine_custom_loop.py'
    hyperparameters = {'batch-size': 10000, 'epochs': 1}

    local_mode.train(customer_script,
                     data_dir,
                     docker_image,
                     opt_ml,
                     hyperparameters=hyperparameters,
                     source_dir=mnist_path,
                     use_gpu=use_gpu)

    # The custom loop produces only the model and the success marker.
    test_utils.files_exist(opt_ml, ['model/model.npz', 'output/success'])

    assert not local_mode.file_exists(opt_ml,
                                      'output/failure'), 'Failure happened'

    with local_mode.serve(os.path.join(mnist_path, customer_script),
                          model_dir=None,
                          image_name=docker_image,
                          opt_ml=opt_ml):

        request_data = np.zeros((100, 784), dtype='float32')
        data_as_list = request_data.tolist()

        # Each round trip should return one prediction per input row.
        for payload, content_type in ((data_as_list, 'application/json'),
                                      (data_as_list, 'text/csv'),
                                      (request_data, 'application/x-npy')):
            test_utils.predict_and_assert_response_length(payload,
                                                          content_type)