Example #1
0
def test_algorithm_required_hyperparameters_not_provided(session):
    """Missing required hyperparameters must be rejected with ``ValueError``.

    The mocked ``describe_algorithm`` response declares two required
    hyperparameters, ``hp1`` and ``hp2``. ``set_hyperparameters`` is called
    with ``hp2`` only and is expected to raise; a subsequent ``fit`` call is
    expected to raise as well.
    """
    hp1_spec = {
        "Description": "A continuous hyperparameter",
        "Type": "Categorical",
        "Name": "hp1",
        "Range": {
            "CategoricalParameterRangeSpecification": {
                "Values": ["TF", "MXNet"],
            },
        },
        "IsTunable": True,
        "IsRequired": True,
    }
    hp2_spec = {
        "Name": "hp2",
        "Description": "A continuous hyperparameter",
        "Type": "Categorical",
        "IsTunable": False,
        "IsRequired": True,
    }

    # Work on a private copy so the shared response fixture stays untouched.
    algo_description = copy.deepcopy(DESCRIBE_ALGORITHM_RESPONSE)
    training_spec = algo_description["TrainingSpecification"]
    training_spec["SupportedHyperParameters"] = [hp1_spec, hp2_spec]

    describe_mock = Mock(return_value=algo_description)
    session.sagemaker_client.describe_algorithm = describe_mock

    estimator_kwargs = {
        "algorithm_arn":
        "arn:aws:sagemaker:us-east-2:1234:algorithm/scikit-decision-trees",
        "role": "SageMakerRole",
        "train_instance_type": "ml.m4.2xlarge",
        "train_instance_count": 1,
        "sagemaker_session": session,
    }
    estimator = AlgorithmEstimator(**estimator_kwargs)

    # Only hp2 is supplied; hp1 is also required, so this must be rejected.
    with pytest.raises(ValueError):
        estimator.set_hyperparameters(hp2="TF2")

    # fit() must also fail while required hyperparameters are unset. This
    # covers callers that never invoke set_hyperparameters() explicitly.
    fit_inputs = {"training": "s3://some/place"}
    with pytest.raises(ValueError):
        estimator.fit(fit_inputs)
def test_algorithm_trainining_channels_with_invalid_channels(
        sagemaker_session):
    """``fit`` must raise ``ValueError`` for invalid input channel sets.

    The mocked algorithm declares a required ``training`` channel and an
    optional ``validation`` channel. Two invalid inputs are exercised:
    omitting the required channel, and supplying an undeclared one.
    """

    def _channel(name, description, required):
        # Each call builds fresh lists so the two channels share no objects.
        return {
            'Name': name,
            'Description': description,
            'IsRequired': required,
            'SupportedContentTypes': ['text/csv'],
            'SupportedCompressionTypes': ['None'],
            'SupportedInputModes': ['File'],
        }

    algo_description = copy.deepcopy(DESCRIBE_ALGORITHM_RESPONSE)
    training_spec = algo_description['TrainingSpecification']
    training_spec['TrainingChannels'] = [
        _channel('training',
                 'Input channel that provides training data', True),
        _channel('validation',
                 'Input channel that provides validation data', False),
    ]

    sagemaker_client = sagemaker_session.sagemaker_client
    sagemaker_client.describe_algorithm = Mock(return_value=algo_description)

    algorithm_arn = (
        'arn:aws:sagemaker:us-east-2:1234:algorithm/scikit-decision-trees')
    estimator = AlgorithmEstimator(
        algorithm_arn=algorithm_arn,
        role='SageMakerRole',
        train_instance_type='ml.m4.xlarge',
        train_instance_count=1,
        sagemaker_session=sagemaker_session,
    )

    invalid_inputs = (
        # Only the optional channel: the required 'training' one is missing.
        {'validation': 's3://some/thing'},
        # 'training2' is not a channel declared by the algorithm.
        {
            'training': 's3://some/data',
            'training2': 's3://some/other/data'
        },
    )
    for inputs in invalid_inputs:
        with pytest.raises(ValueError):
            estimator.fit(inputs)
Example #3
0
def test_algorithm_trainining_channels_with_invalid_channels(session):
    """Check that ``fit`` rejects channel sets the algorithm does not accept.

    ``describe_algorithm`` is mocked to advertise a required ``training``
    channel and an optional ``validation`` channel; ``fit`` is then expected
    to raise ``ValueError`` for each invalid combination below.
    """
    # (name, description, is_required) for every advertised channel.
    channel_rows = (
        ("training", "Input channel that provides training data", True),
        ("validation", "Input channel that provides validation data", False),
    )
    advertised_channels = [
        {
            "Name": channel_name,
            "Description": channel_description,
            "IsRequired": is_required,
            "SupportedContentTypes": ["text/csv"],
            "SupportedCompressionTypes": ["None"],
            "SupportedInputModes": ["File"],
        }
        for channel_name, channel_description, is_required in channel_rows
    ]

    describe_response = copy.deepcopy(DESCRIBE_ALGORITHM_RESPONSE)
    describe_response["TrainingSpecification"][
        "TrainingChannels"] = advertised_channels

    session.sagemaker_client.describe_algorithm = Mock(
        return_value=describe_response)

    estimator = AlgorithmEstimator(
        sagemaker_session=session,
        train_instance_count=1,
        train_instance_type="ml.m4.xlarge",
        role="SageMakerRole",
        algorithm_arn=
        "arn:aws:sagemaker:us-east-2:1234:algorithm/scikit-decision-trees",
    )

    # The required 'training' channel is absent, so this must be rejected.
    validation_only = {"validation": "s3://some/thing"}
    with pytest.raises(ValueError):
        estimator.fit(validation_only)

    # 'training2' is not advertised by the algorithm, so this must be
    # rejected too.
    with_unknown_channel = {
        "training": "s3://some/data",
        "training2": "s3://some/other/data",
    }
    with pytest.raises(ValueError):
        estimator.fit(with_unknown_channel)
def test_algorithm_required_hyperparameters_not_provided(sagemaker_session):
    """Required hyperparameters that are not supplied must cause ``ValueError``.

    The algorithm description returned by the mocked client marks both
    ``hp1`` and ``hp2`` as required. Setting only ``hp2`` is expected to
    raise, and so is calling ``fit`` afterwards.
    """
    describe_response = copy.deepcopy(DESCRIBE_ALGORITHM_RESPONSE)
    training_spec = describe_response['TrainingSpecification']
    training_spec['SupportedHyperParameters'] = [
        {
            'Description': 'A continuous hyperparameter',
            'Type': 'Categorical',
            'Name': 'hp1',
            'Range': {
                'CategoricalParameterRangeSpecification': {
                    'Values': ['TF', 'MXNet']
                }
            },
            'IsTunable': True,
            'IsRequired': True,
        },
        {
            'Name': 'hp2',
            'Description': 'A continuous hyperparameter',
            'Type': 'Categorical',
            'IsTunable': False,
            'IsRequired': True
        },
    ]

    client = sagemaker_session.sagemaker_client
    client.describe_algorithm = Mock(return_value=describe_response)

    arn = 'arn:aws:sagemaker:us-east-2:1234:algorithm/scikit-decision-trees'
    estimator = AlgorithmEstimator(
        algorithm_arn=arn,
        role='SageMakerRole',
        train_instance_type='ml.m4.2xlarge',
        train_instance_count=1,
        sagemaker_session=sagemaker_session,
    )

    # hp1 is required but only hp2 is given.
    with pytest.raises(ValueError):
        estimator.set_hyperparameters(hp2='TF2')

    # Required hyperparameters are still unset, so fit() must fail as well;
    # this is the path taken when set_hyperparameters() is never called.
    training_inputs = {'training': 's3://some/place'}
    with pytest.raises(ValueError):
        estimator.fit(training_inputs)
def test_algorithm_trainining_channels_with_expected_channels(
        sagemaker_session):
    """``fit`` must accept input channel sets that match the algorithm's spec.

    The mocked algorithm declares a required ``training`` channel and an
    optional ``validation`` channel. The test passes when neither ``fit``
    call raises.
    """
    training_channel = {
        'Name': 'training',
        'Description': 'Input channel that provides training data',
        'IsRequired': True,
        'SupportedContentTypes': ['text/csv'],
        'SupportedCompressionTypes': ['None'],
        'SupportedInputModes': ['File'],
    }
    # Same shape as the training channel, but optional. The deep copy keeps
    # the nested lists independent between the two channel dicts.
    validation_channel = copy.deepcopy(training_channel)
    validation_channel.update({
        'Name': 'validation',
        'Description': 'Input channel that provides validation data',
        'IsRequired': False,
    })

    algo_description = copy.deepcopy(DESCRIBE_ALGORITHM_RESPONSE)
    algo_description['TrainingSpecification']['TrainingChannels'] = [
        training_channel,
        validation_channel,
    ]

    describe_mock = Mock(return_value=algo_description)
    sagemaker_session.sagemaker_client.describe_algorithm = describe_mock

    estimator = AlgorithmEstimator(
        algorithm_arn=
        'arn:aws:sagemaker:us-east-2:1234:algorithm/scikit-decision-trees',
        role='SageMakerRole',
        train_instance_type='ml.m4.xlarge',
        train_instance_count=1,
        sagemaker_session=sagemaker_session,
    )

    accepted_inputs = (
        # Both declared channels supplied.
        {
            'training': 's3://some/place',
            'validation': 's3://some/other'
        },
        # Required channel only; 'validation' is optional.
        {'training': 's3://some/place'},
    )
    for inputs in accepted_inputs:
        estimator.fit(inputs)
Example #6
0
def test_algorithm_trainining_channels_with_expected_channels(session):
    """Check that ``fit`` succeeds for valid combinations of input channels.

    ``describe_algorithm`` is mocked to advertise a required ``training``
    channel and an optional ``validation`` channel. Both ``fit`` calls below
    are expected to complete without raising.
    """
    required_training = {
        "Name": "training",
        "Description": "Input channel that provides training data",
        "IsRequired": True,
        "SupportedContentTypes": ["text/csv"],
        "SupportedCompressionTypes": ["None"],
        "SupportedInputModes": ["File"],
    }
    optional_validation = {
        "Name": "validation",
        "Description": "Input channel that provides validation data",
        "IsRequired": False,
        "SupportedContentTypes": ["text/csv"],
        "SupportedCompressionTypes": ["None"],
        "SupportedInputModes": ["File"],
    }

    describe_response = copy.deepcopy(DESCRIBE_ALGORITHM_RESPONSE)
    spec = describe_response["TrainingSpecification"]
    spec["TrainingChannels"] = [required_training, optional_validation]

    client = session.sagemaker_client
    client.describe_algorithm = Mock(return_value=describe_response)

    arn = "arn:aws:sagemaker:us-east-2:1234:algorithm/scikit-decision-trees"
    estimator = AlgorithmEstimator(
        algorithm_arn=arn,
        role="SageMakerRole",
        train_instance_type="ml.m4.xlarge",
        train_instance_count=1,
        sagemaker_session=session,
    )

    # Training plus validation: every advertised channel is provided.
    both_channels = {
        "training": "s3://some/place",
        "validation": "s3://some/other",
    }
    estimator.fit(both_channels)

    # Training alone is enough because the validation channel is optional.
    training_only = {"training": "s3://some/place"}
    estimator.fit(training_only)