def test_algorithm_required_hyperparameters_not_provided(session):
    """Required hyperparameters that are left unset must raise ``ValueError``.

    The mocked ``describe_algorithm`` response declares two hyperparameters,
    ``hp1`` and ``hp2``, both flagged ``IsRequired``. A ``ValueError`` is
    expected from ``set_hyperparameters`` when ``hp1`` is omitted, and again
    from ``fit``.
    """
    hp1_spec = {
        "Description": "A continuous hyperparameter",
        "Type": "Categorical",
        "Name": "hp1",
        "Range": {
            "CategoricalParameterRangeSpecification": {
                "Values": ["TF", "MXNet"]
            }
        },
        "IsTunable": True,
        "IsRequired": True,
    }
    hp2_spec = {
        "Name": "hp2",
        "Description": "A continuous hyperparameter",
        "Type": "Categorical",
        "IsTunable": False,
        "IsRequired": True,
    }

    # Work on a private copy so the shared module-level response is untouched.
    algo_description = copy.deepcopy(DESCRIBE_ALGORITHM_RESPONSE)
    training_spec = algo_description["TrainingSpecification"]
    training_spec["SupportedHyperParameters"] = [hp1_spec, hp2_spec]
    session.sagemaker_client.describe_algorithm = Mock(
        return_value=algo_description)

    estimator_kwargs = {
        "algorithm_arn":
            "arn:aws:sagemaker:us-east-2:1234:algorithm/scikit-decision-trees",
        "role": "SageMakerRole",
        "train_instance_type": "ml.m4.2xlarge",
        "train_instance_count": 1,
        "sagemaker_session": session,
    }
    estimator = AlgorithmEstimator(**estimator_kwargs)

    # Only hp2 is supplied here; hp1 is required as well and is missing.
    with pytest.raises(ValueError):
        estimator.set_hyperparameters(hp2="TF2")

    # fit() must also refuse to run while required hyperparameters are unset,
    # which covers callers that never invoke set_hyperparameters() themselves.
    with pytest.raises(ValueError):
        estimator.fit({"training": "s3://some/place"})
def test_algorithm_trainining_channels_with_invalid_channels(
        sagemaker_session):
    """``fit`` must raise ``ValueError`` for channel sets the algorithm rejects.

    The mocked algorithm description declares a required ``training`` channel
    and an optional ``validation`` channel. Two input mappings are expected
    to fail: one without the required channel, and one that adds a channel
    name the description does not declare.
    """

    def make_channel(name, description, required):
        # Both channels share the same content type, compression and mode.
        return {
            'Name': name,
            'Description': description,
            'IsRequired': required,
            'SupportedContentTypes': ['text/csv'],
            'SupportedCompressionTypes': ['None'],
            'SupportedInputModes': ['File'],
        }

    # Work on a private copy so the shared module-level response is untouched.
    algo_description = copy.deepcopy(DESCRIBE_ALGORITHM_RESPONSE)
    algo_description['TrainingSpecification']['TrainingChannels'] = [
        make_channel('training',
                     'Input channel that provides training data', True),
        make_channel('validation',
                     'Input channel that provides validation data', False),
    ]
    sagemaker_session.sagemaker_client.describe_algorithm = Mock(
        return_value=algo_description)

    estimator = AlgorithmEstimator(
        algorithm_arn=
        'arn:aws:sagemaker:us-east-2:1234:algorithm/scikit-decision-trees',
        role='SageMakerRole',
        train_instance_type='ml.m4.xlarge',
        train_instance_count=1,
        sagemaker_session=sagemaker_session,
    )

    rejected_inputs = (
        # Only the optional channel: the required 'training' one is missing.
        {'validation': 's3://some/thing'},
        # 'training2' is not among the channels declared above.
        {'training': 's3://some/data', 'training2': 's3://some/other/data'},
    )
    for inputs in rejected_inputs:
        with pytest.raises(ValueError):
            estimator.fit(inputs)
def test_algorithm_trainining_channels_with_invalid_channels(session):
    """Invalid channel combinations passed to ``fit`` must raise ``ValueError``.

    ``describe_algorithm`` is mocked to report a required ``training`` channel
    and an optional ``validation`` channel. The test then calls ``fit`` once
    without the required channel and once with an undeclared extra channel,
    expecting ``ValueError`` each time.
    """
    # (name, description, is_required) for each channel the algorithm declares.
    channel_rows = [
        ("training", "Input channel that provides training data", True),
        ("validation", "Input channel that provides validation data", False),
    ]
    declared_channels = [
        {
            "Name": name,
            "Description": description,
            "IsRequired": is_required,
            "SupportedContentTypes": ["text/csv"],
            "SupportedCompressionTypes": ["None"],
            "SupportedInputModes": ["File"],
        }
        for name, description, is_required in channel_rows
    ]

    # Deep-copy first so the shared module-level response is not modified.
    algo_response = copy.deepcopy(DESCRIBE_ALGORITHM_RESPONSE)
    algo_response["TrainingSpecification"]["TrainingChannels"] = (
        declared_channels)
    session.sagemaker_client.describe_algorithm = Mock(
        return_value=algo_response)

    estimator = AlgorithmEstimator(
        algorithm_arn=
        "arn:aws:sagemaker:us-east-2:1234:algorithm/scikit-decision-trees",
        role="SageMakerRole",
        train_instance_type="ml.m4.xlarge",
        train_instance_count=1,
        sagemaker_session=session,
    )

    # The required 'training' channel is absent, so fit() should refuse.
    validation_only = {"validation": "s3://some/thing"}
    with pytest.raises(ValueError):
        estimator.fit(validation_only)

    # 'training2' is not a declared channel; fit() is expected to reject it.
    with_unknown_channel = {
        "training": "s3://some/data",
        "training2": "s3://some/other/data",
    }
    with pytest.raises(ValueError):
        estimator.fit(with_unknown_channel)
def test_algorithm_required_hyperparameters_not_provided(sagemaker_session):
    """Omitting a required hyperparameter must surface as ``ValueError``.

    Both ``hp1`` and ``hp2`` are marked ``IsRequired`` in the mocked algorithm
    description. The test sets only ``hp2`` and expects ``ValueError``, then
    expects ``ValueError`` from ``fit`` as well.
    """
    # Copy the canned response so edits here do not affect the shared constant.
    some_algo = copy.deepcopy(DESCRIBE_ALGORITHM_RESPONSE)

    supported = []
    supported.append({
        'Description': 'A continuous hyperparameter',
        'Type': 'Categorical',
        'Name': 'hp1',
        'Range': {
            'CategoricalParameterRangeSpecification': {
                'Values': ['TF', 'MXNet']
            }
        },
        'IsTunable': True,
        'IsRequired': True,
    })
    supported.append({
        'Name': 'hp2',
        'Description': 'A continuous hyperparameter',
        'Type': 'Categorical',
        'IsTunable': False,
        'IsRequired': True,
    })
    some_algo['TrainingSpecification']['SupportedHyperParameters'] = supported

    describe_mock = Mock(return_value=some_algo)
    sagemaker_session.sagemaker_client.describe_algorithm = describe_mock

    estimator = AlgorithmEstimator(
        algorithm_arn=
        'arn:aws:sagemaker:us-east-2:1234:algorithm/scikit-decision-trees',
        role='SageMakerRole',
        train_instance_type='ml.m4.2xlarge',
        train_instance_count=1,
        sagemaker_session=sagemaker_session,
    )

    # hp1 is required but only hp2 is passed.
    with pytest.raises(ValueError):
        estimator.set_hyperparameters(hp2='TF2')

    # fit() should fail too while required hyperparameters remain unset; this
    # covers the case where set_hyperparameters() is never called explicitly.
    training_input = {'training': 's3://some/place'}
    with pytest.raises(ValueError):
        estimator.fit(training_input)
def test_algorithm_trainining_channels_with_expected_channels(
        sagemaker_session):
    """``fit`` must accept inputs that match the declared training channels.

    The mocked algorithm description declares a required ``training`` channel
    and an optional ``validation`` channel. ``fit`` is called with both
    channels and then with ``training`` alone; the test passes if neither
    call raises.
    """

    def make_channel(name, description, required):
        # Both channels share the same content type, compression and mode.
        return {
            'Name': name,
            'Description': description,
            'IsRequired': required,
            'SupportedContentTypes': ['text/csv'],
            'SupportedCompressionTypes': ['None'],
            'SupportedInputModes': ['File'],
        }

    # Work on a private copy so the shared module-level response is untouched.
    algo_description = copy.deepcopy(DESCRIBE_ALGORITHM_RESPONSE)
    algo_description['TrainingSpecification']['TrainingChannels'] = [
        make_channel('training',
                     'Input channel that provides training data', True),
        make_channel('validation',
                     'Input channel that provides validation data', False),
    ]
    sagemaker_session.sagemaker_client.describe_algorithm = Mock(
        return_value=algo_description)

    estimator = AlgorithmEstimator(
        algorithm_arn=
        'arn:aws:sagemaker:us-east-2:1234:algorithm/scikit-decision-trees',
        role='SageMakerRole',
        train_instance_type='ml.m4.xlarge',
        train_instance_count=1,
        sagemaker_session=sagemaker_session,
    )

    accepted_inputs = (
        # Both declared channels supplied.
        {'training': 's3://some/place', 'validation': 's3://some/other'},
        # Required channel only; 'validation' is optional.
        {'training': 's3://some/place'},
    )
    for inputs in accepted_inputs:
        estimator.fit(inputs)
def test_algorithm_trainining_channels_with_expected_channels(session):
    """Valid channel combinations must be accepted by ``fit`` without error.

    ``describe_algorithm`` is mocked to report a required ``training`` channel
    and an optional ``validation`` channel. The test calls ``fit`` with both
    channels and with only ``training``; no exception is expected.
    """
    training_channel = {
        "Name": "training",
        "Description": "Input channel that provides training data",
        "IsRequired": True,
        "SupportedContentTypes": ["text/csv"],
        "SupportedCompressionTypes": ["None"],
        "SupportedInputModes": ["File"],
    }
    validation_channel = {
        "Name": "validation",
        "Description": "Input channel that provides validation data",
        "IsRequired": False,
        "SupportedContentTypes": ["text/csv"],
        "SupportedCompressionTypes": ["None"],
        "SupportedInputModes": ["File"],
    }

    # Deep-copy first so the shared module-level response is not modified.
    algo_response = copy.deepcopy(DESCRIBE_ALGORITHM_RESPONSE)
    training_spec = algo_response["TrainingSpecification"]
    training_spec["TrainingChannels"] = [training_channel, validation_channel]

    client = session.sagemaker_client
    client.describe_algorithm = Mock(return_value=algo_response)

    estimator = AlgorithmEstimator(
        algorithm_arn=
        "arn:aws:sagemaker:us-east-2:1234:algorithm/scikit-decision-trees",
        role="SageMakerRole",
        train_instance_type="ml.m4.xlarge",
        train_instance_count=1,
        sagemaker_session=session,
    )

    # Both declared channels are supplied; this should work.
    both_channels = {
        "training": "s3://some/place",
        "validation": "s3://some/other",
    }
    estimator.fit(both_channels)

    # 'validation' is optional, so the training channel alone should also work.
    training_only = {"training": "s3://some/place"}
    estimator.fit(training_only)