def test_config_fill_values():
    # Each vector/binary fill value pair below should validate. Note the lists
    # must be the same length for zip() to cover every case.
    vector_fill_values = ["1.0 0.0 1.04 10.49", "1 2 3 4 5", "0", "1.0", ""]
    binary_fill_values = ["yes", "No", "1", "TRUE", 1]
    for vector_fill_value, binary_fill_value in zip(vector_fill_values, binary_fill_values):
        config = {
            "input_features": [
                vector_feature(preprocessing={"fill_value": vector_fill_value}),
            ],
            "output_features": [binary_feature(preprocessing={"fill_value": binary_fill_value})],
        }
        validate_config(config)

    # Malformed fill values should be rejected.
    bad_vector_fill_values = ["one two three", "1,2,3", 0]
    bad_binary_fill_values = ["one", 2, "maybe"]
    for vector_fill_value, binary_fill_value in zip(bad_vector_fill_values, bad_binary_fill_values):
        config = {
            "input_features": [
                vector_feature(preprocessing={"fill_value": vector_fill_value}),
            ],
            "output_features": [binary_feature(preprocessing={"fill_value": binary_fill_value})],
        }
        with pytest.raises(ValidationError):
            validate_config(config)
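
# A minimal sketch of the fill_value contract exercised above, using only the
# helpers already used in this file: a vector fill value is a string of
# space-separated numbers, and a binary fill value is a boolean-like string
# (or 0/1).
def test_config_fill_values_minimal_example():
    config = {
        "input_features": [vector_feature(preprocessing={"fill_value": "1.0 2.0"})],
        "output_features": [binary_feature(preprocessing={"fill_value": "yes"})],
    }
    validate_config(config)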
def test_config_bad_preprocessing_param():
    config = {
        "input_features": [
            sequence_feature(reduce_output="sum", encoder="fake"),
            image_feature(
                "/tmp/destination_folder",
                preprocessing={
                    "in_memory": True,
                    "height": 12,
                    "width": 12,
                    "num_channels": 3,
                    "tokenizer": "space",
                },
            ),
        ],
        "output_features": [category_feature(vocab_size=2, reduce_input="sum")],
        "combiner": {"type": "concat", "output_size": 14},
    }
    with pytest.raises(ValidationError, match=r"^'fake' is not one of .*"):
        validate_config(config)
def test_config_input_output_features():
    config = {
        "input_features": [
            category_feature(),
            number_feature(),
        ],
        "output_features": [binary_feature()],
    }
    validate_config(config)
def test_incorrect_output_features_config():
    config = {
        "input_features": [
            number_feature(),
        ],
        "output_features": [binary_feature(decoder="classifier")],
    }
    # Invalid decoder for binary output feature
    with pytest.raises(ValidationError):
        validate_config(config)
def test_config_bad_encoder_name():
    config = {
        "input_features": [sequence_feature(reduce_output="sum", encoder="fake")],
        "output_features": [category_feature(vocab_size=2, reduce_input="sum")],
        "combiner": {"type": "concat", "output_size": 14},
    }
    with pytest.raises(ValidationError, match=r"^'fake' is not one of .*"):
        validate_config(config)
def test_config_encoders():
    for encoder in ENCODERS:
        config = {
            "input_features": [
                sequence_feature(reduce_output="sum", encoder=encoder),
                image_feature("/tmp/destination_folder"),
            ],
            "output_features": [category_feature(vocab_size=2, reduce_input="sum")],
            "combiner": {"type": "concat", "output_size": 14},
        }
        validate_config(config)
def test_validate_with_preprocessing_defaults():
    config = {
        "input_features": [
            audio_feature("/tmp/destination_folder", preprocessing=AudioFeatureMixin.preprocessing_defaults()),
            bag_feature(preprocessing=BagFeatureMixin.preprocessing_defaults()),
            binary_feature(preprocessing=BinaryFeatureMixin.preprocessing_defaults()),
            category_feature(preprocessing=CategoryFeatureMixin.preprocessing_defaults()),
            date_feature(preprocessing=DateFeatureMixin.preprocessing_defaults()),
            h3_feature(preprocessing=H3FeatureMixin.preprocessing_defaults()),
            image_feature("/tmp/destination_folder", preprocessing=ImageFeatureMixin.preprocessing_defaults()),
            number_feature(preprocessing=NumberFeatureMixin.preprocessing_defaults()),
            sequence_feature(preprocessing=SequenceFeatureMixin.preprocessing_defaults()),
            set_feature(preprocessing=SetFeatureMixin.preprocessing_defaults()),
            text_feature(preprocessing=TextFeatureMixin.preprocessing_defaults()),
            timeseries_feature(preprocessing=TimeseriesFeatureMixin.preprocessing_defaults()),
            vector_feature(preprocessing=VectorFeatureMixin.preprocessing_defaults()),
        ],
        "output_features": [{"name": "target", "type": "category"}],
        TRAINER: {
            "decay": True,
            "learning_rate": 0.001,
            "validation_field": "target",
            "validation_metric": "accuracy",
        },
    }
    validate_config(config)
    config = merge_with_defaults(config)
    validate_config(config)
def test_incorrect_input_features_config():
    config = {
        "input_features": [
            category_feature(preprocessing={"normalization": "zscore"}),
        ],
        "output_features": [binary_feature()],
    }
    # Not a preprocessing param for category feature
    with pytest.raises(ValidationError):
        validate_config(config)

    config = {
        "input_features": [
            text_feature(preprocessing={"padding_symbol": 0}),
        ],
        "output_features": [binary_feature()],
    }
    # Incorrect type for padding_symbol preprocessing param
    with pytest.raises(ValidationError):
        validate_config(config)

    config = {
        "input_features": [
            binary_feature(),
        ],
        "output_features": [binary_feature()],
    }
    del config["input_features"][0]["type"]
    # Missing required feature type
    with pytest.raises(ValidationError):
        validate_config(config)
# eval_batch_size arrives as a test parameter; the parametrize values below are
# representative assumptions (an explicit size and None), not an exhaustive set.
@pytest.mark.parametrize("eval_batch_size", [None, 128])
def test_config_tabnet(eval_batch_size):
    config = {
        "input_features": [
            category_feature(vocab_size=2, reduce_input="sum"),
            number_feature(),
        ],
        "output_features": [binary_feature(weight_regularization=None)],
        "combiner": {
            "type": "tabnet",
            "size": 24,
            "output_size": 26,
            "sparsity": 0.000001,
            "bn_virtual_divider": 32,
            "bn_momentum": 0.4,
            "num_steps": 5,
            "relaxation_factor": 1.5,
            "use_keras_batch_norm": False,
            "bn_virtual_bs": 512,
        },
        TRAINER: {
            "batch_size": 16384,
            "eval_batch_size": eval_batch_size,
            "epochs": 1000,
            "early_stop": 20,
            "learning_rate": 0.02,
            "optimizer": {"type": "adam"},
            "decay": True,
            "decay_steps": 20000,
            "decay_rate": 0.9,
            "staircase": True,
            "regularization_lambda": 1,
            "regularization_type": "l2",
            "validation_field": "label",
        },
    }
    validate_config(config)
def test_config_features():
    all_input_features = [
        audio_feature("/tmp/destination_folder"),
        bag_feature(),
        binary_feature(),
        category_feature(),
        date_feature(),
        h3_feature(),
        image_feature("/tmp/destination_folder"),
        number_feature(),
        sequence_feature(),
        set_feature(),
        text_feature(),
        timeseries_feature(),
        vector_feature(),
    ]
    all_output_features = [
        binary_feature(),
        category_feature(),
        number_feature(),
        sequence_feature(),
        set_feature(),
        text_feature(),
        vector_feature(),
    ]

    # validate config with all features
    config = {
        "input_features": all_input_features,
        "output_features": all_output_features,
    }
    validate_config(config)

    # make sure the config with all defaults filled in is also valid
    config = merge_with_defaults(config)
    validate_config(config)

    # test various invalid output features
    input_only_features = [
        feature for feature in all_input_features if feature["type"] not in output_type_registry.keys()
    ]
    for input_feature in input_only_features:
        config = {
            "input_features": all_input_features,
            "output_features": all_output_features + [input_feature],
        }

        dtype = input_feature["type"]
        with pytest.raises(ValidationError, match=rf"^'{dtype}' is not one of .*"):
            validate_config(config)
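
# Hedged companion check: output_type_registry (used above to filter out
# input-only feature types) should, by the same reasoning, contain every type
# that appears among the output features. A compact version of that assumption:
def test_output_type_registry_contains_output_types():
    for feature in [binary_feature(), category_feature(), number_feature()]:
        assert feature["type"] in output_type_registry.keys()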
def test_config_trainer_empty_null_and_default():
    config = {
        "input_features": [
            category_feature(vocab_size=2, reduce_input="sum"),
            number_feature(),
        ],
        "output_features": [binary_feature(weight_regularization=None)],
        "combiner": {
            "type": "tabnet",
        },
        TRAINER: {},
    }
    # An empty trainer section is valid:
    validate_config(config)

    # An explicitly null trainer section is not:
    config[TRAINER] = None
    with pytest.raises(ValidationError):
        validate_config(config)

    # A fully defaulted trainer section dumped from the schema is valid:
    config[TRAINER] = ECDTrainerConfig.Schema().dump({})
    validate_config(config)
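
# Hedged sketch: the dump/load pair used in the surrounding tests should be
# symmetric for the trainer schema, i.e. a fully defaulted dump loads back
# cleanly (assumes marshmallow-style round-trip behavior).
def test_trainer_defaults_round_trip():
    defaults = ECDTrainerConfig.Schema().dump({})
    # Loading the dumped defaults should not raise.
    ECDTrainerConfig.Schema().load(defaults)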
def test_optimizer_property_validation():
    config = {
        "input_features": [
            category_feature(vocab_size=2, reduce_input="sum"),
            number_feature(),
        ],
        "output_features": [binary_feature(weight_regularization=None)],
        "combiner": {
            "type": "tabnet",
        },
        TRAINER: {},
    }
    validate_config(config)

    # Test that an optimizer's property types are enforced:
    config[TRAINER]["optimizer"] = {"type": "rmsprop"}
    validate_config(config)

    config[TRAINER]["optimizer"]["momentum"] = "invalid"
    with pytest.raises(ValidationError):
        validate_config(config)

    # Test extra keys are excluded and defaults are loaded appropriately:
    config[TRAINER]["optimizer"]["momentum"] = 10
    config[TRAINER]["optimizer"]["extra_key"] = "invalid"
    validate_config(config)
    assert not hasattr(ECDTrainerConfig.Schema().load(config[TRAINER]).optimizer, "extra_key")

    # Test bad parameter range:
    config[TRAINER]["optimizer"] = {"type": "rmsprop", "eps": -1}
    with pytest.raises(ValidationError):
        validate_config(config)

    # Test config validation for tuple types:
    config[TRAINER]["optimizer"] = {"type": "adam", "betas": (0.1, 0.1)}
    validate_config(config)
def test_config_trainer_bad_optimizer():
    config = {
        "input_features": [
            category_feature(vocab_size=2, reduce_input="sum"),
            number_feature(),
        ],
        "output_features": [binary_feature(weight_regularization=None)],
        "combiner": {
            "type": "tabnet",
        },
        TRAINER: {},
    }
    validate_config(config)

    # Test manually set-to-null optimizer vs. unspecified:
    config[TRAINER]["optimizer"] = None
    with pytest.raises(ValidationError):
        validate_config(config)
    assert ECDTrainerConfig.Schema().load({}).optimizer is not None

    # Test that all types in optimizer_registry are supported:
    for key in optimizer_registry.keys():
        config[TRAINER]["optimizer"] = {"type": key}
        validate_config(config)

    # Test invalid optimizer types:
    config[TRAINER]["optimizer"] = {"type": 0}
    with pytest.raises(ValidationError):
        validate_config(config)
    config[TRAINER]["optimizer"] = {"type": {}}
    with pytest.raises(ValidationError):
        validate_config(config)
    config[TRAINER]["optimizer"] = {"type": "invalid"}
    with pytest.raises(ValidationError):
        validate_config(config)
def test_clipper_property_validation():
    config = {
        "input_features": [
            category_feature(vocab_size=2, reduce_input="sum"),
            number_feature(),
        ],
        "output_features": [binary_feature(weight_regularization=None)],
        "combiner": {
            "type": "tabnet",
        },
        TRAINER: {},
    }
    validate_config(config)

    # Test null/empty clipper:
    config[TRAINER]["gradient_clipping"] = None
    validate_config(config)
    config[TRAINER]["gradient_clipping"] = {}
    validate_config(config)
    assert (
        ECDTrainerConfig.Schema().load(config[TRAINER]).gradient_clipping
        == ECDTrainerConfig.Schema().load({}).gradient_clipping
    )

    # Test invalid clipper type:
    config[TRAINER]["gradient_clipping"] = 0
    with pytest.raises(ValidationError):
        validate_config(config)
    config[TRAINER]["gradient_clipping"] = "invalid"
    with pytest.raises(ValidationError):
        validate_config(config)

    # Test that the clipper's property types are enforced:
    config[TRAINER]["gradient_clipping"] = {"clipglobalnorm": None}
    validate_config(config)
    config[TRAINER]["gradient_clipping"] = {"clipglobalnorm": 1}
    validate_config(config)
    config[TRAINER]["gradient_clipping"] = {"clipglobalnorm": "invalid"}
    with pytest.raises(ValidationError):
        validate_config(config)

    # Test extra keys are excluded and defaults are loaded appropriately:
    config[TRAINER]["gradient_clipping"] = {"clipnorm": 1}
    config[TRAINER]["gradient_clipping"]["extra_key"] = "invalid"
    validate_config(config)
    assert not hasattr(ECDTrainerConfig.Schema().load(config[TRAINER]).gradient_clipping, "extra_key")
def test_config_bad_combiner_types_enums():
    config = {
        "input_features": [
            category_feature(vocab_size=2, reduce_input="sum"),
            number_feature(),
        ],
        "output_features": [binary_feature(weight_regularization=None)],
        "combiner": {"type": "concat", "weights_initializer": "zeros"},
    }
    # config is valid at this point
    validate_config(config)

    # Test weights initializer:
    config["combiner"]["weights_initializer"] = {"test": "fail"}
    with pytest.raises(ValidationError, match=r"{'test': 'fail'} is not of .*"):
        validate_config(config)
    config["combiner"]["weights_initializer"] = "fail"
    with pytest.raises(ValidationError, match=r"'fail' is not of .*"):
        validate_config(config)
    config["combiner"]["weights_initializer"] = {}
    with pytest.raises(ValidationError, match=r"Failed validating 'type'"):
        validate_config(config)
    config["combiner"]["weights_initializer"] = {"type": "fail"}
    with pytest.raises(ValidationError, match=r"'fail' is not one of .*"):
        validate_config(config)
    config["combiner"]["weights_initializer"] = {"type": "normal", "stddev": 0}
    validate_config(config)

    # Test bias initializer:
    del config["combiner"]["weights_initializer"]
    config["combiner"]["bias_initializer"] = "kaiming_uniform"
    validate_config(config)
    config["combiner"]["bias_initializer"] = "fail"
    with pytest.raises(ValidationError, match=r"'fail' is not of .*"):
        validate_config(config)
    config["combiner"]["bias_initializer"] = {}
    with pytest.raises(ValidationError, match=r"Failed validating 'type'"):
        validate_config(config)
    config["combiner"]["bias_initializer"] = {"type": "fail"}
    with pytest.raises(ValidationError, match=r"'fail' is not one of .*"):
        validate_config(config)
    config["combiner"]["bias_initializer"] = {"type": "zeros", "stddev": 0}
    validate_config(config)

    # Test norm:
    del config["combiner"]["bias_initializer"]
    config["combiner"]["norm"] = "batch"
    validate_config(config)
    config["combiner"]["norm"] = "fail"
    with pytest.raises(ValidationError, match=r"'fail' is not one of .*"):
        validate_config(config)

    # Test activation:
    del config["combiner"]["norm"]
    config["combiner"]["activation"] = "relu"
    validate_config(config)
    config["combiner"]["activation"] = 123
    with pytest.raises(ValidationError, match=r"123 is not of type .*"):
        validate_config(config)

    # Test reduce_output:
    del config["combiner"]["activation"]
    # Note: {**config} is a shallow copy, so config2 shares the combiner dict with config.
    config2 = {**config}
    config2["combiner"]["type"] = "tabtransformer"
    config2["combiner"]["reduce_output"] = "sum"
    validate_config(config2)
    config2["combiner"]["reduce_output"] = "fail"
    with pytest.raises(ValidationError, match=r"'fail' is not one of .*"):
        validate_config(config2)

    # Test reduce_output = None:
    config2["combiner"]["reduce_output"] = None
    validate_config(config2)
def test_config_bad_combiner():
    config = {
        "input_features": [
            category_feature(vocab_size=2, reduce_input="sum"),
            number_feature(),
        ],
        "output_features": [binary_feature(weight_regularization=None)],
        "combiner": {
            "type": "tabnet",
        },
    }
    # config is valid at this point
    validate_config(config)

    # combiner without type
    del config["combiner"]["type"]
    with pytest.raises(ValidationError, match=r"^'type' is a required .*"):
        validate_config(config)

    # bad combiner type
    config["combiner"]["type"] = "fake"
    with pytest.raises(ValidationError, match=r"^'fake' is not one of .*"):
        validate_config(config)

    # bad combiner format (list instead of dict)
    config["combiner"] = [{"type": "tabnet"}]
    with pytest.raises(ValidationError, match=r"^\[\{'type': 'tabnet'\}\] is not of .*"):
        validate_config(config)

    # bad combiner parameter types
    config["combiner"] = {
        "type": "tabtransformer",
        "num_layers": 10,
        "dropout": False,
    }
    with pytest.raises(ValidationError, match=r"^False is not of type.*"):
        validate_config(config)

    # bad combiner parameter range
    config["combiner"] = {
        "type": "transformer",
        "dropout": -1,
    }
    with pytest.raises(ValidationError, match=r"less than the minimum.*"):
        validate_config(config)
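
# Hedged note on the match patterns used throughout this file: jsonschema
# error messages lead with the offending value, so anchoring with ^ pins a
# failure to the intended field rather than an incidental one. A compact
# illustration using the same bad-combiner case as above:
def test_bad_combiner_error_is_anchored():
    config = {
        "input_features": [number_feature()],
        "output_features": [binary_feature()],
        "combiner": {"type": "fake"},
    }
    with pytest.raises(ValidationError, match=r"^'fake' is not one of .*"):
        validate_config(config)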