def test_pytorch_model_country_as_dense_id_list(self):
    """Train the bandit net with country as a dense id-list feature and
    require validation MSE to be competitive with the GBDT/MLP baselines.

    NOTE(review): this test still uses the older ``train_bandit`` entry
    points (``build_pytorch_net`` without ``reward_type``,
    ``fit_custom_pytorch_module_w_skorch`` with ``module=``) while sibling
    tests use ``model_constructors`` / ``model_trainers`` — confirm
    whether it should be migrated.
    """
    experiment_params = Params.EXPERIMENT_SPECIFIC_PARAMS_COUNTRY_AS_DENSE_ID_LIST
    dataset = Datasets.DATA_COUNTRY_DENSE_ID_LIST
    net_spec, pytorch_net = train_bandit.build_pytorch_net(
        feature_specs=experiment_params["features"],
        product_sets=experiment_params["product_sets"],
        float_feature_order=dataset["final_float_feature_order"],
        id_feature_order=dataset["final_id_feature_order"],
        layers=Params.ML_PARAMS["model"]["layers"],
        activations=Params.ML_PARAMS["model"]["activations"],
        input_dim=train_bandit.num_float_dim(dataset),
    )
    skorch_net = train_bandit.fit_custom_pytorch_module_w_skorch(
        module=pytorch_net,
        X=Datasets.X_COUNTRY_DENSE_ID_LIST["X_train"],
        y=Datasets.X_COUNTRY_DENSE_ID_LIST["y_train"],
        hyperparams=Params.ML_PARAMS,
    )
    # Validation loss from the final training epoch.
    test_mse = skorch_net.history[-1]["valid_loss"]
    # Make sure MSE is better than or close to out-of-the-box GBDT & MLP;
    # the GBDT needs less training, so allow a 15% tolerance.
    assert test_mse < self.results_gbdt["mse_test"] * 1.15
    assert test_mse < self.results_mlp["mse_test"] * 1.15
def test_pytorch_model_country_as_categorical(self):
    """Fit the bandit net with country encoded as a categorical feature and
    require validation MSE to be competitive with the GBDT/MLP baselines."""
    experiment_params = Params.EXPERIMENT_SPECIFIC_PARAMS_COUNTRY_AS_CATEGORICAL
    dataset = Datasets.DATA_COUNTRY_CATEG
    model_spec, pytorch_net = model_constructors.build_pytorch_net(
        feature_specs=experiment_params["features"],
        product_sets=experiment_params["product_sets"],
        float_feature_order=dataset["final_float_feature_order"],
        id_feature_order=dataset["final_id_feature_order"],
        reward_type=Params.ML_PARAMS["reward_type"],
        layers=self.model_params["layers"],
        activations=self.model_params["activations"],
        input_dim=train_bandit.num_float_dim(dataset),
    )
    skorch_net = model_trainers.fit_custom_pytorch_module_w_skorch(
        reward_type=Params.ML_PARAMS["reward_type"],
        model=pytorch_net,
        X=Datasets.X_COUNTRY_CATEG["X_train"],
        y=Datasets.X_COUNTRY_CATEG["y_train"],
        hyperparams=self.model_params,
    )
    # Validation loss from the final training epoch.
    test_mse = skorch_net.history[-1]["valid_loss"]
    # Make sure MSE is better than or close to out-of-the-box GBDT & MLP;
    # the GBDT needs less training, so allow a 15% tolerance.
    assert test_mse < self.results_gbdt["mse_test"] * 1.15
    assert test_mse < self.results_mlp["mse_test"] * 1.15
def test_pytorch_model_country_as_categorical_binary_reward(self):
    """Train with a binary reward head and compare validation accuracy
    against the GBDT classification baseline."""
    reward_type = "binary"
    experiment_params = Params.EXPERIMENT_SPECIFIC_PARAMS_COUNTRY_AS_CATEGORICAL
    dataset = Datasets.DATA_COUNTRY_CATEG_BINARY_REWARD
    model_spec, pytorch_net = model_constructors.build_pytorch_net(
        feature_specs=experiment_params["features"],
        product_sets=experiment_params["product_sets"],
        float_feature_order=dataset["final_float_feature_order"],
        id_feature_order=dataset["final_id_feature_order"],
        reward_type=reward_type,
        layers=self.model_params["layers"],
        activations=self.model_params["activations"],
        input_dim=train_bandit.num_float_dim(dataset),
    )
    skorch_net = model_trainers.fit_custom_pytorch_module_w_skorch(
        reward_type=reward_type,
        model=pytorch_net,
        X=Datasets.X_COUNTRY_CATEG_BINARY_REWARD["X_train"],
        # squeeze() presumably drops a trailing singleton label dimension
        # for the classifier — confirm against the trainer's expectations.
        y=Datasets.X_COUNTRY_CATEG_BINARY_REWARD["y_train"].squeeze(),
        hyperparams=self.model_params,
    )
    # Validation accuracy from the final training epoch.
    test_acc = skorch_net.history[-1]["valid_acc"]
    # Make sure accuracy is better than or close to the out-of-the-box GBDT;
    # the GBDT needs less training, so allow a small absolute tolerance.
    assert test_acc > self.results_gbdt_classification["acc_test"] - 0.03
def test_pytorch_model_country_as_id_list_and_decision_as_id_list(self):
    """Train with both country and decision encoded as id-list features and
    check validation MSE against the GBDT/MLP baselines."""
    experiment_params = (
        Params.EXPERIMENT_SPECIFIC_PARAMS_COUNTRY_AND_DECISION_AS_ID_LIST
    )
    dataset = Datasets.DATA_COUNTRY_AND_DECISION_ID_LIST
    model_spec, pytorch_net = model_constructors.build_pytorch_net(
        feature_specs=experiment_params["features"],
        product_sets=experiment_params["product_sets"],
        float_feature_order=dataset["final_float_feature_order"],
        id_feature_order=dataset["final_id_feature_order"],
        reward_type=Params.ML_PARAMS["reward_type"],
        layers=self.model_params["layers"],
        activations=self.model_params["activations"],
        input_dim=train_bandit.num_float_dim(dataset),
    )
    skorch_net = model_trainers.fit_custom_pytorch_module_w_skorch(
        reward_type=Params.ML_PARAMS["reward_type"],
        model=pytorch_net,
        X=Datasets.X_COUNTRY_AND_DECISION_ID_LIST["X_train"],
        y=Datasets.X_COUNTRY_AND_DECISION_ID_LIST["y_train"],
        hyperparams=self.model_params,
    )
    # Validation loss from the final training epoch.
    test_mse = skorch_net.history[-1]["valid_loss"]
    # Allow a 15% tolerance: the GBDT needs less training, and this model
    # must learn two embedding tables, so it needs more training time to
    # be competitive.
    assert test_mse < self.results_gbdt["mse_test"] * 1.15
    assert test_mse < self.results_mlp["mse_test"] * 1.15
def test_mixture_density_networks_continuous(self):
    """Train a mixture density network (MDN) on the continuous-reward data
    and sanity-check the MSE of its mean predictions.

    Fix: the original also built a ``var_est`` index list that was never
    used; it has been removed (variance rows are simply skipped when
    selecting mean rows).
    """
    experiment_params = (
        Params.EXPERIMENT_SPECIFIC_PARAMS_COUNTRY_AND_DECISION_AS_ID_LIST
    )
    dataset = Datasets.DATA_COUNTRY_AND_DECISION_ID_LIST
    model_spec, pytorch_net = model_constructors.build_pytorch_net(
        feature_specs=experiment_params["features"],
        product_sets=experiment_params["product_sets"],
        float_feature_order=dataset["final_float_feature_order"],
        id_feature_order=dataset["final_id_feature_order"],
        reward_type=Params.ML_PARAMS["reward_type"],
        layers=self.model_params["layers"],
        activations=self.model_params["activations"],
        input_dim=train_bandit.num_float_dim(dataset),
        is_mdn=True,
    )
    skorch_net = model_trainers.fit_custom_pytorch_module_w_skorch(
        reward_type=Params.ML_PARAMS["reward_type"],
        model=pytorch_net,
        X=Datasets.X_COUNTRY_AND_DECISION_ID_LIST["X_train"],
        y=Datasets.X_COUNTRY_AND_DECISION_ID_LIST["y_train"],
        hyperparams=self.model_params,
        model_name="mixture_density_network",
    )
    X_train = Datasets.X_COUNTRY_AND_DECISION_ID_LIST["X_train"]
    y_train = Datasets.X_COUNTRY_AND_DECISION_ID_LIST["y_train"]
    preds = skorch_net.predict(X_train)
    # predict() appears to interleave outputs per batch: even-numbered
    # batches hold mean estimates, odd-numbered batches hold variance
    # estimates. NOTE(review): inferred from the original indexing —
    # confirm against the MDN predict implementation.
    b_size = skorch_net.batch_size
    mu_rows = [i for i in range(preds.shape[0]) if (i // b_size) % 2 == 0]
    mse = np.mean((preds[mu_rows].flatten() - y_train.numpy().flatten()) ** 2)
    # Loose sanity bound on the mean-prediction error.
    assert mse < 25
def test_same_predictions_country_as_categorical(self):
    """Round-trip the predictor through serialization and verify that
    pre- and post-serialization predictions agree on the same context.

    NOTE(review): uses the older ``train_bandit`` / ``net=``/``net_spec=``
    predictor API, unlike the binary-reward sibling test — confirm
    whether it should be migrated.
    """
    raw_data = shuffle(Datasets._raw_data)
    rand_idx = 0
    test_input = raw_data.iloc[rand_idx]
    data = preprocessor.preprocess_data(
        raw_data,
        self.ml_params["data_reader"]["reward_function"],
        Params.EXPERIMENT_SPECIFIC_PARAMS_COUNTRY_AS_CATEGORICAL,
        shuffle_data=False,  # keep order so the same observation is testable
    )
    _X, _y = preprocessor.data_to_pytorch(data)
    offset = Datasets._offset
    X_COUNTRY_CATEG = {
        "X_train": {"X_float": _X["X_float"][:offset]},
        "y_train": _y[:offset],
        "X_test": {"X_float": _X["X_float"][offset:]},
        "y_test": _y[offset:],
    }
    experiment_params = Params.EXPERIMENT_SPECIFIC_PARAMS_COUNTRY_AS_CATEGORICAL
    dataset = Datasets.DATA_COUNTRY_CATEG
    net_spec, pytorch_net = train_bandit.build_pytorch_net(
        feature_specs=experiment_params["features"],
        product_sets=experiment_params["product_sets"],
        float_feature_order=dataset["final_float_feature_order"],
        id_feature_order=dataset["final_id_feature_order"],
        layers=self.ml_params["model"]["layers"],
        activations=self.ml_params["model"]["activations"],
        input_dim=train_bandit.num_float_dim(dataset),
    )
    pre_serialized_predictor = BanditPredictor(
        experiment_params=experiment_params,
        float_feature_order=dataset["float_feature_order"],
        id_feature_order=dataset["id_feature_order"],
        id_feature_str_to_int_map=dataset["id_feature_str_to_int_map"],
        transforms=dataset["transforms"],
        imputers=dataset["imputers"],
        net=pytorch_net,
        net_spec=net_spec,
    )
    skorch_net = train_bandit.fit_custom_pytorch_module_w_skorch(
        module=pre_serialized_predictor.net,
        X=X_COUNTRY_CATEG["X_train"],
        y=X_COUNTRY_CATEG["y_train"],
        hyperparams=self.ml_params,
    )
    # Serialize the trained predictor, then reload it into a second one.
    pre_serialized_predictor.config_to_file(self.tmp_config_path)
    pre_serialized_predictor.net_to_file(self.tmp_net_path)
    post_serialized_predictor = BanditPredictor.predictor_from_file(
        self.tmp_config_path, self.tmp_net_path
    )
    context = json.loads(test_input.context)
    pre_pred = pre_serialized_predictor.predict(context)
    post_pred = post_serialized_predictor.predict(context)
    # Scores must match within tolerance and ranked ids must be identical.
    assert np.allclose(pre_pred["scores"], post_pred["scores"], self.tol)
    assert pre_pred["ids"] == post_pred["ids"]
def test_same_predictions_country_as_categorical_binary_reward(self):
    """Round-trip the binary-reward predictor through serialization and
    verify pre/post predictions agree — including for empty and unknown
    context features."""
    reward_type = "binary"
    raw_data = shuffle(Datasets._raw_data_binary_reward)
    rand_idx = 0
    test_input = raw_data.iloc[rand_idx]
    data = preprocessor.preprocess_data(
        raw_data,
        Params.EXPERIMENT_SPECIFIC_PARAMS_COUNTRY_AS_CATEGORICAL,
        reward_type,
        shuffle_data=False,  # keep order so the same observation is testable
    )
    _X, _y = preprocessor.data_to_pytorch(data)
    offset = Datasets._offset_binary_reward
    X_COUNTRY_CATEG_BINARY_REWARD = {
        "X_train": {"X_float": _X["X_float"][:offset]},
        "y_train": _y[:offset],
        "X_test": {"X_float": _X["X_float"][offset:]},
        "y_test": _y[offset:],
    }
    experiment_params = Params.EXPERIMENT_SPECIFIC_PARAMS_COUNTRY_AS_CATEGORICAL
    dataset = Datasets.DATA_COUNTRY_CATEG_BINARY_REWARD
    model_spec, pytorch_net = model_constructors.build_pytorch_net(
        feature_specs=experiment_params["features"],
        product_sets=experiment_params["product_sets"],
        float_feature_order=dataset["final_float_feature_order"],
        id_feature_order=dataset["final_id_feature_order"],
        reward_type=reward_type,
        layers=self.model_params["layers"],
        activations=self.model_params["activations"],
        input_dim=train_bandit.num_float_dim(dataset),
    )
    pre_serialized_predictor = BanditPredictor(
        experiment_params=experiment_params,
        float_feature_order=dataset["float_feature_order"],
        id_feature_order=dataset["id_feature_order"],
        id_feature_str_to_int_map=dataset["id_feature_str_to_int_map"],
        transforms=dataset["transforms"],
        imputers=dataset["imputers"],
        model=pytorch_net,
        model_type=self.model_type,
        reward_type=reward_type,
        model_spec=model_spec,
    )
    skorch_net = model_trainers.fit_custom_pytorch_module_w_skorch(
        reward_type=reward_type,
        model=pre_serialized_predictor.model,
        X=X_COUNTRY_CATEG_BINARY_REWARD["X_train"],
        y=X_COUNTRY_CATEG_BINARY_REWARD["y_train"],
        hyperparams=self.model_params,
    )
    # Serialize the trained predictor, then reload it into a second one.
    pre_serialized_predictor.config_to_file(self.tmp_config_path)
    pre_serialized_predictor.model_to_file(self.tmp_net_path)
    post_serialized_predictor = BanditPredictor.predictor_from_file(
        self.tmp_config_path, self.tmp_net_path
    )
    context = json.loads(test_input.context)
    pre_pred = pre_serialized_predictor.predict(context)
    post_pred = post_serialized_predictor.predict(context)
    # Scores must match within tolerance and ranked ids must be identical.
    assert np.allclose(pre_pred["scores"], post_pred["scores"], self.tol)
    assert pre_pred["ids"] == post_pred["ids"]
    # Missing features in the provided context should be handled
    # identically before and after serialization.
    pre_missing = pre_serialized_predictor.predict({})
    post_missing = post_serialized_predictor.predict({})
    assert np.allclose(pre_missing["scores"], post_missing["scores"], self.tol)
    assert pre_missing["ids"] == post_missing["ids"]
    # Garbage feature keys in the context should likewise be ignored
    # consistently.
    pre_garbage = pre_serialized_predictor.predict({"blah": 42})
    post_garbage = post_serialized_predictor.predict({"blah": 42})
    assert np.allclose(pre_garbage["scores"], post_garbage["scores"], self.tol)
    assert pre_garbage["ids"] == post_garbage["ids"]