def test_regression(
    regression_data,
    multi_target,
    embed_categorical,
    continuous_cols,
    categorical_cols,
    continuous_feature_transform,
    normalize_continuous_features,
):
    """Smoke-test NODE regression end-to-end: fit, evaluate, predict.

    Trains a ``NodeConfig`` model for a single epoch on the ``regression_data``
    fixture and checks that the validation loss stays under a loose bound and
    that ``predict`` returns one row per test row.

    Args:
        regression_data: fixture yielding ``(train, test, target)``.
        multi_target: if True, "MedInc" is added as a second target column.
        embed_categorical: forwarded to ``NodeConfig`` (learned embeddings vs.
            the model's alternative categorical encoding).
        continuous_cols / categorical_cols: feature column selections.
        continuous_feature_transform / normalize_continuous_features:
            forwarded to ``DataConfig``.
    """
    train, test, target = regression_data
    # Guard clause: with no feature columns selected for this parameter
    # combination there is nothing to train on — the original nested the
    # whole body under an `else` after a no-op `assert True`.
    if len(continuous_cols) + len(categorical_cols) == 0:
        return
    data_config = DataConfig(
        target=target + ["MedInc"] if multi_target else target,
        continuous_cols=continuous_cols,
        categorical_cols=categorical_cols,
        continuous_feature_transform=continuous_feature_transform,
        normalize_continuous_features=normalize_continuous_features,
    )
    model_config = NodeConfig(task="regression", depth=2, embed_categorical=embed_categorical)
    # One epoch, no checkpointing or early stopping: this is a smoke test,
    # not a convergence test.
    trainer_config = TrainerConfig(max_epochs=1, checkpoints=None, early_stopping=None)
    optimizer_config = OptimizerConfig()
    tabular_model = TabularModel(
        data_config=data_config,
        model_config=model_config,
        optimizer_config=optimizer_config,
        trainer_config=trainer_config,
    )
    tabular_model.fit(train=train, test=test)
    result = tabular_model.evaluate(test)
    # Loose sanity bounds on validation loss; the multi-target bound is
    # wider (30 vs 8), matching the original thresholds.
    threshold = 30 if multi_target else 8
    assert result[0]["valid_loss"] < threshold
    pred_df = tabular_model.predict(test)
    assert pred_df.shape[0] == test.shape[0]
# log_logits=True # ) optimizer_config = OptimizerConfig() def fake_metric(y_hat, y): return (y_hat - y).mean() from sklearn.preprocessing import PowerTransformer tr = PowerTransformer() tabular_model = TabularModel( data_config=data_config, model_config=model_config, optimizer_config=optimizer_config, trainer_config=trainer_config, # experiment_config=experiment_config, ) tabular_model.fit( train=train, test=test, metrics=[fake_metric], target_transform=tr, loss=torch.nn.L1Loss(), optimizer=torch.optim.Adagrad, optimizer_params={}, ) from pytorch_tabular.feature_extractor import DeepFeatureExtractor
# NOTE(review): the next line is the tail of an experiment-config call whose
# opening lies outside this chunk (W&B logging, gradients watched, logits logged).
    run_name="node_forest_cov", exp_watch="gradients", log_target="wandb", log_logits=True)
optimizer_config = OptimizerConfig()
# Alternative construction from YAML config files (disabled):
# tabular_model = TabularModel(
#     data_config="examples/data_config.yml",
#     model_config="examples/model_config.yml",
#     optimizer_config="examples/optimizer_config.yml",
#     trainer_config="examples/trainer_config.yml",
#     # experiment_config=experiment_config,
# )
tabular_model = TabularModel(
    data_config=data_config,
    model_config=model_config,
    optimizer_config=optimizer_config,
    trainer_config=trainer_config,
    # experiment_config=experiment_config,
)
tabular_model.fit(train=train, validation=val)
result = tabular_model.evaluate(test)
print(result)
# Drop the target column(s) in place — presumably so predict() runs on a
# label-free frame; verify target_name matches the trained config.
test.drop(columns=target_name, inplace=True)
pred_df = tabular_model.predict(test)
pred_df.to_csv("output/temp2.csv")
# Save/load round-trip (disabled):
# tabular_model.save_model("test_save")
# new_model = TabularModel.load_from_checkpoint("test_save")
# result = new_model.evaluate(test)
True, #If True, will use a learned embedding, else it will use LeaveOneOutEncoding for categorical columns learning_rate=0.02, additional_tree_output_dim=25, ) # model_config.validate() # model_config = NodeConfig(task="regression", depth=2, embed_categorical=False) # trainer_config = TrainerConfig(checkpoints=None, max_epochs=5, gpus=1, profiler=None) # experiment_config = ExperimentConfig( # project_name="DeepGMM_test", # run_name="wand_debug", # log_target="wandb", # exp_watch="gradients", # log_logits=True # ) # optimizer_config = OptimizerConfig() tabular_model = TabularModel( data_config=data_config, model_config=model_config, optimizer_config=optimizer_config, trainer_config=trainer_config, # experiment_config=experiment_config, model_callable=MultiStageModel) tabular_model.fit(train=train, test=test) result = tabular_model.evaluate(test) # print(result) # # print(result[0]['train_loss']) pred_df = tabular_model.predict(test, quantiles=[0.25]) print(pred_df.head()) # pred_df.to_csv("output/temp2.csv")
# NOTE(review): the lines below close a call begun outside this chunk; the
# (min, max) pair over "MedInc" presumably bounds a target range — confirm
# against the call's opening.
    ),
    (
        dataset.frame[["MedInc"]].min().item(),
        dataset.frame[["MedInc"]].max().item(),
    ),
],
)
# model_config.validate()
# model_config = NodeConfig(task="regression", depth=2, embed_categorical=False)
trainer_config = TrainerConfig(checkpoints=None, max_epochs=5)
# Log this run to Weights & Biases, watching gradients and logging logits.
experiment_config = ExperimentConfig(
    project_name="Tabular_test",
    run_name="wand_debug",
    log_target="wandb",
    exp_watch="gradients",
    log_logits=True
)
optimizer_config = OptimizerConfig()
tabular_model = TabularModel(
    data_config=data_config,
    model_config=model_config,
    optimizer_config=optimizer_config,
    trainer_config=trainer_config,
    # Experiment tracking enabled for this run (unlike the other fragments).
    experiment_config=experiment_config,
)
tabular_model.fit(train=train, test=test)
result = tabular_model.evaluate(test)
# print(result)
# # print(result[0]['train_loss'])
# pred_df = tabular_model.predict(test)
# pred_df.to_csv("output/temp2.csv")
# NOTE(review): the next line is the tail of an experiment-config call whose
# opening lies outside this chunk.
    exp_watch="gradients", log_target="wandb", log_logits=True)
optimizer_config = OptimizerConfig()
# Alternative construction from YAML config files (disabled):
# tabular_model = TabularModel(
#     data_config="examples/data_config.yml",
#     model_config="examples/model_config.yml",
#     optimizer_config="examples/optimizer_config.yml",
#     trainer_config="examples/trainer_config.yml",
#     # experiment_config=experiment_config,
# )
tabular_model = TabularModel(
    data_config=data_config,
    model_config=model_config,
    optimizer_config=optimizer_config,
    trainer_config=trainer_config,
    # experiment_config=experiment_config,
)
# Balanced sampler built from the flattened target values — presumably to
# counter class imbalance during training; confirm target is categorical.
sampler = get_balanced_sampler(train[target_name].values.ravel())
# cust_loss = get_class_weighted_cross_entropy(train[target_name].values.ravel())
tabular_model.fit(
    train=train,
    validation=val,
    # loss=cust_loss,
    train_sampler=sampler)
from pytorch_tabular.categorical_encoders import CategoricalEmbeddingTransformer

# Replace categorical columns with the embeddings learned by the fitted model.
transformer = CategoricalEmbeddingTransformer(tabular_model)
train_transform = transformer.fit_transform(train)
# test_transform = transformer.transform(test)