def test_regression(
    regression_data,
    multi_target,
    embed_categorical,
    continuous_cols,
    categorical_cols,
    continuous_feature_transform,
    normalize_continuous_features,
):
    """Smoke-test NODE regression training on the California-housing data.

    Builds the config objects from the parametrized fixtures, fits for a
    single epoch, and checks that (a) the validation loss stays under a
    loose sanity bound and (b) ``predict`` returns one row per test row.

    The test is a no-op when neither continuous nor categorical columns
    are supplied — there would be nothing for the model to train on.
    """
    train, test, target = regression_data
    # Guard clause instead of wrapping the whole body in an else-branch.
    if not (continuous_cols or categorical_cols):
        return

    data_config = DataConfig(
        # Parentheses make the conditional-expression grouping explicit:
        # the extra "MedInc" target is appended only in the multi-target case.
        target=(target + ["MedInc"]) if multi_target else target,
        continuous_cols=continuous_cols,
        categorical_cols=categorical_cols,
        continuous_feature_transform=continuous_feature_transform,
        normalize_continuous_features=normalize_continuous_features,
    )
    model_config = NodeConfig(
        task="regression",
        depth=2,
        embed_categorical=embed_categorical,
    )
    # One epoch, no checkpointing or early stopping: keep the test fast.
    trainer_config = TrainerConfig(
        max_epochs=1,
        checkpoints=None,
        early_stopping=None,
    )
    optimizer_config = OptimizerConfig()

    tabular_model = TabularModel(
        data_config=data_config,
        model_config=model_config,
        optimizer_config=optimizer_config,
        trainer_config=trainer_config,
    )
    tabular_model.fit(train=train, test=test)

    result = tabular_model.evaluate(test)
    # Loose bounds — they only catch catastrophic training failures, not
    # regressions in model quality.
    loss_bound = 30 if multi_target else 8
    assert result[0]["valid_loss"] < loss_bound
    pred_df = tabular_model.predict(test)
    assert pred_df.shape[0] == test.shape[0]
# Example #2
from sklearn.preprocessing import PowerTransformer

# Power-transform the regression target before fitting; the model learns
# on the transformed scale and inverse-transforms its predictions.
tr = PowerTransformer()

tabular_model = TabularModel(
    data_config=data_config,
    model_config=model_config,
    optimizer_config=optimizer_config,
    trainer_config=trainer_config,
)
# Loss, optimizer and metrics are overridden at fit time here rather than
# through the config objects.
fit_kwargs = dict(
    train=train,
    test=test,
    metrics=[fake_metric],
    target_transform=tr,
    loss=torch.nn.L1Loss(),
    optimizer=torch.optim.Adagrad,
    optimizer_params={},
)
tabular_model.fit(**fit_kwargs)

from pytorch_tabular.feature_extractor import DeepFeatureExtractor

result = tabular_model.evaluate(test)
print(result)
# Example #3
# NOTE: this snippet was truncated by extraction — the opening line of the
# call below (presumably `experiment_config = ExperimentConfig(project_name=...,`)
# is missing, so the fragment is kept here commented out:
#                                      run_name="node_forest_cov",
#                                      exp_watch="gradients",
#                                      log_target="wandb",
#                                      log_logits=True)
optimizer_config = OptimizerConfig()

# The four configs can alternatively be loaded from YAML files, e.g.
# TabularModel(data_config="examples/data_config.yml", ...).
model_kwargs = dict(
    data_config=data_config,
    model_config=model_config,
    optimizer_config=optimizer_config,
    trainer_config=trainer_config,
)
tabular_model = TabularModel(**model_kwargs)
tabular_model.fit(train=train, validation=val)

result = tabular_model.evaluate(test)
print(result)

# Drop the target column(s) so predict() sees only feature columns.
test.drop(columns=target_name, inplace=True)
pred_df = tabular_model.predict(test)
pred_df.to_csv("output/temp2.csv")
# Example #4
# NOTE: this snippet was truncated by extraction — the opening line of the
# call below (presumably a `NodeConfig(task=..., embed_categorical=` call)
# is missing, so the fragment is kept here commented out:
#     True,  # If True, will use a learned embedding, else it will use LeaveOneOutEncoding for categorical columns
#     learning_rate=0.02,
#     additional_tree_output_dim=25,
# )
# Inject a custom model class through ``model_callable`` instead of letting
# the model_config pick the default implementation.
tabular_model = TabularModel(
    data_config=data_config,
    model_config=model_config,
    optimizer_config=optimizer_config,
    trainer_config=trainer_config,
    model_callable=MultiStageModel,
)
tabular_model.fit(train=train, test=test)

result = tabular_model.evaluate(test)

# Request the 25th-percentile quantile alongside the point predictions.
pred_df = tabular_model.predict(test, quantiles=[0.25])
print(pred_df.head())
# Example #5
# The configs can also be loaded from YAML files, e.g.
# optimizer_config="examples/optimizer_config.yml",
# trainer_config="examples/trainer_config.yml".
tabular_model = TabularModel(
    data_config=data_config,
    model_config=model_config,
    optimizer_config=optimizer_config,
    trainer_config=trainer_config,
)

# Counteract class imbalance by feeding the trainer a weighted sampler
# built from the training-set label distribution.
label_values = train[target_name].values.ravel()
sampler = get_balanced_sampler(label_values)
tabular_model.fit(train=train, validation=val, train_sampler=sampler)

from pytorch_tabular.categorical_encoders import CategoricalEmbeddingTransformer

# Replace categorical columns with the embeddings the model just learned.
transformer = CategoricalEmbeddingTransformer(tabular_model)
train_transform = transformer.fit_transform(train)