Exemple #1
0
def test_embedding_transformer(regression_data):
    """Check the mapping built by ``CategoricalEmbeddingTransformer``.

    A ``TabTransformerConfig`` regression model is fitted with
    ``fast_dev_run=True`` and then wrapped in a
    ``CategoricalEmbeddingTransformer`` that is fit-transformed on the
    training frame. The test asserts that:

    * the ``HouseAgeBin`` mapping has exactly one more key than the number
      of distinct values in the training column (presumably an extra slot
      for unseen/missing categories -- confirm against the transformer);
    * the first dimension of every mapped value equals the number of
      transformed columns whose name contains ``HouseAgeBin_embed_dim``.

    Args:
        regression_data: Fixture that unpacks as ``(train, test, target)``.
    """
    train, test, target = regression_data

    cat_col = "HouseAgeBin"
    numeric_cols = [
        "AveRooms",
        "AveBedrms",
        "Population",
        "AveOccup",
        "Latitude",
        "Longitude",
    ]

    # Config objects are created in the same order as before:
    # data -> model -> trainer -> optimizer.
    data_cfg = DataConfig(
        target=target,
        continuous_cols=numeric_cols,
        categorical_cols=[cat_col],
    )
    model_cfg = TabTransformerConfig(
        task="regression",
        input_embed_dim=8,
        num_attn_blocks=1,
        num_heads=2,
    )
    trainer_cfg = TrainerConfig(
        max_epochs=1,
        checkpoints=None,
        early_stopping=None,
        gpus=None,
        fast_dev_run=True,
    )
    optimizer_cfg = OptimizerConfig()

    tabular_model = TabularModel(
        data_config=data_cfg,
        model_config=model_cfg,
        optimizer_config=optimizer_cfg,
        trainer_config=trainer_cfg,
    )
    tabular_model.fit(train=train, test=test)

    transformer = CategoricalEmbeddingTransformer(tabular_model)
    transformed = transformer.fit_transform(train)

    # Columns the transformer produced for the categorical feature.
    n_embed_cols = sum(
        1 for name in transformed.columns if "HouseAgeBin_embed_dim" in name
    )
    category_map = transformer._mapping[cat_col]

    n_categories = len(train[cat_col].unique())
    assert n_categories + 1 == len(category_map)

    # Collect every first-dimension size up front, then compare them all.
    first_dims = [vector.shape[0] for vector in category_map.values()]
    assert all(dim == n_embed_cols for dim in first_dims)
Exemple #2
0
# )
# Assemble the model from the four config objects, which are expected to be
# defined earlier in the script (not visible in this excerpt). The experiment
# config is intentionally left out.
tabular_model = TabularModel(
    data_config=data_config,
    model_config=model_config,
    optimizer_config=optimizer_config,
    trainer_config=trainer_config,
    # experiment_config=experiment_config,
)
# Build a sampler from the flattened training target values; it is handed to
# fit() below as `train_sampler`. (Presumably balances the target classes --
# confirm against get_balanced_sampler.)
sampler = get_balanced_sampler(train[target_name].values.ravel())
# Disabled alternative: a custom loss built from the same target values.
# cust_loss = get_class_weighted_cross_entropy(train[target_name].values.ravel())
tabular_model.fit(
    train=train,
    validation=val,
    # loss=cust_loss,
    train_sampler=sampler)

from pytorch_tabular.categorical_encoders import CategoricalEmbeddingTransformer
# Wrap the fitted model and fit-transform the training frame with it.
transformer = CategoricalEmbeddingTransformer(tabular_model)
train_transform = transformer.fit_transform(train)
# Disabled follow-up steps kept for reference: transforming the test frame,
# feature importance, evaluation, prediction export and a save/load round trip.
# test_transform = transformer.transform(test)
# ft = tabular_model.model.feature_importance()
# result = tabular_model.evaluate(test)
# print(result)
# test.drop(columns=target_name, inplace=True)
# pred_df = tabular_model.predict(test)
# print(pred_df.head())
# pred_df.to_csv("output/temp2.csv")
# tabular_model.save_model("test_save")
# new_model = TabularModel.load_from_checkpoint("test_save")
# result = new_model.evaluate(test)
# print(result)