def test_save_load(
    regression_data,
    model_config_class,
    continuous_cols,
    categorical_cols,
    custom_metrics,
    custom_loss,
    custom_optimizer,
    tmpdir,
):
    """Round-trip a fitted regression model through save/load.

    Fits a ``TabularModel``, evaluates it on the test split, saves it into a
    fresh directory under ``tmpdir``, reloads it with
    ``TabularModel.load_from_checkpoint`` and asserts that the first test
    metric of the reloaded model equals the one measured before saving.
    """
    train, test, target = regression_data

    # The fixture supplies a (config class, constructor kwargs) pair; the
    # kwargs dict is updated in place to select the regression task.
    config_cls, config_params = model_config_class
    config_params["task"] = "regression"

    data_config = DataConfig(
        target=target,
        continuous_cols=continuous_cols,
        categorical_cols=categorical_cols,
    )
    model_config = config_cls(**config_params)
    trainer_config = TrainerConfig(
        max_epochs=3,
        checkpoints=None,
        early_stopping=None,
        gpus=None,
        fast_dev_run=True,
    )
    optimizer_config = OptimizerConfig()

    tabular_model = TabularModel(
        data_config=data_config,
        model_config=model_config,
        optimizer_config=optimizer_config,
        trainer_config=trainer_config,
    )
    tabular_model.fit(
        train=train,
        test=test,
        metrics=custom_metrics,
        loss=custom_loss,
        optimizer=custom_optimizer,
        optimizer_params={},
    )
    result_1 = tabular_model.evaluate(test)

    # Save into a new sub-directory of the pytest-provided tmpdir and reload.
    save_dir = tmpdir.mkdir("saved_model")
    tabular_model.save_model(str(save_dir))
    new_mdl = TabularModel.load_from_checkpoint(str(save_dir))
    result_2 = new_mdl.evaluate(test)

    metric_before = result_1[0][f"test_{tabular_model.model.hparams.metrics[0]}"]
    metric_after = result_2[0][f"test_{new_mdl.model.hparams.metrics[0]}"]
    assert metric_before == metric_after
def main():
    """Train, evaluate and save a category-embedding classifier.

    Generates a synthetic mixed-type classification dataset, splits it into
    train / validation / test parts, fits a ``TabularModel`` built from a
    ``CategoryEmbeddingModelConfig``, reports metrics on the test split and
    saves the fitted model under ``Analysis/basic``.
    """
    # Generate synthetic data
    data, cat_col_names, num_col_names = make_mixed_classification(
        n_samples=10000, n_features=20, n_categories=4
    )
    train, test = train_test_split(data, random_state=42)
    train, val = train_test_split(train, random_state=42)

    # Define the configs
    data_config = DataConfig(
        target=["target"],
        continuous_cols=num_col_names,
        categorical_cols=cat_col_names,
    )
    trainer_config = TrainerConfig(
        auto_lr_find=True,
        batch_size=1024,
        max_epochs=100,
        gpus=1,
    )
    optimizer_config = OptimizerConfig()
    model_config = CategoryEmbeddingModelConfig(
        task="classification",
        layers="1024-512-512",
        activation="LeakyReLU",
        learning_rate=1e-3,
    )
    tabular_model = TabularModel(
        data_config=data_config,
        model_config=model_config,
        optimizer_config=optimizer_config,
        trainer_config=trainer_config,
    )

    # Train the model
    tabular_model.fit(train=train, validation=val)

    # Loss and metrics on new data; the returned value is not needed here.
    tabular_model.evaluate(test)

    # New predictions as a DataFrame
    pred_df = tabular_model.predict(test)
    print_metrics(test["target"], pred_df["prediction"], tag="Holdout")

    # Save the fitted model
    tabular_model.save_model("Analysis/basic")
'target' ], #target should always be a list. Multi-targets are only supported for regression. Multi-Task Classification is not implemented continuous_cols=num_col_names, categorical_cols=cat_col_names, ) trainer_config = TrainerConfig( auto_lr_find= True, # Runs the LRFinder to automatically derive a learning rate batch_size=1024, max_epochs=100, gpus=1, #index of the GPU to use. 0, means CPU ) optimizer_config = OptimizerConfig() model_config = CategoryEmbeddingModelConfig( task="classification", layers="1024-512-512", # Number of nodes in each layer activation="LeakyReLU", # Activation between each layers learning_rate=1e-3) tabular_model = TabularModel( data_config=data_config, model_config=model_config, optimizer_config=optimizer_config, trainer_config=trainer_config, ) tabular_model.fit(train=train, validation=val) result = tabular_model.evaluate(test) pred_df = tabular_model.predict(test) tabular_model.save_model("Analysis/basic") loaded_model = TabularModel.load_from_checkpoint("Analysis/basic")
def main_64():
    """Cross-validate a TabNet classifier and write a submission file.

    Loads the data via ``data_load()``, runs a 10-fold stratified
    cross-validation in which a fresh ``TabularModel`` is fitted per fold
    with a class-weighted cross-entropy loss, saves every fold's model,
    prints out-of-fold metrics, averages the per-fold ``"0_probability"``
    predictions on ``test_data`` into ``Analysis/submission_2.csv`` and
    finally prints the out-of-fold confusion matrix.
    """
    # The per-fold training split is assigned to the module-level ``train``.
    global train

    data, test_data, cat_col_names, num_col_names = data_load()
    bsize = 2500 * 3 * 2 * 2

    # Define the configs
    data_config = DataConfig(
        target=["target"],
        continuous_cols=num_col_names,
        categorical_cols=cat_col_names,
        num_workers=4,
    )
    trainer_config = TrainerConfig(
        auto_lr_find=True,
        batch_size=bsize,
        max_epochs=100,
        gpus=1,
    )
    optimizer_config = OptimizerConfig()
    model_config = TabNetModelConfig(
        task="classification",
        # Base learning rate multiplied by bsize / 1024.
        learning_rate=1e-3 * bsize / 1024,
        n_d=64,
        n_a=64,
        n_steps=5,
        gamma=1.3,
    )

    splitter = StratifiedKFold(n_splits=10, shuffle=True)
    res_pred = []  # out-of-fold "prediction" frames, one per fold
    res_test = []  # full prediction frames on test_data, one per fold

    folds = splitter.split(X=data, y=data.target.values)
    for fold, (train_idx, test_idx) in enumerate(folds):
        train = data.iloc[train_idx]
        test = data.iloc[test_idx]
        train, val = train_test_split(train, random_state=42)

        tabular_mode = TabularModel(
            data_config=data_config,
            optimizer_config=optimizer_config,
            model_config=model_config,
            trainer_config=trainer_config,
        )
        weighted_loss = get_class_weighted_cross_entropy(
            train["target"].values.ravel(), mu=0.1
        )

        # Train the model for this fold
        tabular_mode.fit(
            train=train, validation=val, max_epochs=100, loss=weighted_loss
        )

        fold_pred = tabular_mode.predict(test).loc[:, ["prediction"]]
        res_pred.append(fold_pred)

        tabular_mode.save_model(f"Analysis/basic_tabnet_rep{fold}")

        res_test.append(tabular_mode.predict(test_data))

    # Out-of-fold predictions, restored to the original row order
    pred_tot = pd.concat(res_pred).sort_index()
    print_metrics(data['target'], pred_tot["prediction"], tag="Holdout")

    # Average the per-fold "0_probability" columns row-wise.
    prob_columns = [frame.loc[:, ["0_probability"]] for frame in res_test]
    mean_prob = pd.concat(prob_columns, axis=1).apply(np.mean, axis=1)
    # NOTE(review): a mean "0_probability" above 0.5 is mapped to label 1 —
    # confirm this column/label pairing is intended.
    labels = mean_prob.map(lambda x: 1 if x > 0.5 else 0)

    submission = pd.read_csv("Data/sample_submission.csv")
    submission["target"] = labels.values
    submission.to_csv("Analysis/submission_2.csv", index=False)

    print(confusion_matrix(data['target'], pred_tot["prediction"]))
def main():
    """Cross-validate a category-embedding classifier and save a model.

    Loads the data via ``data_load()``, runs a 10-fold stratified
    cross-validation in which a fresh ``TabularModel`` is fitted per fold
    with a class-weighted cross-entropy loss, prints metrics and the
    confusion matrix computed on the concatenated out-of-fold predictions,
    and saves the model fitted on the last fold under ``Analysis/basic``.
    """
    # Load the data
    data, cat_col_names, num_col_names = data_load()
    bsize = 1024

    # Define the configs
    data_config = DataConfig(
        target=["target"],
        continuous_cols=num_col_names,
        categorical_cols=cat_col_names,
    )
    trainer_config = TrainerConfig(
        auto_lr_find=True,
        batch_size=bsize,
        max_epochs=100,
        gpus=1,
    )
    optimizer_config = OptimizerConfig()
    model_config = CategoryEmbeddingModelConfig(
        task="classification",
        layers="1024-512-512",
        activation="LeakyReLU",
        learning_rate=1e-3,
    )

    cv = StratifiedKFold(n_splits=10, shuffle=True)
    res_pred = []
    for train_idx, test_idx in cv.split(X=data, y=data.target.values):
        train, test = data.iloc[train_idx], data.iloc[test_idx]
        train, val = train_test_split(train, random_state=42)

        # A new model is built for every fold, so no model is created
        # ahead of the loop.
        tabular_mode = TabularModel(
            data_config=data_config,
            model_config=model_config,
            optimizer_config=optimizer_config,
            trainer_config=trainer_config,
        )
        weighted_loss = get_class_weighted_cross_entropy(
            train["target"].values.ravel(), mu=0.1
        )

        # Train the model for this fold
        tabular_mode.fit(
            train=train, validation=val, max_epochs=100, loss=weighted_loss
        )
        pred_df = tabular_mode.predict(test).loc[:, ["prediction"]]
        res_pred.append(pred_df)

    # Out-of-fold predictions, restored to the original row order
    pred_tot = pd.concat(res_pred).sort_index()
    print_metrics(data['target'], pred_tot["prediction"], tag="Holdout")
    # Print the matrix instead of discarding the computed result.
    print(confusion_matrix(data['target'], pred_tot["prediction"]))

    # Save the model fitted on the last fold
    tabular_mode.save_model("Analysis/basic")
def test_pretrained_backbone(
    regression_data,
    model_config_class,
    continuous_cols,
    categorical_cols,
    custom_metrics,
    custom_loss,
    custom_optimizer,
    tmpdir,
):
    """Pre-train with the SSL task, then reuse the backbone for regression.

    First fits a model configured with ``task="ssl"`` (Denoising / cutmix),
    checks that ``evaluate`` reports ``test_mean_squared_error`` and that
    ``predict`` raises ``AssertionError``, and saves/reloads it. Then fits a
    regression model that receives the reloaded model's backbone through
    ``trained_backbone`` and checks the same metric key is reported.
    """
    train, test, target = regression_data
    data_config = DataConfig(
        target=target,
        continuous_cols=continuous_cols,
        categorical_cols=categorical_cols,
    )
    optimizer_config = OptimizerConfig()

    # The fixture supplies a (config class, constructor kwargs) pair; the
    # kwargs dict is updated in place for each of the two phases.
    config_cls, config_params = model_config_class

    # Phase 1: self-supervised pre-training
    config_params["task"] = "ssl"
    config_params["ssl_task"] = "Denoising"
    config_params["aug_task"] = "cutmix"
    ssl_model = TabularModel(
        data_config=data_config,
        model_config=config_cls(**config_params),
        optimizer_config=optimizer_config,
        trainer_config=TrainerConfig(
            max_epochs=3,
            checkpoints=None,
            early_stopping=None,
            gpus=None,
            fast_dev_run=True,
        ),
    )
    ssl_model.fit(
        train=train,
        test=test,
        metrics=custom_metrics,
        loss=custom_loss,
        optimizer=custom_optimizer,
        optimizer_params={},
    )
    result_1 = ssl_model.evaluate(test)
    with pytest.raises(AssertionError):
        ssl_model.predict(test)
    assert "test_mean_squared_error" in result_1[0].keys()

    save_dir = tmpdir.mkdir("saved_model")
    ssl_model.save_model(str(save_dir))
    old_mdl = TabularModel.load_from_checkpoint(str(save_dir))

    # Phase 2: regression on top of the pre-trained backbone
    config_params["task"] = "regression"
    config_params["ssl_task"] = None
    config_params["aug_task"] = None
    regression_model = TabularModel(
        data_config=data_config,
        model_config=config_cls(**config_params),
        optimizer_config=optimizer_config,
        trainer_config=TrainerConfig(
            max_epochs=1,
            checkpoints=None,
            early_stopping=None,
            gpus=None,
            fast_dev_run=True,
        ),
    )
    regression_model.fit(
        train=train,
        test=test,
        metrics=custom_metrics,
        loss=custom_loss,
        optimizer=custom_optimizer,
        optimizer_params={},
        trained_backbone=old_mdl.model.backbone,
    )
    result_2 = regression_model.evaluate(test)
    assert "test_mean_squared_error" in result_2[0].keys()