def test_regression(
    regression_data,
    multi_target,
    continuous_cols,
    categorical_cols,
    continuous_feature_transform,
    normalize_continuous_features,
    target_range,
    deep_layers,
    batch_norm_continuous_input,
    attention_pooling,
):
    """Smoke-test an AutoInt regression model through fit, evaluate and predict.

    The test is a no-op when both feature column lists are empty. Otherwise it
    checks that the first evaluation result reports ``test_mean_squared_error``
    and that the prediction frame has one row per test row.
    """
    train, test, target = regression_data
    feature_count = len(continuous_cols) + len(categorical_cols)
    if feature_count == 0:
        # No features for this parameter combination: nothing to train on.
        return

    data_config = DataConfig(
        target=target + ["MedInc"] if multi_target else target,
        continuous_cols=continuous_cols,
        categorical_cols=categorical_cols,
        continuous_feature_transform=continuous_feature_transform,
        normalize_continuous_features=normalize_continuous_features,
    )

    autoint_kwargs = {"task": "regression"}
    if target_range:
        # One (min, max) pair per configured target column.
        autoint_kwargs["target_range"] = [
            (float(train[col].min()), float(train[col].max()))
            for col in data_config.target
        ]
    autoint_kwargs["deep_layers"] = deep_layers
    autoint_kwargs["batch_norm_continuous_input"] = batch_norm_continuous_input
    autoint_kwargs["attention_pooling"] = attention_pooling
    model_config = AutoIntConfig(**autoint_kwargs)

    trainer_config = TrainerConfig(
        max_epochs=3,
        checkpoints=None,
        early_stopping=None,
        gpus=None,
        fast_dev_run=True,
    )
    optimizer_config = OptimizerConfig()

    tabular_model = TabularModel(
        data_config=data_config,
        model_config=model_config,
        optimizer_config=optimizer_config,
        trainer_config=trainer_config,
    )
    tabular_model.fit(train=train, test=test)

    metrics = tabular_model.evaluate(test)
    assert "test_mean_squared_error" in metrics[0].keys()

    predictions = tabular_model.predict(test)
    assert predictions.shape[0] == test.shape[0]
def test_regression(
    regression_data,
    multi_target,
    continuous_cols,
    categorical_cols,
    continuous_feature_transform,
    normalize_continuous_features,
    target_range,
):
    """Smoke-test a TabTransformer regression model through fit, evaluate and predict.

    The test is a no-op when both feature column lists are empty. Otherwise it
    checks that the first evaluation result reports ``test_mean_squared_error``
    and that the prediction frame has one row per test row.
    """
    train, test, target = regression_data
    feature_count = len(continuous_cols) + len(categorical_cols)
    if feature_count == 0:
        # No features for this parameter combination: nothing to train on.
        return

    data_config = DataConfig(
        target=target + ["MedInc"] if multi_target else target,
        continuous_cols=continuous_cols,
        categorical_cols=categorical_cols,
        continuous_feature_transform=continuous_feature_transform,
        normalize_continuous_features=normalize_continuous_features,
    )

    transformer_kwargs = {
        "task": "regression",
        "input_embed_dim": 8,
        "num_attn_blocks": 1,
        "num_heads": 2,
    }
    if target_range:
        # One (min, max) pair per configured target column.
        transformer_kwargs["target_range"] = [
            (float(train[col].min()), float(train[col].max()))
            for col in data_config.target
        ]
    model_config = TabTransformerConfig(**transformer_kwargs)

    trainer_config = TrainerConfig(
        max_epochs=1,
        checkpoints=None,
        early_stopping=None,
        gpus=None,
        fast_dev_run=True,
    )
    optimizer_config = OptimizerConfig()

    tabular_model = TabularModel(
        data_config=data_config,
        model_config=model_config,
        optimizer_config=optimizer_config,
        trainer_config=trainer_config,
    )
    tabular_model.fit(train=train, test=test)

    metrics = tabular_model.evaluate(test)
    assert "test_mean_squared_error" in metrics[0].keys()

    predictions = tabular_model.predict(test)
    assert predictions.shape[0] == test.shape[0]
def test_regression(
    regression_data,
    multi_target,
    embed_categorical,
    continuous_cols,
    categorical_cols,
    continuous_feature_transform,
    normalize_continuous_features,
    target_range,
):
    """Smoke-test a NODE regression model through fit, evaluate and predict.

    The test is a no-op when both feature column lists are empty. Otherwise it
    checks that the first evaluation result contains ``valid_loss`` and that
    the prediction frame has one row per test row.
    """
    train, test, target = regression_data
    feature_count = len(continuous_cols) + len(categorical_cols)
    if feature_count == 0:
        # No features for this parameter combination: nothing to train on.
        return

    data_config = DataConfig(
        target=target + ["MedInc"] if multi_target else target,
        continuous_cols=continuous_cols,
        categorical_cols=categorical_cols,
        continuous_feature_transform=continuous_feature_transform,
        normalize_continuous_features=normalize_continuous_features,
    )

    node_kwargs = {
        "task": "regression",
        "depth": 2,
        "num_trees": 50,
        "embed_categorical": embed_categorical,
    }
    if target_range:
        # One (min, max) pair per configured target column, as Python scalars.
        node_kwargs["target_range"] = [
            (train[col].min().item(), train[col].max().item())
            for col in data_config.target
        ]
    model_config = NodeConfig(**node_kwargs)

    trainer_config = TrainerConfig(
        max_epochs=1,
        checkpoints=None,
        early_stopping=None,
        gpus=0,
        fast_dev_run=True,
    )
    optimizer_config = OptimizerConfig()

    tabular_model = TabularModel(
        data_config=data_config,
        model_config=model_config,
        optimizer_config=optimizer_config,
        trainer_config=trainer_config,
    )
    tabular_model.fit(train=train, test=test)

    metrics = tabular_model.evaluate(test)
    assert "valid_loss" in metrics[0].keys()

    predictions = tabular_model.predict(test)
    assert predictions.shape[0] == test.shape[0]
def test2(nrows=10000):
    """Fit and evaluate a category-embedding classifier on the covertype data.

    Usage:
        python source/models/torch_tabular.py test

    Args:
        nrows: Row count forwarded to ``test_dataset_covtype``. The previous
            implementation ignored this argument and always requested 1000.
    """
    df, colcat, colnum, coly = test_dataset_covtype(nrows)
    target_name = coly

    train, test = train_test_split(df, random_state=42)
    train, val = train_test_split(train, random_state=42)
    num_classes = len(set(train[target_name].values.ravel()))

    data_config = DataConfig(
        target=target_name,
        continuous_cols=colnum,
        categorical_cols=colcat,
        continuous_feature_transform=None,  # "quantile_normal",
        normalize_continuous_features=False,
    )
    model_config = CategoryEmbeddingModelConfig(
        task="classification",
        metrics=["f1", "accuracy"],
        metrics_params=[{"num_classes": num_classes}, {}],
    )
    trainer_config = TrainerConfig(gpus=None, fast_dev_run=True)
    # Built but deliberately not passed to TabularModel below; kept so that
    # experiment logging can be switched back on by adding the keyword.
    experiment_config = ExperimentConfig(
        project_name="PyTorch Tabular Example",
        run_name="node_forest_cov",
        exp_watch="gradients",
        log_target="wandb",
        log_logits=True,
    )
    optimizer_config = OptimizerConfig()

    tabular_model = TabularModel(
        data_config=data_config,
        model_config=model_config,
        optimizer_config=optimizer_config,
        trainer_config=trainer_config,
        # experiment_config=experiment_config,
    )
    tabular_model.fit(train=train, validation=val)

    result = tabular_model.evaluate(val)
    log(result)

    # Drop the label without mutating the frame returned by the split.
    test = test.drop(columns=target_name)

    pred_df = tabular_model.predict(val.iloc[:100, :])
    log(pred_df)
def test_regression(
    regression_data,
    multi_target,
    continuous_cols,
    categorical_cols,
    continuous_feature_transform,
    normalize_continuous_features,
    variant,
    num_gaussian,
):
    """Smoke-test a mixture-density regression model built from ``variant``.

    ``variant`` is called with ``task`` and ``mdn_config`` keywords to produce
    the model config. The test is a no-op when both feature column lists are
    empty. Otherwise it checks that the first evaluation result reports
    ``test_mean_squared_error`` and that predictions have one row per test row.
    """
    train, test, target = regression_data
    feature_count = len(continuous_cols) + len(categorical_cols)
    if feature_count == 0:
        # No features for this parameter combination: nothing to train on.
        return

    data_config = DataConfig(
        target=target + ["MedInc"] if multi_target else target,
        continuous_cols=continuous_cols,
        categorical_cols=categorical_cols,
        continuous_feature_transform=continuous_feature_transform,
        normalize_continuous_features=normalize_continuous_features,
    )

    head_config = MixtureDensityHeadConfig(num_gaussian=num_gaussian)
    model_config = variant(task="regression", mdn_config=head_config)

    trainer_config = TrainerConfig(
        max_epochs=3,
        checkpoints=None,
        early_stopping=None,
        gpus=None,
        fast_dev_run=True,
    )
    optimizer_config = OptimizerConfig()

    tabular_model = TabularModel(
        data_config=data_config,
        model_config=model_config,
        optimizer_config=optimizer_config,
        trainer_config=trainer_config,
    )
    tabular_model.fit(train=train, test=test)

    metrics = tabular_model.evaluate(test)
    assert "test_mean_squared_error" in metrics[0].keys()

    predictions = tabular_model.predict(test)
    assert predictions.shape[0] == test.shape[0]
def test_classification(
    classification_data,
    continuous_cols,
    categorical_cols,
    continuous_feature_transform,
    normalize_continuous_features,
    deep_layers,
    batch_norm_continuous_input,
):
    """Smoke-test an AutoInt classification model through fit, evaluate and predict.

    The test is a no-op when both feature column lists are empty. Otherwise it
    checks that the first evaluation result reports ``test_accuracy`` and that
    the prediction frame has one row per test row.
    """
    train, test, target = classification_data
    feature_count = len(continuous_cols) + len(categorical_cols)
    if feature_count == 0:
        # No features for this parameter combination: nothing to train on.
        return

    data_config = DataConfig(
        target=target,
        continuous_cols=continuous_cols,
        categorical_cols=categorical_cols,
        continuous_feature_transform=continuous_feature_transform,
        normalize_continuous_features=normalize_continuous_features,
    )
    model_config = AutoIntConfig(
        task="classification",
        deep_layers=deep_layers,
        batch_norm_continuous_input=batch_norm_continuous_input,
    )
    trainer_config = TrainerConfig(
        max_epochs=3,
        checkpoints=None,
        early_stopping=None,
        gpus=None,
        fast_dev_run=True,
    )
    optimizer_config = OptimizerConfig()

    tabular_model = TabularModel(
        data_config=data_config,
        model_config=model_config,
        optimizer_config=optimizer_config,
        trainer_config=trainer_config,
    )
    tabular_model.fit(train=train, test=test)

    metrics = tabular_model.evaluate(test)
    assert "test_accuracy" in metrics[0].keys()

    predictions = tabular_model.predict(test)
    assert predictions.shape[0] == test.shape[0]
def test_classification(
    classification_data,
    continuous_cols,
    categorical_cols,
    continuous_feature_transform,
    normalize_continuous_features,
):
    """Smoke-test a TabTransformer classification model through fit, evaluate and predict.

    The test is a no-op when both feature column lists are empty. Otherwise it
    checks that the first evaluation result reports ``test_accuracy`` and that
    the prediction frame has one row per test row.
    """
    train, test, target = classification_data
    feature_count = len(continuous_cols) + len(categorical_cols)
    if feature_count == 0:
        # No features for this parameter combination: nothing to train on.
        return

    data_config = DataConfig(
        target=target,
        continuous_cols=continuous_cols,
        categorical_cols=categorical_cols,
        continuous_feature_transform=continuous_feature_transform,
        normalize_continuous_features=normalize_continuous_features,
    )
    model_config = TabTransformerConfig(
        task="classification",
        input_embed_dim=8,
        num_attn_blocks=1,
        num_heads=2,
    )
    trainer_config = TrainerConfig(
        max_epochs=1,
        checkpoints=None,
        early_stopping=None,
        gpus=None,
        fast_dev_run=True,
    )
    optimizer_config = OptimizerConfig()

    tabular_model = TabularModel(
        data_config=data_config,
        model_config=model_config,
        optimizer_config=optimizer_config,
        trainer_config=trainer_config,
    )
    tabular_model.fit(train=train, test=test)

    metrics = tabular_model.evaluate(test)
    assert "test_accuracy" in metrics[0].keys()

    predictions = tabular_model.predict(test)
    assert predictions.shape[0] == test.shape[0]
def test_classification(
    classification_data,
    continuous_cols,
    categorical_cols,
    embed_categorical,
    continuous_feature_transform,
    normalize_continuous_features,
):
    """Smoke-test a NODE classification model through fit, evaluate and predict.

    The test is a no-op when both feature column lists are empty. Otherwise it
    checks that the first evaluation result contains ``valid_loss`` and that
    the prediction frame has one row per test row.
    """
    train, test, target = classification_data
    feature_count = len(continuous_cols) + len(categorical_cols)
    if feature_count == 0:
        # No features for this parameter combination: nothing to train on.
        return

    data_config = DataConfig(
        target=target,
        continuous_cols=continuous_cols,
        categorical_cols=categorical_cols,
        continuous_feature_transform=continuous_feature_transform,
        normalize_continuous_features=normalize_continuous_features,
    )
    model_config = NodeConfig(
        task="classification",
        depth=2,
        num_trees=50,
        embed_categorical=embed_categorical,
    )
    # Unlike the sibling tests, this trainer config does not set fast_dev_run.
    trainer_config = TrainerConfig(
        max_epochs=1,
        checkpoints=None,
        early_stopping=None,
        gpus=0,
    )
    optimizer_config = OptimizerConfig()

    tabular_model = TabularModel(
        data_config=data_config,
        model_config=model_config,
        optimizer_config=optimizer_config,
        trainer_config=trainer_config,
    )
    tabular_model.fit(train=train, test=test)

    metrics = tabular_model.evaluate(test)
    assert "valid_loss" in metrics[0].keys()

    predictions = tabular_model.predict(test)
    assert predictions.shape[0] == test.shape[0]
def main():
    """Train a category-embedding classifier on synthetic data and save it.

    Generates a mixed-type classification dataset, splits it into train,
    validation and hold-out parts, fits the model, prints hold-out metrics and
    writes the model to ``Analysis/basic``.
    """
    # Synthetic data with both categorical and numerical columns.
    data, cat_col_names, num_col_names = make_mixed_classification(
        n_samples=10000,
        n_features=20,
        n_categories=4,
    )
    train, test = train_test_split(data, random_state=42)
    train, val = train_test_split(train, random_state=42)

    # Configuration objects.
    data_config = DataConfig(
        target=["target"],
        continuous_cols=num_col_names,
        categorical_cols=cat_col_names,
    )
    trainer_config = TrainerConfig(
        auto_lr_find=True,
        batch_size=1024,
        max_epochs=100,
        gpus=1,
    )
    optimizer_config = OptimizerConfig()
    model_config = CategoryEmbeddingModelConfig(
        task="classification",
        layers="1024-512-512",
        activation="LeakyReLU",
        learning_rate=1e-3,
    )

    classifier = TabularModel(
        data_config=data_config,
        model_config=model_config,
        optimizer_config=optimizer_config,
        trainer_config=trainer_config,
    )
    classifier.fit(train=train, validation=val)

    # Loss and metrics on the hold-out split; the return value is not used.
    classifier.evaluate(test)

    # Hold-out predictions as a DataFrame.
    pred_df = classifier.predict(test)
    pred_df.head()  # result is discarded
    print_metrics(test['target'], pred_df["prediction"], tag="Holdout")

    classifier.save_model("Analysis/basic")
def test_regression(
    regression_data,
    multi_target,
    continuous_cols,
    categorical_cols,
    continuous_feature_transform,
    normalize_continuous_features,
    target_range,
    target_transform,
    custom_metrics,
    custom_loss,
    custom_optimizer,
):
    """Smoke-test a category-embedding regression model with optional custom parts.

    ``custom_metrics``, ``target_transform``, ``custom_loss`` and
    ``custom_optimizer`` are forwarded to ``TabularModel.fit``. The test is a
    no-op when both feature column lists are empty. Otherwise the first
    evaluation result must contain ``test_mean_squared_error`` when
    ``custom_metrics`` is ``None`` and ``test_fake_metric`` when it is not, and
    the prediction frame must have one row per test row.
    """
    train, test, target = regression_data
    feature_count = len(continuous_cols) + len(categorical_cols)
    if feature_count == 0:
        # No features for this parameter combination: nothing to train on.
        return

    data_config = DataConfig(
        target=target + ["MedInc"] if multi_target else target,
        continuous_cols=continuous_cols,
        categorical_cols=categorical_cols,
        continuous_feature_transform=continuous_feature_transform,
        normalize_continuous_features=normalize_continuous_features,
    )

    embedding_kwargs = {"task": "regression"}
    if target_range:
        # One (min, max) pair per configured target column.
        embedding_kwargs["target_range"] = [
            (float(train[col].min()), float(train[col].max()))
            for col in data_config.target
        ]
    model_config = CategoryEmbeddingModelConfig(**embedding_kwargs)

    trainer_config = TrainerConfig(
        max_epochs=3,
        checkpoints=None,
        early_stopping=None,
        gpus=None,
        fast_dev_run=True,
    )
    optimizer_config = OptimizerConfig()

    tabular_model = TabularModel(
        data_config=data_config,
        model_config=model_config,
        optimizer_config=optimizer_config,
        trainer_config=trainer_config,
    )
    tabular_model.fit(
        train=train,
        test=test,
        metrics=custom_metrics,
        target_transform=target_transform,
        loss=custom_loss,
        optimizer=custom_optimizer,
        optimizer_params={},
    )

    metrics = tabular_model.evaluate(test)
    if custom_metrics is None:
        expected_key = "test_mean_squared_error"
    else:
        expected_key = "test_fake_metric"
    assert expected_key in metrics[0].keys()

    predictions = tabular_model.predict(test)
    assert predictions.shape[0] == test.shape[0]
def main():
    """Cross-validate a NODE classifier and write an averaged test submission.

    For each of 10 stratified folds a fresh ``TabularModel`` is trained with a
    class-weighted cross-entropy loss, the out-of-fold predictions are
    collected and the external test data is scored in chunks. The per-fold
    ``"0_probability"`` columns are averaged, thresholded into labels and
    written to ``Analysis/submission_2_node.csv``. Out-of-fold metrics and the
    confusion matrix are printed.
    """
    data, test_data, cat_col_names, num_col_names = data_load()
    bsize = 2500 * 2
    chunk_size = 20000  # rows of test_data scored per predict() call

    # Configuration objects.
    data_config = DataConfig(
        target=["target"],
        continuous_cols=num_col_names,
        categorical_cols=cat_col_names,
        num_workers=4,
    )
    trainer_config = TrainerConfig(
        auto_lr_find=True,
        batch_size=bsize,
        max_epochs=100,
        gpus=1,
    )
    optimizer_config = OptimizerConfig()
    model_config = NodeConfig(
        task="classification",
        num_layers=2,  # Number of Dense Layers
        num_trees=1024,  # Number of Trees in each layer
        depth=3,  # Depth of each Tree
        # If True, will use a learned embedding, else it will use
        # LeaveOneOutEncoding for categorical columns
        embed_categorical=True,
        learning_rate=1e-3,
        additional_tree_output_dim=5,
    )

    cv = StratifiedKFold(n_splits=10, shuffle=True)
    res_pred = []
    res_test = []
    n_test_rows = test_data.shape[0]
    for fold, (train_idx, test_idx) in enumerate(
        cv.split(X=data, y=data.target.values)
    ):
        train, test = data.iloc[train_idx], data.iloc[test_idx]
        train, val = train_test_split(train, random_state=42)

        tabular_mode = TabularModel(
            data_config=data_config,
            optimizer_config=optimizer_config,
            model_config=model_config,
            trainer_config=trainer_config,
        )
        weighted_loss = get_class_weighted_cross_entropy(
            train["target"].values.ravel(), mu=0.1
        )
        tabular_mode.fit(
            train=train, validation=val, max_epochs=100, loss=weighted_loss
        )

        pred_df = tabular_mode.predict(test).loc[:, ["prediction"]]
        res_pred.append(pred_df)
        print(
            f"Fold {fold} AUC score: {roc_auc_score(test.target.values, pred_df.prediction.values)}"
        )

        # Score the test data in chunks. Stepping by chunk_size up to the row
        # count also covers the final partial chunk, which the previous
        # int(rows / chunk_size) loop silently dropped.
        chunk_preds = [
            tabular_mode.predict(test_data.iloc[start:start + chunk_size])
            for start in range(0, n_test_rows, chunk_size)
        ]
        res_test.append(pd.concat(chunk_preds))

    # Average the class-0 probability across folds, then label as class 0
    # when that mean exceeds 0.5 and class 1 otherwise.
    pred_df = pd.concat(
        [fold_pred.loc[:, ["0_probability"]] for fold_pred in res_test],
        axis=1,
    ).apply(np.mean, axis=1)
    pred_df2 = pred_df.map(lambda x: 0 if x > 0.5 else 1)
    sample_submisson = pd.read_csv("Data/sample_submission.csv")
    sample_submisson["target"] = pred_df2.values

    # Out-of-fold predictions, restored to the original row order.
    pred_tot = pd.concat(res_pred).sort_index()
    print_metrics(data['target'], pred_tot["prediction"], tag="Holdout")

    sample_submisson.to_csv("Analysis/submission_2_node.csv", index=False)
    print(confusion_matrix(data['target'], pred_tot["prediction"]))
'target' ], #target should always be a list. Multi-targets are only supported for regression. Multi-Task Classification is not implemented continuous_cols=num_col_names, categorical_cols=cat_col_names, ) trainer_config = TrainerConfig( auto_lr_find= True, # Runs the LRFinder to automatically derive a learning rate batch_size=1024, max_epochs=100, gpus=1, #index of the GPU to use. 0, means CPU ) optimizer_config = OptimizerConfig() model_config = CategoryEmbeddingModelConfig( task="classification", layers="1024-512-512", # Number of nodes in each layer activation="LeakyReLU", # Activation between each layers learning_rate=1e-3) tabular_model = TabularModel( data_config=data_config, model_config=model_config, optimizer_config=optimizer_config, trainer_config=trainer_config, ) tabular_model.fit(train=train, validation=val) result = tabular_model.evaluate(test) pred_df = tabular_model.predict(test) tabular_model.save_model("Analysis/basic") loaded_model = TabularModel.load_from_checkpoint("Analysis/basic")
def test2(nrows=10000):
    """Download covertype if needed, then fit a category-embedding classifier.

    Usage:
        python source/models/torch_tabular.py test

    Args:
        nrows: Number of rows read from the covertype archive.
    """
    global model, session

    # Fetch the archive into the user's home directory when it is missing.
    data_dir = Path.home().joinpath('data/input/covtype/')
    archive = data_dir.joinpath('covtype.data.gz')
    archive.parent.mkdir(parents=True, exist_ok=True)
    source_url = "https://archive.ics.uci.edu/ml/machine-learning-databases/covtype/covtype.data.gz"
    if not archive.exists():
        wget.download(source_url, archive.as_posix())

    target_name = ["Covertype"]
    # Wilderness_Area1..4 followed by Soil_Type1..40.
    colcat = [f"Wilderness_Area{k}" for k in range(1, 5)]
    colcat += [f"Soil_Type{k}" for k in range(1, 41)]
    colnum = [
        "Elevation",
        "Aspect",
        "Slope",
        "Horizontal_Distance_To_Hydrology",
        "Vertical_Distance_To_Hydrology",
        "Horizontal_Distance_To_Roadways",
        "Hillshade_9am",
        "Hillshade_Noon",
        "Hillshade_3pm",
        "Horizontal_Distance_To_Fire_Points",
    ]
    feature_columns = colnum + colcat + target_name

    df = pd.read_csv(archive, header=None, names=feature_columns, nrows=nrows)
    df.head()  # result is discarded

    train, test = train_test_split(df, random_state=42)
    train, val = train_test_split(train, random_state=42)
    observed_labels = set(train[target_name].values.ravel())
    num_classes = len(observed_labels)

    data_config = DataConfig(
        target=target_name,
        continuous_cols=colnum,
        categorical_cols=colcat,
        continuous_feature_transform=None,  # "quantile_normal",
        normalize_continuous_features=False,
    )
    model_config = CategoryEmbeddingModelConfig(
        task="classification",
        metrics=["f1", "accuracy"],
        metrics_params=[{"num_classes": num_classes}, {}],
    )
    trainer_config = TrainerConfig(gpus=None, fast_dev_run=True)
    # Constructed but not handed to TabularModel below.
    experiment_config = ExperimentConfig(
        project_name="PyTorch Tabular Example",
        run_name="node_forest_cov",
        exp_watch="gradients",
        log_target="wandb",
        log_logits=True,
    )
    optimizer_config = OptimizerConfig()

    tabular_model = TabularModel(
        data_config=data_config,
        model_config=model_config,
        optimizer_config=optimizer_config,
        trainer_config=trainer_config,
        # experiment_config=experiment_config,
    )
    tabular_model.fit(train=train, validation=val)

    result = tabular_model.evaluate(val)
    log(result)

    test.drop(columns=target_name, inplace=True)
    pred_df = tabular_model.predict(val.iloc[:100, :])
    log(pred_df)
def main_64():
    """Cross-validate a TabNet classifier (n_d = n_a = 64) and write a submission.

    For each of 10 stratified folds a fresh ``TabularModel`` is trained with a
    class-weighted cross-entropy loss and saved to
    ``Analysis/basic_tabnet_rep{i}``; out-of-fold and test predictions are
    collected. The per-fold ``"0_probability"`` columns are averaged,
    thresholded into labels and written to ``Analysis/submission_2.csv``.
    Out-of-fold metrics and the confusion matrix are printed.
    """
    # The training split of the last fold is left in the module namespace.
    global train
    data, test_data, cat_col_names, num_col_names = data_load()
    bsize = 2500 * 3 * 2 * 2

    # Configuration objects.
    data_config = DataConfig(
        target=["target"],
        continuous_cols=num_col_names,
        categorical_cols=cat_col_names,
        num_workers=4,
    )
    trainer_config = TrainerConfig(
        auto_lr_find=True,
        batch_size=bsize,
        max_epochs=100,
        gpus=1,
    )
    optimizer_config = OptimizerConfig()
    model_config = TabNetModelConfig(
        task="classification",
        learning_rate=1e-3 * bsize / 1024,
        n_d=64,
        n_a=64,
        n_steps=5,
        gamma=1.3,
    )

    cv = StratifiedKFold(n_splits=10, shuffle=True)
    res_pred = []
    res_test = []
    for i, (train_idx, test_idx) in enumerate(
        cv.split(X=data, y=data.target.values)
    ):
        train, test = data.iloc[train_idx], data.iloc[test_idx]
        train, val = train_test_split(train, random_state=42)

        tabular_mode = TabularModel(
            data_config=data_config,
            optimizer_config=optimizer_config,
            model_config=model_config,
            trainer_config=trainer_config,
        )
        weighted_loss = get_class_weighted_cross_entropy(
            train["target"].values.ravel(), mu=0.1
        )
        tabular_mode.fit(
            train=train, validation=val, max_epochs=100, loss=weighted_loss
        )

        pred_df = tabular_mode.predict(test).loc[:, ["prediction"]]
        res_pred.append(pred_df)
        tabular_mode.save_model(f"Analysis/basic_tabnet_rep{i}")

        pred = tabular_mode.predict(test_data)
        res_test.append(pred)

    # Out-of-fold predictions, restored to the original row order.
    pred_tot = pd.concat(res_pred).sort_index()
    print_metrics(data['target'], pred_tot["prediction"], tag="Holdout")

    # Average the "0_probability" column across folds. A mean above 0.5 is
    # labelled 0 and anything else 1; the previous mapping was inverted.
    # NOTE(review): assumes "0_probability" is the probability of label 0.
    pred_df = pd.concat(
        [fold_pred.loc[:, ["0_probability"]] for fold_pred in res_test],
        axis=1,
    ).apply(np.mean, axis=1)
    pred_df2 = pred_df.map(lambda x: 0 if x > 0.5 else 1)

    sample_submisson = pd.read_csv("Data/sample_submission.csv")
    sample_submisson["target"] = pred_df2.values
    sample_submisson.to_csv("Analysis/submission_2.csv", index=False)
    print(confusion_matrix(data['target'], pred_tot["prediction"]))
def main():
    """Cross-validate a category-embedding classifier and save the last model.

    For each of 10 stratified folds a fresh ``TabularModel`` is trained with a
    class-weighted cross-entropy loss and its out-of-fold predictions are
    collected. Out-of-fold metrics and the confusion matrix are printed, and
    the model from the final fold is written to ``Analysis/basic``.
    """
    data, cat_col_names, num_col_names = data_load()
    bsize = 1024

    # Configuration objects.
    data_config = DataConfig(
        target=["target"],
        continuous_cols=num_col_names,
        categorical_cols=cat_col_names,
    )
    trainer_config = TrainerConfig(
        auto_lr_find=True,
        batch_size=bsize,
        max_epochs=100,
        gpus=1,
    )
    optimizer_config = OptimizerConfig()
    model_config = CategoryEmbeddingModelConfig(
        task="classification",
        layers="1024-512-512",
        activation="LeakyReLU",
        learning_rate=1e-3,
    )

    cv = StratifiedKFold(n_splits=10, shuffle=True)
    res_pred = []
    for train_idx, test_idx in cv.split(X=data, y=data.target.values):
        train, test = data.iloc[train_idx], data.iloc[test_idx]
        train, val = train_test_split(train, random_state=42)

        # A new model per fold; the model built before the loop in the
        # previous version was never used and has been removed.
        tabular_mode = TabularModel(
            data_config=data_config,
            model_config=model_config,
            optimizer_config=optimizer_config,
            trainer_config=trainer_config,
        )
        weighted_loss = get_class_weighted_cross_entropy(
            train["target"].values.ravel(), mu=0.1
        )
        tabular_mode.fit(
            train=train, validation=val, max_epochs=100, loss=weighted_loss
        )

        pred_df = tabular_mode.predict(test).loc[:, ["prediction"]]
        res_pred.append(pred_df)

    # Out-of-fold predictions, restored to the original row order.
    pred_tot = pd.concat(res_pred).sort_index()
    print_metrics(data['target'], pred_tot["prediction"], tag="Holdout")
    # The matrix was previously computed and thrown away; print it instead.
    print(confusion_matrix(data['target'], pred_tot["prediction"]))

    # Persists the model trained on the last fold.
    tabular_mode.save_model("Analysis/basic")
def test_pretrained_backbone(
    regression_data,
    model_config_class,
    continuous_cols,
    categorical_cols,
    custom_metrics,
    custom_loss,
    custom_optimizer,
    tmpdir,
):
    """Pretrain a backbone with an SSL task, then reuse it for regression.

    Phase 1 fits a model configured with ``task="ssl"``, checks that
    ``predict`` raises ``AssertionError`` and that evaluation reports
    ``test_mean_squared_error``, then saves and reloads the model.
    Phase 2 fits a regression model with the reloaded backbone passed as
    ``trained_backbone`` and checks the same evaluation key.

    Args:
        model_config_class: A ``(config_class, kwargs_dict)`` pair.
        tmpdir: Temporary directory fixture used for the saved model.
    """
    train, test, target = regression_data
    data_config = DataConfig(
        target=target,
        continuous_cols=continuous_cols,
        categorical_cols=categorical_cols,
    )
    config_cls, base_params = model_config_class
    optimizer_config = OptimizerConfig()
    # Keyword arguments shared by both fit() calls.
    fit_kwargs = dict(
        train=train,
        test=test,
        metrics=custom_metrics,
        loss=custom_loss,
        optimizer=custom_optimizer,
        optimizer_params={},
    )

    # Phase 1: self-supervised pretraining.
    # Build a fresh kwargs dict instead of writing into the one received from
    # the fixture, which may be shared with other parametrized invocations.
    ssl_params = {
        **base_params,
        "task": "ssl",
        "ssl_task": "Denoising",
        "aug_task": "cutmix",
    }
    model_config = config_cls(**ssl_params)
    trainer_config = TrainerConfig(
        max_epochs=3,
        checkpoints=None,
        early_stopping=None,
        gpus=None,
        fast_dev_run=True,
    )
    tabular_model = TabularModel(
        data_config=data_config,
        model_config=model_config,
        optimizer_config=optimizer_config,
        trainer_config=trainer_config,
    )
    tabular_model.fit(**fit_kwargs)
    result_1 = tabular_model.evaluate(test)
    with pytest.raises(AssertionError):
        tabular_model.predict(test)
    assert "test_mean_squared_error" in result_1[0].keys()

    # Round-trip the pretrained model through disk.
    sv_dir = tmpdir.mkdir("saved_model")
    tabular_model.save_model(str(sv_dir))
    old_mdl = TabularModel.load_from_checkpoint(str(sv_dir))

    # Phase 2: supervised regression on top of the pretrained backbone.
    regression_params = {
        **base_params,
        "task": "regression",
        "ssl_task": None,
        "aug_task": None,
    }
    model_config = config_cls(**regression_params)
    trainer_config = TrainerConfig(
        max_epochs=1,
        checkpoints=None,
        early_stopping=None,
        gpus=None,
        fast_dev_run=True,
    )
    tabular_model = TabularModel(
        data_config=data_config,
        model_config=model_config,
        optimizer_config=optimizer_config,
        trainer_config=trainer_config,
    )
    tabular_model.fit(trained_backbone=old_mdl.model.backbone, **fit_kwargs)
    result_2 = tabular_model.evaluate(test)
    assert "test_mean_squared_error" in result_2[0].keys()