def test_history_callback_w_tabtransformer(
    optimizers, schedulers, len_loss_output, len_lr_output, init_lr, schedulers_type
):
    trainer_tt = Trainer(
        model_tt,
        objective="binary",
        optimizers=optimizers,
        lr_schedulers=schedulers,
        callbacks=[LRHistory(n_epochs=5)],
        verbose=0,
    )
    trainer_tt.fit(
        X_wide=X_wide,
        X_tab=X_tab,
        target=target,
        n_epochs=5,
        batch_size=16,
    )
    out = []
    out.append(len(trainer_tt.history["train_loss"]) == len_loss_output)

    try:
        lr_list = list(chain.from_iterable(trainer_tt.lr_history["lr_deeptabular_0"]))
    except TypeError:
        lr_list = trainer_tt.lr_history["lr_deeptabular_0"]
    except Exception:
        lr_list = trainer_tt.lr_history["lr_0"]

    out.append(len(lr_list) == len_lr_output)

    if init_lr is not None and schedulers_type == "step":
        out.append(lr_list[-1] == init_lr / 10)
    elif init_lr is not None and schedulers_type == "cyclic":
        out.append(lr_list[-1] == init_lr)

    assert all(out)

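# Illustrative aside (not part of the original test suite): the try/except above exists
# because LRHistory can record learning rates either as a flat list of floats or as a list
# of per-epoch lists, and under a component-specific key ("lr_deeptabular_0") or a generic
# one ("lr_0"), depending on how optimizers and schedulers were passed. A hypothetical
# helper expressing the same fallback logic (`chain` is assumed to be imported from
# itertools at the top of the module):
def _flatten_lr_history(lr_history):
    key = "lr_deeptabular_0" if "lr_deeptabular_0" in lr_history else "lr_0"
    values = lr_history[key]
    try:
        # nested case: one list of learning rates per epoch
        return list(chain.from_iterable(values))
    except TypeError:
        # already a flat list of floats
        return values
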
def test_basic_run_with_metrics_multiclass():
    wide = Wide(np.unique(X_wide).shape[0], 3)
    deeptabular = TabMlp(
        mlp_hidden_dims=[32, 16],
        mlp_dropout=[0.5, 0.5],
        column_idx={k: v for v, k in enumerate(colnames)},
        embed_input=embed_input,
        continuous_cols=colnames[-5:],
    )
    model = WideDeep(wide=wide, deeptabular=deeptabular, pred_dim=3)
    trainer = Trainer(model, objective="multiclass", metrics=[Accuracy], verbose=False)
    trainer.fit(
        X_wide=X_wide,
        X_tab=X_tab,
        target=target_multi,
        n_epochs=1,
        batch_size=16,
        val_split=0.2,
    )
    assert (
        "train_loss" in trainer.history.keys()
        and "train_acc" in trainer.history.keys()
    )

def test_fit_objectives_tab_transformer(
    X_wide,
    X_tab,
    target,
    objective,
    X_wide_test,
    X_tab_test,
    X_test,
    pred_dim,
    probs_dim,
):
    wide = Wide(np.unique(X_wide).shape[0], pred_dim)
    tab_transformer = TabTransformer(
        column_idx={k: v for v, k in enumerate(colnames)},
        embed_input=embed_input_tt,
        continuous_cols=colnames[5:],
    )
    model = WideDeep(wide=wide, deeptabular=tab_transformer, pred_dim=pred_dim)
    trainer = Trainer(model, objective=objective, verbose=0)
    trainer.fit(X_wide=X_wide, X_tab=X_tab, target=target, batch_size=16)
    preds = trainer.predict(X_wide=X_wide, X_tab=X_tab, X_test=X_test)
    if objective == "binary":
        assert preds.shape[0] == 32
    else:
        probs = trainer.predict_proba(X_wide=X_wide, X_tab=X_tab, X_test=X_test)
        assert preds.shape[0] == 32 and probs.shape[1] == probs_dim

def test_model_checkpoint(save_best_only, max_save, n_files):
    wide = Wide(np.unique(X_wide).shape[0], 1)
    deeptabular = TabMlp(
        mlp_hidden_dims=[32, 16],
        mlp_dropout=[0.5, 0.5],
        column_idx=column_idx,
        embed_input=embed_input,
        continuous_cols=colnames[-5:],
    )
    model = WideDeep(wide=wide, deeptabular=deeptabular)
    trainer = Trainer(
        model=model,
        objective="binary",
        callbacks=[
            ModelCheckpoint(
                "tests/test_model_functioning/weights/test_weights",
                save_best_only=save_best_only,
                max_save=max_save,
            )
        ],
        verbose=0,
    )
    trainer.fit(X_wide=X_wide, X_tab=X_tab, target=target, n_epochs=5, val_split=0.2)
    n_saved = len(os.listdir("tests/test_model_functioning/weights/"))
    shutil.rmtree("tests/test_model_functioning/weights/")
    assert n_saved <= n_files

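# Illustrative aside (not part of the original test suite): the assertion above only bounds
# the number of files left on disk because ModelCheckpoint's `max_save` caps how many
# checkpoints are kept (and `save_best_only` writes one only when the monitored metric
# improves). A hypothetical sketch of the "keep only the newest k files" bookkeeping,
# assuming max_save > 0 (`os` is already imported by this module):
def _prune_checkpoints(directory, max_save):
    paths = sorted(
        (os.path.join(directory, f) for f in os.listdir(directory)),
        key=os.path.getmtime,
    )
    # delete everything except the `max_save` most recently written files
    for stale in paths[:-max_save]:
        os.remove(stale)
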
def test_save_and_load_dict():
    wide = Wide(np.unique(X_wide).shape[0], 1)
    tabmlp = TabMlp(
        mlp_hidden_dims=[32, 16],
        column_idx={k: v for v, k in enumerate(colnames)},
        embed_input=embed_input,
        continuous_cols=colnames[-5:],
    )
    model1 = WideDeep(wide=deepcopy(wide), deeptabular=deepcopy(tabmlp))
    trainer1 = Trainer(model1, objective="binary", verbose=0)
    trainer1.fit(
        X_wide=X_wide,
        X_tab=X_tab,
        X_text=X_text,
        X_img=X_img,
        target=target,
        batch_size=16,
    )
    wide_weights = model1.wide.wide_linear.weight.data
    trainer1.save_model_state_dict("tests/test_model_functioning/model_dir/model_d.t")

    model2 = WideDeep(wide=wide, deeptabular=tabmlp)
    trainer2 = Trainer(model2, objective="binary", verbose=0)
    trainer2.load_model_state_dict("tests/test_model_functioning/model_dir/model_d.t")
    n_wide_weights = trainer2.model.wide.wide_linear.weight.data

    shutil.rmtree("tests/test_model_functioning/model_dir/")

    assert torch.allclose(wide_weights, n_wide_weights)

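# Illustrative aside (not part of the original test suite): the round-trip above goes
# through Trainer.save_model_state_dict / load_model_state_dict. Conceptually the same
# equivalence check can be written with plain PyTorch, shown here on a throwaway
# nn.Linear (a sketch, not the library's implementation; the file name is hypothetical):
def _state_dict_roundtrip_demo(path="state_dict_demo.pt"):
    layer1 = nn.Linear(4, 2)
    torch.save(layer1.state_dict(), path)

    layer2 = nn.Linear(4, 2)
    layer2.load_state_dict(torch.load(path))

    # parameters match exactly after loading the saved state dict
    assert torch.allclose(layer1.weight.data, layer2.weight.data)
    os.remove(path)
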
def test_fit_objectives(
    X_wide,
    X_tab,
    target,
    objective,
    X_wide_test,
    X_tab_test,
    X_test,
    pred_dim,
    probs_dim,
):
    wide = Wide(np.unique(X_wide).shape[0], pred_dim)
    deeptabular = TabMlp(
        mlp_hidden_dims=[32, 16],
        mlp_dropout=[0.5, 0.5],
        column_idx=column_idx,
        embed_input=embed_input,
        continuous_cols=colnames[-5:],
    )
    model = WideDeep(wide=wide, deeptabular=deeptabular, pred_dim=pred_dim)
    trainer = Trainer(model, objective=objective, verbose=0)
    trainer.fit(X_wide=X_wide, X_tab=X_tab, target=target, batch_size=16)
    preds = trainer.predict(X_wide=X_wide, X_tab=X_tab, X_test=X_test)
    if objective == "binary":
        assert preds.shape[0] == 32
    else:
        probs = trainer.predict_proba(X_wide=X_wide, X_tab=X_tab, X_test=X_test)
        assert preds.shape[0] == 32 and probs.shape[1] == probs_dim

def test_xtrain_xval_assertion(
    X_wide,
    X_tab,
    X_text,
    X_img,
    X_train,
    X_val,
    target,
):
    model = WideDeep(
        wide=wide, deeptabular=deeptabular, deeptext=deeptext, deepimage=deepimage
    )
    trainer = Trainer(model, objective="binary", verbose=0)
    with pytest.raises(AssertionError):
        trainer.fit(
            X_wide=X_wide,
            X_tab=X_tab,
            X_text=X_text,
            X_img=X_img,
            X_train=X_train,
            X_val=X_val,
            target=target,
            batch_size=16,
        )

def test_widedeep_inputs(
    X_wide,
    X_tab,
    X_text,
    X_img,
    X_train,
    X_val,
    target,
    val_split,
    transforms,
):
    model = WideDeep(
        wide=wide, deeptabular=deeptabular, deeptext=deeptext, deepimage=deepimage
    )
    trainer = Trainer(model, objective="binary", transforms=transforms, verbose=0)
    trainer.fit(
        X_wide=X_wide,
        X_tab=X_tab,
        X_text=X_text,
        X_img=X_img,
        X_train=X_train,
        X_val=X_val,
        target=target,
        val_split=val_split,
        batch_size=16,
    )
    assert trainer.history["train_loss"] is not None

def test_save_and_load():
    model = WideDeep(wide=wide, deeptabular=tabmlp)
    trainer = Trainer(model, objective="binary", verbose=0)
    trainer.fit(X_wide=X_wide, X_tab=X_tab, target=target, batch_size=16)
    wide_weights = model.wide.wide_linear.weight.data
    trainer.save_model("tests/test_model_functioning/model_dir/model.t")
    n_model = Trainer.load_model("tests/test_model_functioning/model_dir/model.t")
    n_wide_weights = n_model.wide.wide_linear.weight.data
    assert torch.allclose(wide_weights, n_wide_weights)

def test_basic_run_with_metrics_binary(wide, deeptabular):
    model = WideDeep(wide=wide, deeptabular=deeptabular)
    trainer = Trainer(model, objective="binary", metrics=[Accuracy], verbose=False)
    trainer.fit(
        X_wide=X_wide,
        X_tab=X_tab,
        target=target,
        n_epochs=1,
        batch_size=16,
        val_split=0.2,
    )
    assert (
        "train_loss" in trainer.history.keys()
        and "train_acc" in trainer.history.keys()
    )

def test_individual_inputs(
    wide, deeptabular, deeptext, deepimage, X_wide, X_tab, X_text, X_img, target
):
    model = WideDeep(
        wide=wide, deeptabular=deeptabular, deeptext=deeptext, deepimage=deepimage
    )
    trainer = Trainer(model, objective="binary", verbose=0)
    trainer.fit(
        X_wide=X_wide,
        X_tab=X_tab,
        X_text=X_text,
        X_img=X_img,
        target=target,
        batch_size=16,
    )
    # check that it has run successfully
    assert len(trainer.history) == 1

def test_aliases():
    wide = Wide(np.unique(X_wide).shape[0], 1)
    deeptabular = TabMlp(
        mlp_hidden_dims=[32, 16],
        mlp_dropout=[0.5, 0.5],
        column_idx=column_idx,
        embed_input=embed_input,
        continuous_cols=colnames[-5:],
    )
    model = WideDeep(wide=wide, deeptabular=deeptabular, pred_dim=1)
    trainer = Trainer(model, loss="regression", verbose=0)
    trainer.fit(
        X_wide=X_wide, X_tab=X_tab, target=target_regres, batch_size=16, warmup=True
    )
    assert (
        "train_loss" in trainer.history.keys()
        and trainer.__wd_aliases_used["objective"] == "loss"
        and trainer.__wd_aliases_used["finetune"] == "warmup"
    )

def test_fit_with_regression_and_metric():
    wide = Wide(np.unique(X_wide).shape[0], 1)
    deeptabular = TabMlp(
        mlp_hidden_dims=[32, 16],
        mlp_dropout=[0.5, 0.5],
        column_idx=column_idx,
        embed_input=embed_input,
        continuous_cols=colnames[-5:],
    )
    model = WideDeep(wide=wide, deeptabular=deeptabular, pred_dim=1)
    trainer = Trainer(model, objective="regression", metrics=[R2Score], verbose=0)
    trainer.fit(X_wide=X_wide, X_tab=X_tab, target=target_regres, batch_size=16)
    assert "train_r2" in trainer.history.keys()

def test_head_layers_individual_components(
    deeptabular, deeptext, deepimage, X_tab, X_text, X_img, target
):
    model = WideDeep(
        deeptabular=deeptabular,
        deeptext=deeptext,
        deepimage=deepimage,
        head_hidden_dims=[8, 4],
    )  # noqa: F841
    trainer = Trainer(model, objective="binary", verbose=0)
    trainer.fit(
        X_wide=X_wide,
        X_tab=X_tab,
        X_text=X_text,
        X_img=X_img,
        target=target,
        batch_size=16,
    )
    # check that it has run successfully
    assert len(trainer.history) == 1

def test_fit_with_deephead():
    wide = Wide(np.unique(X_wide).shape[0], 1)
    deeptabular = TabMlp(
        mlp_hidden_dims=[32, 16],
        column_idx=column_idx,
        embed_input=embed_input,
        continuous_cols=colnames[-5:],
    )
    # the custom head's first Linear takes the deeptabular output size
    # (the last entry of mlp_hidden_dims, i.e. 16) as its input dimension
    deephead = nn.Sequential(nn.Linear(16, 8), nn.Linear(8, 4))
    model = WideDeep(wide=wide, deeptabular=deeptabular, pred_dim=1, deephead=deephead)
    trainer = Trainer(model, objective="binary", verbose=0)
    trainer.fit(X_wide=X_wide, X_tab=X_tab, target=target_binary, batch_size=16)
    preds = trainer.predict(X_wide=X_wide, X_tab=X_tab, X_test=X_test)
    probs = trainer.predict_proba(X_wide=X_wide, X_tab=X_tab, X_test=X_test)
    assert preds.shape[0] == 32 and probs.shape[1] == 2

def test_early_stop():
    wide = Wide(np.unique(X_wide).shape[0], 1)
    deeptabular = TabMlp(
        mlp_hidden_dims=[32, 16],
        mlp_dropout=[0.5, 0.5],
        column_idx=column_idx,
        embed_input=embed_input,
        continuous_cols=colnames[-5:],
    )
    model = WideDeep(wide=wide, deeptabular=deeptabular)
    trainer = Trainer(
        model=model,
        objective="binary",
        callbacks=[
            EarlyStopping(
                min_delta=5.0, patience=3, restore_best_weights=True, verbose=1
            )
        ],
        verbose=1,
    )
    trainer.fit(X_wide=X_wide, X_tab=X_tab, target=target, val_split=0.2, n_epochs=5)
    # length of history = patience + 1
    assert len(trainer.history["train_loss"]) == 3 + 1

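# Illustrative aside (not part of the original test suite): with min_delta=5.0 the monitored
# loss essentially never improves "enough", so every epoch after the first counts as
# non-improving and training stops once `patience` such epochs have accumulated, leaving
# patience + 1 entries in the history. A hypothetical, self-contained sketch of that
# counting logic:
def _early_stop_epochs(losses, patience=3, min_delta=5.0):
    best = float("inf")
    wait = 0
    epochs_run = 0
    for loss in losses:
        epochs_run += 1
        if best - loss > min_delta:  # an improvement larger than min_delta
            best = loss
            wait = 0
        else:
            wait += 1
            if wait >= patience:
                break
    return epochs_run


# e.g. _early_stop_epochs([0.9, 0.8, 0.7, 0.6, 0.5]) == 4 == patience + 1
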
def test_predict_with_individual_component(
    wide, deeptabular, deeptext, deepimage, X_wide, X_tab, X_text, X_img, target
):
    model = WideDeep(
        wide=wide, deeptabular=deeptabular, deeptext=deeptext, deepimage=deepimage
    )
    trainer = Trainer(model, objective="binary", verbose=0)
    trainer.fit(
        X_wide=X_wide,
        X_tab=X_tab,
        X_text=X_text,
        X_img=X_img,
        target=target,
        batch_size=16,
    )
    # simply checking that it runs and produces outputs
    preds = trainer.predict(X_wide=X_wide, X_tab=X_tab, X_text=X_text, X_img=X_img)
    assert preds.shape[0] == 32 and "train_loss" in trainer.history