# Example 1
def test_history_callback_w_tabtransformer(
    optimizers, schedulers, len_loss_output, len_lr_output, init_lr, schedulers_type
):
    """Check that LRHistory records the expected number of learning rates
    and the expected final value for a TabTransformer-based model.

    BUG FIX: the fallback branch used a blanket ``except Exception``; a
    missing history key raises ``KeyError``, so catch exactly that.
    """
    trainer_tt = Trainer(
        model_tt,
        objective="binary",
        optimizers=optimizers,
        lr_schedulers=schedulers,
        callbacks=[LRHistory(n_epochs=5)],
        verbose=0,
    )
    trainer_tt.fit(
        X_wide=X_wide,
        X_tab=X_tab,
        target=target,
        n_epochs=5,
        batch_size=16,
    )
    out = []
    out.append(len(trainer_tt.history["train_loss"]) == len_loss_output)
    try:
        # per-parameter-group histories are stored as a list of lists
        lr_list = list(chain.from_iterable(trainer_tt.lr_history["lr_deeptabular_0"]))
    except TypeError:
        # already a flat list of learning rates
        lr_list = trainer_tt.lr_history["lr_deeptabular_0"]
    except KeyError:
        # a single optimizer for the whole model stores its history under "lr_0"
        lr_list = trainer_tt.lr_history["lr_0"]
    out.append(len(lr_list) == len_lr_output)
    if init_lr is not None and schedulers_type == "step":
        # a step scheduler should end one decay (factor 10) below the initial lr
        out.append(lr_list[-1] == init_lr / 10)
    elif init_lr is not None and schedulers_type == "cyclic":
        # a cyclic scheduler should return to the initial lr
        out.append(lr_list[-1] == init_lr)
    assert all(out)
def test_basic_run_with_metrics_multiclass():
    """Run one epoch of multiclass training and verify that both the loss
    and the accuracy metric appear in the trainer history."""
    col_to_idx = {name: pos for pos, name in enumerate(colnames)}
    wide = Wide(np.unique(X_wide).shape[0], 3)
    deeptabular = TabMlp(
        mlp_hidden_dims=[32, 16],
        mlp_dropout=[0.5, 0.5],
        column_idx=col_to_idx,
        embed_input=embed_input,
        continuous_cols=colnames[-5:],
    )
    model = WideDeep(wide=wide, deeptabular=deeptabular, pred_dim=3)
    trainer = Trainer(
        model, objective="multiclass", metrics=[Accuracy], verbose=False
    )
    trainer.fit(
        X_wide=X_wide,
        X_tab=X_tab,
        target=target_multi,
        n_epochs=1,
        batch_size=16,
        val_split=0.2,
    )
    recorded = trainer.history
    assert "train_loss" in recorded and "train_acc" in recorded
# Example 3
def test_fit_objectives_tab_transformer(
    X_wide,
    X_tab,
    target,
    objective,
    X_wide_test,
    X_tab_test,
    X_test,
    pred_dim,
    probs_dim,
):
    """Fit a Wide + TabTransformer model for the given objective and check
    the prediction shape (and, for non-binary objectives, the probability
    shape).

    BUG FIX: the original ``assert preds.shape[0] == 32, probs.shape[1] ==
    probs_dim`` used the second comparison as the assert *message*, so the
    probability shape was never checked — and referenced a possibly-unbound
    ``probs`` for the binary objective.
    """
    wide = Wide(np.unique(X_wide).shape[0], pred_dim)
    tab_transformer = TabTransformer(
        column_idx={k: v for v, k in enumerate(colnames)},
        embed_input=embed_input_tt,
        continuous_cols=colnames[5:],
    )
    model = WideDeep(wide=wide, deeptabular=tab_transformer, pred_dim=pred_dim)
    trainer = Trainer(model, objective=objective, verbose=0)
    trainer.fit(X_wide=X_wide, X_tab=X_tab, target=target, batch_size=16)
    preds = trainer.predict(X_wide=X_wide, X_tab=X_tab, X_test=X_test)
    # one prediction per test row
    assert preds.shape[0] == 32
    if objective != "binary":
        probs = trainer.predict_proba(X_wide=X_wide, X_tab=X_tab, X_test=X_test)
        assert probs.shape[1] == probs_dim
# Example 4
def test_model_checkpoint(save_best_only, max_save, n_files):
    """Train with a ModelCheckpoint callback and verify that no more than
    ``n_files`` weight files end up on disk."""
    checkpoint = ModelCheckpoint(
        "tests/test_model_functioning/weights/test_weights",
        save_best_only=save_best_only,
        max_save=max_save,
    )
    wide = Wide(np.unique(X_wide).shape[0], 1)
    deeptabular = TabMlp(
        mlp_hidden_dims=[32, 16],
        mlp_dropout=[0.5, 0.5],
        column_idx=column_idx,
        embed_input=embed_input,
        continuous_cols=colnames[-5:],
    )
    model = WideDeep(wide=wide, deeptabular=deeptabular)
    trainer = Trainer(
        model=model,
        objective="binary",
        callbacks=[checkpoint],
        verbose=0,
    )
    trainer.fit(X_wide=X_wide, X_tab=X_tab, target=target, n_epochs=5, val_split=0.2)
    n_saved = len(os.listdir("tests/test_model_functioning/weights/"))

    # clean up the files written during the run
    shutil.rmtree("tests/test_model_functioning/weights/")

    assert n_saved <= n_files
def test_save_and_load_dict():
    """Save a trained model's state dict, load it into a fresh model and
    check that the wide-layer weights survive the round trip."""
    state_path = "tests/test_model_functioning/model_dir/model_d.t"
    wide = Wide(np.unique(X_wide).shape[0], 1)
    tabmlp = TabMlp(
        mlp_hidden_dims=[32, 16],
        column_idx={name: pos for pos, name in enumerate(colnames)},
        embed_input=embed_input,
        continuous_cols=colnames[-5:],
    )
    # train on deep copies so the originals stay untrained for model2
    model1 = WideDeep(wide=deepcopy(wide), deeptabular=deepcopy(tabmlp))
    trainer1 = Trainer(model1, objective="binary", verbose=0)
    trainer1.fit(
        X_wide=X_wide,
        X_tab=X_tab,
        X_text=X_text,
        X_img=X_img,
        target=target,
        batch_size=16,
    )
    saved_weights = model1.wide.wide_linear.weight.data
    trainer1.save_model_state_dict(state_path)
    model2 = WideDeep(wide=wide, deeptabular=tabmlp)
    trainer2 = Trainer(model2, objective="binary", verbose=0)
    trainer2.load_model_state_dict(state_path)
    loaded_weights = trainer2.model.wide.wide_linear.weight.data

    # clean up the files written during the run
    shutil.rmtree("tests/test_model_functioning/model_dir/")

    assert torch.allclose(saved_weights, loaded_weights)
# Example 6
def test_fit_objectives(
    X_wide,
    X_tab,
    target,
    objective,
    X_wide_test,
    X_tab_test,
    X_test,
    pred_dim,
    probs_dim,
):
    """Fit a Wide + TabMlp model for the given objective and check the
    prediction shape (and, for non-binary objectives, the probability
    shape).

    BUG FIX: the original ``assert preds.shape[0] == 32, probs.shape[1] ==
    probs_dim`` used the second comparison as the assert *message*, so the
    probability shape was never checked — and referenced a possibly-unbound
    ``probs`` for the binary objective.
    """
    wide = Wide(np.unique(X_wide).shape[0], pred_dim)
    deeptabular = TabMlp(
        mlp_hidden_dims=[32, 16],
        mlp_dropout=[0.5, 0.5],
        column_idx=column_idx,
        embed_input=embed_input,
        continuous_cols=colnames[-5:],
    )
    model = WideDeep(wide=wide, deeptabular=deeptabular, pred_dim=pred_dim)
    trainer = Trainer(model, objective=objective, verbose=0)
    trainer.fit(X_wide=X_wide, X_tab=X_tab, target=target, batch_size=16)
    preds = trainer.predict(X_wide=X_wide, X_tab=X_tab, X_test=X_test)
    # one prediction per test row
    assert preds.shape[0] == 32
    if objective != "binary":
        probs = trainer.predict_proba(X_wide=X_wide, X_tab=X_tab, X_test=X_test)
        assert probs.shape[1] == probs_dim
# Example 7
def test_xtrain_xval_assertion(
    X_wide,
    X_tab,
    X_text,
    X_img,
    X_train,
    X_val,
    target,
):
    """Passing both the individual input arrays and X_train/X_val at the
    same time is ambiguous and must raise an AssertionError."""
    model = WideDeep(
        wide=wide, deeptabular=deeptabular, deeptext=deeptext, deepimage=deepimage
    )
    trainer = Trainer(model, objective="binary", verbose=0)
    conflicting_inputs = dict(
        X_wide=X_wide,
        X_tab=X_tab,
        X_text=X_text,
        X_img=X_img,
        X_train=X_train,
        X_val=X_val,
        target=target,
        batch_size=16,
    )
    with pytest.raises(AssertionError):
        trainer.fit(**conflicting_inputs)
# Example 8
def test_widedeep_inputs(
    X_wide,
    X_tab,
    X_text,
    X_img,
    X_train,
    X_val,
    target,
    val_split,
    transforms,
):
    """Fit with the parametrized input combination and check that a train
    loss history was produced."""
    model = WideDeep(
        wide=wide, deeptabular=deeptabular, deeptext=deeptext, deepimage=deepimage
    )
    trainer = Trainer(
        model, objective="binary", transforms=transforms, verbose=0
    )
    trainer.fit(
        X_wide=X_wide,
        X_tab=X_tab,
        X_text=X_text,
        X_img=X_img,
        X_train=X_train,
        X_val=X_val,
        target=target,
        val_split=val_split,
        batch_size=16,
    )
    assert trainer.history["train_loss"] is not None
def test_save_and_load():
    """Save a whole trained model to disk, reload it and verify the
    wide-layer weights are preserved."""
    model_path = "tests/test_model_functioning/model_dir/model.t"
    model = WideDeep(wide=wide, deeptabular=tabmlp)
    trainer = Trainer(model, objective="binary", verbose=0)
    trainer.fit(X_wide=X_wide, X_tab=X_tab, target=target, batch_size=16)
    weights_before = model.wide.wide_linear.weight.data
    trainer.save_model(model_path)
    reloaded = Trainer.load_model(model_path)
    weights_after = reloaded.wide.wide_linear.weight.data
    assert torch.allclose(weights_before, weights_after)
def test_basic_run_with_metrics_binary(wide, deeptabular):
    """One epoch of binary training must record both the loss and the
    accuracy metric in the history."""
    model = WideDeep(wide=wide, deeptabular=deeptabular)
    trainer = Trainer(
        model, objective="binary", metrics=[Accuracy], verbose=False
    )
    trainer.fit(
        X_wide=X_wide,
        X_tab=X_tab,
        target=target,
        n_epochs=1,
        batch_size=16,
        val_split=0.2,
    )
    recorded = trainer.history
    assert "train_loss" in recorded and "train_acc" in recorded
# Example 11
def test_individual_inputs(wide, deeptabular, deeptext, deepimage, X_wide,
                           X_tab, X_text, X_img, target):
    """Smoke test: fitting with every component's individual input array
    runs end to end."""
    model = WideDeep(
        wide=wide, deeptabular=deeptabular, deeptext=deeptext, deepimage=deepimage
    )
    trainer = Trainer(model, objective="binary", verbose=0)
    component_inputs = dict(
        X_wide=X_wide,
        X_tab=X_tab,
        X_text=X_text,
        X_img=X_img,
    )
    trainer.fit(target=target, batch_size=16, **component_inputs)
    # check it has run successfully
    assert len(trainer.history) == 1
# Example 12
def test_aliases():
    """The ``loss`` and ``warmup`` aliases (for ``objective`` and
    ``finetune``) must be accepted and recorded by the Trainer."""
    wide = Wide(np.unique(X_wide).shape[0], 1)
    deeptabular = TabMlp(
        mlp_hidden_dims=[32, 16],
        mlp_dropout=[0.5, 0.5],
        column_idx=column_idx,
        embed_input=embed_input,
        continuous_cols=colnames[-5:],
    )
    model = WideDeep(wide=wide, deeptabular=deeptabular, pred_dim=1)
    # "loss" is an alias of "objective"
    trainer = Trainer(model, loss="regression", verbose=0)
    # "warmup" is an alias of "finetune"
    trainer.fit(
        X_wide=X_wide,
        X_tab=X_tab,
        target=target_regres,
        batch_size=16,
        warmup=True,
    )
    assert ("train_loss" in trainer.history.keys()
            and trainer.__wd_aliases_used["objective"] == "loss"
            and trainer.__wd_aliases_used["finetune"] == "warmup")
# Example 13
def test_fit_with_regression_and_metric():
    """A regression objective with the R2Score metric must record
    ``train_r2`` in the history."""
    wide = Wide(np.unique(X_wide).shape[0], 1)
    deeptabular = TabMlp(
        mlp_hidden_dims=[32, 16],
        mlp_dropout=[0.5, 0.5],
        column_idx=column_idx,
        embed_input=embed_input,
        continuous_cols=colnames[-5:],
    )
    model = WideDeep(wide=wide, deeptabular=deeptabular, pred_dim=1)
    trainer = Trainer(
        model, objective="regression", metrics=[R2Score], verbose=0
    )
    trainer.fit(X_wide=X_wide, X_tab=X_tab, target=target_regres, batch_size=16)
    assert "train_r2" in trainer.history
# Example 14
def test_head_layers_individual_components(deeptabular, deeptext, deepimage,
                                           X_tab, X_text, X_img, target):
    """A WideDeep model built without a wide component but with a custom
    head must still train on all component inputs."""
    model = WideDeep(
        deeptabular=deeptabular,
        deeptext=deeptext,
        deepimage=deepimage,
        head_hidden_dims=[8, 4],
    )  # noqa: F841
    trainer = Trainer(model, objective="binary", verbose=0)
    trainer.fit(
        X_wide=X_wide,  # module-level array; not a parameter of this test
        X_tab=X_tab,
        X_text=X_text,
        X_img=X_img,
        target=target,
        batch_size=16,
    )
    # check it has run successfully
    assert len(trainer.history) == 1
# Example 15
def test_fit_with_deephead():
    """Fit a model with a custom ``deephead`` and check prediction shapes.

    BUG FIX: the original ``assert preds.shape[0] == 32, probs.shape[1] ==
    2`` used the second comparison as the assert *message*, so the
    probability shape was never actually checked.
    """
    wide = Wide(np.unique(X_wide).shape[0], 1)
    deeptabular = TabMlp(
        mlp_hidden_dims=[32, 16],
        column_idx=column_idx,
        embed_input=embed_input,
        continuous_cols=colnames[-5:],
    )
    # custom head stacked on top of the wide + deeptabular output
    deephead = nn.Sequential(nn.Linear(16, 8), nn.Linear(8, 4))
    model = WideDeep(
        wide=wide, deeptabular=deeptabular, pred_dim=1, deephead=deephead
    )
    trainer = Trainer(model, objective="binary", verbose=0)
    trainer.fit(X_wide=X_wide, X_tab=X_tab, target=target_binary, batch_size=16)
    preds = trainer.predict(X_wide=X_wide, X_tab=X_tab, X_test=X_test)
    probs = trainer.predict_proba(X_wide=X_wide, X_tab=X_tab, X_test=X_test)
    # one prediction per test row; binary probabilities have two columns
    assert preds.shape[0] == 32
    assert probs.shape[1] == 2
# Example 16
def test_early_stop():
    """With an unreachably large ``min_delta``, EarlyStopping must halt
    training after ``patience`` epochs without improvement."""
    patience = 3
    wide = Wide(np.unique(X_wide).shape[0], 1)
    deeptabular = TabMlp(
        mlp_hidden_dims=[32, 16],
        mlp_dropout=[0.5, 0.5],
        column_idx=column_idx,
        embed_input=embed_input,
        continuous_cols=colnames[-5:],
    )
    model = WideDeep(wide=wide, deeptabular=deeptabular)
    early_stopping = EarlyStopping(
        min_delta=5.0, patience=patience, restore_best_weights=True, verbose=1
    )
    trainer = Trainer(
        model=model,
        objective="binary",
        callbacks=[early_stopping],
        verbose=1,
    )
    trainer.fit(X_wide=X_wide, X_tab=X_tab, target=target, val_split=0.2, n_epochs=5)
    # length of history = patience + 1
    assert len(trainer.history["train_loss"]) == patience + 1
def test_predict_with_individual_component(wide, deeptabular, deeptext,
                                           deepimage, X_wide, X_tab, X_text,
                                           X_img, target):
    """Fit on all components, then check that predict runs and returns one
    prediction per input row."""
    model = WideDeep(
        wide=wide, deeptabular=deeptabular, deeptext=deeptext, deepimage=deepimage
    )
    trainer = Trainer(model, objective="binary", verbose=0)
    component_inputs = dict(
        X_wide=X_wide,
        X_tab=X_tab,
        X_text=X_text,
        X_img=X_img,
    )
    trainer.fit(target=target, batch_size=16, **component_inputs)
    # simply checking that it runs and produces outputs
    preds = trainer.predict(**component_inputs)

    assert preds.shape[0] == 32 and "train_loss" in trainer.history