Ejemplo n.º 1
0
def test_early_stop():
    """Check that EarlyStopping cuts training short.

    A Wide + DeepDense model is compiled for the "binary" method with an
    EarlyStopping callback and fitted for up to 5 epochs. The test expects
    the recorded training-loss history to hold ``patience + 1`` entries.
    """
    patience = 3
    stopper = EarlyStopping(
        min_delta=0.1, patience=patience, restore_best_weights=True, verbose=1
    )

    # Components are built in the same order as before: wide first, then deep.
    wide = Wide(100, 1)
    deepdense = DeepDense(
        hidden_layers=[32, 16],
        dropout=[0.5, 0.5],
        deep_column_idx=deep_column_idx,
        embed_input=embed_input,
        continuous_cols=colnames[-5:],
    )
    model = WideDeep(wide=wide, deepdense=deepdense)
    model.compile(method="binary", callbacks=[stopper], verbose=1)

    fit_kwargs = dict(
        X_wide=X_wide,
        X_deep=X_deep,
        target=target,
        val_split=0.2,
        n_epochs=5,
    )
    model.fit(**fit_kwargs)

    # One history entry per epoch that actually ran: patience + 1 expected.
    train_losses = model.history._history["train_loss"]
    assert len(train_losses) == patience + 1
Ejemplo n.º 2
0
def test_early_stop():
    """Check that EarlyStopping cuts a Trainer run short.

    A Wide + TabMlp model is trained through ``Trainer`` with the "binary"
    objective and an EarlyStopping callback for up to 5 epochs. The test
    expects the training-loss history to hold ``patience + 1`` entries.
    """
    patience = 3
    stopper = EarlyStopping(
        min_delta=5.0, patience=patience, restore_best_weights=True, verbose=1
    )

    # The wide component is sized by the number of distinct values in X_wide.
    n_wide_values = np.unique(X_wide).shape[0]
    wide = Wide(n_wide_values, 1)
    deeptabular = TabMlp(
        mlp_hidden_dims=[32, 16],
        mlp_dropout=[0.5, 0.5],
        column_idx=column_idx,
        embed_input=embed_input,
        continuous_cols=colnames[-5:],
    )
    model = WideDeep(wide=wide, deeptabular=deeptabular)

    trainer = Trainer(
        model=model,
        objective="binary",
        callbacks=[stopper],
        verbose=1,
    )
    trainer.fit(
        X_wide=X_wide,
        X_tab=X_tab,
        target=target,
        val_split=0.2,
        n_epochs=5,
    )

    # One history entry per epoch that actually ran: patience + 1 expected.
    train_losses = trainer.history["train_loss"]
    assert len(train_losses) == patience + 1
    mlp_batchnorm_last=args.mlp_batchnorm_last,
    mlp_linear_first=args.mlp_linear_first,
    embed_dropout=args.embed_dropout,
    continuous_cols=prepare_tab.continuous_cols,
    batchnorm_cont=args.batchnorm_cont,
    concat_cont_first=args.concat_cont_first,
)
# Build the WideDeep model from the tabular component only.
model = WideDeep(deeptabular=deeptabular)

# Optimizer(s) are produced by the project helper from the model and the args.
optimizers = set_optimizer(model, args)

# Whole batches per epoch plus one.
# NOTE(review): presumably the extra step accounts for a trailing partial
# batch; when the row count is an exact multiple of batch_size this is one
# more than the number of batches -- confirm how set_lr_scheduler uses it.
steps_per_epoch = (X_train.shape[0] // args.batch_size) + 1
lr_schedulers = set_lr_scheduler(optimizers, steps_per_epoch, args)
# Early stopping is configured entirely from the args.
early_stopping = EarlyStopping(
    monitor=args.monitor,
    min_delta=args.early_stop_delta,
    patience=args.early_stop_patience,
)

trainer = Trainer(
    model,
    objective="regression",
    optimizers=optimizers,
    lr_schedulers=lr_schedulers,
    # Last "_"-separated token of the monitored name, e.g. "val_loss" -> "loss".
    reducelronplateau_criterion=args.monitor.split("_")[-1],
    callbacks=[early_stopping, LRHistory(n_epochs=args.n_epochs)],
)

start = time()
trainer.fit(
    X_train={"X_tab": X_train, "target": y_train},
Ejemplo n.º 4
0
    #     continuous_cols=continuous_cols,
    # )

    # Combine the wide and tabular components into a single model.
    model = WideDeep(wide=wide, deeptabular=deeptabular)

    # One optimizer and one StepLR scheduler per model component.
    wide_opt = torch.optim.Adam(model.wide.parameters(), lr=0.01)
    deep_opt = RAdam(model.deeptabular.parameters())
    wide_sch = torch.optim.lr_scheduler.StepLR(wide_opt, step_size=3)
    deep_sch = torch.optim.lr_scheduler.StepLR(deep_opt, step_size=5)

    # Per-component settings, all keyed by "wide" / "deeptabular".
    optimizers = {"wide": wide_opt, "deeptabular": deep_opt}
    schedulers = {"wide": wide_sch, "deeptabular": deep_sch}
    initializers = {"wide": KaimingNormal, "deeptabular": XavierNormal}
    callbacks = [
        LRHistory(n_epochs=10),
        EarlyStopping(patience=5),
        ModelCheckpoint(filepath="model_weights/wd_out"),
    ]
    # Metrics are passed as classes, not instances.
    metrics = [Accuracy, Precision]

    # Trainer for the "binary" objective, wired with all of the settings above.
    trainer = Trainer(
        model,
        objective="binary",
        optimizers=optimizers,
        lr_schedulers=schedulers,
        initializers=initializers,
        callbacks=callbacks,
        metrics=metrics,
    )

    trainer.fit(
def run_experiment_and_save(
    model,
    model_name,
    results_dir,
    models_dir,
    args,
    X_train,
    X_test,
    y_train,
    y_test,
):
    """Train ``model`` for regression, print the test RMSE and optionally pickle the results.

    Parameters
    ----------
    model:
        Model handed to ``set_optimizer`` and ``Trainer``.
    model_name:
        String embedded in the name of the results file.
    results_dir, models_dir:
        Directories combined with file names via the ``/`` operator
        (presumably ``pathlib.Path`` objects -- confirm against the caller).
    args:
        Run configuration. The attributes read here are ``batch_size``,
        ``monitor``, ``early_stop_delta``, ``early_stop_patience``,
        ``n_epochs``, ``eval_every`` and ``save_results``.
    X_train, X_test, y_train, y_test:
        Training and evaluation data. Note that the ``*_test`` split is also
        passed to ``Trainer.fit`` as the validation set.

    Returns
    -------
    None
        The RMSE is printed. When ``args.save_results`` is truthy, a pickle
        named ``fb_comments_<model_name>_best_<timestamp>.p`` is written to
        ``results_dir``.
    """
    optimizers = set_optimizer(model, args)

    steps_per_epoch = (X_train.shape[0] // args.batch_size) + 1
    lr_schedulers = set_lr_scheduler(optimizers, steps_per_epoch, args)

    early_stopping = EarlyStopping(
        monitor=args.monitor,
        min_delta=args.early_stop_delta,
        patience=args.early_stop_patience,
    )

    model_checkpoint = ModelCheckpoint(
        filepath=str(models_dir / "best_model"),
        monitor=args.monitor,
        save_best_only=True,
        max_save=1,
    )

    trainer = Trainer(
        model,
        objective="regression",
        optimizers=optimizers,
        lr_schedulers=lr_schedulers,
        # Last "_"-separated token of the monitored name, e.g. "val_loss" -> "loss".
        reducelronplateau_criterion=args.monitor.split("_")[-1],
        callbacks=[
            early_stopping,
            model_checkpoint,
            LRHistory(n_epochs=args.n_epochs),
        ],
    )

    start = time()
    trainer.fit(
        X_train={"X_tab": X_train, "target": y_train},
        X_val={"X_tab": X_test, "target": y_test},
        n_epochs=args.n_epochs,
        batch_size=args.batch_size,
        validation_freq=args.eval_every,
    )
    runtime = time() - start

    y_pred = trainer.predict(X_tab=X_test)
    rmse = np.sqrt(mean_squared_error(y_test, y_pred))
    r2 = r2_score(y_test, y_pred)
    print(f"rmse with the best model: {rmse}")

    if args.save_results:
        # Second-resolution timestamp, e.g. "2021-03-01_12:34:56". strftime is
        # used instead of slicing str(datetime.now()) at the ".": str() omits
        # the fractional part when the microsecond is 0, which made the old
        # split(".")[:-1][0] expression raise IndexError.
        suffix = datetime.now().strftime("%Y-%m-%d_%H:%M:%S")
        filename = "_".join(["fb_comments", model_name, "best", suffix]) + ".p"
        results_d = {
            "args": args,
            "rmse": rmse,
            "r2": r2,
            "early_stopping": early_stopping,
            "trainer_history": trainer.history,
            "trainer_lr_history": trainer.lr_history,
            "runtime": runtime,
        }
        with open(results_dir / filename, "wb") as f:
            pickle.dump(results_d, f)
def run_experiment_and_save(
    model,
    model_name,
    results_dir,
    models_dir,
    args,
    X_train,
    X_test,
    y_train,
    y_test,
    fl_exp_indx: int = 0,
):
    """Train ``model`` as a binary classifier, print test metrics and optionally pickle them.

    Parameters
    ----------
    model:
        Model handed to ``set_optimizer`` and ``Trainer``.
    model_name:
        String embedded in the name of the results file.
    results_dir, models_dir:
        Directories combined with file names via the ``/`` operator
        (presumably ``pathlib.Path`` objects -- confirm against the caller).
        ``results_dir`` is also passed to ``load_focal_loss_params``.
    args:
        Run configuration. The attributes read here are ``batch_size``,
        ``monitor``, ``early_stop_delta``, ``early_stop_patience``,
        ``n_epochs``, ``eval_every`` and ``save_results``. ``focal_loss`` is
        optional and treated as false when absent.
    X_train, X_test, y_train, y_test:
        Training and evaluation data. Note that the ``*_test`` split is also
        passed to ``Trainer.fit`` as the validation set.
    fl_exp_indx:
        Index forwarded to ``load_focal_loss_params`` when focal loss is on.

    Returns
    -------
    None
        Accuracy, F1, ROC AUC and the confusion matrix are printed. When
        ``args.save_results`` is truthy, a pickle named
        ``bankm_<model_name>_best_<timestamp>.p`` is written to ``results_dir``.
    """
    # Read the optional flag with getattr: this keeps the fallback for args
    # objects without a ``focal_loss`` attribute, while an AttributeError
    # raised inside load_focal_loss_params is no longer swallowed and turned
    # into a silent switch to the plain binary loss.
    focal_loss = bool(getattr(args, "focal_loss", False))
    if focal_loss:
        alpha, gamma = load_focal_loss_params(results_dir, fl_exp_indx)
    else:
        alpha, gamma = 0.25, 2

    optimizers = set_optimizer(model, args)

    steps_per_epoch = (X_train.shape[0] // args.batch_size) + 1
    lr_schedulers = set_lr_scheduler(optimizers, steps_per_epoch, args)

    early_stopping = EarlyStopping(
        monitor=args.monitor,
        min_delta=args.early_stop_delta,
        patience=args.early_stop_patience,
    )

    model_checkpoint = ModelCheckpoint(
        filepath=str(models_dir / "best_model"),
        monitor=args.monitor,
        save_best_only=True,
        max_save=1,
    )

    trainer = Trainer(
        model,
        objective="binary_focal_loss" if focal_loss else "binary",
        optimizers=optimizers,
        lr_schedulers=lr_schedulers,
        # Last "_"-separated token of the monitored name, e.g. "val_loss" -> "loss".
        reducelronplateau_criterion=args.monitor.split("_")[-1],
        callbacks=[early_stopping, model_checkpoint, LRHistory(n_epochs=args.n_epochs)],
        metrics=[Accuracy, F1Score],
        alpha=alpha,
        gamma=gamma,
    )

    start = time()
    trainer.fit(
        X_train={"X_tab": X_train, "target": y_train},
        X_val={"X_tab": X_test, "target": y_test},
        n_epochs=args.n_epochs,
        batch_size=args.batch_size,
        validation_freq=args.eval_every,
    )
    runtime = time() - start

    y_pred = trainer.predict(X_tab=X_test)

    acc = accuracy_score(y_test, y_pred)
    # NOTE(review): the AUC is computed from the output of trainer.predict; if
    # that is hard class labels rather than scores, a probability output would
    # give a more informative ROC AUC -- confirm before changing.
    auc = roc_auc_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred)
    print(f"Accuracy: {acc}. F1: {f1}. ROC_AUC: {auc}")
    print(confusion_matrix(y_test, y_pred))

    if args.save_results:
        # Second-resolution timestamp, e.g. "2021-03-01_12:34:56". strftime is
        # used instead of slicing str(datetime.now()) at the ".": str() omits
        # the fractional part when the microsecond is 0, which made the old
        # split(".")[:-1][0] expression raise IndexError.
        suffix = datetime.now().strftime("%Y-%m-%d_%H:%M:%S")
        filename = "_".join(["bankm", model_name, "best", suffix]) + ".p"
        results_d = {
            "args": args,
            "acc": acc,
            "auc": auc,
            "f1": f1,
            "early_stopping": early_stopping,
            "trainer_history": trainer.history,
            "trainer_lr_history": trainer.lr_history,
            "runtime": runtime,
        }
        with open(results_dir / filename, "wb") as f:
            pickle.dump(results_d, f)
Ejemplo n.º 7
0
        def objective(params):
            """Train one focal-loss model and return the early-stopping best value.

            ``params`` must provide the "alpha" and "gamma" entries forwarded
            to the Trainer. Everything else (``args``, ``prepare_tab``,
            ``mlp_hidden_dims`` and the train/validation arrays) is taken from
            the enclosing scope. A fresh model is built on every call.
            """
            tab_mlp = TabMlp(
                column_idx=prepare_tab.column_idx,
                mlp_hidden_dims=mlp_hidden_dims,
                mlp_activation=args.mlp_activation,
                mlp_dropout=args.mlp_dropout,
                mlp_batchnorm=args.mlp_batchnorm,
                mlp_batchnorm_last=args.mlp_batchnorm_last,
                mlp_linear_first=args.mlp_linear_first,
                embed_input=prepare_tab.embeddings_input,
                embed_dropout=args.embed_dropout,
            )
            wide_deep = WideDeep(deeptabular=tab_mlp)

            adam = torch.optim.Adam(
                wide_deep.parameters(), lr=args.lr, weight_decay=args.weight_decay
            )
            plateau_scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
                adam,
                mode=args.rop_mode,
                factor=args.rop_factor,
                patience=args.rop_patience,
                threshold=args.rop_threshold,
                threshold_mode=args.rop_threshold_mode,
            )

            early_stopping = EarlyStopping(
                monitor=args.monitor,
                min_delta=args.early_stop_delta,
                patience=args.early_stop_patience,
            )
            callbacks = [early_stopping, LRHistory(n_epochs=args.n_epochs)]

            trainer = Trainer(
                wide_deep,
                objective="binary_focal_loss",
                optimizers=adam,
                lr_schedulers=plateau_scheduler,
                # Last "_"-separated token of the monitored name.
                reducelronplateau_criterion=args.monitor.split("_")[-1],
                callbacks=callbacks,
                metrics=[Accuracy, F1Score],
                alpha=params["alpha"],
                gamma=params["gamma"],
                verbose=0,
            )

            train_set = {"X_tab": X_train, "target": y_train}
            valid_set = {"X_tab": X_valid, "target": y_valid}
            trainer.fit(
                X_train=train_set,
                X_val=valid_set,
                n_epochs=args.n_epochs,
                batch_size=args.batch_size,
                validation_freq=args.eval_every,
            )

            # The callback's best recorded value is the score of this trial.
            return early_stopping.best