Beispiel #1
0
    optimizers = {"wide": wide_opt, "deeptabular": deep_opt}
    schedulers = {"wide": wide_sch, "deeptabular": deep_sch}
    initializers = {"wide": KaimingNormal, "deeptabular": XavierNormal}
    callbacks = [
        LRHistory(n_epochs=10),
        EarlyStopping(patience=5),
        ModelCheckpoint(filepath="model_weights/wd_out"),
    ]
    metrics = [Accuracy, Precision]

    trainer = Trainer(
        model,
        objective="binary",
        optimizers=optimizers,
        lr_schedulers=schedulers,
        initializers=initializers,
        callbacks=callbacks,
        metrics=metrics,
    )

    trainer.fit(
        X_wide=X_wide,
        X_tab=X_tab,
        target=target,
        n_epochs=4,
        batch_size=64,
        val_split=0.2,
    )

    # # to save/load the model
model = WideDeep(deeptabular=deeptabular)

optimizers = set_optimizer(model, args)

steps_per_epoch = (X_train.shape[0] // args.batch_size) + 1
lr_schedulers = set_lr_scheduler(optimizers, steps_per_epoch, args)
early_stopping = EarlyStopping(
    monitor=args.monitor,
    min_delta=args.early_stop_delta,
    patience=args.early_stop_patience,
)

trainer = Trainer(
    model,
    objective="regression",
    optimizers=optimizers,
    lr_schedulers=lr_schedulers,
    reducelronplateau_criterion=args.monitor.split("_")[-1],
    callbacks=[early_stopping, LRHistory(n_epochs=args.n_epochs)],
)

start = time()
trainer.fit(
    X_train={"X_tab": X_train, "target": y_train},
    X_val={"X_tab": X_valid, "target": y_valid},
    n_epochs=args.n_epochs,
    batch_size=args.batch_size,
    validation_freq=args.eval_every,
)
runtime = time() - start

if args.save_results:
def run_experiment_and_save(
    model,
    model_name,
    results_dir,
    models_dir,
    args,
    X_train,
    X_test,
    y_train,
    y_test,
):

    optimizers = set_optimizer(model, args)

    steps_per_epoch = (X_train.shape[0] // args.batch_size) + 1
    lr_schedulers = set_lr_scheduler(optimizers, steps_per_epoch, args)

    early_stopping = EarlyStopping(
        monitor=args.monitor,
        min_delta=args.early_stop_delta,
        patience=args.early_stop_patience,
    )

    model_checkpoint = ModelCheckpoint(
        filepath=str(models_dir / "best_model"),
        monitor=args.monitor,
        save_best_only=True,
        max_save=1,
    )

    trainer = Trainer(
        model,
        objective="regression",
        optimizers=optimizers,
        lr_schedulers=lr_schedulers,
        reducelronplateau_criterion=args.monitor.split("_")[-1],
        callbacks=[
            early_stopping, model_checkpoint,
            LRHistory(n_epochs=args.n_epochs)
        ],
    )

    start = time()
    trainer.fit(
        X_train={
            "X_tab": X_train,
            "target": y_train
        },
        X_val={
            "X_tab": X_test,
            "target": y_test
        },
        n_epochs=args.n_epochs,
        batch_size=args.batch_size,
        validation_freq=args.eval_every,
    )
    runtime = time() - start

    y_pred = trainer.predict(X_tab=X_test)
    rmse = np.sqrt(mean_squared_error(y_test, y_pred))
    r2 = r2_score(y_test, y_pred)
    print(f"rmse with the best model: {rmse}")

    if args.save_results:
        suffix = str(datetime.now()).replace(" ", "_").split(".")[:-1][0]
        filename = "_".join(["fb_comments", model_name, "best", suffix]) + ".p"
        results_d = {}
        results_d["args"] = args
        results_d["rmse"] = rmse
        results_d["r2"] = r2
        results_d["early_stopping"] = early_stopping
        results_d["trainer_history"] = trainer.history
        results_d["trainer_lr_history"] = trainer.lr_history
        results_d["runtime"] = runtime
        with open(results_dir / filename, "wb") as f:
            pickle.dump(results_d, f)
Beispiel #4
0
    min_delta=args.early_stop_delta,
    patience=args.early_stop_patience,
)

model_checkpoint = ModelCheckpoint(
    filepath=str(MODELS_DIR / "best_model"),
    monitor=args.monitor,
    save_best_only=True,
    max_save=1,
)

trainer = Trainer(
    model,
    objective="binary",
    optimizers=optimizers,
    lr_schedulers=lr_schedulers,
    reducelronplateau_criterion=args.monitor.split("_")[-1],
    callbacks=[early_stopping, model_checkpoint, LRHistory(n_epochs=args.n_epochs)],
    metrics=[Accuracy],
)

start = time()
trainer.fit(
    X_train={"X_tab": X_train, "target": y_train},
    X_val={"X_tab": X_test, "target": y_test},
    n_epochs=args.n_epochs,
    batch_size=args.batch_size,
    validation_freq=args.eval_every,
)
runtime = time() - start
Beispiel #5
0
optimizers = set_optimizer(model, args)

steps_per_epoch = (X_tab_train.shape[0] // args.batch_size) + 1
lr_schedulers = set_lr_scheduler(optimizers, steps_per_epoch, args)

early_stopping = EarlyStopping(
    monitor=args.monitor,
    min_delta=args.early_stop_delta,
    patience=args.early_stop_patience,
)

trainer = Trainer(
    model,
    objective="binary",
    optimizers=optimizers,
    lr_schedulers=lr_schedulers,
    reducelronplateau_criterion=args.monitor.split("_")[-1],
    callbacks=[early_stopping],
    metrics=[Accuracy],
)

start = time()
trainer.fit(
    X_train=X_train,
    X_val=X_val,
    n_epochs=args.n_epochs,
    batch_size=args.batch_size,
    validation_freq=args.eval_every,
)
runtime = time() - start
def run_experiment_and_save(
    model,
    model_name,
    results_dir,
    models_dir,
    args,
    X_train,
    X_test,
    y_train,
    y_test,
    fl_exp_indx: int = 0,
):

    try:
        if args.focal_loss:
            alpha, gamma = load_focal_loss_params(results_dir, fl_exp_indx)
            focal_loss = True
        else:
            alpha = 0.25
            gamma = 2
            focal_loss = False
    except AttributeError:
        alpha = 0.25
        gamma = 2
        focal_loss = False

    optimizers = set_optimizer(model, args)

    steps_per_epoch = (X_train.shape[0] // args.batch_size) + 1
    lr_schedulers = set_lr_scheduler(optimizers, steps_per_epoch, args)

    early_stopping = EarlyStopping(
        monitor=args.monitor,
        min_delta=args.early_stop_delta,
        patience=args.early_stop_patience,
    )

    model_checkpoint = ModelCheckpoint(
        filepath=str(models_dir / "best_model"),
        monitor=args.monitor,
        save_best_only=True,
        max_save=1,
    )

    trainer = Trainer(
        model,
        objective="binary_focal_loss" if focal_loss else "binary",
        optimizers=optimizers,
        lr_schedulers=lr_schedulers,
        reducelronplateau_criterion=args.monitor.split("_")[-1],
        callbacks=[early_stopping, model_checkpoint, LRHistory(n_epochs=args.n_epochs)],
        metrics=[Accuracy, F1Score],
        alpha=alpha,
        gamma=gamma,
    )

    start = time()
    trainer.fit(
        X_train={"X_tab": X_train, "target": y_train},
        X_val={"X_tab": X_test, "target": y_test},
        n_epochs=args.n_epochs,
        batch_size=args.batch_size,
        validation_freq=args.eval_every,
    )
    runtime = time() - start

    y_pred = trainer.predict(X_tab=X_test)

    acc = accuracy_score(y_test, y_pred)
    auc = roc_auc_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred)
    print(f"Accuracy: {acc}. F1: {f1}. ROC_AUC: {auc}")
    print(confusion_matrix(y_test, y_pred))

    if args.save_results:
        suffix = str(datetime.now()).replace(" ", "_").split(".")[:-1][0]
        filename = "_".join(["bankm", model_name, "best", suffix]) + ".p"
        results_d = {}
        results_d["args"] = args
        results_d["acc"] = acc
        results_d["auc"] = auc
        results_d["f1"] = f1
        results_d["early_stopping"] = early_stopping
        results_d["trainer_history"] = trainer.history
        results_d["trainer_lr_history"] = trainer.lr_history
        results_d["runtime"] = runtime
        with open(results_dir / filename, "wb") as f:
            pickle.dump(results_d, f)
Beispiel #7
0
        def objective(params):

            deeptabular = TabMlp(
                column_idx=prepare_tab.column_idx,
                mlp_hidden_dims=mlp_hidden_dims,
                mlp_activation=args.mlp_activation,
                mlp_dropout=args.mlp_dropout,
                mlp_batchnorm=args.mlp_batchnorm,
                mlp_batchnorm_last=args.mlp_batchnorm_last,
                mlp_linear_first=args.mlp_linear_first,
                embed_input=prepare_tab.embeddings_input,
                embed_dropout=args.embed_dropout,
            )
            model = WideDeep(deeptabular=deeptabular)

            optimizer = torch.optim.Adam(model.parameters(),
                                         lr=args.lr,
                                         weight_decay=args.weight_decay)

            lr_scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
                optimizer,
                mode=args.rop_mode,
                factor=args.rop_factor,
                patience=args.rop_patience,
                threshold=args.rop_threshold,
                threshold_mode=args.rop_threshold_mode,
            )

            early_stopping = EarlyStopping(
                monitor=args.monitor,
                min_delta=args.early_stop_delta,
                patience=args.early_stop_patience,
            )
            trainer = Trainer(
                model,
                objective="binary_focal_loss",
                optimizers=optimizer,
                lr_schedulers=lr_scheduler,
                reducelronplateau_criterion=args.monitor.split("_")[-1],
                callbacks=[early_stopping,
                           LRHistory(n_epochs=args.n_epochs)],
                metrics=[Accuracy, F1Score],
                alpha=params["alpha"],
                gamma=params["gamma"],
                verbose=0,
            )

            trainer.fit(
                X_train={
                    "X_tab": X_train,
                    "target": y_train
                },
                X_val={
                    "X_tab": X_valid,
                    "target": y_valid
                },
                n_epochs=args.n_epochs,
                batch_size=args.batch_size,
                validation_freq=args.eval_every,
            )

            score = early_stopping.best

            return score
Beispiel #8
0
    x_wide = wide_preprocessor.fit_transform(train_df)

    # Deep
    tab_preprocessor = TabPreprocessor(
        embed_cols=deep_embedding_columns_list,
        continuous_cols=deep_continuous_column_list)
    x_deep = tab_preprocessor.fit_transform(train_df)
    """
    Model 구조 정의
    """
    # Model
    wide = Wide(wide_dim=np.unique(x_wide).shape[0], pred_dim=1)
    deeptabular = TabMlp(mlp_hidden_dims=[64, 32],
                         column_idx=tab_preprocessor.column_idx,
                         embed_input=tab_preprocessor.embeddings_input,
                         continuous_cols=deep_continuous_column_list)
    model = WideDeep(wide=wide, deeptabular=deeptabular)
    """
    학습
    """
    trainer = Trainer(model, objective="binary", metrics=[Accuracy])
    trainer.fit(X_wide=x_wide,
                X_tab=x_deep,
                target=target,
                n_epochs=5,
                batch_size=256,
                val_split=0.1)

    trainer.save_model(MODEL_PATH)
    save_pickle(WIDE_PROC_PATH, wide_preprocessor)
    save_pickle(DEEP_PROC_PATH, tab_preprocessor)
Beispiel #9
0
from utils import load_pickle, save_pickle

PRJ_DIR = os.path.dirname(__file__)
TEST_DATA_PATH = os.path.join(PRJ_DIR, 'data/test.csv')

MODEL_DIR = os.path.join(PRJ_DIR, 'model')
MODEL_PATH = os.path.join(MODEL_DIR, 'model.pth')
WIDE_PROC_PATH = os.path.join(MODEL_DIR, 'wide_proc.pickle')
DEEP_PROC_PATH = os.path.join(MODEL_DIR, 'deep_proc.pickle')

if __name__ == '__main__':

    # 데이터 불러오기
    test_df = pd.read_csv(TEST_DATA_PATH)

    # Trinaer 불러오기
    trainer = Trainer(model=MODEL_PATH, objective='binary')
    trainer.batch_size = 256

    # Preprocessor 불러오기
    wide_processor = load_pickle(WIDE_PROC_PATH)
    deep_processor = load_pickle(DEEP_PROC_PATH)

    x_wide = wide_processor.transform(test_df)
    x_deep = deep_processor.transform(test_df)

    # 예측
    y_pred = trainer.predict(X_wide=x_wide, X_tab=x_deep)
    y = test_df['income_label']

    print(f"Accuracy: {accuracy_score(y, y_pred)}")