Example #1
trainer = Trainer(
    model,
    objective="binary",
    optimizers=optimizers,
    lr_schedulers=lr_schedulers,
    reducelronplateau_criterion=args.monitor.split("_")[-1],
    callbacks=[early_stopping, model_checkpoint, LRHistory(n_epochs=args.n_epochs)],
    metrics=[Accuracy],
)

start = time()
trainer.fit(
    X_train={"X_tab": X_train, "target": y_train},
    X_val={"X_tab": X_test, "target": y_test},
    n_epochs=args.n_epochs,
    batch_size=args.batch_size,
    validation_freq=args.eval_every,
)
runtime = time() - start

y_pred = trainer.predict(X_tab=X_test)
acc = accuracy_score(y_test, y_pred)

if args.save_results:
    suffix = datetime.now().strftime("%Y-%m-%d_%H:%M:%S")
    filename = "_".join(["adult_tabtransformer_best", suffix]) + ".p"
    results_d = {}
    results_d["args"] = args
    results_d["acc"] = acc
    results_d["early_stopping"] = early_stopping
    results_d["trainer_history"] = trainer.history
    results_d["trainer_lr_history"] = trainer.lr_history
    results_d["runtime"] = runtime
    with open(RESULTS_DIR / filename, "wb") as f:
        pickle.dump(results_d, f)
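
This snippet starts mid-script: model, optimizers, lr_schedulers, early_stopping, model_checkpoint and args are built further up, in code the listing omits. The optimizers and schedulers typically come from project-local helpers such as the set_optimizer and set_lr_scheduler called in the examples below. A minimal sketch of what those helpers might look like, assuming args carries hypothetical optimizer, lr, weight_decay, lr_scheduler and lr_patience fields (the real helpers may differ):

import torch


def set_optimizer(model, args):
    # Hypothetical: build a single optimizer from the CLI args
    if args.optimizer.lower() == "adamw":
        return torch.optim.AdamW(
            model.parameters(), lr=args.lr, weight_decay=args.weight_decay
        )
    return torch.optim.Adam(model.parameters(), lr=args.lr)


def set_lr_scheduler(optimizer, steps_per_epoch, args):
    # Hypothetical: either a ReduceLROnPlateau scheduler (which is what the
    # reducelronplateau_criterion argument to Trainer caters for) or a
    # OneCycleLR schedule stepped once per batch
    if args.lr_scheduler == "reduce_on_plateau":
        return torch.optim.lr_scheduler.ReduceLROnPlateau(
            optimizer,
            mode="min" if args.monitor.endswith("loss") else "max",
            patience=args.lr_patience,
        )
    return torch.optim.lr_scheduler.OneCycleLR(
        optimizer,
        max_lr=args.lr,
        steps_per_epoch=steps_per_epoch,
        epochs=args.n_epochs,
    )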
Example #2

from time import time
from datetime import datetime
import pickle

from sklearn.metrics import (
    accuracy_score,
    confusion_matrix,
    f1_score,
    roc_auc_score,
)
from pytorch_widedeep import Trainer
from pytorch_widedeep.callbacks import EarlyStopping, LRHistory, ModelCheckpoint
from pytorch_widedeep.metrics import Accuracy, F1Score


def run_experiment_and_save(
    model,
    model_name,
    results_dir,
    models_dir,
    args,
    X_train,
    X_test,
    y_train,
    y_test,
    fl_exp_indx: int = 0,
):

    try:
        if args.focal_loss:
            alpha, gamma = load_focal_loss_params(results_dir, fl_exp_indx)
            focal_loss = True
        else:
            alpha = 0.25
            gamma = 2
            focal_loss = False
    except AttributeError:
        alpha = 0.25
        gamma = 2
        focal_loss = False

    optimizers = set_optimizer(model, args)

    steps_per_epoch = (X_train.shape[0] // args.batch_size) + 1
    lr_schedulers = set_lr_scheduler(optimizers, steps_per_epoch, args)

    early_stopping = EarlyStopping(
        monitor=args.monitor,
        min_delta=args.early_stop_delta,
        patience=args.early_stop_patience,
    )

    model_checkpoint = ModelCheckpoint(
        filepath=str(models_dir / "best_model"),
        monitor=args.monitor,
        save_best_only=True,
        max_save=1,
    )

    trainer = Trainer(
        model,
        objective="binary_focal_loss" if focal_loss else "binary",
        optimizers=optimizers,
        lr_schedulers=lr_schedulers,
        reducelronplateau_criterion=args.monitor.split("_")[-1],
        callbacks=[early_stopping, model_checkpoint, LRHistory(n_epochs=args.n_epochs)],
        metrics=[Accuracy, F1Score],
        alpha=alpha,
        gamma=gamma,
    )

    start = time()
    trainer.fit(
        X_train={"X_tab": X_train, "target": y_train},
        X_val={"X_tab": X_test, "target": y_test},
        n_epochs=args.n_epochs,
        batch_size=args.batch_size,
        validation_freq=args.eval_every,
    )
    runtime = time() - start

    y_pred = trainer.predict(X_tab=X_test)

    acc = accuracy_score(y_test, y_pred)
    auc = roc_auc_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred)
    print(f"Accuracy: {acc}. F1: {f1}. ROC_AUC: {auc}")
    print(confusion_matrix(y_test, y_pred))

    if args.save_results:
        suffix = datetime.now().strftime("%Y-%m-%d_%H:%M:%S")
        filename = "_".join(["bankm", model_name, "best", suffix]) + ".p"
        results_d = {}
        results_d["args"] = args
        results_d["acc"] = acc
        results_d["auc"] = auc
        results_d["f1"] = f1
        results_d["early_stopping"] = early_stopping
        results_d["trainer_history"] = trainer.history
        results_d["trainer_lr_history"] = trainer.lr_history
        results_d["runtime"] = runtime
        with open(results_dir / filename, "wb") as f:
            pickle.dump(results_d, f)
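
load_focal_loss_params is another project-local helper the listing does not show. A minimal sketch, assuming an earlier focal-loss hyperparameter search pickled dicts with hypothetical "alpha" and "gamma" keys into results_dir:

import pickle


def load_focal_loss_params(results_dir, fl_exp_indx=0):
    # Hypothetical: pick the fl_exp_indx-th pickled focal-loss result in
    # results_dir (a pathlib.Path, as in the saving code above) and return
    # the alpha/gamma pair found by that search
    fname = sorted(results_dir.glob("*focal_loss*.p"))[fl_exp_indx]
    with open(fname, "rb") as f:
        results_d = pickle.load(f)
    return results_d["alpha"], results_d["gamma"]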
Example #3

from time import time
from datetime import datetime
import pickle

import numpy as np
from sklearn.metrics import mean_squared_error, r2_score
from pytorch_widedeep import Trainer
from pytorch_widedeep.callbacks import EarlyStopping, LRHistory, ModelCheckpoint


def run_experiment_and_save(
    model,
    model_name,
    results_dir,
    models_dir,
    args,
    X_train,
    X_test,
    y_train,
    y_test,
):

    optimizers = set_optimizer(model, args)

    steps_per_epoch = (X_train.shape[0] // args.batch_size) + 1
    lr_schedulers = set_lr_scheduler(optimizers, steps_per_epoch, args)

    early_stopping = EarlyStopping(
        monitor=args.monitor,
        min_delta=args.early_stop_delta,
        patience=args.early_stop_patience,
    )

    model_checkpoint = ModelCheckpoint(
        filepath=str(models_dir / "best_model"),
        monitor=args.monitor,
        save_best_only=True,
        max_save=1,
    )

    trainer = Trainer(
        model,
        objective="regression",
        optimizers=optimizers,
        lr_schedulers=lr_schedulers,
        reducelronplateau_criterion=args.monitor.split("_")[-1],
        callbacks=[
            early_stopping, model_checkpoint,
            LRHistory(n_epochs=args.n_epochs)
        ],
    )

    start = time()
    trainer.fit(
        X_train={
            "X_tab": X_train,
            "target": y_train
        },
        X_val={
            "X_tab": X_test,
            "target": y_test
        },
        n_epochs=args.n_epochs,
        batch_size=args.batch_size,
        validation_freq=args.eval_every,
    )
    runtime = time() - start

    y_pred = trainer.predict(X_tab=X_test)
    rmse = np.sqrt(mean_squared_error(y_test, y_pred))
    r2 = r2_score(y_test, y_pred)
    print(f"rmse with the best model: {rmse}")

    if args.save_results:
        suffix = datetime.now().strftime("%Y-%m-%d_%H:%M:%S")
        filename = "_".join(["fb_comments", model_name, "best", suffix]) + ".p"
        results_d = {}
        results_d["args"] = args
        results_d["rmse"] = rmse
        results_d["r2"] = r2
        results_d["early_stopping"] = early_stopping
        results_d["trainer_history"] = trainer.history
        results_d["trainer_lr_history"] = trainer.lr_history
        results_d["runtime"] = runtime
        with open(results_dir / filename, "wb") as f:
            pickle.dump(results_d, f)
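
The pickled results written above can be reloaded later for analysis. A minimal sketch, assuming the results directory used was ./results; note that pytorch_widedeep must be importable when unpickling, since the dict stores callback objects:

import pickle
from pathlib import Path

# Grab the most recent result file saved by run_experiment_and_save
results_path = sorted(Path("results").glob("fb_comments_*_best_*.p"))[-1]
with open(results_path, "rb") as f:
    results_d = pickle.load(f)

print(results_d["rmse"], results_d["r2"], results_d["runtime"])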
Example #4
import os

import pandas as pd
import torch
from sklearn.metrics import accuracy_score
from pytorch_widedeep import Trainer

from utils import load_pickle, save_pickle

PRJ_DIR = os.path.dirname(__file__)
TEST_DATA_PATH = os.path.join(PRJ_DIR, 'data/test.csv')

MODEL_DIR = os.path.join(PRJ_DIR, 'model')
MODEL_PATH = os.path.join(MODEL_DIR, 'model.pth')
WIDE_PROC_PATH = os.path.join(MODEL_DIR, 'wide_proc.pickle')
DEEP_PROC_PATH = os.path.join(MODEL_DIR, 'deep_proc.pickle')

if __name__ == '__main__':

    # Load the test data
    test_df = pd.read_csv(TEST_DATA_PATH)

    # Load the Trainer (assumes the whole model object was saved via torch.save)
    model = torch.load(MODEL_PATH)
    trainer = Trainer(model=model, objective='binary')
    trainer.batch_size = 256

    # Load the fitted preprocessors
    wide_processor = load_pickle(WIDE_PROC_PATH)
    deep_processor = load_pickle(DEEP_PROC_PATH)

    x_wide = wide_processor.transform(test_df)
    x_deep = deep_processor.transform(test_df)

    # Predict
    y_pred = trainer.predict(X_wide=x_wide, X_tab=x_deep)
    y = test_df['income_label']

    print(f"Accuracy: {accuracy_score(y, y_pred)}")