Code example #1
0
def execute_test(fpp, model, test_x, test_y, test_i, model_dir, model_file_name):
    """Evaluate *model* on the test set and optionally persist the results.

    Transforms ``test_x`` with the (already fitted) feature-prep pipeline,
    predicts, prints MSE/RMSE, and — when ``model_dir`` is given — writes a
    ``<model_file_name>_test_results.csv`` with per-scenario results plus a
    trailing mean-absolute-percentage-deviation row.

    Note: 'Deviation' divides by the target value, so a zero target would
    produce inf/nan entries (same as the original behavior).
    """
    logger.info("Testing model ...")

    features = apply_feature_prep_pipeline(x=test_x, fpp=fpp, fit=False)
    actual = test_y

    predicted = model.predict(features, verbose=1)

    n = len(actual)
    actual = np.reshape(a=actual, newshape=(n,))
    predicted = np.reshape(a=predicted, newshape=(n,))

    diff = actual - predicted
    # test_i is 0-based; scenario ids are 1-based, hence the +1.
    test_result = pd.DataFrame({
        RMC.SCEN_ID_COL: test_i + 1,
        'y': actual,
        'y_pred': predicted,
        'Difference': diff,
        'Deviation': diff * 100 / actual,
    })
    test_result = test_result.set_index(RMC.SCEN_ID_COL).sort_index()

    skl_mse = mean_squared_error(actual, predicted)
    skl_rmse = sqrt(skl_mse)

    print(" - test_skl_mse ({:.6f}), test_skl_rmse ({:.6f})".format(skl_mse, skl_rmse))
    print('\n')

    if model_dir is not None:
        results_path = os.path.join(model_dir, model_file_name + '_test_results.csv')
        with open(results_path, "w") as file:
            file.write("Test MSE: {0}, Test RMSE: {1}\n".format(skl_mse, skl_rmse))
            file.write("\n")
            test_result.to_csv(path_or_buf=file, columns=['y', 'y_pred', 'Difference', 'Deviation'])
            # Trailing summary row: mean absolute percentage deviation.
            file.write(",,,, {0}\n".format(np.mean(np.absolute(diff) * 100 / actual)))
Code example #2
0
def lr_schedule(ep, *, base_lr=0.001, step=10):
    """Step-decay learning-rate schedule for Keras' ``LearningRateScheduler``.

    The rate for epoch ``ep`` is ``base_lr / (ep // step + 1)``: the base
    rate for the first ``step`` epochs, half of it for the next ``step``,
    a third for the next, and so on (harmonic decay, not exponential).

    The extra parameters are keyword-only on purpose: tf.keras'
    ``LearningRateScheduler`` first tries ``schedule(epoch, lr)`` and falls
    back to ``schedule(epoch)`` on TypeError, so a positional second
    parameter would silently receive the current lr and change behavior.

    Args:
        ep: Current epoch index (0-based).
        base_lr: Initial learning rate (default 0.001 — the original constant).
        step: Number of epochs per decay step (default 10 — the original constant).

    Returns:
        The learning rate to apply for epoch ``ep``.
    """
    lr = base_lr / (ep // step + 1)

    logger.info('New learning rate: %01.10f', lr)

    return lr
Code example #3
0
def main():
    """Entry point: train and/or test depending on command-line arguments.

    Recognized argv tokens are 'train' and 'test'; with neither present,
    training is the default. An existing model (and its feature-prep
    pipeline) is loaded from ``RMC.OUTPUT_DIR`` when found, so training
    resumes and testing can run standalone.
    """
    overall = time()

    logger.info("Main script started ...")

    cli_args = sys.argv[1:]
    train = 'train' in cli_args
    test = 'test' in cli_args

    # Default mode when no recognized argument was supplied.
    if not (train or test):
        train = True

    fpp = None
    model = None
    model_file_name = None
    model_dir = None

    # Validation data is only needed alongside training.
    train_x, train_y, train_i, val_x, val_y, val_i, test_x, test_y, test_i = load_all_data(
        train_set=train,
        val_set=train,
        test_set=test,
        init=False)

    if (train or test) and RMC.TRN is not None:
        model_file_name = '{0}_{1}_{2}_{3}'.format(RMC.TRN, RMC.MV, RMC.OV, RMC.DP)
        model_dir = os.path.join(RMC.OUTPUT_DIR, model_file_name)

        if not os.path.exists(model_dir) and train:
            os.makedirs(model_dir)

        # Resume from a previously saved model/pipeline when one exists.
        if previous_keras_model_file_exists(model_dir, model_file_name):
            logger.info("Loading model ...")

            fpp = load_feature_prep_pipeline(model_dir, model_file_name)
            model = load_keras_model(model_dir, model_file_name)

            logger.info("Loading model done.")

    if train:
        fpp, model = execute_train(model_dir, model_file_name,
                                   start_epoch=RMC.START_EP, end_epoch=RMC.END_EP,
                                   fpp=fpp, build_on_model=model,
                                   train_x=train_x, train_y=train_y, train_i=train_i,
                                   val_x=val_x, val_y=val_y, val_i=val_i)

    if test:
        execute_test(fpp, model, test_x, test_y, test_i, model_dir, model_file_name)

    logger.info("Main script finished in %s.", time_it(overall, time()))
Code example #4
0
def execute_train(model_dir, model_file_name, start_epoch, end_epoch, fpp, build_on_model,
                  train_x, train_y, train_i, val_x, val_y, val_i):
    """Fit a Keras model and write validation results/artifacts to disk.

    Creates (and fits) the feature-prep pipeline unless one is passed in,
    builds/compiles a fresh model unless ``build_on_model`` is given, trains
    from ``start_epoch`` to ``end_epoch`` with an LR schedule, then evaluates
    on the validation set. When ``model_file_name`` is not None, also saves
    the model graph/summary, the pipeline, the training history, a copy of
    this script, and a ``*_train_results.csv`` with per-scenario results.

    NOTE(review): ``val_i`` is mutated in place (see the +1 shift below) —
    callers that reuse the array afterwards see the shifted values.

    Returns:
        Tuple ``(fpp, model)`` — the (possibly newly fitted) pipeline and
        the trained model.
    """
    # Only fit the pipeline when it was not loaded from a previous run;
    # a loaded pipeline must transform validation/test data consistently.
    if fpp is None:
        fpp = create_feature_prep_pipeline()
        fit = True
    else:
        fit = False

    x_t = apply_feature_prep_pipeline(x=train_x, fpp=fpp, fit=fit)
    y_t = train_y
    # Validation data is always transformed with the already-fitted pipeline.
    x_v = apply_feature_prep_pipeline(x=val_x, fpp=fpp, fit=False)
    y_v = val_y

    logger.info('Building/compiling model ...')

    if build_on_model is None:
        model = build_keras_model()
        model = compile_keras_model(model)
    else:
        # Resume training on a previously loaded (already compiled) model.
        model = build_on_model

    callbacks = [LearningRateScheduler(lr_schedule)]

    mt_callback = None

    # Artifact saving (tracker callback, graph/summary, pipeline, script
    # copy) only happens when a file name — i.e. an output location — exists.
    if model_file_name is not None:
        mt_callback = Model_Tracker(model_dir, model_file_name, model=model)

        callbacks.append(mt_callback)

        save_model_graph_and_summary(model, model_dir, model_file_name)
        save_feature_prep_pipeline(fpp, model_dir, model_file_name)
        copy_this_file(model_dir, model_file_name)

    logger.info('Building/compiling model done.')

    logger.info('Fitting model ...')

    history = model.fit(
        x=[x_t], y=y_t,
        batch_size=RMC.BATCH_SIZE,
        epochs=end_epoch,
        verbose=1,
        callbacks=callbacks,
        shuffle=True,
        initial_epoch=start_epoch,  # supports resuming a previous run
        steps_per_epoch=None,
        validation_data=[[x_v], y_v])

    if model_file_name is not None:
        save_training_history(history, model_dir, model_file_name)

    # Evaluate on the validation set with the model as it stands after fit.
    y_p = model.predict(x_v, verbose=1)

    y = np.reshape(a=y_v, newshape=(len(y_v),))
    y_p = np.reshape(a=y_p, newshape=(len(y_v),))

    # This is because we deleted scenario 2053 (index 2052 in numpy array) from data set
    # (in-place mutation of the caller's array — see docstring note).
    val_i[val_i > 2051] += 1

    # val_i is 0-based; scenario ids are 1-based, hence the +1.
    test_result = pd.DataFrame(
        {RMC.SCEN_ID_COL: val_i + 1, 'y': y, 'y_pred': y_p, 'Difference': y - y_p, 'Deviation': (y - y_p) * 100 / y})
    test_result.set_index(RMC.SCEN_ID_COL, inplace=True)
    test_result.sort_index(inplace=True)

    skl_mse = mean_squared_error(y, y_p)
    skl_rmse = sqrt(skl_mse)

    if model_file_name is not None:
        with open(os.path.join(model_dir, model_file_name + '_train_results.csv'), "w") as file:
            file.write("Best Epoch: {0}, Val MSE: {1}, Val RMSE: {2}\n".format(mt_callback.best_epoch, skl_mse, skl_rmse))
            file.write("\n")
            test_result.to_csv(path_or_buf=file, columns=['y', 'y_pred', 'Difference', 'Deviation'])
            # Trailing summary row: mean absolute percentage deviation.
            file.write(",,,, {0}\n".format(np.mean(np.absolute(y - y_p) * 100 / y)))

    logger.info('Fitting model done.')

    return fpp, model