def main(argv):
    """ Builds, Trians, and evaluates the model. """
    assert len(argv) == 1

    # Load data from local disk.
    (x_train_dev,
     y_train_dev), (x_train, y_train), (x_dev, y_dev), (x_test, y_test), (
         series_mean, series_max,
         series_min) = load_normalized_data("orig_day_full_imf1.xlsx")

    # Build the training input_fn
    def input_train(features=x_train, labels=y_train, batch_size=512):
        """ An input function for training """
        # convert the input to a Dataset
        dataset = tf.data.Dataset.from_tensor_slices((dict(features), labels))

        # Shuffling with a buffer larger than the dataset ensures
        # that the examples are well mixed.
        dataset = dataset.shuffle(4000).batch(
            batch_size).repeat().make_one_shot_iterator().get_next()

        return dataset

    # Build the validation input_fn
    def input_dev(features=x_dev, labels=y_dev, batch_size=512):
        # Convert the input to a Dataset
        dataset = tf.data.Dataset.from_tensor_slices((dict(features), labels))

        # Shuffling
        dataset = dataset.shuffle(2000).batch(
            batch_size).make_one_shot_iterator().get_next()

        return dataset
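
    # Note: since TF 1.5 an input_fn may also return the tf.data.Dataset
    # itself and the Estimator creates the iterator internally. A minimal
    # sketch of that alternative (not used here):
    #     def input_train_ds(batch_size=512):
    #         ds = tf.data.Dataset.from_tensor_slices(
    #             (dict(x_train), y_train))
    #         return ds.shuffle(4000).batch(batch_size).repeat()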

    feature_columns = [
        tf.feature_column.numeric_column("X1"),
        tf.feature_column.numeric_column("X2"),
        tf.feature_column.numeric_column("X3"),
        tf.feature_column.numeric_column("X4"),
        tf.feature_column.numeric_column("X5"),
        tf.feature_column.numeric_column("X6"),
        tf.feature_column.numeric_column("X7"),
        tf.feature_column.numeric_column("X8"),
        tf.feature_column.numeric_column("X9"),
        tf.feature_column.numeric_column("X10"),
        tf.feature_column.numeric_column("X11"),
        tf.feature_column.numeric_column("X12"),
        tf.feature_column.numeric_column("X13"),
        tf.feature_column.numeric_column("X14"),
        tf.feature_column.numeric_column("X15"),
        tf.feature_column.numeric_column("X16"),
        # tf.feature_column.numeric_column("X17"),
        # tf.feature_column.numeric_column("X18"),
        # tf.feature_column.numeric_column("X19"),
    ]
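
    # Equivalently, the 16 active columns above could be generated
    # programmatically (a sketch; the explicit list is kept so individual
    # columns can be commented in and out):
    #     feature_columns = [
    #         tf.feature_column.numeric_column('X{}'.format(i))
    #         for i in range(1, 17)
    #     ]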

    my_check_point_config = tf.estimator.RunConfig(
        save_checkpoints_steps=50,
        keep_checkpoint_max=1000  # Retain the 1000 most recent checkpoints
    )
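
    # Frequent checkpoints plus a large keep_checkpoint_max leave specific
    # training steps available for later restoration (the prediction script
    # below loads 'model.ckpt-23100' by hand).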

    for learning_rate in [0.01]:
        decay_steps = 1000  # Learning rate decay steps
        decay_rate = 0.98  # Learning rate decay rate
        hidden_units = [15]
        batch_size = 512
        drop_rates = [0.0]
        # Construct a hyperparameter string for the model directory name.
        hparam_str = 'lr{}_ds{}_dr{}_hu{}_bs{}_drop{}'.format(
            learning_rate, decay_steps, decay_rate, hidden_units, batch_size,
            drop_rates)

        model_dir = model_path + hparam_str

        # Build a custom Estimator using the model_fn;
        # 'params' is passed through to the model_fn.
        model = tf.estimator.Estimator(
            model_fn=my_dnn_regression_fn,
            model_dir=model_dir,
            params={
                'feature_columns': feature_columns,
                'learning_rate': learning_rate,
                # learning-rate decay schedule
                'decay_steps': decay_steps,
                'decay_rate': decay_rate,
                'optimizer': tf.train.AdamOptimizer,
                'hidden_units': hidden_units,
                'drop_rates': drop_rates
            },
            config=my_check_point_config)

        fig = plt.figure(figsize=(16, 9))
        ax221 = plt.subplot(2, 2, 1)
        ax221.set_title('training set predictions')
        ax221.set_xlabel('Time(day)')
        ax221.set_ylabel('flow(' + r'$m^3$' + '/s)')
        ax221.grid()
        ax222 = plt.subplot(2, 2, 2)
        ax222.set_title('training predictions vs. records scatter')
        ax222.set_xlabel('predictions(' + r'$m^3$' + '/s)')
        ax222.set_ylabel('records(' + r'$m^3$' + '/s)')
        ax222.grid()
        ax223 = plt.subplot(2, 2, 3)
        ax223.set_title('development set predictions')
        ax223.set_xlabel('Time(day)')
        ax223.set_ylabel('flow(' + r'$m^3$' + '/s)')
        ax223.grid()
        ax224 = plt.subplot(2, 2, 4)
        ax224.set_title('development predictions vs. records scatter')
        ax224.set_xlabel('predictions(' + r'$m^3$' + '/s)')
        ax224.set_ylabel('records(' + r'$m^3$' + '/s)')
        ax224.grid()

        for i in range(20):
            # Train the model
            model.train(input_fn=input_train, steps=STEPS)

            # Evaluate how the model performs on data it has not yet seen.
            eval_result = model.evaluate(input_fn=input_dev, steps=STEPS)

            # The evaluation returns a Python dictionary; the 'mse' key
            # holds the mean squared error (MSE).
            average_loss = eval_result['mse']
            # Convert MSE to root-mean-square error (RMSE).
            rmse = np.sqrt(average_loss)
            print("\n" + 80 * "*")
            print("\nRMS error for the validation set: {:.8f}".format(rmse))
            print()

            train_pred_input_fn = tf.estimator.inputs.pandas_input_fn(
                x_train, shuffle=False)
            dev_pred_input_fn = tf.estimator.inputs.pandas_input_fn(
                x_dev, shuffle=False)

            # predict the training set
            train_pred_results = model.predict(input_fn=train_pred_input_fn)
            # predict the development set
            dev_pred_results = model.predict(input_fn=dev_pred_input_fn)

            # Convert generator to numpy array
            train_predictions = np.array(
                list(p['predictions'] for p in train_pred_results))
            dev_predictions = np.array(
                list(p['predictions'] for p in dev_pred_results))
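            # predict() yields one dict per example whose 'predictions'
            # entry is a shape-(1,) array; reshape to match the labels.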
            train_predictions = train_predictions.reshape(
                np.array(y_train).shape)
            dev_predictions = dev_predictions.reshape(np.array(y_dev).shape)

            r2_train = r2_score(y_train, train_predictions)
            r2_dev = r2_score(y_dev, dev_predictions)
            print('r2_score_train = {:.10f}'.format(r2_train))
            print('r2_score_dev = {:.10f}'.format(r2_dev))

            # Time index for the training series
            train_t = np.linspace(1, y_train.size, y_train.size)

            # Plot the prediction line for the training set
            ax221.cla()
            ax221.plot(train_t, y_train, label='train records', color='blue')
            ax221.plot(train_t,
                       train_predictions,
                       label='train predictions',
                       color='red')

            # Scatter plot of training predictions vs. records
            ax222.cla()
            coeff = np.polyfit(train_predictions, y_train, 1)
            linear_fit = coeff[0] * train_predictions + coeff[1]
            ideal_fit = 1 * train_predictions
            ax222.plot(train_predictions, y_train, 'o', color='blue')
            ax222.plot(train_predictions,
                       linear_fit,
                       '--',
                       color='red',
                       label='Linear fit')
            ax222.plot(train_predictions,
                       ideal_fit,
                       '-',
                       color='black',
                       label='Ideal fit')

            # Plot the prediction line for the development set
            dev_t = np.linspace(1, y_dev.size, y_dev.size)
            ax223.cla()
            ax223.plot(dev_t, y_dev, label='develop records', color='blue')
            ax223.plot(dev_t,
                       dev_predictions,
                       label='develop predictions',
                       color='red')

            # Scatter plot of development predictions vs. records
            ax224.cla()
            coeff = np.polyfit(dev_predictions, y_dev, 1)
            linear_fit = coeff[0] * dev_predictions + coeff[1]
            ideal_fit = 1 * dev_predictions
            ax224.plot(dev_predictions, y_dev, 'o', color='blue')
            ax224.plot(dev_predictions,
                       linear_fit,
                       '--',
                       color='red',
                       label='Linear fit')
            ax224.plot(dev_predictions,
                       ideal_fit,
                       '-',
                       color='black',
                       label='Ideal fit')
            plt.pause(1)
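

# ---------------------------------------------------------------------------
# `my_dnn_regression_fn` is defined elsewhere in this project. The function
# below is a hypothetical, minimal TF 1.x model_fn sketch that accepts the
# same `params` keys used above ('feature_columns', 'hidden_units',
# 'drop_rates', 'learning_rate', 'decay_steps', 'decay_rate', 'optimizer');
# it illustrates the expected contract and is NOT the project's actual
# implementation. It assumes one dropout rate per hidden layer.
def _sketch_dnn_regression_fn(features, labels, mode, params):
    # Build the input layer from the feature columns.
    net = tf.feature_column.input_layer(features, params['feature_columns'])
    # Stack hidden layers, each optionally followed by dropout.
    for units, rate in zip(params['hidden_units'], params['drop_rates']):
        net = tf.layers.dense(net, units=units, activation=tf.nn.relu)
        if rate > 0.0:
            net = tf.layers.dropout(
                net, rate=rate,
                training=(mode == tf.estimator.ModeKeys.TRAIN))
    # Single linear output unit for regression.
    predictions = tf.squeeze(tf.layers.dense(net, units=1), axis=1)

    if mode == tf.estimator.ModeKeys.PREDICT:
        return tf.estimator.EstimatorSpec(
            mode, predictions={'predictions': predictions})

    loss = tf.losses.mean_squared_error(labels, predictions)
    if mode == tf.estimator.ModeKeys.EVAL:
        # Expose both metric keys read by the evaluation code in this file.
        return tf.estimator.EstimatorSpec(
            mode, loss=loss,
            eval_metric_ops={
                'mse': tf.metrics.mean_squared_error(labels, predictions),
                'rmse': tf.metrics.root_mean_squared_error(
                    labels, predictions),
            })

    # Exponential learning-rate decay driven by 'decay_steps'/'decay_rate'.
    learning_rate = tf.train.exponential_decay(
        params['learning_rate'], tf.train.get_global_step(),
        params['decay_steps'], params['decay_rate'])
    optimizer = params['optimizer'](learning_rate)
    train_op = optimizer.minimize(loss, tf.train.get_global_step())
    return tf.estimator.EstimatorSpec(mode, loss=loss, train_op=train_op)
# ---------------------------------------------------------------------------
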
def main(argv):
    """ Predict based on the trained model and specfic checkpoints. """
    assert len(argv) == 1

    # Load data from local disk.
    (x_train_dev,
     y_train_dev), (x_train, y_train), (x_dev, y_dev), (x_test, y_test), (
         series_mean, series_max,
         series_min) = load_normalized_data("orig_day_full_imf1.xlsx",
                                            seed=123)

    # Create feature columns
    feature_columns = [
        tf.feature_column.numeric_column("X1"),
        tf.feature_column.numeric_column("X2"),
        tf.feature_column.numeric_column("X3"),
        tf.feature_column.numeric_column("X4"),
        tf.feature_column.numeric_column("X5"),
        tf.feature_column.numeric_column("X6"),
        tf.feature_column.numeric_column("X7"),
        tf.feature_column.numeric_column("X8"),
        tf.feature_column.numeric_column("X9"),
        tf.feature_column.numeric_column("X10"),
        tf.feature_column.numeric_column("X11"),
        tf.feature_column.numeric_column("X12"),
        tf.feature_column.numeric_column("X13"),
        tf.feature_column.numeric_column("X14"),
        tf.feature_column.numeric_column("X15"),
        tf.feature_column.numeric_column("X16"),
        # tf.feature_column.numeric_column("X17"),
        # tf.feature_column.numeric_column("X18"),
        # tf.feature_column.numeric_column("X19"),
    ]

    # Recover the model and set the dropout rate to 0.0
    model_path = 'F:/ml_fp_lytm/tf_projects/imf1_series/models/'
    current_model = 'lr0.01_ds1000_dr0.98_hu[5]_bs512_drop[0.0]'
    model_dir = model_path + current_model + '/'
    model = tf.estimator.Estimator(
        model_fn=my_dnn_regression_fn,
        model_dir=model_dir,
        params={
            'feature_columns': feature_columns,
            'hidden_units': [5],
            'drop_rates': [0.0]
        },
    )

    train_pred_input_fn = tf.estimator.inputs.pandas_input_fn(x_train,
                                                              shuffle=False)
    dev_pred_input_fn = tf.estimator.inputs.pandas_input_fn(x_dev,
                                                            shuffle=False)
    test_pred_input_fn = tf.estimator.inputs.pandas_input_fn(x_test,
                                                             shuffle=False)

    # Use a specific checkpoint file for prediction
    checkpoint_path = model_dir + 'model.ckpt-23100'
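
    # To restore the most recent checkpoint instead of a hand-picked step,
    # TF can look it up from the model directory (a minimal sketch):
    #     checkpoint_path = tf.train.latest_checkpoint(model_dir)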

    # predict the training set
    train_pred_results = model.predict(input_fn=train_pred_input_fn,
                                       checkpoint_path=checkpoint_path)
    # predict the developing set
    dev_pred_results = model.predict(input_fn=dev_pred_input_fn,
                                     checkpoint_path=checkpoint_path)
    # predict the testing set.
    test_pred_results = model.predict(input_fn=test_pred_input_fn,
                                      checkpoint_path=checkpoint_path)

    # Convert generator to numpy array
    train_predictions = np.array(
        list(p['predictions'] for p in train_pred_results))
    dev_predictions = np.array(list(p['predictions']
                                    for p in dev_pred_results))
    test_predictions = np.array(
        list(p['predictions'] for p in test_pred_results))

    # reshape the prediction to y shape.
    train_predictions = train_predictions.reshape(np.array(y_train).shape)
    dev_predictions = dev_predictions.reshape(np.array(y_dev).shape)
    test_predictions = test_predictions.reshape(np.array(y_test).shape)

    # compute R square
    r2_train = r2_score(y_train, train_predictions)
    r2_dev = r2_score(y_dev, dev_predictions)
    r2_test = r2_score(y_test, test_predictions)

    # compute MSE
    mse_train = mean_squared_error(y_train, train_predictions)
    mse_dev = mean_squared_error(y_dev, dev_predictions)
    mse_test = mean_squared_error(y_test, test_predictions)

    # compute MAE
    mae_train = mean_absolute_error(y_train, train_predictions)
    mae_dev = mean_absolute_error(y_dev, dev_predictions)
    mae_test = mean_absolute_error(y_test, test_predictions)

    print('r2_score_train = {:.10f}'.format(r2_train))
    print('r2_score_dev = {:.10f}'.format(r2_dev))
    print('r2_score_test = {:.10f}'.format(r2_test))

    dump_train_dev_test_to_excel(
        path='F:/ml_fp_lytm/tf_projects/imf1_series/models/' + current_model +
        '.xlsx',
        y_train=y_train,
        train_pred=train_predictions,
        r2_train=r2_train,
        mse_train=mse_train,
        mae_train=mae_train,
        y_dev=y_dev,
        dev_pred=dev_predictions,
        r2_dev=r2_dev,
        mse_dev=mse_dev,
        mae_dev=mae_dev,
        y_test=y_test,
        test_pred=test_predictions,
        r2_test=r2_test,
        mse_test=mse_test,
        mae_test=mae_test,
    )

    # print(test_predictions)

    # plot the predicted line
    plot_normreconvert_pred(y_train,
                            train_predictions,
                            series_mean,
                            series_max,
                            series_min,
                            fig_savepath=model_path + current_model +
                            '_train_pred.png')

    plot_normreconvert_relation(y_train,
                                train_predictions,
                                series_mean,
                                series_max,
                                series_min,
                                fig_savepath=model_path + current_model +
                                "_train_rela.png")

    plot_normreconvert_pred(y_dev,
                            dev_predictions,
                            series_mean,
                            series_max,
                            series_min,
                            fig_savepath=model_path + current_model +
                            "_dev_pred.png")

    # plot the relationship between the records and predictions
    plot_normreconvert_relation(y_dev,
                                dev_predictions,
                                series_mean,
                                series_max,
                                series_min,
                                fig_savepath=model_path + current_model +
                                "_dev_rela.png")

    plot_normreconvert_pred(y_test,
                            test_predictions,
                            series_mean,
                            series_max,
                            series_min,
                            fig_savepath=model_path + current_model +
                            "_test_pred.png")

    # plot the relationship between the records and predictions
    plot_normreconvert_relation(y_test,
                                test_predictions,
                                series_mean,
                                series_max,
                                series_min,
                                fig_savepath=model_path + current_model +
                                "_test_rela.png")
def main(argv):
    """ Builds, Trians, and evaluates the model. """
    assert len(argv) == 1

    # Load data from local disk.
    (x_train, y_train), (x_dev, y_dev), (x_test, y_test), (
        series_mean, series_max, series_min
    ) = import_tanmiao.load_normalized_data('orig_day_full_X.xlsx')

    # Build the training input_fn
    def input_train(features=x_train, labels=y_train, batch_size=128):
        """ An input function for training """
        # convert the input to a Dataset
        dataset = tf.data.Dataset.from_tensor_slices((dict(features), labels))

        # Shuffling with a buffer larger than the dataset ensures
        # that the examples are well mixed.
        dataset = dataset.shuffle(4000).batch(
            batch_size).repeat().make_one_shot_iterator().get_next()

        return dataset

    # Build the validation input_fn
    def input_dev(features=x_dev, labels=y_dev, batch_size=128):
        # Convert the input to a Dataset
        dataset = tf.data.Dataset.from_tensor_slices((dict(features), labels))

        # Shuffling
        dataset = dataset.shuffle(2000).batch(
            batch_size).make_one_shot_iterator().get_next()

        return dataset

    feature_columns = [
        tf.feature_column.numeric_column("X1"),
        tf.feature_column.numeric_column("X2"),
        tf.feature_column.numeric_column("X3"),
        tf.feature_column.numeric_column("X4"),
        tf.feature_column.numeric_column("X5"),
        tf.feature_column.numeric_column("X6")
        # tf.feature_column.numeric_column("X7")
    ]

    # Build a custom Estimator using the model_fn;
    # 'params' is passed through to the model_fn.
    model = tf.estimator.Estimator(model_fn=my_dnn_regression_fn,
                                   model_dir=model_path,
                                   params={
                                       'feature_columns': feature_columns,
                                       'learning_rate': 0.1,
                                       'decay_steps': 500,
                                       'decay_rate': 0.98,
                                       'optimizer': tf.train.AdamOptimizer,
                                       'hidden_units': [20, 20, 20, 20]
                                   })

    train_pred_input_fn = tf.estimator.inputs.pandas_input_fn(x_train,
                                                              shuffle=False)

    space = [
        Real(10**-5, 10**0, 'log-uniform', name='learning_rate'),
        Real(0.1, 0.8, 'log-uniform', name='drop_rates')
    ]
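
    # Each Real dimension defines a search interval; the 'log-uniform'
    # prior samples the exponent uniformly, which suits learning rates
    # spanning several orders of magnitude.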

    # gp_minimize passes each candidate point as a plain list of values;
    # use_named_args maps that list onto keyword arguments named after
    # the dimensions in `space`.
    from skopt.utils import use_named_args

    @use_named_args(space)
    def objective(**params):
        # NOTE: reusing one model_dir across calls makes each trial resume
        # from the previous trial's checkpoints; a unique model_dir per
        # call would evaluate each point from scratch.
        model = tf.estimator.Estimator(model_fn=my_dnn_regression_fn,
                                       model_dir=model_path,
                                       params={
                                           'feature_columns':
                                           feature_columns,
                                           'learning_rate':
                                           params.get('learning_rate'),
                                           'decay_steps':
                                           1,
                                           'decay_rate':
                                           1,
                                           'optimizer':
                                           tf.train.AdamOptimizer,
                                           'hidden_units': [20, 20, 20, 20],
                                           'drop_rates':
                                           params.get('drop_rates')
                                       })

        # Train the model
        model.train(input_fn=input_train, steps=STEPS)
        pred = model.predict(input_fn=train_pred_input_fn)
        train_predictions = np.array(list(p['predictions'] for p in pred))
        train_predictions = train_predictions.reshape(np.array(y_train).shape)
        # Use a new local name: rebinding `y_train` inside this function
        # would make it local and raise UnboundLocalError above.
        y_true = np.array(y_train)
        # Return the mean absolute error (MAE) on the training set.
        return np.mean(np.abs(y_true - train_predictions))

    res_gp = gp_minimize(objective, space, n_calls=50, random_state=0)

    print("minmun MAE=%.4f" % res_gp.fun)
def main(argv):
    """ Builds, Trians, and evaluates the model. """
    assert len(argv) == 1

    # Load data from local disk.
    (x_train, y_train), (x_dev, y_dev), (x_test, y_test), (
        series_mean, series_max, series_min
    ) = import_tanmiao.load_normalized_data("orig_day_full_X.xlsx")

    # Build the training input_fn
    def input_train(features=x_train, labels=y_train, batch_size=128):
        """ An input function for training """
        # convert the input to a Dataset
        dataset = tf.data.Dataset.from_tensor_slices((dict(features), labels))

        # Shuffling with a buffer larger than the dataset ensures
        # that the examples are well mixed.
        dataset = dataset.shuffle(4000).batch(
            batch_size).repeat().make_one_shot_iterator().get_next()

        return dataset

    # Build the validation input_fn
    def input_dev(features=x_dev, labels=y_dev, batch_size=128):
        # Convert the input to a Dataset
        dataset = tf.data.Dataset.from_tensor_slices((dict(features), labels))

        # Shuffling
        dataset = dataset.shuffle(2000).batch(
            batch_size).make_one_shot_iterator().get_next()

        return dataset

    feature_columns = [
        tf.feature_column.numeric_column("X1"),
        tf.feature_column.numeric_column("X2"),
        tf.feature_column.numeric_column("X3"),
        tf.feature_column.numeric_column("X4"),
        tf.feature_column.numeric_column("X5"),
        tf.feature_column.numeric_column("X6"),
        # tf.feature_column.numeric_column("X7")
    ]

    # Build a custom Estimator using the model_fn;
    # 'params' is passed through to the model_fn.
    model = tf.estimator.Estimator(
        model_fn=my_dnn_regression_fn,
        model_dir=model_path,
        params={
            'feature_columns': feature_columns,
            'learning_rate': 0.01,
            'optimizer': tf.train.AdamOptimizer,
            'hidden_units': [20]
        })


    # Train the model
    model.train(input_fn=input_train, steps=STEPS)

    # Evaluate how the model performs on data it has not yet seen.
    eval_result = model.evaluate(input_fn=input_dev, steps=STEPS)

    # The evaluation returns a Python dictionary; the 'rmse' key
    # holds the root-mean-square error (RMSE).
    average_loss = eval_result['rmse']
    # train_average_loss = train_eval_result['rmse']

    print("\n" + 80 * "*")
    print("\nRMS error for the validation set: {:.2f}".format(average_loss))
    # print("\nRMS error for the validation set: {:.2f}".format(
    #     train_average_loss))
    print()

    # Test the model's performance on the test set
    test_pred_input_fn = tf.estimator.inputs.pandas_input_fn(
        x_test, shuffle=False)

    # The result of 'PREDICT' mode is a python generator
    test_pred_results = model.predict(input_fn=test_pred_input_fn)

    # print(list(test_pred_results))

    # Convert generator to numpy array
    test_predictions = np.array(
        list(p['predictions'] for p in test_pred_results))
    test_predictions = test_predictions.reshape(np.array(y_test).shape)


    r2 = r2_score(y_test, test_predictions)
    print('r2_score = ' + str(r2))

    # print(test_predictions)

    # plot the predicted line
    plot_util.plot_normreconvert_pred(
        y_test,
        test_predictions,
        series_mean,
        series_max,
        series_min,
        fig_savepath=model_path + "test_pred.png")

    # plot the relationship between the records and predictions
    plot_util.plot_normreconvert_relation(
        y_test,
        test_predictions,
        series_mean,
        series_max,
        series_min,
        fig_savepath=model_path + "test_rela.png")