# These snippets assume module-level imports/globals not shown here:
# numpy as np, tensorflow as tf, sklearn.metrics.r2_score, import_tanmiao,
# plot_util, STEPS, model_path, fig_savepath.
def main(argv):
    """ Builds, Trians, and evaluates the model. """
    assert len(argv) == 1

    # Load data from local disk.
    (x_train,
     y_train), (x_dev,
                y_dev), (x_test,
                         y_test) = import_tanmiao.load_data(logtrans=True)
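    # Note: the input functions below call dict(features), which assumes
    # load_data returns pandas DataFrames (dict(df) maps column names to
    # Series); the import_tanmiao module itself is not shown in this listing.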

    # Build the training input_fn
    def input_train(features=x_train, labels=y_train, batch_size=128):
        """ An input function for training """
        # convert the input to a Dataset
        dataset = tf.data.Dataset.from_tensor_slices((dict(features), labels))

        # Shuffling with a buffer larger than the data set ensures
        # that the examples are well mixed.
        dataset = dataset.shuffle(4000).batch(
            batch_size).repeat().make_one_shot_iterator().get_next()

        return dataset

    # Build the validation input_fn
    def input_dev(features=x_dev, labels=y_dev, batch_size=128):
        # Convert the input to a Dataset
        dataset = tf.data.Dataset.from_tensor_slices((dict(features), labels))

        # Shuffling
        dataset = dataset.shuffle(2000).batch(
            batch_size).make_one_shot_iterator().get_next()

        return dataset

    feature_columns = [
        tf.feature_column.numeric_column("X1"),
        tf.feature_column.numeric_column("X2"),
        tf.feature_column.numeric_column("X3"),
        tf.feature_column.numeric_column("X4"),
        tf.feature_column.numeric_column("X5"),
        tf.feature_column.numeric_column("X6"),
        tf.feature_column.numeric_column("X7")
    ]

    # Set learning_rate decay
    # global_step_ = tf.Variable(0)
    # learning_rate = tf.train.exponential_decay(
    #     0.1, global_step_, 100, 0.96, staircase=True)

    # Build a custom Estimator, using the model_fn
    # 'params' is passed through to the 'model_fn'
    model = tf.estimator.Estimator(
        model_fn=my_dnn_regression_fn,
        model_dir="F:/ml_fp_lytm/tf_projects/test/models/temp2.2",
        params={
            'feature_columns': feature_columns,
            'learning_rate': 0.1,
            'optimizer': tf.train.AdamOptimizer,
            'hidden_units': [20, 20, 20, 20]
        })

    # Use tf.estimator.train_and_evaluate: TrainSpec caps training at
    # max_steps, while EvalSpec's throttle_secs sets the minimum interval
    # between evaluations and start_delay_secs delays the first one.
    train_spec = tf.estimator.TrainSpec(input_fn=input_train, max_steps=10000)
    eval_spec = tf.estimator.EvalSpec(input_fn=input_dev,
                                      steps=10000,
                                      throttle_secs=60,
                                      start_delay_secs=0)
    tf.estimator.train_and_evaluate(model, train_spec, eval_spec)
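
# `my_dnn_regression_fn` is referenced by the Estimators in these examples
# but is not shown in this listing. A minimal sketch of such a TF 1.x
# model_fn, assuming the params dict and the 'rmse' metric key used above
# (the original implementation may differ):
def my_dnn_regression_fn(features, labels, mode, params):
    """Sketch of a DNN regression model_fn with an 'rmse' eval metric."""
    # Stack dense layers on top of the feature-column input layer.
    net = tf.feature_column.input_layer(features, params['feature_columns'])
    for units in params['hidden_units']:
        net = tf.layers.dense(net, units=units, activation=tf.nn.relu)
    predictions = tf.squeeze(tf.layers.dense(net, units=1), axis=1)

    if mode == tf.estimator.ModeKeys.PREDICT:
        return tf.estimator.EstimatorSpec(
            mode, predictions={'predictions': predictions})

    loss = tf.losses.mean_squared_error(labels, predictions)
    rmse = tf.metrics.root_mean_squared_error(labels, predictions)

    if mode == tf.estimator.ModeKeys.EVAL:
        return tf.estimator.EstimatorSpec(
            mode, loss=loss, eval_metric_ops={'rmse': rmse})

    # TRAIN mode: minimize MSE with the configured optimizer.
    optimizer = params['optimizer'](params['learning_rate'])
    train_op = optimizer.minimize(
        loss, global_step=tf.train.get_global_step())
    return tf.estimator.EstimatorSpec(mode, loss=loss, train_op=train_op)
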
def main(argv):
    """ Builds, Trians, and evaluates the model. """
    assert len(argv) == 1

    # Load data from local disk.
    (x_train,
     y_train), (x_dev,
                y_dev), (x_test,
                         y_test) = import_tanmiao.load_data(logtrans=True)

    # Build the training input_fn
    def input_train(features=x_train, labels=y_train, batch_size=128):
        """ An input function for training """
        # convert the input to a Dataset
        dataset = tf.data.Dataset.from_tensor_slices((dict(features), labels))

        # Shuffling with a buffer larger than the data set ensures
        # that the examples are well mixed.
        dataset = dataset.shuffle(4000).batch(
            batch_size).repeat().make_one_shot_iterator().get_next()

        return dataset

    # Build the validation input_fn
    def input_dev(features=x_dev, labels=y_dev, batch_size=128):
        # Convert the input to a Dataset
        dataset = tf.data.Dataset.from_tensor_slices((dict(features), labels))

        # Shuffling
        dataset = dataset.shuffle(2000).batch(
            batch_size).make_one_shot_iterator().get_next()

        return dataset

    feature_columns = [
        tf.feature_column.numeric_column("X1"),
        tf.feature_column.numeric_column("X2"),
        tf.feature_column.numeric_column("X3"),
        tf.feature_column.numeric_column("X4"),
        tf.feature_column.numeric_column("X5"),
        tf.feature_column.numeric_column("X6"),
        tf.feature_column.numeric_column("X7")
    ]

    # Build a custom Estimator, using the model_fn
    # 'params' is passed through to the 'model_fn'
    model = tf.estimator.Estimator(model_fn=my_dnn_regression_fn,
                                   model_dir=model_path,
                                   params={
                                       'feature_columns': feature_columns,
                                       'learning_rate': 0.001,
                                       'optimizer': tf.train.AdamOptimizer,
                                       'hidden_units': [20, 20, 20, 20],
                                       'drop_rates': [0.5, 0.5, 0.5, 0.5]
                                   })
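
    # Note: 'drop_rates' is presumably consumed by my_dnn_regression_fn to
    # apply dropout after each hidden layer (one rate per layer); this is an
    # assumption, and the sketch above omits dropout.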

    # Train the model
    model.train(input_fn=input_train, steps=STEPS)

    # Evaluate how the model performs on data it has not yet seen.
    eval_result = model.evaluate(input_fn=input_dev, steps=STEPS)

    # The evaluation returns a Python dictionary; here the 'rmse' key,
    # computed by the custom model_fn, holds the root mean square error.
    rmse = eval_result['rmse']
    # train_rmse = train_eval_result['rmse']

    # Report the RMSE for the validation set.
    print("\n" + 80 * "*")
    print("\nRMS error for the validation set: {:.2f}".format(rmse))
    # print("\nRMS error for the training set: {:.2f}".format(train_rmse))
    print()

    # Test the model's performance on the test set.
    test_pred_input_fn = tf.estimator.inputs.pandas_input_fn(x_test,
                                                             shuffle=False)

    # The result of 'PREDICT' mode is a Python generator
    test_pred_results = model.predict(input_fn=test_pred_input_fn)

    # print(list(test_pred_results))

    # Convert generator to numpy array
    test_predictions_list = []
    for dictionary in list(test_pred_results):
        # print(dictionary['predictions'])
        test_predictions_list.append(dictionary['predictions'])
    test_predictions = np.array(test_predictions_list)

    r2 = r2_score(y_test, test_predictions)
    print('r2_score = ' + str(r2))

    # print(test_predictions)

    # plot the predicted line
    plot_util.plot_pred(y_test,
                        test_predictions,
                        fig_savepath=model_path + "test_pred_2.png")

    # plot the relationship between the records and predictions
    plot_util.plot_relation(y_test,
                            test_predictions,
                            fig_savepath=model_path + "test_rela_2.png")
def main(argv):
    """ Builds, Trians, and evaluates the model. """
    assert len(argv) == 1

    # Load data from local disk.
    (x_train,
     y_train), (x_dev,
                y_dev), (x_test,
                         y_test) = import_tanmiao.load_data(logtrans=True)

    # Build the training input_fn
    def input_train(features=x_train, labels=y_train, batch_size=128):
        """ An input function for training """
        # convert the input to a Dataset
        dataset = tf.data.Dataset.from_tensor_slices((dict(features), labels))

        # Shuffling with a buffer larger than the data set ensures
        # that the examples are well mixed.
        dataset = dataset.shuffle(4000).batch(
            batch_size).repeat().make_one_shot_iterator().get_next()

        return dataset

    # Build the validation input_fn
    def input_dev(features=x_dev, labels=y_dev, batch_size=128):
        # Convert the input to a Dataset
        dataset = tf.data.Dataset.from_tensor_slices((dict(features), labels))

        # Shuffling
        dataset = dataset.shuffle(2000).batch(
            batch_size).make_one_shot_iterator().get_next()

        return dataset

    feature_columns = [
        tf.feature_column.numeric_column("X1"),
        tf.feature_column.numeric_column("X2"),
        tf.feature_column.numeric_column("X3"),
        tf.feature_column.numeric_column("X4"),
        tf.feature_column.numeric_column("X5"),
        tf.feature_column.numeric_column("X6"),
        tf.feature_column.numeric_column("X7")
    ]

    # Hyperparameter tuning:
    # try a few learning rates
    for learning_rate in [0.1, 0.01, 0.001]:
        # Tune the number of hidden units first, then the hidden layers.
        for hidden_units in [[27, 19, 5], [27, 19, 10], [27, 19, 15],
                             [27, 19, 20], [27, 19, 25], [27, 19, 30],
                             [27, 19, 35]]:

            # construct a hyperparameter str
            hparam_str = 'lr' + str(learning_rate) + '_h_u' + str(hidden_units)
            # print(hparam_str)

            # Build a custom Estimator, using the model_fn
            # 'params' is passed through to the 'model_fn'
            model = tf.estimator.Estimator(
                model_fn=my_dnn_regression_fn,
                model_dir="F:/ml_fp_lytm/tf_projects/test/models/temp2.1/" +
                hparam_str,
                params={
                    'feature_columns': feature_columns,
                    'learning_rate': learning_rate,
                    'optimizer': tf.train.AdamOptimizer,
                    'hidden_units': hidden_units
                })

            # Train the model
            model.train(input_fn=input_train, steps=STEPS)
            # Evaluate how the model performs on data it has not yet seen.
            eval_result = model.evaluate(input_fn=input_dev)
            # The evaluation returns a Python dictionary; here the 'rmse'
            # key, computed by the custom model_fn, already holds the root
            # mean square error, so no further conversion is needed.
            rmse = eval_result['rmse']

            # Report the RMSE for the validation set.
            print("\n" + 80 * "*")
            print("\nRMS error for the validation set: {:.2f}".format(rmse))
            print()
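
# With one model_dir per hyperparameter combination, the runs above can be
# compared side by side in TensorBoard, e.g.:
#   tensorboard --logdir F:/ml_fp_lytm/tf_projects/test/models/temp2.1
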
def main(argv):
    """ Build, train, and evaluates the model. """
    assert len(argv) == 1
    (x_train,
     y_train), (x_dev,
                y_dev), (x_test,
                         y_test) = import_tanmiao.load_data(logtrans=True)

    # build the training input_fn
    def input_train(features=x_train, labels=y_train, batch_size=128):
        """ An input function for training """
        # Convert the inputs to a Dataset.
        dataset = tf.data.Dataset.from_tensor_slices((dict(features), labels))

        # Shuffle, repeat, and batch the examples.
        # The shuffle buffer size should exceed the number of training samples.
        # .repeat() with no argument repeats indefinitely; .repeat(5) would
        # yield 5 epochs. batch_size is the mini-batch size.
        dataset = dataset.shuffle(4000).batch(
            batch_size).repeat().make_one_shot_iterator().get_next()

        # Return the next-batch tensors.
        return dataset

    # Build the validation input_fn
    def input_dev(features=x_dev, labels=y_dev, batch_size=128):
        dataset = tf.data.Dataset.from_tensor_slices((dict(features), labels))

        dataset = dataset.shuffle(2000).batch(
            batch_size).make_one_shot_iterator().get_next()
        return dataset

    feature_columns = [
        tf.feature_column.numeric_column("X1"),
        tf.feature_column.numeric_column("X2"),
        tf.feature_column.numeric_column("X3"),
        tf.feature_column.numeric_column("X4"),
        tf.feature_column.numeric_column("X5"),
        tf.feature_column.numeric_column("X6"),
        # tf.feature_column.numeric_column("X7")
    ]

    model_dir = "F:/ml_fp_lytm/tf_projects/test/models/temp1"

    # Build the estimator
    model = tf.estimator.DNNRegressor(hidden_units=[10, 10],
                                      feature_columns=feature_columns,
                                      model_dir=model_dir)
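    # DNNRegressor is a canned estimator: without an optimizer argument it
    # defaults to Adagrad in TF 1.x, with ReLU-activated hidden layers.
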
    # Train the model
    model.train(input_fn=input_train, steps=STEPS)

    # Evaluate how the model performs on data it has not yet seen.
    eval_result = model.evaluate(input_fn=input_dev)

    # The evaluation returns a Python dictionary; the 'average_loss' key
    # holds the mean squared error (MSE).
    average_loss = eval_result["average_loss"]

    # Convert MSE to root mean square error (RMSE)
    print("\n" + 80 * "*")
    print("\nRMS error for the validation set: ${:.2f}".format(
        average_loss**0.5))
    print()

    # Note: despite the 'test_pred' naming below, this example predicts on
    # the training set (x_train) and plots against y_train.
    test_pred_input_fn = tf.estimator.inputs.pandas_input_fn(x_train,
                                                             shuffle=False)

    test_pred_results = model.predict(input_fn=test_pred_input_fn)

    test_predictions_list = []

    for dicts in list(test_pred_results):
        test_predictions_list.append(dicts['predictions'][0])

    test_predictions = np.array(test_predictions_list)
    plot_util.plot_pred(y_train,
                        test_predictions,
                        fig_savepath=fig_savepath + "test_pred_1.png")
    plot_util.plot_relation(y_train,
                            test_predictions,
                            fig_savepath=fig_savepath + "test_rela_1.png")
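
# None of these snippets include an entry point. A main(argv) that asserts
# len(argv) == 1 follows the tf.app.run convention, so a minimal TF 1.x
# runner (an assumption; not part of the original listing) would be:
if __name__ == '__main__':
    tf.logging.set_verbosity(tf.logging.INFO)
    tf.app.run(main=main)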