# Example #1
def main(argv):
    """Retrain the saved DNN classifier on newly gathered data.

    Args:
        argv: Command-line argument vector; everything after the program
            name is forwarded to the module-level argument parser
            (expects ``batch_size`` and ``train_steps``).
    """
    args = parser.parse_args(argv[1:])

    # Feature columns are derived from the original feature CSV.
    feature_columns = data_load.get_feature_column(
        "data/original_features.csv")

    # Checkpoint every 60 seconds, keeping at most the 10 newest files.
    checkpointing_config = tf.estimator.RunConfig(
        save_checkpoints_secs=60,
        keep_checkpoint_max=10,
    )

    # Binary classifier with two hidden layers of 20 units each; it
    # resumes from the latest checkpoint found in model_dir.
    classifier = tf.estimator.DNNClassifier(
        model_dir="model/y_dnn",
        config=checkpointing_config,
        feature_columns=feature_columns,
        hidden_units=[20, 20],
        n_classes=2)

    retrain_x, retrain_y = get_data_to_be_re_train()

    # Continue training on the re-training set.
    classifier.train(
        input_fn=lambda: train_input_fn(retrain_x, retrain_y,
                                        args.batch_size),
        steps=args.train_steps)
# Example #2
def main(argv):
    """Run inference with the saved DNN classifier and persist the results.

    Restores the classifier from its checkpoint directory, predicts a class
    for every row returned by ``get_features_to_be_predicted()``, and writes
    ``[feature_id, feature_1, ..., feature_n, predicted_class]`` rows to
    data/y2.csv.

    Args:
        argv: Command-line argument vector; everything after the program
            name is forwarded to the module-level argument parser
            (expects ``batch_size``).
    """
    args = parser.parse_args(argv[1:])

    # Get Feature Columns
    feature_columns = data_load.get_feature_column("data/original_features.csv")

    # Build 2 hidden layer DNN with 20, 20 units respectively.
    checkpointing_config = tf.estimator.RunConfig(
        save_checkpoints_secs=60,  # Save checkpoints every 60 seconds.
        keep_checkpoint_max=10,  # Retain the 10 most recent checkpoints.
    )
    classifier = tf.estimator.DNNClassifier(
        model_dir="model/y_dnn",
        config=checkpointing_config,
        feature_columns=feature_columns,
        # Two hidden layers of 20 nodes each.
        hidden_units=[20, 20],
        # The model must choose between 2 classes.
        n_classes=2)

    data_to_be_predicted, feature_id = get_features_to_be_predicted()

    predictions = classifier.predict(
        input_fn=lambda: predict_input_fn(data_to_be_predicted,
                                          batch_size=args.batch_size)
    )

    # Walk the three parallel sequences together instead of indexing with
    # range(len(...)); this also consumes the predictions generator lazily
    # rather than materializing it first.
    data_set = []
    for f_id, features, prediction in zip(
            feature_id, data_to_be_predicted.values, predictions):
        # One output row: id, all original feature values, predicted class.
        row = [f_id, *features, prediction.get("class_ids")[0]]
        data_set.append(row)
        print(row)

    print(data_set)

    data_save.write_to_csv(
        "data/y2.csv",
        data=data_set,
        header=None)
# Example #3
def pre_train():
    """Train the binary DNN classifier from scratch on the labelled y1 data.

    Reads features and labels from ../data/y1.csv, builds feature columns
    from ../data/original_features.csv, trains for 1000 steps with a batch
    size of 100, and prints where the newest checkpoint was saved.
    """
    print("[Pre-Train] Ready to do pre_train")

    # Load the labelled training set.
    features, labels = data_load.load_data_original_with_label(
        "../data/y1.csv", columns.CSV_FEATURE_AND_LABEL_RESULT, "y1")

    print("=====[View Train Data]=====")
    print(features.head())
    print("===========================")

    # original_features.csv contains only feature ids and feature values,
    # so the feature columns are derived from that file alone.
    feature_columns = data_load.get_feature_column(
        "../data/original_features.csv")

    # Checkpoint every 60 seconds; retain no more than the 10 newest.
    run_config = tf.estimator.RunConfig(
        save_checkpoints_secs=60,
        keep_checkpoint_max=10,
    )

    # Two hidden layers of 20 units each; two output classes.
    classifier = tf.estimator.DNNClassifier(
        model_dir="../model/y_dnn",
        config=run_config,
        feature_columns=feature_columns,
        hidden_units=[20, 20],
        n_classes=2)

    # Fit the model.
    classifier.train(
        input_fn=lambda: train_input_fn(features, labels, batch_size=100),
        steps=1000)

    # Report where the latest checkpoint was written.
    print("[Check Point]" + classifier.latest_checkpoint())