Code example #1

def main(argv):
  """Builds, trains, and evaluates the model."""
  assert len(argv) == 1
  (train, test) = imports85.dataset()

  # Switch the labels to units of thousands for better convergence.
  def normalize_price(features, labels):
    return features, labels / PRICE_NORM_FACTOR

  train = train.map(normalize_price)
  test = test.map(normalize_price)

  # Build the training input_fn.
  def input_train():
    return (
        # Shuffling with a buffer larger than the data set ensures
        # that the examples are well mixed.
        train.shuffle(1000).batch(128)
        # Repeat forever
        .repeat().make_one_shot_iterator().get_next())

  # Build the validation input_fn.
  def input_test():
    return (test.shuffle(1000).batch(128)
            .make_one_shot_iterator().get_next())

  # The first way assigns a unique weight to each category. To do this, you must
  # specify the category's vocabulary (values outside this specification will
  # receive a weight of zero). Here we specify the vocabulary using a list of
  # options. The vocabulary can also be specified with a vocabulary file (using
  # `categorical_column_with_vocabulary_file`). For features covering a
  # range of positive integers use `categorical_column_with_identity`.
  body_style_vocab = ["hardtop", "wagon", "sedan", "hatchback", "convertible"]
  body_style = tf.feature_column.categorical_column_with_vocabulary_list(
      key="body-style", vocabulary_list=body_style_vocab)
  make = tf.feature_column.categorical_column_with_hash_bucket(
      key="make", hash_bucket_size=50)

  feature_columns = [
      tf.feature_column.numeric_column(key="curb-weight"),
      tf.feature_column.numeric_column(key="highway-mpg"),
      # Since this is a DNN model, convert categorical columns from sparse
      # to dense.
      # Wrap them in an `indicator_column` to create a
      # one-hot vector from the input.
      tf.feature_column.indicator_column(body_style),
      # Or use an `embedding_column` to create a trainable vector for each
      # index.
      tf.feature_column.embedding_column(make, dimension=3),
  ]

  # Build a custom Estimator, using the model_fn.
  # `params` is passed through to the `model_fn`.
  model = tf.estimator.Estimator(
      model_fn=my_dnn_regression_fn,
      params={
          "feature_columns": feature_columns,
          "learning_rate": 0.001,
          "optimizer": tf.train.AdamOptimizer,
          "hidden_units": [20, 20]
      })

  # Train the model.
  model.train(input_fn=input_train, steps=STEPS)

  # Evaluate how the model performs on data it has not yet seen.
  eval_result = model.evaluate(input_fn=input_test)

  # Print the Root Mean Square Error (RMSE).
  print("\n" + 80 * "*")
  print("\nRMS error for the test set: ${:.0f}"
        .format(PRICE_NORM_FACTOR * eval_result["rmse"]))

  print()
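
Example #1 (and example #5 below) builds a custom Estimator around `my_dnn_regression_fn`, which is referenced but not defined in this section. A minimal sketch, assuming the TF 1.x layers and metrics APIs; the "rmse" key in `eval_metric_ops` is what makes the `eval_result["rmse"]` lookup above work:

def my_dnn_regression_fn(features, labels, mode, params):
  """A model_fn sketch for DNN regression (assumes TF 1.x APIs)."""
  # Build the input layer from the feature columns passed in via `params`.
  top = tf.feature_column.input_layer(features, params["feature_columns"])

  # Stack fully connected hidden layers of the requested sizes.
  for units in params.get("hidden_units", [20]):
    top = tf.layers.dense(inputs=top, units=units, activation=tf.nn.relu)

  # A single linear output unit, squeezed to a 1-D tensor of predictions.
  predictions = tf.squeeze(tf.layers.dense(inputs=top, units=1), 1)

  if mode == tf.estimator.ModeKeys.PREDICT:
    return tf.estimator.EstimatorSpec(
        mode=mode, predictions={"price": predictions})

  # Mean squared error over the batch, used as the training objective.
  average_loss = tf.losses.mean_squared_error(labels, predictions)

  if mode == tf.estimator.ModeKeys.TRAIN:
    optimizer = params["optimizer"](params["learning_rate"])
    train_op = optimizer.minimize(
        average_loss, global_step=tf.train.get_global_step())
    return tf.estimator.EstimatorSpec(
        mode=mode, loss=average_loss, train_op=train_op)

  # EVAL mode: expose RMSE under the "rmse" key that main() reads.
  rmse = tf.metrics.root_mean_squared_error(labels, predictions)
  return tf.estimator.EstimatorSpec(
      mode=mode, loss=average_loss, eval_metric_ops={"rmse": rmse})
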
Code example #2

def main(argv):
  """Builds, trains, and evaluates the model."""
  assert len(argv) == 1
  (train, test) = imports85.dataset()

  # Switch the labels to units of thousands for better convergence.
  def normalize_price(features, labels):
    return features, labels / PRICE_NORM_FACTOR

  train = train.map(normalize_price)
  test = test.map(normalize_price)

  # Build the training input_fn.
  def input_train():
    return (
        # Shuffling with a buffer larger than the data set ensures
        # that the examples are well mixed.
        train.shuffle(1000).batch(128)
        # Repeat forever
        .repeat())

  # Build the validation input_fn.
  def input_test():
    return test.shuffle(1000).batch(128)

  # The first way assigns a unique weight to each category. To do this, you must
  # specify the category's vocabulary (values outside this specification will
  # receive a weight of zero). Here we specify the vocabulary using a list of
  # options. The vocabulary can also be specified with a vocabulary file (using
  # `categorical_column_with_vocabulary_file`). For features covering a
  # range of positive integers use `categorical_column_with_identity`.
  body_style_vocab = ["hardtop", "wagon", "sedan", "hatchback", "convertible"]
  body_style = tf.feature_column.categorical_column_with_vocabulary_list(
      key="body-style", vocabulary_list=body_style_vocab)
  make = tf.feature_column.categorical_column_with_hash_bucket(
      key="make", hash_bucket_size=50)

  feature_columns = [
      tf.feature_column.numeric_column(key="curb-weight"),
      tf.feature_column.numeric_column(key="highway-mpg"),
      # Since this is a DNN model, convert categorical columns from sparse
      # to dense.
      # Wrap them in an `indicator_column` to create a
      # one-hot vector from the input.
      tf.feature_column.indicator_column(body_style),
      # Or use an `embedding_column` to create a trainable vector for each
      # index.
      tf.feature_column.embedding_column(make, dimension=3),
  ]

  # Build a DNNRegressor with two 20-unit hidden layers, using the feature
  # columns defined above as input.
  model = tf.estimator.DNNRegressor(
      hidden_units=[20, 20], feature_columns=feature_columns)

  # Train the model.
  model.train(input_fn=input_train, steps=STEPS)

  # Evaluate how the model performs on data it has not yet seen.
  eval_result = model.evaluate(input_fn=input_test)

  # The evaluation returns a Python dictionary. The "average_loss" key holds the
  # Mean Squared Error (MSE).
  average_loss = eval_result["average_loss"]

  # Convert MSE to Root Mean Square Error (RMSE).
  print("\n" + 80 * "*")
  print("\nRMS error for the test set: ${:.0f}"
        .format(PRICE_NORM_FACTOR * average_loss**0.5))

  print()
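
Every example in this section references the module-level constants STEPS and PRICE_NORM_FACTOR, which are defined outside the excerpted functions. A plausible pair of definitions (the exact values are assumptions; tune them as needed):

STEPS = 1000              # number of training steps passed to model.train()
PRICE_NORM_FACTOR = 1000  # labels are rescaled to thousands of dollars
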
Code example #3

def main(argv):
  """Builds, trains, and evaluates the model."""
  assert len(argv) == 1
  (train, test) = imports85.dataset()

  # Switch the labels to units of thousands for better convergence.
  def normalize_price(features, labels):
    return features, labels / PRICE_NORM_FACTOR

  train = train.map(normalize_price)
  test = test.map(normalize_price)

  # Build the training input_fn.
  def input_train():
    return (
        # Shuffling with a buffer larger than the data set ensures
        # that the examples are well mixed.
        train.shuffle(1000).batch(128)
        # Repeat forever
        .repeat().make_one_shot_iterator().get_next())

  # Build the validation input_fn.
  def input_test():
    return (test.shuffle(1000).batch(128)
            .make_one_shot_iterator().get_next())

  # The following code demonstrates two of the ways that `feature_columns` can
  # be used to build a model with categorical inputs.

  # The first way assigns a unique weight to each category. To do this, you must
  # specify the category's vocabulary (values outside this specification will
  # receive a weight of zero).
  # Alternatively, you can define the vocabulary in a file (by calling
  # `categorical_column_with_vocabulary_file`) or as a range of positive
  # integers (by calling `categorical_column_with_identity`).
  body_style_vocab = ["hardtop", "wagon", "sedan", "hatchback", "convertible"]
  body_style_column = tf.feature_column.categorical_column_with_vocabulary_list(
      key="body-style", vocabulary_list=body_style_vocab)

  # The second way, appropriate for an unspecified vocabulary, is to create a
  # hashed column. It will create a fixed-length list of weights, and
  # automatically assign each input category to a weight. Due to the
  # pseudo-randomness of the process, some weights may be shared between
  # categories, while others will remain unused.
  make_column = tf.feature_column.categorical_column_with_hash_bucket(
      key="make", hash_bucket_size=50)

  feature_columns = [
      # This model uses the same two numeric features as `linear_regressor.py`.
      tf.feature_column.numeric_column(key="curb-weight"),
      tf.feature_column.numeric_column(key="highway-mpg"),
      # This model adds two categorical columns that will adjust the price based
      # on "make" and "body-style".
      body_style_column,
      make_column,
  ]

  # Build the Estimator.
  model = tf.estimator.LinearRegressor(feature_columns=feature_columns)

  # Train the model.
  # By default, the Estimators log output every 100 steps.
  model.train(input_fn=input_train, steps=STEPS)

  # Evaluate how the model performs on data it has not yet seen.
  eval_result = model.evaluate(input_fn=input_test)

  # The evaluation returns a Python dictionary. The "average_loss" key holds the
  # Mean Squared Error (MSE).
  average_loss = eval_result["average_loss"]

  # Convert MSE to Root Mean Square Error (RMSE).
  print("\n" + 80 * "*")
  print("\nRMS error for the test set: ${:.0f}"
        .format(PRICE_NORM_FACTOR * average_loss**0.5))

  print()
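
The comment above mentions two alternatives to an inline vocabulary list. Both are sketched below; the file path and the integer key are hypothetical, chosen only for illustration:

  # The same column, with the vocabulary read from a text file containing one
  # category per line ("body_styles.txt" is a hypothetical path).
  body_style_from_file = tf.feature_column.categorical_column_with_vocabulary_file(
      key="body-style", vocabulary_file="body_styles.txt", vocabulary_size=5)

  # For an integer feature already encoded as 0 .. num_buckets-1
  # ("door-count" is a hypothetical key).
  doors = tf.feature_column.categorical_column_with_identity(
      key="door-count", num_buckets=5)
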
Code example #4

def main(argv):
    """Builds, trains, and evaluates the model."""
    assert len(argv) == 1
    (train, test) = imports85.dataset()

    # Build the training input_fn.
    def input_train():
        return (
            # Shuffling with a buffer larger than the data set ensures
            # that the examples are well mixed.
            train.shuffle(1000).batch(128)
            # Repeat forever
            .repeat().make_one_shot_iterator().get_next())

    # Build the validation input_fn.
    def input_test():
        return (
            test.shuffle(1000).batch(128).make_one_shot_iterator().get_next())

    feature_columns = [
        # "curb-weight" and "highway-mpg" are numeric columns.
        tf.feature_column.numeric_column(key="curb-weight"),
        tf.feature_column.numeric_column(key="highway-mpg"),
    ]

    # Build the Estimator.
    model = tf.estimator.LinearRegressor(feature_columns=feature_columns)

    # Train the model.
    # By default, the Estimators log output every 100 steps.
    model.train(input_fn=input_train, steps=STEPS)

    # Evaluate how the model performs on data it has not yet seen.
    eval_result = model.evaluate(input_fn=input_test)

    # The evaluation returns a Python dictionary. The "average_loss" key holds the
    # Mean Squared Error (MSE).
    average_loss = eval_result["average_loss"]

    # Convert MSE to Root Mean Square Error (RMSE).
    print("\n" + 80 * "*")
    print("\nRMS error for the test set: ${:.0f}".format(average_loss**0.5))

    # Run the model in prediction mode.
    input_dict = {
        "curb-weight": np.array([2000, 3000]),
        "highway-mpg": np.array([30, 40])
    }
    predict_input_fn = tf.estimator.inputs.numpy_input_fn(input_dict,
                                                          shuffle=False)
    predict_results = model.predict(input_fn=predict_input_fn)

    # Print the prediction results.
    print("\nPrediction results:")
    for i, prediction in enumerate(predict_results):
        msg = ("Curb weight: {: 4d}lbs, "
               "Highway: {: 0d}mpg, "
               "Prediction: ${: 9.2f}")
        msg = msg.format(input_dict["curb-weight"][i],
                         input_dict["highway-mpg"][i],
                         prediction["predictions"][0])

        print("    " + msg)
    print()
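
`model.predict` returns a generator of per-example dicts; for the canned regressors, each dict's "predictions" value is a length-1 array. A minimal sketch for collecting the predicted prices as plain floats (re-running prediction with the same input_fn):

    prices = [float(p["predictions"][0])
              for p in model.predict(input_fn=predict_input_fn)]
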
Code example #5

def main(argv):
    """Builds, trains, and evaluates the model."""
    assert len(argv) == 1
    (train, test) = imports85.dataset()

    # Switch the labels to units of thousands for better convergence.
    def normalize_price(features, labels):
        return features, labels / PRICE_NORM_FACTOR

    train = train.map(normalize_price)
    test = test.map(normalize_price)

    # Build the training input_fn.
    def input_train():
        return (
            # Shuffling with a buffer larger than the data set ensures
            # that the examples are well mixed.
            train.shuffle(1000).batch(128)
            # Repeat forever
            .repeat().make_one_shot_iterator().get_next())

    # Build the validation input_fn.
    def input_test():
        return (
            test.shuffle(1000).batch(128).make_one_shot_iterator().get_next())

    # The first way assigns a unique weight to each category. To do this, you must
    # specify the category's vocabulary (values outside this specification will
    # receive a weight of zero). Here we specify the vocabulary using a list of
    # options. The vocabulary can also be specified with a vocabulary file (using
    # `categorical_column_with_vocabulary_file`). For features covering a
    # range of positive integers use `categorical_column_with_identity`.
    body_style_vocab = [
        "hardtop", "wagon", "sedan", "hatchback", "convertible"
    ]
    body_style = tf.feature_column.categorical_column_with_vocabulary_list(
        key="body-style", vocabulary_list=body_style_vocab)
    make = tf.feature_column.categorical_column_with_hash_bucket(
        key="make", hash_bucket_size=50)

    feature_columns = [
        tf.feature_column.numeric_column(key="curb-weight"),
        tf.feature_column.numeric_column(key="highway-mpg"),
        # Since this is a DNN model, convert categorical columns from sparse
        # to dense.
        # Wrap them in an `indicator_column` to create a
        # one-hot vector from the input.
        tf.feature_column.indicator_column(body_style),
        # Or use an `embedding_column` to create a trainable vector for each
        # index.
        tf.feature_column.embedding_column(make, dimension=3),
    ]

    # Build a custom Estimator, using the model_fn.
    # `params` is passed through to the `model_fn`.
    model = tf.estimator.Estimator(model_fn=my_dnn_regression_fn,
                                   params={
                                       "feature_columns": feature_columns,
                                       "learning_rate": 0.001,
                                       "optimizer": tf.train.AdamOptimizer,
                                       "hidden_units": [20, 20]
                                   })

    # Train the model.
    model.train(input_fn=input_train, steps=STEPS)

    # Evaluate how the model performs on data it has not yet seen.
    eval_result = model.evaluate(input_fn=input_test)

    # Print the Root Mean Square Error (RMSE).
    print("\n" + 80 * "*")
    print("\nRMS error for the test set: ${:.0f}".format(PRICE_NORM_FACTOR *
                                                         eval_result["rmse"]))

    print()
Code example #6

def main(argv):
  """Builds, trains, and evaluates the model."""
  assert len(argv) == 1
  train = pt.dataset()
  test = imports85.dataset()

  # Label normalization is disabled in this example; the train map passes
  # features and labels through unchanged, and the test map is an identity
  # on features (the test set is unlabeled).
  def normalize(features, labels):
    return features, labels  # / PRICE_NORM_FACTOR

  def normalize_pred(features):
    return features

  train = train.map(normalize)
  test = test.map(normalize_pred)

  # Build the training input_fn.
  def input_train():
    return (
        # Shuffling with a buffer larger than the data set ensures
        # that the examples are well mixed.
        train.shuffle(1000).batch(128)
        # Repeat forever
        .repeat().make_one_shot_iterator().get_next())

  # Build the prediction input_fn. The test data is not shuffled, so the
  # output order matches the row order of the test file.
  def input_test():
    return (test.batch(128)
            .make_one_shot_iterator().get_next())

  # Every categorical string feature in this example uses a hashed column:
  # each value is hashed into one of `hash_bucket_size` buckets, so no
  # explicit vocabulary needs to be specified.
  cat_columns = {
      i: tf.feature_column.categorical_column_with_hash_bucket(
          key="cat_var_%d" % i, hash_bucket_size=10000)
      for i in range(1, 19)
  }

  # The numeric features. (num_var_3 is not used.)
  feature_columns = [
      tf.feature_column.numeric_column(key="num_var_%d" % i)
      for i in (1, 2, 4, 5, 6, 7)
  ]
  # Since this is a DNN model, convert the sparse categorical columns to
  # dense one-hot vectors by wrapping them in an `indicator_column`.
  # (cat_var_7 is excluded; an `embedding_column` would also work here.)
  feature_columns += [
      tf.feature_column.indicator_column(cat_columns[i])
      for i in range(1, 19) if i != 7
  ]
  # cat_var_19 through cat_var_24 hold numeric values despite their names.
  feature_columns += [
      tf.feature_column.numeric_column(key="cat_var_%d" % i)
      for i in range(19, 25)
  ]

  # Build a DNNRegressor with two 20-unit hidden layers, using the feature
  # columns defined above as input.
  model = tf.estimator.DNNRegressor(
      hidden_units=[20, 20], feature_columns=feature_columns)

  # Train the model.
  model.train(input_fn=input_train, steps=STEPS)

  # The test set is unlabeled, so run predict() instead of evaluate().
  predicted = model.predict(input_fn=input_test)
  with open("test_for_name.csv") as f:
    reader=csv.DictReader(f)
    for row in reader:
      #print(row["portfolio_id"])
      arr1.append(str(row["transaction_id"]))
  
  arr=[]
  print(len(arr1))
  arr.append("transaction_id")
  arr.append("target")
  arr2=[]
  arr2.append(["transaction_id","target"])
  f=0
  #for i, p in enumerate(predicted):
  #  f=f+1
  #print(f)
  
  for i, p in enumerate(predicted):
    for ki in p.values():
     #print(i, float(ki))
     #arr.append(str())
     arr.append(arr1[x])
     arr.append(float(ki))
     arr2.append([arr1[x],abs(float(ki))])
     #print(arr2)
     x=x+1
     
  
  h=0
  with open('out_pred.csv', 'w',newline='\n') as myfile:
   #w = csv.writer(myfile, quoting=csv.QUOTE_ALL)
    w = csv.writer(myfile,delimiter =',',quotechar =' ')
    #w.writerow(arr)
    for j in arr2:
      w.writerow(j)
  # With no test labels there is no evaluation step, so no RMSE is reported.
  print("\n" + 80 * "*")

  print()
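
If pandas is available, the CSV-writing block above collapses to a one-liner; this is an equivalent sketch, reusing the `rows` list built in the example:

import pandas as pd

pd.DataFrame(rows[1:], columns=rows[0]).to_csv("out_pred.csv", index=False)
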
Code example #7

def main(argv):
  """Builds, trains, and evaluates the model."""
  assert len(argv) == 1
  (train, test) = imports85.dataset()

  # Build the training input_fn.
  def input_train():
    return (
        # Shuffling with a buffer larger than the data set ensures
        # that the examples are well mixed.
        train.shuffle(1000).batch(128)
        # Repeat forever
        .repeat().make_one_shot_iterator().get_next())

  # Build the validation input_fn.
  def input_test():
    return (test.shuffle(1000).batch(128)
            .make_one_shot_iterator().get_next())

  feature_columns = [
      # "curb-weight" and "highway-mpg" are numeric columns.
      tf.feature_column.numeric_column(key="curb-weight"),
      tf.feature_column.numeric_column(key="highway-mpg"),
  ]

  # Build the Estimator.
  model = tf.estimator.LinearRegressor(feature_columns=feature_columns)

  # Train the model.
  # By default, the Estimators log output every 100 steps.
  model.train(input_fn=input_train, steps=STEPS)

  # Evaluate how the model performs on data it has not yet seen.
  eval_result = model.evaluate(input_fn=input_test)

  # The evaluation returns a Python dictionary. The "average_loss" key holds the
  # Mean Squared Error (MSE).
  average_loss = eval_result["average_loss"]

  # Convert MSE to Root Mean Square Error (RMSE).
  print("\n" + 80 * "*")
  print("\nRMS error for the test set: ${:.0f}".format(average_loss**0.5))

  # Run the model in prediction mode.
  input_dict = {
      "curb-weight": np.array([2000, 3000]),
      "highway-mpg": np.array([30, 40])
  }
  predict_input_fn = tf.estimator.inputs.numpy_input_fn(
      input_dict, shuffle=False)
  predict_results = model.predict(input_fn=predict_input_fn)

  # Print the prediction results.
  print("\nPrediction results:")
  for i, prediction in enumerate(predict_results):
    msg = ("Curb weight: {: 4d}lbs, "
           "Highway: {: 0d}mpg, "
           "Prediction: ${: 9.2f}")
    msg = msg.format(input_dict["curb-weight"][i], input_dict["highway-mpg"][i],
                     prediction["predictions"][0])

    print("    " + msg)
  print()
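
None of the examples show how `main` is invoked. In TF 1.x sample code, files like these typically end with the harness below; `tf.app.run` parses command-line flags and forwards only the remaining arguments, which is what the `assert len(argv) == 1` at the top of each `main` is checking:

if __name__ == "__main__":
  # INFO-level logging shows the training loss every 100 steps.
  tf.logging.set_verbosity(tf.logging.INFO)
  tf.app.run(main=main)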