def main(argv):
    """Train the custom DNN regression Estimator and print its test RMSE.

    Loads the train/test datasets from `imports85.dataset()`, divides the
    labels by `PRICE_NORM_FACTOR`, trains a `tf.estimator.Estimator` built
    on `my_dnn_regression_fn` for `STEPS` steps, evaluates it on the test
    dataset, and prints the "rmse" entry of the evaluation result
    multiplied back by `PRICE_NORM_FACTOR`.

    Args:
      argv: Command-line arguments; exactly one element is expected.

    Raises:
      AssertionError: If `argv` does not contain exactly one element.
    """
    assert len(argv) == 1
    train_set, test_set = imports85.dataset()

    def scale_labels(features, labels):
        # Labels are divided by PRICE_NORM_FACTOR before training; the
        # reported error is multiplied by the same factor further down.
        return features, labels / PRICE_NORM_FACTOR

    train_set = train_set.map(scale_labels)
    test_set = test_set.map(scale_labels)

    def train_input_fn():
        # Shuffle with a 1000-element buffer, batch by 128 and repeat
        # without end; training length is bounded by `steps` below.
        batches = train_set.shuffle(1000).batch(128).repeat()
        return batches.make_one_shot_iterator().get_next()

    def eval_input_fn():
        # One shuffled pass over the test data in batches of 128.
        batches = test_set.shuffle(1000).batch(128)
        return batches.make_one_shot_iterator().get_next()

    # "body-style" is a categorical feature with an explicit vocabulary.
    body_style = tf.feature_column.categorical_column_with_vocabulary_list(
        key="body-style",
        vocabulary_list=[
            "hardtop", "wagon", "sedan", "hatchback", "convertible"
        ])
    # "make" has no vocabulary listed here, so it is hashed into 50 buckets.
    make = tf.feature_column.categorical_column_with_hash_bucket(
        key="make", hash_bucket_size=50)

    # The categorical columns are wrapped into dense representations:
    # an indicator column for body style and a 3-dimensional embedding
    # column for make.
    curb_weight = tf.feature_column.numeric_column(key="curb-weight")
    highway_mpg = tf.feature_column.numeric_column(key="highway-mpg")
    body_style_dense = tf.feature_column.indicator_column(body_style)
    make_dense = tf.feature_column.embedding_column(make, dimension=3)
    feature_columns = [
        curb_weight, highway_mpg, body_style_dense, make_dense
    ]

    # `params` is handed to `my_dnn_regression_fn` by the Estimator.
    model_params = {
        "feature_columns": feature_columns,
        "learning_rate": 0.001,
        "optimizer": tf.train.AdamOptimizer,
        "hidden_units": [20, 20],
    }
    model = tf.estimator.Estimator(
        model_fn=my_dnn_regression_fn, params=model_params)

    model.train(input_fn=train_input_fn, steps=STEPS)
    eval_result = model.evaluate(input_fn=eval_input_fn)

    # Report the RMSE in the original (unscaled) label units.
    print("\n" + 80 * "*")
    rmse = PRICE_NORM_FACTOR * eval_result["rmse"]
    print("\nRMS error for the test set: ${:.0f}".format(rmse))
    print()
def main(argv):
    """Train a canned DNNRegressor and print its test-set RMSE.

    Loads the train/test datasets from `imports85.dataset()`, divides the
    labels by `PRICE_NORM_FACTOR`, trains a `tf.estimator.DNNRegressor`
    with hidden units `[20, 20]` for `STEPS` steps, evaluates it, and
    prints the square root of the "average_loss" metric multiplied back
    by `PRICE_NORM_FACTOR`.

    Args:
      argv: Command-line arguments; exactly one element is expected.

    Raises:
      AssertionError: If `argv` does not contain exactly one element.
    """
    assert len(argv) == 1
    train_set, test_set = imports85.dataset()

    def scale_labels(features, labels):
        # Labels are divided by PRICE_NORM_FACTOR before training; the
        # reported error is multiplied by the same factor further down.
        return features, labels / PRICE_NORM_FACTOR

    train_set = train_set.map(scale_labels)
    test_set = test_set.map(scale_labels)

    def train_input_fn():
        # Returns the dataset itself: shuffled (buffer 1000), batched by
        # 128 and repeated without end.
        shuffled = train_set.shuffle(1000)
        return shuffled.batch(128).repeat()

    def eval_input_fn():
        # Returns the dataset itself: one shuffled pass in batches of 128.
        shuffled = test_set.shuffle(1000)
        return shuffled.batch(128)

    # "body-style" is a categorical feature with an explicit vocabulary.
    body_style = tf.feature_column.categorical_column_with_vocabulary_list(
        key="body-style",
        vocabulary_list=[
            "hardtop", "wagon", "sedan", "hatchback", "convertible"
        ])
    # "make" has no vocabulary listed here, so it is hashed into 50 buckets.
    make = tf.feature_column.categorical_column_with_hash_bucket(
        key="make", hash_bucket_size=50)

    # Two numeric inputs plus dense wrappers around the categorical ones:
    # an indicator column for body style, a 3-dimensional embedding for make.
    feature_columns = [
        tf.feature_column.numeric_column(key=numeric_key)
        for numeric_key in ("curb-weight", "highway-mpg")
    ]
    feature_columns.append(tf.feature_column.indicator_column(body_style))
    feature_columns.append(
        tf.feature_column.embedding_column(make, dimension=3))

    model = tf.estimator.DNNRegressor(
        hidden_units=[20, 20], feature_columns=feature_columns)

    model.train(input_fn=train_input_fn, steps=STEPS)
    eval_result = model.evaluate(input_fn=eval_input_fn)

    # "average_loss" is read as the mean squared error; its square root,
    # rescaled by PRICE_NORM_FACTOR, is the value printed.
    average_loss = eval_result["average_loss"]

    print("\n" + 80 * "*")
    rmse = PRICE_NORM_FACTOR * average_loss**0.5
    print("\nRMS error for the test set: ${:.0f}".format(rmse))
    print()
def main(argv):
    """Train a LinearRegressor with categorical inputs and print its RMSE.

    Loads the train/test datasets from `imports85.dataset()`, divides the
    labels by `PRICE_NORM_FACTOR`, trains a `tf.estimator.LinearRegressor`
    on two numeric and two categorical feature columns for `STEPS` steps,
    evaluates it, and prints the square root of the "average_loss" metric
    multiplied back by `PRICE_NORM_FACTOR`.

    Args:
      argv: Command-line arguments; exactly one element is expected.

    Raises:
      AssertionError: If `argv` does not contain exactly one element.
    """
    assert len(argv) == 1
    train_set, test_set = imports85.dataset()

    def scale_labels(features, labels):
        # Labels are divided by PRICE_NORM_FACTOR before training; the
        # reported error is multiplied by the same factor further down.
        return features, labels / PRICE_NORM_FACTOR

    train_set = train_set.map(scale_labels)
    test_set = test_set.map(scale_labels)

    def train_input_fn():
        # Shuffle with a 1000-element buffer, batch by 128 and repeat
        # without end; training length is bounded by `steps` below.
        batches = train_set.shuffle(1000).batch(128).repeat()
        return batches.make_one_shot_iterator().get_next()

    def eval_input_fn():
        # One shuffled pass over the test data in batches of 128.
        batches = test_set.shuffle(1000).batch(128)
        return batches.make_one_shot_iterator().get_next()

    # Two ways of describing a categorical input are shown here.
    #
    # 1. An explicit vocabulary list: each listed category gets its own
    #    weight. A vocabulary file (`categorical_column_with_vocabulary_file`)
    #    or an integer range (`categorical_column_with_identity`) are the
    #    alternatives named by the feature-column API.
    body_style_column = (
        tf.feature_column.categorical_column_with_vocabulary_list(
            key="body-style",
            vocabulary_list=[
                "hardtop", "wagon", "sedan", "hatchback", "convertible"
            ]))

    # 2. A hashed column, for a vocabulary that is not spelled out: each
    #    input category is hashed into one of 50 buckets, so distinct
    #    categories may end up sharing a bucket and some buckets may stay
    #    unused.
    make_column = tf.feature_column.categorical_column_with_hash_bucket(
        key="make", hash_bucket_size=50)

    # Numeric columns first, then the two categorical columns that let the
    # model adjust the price by "body-style" and "make".
    curb_weight_column = tf.feature_column.numeric_column(key="curb-weight")
    highway_mpg_column = tf.feature_column.numeric_column(key="highway-mpg")
    feature_columns = [
        curb_weight_column,
        highway_mpg_column,
        body_style_column,
        make_column,
    ]

    model = tf.estimator.LinearRegressor(feature_columns=feature_columns)

    model.train(input_fn=train_input_fn, steps=STEPS)
    eval_result = model.evaluate(input_fn=eval_input_fn)

    # "average_loss" is read as the mean squared error; its square root,
    # rescaled by PRICE_NORM_FACTOR, is the value printed.
    average_loss = eval_result["average_loss"]

    print("\n" + 80 * "*")
    rmse = PRICE_NORM_FACTOR * average_loss**0.5
    print("\nRMS error for the test set: ${:.0f}".format(rmse))
    print()
def main(argv):
    """Train a LinearRegressor, print its RMSE, then print two predictions.

    Loads the train/test datasets from `imports85.dataset()` (labels are
    used unscaled), trains a `tf.estimator.LinearRegressor` on the
    "curb-weight" and "highway-mpg" numeric columns for `STEPS` steps,
    prints the square root of the "average_loss" evaluation metric, and
    finally prints the model's predictions for two hard-coded examples.

    Args:
      argv: Command-line arguments; exactly one element is expected.

    Raises:
      AssertionError: If `argv` does not contain exactly one element.
    """
    assert len(argv) == 1
    train_set, test_set = imports85.dataset()

    def train_input_fn():
        # Shuffle with a 1000-element buffer, batch by 128 and repeat
        # without end; training length is bounded by `steps` below.
        batches = train_set.shuffle(1000).batch(128).repeat()
        return batches.make_one_shot_iterator().get_next()

    def eval_input_fn():
        # One shuffled pass over the test data in batches of 128.
        batches = test_set.shuffle(1000).batch(128)
        return batches.make_one_shot_iterator().get_next()

    # Both model inputs are plain numeric columns.
    feature_columns = [
        tf.feature_column.numeric_column(key=numeric_key)
        for numeric_key in ("curb-weight", "highway-mpg")
    ]

    model = tf.estimator.LinearRegressor(feature_columns=feature_columns)

    model.train(input_fn=train_input_fn, steps=STEPS)
    eval_result = model.evaluate(input_fn=eval_input_fn)

    # "average_loss" is read as the mean squared error; the printed value
    # is its square root.
    average_loss = eval_result["average_loss"]

    print("\n" + 80 * "*")
    print("\nRMS error for the test set: ${:.0f}".format(average_loss**0.5))

    # Prediction mode: two in-memory examples, fed in their given order.
    input_dict = {
        "curb-weight": np.array([2000, 3000]),
        "highway-mpg": np.array([30, 40]),
    }
    predict_input_fn = tf.estimator.inputs.numpy_input_fn(
        input_dict, shuffle=False)
    predict_results = model.predict(input_fn=predict_input_fn)

    print("\nPrediction results:")
    row_template = ("Curb weight: {: 4d}lbs, Highway: {: 0d}mpg, "
                    "Prediction: ${: 9.2f}")
    for index, prediction in enumerate(predict_results):
        # Each prediction is matched to its input example by position.
        curb_weight = input_dict["curb-weight"][index]
        highway_mpg = input_dict["highway-mpg"][index]
        price = prediction["predictions"][0]
        print(" " + row_template.format(curb_weight, highway_mpg, price))
    print()
def main(argv):
    """Fit the `my_dnn_regression_fn` Estimator and report its test RMSE.

    Steps performed:
      1. Load the train/test datasets from `imports85.dataset()` and divide
         their labels by `PRICE_NORM_FACTOR`.
      2. Describe the inputs with two numeric columns, an indicator column
         over "body-style" and a 3-dimensional embedding over "make".
      3. Train a `tf.estimator.Estimator` for `STEPS` steps and evaluate it.
      4. Print the "rmse" evaluation entry times `PRICE_NORM_FACTOR`.

    Args:
      argv: Command-line arguments; exactly one element is expected.

    Raises:
      AssertionError: If `argv` does not contain exactly one element.
    """
    assert len(argv) == 1
    fc = tf.feature_column

    training_data, testing_data = imports85.dataset()

    def rescale(features, labels):
        # Only the labels change: they are divided by PRICE_NORM_FACTOR.
        return features, labels / PRICE_NORM_FACTOR

    training_data = training_data.map(rescale)
    testing_data = testing_data.map(rescale)

    def next_training_batch():
        # Endless stream of shuffled (buffer 1000) batches of 128 examples.
        pipeline = training_data.shuffle(1000)
        pipeline = pipeline.batch(128)
        pipeline = pipeline.repeat()
        return pipeline.make_one_shot_iterator().get_next()

    def next_testing_batch():
        # Single shuffled pass over the test data, 128 examples per batch.
        pipeline = testing_data.shuffle(1000)
        pipeline = pipeline.batch(128)
        return pipeline.make_one_shot_iterator().get_next()

    # Categorical inputs: "body-style" uses an explicit vocabulary list,
    # "make" is hashed into 50 buckets.
    body_style_vocab = [
        "hardtop", "wagon", "sedan", "hatchback", "convertible"
    ]
    body_style = fc.categorical_column_with_vocabulary_list(
        key="body-style", vocabulary_list=body_style_vocab)
    make = fc.categorical_column_with_hash_bucket(
        key="make", hash_bucket_size=50)

    feature_columns = [
        fc.numeric_column(key="curb-weight"),
        fc.numeric_column(key="highway-mpg"),
        # Dense wrappers around the categorical columns.
        fc.indicator_column(body_style),
        fc.embedding_column(make, dimension=3),
    ]

    # The Estimator forwards `params` to `my_dnn_regression_fn`.
    model = tf.estimator.Estimator(
        model_fn=my_dnn_regression_fn,
        params=dict(
            feature_columns=feature_columns,
            learning_rate=0.001,
            optimizer=tf.train.AdamOptimizer,
            hidden_units=[20, 20],
        ))

    model.train(input_fn=next_training_batch, steps=STEPS)
    eval_result = model.evaluate(input_fn=next_testing_batch)

    # Undo the label scaling so the error is printed in original units.
    print("\n" + 80 * "*")
    print("\nRMS error for the test set: ${:.0f}".format(
        PRICE_NORM_FACTOR * eval_result["rmse"]))
    print()
def main(argv):
    """Train a DNNRegressor and write its predictions to `out_pred.csv`.

    Trains a `tf.estimator.DNNRegressor` (hidden units `[20, 20]`) for
    `STEPS` steps on the dataset returned by `pt.dataset()`, predicts on
    the dataset returned by `imports85.dataset()`, and writes one
    `transaction_id,target` row per prediction to `out_pred.csv`, preceded
    by a header row. IDs are taken, in file order, from the
    "transaction_id" column of `test_for_name.csv`; the n-th prediction is
    paired with the n-th ID. No evaluation metrics are computed.

    Args:
      argv: Command-line arguments; exactly one element is expected.

    Raises:
      AssertionError: If `argv` does not contain exactly one element.
      IndexError: If the model yields more predictions than there are IDs
        in `test_for_name.csv`.
    """
    assert len(argv) == 1

    train = pt.dataset()
    test = imports85.dataset()

    # Label scaling by PRICE_NORM_FACTOR is currently disabled, so both
    # mapping functions pass their inputs through unchanged and the
    # predictions are written out without rescaling.
    def normalize(features, labels):
        return features, labels

    def normalize_pred(features):
        return features

    train = train.map(normalize)
    test = test.map(normalize_pred)

    def input_train():
        # Shuffle with a 1000-element buffer, batch by 128 and repeat
        # without end; training length is bounded by `steps` below.
        return (train.shuffle(1000).batch(128).repeat()
                .make_one_shot_iterator().get_next())

    def input_test():
        # Not shuffled: predictions are matched to IDs by position.
        return test.batch(128).make_one_shot_iterator().get_next()

    # Numeric inputs. There is no "num_var_3" column in the model.
    numeric_columns = [
        tf.feature_column.numeric_column(key="num_var_{}".format(index))
        for index in (1, 2, 4, 5, 6, 7)
    ]

    # cat_var_1..cat_var_18 are hashed into 10000 buckets each and wrapped
    # in indicator columns so the DNN receives dense input. cat_var_7 is
    # left out of the model.
    hashed_indicator_columns = [
        tf.feature_column.indicator_column(
            tf.feature_column.categorical_column_with_hash_bucket(
                key="cat_var_{}".format(index), hash_bucket_size=10000))
        for index in range(1, 19)
        if index != 7
    ]

    # cat_var_19..cat_var_24 are fed to the model as numeric columns.
    numeric_cat_columns = [
        tf.feature_column.numeric_column(key="cat_var_{}".format(index))
        for index in range(19, 25)
    ]

    feature_columns = (
        numeric_columns + hashed_indicator_columns + numeric_cat_columns)

    model = tf.estimator.DNNRegressor(
        hidden_units=[20, 20], feature_columns=feature_columns)

    model.train(input_fn=input_train, steps=STEPS)
    predictions = model.predict(input_test)

    # IDs to attach to the predictions, in file order.
    with open("test_for_name.csv") as ids_file:
        transaction_ids = [
            str(row["transaction_id"]) for row in csv.DictReader(ids_file)
        ]
    print(len(transaction_ids))

    # Collect every row before opening the output file, so a failure while
    # predicting does not leave a partially written `out_pred.csv`.
    rows = [["transaction_id", "target"]]
    for index, prediction in enumerate(predictions):
        # The absolute value of the prediction is what gets written.
        # NOTE(review): this turns negative predictions positive; confirm
        # that is the intended output.
        target = abs(float(prediction["predictions"][0]))
        rows.append([transaction_ids[index], target])

    # NOTE(review): quotechar=' ' is unusual; it is kept so the output file
    # format stays exactly as before.
    with open('out_pred.csv', 'w', newline='\n') as out_file:
        writer = csv.writer(out_file, delimiter=',', quotechar=' ')
        writer.writerows(rows)

    print("\n" + 80 * "*")
    print()
def main(argv):
    """Fit a two-feature LinearRegressor, report RMSE and sample predictions.

    Uses the train/test datasets from `imports85.dataset()` with unscaled
    labels. After training for `STEPS` steps on the "curb-weight" and
    "highway-mpg" numeric columns, the function prints the square root of
    the "average_loss" evaluation metric and then one formatted line per
    prediction for two hard-coded input examples.

    Args:
      argv: Command-line arguments; exactly one element is expected.

    Raises:
      AssertionError: If `argv` does not contain exactly one element.
    """
    assert len(argv) == 1
    training_data, testing_data = imports85.dataset()

    def next_training_batch():
        # Endless stream of shuffled (buffer 1000) batches of 128 examples.
        pipeline = training_data.shuffle(1000)
        pipeline = pipeline.batch(128)
        pipeline = pipeline.repeat()
        return pipeline.make_one_shot_iterator().get_next()

    def next_testing_batch():
        # Single shuffled pass over the test data, 128 examples per batch.
        pipeline = testing_data.shuffle(1000)
        pipeline = pipeline.batch(128)
        return pipeline.make_one_shot_iterator().get_next()

    # Both inputs are numeric columns.
    curb_weight_column = tf.feature_column.numeric_column(key="curb-weight")
    highway_mpg_column = tf.feature_column.numeric_column(key="highway-mpg")
    feature_columns = [curb_weight_column, highway_mpg_column]

    model = tf.estimator.LinearRegressor(feature_columns=feature_columns)

    model.train(input_fn=next_training_batch, steps=STEPS)
    eval_result = model.evaluate(input_fn=next_testing_batch)

    # The evaluation result is a dictionary; "average_loss" is read as the
    # mean squared error and its square root is printed.
    average_loss = eval_result["average_loss"]

    print("\n" + 80 * "*")
    rmse = average_loss**0.5
    print("\nRMS error for the test set: ${:.0f}".format(rmse))

    # Two in-memory examples for prediction, kept in their given order.
    curb_weights = np.array([2000, 3000])
    highway_mpgs = np.array([30, 40])
    input_dict = {
        "curb-weight": curb_weights,
        "highway-mpg": highway_mpgs,
    }
    predict_input_fn = tf.estimator.inputs.numpy_input_fn(
        input_dict, shuffle=False)
    predict_results = model.predict(input_fn=predict_input_fn)

    print("\nPrediction results:")
    for position, prediction in enumerate(predict_results):
        # Predictions are matched to the input examples by position.
        line = ("Curb weight: {: 4d}lbs, "
                "Highway: {: 0d}mpg, "
                "Prediction: ${: 9.2f}").format(
                    input_dict["curb-weight"][position],
                    input_dict["highway-mpg"][position],
                    prediction["predictions"][0])
        print(" " + line)
    print()