Example #1
def neural_network(_train_X, _train_y, _test_X, _test_y, _epochs, _rate, _regularisation, _cross_val, _output_layer=0):
    reg_type, reg_scale = _regularisation
    X = tf.placeholder(tf.float32, [None, hp.num_features(_train_X)], name="input")
    y = tf.placeholder(tf.float32, name="output")
    pred, cost, W, b = layers(X, y, _output_layer)

    lad = calc_error_L1(y, pred)  # least absolute deviations (L1) error; not used below
    huber_loss = huber_error(y, pred)

    print("Regularisation: ", _regularisation)
    if reg_type == 1:
        L1 = tf.contrib.layers.l1_regularizer(scale=reg_scale)
        reg_cost = tf.contrib.layers.apply_regularization(L1, W)
    elif reg_type == 2:
        L2 = tf.contrib.layers.l2_regularizer(scale=reg_scale)
        reg_cost = tf.contrib.layers.apply_regularization(L2, W)
    else:
        reg_cost = 0
    
    cost += reg_cost

    optimizer = tf.train.GradientDescentOptimizer(_rate).minimize(cost)

    XyWb = [X, y, W, b]

    with tf.Session() as sess:
        if _cross_val:
            return hp.cross_validation(sess, XyWb, _train_X, _train_y, _test_X, _test_y, optimizer, cost, huber_loss, _epochs, "nn")
        else:
            return hp.run(sess, XyWb, _train_X, _train_y, _test_X, _test_y, optimizer, cost, huber_loss, _epochs, "nn")
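
The hp helper module and the layers builder used above are defined elsewhere in this project and are not shown. Purely as a sketch of the calling convention (hypothetical data names and argument values, not taken from the original file):

# Hypothetical call: 500 epochs, learning rate 0.01, L2 regularisation
# with scale 0.01, plain train/test evaluation instead of cross-validation.
result = neural_network(train_X, train_y, test_X, test_y,
                        _epochs=500, _rate=0.01,
                        _regularisation=(2, 0.01),  # (reg_type, reg_scale)
                        _cross_val=False)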
Example #2
def linear_regression(_train_X,
                      _train_y,
                      _test_X,
                      _test_y,
                      _epochs,
                      _rate,
                      _cost_fn,
                      _regularisation,
                      _cross_val,
                      _el_params=[1., 1.]):
    reg_type, reg_scale = _regularisation
    X = tf.placeholder(tf.float32, [None, hp.num_features(_train_X)],
                       name="input")
    y = tf.placeholder(tf.float32, name="output")

    W = tf.Variable(tf.random_normal([hp.num_features(_train_X), 1],
                                     dtype=tf.float32),
                    name="weight")
    b = tf.Variable(tf.random_normal([1], dtype=tf.float32), name="bias")

    _, huber_cost = huber_error(X, y, W, b)
    _, lad = calc_error_L1(X, y, W, b)  # L1 error; not used below

    if reg_type == 1:
        pred, cost = calc_error_reg_L1(X, y, W, b, _cost_fn, reg_scale)
    elif reg_type == 2:
        pred, cost = calc_error_reg_L2(X, y, W, b, _cost_fn, reg_scale)
    elif reg_type == 3:
        pred, cost = calc_error_reg_elastic(X, y, W, b, _cost_fn, _el_params)
    else:
        print("No Regularisation")
        pred, cost = calc_error(X, y, W, b, _cost_fn, _el_params)

    optimizer = tf.train.GradientDescentOptimizer(_rate).minimize(cost)
    XyWb = [X, y, W, b]
    with tf.Session() as sess:
        if _cross_val:
            return hp.cross_validation(sess, XyWb, _train_X, _train_y, _test_X,
                                       _test_y, optimizer, cost, huber_cost,
                                       _epochs, "lin")
        else:
            return hp.run(sess, XyWb, _train_X, _train_y, _test_X, _test_y,
                          optimizer, cost, huber_cost, _epochs, "lin")
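
The calc_error_reg_* helpers are project code and not shown. For orientation, the penalties selected by reg_type correspond to the standard lasso, ridge, and elastic-net terms; a minimal NumPy sketch of those penalties (assumed textbook definitions, not the project's actual implementations):

import numpy as np

def l1_penalty(W, scale):      # reg_type == 1 (lasso)
    return scale * np.sum(np.abs(W))

def l2_penalty(W, scale):      # reg_type == 2 (ridge)
    return scale * np.sum(np.square(W))

def elastic_penalty(W, a, b):  # reg_type == 3, with _el_params == [a, b]
    return a * np.sum(np.abs(W)) + b * np.sum(np.square(W))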
Example #3
# 	print("f1_score: {}".format(f1_score))
# 	print("log loss: {}".format(log_loss))
# 	print(pd.DataFrame.from_dict(zip(using+[f], model.coef_[0]), orient='columns'))
# 	ax = plt.subplot()
# 	sns.heatmap(confusion_matrix(results["Actual"], results["Model_Class"]), ax=ax, annot=True, fmt='g')
# 	ax.set_xlabel("Predicted Labels")
# 	ax.set_ylabel("True Labels")
# 	ax.xaxis.set_ticklabels(["No", "Top 24 Finish (f3)"])
# 	ax.yaxis.set_ticklabels(["No", "Top 24 Finish (f3)"])
# 	plt.show()

# features, f1_score, log_loss = helpers.forward_stepwise_selection(draft_capital_only+features, te_data, "hit_within3years", model=model, logistic=True)
# features, f1_score, log_loss = helpers.backwards_stepwise_selection(using, te_data, "hit_within3years", model=model, logistic=True)
f1_score, log_loss, results, _ = helpers.cross_validation(using,
                                                          te_data,
                                                          "hit_within3years",
                                                          model=model,
                                                          logistic=True)
# results["Model_Class"] = results["Model"].apply(lambda x: x >= .5)

# print("Features: {}".format(features))
print("f1_score: {}".format(f1_score))
print("log loss: {}".format(log_loss))
# print(pd.DataFrame.from_dict(zip(using, model.coef_[0]), orient='columns'))
# ax = plt.subplot()
# sns.heatmap(confusion_matrix(results["Actual"], results["Model_Class"]), ax=ax, annot=True, fmt='g')
# ax.set_xlabel("Predicted Labels")
# ax.set_ylabel("True Labels")
# ax.xaxis.set_ticklabels(["No", "Top 24 Finish (f3)"])
# ax.yaxis.set_ticklabels(["No", "Top 24 Finish (f3)"])
# plt.show()
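
helpers.cross_validation is project-specific and its internals are not shown. A rough scikit-learn equivalent of the logistic evaluation above might look like this (hypothetical sketch; the fold count cv=5 and the direct sklearn calls are assumptions, not the project's code):

from sklearn.model_selection import cross_val_predict
from sklearn.metrics import f1_score, log_loss

y_true = te_data["hit_within3years"]
probs = cross_val_predict(model, te_data[using], y_true,
                          cv=5, method="predict_proba")[:, 1]
print("f1_score: {}".format(f1_score(y_true, probs >= 0.5)))
print("log loss: {}".format(log_loss(y_true, probs)))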
Example #4
# 	# print(wr_data[best+[f]].corr())
# 	input("press enter to continue\n")

# x = wr_data[relation]
# y = wr_data['true_points']
# huber = sm.RLM(y,x,M=sm.robust.norms.HuberT(3.32))
# results = huber.fit(maxiter=200,tol=.7)
# print(results.summary())

model = linear_model.HuberRegressor(alpha=.001,
                                    epsilon=3.32,
                                    max_iter=200,
                                    tol=.7)

rmse, r2, result_df, _ = helpers.cross_validation(using,
                                                  wr_data,
                                                  'true_points',
                                                  model=model)
# features, rmse, r2 = helpers.forward_stepwise_selection(using, wr_data, 'true_points', model)

print("RMSE %s" % rmse)
print("Adj R2: %s" % r2)
# print("Features: {}".format(features))
# print(wr_data[fwd_sel].corr())
print(pd.DataFrame.from_dict(zip(using, model.coef_), orient='columns'))
half_std_success_rate = []
full_std_success_rate = []
stddev = result_df["Model"].std()
for idx, row in result_df.iterrows():
    lower_range = float(row["Model"]) - stddev / 2
    upper_range = float(row["Model"]) + stddev / 2
    matches = result_df[(result_df["Model"] >= lower_range)
                        & (result_df["Model"] <= upper_range)]
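
For reference, HuberRegressor(epsilon=3.32) minimises the Huber loss, which is quadratic for residuals smaller than epsilon and linear beyond it, damping the influence of outliers. A minimal standalone sketch of that loss (standard definition, not the scikit-learn source):

import numpy as np

def huber_loss(residual, epsilon=3.32):
    # quadratic near zero, linear in the tails
    r = np.abs(residual)
    return np.where(r <= epsilon,
                    0.5 * r ** 2,
                    epsilon * (r - 0.5 * epsilon))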
Example #5
lr_set = torch.logspace(-1, 0, 5)
k_fold = 5

# set loss and optimizer
nb_epochs = 100
batch_size = 50
loss = MSELoss()
optimizer_name = SGD

# create models
model = Sequential(Linear(2, 25), Relu(), Linear(25, 25), Relu(),
                   Linear(25, 25), Relu(), Linear(25, 2), Tanh())

# cross validation to find best learning rate
print("cross validation to get best leaning rate for model")
best_lr = cross_validation(model, optimizer_name, nb_epochs, batch_size, loss,
                           k_fold, lr_set, train_input, train_target)

# initialize models
optimizer = optimizer_name(model=model, lr=best_lr)
# logging info
logging.basicConfig(level=logging.INFO, format='%(levelname)s: %(message)s')
logging.info(f'''Starting training:
    Epochs:          {nb_epochs}
    Learning rate:   {best_lr}
    Batch size:      {batch_size}
    Optimizer:       SGD
    Loss:            MSE loss
    Training size:   1000
    Test size:       1000
''')
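
Sequential, Linear, Relu, Tanh, MSELoss, and SGD here appear to come from the project's own mini-framework rather than torch.nn, and cross_validation is likewise project code. For orientation only, a hypothetical sketch of the k-fold index split such a helper might perform:

import torch

def kfold_indices(n_samples, k):
    # shuffle once, then yield (train_idx, val_idx) for each of the k folds
    perm = torch.randperm(n_samples)
    folds = perm.chunk(k)
    for i in range(k):
        val_idx = folds[i]
        train_idx = torch.cat([folds[j] for j in range(k) if j != i])
        yield train_idx, val_idx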
Example #6
print("-- Training for FNN Model--\n")
AL_weight = 0.4
model = FNN
if run_cross_validation:  # Run cross validation to help select optimal hyperparameter
    k_fold = 5
    lr_set = [0.0001, 0.001, 0.01,
              0.1]  # learning rate range for cross validation
    reg_set = [0, 0.1, 0.2, 0.3]  # weight decay factor range
    gamma_set = [0, 0.1]  # learning rate scheduler multiplicative factor range
    for i in range(len(auxiliary_loss)):
        cross_validation(k_fold,
                         lr_set,
                         reg_set,
                         gamma_set,
                         model,
                         cross_entropy,
                         AL_weight,
                         epochs,
                         batch_size=batch_size,
                         weight_sharing=weight_sharing[i],
                         auxiliary_loss=auxiliary_loss[i])
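
Each cross_validation call above sweeps one full lr_set x reg_set x gamma_set grid per (weight_sharing, auxiliary_loss) configuration. Purely to illustrate the size of that search space (not the helper's actual internals):

from itertools import product

# 4 learning rates * 4 decay factors * 2 gammas = 32 candidate triples
grid = list(product([0.0001, 0.001, 0.01, 0.1],
                    [0, 0.1, 0.2, 0.3],
                    [0, 0.1]))
print(len(grid))  # 32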

# train and test the model
# hyperparameters for training and testing
reg = [0.05, 0.03, 0.06]  # weight decay factor
lr = [0.003, 0.005, 0.003]  # learning rate
gamma = [0, 0, 0]  # learning rate scheduler's multiplicative factor

for i in range(len(auxiliary_loss)):
    mean_acc_tr, std_acc_tr, mean_acc_te, std_acc_te = get_train_stats(
        model,