def compute_cost(X, t, w):
    # Standardize the features before evaluating the cost.
    data_mean, std = scal.mean_std(X)
    X = scal.standardize(X, data_mean, std)
    N = len(X)  # number of training examples
    first = 1 / (2 * N)  # the 1/(2N) factor in front of the sum
    second = np.sum((np.dot(X, w) - t)**2)  # sum of squared residuals
    cost = first * second
    return cost
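# Written out, the cost above is J(w) = (1 / (2 * N)) * sum_n (np.dot(x_n, w) - t_n) ** 2,
# i.e. the half mean squared error; train() below minimizes it by batch gradient descent.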
def train(X, t, eta, epochs):
    # Standardize the features, then run batch gradient descent for
    # the requested number of epochs.
    data_mean, std = scal.mean_std(X)
    data_scaled = scal.standardize(X, data_mean, std)
    w_t = np.zeros(4)  # initial weights; this snippet assumes four columns
    for _ in range(epochs):
        w_t = w_t - eta * compute_gradient(data_scaled, t, w_t)

    return w_t
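train relies on a compute_gradient helper that this snippet does not show. A minimal sketch consistent with the cost function above (the name comes from the call site; the body is an assumption, not the original implementation):

import numpy as np

def compute_gradient(X, t, w):
    # Gradient of J(w) = (1/(2N)) * sum((Xw - t)^2) with respect to w,
    # which works out to (1/N) * X^T (Xw - t).
    N = len(X)
    return np.dot(X.T, np.dot(X, w) - t) / N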
Example #3
def scale(X):
    # Standardize the features first, then prepend the bias column of ones;
    # standardizing the ones column itself would divide by its zero std.
    data_mean, std = scal.mean_std(X)
    data_scaled = scal.standardize(X, data_mean, std)
    dataset_ones = np.column_stack((np.ones(X.shape[0]), data_scaled))
    return dataset_ones[:, 1]
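These snippets lean on a scal (scaling) module that is not reproduced here. A plausible minimal sketch of the two helpers they call, assuming column-wise standardization:

import numpy as np

def mean_std(X):
    # Per-column mean and standard deviation of the data matrix.
    return np.mean(X, axis=0), np.std(X, axis=0)

def standardize(X, mean, std):
    # Shift each column to zero mean and scale it to unit variance.
    return (X - mean) / std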
Example #4
parser = argparse.ArgumentParser()
parser.add_argument('-i',
                    '--input_data_dir',
                    type=str,
                    default='../data/polyfit',
                    help='Directory for the houses dataset.')
FLAGS, unparsed = parser.parse_known_args()

# Read the training data.
X_dataset, t_dataset = read_data(FLAGS.input_data_dir + "/dataset.txt")
X_train, t_train = read_data(FLAGS.input_data_dir + "/train.txt")
X_test, t_test = read_data(FLAGS.input_data_dir + "/test.txt")
X_devel, t_devel = read_data(FLAGS.input_data_dir + "/devel.txt")

# # Plotting for 4a # #

# plot for dataset, Part A
dataset_ones = np.column_stack((np.ones(X_dataset.shape[0]), X_dataset))
data_mean, std = scal.mean_std(dataset_ones)
scaled_dataset_x = scal.standardize(dataset_ones, data_mean, std)
plt.figure(0)
plt.title('X_dataset vs t_dataset')
plt.scatter(scaled_dataset_x[:, 1], t_dataset,
            color='blue')  # scaled feature against the target
plt.xlabel("scaled dataset x")
plt.ylabel("t_dataset")
plt.savefig('X-t-dataset.png')

# # Part B # #

# # Plotting for 4b # #

# plot for train data
dataset_ones = np.column_stack((np.ones(X_train.shape[0]), X_train))
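The read_data helper used in this example (and in Example #6 below) is not included in the snippets. A minimal sketch, assuming each line of the text file holds whitespace-separated features followed by the target value (the exact return shapes are an assumption):

import numpy as np

def read_data(path):
    # Load whitespace-separated rows; all but the last column are
    # features, the last column is the target.
    data = np.loadtxt(path)
    return data[:, :-1], data[:, -1]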
Example #5
plt.plot(epochs_train_data,  # variable names assumed, mirroring the test curve below
         costs_train_data,
         marker='o',
         label='Train')  # the train data
plt.plot(epochs_test_data,
         costs_test_data,
         marker='^',
         color='limegreen',
         label='Test')  # the test data
plt.xlabel("#epochs")
plt.ylabel("j(w)")

leg1 = plt.legend()

plt.savefig('train_test_line.png')

# figure 2: linear approximation

data_mean, std = scal.mean_std(x_train_biased)
scaled_xtrain = scal.standardize(x_train_biased, data_mean, std)
# Reuse the training mean/std so the test points are on the same scale.
scaled_xtest = scal.standardize(x_test_biased, data_mean, std)

plt.figure(2)
plt.title('House price vs adjusted floor size')
plt.scatter(scaled_xtrain[:, 1], ttrain, marker='o', label='Train')
plt.scatter(scaled_xtest[:, 1],
            ttest,
            marker='^',
            color='limegreen',
            label='Test')

plt.xlabel('adjusted floor size')
plt.ylabel('house price')
Example #6
def main():
    """Run main function."""
    parser = argparse.ArgumentParser('Univariate Exercise.')
    parser.add_argument('-i',
                        '--input_data_dir',
                        type=str,
                        default='../data/univariate',
                        help='Directory for the univariate houses dataset.')
    FLAGS, unparsed = parser.parse_known_args()

    # Read the training and test data.
    Xtrain, ttrain = read_data(FLAGS.input_data_dir + "/train.txt")
    Xtest, ttest = read_data(FLAGS.input_data_dir + "/test.txt")

    # Normalize and add bias term to Train data
    mean_train, std_train = scaling.mean_std(Xtrain)
    Xtrain = scaling.standardize(Xtrain, mean_train, std_train)
    X1train = np.append(np.ones_like(ttrain), Xtrain, axis=1)

    # Normalize and add bias term to Test data
    Xtest = scaling.standardize(Xtest, mean_train, std_train)  # scale test data with the training statistics
    X1test = np.append(np.ones_like(ttest), Xtest, axis=1)

    # Hyperparameters
    epochs, step, learning_rate = 200, 10, 0.1

    # Get w from Train and use it on Test
    w = train_BGD(X1train, ttrain, learning_rate, epochs)
    print('epochs = {} w_BGD = {}'.format(epochs, w))

    # Parameters for Test
    rmse = compute_rmse(X1train, ttrain, w)
    cost = compute_cost(X1train, ttrain, w)
    grad = compute_gradient(X1train, ttrain, w)
    print_train_outputs(epochs, learning_rate, mean_train, std_train, w, rmse,
                        cost)

    # Compare w from normal eqn and BGD
    w_norm_eqn = train_norm_eqn(X1train, ttrain)
    w_bgd = train_BGD(X1train, ttrain, learning_rate, epochs)
    w_sgd = train_SGD(X1train, ttrain, learning_rate, 115)
    print("\n")
    print("w_norm_eqn         = {}".format(w_norm_eqn))
    print("epochs = 200 w_bgd = {}".format(w_bgd))
    print("epochs = 115 w_sgd = {}".format(w_sgd))

    # plots
    plot_cost_history(X1train, ttrain, epochs, step, learning_rate)
    plot_train_test(X1train, ttrain, X1test, ttest, w_bgd, w_sgd)

    # Compare w from SGD (at varying iteration counts) against the normal equation
    w_norm_eqn = train_norm_eqn(X1train, ttrain)
    iters = 200
    a_mins = []
    for i in np.arange(iters):
        w_sgd = train_SGD(X1train, ttrain, learning_rate, i)
        print("\n")
        print("i = {}".format(i))
        print("w_norm_eqn        = {}".format(w_norm_eqn))
        print('iters = {} w_BGD = {}'.format(iters, w))
        print("iters = {} w_SGD = {}".format(i, w_sgd))
        abs_diff_min = (abs(w_sgd[0][0] - w_norm_eqn[0][0]) +
                        abs(w_sgd[0][1] - w_norm_eqn[0][1]))
        a_mins.append(abs_diff_min)
        print("abs_diff_min = {}".format(abs_diff_min))
        print('np.argmin(a_mins) = ', np.argmin(a_mins))
        print("len(a_mins) = {}".format(len(a_mins)))