Example #1
def part1_2d():
    #load the data
    data = (scio.loadmat('boston.mat'))['boston']
    #split the data into train and test sets
    data_train, data_test = train_test_split(data, test_size=0.33)
    #arrays to hold the train/test MSE of each of the 20 runs
    mse_price_train = np.zeros(20)
    mse_price_test = np.zeros(20)
    for j in range(20):
        #build the design matrices: all 13 attributes plus a bias column
        x_train = np.c_[data_train[:, :13], np.ones(len(data_train))]
        x_test = np.c_[data_test[:, :13], np.ones(len(data_test))]
        y_train = data_train[:, 13]
        y_test = data_test[:, 13]
        #closed-form least squares on the training split: w = (X^T X)^-1 X^T y
        w = inv(x_train.T @ x_train) @ x_train.T @ y_train
        #evaluate the fitted model on both splits
        mse_price_train[j] = lr.mean_squared_error(x_train @ w, y_train)
        mse_price_test[j] = lr.mean_squared_error(x_test @ w, y_test)
        #randomly re-split the data before the next run
        data_train, data_test = train_test_split(data, test_size=0.33)
    #calculate standard deviations on training set and testing set
    train_std = np.std(mse_price_train, ddof=1)
    test_std = np.std(mse_price_test, ddof=1)
    #average MSE over the 20 runs on the train and test sets
    mse_price_train = np.mean(mse_price_train)
    mse_price_test = np.mean(mse_price_test)
    print("Linear regression with all attributes\t", '\t MSE train\t',
          mse_price_train, '\tMSE test\t', mse_price_test)
    return mse_price_train, mse_price_test, train_std, test_std
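These snippets rely on module-level imports that are not shown (presumably import scipy.io as scio, import numpy as np, from numpy.linalg import inv and from sklearn.model_selection import train_test_split), plus two course helper modules, lr and krr. The lr.mean_squared_error helper is used throughout; a minimal sketch of what it presumably does, not the original implementation:

import numpy as np

def mean_squared_error(y_true, y_estimate):
    #mean of the squared residuals; the real lr module may differ in details
    diff = np.asarray(y_true).ravel() - np.asarray(y_estimate).ravel()
    return float(np.mean(diff ** 2))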
Example #2
def part1_2a():
    #load the data
    data = (scio.loadmat('boston.mat'))['boston']
    #split the data into train and test sets
    data_train, data_test = train_test_split(data, test_size=0.33)
    #design matrices for a constant (bias-only) model: a single column of ones
    x_train = np.ones((len(data_train), 1))
    x_test = np.ones((len(data_test), 1))
    #arrays to hold the train/test MSE of each of the 20 runs
    mse_price_train = np.zeros(20)
    mse_price_test = np.zeros(20)
    for j in range(20):
        #fit a constant function on the training split; the least-squares
        #solution for a column of ones is the mean of the training prices
        w_price = inv(x_train.T @ x_train) @ x_train.T @ data_train[:, 13]
        #calculate the MSE of this constant predictor on both splits
        mse_price_train[j] = lr.mean_squared_error(x_train @ w_price,
                                                   data_train[:, 13])
        mse_price_test[j] = lr.mean_squared_error(x_test @ w_price,
                                                  data_test[:, 13])
        #randomly re-split the data before the next run
        data_train, data_test = train_test_split(data, test_size=0.33)
    #standard deviation and mean of the MSEs over the 20 runs
    train_std = np.std(mse_price_train, ddof=1)
    test_std = np.std(mse_price_test, ddof=1)
    mse_price_train = np.mean(mse_price_train)
    mse_price_test = np.mean(mse_price_test)
    print('average MSE for train set\t', mse_price_train,
          '\naverage MSE for test set\t', mse_price_test)
    return mse_price_train, mse_price_test, train_std, test_std
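Because the design matrix here is just a column of ones, the normal equations collapse to the sample mean of the training prices, so this baseline predicts one constant value everywhere. A quick check with made-up numbers:

import numpy as np
from numpy.linalg import inv

y = np.array([22.5, 31.0, 17.8, 24.1])    #made-up target values
ones = np.ones((len(y), 1))               #bias-only design matrix
w = inv(ones.T @ ones) @ ones.T @ y       #least-squares constant
print(w[0], y.mean())                     #both are 23.85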
Example #3
def part1_1_1_c():
    #fitted values for polynomial bases of dimension 1 to 4
    estimate_y1 = lr.estimate(lr.pl_featured(x, 1), model_d1)
    estimate_y2 = lr.estimate(lr.pl_featured(x, 2), model_d2)
    estimate_y3 = lr.estimate(lr.pl_featured(x, 3), model_d3)
    estimate_y4 = lr.estimate(lr.pl_featured(x, 4), model_d4)

    #calculate MSEs
    mse1 = lr.mean_squared_error(y, estimate_y1)
    mse2 = lr.mean_squared_error(y, estimate_y2)
    mse3 = lr.mean_squared_error(y, estimate_y3)
    mse4 = lr.mean_squared_error(y, estimate_y4)

    print("MSE 1D: %f, MSE 2D: %f, MSE 3D: %f, MSE 4D: %f" %
          (mse1, mse2, mse3, mse4))
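The lr polynomial helpers are not shown in these examples; a minimal sketch, assuming lr.pl_featured(x, k) builds a basis of the monomials 1, x, ..., x^(k-1) and lr.estimate applies a fitted weight vector to an already-featurised input (the real module may differ):

import numpy as np

def pl_featured(x, k):
    #polynomial feature map with columns x^0, x^1, ..., x^(k-1)
    x = np.asarray(x, dtype=float).ravel()
    return np.column_stack([x ** i for i in range(k)])

def estimate(features, w):
    #fitted values for an input that has already been mapped to features
    return np.asarray(features) @ np.asarray(w).ravel()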
Example #4
def mse_1d_to_18d_sin(x1, y1, xt, yt):
    mse = np.zeros(shape=(18, 1))
    for k in range(1, 19):
        #fit the sine-basis weights for dimension k
        model_dk = lr.fit_sin(x1, y1, k)
        #fitted values on the evaluation inputs xt for dimension k
        estimate_yt = lr.estimate(lr.pl_featured_sin(xt, k), model_dk)
        #MSE against the evaluation targets yt
        mse[k - 1] = lr.mean_squared_error(yt, estimate_yt)
    return mse
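lr.fit_sin and lr.pl_featured_sin are likewise assumed rather than shown; a sketch under the assumption that the k-th basis function is sin(k*pi*x), the usual choice for this exercise, though that is a guess here:

import numpy as np
from numpy.linalg import inv

def pl_featured_sin(x, k):
    #sine feature map with columns sin(1*pi*x), ..., sin(k*pi*x)
    x = np.asarray(x, dtype=float).ravel()
    return np.column_stack([np.sin(i * np.pi * x) for i in range(1, k + 1)])

def fit_sin(x, y, k):
    #least-squares weights for the sine basis: w = (Phi^T Phi)^-1 Phi^T y
    phi = pl_featured_sin(x, k)
    return inv(phi.T @ phi) @ phi.T @ np.asarray(y, dtype=float).ravel()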
Example #5
def part1_3c():
    #best-performing parameters (kernel width sigma and regularisation gamma)
    sigma = 2**10
    gamma = 2**(-31)
    #load data from .mat file
    data = (scio.loadmat('boston.mat'))['boston']
    #split the data into train and test sets
    data_train, data_test = train_test_split(data, test_size=0.33)
    x_train = data_train[:, :13]
    y_train = data_train[:, 13]
    x_test = data_test[:, :13]
    y_test = data_test[:, 13]
    #train model
    alpha = krr.fit(x_train, sigma, gamma, y_train)
    #calculate mse
    y_e_train = krr.estimate(x_train, x_train, sigma, alpha)
    mse_train = lr.mean_squared_error(y_train, y_e_train)
    y_e_test = krr.estimate(x_train, x_test, sigma, alpha)
    mse_test = lr.mean_squared_error(y_test, y_e_test)
    #print('MSE on training set is: %f MSE on test set is: %f'%(mse_train, mse_test))
    return mse_train, mse_test
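krr.fit and krr.estimate are presumably Gaussian-kernel ridge regression in its dual form; a minimal sketch consistent with the call signatures above, assuming the kernel K(xi, xj) = exp(-||xi - xj||^2 / (2*sigma^2)) and dual coefficients alpha = (K + gamma*l*I)^-1 y (the actual krr module may differ, e.g. in how gamma is scaled):

import numpy as np
from numpy.linalg import inv

def gaussian_kernel(xa, xb, sigma):
    #pairwise Gaussian kernel matrix K_ij = exp(-||xa_i - xb_j||^2 / (2 sigma^2))
    sq_dists = (np.sum(xa ** 2, axis=1)[:, None]
                + np.sum(xb ** 2, axis=1)[None, :]
                - 2 * xa @ xb.T)
    return np.exp(-sq_dists / (2 * sigma ** 2))

def fit(x_train, sigma, gamma, y_train):
    #dual ridge-regression coefficients: alpha = (K + gamma * l * I)^-1 y
    l = len(x_train)
    K = gaussian_kernel(x_train, x_train, sigma)
    return inv(K + gamma * l * np.eye(l)) @ y_train

def estimate(x_train, x_new, sigma, alpha):
    #predicted values: y_hat_j = sum_i alpha_i K(x_new_j, x_train_i)
    return gaussian_kernel(x_new, x_train, sigma) @ alpha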
Example #6
def part1_2c():
    #load the data
    data = (scio.loadmat('boston.mat'))['boston']
    #split the data into train and test sets
    data_train, data_test = train_test_split(data, test_size=0.33)
    #per-run MSE arrays: 20 runs x 13 single-attribute models
    mse_price_train = np.zeros((20, 13))
    mse_price_test = np.zeros((20, 13))
    for j in range(20):
        for i in range(13):
            #design matrix for attribute i alone, plus a bias column
            x_train = np.c_[data_train[:, i], np.ones(len(data_train))]
            x_test = np.c_[data_test[:, i], np.ones(len(data_test))]
            #least squares on the training split: w = (X^T X)^-1 X^T y
            w = inv(x_train.T @ x_train) @ x_train.T @ data_train[:, 13]
            #evaluate the fitted model on both splits
            mse_price_train[j, i] = lr.mean_squared_error(
                x_train @ w, data_train[:, 13])
            mse_price_test[j, i] = lr.mean_squared_error(
                x_test @ w, data_test[:, 13])

        #randomly re-split the data before the next run
        data_train, data_test = train_test_split(data, test_size=0.33)
    #average and standard deviation of the MSEs over the 20 runs
    train_std = np.std(mse_price_train, axis=0, ddof=1)
    test_std = np.std(mse_price_test, axis=0, ddof=1)
    mse_mean_price_train = np.mean(mse_price_train, axis=0)
    mse_mean_price_test = np.mean(mse_price_test, axis=0)
    print("For liner regression with single attribute\t ")
    for i in range(len(mse_mean_price_train)):
        print("Linear regression attribute", i + 1, "\tMSE train\t",
              mse_mean_price_train[i], "\tMSE test\t", mse_mean_price_test[i],
              "\n")
    return mse_mean_price_train, mse_mean_price_test, train_std, test_std
Example #7
def k_fold_crossvalidation(k, data_x, data_y, sigma, gamma):
    #split the data into k equally sized folds
    kf = KFold(n_splits=k)
    mse_list = []
    for train_data_index, test_data_index in kf.split(data_x):
        x_train = data_x[train_data_index]
        x_test = data_x[test_data_index]
        y_train = data_y[train_data_index]
        y_test = data_y[test_data_index]
        alpha = krr.fit(x_train, sigma, gamma, y_train)
        y_e = krr.estimate(x_train, x_test, sigma, alpha)
        mse = lr.mean_squared_error(y_test, y_e)
        mse_list.append(mse)
    #average validation MSE over the k folds
    mean_mse = np.mean(mse_list)
    return mean_mse
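This routine is presumably used to pick sigma and gamma (Example #5 quotes 2^10 and 2^-31 as the best pair). A sketch of such a grid search, assuming x_train and y_train are already loaded; the grid ranges here are placeholders:

best = None
for sigma in [2.0 ** p for p in range(7, 14)]:
    for gamma in [2.0 ** p for p in range(-40, -25)]:
        #5-fold cross-validated MSE for this (sigma, gamma) pair
        cv_mse = k_fold_crossvalidation(5, x_train, y_train, sigma, gamma)
        if best is None or cv_mse < best[0]:
            best = (cv_mse, sigma, gamma)
print('best CV MSE %f at sigma=%g, gamma=%g' % best)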