Code Example #1
def fitLineAndPlot(independentList, dependentList, plt, order=1):
    # Fit a polynomial of the given order and overlay its predictions
    regression = PolynomialRegression(order)
    coefficient = regression.fit(independentList, dependentList)
    print('Coefficients for order %d' % order, coefficient)
    predictionList = getPredictions(coefficient, independentList)
    plt.plot(independentList, predictionList, label='Order-%d' % order, linewidth=3)
    plt.legend()
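This snippet assumes a PolynomialRegression class whose fit returns the coefficient vector, plus a getPredictions helper that evaluates the fitted polynomial. A minimal sketch of what those could look like, built on numpy.polyfit/numpy.polyval (an assumption; each project's own implementation may differ):

import numpy as np

class PolynomialRegression:
    """Hypothetical minimal version: least-squares fit of a fixed order."""
    def __init__(self, order):
        self.order = order

    def fit(self, xs, ys):
        # numpy.polyfit returns coefficients from highest degree down
        return np.polyfit(xs, ys, self.order)

def getPredictions(coefficient, xs):
    # Evaluate the fitted polynomial at each x value
    return np.polyval(coefficient, xs)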
Code Example #2
File: main.py Project: Selich/Serbian-Airquality
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split  # assumed source of train_test_split


def main():
    city = "nis"
    # Load the AQI series, convert it to a NumPy array,
    # and keep the most recent 100 observations
    y = pd.read_csv("../data/aqi_" + city + ".csv")
    y = y.to_numpy()
    y = y[-100:]
    x = np.linspace(0, len(y), len(y))

    x_train, x_test, y_train, y_test = train_test_split(x, y)

    PR = PolynomialRegression(x, y)
    theta = PR.fit(order=2, tol=10**-3, numIters=100, alpha=10**-3)
    PR.plot_predicted()
    PR.plot_cost()
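The fit signature above (tol, numIters, alpha) suggests a gradient-descent solver with learning rate alpha, convergence tolerance tol, and an iteration cap. A minimal sketch of such a loop under those assumptions (not the project's actual code):

import numpy as np

def gd_polyfit(x, y, order=2, alpha=1e-3, tol=1e-3, numIters=100):
    # Design matrix with columns 1, x, ..., x^order
    X = np.vander(np.asarray(x, dtype=float), order + 1, increasing=True)
    y = np.asarray(y, dtype=float).ravel()
    theta = np.zeros(order + 1)
    for _ in range(numIters):
        grad = 2 * X.T @ (X @ theta - y) / len(y)  # MSE gradient
        theta -= alpha * grad
        if np.linalg.norm(grad) < tol:  # stop once the gradient is small
            break
    return theta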
Code Example #3
def compare_dataset_size(train_datasets, test_dataset, K=10, out_dir="."):
    """ Generate plot to compare effects of dataset size.

    Args:
    - train_datasets (list of dict): A list of training datasets from the same distribution.
    - test_dataset (dict): The test dataset.
    - K (int): The degree of the polynomial to fit. Note: 1 <= K <= 10
    - out_dir (str): Directory in which to save the generated figure.
    """
    model = PolynomialRegression(K=K)

    title = "Comparing Effects of Dataset Size"
    x_label = "Dataset size"
    y_label = "Error (Log Scale)"

    # One for training error, one for testing error
    labels = ("Train Error", "Test Error")
    x_s = [[], []]
    y_s = [[], []]

    test_X = test_dataset["X"]
    test_Y = test_dataset["Y"]

    for dataset in train_datasets:
        train_X = dataset["X"]
        train_Y = dataset["Y"]

        num_samples = len(train_X)

        x_s[0].append(num_samples)
        x_s[1].append(num_samples)

        model.fit(train_X, train_Y)

        train_loss = model.compute_mse(train_X, train_Y)
        test_loss = model.compute_mse(test_X, test_Y)

        y_s[0].append(np.log(train_loss))
        y_s[1].append(np.log(test_loss))

    visualize(x_s,
              y_s,
              labels,
              title,
              x_label,
              y_label,
              savefig=True,
              out_dir=out_dir)
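compare_dataset_size expects each dataset as a dict with "X" and "Y" arrays. A hypothetical call with synthetic data (the target function and sizes here are made up for illustration):

import numpy as np

rng = np.random.default_rng(0)

def make_dataset(n):
    X = rng.uniform(-1.0, 1.0, n)
    Y = np.sin(2.0 * X) + rng.normal(0.0, 0.1, n)  # noisy sinusoidal target
    return {"X": X, "Y": Y}

train_datasets = [make_dataset(n) for n in (10, 20, 50, 100, 200)]
test_dataset = make_dataset(500)
compare_dataset_size(train_datasets, test_dataset, K=10)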
Code Example #4
def compare_regularization(train_dataset,
                           test_dataset,
                           K,
                           l2_coefs,
                           title_prefix="",
                           out_dir="."):
    """ Generate plot to compare effects of model complexity
    """

    title = f"{title_prefix}Comparing Effects of Regularization"
    x_label = "L2 Coefficient (Lambda Term) 1e-2"
    y_label = "Error (Log Scale)"

    labels = ("Train Error", "Test Error")
    x_s = [[], []]
    y_s = [[], []]

    train_X = train_dataset["X"]
    train_Y = train_dataset["Y"]

    test_X = test_dataset["X"]
    test_Y = test_dataset["Y"]

    for l2_coef in l2_coefs:
        # The x-axis shows lambda scaled by 1e2, i.e. in units of 1e-2
        x_s[0].append(l2_coef * 1e2)
        x_s[1].append(l2_coef * 1e2)

        model = PolynomialRegression(K)
        model.fit_with_l2_regularization(train_X, train_Y, l2_coef)

        train_loss = model.compute_mse(train_X, train_Y)
        test_loss = model.compute_mse(test_X, test_Y)

        y_s[0].append(np.log(train_loss))
        y_s[1].append(np.log(test_loss))

    visualize(x_s,
              y_s,
              labels,
              title,
              x_label,
              y_label,
              savefig=True,
              out_dir=out_dir)
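fit_with_l2_regularization presumably solves the ridge-regression normal equations; a standard closed form, shown here as an assumption about what the method computes rather than its actual code:

import numpy as np

def ridge_fit(X, Y, K, l2_coef):
    # Polynomial design matrix with columns x^0 .. x^K
    Phi = np.vander(np.asarray(X, dtype=float), K + 1, increasing=True)
    # Ridge normal equations: (Phi^T Phi + lambda * I) w = Phi^T Y
    A = Phi.T @ Phi + l2_coef * np.eye(K + 1)
    return np.linalg.solve(A, Phi.T @ np.asarray(Y, dtype=float))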
Code Example #5
def compare_model_complexity(train_dataset,
                             test_dataset,
                             Ks,
                             title_prefix="",
                             out_dir="."):
    """ Generate plot to compare effects of model complexity
    """

    title = f"{title_prefix}Comparing Effects of Model Complexity"
    x_label = "Model Complexity (Degree of Polynomial)"
    y_label = "Error (Log Scale)"

    labels = ("Train Error", "Test Error")
    x_s = [[], []]
    y_s = [[], []]

    train_X = train_dataset["X"]
    train_Y = train_dataset["Y"]

    test_X = test_dataset["X"]
    test_Y = test_dataset["Y"]

    for K in Ks:
        x_s[0].append(K)
        x_s[1].append(K)

        model = PolynomialRegression(K)
        model.fit(train_X, train_Y)

        train_loss = model.compute_mse(train_X, train_Y)
        test_loss = model.compute_mse(test_X, test_Y)

        y_s[0].append(np.log(train_loss))
        y_s[1].append(np.log(test_loss))

    visualize(x_s,
              y_s,
              labels,
              title,
              x_label,
              y_label,
              savefig=True,
              out_dir=out_dir)
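A typical sweep over polynomial degrees with this function might look like the following (hypothetical data names):

Ks = range(1, 11)
compare_model_complexity(train_dataset, test_dataset, Ks, title_prefix="Oil: ")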
Code Example #6
def polynomial_regression():
    mse_train = []
    mse_test = []
    N = 100
    max_degree = 10
    random_list_size = 10
    d = 4
    x, y = generate_regression_data(d, N, amount_of_noise=0.1)
    x_train, y_train = np.zeros(
        (random_list_size, 1)), np.zeros((random_list_size, 1))
    x_test, y_test = np.zeros(
        (N - random_list_size, 1)), np.zeros((N - random_list_size, 1))
    # Draw random_list_size unique indices for the training split
    random_list = random.sample(range(N), random_list_size)

    counter_train = 0
    counter_test = 0
    for i in range(N):
        if i in random_list:
            x_train[counter_train] = x[i]
            y_train[counter_train] = y[i]
            counter_train += 1
        else:
            x_test[counter_test] = x[i]
            y_test[counter_test] = y[i]
            counter_test += 1

    for degree in range(max_degree):
        p = PolynomialRegression(degree)
        p.fit(x_train, y_train)
        y_hat_train = p.predict(x_train)
        y_hat_test = p.predict(x_test)
        # Keep the predictions from the models with the lowest MSE seen so far
        if len(mse_train) == 0 or min(mse_train) > mean_squared_error(y_train, y_hat_train):
            min_train_y_predict = y_hat_train
        if len(mse_test) == 0 or min(mse_test) > mean_squared_error(y_test, y_hat_test):
            min_test_y_predict = y_hat_test
        mse_train.append(mean_squared_error(y_train, y_hat_train))
        mse_test.append(mean_squared_error(y_test, y_hat_test))

        # p.visualize(x_test, y_test)

    # Q1A
    plt.figure()
    plt.plot(range(max_degree), mse_train,
             color='orange', label='The train error')
    plt.plot(range(max_degree), mse_test, color='blue', label='The test error')
    plt.title('error vs degree')
    plt.xlabel('degree')
    plt.ylabel('error')
    plt.yscale('log')
    plt.legend(loc="best")
    plt.savefig("Q1A.png")

    # Q1B: sort by x so the fitted curves plot as smooth lines
    sort_indexes = x_train[:, 0].argsort()
    features_sorted = x_train[sort_indexes]
    targets_sorted = min_train_y_predict[sort_indexes]

    sort_indexes = x_test[:, 0].argsort()
    features2_sorted = x_test[sort_indexes]
    targets2_sorted = min_test_y_predict[sort_indexes]

    plt.figure()
    plt.scatter(x_train, y_train, color='blue')
    plt.plot(features_sorted, targets_sorted, color='orange',
             label='The lowest training error')
    plt.plot(features2_sorted, targets2_sorted,
             color='green', label='The lowest testing error')
    plt.title('X vs Y')
    plt.xlabel('X')
    plt.ylabel('Y')
    plt.legend(loc="best")
    plt.savefig("Q1B.png")

    # Q5
    # we create 50 separable points
    X, Y = make_blobs(n_samples=50, centers=2,
                      random_state=0, cluster_std=0.60)

    # fit the model
    clf = SGDClassifier(loss="hinge", alpha=0.01, max_iter=200)
    clf.fit(X, Y)

    # plot the line, the points, and the nearest vectors to the plane
    xx = np.linspace(-1, 5, 10)
    yy = np.linspace(-1, 5, 10)

    X1, X2 = np.meshgrid(xx, yy)
    Z = np.empty(X1.shape)
    for (i, j), val in np.ndenumerate(X1):
        x1 = val
        x2 = X2[i, j]
        p = clf.decision_function([[x1, x2]])
        Z[i, j] = p[0]
    levels = [-1.0, 0.0, 1.0]
    linestyles = ['dashed', 'solid', 'dashed']
    colors = 'k'
    cs = plt.contour(X1, X2, Z, levels, colors=colors, linestyles=linestyles)
    plt.scatter(X[:, 0], X[:, 1], c=Y, cmap=plt.cm.Paired,
                edgecolor='black', s=20, label='data points')
    cs.collections[0].set_label('h(x)=0')
    plt.axis('tight')
    plt.title('Linear Classification Example')
    plt.xlabel('x1')
    plt.ylabel('x2')
    plt.legend(loc="best")
    plt.savefig("Q5.png")
Code Example #7
# Helpers that wrap the raw train/test splits as matrices
def train_x():
    return matrix(train['x'])


def train_y():
    return matrix(train['y'])


def test_x():
    return matrix(test['x'])


def test_y():
    return matrix(test['y'])


print('linear regression')
linear = LinearRegression()
linear.fit(train_x(), train_y())
print(linear.score(test_x(), test_y()))
print()

print('polynomial regression')
polynomial = PolynomialRegression([2, 3, 4],
                                  sigma=9.72e-13,
                                  iterations=100)
polynomial.fit(train_x(), train_y())
print(polynomial.score(test_x(), test_y()))
print()
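PolynomialRegression([2, 3, 4], ...) appears to take a list of powers rather than a single degree. One plausible reading (an assumption, not this project's documented API) is a design matrix with one feature column per listed power:

import numpy as np

def design_matrix(x, powers=(2, 3, 4)):
    # One feature column per listed power of x
    x = np.asarray(x, dtype=float).ravel()
    return np.column_stack([x ** p for p in powers])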
Code Example #8
    # Plot error curves
    range_x = range(1, D + 1)
    plt.plot(range_x, training_errors, label="Training Error", marker="o")
    plt.plot(range_x, validation_errors, label="Validation Error", marker="o")
    plt.title(title)
    plt.xlabel('Number of Principal Components')
    plt.ylabel('Cross Validation MSE Scores')
    plt.legend()
    plt.show()


if __name__ == '__main__':
    dataset = "./data/oil_500.pkl"

    rates = load_pickle_dataset(dataset)

    X = rates['X']
    y = rates['y']

    seed = 0
    val_percent = 0.3

    # Linear regression
    model = PolynomialRegression(1)
    model_selection(model, X, y, seed, 'Linear Regression', True)

    # Quadratic regression
    model = PolynomialRegression(2)
    model_selection(model, X, y, seed, 'Quadratic Regression', False)
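load_pickle_dataset is the project's own helper; given the rates['X'] / rates['y'] usage above, a minimal stand-in could be:

import pickle

def load_pickle_dataset(path):
    # Unpickle a dict holding the 'X' and 'y' arrays
    with open(path, "rb") as f:
        return pickle.load(f)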
Code Example #9
from generatePolyPoints import generatePolyPoints
from polynomial_regression import PolynomialRegression

# Generate 100 noisy sample points (x in [0, 50]) from a polynomial with coefficients [5, 1, 1]
x_pts, y_pts = generatePolyPoints(0, 50, 100, [5, 1, 1], noiseLevel=2, plot=1)
PR = PolynomialRegression(x_pts, y_pts)
theta = PR.fit(method='normal_equation', order=2)
PR.plot_predictedPolyLine()
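method='normal_equation' points at the closed-form least-squares solution. A minimal sketch of that solve (assumed, not the library's exact code):

import numpy as np

def normal_equation_fit(x, y, order=2):
    # theta = (X^T X)^{-1} X^T y, computed via a linear solve
    X = np.vander(np.asarray(x, dtype=float), order + 1, increasing=True)
    return np.linalg.solve(X.T @ X, X.T @ np.asarray(y, dtype=float))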