Example #1
File: ex7_pca.py Project: b4sus/ml-course
def pca_faces():
    data = sio.loadmat(
        "ml_course_material/machine-learning-ex7/ex7/ex7faces.mat")
    X = data["X"]

    plt.figure(0)
    plot_images(X)
    plt.show(block=False)  # non-blocking; for some reason SVD will not converge while the plot is blocking

    normalizer = feature.FeatureNormalizer(X)
    X_norm = normalizer.normalized_x_m

    # np.linalg.svd occasionally fails to converge on this data (see the
    # comment above); retry the decomposition until it succeeds
    while True:
        try:
            (U, S) = pca.pca(X_norm)
            break
        except np.linalg.LinAlgError:
            pass

    plt.figure(1)
    plot_images(U.T)
    plt.show(block=False)

    k = 100

    Z = pca.project(X_norm, U, k)  # project the normalized data that U was computed from

    X_approx = pca.reconstruct(Z, U, k)

    plt.figure(2)
    plot_images(X_approx)
    plt.show()
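
The ml.pca helper module used throughout these examples is not shown. Below is a minimal sketch of what it might contain, assuming the standard PCA-via-SVD recipe; only the names pca, project, and reconstruct come from the call sites above, the bodies are an assumption:

import numpy as np


def pca(X_norm):
    """Eigenvectors U (as columns) and singular values S of the covariance
    matrix of the already-normalized data X_norm (m x n)."""
    m = X_norm.shape[0]
    sigma = (X_norm.T @ X_norm) / m  # n x n covariance matrix
    (U, S, _) = np.linalg.svd(sigma)
    return U, S


def project(X, U, k):
    """Project each row of X onto the first k principal components."""
    return X @ U[:, :k]


def reconstruct(Z, U, k):
    """Map k-dimensional projections back into the original n-dimensional space."""
    return Z @ U[:, :k].T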
Example #2
File: ex7_pca.py Project: b4sus/ml-course
def pca_warmup():
    data = sio.loadmat(
        "ml_course_material/machine-learning-ex7/ex7/ex7data1.mat")
    X = data["X"]
    # plt.plot(X[:, 0], X[:, 1], '.')
    # plt.show(block=False)

    normalizer = feature.FeatureNormalizer(X)
    X_norm = normalizer.normalized_x_m
    (U, S) = pca.pca(X_norm)

    print(U[:, 0])

    plt.plot(X_norm[:, 0], X_norm[:, 1], '.')
    plt.show(block=False)

    plt.plot([-3, U[0, 0]], [-3, U[1, 0]], "x-")  # rough line along the first principal component U[:, 0]
    plt.show(block=False)

    Z = pca.project(X_norm, U, 1)
    print(Z)
    X_approx = pca.reconstruct(Z, U, 1)
    print(X_approx)
    plt.plot(X_approx[:, 0], X_approx[:, 1], "y.")
    plt.show()
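
FeatureNormalizer is likewise project code. Here is a minimal sketch of the interface the examples rely on, assuming plain z-score normalization: the constructor learns per-column mean and standard deviation, normalized_x_m exposes the normalized training matrix, and normalize_matrix applies the stored statistics to new data:

import numpy as np


class FeatureNormalizer:
    def __init__(self, X):
        self.mu = X.mean(axis=0)
        self.sigma = X.std(axis=0)
        self.normalized_x_m = (X - self.mu) / self.sigma

    def normalize_matrix(self, X):
        # apply the training-set statistics; never refit on new data
        return (X - self.mu) / self.sigma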
Example #3
def learning_curves_of_different_polynomial_degree(X_train, y_train, X_cv,
                                                   y_cv, estimator_predictor,
                                                   cost_fun,
                                                   max_polynomial_degree):
    """
    Trains theta with X_train and y_train using minimize_fun with different polynomial degree
    (1 to provided max_polynomial_degree). Calculates and return training set and cross-validation error.
    :param X_train:
    :param y_train:
    :param X_cv:
    :param y_cv:
    :param estimator_predictor: has methods fit(X, y) and predict(X) -> y
    :param cost_fun: expected signature cost_fun(y_true, y_pred) -> float
    :param max_polynomial_degree:
    :return: training errors, cross-validation errors
    """
    j_train = []
    j_cv = []

    for polynomial_degree in range(1, max_polynomial_degree + 1):  # degrees 1..max_polynomial_degree inclusive
        poly_features = PolynomialFeatures(polynomial_degree,
                                           include_bias=False)
        X_train_poly = poly_features.fit_transform(X_train)

        normalizer = feature.FeatureNormalizer(X_train_poly)
        X_train_poly = normalizer.normalize_matrix(X_train_poly)

        X_train_poly = np.hstack((np.ones(
            (X_train_poly.shape[0], 1)), X_train_poly))

        estimator_predictor.fit(X_train_poly, y_train)

        j_train.append(
            cost_fun(y_train, estimator_predictor.predict(X_train_poly)))

        X_cv_poly = poly_features.transform(X_cv)  # transform only; the features were fitted on the training set
        X_cv_poly = normalizer.normalize_matrix(X_cv_poly)
        X_cv_poly = np.hstack((np.ones((X_cv_poly.shape[0], 1)), X_cv_poly))

        j_cv.append(cost_fun(y_cv, estimator_predictor.predict(X_cv_poly)))

    plt.plot(list(range(1, len(j_train) + 1)), j_train, label="j_train")
    plt.plot(list(range(1, len(j_train) + 1)), j_cv, label="j_cv")
    plt.xlabel("polynomial degree")
    plt.ylabel("error")
    plt.legend()
    plt.show()

    return j_train, j_cv
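
A hypothetical call follows; the variables X_train, y_train, X_cv, y_cv and the LinearRegressionEstimatorPredictor class are assumed from Example #6, and mse_cost mirrors the mean-squared-error cost defined there. Any object with fit(X, y) and predict(X) -> y works as estimator_predictor:

def mse_cost(y_true, y_pred):
    diff = y_pred - y_true
    return float((diff.T @ diff) / (2 * y_true.shape[0]))


(j_train, j_cv) = learning_curves_of_different_polynomial_degree(
    X_train, y_train, X_cv, y_cv,
    LinearRegressionEstimatorPredictor(),  # project class, see Example #6
    mse_cost, max_polynomial_degree=12)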
Example #4
File: ex7_pca.py Project: b4sus/ml-course
def pca_on_bird():
    bird_image = image.imread(
        "ml_course_material/machine-learning-ex7/ex7/bird_small.png")
    plt.imshow(bird_image)
    plt.show(block=False)

    im_shape = bird_image.shape

    X = bird_image.reshape([im_shape[0] * im_shape[1], 3])

    k = 16

    (centroids,
     closest_centroids) = k_means.k_means(X,
                                          k_means.init_random_centroids(X, k))

    nr_samples = 1000
    rng = np.random.default_rng()
    random_indices = rng.integers(0, X.shape[0], nr_samples)

    colors = []
    X_rand = np.empty((nr_samples, X.shape[1]))
    for i in range(nr_samples):
        colors.append(closest_centroids[random_indices[i]])
        X_rand[i, :] = X[random_indices[i], :]

    plt.hsv()
    fig = plt.figure(3)
    ax = fig.add_subplot(projection='3d')  # fig.gca(projection=...) was removed in newer matplotlib
    ax.scatter(X_rand[:, 0], X_rand[:, 1], X_rand[:, 2], c=colors)
    plt.show(block=False)

    X_norm = feature.FeatureNormalizer(X).normalized_x_m
    # as in pca_faces, SVD occasionally fails to converge; retry until it succeeds
    while True:
        try:
            (U, S) = pca.pca(X_norm)
            break
        except np.linalg.LinAlgError:
            pass

    Z = pca.project(X_norm, U, 2)  # project the normalized data that U was computed from
    Z_rand = np.empty((nr_samples, Z.shape[1]))
    for i in range(nr_samples):
        Z_rand[i, :] = Z[random_indices[i], :]

    plt.figure(4)
    plt.scatter(Z_rand[:, 0], Z_rand[:, 1], c=colors)
    plt.show()
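
The k_means module is also project code. Here is a sketch of the interface the call above assumes, using the standard Lloyd iteration; the num_iter parameter and the bodies are assumptions:

import numpy as np


def init_random_centroids(X, k):
    """Pick k distinct rows of X as the initial centroids."""
    rng = np.random.default_rng()
    return X[rng.choice(X.shape[0], size=k, replace=False)]


def k_means(X, centroids, num_iter=10):
    """Alternate assignment and update steps; returns the final centroids
    and, for every row of X, the index of its closest centroid."""
    for _ in range(num_iter):
        # assignment step: nearest centroid for each sample
        distances = np.linalg.norm(X[:, None, :] - centroids[None, :, :], axis=2)
        closest_centroids = distances.argmin(axis=1)
        # update step: move each centroid to the mean of its assigned samples
        for j in range(centroids.shape[0]):
            assigned = X[closest_centroids == j]
            if assigned.size > 0:
                centroids[j] = assigned.mean(axis=0)
    return centroids, closest_centroids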
Example #5
File: ex8.py Project: b4sus/ml-course
def anomaly_detection_high_dimensional_dataset():
    data = sio.loadmat("ml_course_material/machine-learning-ex8/ex8/ex8data2.mat")
    X = data["X"]
    X_cv = data["Xval"]
    y_cv = data["yval"]

    (mu, sigma2) = estimate_gaussian(X)
    p_cv = gaussian(X_cv, mu, sigma2)
    (epsilon, f1) = select_threshold(p_cv, y_cv)
    print(f"best epsilon is {epsilon} with F1 score {f1}")
    p = gaussian(X, mu, sigma2)
    (anomalies_indices_x, anomalies_indices_y) = np.nonzero(p < epsilon)
    print(f"{len(anomalies_indices_x)} anomalies")

    # pca for fun
    normalizer = feature.FeatureNormalizer(X)
    X_norm = normalizer.normalized_x_m
    (U, S) = pca.pca(X_norm)
    Z = pca.project(X_norm, U, 2)
    visualize(Z)
    plt.plot(Z[anomalies_indices_x, 0], Z[anomalies_indices_x, 1], '+')
    plt.show()
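
estimate_gaussian, gaussian, and select_threshold come from the project's ex8 code. A sketch of what they plausibly compute, assuming independent per-feature Gaussians and an F1-driven threshold scan (the 1000-step scan is an assumption):

import numpy as np


def estimate_gaussian(X):
    """Per-feature mean and variance of X (m x n)."""
    return X.mean(axis=0), X.var(axis=0)


def gaussian(X, mu, sigma2):
    """Product of independent per-feature Gaussian densities for each row,
    returned as an (m, 1) column so np.nonzero(p < epsilon) yields (rows, cols)."""
    coef = 1.0 / np.sqrt(2 * np.pi * sigma2)
    p = coef * np.exp(-((X - mu) ** 2) / (2 * sigma2))
    return p.prod(axis=1, keepdims=True)


def select_threshold(p_cv, y_cv):
    """Scan candidate epsilons, keep the one with the best F1 score on the
    labeled cross-validation set."""
    best_epsilon, best_f1 = 0.0, 0.0
    for epsilon in np.linspace(p_cv.min(), p_cv.max(), 1000):
        predictions = (p_cv < epsilon).astype(int)
        tp = np.sum((predictions == 1) & (y_cv == 1))
        fp = np.sum((predictions == 1) & (y_cv == 0))
        fn = np.sum((predictions == 0) & (y_cv == 1))
        if tp == 0:
            continue
        precision = tp / (tp + fp)
        recall = tp / (tp + fn)
        f1 = 2 * precision * recall / (precision + recall)
        if f1 > best_f1:
            best_epsilon, best_f1 = epsilon, f1
    return best_epsilon, best_f1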
Example #6
def cost(y_true, y_pred):
    m = y_true.shape[0]
    diff = y_pred - y_true
    return ((diff.T @ diff) / (2 * m))[0][0]


plt.figure(1)
(j_train, j_cv) = lc.learning_curves_of_different_training_set_size(
    X_train, y_train, X_cv, y_cv, LinearRegressionEstimatorPredictor(), cost)

plt.figure(2)
(j_train, j_cv) = lc.learning_curves_of_different_polynomial_degree(
    X_train, y_train, X_cv, y_cv, LinearRegressionEstimatorPredictor(), cost,
    12)

poly_features = PolynomialFeatures(8, include_bias=False)
X_train_poly = poly_features.fit_transform(X_train)

normalizer = feature.FeatureNormalizer(X_train_poly)
X_train_poly = normalizer.normalize_matrix(X_train_poly)
X_cv_poly = poly_features.transform(X_cv)  # transform only; the features were fitted on the training set
X_cv_poly = normalizer.normalize_matrix(X_cv_poly)
X_test_poly = poly_features.transform(X_test)
X_test_poly = normalizer.normalize_matrix(X_test_poly)

regularization_lambda = 1

result = op.minimize(fun=lire.linear_regression_cost_gradient,
                     x0=np.zeros(X_train_poly.shape[1] + 1),
                     args=(np.hstack((np.ones(
                         (X_train_poly.shape[0], 1)), X_train_poly)), y_train,
                           regularization_lambda),
                     method="CG",
                     jac=True,
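
The jac=True flag tells scipy.optimize.minimize that fun returns the cost and its gradient as a single (cost, gradient) pair. A minimal sketch of what lire.linear_regression_cost_gradient might look like under that contract; the body is an assumption, and X is expected to already carry the bias column:

import numpy as np


def linear_regression_cost_gradient(theta, X, y, regularization_lambda):
    m = X.shape[0]
    theta = theta.reshape(-1, 1)
    diff = X @ theta - y
    # regularized cost; the bias weight theta[0] is not penalized
    cost = float(diff.T @ diff) / (2 * m) \
        + (regularization_lambda / (2 * m)) * float(theta[1:].T @ theta[1:])
    grad = (X.T @ diff) / m
    grad[1:] += (regularization_lambda / m) * theta[1:]
    return cost, grad.flatten()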
Example #7
File: ex2_1.py Project: b4sus/ml-course
import matplotlib.pyplot as plt
import numpy as np
import scipy.optimize as op

import ml.feature as ft
import ml.logistic_regression as lore
import ml.predict as predict

data = np.loadtxt("ml_course_material/machine-learning-ex2/ex2/ex2data1.txt", delimiter=",")

(m, feature_length_with_result) = data.shape

x = data[:, 0:feature_length_with_result - 1].reshape((m, feature_length_with_result - 1))
y = data[:, feature_length_with_result - 1].reshape((m, 1))

success_indices = np.nonzero(y.flatten() == 1)[0]
fail_indices = np.nonzero(y.flatten() == 0)[0]

normalizer = ft.FeatureNormalizer(x)

normalized_x_m = np.hstack((np.ones((m, 1)), normalizer.normalized_x_m))

# (theta, costs) = ml.gradient_descent(normalized_x_m, y, ml.logistic_regression_cost,
#                                      ml.logistic_regression_cost_derivative, alpha=0.01, num_iter=10000)

(theta, num_of_evaluations, return_code) = op.fmin_tnc(func=lore.logistic_regression_cost_gradient, x0=np.zeros((3, 1)),
                                                       args=(np.hstack((np.ones((m, 1)), x)), y))
theta = theta.reshape(len(theta), 1)

plt.figure(0)
plt.subplot(211)
plt.plot(x[success_indices, 0], x[success_indices, 1], "kx")
plt.plot(x[fail_indices, 0], x[fail_indices, 1], "yo")
plt.plot()
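
op.fmin_tnc is called here without fprime (and approx_grad defaults to False), so lore.logistic_regression_cost_gradient must itself return the (cost, gradient) pair. A minimal sketch of such a function, assuming the unregularized logistic regression cost (the body is an assumption):

import numpy as np


def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))


def logistic_regression_cost_gradient(theta, X, y):
    m = X.shape[0]
    h = sigmoid(X @ theta.reshape(-1, 1))
    cost = float(-(y.T @ np.log(h) + (1 - y).T @ np.log(1 - h)) / m)
    grad = (X.T @ (h - y)) / m
    return cost, grad.flatten()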