def main():
	set_printoptions(precision=6, linewidth=200)
        
	# loading data
	data = genfromtxt('data/ex1data2.txt', delimiter = ',')
	X = data[:, 0:2]
	y = data[:, 2:3]
	m = shape(X)[0]	

	X, mu, sigma = featureNormalize(X)
#	print X
#	print mu
#	print sigma
	X = c_[ones((m, 1)), X]
	m ,n = shape(X)[0], shape(X)[1]
	iterations = 400
	alphas = [0.01, 0.03, 0.1, 0.3, 1.0] 
	
	for alpha in alphas:
		theta = zeros((n,1))
		J_history, theta = gradientDescent(X, y, theta, alpha, iterations)
		#print theta	
		number_of_iterations = array([x for x in range(1, iterations + 1)]).reshape(iterations, 1)
		pyplot.plot(number_of_iterations, J_history, '-b')
		pyplot.title("Alpha = %f" % (alpha))
		pyplot.xlabel('Number of iterations')
		pyplot.xlim([0, 50])
		pyplot.show(block=True)
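Almost every snippet on this page leans on a featureNormalize helper that returns the scaled matrix plus the per-column mean and standard deviation. A minimal sketch of that contract (the projects below sometimes use a different return order or signature):

import numpy as np

def featureNormalize(X):
    """Column-wise zero-mean, unit-variance scaling; returns (X_norm, mu, sigma)."""
    X = np.asarray(X, dtype=float)
    mu = X.mean(axis=0)
    sigma = X.std(axis=0)
    sigma = np.where(sigma == 0, 1.0, sigma)  # avoid division by zero on constant columns
    return (X - mu) / sigma, mu, sigma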
def polynomialDegreeCurve(X, y, Xval, yval, reg_lambda):
    """Error cruve in function of degree of polynimal d
    """
    
    dimensions = np.arange(1, 80).reshape(-1, 1)
    
    # You need to return these variables correctly.
    error_train = np.zeros((len(dimensions), 1))
    error_val = np.zeros((len(dimensions), 1))
    
    m_train_set = X.shape[0]
    m_val_set = Xval.shape[0]
      
    
    for i in range(len(dimensions)):
        dimension = dimensions[i]
        
        X_poly = polyFeatures(X, dimension)
        X_poly, mu, sigma = featureNormalize(X_poly)  # Normalize
        X_poly = np.c_[np.ones((m_train_set, 1)), X_poly]   
        
        X_poly_val = polyFeatures(Xval, dimension)
        X_poly_val = X_poly_val - mu
        X_poly_val = X_poly_val / sigma
        X_poly_val = np.c_[np.ones((m_val_set, 1)), X_poly_val] 
        
        theta = trainLinearReg(X_poly, y, reg_lambda)
        error_train[i], tmp = linearRegCostFunction(X_poly, y, theta, 0)
        error_val[i],  tmp  = linearRegCostFunction(X_poly_val, yval, theta, 0)        
    
    
    return dimensions, error_train, error_val
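polynomialDegreeCurve above also assumes a polyFeatures helper that expands a column of inputs into its powers 1..p; a minimal sketch under that assumption:

import numpy as np

def polyFeatures(X, p):
    """Return a matrix whose j-th column is X raised to the power j+1."""
    X = np.asarray(X, dtype=float).reshape(-1, 1)
    return np.hstack([X ** (j + 1) for j in range(int(p))])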
Example #4
def output(partId):
    # Random Test Cases
    X1 = np.column_stack(
        (np.ones(20), np.exp(1) + np.exp(2) * np.linspace(0.1, 2, 20)))
    Y1 = X1[:, 1] + np.sin(X1[:, 0]) + np.cos(X1[:, 1])
    X2 = np.column_stack((X1, X1[:, 1]**0.5, X1[:, 1]**0.25))
    Y2 = np.power(Y1, 0.5) + Y1
    if partId == '1':
        out = formatter('%0.5f ', warmUpExercise())
    elif partId == '2':
        out = formatter('%0.5f ', computeCost(X1, Y1, np.array([0.5, -0.5])))
    elif partId == '3':
        out = formatter(
            '%0.5f ', gradientDescent(X1, Y1, np.array([0.5, -0.5]), 0.01, 10))
    elif partId == '4':
        out = formatter('%0.5f ', featureNormalize(X2[:, 1:4]))
    elif partId == '5':
        out = formatter(
            '%0.5f ', computeCostMulti(X2, Y2, np.array([0.1, 0.2, 0.3, 0.4])))
    elif partId == '6':
        out = formatter(
            '%0.5f ',
            gradientDescentMulti(X2, Y2, np.array([-0.1, -0.2, -0.3, -0.4]),
                                 0.01, 10))
    elif partId == '7':
        out = formatter('%0.5f ', normalEqn(X2, Y2))
    return out
def gradientDescentMulti(X, y, theta, alpha, num_iters):
    m = len(y)  # number of training examples
    J_history = np.zeros((num_iters, 1))
    # normalize the targets as well; the normalized copy itself is unused
    _, y_mean, y_std = featureNormalize.featureNormalize(y)
    y = (y - y_mean) / y_std
    for iter in range(num_iters):
        h = np.dot(X, theta)
        error = h - y  # residuals of the current hypothesis
        theta = theta - (alpha * np.dot(X.T, error)) / m
        J_history[iter] = computeCostMulti.computeCostMulti(X, y, theta)
    return theta, J_history
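The J_history values above come from a computeCostMulti helper; a minimal sketch of the usual mean-squared-error form (the project's own version may differ):

import numpy as np

def computeCostMulti(X, y, theta):
    """Unregularized linear-regression cost J(theta) = (1/2m) * sum((X*theta - y)^2)."""
    m = y.shape[0]
    residual = np.dot(X, theta) - y
    return float((residual ** 2).sum()) / (2 * m)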
Example #6
	def solve(self):
	#========== Normalise the features and add neutral feature ================
		self.X = featureNormalize.featureNormalize(self.X)
		m = self.X.shape[0]
		n = self.X.shape[1]
		self.X = numpy.hstack((numpy.ones((m,1)), self.X))
		initialTheta = numpy.zeros(n+1).reshape(n+1,1)

		optimalTheta = fmin_cg(self.objectiveFunction, initialTheta, fprime=self.objectiveGrad,maxiter=self.iterations)
		objectiveValue = self.objectiveFunction(optimalTheta)
		prediction = numpy.dot(self.X, optimalTheta)
		return optimalTheta, objectiveValue, predict.predict(optimalTheta, self.X)
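fmin_cg above needs a scalar objective and a flat gradient. The class binds X and y through self, so the standalone signatures below are assumptions; a least-squares sketch of what objectiveFunction and objectiveGrad might compute:

import numpy

def objectiveFunction(theta, X, y):
    """Least-squares cost for a flat theta vector, as fmin_cg expects."""
    m = y.shape[0]
    residual = numpy.dot(X, theta.reshape(-1, 1)) - y
    return float((residual ** 2).sum()) / (2 * m)

def objectiveGrad(theta, X, y):
    """Gradient of the cost above, returned as a flat vector."""
    m = y.shape[0]
    residual = numpy.dot(X, theta.reshape(-1, 1)) - y
    return (numpy.dot(X.T, residual) / m).ravel()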
Example #7
File: main.py, Project: ichigoXZ/neuralnet
def mulLinerRegression(data):
    X = data[:, :-1]
    X_norm, mu, sigma = featureNormalize(X)
    X_norm = np.c_[X_norm, np.ones(shape=data.shape[0])]
    y = data[:, -1]
    theta = np.zeros(shape=data.shape[1])

    theta, loss = gradientDescent(X_norm, y, theta, options)
    plotLoss(loss, options["iterations"])
    print theta, mu, sigma

    plot3D(data, theta, mu, sigma)

    # test
    x = [[1380, 3], [1494, 3], [1940, 4]]
    x = np.c_[(x - mu) / sigma, np.ones(3)]
    print np.dot(x, theta)
Example #8
File: submit.py, Project: libelo/py-ng-ml
def output(partId):
	# Random Test Cases
	X1 = column_stack((ones(20), exp(1) + dot(exp(2), arange(0.1, 2.1, 0.1))))
	Y1 = X1[:,1] + sin(X1[:,0]) + cos(X1[:,1])
	X2 = column_stack((X1, X1[:,1]**0.5, X1[:,1]**0.25))
	Y2 = Y1**0.5 + Y1
	if partId == '1':
		return sprintf('%0.5f ', warmUpExercise())
	elif partId == '2':
		return sprintf('%0.5f ', computeCost(X1, Y1, array([0.5, -0.5])))
	elif partId == '3':
		return sprintf('%0.5f ', gradientDescent(X1, Y1, array([0.5, -0.5]), 0.01, 10))
	elif partId == '4':
		return sprintf('%0.5f ', featureNormalize(X2[:,1:3]));
	elif partId == '5':
		return sprintf('%0.5f ', computeCostMulti(X2, Y2, array([0.1, 0.2, 0.3, 0.4])))
	elif partId == '6':
		return sprintf('%0.5f ', gradientDescentMulti(X2, Y2, array([-0.1, -0.2, -0.3, -0.4]), 0.01, 10))
	elif partId == '7':
		return sprintf('%0.5f ', normalEqn(X2, Y2))
Example #9
def plotFit(min_x, max_x, mu, sigma, theta, p):
    """
    Plots a learned polynomial regression fit over an existing figure.
    Parameters
    ----------
    min_x : float
        Minimum value of features.
    max_x : float
        Maximum value of features.
    mu : ndarray, shape (n_features - 1,)
        Mean value of features, without the intercept term.
    sigma : ndarray, shape (n_features - 1,)
        Standard deviation of features, without the intercept term.
    theta : ndarray, shape (n_features,)
        Linear regression parameter.
    p : int
        Power of polynomial fit.
    """
    x = np.arange(min_x - 15, max_x + 25, 0.05)
    X_poly = polyFeatures(x, p)
    X_poly, dummy_mu, dummy_sigma = featureNormalize(X_poly, mu, sigma)
    X_poly = np.hstack((np.ones((X_poly.shape[0], 1)), X_poly))
    plt.plot(x, X_poly.dot(theta), linestyle='--', marker='', color='b')
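plotFit feeds the training-set mu and sigma back into featureNormalize, so that project's helper must accept them as optional arguments; a sketch under that assumption:

import numpy as np

def featureNormalize(X, mu=None, sigma=None):
    """Normalize with the given statistics, or compute them from X when not supplied."""
    X = np.asarray(X, dtype=float)
    if mu is None:
        mu = X.mean(axis=0)
    if sigma is None:
        sigma = X.std(axis=0)
    return (X - mu) / sigma, mu, sigma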
Example #11
File: main.py, Project: yjian180/python_ML
from read_data import read_data
from plot_data import plot_data
from cost_function import cost_function
from batch_gradient_update import batch_gradient_update
from sigmoid_function import sigmoid_function
from featureNormalize import featureNormalize
import numpy as np
from sklearn.linear_model import SGDClassifier
from sklearn.datasets.samples_generator import make_blobs
import scipy as sp
#X,y=read_data("ex2data1.txt")
X, y = make_blobs(n_samples=400, centers=2, random_state=0, cluster_std=1)
# after featureNormalize the accuracy can reach 89%
X, X_mu, X_sigma = featureNormalize(X)

#plot_data(X,y)
y = np.reshape(y, (y.size, 1))
m, n = X.shape
X = np.concatenate((np.ones([len(X[:, 0]), 1]), X), axis=1)
initial_theta = np.zeros([n + 1, 1])

#initial_theta=np.array([1,1,1])
# quick check that cost_function works
cost, grad = cost_function(initial_theta, X, y)

# known issue: batch_gradient_update still returns an incorrect theta here
theta = batch_gradient_update(initial_theta, X, y)
print theta

prob = sigmoid_function(np.dot(X, theta))
print prob
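cost_function above is a logistic-regression cost that returns (cost, grad), and sigmoid_function is the logistic link. Minimal sketches of both, assuming y is an m x 1 column and theta an (n+1) x 1 column as in the snippet:

import numpy as np

def sigmoid_function(z):
    return 1.0 / (1.0 + np.exp(-z))

def cost_function(theta, X, y):
    """Cross-entropy cost and gradient for logistic regression."""
    m = y.shape[0]
    h = sigmoid_function(np.dot(X, theta.reshape(-1, 1)))
    cost = -float((y * np.log(h) + (1 - y) * np.log(1 - h)).sum()) / m
    grad = np.dot(X.T, h - y) / m
    return cost, grad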
Example #12
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from featureNormalize import featureNormalize
from computeCostMulti import computeCostMulti, computeCostMulti2
from gradientDescentMulti import gradientDescentMulti

#3
data = pd.read_csv('ex1data2.txt', header=None)  # read the dataset
X = data.iloc[:, 0:2]  # read the first 2 columns (features)
y = data.iloc[:, 2]  # read the third column (target)
m = len(y)  # number of training examples
data_top = data.head()  # view first few rows of the data
print(data_top)

#3.1
X = featureNormalize(X)

#3.2
ones = np.ones((m, 1))
y = y[:, np.newaxis]
X = np.hstack((ones, X))  # adding the intercept term
theta = np.zeros([3, 1])
iterations = 400
alpha = 0.01

J = computeCostMulti(X, y, theta)
print('CostFunction: ', J)
J1 = computeCostMulti2(X, y, theta)
print('CostFunction2: ', J1)

theta = gradientDescentMulti(X, y, theta, alpha, iterations)
# plt.title('Compressed, with #d colors.')
# plt.show()

example_width = round(math.sqrt(n))
example_height = int(n / example_width)
data_modified = []
for i in range(m):
    temp = np.reshape(data[i], (example_height, example_width))
    temp = list(np.transpose(temp).ravel())
    data_modified.append(temp)
# plt.imshow(np.reshape(temp, (example_height, example_width)))
# plt.show()
# print(np.shape(data))
# print(np.shape(data_modified))
displayData(data_modified[0:100], None)
[X_norm, mu, sigma] = featureNormalize(data_modified)

[u, s] = pca(X_norm)
u = np.transpose(u)
displayData(u[:36], None)

k = 100
# print('----------------------')
# print(m)
# print(n)
# print(np.shape(u))
# print(np.shape(u[:36]))
z = projectData(X_norm, u, k)
X_rec = recoverData(z, u, k)
print(np.shape(data))
print(np.shape(X_rec))
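pca above is expected to return the principal directions of already-normalized data. A minimal SVD-based sketch (some snippets on this page unpack two values from it, others three, so the exact return shape varies by project):

import numpy as np

def pca(X_norm):
    """SVD of the covariance matrix of zero-mean data; returns (U, S)."""
    m = X_norm.shape[0]
    covariance = np.dot(X_norm.T, X_norm) / m
    U, S, _ = np.linalg.svd(covariance)
    return U, S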
Example #14
data = np.loadtxt(('ex1data2.txt'),delimiter=",");
X = data[:, 0:2]
y = data[:, 2:3];
m = len(y);

#Print out some data points
print('First 10 examples from the dataset: \n');
print('  X \n %s\n   y\n %s \n'  %(X[0:10,:] ,y[0:10,:]));

print('Program paused. Press enter to continue.\n')
raw_input()
# Scale features and set them to zero mean
print('Normalizing Features ...\n');

# unpack mu, sigma and the normalized X from the tuple returned by featureNormalize
mu,sigma,X=fN.featureNormalize(X)
print("%s %s " %(mu,sigma))
# Add intercept term to X
X=np.c_[np.ones(m),X]

"""% ================ Part 2: Gradient Descent ================"""
print('Running gradient descent ...\n');

# Choose some alpha value
alpha = 0.1
num_iters = 400
#initializing theta to zeros
theta=np.zeros((3,1),dtype=float)
#converting from a numpy array to a matrix

Example #15
y = y.reshape((-1,1))
X = X.reshape(-1,2)
print(y.shape)
print(X.shape)
m = np.size(y)



# Print out some data points
print('First 10 examples from the dataset: \n')
print(' x = , y =  \n', X[:10,:], y[:10,:])

# Scale features and set them to zero mean
print('Normalizing Features ...\n')
X_nonnorm = X
X = featureNormalize(X)


print('First 10 examples after normalization: \n')
print(' x = , y =  \n', X[:10,:], y[:10,:])

# Add intercept term to X
ones = np.ones((m,1))
X = np.hstack((ones,X))


'''
%% ================ Part 2: Gradient Descent ================

% ====================== YOUR CODE HERE ======================
% Instructions: We have provided you with the following starter
@author: ChrisChen
"""

import numpy as np
import matplotlib.pyplot as plt
import featureNormalize as fn
import gradientDescent as gd
import normalEqn as ne

house = np.loadtxt('ex1data2.txt', delimiter = ',')
x = house[:, 0:2]
y = house[:, 2]
y = y.reshape(47, 1)

x_fn = fn.featureNormalize(x)
m = len(x)
x_intercept = np.ones((m, 1))
x_total = np.concatenate((x_intercept, x_fn), axis = 1)
thetas1 = np.zeros((x_total.shape[1], 1))

iterations = 400
alpha = 0.03

# using the gradientDescent function from univariate linear regression
thetas1 = gd.gradientDescent(thetas1, iterations, x_total, y, alpha)

# normal equations
x = house[:, 0:2]
y = house[:, 2]
y = y.reshape(47, 1)
Example #17
def ex7_pca():
    ## Machine Learning Online Class
    #  Exercise 7 | Principal Component Analysis and K-Means Clustering
    #
    #  Instructions
    #  ------------
    #
    #  This file contains code that helps you get started on the
    #  exercise. You will need to complete the following functions:
    #
    #     pca.m
    #     projectData.m
    #     recoverData.m
    #     computeCentroids.m
    #     findClosestCentroids.m
    #     kMeansInitCentroids.m
    #
    #  For this exercise, you will not need to change any code in this file,
    #  or any other files other than those mentioned above.
    #

    ## Initialization
    #clear ; close all; clc

    ## ================== Part 1: Load Example Dataset  ===================
    #  We start this exercise by using a small dataset that is easy to
    #  visualize
    #
    print('Visualizing example dataset for PCA.\n')

    #  The following command loads the dataset. You should now have the 
    #  variable X in your environment
    mat = scipy.io.loadmat('ex7data1.mat')
    X = mat['X']

    #  Visualize the example dataset
    plt.plot(X[:, 0], X[:, 1], 'wo', ms=10, mec='b', mew=1)
    plt.axis([0.5, 6.5, 2, 8])

    plt.savefig('figure1.png')

    print('Program paused. Press enter to continue.')
    #pause


    ## =============== Part 2: Principal Component Analysis ===============
    #  You should now implement PCA, a dimension reduction technique. You
    #  should complete the code in pca.m
    #
    print('\nRunning PCA on example dataset.\n')

    #  Before running PCA, it is important to first normalize X
    X_norm, mu, sigma = featureNormalize(X)

    #  Run PCA
    U, S = pca(X_norm)

    #  Compute mu, the mean of each feature

    #  Draw the eigenvectors centered at mean of data. These lines show the
    #  directions of maximum variations in the dataset.
    #hold on
    print(S)
    print(U)
    drawLine(mu, mu + 1.5 * np.dot(S[0], U[:,0].T))
    drawLine(mu, mu + 1.5 * np.dot(S[1], U[:,1].T))
    #hold off
    plt.savefig('figure2.png')

    print('Top eigenvector: ')
    print(' U(:,1) = %f %f ' % (U[0,0], U[1,0]))
    print('\n(you should expect to see -0.707107 -0.707107)')

    print('Program paused. Press enter to continue.')
    #pause


    ## =================== Part 3: Dimension Reduction ===================
    #  You should now implement the projection step to map the data onto the 
    #  first k eigenvectors. The code will then plot the data in this reduced 
    #  dimensional space.  This will show you what the data looks like when 
    #  using only the corresponding eigenvectors to reconstruct it.
    #
    #  You should complete the code in projectData.m
    #
    print('\nDimension reduction on example dataset.\n\n')

    #  Plot the normalized dataset (returned from pca)
    fig = plt.figure()
    plt.plot(X_norm[:, 0], X_norm[:, 1], 'bo')

    #  Project the data onto K = 1 dimension
    K = 1
    Z = projectData(X_norm, U, K)
    print('Projection of the first example: %f' % Z[0])
    print('\n(this value should be about 1.481274)\n')

    X_rec = recoverData(Z, U, K)
    print('Approximation of the first example: %f %f' % (X_rec[0, 0], X_rec[0, 1]))
    print('\n(this value should be about  -1.047419 -1.047419)\n')

    #  Draw lines connecting the projected points to the original points
    plt.plot(X_rec[:, 0], X_rec[:, 1], 'ro')
    for i in range(X_norm.shape[0]):
        drawLine(X_norm[i,:], X_rec[i,:])
    #end
    plt.savefig('figure3.png')

    print('Program paused. Press enter to continue.\n')
    #pause

    ## =============== Part 4: Loading and Visualizing Face Data =============
    #  We start the exercise by first loading and visualizing the dataset.
    #  The following code will load the dataset into your environment
    #
    print('\nLoading face dataset.\n\n')

    #  Load Face dataset
    mat = scipy.io.loadmat('ex7faces.mat')
    X = mat['X']

    #  Display the first 100 faces in the dataset
    displayData(X[:100, :])
    plt.savefig('figure4.png')

    print('Program paused. Press enter to continue.\n')
    #pause

    ## =========== Part 5: PCA on Face Data: Eigenfaces  ===================
    #  Run PCA and visualize the eigenvectors which are in this case eigenfaces
    #  We display the first 36 eigenfaces.
    #
    print('\nRunning PCA on face dataset.\n(this might take a minute or two ...)\n')

    #  Before running PCA, it is important to first normalize X by subtracting 
    #  the mean value from each feature
    X_norm, mu, sigma = featureNormalize(X)

    #  Run PCA
    U, S = pca(X_norm)

    #  Visualize the top 36 eigenvectors found
    displayData(U[:, :36].T)
    plt.savefig('figure5.png')

    print('Program paused. Press enter to continue.')
    #pause


    ## ============= Part 6: Dimension Reduction for Faces =================
    #  Project images to the eigen space using the top k eigenvectors 
    #  If you are applying a machine learning algorithm 
    print('\nDimension reduction for face dataset.\n')

    K = 100
    Z = projectData(X_norm, U, K)

    print('The projected data Z has a size of: ')
    print(formatter('%d ', Z.shape))

    print('\n\nProgram paused. Press enter to continue.')
    #pause

    ## ==== Part 7: Visualization of Faces after PCA Dimension Reduction ====
    #  Project images to the eigen space using the top K eigen vectors and 
    #  visualize only using those K dimensions
    #  Compare to the original input, which is also displayed

    print('\nVisualizing the projected (reduced dimension) faces.\n')

    K = 100
    X_rec  = recoverData(Z, U, K)

    # Display normalized data
    #subplot(1, 2, 1)
    displayData(X_norm[:100,:])
    plt.gcf().suptitle('Original faces')
    #axis square

    plt.savefig('figure6.a.png')

    # Display reconstructed data from only k eigenfaces
    #subplot(1, 2, 2)
    displayData(X_rec[:100,:])
    plt.gcf().suptitle('Recovered faces')
    #axis square

    plt.savefig('figure6.b.png')

    print('Program paused. Press enter to continue.')
    #pause


    ## === Part 8(a): Optional (ungraded) Exercise: PCA for Visualization ===
    #  One useful application of PCA is to use it to visualize high-dimensional
    #  data. In the last K-Means exercise you ran K-Means on 3-dimensional 
    #  pixel colors of an image. We first visualize this output in 3D, and then
    #  apply PCA to obtain a visualization in 2D.

    #close all; close all; clc

    # Re-load the image from the previous exercise and run K-Means on it
    # For this to work, you need to complete the K-Means assignment first
    A = matplotlib.image.imread('bird_small.png')

    # If imread does not work for you, you can try instead
    #   load ('bird_small.mat')

    A = A / 255
    X = A.reshape(-1, 3)
    K = 16
    max_iters = 10
    initial_centroids = kMeansInitCentroids(X, K)
    centroids, idx = runkMeans('7', X, initial_centroids, max_iters)

    #  Sample 1000 random indexes (working with all the data is too
    #  expensive; if you have a fast computer, you may increase this).
    sel = np.random.choice(X.shape[0], size=1000)

    #  Setup Color Palette
    #palette = hsv(K)
    #colors = palette(idx(sel), :)

    #  Visualize the data and centroid memberships in 3D
    fig = plt.figure()
    ax = fig.add_subplot(111, projection='3d')
    ax.scatter(X[sel, 0], X[sel, 1], X[sel, 2], cmap='rainbow', c=idx[sel], s=8**2)
    ax.set_title('Pixel dataset plotted in 3D. Color shows centroid memberships')
    plt.savefig('figure8.png')

    print('Program paused. Press enter to continue.')
    #pause

    ## === Part 8(b): Optional (ungraded) Exercise: PCA for Visualization ===
    # Use PCA to project this cloud to 2D for visualization

    # Subtract the mean to use PCA
    X_norm, mu, sigma = featureNormalize(X)

    # PCA and project the data to 2D
    U, S = pca(X_norm)
    Z = projectData(X_norm, U, 2)

    # Plot in 2D
    fig = plt.figure()
    plotDataPoints(Z[sel, :], [idx[sel]], K, 0)
    plt.title('Pixel dataset plotted in 2D, using PCA for dimensionality reduction')
    plt.savefig('figure9.png')
    print('Program paused. Press enter to continue.\n')
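projectData and recoverData used throughout ex7_pca are thin wrappers around the top-K eigenvectors; minimal sketches:

import numpy as np

def projectData(X, U, K):
    """Project rows of X onto the first K principal directions."""
    return np.dot(X, U[:, :K])

def recoverData(Z, U, K):
    """Map K-dimensional projections back into the original feature space."""
    return np.dot(Z, U[:, :K].T)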
Example #18
def ex5():
    ## Machine Learning Online Class
    #  Exercise 5 | Regularized Linear Regression and Bias-Variance
    #
    #  Instructions
    #  ------------
    #
    #  This file contains code that helps you get started on the
    #  exercise. You will need to complete the following functions:
    #
    #     linearRegCostFunction.m
    #     learningCurve.m
    #     validationCurve.m
    #
    #  For this exercise, you will not need to change any code in this file,
    #  or any other files other than those mentioned above.
    #

    ## Initialization
    #clear ; close all; clc

    ## =========== Part 1: Loading and Visualizing Data =============
    #  We start the exercise by first loading and visualizing the dataset.
    #  The following code will load the dataset into your environment and plot
    #  the data.
    #

    # Load Training Data
    print('Loading and Visualizing Data ...')

    # Load from ex5data1:
    # You will have X, y, Xval, yval, Xtest, ytest in your environment
    mat = scipy.io.loadmat('ex5data1.mat')
    X = mat['X']
    y = mat['y'].ravel()
    Xval = mat['Xval']
    yval = mat['yval'].ravel()
    Xtest = mat['Xtest']
    ytest = mat['ytest'].ravel()

    # m = Number of examples
    m = X.shape[0]

    # Plot training data
    plt.plot(X, y, marker='x', linestyle='None', ms=10, lw=1.5)
    plt.xlabel('Change in water level (x)')
    plt.ylabel('Water flowing out of the dam (y)')
    plt.savefig('figure1.png')

    print('Program paused. Press enter to continue.')
    #pause;

    ## =========== Part 2: Regularized Linear Regression Cost =============
    #  You should now implement the cost function for regularized linear
    #  regression.
    #

    theta = np.array([1, 1])
    J, _ = linearRegCostFunction(np.concatenate([np.ones((m, 1)), X], axis=1),
                                 y, theta, 1)

    print(
        'Cost at theta = [1 ; 1]: %f \n(this value should be about 303.993192)'
        % J)

    print('Program paused. Press enter to continue.')
    #pause;

    ## =========== Part 3: Regularized Linear Regression Gradient =============
    #  You should now implement the gradient for regularized linear
    #  regression.
    #

    theta = np.array([1, 1])
    J, grad = linearRegCostFunction(
        np.concatenate([np.ones((m, 1)), X], axis=1), y, theta, 1)

    print(
        'Gradient at theta = [1 ; 1]:  [%f; %f] \n(this value should be about [-15.303016; 598.250744])'
        % (grad[0], grad[1]))

    print('Program paused. Press enter to continue.')
    #pause;

    ## =========== Part 4: Train Linear Regression =============
    #  Once you have implemented the cost and gradient correctly, the
    #  trainLinearReg function will use your cost function to train
    #  regularized linear regression.
    #
    #  Write Up Note: The data is non-linear, so this will not give a great
    #                 fit.
    #

    fig = plt.figure()

    #  Train linear regression with lambda = 0
    lambda_value = 0
    theta = trainLinearReg(np.concatenate([np.ones((m, 1)), X], axis=1), y,
                           lambda_value)

    #  Plot fit over the data
    plt.plot(X, y, marker='x', linestyle='None', ms=10, lw=1.5)
    plt.xlabel('Change in water level (x)')
    plt.ylabel('Water flowing out of the dam (y)')
    plt.plot(X,
             np.dot(np.concatenate([np.ones((m, 1)), X], axis=1), theta),
             '--',
             lw=2)
    plt.savefig('figure2.png')

    print('Program paused. Press enter to continue.')
    #pause;

    ## =========== Part 5: Learning Curve for Linear Regression =============
    #  Next, you should implement the learningCurve function.
    #
    #  Write Up Note: Since the model is underfitting the data, we expect to
    #                 see a graph with "high bias" -- slide 8 in ML-advice.pdf
    #

    fig = plt.figure()

    lambda_value = 0
    error_train, error_val = learningCurve(
        np.concatenate([np.ones((m, 1)), X], axis=1), y,
        np.concatenate([np.ones((yval.size, 1)), Xval], axis=1), yval,
        lambda_value)

    plt.plot(np.arange(1, m + 1), error_train, np.arange(1, m + 1), error_val)
    plt.title('Learning curve for linear regression')
    plt.legend(['Train', 'Cross Validation'])
    plt.xlabel('Number of training examples')
    plt.ylabel('Error')
    plt.axis([0, 13, 0, 150])

    print('# Training Examples\tTrain Error\tCross Validation Error')
    for i in range(m):
        print('  \t%d\t\t%f\t%f' % (i, error_train[i], error_val[i]))
    plt.savefig('figure3.png')

    print('Program paused. Press enter to continue.')
    #pause;

    ## =========== Part 6: Feature Mapping for Polynomial Regression =============
    #  One solution to this is to use polynomial regression. You should now
    #  complete polyFeatures to map each example into its powers
    #

    p = 8

    # Map X onto Polynomial Features and Normalize
    X_poly = polyFeatures(X, p)
    X_poly, mu, sigma = featureNormalize(X_poly)  # Normalize
    X_poly = np.concatenate([np.ones((m, 1)), X_poly], axis=1)  # Add Ones

    # Map X_poly_test and normalize (using mu and sigma)
    X_poly_test = polyFeatures(Xtest, p)
    X_poly_test -= mu
    X_poly_test /= sigma
    X_poly_test = np.concatenate(
        [np.ones((X_poly_test.shape[0], 1)), X_poly_test], axis=1)  # Add Ones

    # Map X_poly_val and normalize (using mu and sigma)
    X_poly_val = polyFeatures(Xval, p)
    X_poly_val -= mu
    X_poly_val /= sigma
    X_poly_val = np.concatenate(
        [np.ones((X_poly_val.shape[0], 1)), X_poly_val], axis=1)  # Add Ones

    print('Normalized Training Example 1:')
    print(formatter('  %f  \n', X_poly[0, :]))

    print('\nProgram paused. Press enter to continue.')
    #pause;

    ## =========== Part 7: Learning Curve for Polynomial Regression =============
    #  Now, you will get to experiment with polynomial regression with multiple
    #  values of lambda. The code below runs polynomial regression with
    #  lambda = 0. You should try running the code with different values of
    #  lambda to see how the fit and learning curve change.
    #

    fig = plt.figure()

    lambda_value = 0
    theta = trainLinearReg(X_poly, y, lambda_value)

    # Plot training data and fit
    plt.plot(X, y, marker='x', ms=10, lw=1.5)
    plotFit(np.min(X), np.max(X), mu, sigma, theta, p)
    plt.xlabel('Change in water level (x)')
    plt.ylabel('Water flowing out of the dam (y)')
    plt.title('Polynomial Regression Fit (lambda = %f)' % lambda_value)

    plt.figure()
    error_train, error_val = learningCurve(X_poly, y, X_poly_val, yval,
                                           lambda_value)
    plt.plot(np.arange(1, 1 + m), error_train, np.arange(1, 1 + m), error_val)

    plt.title('Polynomial Regression Learning Curve (lambda = %f)' %
              lambda_value)
    plt.xlabel('Number of training examples')
    plt.ylabel('Error')
    plt.axis([0, 13, 0, 100])
    plt.legend(['Train', 'Cross Validation'])

    print('Polynomial Regression (lambda = %f)\n' % lambda_value)
    print('# Training Examples\tTrain Error\tCross Validation Error')
    for i in range(m):
        print('  \t%d\t\t%f\t%f' % (i, error_train[i], error_val[i]))
    plt.savefig('figure4.png')

    print('Program paused. Press enter to continue.')
    #pause;

    ## =========== Part 8: Validation for Selecting Lambda =============
    #  You will now implement validationCurve to test various values of
    #  lambda on a validation set. You will then use this to select the
    #  "best" lambda value.
    #

    fig = plt.figure()

    lambda_vec, error_train, error_val = validationCurve(
        X_poly, y, X_poly_val, yval)

    plt.plot(lambda_vec, error_train, lambda_vec, error_val)
    plt.legend(['Train', 'Cross Validation'])
    plt.xlabel('lambda')
    plt.ylabel('Error')

    print('lambda\t\tTrain Error\tValidation Error')
    for i in range(lambda_vec.size):
        print(' %f\t%f\t%f' % (lambda_vec[i], error_train[i], error_val[i]))
    plt.savefig('figure5.png')

    print('Program paused. Press enter to continue.')
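ex5 above relies on linearRegCostFunction returning both the regularized cost and its gradient; a minimal sketch of that contract (the bias term is left unregularized, as in the course exercise):

import numpy as np

def linearRegCostFunction(X, y, theta, reg_lambda):
    """Regularized linear-regression cost and gradient; theta[0] is not penalized."""
    m = y.size
    theta = np.asarray(theta, dtype=float).reshape(-1, 1)
    residual = np.dot(X, theta) - y.reshape(-1, 1)
    cost = float((residual ** 2).sum()) / (2 * m)
    cost += reg_lambda * float((theta[1:] ** 2).sum()) / (2 * m)
    grad = np.dot(X.T, residual) / m
    grad[1:] += (reg_lambda / m) * theta[1:]
    return cost, grad.ravel()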
from runkMeans import runkMeans
from kMeansInitCentroids import kMeansInitCentroids
from featureNormalize import featureNormalize
from pca import pca
from projectData import projectData
from recoverData import recoverData
from displayData import displayData

#2.1
data = loadmat('ex7data1.mat')
X = data['X']
#plt.plot(X[:,0],X[:,1],'bo')
#plt.show()

#2.2
(X_norm, mu, sigma) = featureNormalize(X)
(U, S, V) = pca(X_norm)
#for i in range(2):
#plt.arrow(mu[0], mu[1], 1.5 * S[i]*U[0, i], 1.5 * S[i]*U[1, i])
#plt.show()

#2.3.1
K = 1
Z = projectData(X_norm, U, K)

X_rec = recoverData(Z, U, K)
#print(X_rec)

#2.3.3
#plt.plot(X_rec[:,0],X_rec[:,1],'ro')
#plt.plot(X_norm[:,0],X_norm[:,1],'bo')
Example #20
y = data[:, 2]
m = y.size

# Print some data points

print('First 10 examples from the dataset: \n')
list(map(lambda x,y: print("x = [%0.f %0.f], y=%0.f"%(x[0],x[1],y)),X[0:10,:],y[0:10]))

print('Program paused. Press enter to continue \n')
time.sleep(2)

# Scale features and set them to zero mean

print("Normalizing features ... \n")

X, mu, sigma = featureNormalize(X)  #X = stats.zscore(X) #(You could also use this instead of our featureNormalize function)

# Add intercept term to X

X = np.c_[np.ones((m,)), X]

### Part 2 : Gradient Descent

print("Running gradient Descent ... \n")

# Choose some alpha value

alpha = 0.01
num_iters = 400

# Init Theta and run Gradient Descent
Example #21
#plt.show()

raw_input('Program paused. Press enter to continue.\n')

### ================ Part 3: Cost and Gradient descent ========= ###
print('Running Gradient Descent...')

### Method 1: Add a column to X
#A = np.ones((m), dtype=int)
#X = np.c_[A, X] # Add a column of ones to x ( Can use np.column_stack to add a column to X)

### Method 2
#X = np.column_stack((np.ones((m, 1)), X))

# Feature scaling
X_norm, mu, sigma = featureNormalize(X_poly)

X_ones = np.ones((m, 1))

X_padded = np.column_stack((X_ones, X_norm))

X_test = np.column_stack((X_ones, X_poly))

print X_padded

# Initialize fitting parameters
#theta = np.zeros(shape=(2, 1))
theta = np.zeros(shape=(3, 1))

# Some gradient descent settings
iterations = 30000
data = np.loadtxt('ex1data2.txt', delimiter=",")
X = data[:,:2]
y = data[:,2]
m = len(y) # number of training examples

# Print out some data points
print('First 10 examples from the dataset: \n')
for i in xrange(10):
    print ("x = [{:.0f} {:.0f}], y = {:.0f}".format(X[i,0], X[i,1], y[i]))

raw_input('Program paused. Press enter to continue.\n')

# Scale features and set them to zero mean
print('Normalizing Features...')

X_norm, mu, sigma = fn.featureNormalize(X)

# Add intercept term to X
X_padded = np.column_stack((np.ones((m,1)), X_norm)) # Add a column of ones to x


## ================ Part 2: Gradient Descent ================

print('Running gradient descent...')

# Choose some alpha value
alpha = 0.01
num_iters = 400

# Init Theta and Run Gradient Descent 
theta = np.zeros((3, 1)) 
Example #23
theta, J_history = gradientDescentMulti(X, y, theta, alpha, iterations)

print('Theta found by gradient descent:\n', theta,
      '\nExpected theta values (approx)\n -3.6303\n  1.1664\n\n')

# plotData('scatter', data[:,0],data[:,1], 'data1', 'population', 'profit')
# plotData('plot', data[:,0], X.dot(theta), 'data1', 'Training data', 'Linear regression', color='blue')
# plt.show()

predict1 = np.array([1, 3.5]).dot(theta)
print('For population = 35,000, we predict a profit of \n', predict1 * 10000)
predict2 = np.array([1, 7]).dot(theta)
print('For population = 70,000, we predict a profit of \n', predict2 * 10000)

# plotData('plot', np.array(range(iterations)), J_history, 'Cost function', 'iterations', 'J')
# plt.show()

data2 = np.loadtxt('ex1/ex1data2.txt', delimiter=',')
m, n = data2.shape

x_norm, mu, sigma = featureNormalize(data2[:, 0:n - 1], True)
y = data2[:, n - 1].reshape(m, 1)  # target is the last column (price)
iterations = 800
alpha = 0.01
theta = np.zeros((n, 1))

theta, J_history = gradientDescentMulti(x_norm, y, theta, alpha, iterations)
plotData('plot', np.array(range(iterations)), J_history, 'Cost function',
         'iterations', 'J')
plt.show()
Example #24
@author: Trey
"""

import numpy as np
import gradientDescent as gd
import featureNormalize as fn
import normalEquation as ne

data = np.loadtxt(open("data2.txt", "rb"), delimiter=",")

num_features = len(data[1, ...])
X = data[..., range(num_features - 1)]
y = np.array([data[..., num_features - 1]]).T
m = len(y)
X_norm = fn.featureNormalize(X)
X_norm = np.append(np.ones((m, 1)), X_norm, axis=1)

theta = np.zeros((3, 1))
theta = gd.gradientDescent(X_norm, y, theta, 0.01, 400)
print(theta)
test = np.array([[1650, 3]]).T
test = fn.featureNormalize(test)
test = np.insert(test, 0, [[1]])
print(test @ theta)

X = np.append(np.ones((m, 1)), X, axis=1)
test = np.array([[1, 1650, 3]])
theta = ne.normalEquation(X, y)
print(test @ theta)
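Note that the gradient-descent prediction above (fn.featureNormalize(test)) normalizes the 1650/3 test point against its own two values instead of the training statistics. A hedged sketch of the usual fix, assuming a featureNormalize variant that also returns mu and sigma:

# assuming: X_norm, mu, sigma = fn.featureNormalize(X) was used for training, and
# theta is the gradient-descent fit (captured before it is overwritten above)
test = np.array([[1650, 3]], dtype=float)
test_norm = np.insert((test - mu) / sigma, 0, 1.0, axis=1)  # scale with training stats, prepend intercept
print(test_norm @ theta)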
Example #25
File: ex1_multi.py, Project: edonyM/pyexer
    y = np.append(y,float(tmp[2].split()[0]))
X1 = np.concatenate(([X1],[X2]),axis=0)
X = X1.T
m = y.shape[0]
y = y.reshape(m,1)
# Print out some data points
print('First 10 examples from the dataset: \n');
for i in range(10):
    print(' x = [%.0f %.0f], y = %.0f \n'% (X[i,0],X[i,1],y[i]));

raw_input('Program paused. Press enter to continue.\n');

# Scale features and set them to zero mean
print('Normalizing Features ...\n');
from featureNormalize import featureNormalize
[X,mu,sigma] = featureNormalize(X);

# Add intercept term to X
tmp = np.array(np.ones((m, 1)));
tmp = np.concatenate((tmp,X),axis=1);
X = tmp;

## ================ Part 2: Gradient Descent ================

# ====================== YOUR CODE HERE ======================
# Instructions: We have provided you with the following starter
#               code that runs gradient descent with a particular
#               learning rate (alpha). 
#
#               Your task is to first make sure that your functions - 
#               computeCost and gradientDescent already work with 
Example #26
                            np.column_stack((np.ones((Xval.shape[0],1)), Xval)), yval, _lambda)
plt.plot(range(0, m), error_train, label="Training Error")
plt.plot(range(0, m), error_val, label="Validation Error")
plt.legend()
plt.xlabel('Number of training examples')
plt.ylabel('Error')
plt.show()
print('Training Examples\tTrain Error\tCross Validation Error\n')
for i in range(0, m):
    print('{:d}\t\t{:f}\t{:f}\n'.format(i + 1, float(error_train[i]),
                                        float(error_val[i])))

# =========== Part 6: Feature Mapping for Polynomial Regression =============
p = 8
x_poly = polyFeatures(X, p)
x_poly, mu, sigma = featureNormalize(x_poly)
x_poly = np.column_stack((np.ones((x_poly.shape[0], 1)), x_poly))

x_poly_test = polyFeatures(Xtest, p)
x_poly_test = x_poly_test - mu
x_poly_test = x_poly_test / sigma
x_poly_test = np.column_stack((np.ones(
    (x_poly_test.shape[0], 1)), x_poly_test))

x_poly_val = polyFeatures(Xval, p)
x_poly_val = x_poly_val - mu
x_poly_val = x_poly_val / sigma
x_poly_val = np.column_stack((np.ones((x_poly_val.shape[0], 1)), x_poly_val))

# =========== Part 7: Learning Curve for Polynomial Regression =============
_lambda = 0
Example #27
# plt.plot(X[:, 0], X[:, 1], 'o', markersize=9, markeredgewidth=1, markeredgecolor='b', markerfacecolor='None')
plt.scatter(X[:, 0], X[:, 1], s=75, facecolors='none', edgecolors='b')
plt.axis([0.5, 6.5, 2, 8])
plt.gca().set_aspect('equal', adjustable='box')
plt.show(block=False)

raw_input('Program paused. Press enter to continue.')

## =============== Part 2: Principal Component Analysis ===============
#  You should now implement PCA, a dimension reduction technique. You
#  should complete the code in pca.m
#
print('Running PCA on example dataset.\n')

#  Before running PCA, it is important to first normalize X
X_norm, mu, _ = fn.featureNormalize(X)

#  Run PCA
U, S = pca.pca(X_norm)

#  Compute mu, the mean of each feature

#  Draw the eigenvectors centered at mean of data. These lines show the
#  directions of maximum variations in the dataset.
plt.hold(True)
dl.drawLine(mu, mu + 1.5 * S[0, 0] * U[:, 0].T, c='k', linewidth=2)
dl.drawLine(mu, mu + 1.5 * S[1, 1] * U[:, 1].T, c='k', linewidth=2)
plt.hold(False)

print('Top eigenvector: \n')
print(' U(:,1) = {:f} {:f} \n'.format(U[0, 0], U[1, 0]))
Example #28
plt.xlabel('Number of training examples')
plt.ylabel('Error')
#plt.show()

print('# Training Examples / Train Error / Cross Validation Error')
for i in range(m):
    print('  {0:<19} {1:<13.8f} {2:<.8f}'.format(i + 1, error_train[i],
                                                 error_val[i]))

# ===================== 3. Polynomial regression ==============================
# ===================== 3.1 Learning Polynomial Regression ====================

p = 8

X_poly = polyFeatures(X, p)
X_poly, mu, sigma = featureNormalize(X_poly)
X_poly = np.hstack((np.ones((m, 1)), X_poly))

X_poly_test = polyFeatures(X_test, p)
X_poly_test, dummy_mu, dummy_sigma = featureNormalize(X_poly_test, mu, sigma)
X_poly_test = np.hstack((np.ones((m_test, 1)), X_poly_test))

X_poly_val = polyFeatures(X_val, p)
X_poly_val, dummy_mu, dummy_sigma = featureNormalize(X_poly_val, mu, sigma)
X_poly_val = np.hstack((np.ones((m_val, 1)), X_poly_val))

print('Normalized Training Example 1:')
print(X_poly[0, :])

l = 0.0
theta = trainLinearReg(X_poly, y, l, iteration=500)
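trainLinearReg, as called just above with an iteration budget, typically just minimizes the regularized cost; a sketch assuming a linearRegCostFunction of the (cost, gradient) shape sketched earlier on this page:

import numpy as np
from scipy.optimize import minimize

def trainLinearReg(X, y, reg_lambda, iteration=200):
    """Fit theta by minimizing the regularized linear-regression cost with L-BFGS."""
    initial_theta = np.zeros(X.shape[1])
    result = minimize(lambda t: linearRegCostFunction(X, y, t, reg_lambda),
                      initial_theta, jac=True, method='L-BFGS-B',
                      options={'maxiter': iteration})
    return result.x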
def ex1_multi():
    # Initialization

    # ================ Part 1: Feature Normalization ================

    # Clear and Close Figures
    #clear ; close all; clc

    print('Loading data ...')

    # Load Data
    data = np.loadtxt('ex1data2.txt', delimiter=',')
    X = np.reshape(data[:, 0:2], (data.shape[0], 2))
    y = np.reshape(data[:, 2], (data.shape[0], 1))
    m = y.shape[0]

    # Print out some data points
    print('First 10 examples from the dataset: ')
    print(np.c_[X[0:10, :], y[0:10, :]].T)

    print('Program paused. Press enter to continue.')
    #input()

    # Scale features and set them to zero mean
    print('Normalizing Features ...')

    X, mu, sigma = featureNormalize(X)

    # Add intercept term to X
    X = np.c_[np.ones((m, 1)), X]


    # ================ Part 2: Gradient Descent ================

    # ====================== YOUR CODE HERE ======================
    # Instructions: We have provided you with the following starter
    #               code that runs gradient descent with a particular
    #               learning rate (alpha).
    #
    #               Your task is to first make sure that your functions -
    #               computeCost and gradientDescent already work with
    #               this starter code and support multiple variables.
    #
    #               After that, try running gradient descent with
    #               different values of alpha and see which one gives
    #               you the best result.
    #
    #               Finally, you should complete the code at the end
    #               to predict the price of a 1650 sq-ft, 3 br house.
    #
    # Hint: By using the 'hold on' command, you can plot multiple
    #       graphs on the same figure.

    # Hint: At prediction, make sure you do the same feature normalization.


    # Begin: My code plotting for different learning rates
    alphas = [0.3, 0.1, 0.03, 0.01]
    colors = ['r', 'g', 'b', 'k']
    short_iters = 50
    fig = plt.figure()
    #hold on;
    plt.xlabel('Number of iterations')
    plt.ylabel('Cost J')
    for i in range(len(alphas)):
        _, J = gradientDescentMulti(X, y, np.reshape(np.zeros((3, 1)), (3, 1)), alphas[i], short_iters)
        plt.plot(range(len(J)), J, colors[i], markersize=2)
    plt.savefig('figure1.multi.png')
    # End: My code plotting for different learning rates

    print('Running gradient descent ...')

    # Choose some alpha value
    alpha = 0.01
    num_iters = 400

    # Init Theta and Run Gradient Descent
    theta = np.reshape(np.zeros((3, 1)), (3, 1))
    theta, J_history = gradientDescentMulti(X, y, theta, alpha, num_iters)

    # Plot the convergence graph
    fig = plt.figure()
    plt.plot(range(len(J_history)), J_history, '-b', markersize=2)
    plt.xlabel('Number of iterations')
    plt.ylabel('Cost J')
    plt.savefig('figure2.multi.png')

    # Display gradient descent's result
    print('Theta computed from gradient descent: ')
    print(theta)
    print()

    # Estimate the price of a 1650 sq-ft, 3 br house
    # ====================== YOUR CODE HERE ======================
    # Recall that the first column of X is all-ones. Thus, it does
    # not need to be normalized.
    #price = 0; % You should change this

    price = np.dot(np.r_[1, np.divide(np.subtract([1650, 3], mu), sigma)], theta)

    # ============================================================

    print('Predicted price of a 1650 sq-ft, 3 br house (using gradient descent):\n $%f' % price)

    print('Program paused. Press enter to continue.')
    #input()

    # ================ Part 3: Normal Equations ================

    print('Solving with normal equations...')

    # ====================== YOUR CODE HERE ======================
    # Instructions: The following code computes the closed form
    #               solution for linear regression using the normal
    #               equations. You should complete the code in
    #               normalEqn.m
    #
    #               After doing so, you should complete this code
    #               to predict the price of a 1650 sq-ft, 3 br house.
    #

    # Load Data
    data = np.loadtxt('ex1data2.txt', delimiter=',')
    X = np.reshape(data[:, 0:2], (data.shape[0], 2))
    y = np.reshape(data[:, 2], (data.shape[0], 1))
    m = y.shape[0]

    # Add intercept term to X
    X = np.c_[np.ones((m, 1)), X]

    # Calculate the parameters from the normal equation
    theta = normalEqn(X, y)

    # Display normal equation's result
    print('Theta computed from the normal equations: ')
    print(theta)
    print('')


    # Estimate the price of a 1650 sq-ft, 3 br house
    # ====================== YOUR CODE HERE ======================
    price = np.dot([1, 1650, 3], theta) # You should change this


    # ============================================================

    print('Predicted price of a 1650 sq-ft, 3 br house (using normal equations):\n $%f' % price)

    # http://scikit-learn.org/stable/auto_examples/linear_model/plot_ridge_coeffs.html
    # Using sklearn
    X = np.reshape(data[:, 0:2], (data.shape[0], 2))
    y = np.reshape(data[:, 2], (data.shape[0], 1))
    model = linear_model.Ridge(max_iter=num_iters, solver='lsqr')
    count = 200
    alphas = np.logspace(-3, 1, count)
    coefs = np.zeros((count, 2))
    errors = np.zeros((count, 1))
    for i, alpha in enumerate(alphas):
        model.set_params(alpha=alpha)
        model.fit(X, y)
        coefs[i, :] = model.coef_
        errors[i, 0] = metrics.mean_squared_error(model.predict(X), y)
    results = [(r'$\theta_1$', coefs[:, 0]), (r'$\theta_2$', coefs[:, 1]), ('MSE', errors)]
    for i, result in enumerate(results):
        label, values = result
        plt.figure()
        ax = plt.gca()
        ax.set_xscale('log')
        ax.plot(alphas, values)
        plt.xlabel(r'$\alpha$')
        plt.ylabel(label)
        plt.savefig('figure%d.multi.sklearn.png' % (i + 1))
    #model = linear_model.LinearRegression()
    model = linear_model.Ridge(alpha=alpha, max_iter=num_iters, solver='lsqr')
    model.fit(X, y)
    print('Theta found: ')
    print('%f %f %f' % (model.intercept_[0], model.coef_[0, 0], model.coef_[0, 1]))
    print('Predicted price of a 1650 sq-ft, 3 br house (using sklearn):\n $%f' % model.predict([[1650, 3]]))
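The commented-out LinearRegression line above is the unregularized counterpart of the Ridge comparison; a self-contained sketch of that check on the same data file:

import numpy as np
from sklearn import linear_model

data = np.loadtxt('ex1data2.txt', delimiter=',')
X, y = data[:, 0:2], data[:, 2]
lr = linear_model.LinearRegression()   # unregularized; adds its own intercept term
lr.fit(X, y)
print('sklearn theta:', lr.intercept_, lr.coef_)  # should agree with the normal-equation result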
Example #30
    data = np.asmatrix(np.loadtxt('ex1data2.txt', delimiter=','))
    X = data[:, :2]
    y = data[:, 2]
    m = y.shape[0]

    # Print out some data points
    print('First 10 examples from the dataset: ')
    for i in range(10):
        print("x = [%i %i], y = %i" % (X[i, 0], X[i, 1], y[i]))

    input('Program paused. Press enter to continue.')

    # Scale features and set them to zero mean
    print('Normalizing Features ...')

    X, mu, sigma = featureNormalize(X)

    # Add intercept term to X
    X = np.hstack((np.ones((m, 1)), X))

    # ================ Part 2: Gradient Descent ================

    # ====================== YOUR CODE HERE ====================

    print('Running gradient descent ...')

    # Choose some alpha value
    alpha = 0.01
    num_iters = 400

    # Init Theta and Run Gradient Descent
Example #31
# OBS: use double brackets so the selection keeps its 2-D shape

# Size of the house (X1) and number of bedrooms (X2)
x = data[[0, 1]]
x.columns = ['X1', 'X2']

y = data[[2]]  # Price of the houses in Portland

# Number of training examples
m = len(y)

#%% Part 2 - Feature Scaling or Normalization

import featureNormalize as fn

[x_norm, x_mean, x_std] = fn.featureNormalize(x)

#%% Part 3 - Plotting the Data Normalized

# Please check the MLplot source code for details
import MLplot as pl

dataPlot = pl.plot2D(x_norm, y)
dataPlot.set_title("Housing Prices")
dataPlot.set_xlabel('Size of the house (X1) and number of bedrooms (X2)')
dataPlot.set_ylabel('Price of the house ($)')

#%% Part 4 - Gradient Descent for n-Features

import numpy as np
import gradientDescent as gd
Example #32
print('Training Examples\tTrain Error\tCross Validation Error')
for i in range(m):
    print('  \t%d\t\t%f\t%f' % (i, error_train[i], error_val[i]))

input("Program paused. Press Enter to continue...")

## =========== Part 6: Feature Mapping for Polynomial Regression =============
#  One solution to this is to use polynomial regression. You should now
#  complete polyFeatures to map each example into its powers
#

p = 8

# Map X onto Polynomial Features and Normalize
X_poly = polyFeatures(X, p)
X_poly, mu, sigma = featureNormalize(X_poly)  # Normalize
X_poly = np.column_stack((np.ones(m), X_poly))  # Add Ones

# Map X_poly_test and normalize (using mu and sigma)
X_poly_test = polyFeatures(Xtest, p)
X_poly_test = X_poly_test - mu
X_poly_test = X_poly_test / sigma
X_poly_test = np.column_stack(
    (np.ones(X_poly_test.shape[0]), X_poly_test))  # Add Ones

# Map X_poly_val and normalize (using mu and sigma)
X_poly_val = polyFeatures(Xval, p)
X_poly_val = X_poly_val - mu
X_poly_val = X_poly_val / sigma
X_poly_val = np.column_stack(
    (np.ones(X_poly_val.shape[0]), X_poly_val))  # Add Ones
Example #33
X = data[:, 0:2]
y = data[:, 2:3]
m = len(y)

#Print out some data points
print('First 10 examples from the dataset: \n')
a = X[0:10, :]
b = y[0:10, :]
print('\tX = \t, y = ')
for c in range(len(a)):
    print(a[c], b[c])

# Scale features and set them to zero mean
print('Normalizing Features ...\n')

X, mu, sigma = featureNormalize.featureNormalize(X)

# Add intercept term to X
X = np.concatenate((np.ones((m, 1)), X), axis=1)

print('Running gradient descent ...\n')

# Choose some alpha value
alpha = 0.5
num_iters = 100

# Init Theta and Run Gradient Descent
theta1 = np.zeros([3, 1])
theta1, J_history = gradientDescentMulti.gradientDescentMulti(
    X, y, theta1, alpha, num_iters)
plt.axis([0.5, 6.5, 2, 8])
plt.draw()
plt.show(block=False)

print('Program paused. Press enter to continue.\n')
pause()

"""
## =============== Part 2: Principal Component Analysis ===============
#  You should now implement PCA, a dimension reduction technique. You
#  should complete the code in pca.m
"""
print('\nRunning PCA on example dataset.\n\n')

#  Before running PCA, it is important to first normalize X
[X_norm, mu, sigma] = featureNormalize(X)

#  Run PCA
[U, S] = pca(X_norm)


#  Draw the eigenvectors centered at mean of data. These lines show the
#  directions of maximum variations in the dataset.

def drawLine(p1, p2, *args, **kwargs):
    x = [p1[0], p2[0]]
    y = [p1[1], p2[1]]
    plt.plot(x, y, *args, **kwargs)


drawLine(mu, mu + 1.5 * S[0] * U[:, 0].T, lw=2, color='k')
Example #35
axis([0.5, 6.5, 2, 8])
axis('equal')
fig.show()

print 'Program paused. Press enter to continue.'
raw_input()


## =============== Part 2: Principal Component Analysis ===============
#  You should now implement PCA, a dimension reduction technique. You
#  should complete the code in pca.py
#
print '\nRunning PCA on example dataset.\n'

#  Before running PCA, it is important to first normalize X
X_norm, mu, sigma = featureNormalize(X)

#  Run PCA
U, s = pca(X_norm)

#  Compute mu, the mean of each feature

#  Draw the eigenvectors centered at mean of data. These lines show the
#  directions of maximum variations in the dataset.
hold(True)
drawLine(mu, mu + 1.5 * s[0] * U[:,0].T, '-k', linewidth=2)
drawLine(mu, mu + 1.5 * s[1] * U[:,1].T, '-k', linewidth=2)
hold(False)
fig.show()

print 'Top eigenvector:'
Example #36
# The complete model to execute the whole multi-regression

import loadData
import featureNormalize
import normalEqn
import gradientDescent
import numpy


def init(number):
    return numpy.random.normal(0, 1, number + 1)


def multiRegression(number, x, y, learning_rate=1e-3, epoch=50):
    weights = init(number)
    for i in range(epoch):
        weights = gradientDescent.computeCost(learning_rate, weights, x, y)
    return weights


if __name__ == '__main__':
    x, y = loadData.loadData('ex1data2.txt', 2)
    x = featureNormalize.featureNormalize(x, 2)
    print normalEqn.normalEqn(x, y)
    print multiRegression(2, x, y)
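gradientDescent.computeCost is used above as an update step rather than a cost; a sketch of a single batch gradient-descent update matching that call signature (the name is kept from this project, but the body here is an assumption, with x taken as the m x 2 normalized feature matrix and y a length-m vector):

import numpy as np

def computeCost(learning_rate, weights, x, y):
    """One batch gradient-descent update; despite the name it returns updated weights."""
    m = y.shape[0]
    X1 = np.column_stack((np.ones(m), x))   # prepend the intercept column
    gradient = X1.T.dot(X1.dot(weights) - y) / m
    return weights - learning_rate * gradient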
Example #37
print 'Training Examples\tTrain Error\tCross Validation Error'
for i in range(m):
    print '  \t%d\t\t%f\t%f' % (i, error_train[i], error_val[i])

raw_input("Program paused. Press Enter to continue...") 

## =========== Part 6: Feature Mapping for Polynomial Regression =============
#  One solution to this is to use polynomial regression. You should now
#  complete polyFeatures to map each example into its powers
#

p = 8

# Map X onto Polynomial Features and Normalize
X_poly = polyFeatures(X, p)
X_poly, mu, sigma = featureNormalize(X_poly)  # Normalize
X_poly = np.column_stack((np.ones(m), X_poly))                   # Add Ones

# Map X_poly_test and normalize (using mu and sigma)
X_poly_test = polyFeatures(Xtest, p)
X_poly_test = X_poly_test - mu
X_poly_test = X_poly_test / sigma
X_poly_test = np.column_stack((np.ones(X_poly_test.shape[0]), X_poly_test))        # Add Ones

# Map X_poly_val and normalize (using mu and sigma)
X_poly_val = polyFeatures(Xval, p)
X_poly_val = X_poly_val - mu
X_poly_val = X_poly_val / sigma
X_poly_val = np.column_stack((np.ones(X_poly_val.shape[0]), X_poly_val))           # Add Ones

print 'Normalized Training Example 1:'
Example #38
#Importing the dataset
dataset = pd.read_csv("ex1_data2.csv")
X = dataset.iloc[:, :-1].values
y = dataset.iloc[:, -1].values
m = len(y)
y = np.reshape(y, newshape=(m, 1))
print(X.shape, y.shape)

# Part 1 : Feature Normalisation
# Print out some data points
from featureNormalize import featureNormalize

print('First 10 examples from dataset: ')
print(dataset.iloc[:10, :].values)
print('Normalising Features.')
X, mean, sigma = featureNormalize(X)

#Adding Intercept Column
X = np.append(arr=np.ones((m, 1)).astype(int), values=X,
              axis=1)  # 1 for Column
print('After Normalizing: First 5 examples from dataset')
print(X[:5, :])

# Part 2: Gradient Descent
from gradientDescentMulti import *

print('Running Gradient Descent.')
alpha = 0.03
iterations = 1000
m1, n = X.shape
theta = np.zeros((n, 1)).astype(int)
Example #39
price = data['Price']

x = np.zeros((len(data), 2))
x[:, 0] = size
x[:, 1] = noOfVBedrooms

y = np.zeros((len(data), 1))
y[:, 0] = price

m = len(y)

print('First 10 examples from the dataset: \n')
print(' x = ', x[0:10, :])
print(' y = ', y[0:10])

[X, mu, sigma] = fp.featureNormalize(x)

# increasing the shape, adding a column of ones to x
ones = np.ones((len(x), 1))
X = np.hstack((ones, X))
#print(np.hstack((ones, X)))

# Gradient Descent
# 1) Try different values of alpha
# 2) prediction (With feature normalisation)

alpha = 0.009
#0.009, try 0.01, 0.009.
num_iters = 350

# Init Theta and Run Gradient Descent
Example #40
plt.show()

input('Program paused. Press Enter to continue ... \n')

# ======== Part 2. Cost function with regularization =========

print('Part 2. Cost function with regularization')

# Set the model parameters
theta = np.ones([2, 1])

# Set the regularization parameter
lam = 1

# Normalize the features and add the intercept column
X_norm, mu, sigma = featureNormalize(X)
X_norm = np.concatenate((np.ones((m, 1)), X_norm), axis=1)

# Evaluate the cost function for the initial theta
J = computeCost(X_norm, y, theta, lam)
print('Cost function value: {:.4f}'.format(J))

input('Program paused. Press Enter to continue ... \n')

# ==== Part 3. Training regularized linear regression =====

print('Part 3. Training regularized linear regression')

# Set the initial values of the model parameters
theta = np.zeros([2, 1])
import matplotlib.pyplot as plt
# In[]
X=[]
Y=[]
with open("./ex1data2.txt",'r',encoding="UTF-8") as rf:
    for line in rf:
        splitResults=line.strip().split(",")
        X.append([float(splitResults[0]),float(splitResults[1])])
        Y.append(float(splitResults[2]))
# In[]
X=np.array(X)
Y=np.array(Y).reshape(len(Y),1)
m=X.shape[0]
print("X=[",X[0:10,:],"],Y=[",Y[0:10,:],"]")
# In[]
X_norm, mu, sigma = featureNormalize(X);

# In[]

X_norm=np.concatenate((np.ones((m,1)),X_norm),axis=1)
theta=np.zeros((3,1))
num_iters=400
alpha=0.1

# In[]
J=computeCostMulti(X_norm, Y, theta)
theta,J_history = gradientDescentMulti(X_norm, Y, theta, alpha, num_iters)
print("Theta computed from gradient descent:",theta[0],theta[1],theta[2])

# In[]
X=np.array(X)
Example #42
print('5x5 Identity Matrix: \n')
print(WUE.warmUpExercise())
input("Press Enter to continue...")
data = pd.read_csv('ex1data2.txt', header=None)
X, y = data.iloc[:, :2], data.iloc[:,
                                   2]  #Data Separated into two pandas series
m = np.size(y)  #number of training example

#Print out some data points
print('First 10 examples from the dataset: \n')
print('Printing X\n', X.head(10))
print('Printing y\n', y.head(10))

# Scale features and set them to zero mean
print('Normalizing Features ...\n')
[X, mu, sigma] = FN.featureNormalize(X)
# Add intercept term to X
X = np.concatenate([np.ones((m, 1)), np.array(X)], axis=1)
input("Press Enter to continue...")

# ================ Part 2: Gradient Descent ================
print('Running gradient descent ...\n')
alpha = 0.01
num_iters = 400
theta = np.zeros([3, 1])
[theta, J_history] = GDM.gradientDescentMulti(X, y, theta, alpha, num_iters)

# Plot the convergence graph
plt.plot(np.arange(0, num_iters), J_history, "b-")
plt.xlabel('Number of Iterations')
plt.ylabel('Cost J')
Example #43
from gradientDescentMulti import gradientDescent
import numpy as np
import pickle as pk
import matplotlib.pyplot as plt

data = np.loadtxt('ex1data2.txt', delimiter=',')


X = data[:, :-1]
y = data[:, -1]


del data
answers = {}

X, mu, sd = featureNormalize(X)
answers['X_norm'] = X
answers['mu'] = mu
answers['sd'] = sd

y, _, _ = featureNormalize(y)  # note: y is normalized too; keep its mean/std if predictions in the original units are needed later

X = np.concatenate((np.ones(shape=(X.shape[0], 1)), X), axis=1)
y = y.reshape(-1, 1)
theta = np.zeros(X.shape[1]).reshape(-1, 1)


theta, J_history = gradientDescent(X, y, theta, alpha=0.1)
print(theta)
print(J_history)
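gradientDescent is called above without an iteration count, so that project's version must default it; a hedged sketch of such a signature returning (theta, J_history):

import numpy as np

def gradientDescent(X, y, theta, alpha, num_iters=1500):
    """Batch gradient descent for linear regression with a default iteration budget."""
    m = y.shape[0]
    J_history = np.zeros(num_iters)
    for it in range(num_iters):
        theta = theta - (alpha / m) * X.T.dot(X.dot(theta) - y)
        residual = X.dot(theta) - y
        J_history[it] = float((residual ** 2).sum()) / (2 * m)
    return theta, J_history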
Example #44
theta = np.zeros((n + 1, 1))
# Print out original data and picture
print("The first ten lines from data set:")
for i in range(10):
    try:
        print("{0},{1}".format(x[i], y[i]))
    except:
        break
# computation environment setup
iteration = 1000
alpha = 0.3

#---------------------- Part 2: Feature Normalization-----------------
# Scale features and set them to zero mean
print('Normalizing Features ...\n')
x, mu, sd = featureNormalize(x, n)
X = np.hstack((np.ones((m, 1)), x))  # Add a column of ones to x as x0
#
#---------------------- Part 3: Gradient Descent-----------------

# finding theta, def gradientDescent(X,y,theta,alpha,num_iter)
theta, J_history = gradientDescent(X, y, theta, alpha, iteration)
print("Expected theta value is:\n", theta)
# with ex1data1.txt, the theta should be [[-3.63029144]
# [ 1.16636235]]
#
##---------------------- Part 4: Convergence Visualization---------------
# Plot figure def plotJHistory(J_history, iteration)
plotJHistory(J_history, iteration)

#------------------------ Part 5: Estimate as validation-------------------
y = data[:, 2].reshape(-1, 1)
m = len(y)

# Print out some data points
print('First 10 examples from the dataset: \n')
# (no direct Python one-liner for the original Octave fprintf; kept below for reference)
# fprintf(' x = [%.0f %.0f], y = %.0f \n', [X(1:10,:) y(1:10,:)]');
print(X[0:10, :])
print(y[0:10])

input('Program paused. Press enter to continue.\n')

# Scale features and set them to zero mean
print('Normalizing Features ...\n')

X, mu, sigma = featureNormalize(X)

# Add intercept term to X
X = np.vstack((np.ones(m), X.T)).T

## ================ Part 2: Gradient Descent ================

print('Running gradient descent ...\n')

# Choose some alpha value
alpha = 0.01
num_iters = 400

# Init Theta and Run Gradient Descent
theta = np.zeros((3, 1))
theta, J_history = gradientDescentMulti(X, y, theta, alpha, num_iters)
plt.axis([0.5, 6.5, 2, 8])
plt.gca().set_aspect('equal', adjustable='box')
plt.show(block=False)


raw_input('Program paused. Press enter to continue.')


## =============== Part 2: Principal Component Analysis ===============
#  You should now implement PCA, a dimension reduction technique. You
#  should complete the code in pca.m
#
print('Running PCA on example dataset.\n');

#  Before running PCA, it is important to first normalize X
X_norm, mu, _ = fn.featureNormalize(X)

#  Run PCA
U, S = pca.pca(X_norm)

#  Compute mu, the mean of each feature

#  Draw the eigenvectors centered at mean of data. These lines show the
#  directions of maximum variations in the dataset.
plt.hold(True)
dl.drawLine(mu, mu + 1.5 * S[0,0] * U[:,0].T, c='k', linewidth=2)
dl.drawLine(mu, mu + 1.5 * S[1,1] * U[:,1].T, c='k', linewidth=2)
plt.hold(False)

print('Top eigenvector: \n')
print(' U(:,1) = {:f} {:f} \n'.format(U[0,0], U[1,0]))