def predict(Theta1, Theta2, X):
    import numpy as np
    from sigmoid_function import sigmoid_function

    m = len(X[:, 0])
    # Forward-propagate through the two layers, adding a bias column of
    # ones before each weight multiplication.
    h1 = sigmoid_function(
        np.dot(np.concatenate((np.ones([m, 1]), X), axis=1), Theta1.T))
    h2 = sigmoid_function(
        np.dot(np.concatenate((np.ones([m, 1]), h1), axis=1), Theta2.T))
    # p is the maximum output activation, p_index its class index.
    p = np.max(h2, axis=1)
    p_index = np.argmax(h2, axis=1)
    return p, p_index
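# Every file in this section imports sigmoid_function from a module of the
# same name that is not shown here. A minimal sketch consistent with those
# call sites -- the module layout is taken from the imports, the body is the
# standard logistic function:
# sigmoid_function.py
import numpy as np

def sigmoid_function(z):
    # Standard logistic function, applied elementwise to scalars or arrays.
    return 1.0 / (1.0 + np.exp(-z))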
def fit_rvc_cost(psi, w, Hd, K):
    import numpy as np
    from scipy.stats import multivariate_normal
    from sigmoid_function import sigmoid_function

    I = K[:, 0].size
    # Ensure that Hd is a 1-D array before building the diagonal matrix.
    Hd = np.reshape(Hd, [Hd.size, ])
    Hd_diag = np.diag(Hd)
    # Prior term: psi ~ N(0, diag(1/Hd)). The mean vector is sized from psi
    # (the original hard-coded a dimension of 40 here).
    psi = psi.flatten()
    mvnpdf = multivariate_normal.pdf(psi, np.zeros(psi.size), np.diag(1 / Hd))
    L = I * (-1) * np.log(mvnpdf)
    psi = np.reshape(psi, [psi.size, 1])
    g = I * np.dot(Hd_diag, psi)
    H = I * Hd_diag
    predictions = sigmoid_function(np.dot(psi.T, K))
    predictions = np.reshape(predictions, [predictions.size, 1])
    for i in range(I):
        # Accumulate the negative log likelihood.
        y = predictions[i, 0]
        if w[i] == 1:
            L = L - np.log(y)
        else:
            L = L - np.log(1 - y)
        # Accumulate the gradient and Hessian contributions.
        K_temp = np.reshape(K[:, i], [K[:, i].size, 1])
        g = g + (y - w[i]) * K_temp
        H = H + y * (1 - y) * np.dot(K_temp, K_temp.T)
    return L, g, H
def nnCostFunction(nn_params, input_layer_size, hidden_layer_size, num_labels,
                   X, y, lambda_nn):
    import numpy as np
    from sigmoid_function import sigmoid_function
    from sigmoid_gradient import sigmoid_gradient

    # Unroll the parameter vector into the two weight matrices.
    Theta1 = np.reshape(
        nn_params[0:hidden_layer_size * (input_layer_size + 1)],
        [hidden_layer_size, input_layer_size + 1])
    Theta2 = np.reshape(nn_params[hidden_layer_size * (input_layer_size + 1):],
                        [num_labels, hidden_layer_size + 1])
    m = len(X[:, 0])

    # Forward propagation, adding the bias column at each layer.
    X = np.concatenate((np.ones([m, 1]), X), axis=1)
    a1 = X
    z2 = np.dot(a1, Theta1.T)
    a2 = sigmoid_function(z2)
    a2 = np.concatenate((np.ones([m, 1]), a2), axis=1)
    a3 = sigmoid_function(np.dot(a2, Theta2.T))

    # One-hot encode the labels; y takes values 1..num_labels.
    # (The original looped over a hard-coded 5000 samples instead of m.)
    ry = np.zeros([m, num_labels])
    for i in range(m):
        ry[i, y[i] - 1] = 1

    # Regularized cross-entropy cost; the bias columns are not regularized.
    cost = ry * np.log(a3) + (1 - ry) * np.log(1 - a3)
    J = -np.sum(cost) / m
    reg = np.sum(Theta1[:, 1:]**2) + np.sum(Theta2[:, 1:]**2)
    J = J + lambda_nn * 1.0 / (2 * m) * reg

    # Backpropagation algorithm.
    delta3 = a3 - ry
    temp = np.dot(delta3, Theta2)
    delta2 = temp[:, 1:] * sigmoid_gradient(z2)
    Delta1 = np.dot(delta2.T, a1)
    Delta2 = np.dot(delta3.T, a2)
    Theta1_grad = Delta1 / m + lambda_nn * np.concatenate(
        (np.zeros([hidden_layer_size, 1]), Theta1[:, 1:]), axis=1) / m
    Theta2_grad = Delta2 / m + lambda_nn * np.concatenate(
        (np.zeros([num_labels, 1]), Theta2[:, 1:]), axis=1) / m

    # Roll the gradients back into a single column vector.
    Theta1_grad = np.reshape(Theta1_grad, [Theta1_grad.size, 1])
    Theta2_grad = np.reshape(Theta2_grad, [Theta2_grad.size, 1])
    grad = np.concatenate((Theta1_grad, Theta2_grad), axis=0)
    return J, grad
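# A quick way to validate nnCostFunction is to compare its analytic gradient
# against central finite differences on a tiny random network. A minimal
# sketch, assuming the file-per-function module layout used elsewhere in this
# repo; the layer sizes, seed, and data here are arbitrary:
import numpy as np
from nnCostFunction import nnCostFunction

np.random.seed(0)
input_layer_size, hidden_layer_size, num_labels, m = 3, 4, 3, 5
n_params = hidden_layer_size * (input_layer_size + 1) \
    + num_labels * (hidden_layer_size + 1)
nn_params = np.random.randn(n_params) * 0.1
X = np.random.randn(m, input_layer_size)
y = np.array([1, 2, 3, 1, 2])

J, grad = nnCostFunction(nn_params, input_layer_size, hidden_layer_size,
                         num_labels, X, y, 1.0)
# Central finite differences over each parameter.
eps = 1e-4
num_grad = np.zeros(n_params)
for k in range(n_params):
    e = np.zeros(n_params)
    e[k] = eps
    J_plus, _ = nnCostFunction(nn_params + e, input_layer_size,
                               hidden_layer_size, num_labels, X, y, 1.0)
    J_minus, _ = nnCostFunction(nn_params - e, input_layer_size,
                                hidden_layer_size, num_labels, X, y, 1.0)
    num_grad[k] = (J_plus - J_minus) / (2 * eps)
print(np.max(np.abs(num_grad - grad.flatten())))  # should be ~1e-9 or smaller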
def predict(Theta1, Theta2, X):
    import numpy as np
    from sigmoid_function import sigmoid_function

    m, n = X.shape
    X0 = np.ones([m, 1])
    X = np.concatenate((X0, X), axis=1)
    # Three-layer network: each layer must be augmented with a bias column
    # of ones before it feeds the next layer.
    a1 = X
    a2 = sigmoid_function(np.dot(a1, Theta1.T))
    a2 = np.concatenate((X0, a2), axis=1)
    a3 = sigmoid_function(np.dot(a2, Theta2.T))
    # Return the maximum activation and the predicted label (1-indexed).
    max_prob = np.max(a3, axis=1)
    p = np.argmax(a3, axis=1) + 1
    max_prob = np.reshape(max_prob, [max_prob.size, 1])
    p = np.reshape(p, [p.size, 1])
    return max_prob, p
def batch_gradient_update(theta, X, y, alpha=1, threshold=1e-6, maxIter=1000):
    from sigmoid_function import sigmoid_function
    import numpy as np

    m = len(X[:, 0])
    # Note: threshold is accepted for a convergence test but is not used;
    # the loop always runs for maxIter iterations.
    for it in range(maxIter):
        T = np.zeros(np.shape(theta))
        h = sigmoid_function(np.dot(X, theta))
        # The inner loop variable must not shadow the outer one (the
        # original reused i for both loops).
        for i in range(m):
            T = T + (y[i] - h[i]) * (X[i].reshape(np.shape(theta)))
        theta += alpha * T / m
    return theta
def batch_gradient_update(theta, X, y, alpha=1, threshold=1e-6, maxIter=1000,
                          llambda=1):
    from sigmoid_function import sigmoid_function
    import numpy as np

    m = len(X[:, 0])
    # Regularized variant of the update above. The original added
    # +llambda*theta/m, which grows the weights; weight decay must shrink
    # them, so the sign (and the alpha factor) is corrected here.
    for it in range(maxIter):
        T = np.zeros(np.shape(theta))
        h = sigmoid_function(np.dot(X, theta))
        for i in range(m):
            T = T + (y[i] - h[i]) * (X[i].reshape(np.shape(theta)))
        theta += alpha * T / m
        theta -= alpha * llambda * theta / m
    return theta
def predictOneVsAll(all_theta, X):
    import numpy as np
    from sigmoid_function import sigmoid_function

    m, n = X.shape
    # Add the intercept column, score every one-vs-all classifier, and pick
    # the class whose classifier is most confident.
    X0 = np.ones([m, 1])
    X = np.concatenate((X0, X), axis=1)
    C = sigmoid_function(np.dot(X, all_theta.T))
    p = C.argmax(axis=1)
    return p
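# A shape check for predictOneVsAll on random data. all_theta has one row per
# class and one column per feature plus the intercept; all names and sizes
# here are illustrative:
import numpy as np
from predictOneVsAll import predictOneVsAll  # assumed module layout

np.random.seed(1)
m, n, num_labels = 10, 4, 3
X = np.random.randn(m, n)
all_theta = np.random.randn(num_labels, n + 1)
p = predictOneVsAll(all_theta, X)
print(p.shape)  # (10,), one 0-indexed class per sample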
def regularize_cost_function(theta, X, y, llambda):
    from sigmoid_function import sigmoid_function
    import numpy as np

    m = y.size
    grad = np.zeros([theta.size, 1])
    H = sigmoid_function(np.dot(X, theta))
    # Regularized cross-entropy cost. Note that the penalty here includes
    # theta[0]; conventionally the intercept is excluded from the sum.
    T = y * np.log(H) + (1 - y) * np.log(1 - H)
    J = -1 * np.sum(T) / m + llambda * np.sum(theta**2) / (2 * m)
    # Accumulate the gradient sample by sample.
    for i in range(m):
        grad = grad + np.reshape((H[i] - y[i]) * X[i, :].T, grad.shape)
    grad = grad / m + llambda * theta / m
    return J, grad
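# A minimal sketch of driving regularize_cost_function with plain gradient
# descent on toy data. theta is kept as a column vector so the gradient
# shapes line up; the step size, iteration count, and data are arbitrary:
import numpy as np
from regularize_cost_function import regularize_cost_function  # assumed layout

np.random.seed(2)
m, n = 50, 2
X = np.concatenate((np.ones([m, 1]), np.random.randn(m, n)), axis=1)
y = (X[:, 1:2] + X[:, 2:3] > 0).astype(float)  # column vector of 0/1 labels
theta = np.zeros([n + 1, 1])

alpha, llambda = 0.5, 1.0
for _ in range(200):
    J, grad = regularize_cost_function(theta, X, y, llambda)
    theta = theta - alpha * grad
print(J)  # the cost should decrease toward a plateau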
def cost_function(theta, X, y):
    from sigmoid_function import sigmoid_function
    import numpy as np

    m = y.size
    grad = np.zeros(theta.size)
    H = sigmoid_function(np.dot(X, theta))
    # Unregularized cross-entropy cost.
    T = y * np.log(H) + (1 - y) * np.log(1 - H)
    J = -np.sum(T) / m
    # Accumulate the gradient sample by sample.
    for i in range(m):
        grad = grad + (H[i] - y[i]) * X[i, :].T
    grad = grad / m
    return J, grad
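# As a sanity check, at theta = 0 every prediction is 0.5, so the cost must
# equal ln 2 ~ 0.693 regardless of the labels. A minimal check on toy data
# (names and values illustrative):
import numpy as np
from cost_function import cost_function  # assumed module layout

X = np.array([[1.0, 2.0], [1.0, -1.0], [1.0, 0.5]])
y = np.array([[1.0], [0.0], [1.0]])
theta = np.zeros([2, 1])
J, grad = cost_function(theta, X, y)
print(J)  # ~0.6931 = ln(2)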
def fit_rvc(X, w, nu, X_test, initial_psi, kernel, lam):
    import numpy as np
    from sigmoid_function import sigmoid_function
    from numpy.linalg import inv
    from scipy.optimize import minimize
    from fit_rvc_cost import fit_rvc_cost

    # Gram matrix K[X, X].
    I = X[0, :].size
    K = np.zeros([I, I])
    for i in range(I):
        for j in range(I):
            K[i, j] = kernel(X[:, i], X[:, j], lam)

    # Initialize the hidden variables H (one precision per data point).
    H = np.ones([I, 1])

    iterations_count = 0
    mu = 0
    sig = 0

    # H, w and K are read from the enclosing fit_rvc scope.
    def costFunction(psi):
        L, g, Hessian = fit_rvc_cost(psi, w, H, K)
        print("cost function: %s" % L)
        return L

    def gradientFunction(psi):
        L, g, Hessian = fit_rvc_cost(psi, w, H, K)
        return g.flatten()

    # The main loop.
    while True:
        # MAP estimate of psi via BFGS.
        res = minimize(costFunction, initial_psi, method='BFGS',
                       jac=gradientFunction)
        psi = res.x

        # Hessian S of the negative log posterior at the peak.
        S = np.diag(H.flatten())
        ys = sigmoid_function(np.dot(psi.reshape([psi.size, 1]).T, K))
        for i in range(I):
            y = ys[0, i]
            k_i = K[:, i].reshape([I, 1])
            S = S + y * (1 - y) * np.dot(k_i, k_i.T)

        # Mean and covariance of the Laplace approximation. S is positive
        # definite here, so the covariance is inv(S); the original negated
        # it, which produces negative variances downstream.
        mu = psi
        sig = inv(S)

        # Update H.
        H = H * (np.diag(sig).reshape([np.diag(sig).size, 1]))
        H = nu + 1 - H
        H = H / (mu.reshape([mu.size, 1])**2 + nu)
        iterations_count = iterations_count + 1
        print("iteration: %d" % iterations_count)
        if iterations_count == 3:  # fixed number of outer iterations
            break

    # Prune points whose precision H has diverged; the survivors are the
    # relevant points.
    threshold = 1000
    selector = (H.flatten() < threshold)
    X = X[:, selector]
    mu = mu[selector]
    mu = mu.reshape([mu.size, 1])
    sig = sig[np.ix_(selector, selector)]  # keep both rows and columns
    relevant_points = selector
    print("H: %s" % H)

    # Recompute K[X, X] on the pruned set.
    I = X[0, :].size
    K = np.zeros([I, I])
    for i in range(I):
        for j in range(I):  # the original looped over an undefined J here
            K[i, j] = kernel(X[:, i], X[:, j], lam)

    # Compute K[X, X_test].
    I_test = X_test[0, :].size
    K_test = np.zeros([I, I_test])
    for i in range(I):
        for j in range(I_test):
            K_test[i, j] = kernel(X[:, i], X_test[:, j], lam)

    # Compute mean and variance of the activation.
    mu_a = np.dot(mu.T, K_test)
    var_a_temp = np.dot(sig, K_test)
    var_a = np.zeros([1, I_test])
    for i in range(I_test):
        var_a[:, i] = np.dot(K_test[:, i].T, var_a_temp[:, i])

    # Approximate the integral to get the Bernoulli parameter.
    predictions = sigmoid_function(mu_a / np.sqrt(1 + np.pi / 8 * var_a))
    return predictions, relevant_points
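# fit_rvc leaves the kernel as a parameter with signature
# kernel(x_i, x_j, lam). The kernel actually used is not shown in this
# section; below is a Gaussian (RBF) kernel consistent with that signature,
# with lam assumed to play the role of the length scale:
import numpy as np

def gaussian_kernel(x_i, x_j, lam):
    # Squared-exponential kernel between two column vectors; lam is the
    # length scale (an assumption -- the original kernel is not shown).
    d = x_i - x_j
    return np.exp(-np.dot(d, d) / (2.0 * lam**2))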
from sigmoid_function import sigmoid_function
import numpy as np
import matplotlib.pyplot as plt

Zz = np.linspace(-5, 5, 100)
Gg = sigmoid_function(Zz)
plt.figure(1)
plt.plot(Zz, Gg)
plt.title('Sigmoid function')
plt.show()
def sigmoid_gradient(z):
    from sigmoid_function import sigmoid_function

    # Derivative of the sigmoid: g'(z) = g(z) * (1 - g(z)).
    g = sigmoid_function(z) * (1 - sigmoid_function(z))
    return g
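# A quick finite-difference check of sigmoid_gradient (the grid and epsilon
# are arbitrary; the module layout is assumed):
import numpy as np
from sigmoid_gradient import sigmoid_gradient
from sigmoid_function import sigmoid_function

z = np.linspace(-3, 3, 7)
eps = 1e-6
numeric = (sigmoid_function(z + eps) - sigmoid_function(z - eps)) / (2 * eps)
print(np.max(np.abs(numeric - sigmoid_gradient(z))))  # ~1e-12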
import numpy as np
from sklearn.datasets import make_blobs

# Module layout assumed to match the file-per-function pattern above.
from sigmoid_function import sigmoid_function
from cost_function import cost_function
from batch_gradient_update import batch_gradient_update
from featureNormalize import featureNormalize

#X, y = read_data("ex2data1.txt")
X, y = make_blobs(n_samples=400, centers=2, random_state=0, cluster_std=1)
# After featureNormalize the accuracy can reach 89%.
X, X_mu, X_sigma = featureNormalize(X)
#plot_data(X, y)
y = np.reshape(y, (y.size, 1))
m, n = X.shape
X = np.concatenate((np.ones([m, 1]), X), axis=1)
initial_theta = np.zeros([n + 1, 1])

# Check that cost_function evaluates cleanly at the starting point.
cost, grad = cost_function(initial_theta, X, y)

theta = batch_gradient_update(initial_theta, X, y)
print(theta)

# Threshold the predicted probabilities at 0.5 and measure accuracy.
# (The original divided by a hard-coded 100 although m = 400 here.)
prob = sigmoid_function(np.dot(X, theta))
prob[prob > 0.5] = 1.0
prob[prob < 0.5] = 0.0
y = np.reshape(y, prob.shape)
print("accuracy:", 1 - np.sum(np.abs(prob - y)) / m)