def costFunction(Theta, X, Y, lam):
    """Returns the regularized cost of Theta for a one-hidden-layer neural
    network with sigmoid activations."""
    m = len(X)
    n = len(X[0])
    k = len(Y[0])
    k_h = (n + k) // 2  # average of features and categories
    Theta1 = np.reshape(Theta[0:(n+1)*k_h], (n+1, k_h))
    Theta2 = np.reshape(Theta[(n+1)*k_h:], (k_h+1, k))
    one = np.ones(m)
    one = np.reshape(one, (m, 1))
    a1 = np.concatenate((one, X), axis=1)
    # compute inputs to hidden layer
    a2 = sigmoid(np.dot(a1, Theta1))
    a2 = np.concatenate((one, a2), axis=1)
    # compute output layer
    a3 = sigmoid(np.dot(a2, Theta2))
    # compute cost
    J = -(1.0/m) * (np.dot(np.log(a3).T, Y) +
                    np.dot(np.log(1.0 - a3).T, (1.0 - Y)))
    J = J.sum()
    # compute regularization term (the bias weights in row 0 are not regularized)
    reg = (Theta1[1:, :] ** 2).sum() + (Theta2[1:, :] ** 2).sum()
    J = J + (lam / (2.0 * m)) * reg
    print('cost =', J)
    return J
def lrCostFunction(theta, X, y, Lambda):
    """Computes the cost of using theta as the parameter for regularized
    logistic regression and the gradient of the cost w.r.t. the parameters.
    """
    # ====================== YOUR CODE HERE ======================
    # Instructions: Compute the cost of a particular choice of theta.
    #               You should set J to the cost.
    #
    # Hint: The computation of the cost function and gradients can be
    #       efficiently vectorized. For example, consider the computation
    #
    #           sigmoid(X * theta)
    #
    #       Each row of the resulting matrix will contain the value of the
    #       prediction for that example. You can make use of this to vectorize
    #       the cost function and gradient computations.
    #
    # =============================================================
    m = y.size
    J = (-dot(y, log(sigmoid(dot(X, theta))))
         - dot((1 - y), log(1 - sigmoid(dot(X, theta))))) / m \
        + (Lambda / (2 * m)) * (sum(theta ** 2) - theta[0] ** 2)

    # theta0 = np.copy(theta)
    # np.put(theta0, 0, 0)
    # J = dot((sigmoid(dot(X, theta)) - y), X) / m + (Lambda / m) * theta0
    # J = sum(-y*log(sigmoid(dot(X, theta.T))) - (1-y)*log(1-sigmoid(dot(X, theta.T)))) / m
    return J
def predict(theta, board):
    """
    theta - unrolled neural network weights
    board - n*n matrix representing the board

    Returns:
    h - n*1 column vector - confidence level for performing the next move
    """
    n = size(board, 1)

    # neural network parameters
    input_units = n * n
    hidden_units = n * n
    output_units = n * n

    # theta1 - unrolled weights between input and hidden layer
    # theta2 - unrolled weights between hidden and output layer
    theta1 = theta[:, :hidden_units * (input_units + 1)]
    theta2 = theta[:, hidden_units * (input_units + 1):]

    # reshaping to obtain rolled weights
    theta1 = np.reshape(theta1, (hidden_units, input_units + 1))
    theta2 = np.reshape(theta2, (output_units, hidden_units + 1))

    # calculating confidence level given board position
    # and neural network weights
    X = board.flatten().T
    X = concatenate((mat(1), X))
    z2 = theta1 * X
    a2 = sigmoid(z2)
    a2 = concatenate((mat(1), a2))
    z3 = theta2 * a2
    h = sigmoid(z3)
    return h
def lrCostFunction(theta, X, y, lmbda):
    # Initialize some useful values
    m = y.shape[0]  # number of training examples

    # You need to return the following variables correctly
    J = 0
    grad = np.zeros(theta.shape)

    # ====================== YOUR CODE HERE ======================
    def h(X, theta):
        return X.dot(theta)

    J = float(-y.T * np.nan_to_num(np.log(sigmoid(h(X, theta))).T)
              - (1 - y).T * np.nan_to_num(np.log(1 - sigmoid(h(X, theta))).T)) / m
    reg_cost = theta.copy()
    reg_cost[0] = 0
    J += (lmbda * reg_cost.T.dot(reg_cost)) / (2 * m)

    grad = np.asarray((sigmoid(h(X, theta)) - y.T).dot(X) / m)[0]
    reg_grad = theta * (float(lmbda) / m)
    reg_grad[0] = 0
    grad += reg_grad
    # =============================================================
    return (J, grad)
def cost_function(cost_function_parameters):
    """Cost function"""
    theta = cost_function_parameters['theta']
    input_layer_size = cost_function_parameters['input_layer_size']
    hidden_layer_size = cost_function_parameters['hidden_layer_size']
    num_labels = cost_function_parameters['number_of_labels']
    x_values = cost_function_parameters['x_values']
    y_values = cost_function_parameters['y_values']
    lambda_value = cost_function_parameters['lambda_value']

    theta_1_parameters = theta[0: (hidden_layer_size * (input_layer_size + 1))]
    theta_2_parameters = theta[(hidden_layer_size * (input_layer_size + 1)):]
    theta_1 = theta_1_parameters.reshape(hidden_layer_size, input_layer_size + 1)
    theta_2 = theta_2_parameters.reshape(num_labels, (hidden_layer_size + 1))

    input_examples_size = x_values.shape[0]
    hidden_layer_input = numpy.c_[numpy.ones(input_examples_size), x_values].dot(theta_1.T)
    hidden_layer_output = sigmoid(hidden_layer_input)
    output_layer_input = numpy.c_[numpy.ones(hidden_layer_output.shape[0]), hidden_layer_output].dot(theta_2.T)
    output = sigmoid(output_layer_input)

    first_part_of_cost = -((y_values) * numpy.log(output))
    second_part_of_cost = ((1.0 - y_values) * numpy.log(1.0 - output))
    # exclude the bias columns of both weight matrices from regularization
    combined_thetas = numpy.append(theta_1[:, 1:].flatten(), theta_2[:, 1:].flatten())
    regularization_term = (lambda_value / (2.0 * input_examples_size)) * numpy.sum(numpy.power(combined_thetas, 2))
    j = ((1.0 / input_examples_size) * numpy.sum(numpy.sum(first_part_of_cost - second_part_of_cost))) + regularization_term
    return j
def charTrain():
    X = np.matrix('0,0,1,0; 0,1,0,0; 0,0,0,1; 0,0,0,1; 1,0,0,0')  # encoding for "hello"
    numIn, numHid, numOut = 4, 10, 4
    numInTot = numIn + numHid + 1
    theta1 = np.matrix(np.sqrt(6.0 / (numIn + numHid)) * np.random.randn(numInTot, numHid))
    theta2 = np.matrix(np.sqrt(6.0 / (numOut + numHid)) * np.random.randn(numHid + 1, numOut))
    theta1_grad = np.zeros((numInTot, numHid))
    theta2_grad = np.zeros((numHid + 1, numOut))
    hid_last = np.zeros((numHid, 1))
    m = X.shape[0]
    alpha = 0.05
    for ita in range(5000):
        for j in range(m - 1):  # every training element except the last, whose successor is unknown
            y = X[j + 1, :]  # given the input char, the next char is the target
            # forward pass
            context = hid_last
            x_context = np.concatenate((X[j, :], context.T), axis=1)
            a1 = np.matrix(np.concatenate((x_context, np.matrix([[1]])), axis=1)).T
            z2 = theta1.T * a1
            a2 = np.concatenate((sigmoid(z2), np.matrix([[1]])))
            hid_last = a2[0:-1, 0]
            z3 = theta2.T * a2
            a3 = sigmoid(z3)
            # backward pass
            d3 = np.multiply(z3.T, (a3.T - y))  # 1*4
            theta2 = theta2 - alpha * a2 * d3   # 11*1 * 1*4 => 11*4, d(loss)/d(theta2) = d(loss)/d(z3) * d(z3)/d(theta2)
            d2 = np.multiply((theta2 * d3.T), np.multiply(a2, (1 - a2)))  # (11*4 * 4*1) .* (11*1 .* 11*1) => 11*1
            theta1 = theta1 - alpha * a1 * d2[0:numHid, :].T  # 15*1 * 1*10 => 15*10
    return theta1, theta2, numHid, numOut
def costFunction(theta, X, y, return_grad=False):
    # COSTFUNCTION Compute cost and gradient for logistic regression
    #   J = COSTFUNCTION(theta, X, y) computes the cost of using theta as the
    #   parameter for logistic regression and the gradient of the cost
    #   w.r.t. the parameters.
    import numpy as np
    from sigmoid import sigmoid

    # Initialize some useful values
    m = len(y)  # number of training examples

    # You need to return the following variables correctly
    J = 0
    grad = np.zeros(theta.shape)

    # ====================== YOUR CODE HERE ======================
    # Instructions: Compute the cost of a particular choice of theta.
    #               You should set J to the cost.
    #               Compute the partial derivatives and set grad to the partial
    #               derivatives of the cost w.r.t. each parameter in theta
    #
    # Note: grad should have the same dimensions as theta
    #
    # Given the following dimensions:
    #   theta.shape = (n+1, 1)
    #   X.shape     = (m, n+1)
    # the equation's theta' * X becomes np.dot(X, theta), giving an (m, 1) vector.
    # Given that y.shape = (m,), we transpose the (m, 1) shaped
    #   np.log(sigmoid(np.dot(X, theta))) and np.log(1 - sigmoid(np.dot(X, theta)))
    # to obtain (1, m) vectors that are added element-wise and whose elements
    # are summed to form a scalar.
    one = y * np.transpose(np.log(sigmoid(np.dot(X, theta))))
    two = (1 - y) * np.transpose(np.log(1 - sigmoid(np.dot(X, theta))))
    J = -(1. / m) * (one + two).sum()

    # Here we need n+1 gradients. Note that
    #   y.shape = (m,) and sigmoid(np.dot(X, theta)).shape = (m, 1),
    # so we transpose the latter, subtract y to get a (1, m) vector, multiply
    # it by X (shape (m, n+1)) to get a (1, n+1) vector, and transpose the
    # result. This vectorized multiplication takes care of the sum.
    grad = (1. / m) * np.dot(sigmoid(np.dot(X, theta)).T - y, X).T

    if return_grad:
        return J, np.transpose(grad)
    else:
        return J  # for use in fmin/fmin_bfgs optimization functions
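# A sketch of feeding costFunction above (the default return_grad=False path)
# to scipy.optimize.fmin, which only needs the scalar cost. The toy data below
# is an illustrative assumption, and because the function itself does
# `from sigmoid import sigmoid`, a sigmoid.py module must be importable.
import numpy as np
from scipy.optimize import fmin

def _demo_costFunction_with_fmin():
    np.random.seed(0)
    m = 50
    X = np.hstack((np.ones((m, 1)), np.random.randn(m, 2)))
    y = (X[:, 1] + X[:, 2] > 0).astype(float)
    theta0 = np.zeros(X.shape[1])
    # fmin minimizes the scalar cost; the gradient is not needed here
    theta_opt = fmin(costFunction, theta0, args=(X, y), maxiter=400)
    return theta_opt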
def sigmoidGradient(z):
    """Returns the gradient of the sigmoid function evaluated at z.

    g = SIGMOIDGRADIENT(z) computes the gradient of the sigmoid function
    evaluated at z. This should work regardless of whether z is a matrix or a
    vector. In particular, if z is a vector or matrix, the gradient is
    returned for each element."""
    return sigmoid(z) * (1 - sigmoid(z))
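# A quick sanity check for sigmoidGradient, written as a minimal sketch: it
# compares the analytic gradient against a central finite difference. The
# local _sigmoid helper is defined here only so the check is self-contained.
import numpy as np

def _sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

def _check_sigmoid_gradient():
    z = np.array([-1.0, -0.5, 0.0, 0.5, 1.0])
    eps = 1e-5
    analytic = _sigmoid(z) * (1 - _sigmoid(z))
    numeric = (_sigmoid(z + eps) - _sigmoid(z - eps)) / (2 * eps)
    # the two should agree to within roughly 1e-10
    assert np.allclose(analytic, numeric, atol=1e-8)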
def sigmoidGradient(z):
    # SIGMOIDGRADIENT returns the gradient of the sigmoid function evaluated at z
    g = zeros(z.shape)
    g = sigmoid(z) * (1.0 - sigmoid(z))  # element-wise product
    return g
def compute_cost(theta, X, y):
    # computes cost given predicted and actual values
    m = X.shape[0]  # number of training examples
    theta = np.reshape(theta, (len(theta), 1))
    # y = reshape(y, (len(y), 1))

    J = (1. / m) * (-np.transpose(y).dot(np.log(sg.sigmoid(X.dot(theta))))
                    - np.transpose(1 - y).dot(np.log(1 - sg.sigmoid(X.dot(theta)))))

    grad = np.transpose((1. / m) * np.transpose(sg.sigmoid(X.dot(theta)) - y).dot(X))
    # optimize.fmin expects a single value, so cannot return grad
    return J.mean()  # , grad
def sigmoidGradient(z):
    import sigmoid as sg
    import numpy as np

    g = np.zeros(np.size(z))
    g = sg.sigmoid(z) * (1 - sg.sigmoid(z))
    return g
def costFunctionReg(theta, X, y, lam):
    dim = X.shape
    m = dim[0]
    theta = theta.reshape(theta.shape[0], 1)
    # cross-entropy term plus a positive regularization term that skips theta[0]
    J = -(1.0 / m) * (numpy.dot(numpy.transpose(y),
                                utils.multimap(math.log, sigmoid.sigmoid(numpy.dot(X, theta))))
                      + numpy.dot(numpy.transpose(1 - y),
                                  utils.multimap(math.log, 1 - sigmoid.sigmoid(numpy.dot(X, theta))))) \
        + (lam / (2.0 * m)) * numpy.dot(numpy.transpose(theta[1:, :]), theta[1:, :])
    return float(J[0])
def predict(X, Theta1, Theta2):
    m, n = X.shape
    a1 = X  # X is assumed to already include the bias column
    z2 = a1.dot(Theta1.T)
    a2 = sigmoid(z2)
    a2 = np.concatenate((np.ones((m, 1)), a2), axis=1)
    z3 = a2.dot(Theta2.T)
    a3 = sigmoid(z3)
    return np.argmax(a3, axis=1)[np.newaxis]
def predict(nn_params, layers, X, y, lam, display, path):
    m = X.shape[0]
    Theta = reshapeThetas(nn_params, layers)
    l = len(layers)
    A = []
    A_ones = []
    A_sig = []
    Z = []
    J = 0
    for i in range(0, l):
        A.append(0)
        A_ones.append(0)
        A_sig.append(0)
        Z.append(0)

    # forward propagation through all layers
    A_ones[0] = ones((m, 1)) + 0.0
    A[0] = concatenate((A_ones[0], X), 1)
    Z[1] = dot(A[0], Theta[0].conj().T)
    for i in range(1, l - 1):
        A_ones[i] = ones((Z[i].shape[0], 1)) + 0.0
        A_sig[i] = sigmoid(Z[i])
        A[i] = concatenate((A_ones[i], A_sig[i]), 1)
        Z[i + 1] = dot(A[i], Theta[i].conj().T)
    A[-1] = sigmoid(Z[-1])
    predictions = A[-1].argmax(axis=1) + 0.0

    # cost calculation
    if not isinstance(y, int):
        # if there are associated y values, calculate test results
        for i in range(0, layers[-1]):
            J_curr = (1.0 / m) * sum(-1 * ((y == i) * log(A[-1][:, i]))
                                     - (1 - (y == i)) * log(1 - A[-1][:, i]))
            J += J_curr
        if display == 1:
            # if the results should be displayed, do so
            print(mean(predictions == y) * 100, '%')
            print("Cost:", J)
        return (J, mean(predictions == y))
    else:
        # if there are no y values, save predictions to file
        pfile = open(path + '/predictions.txt', 'w')
        for i in predictions:
            pfile.write(str(int(i)) + '\n')
        pfile.close()
        ffile = open(path + '/feature predict.txt', 'w')
        A_c = A[-1]
        A_c.tolist()
        for i in range(0, len(A_c)):
            ffile.write(','.join(str(elem) for elem in A_c[i]) + '\n')
        ffile.close()
def sigmoidGradient(z):
    # sigmoidGradient returns the gradient of the sigmoid function evaluated at z
    g = zeros(z.shape)

    # =========================== DONE ==================================
    # Instructions: Compute the gradient of the sigmoid function evaluated at
    #               each value of z.
    g += sigmoid(z) * (1 - sigmoid(z))

    return g
def predict(Theta1, Theta2, X):
    # Useful values
    m = X.shape[0]
    num_labels = Theta2.shape[0]

    a1 = np.vstack((np.ones(m), X.T)).T
    a2 = sigmoid(np.dot(a1, Theta1.T))
    a2 = np.vstack((np.ones(m), a2.T)).T
    a3 = sigmoid(np.dot(a2, Theta2.T))
    return np.argmax(a3, axis=1)
def costFunction(theta, X, y):
    """Computes the cost of using theta as the parameter for logistic
    regression and the gradient of the cost w.r.t. the parameters."""
    from numpy import dot

    # Initialize some useful values
    m = y.size  # number of training examples

    first = -dot(y, log(sigmoid(dot(X, theta))))
    second = -dot((1 - y), log(1 - sigmoid(dot(X, theta))))
    # first = -dot(y, log(sigmoid(dot(X, theta))))
    # second = -dot((ones(m) - y), log(ones(m) - sigmoid(dot(X, theta))))
    J = (first + second) / m
    return J
def sigmoidGradient(z):
    g = np.zeros(z.shape)

    # ====================== YOUR CODE HERE ======================
    # Instructions: Compute the gradient of the sigmoid function evaluated at
    #               each value of z (z can be a matrix, vector or scalar).
    g = np.multiply(sigmoid(z), 1 - sigmoid(z))
    # =============================================================

    return g
def sigmoidGradient(z):
    """Computes the gradient of the sigmoid function evaluated at z.
    This should work regardless of whether z is a matrix or a vector.
    In particular, if z is a vector or matrix, the gradient is returned
    for each element."""
    # ====================== YOUR CODE HERE ======================
    # Instructions: Compute the gradient of the sigmoid function evaluated at
    #               each value of z (z can be a matrix, vector or scalar).
    # =============================================================
    g = sigmoid(z) * (1 - sigmoid(z))
    return g
def lrCostFunction(theta, X, y, lambda_reg, return_grad=False):
    # LRCOSTFUNCTION Compute cost and gradient for logistic regression with
    # regularization
    #   J = LRCOSTFUNCTION(theta, X, y, lambda_reg) computes the cost of using
    #   theta as the parameter for regularized logistic regression and the
    #   gradient of the cost w.r.t. the parameters.
    import numpy as np
    from sigmoid import sigmoid
    import sys

    # Initialize some useful values
    m = len(y)  # number of training examples

    # You need to return the following variables correctly
    J = 0
    grad = np.zeros(theta.shape)

    # ====================== YOUR CODE HERE ======================
    # Instructions: Compute the cost of a particular choice of theta.
    #               You should set J to the cost.
    #               Compute the partial derivatives and set grad to the partial
    #               derivatives of the cost w.r.t. each parameter in theta
    #
    # taken from costFunctionReg.py
    one = y * np.transpose(np.log(sigmoid(np.dot(X, theta))))
    two = (1 - y) * np.transpose(np.log(1 - sigmoid(np.dot(X, theta))))
    reg = (float(lambda_reg) / (2 * m)) * np.power(theta[1:theta.shape[0]], 2).sum()
    J = -(1. / m) * (one + two).sum() + reg

    grad = (1. / m) * np.dot(sigmoid(np.dot(X, theta)).T - y, X).T + (float(lambda_reg) / m) * theta

    # the case of j = 0 (recall that grad is an n+1 vector)
    grad_no_regularization = (1. / m) * np.dot(sigmoid(np.dot(X, theta)).T - y, X).T
    # assign only the first element of grad_no_regularization to grad
    grad[0] = grad_no_regularization[0]

    # display cost at each iteration
    sys.stdout.write("Cost: %f \r" % (J))
    sys.stdout.flush()

    if return_grad:
        return J, grad.flatten()
    else:
        return J
    # =============================================================
def runForward(X, theta1, theta2, numHid, numOut):
    m = X.shape[0]
    hid_last = np.zeros((numHid, 1))  # context units from the last time step, initialized to 0
    results = np.zeros((m, numOut))   # save the output: given 4 samples (each 1*4), the output is 4*4 too
    for j in range(m):  # one sample at a time
        context = hid_last
        x_context = np.concatenate((X[j, :], context.T), axis=1)  # concat((1*4, 1*10)) ==> 1*14
        a1 = np.matrix(np.concatenate((x_context, np.matrix([[1]])), axis=1)).T  # add bias, making it 1*15; then .T -> 15*1
        z2 = theta1.T * a1  # (15*10).T * 15*1 ==> 10*1
        a2 = np.concatenate((sigmoid(z2), np.matrix([[1]])))  # add hidden-layer bias, making it 11*1
        hid_last = a2[0:-1, 0]  # update hid_last
        z3 = theta2.T * a2  # (11*4).T * 11*1 ==> 4*1
        a3 = sigmoid(z3)
        results[j, :] = a3.reshape(numOut,)  # each row of results is the output for that input step
    return results
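# A minimal usage sketch tying charTrain and the five-argument runForward above
# together: train the tiny character-level recurrent net on the "hello"
# encoding, then run it forward on the same inputs. The sigmoid helper both
# functions rely on is defined here as the usual logistic function.
import numpy as np

def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

def _demo_char_rnn():
    theta1, theta2, numHid, numOut = charTrain()
    X = np.matrix('0,0,1,0; 0,1,0,0; 0,0,0,1; 0,0,0,1; 1,0,0,0')
    probs = runForward(X, theta1, theta2, numHid, numOut)
    # predicted index of the next character for each input row
    return probs.argmax(axis=1)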
def predict(Theta1, Theta2, X):
    """Predicts the label of an input given a trained neural network.

    p = PREDICT(Theta1, Theta2, X) outputs the predicted label of X given the
    trained weights of a neural network (Theta1, Theta2)."""
    # Useful values
    m = X.shape[0]

    h1 = sigmoid(np.c_[np.ones((m, 1)), X].dot(Theta1.T))
    h2 = sigmoid(np.c_[np.ones((m, 1)), h1].dot(Theta2.T))
    p = np.argmax(h2, axis=1)
    return p
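# A minimal usage sketch for the two-layer predict above, using randomly
# initialized weights purely to illustrate the expected shapes. The sizes
# 400/25/10 are illustrative assumptions (a 20x20-pixel digit setup), and the
# same sigmoid helper used by predict is assumed to be in scope.
import numpy as np

def _demo_predict_shapes():
    m, n_in, n_hid, n_out = 5, 400, 25, 10
    X = np.random.rand(m, n_in)
    Theta1 = 0.1 * np.random.randn(n_hid, n_in + 1)   # (hidden units) x (inputs + bias)
    Theta2 = 0.1 * np.random.randn(n_out, n_hid + 1)  # (labels) x (hidden units + bias)
    p = predict(Theta1, Theta2, X)
    return p.shape  # (5,): one predicted label index per example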
def runForward(X, theta1, theta2, numHid):
    m = X.shape[0]
    hid_last = np.zeros((numHid, 1))  # context units from the last time step, initialized to 0
    results = np.zeros((m, 1))        # save the output
    for j in range(m):  # one sample at a time
        context = hid_last
        x_context = np.concatenate((X[j, :], context))  # concat((1*1, 4*1)) ==> 5*1
        a1 = np.matrix(np.concatenate((x_context, np.matrix([[1]]))))  # add bias, making it 6*1
        z2 = theta1.T * a1  # (6*4).T * 6*1 ==> 4*1
        a2 = np.concatenate((sigmoid(z2), np.matrix([[1]])))  # add hidden-layer bias, making it 5*1
        hid_last = a2[0:-1, 0]  # update hid_last
        z3 = theta2.T * a2  # (5*1).T * 5*1 ==> 1*1
        a3 = sigmoid(z3)
        results[j] = a3
    return results
def predict(self, stream):
    """Predicts the direction of movement based on the NN response"""
    input_layer_size, number_of_labels, x_value = _convert_stream_to_array(stream)
    theta1_params = self.thetas[0: (self.hidden_layer_size * (input_layer_size + 1))]
    theta2_params = self.thetas[(self.hidden_layer_size * (input_layer_size + 1)):]
    theta_1 = theta1_params.reshape(self.hidden_layer_size, input_layer_size + 1)
    theta_2 = theta2_params.reshape(number_of_labels, (self.hidden_layer_size + 1))

    first_layer_output = x_value.dot(theta_1.T)
    hidden_layer_input = sigmoid(first_layer_output)
    hidden_layer_output = c_[[1], [hidden_layer_input]].dot(theta_2.T)
    model_output = sigmoid(hidden_layer_output)

    index, value = max(enumerate(model_output[0]), key=operator.itemgetter(1))
    print(value)
    return CLASSIFICATION_LABELS[index]
def runForward(X, theta1, theta2):
    m = X.shape[0]
    # forward propagation
    hid_last = np.zeros((numHid, 1))  # context units
    results = np.zeros((m, numOut))
    for j in range(m):  # for every input element
        context = hid_last
        x_context = np.concatenate((X[j, :], context.T), axis=1)
        a1 = np.matrix(np.concatenate((x_context, np.matrix([[1]])), axis=1)).T  # add bias and context units to the input layer
        z2 = theta1.T * a1
        a2 = np.concatenate((sigmoid(z2), np.matrix([[1]])))  # add bias to the hidden-layer output
        hid_last = a2[0:-1, 0]
        z3 = theta2.T * a2
        a3 = sigmoid(z3)
        results[j, :] = a3.reshape(numOut,)
    return results
def predict(all_theta, X):
    m, n = X.shape
    X = np.hstack((np.ones((m, 1)), X))
    prediction = np.argmax(sigmoid(np.dot(X, all_theta.T)), axis=1)
    return prediction
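# A minimal usage sketch for the one-vs-all predict above. all_theta is assumed
# to hold one row of logistic-regression parameters per class, each of length
# n + 1 with the bias weight first, and a sigmoid helper is assumed in scope.
import numpy as np

def _demo_one_vs_all_predict():
    num_classes, n = 3, 4
    X = np.random.rand(6, n)
    all_theta = np.random.randn(num_classes, n + 1)
    return predict(all_theta, X)  # array of 6 class indices in {0, 1, 2}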
def activation(self):
    """Applies the sigmoid function to every neuron in the layer."""
    for it_act_neur in self.neurone_list:
        # if not (it_act_neur.in_val < 10000 and it_act_neur.in_val > -10000):
        #     print("Error in activation, in_val : [{}], lay_type = [{}]".format(it_act_neur.in_val, self.layer_type))
        it_act_neur.in_val = sigmoid(it_act_neur.in_val)
def computeRegularizedCost(theta, X, y, lam):
    m = len(y)
    z = np.dot(theta, X)
    h = sigmoid(z)
    J = ((lam * sum(theta[1:] * theta[1:]) / 2.0) +
         sum((-y * np.log(h)) - ((1 - y) * np.log(1 - h)))) / m
    return J
def predict(theta, X):
    """PREDICT Predict whether the label is 0 or 1 using learned logistic
    regression parameters theta.

    p = PREDICT(theta, X) computes the predictions for X using a threshold
    at 0.5 (i.e., if sigmoid(theta'*x) >= 0.5, predict 1).
    """
    return sigmoid(np.dot(X, theta)) >= 0.5
def nnCostFunction(thetas, X, y, struc, lambd=1.0, bias=1):
    j = 0.0
    grad = {}
    grad_final = np.empty_like([])
    m, n = X.shape
    hidden = []
    t1 = 0
    t2 = 0

    # try:
    #     my2, ny2 = y2.shape
    # except:
    #     ny2 = 1
    #
    # if ny2 < 2:
    #     y = np.zeros((len(y2), y2.max() + 1))
    #     for i in range(0, len(y2)):
    #         for ii in range(0, len(y[i])):
    #             if y2[i] == ii:
    #                 y[i][ii] = 1
    # else:
    #     y = y2

    # unroll thetas into one weight matrix per layer
    for i in range(0, len(struc)):
        m2 = struc[i][0]
        n2 = struc[i][1]
        t2 += m2 * n2
        hidden.append({'layer': i, 'theta': thetas[t1:t2].reshape(n2, m2).transpose()})
        t1 = t2

    local = {'a1': X, 't': 0.0}
    c = 1
    last = ''
    if bias == 1:
        # forward propagation, accumulating the regularization term as we go
        for layer in hidden:
            theta = layer['theta']
            local['Theta' + str(c)] = theta
            local['theta' + str(c)] = theta.copy()
            local['theta' + str(c)][:, 0] = 0.0
            local['t'] += (local['theta' + str(c)][:] ** 2).sum()
            local['a' + str(c)] = np.hstack((np.ones((m, 1)), local['a' + str(c)]))
            c += 1
            local['z' + str(c)] = local['a' + str(c - 1)].dot(theta.conj().transpose())
            local['a' + str(c)] = s.sigmoid(local['z' + str(c)])
        last = 'a' + str(c)

    cost = y * np.log(local[last]) + (1 - y) * np.log(1 - local[last])
    r = (lambd / (2.0 * m)) * local['t']
    j = -(1.0 / m) * cost.sum() + r

    # back propagation
    local['s' + str(c)] = local['a' + str(c)] - y
    for i in range(1, c):
        local['s' + str(c - i)] = ((local['s' + str(c)]).dot(local['Theta' + str(c - 1)][:, 1:])) \
            * sigg.sigmoidGradient(local['z' + str(c - 1)])
    for i in range(0, c - 1):
        delta = (local['s' + str(c - i)].conj().transpose()).dot(local['a' + str(c - (i + 1))])
        r = (lambd / m) * local['theta' + str(c - (i + 1))]
        grad['Theta' + str(c - (i + 1))] = (1.0 / m) * delta + r
    for i in range(1, c):
        grad_final = np.hstack((grad_final.T.ravel(), grad['Theta' + str(i)].T.ravel()))

    return (j, grad_final)
def computeGrad(theta, X, y):
    # Computes the gradient of the cost with respect to the parameters.
    m = X.shape[0]  # number of training examples
    grad = zeros(size(theta))

    for i in range(theta.shape[0]):
        for j in range(m):
            grad[i] += (sigmoid(dot(X[j, :], theta)) - y[j]) * X[j, i]
    grad /= m
    # =============================================================
    return grad
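# For comparison, a vectorized sketch of the same gradient: X.T times the
# residual vector replaces the explicit double loop above. It assumes X, y and
# theta are numpy arrays and that a sigmoid helper is in scope; the result
# should match computeGrad element for element.
import numpy as np

def computeGradVectorized(theta, X, y):
    m = X.shape[0]
    # sums (h - y) * x_i over all examples in a single matrix product
    return np.dot(X.T, sigmoid(np.dot(X, theta)) - y) / m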
def nn_cost_function(nn_params, input_layer_size, hidden_layer_size, num_labels, X, y, lamb):
    Theta1 = numpy.reshape(nn_params[:hidden_layer_size * (input_layer_size + 1)],
                           (hidden_layer_size, input_layer_size + 1), order="F")
    Theta2 = numpy.reshape(nn_params[hidden_layer_size * (input_layer_size + 1):],
                           (num_labels, hidden_layer_size + 1), order="F")
    m = len(X)
    yvec = __formalize(y, num_labels)

    X = numpy.c_[numpy.ones((m, 1)), X]
    a2 = sigmoid(numpy.dot(X, Theta1.T))
    a2 = numpy.c_[numpy.ones((len(a2), 1)), a2]
    a3 = sigmoid(numpy.dot(a2, Theta2.T))

    first = numpy.multiply(yvec, numpy.log(a3))
    second = numpy.multiply(1 - yvec, numpy.log(1 - a3))
    cost = -numpy.sum(numpy.sum(first + second)) / m

    theta1_reg = numpy.sum(numpy.sum(numpy.power(Theta1[:, 1:], 2)))
    theta2_reg = numpy.sum(numpy.sum(numpy.power(Theta2[:, 1:], 2)))
    extra = lamb * (theta1_reg + theta2_reg) / (2 * m)
    return cost + extra
def compute_cost(thetas, X, y):
    cost = 0
    theta_T = thetas.transpose()
    m = X.shape[0]
    n = X.shape[1]
    for i in range(0, m):
        x_i = X[i:i + 1, 0:n].transpose()
        y_i = y[i, 0]
        theta_feature_product = np.dot(theta_T, x_i)[0]
        hypothesis_value = sigmoid(theta_feature_product)
        cost += (y_i * math.log(hypothesis_value)) + ((1 - y_i) * math.log(1 - hypothesis_value))
    cost = -(cost / m)
    return cost
def cost_Function_Reg(X, Y, theta, lmd):
    m = X.shape[0]
    Z = np.dot(X, theta)
    g = sigmoid(Z)
    cost = -(Y.T).dot(np.log(g)) - ((1 - Y).T).dot(np.log(1 - g))
    cost = cost / m + lmd * (theta.T).dot(theta) / (2 * m)
    # g.shape = [m,]
    # Y.shape = [m,1]
    # subtracting the two directly would broadcast to an [m,m] array,
    # so Y is reshaped to match g below
    grad = (X.T).dot(g - Y.reshape(Y.size)) / m
    grad[0] = grad[0]  # the bias gradient is left unregularized
    grad[1:] = grad[1:] + (lmd * theta[1:]) / m
    return cost, grad
def gradient(theta, X, Y, l):
    m, n = X.shape
    theta = theta.reshape((n, 1))
    grad = np.zeros((theta.shape))
    Y = Y.reshape((m, 1))
    # grad = np.zeros((theta.shape))

    h_theta = sigmoid.sigmoid(X @ theta)  # the hypothesis h(theta) = 1 / (1 + e**(-z))
    grad[0, :] = (1 / m) * (h_theta - Y).T @ X[:, 0]
    grad[1:, :] = (((1 / m) * (h_theta - Y).T @ X[:, 1:]) + ((l / m) * theta[1:, :]).T).T
    return grad
def predict(Theta1, Theta2, X):
    # PREDICT Predict the label of an input given a trained neural network
    #   p = PREDICT(Theta1, Theta2, X) outputs the predicted label of X given the
    #   trained weights of a neural network (Theta1, Theta2)

    # Useful values
    m = np.shape(X)[0]  # number of examples

    # You need to return the following variables correctly
    p = np.zeros(m)

    # ====================== YOUR CODE HERE ======================
    # Instructions: Complete the following code to make predictions using
    #               your learned neural network. You should set p to a
    #               vector containing labels between 1 to num_labels.

    # add a bias column to X
    X = np.hstack((np.ones((X.shape[0], 1)), X))
    # calculate a_2
    a_2 = sigmoid(np.dot(X, np.transpose(Theta1)))
    # add a bias column to a_2
    a_2 = np.hstack((np.ones((a_2.shape[0], 1)), a_2))
    # calculate the output layer
    h = sigmoid(np.dot(a_2, np.transpose(Theta2)))
    # index of the maximum element in each row,
    # i.e. the most probable label for each example
    p = np.argmax(h, axis=1)
    # labels are 1-indexed (label 10 sits at index 9), so shift the 0-based index
    p = p + 1

    # Hint: The max function might come in useful. In particular, the max
    #       function can also return the index of the max element; for more
    #       information see 'help max'. If your examples are in rows, then
    #       you can use max(A, [], 2) to obtain the max for each row.
    #
    return p
def predict(Theta1, Theta2, X):
    # PREDICT Predict the label of an input given a trained neural network
    #   p = PREDICT(Theta1, Theta2, X) outputs the predicted label of X given the
    #   trained weights of a neural network (Theta1, Theta2)

    # turn a 1D X array into 2D
    if X.ndim == 1:
        X = np.reshape(X, (-1, X.shape[0]))

    # Useful values
    m = X.shape[0]
    num_labels = Theta2.shape[0]

    # You need to return the following variables correctly
    p = np.zeros((m, 1))

    h1 = s.sigmoid(np.dot(np.column_stack((np.ones((m, 1)), X)), Theta1.T))
    h2 = s.sigmoid(np.dot(np.column_stack((np.ones((m, 1)), h1)), Theta2.T))
    p = np.argmax(h2, axis=1)
    # =========================================================================
    return p + 1  # offsets Python's zero-based indexing
def DecisionBoundary_reg(data, theta):
    PlotData(data, 'Microchip Test 1', 'Microchip Test 2', 'Accepted', 'Not Accepted')
    X = data[:, 0:2]
    x1_min, x1_max = X[:, 0].min(), X[:, 0].max()
    x2_min, x2_max = X[:, 1].min(), X[:, 1].max()
    xx1, xx2 = np.meshgrid(np.linspace(x1_min, x1_max), np.linspace(x2_min, x2_max))
    XX = MapFeature(np.c_[xx1.ravel(), xx2.ravel()])
    XXX = np.hstack((np.ones((XX.shape[0], 1)), XX))
    # the only difference from DecisionBoundary() is the call to MapFeature,
    # i.e. the boundary is drawn where sigmoid(MapFeature(X) * theta) = 0.5
    h = sigmoid(XXX.dot(theta))
    h = h.reshape(xx1.shape)
    plt.contour(xx1, xx2, h, [0.5], linewidths=1, colors='g')
def SEAIRD(y, t):
    beta = sg.sigmoid(t - startT, beta0, beta01)
    S = y[0]
    E = y[1]
    A = y[2]
    I = y[3]
    R = y[4]
    p = 0.4
    y0 = -(beta2 * A + beta * I) * S - mu * S             # S
    y1 = (beta2 * A + beta * I) * S - sigma * E - mu * E  # E
    y2 = sigma * E * (1 - p) - mu * A - gamma2 * A        # A
    y3 = sigma * E * p - gamma * I - mu * I               # I
    y4 = b * I + d * A - mu * R                           # R
    y5 = -(y0 + y1 + y2 + y3 + y4)                        # D
    return [y0, y1, y2, y3, y4, y5]
def lrCostFunction(theta, X, y, lam):
    """
    lam: the lambda regularization coefficient.
    This is the regularized cost function.
    """
    theta = theta.T
    theta_Reg = theta[1:]  # do not penalize the first parameter
    # np.dot() is matrix multiplication in the linear-algebra sense;
    # np.multiply() (or *) is the element-wise product
    g = S.sigmoid(np.dot(X, theta))
    J = np.sum((-y * np.log(g)) + (y - 1) * np.log(1 - g)) / len(X) \
        + lam * np.sum(theta_Reg * theta_Reg) / (2 * len(X))
    return J
def gradient(theta, X, y):
    theta = np.matrix(theta)
    X = np.matrix(X)
    y = np.matrix(y)

    parameters = int(theta.ravel().shape[1])
    grad = np.zeros(parameters)

    error = sigmoid(X * theta.T) - y
    for i in range(parameters):
        term = np.multiply(error, X[:, i])
        grad[i] = np.sum(term) / len(X)

    return grad
def predict(X, params, dimensions):
    assert len(dimensions) == 3
    W1, b1, W2, b2 = unpack_parmas(params, dimensions)
    z1 = np.dot(X, W1) + b1
    h = sigmoid(z1)
    z2 = np.dot(h, W2) + b2
    y = softmax(z2)
    y = y.argmax(axis=1)
    n_label = np.max(y) + 1
    y = np.eye(n_label, dtype=np.int64)[y]  # convert class indices back to one-hot rows
    return y
def Gradient(theta, X, y):
    X = np.matrix(X)
    y = np.matrix(y)
    theta = np.matrix(theta)
    parameters = int(theta.ravel().shape[1])
    G = np.zeros(parameters)
    # G = 0
    m = len(X)
    for i in range(parameters):
        pred = sigmoid(X * theta.T)
        # J = 1/m * (np.sum(-np.multiply(y, np.log(pred)) - np.multiply((1 - y), np.log(1 - pred))))
        G[i] = 1 / m * np.sum(np.multiply(X[:, i], (pred - y)))
    return G
def costFunctionReg(theta, reg, X, y):
    '''Returns the regularized cost; inputs are theta, lambda as reg, and X and
    y as the inputs and target values respectively.
    np.log(a) ==> returns an array of element-wise logs of a.'''
    m = y.size
    h = sigmoid(X.dot(theta))
    theta_J = theta[1:]
    # the value added to the cost function; the bias parameter theta[0] is excluded
    regparameter = (reg / (2 * m)) * (theta_J.T @ theta_J)
    J = -1 * (1 / m) * ((np.log(h + epsilon).T).dot(y) + np.log(1 - h + epsilon).T.dot(1 - y)) + regparameter
    return J
def costFunction(theta, X, y):
    '''Returns the cost for theta, X and y.
    np.log(a) ==> returns an array with an element-wise log of array a.
    Uses the sigmoid function imported above.
    '''
    m = y.size
    h = sigmoid(X.dot(theta))
    y = np.array(y)
    h = np.array(h)
    # print(y.shape[0])
    J = -1 * (1 / m) * ((np.log(h).T).dot(y) + np.log(1 - h).T.dot(1 - y))
    # guard against numerical problems (log of 0 yields nan) before returning
    if np.any(np.isnan(J)):
        return np.inf
    return J
def predictNN(X, Theta1, Theta2):
    """
    Predicts the class label p in the range 0 to K (the number of classes is
    K + 1) for each object described in the feature matrix X. The prediction
    is made using the trained parameter matrices Theta1, Theta2 of a
    three-layer neural network.
    """
    m = X.shape[0]
    p = np.zeros([m, 1])

    a1 = X
    a2 = sigmoid(np.dot(a1, Theta1.transpose()))
    a2 = np.concatenate((np.ones((m, 1)), a2), axis=1)
    a3 = sigmoid(np.dot(a2, Theta2.transpose()))
    h = a3

    p = np.argmax(h, axis=1)
    p = np.array([p]).transpose().astype('uint8')
    return p
def costfunc(theta, X, Y, m):
    m = float(m)
    res = 0
    # print(np.dot(theta.T, X))
    h = sigmoid(np.dot(X, theta))
    # tmp = (Y.T * np.log(h)) + ((1 - Y.T) * np.log(1 - h))
    tmp = np.dot(Y.T, np.log(h)) + np.dot((1 - Y.T), np.log(1 - h))
    res = -(1 / m * tmp)
    # tmp = 1 / m * np.dot(X.T, (h - Y))
    return res
def predict(Theta1, Theta2, X):
    # Useful values
    m, n = X.shape
    num_labels = Theta2.shape[0]

    # You need to return the following variables correctly
    p = np.zeros((m, 1))

    # ====================== YOUR CODE HERE ======================
    def h(X, theta):
        return X.dot(theta)

    X = np.hstack((np.ones((m, 1)), X))
    a2 = sigmoid(h(X, Theta1.T))
    a2 = np.hstack((np.ones((m, 1)), a2))
    a3 = sigmoid(h(a2, Theta2.T))
    p = np.argmax(a3, axis=1) + 1
    # =============================================================
    return p
def gradFunction(theta, *args):
    X, y, l = args
    # reshape theta into a column vector
    theta = np.reshape(theta, (len(theta), 1))
    m = len(X)
    h = sigmoid(X.dot(theta))
    reg_theta = np.concatenate(([[0.]], theta[1:]))  # do not regularize the bias term
    grad = 1.0 / m * (X.T.dot(h - y)) + l * reg_theta * (1.0 / m)
    return grad.flatten()
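# A sketch of plugging gradFunction above into scipy.optimize.minimize as the
# jac argument, paired with a matching regularized cost. The toy data, the
# local sigmoid and the _reg_cost helper are illustrative assumptions.
import numpy as np
from scipy import optimize

def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

def _reg_cost(theta, X, y, l):
    m = len(X)
    h = sigmoid(X.dot(theta.reshape(-1, 1))).ravel()
    yf = y.ravel()
    reg = (l / (2.0 * m)) * np.sum(theta[1:] ** 2)
    return -(yf.dot(np.log(h)) + (1 - yf).dot(np.log(1 - h))) / m + reg

def _demo_minimize_with_gradFunction():
    np.random.seed(0)
    m = 20
    X = np.hstack((np.ones((m, 1)), np.random.randn(m, 2)))
    y = np.random.randint(0, 2, (m, 1)).astype(float)
    theta0 = np.zeros(X.shape[1])
    res = optimize.minimize(_reg_cost, theta0, args=(X, y, 1.0),
                            jac=gradFunction, method='TNC')
    return res.x  # fitted parameters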
def predict_function(theta, X, y=None):
    """
    Compute predictions on X using the parameters theta. If y is provided,
    also compute and return the accuracy of the classifier.
    """
    preds = None
    accuracy = None
    threshold = 0.5

    score = np.dot(X, theta)
    preds_1 = sigmoid(score)
    preds = np.where(preds_1 >= threshold, 1, 0)
    if y is not None:
        accuracy = np.mean(y == preds)
    return preds, accuracy
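# A minimal usage sketch for predict_function above on toy data; the data and
# the parameter vector are illustrative assumptions, and a sigmoid helper is
# assumed to be in scope, as in the function itself.
import numpy as np

def _demo_predict_function():
    np.random.seed(0)
    X = np.hstack((np.ones((10, 1)), np.random.randn(10, 2)))
    theta = np.array([0.0, 1.0, -1.0])        # predicts 1 whenever x1 >= x2
    y = (X[:, 1] > X[:, 2]).astype(int)
    preds, accuracy = predict_function(theta, X, y)
    return preds, accuracy                    # accuracy should be 1.0 here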
def predict(Theta1, Theta2, X):
    m, n = X.shape
    num_labels = Theta2.shape[0]

    X = np.concatenate((np.ones((m, 1)), X), axis=1)
    # print('num_labels: ', num_labels)
    # print('Theta1: ', Theta1.shape)
    # print('Theta2: ', Theta2.shape)
    # print('X: ', X.shape)

    a2 = sigmoid(X.dot(Theta1.T))
    m, n = a2.shape
    a2 = np.concatenate((np.ones((m, 1)), a2), axis=1)
    # print('a2: ', a2.shape)
    a3 = sigmoid(a2.dot(Theta2.T))
    # print('a3: ', a3.shape)

    p = np.argmax(a3, axis=1)
    return p + 1  # MATLAB data is 1-indexed
def lr_cost_function(theta, X, y, lmd):
    m = len(y)

    # You need to return the following values correctly
    theta = theta.reshape((len(theta), 1))
    g = np.array(sigmoid(X.dot(theta)))
    cost = (np.sum(-y * np.log(g) - (1 - y) * np.log(1 - g))
            + lmd / 2 * np.sum(np.power(theta[1:], 2))) / m

    # ===================== Your Code Here =====================
    # Instructions : Compute the cost of a particular choice of theta
    #                You should set cost and grad correctly.
    #
    # ===========================================================
    return cost
def cost_function_reg(theta, X, y, l):
    """
    Compute cost and gradient for logistic regression with regularization.

    Parameters
    ----------
    theta : ndarray, shape (n_features,)
        Logistic regression parameters.
    X : ndarray, shape (n_samples, n_features)
        Training data, where n_samples is the number of samples and
        n_features is the number of features.
    y : ndarray, shape (n_samples,)
        Labels.
    l : float
        Regularization parameter.

    Returns
    -------
    J : numpy.float64
        The cost of using theta as the parameter for regularized logistic
        regression w.r.t. the parameters.
    grad : ndarray, shape (n_features,)
        Partial derivatives of the cost w.r.t. each parameter in theta.
    """
    m, n = X.shape
    x_dot_theta = X.dot(theta)
    mask = np.eye(len(theta))
    # Skip the theta[0] parameter when performing regularization
    mask[0, 0] = 0

    J = 1.0 / m * (np.dot(-y.T, np.log(sigmoid(x_dot_theta)))
                   - np.dot((1 - y).T, np.log(1 - sigmoid(x_dot_theta)))) \
        + 1.0 * l / (2 * m) * np.sum(np.power(mask.dot(theta), 2))

    grad = 1.0 / m * np.dot((sigmoid(x_dot_theta) - y).T, X).T + 1.0 * l / m * (mask.dot(theta))

    return J, grad
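# A small demonstration of the mask trick used above: multiplying theta by an
# identity matrix whose (0, 0) entry has been zeroed leaves every parameter
# intact except theta[0], so the bias term drops out of the regularization.
import numpy as np

def _demo_regularization_mask():
    theta = np.array([3.0, -1.5, 2.0])
    mask = np.eye(len(theta))
    mask[0, 0] = 0
    masked = mask.dot(theta)      # array([ 0. , -1.5,  2. ])
    return np.sum(masked ** 2)    # 1.5**2 + 2.0**2, theta[0] excluded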
def gradientDescent(X, y, theta, alpha, num_iters):
    m = len(y)
    nfeatures = len(theta)
    J_history = np.zeros(num_iters)

    for ii in range(num_iters):
        z = np.dot(theta, X)
        h = sigmoid(z)
        for jj in range(nfeatures):
            theta[jj] -= alpha * sum((h - y) * X[jj, :]) / m
        if plotJ:
            J_history[ii] = computeCost(X, y, theta)

    if plotJ:
        p.plot(J_history)
        p.show()

    return theta
def predict(theta, X):
    """
    Computes the predictions for X using a threshold at 0.5
    (i.e., if sigmoid(theta'*x) >= 0.5, predict 1).
    """
    p = 0

    # ====================== YOUR CODE HERE ======================
    # Instructions: Complete the following code to make predictions using
    #               your learned logistic regression parameters.
    #               You should set p to a vector of 0's and 1's
    #
    # =========================================================================
    p = sigmoid(np.dot(X, theta)) >= 0.5
    return p
def predict(theta, X):
    '''Predict whether the label is 0 or 1 using learned logistic regression
    parameters theta.'''
    # p = PREDICT(theta, X) computes the predictions for X using a
    # threshold at 0.5 (i.e., if sigmoid(theta'*x) >= 0.5, predict 1)

    # You need to return the following variables correctly
    # ====================== YOUR CODE HERE ======================
    # Instructions: Complete the following code to make predictions using
    #               your learned logistic regression parameters.
    #               You should set p to a vector of 0's and 1's
    #
    p = sigmoid(X.dot(theta)) >= 0.5
    return p
def costFunction(theta, X, y):
    """Computes the cost of using theta as the parameter for logistic
    regression and the gradient of the cost w.r.t. the parameters."""
    # Initialize some useful values
    m = len(y)  # number of training examples
    J = 0

    # ====================== YOUR CODE HERE ======================
    # Instructions: Compute the cost of a particular choice of theta.
    #               You should set J to the cost.
    #               Compute the partial derivatives and set grad to the partial
    #               derivatives of the cost w.r.t. each parameter in theta
    #
    # Note: grad should have the same dimensions as theta
    # theta.shape = (3,), X.shape = (100, 3), y.shape = (100,)
    term_1 = y * np.transpose(np.log(sigmoid(np.dot(X, theta))))
    term_2 = (1 - y) * np.transpose(np.log(1 - sigmoid(np.dot(X, theta))))
    J = -(1. / m) * (term_1 + term_2).sum()
    return J
def costFunction(nn_weights, layers, X, y, num_labels, lambd):
    # Computes the cost function of the neural network.
    # nn_weights: neural network parameters (vector)
    # layers: a list with the number of units per layer
    # X: a matrix where every row is a training example for a handwritten digit image
    # y: a vector with the labels of each instance
    # num_labels: the number of units in the output layer
    # lambd: regularization factor

    # Setup some useful variables
    m = X.shape[0]
    num_layers = len(layers)

    # Unroll Params
    Theta = roll_params(nn_weights, layers)

    # ================================ TODO ================================
    # The vector y passed into the function is a vector of labels
    # containing values from 1..K. You need to map this vector into a
    # binary vector of 1's and 0's to be used with the neural network
    # cost function.
    yv = np.zeros((num_labels, m))
    for i in range(len(y)):
        yv[int(y[i]), i] = 1
    yv = np.transpose(yv)

    # ================================ TODO ================================
    # At this point calculate the cost of the neural network (feedforward)
    x = np.copy(X)
    for i in range(num_layers - 1):
        s = np.shape(Theta[i])
        theta = Theta[i][:, 1:s[1]]
        x = np.dot(x, np.transpose(theta))
        x = x + Theta[i][:, 0]
        x = sigmoid(x)

    cost = (yv * np.log(x) + (1 - yv) * np.log(1 - x)) / m
    cost = -np.sum(cost)

    somme = 0
    for i in range(num_layers - 1):
        somme += lambd * np.sum(Theta[i] ** 2) / (2 * m)
    cost += somme

    return cost
def logistic_SGD(X, y, num_iter=10000, alpha=0.01):
    """
    Perform logistic regression with stochastic gradient descent.

    Args:
        X: Data matrix of shape [num_train, num_features]
        y: Labels corresponding to X of size [num_train, 1]
        num_iter: Number of iterations of SGD
        alpha: The learning rate

    Returns:
        theta: The value of the parameters after logistic regression
    """
    theta = np.zeros(X.shape[1])
    losses = []
    new_loss = cost_function(theta, X, y)

    for i in range(num_iter):
        start = time.time()
        N = len(X)

        theta_x = np.dot(X, np.transpose(theta))
        predictions = sigmoid(theta_x)

        # grad = gradient_function(theta, X, y)
        gradient = np.dot(X.T, predictions - y)
        gradient /= N
        gradient *= alpha
        theta -= gradient

        if i % 1000 == 0:
            exec_time = time.time() - start
            loss = cost_function(theta, X, y)
            losses.append(loss)
            print('Iter {}/{}: cost = {} ({}s)'.format(
                i, num_iter, loss, exec_time))
            alpha *= 0.9

    return theta, losses
def forward_backward_prop(data, labels, params, dimensions):
    """
    Forward and backward propagation for a two-layer sigmoidal network

    Compute the forward propagation and the cross entropy cost, and the
    backward propagation for the gradients of all parameters.

    Arguments:
    data -- M x Dx matrix, where each row is a training example.
    labels -- M x Dy matrix, where each row is a one-hot vector.
    params -- Model parameters, these are unpacked for you.
    dimensions -- A tuple of input dimension, number of hidden units
                  and output dimension
    """
    # Unpack network parameters (do not modify)
    ofs = 0
    Dx, H, Dy = (dimensions[0], dimensions[1], dimensions[2])

    W1 = np.reshape(params[ofs:ofs + Dx * H], (Dx, H))
    ofs += Dx * H
    b1 = np.reshape(params[ofs:ofs + H], (1, H))
    ofs += H
    W2 = np.reshape(params[ofs:ofs + H * Dy], (H, Dy))
    ofs += H * Dy
    b2 = np.reshape(params[ofs:ofs + Dy], (1, Dy))

    # YOUR CODE HERE: forward propagation
    z1 = np.dot(data, W1) + b1
    h = sigmoid(z1)
    z2 = np.dot(h, W2) + b2
    y_guess = softmax(z2)
    cost = -np.sum(labels * np.log(y_guess))  # cross entropy loss
    # END YOUR CODE

    # YOUR CODE HERE: backward propagation
    diff_labels = y_guess - labels
    gradb2 = np.sum(diff_labels, axis=0)
    gradW2 = np.dot(h.T, diff_labels)
    gradb1 = np.sum(np.dot(diff_labels, W2.T) * sigmoid_grad(h), axis=0)
    gradW1 = np.dot(data.T, np.dot(diff_labels, W2.T) * sigmoid_grad(h))
    # END YOUR CODE

    # Stack gradients (do not modify)
    grad = np.concatenate((gradW1.flatten(), gradb1.flatten(),
                           gradW2.flatten(), gradb2.flatten()))

    return cost, grad
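# A minimal gradient-check sketch for forward_backward_prop above. The sigmoid,
# sigmoid_grad and softmax helpers it calls are defined locally here (standard
# definitions, assumed to match the ones used elsewhere) so the check is
# self-contained; it compares a few analytic gradient entries against central
# finite differences of the cost.
import numpy as np

def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

def sigmoid_grad(h):
    # derivative of the sigmoid expressed in terms of its output h = sigmoid(z)
    return h * (1.0 - h)

def softmax(z):
    e = np.exp(z - z.max(axis=1, keepdims=True))
    return e / e.sum(axis=1, keepdims=True)

def _check_forward_backward_prop():
    np.random.seed(0)
    dimensions = (4, 5, 3)  # Dx, H, Dy
    M = 10
    data = np.random.randn(M, dimensions[0])
    labels = np.zeros((M, dimensions[2]))
    labels[np.arange(M), np.random.randint(0, dimensions[2], M)] = 1
    n_params = (dimensions[0] + 1) * dimensions[1] + (dimensions[1] + 1) * dimensions[2]
    params = np.random.randn(n_params)

    cost, grad = forward_backward_prop(data, labels, params, dimensions)
    eps = 1e-5
    for i in np.random.choice(n_params, 10, replace=False):
        p_plus, p_minus = params.copy(), params.copy()
        p_plus[i] += eps
        p_minus[i] -= eps
        numeric = (forward_backward_prop(data, labels, p_plus, dimensions)[0]
                   - forward_backward_prop(data, labels, p_minus, dimensions)[0]) / (2 * eps)
        assert np.isclose(numeric, grad[i], rtol=1e-4, atol=1e-6), (i, numeric, grad[i])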