def beginner():
    data = read_csv('family_data.csv')
    family_size_dict = data[['n_people']].to_dict()['n_people']
    cols = ['choice_' + str(i) for i in range(10)]
    choice_dict = data[cols].to_dict()
    N_DAYS = 100
    days = list(range(N_DAYS, 0, -1))

    submission = read_csv('sample_submission.csv')
    best = submission['assigned_day'].tolist()
    start_score = cost_function(best, choice_dict, family_size_dict)

    new = [x for x in best]
    pre_time = time.time()
    # loop over each family
    for fam_id, _ in enumerate(best):
        # loop over each family choice
        for pick in range(10):
            day = choice_dict['choice_' + str(pick)][fam_id]
            temp = [x for x in new]
            temp[fam_id] = day  # add in the new pick
            if cost_function(temp, choice_dict, family_size_dict) < start_score:
                new = [x for x in temp]
                start_score = cost_function(new, choice_dict, family_size_dict)
        if fam_id % 500 == 0:
            print('time = ', time.time() - pre_time)
            pre_time = time.time()

    submission['assigned_day'] = new
    score = cost_function(new, choice_dict, family_size_dict)
    submission.to_csv(data_path + 'submission_20191207_01.csv', index=False)
    print('Score = ', score)
def logistic_SGD(X, y, num_iter=10000, alpha=0.01):
    """ Perform logistic regression with stochastic gradient descent.

    Args:
        theta_0: Initial value for parameters of shape [num_features]
        X: Data matrix of shape [num_train, num_features]
        y: Labels corresponding to X of size [num_train, 1]
        num_iter: Number of iterations of SGD
        alpha: The learning rate

    Returns:
        theta: The value of the parameters after logistic regression
    """
    theta = np.zeros(X.shape[1])
    losses = []
    new_loss = cost_function(theta, X, y)
    for i in range(num_iter):
        start = time.time()
        N = len(X)
        # Forward pass: predictions for the whole batch.
        theta_transp = np.transpose(theta)
        theta_x = np.dot(X, theta_transp)
        predictions = sigmoid(theta_x)
        # grad = gradient_function(theta, X, y)
        gradient = np.dot(X.T, predictions - y)
        gradient /= N
        gradient *= alpha
        theta -= gradient
        # return theta
        if i % 1000 == 0:
            exec_time = time.time() - start
            loss = cost_function(theta, X, y)
            losses.append(loss)
            print('Iter {}/{}: cost = {} ({}s)'.format(
                i, num_iter, loss, exec_time))
            alpha *= 0.9
    return theta, losses
def gda(X, y):
    """ Perform Gaussian Discriminant Analysis.

    Args:
        X: Data matrix of shape [num_train, num_features]
        y: Labels corresponding to X of size [num_train, 1]

    Returns:
        theta: The value of the parameters after logistic regression
    """
    theta = None
    phi = None
    mu_0 = None
    mu_1 = None
    sigma = None

    X = X[:, 1:]  # Note: We remove the bias term!
    start = time.time()

    #######################################################################
    # TODO:                                                               #
    # Perform GDA:                                                        #
    #   - Compute the values for phi, mu_0, mu_1 and sigma                #
    #                                                                     #
    #######################################################################
    y = y[:, np.newaxis]
    phi = float(np.count_nonzero(y)) / float(y.shape[0])
    mu_0 = np.sum(((1 - y) * X) / (y.shape[0] - np.count_nonzero(y)), axis=0)
    mu_1 = np.sum((y * X) / (np.count_nonzero(y)), axis=0)
    mat_1 = (X - ((1 - y) * mu_0) - (y * mu_1))
    sigma = np.matmul(mat_1.T, mat_1) / y.shape[0]
    #######################################################################
    #                         END OF YOUR CODE                            #
    #######################################################################

    # Compute theta from the results of GDA
    sigma_inv = np.linalg.inv(sigma)
    quad_form = lambda A, x: np.dot(x.T, np.dot(A, x))
    b = 0.5 * quad_form(sigma_inv, mu_0) - 0.5 * quad_form(
        sigma_inv, mu_1) + np.log(phi / (1 - phi))
    w = np.dot((mu_1 - mu_0), sigma_inv)
    theta = np.concatenate([[b], w])
    exec_time = time.time() - start

    # Add the bias to X and compute the cost
    X = np.concatenate([np.ones((X.shape[0], 1)), X], axis=1)
    loss = cost_function(theta, X, y)
    print('Iter 1/1: cost = {} ({}s)'.format(loss, exec_time))
    return theta, None
def gradient_descent(data_matrix, label_matrix, alpha, max_iter_numbers):
    step_alpha = alpha
    epoches = max_iter_numbers
    m, n = np.shape(data_matrix)
    theta = np.zeros((n, 1))  # Initial theta
    cost_vector = []
    for epoch in range(epoches):
        cost_j = cost_function(theta, data_matrix, label_matrix)
        cost_vector.append(cost_j[0, 0])
        theta = theta + step_alpha * gradient(theta, data_matrix, label_matrix)
    cost_j = cost_function(theta, data_matrix, label_matrix)
    cost_vector.append(cost_j[0, 0])
    return theta, cost_vector
def batch_gradient_update(theta, X, y, alpha=0.2, threshold=1e-6, maxIter=800):
    from sigmoid_function import sigmoid_function
    from cost_function import cost_function
    import numpy as np
    for i in range(maxIter):
        J, grad = cost_function(theta, X, y)
        theta += alpha * grad.reshape(np.shape(theta))
    return theta
def manual_gradient(Theta1, Theta2, GRADIENT_CHECK_EPSILON, training_sets,
                    number_features_sets, y_training_sets):
    grad_theta1_clone = np.zeros((Theta1.shape[0], Theta1.shape[1]))
    for i1 in range(Theta1.shape[0]):
        for i2 in range(Theta1.shape[1]):
            # Perturb a single entry of Theta1 in both directions.
            # Use copies so the two perturbed matrices do not alias Theta1.
            Theta1_clone = Theta1.copy()
            Theta1_clone[i1][i2] = Theta1_clone[i1][i2] + \
                GRADIENT_CHECK_EPSILON
            Theta1_clone_minus = Theta1.copy()
            Theta1_clone_minus[i1][i2] = Theta1_clone_minus[i1][i2] - \
                GRADIENT_CHECK_EPSILON
            J1_clone, h, new_a2_sets, z2 = cost_function.cost_function(
                training_sets, Theta1_clone, Theta2, number_features_sets,
                y_training_sets)
            J1_clone_minus, h, new_a2_sets, z2 = cost_function.cost_function(
                training_sets, Theta1_clone_minus, Theta2,
                number_features_sets, y_training_sets)
            # Central finite-difference estimate of dJ/dTheta1[i1, i2].
            grad_theta1_clone[i1][i2] = (
                J1_clone - J1_clone_minus) / (2 * GRADIENT_CHECK_EPSILON)
    return grad_theta1_clone
def normal_quations(data_matrix, label_matrix):
    cost_vector = []
    theta = (data_matrix.T * data_matrix).I * data_matrix.T * label_matrix
    cost_j = cost_function(theta, data_matrix, label_matrix)
    cost_vector.append(cost_j[0, 0])
    return theta, cost_vector
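# --- Added sketch: the same normal-equation solution without an explicit inverse ---
# A minimal, hypothetical alternative to the function above; it assumes
# data_matrix and label_matrix are plain ndarrays rather than np.matrix objects.
# np.linalg.solve is generally preferred to forming (X^T X)^-1 explicitly.
import numpy as np

def normal_equations_solve(data_matrix, label_matrix):
    # Solve (X^T X) theta = X^T y directly.
    XtX = data_matrix.T @ data_matrix
    Xty = data_matrix.T @ label_matrix
    return np.linalg.solve(XtX, Xty)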
def gda(X, y):
    """ Perform Gaussian Discriminant Analysis.

    Args:
        X: Data matrix of shape [num_train, num_features]
        y: Labels corresponding to X of size [num_train, 1]

    Returns:
        theta: The value of the parameters after logistic regression
    """
    # Initialize Variables
    theta = None
    phi = None
    mu_0 = None
    mu_1 = None
    sigma = None

    X = X[:, 1:]  # Note: Remove the bias term!
    start = time.time()

    m, n = X.shape
    phi = np.sum(np.where(y == 1, 1, 0), dtype=float) / m
    a = tuple(np.array(np.where(y == 0)).tolist())
    b = tuple(np.array(np.where(y == 1)).tolist())
    mu_0 = np.sum(X[tuple(a)], axis=0, dtype=float) / len(y[a])
    mu_1 = np.sum(X[tuple(b)], axis=0, dtype=float) / len(y[b])
    var0 = np.array(X[a] - mu_0) / len(a)
    var1 = np.array(X[b] - mu_1) / len(b)
    co_variances = np.concatenate((var0, var1), axis=0) / m
    sigma = np.dot(co_variances.T, co_variances)

    # Compute theta from the results of GDA
    sigma_inv = np.linalg.inv(sigma)
    quad_form = lambda A, x: np.dot(x.T, np.dot(A, x))
    b = 0.5 * quad_form(sigma_inv, mu_0) - 0.5 * quad_form(
        sigma_inv, mu_1) + np.log(phi / (1 - phi))
    w = np.dot((mu_1 - mu_0), sigma_inv)
    theta = np.concatenate([[b], w])
    exec_time = time.time() - start

    # Add the bias to X and compute the cost
    X = np.concatenate([np.ones((X.shape[0], 1)), X], axis=1)
    loss = cost_function(theta, X, y)
    print('Iter 1/1: cost = {} ({}s)'.format(loss, exec_time))
    return theta, None
def logistic_SGD(X, y, num_iter=100000, alpha=0.01):
    """ Perform logistic regression with stochastic gradient descent.

    Args:
        theta_0: Initial value for parameters of shape [num_features]
        X: Data matrix of shape [num_train, num_features]
        y: Labels corresponding to X of size [num_train, 1]
        num_iter: Number of iterations of SGD
        alpha: The learning rate

    Returns:
        theta: The value of the parameters after logistic regression
    """
    theta = np.zeros(X.shape[1])
    losses = []
    for i in range(num_iter):
        start = time.time()
        #######################################################################
        # TODO:                                                               #
        # Perform one step of stochastic gradient descent:                    #
        #   - Select a single training example at random                      #
        #   - Update theta based on alpha and using gradient_function         #
        #                                                                     #
        #######################################################################
        # Pick the index of a single training example at random.
        random = int(np.floor(np.random.random() * X.shape[0]))
        # Make sure the dimensions of the variables are appropriate for the
        # gradient computation.
        random_X = X[random, np.newaxis]
        random_y = y[random, np.newaxis]
        grad = gradient_function(theta, random_X, random_y)
        # Make sure the dimension of theta is appropriate for the update.
        theta = theta[:, np.newaxis]
        theta = theta + alpha * grad
        # Set the dimension of theta back to the default.
        theta = np.squeeze(theta)
        #######################################################################
        #                         END OF YOUR CODE                            #
        #######################################################################
        if i % 10000 == 0:
            exec_time = time.time() - start
            loss = cost_function(theta, X, y)
            losses.append(loss)
            print('Iter {}/{}: cost = {} ({}s)'.format(
                i, num_iter, loss, exec_time))
            alpha *= 0.9
    return theta, losses
def batch_gradient_update(theta, X, y, alpha=1, threshold=1e-6, maxIter=1000):
    from sigmoid_function import sigmoid_function
    from cost_function import cost_function
    import numpy as np
    for i in range(maxIter):
        # T = np.zeros(np.shape(theta))
        # h = sigmoid_function(np.dot(X, theta))
        # for i in range(len(X[:, 0])):
        #     T = T + (y[i] - h[i]) * (X[i].reshape(np.shape(theta)))
        # theta += alpha * T / len(X[:, 0])
        J, grad = cost_function(theta, X, y)
        theta += alpha * grad.reshape(np.shape(theta))
    return theta
def gradient_descent(x, y, theta, alpha, iteration):
    m = len(y)
    iter = 0
    while iter < iteration:
        h = hypothises(x, theta)
        hy = h - y
        theta[0][0] = theta[0][0] - alpha / m * sum(hy)
        theta[1][0] = theta[1][0] - alpha / m * np.sum(
            np.dot(np.transpose(hy), x[:, 1]))
        cost = cost_function(x, y, theta)
        # print(theta[0][0], theta[1][0], cost)
        iter += 1
    return theta
def logistic_SGD(X, y, num_iter=100000, alpha=0.01):
    """ Perform logistic regression with stochastic gradient descent.

    Args:
        theta_0: Initial value for parameters of shape [num_features]
        X: Data matrix of shape [num_train, num_features]
        y: Labels corresponding to X of size [num_train, 1]
        num_iter: Number of iterations of SGD
        alpha: The learning rate

    Returns:
        theta: The value of the parameters after logistic regression
    """
    theta = np.zeros(X.shape[1])
    losses = []
    for i in range(num_iter):
        start = time.time()
        #######################################################################
        # TODO:                                                               #
        # Perform one step of stochastic gradient descent:                    #
        #   - Select a single training example at random                      #
        #   - Update theta based on alpha and using gradient_function         #
        #                                                                     #
        #######################################################################
        # Random number generator: pick one training example at random.
        random = int(np.floor(np.random.random() * X.shape[0]))
        # Call gradient_function to compute the gradient for that example.
        gradient = gradient_function(theta, X[random, np.newaxis],
                                     y[random, np.newaxis].T)
        # Update theta according to the specs.
        theta = np.squeeze(theta[:, np.newaxis] + alpha * gradient)
        #######################################################################
        #                         END OF YOUR CODE                            #
        #######################################################################
        if i % 10000 == 0:
            exec_time = time.time() - start
            loss = cost_function(theta, X, y)
            losses.append(loss)
            print('Iter {}/{}: cost = {} ({}s)'.format(i, num_iter, loss,
                                                       exec_time))
            alpha *= 0.9
    return theta, losses
def logistic_Newton(X, y, num_iter=10):
    """ Perform logistic regression with Newton's method.

    Args:
        theta_0: Initial value for parameters of shape [num_features]
        X: Data matrix of shape [num_train, num_features]
        y: Labels corresponding to X of size [num_train, 1]
        num_iter: Number of iterations of Newton's method

    Returns:
        theta: The value of the parameters after logistic regression
    """
    theta = np.zeros(X.shape[1])
    losses = []
    for i in range(num_iter):
        start = time.time()

        # Computing the Hessian
        theta_transp = np.transpose(theta)
        x_transp = np.transpose(X)
        score = np.dot(X, theta_transp)
        mu = 1 / (1 + np.exp(-score))  # sigmoid of the scores
        # Collapse the per-example weights mu_i * (1 - mu_i) into one scalar,
        # i.e. approximate H as (sum_i mu_i * (1 - mu_i)) * X^T X.
        scores = np.dot(np.transpose(mu), (1 - mu))
        X_xtransp = np.dot(x_transp, X)  # X multiplied by X transposed
        Hessian = np.dot(X_xtransp, scores)
        inverse_ = np.linalg.inv(Hessian)

        # Update theta using the gradient and the inverse of the Hessian
        grad = gradient_function(theta, X, y)
        theta = theta - np.dot(inverse_, grad)

        exec_time = time.time() - start
        loss = cost_function(theta, X, y)
        losses.append(loss)
        print('Iter {}/{}: cost = {} ({}s)'.format(i + 1, num_iter, loss,
                                                   exec_time))
    return theta, losses
async def run(self):
    msg = await self.receive(30)  # type: Message
    await self.agent.records_ready.acquire()
    if msg:
        data = jsonpickle.loads(msg.body)
        self.agent.log.debug('Data ready, computing cost function')
        costs = [
            cost_function(value[0], value[1], self.agent.training_records)
            for value in data
        ]
        self.agent.log.debug('Cost function computed')
        reply = msg.make_reply()
        reply.metadata = dict(performative='reply')
        reply.body = tools.to_json(costs)
        await asyncio.sleep(5)
        await self.send(reply)
        self.agent.log.debug('Reply sent!')
    self.agent.records_ready.release()
def tanh_GD(X, y, num_iter=10000, alpha=0.01):
    """ Perform regression with gradient descent.

    Args:
        theta_0: Initial value for parameters of shape [num_features]
        X: Data matrix of shape [num_train, num_features]
        y: Labels corresponding to X of size [num_train, 1]
        num_iter: Number of iterations of GD
        alpha: The learning rate

    Returns:
        theta: The value of the parameters after regression
    """
    theta = np.zeros(X.shape[1])
    losses = []
    for i in range(num_iter):
        start = time.time()
        #######################################################################
        # TODO:                                                               #
        # Perform one step of gradient descent:                               #
        #   - Select a single training example at random                      #
        #   - Update theta based on alpha and using gradient_function         #
        #                                                                     #
        #######################################################################
        grad = gradient_function(theta, X, y)
        theta -= np.multiply(alpha, grad)
        #######################################################################
        #                         END OF YOUR CODE                            #
        #######################################################################
        if i % 1000 == 0:
            exec_time = time.time() - start
            loss = cost_function(theta, X, y)
            losses.append(loss)
            print('Iter {}/{}: cost = {} ({}s)'.format(i, num_iter, loss,
                                                       exec_time))
            alpha *= 0.9
    return theta, losses
def logistic_Newton(X, y, num_iter=10):
    """ Perform logistic regression with Newton's method.

    Args:
        theta_0: Initial value for parameters of shape [num_features]
        X: Data matrix of shape [num_train, num_features]
        y: Labels corresponding to X of size [num_train, 1]
        num_iter: Number of iterations of Newton's method

    Returns:
        theta: The value of the parameters after logistic regression
    """
    theta = np.zeros(X.shape[1])
    losses = []
    for i in range(num_iter):
        start = time.time()
        #######################################################################
        # TODO:                                                               #
        # Perform one step of Newton's method:                                #
        #   - Compute the Hessian                                             #
        #   - Update theta using the gradient and the inverse of the hessian  #
        #                                                                     #
        # Hint: To solve for A^(-1)b consider using np.linalg.solve for speed #
        #######################################################################
        pass
        #######################################################################
        #                         END OF YOUR CODE                            #
        #######################################################################
        exec_time = time.time() - start
        loss = cost_function(theta, X, y)
        losses.append(loss)
        print('Iter {}/{}: cost = {} ({}s)'.format(i + 1, num_iter, loss,
                                                   exec_time))
    return theta, losses
test = number_features_sets - training
test_sets = new_features_sets[training:, :]
y_test_sets = y[training:, :]
test_sets_mean = mean_normalization.mean_normalization(test_sets[:, 1:])
new_test_sets = np.concatenate((np.ones(
    (test_sets.shape[0], 1)), test_sets_mean), axis=1)

# Theta
Theta1 = np.genfromtxt("Theta1.csv", delimiter=",")
Theta2 = np.genfromtxt("Theta2.csv", delimiter=",")
theta2 = np.array([Theta2])

J, h, new_a2_sets, z2 = cost_function.cost_function(new_test_sets, Theta1,
                                                    theta2,
                                                    number_features_sets,
                                                    y_test_sets)
result = h.T
result[result >= 0.5] = 1
result[result < 0.5] = 0
compare = result == y_test_sets
num_right_predictions = compare[compare == True].size
accuracy = (num_right_predictions / y_test_sets.size) * 100
print("Test sets accuracy: %.2f" % accuracy + "%")
# -- NN -> input layer: 8 nodes, hidden layer: 5 nodes, output layer: 1 node
# initialize theta
Theta1 = np.random.rand(HIDDEN_LAYER_NODES, number_features) * \
    (2 * INITIAL_EPSILON) - INITIAL_EPSILON
Theta2 = np.random.rand(
    OUTPUT_LAYER_NODES, HIDDEN_LAYER_NODES + 1) * (2 * INITIAL_EPSILON) - INITIAL_EPSILON

# J_datas = []
# for j in range(len(LEARNING_RATES)):
#     J_data = []
for i in range(NUM_ITERATIONS):
    # forward propagation
    J, h, new_a2_sets, z2 = cost_function.cost_function(
        new_training_sets, Theta1, Theta2, number_features_sets,
        y_training_sets)
    # J_data.append(J)
    print(J)
    if J < COST_THRESHOLD:
        break

    # back propagation
    delta3 = h - y_training_sets.T
    Theta2_grad = (delta3@new_a2_sets.T)/number_features_sets
    Fake_theta2 = Theta2[:, 1:]
    delta2 = (Fake_theta2.T@delta3)*sigmoid_gradient.sigmoid_gradient(z2)
    Theta1_grad = (delta2@training_sets)/number_features_sets

    # gradient descent
    Theta1 = Theta1 - LEARNING_RATE*Theta1_grad
import matplotlib.pyplot as plt

fig = plt.figure()
ax = plt.axes(projection="3d")

x = np.linspace(-2, 2, 30)
y = np.linspace(-2, 2, 30)
X, Y = np.meshgrid(x, y)
W = np.ndarray(shape=(30, 30))
row = []
insert = 0
for a in x:
    for b in y:
        cost = cost_function.cost_function(x, y, (a, b))
        print("COST={}".format(cost))
        row.append(cost)
    # Fill one row of the cost surface. np.insert returns a copy, so assign
    # directly into W instead of calling np.insert(W, insert, row).
    W[insert] = row
    insert = insert + 1
    # print("\n\nW={}".format(W))
    row = []

# print("\n\nW = {}".format(W))
ax = plt.axes(projection='3d')
ax.plot_surface(X, Y, W, rstride=1, cstride=1,
def cost_function_wrapper(theta, cost_function_parameters):
    """Wrapper for the Cost Function"""
    cost_function_parameters['theta'] = theta
    return cost_function(cost_function_parameters)
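# --- Added usage sketch: driving cost_function_wrapper with scipy.optimize ---
# A minimal, hypothetical example, not the author's code. The dict-based toy
# cost below stands in for (and, in this merged listing, shadows) the real
# cost_function; it only assumes what the wrapper implies: the cost reads its
# inputs, including 'theta', from a single parameter dictionary.
import numpy as np
from scipy.optimize import minimize

def cost_function(cost_function_parameters):
    # Toy stand-in: squared distance of theta from a fixed target.
    theta = cost_function_parameters['theta']
    target = cost_function_parameters['target']
    return float(np.sum((theta - target) ** 2))

params = {'target': np.array([1.0, -2.0])}
result = minimize(cost_function_wrapper, x0=np.zeros(2), args=(params,))
print(result.x)  # should approach [1.0, -2.0]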
def f(x, *args):
    theta1_size = args[0]
    theta1 = x[:theta1_size].reshape(args[1])
    theta2 = x[theta1_size:].reshape(args[2])
    return cost_function([theta1, theta2], X, y, l)
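# --- Added sketch: the flat-vector packing convention that f() assumes ---
# Hypothetical toy shapes, for illustration only. scipy-style optimizers pass a
# single flat vector x, which is why f() reshapes it back into two weight
# matrices; this shows the round trip.
import numpy as np

Theta1 = np.arange(20.0).reshape(5, 4)  # toy hidden-layer weights
Theta2 = np.arange(6.0).reshape(1, 6)   # toy output-layer weights
x = np.concatenate([Theta1.ravel(), Theta2.ravel()])
args = (Theta1.size, Theta1.shape, Theta2.shape)

# Unpack exactly as f() does.
theta1 = x[:args[0]].reshape(args[1])
theta2 = x[args[0]:].reshape(args[2])
assert np.array_equal(theta1, Theta1) and np.array_equal(theta2, Theta2)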
def main():
    print('main', '-' * 50)
    pre_time = time.time()
    data = read_csv('family_data.csv')
    family_size_dict = data[['n_people']].to_dict()['n_people']
    cols = ['choice_' + str(i) for i in range(10)]
    choice_dict = data[cols].to_dict()
    N_DAYS = 100
    days = list(range(N_DAYS, 0, -1))
    max_occu = 300
    min_occu = 125

    submission = read_csv('sample_submission.csv')
    best = [choice_dict['choice_0'][x] for x in range(len(data))]
    best_score = cost_function(best, choice_dict, family_size_dict)
    daily_occupancy = {k: 0 for k in days}
    for fam_id, day in enumerate(best):
        daily_occupancy[day] += family_size_dict[fam_id]

    for fam_id, now_day in enumerate(best):
        random.seed(time.time())
        now_occu = daily_occupancy[now_day]
        now_size = family_size_dict[fam_id]
        if now_occu > max_occu:
            answer_day = -1
            for day in days:
                if daily_occupancy[day] < min_occu:
                    answer_day = day
                    break
            if answer_day == -1:
                for _ in range(100):
                    temp_day = random.randint(1, 100)
                    if daily_occupancy[temp_day] + now_size <= max_occu:
                        answer_day = temp_day
                        break
            if answer_day == -1:
                print('fam_id = ', fam_id)
            else:
                best[fam_id] = answer_day
                daily_occupancy[now_day] -= now_size
                daily_occupancy[answer_day] += now_size

    for day in days:
        assert daily_occupancy[day] >= min_occu and daily_occupancy[
            day] <= max_occu
    best_score = cost_function(best, choice_dict, family_size_dict)
    print('deal with max min daily_occupancy time, score = ',
          time.time() - pre_time, best_score)
    pre_time = time.time()
    submission['assigned_day'] = best
    write_submission('submission_20191207_deal_with_max_min', submission)

    # Track how many consecutive passes fail to improve the score
    # (otherwise not_better is referenced before assignment below).
    not_better = 0
    for _ in range(100):
        pre_all_score = best_score
        for fam_id, now_day in enumerate(best):
            random.seed(time.time())
            now_occu = daily_occupancy[now_day]
            now_size = family_size_dict[fam_id]
            if daily_occupancy[now_day] - now_size < min_occu:
                continue
            for pick in range(10):
                new_day = choice_dict['choice_' + str(pick)][fam_id]
                if new_day == now_day or daily_occupancy[
                        now_day] - now_size < min_occu or daily_occupancy[
                            new_day] + now_size > max_occu:
                    continue
                temp = [x for x in best]
                temp[fam_id] = new_day  # add in the new pick
                temp_score = cost_function(temp, choice_dict,
                                           family_size_dict)
                if temp_score < best_score:
                    best[fam_id] = new_day
                    best_score = temp_score
                    daily_occupancy[now_day] -= now_size
                    daily_occupancy[new_day] += now_size
            for __ in range(10):
                new_day = random.randint(1, 100)
                if new_day == now_day or daily_occupancy[
                        now_day] - now_size < min_occu or daily_occupancy[
                            new_day] + now_size > max_occu:
                    continue
                temp = [x for x in best]
                temp[fam_id] = new_day
                temp_score = cost_function(temp, choice_dict,
                                           family_size_dict)
                if temp_score < best_score:
                    best[fam_id] = new_day
                    best_score = temp_score
                    daily_occupancy[now_day] -= now_size
                    daily_occupancy[new_day] += now_size
            if fam_id % 500 == 0:
                print('best_score = ', best_score, time.time() - pre_time)
                pre_time = time.time()

        if best_score < pre_all_score:
            submission['assigned_day'] = best
            write_submission('submission_20191207_' + str(_), submission)
            print('write_submission, score = ', _, best_score)
        if best_score >= pre_all_score:
            not_better += 1
        else:
            not_better = 0
        if not_better > 3:
            break
        print('not_better = ', not_better)
#! /usr/bin/env python3
import numpy as np
import cost_function

Y_GOLDEN = [0.9, 1.6, 2.4, 2.3, 3.1, 3.6, 3.7, 4.5, 5.1, 5.3]
X = np.arange(0, 10, 1)

my_cost = cost_function.cost_function(X, Y_GOLDEN, (1, 0.5))
print("FOUND: Cost = {:02f} ".format(my_cost))
# -- NN -> input layer: 8 nodes, hidden layer: 5 nodes, output layer: 1 node
# initialize theta
Theta1 = np.random.rand(HIDDEN_LAYER_NODES, number_features) * \
    (2 * INITIAL_EPSILON) - INITIAL_EPSILON
Theta2 = np.random.rand(
    OUTPUT_LAYER_NODES, HIDDEN_LAYER_NODES + 1) * 2 * INITIAL_EPSILON - INITIAL_EPSILON
J = 1

# J_datas = []
for i in range(NUM_ITERATIONS):
    # forward propagation
    J, h, new_a2_sets, z2 = cost_function.cost_function(
        training_sets, Theta1, Theta2, number_features_sets, y_training_sets)
    # J_datas.append(J)
    print(J)
    # if J < COST_THRESHOLD:
    #     break

    # back propagation
    # delta3 = h - y_training_sets.T
    # Theta2_grad = (delta3@new_a2_sets.T)/number_features_sets
    # Fake_theta2 = Theta2[:, 1:]
    # delta2 = (Fake_theta2.T@delta3)*sigmoid_gradient.sigmoid_gradient(z2)
    # Theta1_grad = (delta2@training_sets)/number_features_sets

    # manually compute derivative
    Theta1_grad = np.zeros((Theta1.shape[0], Theta1.shape[1]))
plot_data(X, y)
plt.xlabel('Exam 1 Score')
plt.ylabel('Exam 2 Score')
plt.legend(['Admitted', 'Not admitted'])
input('Program paused. Press enter to continue.')

# ============ Part 2: Compute Cost and Gradient ============
m, n = X.shape
X = map_feature(X)
initial_theta = np.zeros((n + 4, 1))

cost = cost_function(initial_theta, X, y)
grad = gradient(initial_theta, X, y)
print('Cost at initial theta (zeros):', cost)
print('Expected cost (approx): 0.693')
print('Gradient at initial theta (zeros)', grad)

test_theta = np.array([-24, 0.2, 0.2, 0, 0.1, 0]).reshape((6, 1))
cost = cost_function(test_theta, X, y)
grad = gradient(test_theta, X, y)
print('Cost at test theta:', cost)
print('Gradient at test theta', grad)

# ============= Part 3: Optimizing using fminunc =============
        epsilon = abs(l_new - l_old)
        l_old = l_new
        if iteration > n_iteration or epsilon < EPSILON:
            break
        status = MPI.Status()
        dw1, dw2, db1, db2 = comm.recv(source=MPI.ANY_SOURCE,
                                       tag=MPI.ANY_TAG,
                                       status=status)
        w1 = w1 - dw1 * eta
        w2 = w2 - dw2 * eta
        bs = bs - np.hstack((db1, db2)) * eta
        comm.send([w1, w2, bs], dest=status.Get_source(), tag=0)
        print("dw1 from worker {}".format(status.Get_source()))
        # print "dw", dw1
        # print 'w', w
        iteration += 1
    # send message to let workers stop
    for r in range(1, size):
        comm.send(0, dest=r, tag=DIETAG)
else:
    while True:
        dw1, dw2, db1, db2 = cost_function.cost_function(
            w1, w2, bs, layers, subdata[:, 1:3], subdata[:, 3:4])
        comm.send([dw1, dw2, db1.flatten(), db2.flatten()], dest=0, tag=1)
        status = MPI.Status()
        w1, w2, bs = comm.recv(source=0, tag=MPI.ANY_TAG, status=status)
        if status.Get_tag() == DIETAG:
            break
import numpy as np
from sklearn.linear_model import SGDClassifier
from sklearn.datasets.samples_generator import make_blobs
import scipy as sp

# X, y = read_data("ex2data1.txt")
X, y = make_blobs(n_samples=400, centers=2, random_state=0, cluster_std=1)
# After featureNormalize, accuracy can reach about 89%
X, X_mu, X_sigma = featureNormalize(X)
# plot_data(X, y)
y = np.reshape(y, (y.size, 1))
m, n = X.shape
X = np.concatenate((np.ones([len(X[:, 0]), 1]), X), axis=1)
initial_theta = np.zeros([n + 1, 1])
# initial_theta = np.array([1, 1, 1])

# Test: is the cost_function OK?
cost, grad = cost_function(initial_theta, X, y)

# batch_gradient_update error!!! wrong theta
theta = batch_gradient_update(initial_theta, X, y)
print(theta)
prob = sigmoid_function(np.dot(X, theta))
print(prob)
prob[prob > 0.5] = 1.0
prob[prob < 0.5] = 0.0
print(prob)
y = np.reshape(y, prob.shape)
print("accuracy:", tuple(1 - sum(abs(prob - y)) / 100))
def gradient_descent(theta, X, y, alpha, lam):
    J, grad = cost_function(theta, X, y, lam)
    return theta - alpha * grad
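# --- Added usage sketch: iterating the single-step update above ---
# A hypothetical, self-contained driver, not the author's code. The quadratic
# toy cost below stands in for (and, in this merged listing, shadows) the real
# cost_function(theta, X, y, lam), which is assumed to return (J, grad).
import numpy as np

def cost_function(theta, X, y, lam):
    # Regularized least-squares cost and gradient (toy stand-in).
    m = len(y)
    err = X.dot(theta) - y
    J = (err @ err) / (2 * m) + lam * (theta @ theta) / (2 * m)
    grad = X.T.dot(err) / m + lam * theta / m
    return J, grad

rng = np.random.default_rng(0)
X = rng.normal(size=(50, 3))
true_theta = np.array([1.0, -2.0, 0.5])
y = X.dot(true_theta)

theta = np.zeros(3)
for _ in range(2000):
    theta = gradient_descent(theta, X, y, alpha=0.1, lam=0.0)
print(theta)  # should approach true_theta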
x, x_val, x_test = split(x, [80, 99])
y, y_val, y_test = split(y, [80, 99])

print("Start optimization with \n lambda = {} \n max_iteration_number = {} \n examples = {}".format(
    lambda_parameter, max_iteration_number, m))

Theta1, Theta2 = fit([Theta1, Theta2], x, y, max_iteration_number,
                     lambda_parameter)

predictions = predict(Theta1, Theta2, x_val)
predicted_numbers = get_predicted_number(predictions)
correctness = np.array(predicted_numbers).reshape(y_val.shape) == y_val
results = np.c_[predictions, predicted_numbers, y_val, correctness]
results_to_show = pd.DataFrame(results,
                               columns=[1, 2, 3, 4, 5, 6, 7, 8, 9, 0,
                                        'Predicted Value', 'Actual value',
                                        'Correct value'])
print("Results")
print(results_to_show)

val_cost = cost_function([Theta1, Theta2], x_val, y_val, 0)
training_cost = cost_function([Theta1, Theta2], x, y, 0)
print(training_cost)
print(val_cost)

accuracy = get_accuracy(predicted_numbers, list(y_val))
print("accuracy = {}".format(accuracy))

if save_after_training:
    saveThetaParameters('theta_parameters.npz', Theta1, Theta2)
plt.ylim([30, 100])
plt.legend(['Admitted', 'Not admitted'], loc='upper right', numpoints=1)
plt.show()

# ============ Part 2: Compute Cost and Gradient ============
# Setup the data matrix appropriately, and add ones for the intercept term
m, n = X.shape

# Add intercept term to x and X_test
X = np.hstack((np.ones((m, 1)), X))

# Initialize fitting parameters
theta = np.zeros(n + 1)

cost, grad = cost_function(theta, X, y)
print('Cost at initial theta (zeros):', cost)
print('Gradient at initial theta (zeros):', grad)

# ============= Part 3: Optimizing using fmin_tnc =============
theta, nfeval, rc = opt.fmin_tnc(func=cost_function, x0=theta, args=(X, y),
                                 messages=0)
if rc == 0:
    print('Local minimum reached after', nfeval, 'function evaluations.')

# Print theta to screen
cost, _ = cost_function(theta, X, y)
print('Cost at theta found by fminunc:', cost)
print('theta:', theta)