def __init__(self, **kwargs): self.config = kwargs.get('config', {'input_size': 30 * 30, 'hidden_size': 30 * 30, 'lambda': 1, 'num_labels': 1}) self.INIT_EPSILON = initialize_epsilon(self.config['input_size'], self.config['hidden_size']) input_size = self.config['input_size'] hidden_size = self.config['hidden_size'] num_labels = self.config['num_labels'] try: theta1 = None theta2 = None with open('Theta1.csv') as theta1_file: all_lines = [] for line in theta1_file: all_lines += line.split(',') theta1 = np.array([all_lines], dtype=np.float) theta1 = theta1.reshape((hidden_size, input_size + 1)) with open('Theta2.csv') as theta2_file: all_lines = [] for line in theta2_file: all_lines += line.split(',') theta2 = np.array([all_lines], dtype=np.float) theta2 = theta2.reshape((num_labels, hidden_size + 1)) except (IOError, ValueError): theta1 = np.random.rand(hidden_size, input_size + 1) * 2 * self.INIT_EPSILON - self.INIT_EPSILON theta2 = np.random.rand(num_labels, hidden_size + 1) * 2 * self.INIT_EPSILON - self.INIT_EPSILON finally: self.nn_params = wrap(theta1, theta2)
def clear(self): self.nn_params = None input_size = self.config['input_size'] hidden_size = self.config['hidden_size'] num_labels = self.config['num_labels'] theta1 = np.random.rand(hidden_size, input_size + 1) * 2 * self.INIT_EPSILON - self.INIT_EPSILON theta2 = np.random.rand(num_labels, hidden_size + 1) * 2 * self.INIT_EPSILON - self.INIT_EPSILON self.nn_params = wrap(theta1, theta2)
def nn_cfx(self, X, y, nn_params): input_size = self.config['input_size'] num_labels = self.config['num_labels'] hidden_size = self.config['hidden_size'] lambda_ = self.config['lambda'] theta1 = nn_params[:((hidden_size) * (input_size + 1))].reshape( (hidden_size, input_size + 1)) theta2 = nn_params[((hidden_size) * (input_size + 1)):].reshape( (num_labels, hidden_size + 1)) m = X.shape[0] J = 0 theta1_grad = np.zeros(theta1.shape) theta2_grad = np.zeros(theta2.shape) a1 = insert_bias(X) z2 = theta1.dot(a1.T) a2 = sigmoid(z2) a2 = insert_bias(a2.T) z3 = theta2.dot(a2.T) h = sigmoid(z3) yk = np.zeros((num_labels, m)) #back propagation for i in range(m): yk[int(y[i])-1, i] = 1.0 error = (-yk) * np.log(h) - (1 - yk) * np.log(1 - h) J = (1.0/m)*sum(sum(error)) t1 = np.array(theta1[:,1:]) t2 = np.array(theta2[:,1:]) sum1 = sum(sum(np.power(t1,2))) sum2 = sum(sum(np.power(t2,2))) r = (lambda_/(2.0*m))*(sum1 + sum2) J += r for t in range(m): z2 = np.matrix(theta1.dot(a1[t,:].T)).T #change to t later a2 = sigmoid(z2) a2 = insert_bias_row(a2) z3 = theta2.dot(a2) h = sigmoid(z3) z2 = insert_bias_row(z2) output = np.matrix(yk[:,t]).T #change to t later d3 = np.matrix(h - output) sg = np.matrix(sigmoid_gradient(z2)) d2 = np.multiply(theta2.T.dot(d3),sg) d2 = d2[1:,:] theta2_grad += d3.dot(a2.T) theta1_grad += d2.dot(np.matrix(a1[t,:])) #change to t later # regularization theta1_grad[:,0] = np.matrix(theta1_grad[:,0]/(m*1.0)) theta1_grad[:,1:] = (theta1_grad[:,1:]*(1/(m*1.0)) + ((lambda_/(m*1.0)*theta1[:,1:]))) theta2_grad[:,0] = np.matrix(theta2_grad[:,0]/(m*1.0)) theta2_grad[:,1:] = (theta2_grad[:,1:]*(1/(m*1.0)) + ((lambda_/(m*1.0)*theta2[:,1:]))) #print accuracy(predict1(theta1_grad, theta2_grad, X), y) return J, wrap(theta1_grad, theta2_grad)