def predict(self, X):
    # hidden_1
    h1_input = np.dot(X, self.W1) + self.b1
    h1_output = functions.relu(h1_input)
    # hidden_2
    h2_input = np.dot(h1_output, self.W2) + self.b2
    h2_output = functions.relu(h2_input)
    # output
    o_input = np.dot(h2_output, self.W3) + self.b3
    y_hat = functions.softmax(o_input)
    return y_hat
def forwardpass_train(self, X):
    """Make a forward pass and return the intermediate results for backpropagation."""
    # hidden_1
    h1_input = np.dot(X, self.W1) + self.b1
    h1_output = functions.relu(h1_input)
    # hidden_2
    h2_input = np.dot(h1_output, self.W2) + self.b2
    h2_output = functions.relu(h2_input)
    # output
    o_input = np.dot(h2_output, self.W3) + self.b3
    final_output = functions.softmax(o_input)
    return h1_input, h1_output, h2_input, h2_output, final_output
def loss(self, x, t):
    # predict() already applies softmax, so its output goes straight
    # into the cross-entropy loss; no further activation is needed.
    y = self.predict(x)
    loss = functions.cross_entropy_error(y, t)
    return loss
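# The snippets here call into a `functions` module that is not shown. Below is
# a minimal sketch of the helpers they appear to assume (relu, relu_grad,
# softmax, cross_entropy_error); the originals may differ in detail.
import numpy as np

def relu(x):
    return np.maximum(0, x)

def relu_grad(x):
    # 1 where the pre-activation was positive, 0 elsewhere
    return (x > 0).astype(x.dtype)

relu_deriv = relu_grad  # some snippets use this name instead

def softmax(x):
    x = x - np.max(x, axis=-1, keepdims=True)  # stabilize against overflow
    e = np.exp(x)
    return e / np.sum(e, axis=-1, keepdims=True)

def cross_entropy_error(y, t):
    # t is assumed one-hot; the small epsilon avoids log(0)
    batch_size = y.shape[0]
    return -np.sum(t * np.log(y + 1e-7)) / batch_size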
def forward_prop(X, parameters, activation_func, Keep_prob):
    cache = {}
    activations = {}
    activations["A" + str(0)] = X
    L = len(parameters) // 2
    for l in range(1, L):
        cache["Z" + str(l)] = np.dot(parameters["W" + str(l)], activations["A" + str(l - 1)]) + parameters["b" + str(l)]
        if activation_func == "sigmoid":
            activations["A" + str(l)] = sigmoid(cache["Z" + str(l)])
        elif activation_func == "relu":
            activations["A" + str(l)] = relu(cache["Z" + str(l)])
        elif activation_func == "tanh":
            activations["A" + str(l)] = np.tanh(cache["Z" + str(l)])
        else:
            raise ValueError("Invalid activation function.")
        # Inverted dropout regularization: drop units with probability
        # 1 - Keep_prob, then rescale so expected activations are unchanged.
        cache["d" + str(l)] = np.random.rand(activations["A" + str(l)].shape[0], activations["A" + str(l)].shape[1])
        cache["d" + str(l)] = cache["d" + str(l)] < Keep_prob
        activations["A" + str(l)] = np.multiply(activations["A" + str(l)], cache["d" + str(l)])
        activations["A" + str(l)] /= Keep_prob
    # Output layer: sigmoid, no dropout.
    cache["Z" + str(L)] = np.dot(parameters["W" + str(L)], activations["A" + str(L - 1)]) + parameters["b" + str(L)]
    activations["A" + str(L)] = sigmoid(cache["Z" + str(L)])
    # The dropout masks could have been initialized in a second for-loop over
    # the activations, but this way no extra loop is needed.
    return cache, activations
def gradient(self, x, t):
    w1, w2, w3, w4 = self.dict['w1'], self.dict['w2'], self.dict['w3'], self.dict['w4']
    b1, b2, b3, b4 = self.dict['b1'], self.dict['b2'], self.dict['b3'], self.dict['b4']
    grads = {}

    # forward pass (relu replaced an earlier sigmoid version of these layers)
    a1 = np.dot(x, w1) + b1
    z1 = relu(a1)
    a2 = np.dot(z1, w2) + b2
    z2 = relu(a2)
    a3 = np.dot(z2, w3) + b3
    z3 = relu(a3)
    a4 = np.dot(z3, w4) + b4
    y = softmax(a4)

    # backward pass: softmax + cross-entropy gives dL/da4 = (y - t) / batch
    num = x.shape[0]
    dy = (y - t) / num
    grads['w4'] = np.dot(z3.T, dy)
    grads['b4'] = np.sum(dy, axis=0)

    da3 = np.dot(dy, w4.T)
    dz3 = relu_grad(a3) * da3
    grads['w3'] = np.dot(z2.T, dz3)
    grads['b3'] = np.sum(dz3, axis=0)

    da2 = np.dot(dz3, w3.T)
    dz2 = relu_grad(a2) * da2
    grads['w2'] = np.dot(z1.T, dz2)
    grads['b2'] = np.sum(dz2, axis=0)

    da1 = np.dot(dz2, w2.T)
    dz1 = relu_grad(a1) * da1
    grads['w1'] = np.dot(x.T, dz1)
    grads['b1'] = np.sum(dz1, axis=0)

    return grads
def train(self, inputs, labels, inputs_test, labels_test):
    time_start = datetime.now()
    print('\n Train: \n\n')
    self.initial_weights_biases()

    # Shuffle inputs and labels with the same permutation by restoring
    # the RNG state between the two shuffles.
    random_state = np.random.get_state()
    np.random.shuffle(inputs)
    np.random.set_state(random_state)
    np.random.shuffle(labels)

    for epoch in range(self.num_epochs):
        for iteration in range(0, inputs.shape[0], self.batch_size):
            # batch input
            inputs_batch = inputs[iteration:iteration + self.batch_size]
            labels_batch = labels[iteration:iteration + self.batch_size]

            # forward pass
            inputs_batch = inputs_batch.T
            z1 = self.weight1.dot(inputs_batch) + self.bias1
            a1 = functions.relu(z1)
            z2 = self.weight2.dot(a1) + self.bias2
            y = functions.softmax(z2)

            # backward pass: delta_2 = t - y is the negative of the
            # softmax/cross-entropy gradient, so adding these "gradients"
            # below still performs gradient descent.
            delta_2 = labels_batch.T - y
            weight2_gradient = delta_2.dot(a1.T) / self.batch_size
            bias2_gradient = np.sum(delta_2, axis=1, keepdims=True) / self.batch_size
            delta_1 = self.weight2.T.dot(delta_2) * functions.relu_deriv(z1)
            weight1_gradient = delta_1.dot(inputs_batch.T) / self.batch_size
            bias1_gradient = np.sum(delta_1, axis=1, keepdims=True) / self.batch_size

            # parameter update
            self.weight1 = self.weight1 + self.learning_rate * weight1_gradient
            self.weight2 = self.weight2 + self.learning_rate * weight2_gradient
            self.bias1 = self.bias1 + self.learning_rate * bias1_gradient
            self.bias2 = self.bias2 + self.learning_rate * bias2_gradient

        print('Epoch---{}'.format(epoch))
        accuracy, crossentropy = self.predict(inputs, labels, self.weight1,
                                              self.weight2, self.bias1, self.bias2)
        print(" accuracy: ", str(accuracy), " loss: ", str(crossentropy))

    accuracy, crossentropy = self.predict(inputs, labels, self.weight1,
                                          self.weight2, self.bias1, self.bias2)
    print("Training: \n", " accuracy: ", str(accuracy), " loss: ", str(crossentropy))
    delta_time = datetime.now() - time_start
    print('Time:', delta_time)
    test_accuracy, test_entropy = self.predict(inputs_test, labels_test, self.weight1,
                                               self.weight2, self.bias1, self.bias2)
    print("Test: \n", " accuracy: ", str(test_accuracy), " loss: ", str(test_entropy))
def compute(self, inputs):
    self.sum = np.dot(self.weights, inputs) + self.bias
    # the gradient for each output neuron w.r.t. the activation
    self.grad_scalar = np.ones((self.output_size, 1))
    self.grad_scalar[self.sum <= 0] = 0
    # local gradients; these still need to be multiplied by the upstream gradient
    self.grad_x_local = self.weights * self.grad_scalar
    self.grad_w_local = np.repeat(np.transpose(inputs), self.output_size, axis=0)
    # reuse the pre-activation already stored in self.sum
    self.outputs = fn.relu(self.sum)
    return self.outputs
def predict(self, x):
    w1, w2, w3, w4 = self.dict['w1'], self.dict['w2'], self.dict['w3'], self.dict['w4']
    b1, b2, b3, b4 = self.dict['b1'], self.dict['b2'], self.dict['b3'], self.dict['b4']

    # forward pass (relu replaced an earlier sigmoid version of these layers)
    a1 = np.dot(x, w1) + b1
    z1 = relu(a1)
    a2 = np.dot(z1, w2) + b2
    z2 = relu(a2)
    a3 = np.dot(z2, w3) + b3
    z3 = relu(a3)
    a4 = np.dot(z3, w4) + b4
    y = softmax(a4)
    return y
def predict(self, inputs, labels, weight_1, weight_2, bias_1, bias_2):
    # forward pass
    W_layer_1 = weight_1.dot(inputs.T) + bias_1
    layer_1 = functions.relu(W_layer_1)
    W_layer_2 = weight_2.dot(layer_1) + bias_2
    Y_predict = functions.softmax(W_layer_2)
    Y_predict = Y_predict.T
    # mean cross-entropy over the batch
    cross = -np.sum(labels * np.log(Y_predict)) / labels.shape[0]
    # accuracy from the argmax of predictions vs. one-hot labels
    Y_predict = np.argmax(Y_predict, axis=1)
    labels = np.argmax(labels, axis=1)
    accuracy = (labels == Y_predict).mean()
    return accuracy, cross
def GNN_agg_readout(graph_matrix, n_vector, W, n_steps):
    """Given graph_matrix, compute (aggregation 1), (aggregation 2), and
    (READOUT), and return h."""
    n_graph = graph_matrix.shape[0]
    X = X0_initialize(n_vector, n_graph)
    # loop for n_steps
    for step in range(n_steps):
        # aggregation 1
        A = np.dot(X, graph_matrix)
        # aggregation 2
        X = relu(np.dot(W, A))
    # READOUT: sum along the columns
    h = np.sum(X, axis=1)
    return h
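# X0_initialize is referenced above but not shown. A hypothetical
# implementation, assuming each node starts with an n_vector-dimensional
# feature column whose first component is 1 and the rest 0; the original
# initialization may differ:
def X0_initialize(n_vector, n_graph):
    X0 = np.zeros((n_vector, n_graph))
    X0[0, :] = 1.0  # one indicator feature per node
    return X0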
def forward(self, x):
    n_bt = x.shape[0]
    x_ch, x_h, x_w, n_flt, flt_h, flt_w, stride, pad = self.params
    y_ch, y_h, y_w = self.y_ch, self.y_h, self.y_w

    # preprocess the input and filters (im2col)
    self.cols = fn.im2col(x, flt_h, flt_w, y_h, y_w, stride=stride, pad=pad)
    self.w_col = self.w.reshape(n_flt, x_ch * flt_h * flt_w)

    # output
    u = (self.w_col @ self.cols).T + self.b
    self.u = u.reshape(n_bt, y_h, y_w, y_ch).transpose(0, 3, 1, 2)
    self.y = fn.relu(self.u)
def test_relu(self):
    test_data = np.array([[1, 0, -0.1], [-0.5, -0.001, 0.001]])
    expected = np.array([[1, 0, 0], [0, 0, 0.001]])
    actual = functions.relu(test_data)
    self.assertTrue((expected == actual).all())
# Activation-distribution experiment: pass random data through a 5-layer
# network and histogram the activations of each layer.
import numpy as np
import matplotlib.pylab as plt
from functions import relu

input_data = np.random.randn(1000, 100)
node_num = 100
hidden_layer_size = 5
activations = {}

x = input_data
for i in range(hidden_layer_size):
    if i != 0:
        x = activations[i - 1]

    # He initialization (scale sqrt(2/n)), suited to relu; the commented
    # alternatives are std-1 and Xavier initialization.
    # w = np.random.randn(node_num, node_num) * 1
    w = np.random.randn(node_num, node_num) * np.sqrt(2.0 / node_num)
    # w = np.random.randn(node_num, node_num) / np.sqrt(node_num)

    a = np.dot(x, w)
    # z = sigmoid(a)
    z = relu(a)
    activations[i] = z

for i, a in activations.items():
    plt.subplot(1, len(activations), i + 1)
    plt.title(str(i + 1) + "-layer")
    if i != 0:
        plt.yticks([], [])
    plt.hist(a.flatten(), 30, range=(0, 1))
plt.show()
def model(image):
    # block 1
    w1_1 = fun.weight_variable([3, 3, 1, 64])
    b1_1 = fun.bias_variable([64])
    conv1_1 = fun.conv2d(image, w1_1, b1_1)
    relu1_1 = fun.relu(conv1_1)
    pool1 = fun.avg_pool(relu1_1)

    # block 2
    w2_1 = fun.weight_variable([3, 3, 64, 128])
    b2_1 = fun.bias_variable([128])
    conv2_1 = fun.conv2d(pool1, w2_1, b2_1)
    relu2_1 = fun.relu(conv2_1)
    w2_2 = fun.weight_variable([3, 3, 128, 128])
    b2_2 = fun.bias_variable([128])
    conv2_2 = fun.conv2d(relu2_1, w2_2, b2_2)
    relu2_2 = fun.relu(conv2_2)
    pool2 = fun.avg_pool(relu2_2)

    # block 3
    w3_1 = fun.weight_variable([3, 3, 128, 64])
    b3_1 = fun.bias_variable([64])
    conv3_1 = fun.conv2d(pool2, w3_1, b3_1)
    relu3_1 = fun.relu(conv3_1)
    w3_2 = fun.weight_variable([3, 3, 64, 64])
    b3_2 = fun.bias_variable([64])
    conv3_2 = fun.conv2d(relu3_1, w3_2, b3_2)
    relu3_2 = fun.relu(conv3_2)
    w3_3 = fun.weight_variable([3, 3, 64, 64])
    b3_3 = fun.bias_variable([64])
    conv3_3 = fun.conv2d(relu3_2, w3_3, b3_3)
    relu3_3 = fun.relu(conv3_3)
    w3_4 = fun.weight_variable([3, 3, 64, 32])
    b3_4 = fun.bias_variable([32])
    conv3_4 = fun.conv2d(relu3_3, w3_4, b3_4)
    relu3_4 = fun.relu(conv3_4)
    pool3 = fun.avg_pool(relu3_4)

    # block 4
    w4_1 = fun.weight_variable([3, 3, 32, 32])
    b4_1 = fun.bias_variable([32])
    conv4_1 = fun.conv2d(pool3, w4_1, b4_1)
    relu4_1 = fun.relu(conv4_1)
    w4_2 = fun.weight_variable([3, 3, 32, 32])
    b4_2 = fun.bias_variable([32])
    conv4_2 = fun.conv2d(relu4_1, w4_2, b4_2)
    relu4_2 = fun.relu(conv4_2)
    w4_3 = fun.weight_variable([3, 3, 32, 32])
    b4_3 = fun.bias_variable([32])
    conv4_3 = fun.conv2d(relu4_2, w4_3, b4_3)
    relu4_3 = fun.relu(conv4_3)
    w4_4 = fun.weight_variable([3, 3, 32, 32])
    b4_4 = fun.bias_variable([32])
    conv4_4 = fun.conv2d(relu4_3, w4_4, b4_4)
    relu4_4 = fun.relu(conv4_4)
    pool4 = fun.avg_pool(relu4_4)

    # block 5 (truncated in the source)
    w5_1 = fun.weight_variable([3, 3, 32, 64])
    b5_1 = fun.bias_variable([64])
    conv5_1 = fun.conv2d(pool4, w5_1, b5_1)
    relu5_1 = fun.relu(conv5_1)
    w5_2 = fun.weight_variable([3, 3, 64, 64])
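# The `fun` helpers used in model() are not shown. A plausible TensorFlow 1.x
# style sketch; the initialization constants, strides, and pooling window are
# assumptions, and the original module may differ:
import tensorflow as tf

def weight_variable(shape):
    return tf.Variable(tf.truncated_normal(shape, stddev=0.1))

def bias_variable(shape):
    return tf.Variable(tf.constant(0.1, shape=shape))

def conv2d(x, w, b):
    # 'SAME' padding keeps spatial dimensions; bias broadcasts over channels
    return tf.nn.conv2d(x, w, strides=[1, 1, 1, 1], padding='SAME') + b

def relu(x):
    return tf.nn.relu(x)

def avg_pool(x):
    # 2x2 average pooling with stride 2
    return tf.nn.avg_pool(x, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')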
def __call__(self, x):
    h = F.relu(x @ self.w1.T() + self.b1)
    h = F.relu(h @ self.w2.T() + self.b2)
    y = F.softmax(h @ self.w3.T() + self.b3)
    return y
from functions import relu, softmax, cross_entropy_error
import numpy as np

# data
x = np.array([[0.1, 0.8]])
w1 = np.array([[10, 7], [0.8, 6]])
b1 = np.array([[1, 1]])
w2 = np.array([[0.4, 30], [0.8, 0.2]])
b2 = np.array([[1, 1]])
t = np.array([[1, 0]])
learning_rate = 0.02

for i in range(1):
    a1 = np.dot(x, w1) + b1
    z1 = relu(a1)
    a2 = np.dot(z1, w2) + b2
    y = softmax(a2)
    loss = cross_entropy_error(y, t)

    # partial derivatives shared by every weight
    dEdY = -(t / y)  # [dEdy1, dEdy2]
    S = np.sum(np.exp(a2))
    dYdS = np.exp(a2) / np.square(S)  # [dY1dS, dY2dS]

    print("dw2_11: ", (-(t[0][0] / y[0][0]) * (np.exp(a2[0][1]) / np.square(S))) * np.exp(a2[0][0]) * z1[0][0])
    print("dw2_12: ", (-(t[0][0] / y[0][0]) * (np.exp(a2[0][1]) / np.square(S))) * np.exp(a2[0][1]) * z1[0][0])
    print("dw2_21: ", (-(t[0][0] / y[0][0]) * (np.exp(a2[0][1]) / np.square(S))) * np.exp(a2[0][0]) * z1[0][1])
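# A finite-difference sanity check for the hand-derived partials above, added
# here as a sketch. It perturbs one entry of w2 and compares the numerical
# slope of the loss against the printed analytic value; loss_at and eps are
# names introduced for this check only.
def loss_at(w2_perturbed):
    a1 = np.dot(x, w1) + b1
    z1 = relu(a1)
    a2 = np.dot(z1, w2_perturbed) + b2
    return cross_entropy_error(softmax(a2), t)

eps = 1e-6
w2_plus = w2.copy()
w2_plus[0][0] += eps
w2_minus = w2.copy()
w2_minus[0][0] -= eps
# central difference approximation of dE/dw2_11
print("numerical dw2_11: ", (loss_at(w2_plus) - loss_at(w2_minus)) / (2 * eps))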
import numpy as np
import pandas as pd
# import plotly.plotly as py
import plotly.graph_objs as go
import functions as fxn

x = np.arange(0, 1.01, 0.01)
trace0 = go.Scatter(x=x, y=[fxn.sigmoid(i, 0.5, k=10) for i in x],
                    mode='lines', name='Logistic', line={'color': 'blue'})
trace1 = go.Scatter(x=x, y=[fxn.relu(i, 2) for i in x],
                    mode='lines', name='Linear', line={'color': 'black'})
trace2 = go.Scatter(x=x, y=[i**5 for i in x],
                    mode='lines', name='Exponential', line={'color': 'orange'})
trace3 = go.Scatter(x=x, y=[i**0.2 for i in x],
                    mode='lines', name='Logarithmic', line={'color': 'purple'})
def forward(self, input):
    """Apply the ReLU activation function to the input and save the input."""
    self.input = input
    return functions.relu(input)
def forward(self, x):
    self.x = x
    return fun.relu(x)
def forward(self, x):
    out = self.fc1(x)
    out = relu(out)
    out = self.fc2(out)
    return out
def forward(self, data):
    res = relu(self.W.dot(data) + self.b)
    cache = data, res
    return res, cache
def forward(self, x):
    self.x = x
    self.u = x @ self.w + self.b
    self.y = fn.relu(self.u)
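# The forward above caches self.x and self.u but no backward is shown. A
# hypothetical matching backward for this fully connected + relu layer,
# assuming grad_y is the upstream gradient and fn.relu is max(0, x):
def backward(self, grad_y):
    delta = grad_y * (self.u > 0)        # relu gradient gates the upstream signal
    self.grad_w = self.x.T @ delta       # dL/dw
    self.grad_b = np.sum(delta, axis=0)  # dL/db, summed over the batch
    self.grad_x = delta @ self.w.T       # gradient passed downstream
    return self.grad_x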