def TestBackpropMmt():
    X = np.array([[0, 0, 1],
                  [0, 1, 1],
                  [1, 0, 1],
                  [1, 1, 1]])
    D = np.array([[0], [1], [1], [0]])

    W1 = 2 * np.random.random((4, 3)) - 1
    W2 = 2 * np.random.random((1, 4)) - 1

    for _epoch in range(10000):
        W1, W2 = BackPropMmt(W1, W2, X, D)

    N = 4
    for k in range(N):
        x = X[k, :].T
        v1 = np.matmul(W1, x)
        y1 = Sigmoid(v1)
        v = np.matmul(W2, y1)
        y = Sigmoid(v)
        print(y)
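# Several snippets in this collection (TestBackpropMmt above, BackpropXOR,
# BackPropMmt, DeltaSGD, DeepDropout, ...) call Sigmoid(v) as a free function
# rather than as a method of a Sigmoid module.  Its definition is not shown
# here; the following is only a minimal sketch of the assumed elementwise
# logistic helper, not the original authors' code.
def Sigmoid(x):
    """Elementwise logistic function 1 / (1 + exp(-x))."""
    return 1.0 / (1.0 + np.exp(-x))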
def backprop(self, x, y):
    """Return a tuple ``(nabla_b, nabla_w)`` representing the gradient of the
    cost function C_x.  ``nabla_b`` and ``nabla_w`` are layer-by-layer lists of
    numpy arrays, similar to ``self.biases`` and ``self.weights``."""
    nabla_b = [np.zeros(b.shape) for b in self.biases]
    nabla_w = [np.zeros(w.shape) for w in self.weights]

    # forward pass
    activation = x
    # list to store all the activations, layer by layer
    activations = [x]
    # list to store all the z vectors, layer by layer
    zs = []
    for b, w in zip(self.biases, self.weights):
        z = np.dot(w, activation) + b
        zs.append(z)
        activation = Sigmoid.sigmoid_vec(z)
        activations.append(activation)

    # backward pass
    delta = self.cost_derivative(activations[-1], y) * \
        Sigmoid.sigmoid_prime_vec(zs[-1])
    nabla_b[-1] = delta
    nabla_w[-1] = np.dot(delta, activations[-2].transpose())

    # Walk backwards from the second-to-last layer (layer = 2) down to the
    # first hidden layer, propagating the error delta at each step.
    for layer in range(2, self.layer_count):
        z = zs[-layer]
        spv = Sigmoid.sigmoid_prime_vec(z)
        delta = np.dot(self.weights[-layer + 1].transpose(), delta) * spv
        nabla_b[-layer] = delta
        nabla_w[-layer] = np.dot(delta, activations[-layer - 1].transpose())
    return (nabla_b, nabla_w)
def BackpropXOR(W1, W2, X, D):
    alpha = 0.9

    N = 4
    for k in range(N):
        xT = X[k, :].T  # transpose of the k-th input row
        d = D[k]

        v1 = np.matmul(W1, xT)
        y1 = Sigmoid(v1)
        v = np.matmul(W2, y1)
        y = Sigmoid(v)

        e = d - y
        delta = y * (1 - y) * e

        e1 = np.matmul(W2.T, delta)
        delta1 = y1 * (1 - y1) * e1

        dW1 = (alpha * delta1).reshape(4, 1) * xT.reshape(1, 3)
        W1 = W1 + dW1

        dW2 = alpha * delta * y1
        W2 = W2 + dW2

    return W1, W2
def nnCostFunction(theta1, theta2, X, y, lamda):
    # X, y, theta1 and theta2 are assumed to be np.matrix objects,
    # so * denotes matrix multiplication.
    m = np.shape(X)[0]

    A_1 = np.hstack((np.ones((m, 1)), X))  # add bias column
    Z_2 = A_1 * theta1.T
    A_2 = Sigmoid.sigmoid(Z_2)
    A_2 = np.hstack((np.ones((m, 1)), A_2))  # add bias column
    Z_3 = A_2 * theta2.T
    A_3 = Sigmoid.sigmoid(Z_3)

    log_A_3 = np.log(A_3)
    log_a = np.log(np.ones((m, 1)) - A_3)
    a = np.multiply(-y, log_A_3) - np.multiply(np.ones((m, 1)) - y, log_a)
    cost = np.sum(a) / m  # cost without the regularization term

    # cost with the regularization term (bias columns excluded)
    cost = cost + lamda * (
        np.sum(np.multiply(theta1[:, 1:], theta1[:, 1:])) +
        np.sum(np.multiply(theta2[:, 1:], theta2[:, 1:]))) / (2 * m)

    D1 = np.zeros(np.shape(theta1))
    D2 = np.zeros(np.shape(theta2))
    for i in range(0, m):
        a1 = A_1[i, :].T
        z2 = Z_2[i, :].T
        z2 = np.vstack((1, z2))
        a2 = A_2[i, :].T
        z3 = Z_3[i, :].T
        a3 = A_3[i, :].T
        y1 = y[i, :].T

        delta3 = a3 - y1
        delta2 = np.multiply((theta2.T) * delta3, SigmoidGradient.SG(z2))

        D1 += delta2[1:] * a1.T  # drop the bias-unit error
        D2 += delta3 * a2.T

    # note: these regularization terms include the bias columns of theta1/theta2
    D1 = D1 / m + lamda * theta1 / m
    D2 = D2 / m + lamda * theta2 / m
    return cost, D1, D2
def BackPropMmt(W1, W2, X, D):
    alpha = 0.9
    beta = 0.9

    mmt1 = np.zeros_like(W1)
    mmt2 = np.zeros_like(W2)

    N = 4
    for k in range(N):
        x = X[k, :].T
        d = D[k]

        v1 = np.matmul(W1, x)
        y1 = Sigmoid(v1)
        v = np.matmul(W2, y1)
        y = Sigmoid(v)

        e = d - y
        delta = y * (1 - y) * e

        e1 = np.matmul(W2.T, delta)
        delta1 = y1 * (1 - y1) * e1

        dW1 = (alpha * delta1).reshape(4, 1) * x.reshape(1, 3)
        mmt1 = dW1 + beta * mmt1
        W1 = W1 + mmt1

        dW2 = alpha * delta * y1
        mmt2 = dW2 + beta * mmt2
        W2 = W2 + mmt2

    return W1, W2
def BackpropCE(W1, W2, X, D):
    alpha = 0.9

    N = 4
    for k in range(N):
        x = X[k, :].T
        d = D[k]

        v1 = np.matmul(W1, x)
        y1 = Sigmoid(v1)
        v = np.matmul(W2, y1)
        y = Sigmoid(v)

        e = d - y
        delta = e  # cross-entropy cost: the output-layer delta is just the error

        e1 = np.matmul(W2.T, delta)
        delta1 = y1 * (1 - y1) * e1

        dW1 = (alpha * delta1).reshape(4, 1) * x.reshape(1, 3)
        W1 = W1 + dW1

        dW2 = alpha * delta * y1
        W2 = W2 + dW2

    return W1, W2
def train(Theta, X_train, Y_train, learning_rate, noOfIter):
    train_size = X_train.shape[0]

    z = np.dot(X_train, Theta)
    h = Sigmoid.sigmoid(z)
    J = CostFunction.cost(h, Y_train, train_size)

    Jarr = []
    Jarr = np.hstack((Jarr, J))

    for i in range(noOfIter):
        dTheta = 1 / train_size * np.dot((h - Y_train).T, X_train).T
        Theta = Theta - learning_rate * dTheta

        z = np.dot(X_train, Theta)
        h = Sigmoid.sigmoid(z)
        J = CostFunction.cost(h, Y_train, train_size)
        Jarr = np.hstack((Jarr, J))
        print(J)

    plotGraph = False
    if plotGraph:
        PlotData.plotGraph(Jarr, noOfIter)

    return Theta, J
def feed_forward(theta1, theta2, X):
    z2 = X @ theta1.T
    a2 = sg.sigmoid(z2)               # (5000, 25)
    a2 = np.insert(a2, 0, 1, axis=1)  # (5000, 26) -- prepend bias column
    z3 = a2 @ theta2.T
    a3 = sg.sigmoid(z3)
    return z2, a2, z3, a3
def cost_reg(theta, X, y, lmd):
    # do not penalize the bias term theta[0]
    _theta = theta[1:]
    reg = (lmd / (2 * len(X))) * (_theta @ _theta)

    first = y * np.log(Sigmoid.sigmoid(X @ theta))
    second = (1 - y) * np.log(1 - Sigmoid.sigmoid(X @ theta))
    final = -np.mean(first + second)

    return final + reg
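# A hedged usage sketch for cost_reg: minimizing it with scipy.optimize on
# synthetic data.  The random data, the lambda value of 1.0 and the variable
# names X_demo / y_demo are illustrative assumptions, not values from the
# original project; cost_reg itself still relies on the Sigmoid module used
# throughout this collection.
import numpy as np
import scipy.optimize as opt

rng = np.random.default_rng(0)
X_demo = np.hstack((np.ones((100, 1)), rng.normal(size=(100, 2))))  # bias column first
y_demo = (X_demo[:, 1] + X_demo[:, 2] > 0).astype(float)

theta0 = np.zeros(X_demo.shape[1])
res = opt.minimize(fun=cost_reg, x0=theta0, args=(X_demo, y_demo, 1.0), method='TNC')
print(res.x)  # fitted parameters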
def __init__(self):
    conv1 = ConvLayer(28, 28, 1, 6, 5, 1, 2)
    sigmoid1 = Sigmoid()
    pool1 = Pool(2)
    conv2 = ConvLayer(14, 14, 6, 16, 5, 1, 0)
    sigmoid2 = Sigmoid()
    pool2 = Pool(2)
    fc = Perceptron([400, 600, 10])
    self.layers = [conv1, sigmoid1, pool1, conv2, sigmoid2, pool2, fc]
def costfunction(theta, X, y):
    m = len(y)
    J = 0
    grad = np.zeros(np.shape(theta))

    a = np.dot((-y).reshape(1, m), np.log(Sigmoid.sigmoid(np.dot(X, theta))))
    c = (np.ones((m, 1)) - y).reshape(1, m)
    d = np.log(np.ones((m, 1)) - Sigmoid.sigmoid(np.dot(X, theta)))
    b = np.dot(c, d)
    J = (a - b) / m

    grad = np.dot((Sigmoid.sigmoid(np.dot(X, theta)) - y).reshape(1, m), X) / m
    grad = grad.reshape(np.shape(theta))
    return J, grad
def costfunction(theta, X, y, lamda):
    # X, y and theta are assumed to be np.matrix objects, so * is matrix multiplication
    m = y.shape[0]

    a = (-y).T * np.log(Sigmoid.sigmoid(X * theta))
    c = (np.ones((m, 1)) - y).T
    d = np.log(np.ones((m, 1)) - Sigmoid.sigmoid(X * theta))
    b = c * d
    # regularized cost; theta[0]**2 is subtracted so the bias term is not penalized
    J = (a - b) / m + lamda * (theta.T * theta - theta[0]**2) / (2 * m)

    grad = X.T * (Sigmoid.sigmoid(X * theta) - y) / m
    grad = grad + lamda * theta / m
    # the bias gradient is not regularized
    grad[0] = X[:, 0].T * (Sigmoid.sigmoid(X * theta) - y) / m
    return J, grad
def costfunction(theta, X, y, lamda):
    m = len(y)
    J = 0
    grad = np.zeros(np.shape(theta))

    a = np.dot((-y).reshape(1, m), np.log(Sigmoid.sigmoid(np.dot(X, theta))))
    c = (np.ones((m, 1)) - y).reshape(1, m)
    d = np.log(np.ones((m, 1)) - Sigmoid.sigmoid(np.dot(X, theta)))
    b = np.dot(c, d)
    # regularized cost (note: the bias term theta[0] is included in the penalty here)
    J = (a - b) / m + lamda * np.dot(np.transpose(theta), theta) / (2 * m)

    grad = np.dot((Sigmoid.sigmoid(np.dot(X, theta)) - y).reshape(1, m), X) / m
    grad = grad.reshape(np.shape(theta))
    grad = grad + lamda * theta / m
    return J, grad
def main():
    theta1, theta2 = load_weigth('ex3weights.mat')
    X, y = mc.load_data('ex3data1.mat')
    y = y.flatten()
    X = np.insert(X, 0, values=np.ones(X.shape[0]), axis=1)

    z2 = X @ theta1.T
    a2 = Sigmoid.sigmoid(z2)
    a2 = np.insert(a2, 0, 1, axis=1)  # prepend the bias column after the activation
    z3 = a2 @ theta2.T
    a3 = Sigmoid.sigmoid(z3)

    y_pred = np.argmax(a3, axis=1) + 1
    accuracy = np.mean(y_pred == y)  # accuracy: 97.52%
def DeepDropout(W1, W2, W3, W4, X, D):
    alpha = 0.01

    N = 5
    for k in range(N):
        x = np.reshape(X[:, :, k], (25, 1))

        v1 = np.matmul(W1, x)
        y1 = Sigmoid(v1)
        y1 = y1 * Dropout(y1, 0.2)

        v2 = np.matmul(W2, y1)
        y2 = Sigmoid(v2)
        y2 = y2 * Dropout(y2, 0.2)

        v3 = np.matmul(W3, y2)
        y3 = Sigmoid(v3)
        y3 = y3 * Dropout(y3, 0.2)

        v = np.matmul(W4, y3)
        y = Softmax(v)

        d = D[k, :].T
        e = d - y
        delta = e

        e3 = np.matmul(W4.T, delta)
        delta3 = y3 * (1 - y3) * e3

        e2 = np.matmul(W3.T, delta3)
        delta2 = y2 * (1 - y2) * e2

        e1 = np.matmul(W2.T, delta2)
        delta1 = y1 * (1 - y1) * e1

        dW4 = alpha * delta * y3.T
        W4 = W4 + dW4

        dW3 = alpha * delta3 * y2.T
        W3 = W3 + dW3

        dW2 = alpha * delta2 * y1.T
        W2 = W2 + dW2

        dW1 = alpha * delta1 * x.T
        W1 = W1 + dW1

    return W1, W2, W3, W4
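# DeepDropout above and the MultiClass/RealMultiClass routines below also
# assume free functions Softmax(v) and Dropout(y, ratio), whose definitions
# are not included in this collection.  These are plausible minimal sketches,
# not the original code: Softmax is the usual normalized exponential, and
# Dropout returns a mask that zeroes a `ratio` fraction of the units and
# scales the survivors by 1 / (1 - ratio).
def Softmax(x):
    x = np.subtract(x, np.max(x))      # shift for numerical stability
    return np.exp(x) / np.sum(np.exp(x))

def Dropout(y, ratio):
    ym = np.zeros_like(y)
    num = round(y.size * (1 - ratio))   # number of units to keep
    idx = np.random.choice(y.size, num, replace=False)
    ym.flat[idx] = 1.0 / (1.0 - ratio)  # kept units are scaled, the rest stay zero
    return ym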
def MultiClass(W1, W2, X, D):
    alpha = 0.9

    N = 5
    for k in range(N):
        x = np.reshape(X[:, :, k], (25, 1))
        d = D[k, :].T

        v1 = np.matmul(W1, x)
        y1 = Sigmoid(v1)
        v = np.matmul(W2, y1)
        y = Softmax(v)

        e = d - y
        delta = e

        e1 = np.matmul(W2.T, delta)
        delta1 = y1 * (1 - y1) * e1

        dW1 = alpha * delta1 * x.T
        W1 = W1 + dW1

        dW2 = alpha * delta * y1.T
        W2 = W2 + dW2

    return W1, W2
def DeltaBatch(W, X, D):
    alpha = 0.9

    dWsum = np.zeros(3)

    N = 4
    for k in range(N):
        x = X[k, :].T
        d = D[k]

        v = np.matmul(W, x)
        y = Sigmoid(v)

        e = d - y
        delta = y * (1 - y) * e
        dW = alpha * delta * x

        dWsum = dWsum + dW

    dWavg = dWsum / N
    W[0][0] = W[0][0] + dWavg[0]
    W[0][1] = W[0][1] + dWavg[1]
    W[0][2] = W[0][2] + dWavg[2]

    return W
def feedforward(self, inputs):
    new_neurons = np.array([inputs])
    for i in range(len(self.structure) - 1):
        sum_without_bias = np.dot(new_neurons, self.weights[i])
        total = np.add(sum_without_bias, self.biases[i])
        new_neurons = Sigmoid.sigmoid(total)
    return np.ndarray.tolist(new_neurons)[0]
def RealMultiClass():
    W1, W2 = TestMultiClass()

    X = np.zeros((5, 5, 5))
    X[:, :, 0] = [[0, 0, 1, 1, 0],
                  [0, 0, 1, 1, 0],
                  [0, 1, 0, 1, 0],
                  [0, 0, 0, 1, 0],
                  [0, 1, 1, 1, 0]]
    X[:, :, 1] = [[1, 1, 1, 1, 0],
                  [0, 0, 0, 0, 1],
                  [0, 1, 1, 1, 0],
                  [1, 0, 0, 0, 1],
                  [1, 1, 1, 1, 1]]
    X[:, :, 2] = [[1, 1, 1, 1, 0],
                  [0, 0, 0, 0, 1],
                  [0, 1, 1, 1, 0],
                  [1, 0, 0, 0, 1],
                  [1, 1, 1, 1, 0]]
    X[:, :, 3] = [[0, 1, 1, 1, 0],
                  [0, 1, 0, 0, 0],
                  [0, 1, 1, 1, 0],
                  [0, 0, 0, 1, 0],
                  [0, 1, 1, 1, 0]]
    X[:, :, 4] = [[0, 1, 1, 1, 1],
                  [0, 1, 0, 0, 0],
                  [0, 1, 1, 1, 0],
                  [0, 0, 0, 1, 0],
                  [1, 1, 1, 1, 0]]

    N = 5
    for k in range(N):
        x = np.reshape(X[:, :, k], (25, 1))
        v1 = np.matmul(W1, x)
        y1 = Sigmoid(v1)
        v = np.matmul(W2, y1)
        y = Softmax(v)

        print("N = {}: ".format(k + 1))
        print(y)
def TestDeepDropout():
    X = np.zeros((5, 5, 5))
    X[:, :, 0] = [[0, 1, 1, 0, 0],
                  [0, 0, 1, 0, 0],
                  [0, 0, 1, 0, 0],
                  [0, 0, 1, 0, 0],
                  [0, 1, 1, 1, 0]]
    X[:, :, 1] = [[1, 1, 1, 1, 0],
                  [0, 0, 0, 0, 1],
                  [0, 1, 1, 1, 0],
                  [1, 0, 0, 0, 0],
                  [1, 1, 1, 1, 1]]
    X[:, :, 2] = [[1, 1, 1, 1, 0],
                  [0, 0, 0, 0, 1],
                  [0, 1, 1, 1, 0],
                  [0, 0, 0, 0, 1],
                  [1, 1, 1, 1, 0]]
    X[:, :, 3] = [[0, 0, 0, 1, 0],
                  [0, 0, 1, 1, 0],
                  [0, 1, 0, 1, 0],
                  [1, 1, 1, 1, 1],
                  [0, 0, 0, 1, 0]]
    X[:, :, 4] = [[1, 1, 1, 1, 1],
                  [1, 0, 0, 0, 0],
                  [1, 1, 1, 1, 0],
                  [0, 0, 0, 0, 1],
                  [1, 1, 1, 1, 0]]

    D = np.array([[[1, 0, 0, 0, 0]],
                  [[0, 1, 0, 0, 0]],
                  [[0, 0, 1, 0, 0]],
                  [[0, 0, 0, 1, 0]],
                  [[0, 0, 0, 0, 1]]])

    W1 = 2 * np.random.random((20, 25)) - 1
    W2 = 2 * np.random.random((20, 20)) - 1
    W3 = 2 * np.random.random((20, 20)) - 1
    W4 = 2 * np.random.random((5, 20)) - 1

    for _epoch in range(20000):
        W1, W2, W3, W4 = DeepDropout(W1, W2, W3, W4, X, D)

    N = 5
    for k in range(N):
        x = np.reshape(X[:, :, k], (25, 1))
        v1 = np.matmul(W1, x)
        y1 = Sigmoid(v1)
        v2 = np.matmul(W2, y1)
        y2 = Sigmoid(v2)
        v3 = np.matmul(W3, y2)
        y3 = Sigmoid(v3)
        v = np.matmul(W4, y3)
        y = Softmax(v)

        print("Y = ", k + 1, ": ")
        print(y)
def predict(theta1, theta2, X):
    # X, theta1 and theta2 are assumed to be np.matrix objects
    m = np.shape(X)[0]

    X_1 = np.hstack((np.ones((m, 1)), X))
    A = Sigmoid.sigmoid(X_1 * theta1.T)
    A_1 = np.hstack((np.ones((m, 1)), A))
    B = Sigmoid.sigmoid(A_1 * theta2.T)

    # one-hot prediction: mark the most probable class for each example
    # (an earlier version thresholded B >= 0.5 elementwise instead of taking argmax)
    pred = np.mat(np.zeros(np.shape(B)))
    for i in range(m):
        a = np.argmax(B[i])
        pred[i, a] = 1
    return pred
def gradient(theta1, theta2, X, y):
    z2, a2, z3, h = ff.feed_forward(theta1, theta2, X)

    d3 = h - y                                         # (5000, 10)
    d2 = d3 @ theta2[:, 1:] * sg.sigmoid_gradient(z2)  # (5000, 25)
    D2 = d3.T @ a2                                     # (10, 26)
    D1 = d2.T @ X                                      # (25, 401)
    D = (1 / len(X)) * serialize(D1, D2)               # (10285,)
    return D
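# gradient() above relies on sg.sigmoid_gradient, which is not shown in this
# collection.  Assuming sg is the Sigmoid helper module used elsewhere, this is
# a minimal sketch of the derivative sigma'(z) = sigma(z) * (1 - sigma(z)),
# not the original definition.
def sigmoid_gradient(z):
    s = 1.0 / (1.0 + np.exp(-z))
    return s * (1 - s)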
def TestDeltaSGD():
    X = np.array([[0, 0, 1],
                  [0, 1, 1],
                  [1, 0, 1],
                  [1, 1, 1]])
    D = np.array([[0], [0], [1], [1]])

    W = 2 * np.random.random((1, 3)) - 1

    for _epoch in range(10000):
        W = DeltaSGD(W, X, D)

    N = 4
    for k in range(N):
        x = X[k, :].T
        v = np.matmul(W, x)
        y = Sigmoid(v)
        print(y)
def TestDeltaXOR():
    X = np.array([[0, 0, 1],
                  [0, 1, 1],
                  [1, 0, 1],
                  [1, 1, 1]])
    D = np.array([[0], [1], [1], [0]])

    W = 2 * np.random.random((1, 3)) - 1

    # train
    for _epoch in range(40000):
        W = DeltaXOR(W, X, D)

    # inference
    N = 4
    for k in range(N):
        x = X[k, :].T
        v = np.matmul(W, x)
        y = Sigmoid(v)
        print(y)
def __init__(self, input_size, hidden_size, output_size):
    I, H, O = input_size, hidden_size, output_size

    # initialize weights and biases
    W1 = 0.01 * np.random.randn(I, H)
    b1 = np.zeros(H)
    W2 = 0.01 * np.random.randn(H, O)
    b2 = np.zeros(O)

    # build the layers
    self.layers = [
        Affine(W1, b1),
        Sigmoid(),
        Affine(W2, b2)
    ]
    self.loss_layer = SoftmaxWithLoss()

    # collect all weights and gradients into lists
    self.params, self.grads = [], []
    for layer in self.layers:
        self.params += layer.params
        self.grads += layer.grads
def DeltaSGD(W, X, D):
    alpha = 0.9

    N = 4
    for k in range(N):
        x = X[k, :].T
        d = D[k]

        v = np.matmul(W, x)
        y = Sigmoid(v)

        e = d - y
        delta = y * (1 - y) * e
        dW = alpha * delta * x

        W[0][0] = W[0][0] + dW[0]
        W[0][1] = W[0][1] + dW[1]
        W[0][2] = W[0][2] + dW[2]

    return W
def loglik(self, t, x=np.array([0]), k=0.5, lmb=0.3, W=None,
           x_samples=None, x_censored=None):
    # If there are features, accumulate the log-likelihood through them.
    if W is not None and len(W.shape) == 2 and x_samples is not None and x_censored is not None:
        lik = 0
        s1 = Sigmoid(6.0)
        s2 = Sigmoid(1000.0)
        for i in range(len(x_samples)):
            theta = np.dot(W.T, x_samples[i])
            [k, lmb] = [s1.transformed(theta[0]), s2.transformed(theta[1])]
            lik += self.logpdf(t[i], k, lmb)
        for i in range(len(x_censored)):
            theta = np.dot(W.T, x_censored[i])
            [k, lmb] = [s1.transformed(theta[0]), s2.transformed(theta[1])]
            lik += self.logsurvival(x[i], k, lmb)
        return lik
    # If there are no features, calculate the feature-less log-likelihood.
    else:
        return sum(self.logpdf(t, k, lmb)) + sum(self.logsurvival(x, k, lmb))
def TestMultiClass():
    X = np.zeros((5, 5, 5))
    X[:, :, 0] = [[0, 1, 1, 0, 0],
                  [0, 0, 1, 0, 0],
                  [0, 0, 1, 0, 0],
                  [0, 0, 1, 0, 0],
                  [0, 1, 1, 1, 0]]
    X[:, :, 1] = [[1, 1, 1, 1, 0],
                  [0, 0, 0, 0, 1],
                  [0, 1, 1, 1, 0],
                  [1, 0, 0, 0, 0],
                  [1, 1, 1, 1, 1]]
    X[:, :, 2] = [[1, 1, 1, 1, 0],
                  [0, 0, 0, 0, 1],
                  [0, 1, 1, 1, 0],
                  [0, 0, 0, 0, 1],
                  [1, 1, 1, 1, 0]]
    X[:, :, 3] = [[0, 0, 0, 1, 0],
                  [0, 0, 1, 1, 0],
                  [0, 1, 0, 1, 0],
                  [1, 1, 1, 1, 1],
                  [0, 0, 0, 1, 0]]
    X[:, :, 4] = [[1, 1, 1, 1, 1],
                  [1, 0, 0, 0, 0],
                  [1, 1, 1, 1, 0],
                  [0, 0, 0, 0, 1],
                  [1, 1, 1, 1, 0]]

    D = np.array([[[1, 0, 0, 0, 0]],
                  [[0, 1, 0, 0, 0]],
                  [[0, 0, 1, 0, 0]],
                  [[0, 0, 0, 1, 0]],
                  [[0, 0, 0, 0, 1]]])

    W1 = 2 * np.random.random((50, 25)) - 1
    W2 = 2 * np.random.random((5, 50)) - 1

    for _epoch in range(10000):
        W1, W2 = MultiClass(W1, W2, X, D)

    N = 5
    for k in range(N):
        x = np.reshape(X[:, :, k], (25, 1))
        v1 = np.matmul(W1, x)
        y1 = Sigmoid(v1)
        v = np.matmul(W2, y1)
        y = Softmax(v)

        print("Y = {}: ".format(k + 1))
        print(y)

    return W1, W2
class NeuralNetwork:
    inputLayer = []
    hiddenLayer = []
    outputLayer = []
    inputNeurons = 2
    hiddenNeurons = 3
    outputNeurons = 1
    alpha = 0.5
    eta = 0.8
    activationFunction = Sigmoid.Sigmoid()

    def __init__(self):
        self.createNeurons(self.inputLayer, self.inputNeurons, 0)
        self.createNeurons(self.hiddenLayer, self.hiddenNeurons, self.inputNeurons)
        self.createNeurons(self.outputLayer, self.outputNeurons, self.hiddenNeurons)

    def createNeurons(self, layer, layerNeurons, inputLayerNeurons):
        for x in range(layerNeurons):
            neuron = Neuron.Neuron()
            neuron.inputWeights = self.getRandomWeights(inputLayerNeurons)
            neuron.inputDeltaWeights = self.getRandomWeights(inputLayerNeurons)
            layer.append(neuron)

    def getRandomWeights(self, n):
        weights = []
        for i in range(n):
            weights.append(random.uniform(0, 1))
        return weights

    def setInputLayerActivation(self, row):
        i = 0
        for neuron in self.inputLayer:
            neuron.activation = row[i]
            i += 1
def predict(theta, X):
    probability = Sigmoid.sigmoid(X @ theta)
    return [1 if x >= 0.5 else 0 for x in probability]
def gradient(theta, X, y):
    return (1 / len(X)) * (X.T @ (Sigmoid.sigmoid(X @ theta) - y))
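# A hedged usage sketch tying the predict() and gradient() helpers above
# together with plain batch gradient descent.  The synthetic data, learning
# rate and iteration count are illustrative assumptions, not values from the
# original project, and the Sigmoid module they call is assumed to be in scope.
import numpy as np

rng = np.random.default_rng(0)
X_demo = np.hstack((np.ones((200, 1)), rng.normal(size=(200, 2))))  # bias column first
y_demo = (X_demo[:, 1] + X_demo[:, 2] > 0).astype(float)

theta = np.zeros(X_demo.shape[1])
for _ in range(5000):
    theta -= 0.1 * gradient(theta, X_demo, y_demo)

accuracy = np.mean(predict(theta, X_demo) == y_demo)
print(accuracy)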
import os
import cv2
import numpy as np
import scipy.io as sio
import Sigmoid

data = sio.loadmat('28x28_300.mat')
Theta1 = data.get("Theta1")
Theta2 = data.get("Theta2")

while True:
    if os.path.exists("RECEIVED.png"):
        img = cv2.imread("RECEIVED.png", 0)
        cv2.imshow("re", img)
        cv2.waitKey(0)

        # flatten to a row vector and prepend a bias term of 1
        img = np.array([1] + img.reshape((1, 28 * 28))[0].tolist())

        # forward pass through the two-layer network
        d1 = Sigmoid.sigmoid(np.dot(img, np.transpose(Theta1)))
        d1 = np.array([1] + d1.tolist())
        d2 = Sigmoid.sigmoid(np.dot(d1, np.transpose(Theta2)))

        # the 10th output unit encodes the digit 0
        maxitem = d2.argmax(axis=0)
        if maxitem == 9:
            output = 0
        else:
            output = maxitem + 1

        print(d2)
        print(max(d2))
        break
def processinputs(self, iput):
    '''Return the network's output when ``iput`` is the input.'''
    for bias, weight in zip(self.biases, self.weights):
        iput = Sigmoid.sigmoid_vec(np.dot(weight, iput) + bias)
    return iput