def fit(self, X, Y, learning_rate=10e-6, regularisation=10e-1, epochs=10000, show_fig=False):
    X, Y = shuffle(X, Y)
    Xvalid, Yvalid = X[-1000:], Y[-1000:]
    # Tvalid = y2indicator(Yvalid)  # not needed: cost2 takes the integer labels directly
    X, Y = X[:-1000], Y[:-1000]

    N, D = X.shape
    K = len(set(Y))
    T = y2indicator(Y)  # one-hot targets, needed for gradient descent
    self.W1, self.b1 = init_weight_and_bias(D, self.M)
    self.W2, self.b2 = init_weight_and_bias(self.M, K)

    costs = []
    best_validation_error = 1
    for i in range(epochs):
        # forward propagation
        pY, Z = self.forward(X)

        # gradient descent
        pY_T = pY - T
        self.W2 -= learning_rate * (Z.T.dot(pY_T) + regularisation * self.W2)
        self.b2 -= learning_rate * (pY_T.sum(axis=0) + regularisation * self.b2)
        # dZ = pY_T.dot(self.W2.T) * (Z > 0)  # relu
        dZ = pY_T.dot(self.W2.T) * (1 - Z * Z)  # tanh
        self.W1 -= learning_rate * (X.T.dot(dZ) + regularisation * self.W1)
        self.b1 -= learning_rate * (dZ.sum(axis=0) + regularisation * self.b1)

        if i % 10 == 0:
            pYvalid, _ = self.forward(Xvalid)
            c = cost2(Yvalid, pYvalid)
            costs.append(c)
            e = error_rate(Yvalid, np.argmax(pYvalid, axis=1))
            print("i : " + str(i) + "; Cost : " + str(c) + "; Error : " + str(e))
            if e < best_validation_error:
                best_validation_error = e
    print("Best Validation error : " + str(best_validation_error))

    if show_fig:
        plt.plot(costs)
        plt.show()
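# --- Hedged sketch (not from the source): minimal stand-ins for the helpers
# every fit() variant in this section assumes -- shuffle, y2indicator (called
# indicator in one variant), cost2, error_rate, init_weight_and_bias -- plus a
# tanh-hidden-layer forward() consistent with the (1 - Z * Z) backprop term.
# Names and signatures are inferred from the call sites; the original util
# module may differ.
import numpy as np
import matplotlib.pyplot as plt
from sklearn.utils import shuffle  # shuffles X and Y together, as the call sites expect

def init_weight_and_bias(M1, M2):
    # scaled Gaussian weights, zero biases
    W = np.random.randn(M1, M2) / np.sqrt(M1)
    b = np.zeros(M2)
    return W, b

def y2indicator(y):
    # one-hot encode integer class labels
    N = len(y)
    K = len(set(y))
    ind = np.zeros((N, K))
    ind[np.arange(N), y] = 1
    return ind

indicator = y2indicator  # alias used by one of the variants below

def cost2(Y, pY):
    # cross-entropy computed from integer labels (no one-hot matrix needed)
    N = len(Y)
    return -np.log(pY[np.arange(N), Y]).mean()

def error_rate(targets, predictions):
    return np.mean(targets != predictions)

class ANN:
    def __init__(self, M):
        self.M = M  # hidden layer size

    def forward(self, X):
        # tanh hidden layer, softmax output
        Z = np.tanh(X.dot(self.W1) + self.b1)
        A = Z.dot(self.W2) + self.b2
        expA = np.exp(A - A.max(axis=1, keepdims=True))
        return expA / expA.sum(axis=1, keepdims=True), Z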
def fit(self, X, Y, learning_rate=1e-6, reg=1e-6, epochs=10000, show_fig=False):
    X, Y = shuffle(X, Y)
    valid_X = X[-1000:]
    valid_Y = Y[-1000:]
    train_X = X[:-1000]
    train_Y = Y[:-1000]
    T_train = indicator(train_Y)
    T_valid = indicator(valid_Y)

    N, D = train_X.shape
    K = len(set(train_Y))
    self.W1 = np.random.randn(D, self.M) / np.sqrt(D)
    self.W2 = np.random.randn(self.M, K) / np.sqrt(self.M)
    self.b1 = np.zeros(self.M)
    self.b2 = np.zeros(K)

    costs = []
    best_error_rate = 1
    for i in range(epochs):
        pY, Z = self.forward(train_X)

        self.W2 -= learning_rate * (Z.T.dot(pY - T_train) + reg * self.W2)
        self.b2 -= learning_rate * ((pY - T_train).sum(axis=0) + reg * self.b2)
        dz = (pY - T_train).dot(self.W2.T) * (1 - Z * Z)
        self.W1 -= learning_rate * (train_X.T.dot(dz) + reg * self.W1)
        self.b1 -= learning_rate * (dz.sum(axis=0) + reg * self.b1)

        if i % 10 == 0:
            pY_valid, Z_valid = self.forward(valid_X)
            c = cost2(valid_Y, pY_valid)
            e = error_rate(valid_Y, np.argmax(pY_valid, axis=1))
            costs.append(c)
            print("i: ", i, " cost: ", c, " error: ", e)
            if e < best_error_rate:
                best_error_rate = e
                print("best_error_rate: ", e)

    if show_fig:
        plt.plot(costs)
        plt.show()
def fit(self, X, T, learning_rate=10e-7, reg=10e-7, epochs=10000, show_fig=False):
    X, T = shuffle(X, T)
    X_train, T_train = X[:-1000], T[:-1000]
    X_valid, T_valid = X[-1000:], T[-1000:]

    N, D = X_train.shape
    K = len(set(T_train))

    # initialize parameters
    self.W1 = np.random.randn(D, self.M) / np.sqrt(D)
    self.b1 = np.zeros(self.M)
    self.W2 = np.random.randn(self.M, K) / np.sqrt(self.M)
    self.b2 = np.zeros(K)

    costs = []
    best_validation_error = 1
    T_train_ind = y2indicator(T_train)  # one-hot targets; compute once, outside the loop
    for n in range(epochs):
        # forward propagation
        Y, Z = self.forwardprop(X_train)

        # gradient descent
        Y_T = Y - T_train_ind
        self.W2 -= learning_rate * (Z.T.dot(Y_T) + reg * self.W2)
        self.b2 -= learning_rate * (Y_T.sum(axis=0) + reg * self.b2)
        dZ = Y_T.dot(self.W2.T) * (1 - Z * Z)  # tanh
        self.W1 -= learning_rate * (X_train.T.dot(dZ) + reg * self.W1)  # -=, not =: update the weights, don't overwrite them
        self.b1 -= learning_rate * (dZ.sum(axis=0) + reg * self.b1)

        # report validation cost and error rate
        if n % 10 == 0:
            Y_valid, _ = self.forwardprop(X_valid)
            cost = cost2(T_valid, Y_valid)
            costs.append(cost)
            er = error_rate(T_valid, np.argmax(Y_valid, axis=1))
            print(n, 'cost:', cost, 'error', er)
            if er < best_validation_error:
                best_validation_error = er
    print('Best validation error:', best_validation_error)

    if show_fig:
        plt.plot(costs)
        plt.title('cross entropy loss')
        plt.show()
def fit(self, X, Y, Xvalid, Yvalid, learning_rate=1e-6, reg=1e-6, epochs=10000, show_fig=False):
    Tvalid = y2indicator(Yvalid)  # only needed for the commented-out cost() alternative below
    N, D = X.shape
    K = len(set(Y) | set(Yvalid))
    T = y2indicator(Y)
    self.W1 = np.random.randn(D, self.M) / np.sqrt(D)
    self.b1 = np.zeros(self.M)
    self.W2 = np.random.randn(self.M, K) / np.sqrt(self.M)
    self.b2 = np.zeros(K)

    costs = []
    best_validation_error = 1
    for i in range(epochs):
        # forward propagation and cost calculation
        pY, Z = self.forward(X)

        # Gradient descent step.
        # This dataset is where we first introduce the L2 concept. Note that the
        # L2 penalty is written into the loss function itself; after differentiating
        # the whole loss, ||W||^2 becomes linear in W, i.e.
        # d/dW (reg/2 * ||W||^2) = reg * W, as in the expressions below.
        pY_T = pY - T  # store this in a variable first, so the computations below are faster
        self.W2 -= learning_rate * (Z.T.dot(pY_T) + reg * self.W2)
        self.b2 -= learning_rate * (pY_T.sum(axis=0) + reg * self.b2)
        # dZ = pY_T.dot(self.W2.T) * (Z > 0)  # relu
        dZ = pY_T.dot(self.W2.T) * (1 - Z * Z)  # tanh
        self.W1 -= learning_rate * (X.T.dot(dZ) + reg * self.W1)
        self.b1 -= learning_rate * (dZ.sum(axis=0) + reg * self.b1)

        if i % 20 == 0:
            pYvalid, _ = self.forward(Xvalid)
            c = cost2(Yvalid, pYvalid)
            # c = cost(Tvalid, pYvalid)
            costs.append(c)
            e = error_rate(Yvalid, np.argmax(pYvalid, axis=1))
            print("i:", i, "cost:", c, "error:", e)
            if e < best_validation_error:
                best_validation_error = e
    print('best_validation_error:', best_validation_error)

    if show_fig:
        plt.plot(costs)
        plt.show()
def fit(self, X, Y, learning_rate=10e-7, reg=10e-7, epochs=10000, show_fig=False):
    X, Y = shuffle(X, Y)
    Xvalid, Yvalid = X[-1000:], Y[-1000:]
    # Tvalid = y2indicator(Yvalid)
    X, Y = X[:-1000], Y[:-1000]

    N, D = X.shape
    K = len(set(Y))
    T = y2indicator(Y)
    self.W1 = np.random.randn(D, self.M) / np.sqrt(D)
    self.b1 = np.zeros(self.M)
    self.W2 = np.random.randn(self.M, K) / np.sqrt(self.M)
    self.b2 = np.zeros(K)

    costs = []
    best_validation_error = 1
    for i in range(epochs):
        # forward propagation and cost calculation
        pY, Z = self.forward(X)

        # gradient descent step
        pY_T = pY - T
        self.W2 -= learning_rate * (Z.T.dot(pY_T) + reg * self.W2)
        self.b2 -= learning_rate * (pY_T.sum(axis=0) + reg * self.b2)
        # dZ = pY_T.dot(self.W2.T) * (Z > 0)  # relu
        dZ = pY_T.dot(self.W2.T) * (1 - Z * Z)  # tanh
        self.W1 -= learning_rate * (X.T.dot(dZ) + reg * self.W1)
        self.b1 -= learning_rate * (dZ.sum(axis=0) + reg * self.b1)

        if i % 10 == 0:
            pYvalid, _ = self.forward(Xvalid)
            c = cost2(Yvalid, pYvalid)
            costs.append(c)
            e = error_rate(Yvalid, np.argmax(pYvalid, axis=1))
            s = self.score(Xvalid, Yvalid)
            print("i:", i, "cost:", c, "error:", e, "score:", s)
            if e < best_validation_error:
                best_validation_error = e
    print("best_validation_error:", best_validation_error)

    if show_fig:
        plt.plot(costs)
        plt.show()
def fit(self, X, Y, learning_rate=1e-6, reg=1e-6, epochs=10000, show_fig=False):
    X, Y = shuffle(X, Y)
    Xvalid, Yvalid = X[-1000:], Y[-1000:]
    X, Y = X[:-1000], Y[:-1000]  # was X[:1000]; keep everything except the held-out validation set

    N, D = X.shape
    K = max(Y) + 1
    T = y2indicator(Y)
    self.W1 = np.random.randn(D, self.M) / np.sqrt(D)
    self.b1 = np.zeros(self.M)
    self.W2 = np.random.randn(self.M, K) / np.sqrt(self.M)  # K, not the undefined k
    self.b2 = np.zeros(K)

    costs = []  # was `cost = []`, but appended to below as `costs`
    best_validation_error = 1
    for i in range(epochs):
        # forward propagation and cost calculation
        pY, Z = self.forward(X)

        # gradient descent step
        pY_T = pY - T
        self.W2 -= learning_rate * (Z.T.dot(pY_T) + reg * self.W2)
        self.b2 -= learning_rate * (pY_T.sum(axis=0) + reg * self.b2)
        # dZ = pY_T.dot(self.W2.T) * (Z > 0)  # relu
        dZ = pY_T.dot(self.W2.T) * (1 - Z * Z)  # tanh
        self.W1 -= learning_rate * (X.T.dot(dZ) + reg * self.W1)  # dZ, not Z
        self.b1 -= learning_rate * (dZ.sum(axis=0) + reg * self.b1)

        if i % 10 == 0:
            pYvalid, _ = self.forward(Xvalid)
            c = cost2(Yvalid, pYvalid)
            costs.append(c)
            e = error_rate(Yvalid, np.argmax(pYvalid, axis=1))
            print('i: ', i, 'cost:', c, 'error: ', e)
            if e < best_validation_error:
                best_validation_error = e
    print('best_validation_error: ', best_validation_error)

    if show_fig:
        plt.plot(costs)
        plt.show()
def fit(self, X, Y, learning_rate=10e-7, reg=10e-1, epochs=10000, show_fig=False):
    X, Y = shuffle(X, Y)
    Xvalid, Yvalid = X[-1000:], Y[-1000:]
    X, Y = X[:-1000], Y[:-1000]

    N, D = X.shape
    K = len(set(Y))
    T = y2indicator(Y)
    self.W1 = np.random.randn(D, self.M) / np.sqrt(D + self.M)
    self.b1 = np.zeros(self.M)
    self.W2 = np.random.randn(self.M, K) / np.sqrt(self.M + K)
    self.b2 = np.zeros(K)

    costs = []
    best_validation_error = 1
    for i in range(epochs):  # range, not the Python 2-only xrange
        pY, Z = self.forward(X)

        pY_T = pY - T
        self.W2 -= learning_rate * (Z.T.dot(pY_T) + reg * self.W2)
        self.b2 -= learning_rate * (pY_T.sum(axis=0) + reg * self.b2)
        # dZ = pY_T.dot(self.W2.T) * (Z > 0)  # relu
        dZ = pY_T.dot(self.W2.T) * (1 - Z * Z)  # tanh
        self.W1 -= learning_rate * (X.T.dot(dZ) + reg * self.W1)
        self.b1 -= learning_rate * (dZ.sum(axis=0) + reg * self.b1)

        if i % 10 == 0:
            pYvalid, _ = self.forward(Xvalid)
            c = cost2(Yvalid, pYvalid)
            costs.append(c)
            e = error_rate(Yvalid, np.argmax(pYvalid, axis=1))
            print("i:", i, "cost:", c, "error", e)  # print() function, not the Python 2 statement
            if e < best_validation_error:
                best_validation_error = e
    print("best_validation_error:", best_validation_error)

    if show_fig:
        plt.plot(costs)
        plt.show()
def fit(self, X, Y, learning_rate=1e-6, reg=1e-6, epochs=10000, show_fig=False):
    X, Y = shuffle(X, Y)
    Xvalid, Yvalid = X[-1000:], Y[-1000:]
    # Tvalid = y2indicator(Yvalid)
    X, Y = X[:-1000], Y[:-1000]

    N, D = X.shape
    K = len(set(Y))
    T = y2indicator(Y)
    self.W1 = np.random.randn(D, self.M) / np.sqrt(D)
    self.b1 = np.zeros(self.M)
    self.W2 = np.random.randn(self.M, K) / np.sqrt(self.M)
    self.b2 = np.zeros(K)

    costs = []
    best_validation_error = 1
    for i in range(epochs):
        # forward propagation and cost calculation
        pY, Z = self.forward(X)

        # gradient descent step
        pY_T = pY - T
        self.W2 -= learning_rate * (Z.T.dot(pY_T) + reg * self.W2)
        self.b2 -= learning_rate * (pY_T.sum(axis=0) + reg * self.b2)
        # dZ = pY_T.dot(self.W2.T) * (Z > 0)  # relu
        dZ = pY_T.dot(self.W2.T) * (1 - Z * Z)  # tanh
        self.W1 -= learning_rate * (X.T.dot(dZ) + reg * self.W1)
        self.b1 -= learning_rate * (dZ.sum(axis=0) + reg * self.b1)

        if i % 10 == 0:
            pYvalid, _ = self.forward(Xvalid)
            c = cost2(Yvalid, pYvalid)
            costs.append(c)
            e = error_rate(Yvalid, np.argmax(pYvalid, axis=1))
            print("i:", i, "cost:", c, "error:", e)
            if e < best_validation_error:
                best_validation_error = e
    print("best_validation_error:", best_validation_error)

    if show_fig:
        plt.plot(costs)
        plt.show()
def fit(self, X, Y, learning_rate=1e-6, reg=1e-6, epochs=10000, show_fig=False):
    X, Y = shuffle(X, Y)
    Nvalid = X.shape[0] // 5  # hold out 20% for validation
    Xvalid, Yvalid = X[-Nvalid:], Y[-Nvalid:]
    # Tvalid = y2indicator(Yvalid)
    X, Y = X[:-Nvalid], Y[:-Nvalid]

    N, D = X.shape
    K = len(set(Y))
    T = y2indicator(Y)
    self.W1 = np.random.randn(D, self.M) / np.sqrt(D + self.M)
    self.b1 = np.zeros(self.M)
    self.W2 = np.random.randn(self.M, K) / np.sqrt(self.M + K)
    self.b2 = np.zeros(K)

    costs = []
    best_valid_err = 1
    t0 = datetime.now()  # requires: from datetime import datetime
    for i in range(epochs):
        pY, Z = self.forward(X)

        # gradient descent
        pY_T = pY - T
        self.W2 -= learning_rate * (Z.T.dot(pY_T) + reg * self.W2)
        self.b2 -= learning_rate * (pY_T.sum(axis=0) + reg * self.b2)
        # dZ = pY_T.dot(self.W2.T) * (Z > 0)  # relu
        dZ = pY_T.dot(self.W2.T) * (1 - Z * Z)  # tanh
        self.W1 -= learning_rate * (X.T.dot(dZ) + reg * self.W1)
        self.b1 -= learning_rate * (dZ.sum(axis=0) + reg * self.b1)

        if i % 20 == 0:
            pYvalid, _ = self.forward(Xvalid)
            c = cost2(Yvalid, pYvalid)
            costs.append(c)
            e = error_rate(Yvalid, np.argmax(pYvalid, axis=1))
            dt = datetime.now() - t0
            print("i: ", i, ". cost: ", c, ". error: ", e, ". dt: ", dt)
            t0 = datetime.now()
            if e < best_valid_err:
                best_valid_err = e
    print("best valid err: ", best_valid_err)

    if show_fig:
        plt.plot(costs)
        plt.show()
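# --- Hedged usage sketch (not from the source): a quick smoke test for any of
# the fit() variants above, assuming one of them is attached as a method of the
# ANN class sketched after the first variant. The data is synthetic and purely
# illustrative (e.g. flattened 48x48 images with 7 classes).
if __name__ == '__main__':
    np.random.seed(0)
    Xdemo = np.random.randn(2000, 48 * 48)
    Ydemo = np.random.randint(0, 7, size=2000)
    model = ANN(M=200)
    model.fit(Xdemo, Ydemo, epochs=100, show_fig=True)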