def fit(self, X, Y, test_size=0.2, learning_rate=1e-6, reg=0, epochs=1000, show_fig=False):
    """Train the softmax classifier with full-batch gradient descent.

    Initializes ``self.W`` / ``self.b``, holds out ``test_size`` of the data
    for monitoring, and runs ``epochs`` full-batch updates with L2
    regularization strength ``reg``.
    """
    N, D = X.shape
    class_num = len(set(Y))

    # Scaled random init keeps initial logits at a reasonable magnitude.
    self.W = np.random.randn(D, class_num) / np.sqrt(D)
    self.b = np.random.randn(class_num)

    X_train, X_test, Y_train, Y_test = train_test_split(
        X, Y, test_size=test_size, random_state=0)
    Y_train_onehot = DataTransform.y2one_hot(Y_train, class_num=class_num)
    Y_test_onehot = DataTransform.y2one_hot(Y_test, class_num=class_num)

    costs = []
    for epoch in range(epochs):
        # One full-batch parameter update on the training split.
        p_y = self.forward(X=X_train)
        self.W -= learning_rate * (gradW(p_y, Y_train_onehot, X_train) + reg * self.W)
        self.b -= learning_rate * (gradb(p_y, Y_train_onehot) + reg * self.b)

        # Evaluate on the held-out split with the freshly updated parameters.
        p_y_test = self.forward(X=X_test)
        loss = cost(p_y_test, Y_test_onehot)
        costs.append(loss)
        error = error_rate(p_y_test, Y_test)
        if epoch % 10 == 0:
            print("Cost at iteration %d: %.6f" % (epoch, loss))
            print("Error rate: ", error)

    if show_fig:
        plt.plot(costs)
        plt.show()
# Snapshot the Keras layer weights so the TensorFlow implementation below can
# start from the identical initialization (fair framework comparison).
# NOTE(review): assumes weights0 = model.layers[0].get_weights() from earlier
# in the file — confirm against the preceding section.
W1 = weights0[0].copy()
b1 = weights0[1].copy()
weights1 = model.layers[1].get_weights()
W2 = weights1[0].copy()
b2 = weights1[1].copy()
config = model.get_config()

# Time the Keras training run with the rmsprop optimizer.
t0 = datetime.now()
model.compile(loss='mean_squared_error', metrics=['accuracy'], optimizer='rmsprop')
r = model.fit(X_train, Y_train_onehot, epochs=epochs, batch_size=batch_size)

# Evaluate on the held-out split and report error rate plus wall-clock time.
p_y_test = model.predict(X_test)
error = error_rate(p_y_test, Y_test)
print("keras test error rate %f." % error)
print("Elapsted time for keras rmsprop: ", datetime.now() - t0)
print(r.history.keys())

##### tensorflow #####
import tensorflow as tf

# NOTE(review): tf.placeholder is TensorFlow 1.x graph-mode API; this section
# will not run under TF 2.x without tf.compat.v1 / disabling eager execution.
inputs = tf.placeholder(tf.float32, shape=(None, D), name='inputs')
outputs = tf.placeholder(tf.float32, shape=(None, class_num), name='outputs')

# Seed the TF variables with the snapshotted Keras weights.
tfW1 = tf.Variable(W1)
tfb1 = tf.Variable(b1)
tfW2 = tf.Variable(W2)
tfb2 = tf.Variable(b2)

# operation
def fit(self, X, Y, batch_size=300, test_size=0.2, learning_rate=1e-5, mu=0.9, reg=0, epochs=100, show_fig=False):
    """Train the one-hidden-layer network with mini-batch GD + momentum.

    Args:
        X: (N, D) feature matrix.
        Y: length-N integer class labels.
        batch_size: mini-batch size.
        test_size: fraction of the data held out for monitoring.
        learning_rate: gradient step size.
        mu: momentum coefficient.
        reg: L2 regularization strength.
        epochs: number of passes over the training split.
        show_fig: if True, plot the held-out cost history at the end.

    Fixes vs. the previous version:
      * the epoch loop now honors ``epochs`` (it was hard-coded to 50);
      * the batch count is derived from the *training split* size instead of
        the full data size N, which previously yielded empty tail batches
        after ``train_test_split`` removed the test portion.
    """
    N, D = X.shape
    class_num = len(set(Y))

    # Scaled random initialization for both layers.
    self.W1 = np.random.randn(D, self.units) / np.sqrt(D)
    self.b1 = np.random.randn(self.units)
    self.W2 = np.random.randn(self.units, class_num) / np.sqrt(self.units)
    self.b2 = np.random.randn(class_num)

    X_train, X_test, Y_train, Y_test = train_test_split(
        X, Y, test_size=test_size, random_state=0)
    Y_train_onehot = DataTransform.y2one_hot(Y_train, class_num=class_num)
    Y_test_onehot = DataTransform.y2one_hot(Y_test, class_num=class_num)

    # BUGFIX: use the training-split size, not N. max(1, ...) guarantees at
    # least one (possibly short) batch when batch_size exceeds the split.
    n_batch = max(1, len(X_train) // batch_size)

    # Momentum velocity accumulators.
    dW2 = 0
    db2 = 0
    dW1 = 0
    db1 = 0

    costs = []
    for i in range(epochs):  # BUGFIX: was hard-coded range(50)
        tmpX, tmpY = shuffle(X_train, Y_train_onehot)
        for j in range(n_batch):
            X_batch = tmpX[j * batch_size:(j * batch_size + batch_size), :]
            Y_batch = tmpY[j * batch_size:(j * batch_size + batch_size), :]
            Z, p_y = self.forward(X_batch)

            # Gradients with L2 regularization.
            gW2 = derivative_w2(Z, p_y, Y_batch) + reg * self.W2
            gb2 = derivative_b2(p_y, Y_batch) + reg * self.b2
            gW1 = derivative_w1(X_batch, Z, p_y, Y_batch, self.W2) + reg * self.W1
            gb1 = derivative_b1(Z, p_y, Y_batch, self.W2) + reg * self.b1

            # Update the momentum velocities ...
            dW2 = mu * dW2 - learning_rate * gW2
            db2 = mu * db2 - learning_rate * gb2
            dW1 = mu * dW1 - learning_rate * gW1
            db1 = mu * db1 - learning_rate * gb1

            # ... then apply them to the weights.
            self.W2 += dW2
            self.b2 += db2
            self.W1 += dW1
            self.b1 += db1

        # Monitor loss/error on the held-out split once per epoch.
        _, p_y_test = self.forward(X_test)
        batch_loss = cost(p_y_test, Y_test_onehot)
        costs.append(batch_loss)
        error = error_rate(p_y_test, Y_test)  # Y_test is 1-D integer labels
        if i % 10 == 0:
            print("Cost at iteration %d: %.6f" % (i, batch_loss))
            print("Error rate: ", error)

    if show_fig:
        plt.plot(costs)
        plt.show()

    # Final held-out evaluation.
    _, p_y = self.forward(X_test)
    print("\nFinal Error rate: ", error_rate(p_y, Y_test))
def score(self, X, Y):
    """Return classification accuracy on (X, Y), i.e. 1 minus the error rate."""
    predictions = self.predict(X)
    return 1 - error_rate(predictions, Y)
tmpX, tmpY = shuffle(X_train, Y_train_onehot) for j in range(n_batch): X_batch = tmpX[j * batch_size:(j * batch_size + batch_size), :] Y_batch = tmpY[j * batch_size:(j * batch_size + batch_size), :] Z, p_y = forward(X_batch, W1, b1, W2, b2) # 更新权重 W2 -= lr * (derivative_w2(Z, p_y, Y_batch) + reg * W2) b2 -= lr * (derivative_b2(p_y, Y_batch) + reg * b2) W1 -= lr * (derivative_w1(X_batch, Z, p_y, Y_batch, W2) + reg * W1) b1 -= lr * (derivative_b1(Z, p_y, Y_batch, W2) + reg * b1) # 测试集上计算loss _, p_y_test = forward(X_test, W1, b1, W2, b2) batch_loss = cost(p_y_test, Y_test_onehot) losses_batch.append(batch_loss) error = error_rate(p_y_test, Y_test) # 这里是Y_test,维度1 if i % 10 == 0: print("Cost at iteration %d: %.6f" % (i, batch_loss)) print("Error rate: ", error) # 验证集验证 _, p_y = forward(X_test, W1, b1, W2, b2) print("\nFinal Error rate: ", error_rate(p_y, Y_test)) print("Elapsted time for batch GD: ", datetime.now() - t0) # 2. batch with momentum W1 = W1_0.copy() b1 = b1_0.copy() W2 = W2_0.copy() b2 = b2_0.copy() mu = 0.9 dW2 = 0