import numpy as np


# (here the parameters are the weights), so the function also takes X_train and Y_train
def CIFAR10_loss_fun(data, weights):
    """data = [X_train, Y_train]"""
    from Lecture3.Loss import L_SVM
    return L_SVM(data[0], data[1], weights)


if __name__ == '__main__':
    from load_data import load_CIFAR10, sample_training_data
    X_train, Y_train = load_CIFAR10('../data/cifar10/')[0:2]
    # Append a bias column of ones so the bias can live inside W (3073 columns)
    X_train = np.append(X_train, np.ones((X_train.shape[0], 1)), axis=1)
    data_train = [X_train, Y_train]
    data_batch = sample_training_data(data_train, 256)  # mini-batch of 256 examples
    W = np.random.rand(10, 3073) * 0.001  # random initial weight matrix
    op = Optimization()
    # Numerical gradient and the initial loss
    df, loss_original = op.eval_numerical_gradient(CIFAR10_loss_fun, data_batch, W)
    print('original loss: %f' % (loss_original, ))
    min_loss = loss_original
    # Try the effect of different step sizes
    for step_size_log in [-10, -9, -8, -7, -6, -5, -4, -3, -2, -1]:
        step_size = 10 ** step_size_log
        W_new = W - step_size * df  # new position in weight space
        loss_new = CIFAR10_loss_fun(data_batch, W_new)
        print('for step size %f new loss: %.7f' % (step_size, loss_new))
        if loss_new < min_loss:
            min_loss = loss_new
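
# The script above relies on Optimization.eval_numerical_gradient, which is not
# shown in this excerpt. Below is a minimal sketch of such a routine (centered
# differences over every entry of W, mirroring the (gradient, loss) return seen
# in the call above); the actual class may differ, and h is an assumed tolerance.
import numpy as np

def eval_numerical_gradient_sketch(loss_fun, data, W, h=1e-5):
    """Numerically estimate d loss_fun(data, W) / dW with centered differences."""
    grad = np.zeros_like(W)
    loss_original = loss_fun(data, W)
    it = np.nditer(W, flags=['multi_index'], op_flags=['readwrite'])
    while not it.finished:
        ix = it.multi_index
        old = W[ix]
        W[ix] = old + h
        loss_plus = loss_fun(data, W)   # f(W + h)
        W[ix] = old - h
        loss_minus = loss_fun(data, W)  # f(W - h)
        W[ix] = old                     # restore the entry
        grad[ix] = (loss_plus - loss_minus) / (2 * h)
        it.iternext()
    return grad, loss_original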
import copy

import numpy as np


if __name__ == '__main__':
    from load_data import load_CIFAR10, sample_training_data
    X_train, Y_train, X_test, Y_test = load_CIFAR10('../data/cifar10/')  # 3072 x 50000
    mean_train = np.mean(X_train, axis=1).reshape((-1, 1))
    std_train = np.std(X_train, axis=1).reshape((-1, 1))
    X_train -= mean_train  # zero-centering: mean subtraction
    X_train /= std_train   # normalization: divide each dimension by its standard deviation
    # The test set must be transformed with the training-set statistics
    X_test -= mean_train
    X_test /= std_train
    data_batch = sample_training_data([X_train, Y_train], 256)  # 256 examples, 3072 x 256
    in_num = 3072
    hidden_num = 100
    out_num = 10
    nn = NN(data_batch[0], in_num, hidden_num, out_num, data_batch[1], 10 ** -3, 0.01)
    loss_original = nn.loss
    print("original loss: %f" % (loss_original,))
    min_loss = loss_original
    for step_size_log in [-10, -9, -8, -7, -6, -5, -4, -3, -2, -1, 0]:
        step_size = 10 ** step_size_log
        n = copy.deepcopy(nn)
        n.hidden_layer.step_size = step_size
        n.output_layer.step_size = step_size
        dh = n.output_layer.backward(n.gradient)
        n.hidden_layer.backward(dh)
        loss_new = n.forward()
        print("step_size: %.10f, loss: %f" % (step_size, loss_new))
        if loss_new < min_loss:
            min_loss = loss_new
            best_step_size = step_size
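
# sample_training_data is imported from load_data but not defined in this
# excerpt. Given the column-major layout used here (features x examples,
# e.g. 3072 x 50000), a plausible implementation is random column selection;
# this is a sketch under that assumption, not the actual load_data code.
import numpy as np

def sample_training_data_sketch(data, batch_size):
    """Draw a random mini-batch [X_batch, Y_batch] from data = [X, Y]."""
    X, Y = data
    idx = np.random.choice(X.shape[1], batch_size, replace=False)
    return [X[:, idx], Y[idx]]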
def train(self, X_train, Y_train, X_validation, Y_validation, node_num,
          num_epochs, reg, update='sgd', learning_rate_decay=1,
          learning_rate=None, sample_batch=False, batch_size=None, model=None):
    # Training data: use the full set or sample a mini-batch
    if sample_batch and batch_size is not None:
        train_data = sample_training_data([X_train, Y_train], batch_size)  # e.g. 256 examples, 3072 x 256
        val_data = sample_training_data([X_validation, Y_validation], batch_size // 2)
    elif not sample_batch:
        train_data = [X_train, Y_train]
        val_data = [X_validation, Y_validation]
    else:
        raise Exception("missing 'batch_size'")
    train_data_size = train_data[0].shape[1]
    val_data_size = val_data[0].shape[1]
    in_num = node_num[0]      # 3072
    hidden_num = node_num[1]  # 100
    out_num = node_num[2]     # 10
    if model is not None:
        learning_rate = model['lr']
    nn = NN(train_data[0], in_num, hidden_num, out_num, train_data[1],
            step_size=learning_rate, lam=reg)
    if model is not None:
        nn.set(model['W1'], model['b1'], model['W2'], model['b2'])
    if learning_rate is None:
        # Search for a suitable step size
        loss_original = nn.loss
        print("original loss: %f" % (loss_original,))
        min_loss = loss_original
        step_size_log_list = [-10 + learning_rate_decay * x
                              for x in range(10 // learning_rate_decay + 1)]
        for step_size_log in step_size_log_list:
            step_size = 10 ** step_size_log
            n = copy.deepcopy(nn)
            n.hidden_layer.step_size = step_size
            n.output_layer.step_size = step_size
            dh = n.output_layer.backward(n.gradient)
            n.hidden_layer.backward(dh)
            loss_new = n.forward()[0]
            print("step_size: %s, loss: %f" % (format(step_size, '.2e'), loss_new))
            if loss_new < min_loss:
                min_loss = loss_new
                best_step_size = step_size
        print("best step size %s" % format(best_step_size, '.2e'))
        # Apply the chosen step size
        nn.hidden_layer.step_size = best_step_size
        nn.output_layer.step_size = best_step_size
    best_val_accuracy = 0
    for i in range(num_epochs):
        if sample_batch:
            train_data = sample_training_data([X_train, Y_train], batch_size)
            val_data = sample_training_data([X_validation, Y_validation], batch_size // 2)
        else:
            train_data = None
        val_loss, val_probability = nn.validation(val_data)
        loss, tr_probability = nn.forward(train_data)
        # Accuracy by confidence threshold:
        # tr_accuracy = np.sum(tr_probability > 0.9) / train_data_size
        # val_accuracy = np.sum(val_probability > 0.9) / val_data_size
        # "Accuracy" as the mean softmax probability of the correct class:
        tr_accuracy = np.sum(tr_probability) / train_data_size
        val_accuracy = np.sum(val_probability) / val_data_size
        lr = learning_rate if learning_rate is not None else best_step_size
        print("epoch %d / %d, loss: %f, train: %f, validation: %f, lr: %s"
              % (i + 1, num_epochs, loss, tr_accuracy, val_accuracy, format(lr, '.2e')))
        if val_accuracy > best_val_accuracy:
            best_val_accuracy = val_accuracy
        dh = nn.output_layer.backward(nn.gradient)
        nn.hidden_layer.backward(dh)
        # time.sleep(0.1)
    print("finished optimization, best validation accuracy: %f" % best_val_accuracy)
    best_model = {}
    best_model['W1'] = nn.hidden_layer.W
    best_model['b1'] = nn.hidden_layer.b
    best_model['W2'] = nn.output_layer.W
    best_model['b2'] = nn.output_layer.b
    best_model['lr'] = lr
    return best_model
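
# In train() above, update='sgd' is accepted but never used, and
# learning_rate_decay only controls the stride of the step-size search grid,
# not an actual decay schedule. For reference, here is a minimal sketch of
# what a decoupled vanilla-SGD step with multiplicative per-epoch decay could
# look like (hypothetical helper names, not this repo's API):

def sgd_step(W, dW, lr):
    """Vanilla SGD: move the weights against the gradient."""
    return W - lr * dW

def decayed_lr(base_lr, epoch, decay=0.95):
    """Multiplicative per-epoch learning-rate decay (decay=1 disables it)."""
    return base_lr * (decay ** epoch)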
import copy
import time

import numpy as np


def CIFAR10_test():
    from load_data import load_CIFAR10, sample_training_data
    X_train, Y_train, X_test, Y_test = load_CIFAR10('../data/cifar10/')  # 3072 x 50000

    '''Data preprocessing'''
    mean_train = np.mean(X_train, axis=1).reshape((-1, 1))
    std_train = np.std(X_train, axis=1).reshape((-1, 1))
    X_train -= mean_train  # zero-centering: mean subtraction
    X_train /= std_train   # normalization: divide each dimension by its standard deviation
    X_test -= mean_train
    X_test /= std_train

    '''Initialize the neural network'''
    data_batch = sample_training_data([X_train, Y_train], 256)  # 256 examples, 3072 x 256
    in_num = 3072
    hidden_num = 100
    out_num = 10
    nn = NN(data_batch[0], in_num, hidden_num, out_num, data_batch[1], 10 ** -3, 0.01)
    loss_original = nn.loss
    print("original loss: %f" % (loss_original, ))

    '''Pick a suitable step size'''
    min_loss = loss_original
    for step_size_log in [-10, -9, -8, -7, -6, -5, -4, -3, -2, -1, 0]:
        step_size = 10 ** step_size_log
        n = copy.deepcopy(nn)
        n.hidden_layer.step_size = step_size
        n.output_layer.step_size = step_size
        dh = n.output_layer.backward(n.gradient)
        n.hidden_layer.backward(dh)
        loss_new = n.forward()
        print("step_size: %.10f, loss: %f" % (step_size, loss_new))
        if loss_new < min_loss:
            min_loss = loss_new
            best_step_size = step_size
    print("best step size %.10f" % (best_step_size, ))
    nn.hidden_layer.step_size = best_step_size
    nn.output_layer.step_size = best_step_size
    time.sleep(1)

    '''Sanity check before training: take a small subset of the data and make
    sure the network can overfit it, to verify that backpropagation works'''
    batch_size = 16
    data_batch = sample_training_data([X_train, Y_train], batch_size)
    for i in range(300):
        nn.forward(data_batch)
        loss = nn.loss
        correct = np.sum(nn.probability > 0.9) / batch_size
        print("i: %d , loss: %f, correct ratio: %f" % (i, loss, correct))
        if loss < 0.00001 or correct >= 0.99:
            break
        dh = nn.output_layer.backward(nn.gradient)
        nn.hidden_layer.backward(dh)
        time.sleep(0.1)
    print("loss: %f, correct ratio: %f" % (loss, correct))
    if correct < 0.99:
        raise Exception("BP does not work correctly")

    '''Start training'''
    for i in range(1000):
        batch_size = 256
        data_batch = sample_training_data([X_train, Y_train], batch_size)
        nn.forward(data_batch)
        # h_weights_grad = nn.eval_numerical_gradient(nn.hidden_layer)
        # o_weights_grad = nn.eval_numerical_gradient(nn.output_layer)
        loss = nn.loss
        correct = np.sum(nn.probability > 0.5) / batch_size
        print("i: %d , loss: %f, correct ratio: %f" % (i, loss, correct))
        if loss < 0.00001:
            break
        dh = nn.output_layer.backward(nn.gradient)
        nn.hidden_layer.backward(dh)
        # time.sleep(0.1)
    '''Training log (iteration, loss, accuracy):
    i: 0 , loss: 5.286373, correct ratio: 0.027344
    i: 1 , loss: 5.164099, correct ratio: 0.015625
    i: 2 , loss: 5.090839, correct ratio: 0.015625
    i: 3 , loss: 4.836606, correct ratio: 0.035156
    i: 4 , loss: 4.861015, correct ratio: 0.031250
    i: 5 , loss: 4.918304, correct ratio: 0.019531
    i: 6 , loss: 4.632134, correct ratio: 0.015625
    ...
    i: 991 , loss: 3.045871, correct ratio: 0.187500
    i: 992 , loss: 3.044970, correct ratio: 0.207031
    i: 993 , loss: 2.993322, correct ratio: 0.257812
    i: 994 , loss: 3.028033, correct ratio: 0.187500
    i: 995 , loss: 2.953715, correct ratio: 0.234375
    i: 996 , loss: 2.930693, correct ratio: 0.234375
    i: 997 , loss: 3.072322, correct ratio: 0.183594
    i: 998 , loss: 3.051221, correct ratio: 0.191406
    i: 999 , loss: 2.898737, correct ratio: 0.207031
    '''
    print("loss: %f" % (loss, ))

    '''Evaluate on the test set'''
    data_test = [X_test, Y_test]
    nn.forward(data_test)
    loss = nn.loss
    print("test loss: %f, correct ratio: %f"
          % (loss, np.sum(nn.probability > 0.5) / X_test.shape[1]))
    '''Result of the run above: test loss: 2.840334, correct ratio: 0.222656'''

    '''Save the trained network'''
    select = input("save weights and bias? (y or n) ")
    if select == "y":
        save_weights(nn)
        print("save successfully")