def main():
    # For DenseNet: {L=40, K=12}, {L=100, K=12}, {L=100, K=24}
    # For DenseNet-BC: {L=100, K=12}, {L=250, K=24}, {L=190, K=40}
    print('>>> load data')
    train_loader, test_loader = load_CIFAR10()

    L = 250
    k = 24
    training_epochs = 300

    print('>>> make model')
    densenet = DenseNet(L=L, k=k)
    print(densenet)
    densenet = densenet.to(device)

    # not sure this is supposed to be SGD
    optimizer = optim.SGD(densenet.parameters(), weight_decay=0.001,
                          momentum=0.9, lr=0.1, nesterov=True)
    # drop the learning rate by 10x at 50% and 75% of training
    # (MultiStepLR requires the milestones in increasing order)
    scheduler = lr_scheduler.MultiStepLR(
        optimizer=optimizer,
        milestones=(int(training_epochs * 0.5), int(training_epochs * 0.75)),
        gamma=0.1)
    criterion = nn.CrossEntropyLoss()

    print(">>> training start")
    training(model=densenet, epochs=training_epochs, train_loader=train_loader,
             scheduler=scheduler, optimizer=optimizer, criterion=criterion)
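# A minimal standalone sketch (not part of main() above) showing how
# MultiStepLR behaves with the 50% / 75% milestones; the toy linear model
# and the probe epochs are made up purely for illustration.
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler

toy = nn.Linear(4, 2)  # stand-in model
opt = optim.SGD(toy.parameters(), lr=0.1)
sched = lr_scheduler.MultiStepLR(opt, milestones=[150, 225], gamma=0.1)

for epoch in range(300):
    if epoch in (0, 150, 225):
        print(epoch, opt.param_groups[0]['lr'])  # 0.1, then 0.01, then 0.001
    # ... one training epoch would run here, ending with opt.step() ...
    sched.step()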
def CIFAR10_test():
    from load_data import load_CIFAR10
    X_train, Y_train, X_test, Y_test = load_CIFAR10('../data/cifar10/')  # 3072 x 50000
    nn = NN3(4, reg_lam=0.01)
    nn.load_examples(X_train, Y_train, axis=1)
    nn.load_test_data(X_test, Y_test)
    nn.data_preprocessing()
    nn.weight_init()
    nn.visualize()
def test_tfrecords():
    reader = tf.TFRecordReader()
    file = 'train.tfrecords'
    filename_queue = tf.train.string_input_producer([file], num_epochs=None)
    _, serialized_example = reader.read(filename_queue)
    features = tf.parse_single_example(
        serialized_example,
        features={
            'label': tf.FixedLenFeature([], tf.int64),
            'image': tf.FixedLenFeature([], tf.string)
        })
    image = tf.image.decode_png(features['image'], channels=3)
    image = tf.image.resize_image_with_crop_or_pad(image, 32, 32)
    # image = tf.decode_raw(features['image'], tf.uint8)
    label = tf.cast(features['label'], tf.int32)
    images, labels = tf.train.batch([image, label], batch_size=128,
                                    num_threads=1, capacity=10 * 128)
    images = tf.cast(images, tf.float32)

    with tf.Session() as sess:
        # sess.run(tf.initialize_all_variables())
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)
        a, b = sess.run([images, labels])
        print('>>> fetched one batch')
        print(a.shape)
        print(b.shape)
        print(a)
        print(b)
        c = data_preprocess(a)
        print(c.shape)
        coord.request_stop()
        coord.join(threads)

    cifar10_dir = 'cifar-10-batches-py'
    X_train, Y_train, X_test, Y_test = load_CIFAR10(cifar10_dir)
    print(X_train[0])
    print(Y_train[0])
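# For reference, a minimal sketch of the same pipeline using the tf.data API
# (available from TF 1.4 onward), which replaces the queue-runner machinery
# used in test_tfrecords(); the feature schema matches the one above.
import tensorflow as tf

def _parse(serialized):
    feats = tf.parse_single_example(serialized, features={
        'label': tf.FixedLenFeature([], tf.int64),
        'image': tf.FixedLenFeature([], tf.string),
    })
    image = tf.image.decode_png(feats['image'], channels=3)
    image = tf.image.resize_image_with_crop_or_pad(image, 32, 32)
    return tf.cast(image, tf.float32), tf.cast(feats['label'], tf.int32)

dataset = (tf.data.TFRecordDataset('train.tfrecords')
           .map(_parse)
           .batch(128))
images, labels = dataset.make_one_shot_iterator().get_next()

with tf.Session() as sess:
    a, b = sess.run([images, labels])  # no Coordinator / queue runners needed
    print(a.shape, b.shape)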
def main(model_name):
    cifar10_dir = 'cifar-10-batches-py'
    X_train, Y_train, X_test, Y_test = load_CIFAR10(cifar10_dir)
    X_test = data_preprocess(X_test, train=False, model=model_name)
    print(X_train.shape)
    print(X_test.shape)
    # return
    X_train, Y_train = input('train', 128)  # `input` here is the local batch-input op, not the builtin

    with tf.Session() as sess:
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)

        parameter_path = "checkpoint_" + model_name + "/variable.ckpt"
        path_exists = "checkpoint_" + model_name

        if model_name == "lenet":
            print('begin to train lenet model')
            model = Model_Lenet()
        elif model_name == "vgg19":
            print('begin to train vgg19 model')
            model = Model_Vgg19()
        else:
            print('we do not have this model')
            return

        saver = tf.train.Saver()
        if os.path.exists(path_exists):
            saver.restore(sess, parameter_path)
            print('loaded the weights')
        else:
            sess.run(tf.global_variables_initializer())
            print('initialized all the weights')

        train = Trainer(model, sess, X_train, Y_train, X_test, Y_test, model_name)
        save_path = saver.save(sess, parameter_path)
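# A sketch of a more robust restore pattern for the checkpoint logic in
# main() above, using only standard TF 1.x calls: checking for the checkpoint
# itself rather than for the directory avoids restoring from a folder that
# exists but holds no valid checkpoint. (model_name, saver, and sess refer to
# the names in main().)
ckpt = tf.train.latest_checkpoint("checkpoint_" + model_name)
if ckpt is not None:
    saver.restore(sess, ckpt)
    print('restored from', ckpt)
else:
    sess.run(tf.global_variables_initializer())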
        fxph = f(x)           # evaluate f(x + h)
        x[ix] = oldval - h    # decrement by h
        fxmh = f(x)           # evaluate f(x - h)
        x[ix] = oldval        # reset

        grad_numerical = (fxph - fxmh) / (2 * h)
        grad_analytic = analytic_grad[ix]
        rel_error = abs(grad_numerical - grad_analytic) / (
            abs(grad_numerical) + abs(grad_analytic))
        print('numerical: %f analytic: %f, relative error: %e'
              % (grad_numerical, grad_analytic, rel_error))


if __name__ == "__main__":
    X_train, y_train, X_test, y_test = load_CIFAR10("cifar-10-batches-py")

    num_training = 49000
    num_validation = 1000
    num_test = 1000
    num_dev = 500

    # carve the last 1000 training images out as a validation set
    mask = range(num_training, num_training + num_validation)
    X_val = X_train[mask]
    y_val = y_train[mask]
    mask = range(num_training)
    X_train = X_train[mask]
    y_train = y_train[mask]
    mask = np.random.choice(num_training, num_dev, replace=False)
    X_dev = X_train[mask]
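# For context, a self-contained sketch of the full centered-difference check
# that the fragment above comes from (the function and variable names here
# are illustrative, not necessarily the original file's):
import numpy as np

def grad_check_sparse(f, x, analytic_grad, num_checks=10, h=1e-5):
    """Sample a few random coordinates and compare numeric vs analytic grads."""
    for _ in range(num_checks):
        ix = tuple(np.random.randint(n) for n in x.shape)  # random coordinate
        oldval = x[ix]
        x[ix] = oldval + h
        fxph = f(x)                      # f(x + h)
        x[ix] = oldval - h
        fxmh = f(x)                      # f(x - h)
        x[ix] = oldval                   # restore
        grad_numerical = (fxph - fxmh) / (2 * h)
        grad_analytic = analytic_grad[ix]
        rel_error = abs(grad_numerical - grad_analytic) / (
            abs(grad_numerical) + abs(grad_analytic))
        print('numerical: %f analytic: %f, relative error: %e'
              % (grad_numerical, grad_analytic, rel_error))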
# To use the code above we need a function with a single parameter
# (here that parameter is the weights), so the data X_train and Y_train
# get bundled in as well.
def CIFAR10_loss_fun(data, weights):
    """ data = [X_train, Y_train] """
    from Lecture3.Loss import L_SVM
    return L_SVM(data[0], data[1], weights)


if __name__ == '__main__':
    from load_data import load_CIFAR10, sample_training_data
    X_train, Y_train = load_CIFAR10('../data/cifar10/')[0:2]
    X_train = np.append(X_train, np.ones((X_train.shape[0], 1)), axis=1)
    data_train = [X_train, Y_train]
    data_batch = sample_training_data(data_train, 256)  # 256 samples

    W = np.random.rand(10, 3073) * 0.001  # random weight matrix
    op = Optimization()
    df, loss_original = op.eval_numerical_gradient(
        CIFAR10_loss_fun, data_batch, W)  # get the gradient and initial loss
    print('original loss: %f' % (loss_original, ))
    min_loss = loss_original

    # try out the effect of different step sizes
    for step_size_log in [-10, -9, -8, -7, -6, -5, -4, -3, -2, -1]:
        step_size = 10**step_size_log
        W_new = W - step_size * df  # new position in weight space
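        # (sketch) the listing is truncated here; a step-size search of this
        # kind typically continues by evaluating the loss at the candidate
        # weights and keeping the best step size. The original file's exact
        # bookkeeping is not shown, so the lines below are an assumption:
        loss_new = CIFAR10_loss_fun(data_batch, W_new)
        print('for step size 10^%d new loss: %f' % (step_size_log, loss_new))
        if loss_new < min_loss:
            min_loss = loss_new
            best_step_size = step_size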
    best_model['lr'] = lr
    return best_model


def save_weights(model):
    # note: datetime.now() is evaluated once per file, so each array gets a
    # slightly different timestamp prefix
    np.save(str(datetime.now()) + "_hidden_W.npy", model['W1'])
    np.save(str(datetime.now()) + "_hidden_b.npy", model['b1'])
    np.save(str(datetime.now()) + "_out_W.npy", model['W2'])
    np.save(str(datetime.now()) + "_out_b.npy", model['b2'])
    np.save(str(datetime.now()) + "_lr.npy", model['lr'])


if __name__ == '__main__':
    from load_data import load_CIFAR10, sample_training_data
    X_train, Y_train, X_test, Y_test = load_CIFAR10('../data/cifar10/')  # 3072 x 50000

    mean_train = np.mean(X_train, axis=1).reshape((-1, 1))
    std_train = np.std(X_train, axis=1).reshape((-1, 1))
    X_train -= mean_train  # zero-centering: subtract the mean
    X_train /= std_train   # normalization: divide each dimension by its std
    X_test -= mean_train   # the test set must use the training statistics
    X_test /= std_train
    # np.linalg.norm()  # stray call with no arguments; would raise a TypeError

    trainer = ClassifierTrainer()
    node_num = [3072, 100, 10]
    x_tiny = X_train[:, :20]
    y_tiny = Y_train[:, :20]
    first_model = trainer.train(x_tiny, y_tiny, x_tiny, y_tiny, node_num,
                                num_epochs=150, reg=0.01, update='sgd',
#! /usr/bin/env python3
# -*- coding: utf-8 -*-

# Reading the files: we load the data directly through the provided
# `load_CIFAR10` module. Thanks to this magic function you don't need to
# worry about how the reading works; see that file if you want the details.
from load_data import load_CIFAR10
import os
import numpy as np
import matplotlib.pyplot as plt

# Define the data directory path: please do not modify this path!
# Otherwise the submitted model will not be able to run.
cifar10_dir = os.path.join(os.path.dirname(__file__),
                           '../../data/cifar-10-batches-py')

# Read the files and store the data as training and test sets.
X_train, y_train, X_test, y_test = load_CIFAR10(cifar10_dir)

# First check the size of each variable to make sure nothing went wrong!
# X_train and X_test should have shape N*W*H*3 --
# N: number of samples, W: sample width, H: sample height, 3: RGB channels.
# y_train and y_test are the image labels.
print("train and test data:", X_train.shape, y_train.shape, X_test.shape, y_test.shape)
print("label classes: ", np.unique(y_train))  # check the labels; expect 10 classes
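# A quick visual sanity check (a sketch; it uses only what is already
# imported above). The class names follow the standard CIFAR-10 ordering.
classes = ['plane', 'car', 'bird', 'cat', 'deer',
           'dog', 'frog', 'horse', 'ship', 'truck']
fig, axes = plt.subplots(1, 10, figsize=(15, 2))
for cls, ax in enumerate(axes):
    idx = np.flatnonzero(y_train == cls)[0]  # first example of each class
    ax.imshow(X_train[idx].astype('uint8'))
    ax.set_title(classes[cls])
    ax.axis('off')
plt.show()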
        print(num_test)
        # loop over all test rows
        for i in range(num_test):
            print(str(i) + ".")
            # find the nearest training image to the i'th test image
            # using the L1 distance (sum of absolute value differences)
            # distances = np.sum(np.abs(self.Xtr - X[i, :]), axis=1)
            # using the L2 distance (the euclidean distance between two vectors)
            distances = np.sqrt(np.sum(np.square(self.Xtr - X[i, :]), axis=1))
            min_index = np.argmin(distances)  # get the index with smallest distance
            Ypred[i] = self.ytr[min_index]    # predict the label of the nearest example
        return Ypred


if __name__ == '__main__':
    Xtr, Ytr, Xte, Yte = load_CIFAR10('../data/cifar10/')  # a magic function we provide
    # flatten out all images to be one-dimensional
    Xtr_rows = Xtr.reshape(Xtr.shape[0], 32 * 32 * 3)  # Xtr_rows becomes 50000 x 3072
    Xte_rows = Xte.reshape(Xte.shape[0], 32 * 32 * 3)  # Xte_rows becomes 10000 x 3072

    nn = NearestNeighbor()   # create a Nearest Neighbor classifier instance
    nn.train(Xtr_rows, Ytr)  # train the classifier on the training images and labels
    Yte_predict = nn.predict(Xte_rows)  # predict labels on the test images
    # and now print the classification accuracy, which is the average number
    # of examples that are correctly predicted (i.e. label matches)
    print('accuracy: %f' % (np.mean(Yte_predict == Yte)))
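# For speed, the per-row loop in predict() above can be replaced by a fully
# vectorized L2 computation using the expansion
# ||a - b||^2 = ||a||^2 + ||b||^2 - 2 a.b
# (a sketch; `train` stands for self.Xtr and `test` for X):
import numpy as np

def l2_distances(train, test):
    """All pairwise L2 distances, shape (num_test, num_train)."""
    sq_train = np.sum(train ** 2, axis=1)         # (num_train,)
    sq_test = np.sum(test ** 2, axis=1)[:, None]  # (num_test, 1)
    cross = test @ train.T                        # (num_test, num_train)
    # clip tiny negatives caused by floating-point error before the sqrt
    return np.sqrt(np.maximum(sq_test + sq_train - 2 * cross, 0))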
def CIFAR10_test():
    from load_data import load_CIFAR10, sample_training_data
    X_train, Y_train, X_test, Y_test = load_CIFAR10(
        '../data/cifar10/')  # 3072 x 50000

    '''Data preprocessing'''
    mean_train = np.mean(X_train, axis=1).reshape((-1, 1))
    std_train = np.std(X_train, axis=1).reshape((-1, 1))
    X_train -= mean_train  # zero-centering: subtract the mean
    X_train /= std_train   # normalization: divide each dimension by its std
    X_test -= mean_train
    X_test /= std_train

    '''Neural network initialization'''
    data_batch = sample_training_data([X_train, Y_train], 256)  # 256 samples, 3072 x 256
    in_num = 3072
    hidden_num = 100
    out_num = 10
    nn = NN(data_batch[0], in_num, hidden_num, out_num, data_batch[1],
            10**-3, 0.01)
    loss_original = nn.loss
    print("original loss: %f" % (loss_original, ))

    '''Pick a suitable step size'''
    min_loss = loss_original
    for step_size_log in [-10, -9, -8, -7, -6, -5, -4, -3, -2, -1, 0]:
        step_size = 10**step_size_log
        n = copy.deepcopy(nn)
        n.hidden_layer.step_size = step_size
        n.output_layer.step_size = step_size
        dh = n.output_layer.backward(n.gradient)
        n.hidden_layer.backward(dh)
        loss_new = n.forward()
        print("step_size: %.10f, loss: %f" % (step_size, loss_new))
        if loss_new < min_loss:
            min_loss = loss_new
            best_step_size = step_size
    print("best step size %.10f" % (best_step_size, ))
    nn.hidden_layer.step_size = best_step_size
    nn.output_layer.step_size = best_step_size
    time.sleep(1)

    '''Before training, take a small subset of the data and check that the
    network can overfit it, to verify that backpropagation works correctly'''
    batch_size = 16
    data_batch = sample_training_data([X_train, Y_train], batch_size)
    for i in range(300):
        nn.forward(data_batch)
        loss = nn.loss
        correct = np.sum(nn.probability > 0.9) / batch_size
        print("i: %d , loss: %f, correct ratio: %f" % (i, loss, correct))
        if loss < 0.00001 or correct >= 0.99:
            break
        dh = nn.output_layer.backward(nn.gradient)
        nn.hidden_layer.backward(dh)
        time.sleep(0.1)
    print("loss: %f, correct ratio: %f" % (loss, correct))
    if correct < 0.99:
        raise Exception("BP does not work correctly")

    '''Start training'''
    for i in range(1000):
        batch_size = 256
        data_batch = sample_training_data([X_train, Y_train], batch_size)
        nn.forward(data_batch)
        # h_weights_grad = nn.eval_numerical_gradient(nn.hidden_layer)
        # o_weights_grad = nn.eval_numerical_gradient(nn.output_layer)
        loss = nn.loss
        correct = np.sum(nn.probability > 0.5) / batch_size
        print("i: %d , loss: %f, correct ratio: %f" % (i, loss, correct))
        if loss < 0.00001:
            break
        dh = nn.output_layer.backward(nn.gradient)
        nn.hidden_layer.backward(dh)
        # time.sleep(0.1)
    '''Training results: iteration, loss, correct ratio
    i: 0 , loss: 5.286373, correct ratio: 0.027344
    i: 1 , loss: 5.164099, correct ratio: 0.015625
    i: 2 , loss: 5.090839, correct ratio: 0.015625
    i: 3 , loss: 4.836606, correct ratio: 0.035156
    i: 4 , loss: 4.861015, correct ratio: 0.031250
    i: 5 , loss: 4.918304, correct ratio: 0.019531
    i: 6 , loss: 4.632134, correct ratio: 0.015625
    ...
    i: 991 , loss: 3.045871, correct ratio: 0.187500
    i: 992 , loss: 3.044970, correct ratio: 0.207031
    i: 993 , loss: 2.993322, correct ratio: 0.257812
    i: 994 , loss: 3.028033, correct ratio: 0.187500
    i: 995 , loss: 2.953715, correct ratio: 0.234375
    i: 996 , loss: 2.930693, correct ratio: 0.234375
    i: 997 , loss: 3.072322, correct ratio: 0.183594
    i: 998 , loss: 3.051221, correct ratio: 0.191406
    i: 999 , loss: 2.898737, correct ratio: 0.207031
    '''
    print("loss: %f" % (loss, ))

    '''Test the network'''
    data_test = [X_test, Y_test]
    nn.forward(data_test)
    loss = nn.loss
    # note: the original divided by batch_size here; on the test set the
    # denominator should be the number of test samples
    print("test loss: %f, correct ratio: %f" % (
        loss, np.sum(nn.probability > 0.5) / X_test.shape[1]))
    '''Test results: test loss: 2.840334, correct ratio: 0.222656'''

    '''Save the network'''
    select = input("save weights and biases? (y or n) ")
    if select == "y":
        save_weights(nn)
        print("saved successfully")