def train_nn(data_set=None, save=True):
    """Train a network through ``cnn.train_cnn`` and return its result.

    Args:
        data_set: Training data handed to ``cnn.train_cnn``. When ``None``,
            the data is obtained by calling ``read_data_sets()``.
        save: Forwarded unchanged as the ``save`` keyword of
            ``cnn.train_cnn``.

    Returns:
        Whatever ``cnn.train_cnn`` returns.

    Note:
        ``epochs`` and ``learning_rate`` are not parameters; they are read
        from names defined outside this function.
    """
    training_data = read_data_sets() if data_set is None else data_set
    return cnn.train_cnn(
        training_data,
        epochs=epochs,
        learning_rate=learning_rate,
        save=save,
    )
def main_cnn():
    """Train the CNN, save its weights, and plot a test-set confusion matrix.

    Steps:
      1. Load ``./mnist_train.mat`` and ``./mnist_test.mat`` and divide the
         image arrays by 255.
      2. Build mini-batches of size 32 and train with ``cnn.train_cnn``.
      3. Save the four learned parameter arrays to ``cnn.mat``.
      4. Run the forward pass (conv -> relu -> 2x2 pool -> flatten -> fc) on
         every test column and accumulate a 10x10 confusion matrix whose
         rows are predicted labels and whose columns are true labels.
      5. Normalise every column to sum to 1 and hand the result to
         ``visualize_confusion_matrix``.

    Raises:
        ZeroDivisionError: If the test image array has no columns.
    """
    mnist_train = sio.loadmat('./mnist_train.mat')
    mnist_test = sio.loadmat('./mnist_test.mat')
    im_train, label_train = mnist_train['im_train'], mnist_train['label_train']
    im_test, label_test = mnist_test['im_test'], mnist_test['label_test']
    batch_size = 32
    # Scale by 1/255 (presumably maps 8-bit pixels into [0, 1] -- confirm
    # against the .mat contents).
    im_train, im_test = im_train / 255.0, im_test / 255.0

    mini_batch_x, mini_batch_y = cnn.get_mini_batch(im_train, label_train, batch_size)
    w_conv, b_conv, w_fc, b_fc = cnn.train_cnn(mini_batch_x, mini_batch_y)
    sio.savemat('cnn.mat', mdict={
        'w_conv': w_conv,
        'b_conv': b_conv,
        'w_fc': w_fc,
        'b_fc': b_fc,
    })
    # To inspect results without retraining, replace the two statements above
    # with a sio.loadmat('cnn.mat') call and read the same four keys.

    acc = 0
    confusion = np.zeros((10, 10))
    num_test = im_test.shape[1]
    for i in range(num_test):
        # Each test sample is one column, stored in column-major order.
        x = im_test[:, [i]].reshape((14, 14, 1), order='F')
        pred1 = cnn.conv(x, w_conv, b_conv)  # (14, 14, 3)
        pred2 = cnn.relu(pred1)  # (14, 14, 3)
        # Second return value of cnn.pool2x2 is not needed for inference.
        pred3, _ = cnn.pool2x2(pred2)  # (7, 7, 3)
        pred4 = cnn.flattening(pred3)  # (147, 1)
        y = cnn.fc(pred4, w_fc, b_fc)  # (10, 1)
        l_pred = np.argmax(y)
        l_true = label_test[0, i]
        confusion[l_pred, l_true] += 1
        if l_pred == l_true:
            acc += 1
    accuracy = acc / num_test

    # Column-normalise so each true-label column sums to 1. Columns for
    # labels that never occur in the test set are left as zeros instead of
    # being divided by zero (which would fill them with NaN).
    col_totals = confusion.sum(axis=0)
    seen = col_totals > 0
    confusion[:, seen] /= col_totals[seen]

    label_classes = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9']
    visualize_confusion_matrix(confusion, accuracy, label_classes, 'CNN Confusion Matrix')
def main_cnn(retrain_tag):
    """Evaluate the CNN on the test set, optionally retraining it first.

    Args:
        retrain_tag: When truthy, the training data is loaded from
            ``./mnist_train.mat``, the network is trained with ``train_cnn``
            and the learned parameters are written to ``cnn.mat``. When
            falsy, the parameters are read back from ``cnn.mat`` and the
            training file is not touched.

    The test images from ``./mnist_test.mat`` are then pushed through
    conv -> relu -> 2x2 pool -> flatten -> fc, a 10x10 confusion matrix
    (rows: predicted label, columns: true label) is accumulated,
    column-normalised, and passed to ``visualize_confusion_matrix`` together
    with the overall accuracy.

    Raises:
        ZeroDivisionError: If the test image array has no columns.
    """
    mnist_test = sio.loadmat('./mnist_test.mat')
    im_test, label_test = mnist_test['im_test'], mnist_test['label_test']
    im_test = im_test / 255.0

    if retrain_tag:
        # Training data and mini-batches are only needed on this branch, so
        # they are loaded here rather than unconditionally.
        mnist_train = sio.loadmat('./mnist_train.mat')
        im_train, label_train = mnist_train['im_train'], mnist_train['label_train']
        batch_size = 32
        im_train = im_train / 255.0
        mini_batch_x, mini_batch_y = get_mini_batch(im_train, label_train, batch_size)
        w_conv, b_conv, w_fc, b_fc = train_cnn(mini_batch_x, mini_batch_y)
        sio.savemat('cnn.mat', mdict={
            'w_conv': w_conv,
            'b_conv': b_conv,
            'w_fc': w_fc,
            'b_fc': b_fc,
        })
    else:
        data = sio.loadmat('cnn.mat')
        w_conv, b_conv, w_fc, b_fc = (
            data['w_conv'], data['b_conv'], data['w_fc'], data['b_fc'])

    acc = 0
    confusion = np.zeros((10, 10))
    num_test = im_test.shape[1]
    for i in range(num_test):
        # Each test sample is one column, stored in column-major order.
        x = im_test[:, [i]].reshape((14, 14, 1), order='F')
        pred1 = conv(x, w_conv, b_conv)  # (14, 14, 3)
        pred2 = relu(pred1)  # (14, 14, 3)
        pred3 = pool2x2(pred2)  # (7, 7, 3)
        pred4 = flattening(pred3)  # (147, 1)
        y = fc(pred4, w_fc, b_fc)  # (10, 1)
        l_pred = np.argmax(y)
        l_true = label_test[0, i]
        confusion[l_pred, l_true] += 1
        if l_pred == l_true:
            acc += 1
    accuracy = acc / num_test

    # Column-normalise so each true-label column sums to 1; columns of labels
    # absent from the test set stay at zero instead of becoming NaN.
    col_totals = confusion.sum(axis=0)
    seen = col_totals > 0
    confusion[:, seen] /= col_totals[seen]

    label_classes = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9']
    visualize_confusion_matrix(confusion, accuracy, label_classes, 'CNN Confusion Matrix')
def main_cnn():
    """Train the CNN, save its weights, and score it on the test set.

    Loads ``./ReducedMNIST/mnist_train.mat`` and
    ``./ReducedMNIST/mnist_test.mat``, divides the image arrays by 255,
    trains with ``train_cnn`` on mini-batches of size 32 and writes the four
    learned parameter arrays to ``cnn.mat``. Every test column is then
    classified with conv -> relu -> 2x2 pool -> flatten -> fc.

    Returns:
        tuple: ``(confusion, accuracy)`` where ``confusion`` is a 10x10
        array (rows: predicted label, columns: true label) with every
        non-empty column normalised to sum to 1, and ``accuracy`` is the
        fraction of test samples whose predicted label equals the true one.

    Raises:
        ZeroDivisionError: If the test image array has no columns.
    """
    mnist_train = sio.loadmat('./ReducedMNIST/mnist_train.mat')
    mnist_test = sio.loadmat('./ReducedMNIST/mnist_test.mat')
    im_train, label_train = mnist_train['im_train'], mnist_train['label_train']
    im_test, label_test = mnist_test['im_test'], mnist_test['label_test']
    batch_size = 32
    im_train, im_test = im_train / 255.0, im_test / 255.0

    mini_batch_x, mini_batch_y = get_mini_batch(im_train, label_train, batch_size)
    w_conv, b_conv, w_fc, b_fc = train_cnn(mini_batch_x, mini_batch_y)
    sio.savemat('cnn.mat', mdict={
        'w_conv': w_conv,
        'b_conv': b_conv,
        'w_fc': w_fc,
        'b_fc': b_fc,
    })
    # To inspect results without retraining, replace the two statements above
    # with a sio.loadmat('cnn.mat') call and read the same four keys.

    acc = 0
    confusion = np.zeros((10, 10))
    num_test = im_test.shape[1]
    for i in range(num_test):
        # Each test sample is one column, stored in column-major order.
        x = im_test[:, [i]].reshape((14, 14, 1), order='F')
        pred1 = conv(x, w_conv, b_conv)  # (14, 14, 3)
        pred2 = relu(pred1)  # (14, 14, 3)
        pred3 = pool2x2(pred2)  # (7, 7, 3)
        pred4 = flattening(pred3)  # (147, 1)
        y = fc(pred4, w_fc, b_fc)  # (10, 1)
        l_pred = np.argmax(y)
        l_true = label_test[0, i]
        confusion[l_pred, l_true] += 1
        if l_pred == l_true:
            acc += 1
    accuracy = acc / num_test

    # Column-normalise so each true-label column sums to 1; columns of labels
    # absent from the test set stay at zero instead of becoming NaN.
    col_totals = confusion.sum(axis=0)
    seen = col_totals > 0
    confusion[:, seen] /= col_totals[seen]
    return confusion, accuracy
def main_cnn():
    """Train the CNN, save its weights, and report test-set performance.

    Loads ``./mnist_train.mat`` and ``./mnist_test.mat``, divides the image
    arrays by 255, trains with ``train_cnn`` on mini-batches of size 32 and
    writes the four learned parameter arrays to ``cnn.mat``. Every test
    column is then classified with conv -> relu -> 2x2 pool -> flatten -> fc
    while a progress line is printed. Finally the accuracy is printed and a
    column-normalised 10x10 confusion matrix (rows: predicted label,
    columns: true label) is passed to ``visualize_confusion_matrix``.

    Raises:
        ZeroDivisionError: If the test image array has no columns.
    """
    mnist_train = sio.loadmat('./mnist_train.mat')
    mnist_test = sio.loadmat('./mnist_test.mat')
    im_train, label_train = mnist_train['im_train'], mnist_train['label_train']
    im_test, label_test = mnist_test['im_test'], mnist_test['label_test']
    batch_size = 32
    im_train, im_test = im_train / 255.0, im_test / 255.0

    mini_batches_x, mini_batches_y = get_mini_batch(im_train, label_train, batch_size)
    w_conv, b_conv, w_fc, b_fc = train_cnn(mini_batches_x, mini_batches_y)
    sio.savemat('cnn.mat', mdict={
        'w_conv': w_conv,
        'b_conv': b_conv,
        'w_fc': w_fc,
        'b_fc': b_fc,
    })
    # To inspect results without retraining, replace the two statements above
    # with a sio.loadmat('cnn.mat') call and read the same four keys.

    acc = 0
    confusion = np.zeros((10, 10))
    num_test = im_test.shape[1]
    for i in range(num_test):
        # '\r' with end='' keeps the progress counter on a single line.
        print('Test # {}/{}: \r'.format(i + 1, num_test), end='')
        # Each test sample is one column, stored in column-major order.
        x = im_test[:, [i]].reshape((14, 14, 1), order='F')
        pred1 = conv(x, w_conv, b_conv)  # (14, 14, 3)
        pred2 = relu(pred1)  # (14, 14, 3)
        pred3 = pool2x2(pred2)  # (7, 7, 3)
        pred4 = flattening(pred3)  # (147, 1)
        y = fc(pred4, w_fc, b_fc)  # (10, 1)
        l_pred = np.argmax(y)
        l_true = label_test[0, i]
        confusion[l_pred, l_true] += 1
        if l_pred == l_true:
            acc += 1
    accuracy = acc / num_test
    print(accuracy)

    # Column-normalise so each true-label column sums to 1; columns of labels
    # absent from the test set stay at zero instead of becoming NaN.
    col_totals = confusion.sum(axis=0)
    seen = col_totals > 0
    confusion[:, seen] /= col_totals[seen]

    label_classes = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9']
    visualize_confusion_matrix(confusion, accuracy, label_classes, 'CNN Confusion Matrix')
def train_ner(obs_set, out_set, count_set, window_size=None, batch_size=256,
              param_scale=0.01, num_epochs=250, step_size=0.001, l2_lambda=0):
    '''
    function to train the NN for word vectors to NER category.

    Args
    ----
    obs_set : np array created by gen_dataset
        indexed as [sentence, token, feature]
    out_set : np.array created by gen_dataset
        indexed as [sentence, token, ...]
    count_set : np.array created by gen_dataset
        count_set[i] is the number of leading rows of sentence i that are kept
    window_size : list of two ints, or None
        passed to featurizers.window_featurizer as `size`; each token is
        reshaped to sum(window_size) + 1 vectors. None (the default) means
        [1, 1]; a fresh list is created per call so the default can never be
        shared between calls through the returned param_set.
        (presumably [n_before, n_after] -- confirm against window_featurizer)
    batch_size : integer
        size of batch in learning
    param_scale : float
        size of weights if none
    num_epochs : int
        number of epochs to train
    step_size : float
        initial step size; NOTE: only recorded in the returned dict, it is
        not forwarded to cnn.train_cnn
    l2_lambda : float
        forwarded to cnn.train_cnn as L2_reg

    Returns
    -------
    param_set : dict
        the hyper-parameters window_size, batch_size, param_scale,
        num_epochs and step_size, plus 'pred_fun', 'loglike_fun' and
        'trained_weights' as returned by cnn.train_cnn
    '''
    if window_size is None:
        window_size = [1, 1]

    param_set = {
        'window_size': window_size,
        'batch_size': batch_size,
        'param_scale': param_scale,
        'num_epochs': num_epochs,
        'step_size': step_size,
    }

    obs_lst, out_lst = [], []
    # loop through each sentence and window featurize it
    for sent_i in range(obs_set.shape[0]):
        n_kept = int(count_set[sent_i])
        obs_slice = obs_set[sent_i, :, :][:n_kept]
        out_slice = out_set[sent_i, :, :][:n_kept]
        obs_window = featurizers.window_featurizer(obs_slice, size=window_size)
        # one (window, feature) matrix per kept token
        obs_window = obs_window.reshape(obs_slice.shape[0],
                                        sum(window_size) + 1,
                                        obs_slice.shape[-1])
        obs_lst.append(obs_window)
        out_lst.append(out_slice)

    # stack all sentences along the first axis, then insert a singleton
    # axis at position 1 (presumably the channel axis expected by cnn)
    inputs = np.concatenate(obs_lst)
    inputs = np.expand_dims(inputs, axis=1)
    outputs = np.concatenate(out_lst)

    layer_specs = [
        cnn.conv_layer((2, 41), 4),
        cnn.maxpool_layer((2, 2)),
        cnn.conv_layer((1, 21), 8),
        cnn.maxpool_layer((1, 2)),
        cnn.tanh_layer(256),
        cnn.softmax_layer(9),
    ]
    pred_fun, loglike_fun, trained_weights = cnn.train_cnn(
        inputs, outputs, layer_specs,
        batch_size=batch_size,
        param_scale=param_scale,
        num_epochs=num_epochs,
        L2_reg=l2_lambda)

    param_set['pred_fun'] = pred_fun
    param_set['loglike_fun'] = loglike_fun
    param_set['trained_weights'] = trained_weights
    return param_set
epochs - number of epochs batch_size - images trained on per epoch no_hidden_layers - number of hidden layers cls.size - number of classes (wmin,wmax) - Initial weight values distribution range alpha - momentum eta - learning rate Outputs: w1 - last first set of weights w2 - last second set of weights total_cost - cost obtained as each image was fed forward pred_acc - accuracy after a given epoch y_pred - guesses for Xts """ w1, w2, total_cost, pred_acc, y_pred = cnn.train_cnn(Xtr, Ytr, Xts, Yts, epochs, batch_size, no_hidden_layers, cls.size, (wmin, wmax), alpha, eta) #Accuracy on test set after all epochs print("Final accuracy: ", cnn.cnn_acc(w1, w2, Xts, Yts), "%") #Input a 1D array to plot it, good for the cost and accuracy print("Cost function") cnn.visualize_plot(total_cost) print("CNN accuracy over epochs") cnn.visualize_plot(pred_acc) #Visualize a set of images, True -> correct predictions, False -> incorrect predictions, last value is number of pictures cnn.visualize_images(classes, Xts, Yts, y_pred.flatten(), True, 5) cnn.visualize_images(classes, Xts, Yts, y_pred.flatten(), False, 5)
def train_nn(data_set=None, category='person', save=True):
    """Train a network through ``cnn.train_cnn`` and return its result.

    Args:
        data_set: Training data handed to ``cnn.train_cnn``. When ``None``,
            the data is obtained from ``read_data_set(category=category)``.
        category: Only used when ``data_set`` is ``None``; forwarded to
            ``read_data_set``.
        save: Forwarded unchanged as the ``save`` keyword of
            ``cnn.train_cnn``.

    Returns:
        Whatever ``cnn.train_cnn`` returns.

    Note:
        ``epochs`` and ``learning_rate`` are not parameters; they are read
        from names defined outside this function.
    """
    if data_set is not None:
        training_data = data_set
    else:
        training_data = read_data_set(category=category)
    return cnn.train_cnn(
        training_data,
        epochs=epochs,
        learning_rate=learning_rate,
        save=save,
    )