def __call__(self, nn, train_history): epoch = train_history[-1]['epoch'] # plot current loss and accuracy plot_train_history(train_history, self.netId, self.name, [("train_loss", "train loss"), ("valid_loss", "valid loss")], "loss", "Loss", ylimit=None, yscale=None, xLimit=(0, Settings.NN_EPOCHS), path=self.lossPath, fileName=str(epoch)) plot_train_history(train_history, self.netId, self.name, [("valid_accuracy", "Accuracy")], "accuracy", "Accuracy", ylimit=None, yscale=None, xLimit=(0, Settings.NN_EPOCHS), path=self.accPath, fileName=str(epoch)) if self.plotLayers: for layer in self.plotLayers: plt = plot_conv_weights(nn.layers_[layer], figsize=(6, 6)) utils.save_plt_figure(plt, str(epoch), self.cWeightsPath + str(layer) + "/") plt.close("all")
def main(model='MLP'): if (model=='MLP'): x_train,y_train = load_TrainData() ###########training process ##################### net = build_mlp() print "Start training:" net.fit(x_train,y_train) ################## calculate the precision################### print "======================== accuracy==========================" x_test = np.array(pd.read_csv('x_test.csv').iloc[:,1:]/255) #y_test = np.array(pd.read_csv('y_test.csv').iloc[:,1:]) y_label = np.array(pd.read_csv('y_testLabel.csv').iloc[:,1:]) #print x_test y_pred = net.predict(x_test) pd.DataFrame(y_pred).to_csv('y_pred_MLP.csv') y = [] for i in range(y_pred.shape[0]): y.append(y_pred[i].tolist().index(max(y_pred[i]))+1) pd.DataFrame(y).to_csv('y_predlabel_MLP.csv') print classification_report(y_label, y) ##############save the MLP weights to file########################## net.save_params_to('MLP_weights_file') elif (model=='CNN'): x_train,y_train = load_TrainData2D() #print x_train.shape print y_train ###########training process ##################### net = buildCNN() print "Start training:" net.fit(x_train,y_train) plot_loss(net) plot_conv_weights(net.layers_[1], figsize=(4, 4)) plt.show() ################## calculate the precision################### print "======================== accuracy==========================" x_test ,y_test = load_TestData2D(); y_label = np.array(pd.read_csv('y_testLabel.csv').iloc[:,1:]) #print x_test y_pred = net.predict(x_test) pd.DataFrame(y_pred).to_csv('y_pred_CNN.csv') y = [] for i in range(y_pred.shape[0]): y.append(y_pred[i].tolist().index(max(y_pred[i]))+1) pd.DataFrame(y).to_csv('y_predlabel_CNN.csv') print classification_report(y_label, y) #print classification_report(y_pred, y_label) ##############save the MLP weights to file########################## net.save_params_to('CNN_weights_file') else: print 'ERROR: Please select a model #MLP or #CNN !'
# NOTE(review): fragment — these first lines close a NeuralNet(...) call whose
# opening is outside this chunk.
update_momentum=0.9,
max_epochs=50,
verbose=1,
)

# Train the network
nn = net1.fit(X_train_s, y_train_s)
#nn = net1.fit(X_train, y_train)

# Test CNN
preds = net1.predict(X_test)

# Confusion matrix
plot_confusion_matrix(y_test, preds)
# show the learned filters of both conv layers
visualize.plot_conv_weights(net1.layers_['conv2d1'])
visualize.plot_conv_weights(net1.layers_['conv2d2'])

# =============================================================================
# Architecture 2
# =============================================================================

# Convolutional NN: two conv+pool stages feeding a dense softmax output.
# NOTE(review): this call is truncated here — its kwargs continue outside
# this chunk.
net2 = NeuralNet(
    layers=[
        ('input', layers.InputLayer),
        ('conv2d1', layers.Conv2DLayer),
        ('maxpool1', layers.MaxPool2DLayer),
        ('conv2d2', layers.Conv2DLayer),
        ('maxpool2', layers.MaxPool2DLayer),
        ('output', layers.DenseLayer),
    ],
def plot_conv_weights(self, net, **kwargs):
    """Draw the filters of the net's 'conv1' layer, then reset pyplot state.

    Extra keyword arguments are forwarded to nolearn's plot_conv_weights.
    """
    from nolearn.lasagne.visualize import plot_conv_weights as draw_weights
    draw_weights(net.layers_['conv1'], **kwargs)
    # Clear the figure, then the (freshly created) current axes, so the next
    # plot starts from a blank slate.
    plt.clf()
    plt.cla()
# NOTE(review): fragment — the NeuralNet(...) call and its layers list open
# outside this chunk; these lines finish the layer list and hyperparameters.
('dense', layers.DenseLayer),
('dropout2', layers.DropoutLayer),
('output', layers.DenseLayer), ],
# MNIST-style input: batch x 1 channel x 28 x 28
input_shape=(None, 1, 28, 28),
conv2d1_num_filters=32,
conv2d1_filter_size=(5, 5),
conv2d1_nonlinearity=lasagne.nonlinearities.rectify,
conv2d1_W=lasagne.init.GlorotUniform(),
maxpool1_pool_size=(2, 2),
conv2d2_num_filters=32,
conv2d2_filter_size=(5, 5),
conv2d2_nonlinearity=lasagne.nonlinearities.rectify,
maxpool2_pool_size=(2, 2),
dropout1_p=0.5,
dense_num_units=256,
dense_nonlinearity=lasagne.nonlinearities.rectify,
dropout2_p=0.5,
# 10-way softmax classifier head
output_nonlinearity=lasagne.nonlinearities.softmax,
output_num_units=10,
update=nesterov_momentum,
update_learning_rate=0.01,
update_momentum=0.9,
max_epochs=10,
verbose=1,)

nn = CNN.fit(X_train, y_train)
prediction = CNN.predict(X_test)
# visualize the first conv layer's learned filters
visualize.plot_conv_weights(CNN.layers_['conv2d1'])
def test_plot_conv_weights(self, net_fitted):
    """Smoke test: plotting conv filters must not raise, with default and
    custom figure sizes."""
    from nolearn.lasagne.visualize import plot_conv_weights as draw_weights
    for layer_name, extra in (('conv1', {}), ('conv2', {'figsize': (1, 2)})):
        draw_weights(net_fitted.layers_[layer_name], **extra)
    # release figure state created by the plots
    plt.clf()
    plt.cla()
# NOTE(review): fragment — the `def` of this CIFAR-10 loader is outside this
# chunk; these lines assemble the 5 training batches and the test batch.
x_train = np.zeros((50000, 3, 32, 32), dtype='uint8')
y_train = np.zeros((50000,), dtype="uint8")
for i in range(1, 6):
    data = unpickle(os.path.join(path, 'data_batch_' + str(i)))
    # each batch holds 10000 images flattened to 3072 bytes
    images = data['data'].reshape(10000, 3, 32, 32)
    labels = data['labels']
    x_train[(i - 1) * 10000:i * 10000, :, :, :] = images
    y_train[(i - 1) * 10000:i * 10000] = labels
test_data = unpickle(os.path.join(path, 'test_batch'))
x_test = test_data['data'].reshape(10000, 3, 32, 32)
y_test = np.array(test_data['labels'])
return x_train, y_train, x_test, y_test

# For saving
with open('cifar7.pickle', 'wb') as f:
    pickle.dump(cifar7, f, -1)
# For loading back
# NOTE(review): pickles should be opened in binary mode ('rb'), and this
# handle is never closed or passed to pickle.load — confirm intent.
file_cifar = open('cifar6.pickle', 'r')
# For visualizing layers
visualize.plot_conv_weights(cifar7.layers_['conv2d2'])
net0.fit(X_train, y_train) # visualization from nolearn.lasagne.visualize import draw_to_notebook, plot_loss from nolearn.lasagne.visualize import plot_conv_weights, plot_conv_activity from nolearn.lasagne.visualize import plot_occlusion, plot_saliency draw_to_notebook(net0) plot_loss(net0) #plot helps determine if we are overfitting: #If the train loss is much lower than the validation loss, #we should probably do something to regularize the net. # visualize layer weights plot_conv_weights(net0.layers_[1], figsize = (4,4)) #If the weights just look like noise, we might have to do something #(e.g. use more filters so that each can specialize better). # visualize layers' activities x = X_train[0:1] # an image in the bc01 format (so use X[0:1] instead of just X[0]). plot_conv_activity(net0.layers_[1], x) plot_occlusion(net0, X_train[:5], y_train[:5]) plot_saliency(net0, X_train[:5]) from nolearn.lasagne import PrintLayerInfo layers1 = [ (InputLayer, {'shape': (None, 1, 28, 28)}),
def test_visualize_functions_with_cnn(mnist):
    """Smoke test: train a tiny CNN on 100 MNIST digits and check that every
    nolearn plotting helper runs without raising."""
    from nolearn.lasagne import NeuralNet
    from nolearn.lasagne.visualize import plot_conv_activity
    from nolearn.lasagne.visualize import plot_conv_weights
    from nolearn.lasagne.visualize import plot_loss
    from nolearn.lasagne.visualize import plot_occlusion

    X, y = mnist
    # add the (batch, channel, height, width) axes once
    # (the original reshaped X_train a second time, which was a no-op)
    X_train, y_train = X[:100].reshape(-1, 1, 28, 28), y[:100]
    num_epochs = 3

    nn = NeuralNet(
        layers=[
            ('input', InputLayer),
            ('conv1', Conv2DLayer),
            ('conv2', Conv2DLayer),
            ('pool2', MaxPool2DLayer),
            ('conv3', Conv2DLayer),
            ('conv4', Conv2DLayer),
            ('pool4', MaxPool2DLayer),
            ('hidden1', DenseLayer),
            ('output', DenseLayer),
        ],
        input_shape=(None, 1, 28, 28),
        output_num_units=10,
        output_nonlinearity=softmax,

        more_params=dict(
            conv1_filter_size=(5, 5), conv1_num_filters=16,
            conv2_filter_size=(3, 3), conv2_num_filters=16,
            pool2_ds=(3, 3),
            conv3_filter_size=(3, 3), conv3_num_filters=16,
            conv4_filter_size=(3, 3), conv4_num_filters=16,
            pool4_ds=(2, 2),
            hidden1_num_units=16,
        ),

        update=nesterov_momentum,
        update_learning_rate=0.01,
        update_momentum=0.9,

        max_epochs=num_epochs,
    )

    nn.fit(X_train, y_train)

    plot_loss(nn)
    plot_conv_weights(nn.layers_['conv1'])
    plot_conv_weights(nn.layers_['conv2'], figsize=(1, 2))
    plot_conv_activity(nn.layers_['conv3'], X_train[:1])
    plot_conv_activity(nn.layers_['conv4'], X_train[10:11], figsize=(3, 4))
    plot_occlusion(nn, X_train[:1], y_train[:1])
    plot_occlusion(nn, X_train[2:4], y_train[2:4],
                   square_length=3, figsize=(5, 5))

    # clear figures from memory
    plt.clf()
    plt.cla()
def main():
    """Train a convolutional autoencoder (AVLetters data), plotting the
    reconstruction and validation cost, and saving the encoder/AE models.

    Relies on module-level helpers (configure_theano, parse_options,
    generate_data, batch_iterator, batch_compute_cost, ...) and the global
    `terminate` flag toggled by SIGINT.
    """
    def signal_handler(signal, frame):
        # SIGINT handler: request a graceful stop via the module-global flag
        global terminate
        terminate = True
        print('terminating...'.format(terminate))
    signal.signal(signal.SIGINT, signal_handler)

    configure_theano()
    options = parse_options()
    X, X_val = generate_data()

    # X = np.reshape(X, (-1, 1, 30, 40))[:-5]
    print('X type and shape:', X.dtype, X.shape)
    print('X.min():', X.min())
    print('X.max():', X.max())

    # X_val = np.reshape(X_val, (-1, 1, 30, 40))[:-1]
    print('X_val type and shape:', X_val.dtype, X_val.shape)
    print('X_val.min():', X_val.min())
    print('X_val.max():', X_val.max())

    # we need our target to be 1 dimensional
    X_out = X.reshape((X.shape[0], -1))
    X_val_out = X_val.reshape((X_val.shape[0], -1))
    print('X_out:', X_out.dtype, X_out.shape)
    print('X_val_out', X_val_out.dtype, X_val_out.shape)

    # X_noisy = apply_gaussian_noise(X_out)
    # visualize_reconstruction(X_noisy[0:25], X_out[0:25], shape=(28, 28))
    # X = np.reshape(X_noisy, (-1, 1, 28, 28))

    print('constructing and compiling model...')
    # input_var = T.tensor4('input', dtype='float32')
    input_var = T.tensor3('input', dtype='float32')
    target_var = T.matrix('output', dtype='float32')
    lr = theano.shared(np.array(0.8, dtype=theano.config.floatX),
                       name='learning_rate')
    lr_decay = np.array(0.9, dtype=theano.config.floatX)

    # try building a reshaping layer
    # network = create_model(input_var, (None, 1, 30, 40), options)
    # flat 1200-dim frames reshaped to 1x30x40 images for the conv layers
    l_input = InputLayer((None, None, 1200), input_var, name='input')
    l_input = ReshapeLayer(l_input, (-1, 1, 30, 40), name='reshape_input')
    # l_input = InputLayer((None, 1, 30, 40), input_var, name='input')

    # select the AE variant from the options
    if options['MODEL'] == 'normal':
        network, encoder = avletters_convae.create_model(l_input, options)
    if options['MODEL'] == 'batchnorm':
        network, encoder = avletters_convae_bn.create_model(l_input, options)
    if options['MODEL'] == 'dropout':
        network, encoder = avletters_convae_drop.create_model(l_input, options)
    if options['MODEL'] == 'bn+dropout':
        network, encoder = avletters_convae_bndrop.create_model(l_input, options)

    print('AE\nNetwork architecture: {}'.format(options['MODEL']))
    print_network(network)

    recon = las.layers.get_output(network, deterministic=False)
    all_params = las.layers.get_all_params(network, trainable=True)
    cost = T.mean(squared_error(recon, target_var))
    updates = adadelta(cost, all_params, lr)
    # updates = las.updates.apply_nesterov_momentum(updates, all_params, momentum=0.90)

    use_max_constraint = False
    print('apply max norm constraint: {}'.format(use_max_constraint))
    if use_max_constraint:
        MAX_NORM = 4
        for param in las.layers.get_all_params(network, regularizable=True):
            if param.ndim > 1:  # only apply to dimensions larger than 1, exclude biases
                # updates[param] = norm_constraint(param, MAX_NORM * las.utils.compute_norms(param.get_value()).mean())
                updates[param] = norm_constraint(param, MAX_NORM)

    train = theano.function([input_var, target_var], recon,
                            updates=updates, allow_input_downcast=True)
    train_cost_fn = theano.function([input_var, target_var], cost,
                                    allow_input_downcast=True)

    # deterministic pass (dropout off) for evaluation and reconstruction
    eval_recon = las.layers.get_output(network, deterministic=True)
    eval_cost = T.mean(las.objectives.squared_error(eval_recon, target_var))
    eval_cost_fn = theano.function([input_var, target_var], eval_cost,
                                   allow_input_downcast=True)
    recon_fn = theano.function([input_var], eval_recon,
                               allow_input_downcast=True)

    if terminate:
        exit()

    NUM_EPOCHS = options['NUM_EPOCHS']
    EPOCH_SIZE = options['EPOCH_SIZE']
    NO_STRIDES = options['NO_STRIDES']
    VAL_NO_STRIDES = options['VAL_NO_STRIDES']

    print('begin training for {} epochs...'.format(NUM_EPOCHS))
    datagen = batch_iterator(X, X_out, 128)

    costs = []
    val_costs = []
    for epoch in range(NUM_EPOCHS):
        time_start = time.time()
        for i in range(EPOCH_SIZE):
            batch_X, batch_y = next(datagen)
            print_str = 'Epoch {} batch {}/{}: {} examples at learning rate = {:.4f}'.format(
                epoch + 1, i + 1, EPOCH_SIZE, len(batch_X), lr.get_value())
            print(print_str, end='')
            sys.stdout.flush()
            batch_X = batch_X.reshape((-1, 1, 1200))
            train(batch_X, batch_y)
            print('\r', end='')
            if terminate:
                break
        if terminate:
            break

        # per-epoch cost over the full train/validation sets
        cost = batch_compute_cost(X, X_out, NO_STRIDES, train_cost_fn)
        val_cost = batch_compute_cost(X_val, X_val_out, VAL_NO_STRIDES, eval_cost_fn)
        costs.append(cost)
        val_costs.append(val_cost)

        print("Epoch {} train cost = {}, validation cost = {} ({:.1f}sec) "
              .format(epoch + 1, cost, val_cost, time.time() - time_start))
        # anneal the learning rate after the first 10 epochs
        if epoch > 10:
            lr.set_value(lr.get_value() * lr_decay)

    X_val_recon = recon_fn(X_val)
    visualize_reconstruction(X_val_out[450:550], X_val_recon[450:550],
                             shape=(30, 40), savefilename='avletters')
    plot_validation_cost(costs, val_costs, None, savefilename='valid_cost')

    # third layer in the flattened layer list is the first conv layer
    # (after input and reshape) — save its filters as an image
    conv2d1 = las.layers.get_all_layers(network)[2]
    visualize.plot_conv_weights(conv2d1, (15, 14)).savefig('conv2d1.png')

    print('saving encoder...')
    save_model(encoder, 'models/conv_encoder.dat')
    save_model(network, 'models/conv_ae.dat')
def cnn(name, cnn_layers, classes, epochs=500, learning_rate=0.0002, verbose=1,
        seed=0, test_size=0.2, data_folder="all", oversampling=0, undersampling=0,
        oversampling_ratio=None, undersampling_ratio=None,
        update_func=None, objective_l2=0.0025,
        train_split_eval_size=0.05, output_folder=None):
    """Train a nolearn/lasagne CNN on dmdt images and save all results.

    NOTE: while running the function the current working directory should be
    ../name/code/ and the dmdt processed data should be in
    ../name/data/data_folder/, containing X_2d.npy (3D: #dmdts x height x
    width), X_features.npy (2D: #dmdts x #features) and y.npy (labels for
    X_2d.npy, shape (#dmdts,)).

    Arguments:
      name: parent directory for which the cnn is to be trained, e.g.
          ensemble, cnn_with, cnn_without, gdr21, periodic, trans,
          ptf_classifier
      cnn_layers: list of (LayerClass, kwargs) pairs making up the CNN; see
          https://lasagne.readthedocs.io/en/latest/modules/layers.html, e.g.
          [(InputLayer, {'name': 'input', 'shape': (None, 1, h, w)}),
           (Conv2DLayer, {'name': 'conv2d1', 'num_filters': 64,
                          'filter_size': (5, 5), 'pad': 0,
                          'nonlinearity': rectify}),
           (MaxPool2DLayer, {'name': 'maxpool1', 'pool_size': (2, 2)}),
           (DropoutLayer, {'name': 'dropout1', 'p': 0.1}),
           (DenseLayer, {'name': 'output', 'num_units': n_classes,
                         'nonlinearity': softmax})]
      classes: class numbers used for training, e.g. [1,2,3,4,5,6,7,9,10,11,13,18]
      data_folder: see the 'name' argument details
      epochs, update_func, learning_rate, objective_l2, train_split_eval_size,
          verbose: NeuralNet parameters; update_func defaults to
          lasagne.updates.adam (resolved after the local import — the old
          signature default referenced `lasagne` before it was imported)
      output_folder: directory in which results are saved
      oversampling, undersampling: 1 to over-/under-sample the training data
      oversampling_ratio / undersampling_ratio: see the `ratio` argument of
          imblearn's SMOTE / RandomUnderSampler (None -> library default).
          These previously had no default, which made the signature a
          SyntaxError (non-default argument after default arguments).
    """
    import matplotlib
    matplotlib.use('Agg')  # headless backend — we only save figures to files
    import os
    import copy
    import generate
    import numpy
    import theano
    import theano.gpuarray
    import pygpu
    from pygpu import gpuarray
    #gpuarray.use("gpu"+str(0))
    #import theano.sandbox.cuda
    #theano.sandbox.cuda.use("gpu"+str(0))
    #theano.gpuarray.use("gpu" + str(0))
    theano.gpuarray.use("cuda" + str(0))
    import lasagne
    from nolearn.lasagne import NeuralNet, objective, TrainSplit, visualize
    from lasagne.nonlinearities import softmax, rectify
    from lasagne.layers import InputLayer
    from lasagne.layers import Conv2DLayer
    from lasagne.layers import MaxPool2DLayer
    from lasagne.layers import DropoutLayer
    from lasagne.layers import DenseLayer
    # f1_score and matthews_corrcoef are used below but were never imported
    from sklearn.metrics import accuracy_score, f1_score, matthews_corrcoef
    from sklearn.model_selection import train_test_split
    from sklearn import preprocessing
    from util import plot_confusion, plot_misclassifications
    import numpy as np
    from six.moves import cPickle
    import pickle
    from imblearn.over_sampling import SMOTE
    from imblearn.under_sampling import TomekLinks, RandomUnderSampler

    if update_func is None:
        update_func = lasagne.updates.adam

    # get data and encode labels to contiguous ints for the softmax head
    X_2d, X_features, y, indices = generate.get_data(
        data_folder, classes=classes, shuffle=True, seed=seed)
    labelencoder = preprocessing.LabelEncoder()
    labelencoder.fit(y)
    y = labelencoder.transform(y).astype(numpy.int32)
    print("Total number of instances: " + str(len(y)))

    # split data (train/test)
    X_train, X_test, y_train, y_test, indices_train, indices_test = train_test_split(
        X_2d, y, indices, test_size=test_size, random_state=seed)

    # SMOTE/RandomUnderSampler expect 2D input, so flatten, resample, restore
    if oversampling == 1:
        sm = SMOTE(random_state=seed, ratio=oversampling_ratio)
        (f, g, h) = X_train.shape
        X_train, y_train = sm.fit_sample(X_train.reshape(f, g * h), y_train)
        X_train = X_train.reshape((X_train.shape[0], g, h))
    if undersampling == 1:
        rus = RandomUnderSampler(random_state=seed, ratio=undersampling_ratio)
        (ff, gg, hh) = X_train.shape
        X_train, y_train = rus.fit_sample(X_train.reshape(ff, gg * hh), y_train)
        X_train = X_train.reshape((X_train.shape[0], gg, hh))

    # keep the 3D test images for misclassification plots before reshaping
    X_test_plot = copy.deepcopy(X_test)

    # add the channel axis expected by Conv2D layers: (n, 1, h, w)
    X_train = X_train.reshape(
        (X_train.shape[0], 1, X_train.shape[1], X_train.shape[2]))
    X_test = X_test.reshape(
        (X_test.shape[0], 1, X_test.shape[1], X_test.shape[2]))

    print("Number of training instances: %i" % len(y_train))
    print("Number of test instances: %i" % len(y_test))

    layers = cnn_layers

    net = NeuralNet(
        layers=layers,
        max_epochs=epochs,
        update=update_func,
        update_learning_rate=learning_rate,
        objective_l2=objective_l2,
        train_split=TrainSplit(eval_size=train_split_eval_size),
        verbose=verbose,
    )
    net.fit(X_train, y_train)

    preds = net.predict(X_test)
    preds_proba = net.predict_proba(X_test)
    acc = accuracy_score(y_test, preds)
    print("Accuracy: %f" % acc)

    # map encoded labels back to the original class numbers for reporting
    y_test = labelencoder.inverse_transform(y_test)
    preds = labelencoder.inverse_transform(preds)

    # plot misclassifications
    plot_misclassifications(y_test, preds, X_test_plot, indices_test,
                            output_folder + "/misclassifications")

    # save output
    numpy.save(output_folder + "/X_test", X_test)
    numpy.save(output_folder + "/y_test", y_test)
    numpy.save(output_folder + "/preds_proba", preds_proba)
    numpy.save(output_folder + "/preds", preds)
    numpy.savetxt(output_folder + "/y_test_cnn.csv", y_test,
                  delimiter=",", fmt='%.4f')
    numpy.savetxt(output_folder + "/preds_cnn.csv", preds,
                  delimiter=",", fmt='%.4f')
    numpy.savetxt(output_folder + "/preds_proba_cnn.csv", preds_proba,
                  delimiter=",", fmt='%.4f')
    plot_confusion(y_test, preds, output_folder + "/confusion_cnn_hpercent.png")

    # save conv filter images; close each figure so nothing leaks
    plt1 = visualize.plot_conv_weights(net.layers_['conv2d1'])
    plt1.savefig(output_folder + "/filters1.png")
    plt1.close()
    plt2 = visualize.plot_conv_weights(net.layers_['conv2d2'])
    plt2.savefig(output_folder + "/filters2.png")
    plt2.close()  # was missing: plt2's figure was never closed
    plt3 = visualize.plot_conv_weights(net.layers_['conv2d3'])
    plt3.savefig(output_folder + "/filters3.png")
    plt3.close()

    # pickle the trained network (with-block guarantees the file is closed)
    with open(output_folder + '/obj.save_cd', 'wb') as f:
        cPickle.dump(net, f, protocol=cPickle.HIGHEST_PROTOCOL)

    print("F1 Score: " + str(
        f1_score(y_test.reshape(y_test.shape[0]),
                 preds.reshape(preds.shape[0]), average=None)))
    print("Matthews correlation coefficient (MCC): " + str(
        matthews_corrcoef(y_test.reshape(y_test.shape[0]),
                          preds.reshape(preds.shape[0]))))
def plot_conv_weights(self, net, **kwargs):
    """Plot the filters of the net's "conv1" layer and clear pyplot state.

    Any keyword arguments are passed straight through to nolearn's helper.
    """
    from nolearn.lasagne.visualize import plot_conv_weights as draw_weights
    draw_weights(net.layers_["conv1"], **kwargs)
    # Reset the figure and then the current axes for the next caller.
    plt.clf()
    plt.cla()
# NOTE(review): fragment — these first lines close a NeuralNet(...) call whose
# opening is outside this chunk.
update=nesterov_momentum,
update_learning_rate=0.1,
update_momentum=0.9,
objective_l2=0.0025,
train_split=TrainSplit(eval_size=0.25),
verbose=2,
)

# Train
net0.fit(X_train, y_train)

# Plot learning curve
plot_loss(net0)

# Plot learned filters
plot_conv_weights(net0.layers_[1], figsize=(4, 4))  # Layer 1 (conv1)

# Plot activation maps
x = X_train[0:1]
plot_conv_activity(net0.layers_[1], x)

# Show filter occlusion maps to detect importance
plot_occlusion(net0, X_train[:5], y_train[:5])
#plot_saliency(net0, X_train[:5])
#layer_info = PrintLayerInfo()
#layer_info(net0)

# Predict a label
net0.predict(X_test[0:1])
# NOTE(review): fragment — exp_name, conv_net, X, y, sample_size, images_id
# and the helper functions are defined outside this chunk (Python 2 prints).
# Tag all artifacts with experiment name + date.
name = exp_name + '_' + str(date.today())
with open('models/conv_net_'+name+'.pkl', 'wb') as f:
    cPickle.dump(conv_net, f, -1)
conv_net.save_params_to('models/params_'+name)

# ----- Train set ----
train_predictions = conv_net.predict_proba(X)
make_submission_file(train_predictions[:sample_size], images_id[:sample_size],
                     output_filepath='models/training_'+name+'.csv')

# ----- Test set ----
X_test, _, images_id_test = load_numpy_arrays(args['test_file'])
print "Test:"
print "X_test.shape:", X_test.shape
predictions = conv_net.predict_proba(X_test)
make_submission_file(predictions, images_id_test,
                     output_filepath='submissions/submission_'+name+'.csv')

# ----- Make plots ----
plot_loss(conv_net, "models/loss_"+name+".png", show=False)
# these helpers draw onto the current pyplot figure, which is then saved
plot_conv_weights(conv_net.layers_[1], figsize=(4, 4))
plt.savefig('models/weights_'+name+'.png')
plot_conv_activity(conv_net.layers_[1], X[0:1])
plt.savefig('models/activity_'+name+'.png')
plot_occlusion(conv_net, X[:5], y[:5])
plt.savefig('models/occlusion_'+name+'.png')
def main():
    """Train a convolutional autoencoder (AVLetters data), plot reconstruction
    and validation cost, and save the encoder/AE models.

    Near-duplicate of the other autoencoder main() in this file; depends on
    module-level helpers and the global `terminate` flag set by SIGINT.
    """
    def signal_handler(signal, frame):
        # SIGINT handler: request a graceful stop via the module-global flag
        global terminate
        terminate = True
        print('terminating...'.format(terminate))
    signal.signal(signal.SIGINT, signal_handler)

    configure_theano()
    options = parse_options()
    X, X_val = generate_data()

    # X = np.reshape(X, (-1, 1, 30, 40))[:-5]
    print('X type and shape:', X.dtype, X.shape)
    print('X.min():', X.min())
    print('X.max():', X.max())

    # X_val = np.reshape(X_val, (-1, 1, 30, 40))[:-1]
    print('X_val type and shape:', X_val.dtype, X_val.shape)
    print('X_val.min():', X_val.min())
    print('X_val.max():', X_val.max())

    # we need our target to be 1 dimensional
    X_out = X.reshape((X.shape[0], -1))
    X_val_out = X_val.reshape((X_val.shape[0], -1))
    print('X_out:', X_out.dtype, X_out.shape)
    print('X_val_out', X_val_out.dtype, X_val_out.shape)

    # X_noisy = apply_gaussian_noise(X_out)
    # visualize_reconstruction(X_noisy[0:25], X_out[0:25], shape=(28, 28))
    # X = np.reshape(X_noisy, (-1, 1, 28, 28))

    print('constructing and compiling model...')
    # input_var = T.tensor4('input', dtype='float32')
    input_var = T.tensor3('input', dtype='float32')
    target_var = T.matrix('output', dtype='float32')
    lr = theano.shared(np.array(0.8, dtype=theano.config.floatX),
                       name='learning_rate')
    lr_decay = np.array(0.9, dtype=theano.config.floatX)

    # try building a reshaping layer
    # network = create_model(input_var, (None, 1, 30, 40), options)
    # flat 1200-dim frames reshaped to 1x30x40 images for the conv layers
    l_input = InputLayer((None, None, 1200), input_var, name='input')
    l_input = ReshapeLayer(l_input, (-1, 1, 30, 40), name='reshape_input')
    # l_input = InputLayer((None, 1, 30, 40), input_var, name='input')

    # select the AE variant from the options
    if options['MODEL'] == 'normal':
        network, encoder = avletters_convae.create_model(l_input, options)
    if options['MODEL'] == 'batchnorm':
        network, encoder = avletters_convae_bn.create_model(l_input, options)
    if options['MODEL'] == 'dropout':
        network, encoder = avletters_convae_drop.create_model(l_input, options)
    if options['MODEL'] == 'bn+dropout':
        network, encoder = avletters_convae_bndrop.create_model(
            l_input, options)

    print('AE\nNetwork architecture: {}'.format(options['MODEL']))
    print_network(network)

    recon = las.layers.get_output(network, deterministic=False)
    all_params = las.layers.get_all_params(network, trainable=True)
    cost = T.mean(squared_error(recon, target_var))
    updates = adadelta(cost, all_params, lr)
    # updates = las.updates.apply_nesterov_momentum(updates, all_params, momentum=0.90)

    use_max_constraint = False
    print('apply max norm constraint: {}'.format(use_max_constraint))
    if use_max_constraint:
        MAX_NORM = 4
        for param in las.layers.get_all_params(network, regularizable=True):
            if param.ndim > 1:  # only apply to dimensions larger than 1, exclude biases
                # updates[param] = norm_constraint(param, MAX_NORM * las.utils.compute_norms(param.get_value()).mean())
                updates[param] = norm_constraint(param, MAX_NORM)

    train = theano.function([input_var, target_var], recon,
                            updates=updates, allow_input_downcast=True)
    train_cost_fn = theano.function([input_var, target_var], cost,
                                    allow_input_downcast=True)

    # deterministic pass (dropout off) for evaluation and reconstruction
    eval_recon = las.layers.get_output(network, deterministic=True)
    eval_cost = T.mean(las.objectives.squared_error(eval_recon, target_var))
    eval_cost_fn = theano.function([input_var, target_var], eval_cost,
                                   allow_input_downcast=True)
    recon_fn = theano.function([input_var], eval_recon,
                               allow_input_downcast=True)

    if terminate:
        exit()

    NUM_EPOCHS = options['NUM_EPOCHS']
    EPOCH_SIZE = options['EPOCH_SIZE']
    NO_STRIDES = options['NO_STRIDES']
    VAL_NO_STRIDES = options['VAL_NO_STRIDES']

    print('begin training for {} epochs...'.format(NUM_EPOCHS))
    datagen = batch_iterator(X, X_out, 128)

    costs = []
    val_costs = []
    for epoch in range(NUM_EPOCHS):
        time_start = time.time()
        for i in range(EPOCH_SIZE):
            batch_X, batch_y = next(datagen)
            print_str = 'Epoch {} batch {}/{}: {} examples at learning rate = {:.4f}'.format(
                epoch + 1, i + 1, EPOCH_SIZE, len(batch_X), lr.get_value())
            print(print_str, end='')
            sys.stdout.flush()
            batch_X = batch_X.reshape((-1, 1, 1200))
            train(batch_X, batch_y)
            print('\r', end='')
            if terminate:
                break
        if terminate:
            break

        # per-epoch cost over the full train/validation sets
        cost = batch_compute_cost(X, X_out, NO_STRIDES, train_cost_fn)
        val_cost = batch_compute_cost(X_val, X_val_out, VAL_NO_STRIDES, eval_cost_fn)
        costs.append(cost)
        val_costs.append(val_cost)

        print("Epoch {} train cost = {}, validation cost = {} ({:.1f}sec) ".
              format(epoch + 1, cost, val_cost, time.time() - time_start))
        # anneal the learning rate after the first 10 epochs
        if epoch > 10:
            lr.set_value(lr.get_value() * lr_decay)

    X_val_recon = recon_fn(X_val)
    visualize_reconstruction(X_val_out[450:550], X_val_recon[450:550],
                             shape=(30, 40), savefilename='avletters')
    plot_validation_cost(costs, val_costs, None, savefilename='valid_cost')

    # third layer in the flattened layer list is the first conv layer
    # (after input and reshape) — save its filters as an image
    conv2d1 = las.layers.get_all_layers(network)[2]
    visualize.plot_conv_weights(conv2d1, (15, 14)).savefig('conv2d1.png')

    print('saving encoder...')
    save_model(encoder, 'models/conv_encoder.dat')
    save_model(network, 'models/conv_ae.dat')
# NOTE(review): fragment — the NeuralNet(...) call and its layers list open
# outside this chunk; these lines finish the layer list and hyperparameters.
('output', layers.DenseLayer),
],
# MNIST-style input: batch x 1 channel x 28 x 28
input_shape=(None, 1, 28, 28),
conv2d1_num_filters=32,
conv2d1_filter_size=(5, 5),
conv2d1_nonlinearity=lasagne.nonlinearities.rectify,
conv2d1_W=lasagne.init.GlorotUniform(),
maxpool1_pool_size=(2, 2),
conv2d2_num_filters=32,
conv2d2_filter_size=(5, 5),
conv2d2_nonlinearity=lasagne.nonlinearities.rectify,
maxpool2_pool_size=(2, 2),
dropout1_p=0.5,
dense_num_units=256,
dense_nonlinearity=lasagne.nonlinearities.rectify,
dropout2_p=0.5,
# 10-way softmax classifier head
output_nonlinearity=lasagne.nonlinearities.softmax,
output_num_units=10,
update=nesterov_momentum,
update_learning_rate=0.01,
update_momentum=0.9,
max_epochs=10,
verbose=1,
)

nn = CNN.fit(X_train, y_train)
prediction = CNN.predict(X_test)
# visualize the first conv layer's learned filters
visualize.plot_conv_weights(CNN.layers_['conv2d1'])