def get_layer_info():
    """Created 04/11/2016"""
    from nolearn.lasagne import PrintLayerInfo

    layer_info = PrintLayerInfo()
    return layer_info
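# Usage sketch: PrintLayerInfo instances are callable and print the layer
# table of an initialized nolearn NeuralNet. `build_net()` is a hypothetical
# constructor standing in for whatever builds the net elsewhere.
net = build_net()       # hypothetical: returns a nolearn NeuralNet
net.initialize()        # the net must be initialized before printing
get_layer_info()(net)   # prints one row per layer with shapes and sizes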
# Assumed module-level imports for this method: numpy as np, cv2,
# `from numpy import append`, `resize` (presumably skimage.transform.resize),
# LogisticRegression from sklearn.linear_model, PrintLayerInfo from
# nolearn.lasagne, plus the project-local Prediction, FileParser and
# SiftFeatureExtractor.
def make_submission(self, train_images, train_results, test_images, output_file_path,
                    feature_extractors, model, size=64):
    # Create a vector of feature vectors and initialize the codebook of the SIFT extractor
    feature_vectors = []
    sift_extractor = next((extractor for extractor in feature_extractors
                           if isinstance(extractor, SiftFeatureExtractor)), None)
    if sift_extractor is not None:
        sift_extractor.set_codebook(train_images)
        feature_extractors[feature_extractors.index(sift_extractor)] = sift_extractor

    # Extract features from every image
    for image in train_images:
        print("Training ", image, "...")
        preprocessed_color_image = self.preprocess_image(image, size)
        feature_vector = []
        if feature_extractors:
            for feature_extractor in feature_extractors:
                if not isinstance(feature_extractor, SiftFeatureExtractor):
                    feature_vector = append(feature_vector,
                                            feature_extractor.extract_feature_vector(preprocessed_color_image))
                else:
                    feature_vector = append(feature_vector,
                                            feature_extractor.extract_feature_vector(image))
        else:
            feature_vector = np.asarray(resize(cv2.imread(image), (48, 48, 3))
                                        .transpose(2, 0, 1).reshape(3, 48, 48))
        feature_vectors.append(feature_vector)

    # Logistic regression for feature selection: a *lower* C means a stronger
    # L1 penalty, so more features are zeroed out and removed
    clf2 = LogisticRegression(penalty='l1', dual=False, tol=0.0001, C=4)

    # Feature selection/reduction
    if model != "conv":
        print("Old feature vector shape = ", len(feature_vectors), len(feature_vectors[0]))
        new_feature_vectors = clf2.fit_transform(feature_vectors, train_results)
        print("New feature vector shape = ", len(new_feature_vectors), len(new_feature_vectors[0]))

    if model == "neural":
        model = self.build_nn(nr_features=len(new_feature_vectors[0]))
        new_feature_vectors = np.asarray(new_feature_vectors)
        train_results = np.asarray(train_results)
        model.initialize()
        layer_info = PrintLayerInfo()
        layer_info(model)
        # Fit our model
        model.fit(new_feature_vectors, train_results)
    else:
        model = self.build_conv()
        # Fit our model
        model.fit(np.asarray(feature_vectors), np.asarray(train_results))

    # Iterate over the test images and add their predictions to a Prediction object
    prediction_object = Prediction()
    for im in test_images:
        print("Predicting ", im)
        preprocessed_color_image = self.preprocess_image(im, size)
        validation_feature_vector = []
        if feature_extractors:
            for feature_extractor in feature_extractors:
                if not isinstance(feature_extractor, SiftFeatureExtractor):
                    validation_feature_vector = append(validation_feature_vector,
                                                       feature_extractor.extract_feature_vector(preprocessed_color_image))
                else:
                    validation_feature_vector = append(validation_feature_vector,
                                                       feature_extractor.extract_feature_vector(im))
            validation_feature_vector = clf2.transform(validation_feature_vector)
        else:
            validation_feature_vector = np.asarray(resize(cv2.imread(im), (48, 48, 3))
                                                   .transpose(2, 0, 1).reshape(3, 48, 48))
        prediction_object.addPrediction(model.predict_proba(validation_feature_vector)[0])

    # Write out the prediction object
    FileParser.write_CSV(output_file_path, prediction_object)
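# Note: `fit_transform` on a bare estimator was removed in later scikit-learn
# releases. A minimal sketch of the same L1-based selection step with the
# current API; `X`, `y` and `X_test` are hypothetical stand-ins for the
# feature matrices and labels built above.
from sklearn.feature_selection import SelectFromModel
from sklearn.linear_model import LogisticRegression

selector = SelectFromModel(LogisticRegression(penalty='l1', C=4, solver='liblinear'))
X_reduced = selector.fit_transform(X, y)     # fit the sparsity mask on training data
X_test_reduced = selector.transform(X_test)  # reuse the same mask at prediction time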
# Random Forest
rf.fit(train_features_df.values.tolist(), train_labels_df['cat'].tolist())
predicted_labels = []
for index, vector in enumerate(test_features_df.values):
    predicted_labels.append(str(rf.predict(vector.reshape(1, -1))[0]))
tree_confusion_matrices["Random Forest"].append(
    tree.plot_confusion_matrix(test_labels_df['cat'].values.astype(str),
                               predicted_labels))  # a bit hacky to reuse the tree method

# Scale the features (mean-centered range scaling) before the neural network
train_features_df = (train_features_df - train_features_df.mean()) / (train_features_df.max() - train_features_df.min())
train_features_df = train_features_df.reset_index(drop=True)
test_features_df = (test_features_df - test_features_df.mean()) / (test_features_df.max() - test_features_df.min())
test_features_df = test_features_df.reset_index(drop=True)

# Neural Network
model = build_nn(nr_features=len(train_features_df.columns))
model.initialize()
layer_info = PrintLayerInfo()
layer_info(model)
y_train = np.reshape(np.asarray(train_labels_df, dtype='int32'), (-1, 1)).ravel()
model.fit(train_features_df.values, np.add(y_train, -1))  # shift labels to start at 0
predicted_labels = []
for index, vector in enumerate(test_features_df.values):
    predicted_labels.append(str(model.predict(vector.reshape(1, -1))[0] + 1))  # shift back
tree_confusion_matrices["Neural Network"].append(
    tree.plot_confusion_matrix(test_labels_df['cat'].values.astype(str),
                               predicted_labels))  # a bit hacky to reuse the tree method

# Bayesian Network
train_features_df, test_features_df = features_df.iloc[train_index, :].copy(), features_df.iloc[test_index, :].copy()
train_labels_df, test_labels_df = labels_df.iloc[train_index, :].copy(), labels_df.iloc[test_index, :].copy()
train_features_df = train_features_df.reset_index(drop=True)
test_features_df = test_features_df.reset_index(drop=True)
train_labels_df = train_labels_df.reset_index(drop=True)
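# Side note (a sketch, not part of the original pipeline): scaling the test
# split with its own statistics leaks test information into preprocessing.
# The usual alternative reuses the training split's statistics:
train_mean = train_features_df.mean()
train_range = train_features_df.max() - train_features_df.min()
train_features_df = (train_features_df - train_mean) / train_range
test_features_df = (test_features_df - train_mean) / train_range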
nn = net1.fit(X_train, y_train)
print "# Saving weights"
net1.save_weights_to(s.net_name)

if _test:
    print "# Loading weights"
    net1.load_weights_from(s.net_name)

# Evaluate
print "# Evaluating"
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score
predictions = net1.predict(X_test)

####################### OUTPUT to shell and to file for later
filename = "experiment_log.txt"
target = open(filename, 'a+')
target.write("-------------------------------------------------------------------------\n")

from nolearn.lasagne import PrintLayerInfo
pli = PrintLayerInfo()
net1.verbose = 3
layer_info, legend = pli._get_layer_info_conv(net1)
target.write(layer_info)
target.write(classification_report(y_test, predictions))
# target.write(accuracy_score(y_test, predictions))
target.close()

print layer_info
print classification_report(y_test, predictions)
print accuracy_score(y_test, predictions)
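# `_get_layer_info_conv` is a private nolearn helper and may change between
# releases; a sketch that captures the same table through the public callable
# instead, by temporarily redirecting stdout (Python 2, as above):
import sys
from StringIO import StringIO

buf = StringIO()
old_stdout, sys.stdout = sys.stdout, buf
PrintLayerInfo()(net1)  # prints the layer table of an initialized/fitted net
sys.stdout = old_stdout
with open("experiment_log.txt", 'a+') as log:
    log.write(buf.getvalue())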
# Same module-level imports as make_submission above; KFold comes from
# sklearn.cross_validation (the pre-0.18 API, indexed by sample count).
def local_test(self, images, results, feature_extractors, model, k=2, size=64):
    kf = KFold(len(images), n_folds=k, shuffle=True, random_state=1337)
    # kf = KFold(500, n_folds=k, shuffle=True, random_state=1337)
    train_errors = []
    test_errors = []
    for train, validation in kf:
        # Divide the train images into a training and a validation set (using KFold)
        train_set = [images[i % len(images)] for i in train]
        validation_set = [images[i % len(images)] for i in validation]
        train_set_results = [results[i % len(images)] for i in train]
        validation_set_results = [results[i % len(images)] for i in validation]

        # Create an empty feature_vectors array and set the codebook of the SIFT extractor, if there is one
        feature_vectors = []
        sift_extractor = next((extractor for extractor in feature_extractors
                               if isinstance(extractor, SiftFeatureExtractor)), None)
        if sift_extractor is not None:
            sift_extractor.set_codebook(train_set)
            feature_extractors[feature_extractors.index(sift_extractor)] = sift_extractor

        # Iterate over the train set, extract the features from each image and append them to feature_vectors
        for image in train_set:
            print("Training ", image, "...")
            preprocessed_color_image = self.preprocess_image(image, size)
            feature_vector = []
            if feature_extractors:
                for feature_extractor in feature_extractors:
                    if not isinstance(feature_extractor, SiftFeatureExtractor):
                        feature_vector = append(feature_vector,
                                                feature_extractor.extract_feature_vector(preprocessed_color_image))
                    else:
                        feature_vector = append(feature_vector,
                                                feature_extractor.extract_feature_vector(image))
            else:
                feature_vector = np.asarray(resize(cv2.imread(image), (48, 48, 3))
                                            .transpose(2, 0, 1).reshape(3, 48, 48))
            feature_vectors.append(feature_vector)

        # Logistic regression for feature selection: a *lower* C means a
        # stronger L1 penalty, so more features are removed
        clf2 = LogisticRegression(penalty='l1', dual=False, tol=0.0001, C=4)

        # Feature selection/reduction. Keep the `model` string intact across
        # folds; the fitted network lives in `net`.
        if model != "conv":
            new_feature_vectors = clf2.fit_transform(feature_vectors, train_set_results)

        if model == "neural":
            net = self.build_nn(nr_features=len(new_feature_vectors[0]))
            new_feature_vectors = np.asarray(new_feature_vectors)
            train_set_results = np.asarray(train_set_results)
            net.initialize()
            layer_info = PrintLayerInfo()
            layer_info(net)
            # Fit our model
            net.fit(new_feature_vectors, train_set_results)
        else:
            net = self.build_conv()
            # Fit our model
            net.fit(np.asarray(feature_vectors), np.asarray(train_set_results))

        train_prediction_object = Prediction()
        counter = 0
        for im in train_set:
            print("predicting train image ", counter)
            counter += 1
            preprocessed_color_image = self.preprocess_image(im, size)
            validation_feature_vector = []
            if feature_extractors:
                for feature_extractor in feature_extractors:
                    if not isinstance(feature_extractor, SiftFeatureExtractor):
                        validation_feature_vector = append(validation_feature_vector,
                                                           feature_extractor.extract_feature_vector(preprocessed_color_image))
                    else:
                        validation_feature_vector = append(validation_feature_vector,
                                                           feature_extractor.extract_feature_vector(im))
                validation_feature_vector = clf2.transform(validation_feature_vector)
            else:
                validation_feature_vector = np.asarray(resize(cv2.imread(im), (48, 48, 3))
                                                       .transpose(2, 0, 1).reshape(3, 48, 48))
            train_prediction_object.addPrediction(net.predict_proba(validation_feature_vector)[0])

        print("predicting test images")
        test_prediction_object = Prediction()
        counter = 0
        for im in validation_set:
            print("predicting test image ", counter)
            counter += 1
            preprocessed_color_image = self.preprocess_image(im, size)
            validation_feature_vector = []
            if feature_extractors:
                for feature_extractor in feature_extractors:
                    if not isinstance(feature_extractor, SiftFeatureExtractor):
                        validation_feature_vector = append(validation_feature_vector,
                                                           feature_extractor.extract_feature_vector(preprocessed_color_image))
                    else:
                        validation_feature_vector = append(validation_feature_vector,
                                                           feature_extractor.extract_feature_vector(im))
                validation_feature_vector = clf2.transform(validation_feature_vector)
            else:
                validation_feature_vector = np.asarray(resize(cv2.imread(im), (48, 48, 3))
                                                       .transpose(2, 0, 1).reshape(3, 48, 48))
            test_prediction_object.addPrediction(net.predict_proba(validation_feature_vector)[0])

        train_errors.append(train_prediction_object.evaluate(train_set_results))
        test_errors.append(test_prediction_object.evaluate(validation_set_results))

    return [train_errors, test_errors]
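# Usage sketch, assuming `classifier` is an instance of the surrounding class
# and that Prediction.evaluate returns one scalar error per fold:
train_errors, test_errors = classifier.local_test(images, results,
                                                  feature_extractors,
                                                  model="neural", k=2)
print("mean train error:", np.mean(train_errors))
print("mean test error: ", np.mean(test_errors))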
    input_shape=(None, num_features),
    dense_num_units=64,
    narrow_num_units=48,
    denseReverse1_num_units=64,
    denseReverse2_num_units=128,
    output_num_units=128,
    # input_nonlinearity=None,  # nonlinearities.sigmoid,
    # dense_nonlinearity=nonlinearities.tanh,
    narrow_nonlinearity=nonlinearities.softplus,
    # denseReverse1_nonlinearity=nonlinearities.tanh,
    denseReverse2_nonlinearity=nonlinearities.softplus,
    output_nonlinearity=nonlinearities.linear,  # nonlinearities.softmax,
    # dropout0_p=0.1,
    dropout1_p=0.01,
    dropout2_p=0.001,
    regression=True,
    verbose=1)

ae.initialize()
PrintLayerInfo()(ae)

# Autoencoder: the target is the input itself
maybe_this_is_a_history = ae.fit(Z, Z)
# learned_parameters = ae.get_all_params_values()
# np.save("task4/learned_parameter.npy", learned_parameters)
# SaveWeights(path='task4/koebi_train_history_AE')(ae, maybe_this_is_a_history)
ae.save_params_to('task4/koebi_train_history_AE2')
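# Reload sketch: save_params_to stores parameter values only, so the same
# architecture must be rebuilt before loading. `build_autoencoder` is a
# hypothetical helper that recreates the net defined above.
ae2 = build_autoencoder(num_features)
ae2.initialize()
ae2.load_params_from('task4/koebi_train_history_AE2')
reconstruction = ae2.predict(Z)  # should approximate Z after training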
def make_memnn(vocab_size, cont_sl, cont_wl, quest_wl, answ_wl, rnn_size,
               rnn_type='LSTM', pool_size=4, answ_n=4, dence_l=[100],
               dropout=0.5, batch_size=16, emb_size=50, grad_clip=40,
               init_std=0.1, num_hops=3, rnn_style=False, nonlin=LN.softmax,
               init_W=None, rng=None, art_pool=4, lr=0.01, mom=0,
               updates=LU.adagrad, valid_indices=0.2, permute_answ=False,
               permute_cont=False):

    def select_rnn(x):
        return {
            'RNN': LL.RecurrentLayer,
            'LSTM': LL.LSTMLayer,
            'GRU': LL.GRULayer,
        }.get(x, LL.LSTMLayer)

    # dence = dence + [1]
    RNN = select_rnn(rnn_type)

    # ----------------------------------------------------------------- weights
    tr_variables = {}
    tr_variables['WQ'] = theano.shared(init_std * np.random.randn(vocab_size, emb_size).astype('float32'))
    tr_variables['WA'] = theano.shared(init_std * np.random.randn(vocab_size, emb_size).astype('float32'))
    tr_variables['WC'] = theano.shared(init_std * np.random.randn(vocab_size, emb_size).astype('float32'))
    tr_variables['WTA'] = theano.shared(init_std * np.random.randn(cont_sl, emb_size).astype('float32'))
    tr_variables['WTC'] = theano.shared(init_std * np.random.randn(cont_sl, emb_size).astype('float32'))
    tr_variables['WAnsw'] = theano.shared(init_std * np.random.randn(vocab_size, emb_size).astype('float32'))

    # ------------------------------------------------------------ input layers
    layers = [
        (LL.InputLayer, {'name': 'l_in_q', 'shape': (batch_size, 1, quest_wl),
                         'input_var': T.itensor3('l_in_q_')}),
        (LL.InputLayer, {'name': 'l_in_a', 'shape': (batch_size, answ_n, answ_wl),
                         'input_var': T.itensor3('l_in_a_')}),
        (LL.InputLayer, {'name': 'l_in_q_pe', 'shape': (batch_size, 1, quest_wl, emb_size)}),
        (LL.InputLayer, {'name': 'l_in_a_pe', 'shape': (batch_size, answ_n, answ_wl, emb_size)}),
        (LL.InputLayer, {'name': 'l_in_cont', 'shape': (batch_size, cont_sl, cont_wl),
                         'input_var': T.itensor3('l_in_cont_')}),
        (LL.InputLayer, {'name': 'l_in_cont_pe', 'shape': (batch_size, cont_sl, cont_wl, emb_size)}),
    ]

    # ------------------------------------------------------------ slice layers
    # l_qs = []
    # l_cas = []
    l_a_names = ['l_a_{}'.format(i) for i in range(answ_n)]
    l_a_pe_names = ['l_a_pe{}'.format(i) for i in range(answ_n)]
    for i in range(answ_n):
        layers.extend([(LL.SliceLayer, {'name': l_a_names[i], 'incoming': 'l_in_a',
                                        'indices': slice(i, i + 1), 'axis': 1})])
    for i in range(answ_n):
        layers.extend([(LL.SliceLayer, {'name': l_a_pe_names[i], 'incoming': 'l_in_a_pe',
                                        'indices': slice(i, i + 1), 'axis': 1})])

    # ------------------------------------------------------------ MEMNN layers
    # question ----------------------------------------------------------------
    layers.extend([(EncodingFullLayer, {'name': 'l_emb_f_q',
                                        'incomings': ('l_in_q', 'l_in_q_pe'),
                                        'vocab_size': vocab_size, 'emb_size': emb_size,
                                        'W': tr_variables['WQ'], 'WT': None})])

    l_mem_names = ['ls_mem_n2n_{}'.format(i) for i in range(num_hops)]
    layers.extend([(MemoryLayer, {'name': l_mem_names[0],
                                  'incomings': ('l_in_cont', 'l_in_cont_pe', 'l_emb_f_q'),
                                  'vocab_size': vocab_size, 'emb_size': emb_size,
                                  'A': tr_variables['WA'], 'C': tr_variables['WC'],
                                  'AT': tr_variables['WTA'], 'CT': tr_variables['WTC'],
                                  'nonlin': nonlin})])
    for i in range(1, num_hops):
        # Alternate the A/C embeddings between hops (adjacent weight sharing)
        if i % 2:
            WC, WA = tr_variables['WA'], tr_variables['WC']
            WTC, WTA = tr_variables['WTA'], tr_variables['WTC']
        else:
            WA, WC = tr_variables['WA'], tr_variables['WC']
            WTA, WTC = tr_variables['WTA'], tr_variables['WTC']
        layers.extend([(MemoryLayer, {'name': l_mem_names[i],
                                      'incomings': ('l_in_cont', 'l_in_cont_pe', l_mem_names[i - 1]),
                                      'vocab_size': vocab_size, 'emb_size': emb_size,
                                      'A': WA, 'C': WC, 'AT': WTA, 'CT': WTC,
                                      'nonlin': nonlin})])

    # answers -------------------------------------------------------------------
    l_emb_f_a_names = ['l_emb_f_a{}'.format(i) for i in range(answ_n)]
    for i in range(answ_n):
        layers.extend([(EncodingFullLayer, {'name': l_emb_f_a_names[i],
                                            'incomings': (l_a_names[i], l_a_pe_names[i]),
                                            'vocab_size': vocab_size, 'emb_size': emb_size,
                                            'W': tr_variables['WAnsw'], 'WT': None})])

    # ------------------------------------------------------- concatenate layers
    layers.extend([(LL.ConcatLayer, {'name': 'l_qma_concat',
                                     'incomings': l_mem_names + l_emb_f_a_names})])

    # ---------------------------------------------------------------- RNN layers
    layers.extend([(RNN, {'name': 'l_qa_rnn_f', 'incoming': 'l_qma_concat',
                          # 'mask_input': 'l_qamask_concat',
                          'num_units': rnn_size, 'backwards': False,
                          'only_return_final': False, 'grad_clipping': grad_clip})])
    layers.extend([(RNN, {'name': 'l_qa_rnn_b', 'incoming': 'l_qma_concat',
                          # 'mask_input': 'l_qamask_concat',
                          'num_units': rnn_size, 'backwards': True,
                          'only_return_final': False, 'grad_clipping': grad_clip})])
    layers.extend([(LL.SliceLayer, {'name': 'l_qa_rnn_f_sl', 'incoming': 'l_qa_rnn_f',
                                    'indices': slice(-answ_n, None), 'axis': 1})])
    layers.extend([(LL.SliceLayer, {'name': 'l_qa_rnn_b_sl', 'incoming': 'l_qa_rnn_b',
                                    'indices': slice(-answ_n, None), 'axis': 1})])
    layers.extend([(LL.ElemwiseMergeLayer, {'name': 'l_qa_rnn_conc',
                                            'incomings': ('l_qa_rnn_f_sl', 'l_qa_rnn_b_sl'),
                                            'merge_function': T.add})])

    # ---------------------------------------------------------------- pooling layer
    # layers.extend([(LL.DimshuffleLayer, {'name': 'l_qa_rnn_conc_',
    #                                      'incoming': 'l_qa_rnn_conc', 'pattern': (0, 'x', 1)})])
    layers.extend([(LL.Pool1DLayer, {'name': 'l_qa_pool', 'incoming': 'l_qa_rnn_conc',
                                     'pool_size': pool_size, 'mode': 'max'})])

    # ------------------------------------------------------------------ dense layers
    l_dence_names = ['l_dence_{}'.format(i) for i, _ in enumerate(dence_l)]
    if dropout:
        layers.extend([(LL.DropoutLayer, {'name': 'l_dence_do', 'p': dropout})])
    for i, d in enumerate(dence_l):
        # note: reuses (and overwrites) the `nonlin` argument from here on
        if i < len(dence_l) - 1:
            nonlin = LN.tanh
        else:
            nonlin = LN.softmax
        layers.extend([(LL.DenseLayer, {'name': l_dence_names[i], 'num_units': d,
                                        'nonlinearity': nonlin})])
        if i < len(dence_l) - 1 and dropout:
            layers.extend([(LL.DropoutLayer, {'name': l_dence_names[i] + 'do', 'p': dropout})])

    if isinstance(valid_indices, np.ndarray) or isinstance(valid_indices, list):
        train_split = TrainSplit_indices(valid_indices=valid_indices)
    else:
        train_split = TrainSplit(eval_size=valid_indices, stratify=False)

    if permute_answ or permute_cont:
        batch_iterator_train = PermIterator(batch_size, permute_answ, permute_cont)
    else:
        batch_iterator_train = BatchIterator(batch_size=batch_size)

    def loss(x, t):
        # Clip predictions away from 0/1 so the cross-entropy stays finite
        return LO.aggregate(LO.categorical_crossentropy(T.clip(x, 1e-6, 1. - 1e-6), t))
        # return LO.aggregate(LO.squared_error(T.clip(x, 1e-6, 1. - 1e-6), t))

    nnet = NeuralNet(
        y_tensor_type=T.ivector,
        layers=layers,
        update=updates,
        update_learning_rate=lr,
        # update_epsilon=1e-7,
        objective_loss_function=loss,
        regression=False,
        verbose=2,
        batch_iterator_train=batch_iterator_train,
        batch_iterator_test=BatchIterator(batch_size=batch_size / 2),
        # batch_iterator_train=BatchIterator(batch_size=batch_size),
        # batch_iterator_test=BatchIterator(batch_size=batch_size),
        # train_split=TrainSplit(eval_size=eval_size)
        train_split=train_split,
        on_batch_finished=[zero_memnn])
    nnet.initialize()
    PrintLayerInfo()(nnet)
    return nnet
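# Construction sketch; the sizes are hypothetical and only need to match the
# question/answer/context tensors fed at training time:
net = make_memnn(vocab_size=10000, cont_sl=20, cont_wl=30, quest_wl=30,
                 answ_wl=10, rnn_size=64, num_hops=3, batch_size=16)
# With several InputLayers, nolearn accepts X as a dict keyed by input-layer
# name (an assumption to verify against the nolearn version in use):
# net.fit({'l_in_q': q, 'l_in_q_pe': q_pe, 'l_in_a': a, 'l_in_a_pe': a_pe,
#          'l_in_cont': c, 'l_in_cont_pe': c_pe}, labels)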
# Assumed imports: numpy as np, sklearn.metrics, StratifiedKFold from
# sklearn.cross_validation (pre-0.18 API), plus the project-local build_nn,
# PrintLayerInfo and metrics helpers.
def local_test(feature_vectors_df, labels_df, k=2):
    # labeltjes = [None] * labels_df.values.shape[0]
    labeltjes = labels_df.values
    print labeltjes.shape
    labeltjes -= 1  # shift labels so they start at 0
    labeltjes = labeltjes.ravel().tolist()

    kf = StratifiedKFold(labeltjes, n_folds=k, shuffle=True)
    # kf = StratifiedKFold(len(feature_vectors_df.index), n_folds=k, shuffle=True)
    # kf = KFold(500, n_folds=k, shuffle=True, random_state=1337)
    confusion_matrices_folds = []
    for train, test in kf:
        # Divide the data into a training and a validation set (using StratifiedKFold)
        X_train = feature_vectors_df.values[train, :]
        X_test = feature_vectors_df.values[test, :]
        y_train = [labeltjes[i] for i in train]
        y_test = [labeltjes[i] for i in test]

        model = build_nn(nr_features=X_train.shape[1])
        model.initialize()
        layer_info = PrintLayerInfo()
        layer_info(model)

        # Fit our model
        y_train = np.reshape(np.asarray(y_train, dtype='int32'), (-1, 1)).ravel()
        model.fit(X_train, np.reshape(np.asarray(y_train), (-1, 1)).ravel())

        preds = model.predict(X_test)
        # Collect the correctly classified samples
        correct = [preds[i] for i in range(min(len(y_test), len(preds)))
                   if preds[i] == y_test[i]]
        # checks = len([i for i, j in zip(preds, np.reshape(np.asarray(y_train), (-1, 1))) if i == j])

        model = None
        del model

        # Save the confusion matrix for this fold
        confusion_matrix = sklearn.metrics.confusion_matrix(y_test, preds)
        confusion_matrices_folds.append(confusion_matrix)
        print "Accuracy for fold: " + str((len(correct) * 1.0) / (len(y_test) * 1.0)) \
              + "\n\n\n\n\n-----------------------------\n\n\n"

    # Plot the average confusion matrix over all folds
    cm_sum = confusion_matrices_folds[0] * 1.0
    for i in range(1, len(confusion_matrices_folds)):
        cm_sum += confusion_matrices_folds[i]
    cm_sum /= len(confusion_matrices_folds)
    metrics.plot_confusion_matrix(cm_sum)
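# Optional follow-up sketch: row-normalizing the averaged matrix makes
# per-class recall comparable across classes of different sizes (same
# `cm_sum` and `metrics` as above):
cm_normalized = cm_sum / cm_sum.sum(axis=1, keepdims=True)
metrics.plot_confusion_matrix(cm_normalized)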
# Assumed imports: os, time, pickle, numpy as np, matplotlib's pyplot,
# nolearn.lasagne (NeuralNet, TrainSplit, BatchIterator, PrintLayerInfo),
# lasagne.layers and the nesterov_momentum/rmsprop updates, plus the
# project-local load_data and runSvm. `hiddenLayer_to_output` appears to be a
# project-specific extension of NeuralNet.
def run(LEARNING_RATE=0.04, UPDATE_MOMENTUM=0.9, UPDATE_RHO=None, NUM_OF_EPOCH=50,
        OUTPUT_SIZE=20, input_width=300, input_height=140, dataset='ISH.pkl.gz',
        TRAIN_VALIDATION_SPLIT=0.2,
        # activation=lasagne.nonlinearities.tanh,  # rectify
        NUM_UNITS_HIDDEN_LAYER=[5, 10, 20, 40], BATCH_SIZE=40,
        toShuffleInput=False, withZeroMeaning=False):
    global counter
    FILE_PREFIX = os.path.split(dataset)[1][:-6]  # os.path.split(__file__)[1][:-3]
    FOLDER_PREFIX = "results/" + FILE_PREFIX + "/run_" + str(counter) + "/"
    if not os.path.exists(FOLDER_PREFIX):
        os.makedirs(FOLDER_PREFIX)
    PARAMS_FILE_NAME = FOLDER_PREFIX + "parameters.txt"
    HIDDEN_LAYER_OUTPUT_FILE_NAME = FOLDER_PREFIX + "hiddenLayerOutput.pickle"
    FIG_FILE_NAME = FOLDER_PREFIX + "fig"
    PICKLES_NET_FILE_NAME = FOLDER_PREFIX + "picklesNN.pickle"
    SVM_FILE_NAME = FOLDER_PREFIX + "svmData.txt"
    # VALIDATION_FILE_NAME = "results/"+os.path.split(__file__)[1][:-3]+"_validation_"+str(counter)+".txt"
    # PREDICTION_FILE_NAME = "results/"+os.path.split(__file__)[1][:-3]+"_prediction.txt"
    counter += 1
    outputFile = open(PARAMS_FILE_NAME, "w")

    def load2d(dataset='ISH.pkl.gz', toShuffleInput=False, withZeroMeaning=False):
        print 'loading data...'
        datasets = load_data(dataset, toShuffleInput, withZeroMeaning)
        train_set_x, train_set_y = datasets[0]
        # valid_set_x, valid_set_y = datasets[1]
        test_set_x, test_set_y = datasets[2]
        train_set_x = train_set_x.reshape(-1, 1, input_width, input_height)
        # valid_set_x = valid_set_x.reshape(-1, 1, input_width, input_height)
        test_set_x = test_set_x.reshape(-1, 1, input_width, input_height)
        print(train_set_x.shape[0], 'train samples')
        return train_set_x, train_set_y, test_set_x, test_set_y

    def createNNwithMomentom(input_height, input_width):
        net2 = NeuralNet(
            layers=[
                ('input', layers.InputLayer),
                ('conv1', layers.Conv2DLayer),
                ('pool1', layers.MaxPool2DLayer),
                ('conv2', layers.Conv2DLayer),
                ('pool2', layers.MaxPool2DLayer),
                ('conv3', layers.Conv2DLayer),
                ('pool3', layers.MaxPool2DLayer),
                ('conv4', layers.Conv2DLayer),
                ('pool4', layers.MaxPool2DLayer),
                ('hidden5', layers.DenseLayer),
                ('hidden6', layers.DenseLayer),
                ('hidden7', layers.DenseLayer),
                ('output', layers.DenseLayer)],
            input_shape=(None, 1, input_width, input_height),
            conv1_num_filters=NUM_UNITS_HIDDEN_LAYER[0], conv1_filter_size=(5, 5), pool1_pool_size=(2, 2),
            conv2_num_filters=NUM_UNITS_HIDDEN_LAYER[1], conv2_filter_size=(9, 9), pool2_pool_size=(2, 2),
            conv3_num_filters=NUM_UNITS_HIDDEN_LAYER[2], conv3_filter_size=(11, 11), pool3_pool_size=(4, 2),
            conv4_num_filters=NUM_UNITS_HIDDEN_LAYER[3], conv4_filter_size=(8, 5), pool4_pool_size=(2, 2),
            hidden5_num_units=500,
            hidden6_num_units=200,
            hidden7_num_units=100,
            output_num_units=20,
            output_nonlinearity=None,
            update_learning_rate=LEARNING_RATE,
            update_momentum=UPDATE_MOMENTUM,
            update=nesterov_momentum,
            train_split=TrainSplit(eval_size=TRAIN_VALIDATION_SPLIT),
            batch_iterator_train=BatchIterator(batch_size=BATCH_SIZE),
            regression=True,
            max_epochs=NUM_OF_EPOCH,
            verbose=1,
            hiddenLayer_to_output=-2)
        # on_training_finished=last_hidden_layer,
        return net2

    def createNNwithDecay(input_height, input_width):
        net2 = NeuralNet(
            layers=[
                ('input', layers.InputLayer),
                ('conv1', layers.Conv2DLayer),
                ('pool1', layers.MaxPool2DLayer),
                ('conv2', layers.Conv2DLayer),
                ('pool2', layers.MaxPool2DLayer),
                ('conv3', layers.Conv2DLayer),
                ('pool3', layers.MaxPool2DLayer),
                ('conv4', layers.Conv2DLayer),
                ('pool4', layers.MaxPool2DLayer),
                ('hidden5', layers.DenseLayer),
                ('hidden6', layers.DenseLayer),
                ('hidden7', layers.DenseLayer),
                ('output', layers.DenseLayer)],
            input_shape=(None, 1, input_width, input_height),
            conv1_num_filters=NUM_UNITS_HIDDEN_LAYER[0], conv1_filter_size=(5, 5), pool1_pool_size=(2, 2),
            conv2_num_filters=NUM_UNITS_HIDDEN_LAYER[1], conv2_filter_size=(9, 9), pool2_pool_size=(2, 2),
            conv3_num_filters=NUM_UNITS_HIDDEN_LAYER[2], conv3_filter_size=(11, 11), pool3_pool_size=(4, 2),
            conv4_num_filters=NUM_UNITS_HIDDEN_LAYER[3], conv4_filter_size=(8, 5), pool4_pool_size=(2, 2),
            hidden5_num_units=500,
            hidden6_num_units=200,
            hidden7_num_units=100,
            output_num_units=20,
            output_nonlinearity=None,
            update_learning_rate=LEARNING_RATE,
            update_rho=UPDATE_RHO,
            update=rmsprop,
            train_split=TrainSplit(eval_size=TRAIN_VALIDATION_SPLIT),
            batch_iterator_train=BatchIterator(batch_size=BATCH_SIZE),
            regression=True,
            max_epochs=NUM_OF_EPOCH,
            verbose=1,
            hiddenLayer_to_output=-2)
        # on_training_finished=last_hidden_layer,
        return net2

    def last_hidden_layer(s, h):
        print s.output_last_hidden_layer_(X)
        # input_layer = s.get_all_layers()[0]
        # last_h_layer = s.get_all_layers()[-2]
        # f = theano.function(s.X_inputs, last_h_layer.get_output(last_h_layer), allow_input_downcast=True)
        # myFunc = theano.function(
        #     inputs=s.input_X,
        #     outputs=s.h_predict,
        #     allow_input_downcast=True,
        # )
        # print s.output_last_hidden_layer_(X, -2)

    def writeOutputFile(outputFile, train_history, layer_info):
        # Save the network's parameters
        outputFile.write("Validation set Prediction rate is: " + str((1 - train_history[-1]['valid_accuracy']) * 100) + "%\n")
        outputFile.write("Run time[minutes] is: " + str(run_time) + "\n\n")
        outputFile.write("Learning rate: " + str(LEARNING_RATE) + "\n")
        if UPDATE_RHO is None:
            outputFile.write("Momentum: " + str(UPDATE_MOMENTUM) + "\n")
        else:
            outputFile.write("Decay Factor: " + str(UPDATE_RHO) + "\n")
        outputFile.write("Batch size: " + str(BATCH_SIZE) + "\n")
        outputFile.write("Num epochs: " + str(NUM_OF_EPOCH) + "\n")
        outputFile.write("Num units hidden layers: " + str(NUM_UNITS_HIDDEN_LAYER) + "\n")
        # outputFile.write("activation func: " + str(activation) + "\n")
        outputFile.write("Train/validation split: " + str(TRAIN_VALIDATION_SPLIT) + "\n")
        outputFile.write("toShuffleInput: " + str(toShuffleInput) + "\n")
        outputFile.write("withZeroMeaning: " + str(withZeroMeaning) + "\n\n")
        outputFile.write("history: " + str(train_history) + "\n\n")
        outputFile.write("layer_info:\n" + str(layer_info) + "\n")

    start_time = time.clock()
    net2 = createNNwithMomentom(input_height, input_width) if UPDATE_RHO is None else createNNwithDecay(input_height, input_width)
    X, y, test_x, test_y = load2d()  # load 2-d data
    net2.fit(X, y)
    run_time = (time.clock() - start_time) / 60.

    print "outputing last hidden layer"
    train_last_hiddenLayer = net2.output_hiddenLayer(X)
    test_last_hiddenLayer = net2.output_hiddenLayer(test_x)
    # ohlFile = open(HIDDEN_LAYER_OUTPUT_FILE_NAME + ".txt", "w")
    # for line in train_last_hiddenLayer:
    #     ohlFile.write(str(line) + "\n")
    with open(HIDDEN_LAYER_OUTPUT_FILE_NAME, 'wb') as f:
        ob = (train_last_hiddenLayer, y, test_last_hiddenLayer, test_y)
        pickle.dump(ob, f, -1)

    writeOutputFile(outputFile, net2.train_history_, PrintLayerInfo()._get_layer_info_plain(net2))

    errorRates = runSvm(HIDDEN_LAYER_OUTPUT_FILE_NAME)
    errorRate = np.average(errorRates)
    outputFile.write("\nSVM Total Prediction rate is: " + str(100 - errorRate) + "\n\n")
    outputFile.write("SVM Error rate is:\n" + str(errorRates) + "\n")
    outputFile.close()
    # write svm data
    # writeDataToFile(HIDDEN_LAYER_OUTPUT_FILE_NAME, SVM_FILE_NAME)

    ##############################################
    train_loss = np.array([i["train_loss"] for i in net2.train_history_])
    valid_loss = np.array([i["valid_loss"] for i in net2.train_history_])
    pyplot.plot(train_loss, linewidth=3, label="train")
    pyplot.plot(valid_loss, linewidth=3, label="valid")
    pyplot.grid()
    pyplot.legend()
    pyplot.xlabel("epoch")
    pyplot.ylabel("loss")
    pyplot.ylim(1e-3, 1)
    pyplot.yscale("log")
    pyplot.savefig(FIG_FILE_NAME)

    #################################################
    # def plot_sample(x, y, axis):
    #     img = x.reshape(96, 96)
    #     axis.imshow(img, cmap='gray')
    #     axis.scatter(y[0::2] * 48 + 48, y[1::2] * 48 + 48, marker='x', s=10)
    #
    # X, _ = load(test=True)
    # y_pred = net1.predict(X)
    #
    # fig = pyplot.figure(figsize=(6, 6))
    # fig.subplots_adjust(left=0, right=1, bottom=0, top=1, hspace=0.05, wspace=0.05)
    # for i in range(16):
    #     ax = fig.add_subplot(4, 4, i + 1, xticks=[], yticks=[])
    #     plot_sample(X[i], y_pred[i], ax)
    # pyplot.show()

    ########## pickle the network ##########
    print "pickling"
    with open(PICKLES_NET_FILE_NAME, 'wb') as f:
        pickle.dump(net2, f, -1)
# Variant of the runner above: optional preloaded data, gzip'd cPickle output,
# and a configurable number of target categories. Same assumed imports, plus
# gzip and cPickle.
def run(loadedData=None, FOLDER_NAME="default", LEARNING_RATE=0.04, UPDATE_MOMENTUM=0.9,
        UPDATE_RHO=None, NUM_OF_EPOCH=15, input_width=300, input_height=140,
        dataset='withOutDataSet', TRAIN_VALIDATION_SPLIT=0.2, MULTI_POSITIVES=20,
        dropout_percent=0.1, USE_NUM_CAT=20, end_index=16351,
        # activation=lasagne.nonlinearities.tanh,  # rectify
        NUM_UNITS_HIDDEN_LAYER=[5, 10, 20, 40], BATCH_SIZE=40,
        toShuffleInput=False, withZeroMeaning=False):
    global counter
    # FILE_PREFIX = os.path.split(dataset)[1][:-6]  # os.path.split(__file__)[1][:-3]
    FOLDER_PREFIX = "results/" + FOLDER_NAME + "/run_" + str(counter) + "/"
    if not os.path.exists(FOLDER_PREFIX):
        os.makedirs(FOLDER_PREFIX)
    PARAMS_FILE_NAME = FOLDER_PREFIX + "parameters.txt"
    HIDDEN_LAYER_OUTPUT_FILE_NAME = FOLDER_PREFIX + "hiddenLayerOutput.pkl.gz"
    FIG_FILE_NAME = FOLDER_PREFIX + "fig"
    PICKLES_NET_FILE_NAME = FOLDER_PREFIX + "picklesNN.pkl.gz"
    SVM_FILE_NAME = FOLDER_PREFIX + "svmData.txt"
    # VALIDATION_FILE_NAME = "results/"+os.path.split(__file__)[1][:-3]+"_validation_"+str(counter)+".txt"
    # PREDICTION_FILE_NAME = "results/"+os.path.split(__file__)[1][:-3]+"_prediction.txt"
    counter += 1
    outputFile = open(PARAMS_FILE_NAME, "w")

    def createNNwithMomentom(input_height, input_width):
        outputLayerSize = 20 if USE_NUM_CAT == 20 else 15
        net2 = NeuralNet(
            layers=[
                ('input', layers.InputLayer),
                ('conv1', layers.Conv2DLayer),
                ('pool1', layers.MaxPool2DLayer),
                ('conv2', layers.Conv2DLayer),
                ('pool2', layers.MaxPool2DLayer),
                ('conv3', layers.Conv2DLayer),
                ('pool3', layers.MaxPool2DLayer),
                ('conv4', layers.Conv2DLayer),
                ('pool4', layers.MaxPool2DLayer),
                ('hidden5', layers.DenseLayer),
                ('hidden6', layers.DenseLayer),
                ('hidden7', layers.DenseLayer),
                ('output', layers.DenseLayer)],
            input_shape=(None, 1, input_width, input_height),
            conv1_num_filters=NUM_UNITS_HIDDEN_LAYER[0], conv1_filter_size=(5, 5), pool1_pool_size=(2, 2),
            conv2_num_filters=NUM_UNITS_HIDDEN_LAYER[1], conv2_filter_size=(9, 9), pool2_pool_size=(2, 2),
            conv3_num_filters=NUM_UNITS_HIDDEN_LAYER[2], conv3_filter_size=(11, 11), pool3_pool_size=(4, 2),
            conv4_num_filters=NUM_UNITS_HIDDEN_LAYER[3], conv4_filter_size=(8, 5), pool4_pool_size=(2, 2),
            hidden5_num_units=500,
            hidden6_num_units=200,
            hidden7_num_units=100,
            output_num_units=outputLayerSize,
            output_nonlinearity=None,
            update_learning_rate=LEARNING_RATE,
            update_momentum=UPDATE_MOMENTUM,
            update=nesterov_momentum,
            train_split=TrainSplit(eval_size=TRAIN_VALIDATION_SPLIT),
            batch_iterator_train=BatchIterator(batch_size=BATCH_SIZE),
            regression=True,
            max_epochs=NUM_OF_EPOCH,
            verbose=1,
            hiddenLayer_to_output=-2)
        # on_training_finished=last_hidden_layer,
        return net2

    def createNNwithDecay(input_height, input_width):
        outputLayerSize = 20 if USE_NUM_CAT == 20 else 15
        net2 = NeuralNet(
            layers=[
                ('input', layers.InputLayer),
                ('conv1', layers.Conv2DLayer),
                ('pool1', layers.MaxPool2DLayer),
                ('conv2', layers.Conv2DLayer),
                ('pool2', layers.MaxPool2DLayer),
                ('conv3', layers.Conv2DLayer),
                ('pool3', layers.MaxPool2DLayer),
                ('conv4', layers.Conv2DLayer),
                ('pool4', layers.MaxPool2DLayer),
                ('hidden5', layers.DenseLayer),
                ('hidden6', layers.DenseLayer),
                ('hidden7', layers.DenseLayer),
                ('output', layers.DenseLayer)],
            input_shape=(None, 1, input_width, input_height),
            conv1_num_filters=NUM_UNITS_HIDDEN_LAYER[0], conv1_filter_size=(5, 5), pool1_pool_size=(2, 2),
            conv2_num_filters=NUM_UNITS_HIDDEN_LAYER[1], conv2_filter_size=(9, 9), pool2_pool_size=(2, 2),
            conv3_num_filters=NUM_UNITS_HIDDEN_LAYER[2], conv3_filter_size=(11, 11), pool3_pool_size=(4, 2),
            conv4_num_filters=NUM_UNITS_HIDDEN_LAYER[3], conv4_filter_size=(8, 5), pool4_pool_size=(2, 2),
            hidden5_num_units=500,
            hidden6_num_units=200,
            hidden7_num_units=100,
            output_num_units=outputLayerSize,
            output_nonlinearity=None,
            update_learning_rate=LEARNING_RATE,
            update_rho=UPDATE_RHO,
            update=rmsprop,
            train_split=TrainSplit(eval_size=TRAIN_VALIDATION_SPLIT),
            batch_iterator_train=BatchIterator(batch_size=BATCH_SIZE),
            regression=True,
            max_epochs=NUM_OF_EPOCH,
            verbose=1,
            hiddenLayer_to_output=-2)
        # on_training_finished=last_hidden_layer,
        return net2

    def last_hidden_layer(s, h):
        print s.output_last_hidden_layer_(X)
        # input_layer = s.get_all_layers()[0]
        # last_h_layer = s.get_all_layers()[-2]
        # f = theano.function(s.X_inputs, last_h_layer.get_output(last_h_layer), allow_input_downcast=True)
        # myFunc = theano.function(
        #     inputs=s.input_X,
        #     outputs=s.h_predict,
        #     allow_input_downcast=True,
        # )
        # print s.output_last_hidden_layer_(X, -2)

    def outputLastLayer_CNN(net2, X, y, test_x, test_y):
        print "outputing last hidden layer"
        # Push the training set through in quarters to keep memory in check
        # train_last_hiddenLayer = net2.output_hiddenLayer(X)
        quarter_x = X.shape[0] // 4
        train_last_hiddenLayer1 = net2.output_hiddenLayer(X[:quarter_x])
        print "after first quarter train output"
        train_last_hiddenLayer2 = net2.output_hiddenLayer(X[quarter_x:2 * quarter_x])
        print "after second quarter train output"
        train_last_hiddenLayer3 = net2.output_hiddenLayer(X[2 * quarter_x:3 * quarter_x])
        print "after third quarter train output"
        train_last_hiddenLayer4 = net2.output_hiddenLayer(X[3 * quarter_x:])
        print "after all train output"
        test_last_hiddenLayer = net2.output_hiddenLayer(test_x)
        print "after test output"
        # lastLayerOutputs = (train_last_hiddenLayer, y, test_last_hiddenLayer, test_y)
        lastLayerOutputs = (np.concatenate((train_last_hiddenLayer1, train_last_hiddenLayer2,
                                            train_last_hiddenLayer3, train_last_hiddenLayer4), axis=0),
                            y, test_last_hiddenLayer, test_y)
        return lastLayerOutputs

    def writeOutputFile(outputFile, train_history, layer_info):
        # Save the network's parameters
        outputFile.write("Validation set Prediction rate is: " + str((1 - train_history[-1]['valid_accuracy']) * 100) + "%\n")
        outputFile.write("Run time[minutes] is: " + str(run_time) + "\n\n")
        outputFile.write("Training NN on: " + ("20 Top Categories\n" if USE_NUM_CAT == 20 else "Article Categories\n"))
        outputFile.write("Learning rate: " + str(LEARNING_RATE) + "\n")
        outputFile.write(("Momentum: " + str(UPDATE_MOMENTUM) + "\n") if UPDATE_RHO is None
                         else ("Decay Factor: " + str(UPDATE_RHO) + "\n"))
        outputFile.write("Batch size: " + str(BATCH_SIZE) + "\n")
        outputFile.write("Num epochs: " + str(NUM_OF_EPOCH) + "\n")
        outputFile.write("Num units hidden layers: " + str(NUM_UNITS_HIDDEN_LAYER) + "\n\n")
        # outputFile.write("activation func: " + str(activation) + "\n")
        outputFile.write("Multiply positives by: " + str(MULTI_POSITIVES) + "\n")
        outputFile.write("New positives dropout rate: " + str(dropout_percent) + "\n")
        outputFile.write("Train/validation split: " + str(TRAIN_VALIDATION_SPLIT) + "\n")
        outputFile.write("toShuffleInput: " + str(toShuffleInput) + "\n")
        outputFile.write("withZeroMeaning: " + str(withZeroMeaning) + "\n\n")
        outputFile.write("history: " + str(train_history) + "\n\n")
        outputFile.write("layer_info:\n" + str(layer_info) + "\n")
        outputFile.flush()

    start_time = time.clock()
    print "Start time: ", time.ctime()
    net2 = createNNwithMomentom(input_height, input_width) if UPDATE_RHO is None else createNNwithDecay(input_height, input_width)
    if loadedData is None:
        X, y, test_x, test_y = load2d(USE_NUM_CAT, outputFile, input_width, input_height,
                                      end_index, MULTI_POSITIVES, dropout_percent)  # load 2-d data
    else:
        X, y, test_x, test_y = loadedData
    net2.fit(X, y)
    run_time = (time.clock() - start_time) / 60.

    writeOutputFile(outputFile, net2.train_history_, PrintLayerInfo()._get_layer_info_plain(net2))

    lastLayerOutputs = outputLastLayer_CNN(net2, X, y, test_x, test_y)
    print "running Category Classifier"
    errorRates, aucScores = runSvm(lastLayerOutputs, 15)  # HIDDEN_LAYER_OUTPUT_FILE_NAME, 15)
    # errorRates, aucScores = runCrossSvm(lastLayerOutputs, 15)
    # errorRates, aucScores = runNNclassifier(lastLayerOutputs, 15)
    errorRate = np.average(errorRates)
    aucScore = np.average(aucScores)
    outputFile.write("\nClassifiers Total Prediction rate is: " + str(100 - errorRate) + "\n\n")
    outputFile.write("Classifiers Error rates are:\n" + str(errorRates) + "\n")
    outputFile.write("\nClassifiers Total AUC Score is: " + str(aucScore) + "\n\n")
    outputFile.write("Classifiers AUC Scores are:\n" + str(aucScores) + "\n")
    outputFile.close()

    print "saving last layer outputs"
    # with open(HIDDEN_LAYER_OUTPUT_FILE_NAME, 'wb') as f:
    #     pickle.dump(lastLayerOutputs, f, -1)
    f = gzip.open(HIDDEN_LAYER_OUTPUT_FILE_NAME, 'wb')
    cPickle.dump(lastLayerOutputs, f, protocol=2)
    f.close()
    # write svm data
    # writeDataToFile(HIDDEN_LAYER_OUTPUT_FILE_NAME, SVM_FILE_NAME)

    ##############################################
    train_loss = np.array([i["train_loss"] for i in net2.train_history_])
    valid_loss = np.array([i["valid_loss"] for i in net2.train_history_])
    pyplot.plot(train_loss, linewidth=3, label="train")
    pyplot.plot(valid_loss, linewidth=3, label="valid")
    pyplot.grid()
    pyplot.legend()
    pyplot.xlabel("epoch")
    pyplot.ylabel("loss")
    pyplot.ylim(1e-3, 1)
    pyplot.yscale("log")
    pyplot.savefig(FIG_FILE_NAME)

    #################################################
    # def plot_sample(x, y, axis):
    #     img = x.reshape(96, 96)
    #     axis.imshow(img, cmap='gray')
    #     axis.scatter(y[0::2] * 48 + 48, y[1::2] * 48 + 48, marker='x', s=10)
    #
    # X, _ = load(test=True)
    # y_pred = net1.predict(X)
    #
    # fig = pyplot.figure(figsize=(6, 6))
    # fig.subplots_adjust(left=0, right=1, bottom=0, top=1, hspace=0.05, wspace=0.05)
    # for i in range(16):
    #     ax = fig.add_subplot(4, 4, i + 1, xticks=[], yticks=[])
    #     plot_sample(X[i], y_pred[i], ax)
    # pyplot.show()

    ########## pickle the network ##########
    print "pickling"
    # with open(PICKLES_NET_FILE_NAME, 'wb') as f:
    #     pickle.dump(net2, f, -1)
    f = gzip.open(PICKLES_NET_FILE_NAME, 'wb')
    cPickle.dump(net2, f, protocol=2)
    f.close()
# Momentum-only variant of the runner; same assumed imports as above.
def run(LEARNING_RATE=0.04, UPDATE_MOMENTUM=0.9, NUM_OF_EPOCH=50, OUTPUT_SIZE=20,
        input_width=300, input_height=140, dataset='ISH.pkl.gz',
        TRAIN_VALIDATION_SPLIT=0.2,
        # activation=lasagne.nonlinearities.tanh,  # rectify
        NUM_UNITS_HIDDEN_LAYER=[5, 10, 20, 40], BATCH_SIZE=40,
        toShuffleInput=False, withZeroMeaning=False):
    global counter
    FILE_PREFIX = os.path.split(dataset)[1][4:16]  # os.path.split(__file__)[1][:-3]
    PARAMS_FILE_NAME = "results/" + FILE_PREFIX + "_parameters_" + str(counter) + ".txt"
    FIG_FILE_NAME = "results/" + FILE_PREFIX + "_fig_" + str(counter)
    PICKLES_NET_FILE_NAME = "results/" + FILE_PREFIX + "_picklesNN_" + str(counter) + ".pickle"
    # VALIDATION_FILE_NAME = "results/"+os.path.split(__file__)[1][:-3]+"_validation_"+str(counter)+".txt"
    # PREDICTION_FILE_NAME = "results/"+os.path.split(__file__)[1][:-3]+"_prediction.txt"
    counter += 1
    outputFile = open(PARAMS_FILE_NAME, "w")

    def load2d(dataset='ISH.pkl.gz', toShuffleInput=False, withZeroMeaning=False):
        print 'loading data...'
        datasets = load_data(dataset, toShuffleInput, withZeroMeaning)
        train_set_x, train_set_y = datasets[0]
        # valid_set_x, valid_set_y = datasets[1]
        # test_set_x, test_set_y = datasets[2]
        train_set_x = train_set_x.reshape(-1, 1, input_width, input_height)
        print(train_set_x.shape[0], 'train samples')
        return train_set_x, train_set_y

    def writeOutputFile(outputFile, train_history, layer_info):
        # Save the network's parameters
        outputFile.write("error is: " + str(1 - train_history[-1]['valid_accuracy']) + "\n")
        outputFile.write("time is: " + str(run_time) + "\n\n")
        outputFile.write("learning rate: " + str(LEARNING_RATE) + "\n")
        outputFile.write("momentum: " + str(UPDATE_MOMENTUM) + "\n")
        outputFile.write("batch size: " + str(BATCH_SIZE) + "\n")
        outputFile.write("num epochs: " + str(NUM_OF_EPOCH) + "\n")
        outputFile.write("num units hidden layers: " + str(NUM_UNITS_HIDDEN_LAYER) + "\n")
        # outputFile.write("activation func: " + str(activation) + "\n")
        outputFile.write("train/validation split: " + str(TRAIN_VALIDATION_SPLIT) + "\n")
        outputFile.write("toShuffleInput: " + str(toShuffleInput) + "\n")
        outputFile.write("withZeroMeaning: " + str(withZeroMeaning) + "\n\n")
        outputFile.write("history: " + str(train_history) + "\n\n")
        outputFile.write("layer_info:\n" + str(layer_info) + "\n")

    start_time = time.clock()
    net2 = NeuralNet(
        layers=[
            ('input', layers.InputLayer),
            ('conv1', layers.Conv2DLayer),
            ('pool1', layers.MaxPool2DLayer),
            ('conv2', layers.Conv2DLayer),
            ('pool2', layers.MaxPool2DLayer),
            ('conv3', layers.Conv2DLayer),
            ('pool3', layers.MaxPool2DLayer),
            ('conv4', layers.Conv2DLayer),
            ('pool4', layers.MaxPool2DLayer),
            ('hidden5', layers.DenseLayer),
            ('hidden6', layers.DenseLayer),
            ('hidden7', layers.DenseLayer),
            ('output', layers.DenseLayer),
        ],
        input_shape=(None, 1, input_width, input_height),
        conv1_num_filters=NUM_UNITS_HIDDEN_LAYER[0], conv1_filter_size=(5, 5), pool1_pool_size=(2, 2),
        conv2_num_filters=NUM_UNITS_HIDDEN_LAYER[1], conv2_filter_size=(9, 9), pool2_pool_size=(2, 2),
        conv3_num_filters=NUM_UNITS_HIDDEN_LAYER[2], conv3_filter_size=(11, 11), pool3_pool_size=(4, 2),
        conv4_num_filters=NUM_UNITS_HIDDEN_LAYER[3], conv4_filter_size=(8, 5), pool4_pool_size=(2, 2),
        hidden5_num_units=500,
        hidden6_num_units=200,
        hidden7_num_units=100,
        output_num_units=20,
        output_nonlinearity=None,
        update_learning_rate=LEARNING_RATE,
        update_momentum=UPDATE_MOMENTUM,
        train_split=TrainSplit(eval_size=TRAIN_VALIDATION_SPLIT),
        batch_iterator_train=BatchIterator(batch_size=BATCH_SIZE),
        regression=True,
        max_epochs=NUM_OF_EPOCH,
        verbose=1,
    )

    X, y = load2d()  # load 2-d data
    net2.fit(X, y)
    run_time = (time.clock() - start_time) / 60.
    writeOutputFile(outputFile, net2.train_history_, PrintLayerInfo()._get_layer_info_plain(net2))
    # np.sqrt(0.003255) * 48

    ##############################################
    train_loss = np.array([i["train_loss"] for i in net2.train_history_])
    valid_loss = np.array([i["valid_loss"] for i in net2.train_history_])
    pyplot.plot(train_loss, linewidth=3, label="train")
    pyplot.plot(valid_loss, linewidth=3, label="valid")
    pyplot.grid()
    pyplot.legend()
    pyplot.xlabel("epoch")
    pyplot.ylabel("loss")
    pyplot.ylim(1e-3, 1)
    pyplot.yscale("log")
    pyplot.savefig(FIG_FILE_NAME)

    #################################################
    # def plot_sample(x, y, axis):
    #     img = x.reshape(96, 96)
    #     axis.imshow(img, cmap='gray')
    #     axis.scatter(y[0::2] * 48 + 48, y[1::2] * 48 + 48, marker='x', s=10)
    #
    # X, _ = load(test=True)
    # y_pred = net1.predict(X)
    #
    # fig = pyplot.figure(figsize=(6, 6))
    # fig.subplots_adjust(left=0, right=1, bottom=0, top=1, hspace=0.05, wspace=0.05)
    # for i in range(16):
    #     ax = fig.add_subplot(4, 4, i + 1, xticks=[], yticks=[])
    #     plot_sample(X[i], y_pred[i], ax)
    # pyplot.show()

    ########## pickle the network ##########
    print "pickling"
    with open(PICKLES_NET_FILE_NAME, 'wb') as f:
        pickle.dump(net2, f, -1)
def run(loadedData=None, learning_rate=0.04, update_momentum=0.9, update_rho=None, epochs=15, input_width=300, input_height=140, train_valid_split=0.2, multiple_positives=20, flip_batch=True, dropout_percent=0.1, end_index=16351, activation=None, last_layer_activation=None, batch_size=32, layers_size=[5, 10, 20, 40], shuffle_input=False, zero_meaning=False, filters_type=3, input_noise_rate=0.3, pre_train_epochs=1, softmax_train_epochs=2, fine_tune_epochs=2, categories=15, folder_name="default", dataset='withOutDataSet'): global counter folder_path = "results_dae"+FILE_SEPARATOR + folder_name + FILE_SEPARATOR + "run_" + str(counter) + FILE_SEPARATOR if not os.path.exists(folder_path): os.makedirs(folder_path) PARAMS_FILE_NAME = folder_path + "parameters.txt" HIDDEN_LAYER_OUTPUT_FILE_NAME = folder_path + "hiddenLayerOutput.pkl.gz" FIG_FILE_NAME = folder_path + "fig" PICKLES_NET_FILE_NAME = folder_path + "picklesNN.pkl.gz" SVM_FILE_NAME = folder_path + "svmData.txt" LOG_FILE_NAME = folder_path + "message.log" All_Results_FIle = "results_dae"+FILE_SEPARATOR + "all_results.txt" # old_stdout = sys.stdout # print "less",LOG_FILE_NAME log_file = open(LOG_FILE_NAME, "w") # sys.stdout = log_file counter += 1 output_file = open(PARAMS_FILE_NAME, "w") results_file = open(All_Results_FIle, "a") if filters_type == 3: filter_1 = (3, 3) filter_2 = (3, 3) filter_3 = (3, 3) filter_4 = (3, 3) filter_5 = (3, 3) filter_6 = (3, 3) elif filters_type == 5: filter_1 = (5, 5) filter_2 = (5, 5) filter_3 = (5, 5) filter_4 = (5, 5) filter_5 = (5, 5) filter_6 = (5, 5) elif filters_type == 7: filter_1 = (7, 7) filter_2 = (7, 7) filter_3 = (5, 5) filter_4 = (7, 7) filter_5 = (7, 7) filter_6 = (5, 5) elif filters_type == 9: filter_1 = (9, 9) filter_2 = (7, 7) filter_3 = (5, 5) filter_4 = (7, 7) filter_5 = (9, 9) filter_6 = (5, 5) def createCSAE(input_height, input_width, X_train, X_out): X_train = np.random.binomial(1, 1-dropout_percent, size=X_train.shape) * X_train cnn = NeuralNet(layers=[ ('input', layers.InputLayer), ('conv1', layers.Conv2DLayer), ('conv11', layers.Conv2DLayer), # ('conv12', layers.Conv2DLayer), ('pool1', layers.MaxPool2DLayer), ('conv2', layers.Conv2DLayer), ('conv21', layers.Conv2DLayer), # ('conv22', layers.Conv2DLayer), ('pool2', layers.MaxPool2DLayer), ('conv3', layers.Conv2DLayer), # ('conv31', layers.Conv2DLayer), ('conv32', layers.Conv2DLayer), ('unpool1', Unpool2DLayer), ('conv4', layers.Conv2DLayer), ('conv41', layers.Conv2DLayer), # ('conv42', layers.Conv2DLayer), ('unpool2', Unpool2DLayer), ('conv5', layers.Conv2DLayer), ('conv51', layers.Conv2DLayer), # ('conv52', layers.Conv2DLayer), ('conv6', layers.Conv2DLayer), ('output_layer', ReshapeLayer), ], input_shape=(None, 1, input_width, input_height), # Layer current size - 1x300x140 conv1_num_filters=layers_size[0], conv1_filter_size=filter_1, conv1_nonlinearity=activation, # conv1_border_mode="same", conv1_pad="same", conv11_num_filters=layers_size[0], conv11_filter_size=filter_1, conv11_nonlinearity=activation, # conv11_border_mode="same", conv11_pad="same", # conv12_num_filters=layers_size[0], conv12_filter_size=filter_1, conv12_nonlinearity=activation, # # conv12_border_mode="same", # conv12_pad="same", pool1_pool_size=(2, 2), conv2_num_filters=layers_size[1], conv2_filter_size=filter_2, conv2_nonlinearity=activation, # conv2_border_mode="same", conv2_pad="same", conv21_num_filters=layers_size[1], conv21_filter_size=filter_2, conv21_nonlinearity=activation, # conv21_border_mode="same", conv21_pad="same", # 
conv22_num_filters=layers_size[1], conv22_filter_size=filter_2, conv22_nonlinearity=activation, # # conv22_border_mode="same", # conv22_pad="same", pool2_pool_size=(2, 2), conv3_num_filters=layers_size[2], conv3_filter_size=filter_3, conv3_nonlinearity=activation, # conv3_border_mode="same", conv3_pad="same", # conv31_num_filters=layers_size[2], conv31_filter_size=filter_3, conv31_nonlinearity=activation, # # conv31_border_mode="same", # conv31_pad="same", conv32_num_filters=1, conv32_filter_size=filter_3, conv32_nonlinearity=activation, # conv32_border_mode="same", conv32_pad="same", unpool1_ds=(2, 2), conv4_num_filters=layers_size[3], conv4_filter_size=filter_4, conv4_nonlinearity=activation, # conv4_border_mode="same", conv4_pad="same", conv41_num_filters=layers_size[3], conv41_filter_size=filter_4, conv41_nonlinearity=activation, # conv41_border_mode="same", conv41_pad="same", # conv42_num_filters=layers_size[3], conv42_filter_size=filter_4, conv42_nonlinearity=activation, # # conv42_border_mode="same", # conv42_pad="same", unpool2_ds=(2, 2), conv5_num_filters=layers_size[4], conv5_filter_size=filter_5, conv5_nonlinearity=activation, # conv5_border_mode="same", conv5_pad="same", conv51_num_filters=layers_size[4], conv51_filter_size=filter_5, conv51_nonlinearity=activation, # conv51_border_mode="same", conv51_pad="same", # conv52_num_filters=layers_size[4], conv52_filter_size=filter_5, conv52_nonlinearity=activation, # # conv52_border_mode="same", # conv52_pad="same", conv6_num_filters=1, conv6_filter_size=filter_6, conv6_nonlinearity=last_layer_activation, # conv6_border_mode="same", conv6_pad="same", output_layer_shape=(([0], -1)), update_learning_rate=learning_rate, update_momentum=update_momentum, update=nesterov_momentum, train_split=TrainSplit(eval_size=train_valid_split), batch_iterator_train=FlipBatchIterator(batch_size=batch_size) if flip_batch else BatchIterator(batch_size=batch_size), regression=True, max_epochs=epochs, verbose=1, hiddenLayer_to_output=-9) cnn.fit(X_train, X_out) try: pickle.dump(cnn, open(folder_path + 'conv_ae.pkl', 'w')) # cnn = pickle.load(open(folder_path + 'conv_ae.pkl','r')) cnn.save_weights_to(folder_path + 'conv_ae.np') except: print ("Could not pickle cnn") X_pred = cnn.predict(X_train).reshape(-1, input_height, input_width) # * sigma + mu # # X_pred = np.rint(X_pred).astype(int) # # X_pred = np.clip(X_pred, a_min=0, a_max=255) # # X_pred = X_pred.astype('uint8') # # try: # trian_last_hiddenLayer = cnn.output_hiddenLayer(X_train) # # test_last_hiddenLayer = cnn.output_hiddenLayer(test_x) # pickle.dump(trian_last_hiddenLayer, open(folder_path + 'encode.pkl', 'w')) # except: # print "Could not save encoded images" print ("Saving some images....") for i in range(10): index = np.random.randint(train_x.shape[0]) print (index) def get_picture_array(X, index): array = np.rint(X[index] * 256).astype(np.int).reshape(input_height, input_width) array = np.clip(array, a_min=0, a_max=255) return array.repeat(4, axis=0).repeat(4, axis=1).astype(np.uint8()) original_image = Image.fromarray(get_picture_array(X_out, index)) # original_image.save(folder_path + 'original' + str(index) + '.png', format="PNG") # # array = np.rint(trian_last_hiddenLayer[index] * 256).astype(np.int).reshape(input_height/2, input_width/2) # array = np.clip(array, a_min=0, a_max=255) # encode_image = Image.fromarray(array.repeat(4, axis=0).repeat(4, axis=1).astype(np.uint8())) # encode_image.save(folder_path + 'encode' + str(index) + '.png', format="PNG") new_size = (original_image.size[0] 
* 3, original_image.size[1]) new_im = Image.new('L', new_size) new_im.paste(original_image, (0, 0)) pred_image = Image.fromarray(get_picture_array(X_pred, index)) # pred_image.save(folder_path + 'pred' + str(index) + '.png', format="PNG") new_im.paste(pred_image, (original_image.size[0], 0)) noise_image = Image.fromarray(get_picture_array(X_train, index)) new_im.paste(noise_image, (original_image.size[0]*2, 0)) new_im.save(folder_path+'origin_prediction_noise-'+str(index)+'.png', format="PNG") # diff = ImageChops.difference(original_image, pred_image) # diff = diff.convert('L') # diff.save(folder_path + 'diff' + str(index) + '.png', format="PNG") # plt.imshow(new_im) # new_size = (original_image.size[0] * 2, original_image.size[1]) # new_im = Image.new('L', new_size) # new_im.paste(original_image, (0, 0)) # pred_image = Image.fromarray(get_picture_array(X_train, index)) # # pred_image.save(folder_path + 'noisyInput' + str(index) + '.png', format="PNG") # new_im.paste(pred_image, (original_image.size[0], 0)) # new_im.save(folder_path+'origin_VS_noise-'+str(index)+'.png', format="PNG") # plt.imshow(new_im) return cnn def createSAE(input_height, input_width, X_train, X_out): encode_size = 200 cnn1 = NeuralNet(layers=[ ('input', layers.InputLayer), ('hidden', layers.DenseLayer), ('hiddenOut', layers.DenseLayer), ('output_layer', ReshapeLayer), ], input_shape=(None, 1, input_width, input_height), hidden_num_units= 10000, hiddenOut_num_units= 42000, output_layer_shape = (([0], -1)), update_learning_rate=learning_rate, update_momentum=update_momentum, update=nesterov_momentum, train_split=TrainSplit(eval_size=train_valid_split), # batch_iterator_train=BatchIterator(batch_size=batch_size), batch_iterator_train=FlipBatchIterator(batch_size=batch_size), regression=True, max_epochs=epochs, verbose=1, hiddenLayer_to_output=-3) cnn1.fit(X_train, X_out) trian_last_hiddenLayer = cnn1.output_hiddenLayer(X_train) test_last_hiddenLayer = cnn1.output_hiddenLayer(test_x) cnn2 = NeuralNet(layers=[ ('input', layers.InputLayer), ('hidden', layers.DenseLayer), ('output_layer', layers.DenseLayer), ], input_shape=(None,10000), hidden_num_units= 3000, output_layer_num_units = 10000, update_learning_rate=learning_rate, update_momentum=update_momentum, update=nesterov_momentum, train_split=TrainSplit(eval_size=train_valid_split), batch_iterator_train=BatchIterator(batch_size=batch_size), # batch_iterator_train=FlipBatchIterator(batch_size=batch_size), regression=True, max_epochs=epochs, verbose=1, hiddenLayer_to_output=-2) trian_last_hiddenLayer = trian_last_hiddenLayer.astype(np.float32) cnn2.fit(trian_last_hiddenLayer, trian_last_hiddenLayer) trian_last_hiddenLayer = cnn2.output_hiddenLayer(trian_last_hiddenLayer) test_last_hiddenLayer = cnn2.output_hiddenLayer(test_last_hiddenLayer) cnn3 = NeuralNet(layers=[ ('input', layers.InputLayer), ('hidden', layers.DenseLayer), ('output_layer', layers.DenseLayer), ], input_shape=(None,3000), hidden_num_units= 1000, output_layer_num_units = 3000, update_learning_rate=learning_rate, update_momentum=update_momentum, update=nesterov_momentum, train_split=TrainSplit(eval_size=train_valid_split), batch_iterator_train=BatchIterator(batch_size=batch_size), # batch_iterator_train=FlipBatchIterator(batch_size=batch_size), regression=True, max_epochs=epochs, verbose=1, hiddenLayer_to_output=-2) trian_last_hiddenLayer = trian_last_hiddenLayer.astype(np.float32) cnn3.fit(trian_last_hiddenLayer, trian_last_hiddenLayer) trian_last_hiddenLayer = 
cnn3.output_hiddenLayer(trian_last_hiddenLayer) test_last_hiddenLayer = cnn3.output_hiddenLayer(test_last_hiddenLayer) cnn4 = NeuralNet(layers=[ ('input', layers.InputLayer), ('hidden', layers.DenseLayer), ('output_layer', layers.DenseLayer), ], input_shape=(None,1000), hidden_num_units= 300, output_layer_num_units = 1000, update_learning_rate=learning_rate, update_momentum=update_momentum, update=nesterov_momentum, train_split=TrainSplit(eval_size=train_valid_split), batch_iterator_train=BatchIterator(batch_size=batch_size), # batch_iterator_train=FlipBatchIterator(batch_size=batch_size), regression=True, max_epochs=epochs, verbose=1, hiddenLayer_to_output=-2) trian_last_hiddenLayer = trian_last_hiddenLayer.astype(np.float32) cnn4.fit(trian_last_hiddenLayer, trian_last_hiddenLayer) trian_last_hiddenLayer = cnn4.output_hiddenLayer(trian_last_hiddenLayer) test_last_hiddenLayer = cnn4.output_hiddenLayer(test_last_hiddenLayer) input_layer = cnn1.get_all_layers()[0] hidden1_layer = cnn1.get_all_layers()[1] hidden1_layer.input_layer = input_layer hidden2_layer = cnn2.get_all_layers()[1] hidden2_layer.input_layer = hidden1_layer hidden3_layer = cnn3.get_all_layers()[1] hidden3_layer.input_layer = hidden2_layer final_layer = cnn4.get_all_layers()[1] final_layer.input_layer = hidden3_layer # out_train = final_layer.get_output(x_train).eval() # out_test = final_layer.get_output(test_x).eval() f = gzip.open(folder_path + "output.pkl.gz",'wb') cPickle.dump((trian_last_hiddenLayer, test_last_hiddenLayer), f, protocol=2) f.close() # f = gzip.open("pickled_images/tmp.pkl.gz", 'rb') # trian_last_hiddenLayer, test_last_hiddenLayer = cPickle.load(f) # f.close() return cnn1 def createCnn_AE(input_height, input_width): if categories==20: outputLayerSize=20 else: outputLayerSize=15 encode_size = 1024 border_mode = "same" cnn = NeuralNet(layers=[ ('input', layers.InputLayer), ('conv1', layers.Conv2DLayer), ('pool1', layers.MaxPool2DLayer), ('conv2', layers.Conv2DLayer), ('pool2', layers.MaxPool2DLayer), ('conv3', layers.Conv2DLayer), ('pool3', layers.MaxPool2DLayer), # ('conv4', layers.Conv2DLayer), # ('pool4', layers.MaxPool2DLayer), ('flatten', ReshapeLayer), # output_dense ('encode_layer', layers.DenseLayer), ('hidden', layers.DenseLayer), # output_dense ('unflatten', ReshapeLayer), # ('unpool4', Unpool2DLayer), # ('deconv4', layers.Conv2DLayer), ('unpool3', Unpool2DLayer), ('deconv3', layers.Conv2DLayer), ('unpool2', Unpool2DLayer), ('deconv2', layers.Conv2DLayer), ('unpool1', Unpool2DLayer), ('deconv1', layers.Conv2DLayer), ('output_layer', ReshapeLayer), # ('hidden5', layers.DenseLayer), # ('hidden6', layers.DenseLayer), # ('hidden7', layers.DenseLayer), # ('output', layers.DenseLayer) ], input_shape=(None, 1, input_width, input_height), # Layer current size - 1x300x140 conv1_num_filters=layers_size[0], conv1_filter_size=(5, 5), conv1_border_mode="valid", conv1_nonlinearity=None, #Layer current size - NFx296x136 pool1_pool_size=(2, 2), # Layer current size - NFx148x68 conv2_num_filters=layers_size[1], conv2_filter_size=(5, 5), conv2_border_mode=border_mode, conv2_nonlinearity=None, # Layer current size - NFx148x68 pool2_pool_size=(2, 2), # Layer current size - NFx74x34 conv3_num_filters=layers_size[2], conv3_filter_size=(3, 3), conv3_border_mode=border_mode, conv3_nonlinearity=None, # Layer current size - NFx74x34 pool3_pool_size=(2, 2), # conv4_num_filters=layers_size[3], conv4_filter_size=(5, 5), conv4_border_mode=border_mode, conv4_nonlinearity=None, # pool4_pool_size=(2, 2), # Layer current size - 
        # Layer current size - NFx37x17
        flatten_shape=(([0], -1)),  # not sure if necessary?
        # Layer current size - NF*37*17
        encode_layer_num_units=encode_size,
        # Layer current size - 200
        hidden_num_units=layers_size[-1] * 37 * 17,
        # Layer current size - NF*37*17
        unflatten_shape=(([0], layers_size[-1], 37, 17)),
        # deconv4_num_filters=layers_size[3], deconv4_filter_size=(5, 5),
        # deconv4_border_mode=border_mode, deconv4_nonlinearity=None,
        # unpool4_ds=(2, 2),
        # Layer current size - NFx37x17
        unpool3_ds=(2, 2),
        # Layer current size - NFx74x34
        deconv3_num_filters=layers_size[-2], deconv3_filter_size=(3, 3),
        deconv3_border_mode=border_mode, deconv3_nonlinearity=None,
        # Layer current size - NFx74x34
        unpool2_ds=(2, 2),
        # Layer current size - NFx148x68
        deconv2_num_filters=layers_size[-3], deconv2_filter_size=(5, 5),
        deconv2_border_mode=border_mode, deconv2_nonlinearity=None,
        # Layer current size - NFx148x68
        unpool1_ds=(2, 2),
        # Layer current size - NFx296x136
        deconv1_num_filters=1, deconv1_filter_size=(5, 5),
        deconv1_border_mode="full", deconv1_nonlinearity=None,
        # Layer current size - 1x300x140
        output_layer_shape=(([0], -1)),
        # Layer current size - 300*140
        # output_num_units=outputLayerSize, output_nonlinearity=None,
        update_learning_rate=learning_rate,
        update_momentum=update_momentum,
        update=nesterov_momentum,
        train_split=TrainSplit(eval_size=train_valid_split),
        # batch_iterator_train=BatchIterator(batch_size=batch_size),
        batch_iterator_train=FlipBatchIterator(batch_size=batch_size),
        regression=True,
        max_epochs=epochs,
        verbose=1,
        hiddenLayer_to_output=-10)
    # on_training_finished=last_hidden_layer,
    return cnn


def createNNwithDecay(input_height, input_width):
    if categories == 20:
        outputLayerSize = 20
    else:
        outputLayerSize = 15
    cnn = NeuralNet(
        layers=[
            ('input', layers.InputLayer),
            ('conv1', layers.Conv2DLayer),
            ('pool1', layers.MaxPool2DLayer),
            ('conv2', layers.Conv2DLayer),
            ('pool2', layers.MaxPool2DLayer),
            ('conv3', layers.Conv2DLayer),
            ('pool3', layers.MaxPool2DLayer),
            ('conv4', layers.Conv2DLayer),
            ('pool4', layers.MaxPool2DLayer),
            ('hidden5', layers.DenseLayer),
            ('hidden6', layers.DenseLayer),
            ('hidden7', layers.DenseLayer),
            ('output', layers.DenseLayer)],
        input_shape=(None, 1, input_width, input_height),
        conv1_num_filters=layers_size[0], conv1_filter_size=(5, 5), pool1_pool_size=(2, 2),
        conv2_num_filters=layers_size[1], conv2_filter_size=(9, 9), pool2_pool_size=(2, 2),
        conv3_num_filters=layers_size[2], conv3_filter_size=(11, 11), pool3_pool_size=(4, 2),
        conv4_num_filters=layers_size[3], conv4_filter_size=(8, 5), pool4_pool_size=(2, 2),
        hidden5_num_units=500,
        hidden6_num_units=200,
        hidden7_num_units=100,
        output_num_units=outputLayerSize,
        output_nonlinearity=None,
        update_learning_rate=learning_rate,
        update_rho=update_rho,
        update=rmsprop,
        train_split=TrainSplit(eval_size=train_valid_split),
        batch_iterator_train=BatchIterator(batch_size=batch_size),
        regression=True,
        max_epochs=epochs,
        verbose=1,
        hiddenLayer_to_output=-2)
    # on_training_finished=last_hidden_layer,
    return cnn


def last_hidden_layer(s, h):
    print(s.output_last_hidden_layer_(train_x))
    # input_layer = s.get_all_layers()[0]
    # last_h_layer = s.get_all_layers()[-2]
    # f = theano.function(s.X_inputs, last_h_layer.get_output(last_h_layer), allow_input_downcast=True)
    # myFunc = theano.function(
    #     inputs=s.input_X,
    #     outputs=s.h_predict,
    #     allow_input_downcast=True,
    # )
    # print(s.output_last_hidden_layer_(train_x, -2))


def writeOutputFile(output_file, train_history, layer_info):
    # save the network's parameters
    output_file.write("Validation set error: " +
                      str(train_history[-1]['valid_accuracy']) + "\n\n")
    results_file.write(str(train_history[-1]['valid_accuracy']) + "\t")
    output_file.write("Training NN on: " +
                      ("20 Top Categories\n" if 20 == categories else "Article Categories\n"))
    output_file.write("Learning rate: " + str(learning_rate) + "\n")
    results_file.write(str(learning_rate) + "\t")
    output_file.write(("Momentum: " + str(update_momentum) + "\n") if update_rho is None
                      else ("Decay Factor: " + str(update_rho) + "\n"))
    results_file.write(str(update_momentum) + "\t")
    output_file.write(("FlipBatchIterator" if flip_batch else "BatchIterator") +
                      " with batch: " + str(batch_size) + "\n")
    results_file.write(("FlipBatchIterator" if flip_batch else "BatchIterator") + "\t" +
                       str(batch_size) + "\t")
    output_file.write("Num epochs: " + str(epochs) + "\n")
    results_file.write(str(epochs) + "\t")
    output_file.write("Layers size: " + str(layers_size) + "\n\n")
    results_file.write(str(layers_size) + "\t")
    output_file.write("Activation func: " + ("Rectify" if activation is None else str(activation)) + "\n")
    results_file.write(("Rectify" if activation is None else str(activation)) + "\t")
    output_file.write("Last layer activation func: " +
                      ("Rectify" if last_layer_activation is None else str(last_layer_activation)) + "\n")
    results_file.write(("Rectify" if last_layer_activation is None else str(last_layer_activation)) + "\t")
    # output_file.write("Multiple Positives by: " + str(multiple_positives) + "\n")
    output_file.write("Number of images: " + str(end_index) + "\n")
    results_file.write(str(end_index) + "\t")
    output_file.write("Dropout noise percent: " + str(dropout_percent * 100) + "%\n")
    results_file.write(str(dropout_percent * 100) + "%\t")
    output_file.write("Train/validation split: " + str(train_valid_split) + "\n")
    results_file.write(str(train_valid_split) + "\t")
    output_file.write("shuffle_input: " + str(shuffle_input) + "\n")
    results_file.write(str(shuffle_input) + "\t")
    output_file.write("zero_meaning: " + str(zero_meaning) + "\n\n")
    results_file.write(str(zero_meaning) + "\t")
    output_file.write("history: " + str(train_history) + "\n\n")
    results_file.write(str(train_history) + "\t")
    output_file.write("layer_info:\n" + str(layer_info) + "\n")
    results_file.write("[" + str(layer_info).replace("\n", ",") + "]\t")
    output_file.write("filters_info:\n" + str(filter_1) + "\n")
    output_file.write(str(filter_2) + "\n")
    output_file.write(str(filter_3) + "\n")
    output_file.write(str(filter_4) + "\n")
    output_file.write(str(filter_5) + "\n")
    output_file.write(str(filter_6) + "\n\n")
    results_file.write("{" + str((filter_1, filter_2, filter_3, filter_4, filter_5, filter_6)) + "}\t")
    output_file.write("Run time[minutes] is: " + str(run_time) + "\n")
    output_file.flush()
    results_file.write(str(time.ctime()) + "\t")
    results_file.write(folder_name + "\n")
    results_file.flush()


def outputLastLayer_CNN(cnn, X, y, test_x, test_y):
    print("outputting last hidden layer")
    # train_last_hiddenLayer = cnn.output_hiddenLayer(train_x)
    # Encode the training set in four chunks to limit memory use.
    quarter_x = X.shape[0] // 4
    train_last_hiddenLayer1 = cnn.output_hiddenLayer(X[:quarter_x])
    print("after first quarter train output")
    train_last_hiddenLayer2 = cnn.output_hiddenLayer(X[quarter_x:2 * quarter_x])
    print("after second quarter train output")
    train_last_hiddenLayer3 = cnn.output_hiddenLayer(X[2 * quarter_x:3 * quarter_x])
    print("after third quarter train output")
    train_last_hiddenLayer4 = cnn.output_hiddenLayer(X[3 * quarter_x:])
    print("after all train output")
    test_last_hiddenLayer = cnn.output_hiddenLayer(test_x)
    print("after test output")
    # lastLayerOutputs = (train_last_hiddenLayer, train_y, test_last_hiddenLayer, test_y)
    lastLayerOutputs = (np.concatenate((train_last_hiddenLayer1, train_last_hiddenLayer2,
                                        train_last_hiddenLayer3, train_last_hiddenLayer4), axis=0),
                        y, test_last_hiddenLayer, test_y)
    return lastLayerOutputs


def outputLastLayer_DAE(train_x, train_y, test_x, test_y):
    # building the SDA
    sDA = StackedDA(layers_size)
    # pre-training the SDA
    sDA.pre_train(train_x, noise_rate=input_noise_rate, epochs=pre_train_epochs, LOG=log_file)
    # saving a PNG representation of the first layer
    W = sDA.Layers[0].W.T[:, 1:]
    # import rl_dae.utils
    # utils.saveTiles(W, img_shape=(28, 28), tile_shape=(10, 10), filename="results/res_dA.png")
    # adding the final layer
    # sDA.finalLayer(train_x, train_y, epochs=softmax_train_epochs)
    # training the whole network
    # sDA.fine_tune(train_x, train_x, epochs=fine_tune_epochs)
    # predicting using the SDA
    testRepresentation = sDA.predict(test_x)
    pred = testRepresentation.argmax(1)
    # let's see how the network did
    # test_category = test_y.argmax(1)
    e = 0.0
    t = 0.0
    for i in range(test_y.shape[0]):
        if any(test_y[i]):
            e += (test_y[i, pred[i]] == 1)
            t += 1
    # printing the result; this structure should result in 80% accuracy
    print("DAE accuracy: %2.2f%%" % (100 * e / t))
    output_file.write("DAE predict rate: " + str(100 * e / t) + "%\n")
    lastLayerOutputs = (sDA.predict(train_x), train_y, testRepresentation, test_y)
    return lastLayerOutputs


# (the statements below continue the body of the training routine whose def
# appears earlier in the file; note the final return)
    # sDA
    start_time = time.clock()
    print("Start time: ", time.ctime())

    if loadedData is None:
        # load 2-d data
        train_x, train_y, test_x, test_y = load2d(categories, output_file, input_width, input_height,
                                                  end_index, multiple_positives, dropout_percent)
    else:
        train_x, train_y, test_x, test_y = loadedData

    if zero_meaning:
        train_x = train_x.astype(np.float64)
        mu, sigma = np.mean(train_x.flatten()), np.std(train_x.flatten())
        print("Mean- ", mu)
        print("Std- ", sigma)
        train_x = (train_x - mu) / sigma

    x_train = train_x[:end_index].astype(np.float32).reshape((-1, 1, input_width, input_height))
    x_out = x_train.reshape((x_train.shape[0], -1))
    # test_x = test_x.astype(np.float32).reshape((-1, 1, input_width, input_height))

    cnn = createCSAE(input_height, input_width, x_train, x_out)

    ''' Denoising Autoencoder
    dae = DenoisingAutoencoder(n_hidden=10)
    dae.fit(train_x)
    new_X = dae.transform(train_x)
    print new_X
    '''

    ''' Conv Stacked AE
    train_x = np.rint(train_x * 256).astype(np.int).reshape((-1, 1, input_width, input_height))  # convert to (0,255) int range (we'll do our own scaling)
    mu, sigma = np.mean(train_x.flatten()), np.std(train_x.flatten())
    x_train = train_x.astype(np.float64)
    x_train = (x_train - mu) / sigma
    x_train = x_train.astype(np.float32)
    # we need our target to be 1 dimensional
    x_out = x_train.reshape((x_train.shape[0], -1))
    test_x = np.rint(test_x * 256).astype(np.int).reshape((-1, 1, input_width, input_height))  # convert to (0,255) int range (we'll do our own scaling)
    # mu, sigma = np.mean(test_x.flatten()), np.std(test_x.flatten())
    test_x = train_x.astype(np.float64)
    test_x = (x_train - mu) / sigma
    test_x = x_train.astype(np.float32)
    '''

    ''' CNN with lasagne
    cnn = createNNwithMomentom(input_height, input_width) if update_rho == None else createNNwithDecay(input_height, input_width)
    cnn.fit(train_x, train_y)
    lastLayerOutputs = outputLastLayer_CNN(cnn, train_x, train_y, test_x, test_y)
    '''

    ''' AE (not Stacked) with Convolutional layers
    cnn = createCnn_AE(input_height, input_width)
    cnn.fit(x_train, x_out)
    '''

    ''' Stacked AE with lasagne
    cnn = createSAE(input_height, input_width, x_train, x_out)
    '''
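    # run_svm (called just below) is defined elsewhere in this project. Purely
    # as an illustration of the assumed interface, here is a hypothetical
    # sketch: an sklearn SVM fit on the flattened encoded features, with
    # train_y/test_y from the enclosing scope assumed to be integer labels.
    def _run_svm_sketch(encoded_train):
        from sklearn.svm import SVC
        clf = SVC()
        clf.fit(encoded_train.reshape(len(encoded_train), -1), train_y)
        encoded_test = cnn.output_hiddenLayer(test_x)
        print("SVM accuracy:",
              clf.score(encoded_test.reshape(len(encoded_test), -1), test_y))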
    run_time = (time.clock() - start_time) / 60.
    writeOutputFile(output_file, cnn.train_history_, PrintLayerInfo()._get_layer_info_plain(cnn))
    print("Learning took (min)- ", run_time)

    try:
        # Re-noise the training set and pickle its encoded representation.
        train_x = np.random.binomial(1, 1 - dropout_percent, size=train_x.shape) * train_x
        train_last_hiddenLayer = cnn.output_hiddenLayer(train_x)
        print("Pickling all encoded images:")
        pickle.dump(train_last_hiddenLayer, open(folder_path + 'encode.pkl', 'wb'))
    except Exception:
        print("Could not save encoded images")

    print("Running SVM:")
    run_svm(train_last_hiddenLayer)

    sys.setrecursionlimit(10000)
    # pickle.dump(cnn, open(folder_path + 'conv_ae.pkl', 'w'))
    # ae = pickle.load(open('mnist/conv_ae.pkl', 'r'))
    # cnn.save_weights_to(folder_path + 'conv_ae.np')
    # run_svm(cnn)
    return cnn.train_history_[-1]['valid_accuracy']
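# FlipBatchIterator is used by the nets above but not defined in this excerpt.
# A minimal sketch, assuming it follows the common nolearn pattern of flipping
# a random half of each training batch horizontally (and, because these nets
# are autoencoders with flattened targets, flipping the targets to match).
# The name is suffixed with "Sketch" so it does not shadow the real class:
class FlipBatchIteratorSketch(BatchIterator):
    def transform(self, Xb, yb):
        Xb, yb = super(FlipBatchIteratorSketch, self).transform(Xb, yb)
        bs = Xb.shape[0]
        idx = np.random.choice(bs, bs // 2, replace=False)
        Xb[idx] = Xb[idx, :, :, ::-1]  # flip the selected images along the width axis
        if yb is not None and yb.ndim == 2:
            # targets are flattened images; apply the same flip to them
            h, w = Xb.shape[2], Xb.shape[3]
            yb[idx] = yb[idx].reshape(-1, h, w)[:, :, ::-1].reshape(len(idx), -1)
        return Xb, yb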
def make_grnn(batch_size,
              emb_size,
              g_hidden_size,
              word_n,
              wc_num,
              dence,
              wsm_num=1,
              rnn_type='LSTM',
              rnn_size=12,
              dropout_d=0.5,
              # pooling='mean',
              quest_na=4,
              gradient_steps=-1,
              valid_indices=None,
              lr=0.05,
              grad_clip=10):
    def select_rnn(x):
        return {
            'RNN': LL.RecurrentLayer,
            'LSTM': LL.LSTMLayer,
            'GRU': LL.GRULayer,
        }.get(x, LL.LSTMLayer)

    # dence = dence + [1]
    RNN = select_rnn(rnn_type)

    # ------------------------------------------------------------- input layers
    layers = [
        (LL.InputLayer, {'name': 'l_in_se_q', 'shape': (None, word_n, emb_size)}),
        (LL.InputLayer, {'name': 'l_in_se_a', 'shape': (None, quest_na, word_n, emb_size)}),
        (LL.InputLayer, {'name': 'l_in_mask_q', 'shape': (None, word_n)}),
        (LL.InputLayer, {'name': 'l_in_mask_a', 'shape': (None, quest_na, word_n)}),
        (LL.InputLayer, {'name': 'l_in_mask_ri_q', 'shape': (None, word_n)}),
        (LL.InputLayer, {'name': 'l_in_mask_ri_a', 'shape': (None, quest_na, word_n)}),
        (LL.InputLayer, {'name': 'l_in_wt_q', 'shape': (None, word_n, word_n)}),
        (LL.InputLayer, {'name': 'l_in_wt_a', 'shape': (None, word_n, quest_na, word_n)}),
        (LL.InputLayer, {'name': 'l_in_act_', 'shape': (None, word_n, g_hidden_size)}),
        (LL.InputLayer, {'name': 'l_in_act__', 'shape': (None, word_n, word_n, g_hidden_size)}),
    ]

    # ------------------------------------------------------------- slice layers
    # l_qs = []
    # l_cas = []
    l_ase_names = ['l_ase_{}'.format(i) for i in range(quest_na)]
    l_amask_names = ['l_amask_{}'.format(i) for i in range(quest_na)]
    l_amask_ri_names = ['l_amask_ri_{}'.format(i) for i in range(quest_na)]
    l_awt_names = ['l_awt_{}'.format(i) for i in range(quest_na)]
    for i in range(quest_na):
        layers.extend([(LL.SliceLayer, {'name': l_ase_names[i], 'incoming': 'l_in_se_a',
                                        'indices': i, 'axis': 1})])
    for i in range(quest_na):
        layers.extend([(LL.SliceLayer, {'name': l_amask_names[i], 'incoming': 'l_in_mask_a',
                                        'indices': i, 'axis': 1})])
    for i in range(quest_na):
        layers.extend([(LL.SliceLayer, {'name': l_amask_ri_names[i], 'incoming': 'l_in_mask_ri_a',
                                        'indices': i, 'axis': 1})])
    for i in range(quest_na):
        layers.extend([(LL.SliceLayer, {'name': l_awt_names[i], 'incoming': 'l_in_wt_a',
                                        'indices': i, 'axis': 1})])

    # -------------------------------------------------------------- GRNN layers
    # Parameters shared between the question GRNN and all answer GRNNs.
    WC = theano.shared(np.random.randn(wc_num, g_hidden_size, g_hidden_size).astype('float32'))
    # WC = LI.Normal(0.1)
    WSM = theano.shared(np.random.randn(emb_size, g_hidden_size).astype('float32'))
    b = theano.shared(np.ones(g_hidden_size).astype('float32'))
    # b = lasagne.init.Constant(1.0)
    layers.extend([(GRNNLayer, {'name': 'l_q_grnn',
                                'incomings': ['l_in_se_q', 'l_in_mask_q', 'l_in_wt_q',
                                              'l_in_act_', 'l_in_act__'],
                                'emb_size': emb_size, 'hidden_size': g_hidden_size,
                                'word_n': word_n, 'wc_num': wc_num, 'wsm_num': wsm_num,
                                'only_return_final': False,
                                'WC': WC, 'WSM': WSM, 'b': b})])
    l_a_grnns_names = ['l_a_grnn_{}'.format(i) for i in range(quest_na)]
    for i, l_a_grnns_name in enumerate(l_a_grnns_names):
        layers.extend([(GRNNLayer, {'name': l_a_grnns_name,
                                    'incomings': [l_ase_names[i], l_amask_names[i],
                                                  l_awt_names[i], 'l_in_act_', 'l_in_act__'],
                                    'emb_size': emb_size, 'hidden_size': g_hidden_size,
                                    'word_n': word_n, 'wc_num': wc_num, 'wsm_num': wsm_num,
                                    'only_return_final': False,
                                    'WC': WC, 'WSM': WSM, 'b': b})])

    # ------------------------------------------------------- concatenate layers
    layers.extend([(LL.ConcatLayer, {'name': 'l_qa_concat',
                                     'incomings': ['l_q_grnn'] + l_a_grnns_names})])
    layers.extend([(LL.ConcatLayer, {'name': 'l_qamask_concat',
                                     'incomings': ['l_in_mask_ri_q'] + l_amask_ri_names})])

    # --------------------------------------------------------------- RNN layers
    layers.extend([(RNN, {'name': 'l_qa_rnn_f',
                          'incoming': 'l_qa_concat',
                          'mask_input': 'l_qamask_concat',
                          'num_units': rnn_size,
                          'backwards': False,
                          'only_return_final': True,
                          'grad_clipping': grad_clip})])
    layers.extend([(RNN, {'name': 'l_qa_rnn_b',
                          'incoming': 'l_qa_concat',
                          'mask_input': 'l_qamask_concat',
                          'num_units': rnn_size,
                          'backwards': True,
                          'only_return_final': True,
                          'grad_clipping': grad_clip})])
    # Element-wise sum of the forward and backward final states.
    layers.extend([(LL.ElemwiseSumLayer, {'name': 'l_qa_rnn_conc',
                                          'incomings': ['l_qa_rnn_f', 'l_qa_rnn_b']})])

    # ------------------------------------------------------------ pooling layer
    # l_qa_pool = layers.extend([(LL.ExpressionLayer, {'name': 'l_qa_pool',
    #                                                  'incoming': l_qa_rnn_conc,
    #                                                  'function': lambda X: X.mean(-1),
    #                                                  'output_shape': 'auto'})])

    # -------------------------------------------------------------- dense layers
    l_dence_names = ['l_dence_{}'.format(i) for i, _ in enumerate(dence)]
    if dropout_d:
        layers.extend([(LL.DropoutLayer, {'name': 'l_dence_do', 'p': dropout_d})])
    for i, d in enumerate(dence):
        if i < len(dence) - 1:
            nonlin = LN.tanh
        else:
            nonlin = LN.softmax
        layers.extend([(LL.DenseLayer, {'name': l_dence_names[i],
                                        'num_units': d,
                                        'nonlinearity': nonlin})])
        if i < len(dence) - 1 and dropout_d:
            layers.extend([(LL.DropoutLayer, {'name': l_dence_names[i] + 'do',
                                              'p': dropout_d})])

    def loss(x, t):
        return LO.aggregate(LO.categorical_crossentropy(T.clip(x, 1e-6, 1. - 1e-6), t))
        # return LO.aggregate(LO.squared_error(T.clip(x, 1e-6, 1. - 1e-6), t))

    if isinstance(valid_indices, (np.ndarray, list)):
        train_split = TrainSplit_indices(valid_indices=valid_indices)
    else:
        train_split = TrainSplit(eval_size=valid_indices, stratify=False)

    nnet = NeuralNet(
        y_tensor_type=T.ivector,
        layers=layers,
        update=LU.adagrad,
        update_learning_rate=lr,
        # update_epsilon=1e-7,
        objective_loss_function=loss,
        regression=False,
        verbose=2,
        batch_iterator_train=PermIterator(batch_size=batch_size),
        batch_iterator_test=BatchIterator(batch_size=batch_size // 2),
        # batch_iterator_train=BatchIterator(batch_size=batch_size),
        # batch_iterator_test=BatchIterator(batch_size=batch_size),
        # train_split=TrainSplit(eval_size=eval_size)
        train_split=train_split)
    nnet.initialize()
    PrintLayerInfo()(nnet)
    return nnet
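# A minimal usage sketch for make_grnn. Every value below is hypothetical; the
# real embeddings, masks and weight tensors come from the surrounding pipeline.
# With multiple InputLayers, nolearn expects X as a dict keyed by input-layer
# name, which is why the (commented) fit call passes a dict:
def _make_grnn_usage_sketch():
    nnet = make_grnn(batch_size=32, emb_size=50, g_hidden_size=64, word_n=40,
                     wc_num=3, dence=[64, 2], rnn_type='GRU', valid_indices=0.2)
    # X = {'l_in_se_q': se_q, 'l_in_se_a': se_a, 'l_in_mask_q': mask_q, ...}
    # nnet.fit(X, y.astype('int32'))
    return nnet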
# (continuation of a separate task4 script; the opening of this NeuralNet(...)
# call is not included in this excerpt)
        narrow_nonlinearity=nonlinearities.softplus,
        reverse_nonlinearity=nonlinearities.sigmoid,
        coutput_nonlinearity=nonlinearities.softmax,
        # dropout0_p=0.1,
        dropout1_p=0.01,
        # regression=True,
        regression=False,
        verbose=1)
nn.initialize()
nn.load_params_from('task4/koebi_train_history_AE')
PrintLayerInfo()(nn)
nn.fit(X, Y)

test = pd.read_hdf("task4/test.h5", "test")
id_col = test.index
test_data = np.array(test)
test_data = skpre.StandardScaler().fit_transform(test_data)
test_prediction = nn.predict(test_data)

# Write each prediction and its row id into a csv file
with open('task4/' + result_file_name + '.csv', 'wb') as csvfile:
    fieldnames = ['Id', 'y']
    writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
    # print test_prediction
    writer.writeheader()
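# The excerpt stops right after writeheader(); the per-row writes presumably
# follow. A minimal sketch of that step (assuming one predicted label per test
# id, matching the 'Id'/'y' header above):
def _write_prediction_rows(writer, ids, predictions):
    for row_id, pred in zip(ids, predictions):
        writer.writerow({'Id': row_id, 'y': pred})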