# Random Forest
rf.fit(train_features_df.values.tolist(), train_labels_df['cat'].tolist())
predicted_labels = []
for index, vector in enumerate(test_features_df.values):
    predicted_labels.append(str(rf.predict(vector.reshape(1, -1))[0]))
tree_confusion_matrices["Random Forest"].append(
    tree.plot_confusion_matrix(test_labels_df['cat'].values.astype(str), predicted_labels))  # Bit hacky to use the tree method

# Normalize the features (mean-centred min-max scaling) before feeding the neural network
train_features_df = (train_features_df - train_features_df.mean()) / (train_features_df.max() - train_features_df.min())
train_features_df = train_features_df.reset_index(drop=True)
test_features_df = (test_features_df - test_features_df.mean()) / (test_features_df.max() - test_features_df.min())
test_features_df = test_features_df.reset_index(drop=True)

# Neural Network
model = build_nn(nr_features=len(train_features_df.columns))
model.initialize()
layer_info = PrintLayerInfo()
layer_info(model)
y_train = np.reshape(np.asarray(train_labels_df, dtype='int32'), (-1, 1)).ravel()
model.fit(train_features_df.values, np.add(y_train, -1))  # shift labels so they start at 0
predicted_labels = []
for index, vector in enumerate(test_features_df.values):
    predicted_labels.append(str(model.predict(vector.reshape(1, -1))[0] + 1))
tree_confusion_matrices["Neural Network"].append(
    tree.plot_confusion_matrix(test_labels_df['cat'].values.astype(str), predicted_labels))  # Bit hacky to use the tree method

# Bayesian Network
train_features_df, test_features_df = features_df.iloc[train_index, :].copy(), features_df.iloc[test_index, :].copy()
train_labels_df, test_labels_df = labels_df.iloc[train_index, :].copy(), labels_df.iloc[test_index, :].copy()
train_features_df = train_features_df.reset_index(drop=True)
test_features_df = test_features_df.reset_index(drop=True)
train_labels_df = train_labels_df.reset_index(drop=True)

def get_layer_info():
    """Created 04/11/2016"""
    from nolearn.lasagne import PrintLayerInfo
    layer_info = PrintLayerInfo()
    return layer_info

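# Usage sketch (hypothetical names): the helper just returns nolearn's PrintLayerInfo
# handler, which can be called on an initialized NeuralNet to print each layer's
# name, output shape and parameter count.
#
#     net = build_nn(nr_features=100)   # hypothetical feature count
#     net.initialize()
#     get_layer_info()(net)
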
def local_test(self, images, results, feature_extractors, model, k=2, size=64):
    kf = KFold(len(images), n_folds=k, shuffle=True, random_state=1337)
    # kf = KFold(500, n_folds=k, shuffle=True, random_state=1337)
    train_errors = []
    test_errors = []
    for train, validation in kf:
        # Divide the train_images in a training and validation set (using KFold)
        train_set = [images[i % len(images)] for i in train]
        validation_set = [images[i % len(images)] for i in validation]
        train_set_results = [results[i % len(images)] for i in train]
        validation_set_results = [results[i % len(images)] for i in validation]

        # Create an empty feature_vectors array and set the codebook of the sift extractor if there is any
        feature_vectors = []
        sift_extractor = temp_extractor = next(
            (extractor for extractor in feature_extractors if type(extractor) == SiftFeatureExtractor), None)
        if sift_extractor is not None:
            sift_extractor.set_codebook(train_set)
            feature_extractors[feature_extractors.index(temp_extractor)] = sift_extractor

        # Iterate over the train_set, extract the features from each image and append them to feature_vectors
        for image in train_set:
            print("Training ", image, "...")
            preprocessed_color_image = self.preprocess_image(image, size)
            feature_vector = []
            if feature_extractors != []:
                for feature_extractor in feature_extractors:
                    if type(feature_extractor) != SiftFeatureExtractor:
                        feature_vector = append(feature_vector, feature_extractor.extract_feature_vector(preprocessed_color_image))
                    else:
                        feature_vector = append(feature_vector, feature_extractor.extract_feature_vector(image))
            else:
                feature_vector = np.asarray(resize(cv2.imread(image), (48, 48, 3)).transpose(2, 0, 1).reshape(3, 48, 48))
            feature_vectors.append(feature_vector)

        # Logistic Regression (L1) for feature selection; lower C = stronger penalty = more features dropped
        clf2 = LogisticRegression(penalty='l1', dual=False, tol=0.0001, C=4)

        # Feature selection/reduction
        if model != "conv":
            new_feature_vectors = clf2.fit_transform(feature_vectors, train_set_results)
            if model == "neural":
                model = self.build_nn(nr_features=len(new_feature_vectors[0]))
                new_feature_vectors = np.asarray(new_feature_vectors)
                train_set_results = np.asarray(train_set_results)
                model.initialize()
                layer_info = PrintLayerInfo()
                layer_info(model)
            # Fit our model
            model.fit(new_feature_vectors, train_set_results)
        else:
            model = self.build_conv()
            # Fit our model
            model.fit(np.asarray(feature_vectors), np.asarray(train_set_results))

        train_prediction_object = Prediction()
        counter = 0
        for im in train_set:
            print("predicting train image ", counter)
            counter += 1
            preprocessed_color_image = self.preprocess_image(im, size)
            validation_feature_vector = []
            if feature_extractors != []:
                for feature_extractor in feature_extractors:
                    if type(feature_extractor) != SiftFeatureExtractor:
                        validation_feature_vector = append(validation_feature_vector, feature_extractor.extract_feature_vector(preprocessed_color_image))
                    else:
                        validation_feature_vector = append(validation_feature_vector, feature_extractor.extract_feature_vector(im))
                validation_feature_vector = clf2.transform(validation_feature_vector)
            else:
                validation_feature_vector = np.asarray(resize(cv2.imread(im), (48, 48, 3)).transpose(2, 0, 1).reshape(3, 48, 48))
            train_prediction_object.addPrediction(model.predict_proba(validation_feature_vector)[0])

        print("predicting test images")
        test_prediction_object = Prediction()
        counter = 0
        for im in validation_set:
            print("predicting test image ", counter)
            counter += 1
            preprocessed_color_image = self.preprocess_image(im, size)
            validation_feature_vector = []
            if feature_extractors != []:
                for feature_extractor in feature_extractors:
                    if type(feature_extractor) != SiftFeatureExtractor:
                        validation_feature_vector = append(validation_feature_vector, feature_extractor.extract_feature_vector(preprocessed_color_image))
                    else:
                        validation_feature_vector = append(validation_feature_vector, feature_extractor.extract_feature_vector(im))
                validation_feature_vector = clf2.transform(validation_feature_vector)
            else:
                validation_feature_vector = np.asarray(resize(cv2.imread(im), (48, 48, 3)).transpose(2, 0, 1).reshape(3, 48, 48))
            test_prediction_object.addPrediction(model.predict_proba(validation_feature_vector)[0])

        train_errors.append(train_prediction_object.evaluate(train_set_results))
        test_errors.append(test_prediction_object.evaluate(validation_set_results))
    return [train_errors, test_errors]

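# Usage sketch (hypothetical names): local_test takes image paths, their labels and a
# list of feature extractors, runs k-fold cross-validation and returns per-fold
# train/test errors.
#
#     predictor = Predictor()                                          # hypothetical owning class
#     extractors = [ColorFeatureExtractor(), SiftFeatureExtractor()]   # hypothetical extractor list
#     train_err, test_err = predictor.local_test(train_images, train_labels,
#                                                extractors, model="neural", k=2)
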
def make_submission(self, train_images, train_results, test_images, output_file_path, feature_extractors, model, size=64):
    # Create a vector of feature vectors and initialize the codebook of sift extractor
    feature_vectors = []
    sift_extractor = temp_extractor = next(
        (extractor for extractor in feature_extractors if type(extractor) == SiftFeatureExtractor), None)
    if sift_extractor is not None:
        sift_extractor.set_codebook(train_images)
        feature_extractors[feature_extractors.index(temp_extractor)] = sift_extractor

    # Extract features from every image
    for image in train_images:
        print("Training ", image, "...")
        preprocessed_color_image = self.preprocess_image(image, size)
        feature_vector = []
        if feature_extractors != []:
            for feature_extractor in feature_extractors:
                if type(feature_extractor) != SiftFeatureExtractor:
                    feature_vector = append(feature_vector, feature_extractor.extract_feature_vector(preprocessed_color_image))
                else:
                    feature_vector = append(feature_vector, feature_extractor.extract_feature_vector(image))
        else:
            feature_vector = np.asarray(resize(cv2.imread(image), (48, 48, 3)).transpose(2, 0, 1).reshape(3, 48, 48))
        feature_vectors.append(feature_vector)

    # Logistic Regression (L1) for feature selection; lower C = stronger penalty = more features dropped
    clf2 = LogisticRegression(penalty='l1', dual=False, tol=0.0001, C=4)

    # Feature selection/reduction
    if model != "conv":
        print("Old feature vector shape = ", len(feature_vectors), len(feature_vectors[0]))
        new_feature_vectors = clf2.fit_transform(feature_vectors, train_results)
        print("New feature vector shape = ", len(new_feature_vectors), len(new_feature_vectors[0]))
        if model == "neural":
            model = self.build_nn(nr_features=len(new_feature_vectors[0]))
            new_feature_vectors = np.asarray(new_feature_vectors)
            train_results = np.asarray(train_results)
            model.initialize()
            layer_info = PrintLayerInfo()
            layer_info(model)
        # Fit our model
        model.fit(new_feature_vectors, train_results)
    else:
        model = self.build_conv()
        # Fit our model
        model.fit(np.asarray(feature_vectors), np.asarray(train_results))

    # Iterate over the test images and add their prediction to a prediction object
    prediction_object = Prediction()
    for im in test_images:
        print("Predicting ", im)
        preprocessed_color_image = self.preprocess_image(im, size)
        validation_feature_vector = []
        if feature_extractors != []:
            for feature_extractor in feature_extractors:
                if type(feature_extractor) != SiftFeatureExtractor:
                    validation_feature_vector = append(validation_feature_vector, feature_extractor.extract_feature_vector(preprocessed_color_image))
                else:
                    validation_feature_vector = append(validation_feature_vector, feature_extractor.extract_feature_vector(im))
            validation_feature_vector = clf2.transform(validation_feature_vector)
        else:
            validation_feature_vector = np.asarray(resize(cv2.imread(im), (48, 48, 3)).transpose(2, 0, 1).reshape(3, 48, 48))
        prediction_object.addPrediction(model.predict_proba(validation_feature_vector)[0])

    # Write out the prediction object
    FileParser.write_CSV(output_file_path, prediction_object)

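# Usage sketch (hypothetical paths and extractor list): make_submission trains on all
# training images and writes the test-set class probabilities to a CSV via FileParser.
#
#     predictor.make_submission(train_images, train_labels, test_images,
#                               "submission.csv", extractors, model="conv")
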
def make_memnn(vocab_size, cont_sl, cont_wl, quest_wl, answ_wl, rnn_size,
               rnn_type='LSTM', pool_size=4, answ_n=4, dence_l=[100],
               dropout=0.5, batch_size=16, emb_size=50, grad_clip=40,
               init_std=0.1, num_hops=3, rnn_style=False, nonlin=LN.softmax,
               init_W=None, rng=None, art_pool=4, lr=0.01, mom=0,
               updates=LU.adagrad, valid_indices=0.2, permute_answ=False,
               permute_cont=False):
    def select_rnn(x):
        return {
            'RNN': LL.RecurrentLayer,
            'LSTM': LL.LSTMLayer,
            'GRU': LL.GRULayer,
        }.get(x, LL.LSTMLayer)

    # dence = dence + [1]
    RNN = select_rnn(rnn_type)

    # -----------------------------------------------------------------------weights
    tr_variables = {}
    tr_variables['WQ'] = theano.shared(init_std * np.random.randn(vocab_size, emb_size).astype('float32'))
    tr_variables['WA'] = theano.shared(init_std * np.random.randn(vocab_size, emb_size).astype('float32'))
    tr_variables['WC'] = theano.shared(init_std * np.random.randn(vocab_size, emb_size).astype('float32'))
    tr_variables['WTA'] = theano.shared(init_std * np.random.randn(cont_sl, emb_size).astype('float32'))
    tr_variables['WTC'] = theano.shared(init_std * np.random.randn(cont_sl, emb_size).astype('float32'))
    tr_variables['WAnsw'] = theano.shared(init_std * np.random.randn(vocab_size, emb_size).astype('float32'))

    # ------------------------------------------------------------------input layers
    layers = [(LL.InputLayer, {'name': 'l_in_q',
                               'shape': (batch_size, 1, quest_wl),
                               'input_var': T.itensor3('l_in_q_')}),
              (LL.InputLayer, {'name': 'l_in_a',
                               'shape': (batch_size, answ_n, answ_wl),
                               'input_var': T.itensor3('l_in_a_')}),
              (LL.InputLayer, {'name': 'l_in_q_pe',
                               'shape': (batch_size, 1, quest_wl, emb_size)}),
              (LL.InputLayer, {'name': 'l_in_a_pe',
                               'shape': (batch_size, answ_n, answ_wl, emb_size)}),
              (LL.InputLayer, {'name': 'l_in_cont',
                               'shape': (batch_size, cont_sl, cont_wl),
                               'input_var': T.itensor3('l_in_cont_')}),
              (LL.InputLayer, {'name': 'l_in_cont_pe',
                               'shape': (batch_size, cont_sl, cont_wl, emb_size)})]

    # ------------------------------------------------------------------slice layers
    # l_qs = []
    # l_cas = []
    l_a_names = ['l_a_{}'.format(i) for i in range(answ_n)]
    l_a_pe_names = ['l_a_pe{}'.format(i) for i in range(answ_n)]
    for i in range(answ_n):
        layers.extend([(LL.SliceLayer, {'name': l_a_names[i],
                                        'incoming': 'l_in_a',
                                        'indices': slice(i, i + 1),
                                        'axis': 1})])
    for i in range(answ_n):
        layers.extend([(LL.SliceLayer, {'name': l_a_pe_names[i],
                                        'incoming': 'l_in_a_pe',
                                        'indices': slice(i, i + 1),
                                        'axis': 1})])

    # ------------------------------------------------------------------MEMNN layers
    # question----------------------------------------------------------------------
    layers.extend([(EncodingFullLayer, {'name': 'l_emb_f_q',
                                        'incomings': ('l_in_q', 'l_in_q_pe'),
                                        'vocab_size': vocab_size,
                                        'emb_size': emb_size,
                                        'W': tr_variables['WQ'],
                                        'WT': None})])

    l_mem_names = ['ls_mem_n2n_{}'.format(i) for i in range(num_hops)]
    layers.extend([(MemoryLayer, {'name': l_mem_names[0],
                                  'incomings': ('l_in_cont', 'l_in_cont_pe', 'l_emb_f_q'),
                                  'vocab_size': vocab_size,
                                  'emb_size': emb_size,
                                  'A': tr_variables['WA'],
                                  'C': tr_variables['WC'],
                                  'AT': tr_variables['WTA'],
                                  'CT': tr_variables['WTC'],
                                  'nonlin': nonlin})])
    for i in range(1, num_hops):
        if i % 2:
            WC, WA = tr_variables['WA'], tr_variables['WC']
            WTC, WTA = tr_variables['WTA'], tr_variables['WTC']
        else:
            WA, WC = tr_variables['WA'], tr_variables['WC']
            WTA, WTC = tr_variables['WTA'], tr_variables['WTC']
        layers.extend([(MemoryLayer, {'name': l_mem_names[i],
                                      'incomings': ('l_in_cont', 'l_in_cont_pe', l_mem_names[i - 1]),
                                      'vocab_size': vocab_size,
                                      'emb_size': emb_size,
                                      'A': WA,
                                      'C': WC,
                                      'AT': WTA,
                                      'CT': WTC,
                                      'nonlin': nonlin})])

    # answers-----------------------------------------------------------------------
    l_emb_f_a_names = ['l_emb_f_a{}'.format(i) for i in range(answ_n)]
    for i in range(answ_n):
        layers.extend([(EncodingFullLayer, {'name': l_emb_f_a_names[i],
                                            'incomings': (l_a_names[i], l_a_pe_names[i]),
                                            'vocab_size': vocab_size,
                                            'emb_size': emb_size,
                                            'W': tr_variables['WAnsw'],
                                            'WT': None})])

    # ------------------------------------------------------------concatenate layers
    layers.extend([(LL.ConcatLayer, {'name': 'l_qma_concat',
                                     'incomings': l_mem_names + l_emb_f_a_names})])

    # --------------------------------------------------------------------RNN layers
    layers.extend([(RNN, {'name': 'l_qa_rnn_f',
                          'incoming': 'l_qma_concat',
                          # 'mask_input': 'l_qamask_concat',
                          'num_units': rnn_size,
                          'backwards': False,
                          'only_return_final': False,
                          'grad_clipping': grad_clip})])
    layers.extend([(RNN, {'name': 'l_qa_rnn_b',
                          'incoming': 'l_qma_concat',
                          # 'mask_input': 'l_qamask_concat',
                          'num_units': rnn_size,
                          'backwards': True,
                          'only_return_final': False,
                          'grad_clipping': grad_clip})])
    layers.extend([(LL.SliceLayer, {'name': 'l_qa_rnn_f_sl',
                                    'incoming': 'l_qa_rnn_f',
                                    'indices': slice(-answ_n, None),
                                    'axis': 1})])
    layers.extend([(LL.SliceLayer, {'name': 'l_qa_rnn_b_sl',
                                    'incoming': 'l_qa_rnn_b',
                                    'indices': slice(-answ_n, None),
                                    'axis': 1})])
    layers.extend([(LL.ElemwiseMergeLayer, {'name': 'l_qa_rnn_conc',
                                            'incomings': ('l_qa_rnn_f_sl', 'l_qa_rnn_b_sl'),
                                            'merge_function': T.add})])

    # -----------------------------------------------------------------pooling layer
    # layers.extend([(LL.DimshuffleLayer, {'name': 'l_qa_rnn_conc_',
    #                                      'incoming': 'l_qa_rnn_conc', 'pattern': (0, 'x', 1)})])
    layers.extend([(LL.Pool1DLayer, {'name': 'l_qa_pool',
                                     'incoming': 'l_qa_rnn_conc',
                                     'pool_size': pool_size,
                                     'mode': 'max'})])

    # ------------------------------------------------------------------dense layers
    l_dence_names = ['l_dence_{}'.format(i) for i, _ in enumerate(dence_l)]
    if dropout:
        layers.extend([(LL.DropoutLayer, {'name': 'l_dence_do', 'p': dropout})])
    for i, d in enumerate(dence_l):
        if i < len(dence_l) - 1:
            nonlin = LN.tanh
        else:
            nonlin = LN.softmax
        layers.extend([(LL.DenseLayer, {'name': l_dence_names[i],
                                        'num_units': d,
                                        'nonlinearity': nonlin})])
        if i < len(dence_l) - 1 and dropout:
            layers.extend([(LL.DropoutLayer, {'name': l_dence_names[i] + 'do', 'p': dropout})])

    if isinstance(valid_indices, np.ndarray) or isinstance(valid_indices, list):
        train_split = TrainSplit_indices(valid_indices=valid_indices)
    else:
        train_split = TrainSplit(eval_size=valid_indices, stratify=False)

    if permute_answ or permute_cont:
        batch_iterator_train = PermIterator(batch_size, permute_answ, permute_cont)
    else:
        batch_iterator_train = BatchIterator(batch_size=batch_size)

    def loss(x, t):
        return LO.aggregate(LO.categorical_crossentropy(T.clip(x, 1e-6, 1. - 1e-6), t))
        # return LO.aggregate(LO.squared_error(T.clip(x, 1e-6, 1. - 1e-6), t))

    nnet = NeuralNet(
        y_tensor_type=T.ivector,
        layers=layers,
        update=updates,
        update_learning_rate=lr,
        # update_epsilon=1e-7,
        objective_loss_function=loss,
        regression=False,
        verbose=2,
        batch_iterator_train=batch_iterator_train,
        batch_iterator_test=BatchIterator(batch_size=batch_size // 2),
        # batch_iterator_train=BatchIterator(batch_size=batch_size),
        # batch_iterator_test=BatchIterator(batch_size=batch_size),
        # train_split=TrainSplit(eval_size=eval_size)
        train_split=train_split,
        on_batch_finished=[zero_memnn])
    nnet.initialize()
    PrintLayerInfo()(nnet)
    return nnet

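# Usage sketch (illustrative sizes only, not taken from the original code): build an
# end-to-end memory network for a 4-way multiple-choice QA task; the returned object
# is a nolearn NeuralNet, so it is trained with the usual fit() call once the inputs
# are prepared to match the InputLayer shapes declared above.
#
#     nnet = make_memnn(vocab_size=5000, cont_sl=20, cont_wl=30, quest_wl=30,
#                       answ_wl=10, rnn_size=64, num_hops=3, batch_size=16)
#     # nnet.fit(X, y)
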
    input_shape=(None, num_features),
    dense_num_units=64,
    narrow_num_units=48,
    denseReverse1_num_units=64,
    denseReverse2_num_units=128,
    output_num_units=128,
    # input_nonlinearity=None,  # nonlinearities.sigmoid,
    # dense_nonlinearity=nonlinearities.tanh,
    narrow_nonlinearity=nonlinearities.softplus,
    # denseReverse1_nonlinearity=nonlinearities.tanh,
    denseReverse2_nonlinearity=nonlinearities.softplus,
    output_nonlinearity=nonlinearities.linear,  # nonlinearities.softmax,
    # dropout0_p=0.1,
    dropout1_p=0.01,
    dropout2_p=0.001,
    regression=True,
    verbose=1)

ae.initialize()
PrintLayerInfo()(ae)

maybe_this_is_a_history = ae.fit(Z, Z)
# learned_parameters = ae.get_all_params_values()
# np.save("task4/learned_parameter.npy", learned_parameters)
# SaveWeights(path='task4/koebi_train_history_AE')(ae, maybe_this_is_a_history)
ae.save_params_to('task4/koebi_train_history_AE2')

def local_test(feature_vectors_df, labels_df, k=2):
    # labeltjes = [None] * labels_df.values.shape[0]
    labeltjes = labels_df.values
    print(labeltjes.shape)
    labeltjes -= 1  # shift labels so they start at 0
    labeltjes = labeltjes.ravel().tolist()

    kf = StratifiedKFold(labeltjes, n_folds=k, shuffle=True)
    # kf = StratifiedKFold(len(feature_vectors_df.index), n_folds=k, shuffle=True)
    # kf = KFold(500, n_folds=k, shuffle=True, random_state=1337)
    confusion_matrices_folds = []
    for train, test in kf:
        # Divide the samples in a training and validation set (using StratifiedKFold)
        X_train = feature_vectors_df.values[train, :]
        X_test = feature_vectors_df.values[test, :]
        y_train = [labeltjes[i] for i in train]
        y_test = [labeltjes[i] for i in test]

        model = build_nn(nr_features=X_train.shape[1])
        model.initialize()
        layer_info = PrintLayerInfo()
        layer_info(model)

        # Fit our model
        y_train = np.reshape(np.asarray(y_train, dtype='int32'), (-1, 1)).ravel()
        # print(y_train)
        # print("Train feature vectors shape: " + X_train.shape.__str__())
        # print("Train labels shape:" + len(y_train).__str__())
        # print("X_train as array shape: " + str(X_train.shape))
        # print("y_train as array shape: " + str(np.reshape(np.asarray(y_train), (-1, 1)).shape))
        model.fit(X_train, np.reshape(np.asarray(y_train), (-1, 1)).ravel())

        preds = model.predict(X_test)
        # Collect the correctly classified predictions
        c = [preds[i] for i in range(min(len(y_test), len(preds))) if preds[i] == y_test[i]]
        # checks = len([i for i, j in zip(preds, np.reshape(np.asarray(y_train), (-1, 1))) if i == j])
        model = None
        del model

        # Save the confusion matrix for this fold and plot it
        confusion_matrix = sklearn.metrics.confusion_matrix(y_test, preds)
        confusion_matrices_folds.append(confusion_matrix)
        # print(preds.tolist())
        # print("number of ones: " + str(sum(preds)))
        # print(y_test)
        # print(c)
        print("Accuracy for fold: " + str((len(c) * 1.0) / (len(y_test) * 1.0))
              + "\n\n\n\n\n-----------------------------\n\n\n")

    # Let's plot the average confusion matrix over all folds
    sum = confusion_matrices_folds[0] * 1.0
    for i in range(1, len(confusion_matrices_folds)):
        sum += confusion_matrices_folds[i]
    sum /= len(confusion_matrices_folds)
    metrics.plot_confusion_matrix(sum)

def make_grnn(batch_size, emb_size, g_hidden_size, word_n, wc_num, dence,
              wsm_num=1, rnn_type='LSTM', rnn_size=12, dropout_d=0.5,
              # pooling='mean',
              quest_na=4, gradient_steps=-1, valid_indices=None, lr=0.05,
              grad_clip=10):
    def select_rnn(x):
        return {
            'RNN': LL.RecurrentLayer,
            'LSTM': LL.LSTMLayer,
            'GRU': LL.GRULayer,
        }.get(x, LL.LSTMLayer)

    # dence = dence + [1]
    RNN = select_rnn(rnn_type)

    # ------------------------------------------------------------------input layers
    layers = [
        (LL.InputLayer, {'name': 'l_in_se_q', 'shape': (None, word_n, emb_size)}),
        (LL.InputLayer, {'name': 'l_in_se_a', 'shape': (None, quest_na, word_n, emb_size)}),
        (LL.InputLayer, {'name': 'l_in_mask_q', 'shape': (None, word_n)}),
        (LL.InputLayer, {'name': 'l_in_mask_a', 'shape': (None, quest_na, word_n)}),
        (LL.InputLayer, {'name': 'l_in_mask_ri_q', 'shape': (None, word_n)}),
        (LL.InputLayer, {'name': 'l_in_mask_ri_a', 'shape': (None, quest_na, word_n)}),
        (LL.InputLayer, {'name': 'l_in_wt_q', 'shape': (None, word_n, word_n)}),
        (LL.InputLayer, {'name': 'l_in_wt_a', 'shape': (None, word_n, quest_na, word_n)}),
        (LL.InputLayer, {'name': 'l_in_act_', 'shape': (None, word_n, g_hidden_size)}),
        (LL.InputLayer, {'name': 'l_in_act__', 'shape': (None, word_n, word_n, g_hidden_size)}),
    ]

    # ------------------------------------------------------------------slice layers
    # l_qs = []
    # l_cas = []
    l_ase_names = ['l_ase_{}'.format(i) for i in range(quest_na)]
    l_amask_names = ['l_amask_{}'.format(i) for i in range(quest_na)]
    l_amask_ri_names = ['l_amask_ri_{}'.format(i) for i in range(quest_na)]
    l_awt_names = ['l_awt_{}'.format(i) for i in range(quest_na)]
    for i in range(quest_na):
        layers.extend([(LL.SliceLayer, {'name': l_ase_names[i],
                                        'incoming': 'l_in_se_a',
                                        'indices': i,
                                        'axis': 1})])
    for i in range(quest_na):
        layers.extend([(LL.SliceLayer, {'name': l_amask_names[i],
                                        'incoming': 'l_in_mask_a',
                                        'indices': i,
                                        'axis': 1})])
    for i in range(quest_na):
        layers.extend([(LL.SliceLayer, {'name': l_amask_ri_names[i],
                                        'incoming': 'l_in_mask_ri_a',
                                        'indices': i,
                                        'axis': 1})])
    for i in range(quest_na):
        layers.extend([(LL.SliceLayer, {'name': l_awt_names[i],
                                        'incoming': 'l_in_wt_a',
                                        'indices': i,
                                        'axis': 1})])

    # -------------------------------------------------------------------GRNN layers
    WC = theano.shared(np.random.randn(wc_num, g_hidden_size, g_hidden_size).astype('float32'))
    # WC = LI.Normal(0.1)
    WSM = theano.shared(np.random.randn(emb_size, g_hidden_size).astype('float32'))
    b = theano.shared(np.ones(g_hidden_size).astype('float32'))
    # b = lasagne.init.Constant(1.0)
    layers.extend([(GRNNLayer, {'name': 'l_q_grnn',
                                'incomings': ['l_in_se_q', 'l_in_mask_q', 'l_in_wt_q', 'l_in_act_', 'l_in_act__'],
                                'emb_size': emb_size,
                                'hidden_size': g_hidden_size,
                                'word_n': word_n,
                                'wc_num': wc_num,
                                'wsm_num': wsm_num,
                                'only_return_final': False,
                                'WC': WC,
                                'WSM': WSM,
                                'b': b})])
    l_a_grnns_names = ['l_a_grnn_{}'.format(i) for i in range(quest_na)]
    for i, l_a_grnns_name in enumerate(l_a_grnns_names):
        layers.extend([(GRNNLayer, {'name': l_a_grnns_name,
                                    'incomings': [l_ase_names[i], l_amask_names[i], l_awt_names[i], 'l_in_act_', 'l_in_act__'],
                                    'emb_size': emb_size,
                                    'hidden_size': g_hidden_size,
                                    'word_n': word_n,
                                    'wc_num': wc_num,
                                    'wsm_num': wsm_num,
                                    'only_return_final': False,
                                    'WC': WC,
                                    'WSM': WSM,
                                    'b': b})])

    # ------------------------------------------------------------concatenate layers
    layers.extend([(LL.ConcatLayer, {'name': 'l_qa_concat',
                                     'incomings': ['l_q_grnn'] + l_a_grnns_names})])
    layers.extend([(LL.ConcatLayer, {'name': 'l_qamask_concat',
                                     'incomings': ['l_in_mask_ri_q'] + l_amask_ri_names})])

    # --------------------------------------------------------------------RNN layers
    layers.extend([(RNN, {'name': 'l_qa_rnn_f',
                          'incoming': 'l_qa_concat',
                          'mask_input': 'l_qamask_concat',
                          'num_units': rnn_size,
                          'backwards': False,
                          'only_return_final': True,
                          'grad_clipping': grad_clip})])
    layers.extend([(RNN, {'name': 'l_qa_rnn_b',
                          'incoming': 'l_qa_concat',
                          'mask_input': 'l_qamask_concat',
                          'num_units': rnn_size,
                          'backwards': True,
                          'only_return_final': True,
                          'grad_clipping': grad_clip})])
    layers.extend([(LL.ElemwiseSumLayer, {'name': 'l_qa_rnn_conc',
                                          'incomings': ['l_qa_rnn_f', 'l_qa_rnn_b']})])

    # -----------------------------------------------------------------pooling layer
    # l_qa_pool = layers.extend([(LL.ExpressionLayer, {'name': 'l_qa_pool',
    #                                                  'incoming': l_qa_rnn_conc,
    #                                                  'function': lambda X: X.mean(-1),
    #                                                  'output_shape': 'auto'})])

    # ------------------------------------------------------------------dense layers
    l_dence_names = ['l_dence_{}'.format(i) for i, _ in enumerate(dence)]
    if dropout_d:
        layers.extend([(LL.DropoutLayer, {'name': 'l_dence_do' + 'do', 'p': dropout_d})])
    for i, d in enumerate(dence):
        if i < len(dence) - 1:
            nonlin = LN.tanh
        else:
            nonlin = LN.softmax
        layers.extend([(LL.DenseLayer, {'name': l_dence_names[i],
                                        'num_units': d,
                                        'nonlinearity': nonlin})])
        if i < len(dence) - 1 and dropout_d:
            layers.extend([(LL.DropoutLayer, {'name': l_dence_names[i] + 'do', 'p': dropout_d})])

    def loss(x, t):
        return LO.aggregate(LO.categorical_crossentropy(T.clip(x, 1e-6, 1. - 1e-6), t))
        # return LO.aggregate(LO.squared_error(T.clip(x, 1e-6, 1. - 1e-6), t))

    if isinstance(valid_indices, np.ndarray) or isinstance(valid_indices, list):
        train_split = TrainSplit_indices(valid_indices=valid_indices)
    else:
        train_split = TrainSplit(eval_size=valid_indices, stratify=False)

    nnet = NeuralNet(
        y_tensor_type=T.ivector,
        layers=layers,
        update=LU.adagrad,
        update_learning_rate=lr,
        # update_epsilon=1e-7,
        objective_loss_function=loss,
        regression=False,
        verbose=2,
        batch_iterator_train=PermIterator(batch_size=batch_size),
        batch_iterator_test=BatchIterator(batch_size=batch_size // 2),
        # batch_iterator_train=BatchIterator(batch_size=batch_size),
        # batch_iterator_test=BatchIterator(batch_size=batch_size),
        # train_split=TrainSplit(eval_size=eval_size)
        train_split=train_split)
    nnet.initialize()
    PrintLayerInfo()(nnet)
    return nnet

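# Usage sketch (illustrative sizes only, not taken from the original code): the
# GRNN-based reader exposes the same nolearn NeuralNet interface as make_memnn above,
# with the last entry of `dence` giving the number of answer classes.
#
#     nnet = make_grnn(batch_size=16, emb_size=50, g_hidden_size=32, word_n=40,
#                      wc_num=3, dence=[64, 4], rnn_size=12, valid_indices=0.2)
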
    narrow_nonlinearity=nonlinearities.softplus,
    reverse_nonlinearity=nonlinearities.sigmoid,
    coutput_nonlinearity=nonlinearities.softmax,
    # dropout0_p=0.1,
    dropout1_p=0.01,
    # regression=True,
    regression=False,
    verbose=1)

nn.initialize()
nn.load_params_from('task4/koebi_train_history_AE')
PrintLayerInfo()(nn)

nn.fit(X, Y)

test = pd.read_hdf("task4/test.h5", "test")
id_col = test.index
test_data = np.array(test)
test_data = skpre.StandardScaler().fit_transform(test_data)
test_prediction = nn.predict(test_data)

# Write each prediction and its line number into a csv file
with open('task4/' + result_file_name + '.csv', 'wb') as csvfile:
    fieldnames = ['Id', 'y']
    writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
    # print(test_prediction)
    writer.writeheader()
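    # The original snippet stops after writing the header; a minimal sketch of the
    # row-writing step that presumably follows, inferred from the 'Id'/'y' fieldnames
    # and the `id_col` index collected above (hypothetical completion):
    for sample_id, label in zip(id_col, test_prediction):
        writer.writerow({'Id': sample_id, 'y': label})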