# -*- coding: utf-8 -*-
# NOTE: this is Python 2 code (print statements, xrange, cPickle, izip).
import gzip
import cPickle
from itertools import izip

import numpy as np
import sklearn.linear_model
from sklearn import preprocessing
from sklearn.svm import LinearSVC, SVC
from sklearn.cross_validation import KFold, train_test_split

# The helpers used below (cal_epochs, trainSda, train_a_Sda,
# train_a_MultipleAEs, pretrain_a_Sda_with_estop, performance_score,
# saveAsCsv, Preprocessing_Scaler_with_mean_point5, Sda_factory,
# Sda_xy_factory, DNN_factory, Parellel_Sda_factory) and the globals
# X_total, y_total, filename and current_date are assumed to come from
# this project's own modules.


def run_models(settings=None):
    analysis_scr = []
    with_auc_score = settings['with_auc_score']
    n_outs = settings['n_outs']
    for subset_no in xrange(1, settings['number_iterations'] + 1):
        print("Subset:", subset_no)

        ################## generate data ###################
        # Draw 100,000 random image pairs (A, B); a pair is positive when
        # label(A) - label(B) == 1.
        array_A = []
        array_B = []
        for i in range(100000):
            array_A.append(np.random.random_integers(0, 59999))
            array_B.append(np.random.random_integers(0, 59999))
        pos_index = []
        neg_index = []
        for index in xrange(100000):
            if y_total[array_A[index]] - y_total[array_B[index]] == 1:
                pos_index.append(index)
            else:
                neg_index.append(index)
        print 'number of positive examples', len(pos_index)
        # Balance the classes by keeping as many negatives as positives.
        selected_neg_index = neg_index[:len(pos_index)]
        array_A = np.array(array_A)
        array_B = np.array(array_B)
        index_for_positive_image_A = array_A[pos_index]
        index_for_positive_image_B = array_B[pos_index]
        index_for_neg_image_A = array_A[selected_neg_index]
        index_for_neg_image_B = array_B[selected_neg_index]
        X_pos_A = X_total[index_for_positive_image_A]
        X_pos_B = X_total[index_for_positive_image_B]
        X_pos_whole = np.hstack((X_pos_A, X_pos_B))
        X_neg_A = X_total[index_for_neg_image_A]
        X_neg_B = X_total[index_for_neg_image_B]
        X_neg_whole = np.hstack((X_neg_A, X_neg_B))
        print X_pos_A.shape, X_pos_B.shape, X_pos_whole.shape
        print X_neg_A.shape, X_neg_B.shape, X_neg_whole.shape
        X_whole = np.vstack((X_pos_whole, X_neg_whole))
        print X_whole.shape
        y_pos = np.ones(X_pos_whole.shape[0])
        y_neg = np.zeros(X_neg_whole.shape[0])
        y_whole = np.concatenate([y_pos, y_neg])
        print y_whole.shape
        x_train_pre_validation, x_test, y_train_pre_validation, y_test = \
            train_test_split(X_whole, y_whole, test_size=0.2, random_state=211)

        for number_of_training in settings['number_of_training']:
            x_train, x_validation, y_train, y_validation = train_test_split(
                x_train_pre_validation[:number_of_training],
                y_train_pre_validation[:number_of_training],
                test_size=0.2, random_state=21)
            # Alternative split kept from the original:
            # x_train, x_validation, y_train, y_validation = train_test_split(
            #     x_train_pre_validation[:], y_train_pre_validation[:],
            #     test_size=0.4, random_state=21)
            print x_train.shape, y_train.shape, x_validation.shape, \
                y_validation.shape, x_test.shape, y_test.shape
            x_train_minmax, x_validation_minmax, x_test_minmax = \
                x_train, x_validation, x_test
            train_X_reduced = x_train
            train_y_reduced = y_train
            test_X = x_test
            test_y = y_test
            y_train_minmax = y_train
            y_validation_minmax = y_validation
            y_test_minmax = y_test
            ### original data ###
            ################ end of data ####################

            standard_scaler = preprocessing.StandardScaler().fit(train_X_reduced)
            scaled_train_X = standard_scaler.transform(train_X_reduced)
            scaled_test_X = standard_scaler.transform(test_X)

            if settings['SVM']:
                print "SVM"
                Linear_SVC = LinearSVC(C=1, penalty="l2")
                Linear_SVC.fit(scaled_train_X, y_train)
                predicted_test_y = Linear_SVC.predict(scaled_test_X)
                isTest = True  # new
                analysis_scr.append((subset_no, number_of_training, 'SVM', isTest) +
                                    tuple(performance_score(test_y, predicted_test_y).values()))
                predicted_train_y = Linear_SVC.predict(scaled_train_X)  # new
                isTest = False  # new
                analysis_scr.append((subset_no, number_of_training, 'SVM', isTest) +
                                    tuple(performance_score(train_y_reduced, predicted_train_y).values()))
            if settings['SVM_RBF']:
                print "SVM_RBF"
                L1_SVC_RBF_Selector = SVC(C=1, gamma=0.01, kernel='rbf').fit(scaled_train_X, y_train)
                predicted_test_y = L1_SVC_RBF_Selector.predict(scaled_test_X)
                isTest = True  # new
                analysis_scr.append((subset_no, number_of_training, 'SVM_RBF', isTest) +
                                    tuple(performance_score(test_y, predicted_test_y).values()))
                predicted_train_y = L1_SVC_RBF_Selector.predict(scaled_train_X)  # new
                isTest = False  # new
                analysis_scr.append((subset_no, number_of_training, 'SVM_RBF', isTest) +
                                    tuple(performance_score(train_y_reduced, predicted_train_y).values()))
            if settings['SVM_POLY']:
                print "SVM_POLY"
                L1_SVC_POLY_Selector = SVC(C=1, kernel='poly').fit(scaled_train_X, train_y_reduced)
                predicted_test_y = L1_SVC_POLY_Selector.predict(scaled_test_X)
                isTest = True  # new
                analysis_scr.append((subset_no, number_of_training, 'SVM_POLY', isTest) +
                                    tuple(performance_score(test_y, predicted_test_y).values()))
                predicted_train_y = L1_SVC_POLY_Selector.predict(scaled_train_X)  # new
                isTest = False  # new
                analysis_scr.append((subset_no, number_of_training, 'SVM_POLY', isTest) +
                                    tuple(performance_score(train_y_reduced, predicted_train_y).values()))
            if settings['Log']:
                print "Log"
                log_clf_l2 = sklearn.linear_model.LogisticRegression(C=1, penalty='l2')
                log_clf_l2.fit(scaled_train_X, train_y_reduced)
                predicted_test_y = log_clf_l2.predict(scaled_test_X)
                isTest = True  # new
                analysis_scr.append((subset_no, number_of_training, 'Log', isTest) +
                                    tuple(performance_score(test_y, predicted_test_y).values()))
                predicted_train_y = log_clf_l2.predict(scaled_train_X)  # new
                isTest = False  # new
                analysis_scr.append((subset_no, number_of_training, 'Log', isTest) +
                                    tuple(performance_score(train_y_reduced, predicted_train_y).values()))

            # direct deep learning
            finetune_lr = settings['finetune_lr']
            batch_size = settings['batch_size']
            pretraining_epochs = cal_epochs(settings['pretraining_interations'],
                                            x_train_minmax, batch_size=batch_size)
            # pretrain_lr = 0.001
            pretrain_lr = settings['pretrain_lr']
            training_epochs = cal_epochs(settings['training_interations'],
                                         x_train_minmax, batch_size=batch_size)
            hidden_layers_sizes = settings['hidden_layers_sizes']
            corruption_levels = settings['corruption_levels']
            settings['lrate'] = settings['lrate_pre'] + str(training_epochs)
            if settings['DL']:
                print "direct deep learning"
                sda = trainSda(x_train_minmax, y_train,
                               x_validation_minmax, y_validation,
                               x_test_minmax, test_y,
                               hidden_layers_sizes=hidden_layers_sizes,
                               corruption_levels=corruption_levels,
                               batch_size=batch_size,
                               training_epochs=training_epochs,
                               pretraining_epochs=pretraining_epochs,
                               pretrain_lr=pretrain_lr,
                               finetune_lr=finetune_lr,
                               n_outs=n_outs)
                print 'hidden_layers_sizes:', hidden_layers_sizes
                print 'corruption_levels:', corruption_levels
                test_predicted = sda.predict(x_test_minmax)
                isTest = True  # new
                analysis_scr.append((subset_no, number_of_training, 'DL', isTest) +
                                    tuple(performance_score(y_test, test_predicted).values()))
                training_predicted = sda.predict(x_train_minmax)
                isTest = False  # new
                analysis_scr.append((subset_no, number_of_training, 'DL', isTest) +
                                    tuple(performance_score(y_train, training_predicted).values()))

            #### transformed original data ####
            x = train_X_reduced
            a_MAE_original = train_a_MultipleAEs(x, pretraining_epochs=pretraining_epochs,
                                                 pretrain_lr=pretrain_lr, batch_size=batch_size,
                                                 hidden_layers_sizes=hidden_layers_sizes,
                                                 corruption_levels=corruption_levels)
            new_x_train_minmax_A = a_MAE_original.transform(train_X_reduced)
            new_x_test_minmax_A = a_MAE_original.transform(x_test_minmax)
            standard_scaler = preprocessing.StandardScaler().fit(new_x_train_minmax_A)
            new_x_train_scaled = standard_scaler.transform(new_x_train_minmax_A)
            new_x_test_scaled = standard_scaler.transform(new_x_test_minmax_A)
            new_x_train_combo = np.hstack((scaled_train_X, new_x_train_scaled))
            new_x_test_combo = np.hstack((scaled_test_X, new_x_test_scaled))
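            # The SAE_* branches below reuse the stacked-autoencoder
            # representation computed above as input features for shallow
            # classifiers (linear SVM, logistic regression, RBF / poly SVM).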
            if settings['SAE_SVM']:  # SAE_SVM
                print 'SAE followed by SVM'
                Linear_SVC = LinearSVC(C=1, penalty="l2")
                Linear_SVC.fit(new_x_train_scaled, train_y_reduced)
                predicted_test_y = Linear_SVC.predict(new_x_test_scaled)
                isTest = True  # new
                analysis_scr.append((subset_no, number_of_training, 'SAE_SVM', isTest) +
                                    tuple(performance_score(test_y, predicted_test_y).values()))
                predicted_train_y = Linear_SVC.predict(new_x_train_scaled)  # new
                isTest = False  # new
                analysis_scr.append((subset_no, number_of_training, 'SAE_SVM', isTest) +
                                    tuple(performance_score(train_y_reduced, predicted_train_y).values()))
            if settings['SAE_Log']:
                print 'SAE followed by Log'
                log_clf_l2 = sklearn.linear_model.LogisticRegression(C=1, penalty='l2')
                log_clf_l2.fit(new_x_train_scaled, train_y_reduced)
                predicted_test_y = log_clf_l2.predict(new_x_test_scaled)
                isTest = True  # new
                analysis_scr.append((subset_no, number_of_training, 'SAE_Log', isTest) +
                                    tuple(performance_score(test_y, predicted_test_y).values()))
                predicted_train_y = log_clf_l2.predict(new_x_train_scaled)  # new
                isTest = False  # new
                analysis_scr.append((subset_no, number_of_training, 'SAE_Log', isTest) +
                                    tuple(performance_score(train_y_reduced, predicted_train_y).values()))
            if settings['SAE_SVM_RBF']:  # SAE_SVM
                print 'SAE followed by SVM RBF'
                L1_SVC_RBF_Selector = SVC(C=1, gamma=0.01, kernel='rbf').fit(new_x_train_scaled, train_y_reduced)
                predicted_test_y = L1_SVC_RBF_Selector.predict(new_x_test_scaled)
                isTest = True  # new
                analysis_scr.append((subset_no, number_of_training, 'SAE_SVM_RBF', isTest) +
                                    tuple(performance_score(test_y, predicted_test_y).values()))
                predicted_train_y = L1_SVC_RBF_Selector.predict(new_x_train_scaled)  # new
                isTest = False  # new
                analysis_scr.append((subset_no, number_of_training, 'SAE_SVM_RBF', isTest) +
                                    tuple(performance_score(train_y_reduced, predicted_train_y).values()))
            if settings['SAE_SVM_POLY']:  # SAE_SVM
                print 'SAE followed by SVM POLY'
                L1_SVC_RBF_Selector = SVC(C=1, kernel='poly').fit(new_x_train_scaled, train_y_reduced)
                predicted_test_y = L1_SVC_RBF_Selector.predict(new_x_test_scaled)
                isTest = True  # new
                analysis_scr.append((subset_no, number_of_training, 'SAE_SVM_POLY', isTest) +
                                    tuple(performance_score(test_y, predicted_test_y).values()))
                predicted_train_y = L1_SVC_RBF_Selector.predict(new_x_train_scaled)  # new
                isTest = False  # new
                analysis_scr.append((subset_no, number_of_training, 'SAE_SVM_POLY', isTest) +
                                    tuple(performance_score(train_y_reduced, predicted_train_y).values()))

            #### separated transformed data ####
            y_test = test_y
            print 'deep learning using split network'
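            # Split network: image A (first half of each pair vector) and
            # image B (second half) each get their own autoencoder with
            # half-sized hidden layers; the two learned codes are then
            # concatenated back into one feature vector.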
            # get the new representation for the A set (first 784-D half)
            pretraining_epochs = cal_epochs(settings['pretraining_interations'],
                                            x_train_minmax, batch_size=batch_size)
            x = x_train_minmax[:, :x_train_minmax.shape[1] / 2]
            print "original shape for A", x.shape
            a_MAE_A = train_a_MultipleAEs(x, pretraining_epochs=pretraining_epochs,
                                          pretrain_lr=pretrain_lr, batch_size=batch_size,
                                          hidden_layers_sizes=[s / 2 for s in hidden_layers_sizes],
                                          corruption_levels=corruption_levels)
            new_x_train_minmax_A = a_MAE_A.transform(x_train_minmax[:, :x_train_minmax.shape[1] / 2])
            x = x_train_minmax[:, x_train_minmax.shape[1] / 2:]
            print "original shape for B", x.shape
            a_MAE_B = train_a_MultipleAEs(x, pretraining_epochs=pretraining_epochs,
                                          pretrain_lr=pretrain_lr, batch_size=batch_size,
                                          hidden_layers_sizes=[s / 2 for s in hidden_layers_sizes],
                                          corruption_levels=corruption_levels)
            new_x_train_minmax_B = a_MAE_B.transform(x_train_minmax[:, x_train_minmax.shape[1] / 2:])
            new_x_test_minmax_A = a_MAE_A.transform(x_test_minmax[:, :x_test_minmax.shape[1] / 2])
            new_x_test_minmax_B = a_MAE_B.transform(x_test_minmax[:, x_test_minmax.shape[1] / 2:])
            new_x_validation_minmax_A = a_MAE_A.transform(x_validation_minmax[:, :x_validation_minmax.shape[1] / 2])
            new_x_validation_minmax_B = a_MAE_B.transform(x_validation_minmax[:, x_validation_minmax.shape[1] / 2:])
            new_x_train_minmax_whole = np.hstack((new_x_train_minmax_A, new_x_train_minmax_B))
            new_x_test_minmax_whole = np.hstack((new_x_test_minmax_A, new_x_test_minmax_B))
            new_x_validationt_minmax_whole = np.hstack((new_x_validation_minmax_A, new_x_validation_minmax_B))
            standard_scaler = preprocessing.StandardScaler().fit(new_x_train_minmax_whole)
            new_x_train_minmax_whole_scaled = standard_scaler.transform(new_x_train_minmax_whole)
            new_x_test_minmax_whole_scaled = standard_scaler.transform(new_x_test_minmax_whole)
            if settings['DL_S']:
                # deep learning using split network
                sda_transformed = trainSda(new_x_train_minmax_whole, y_train,
                                           new_x_validationt_minmax_whole, y_validation,
                                           new_x_test_minmax_whole, y_test,
                                           hidden_layers_sizes=hidden_layers_sizes,
                                           corruption_levels=corruption_levels,
                                           batch_size=batch_size,
                                           training_epochs=training_epochs,
                                           pretraining_epochs=pretraining_epochs,
                                           pretrain_lr=pretrain_lr,
                                           finetune_lr=finetune_lr)
                print 'hidden_layers_sizes:', hidden_layers_sizes
                print 'corruption_levels:', corruption_levels
                predicted_test_y = sda_transformed.predict(new_x_test_minmax_whole)
                y_test = test_y
                isTest = True  # new
                analysis_scr.append((subset_no, number_of_training, 'DL_S', isTest) +
                                    tuple(performance_score(y_test, predicted_test_y, with_auc_score).values()))
                training_predicted = sda_transformed.predict(new_x_train_minmax_whole)
                isTest = False  # new
                analysis_scr.append((subset_no, number_of_training, 'DL_S', isTest) +
                                    tuple(performance_score(y_train, training_predicted, with_auc_score).values()))
            if settings['SAE_S_SVM']:
                print 'SAE_S followed by SVM'
                Linear_SVC = LinearSVC(C=1, penalty="l2")
                Linear_SVC.fit(new_x_train_minmax_whole_scaled, train_y_reduced)
                predicted_test_y = Linear_SVC.predict(new_x_test_minmax_whole_scaled)
                isTest = True  # new
                analysis_scr.append((subset_no, number_of_training, 'SAE_S_SVM', isTest) +
                                    tuple(performance_score(test_y, predicted_test_y, with_auc_score).values()))
                predicted_train_y = Linear_SVC.predict(new_x_train_minmax_whole_scaled)  # new
                isTest = False  # new
                analysis_scr.append((subset_no, number_of_training, 'SAE_S_SVM', isTest) +
                                    tuple(performance_score(train_y_reduced, predicted_train_y, with_auc_score).values()))
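            # The SAE_S_* branches below repeat the shallow-classifier
            # comparison, but on the scaled split-network features.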
            if settings['SAE_S_SVM_RBF']:
                print 'SAE S followed by SVM RBF'
                L1_SVC_RBF_Selector = SVC(C=1, gamma=0.01, kernel='rbf').fit(new_x_train_minmax_whole_scaled, train_y_reduced)
                predicted_test_y = L1_SVC_RBF_Selector.predict(new_x_test_minmax_whole_scaled)
                isTest = True  # new
                analysis_scr.append((subset_no, number_of_training, 'SAE_S_SVM_RBF', isTest) +
                                    tuple(performance_score(test_y, predicted_test_y, with_auc_score).values()))
                predicted_train_y = L1_SVC_RBF_Selector.predict(new_x_train_minmax_whole_scaled)  # new
                isTest = False  # new
                analysis_scr.append((subset_no, number_of_training, 'SAE_S_SVM_RBF', isTest) +
                                    tuple(performance_score(train_y_reduced, predicted_train_y, with_auc_score).values()))
            if settings['SAE_S_SVM_POLY']:  # SAE_SVM
                print 'SAE S followed by SVM POLY'
                L1_SVC_RBF_Selector = SVC(C=1, kernel='poly').fit(new_x_train_minmax_whole_scaled, train_y_reduced)
                predicted_test_y = L1_SVC_RBF_Selector.predict(new_x_test_minmax_whole_scaled)
                isTest = True  # new
                analysis_scr.append((subset_no, number_of_training, 'SAE_S_SVM_POLY', isTest) +
                                    tuple(performance_score(test_y, predicted_test_y, with_auc_score).values()))
                predicted_train_y = L1_SVC_RBF_Selector.predict(new_x_train_minmax_whole_scaled)  # new
                isTest = False  # new
                analysis_scr.append((subset_no, number_of_training, 'SAE_S_SVM_POLY', isTest) +
                                    tuple(performance_score(train_y_reduced, predicted_train_y, with_auc_score).values()))

            settings['epoch_number'] = cal_epochs(settings['pretraining_interations'],
                                                  x_train_minmax, batch_size=batch_size)
            # deep xy autoencoders
            settings['n_ins'] = x_train_minmax.shape[1]
            if settings['DL_xy']:
                cfg = settings.copy()
                cfg['weight_y'] = 0.1
                print 'DL_xy'
                train_x = x_train_minmax
                train_y = y_train_minmax
                sdaf = Sda_xy_factory(cfg)
                sdaf.sda.pretraining(train_x, train_y)
                dnnf = DNN_factory(cfg)
                dnnf.dnn.load_pretrain_from_Sda(sdaf.sda)
                dnnf.dnn.finetuning((x_train_minmax, y_train_minmax),
                                    (x_validation_minmax, y_validation_minmax))
                training_predicted = dnnf.dnn.predict(x_train_minmax)
                y_train = y_train_minmax
                isTest = False  # new
                analysis_scr.append((subset_no, number_of_training, 'DL_xy', isTest) +
                                    tuple(performance_score(train_y_reduced, training_predicted, with_auc_score).values()))
                test_predicted = dnnf.dnn.predict(x_test_minmax)
                y_test = test_y
                isTest = True  # new
                analysis_scr.append((subset_no, number_of_training, 'DL_xy', isTest) +
                                    tuple(performance_score(test_y, test_predicted, with_auc_score).values()))
            if settings['Sda_xy_with_first']:
                cfg = settings.copy()
                cfg['weight_y'] = 0.1
                cfg['firstlayer_xy'] = 1
                print 'firstlayer_xy'
                train_x = x_train_minmax
                train_y = y_train_minmax
                sdaf = Sda_xy_factory(cfg)
                sdaf.sda.pretraining(train_x, train_y)
                dnnf = DNN_factory(cfg)
                dnnf.dnn.load_pretrain_from_Sda(sdaf.sda)
                dnnf.dnn.finetuning((x_train_minmax, y_train_minmax),
                                    (x_validation_minmax, y_validation_minmax))
                training_predicted = dnnf.dnn.predict(x_train_minmax)
                y_train = y_train_minmax
                isTest = False  # new
                analysis_scr.append((subset_no, number_of_training, 'Sda_xy_with_first', isTest) +
                                    tuple(performance_score(train_y_reduced, training_predicted, with_auc_score).values()))
                test_predicted = dnnf.dnn.predict(x_test_minmax)
                y_test = test_y
                isTest = True  # new
                analysis_scr.append((subset_no, number_of_training, 'Sda_xy_with_first', isTest) +
                                    tuple(performance_score(test_y, test_predicted, with_auc_score).values()))
            if settings['Sda_new']:
                print 'Sda_new'
                cfg = settings.copy()
                train_x = x_train_minmax
                train_y = y_train_minmax
                cfg['n_ins'] = train_x.shape[1]
                sdaf = Sda_factory(cfg)
                sda = sdaf.sda.pretraining(train_x=train_x)
                sdaf.dnn.finetuning((x_train_minmax, y_train_minmax),
                                    (x_validation_minmax, y_validation_minmax))
                training_predicted = sdaf.dnn.predict(x_train_minmax)
                y_train = y_train_minmax
                isTest = False  # new
                analysis_scr.append((subset_no, number_of_training, 'Sda_new', isTest) +
                                    tuple(performance_score(train_y_reduced, training_predicted, with_auc_score).values()))
                test_predicted = sdaf.dnn.predict(x_test_minmax)
                y_test = test_y
                isTest = True  # new
                analysis_scr.append((subset_no, number_of_training, 'Sda_new', isTest) +
                                    tuple(performance_score(test_y, test_predicted, with_auc_score).values()))
            if settings['DL_S_new']:
                # deep learning using split network
                print 'new deep learning using split network'
                cfg = settings.copy()
                p_sda = Parellel_Sda_factory(cfg)
                p_sda.supervised_training(x_train_minmax, x_validation_minmax,
                                          y_train_minmax, y_validation_minmax)
                isTest = False  # new
                training_predicted = p_sda.predict(x_train_minmax)
                y_train = y_train_minmax
                analysis_scr.append((subset_no, number_of_training, 'DL_S_new', isTest) +
                                    tuple(performance_score(train_y_reduced, training_predicted, with_auc_score).values()))
                isTest = True  # new
                y_test = test_y
                test_predicted = p_sda.predict(x_test_minmax)
                analysis_scr.append((subset_no, number_of_training, 'DL_S_new', isTest) +
                                    tuple(performance_score(test_y, test_predicted, with_auc_score).values()))
            if settings['DL_S_new_contraction']:
                print 'DL_S_new_contraction'
                cfg = settings.copy()
                cfg['contraction_level'] = 0.01
                p_sda = Parellel_Sda_factory(cfg)
                p_sda.supervised_training(x_train_minmax, x_validation_minmax,
                                          y_train_minmax, y_validation_minmax)
                isTest = False  # new
                training_predicted = p_sda.predict(x_train_minmax)
                y_train = y_train_minmax
                analysis_scr.append((subset_no, number_of_training, 'DL_S_new_contraction', isTest) +
                                    tuple(performance_score(train_y_reduced, training_predicted, with_auc_score).values()))
                isTest = True  # new
                y_test = test_y
                test_predicted = p_sda.predict(x_test_minmax)
                analysis_scr.append((subset_no, number_of_training, 'DL_S_new_contraction', isTest) +
                                    tuple(performance_score(test_y, test_predicted, with_auc_score).values()))
            if settings['DL_S_new_sparsity'] == 1:
                print 'DL_S_new_sparsity'
                cfg = settings.copy()
                cfg['sparsity'] = 0.01
                cfg['sparsity_weight'] = 0.01
                p_sda = Parellel_Sda_factory(cfg)
                p_sda.supervised_training(x_train_minmax, x_validation_minmax,
                                          y_train_minmax, y_validation_minmax)
                isTest = False  # new
                training_predicted = p_sda.predict(x_train_minmax)
                y_train = y_train_minmax
                analysis_scr.append((subset_no, number_of_training, 'DL_S_new_sparsity', isTest) +
                                    tuple(performance_score(train_y_reduced, training_predicted, with_auc_score).values()))
                isTest = True  # new
                y_test = test_y
                test_predicted = p_sda.predict(x_test_minmax)
                analysis_scr.append((subset_no, number_of_training, 'DL_S_new_sparsity', isTest) +
                                    tuple(performance_score(test_y, test_predicted, with_auc_score).values()))
            if settings['DL_S_new_weight_decay'] == 2:
                cfg = settings.copy()
                cfg['l2_reg'] = 0.01
                print 'l2_reg'
                p_sda = Parellel_Sda_factory(cfg)
                p_sda.supervised_training(x_train_minmax, x_validation_minmax,
                                          y_train_minmax, y_validation_minmax)
                isTest = False  # new
                training_predicted = p_sda.predict(x_train_minmax)
                y_train = y_train_minmax
                analysis_scr.append((subset_no, number_of_training, 'l2_reg', isTest) +
                                    tuple(performance_score(train_y_reduced, training_predicted, with_auc_score).values()))
                isTest = True  # new
                y_test = test_y
                test_predicted = p_sda.predict(x_test_minmax)
                analysis_scr.append((subset_no, number_of_training, 'l2_reg', isTest) +
                                    tuple(performance_score(test_y, test_predicted, with_auc_score).values()))
            if settings['DL_S_new_weight_decay'] == 1:
                print 'l1_reg'
                cfg = settings.copy()
                cfg['l1_reg'] = 0.01
                p_sda = Parellel_Sda_factory(cfg)
                p_sda.supervised_training(x_train_minmax, x_validation_minmax,
                                          y_train_minmax, y_validation_minmax)
                isTest = False  # new
                training_predicted = p_sda.predict(x_train_minmax)
                y_train = y_train_minmax
                analysis_scr.append((subset_no, number_of_training, 'l1_reg', isTest) +
                                    tuple(performance_score(train_y_reduced, training_predicted, with_auc_score).values()))
                isTest = True  # new
                y_test = test_y
                test_predicted = p_sda.predict(x_test_minmax)
                analysis_scr.append((subset_no, number_of_training, 'l1_reg', isTest) +
                                    tuple(performance_score(test_y, test_predicted, with_auc_score).values()))
            if settings['DL_S_new_Drop_out'] == 1:
                cfg = settings.copy()
                cfg['dropout_factor'] = 0.5
                print 'DL_S_new_Drop_out'
                p_sda = Parellel_Sda_factory(cfg)
                p_sda.supervised_training(x_train_minmax, x_validation_minmax,
                                          y_train_minmax, y_validation_minmax)
                isTest = False  # new
                training_predicted = p_sda.predict(x_train_minmax)
                y_train = y_train_minmax
                analysis_scr.append((subset_no, number_of_training, 'DL_S_new_Drop_out', isTest) +
                                    tuple(performance_score(train_y_reduced, training_predicted, with_auc_score).values()))
                isTest = True  # new
                y_test = test_y
                test_predicted = p_sda.predict(x_test_minmax)
                analysis_scr.append((subset_no, number_of_training, 'DL_S_new_Drop_out', isTest) +
                                    tuple(performance_score(test_y, test_predicted, with_auc_score).values()))

    report_name = 'DL_handwritten_digits' + '_size_'.join(map(str, hidden_layers_sizes)) + \
        '_' + str(pretrain_lr) + '_' + str(finetune_lr) + '_' + \
        '_' + str(settings['pretraining_interations']) + '_' + current_date
    saveAsCsv(with_auc_score, report_name,
              performance_score(test_y, predicted_test_y, with_auc_score), analysis_scr)
    return sda, a_MAE_original, a_MAE_A, a_MAE_B, analysis_scr
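# A minimal sketch of the settings dict that run_models() above reads.
# Every key is taken from a lookup in the function body; the values are
# illustrative assumptions, not the original experiment's configuration.
example_settings = {
    'number_iterations': 1,
    'number_of_training': [10000],      # sizes tried by the inner loop
    'with_auc_score': False,
    'n_outs': 2,                        # binary pair-matching task
    'finetune_lr': 0.1,
    'pretrain_lr': 0.001,
    'batch_size': 100,
    'pretraining_interations': 10000,   # key spelled as in the code
    'training_interations': 20000,
    'hidden_layers_sizes': [200, 200],
    'corruption_levels': [0.1, 0.1],
    'lrate_pre': 'lrate_',
    # classifier switches
    'SVM': True, 'SVM_RBF': False, 'SVM_POLY': False, 'Log': False,
    'DL': True, 'SAE_SVM': False, 'SAE_Log': False,
    'SAE_SVM_RBF': False, 'SAE_SVM_POLY': False,
    'DL_S': False, 'SAE_S_SVM': False, 'SAE_S_SVM_RBF': False,
    'SAE_S_SVM_POLY': False,
    'DL_xy': False, 'Sda_xy_with_first': False, 'Sda_new': False,
    'DL_S_new': False, 'DL_S_new_contraction': False,
    'DL_S_new_sparsity': 0, 'DL_S_new_weight_decay': 0,
    'DL_S_new_Drop_out': 0,
}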
def get_ten_fold_crossvalid_perfermance(self, settings=None):
    fisher_mode = settings['fisher_mode']
    analysis_scr = []
    with_auc_score = settings['with_auc_score']
    reduce_ratio = settings['reduce_ratio']
    # for seq_no in range(1, self.ddi_obj.total_number_of_sequences+1):
    # subset_size = math.floor(self.ddi_obj.total_number_of_sequences / 10.0)
    kf = KFold(self.ddi_obj.total_number_of_sequences, n_folds=10, shuffle=True)
    # for subset_no in range(1, 11):
    for ((train_index, test_index), subset_no) in izip(kf, range(1, 11)):
        print("Subset:", subset_no)
        print("Train index: ", train_index)
        print("Test index: ", test_index)
        # logger.info('subset number: ' + str(subset_no))
        (train_X_10fold, train_y_10fold), (train_X_reduced, train_y_reduced), (test_X, test_y) = \
            self.ddi_obj.get_ten_fold_crossvalid_one_subset(train_index, test_index,
                                                            fisher_mode=fisher_mode,
                                                            reduce_ratio=reduce_ratio)
        standard_scaler = preprocessing.StandardScaler().fit(train_X_reduced)
        scaled_train_X = standard_scaler.transform(train_X_reduced)
        scaled_test_X = standard_scaler.transform(test_X)
        if settings['SVM']:
            print "SVM"
            Linear_SVC = LinearSVC(C=1, penalty="l2")
            Linear_SVC.fit(scaled_train_X, train_y_reduced)
            predicted_test_y = Linear_SVC.predict(scaled_test_X)
            isTest = True  # new
            analysis_scr.append((self.ddi, subset_no, fisher_mode, 'SVM', isTest) +
                                tuple(performance_score(test_y, predicted_test_y).values()))
            predicted_train_y = Linear_SVC.predict(scaled_train_X)  # new
            isTest = False  # new
            analysis_scr.append((self.ddi, subset_no, fisher_mode, 'SVM', isTest) +
                                tuple(performance_score(train_y_reduced, predicted_train_y).values()))
        if settings['SVM_RBF']:
            print "SVM_RBF"
            L1_SVC_RBF_Selector = SVC(C=1, gamma=0.01, kernel='rbf').fit(scaled_train_X, train_y_reduced)
            predicted_test_y = L1_SVC_RBF_Selector.predict(scaled_test_X)
            isTest = True  # new
            analysis_scr.append((self.ddi, subset_no, fisher_mode, 'SVM_RBF', isTest) +
                                tuple(performance_score(test_y, predicted_test_y).values()))
            predicted_train_y = L1_SVC_RBF_Selector.predict(scaled_train_X)  # new
            isTest = False  # new
            analysis_scr.append((self.ddi, subset_no, fisher_mode, 'SVM_RBF', isTest) +
                                tuple(performance_score(train_y_reduced, predicted_train_y).values()))
        if settings['SVM_POLY']:
            print "SVM_POLY"
            L1_SVC_POLY_Selector = SVC(C=1, kernel='poly').fit(scaled_train_X, train_y_reduced)
            predicted_test_y = L1_SVC_POLY_Selector.predict(scaled_test_X)
            isTest = True  # new
            analysis_scr.append((self.ddi, subset_no, fisher_mode, 'SVM_POLY', isTest) +
                                tuple(performance_score(test_y, predicted_test_y).values()))
            predicted_train_y = L1_SVC_POLY_Selector.predict(scaled_train_X)  # new
            isTest = False  # new
            analysis_scr.append((self.ddi, subset_no, fisher_mode, 'SVM_POLY', isTest) +
                                tuple(performance_score(train_y_reduced, predicted_train_y).values()))
        min_max_scaler = Preprocessing_Scaler_with_mean_point5()
        X_train_pre_validation_minmax = min_max_scaler.fit(train_X_reduced)
        X_train_pre_validation_minmax = min_max_scaler.transform(train_X_reduced)
        x_test_minmax = min_max_scaler.transform(test_X)
        x_train_minmax, x_validation_minmax, y_train_minmax, y_validation_minmax = \
            train_test_split(X_train_pre_validation_minmax, train_y_reduced,
                             test_size=0.4, random_state=42)
        finetune_lr = settings['finetune_lr']
        batch_size = settings['batch_size']
        pretraining_epochs = cal_epochs(settings['pretraining_interations'],
                                        x_train_minmax, batch_size=batch_size)
        # pretrain_lr = 0.001
        pretrain_lr = settings['pretrain_lr']
        training_epochs = cal_epochs(settings['training_interations'],
                                     x_train_minmax, batch_size=batch_size)
        hidden_layers_sizes = settings['hidden_layers_sizes']
        corruption_levels = settings['corruption_levels']
        settings['epoch_number'] = cal_epochs(settings['pretraining_interations'],
                                              x_train_minmax, batch_size=batch_size)
        # deep xy autoencoders
        settings['lrate'] = settings['lrate_pre'] + str(training_epochs)
        settings['n_ins'] = x_train_minmax.shape[1]
        if settings['DL_xy']:
            cfg = settings.copy()
            cfg['weight_y'] = 1
            print 'DL_xy'
            train_x = x_train_minmax
            train_y = y_train_minmax
            sdaf = Sda_xy_factory(cfg)
            sdaf.sda.pretraining(train_x, train_y)
            dnnf = DNN_factory(cfg)
            dnnf.dnn.load_pretrain_from_Sda(sdaf.sda)
            dnnf.dnn.finetuning((x_train_minmax, y_train_minmax),
                                (x_validation_minmax, y_validation_minmax))
            training_predicted = dnnf.dnn.predict(x_train_minmax)
            y_train = y_train_minmax
            isTest = False  # new
            analysis_scr.append((self.ddi, subset_no, fisher_mode, 'DL_xy', isTest) +
                                tuple(performance_score(y_train, training_predicted).values()))
            test_predicted = dnnf.dnn.predict(x_test_minmax)
            y_test = test_y
            isTest = True  # new
            analysis_scr.append((self.ddi, subset_no, fisher_mode, 'DL_xy', isTest) +
                                tuple(performance_score(y_test, test_predicted).values()))
        if settings['Sda_xy_with_first']:
            cfg = settings.copy()
            cfg['weight_y'] = 10
            cfg['firstlayer_xy'] = 1
            print 'firstlayer_xy'
            train_x = x_train_minmax
            train_y = y_train_minmax
            sdaf = Sda_xy_factory(cfg)
            sdaf.sda.pretraining(train_x, train_y)
            dnnf = DNN_factory(cfg)
            dnnf.dnn.load_pretrain_from_Sda(sdaf.sda)
            dnnf.dnn.finetuning((x_train_minmax, y_train_minmax),
                                (x_validation_minmax, y_validation_minmax))
            training_predicted = dnnf.dnn.predict(x_train_minmax)
            y_train = y_train_minmax
            isTest = False  # new
            analysis_scr.append((self.ddi, subset_no, fisher_mode, 'Sda_xy_with_first', isTest) +
                                tuple(performance_score(y_train, training_predicted).values()))
            test_predicted = dnnf.dnn.predict(x_test_minmax)
            y_test = test_y
            isTest = True  # new
            analysis_scr.append((self.ddi, subset_no, fisher_mode, 'Sda_xy_with_first', isTest) +
                                tuple(performance_score(y_test, test_predicted).values()))
        if settings['Sda_new']:
            print 'Sda_new'
            cfg = settings.copy()
            train_x = x_train_minmax
            train_y = y_train_minmax
            cfg['n_ins'] = train_x.shape[1]
            sdaf = Sda_factory(cfg)
            sda = sdaf.sda.pretraining(train_x=train_x)
            sdaf.dnn.finetuning((x_train_minmax, y_train_minmax),
                                (x_validation_minmax, y_validation_minmax))
            training_predicted = sdaf.dnn.predict(x_train_minmax)
            y_train = y_train_minmax
            isTest = False  # new
            analysis_scr.append((self.ddi, subset_no, fisher_mode, 'Sda_new', isTest) +
                                tuple(performance_score(y_train, training_predicted).values()))
            test_predicted = sdaf.dnn.predict(x_test_minmax)
            y_test = test_y
            isTest = True  # new
            analysis_scr.append((self.ddi, subset_no, fisher_mode, 'Sda_new', isTest) +
                                tuple(performance_score(y_test, test_predicted).values()))
        #### new representation ####
        x = X_train_pre_validation_minmax
        a_MAE_A = pretrain_a_Sda_with_estop(x, pretraining_epochs=pretraining_epochs,
                                            pretrain_lr=pretrain_lr, batch_size=batch_size,
                                            hidden_layers_sizes=hidden_layers_sizes,
                                            corruption_levels=corruption_levels)
        new_x_train_minmax_A = a_MAE_A.transform(X_train_pre_validation_minmax)
        new_x_test_minmax_A = a_MAE_A.transform(x_test_minmax)
        standard_scaler = preprocessing.StandardScaler().fit(new_x_train_minmax_A)
        new_x_train_scaled = standard_scaler.transform(new_x_train_minmax_A)
        new_x_test_scaled = standard_scaler.transform(new_x_test_minmax_A)
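        # "Combo" features: the raw standard-scaled inputs concatenated
        # with the SAE representation, so the SVMs below can see both views.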
        new_x_train_combo = np.hstack((scaled_train_X, new_x_train_scaled))
        new_x_test_combo = np.hstack((scaled_test_X, new_x_test_scaled))
        if settings['SAE_SVM']:
            print 'SAE followed by SVM'
            Linear_SVC = LinearSVC(C=1, penalty="l2")
            Linear_SVC.fit(new_x_train_scaled, train_y_reduced)
            predicted_test_y = Linear_SVC.predict(new_x_test_scaled)
            isTest = True  # new
            analysis_scr.append((self.ddi, subset_no, fisher_mode, 'SAE_SVM', isTest) +
                                tuple(performance_score(test_y, predicted_test_y).values()))
            predicted_train_y = Linear_SVC.predict(new_x_train_scaled)  # new
            isTest = False  # new
            analysis_scr.append((self.ddi, subset_no, fisher_mode, 'SAE_SVM', isTest) +
                                tuple(performance_score(train_y_reduced, predicted_train_y).values()))
        if settings['SAE_SVM_RBF']:
            print 'SAE followed by SVM RBF'
            x = X_train_pre_validation_minmax
            L1_SVC_RBF_Selector = SVC(C=1, gamma=0.01, kernel='rbf').fit(new_x_train_scaled, train_y_reduced)
            predicted_test_y = L1_SVC_RBF_Selector.predict(new_x_test_scaled)
            isTest = True  # new
            analysis_scr.append((self.ddi, subset_no, fisher_mode, 'SAE_SVM_RBF', isTest) +
                                tuple(performance_score(test_y, predicted_test_y).values()))
            predicted_train_y = L1_SVC_RBF_Selector.predict(new_x_train_scaled)  # new
            isTest = False  # new
            analysis_scr.append((self.ddi, subset_no, fisher_mode, 'SAE_SVM_RBF', isTest) +
                                tuple(performance_score(train_y_reduced, predicted_train_y).values()))
        if settings['SAE_SVM_COMBO']:
            print 'SAE followed by SVM with combo feature'
            Linear_SVC = LinearSVC(C=1, penalty="l2")
            Linear_SVC.fit(new_x_train_combo, train_y_reduced)
            predicted_test_y = Linear_SVC.predict(new_x_test_combo)
            isTest = True  # new
            analysis_scr.append((self.ddi, subset_no, fisher_mode, 'SAE_SVM_COMBO', isTest) +
                                tuple(performance_score(test_y, predicted_test_y).values()))
            predicted_train_y = Linear_SVC.predict(new_x_train_combo)  # new
            isTest = False  # new
            analysis_scr.append((self.ddi, subset_no, fisher_mode, 'SAE_SVM_COMBO', isTest) +
                                tuple(performance_score(train_y_reduced, predicted_train_y).values()))
        if settings['SAE_SVM_RBF_COMBO']:
            print 'SAE followed by SVM RBF with combo feature'
            L1_SVC_RBF_Selector = SVC(C=1, gamma=0.01, kernel='rbf').fit(new_x_train_combo, train_y_reduced)
            predicted_test_y = L1_SVC_RBF_Selector.predict(new_x_test_combo)
            isTest = True  # new
            analysis_scr.append((self.ddi, subset_no, fisher_mode, 'SAE_SVM_RBF_COMBO', isTest) +
                                tuple(performance_score(test_y, predicted_test_y).values()))
            predicted_train_y = L1_SVC_RBF_Selector.predict(new_x_train_combo)  # new
            isTest = False  # new
            analysis_scr.append((self.ddi, subset_no, fisher_mode, 'SAE_SVM_RBF_COMBO', isTest) +
                                tuple(performance_score(train_y_reduced, predicted_train_y).values()))
        if settings['DL']:
            print "direct deep learning"
            sda = train_a_Sda(x_train_minmax, pretrain_lr, finetune_lr,
                              y_train_minmax,
                              x_validation_minmax, y_validation_minmax,
                              x_test_minmax, test_y,
                              hidden_layers_sizes=hidden_layers_sizes,
                              corruption_levels=corruption_levels,
                              batch_size=batch_size,
                              training_epochs=training_epochs,
                              pretraining_epochs=pretraining_epochs,
                              n_outs=settings['n_outs'])
            print 'hidden_layers_sizes:', hidden_layers_sizes
            print 'corruption_levels:', corruption_levels
            training_predicted = sda.predict(x_train_minmax)
            y_train = y_train_minmax
            isTest = False  # new
            analysis_scr.append((self.ddi, subset_no, fisher_mode, 'DL', isTest) +
                                tuple(performance_score(y_train, training_predicted).values()))
            test_predicted = sda.predict(x_test_minmax)
            y_test = test_y
            isTest = True  # new
            analysis_scr.append((self.ddi, subset_no, fisher_mode, 'DL', isTest) +
                                tuple(performance_score(y_test, test_predicted).values()))
        if settings['DL_old']:
            print "direct deep learning old without early stop"
            # y_train_minmax / test_y are used here; the original passed
            # y_train / y_test, which may be unbound if no earlier branch ran.
            sda = trainSda(x_train_minmax, y_train_minmax,
                           x_validation_minmax, y_validation_minmax,
                           x_test_minmax, test_y,
                           pretrain_lr, finetune_lr,
                           pretraining_X_minmax=None,
                           hidden_layers_sizes=hidden_layers_sizes,
                           corruption_levels=corruption_levels,
                           batch_size=batch_size,
                           training_epochs=training_epochs,
                           pretraining_epochs=pretraining_epochs,
                           n_outs=settings['n_outs'])
            print 'hidden_layers_sizes:', hidden_layers_sizes
            print 'corruption_levels:', corruption_levels
            training_predicted = sda.predict(x_train_minmax)
            y_train = y_train_minmax
            isTest = False  # new
            analysis_scr.append((self.ddi, subset_no, fisher_mode, 'DL_old', isTest) +
                                tuple(performance_score(y_train, training_predicted).values()))
            test_predicted = sda.predict(x_test_minmax)
            y_test = test_y
            isTest = True  # new
            analysis_scr.append((self.ddi, subset_no, fisher_mode, 'DL_old', isTest) +
                                tuple(performance_score(y_test, test_predicted).values()))
        if settings['DL_U']:
            # deep learning using unlabeled data for pretraining
            print 'deep learning with unlabeled data'
            # The full (min-max scaled) 10-fold training partition serves as
            # unlabeled pretraining data.
            pretraining_X_minmax = min_max_scaler.transform(train_X_10fold)
            pretraining_epochs = cal_epochs(settings['pretraining_interations'],
                                            x_train_minmax, batch_size=batch_size)
            sda_unlabel = trainSda(x_train_minmax, y_train_minmax,
                                   x_validation_minmax, y_validation_minmax,
                                   x_test_minmax, test_y,
                                   pretraining_X_minmax=pretraining_X_minmax,
                                   hidden_layers_sizes=hidden_layers_sizes,
                                   corruption_levels=corruption_levels,
                                   batch_size=batch_size,
                                   training_epochs=training_epochs,
                                   pretraining_epochs=pretraining_epochs,
                                   pretrain_lr=pretrain_lr, finetune_lr=finetune_lr,
                                   n_outs=settings['n_outs'])
            print 'hidden_layers_sizes:', hidden_layers_sizes
            print 'corruption_levels:', corruption_levels
            training_predicted = sda_unlabel.predict(x_train_minmax)
            y_train = y_train_minmax
            isTest = False  # new
            analysis_scr.append((self.ddi, subset_no, fisher_mode, 'DL_U', isTest) +
                                tuple(performance_score(y_train, training_predicted, with_auc_score).values()))
            test_predicted = sda_unlabel.predict(x_test_minmax)
            y_test = test_y
            isTest = True  # new
            analysis_scr.append((self.ddi, subset_no, fisher_mode, 'DL_U', isTest) +
                                tuple(performance_score(y_test, test_predicted, with_auc_score).values()))
        if settings['DL_S']:
            # deep learning using split network
            y_test = test_y
            print 'deep learning using split network'
            # get the new representation for the A set (first 784-D half)
            pretraining_epochs = cal_epochs(settings['pretraining_interations'],
                                            x_train_minmax, batch_size=batch_size)
            x = x_train_minmax[:, :x_train_minmax.shape[1] / 2]
            print "original shape for A", x.shape
            a_MAE_A = pretrain_a_Sda_with_estop(x, pretraining_epochs=pretraining_epochs,
                                                pretrain_lr=pretrain_lr, batch_size=batch_size,
                                                hidden_layers_sizes=hidden_layers_sizes,
                                                corruption_levels=corruption_levels)
            new_x_train_minmax_A = a_MAE_A.transform(x_train_minmax[:, :x_train_minmax.shape[1] / 2])
            x = x_train_minmax[:, x_train_minmax.shape[1] / 2:]
            print "original shape for B", x.shape
            a_MAE_B = pretrain_a_Sda_with_estop(x, pretraining_epochs=pretraining_epochs,
                                                pretrain_lr=pretrain_lr, batch_size=batch_size,
                                                hidden_layers_sizes=hidden_layers_sizes,
                                                corruption_levels=corruption_levels)
            new_x_train_minmax_B = a_MAE_B.transform(x_train_minmax[:, x_train_minmax.shape[1] / 2:])
            new_x_test_minmax_A = a_MAE_A.transform(x_test_minmax[:, :x_test_minmax.shape[1] / 2])
            new_x_test_minmax_B = a_MAE_B.transform(x_test_minmax[:, x_test_minmax.shape[1] / 2:])
            new_x_validation_minmax_A = a_MAE_A.transform(x_validation_minmax[:, :x_validation_minmax.shape[1] / 2])
            new_x_validation_minmax_B = a_MAE_B.transform(x_validation_minmax[:, x_validation_minmax.shape[1] / 2:])
            new_x_train_minmax_whole = np.hstack((new_x_train_minmax_A, new_x_train_minmax_B))
            new_x_test_minmax_whole = np.hstack((new_x_test_minmax_A, new_x_test_minmax_B))
            new_x_validationt_minmax_whole = np.hstack((new_x_validation_minmax_A, new_x_validation_minmax_B))
            sda_transformed = train_a_Sda(new_x_train_minmax_whole, pretrain_lr, finetune_lr,
                                          y_train_minmax,
                                          new_x_validationt_minmax_whole, y_validation_minmax,
                                          new_x_test_minmax_whole, y_test,
                                          hidden_layers_sizes=hidden_layers_sizes,
                                          corruption_levels=corruption_levels,
                                          batch_size=batch_size,
                                          training_epochs=training_epochs,
                                          pretraining_epochs=pretraining_epochs,
                                          n_outs=settings['n_outs'])
            print 'hidden_layers_sizes:', hidden_layers_sizes
            print 'corruption_levels:', corruption_levels
            training_predicted = sda_transformed.predict(new_x_train_minmax_whole)
            y_train = y_train_minmax
            isTest = False  # new
            analysis_scr.append((self.ddi, subset_no, fisher_mode, 'DL_S', isTest) +
                                tuple(performance_score(y_train, training_predicted, with_auc_score).values()))
            test_predicted = sda_transformed.predict(new_x_test_minmax_whole)
            y_test = test_y
            isTest = True  # new
            analysis_scr.append((self.ddi, subset_no, fisher_mode, 'DL_S', isTest) +
                                tuple(performance_score(y_test, test_predicted, with_auc_score).values()))
        if settings['DL_S_new']:
            # deep learning using split network
            print 'new deep learning using split network'
            cfg = settings.copy()
            p_sda = Parellel_Sda_factory(cfg)
            p_sda.supervised_training(x_train_minmax, x_validation_minmax,
                                      y_train_minmax, y_validation_minmax)
            isTest = False  # new
            training_predicted = p_sda.predict(x_train_minmax)
            y_train = y_train_minmax
            analysis_scr.append((self.ddi, subset_no, fisher_mode, 'DL_S_new', isTest) +
                                tuple(performance_score(y_train, training_predicted, with_auc_score).values()))
            isTest = True  # new
            y_test = test_y
            test_predicted = p_sda.predict(x_test_minmax)
            analysis_scr.append((self.ddi, subset_no, fisher_mode, 'DL_S_new', isTest) +
                                tuple(performance_score(y_test, test_predicted, with_auc_score).values()))
        if settings['DL_S_new_contraction']:
            print 'DL_S_new_contraction'
            cfg = settings.copy()
            cfg['contraction_level'] = 0.0001
            p_sda = Parellel_Sda_factory(cfg)
            p_sda.supervised_training(x_train_minmax, x_validation_minmax,
                                      y_train_minmax, y_validation_minmax)
            isTest = False  # new
            training_predicted = p_sda.predict(x_train_minmax)
            y_train = y_train_minmax
            analysis_scr.append((self.ddi, subset_no, fisher_mode, 'DL_S_new_contraction', isTest) +
                                tuple(performance_score(y_train, training_predicted, with_auc_score).values()))
            isTest = True  # new
            y_test = test_y
            test_predicted = p_sda.predict(x_test_minmax)
            analysis_scr.append((self.ddi, subset_no, fisher_mode, 'DL_S_new_contraction', isTest) +
                                tuple(performance_score(y_test, test_predicted, with_auc_score).values()))
        if settings['DL_S_new_sparsity'] == 1:
            print 'DL_S_new_sparsity'
            cfg = settings.copy()
            cfg['sparsity'] = 0.1
            cfg['sparsity_weight'] = 0.0001
            p_sda = Parellel_Sda_factory(cfg)
            p_sda.supervised_training(x_train_minmax, x_validation_minmax,
                                      y_train_minmax, y_validation_minmax)
            isTest = False  # new
            training_predicted = p_sda.predict(x_train_minmax)
            y_train = y_train_minmax
            analysis_scr.append((self.ddi, subset_no, fisher_mode, 'DL_S_new_sparsity', isTest) +
                                tuple(performance_score(y_train, training_predicted, with_auc_score).values()))
            isTest = True  # new
            y_test = test_y
            test_predicted = p_sda.predict(x_test_minmax)
            analysis_scr.append((self.ddi, subset_no, fisher_mode, 'DL_S_new_sparsity', isTest) +
                                tuple(performance_score(y_test, test_predicted, with_auc_score).values()))
        if settings['DL_S_new_weight_decay'] == 2:
            cfg = settings.copy()
            cfg['l2_reg'] = 0.0001
            print 'l2_reg'
            p_sda = Parellel_Sda_factory(cfg)
            p_sda.supervised_training(x_train_minmax, x_validation_minmax,
                                      y_train_minmax, y_validation_minmax)
            isTest = False  # new
            training_predicted = p_sda.predict(x_train_minmax)
            y_train = y_train_minmax
            analysis_scr.append((self.ddi, subset_no, fisher_mode, 'l2_reg', isTest) +
                                tuple(performance_score(y_train, training_predicted, with_auc_score).values()))
            isTest = True  # new
            y_test = test_y
            test_predicted = p_sda.predict(x_test_minmax)
            analysis_scr.append((self.ddi, subset_no, fisher_mode, 'l2_reg', isTest) +
                                tuple(performance_score(y_test, test_predicted, with_auc_score).values()))
        if settings['DL_S_new_weight_decay'] == 1:
            print 'l1_reg'
            cfg = settings.copy()
            cfg['l1_reg'] = 0.1
            p_sda = Parellel_Sda_factory(cfg)
            p_sda.supervised_training(x_train_minmax, x_validation_minmax,
                                      y_train_minmax, y_validation_minmax)
            isTest = False  # new
            training_predicted = p_sda.predict(x_train_minmax)
            y_train = y_train_minmax
            analysis_scr.append((self.ddi, subset_no, fisher_mode, 'l1_reg', isTest) +
                                tuple(performance_score(y_train, training_predicted, with_auc_score).values()))
            isTest = True  # new
            y_test = test_y
            test_predicted = p_sda.predict(x_test_minmax)
            analysis_scr.append((self.ddi, subset_no, fisher_mode, 'l1_reg', isTest) +
                                tuple(performance_score(y_test, test_predicted, with_auc_score).values()))
        if settings['DL_S_new_Drop_out'] == 1:
            cfg = settings.copy()
            cfg['dropout_factor'] = 0.5
            print 'DL_S_new_Drop_out'
            p_sda = Parellel_Sda_factory(cfg)
            p_sda.supervised_training(x_train_minmax, x_validation_minmax,
                                      y_train_minmax, y_validation_minmax)
            isTest = False  # new
            training_predicted = p_sda.predict(x_train_minmax)
            y_train = y_train_minmax
            analysis_scr.append((self.ddi, subset_no, fisher_mode, 'DL_S_new_Drop_out', isTest) +
                                tuple(performance_score(y_train, training_predicted, with_auc_score).values()))
            isTest = True  # new
            y_test = test_y
            test_predicted = p_sda.predict(x_test_minmax)
            analysis_scr.append((self.ddi, subset_no, fisher_mode, 'DL_S_new_Drop_out', isTest) +
                                tuple(performance_score(y_test, test_predicted, with_auc_score).values()))
    report_name = filename + '_' + '_newDL_'.join(map(str, hidden_layers_sizes)) + \
        '_' + str(pretrain_lr) + '_' + str(finetune_lr) + '_' + \
        str(settings['training_interations']) + '_' + current_date
    saveAsCsv(with_auc_score, report_name,
              performance_score(test_y, predicted_test_y, with_auc_score), analysis_scr)
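# performance_score() is a project helper whose definition is not shown in
# this file. A minimal sketch consistent with how it is called above -- it
# must return a mapping of metric name -> value whose .values() order is
# stable, with an optional AUC entry -- might look like this (an assumption,
# not the project's actual implementation):
from collections import OrderedDict
from sklearn import metrics


def performance_score_sketch(y_true, y_pred, with_auc_score=False):
    scores = OrderedDict()
    scores['accuracy'] = metrics.accuracy_score(y_true, y_pred)
    scores['precision'] = metrics.precision_score(y_true, y_pred)
    scores['recall'] = metrics.recall_score(y_true, y_pred)
    scores['f1'] = metrics.f1_score(y_true, y_pred)
    if with_auc_score:
        # AUC on hard labels here; the real helper may use decision scores.
        scores['auc'] = metrics.roc_auc_score(y_true, y_pred)
    return scores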
def run_models(settings=None):
    analysis_scr = []
    with_auc_score = settings['with_auc_score']
    f = gzip.open('mnist.pkl.gz', 'rb')
    train_set, valid_set, test_set = cPickle.load(f)
    f.close()
    X_train, y_train = train_set
    X_valid, y_valid = valid_set
    X_test, y_test = test_set
    n_outs = settings['n_outs']
    for subset_no in xrange(1, settings['number_iterations'] + 1):
        print("Subset:", subset_no)
        # (train_X_10fold, train_y_10fold), (train_X_reduced, train_y_reduced), (test_X, test_y) = \
        #     (X_train[:1000], y_train[:1000]), (X_train[:1000], y_train[:1000]), (X_test[:1000], y_test[:1000])
        (train_X_10fold, train_y_10fold), (train_X_reduced, train_y_reduced), (test_X, test_y) = \
            (X_train, y_train), (X_train, y_train), (X_test, y_test)
        standard_scaler = preprocessing.StandardScaler().fit(train_X_reduced)
        scaled_train_X = standard_scaler.transform(train_X_reduced)
        scaled_test_X = standard_scaler.transform(test_X)
        fisher_mode = settings['fisher_mode']
        if settings['SVM']:
            print "SVM"
            Linear_SVC = LinearSVC(C=1, penalty="l2")
            Linear_SVC.fit(scaled_train_X, train_y_reduced)
            predicted_test_y = Linear_SVC.predict(scaled_test_X)
            isTest = True  # new
            analysis_scr.append((subset_no, fisher_mode, 'SVM', isTest) +
                                tuple(performance_score(test_y, predicted_test_y).values()))
            predicted_train_y = Linear_SVC.predict(scaled_train_X)  # new
            isTest = False  # new
            analysis_scr.append((subset_no, fisher_mode, 'SVM', isTest) +
                                tuple(performance_score(train_y_reduced, predicted_train_y).values()))
        if settings['SVM_RBF']:
            print "SVM_RBF"
            L1_SVC_RBF_Selector = SVC(C=1, gamma=0.01, kernel='rbf').fit(scaled_train_X, train_y_reduced)
            predicted_test_y = L1_SVC_RBF_Selector.predict(scaled_test_X)
            isTest = True  # new
            analysis_scr.append((subset_no, fisher_mode, 'SVM_RBF', isTest) +
                                tuple(performance_score(test_y, predicted_test_y).values()))
            predicted_train_y = L1_SVC_RBF_Selector.predict(scaled_train_X)  # new
            isTest = False  # new
            analysis_scr.append((subset_no, fisher_mode, 'SVM_RBF', isTest) +
                                tuple(performance_score(train_y_reduced, predicted_train_y).values()))
        if settings['SVM_POLY']:
            print "SVM_POLY"
            L1_SVC_POLY_Selector = SVC(C=1, kernel='poly').fit(scaled_train_X, train_y_reduced)
            predicted_test_y = L1_SVC_POLY_Selector.predict(scaled_test_X)
            isTest = True  # new
            analysis_scr.append((subset_no, fisher_mode, 'SVM_POLY', isTest) +
                                tuple(performance_score(test_y, predicted_test_y).values()))
            predicted_train_y = L1_SVC_POLY_Selector.predict(scaled_train_X)  # new
            isTest = False  # new
            analysis_scr.append((subset_no, fisher_mode, 'SVM_POLY', isTest) +
                                tuple(performance_score(train_y_reduced, predicted_train_y).values()))
        min_max_scaler = Preprocessing_Scaler_with_mean_point5()
        X_train_pre_validation_minmax = min_max_scaler.fit(train_X_reduced)
        X_train_pre_validation_minmax = min_max_scaler.transform(train_X_reduced)
        x_test_minmax = min_max_scaler.transform(test_X)
        x_train_minmax, x_validation_minmax, y_train_minmax, y_validation_minmax = \
            train_test_split(X_train_pre_validation_minmax, train_y_reduced,
                             test_size=0.4, random_state=42)
        finetune_lr = settings['finetune_lr']
        batch_size = settings['batch_size']
        pretraining_epochs = cal_epochs(settings['pretraining_interations'],
                                        x_train_minmax, batch_size=batch_size)
        # pretrain_lr = 0.001
        pretrain_lr = settings['pretrain_lr']
        training_epochs = cal_epochs(settings['training_interations'],
                                     x_train_minmax, batch_size=batch_size)
        settings['lrate'] = settings['lrate_pre'] + str(training_epochs)
        hidden_layers_sizes = settings['hidden_layers_sizes']
        corruption_levels = settings['corruption_levels']
        settings['epoch_number'] = cal_epochs(settings['pretraining_interations'],
                                              x_train_minmax, batch_size=batch_size)
        # deep xy autoencoders
        settings['n_ins'] = x_train_minmax.shape[1]
        if settings['DL_xy']:
            cfg = settings.copy()
            cfg['weight_y'] = 0.01
            print 'DL_xy'
            train_x = x_train_minmax
            train_y = y_train_minmax
            sdaf = Sda_xy_factory(cfg)
            sdaf.sda.pretraining(train_x, train_y)
            dnnf = DNN_factory(cfg)
            dnnf.dnn.load_pretrain_from_Sda(sdaf.sda)
            dnnf.dnn.finetuning((x_train_minmax, y_train_minmax),
                                (x_validation_minmax, y_validation_minmax))
            training_predicted = dnnf.dnn.predict(x_train_minmax)
            y_train = y_train_minmax
            isTest = False  # new
            analysis_scr.append((subset_no, fisher_mode, 'DL_xy', isTest) +
                                tuple(performance_score(y_train, training_predicted).values()))
            test_predicted = dnnf.dnn.predict(x_test_minmax)
            y_test = test_y
            isTest = True  # new
            analysis_scr.append((subset_no, fisher_mode, 'DL_xy', isTest) +
                                tuple(performance_score(y_test, test_predicted).values()))
        if settings['Sda_xy_with_first']:
            cfg = settings.copy()
            cfg['weight_y'] = 1
            cfg['firstlayer_xy'] = 1
            print 'firstlayer_xy'
            train_x = x_train_minmax
            train_y = y_train_minmax
            sdaf = Sda_xy_factory(cfg)
            sdaf.sda.pretraining(train_x, train_y)
            dnnf = DNN_factory(cfg)
            dnnf.dnn.load_pretrain_from_Sda(sdaf.sda)
            dnnf.dnn.finetuning((x_train_minmax, y_train_minmax),
                                (x_validation_minmax, y_validation_minmax))
            training_predicted = dnnf.dnn.predict(x_train_minmax)
            y_train = y_train_minmax
            isTest = False  # new
            analysis_scr.append((subset_no, fisher_mode, 'Sda_xy_with_first', isTest) +
                                tuple(performance_score(y_train, training_predicted).values()))
            test_predicted = dnnf.dnn.predict(x_test_minmax)
            y_test = test_y
            isTest = True  # new
            analysis_scr.append((subset_no, fisher_mode, 'Sda_xy_with_first', isTest) +
                                tuple(performance_score(y_test, test_predicted).values()))
        if settings['Sda_new']:
            print 'Sda_new'
            cfg = settings.copy()
            train_x = x_train_minmax
            train_y = y_train_minmax
            cfg['n_ins'] = train_x.shape[1]
            sdaf = Sda_factory(cfg)
            sda = sdaf.sda.pretraining(train_x=train_x)
            sdaf.dnn.finetuning((x_train_minmax, y_train_minmax),
                                (x_validation_minmax, y_validation_minmax))
            training_predicted = sdaf.dnn.predict(x_train_minmax)
            y_train = y_train_minmax
            isTest = False  # new
            analysis_scr.append((subset_no, fisher_mode, 'Sda_new', isTest) +
                                tuple(performance_score(y_train, training_predicted).values()))
            test_predicted = sdaf.dnn.predict(x_test_minmax)
            y_test = test_y
            isTest = True  # new
            analysis_scr.append((subset_no, fisher_mode, 'Sda_new', isTest) +
                                tuple(performance_score(y_test, test_predicted).values()))
        #### new representation ####
        x = X_train_pre_validation_minmax
        a_MAE_A = pretrain_a_Sda_with_estop(x, pretraining_epochs=pretraining_epochs,
                                            pretrain_lr=pretrain_lr, batch_size=batch_size,
                                            hidden_layers_sizes=hidden_layers_sizes,
                                            corruption_levels=corruption_levels,
                                            n_outs=n_outs)
        new_x_train_minmax_A = a_MAE_A.transform(X_train_pre_validation_minmax)
        new_x_test_minmax_A = a_MAE_A.transform(x_test_minmax)
        standard_scaler = preprocessing.StandardScaler().fit(new_x_train_minmax_A)
        new_x_train_scaled = standard_scaler.transform(new_x_train_minmax_A)
        new_x_test_scaled = standard_scaler.transform(new_x_test_minmax_A)
        new_x_train_combo = np.hstack((scaled_train_X, new_x_train_scaled))
        new_x_test_combo = np.hstack((scaled_test_X, new_x_test_scaled))
        if settings['SAE_SVM']:
            print 'SAE followed by SVM'
            Linear_SVC = LinearSVC(C=1, penalty="l2")
            Linear_SVC.fit(new_x_train_scaled, train_y_reduced)
            predicted_test_y = Linear_SVC.predict(new_x_test_scaled)
            isTest = True  # new
            analysis_scr.append((subset_no, fisher_mode, 'SAE_SVM', isTest) +
                                tuple(performance_score(test_y, predicted_test_y).values()))
            predicted_train_y = Linear_SVC.predict(new_x_train_scaled)  # new
            isTest = False  # new
            analysis_scr.append((subset_no, fisher_mode, 'SAE_SVM', isTest) +
                                tuple(performance_score(train_y_reduced, predicted_train_y).values()))
        if settings['SAE_SVM_RBF']:
            print 'SAE followed by SVM RBF'
            x = X_train_pre_validation_minmax
            L1_SVC_RBF_Selector = SVC(C=1, gamma=0.01, kernel='rbf').fit(new_x_train_scaled, train_y_reduced)
            predicted_test_y = L1_SVC_RBF_Selector.predict(new_x_test_scaled)
            isTest = True  # new
            analysis_scr.append((subset_no, fisher_mode, 'SAE_SVM_RBF', isTest) +
                                tuple(performance_score(test_y, predicted_test_y).values()))
            predicted_train_y = L1_SVC_RBF_Selector.predict(new_x_train_scaled)  # new
            isTest = False  # new
            analysis_scr.append((subset_no, fisher_mode, 'SAE_SVM_RBF', isTest) +
                                tuple(performance_score(train_y_reduced, predicted_train_y).values()))
        if settings['SAE_SVM_COMBO']:
            print 'SAE followed by SVM with combo feature'
            Linear_SVC = LinearSVC(C=1, penalty="l2")
            Linear_SVC.fit(new_x_train_combo, train_y_reduced)
            predicted_test_y = Linear_SVC.predict(new_x_test_combo)
            isTest = True  # new
            analysis_scr.append((subset_no, fisher_mode, 'SAE_SVM_COMBO', isTest) +
                                tuple(performance_score(test_y, predicted_test_y).values()))
            predicted_train_y = Linear_SVC.predict(new_x_train_combo)  # new
            isTest = False  # new
            analysis_scr.append((subset_no, fisher_mode, 'SAE_SVM_COMBO', isTest) +
                                tuple(performance_score(train_y_reduced, predicted_train_y).values()))
        if settings['SAE_SVM_RBF_COMBO']:
            print 'SAE followed by SVM RBF with combo feature'
            L1_SVC_RBF_Selector = SVC(C=1, gamma=0.01, kernel='rbf').fit(new_x_train_combo, train_y_reduced)
            predicted_test_y = L1_SVC_RBF_Selector.predict(new_x_test_combo)
            isTest = True  # new
            analysis_scr.append((subset_no, fisher_mode, 'SAE_SVM_RBF_COMBO', isTest) +
                                tuple(performance_score(test_y, predicted_test_y).values()))
            predicted_train_y = L1_SVC_RBF_Selector.predict(new_x_train_combo)  # new
            isTest = False  # new
            analysis_scr.append((subset_no, fisher_mode, 'SAE_SVM_RBF_COMBO', isTest) +
                                tuple(performance_score(train_y_reduced, predicted_train_y).values()))
        if settings['DL']:
            print "direct deep learning"
            sda = train_a_Sda(x_train_minmax, pretrain_lr, finetune_lr,
                              y_train_minmax,
                              x_validation_minmax, y_validation_minmax,
                              x_test_minmax, test_y,
                              hidden_layers_sizes=hidden_layers_sizes,
                              corruption_levels=corruption_levels,
                              batch_size=batch_size,
                              training_epochs=training_epochs,
                              pretraining_epochs=pretraining_epochs,
                              n_outs=n_outs)
            print 'hidden_layers_sizes:', hidden_layers_sizes
            print 'corruption_levels:', corruption_levels
            training_predicted = sda.predict(x_train_minmax)
            y_train = y_train_minmax
            isTest = False  # new
            analysis_scr.append((subset_no, fisher_mode, 'DL', isTest) +
                                tuple(performance_score(y_train, training_predicted).values()))
            test_predicted = sda.predict(x_test_minmax)
            y_test = test_y
            isTest = True  # new
            analysis_scr.append((subset_no, fisher_mode, 'DL', isTest) +
                                tuple(performance_score(y_test, test_predicted).values()))
        if settings['DL_U']:
            # deep learning using unlabeled data for pretraining
            print 'deep learning with unlabeled data'
            pretraining_X_minmax = min_max_scaler.transform(train_X_10fold)
            pretraining_epochs = cal_epochs(settings['pretraining_interations'],
                                            x_train_minmax, batch_size=batch_size)
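            # DL_U pretrains on the whole (min-max scaled) training pool as
            # unlabeled data, then fine-tunes on the labeled training split.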
            sda_unlabel = trainSda(x_train_minmax, y_train_minmax,
                                   x_validation_minmax, y_validation_minmax,
                                   x_test_minmax, test_y,
                                   pretraining_X_minmax=pretraining_X_minmax,
                                   hidden_layers_sizes=hidden_layers_sizes,
                                   corruption_levels=corruption_levels,
                                   batch_size=batch_size,
                                   training_epochs=training_epochs,
                                   pretraining_epochs=pretraining_epochs,
                                   pretrain_lr=pretrain_lr, finetune_lr=finetune_lr,
                                   n_outs=n_outs)
            print 'hidden_layers_sizes:', hidden_layers_sizes
            print 'corruption_levels:', corruption_levels
            training_predicted = sda_unlabel.predict(x_train_minmax)
            y_train = y_train_minmax
            isTest = False  # new
            analysis_scr.append((subset_no, fisher_mode, 'DL_U', isTest) +
                                tuple(performance_score(y_train, training_predicted, with_auc_score).values()))
            test_predicted = sda_unlabel.predict(x_test_minmax)
            y_test = test_y
            isTest = True  # new
            analysis_scr.append((subset_no, fisher_mode, 'DL_U', isTest) +
                                tuple(performance_score(y_test, test_predicted, with_auc_score).values()))
        if settings['DL_S']:
            # deep learning using split network
            y_test = test_y
            print 'deep learning using split network'
            # get the new representation for the A set (first 784-D half)
            pretraining_epochs = cal_epochs(settings['pretraining_interations'],
                                            x_train_minmax, batch_size=batch_size)
            x = x_train_minmax[:, :x_train_minmax.shape[1] / 2]
            print "original shape for A", x.shape
            a_MAE_A = pretrain_a_Sda_with_estop(x, pretraining_epochs=pretraining_epochs,
                                                pretrain_lr=pretrain_lr, batch_size=batch_size,
                                                hidden_layers_sizes=hidden_layers_sizes,
                                                corruption_levels=corruption_levels,
                                                n_outs=n_outs)
            new_x_train_minmax_A = a_MAE_A.transform(x_train_minmax[:, :x_train_minmax.shape[1] / 2])
            x = x_train_minmax[:, x_train_minmax.shape[1] / 2:]
            print "original shape for B", x.shape
            a_MAE_B = pretrain_a_Sda_with_estop(x, pretraining_epochs=pretraining_epochs,
                                                pretrain_lr=pretrain_lr, batch_size=batch_size,
                                                hidden_layers_sizes=hidden_layers_sizes,
                                                corruption_levels=corruption_levels,
                                                n_outs=n_outs)
            new_x_train_minmax_B = a_MAE_B.transform(x_train_minmax[:, x_train_minmax.shape[1] / 2:])
            new_x_test_minmax_A = a_MAE_A.transform(x_test_minmax[:, :x_test_minmax.shape[1] / 2])
            new_x_test_minmax_B = a_MAE_B.transform(x_test_minmax[:, x_test_minmax.shape[1] / 2:])
            new_x_validation_minmax_A = a_MAE_A.transform(x_validation_minmax[:, :x_validation_minmax.shape[1] / 2])
            new_x_validation_minmax_B = a_MAE_B.transform(x_validation_minmax[:, x_validation_minmax.shape[1] / 2:])
            new_x_train_minmax_whole = np.hstack((new_x_train_minmax_A, new_x_train_minmax_B))
            new_x_test_minmax_whole = np.hstack((new_x_test_minmax_A, new_x_test_minmax_B))
            new_x_validationt_minmax_whole = np.hstack((new_x_validation_minmax_A, new_x_validation_minmax_B))
            sda_transformed = train_a_Sda(new_x_train_minmax_whole, pretrain_lr, finetune_lr,
                                          y_train_minmax,
                                          new_x_validationt_minmax_whole, y_validation_minmax,
                                          new_x_test_minmax_whole, y_test,
                                          hidden_layers_sizes=hidden_layers_sizes,
                                          corruption_levels=corruption_levels,
                                          batch_size=batch_size,
                                          training_epochs=training_epochs,
                                          pretraining_epochs=pretraining_epochs,
                                          n_outs=n_outs)
            print 'hidden_layers_sizes:', hidden_layers_sizes
            print 'corruption_levels:', corruption_levels
            training_predicted = sda_transformed.predict(new_x_train_minmax_whole)
            y_train = y_train_minmax
            isTest = False  # new
            analysis_scr.append((subset_no, fisher_mode, 'DL_S', isTest) +
                                tuple(performance_score(y_train, training_predicted, with_auc_score).values()))
            test_predicted = sda_transformed.predict(new_x_test_minmax_whole)
            y_test = test_y
            isTest = True  # new
            analysis_scr.append((subset_no, fisher_mode, 'DL_S', isTest) +
                                tuple(performance_score(y_test, test_predicted, with_auc_score).values()))
        if settings['DL_S_new']:
            # deep learning using split network
            print 'new deep learning using split network'
            cfg = settings.copy()
            p_sda = Parellel_Sda_factory(cfg)
            p_sda.supervised_training(x_train_minmax, x_validation_minmax,
                                      y_train_minmax, y_validation_minmax)
            isTest = False  # new
            training_predicted = p_sda.predict(x_train_minmax)
            y_train = y_train_minmax
            analysis_scr.append((subset_no, fisher_mode, 'DL_S_new', isTest) +
                                tuple(performance_score(y_train, training_predicted, with_auc_score).values()))
            isTest = True  # new
            y_test = test_y
            test_predicted = p_sda.predict(x_test_minmax)
            analysis_scr.append((subset_no, fisher_mode, 'DL_S_new', isTest) +
                                tuple(performance_score(y_test, test_predicted, with_auc_score).values()))
        if settings['DL_S_new_contraction']:
            print 'DL_S_new_contraction'
            cfg = settings.copy()
            cfg['contraction_level'] = 0.1
            p_sda = Parellel_Sda_factory(cfg)
            p_sda.supervised_training(x_train_minmax, x_validation_minmax,
                                      y_train_minmax, y_validation_minmax)
            isTest = False  # new
            training_predicted = p_sda.predict(x_train_minmax)
            y_train = y_train_minmax
            analysis_scr.append((subset_no, fisher_mode, 'DL_S_new_contraction', isTest) +
                                tuple(performance_score(y_train, training_predicted, with_auc_score).values()))
            isTest = True  # new
            y_test = test_y
            test_predicted = p_sda.predict(x_test_minmax)
            analysis_scr.append((subset_no, fisher_mode, 'DL_S_new_contraction', isTest) +
                                tuple(performance_score(y_test, test_predicted, with_auc_score).values()))
        if settings['DL_S_new_sparsity'] == 1:
            print 'DL_S_new_sparsity'
            cfg = settings.copy()
            cfg['sparsity'] = 0.01
            cfg['sparsity_weight'] = 0.01
            p_sda = Parellel_Sda_factory(cfg)
            p_sda.supervised_training(x_train_minmax, x_validation_minmax,
                                      y_train_minmax, y_validation_minmax)
            isTest = False  # new
            training_predicted = p_sda.predict(x_train_minmax)
            y_train = y_train_minmax
            analysis_scr.append((subset_no, fisher_mode, 'DL_S_new_sparsity', isTest) +
                                tuple(performance_score(y_train, training_predicted, with_auc_score).values()))
            isTest = True  # new
            y_test = test_y
            test_predicted = p_sda.predict(x_test_minmax)
            analysis_scr.append((subset_no, fisher_mode, 'DL_S_new_sparsity', isTest) +
                                tuple(performance_score(y_test, test_predicted, with_auc_score).values()))
        if settings['DL_S_new_weight_decay'] == 2:
            cfg = settings.copy()
            cfg['l2_reg'] = 0.01
            print 'l2_reg'
            p_sda = Parellel_Sda_factory(cfg)
            p_sda.supervised_training(x_train_minmax, x_validation_minmax,
                                      y_train_minmax, y_validation_minmax)
            isTest = False  # new
            training_predicted = p_sda.predict(x_train_minmax)
            y_train = y_train_minmax
            analysis_scr.append((subset_no, fisher_mode, 'l2_reg', isTest) +
                                tuple(performance_score(y_train, training_predicted, with_auc_score).values()))
            isTest = True  # new
            y_test = test_y
            test_predicted = p_sda.predict(x_test_minmax)
            analysis_scr.append((subset_no, fisher_mode, 'l2_reg', isTest) +
                                tuple(performance_score(y_test, test_predicted, with_auc_score).values()))
        if settings['DL_S_new_weight_decay'] == 1:
            print 'l1_reg'
            cfg = settings.copy()
            cfg['l1_reg'] = 0.01
            p_sda = Parellel_Sda_factory(cfg)
            p_sda.supervised_training(x_train_minmax, x_validation_minmax,
                                      y_train_minmax, y_validation_minmax)
            isTest = False  # new
            training_predicted = p_sda.predict(x_train_minmax)
            y_train = y_train_minmax
            analysis_scr.append((subset_no, fisher_mode, 'l1_reg', isTest) +
                                tuple(performance_score(y_train, training_predicted, with_auc_score).values()))
            isTest = True  # new
            y_test = test_y
            test_predicted = p_sda.predict(x_test_minmax)
            analysis_scr.append((subset_no, fisher_mode, 'l1_reg', isTest) +
                                tuple(performance_score(y_test, test_predicted, with_auc_score).values()))
        if settings['DL_S_new_Drop_out'] == 1:
            cfg = settings.copy()
            cfg['dropout_factor'] = 0.5
            print 'DL_S_new_Drop_out'
            p_sda = Parellel_Sda_factory(cfg)
            p_sda.supervised_training(x_train_minmax, x_validation_minmax,
                                      y_train_minmax, y_validation_minmax)
            isTest = False  # new
            training_predicted = p_sda.predict(x_train_minmax)
            y_train = y_train_minmax
            analysis_scr.append((subset_no, fisher_mode, 'DL_S_new_Drop_out', isTest) +
                                tuple(performance_score(y_train, training_predicted, with_auc_score).values()))
            isTest = True  # new
            y_test = test_y
            test_predicted = p_sda.predict(x_test_minmax)
            analysis_scr.append((subset_no, fisher_mode, 'DL_S_new_Drop_out', isTest) +
                                tuple(performance_score(y_test, test_predicted, with_auc_score).values()))

    report_name = 'Hand_classification_' + '_'.join(map(str, hidden_layers_sizes)) + \
        '_' + str(pretrain_lr) + '_' + str(finetune_lr) + '_' + \
        str(settings['training_interations']) + '_' + current_date
    saveAsCsv(with_auc_score, report_name,
              performance_score(test_y, predicted_test_y, with_auc_score), analysis_scr)
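# Illustrative entry point (an assumption; the original driver script is not
# shown). Note that this second run_models() definition shadows the earlier
# pair-matching variant if both live in one module, and that it expects
# mnist.pkl.gz in the working directory.
if __name__ == '__main__':
    cfg = dict(example_settings)
    # keys read only by this MNIST variant; values again illustrative
    cfg.update({'fisher_mode': 'FisherM1', 'n_outs': 10,
                'SAE_SVM_COMBO': False, 'SAE_SVM_RBF_COMBO': False,
                'DL_U': False})
    run_models(settings=cfg)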