# --- Disabled: cross-validation over the RFECV-selected features ------------
# y_predicted_report_rfecv_cv=[]
# y_predicted_report_rfecv_cv,y_test_report_rfecv_cv=cl.do_cross_validation(classifier,cv,all_feature_matrix,y,index_num)
#
# ####### Compute confusion matrix and classification report #######
# cl.print_confusion_matrix(y_test_report_rfecv_cv, y_predicted_report_rfecv_cv,"rfecv_cvloop")
# cl.print_classification_report(y_test_report_rfecv_cv, y_predicted_report_rfecv_cv,['class 0', 'class 1'])

print("############################################")

# Stratified 5-fold splitter over the evaluation labels, plus a linear SVM
# configured to produce probability estimates.
folds = 5
cv = StratifiedKFold(y_eval, n_folds=folds)
classifier = svm.SVC(kernel='linear', probability=True)

# --- Feature selection using RFECV only --------------------------------------
only_feature_selection, index_arr_onlyfs = cl.select_optimal_features(
    X_cv_normalized_matrix,
    y_cv,
    classifier,
)
print("number of features selected only with rfecv: %d" % len(index_arr_onlyfs))

index_num_fs_only, index_freq_fs_only = cl.sort_and_combine_feature_indices(
    index_arr_onlyfs
)
print("index numbers are: %s" % (index_num_fs_only,))

# Persist the selected feature indices to the output folder.
rw.write_features_to_file(
    index_num_fs_only,
    output_folder,
    "rfecv_selected13_features.txt",
)
# print("index freq are: " + str(index_freq_fs_only))

# --- Collect the vocabulary entries of the features selected by RFECV alone --
rfecv_only_feature_arr = []
for val in index_num_fs_only:
    # print val
    # print (inv_global_vocab[val])
    # Map each selected index back through the inverse vocabulary.
    rfecv_only_feature_arr.append(inv_global_vocab[val])
# Per-split fragment (line breaks were lost; the statements below are run together on one line).
# Visible steps, in order:
#   1. normalise the `train` and `test` rows of all_feature_matrix with two separate
#      cl.normalise_mean_var calls and reset y_predicted2 to an empty list;
#   2. call cl.select_optimal_features on the normalised training rows and y[train];
#   3. print inv_global_vocab[val] for every val in index_arr_onlyfs;
#   4. build matrix_for_train / matrix_for_test with cl.make_new_matrix from the same indices;
#   5. start a classifier.fit(matrix_for_train, ...) call whose remaining arguments are cut off.
# NOTE(review): the test rows are normalised by their own call rather than from the
# training rows -- presumably with their own statistics; confirm against cl.normalise_mean_var.
# NOTE(review): `train` / `test` look like indices of an enclosing cross-validation loop
# that is not visible in this fragment -- confirm.
normalized_matrix_train = cl.normalise_mean_var(all_feature_matrix[train]) normalised_matrix_test = cl.normalise_mean_var(all_feature_matrix[test]) y_predicted2 = [] ##### #for clf,name_clf in zip(classifiers #create a pipeline # wrapper_filter= somefilter # pipe_line = Pipeline([('wrapper', wrapper_selection), (name_clf, clf)]) # pipe_line.fit(normalized_matrix_train,y[train]) #pipe_line.score() ##### #select features using rfecv only on train data only_feature_selection_matrix, index_arr_onlyfs = cl.select_optimal_features( normalized_matrix_train, y[train], classifier) #index_num,index_freq=cl.sort_and_combine_feature_indices(index_arr_onlyfs) for val in index_arr_onlyfs: #print ("val is: " +str(val)) print(inv_global_vocab[val]) #index_num_fs_only,index_freq_fs_only=cl.sort_and_combine_feature_indices(index_arr_onlyfs) matrix_for_train = cl.make_new_matrix(index_arr_onlyfs, normalized_matrix_train) #classifier.fit(matrix_for_train, y[train]) matrix_for_test = cl.make_new_matrix(index_arr_onlyfs, normalised_matrix_test) probas_ = classifier.fit(matrix_for_train,
# RFECV-only feature-selection fragment (line breaks were lost; because the line opens with
# '#', the whole run of statements below currently reads as a single comment).
# Visible steps, in order:
#   1. a disabled cl.do_cross_validation call with its confusion-matrix / report printing;
#   2. print a separator, set folds = 5, build StratifiedKFold(y_eval, n_folds=folds) and a
#      linear svm.SVC with probability=True;
#   3. call cl.select_optimal_features on X_cv_normalized_matrix / y_cv and print how many
#      indices it returned;
#   4. pass those indices to cl.sort_and_combine_feature_indices, print the result and write it
#      with rw.write_features_to_file to "rfecv_selected13_features.txt" under output_folder;
#   5. initialise rfecv_only_feature_arr and open a loop over index_num_fs_only.
# NOTE(review): the fragment stops after the loop header; only commented-out prints are
# visible, so the loop body is missing from this view.
# y_predicted_report_rfecv_cv=[] # y_predicted_report_rfecv_cv,y_test_report_rfecv_cv=cl.do_cross_validation(classifier,cv,all_feature_matrix,y,index_num) # # ####### Compute confusion matrix and classsfication report ####### # cl.print_confusion_matrix(y_test_report_rfecv_cv, y_predicted_report_rfecv_cv,"rfecv_cvloop") # cl.print_classification_report(y_test_report_rfecv_cv, y_predicted_report_rfecv_cv,['class 0', 'class 1']) print("############################################") folds = 5 cv = StratifiedKFold(y_eval, n_folds=folds) classifier = svm.SVC(kernel='linear', probability=True) ###################### Feature selection using RFECV only ################################################# only_feature_selection, index_arr_onlyfs = cl.select_optimal_features( X_cv_normalized_matrix, y_cv, classifier) print("number of features selected only with rfecv: " + str(len(index_arr_onlyfs))) index_num_fs_only, index_freq_fs_only = cl.sort_and_combine_feature_indices( index_arr_onlyfs) print("index numbers are: " + str(index_num_fs_only)) rw.write_features_to_file(index_num_fs_only, output_folder, "rfecv_selected13_features.txt") #print("index freq are: " + str(index_freq_fs_only)) ####### print features selected by rfecv alone ######################################### rfecv_only_feature_arr = [] for val in index_num_fs_only: #print val #print (inv_global_vocab[val])
# Normalise the held-out rows and reset the per-split prediction buffer.
# NOTE(review): the test rows go through their own cl.normalise_mean_var call rather than
# reusing anything derived from the training rows -- confirm this is intended.
normalised_matrix_test = cl.normalise_mean_var(all_feature_matrix[test])
y_predicted2 = []

# --- Disabled sketch: wrap selection + classifier in a Pipeline ---------------
# for clf,name_clf in zip(classifiers
# create a pipeline
# wrapper_filter= somefilter
# pipe_line = Pipeline([('wrapper', wrapper_selection), (name_clf, clf)])
# pipe_line.fit(normalized_matrix_train,y[train])
# pipe_line.score()
# -----------------------------------------------------------------------------

# Select features using RFECV on the training rows only.
only_feature_selection_matrix, index_arr_onlyfs = cl.select_optimal_features(
    normalized_matrix_train,
    y[train],
    classifier,
)
# index_num,index_freq=cl.sort_and_combine_feature_indices(index_arr_onlyfs)

# Print the vocabulary entry for every selected feature index.
for val in index_arr_onlyfs:
    # print ("val is: " +str(val))
    print(inv_global_vocab[val])
# index_num_fs_only,index_freq_fs_only=cl.sort_and_combine_feature_indices(index_arr_onlyfs)

# Build train and test matrices from the same selected indices.
matrix_for_train = cl.make_new_matrix(index_arr_onlyfs, normalized_matrix_train)
# classifier.fit(matrix_for_train, y[train])
matrix_for_test = cl.make_new_matrix(index_arr_onlyfs, normalised_matrix_test)