def select_k_best_features_using_univariate_selection(class_name, k_best_features, training_set):
    """Select the k best numerical features for predicting *class_name*.

    Scores each numerical characteristic with a univariate ANOVA F-test
    (``SelectKBest`` / ``f_classif``) and keeps the top ``k_best_features``.

    Args:
        class_name: key into each sample's ``"classes"`` dict giving its label.
        k_best_features: number of features to retain.
        training_set: iterable of sample dicts accepted by
            ``select_numerical_characteristics``.

    Returns:
        list of the selected entries of the module-level
        ``numerical_characteristics`` sequence, in column order.
    """
    # Materialize as lists: on Python 3, map() yields a one-shot iterator that
    # sklearn cannot consume and that the original code then tried to index,
    # crashing at runtime. (The original also wrapped one map() in an identity
    # map(lambda x: x, ...), which did nothing and is dropped here.)
    labels = [sample["classes"][class_name] for sample in training_set]
    feature_matrix = [select_numerical_characteristics(sample) for sample in training_set]

    selector = SelectKBest(f_classif, k=k_best_features)
    selector.fit(feature_matrix, labels)

    # get_support(indices=True) returns the selected column indexes directly.
    # The original recovered them via list.index() on the transformed first
    # row, which silently picks the wrong column whenever two features happen
    # to share a value in that row.
    best_features_indexes = selector.get_support(indices=True)

    # NOTE(review): ``numerical_characteristics`` is not defined in this
    # function — presumably a module-level list of feature names parallel to
    # the columns produced by select_numerical_characteristics; verify.
    return [numerical_characteristics[i] for i in best_features_indexes]
def adjust_optimal_features_using_recursive_feature_elimination(class_name, training_set):
    """Find the optimal numerical-feature subset for *class_name* via RFECV.

    Runs recursive feature elimination with a linear SVM, cross-validated
    with 2-fold stratified CV and accuracy scoring.

    Args:
        class_name: key into each sample's ``"classes"`` dict giving its label.
        training_set: iterable of sample dicts accepted by
            ``select_numerical_characteristics``.

    Returns:
        list of the entries of the module-level ``numerical_characteristics``
        sequence whose RFECV ranking is 1 (i.e. the selected features).
    """
    # Materialize as lists: on Python 3, map() yields a one-shot iterator
    # that sklearn cannot consume. The original identity map(lambda x: x, ...)
    # wrapper did nothing and is dropped.
    labels = [sample["classes"][class_name] for sample in training_set]
    feature_matrix = [select_numerical_characteristics(sample) for sample in training_set]

    # The "accuracy" scoring is proportional to the number of correct
    # classifications.
    svc = SVC(kernel="linear")
    # FIX: StratifiedKFold(class_names, 2) is the pre-0.18 sklearn signature
    # (labels as first argument) and raises TypeError on any modern release;
    # the labels are now supplied to fit() instead.
    rfecv = RFECV(estimator=svc, step=1, cv=StratifiedKFold(n_splits=2), scoring='accuracy')
    # ToDo: check if class has more than one representant always
    rfecv.fit(feature_matrix, labels)

    # ranking_ == 1 marks the features RFECV kept.
    optimal_features_indexes = [i for i, rank in enumerate(rfecv.ranking_) if rank == 1]
    print("Optimal number of features : %d" % rfecv.n_features_)

    # NOTE(review): ``numerical_characteristics`` is not defined in this
    # function — presumably a module-level list of feature names parallel to
    # the columns produced by select_numerical_characteristics; verify.
    return [numerical_characteristics[i] for i in optimal_features_indexes]
def adjust_optimal_features_using_recursive_feature_elimination(class_name, training_set):
    """Find the optimal numerical-feature subset for *class_name* via RFECV.

    Runs recursive feature elimination with a linear SVM, cross-validated
    with 2-fold stratified CV and accuracy scoring.

    Args:
        class_name: key into each sample's ``"classes"`` dict giving its label.
        training_set: iterable of sample dicts accepted by
            ``select_numerical_characteristics``.

    Returns:
        list of the entries of the module-level ``numerical_characteristics``
        sequence whose RFECV ranking is 1 (i.e. the selected features).
    """
    # Materialize as lists: on Python 3, map() yields a one-shot iterator
    # that sklearn cannot consume. The original identity map(lambda x: x, ...)
    # wrapper did nothing and is dropped.
    labels = [sample["classes"][class_name] for sample in training_set]
    feature_matrix = [select_numerical_characteristics(sample) for sample in training_set]

    # The "accuracy" scoring is proportional to the number of correct
    # classifications.
    svc = SVC(kernel="linear")
    # FIX: StratifiedKFold(class_names, 2) is the pre-0.18 sklearn signature
    # (labels as first argument) and raises TypeError on any modern release;
    # the labels are now supplied to fit() instead.
    rfecv = RFECV(estimator=svc, step=1, cv=StratifiedKFold(n_splits=2), scoring='accuracy')
    # ToDo: check if class has more than one representant always
    rfecv.fit(feature_matrix, labels)

    # ranking_ == 1 marks the features RFECV kept.
    optimal_features_indexes = [i for i, rank in enumerate(rfecv.ranking_) if rank == 1]
    print("Optimal number of features : %d" % rfecv.n_features_)

    # NOTE(review): ``numerical_characteristics`` is not defined in this
    # function — presumably a module-level list of feature names parallel to
    # the columns produced by select_numerical_characteristics; verify.
    return [numerical_characteristics[i] for i in optimal_features_indexes]
def select_k_best_features_using_univariate_selection(class_name, k_best_features, training_set):
    """Select the k best numerical features for predicting *class_name*.

    Scores each numerical characteristic with a univariate ANOVA F-test
    (``SelectKBest`` / ``f_classif``) and keeps the top ``k_best_features``.

    Args:
        class_name: key into each sample's ``"classes"`` dict giving its label.
        k_best_features: number of features to retain.
        training_set: iterable of sample dicts accepted by
            ``select_numerical_characteristics``.

    Returns:
        list of the selected entries of the module-level
        ``numerical_characteristics`` sequence, in column order.
    """
    # Materialize as lists: on Python 3, map() yields a one-shot iterator that
    # sklearn cannot consume and that the original code then tried to index,
    # crashing at runtime. (The original also wrapped one map() in an identity
    # map(lambda x: x, ...), which did nothing and is dropped here.)
    labels = [sample["classes"][class_name] for sample in training_set]
    feature_matrix = [select_numerical_characteristics(sample) for sample in training_set]

    selector = SelectKBest(f_classif, k=k_best_features)
    selector.fit(feature_matrix, labels)

    # get_support(indices=True) returns the selected column indexes directly.
    # The original recovered them via list.index() on the transformed first
    # row, which silently picks the wrong column whenever two features happen
    # to share a value in that row.
    best_features_indexes = selector.get_support(indices=True)

    # NOTE(review): ``numerical_characteristics`` is not defined in this
    # function — presumably a module-level list of feature names parallel to
    # the columns produced by select_numerical_characteristics; verify.
    return [numerical_characteristics[i] for i in best_features_indexes]