Code example #1
0
def select_k_best_features_using_univariate_selection(class_name,
                                                      k_best_features,
                                                      training_set):
    """Select the names of the k best numerical features for *class_name*.

    Scores each numerical feature with a univariate ANOVA F-test
    (``SelectKBest`` + ``f_classif``) against the labels of *class_name*
    and returns the names of the ``k_best_features`` highest-scoring ones.

    Args:
        class_name: key into each sample's ``"classes"`` dict giving the label.
        k_best_features: number of top features to keep.
        training_set: iterable of sample dicts; each must have a ``"classes"``
            dict and be accepted by ``select_numerical_characteristics``.

    Returns:
        List of selected feature names.
    """
    # Materialize as lists: in Python 3 map() returns a lazy, one-shot,
    # non-subscriptable iterator, so the original code crashed on
    # `numerical_characteristics_training_set[0]` and could not be fed to
    # scikit-learn. (The old `map(lambda x: x, ...)` identity wrapper was
    # also dropped.)
    class_labels = [sample["classes"][class_name] for sample in training_set]
    feature_rows = [select_numerical_characteristics(sample)
                    for sample in training_set]
    selector = SelectKBest(f_classif, k=k_best_features)
    selector.fit(feature_rows, class_labels)
    # get_support(indices=True) gives the selected column indexes directly.
    # The original recovered them via row0.index(value), which picks the
    # wrong column whenever two features share a value in the first sample.
    best_features_indexes = selector.get_support(indices=True)
    # NOTE(review): `numerical_characteristics` is presumably a module-level
    # list of feature names defined elsewhere in this file -- confirm.
    return [numerical_characteristics[i] for i in best_features_indexes]
Code example #2
0
def adjust_optimal_features_using_recursive_feature_elimination(class_name, training_set):
    """Find the optimal numerical feature subset for *class_name* via RFECV.

    Fits a linear SVM inside cross-validated recursive feature elimination
    and returns the names of the features ranked best (ranking == 1).

    Args:
        class_name: key into each sample's ``"classes"`` dict giving the label.
        training_set: iterable of sample dicts accepted by
            ``select_numerical_characteristics``.

    Returns:
        List of feature names in the optimal subset.
    """
    # Materialize as lists: in Python 3 map() yields a one-shot iterator
    # that scikit-learn cannot consume as X/y. (The old identity
    # `map(lambda x: x, ...)` wrapper was also dropped.)
    class_labels = [sample["classes"][class_name] for sample in training_set]
    feature_rows = [select_numerical_characteristics(sample)
                    for sample in training_set]
    # Linear kernel so RFE can read per-feature coefficients.
    svc = SVC(kernel="linear")
    # Modern scikit-learn (>= 0.18) API: StratifiedKFold takes n_splits and
    # receives the labels through fit(); the original passed the labels to
    # the constructor (pre-0.18 signature), which no longer works.
    rfecv = RFECV(estimator=svc, step=1, cv=StratifiedKFold(n_splits=2),
                  scoring='accuracy')
    # TODO: verify every class has at least n_splits members, otherwise
    # StratifiedKFold raises during fit.
    rfecv.fit(feature_rows, class_labels)
    # Features assigned rank 1 form the selected (optimal) subset.
    optimal_features_indexes = [i for i, rank in enumerate(rfecv.ranking_)
                                if rank == 1]
    print("Optimal number of features : %d" % rfecv.n_features_)
    # NOTE(review): `numerical_characteristics` is presumably a module-level
    # list of feature names defined elsewhere in this file -- confirm.
    return [numerical_characteristics[i] for i in optimal_features_indexes]
Code example #3
0
def adjust_optimal_features_using_recursive_feature_elimination(
        class_name, training_set):
    """Find the optimal numerical feature subset for *class_name* via RFECV.

    Fits a linear SVM inside cross-validated recursive feature elimination
    and returns the names of the features ranked best (ranking == 1).

    Args:
        class_name: key into each sample's ``"classes"`` dict giving the label.
        training_set: iterable of sample dicts accepted by
            ``select_numerical_characteristics``.

    Returns:
        List of feature names in the optimal subset.
    """
    # Materialize as lists: in Python 3 map() yields a one-shot iterator
    # that scikit-learn cannot consume as X/y. (The old identity
    # `map(lambda x: x, ...)` wrapper was also dropped.)
    class_labels = [sample["classes"][class_name] for sample in training_set]
    feature_rows = [select_numerical_characteristics(sample)
                    for sample in training_set]
    # Linear kernel so RFE can read per-feature coefficients.
    svc = SVC(kernel="linear")
    # Modern scikit-learn (>= 0.18) API: StratifiedKFold takes n_splits and
    # receives the labels through fit(); the original passed the labels to
    # the constructor (pre-0.18 signature), which no longer works.
    rfecv = RFECV(estimator=svc,
                  step=1,
                  cv=StratifiedKFold(n_splits=2),
                  scoring='accuracy')
    # TODO: verify every class has at least n_splits members, otherwise
    # StratifiedKFold raises during fit.
    rfecv.fit(feature_rows, class_labels)
    # Features assigned rank 1 form the selected (optimal) subset.
    optimal_features_indexes = [
        i for i, rank in enumerate(rfecv.ranking_) if rank == 1
    ]
    print("Optimal number of features : %d" % rfecv.n_features_)
    # NOTE(review): `numerical_characteristics` is presumably a module-level
    # list of feature names defined elsewhere in this file -- confirm.
    return [numerical_characteristics[i] for i in optimal_features_indexes]
Code example #4
0
def select_k_best_features_using_univariate_selection(class_name, k_best_features, training_set):
    """Select the names of the k best numerical features for *class_name*.

    Scores each numerical feature with a univariate ANOVA F-test
    (``SelectKBest`` + ``f_classif``) against the labels of *class_name*
    and returns the names of the ``k_best_features`` highest-scoring ones.

    Args:
        class_name: key into each sample's ``"classes"`` dict giving the label.
        k_best_features: number of top features to keep.
        training_set: iterable of sample dicts; each must have a ``"classes"``
            dict and be accepted by ``select_numerical_characteristics``.

    Returns:
        List of selected feature names.
    """
    # Materialize as lists: in Python 3 map() returns a lazy, one-shot,
    # non-subscriptable iterator, so the original code crashed on
    # `numerical_characteristics_training_set[0]` and could not be fed to
    # scikit-learn. (The old `map(lambda x: x, ...)` identity wrapper was
    # also dropped.)
    class_labels = [sample["classes"][class_name] for sample in training_set]
    feature_rows = [select_numerical_characteristics(sample)
                    for sample in training_set]
    selector = SelectKBest(f_classif, k=k_best_features)
    selector.fit(feature_rows, class_labels)
    # get_support(indices=True) gives the selected column indexes directly.
    # The original recovered them via row0.index(value), which picks the
    # wrong column whenever two features share a value in the first sample.
    best_features_indexes = selector.get_support(indices=True)
    # NOTE(review): `numerical_characteristics` is presumably a module-level
    # list of feature names defined elsewhere in this file -- confirm.
    return [numerical_characteristics[i] for i in best_features_indexes]