def run_and_get_classifier(X_test, X_train, y_test, y_train):
    # Declare the fitness function we want to use
    def fit(y_true, y_pred):
        # y_pred are floats in [0, n_classes-1]. To use the accuracy metric we
        # need to binarize the output using round_to_cls()
        y_pred_bin = round_to_cls(y_pred, n_classes=2)
        return accuracy_score(y_true, y_pred_bin)

    # Initialize our classifier
    clf = TrefleClassifier(
        n_rules=4,
        n_classes_per_cons=[2],  # there is only 1 consequent with 2 classes
        n_labels_per_mf=3,  # use 3 labels LOW, MEDIUM, HIGH
        default_cons=[0],  # the default rule yields class 0
        n_max_vars_per_rule=3,  # the WBCD dataset has 30 variables; here we
        # force a maximum of 3 variables per rule for better interpretability.
        # In total a fuzzy system can use up to 3*4=12 different variables
        n_generations=5,
        fitness_function=fit,
        verbose=True,
    )

    # Train our classifier
    clf.fit(X_train, y_train)

    # Make predictions
    y_pred = clf.predict_classes(X_test)

    clf.print_best_fuzzy_system()
    print_score(y_pred, y_test)
    return clf
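# A minimal sketch of how run_and_get_classifier() could be invoked, assuming
# the import paths shown in the Trefle README. print_score() is a hypothetical
# stand-in for the helper defined elsewhere in this repository.
import random

import numpy as np
from sklearn.datasets import load_breast_cancer
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from trefle.fitness_functions.output_thresholder import round_to_cls
from trefle.trefle_classifier import TrefleClassifier


def print_score(y_pred, y_true):
    # hypothetical stand-in: report plain accuracy
    print("accuracy: {:.3f}".format(accuracy_score(y_true, y_pred)))


if __name__ == "__main__":
    np.random.seed(0)
    random.seed(0)
    data = load_breast_cancer()  # the WBCD dataset referenced above
    X, y = data["data"], data["target"].reshape(-1, 1)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33)
    run_and_get_classifier(X_test, X_train, y_test, y_train)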
def main():
    np.random.seed(0)
    random.seed(0)

    # Load dataset
    data = load_boston()

    # Organize our data
    X = data["data"]
    print(X.shape)
    y = data["target"]
    y = np.reshape(y, (-1, 1))  # output needs to be at least 1 column wide

    # Split our data
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33)

    # Declare the fitness function we want to use
    def fit(y_true, y_pred):
        # Here there is no need to threshold y_pred because we are using a
        # regression metric. The sign is flipped because the fitness is
        # maximized.
        return -mean_squared_error(y_true, y_pred)

    # Initialize our model (TrefleClassifier is also used for regression)
    clf = TrefleClassifier(
        n_rules=5,
        n_classes_per_cons=[0],  # in regression there is no class (i.e. 0)
        n_labels_per_cons=Label4,  # use 4 labels LOW, MEDIUM, HIGH, VERY HIGH
        # for the consequent.
        # Recall: even for continuous variables we use a label,
        # e.g. "[...] THEN temperature is LOW"
        n_labels_per_mf=2,  # use 2 labels LOW, HIGH (for the antecedents)
        default_cons=[Label4.VERY_HIGH()],  # the default rule yields the 4th
        # (and last) label
        n_max_vars_per_rule=2,
        n_generations=30,
        fitness_function=fit,
        verbose=True,
    )

    # Train our classifier
    clf.fit(X_train, y_train)

    # Make predictions. In regression there are no classes to threshold, so
    # we use predict(), which returns the raw (non-thresholded) outputs
    y_pred = clf.predict(X_test)

    clf.print_best_fuzzy_system()

    # Evaluate the mean squared error
    score = mean_squared_error(y_test, y_pred)
    print("Score on test set: {:.3f}".format(score))
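# Note: load_boston() was deprecated and then removed in scikit-learn 1.2.
# A sketch of the same setup on a regression dataset that is still shipped
# with scikit-learn:
from sklearn.datasets import fetch_california_housing

data = fetch_california_housing()
X = data["data"]  # shape (20640, 8)
y = data["target"].reshape(-1, 1)  # median house value, continuous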
def main():
    np.random.seed(0)
    random.seed(0)

    # Load dataset
    data = load_breast_cancer()

    # Organize our data
    X = data["data"]
    print(X.shape)
    y = data["target"]
    y = np.reshape(y, (-1, 1))  # output needs to be at least 1 column wide

    # Split our data
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33)

    # Declare the fitness function we want to use
    def fit(y_true, y_pred):
        # y_pred are floats in [0, n_classes-1]. To use the accuracy metric we
        # need to binarize the output using round_to_cls()
        y_pred_bin = round_to_cls(y_pred, n_classes=2)
        return accuracy_score(y_true, y_pred_bin)

    # Initialize our classifier
    clf = TrefleClassifier(
        n_rules=4,
        n_classes_per_cons=[2],  # there is only 1 consequent with 2 classes
        n_labels_per_mf=3,  # use 3 labels LOW, MEDIUM, HIGH
        default_cons=[0],  # the default rule yields class 0
        n_max_vars_per_rule=3,  # the WBCD dataset has 30 variables; here we
        # force a maximum of 3 variables per rule for better interpretability.
        # In total a fuzzy system can use up to 3*4=12 different variables
        n_generations=20,
        fitness_function=fit,
        verbose=True,
    )

    # Train our classifier
    clf.fit(X_train, y_train)

    # Make predictions
    y_pred = clf.predict_classes(X_test)

    # Alternatively, you can use predict(), which returns non-thresholded
    # y_pred, but then you may need to apply a threshold yourself. For example:
    # y_pred_raw = clf.predict(X_test)
    # y_pred = round_to_cls(y_pred_raw, n_classes=2)

    clf.print_best_fuzzy_system()

    # Evaluate accuracy
    score = accuracy_score(y_test, y_pred)
    print("Score on test set: {:.3f}".format(score))
def main():
    np.random.seed(0)
    random.seed(0)

    # Load dataset
    data = load_iris()

    # Organize our data
    X = data["data"]
    y = data["target"]  # y.shape is (150,)
    y = create_one_hot_from_array(y)  # y.shape is now (150, 3)

    # Split our data
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33)

    # Declare the fitness function we want to use
    def fit(y_true, y_pred):
        # y_pred are floats in [0, n_classes-1]. To use the accuracy metric we
        # need to binarize the output using round_to_cls().
        # Warning /!\ since y has been one-hot-encoded we need to set
        # n_classes=2 instead of n_classes=N_CLASSES because each consequent
        # is a binary class
        y_pred_bin = round_to_cls(y_pred, n_classes=2)
        return accuracy_score(y_true, y_pred_bin)

    # Initialize our classifier
    clf = TrefleClassifier(
        n_rules=3,  # here we need to increase the number of rules to 3
        # because a one-hot-encoded problem needs at least 1 rule per class
        n_classes_per_cons=[2, 2, 2],  # there are 3 consequents with 2
        # classes each
        n_labels_per_mf=4,  # use 4 labels LOW, MEDIUM, HIGH, VERY HIGH
        default_cons=[0, 0, 1],  # the default rule yields class 2 (only the
        # 3rd consequent is set to 1)
        n_max_vars_per_rule=4,  # let's use the 4 iris variables (PL, PW, SL, SW)
        n_generations=30,
        fitness_function=fit,
        verbose=True,
    )

    # Train our classifier
    clf.fit(X_train, y_train)

    # Make predictions. With one-hot-encoded outputs we threshold each binary
    # consequent ourselves instead of calling predict_classes()
    # y_pred = clf.predict_classes(X_test)
    y_pred_raw = clf.predict(X_test)
    y_pred = round_to_cls(y_pred_raw, n_classes=2)

    clf.print_best_fuzzy_system()

    # Evaluate accuracy.
    # Important /!\ the fitness function can differ from the scoring function
    score = accuracy_score(y_test, y_pred)
    print("Score on test set: {:.3f}".format(score))
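# create_one_hot_from_array() comes from the Trefle codebase. A minimal
# plain-NumPy equivalent, plus the reverse mapping (useful to feed one-hot
# predictions into per-class metrics), could look like this sketch:
import numpy as np


def one_hot_from_array(y):
    # y holds integer class labels, e.g. [0, 2, 1]; build an (n, n_classes)
    # matrix with a single 1 per row
    n_classes = int(np.max(y)) + 1
    one_hot = np.zeros((y.shape[0], n_classes))
    one_hot[np.arange(y.shape[0]), y] = 1
    return one_hot


# Back to integer labels: take the index of the largest value per row. This
# also works on the thresholded predictions produced above, e.g.:
# y_labels = np.argmax(y_pred, axis=1)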
def main():
    np.random.seed(0)
    random.seed(0)

    # Load dataset
    data = load_iris()
    N_CLASSES = 3

    # Organize our data
    X = data["data"]
    y = data["target"]
    y = np.reshape(y, (-1, 1))  # output needs to be at least 1 column wide

    # Split our data
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33)

    # Declare the fitness function we want to use
    def fit(y_true, y_pred):
        # y_pred are floats in [0, n_classes-1]. To use the accuracy metric we
        # need to binarize the output using round_to_cls()
        y_pred_bin = round_to_cls(y_pred, N_CLASSES)
        return accuracy_score(y_true, y_pred_bin)

    # Initialize our classifier
    clf = TrefleClassifier(
        n_rules=2,
        n_classes_per_cons=[N_CLASSES],  # there is only 1 consequent,
        # with 3 classes
        n_labels_per_mf=4,  # use 4 labels LOW, MEDIUM, HIGH, VERY HIGH
        default_cons=[1],  # the default rule yields class 1
        n_max_vars_per_rule=4,  # let's use the 4 iris variables (PL, PW, SL, SW)
        n_generations=30,
        fitness_function=fit,
        verbose=True,
    )

    # Train our classifier
    clf.fit(X_train, y_train)

    # Make predictions
    y_pred = clf.predict_classes(X_test)

    clf.print_best_fuzzy_system()

    # Evaluate the F1 score.
    # Important /!\ the fitness can differ from the scoring function
    score = f1_score(y_test, y_pred, average="weighted")
    print("Score on test set: {:.3f}".format(score))
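# round_to_cls() is Trefle's output thresholder. A hypothetical plain-NumPy
# equivalent of the behavior the comments above describe (rounding continuous
# outputs in [0, n_classes-1] to the nearest class and clipping out-of-range
# values); the actual implementation ships with Trefle:
import numpy as np


def round_to_cls_sketch(y_pred, n_classes):
    # e.g. with n_classes=3: 0.4 -> 0, 1.6 -> 2, 2.7 -> 2
    return np.clip(np.rint(y_pred), 0, n_classes - 1).astype(int)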
def _create_fis(self):
    def fit(y_true, y_pred):
        # return the negated MSE because the fitness is maximized
        neg_mse = -mean_squared_error(y_true, y_pred)
        return neg_mse

    # Initialize our classifier
    clf = TrefleClassifier(
        n_rules=3,
        n_classes_per_cons=[3],
        default_cons=[1],
        n_max_vars_per_rule=4,
        n_generations=5,
        pop_size=100,
        n_labels_per_mf=3,
        dc_weight=2,
        fitness_function=fit,
    )
    clf.fit(self._X_train, self._y_train)
    return clf
def run():
    import random

    import numpy as np

    np.random.seed(6)
    random.seed(6)

    # Load dataset
    data = load_breast_cancer()
    # data = load_iris()

    # Organize our data
    y_names = data["target_names"]
    print("target names", y_names)
    y = data["target"]
    y = y.reshape(-1, 1)

    X_names = data["feature_names"]
    print("feature names", X_names)
    X = data["data"]

    # Alternatively, build a synthetic multi-consequent dataset:
    # X, y = make_classification(
    #     n_samples=1000, n_features=10, n_informative=5, n_classes=2
    # )
    # y = y.reshape(-1, 1)
    # multi_class_y_col = np.random.randint(0, 4, size=y.shape)
    # regr_y_col = np.random.random(size=y.shape) * 100 + 20
    # y = np.hstack((y, multi_class_y_col, regr_y_col))
    # print(y)

    # Split our data
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33)

    def fit(y_true, y_pred):
        y_pred_thresholded = round_to_cls(y_pred, n_classes=2)
        fitness_val = accuracy_score(y_true, y_pred_thresholded)
        return fitness_val

    # Initialize our classifier
    clf = TrefleClassifier(
        n_rules=3,
        n_classes_per_cons=[2],
        default_cons=[1],
        n_max_vars_per_rule=3,
        n_generations=10,
        pop_size=100,
        n_labels_per_mf=3,
        verbose=True,
        dc_weight=1,
        # p_positions_per_lv=16,
        n_lv_per_ind_sp1=40,
        fitness_function=fit,
    )

    # Train our classifier
    clf.fit(X_train, y_train)

    # Make predictions
    y_pred = clf.predict(X_test)

    clf.print_best_fuzzy_system()

    # Export the best fuzzy system to the TFF format and reload it
    tff_str = clf.get_best_fuzzy_system_as_tff()
    print(tff_str)
    fis_from_tff = TrefleFIS.from_tff(tff_str)
    fis_from_tff.describe()

    # fis = clf.get_best_fuzzy_system()
    # print("best fis is ", end="")
    # print(fis)
    # FISViewer(fis).show()

    # Evaluate accuracy
    print("Simple run score: ")
    y_pred_thresholded = round_to_cls(y_pred, n_classes=2)
    print("acc", accuracy_score(y_test, y_pred_thresholded))
    print(classification_report(y_test, y_pred_thresholded))
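# The TFF string above is plain text, so a trained fuzzy system can be
# persisted to disk and restored later. A minimal sketch (the file name is
# arbitrary; TrefleFIS.from_tff() is the same loader used above):
def save_tff(tff_str, path="best_fis.tff"):
    with open(path, "w") as f:
        f.write(tff_str)


def load_tff(path="best_fis.tff"):
    with open(path) as f:
        return TrefleFIS.from_tff(f.read())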
def main():
    """Executing this method is time-consuming."""
    np.random.seed(0)
    random.seed(0)

    # Load dataset
    data = load_breast_cancer()

    # Organize our data
    X = data["data"]
    print(X.shape)
    y = data["target"]
    y = np.reshape(y, (-1, 1))  # output needs to be at least 1 column wide

    # Split our data
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33)

    # Declare the fitness function we want to use
    def evaluate(y_true, y_pred):
        # y_pred are floats in [0, n_classes-1]. To use the accuracy metric we
        # need to binarize the output using round_to_cls()
        y_pred_bin = round_to_cls(y_pred, n_classes=2)
        return accuracy_score(y_true, y_pred_bin)

    # Initialize our classifier.
    # Note that some arguments, such as n_rules, are mandatory. To grid search
    # such a parameter, set it to None here and tune it below.
    estimator = TrefleClassifier(
        n_rules=None,  # mandatory argument, set to None and tuned below
        n_classes_per_cons=[2],  # there is only 1 consequent with 2 classes
        n_labels_per_mf=3,  # use 3 labels LOW, MEDIUM, HIGH
        default_cons=None,
        n_max_vars_per_rule=3,  # the WBCD dataset has 30 variables; here we
        # force a maximum of 3 variables per rule for better interpretability.
        # In total a fuzzy system with n_rules rules can use up to
        # 3*n_rules different variables
        n_generations=20,
        fitness_function=evaluate,
        verbose=True,
    )

    def get_fitness_functions():
        def get_recall_and_precision_score(y_true, y_pred):
            y_pred_bin = round_to_cls(y_pred, n_classes=2)
            recall = recall_score(y_true, y_pred_bin)
            precision = precision_score(y_true, y_pred_bin)
            return recall, precision

        def ff1(y_true, y_pred):
            recall, precision = get_recall_and_precision_score(y_true, y_pred)
            return (1.0 * recall + 2.0 * precision) / 3.0

        def ff2(y_true, y_pred):
            recall, precision = get_recall_and_precision_score(y_true, y_pred)
            return (2.0 * recall + 1.0 * precision) / 3.0

        def ff3(y_true, y_pred):
            recall, precision = get_recall_and_precision_score(y_true, y_pred)
            return (1.0 * recall + 3.0 * precision) / 4.0

        return ff1, ff2, ff3

    tuned_parameters = [
        {"n_rules": [2, 5], "default_cons": [[0], [1]]},
        {
            "n_rules": [4, 5],
            "default_cons": [[0], [1]],
            "n_max_vars_per_rule": [3, 5, 6],
        },
        {
            "n_rules": [4],
            "default_cons": [[0]],
            "fitness_function": get_fitness_functions(),
        },
    ]

    # Note that the scoring reuses the evaluate function, but the fitness
    # function (i.e. the function that compares models within a given
    # configuration/run) can differ from the scoring function (i.e. the
    # function that compares the best individuals across configurations/runs)
    clf = GridSearchCV(estimator, tuned_parameters, cv=3, scoring=make_scorer(evaluate))

    # Train our classifier
    clf.fit(X_train, y_train)

    print("Best params: ")
    print(clf.best_params_)

    best_estimator = clf.best_estimator_
    y_pred_test = best_estimator.predict_classes(X_test)
    print(accuracy_score(y_true=y_test, y_pred=y_pred_test))
    best_estimator.print_best_fuzzy_system()
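# tuned_parameters is a list of independent grids, as in scikit-learn's
# GridSearchCV. A quick sketch to count how many configurations the search
# above will evaluate (each configuration is additionally fit cv=3 times):
from sklearn.model_selection import ParameterGrid

n_configs = len(list(ParameterGrid(tuned_parameters)))
# grid 1: 2*2 = 4, grid 2: 2*2*3 = 12, grid 3: 1*1*3 = 3 -> 19 configurations
print("{} configurations, {} fits in total".format(n_configs, n_configs * 3))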