コード例 #1
0
def run_and_get_classifier(X_test, X_train, y_test, y_train):
    """Fit a small Trefle fuzzy classifier and report its result on the test split.

    Mind the parameter order: the test arrays come before the train arrays.

    Args:
        X_test: features the final predictions are made on.
        X_train: features the classifier is fitted on.
        y_test: expected outputs, forwarded to ``print_score``.
        y_train: outputs the classifier is fitted on.

    Returns:
        The fitted ``TrefleClassifier`` instance.
    """

    def fit(y_true, y_pred):
        # Fitness = accuracy. The raw predictions are floats in
        # [0, n_classes-1], so they are rounded to a class first.
        return accuracy_score(y_true, round_to_cls(y_pred, n_classes=2))

    settings = dict(
        n_rules=4,
        # a single consequent made of 2 classes
        n_classes_per_cons=[2],
        # 3 linguistic labels: LOW, MEDIUM, HIGH
        n_labels_per_mf=3,
        # the default rule yields class 0
        default_cons=[0],
        # At most 3 variables per rule to keep the system interpretable
        # (the WBCD dataset has 30 variables); with 4 rules a fuzzy system
        # can therefore use up to 3*4=12 different variables.
        n_max_vars_per_rule=3,
        n_generations=5,
        fitness_function=fit,
        verbose=True,
    )
    clf = TrefleClassifier(**settings)

    # Training
    clf.fit(X_train, y_train)

    # Prediction on the held-out data, then show the evolved system
    y_pred = clf.predict_classes(X_test)
    clf.print_best_fuzzy_system()

    print_score(y_pred, y_test)
    return clf
コード例 #2
0
    def _create_fis(self):
        """Build a ``TrefleClassifier``, fit it on the instance's training data and return it.

        The training data is read from ``self._X_train`` and ``self._y_train``.

        Returns:
            The fitted ``TrefleClassifier``.
        """

        def fit(y_true, y_pred):
            # Negated mean squared error: a smaller error gives a larger
            # value (presumably the fitness is maximised, confirm).
            return -mean_squared_error(y_true, y_pred)

        settings = dict(
            n_rules=3,
            n_classes_per_cons=[3],
            default_cons=[1],
            n_max_vars_per_rule=4,
            n_generations=5,
            pop_size=100,
            n_labels_per_mf=3,
            dc_weight=2,
            fitness_function=fit,
        )
        clf = TrefleClassifier(**settings)

        clf.fit(self._X_train, self._y_train)
        return clf
コード例 #3
0
def main():
    """Regression example: evolve a Trefle fuzzy system on the Boston housing data.

    Seeds both random generators, holds out 33% of the samples, trains with the
    negated mean squared error as fitness, prints the best fuzzy system and
    finally prints the mean squared error measured on the test set.
    """
    np.random.seed(0)
    random.seed(0)

    # NOTE(review): load_boston() is no longer shipped by recent scikit-learn
    # releases - confirm the pinned version before running this example.
    boston = load_boston()

    X = boston["data"]
    print(X.shape)
    # the output must be at least one column wide
    y = boston["target"].reshape(-1, 1)

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33)

    def fit(y_true, y_pred):
        # A regression metric is used, so y_pred needs no thresholding.
        return -mean_squared_error(y_true, y_pred)

    settings = dict(
        n_rules=5,
        # regression: the consequent has no class, hence 0
        n_classes_per_cons=[0],
        # 4 labels (LOW, MEDIUM, HIGH, VERY HIGH) for the consequent; even a
        # continuous output is expressed with a label,
        # e.g. "[...] THEN temperature is LOW"
        n_labels_per_cons=Label4,
        # 2 labels (LOW, HIGH) for the antecedents
        n_labels_per_mf=2,
        # the default rule yields the 4th (and last) label
        default_cons=[Label4.VERY_HIGH()],
        n_max_vars_per_rule=2,
        n_generations=30,
        fitness_function=fit,
        verbose=True,
    )
    clf = TrefleClassifier(**settings)

    # Training
    clf.fit(X_train, y_train)

    # NOTE(review): predict_classes() is called although this is a regression
    # problem; clf.predict(X_test) returns the non-thresholded output -
    # confirm which one is intended here.
    y_pred = clf.predict_classes(X_test)

    clf.print_best_fuzzy_system()

    # Mean squared error on the held-out data (lower is better)
    score = mean_squared_error(y_test, y_pred)
    report = "Score on test set: {:.3f}"
    print(report.format(score))
コード例 #4
0
def main():
    """Binary classification example on the breast cancer (WBCD) dataset.

    Seeds both random generators, holds out 33% of the samples, evolves a
    fuzzy system whose fitness is the accuracy, prints the best fuzzy system
    and finally prints the accuracy measured on the test set.
    """
    np.random.seed(0)
    random.seed(0)

    cancer = load_breast_cancer()

    X = cancer["data"]
    print(X.shape)
    # the output must be at least one column wide
    y = cancer["target"].reshape(-1, 1)

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33)

    def fit(y_true, y_pred):
        # y_pred holds floats in [0, n_classes-1]; round them to a class so
        # that the accuracy metric can be applied.
        return accuracy_score(y_true, round_to_cls(y_pred, n_classes=2))

    settings = dict(
        n_rules=4,
        # a single consequent made of 2 classes
        n_classes_per_cons=[2],
        # 3 labels: LOW, MEDIUM, HIGH
        n_labels_per_mf=3,
        # the default rule yields class 0
        default_cons=[0],
        # WBCD has 30 variables; capping at 3 per rule keeps the system
        # interpretable (at most 3*4=12 different variables overall)
        n_max_vars_per_rule=3,
        n_generations=20,
        fitness_function=fit,
        verbose=True,
    )
    clf = TrefleClassifier(**settings)

    # Training
    clf.fit(X_train, y_train)

    # predict_classes() returns thresholded predictions. The raw output is
    # available through predict(), in which case the threshold is up to you:
    #   y_pred_raw = clf.predict(X_test)
    #   y_pred = round_to_cls(y_pred_raw, n_classes=2)
    y_pred = clf.predict_classes(X_test)

    clf.print_best_fuzzy_system()

    # Accuracy on the held-out data
    score = accuracy_score(y_test, y_pred)
    report = "Score on test set: {:.3f}"
    print(report.format(score))
コード例 #5
0
def main():
    """Iris example where the target is one-hot encoded into 3 binary consequents.

    Seeds both random generators, holds out 33% of the samples, evolves a
    fuzzy system whose fitness is the accuracy, prints the best fuzzy system
    and finally prints the accuracy measured on the test set.
    """
    np.random.seed(0)
    random.seed(0)

    iris = load_iris()

    X = iris["data"]
    # target goes from shape (150,) to (150, 3) once one-hot encoded
    y = create_one_hot_from_array(iris["target"])

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33)

    def fit(y_true, y_pred):
        # y_pred holds floats in [0, n_classes-1] and must be rounded before
        # computing the accuracy.
        # Warning /!\ with a one-hot-encoded target every consequent is a
        # binary class, so n_classes is 2 here and not the number of classes
        # of the original problem.
        return accuracy_score(y_true, round_to_cls(y_pred, n_classes=2))

    settings = dict(
        # a one-hot-encoded problem needs at least 1 rule per class,
        # hence 3 rules
        n_rules=3,
        # 3 consequents with 2 classes each
        n_classes_per_cons=[2, 2, 2],
        # 4 labels: LOW, MEDIUM, HIGH, VERY HIGH
        n_labels_per_mf=4,
        # the default rule yields class 2
        default_cons=[0, 0, 1],
        # allow all 4 iris variables (PL, PW, SL, SW) in a rule
        n_max_vars_per_rule=4,
        n_generations=30,
        fitness_function=fit,
        verbose=True,
    )
    clf = TrefleClassifier(**settings)

    # Training
    clf.fit(X_train, y_train)

    # Raw predictions thresholded by hand (instead of predict_classes())
    y_pred = round_to_cls(clf.predict(X_test), n_classes=2)

    clf.print_best_fuzzy_system()

    # Accuracy on the held-out data.
    # Important /!\ the scoring function may differ from the fitness.
    score = accuracy_score(y_test, y_pred)
    report = "Score on test set: {:.3f}"
    print(report.format(score))
コード例 #6
0
def main():
    """Iris example using a single consequent with 3 classes.

    Seeds both random generators, holds out 33% of the samples, evolves a
    fuzzy system whose fitness is the accuracy, prints the best fuzzy system
    and finally prints the weighted F1 score measured on the test set.
    """
    np.random.seed(0)
    random.seed(0)

    iris = load_iris()

    N_CLASSES = 3

    X = iris["data"]
    # the output must be at least one column wide
    y = iris["target"].reshape(-1, 1)

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33)

    def fit(y_true, y_pred):
        # y_pred holds floats in [0, n_classes-1]; round them to one of the
        # N_CLASSES classes so that the accuracy metric can be applied.
        return accuracy_score(y_true, round_to_cls(y_pred, N_CLASSES))

    settings = dict(
        n_rules=2,
        # a single consequent made of 3 classes
        n_classes_per_cons=[N_CLASSES],
        # 4 labels: LOW, MEDIUM, HIGH, VERY HIGH
        n_labels_per_mf=4,
        # the default rule yields class 1
        default_cons=[1],
        # allow all 4 iris variables (PL, PW, SL, SW) in a rule
        n_max_vars_per_rule=4,
        n_generations=30,
        fitness_function=fit,
        verbose=True,
    )
    clf = TrefleClassifier(**settings)

    # Training
    clf.fit(X_train, y_train)

    # Thresholded predictions on the held-out data
    y_pred = clf.predict_classes(X_test)

    clf.print_best_fuzzy_system()

    # Weighted F1 on the held-out data.
    # Important /!\ the scoring function may differ from the fitness.
    score = f1_score(y_test, y_pred, average="weighted")
    report = "Score on test set: {:.3f}"
    print(report.format(score))
コード例 #7
0
def run():
    """Train a Trefle classifier on the breast cancer data and round-trip it through TFF.

    Steps: seed both random generators, print the target and feature names,
    hold out 33% of the samples, fit the classifier with accuracy as fitness,
    print the best fuzzy system and its TFF string, rebuild a ``TrefleFIS``
    from that string and describe it, then print the accuracy and the
    classification report obtained on the test set.
    """
    import numpy as np
    import random

    np.random.seed(6)
    random.seed(6)

    # Load dataset
    data = load_breast_cancer()

    # Organize our data
    y_names = data["target_names"]
    print("target names", y_names)
    y = data["target"]
    y = y.reshape(-1, 1)  # output needs to be at least 1 column wide
    X_names = data["feature_names"]
    print("features names", X_names)
    X = data["data"]

    # Split our data
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33)

    def fit(y_true, y_pred):
        # Fitness = accuracy computed on predictions rounded to one of 2 classes
        y_pred_thresholded = round_to_cls(y_pred, n_classes=2)
        return accuracy_score(y_true, y_pred_thresholded)

    # Initialize our classifier
    clf = TrefleClassifier(
        n_rules=3,
        n_classes_per_cons=[2],
        default_cons=[1],
        n_max_vars_per_rule=3,
        n_generations=10,
        pop_size=100,
        n_labels_per_mf=3,
        verbose=True,
        dc_weight=1,
        n_lv_per_ind_sp1=40,
        fitness_function=fit,
    )

    # Train our classifier (the return value of fit() is not needed)
    clf.fit(X_train, y_train)

    # Raw (non-thresholded) predictions; they are rounded further below
    y_pred = clf.predict(X_test)

    clf.print_best_fuzzy_system()
    tff_str = clf.get_best_fuzzy_system_as_tff()
    print(tff_str)

    # Rebuild a fuzzy system from its TFF representation and describe it
    restored_fis = TrefleFIS.from_tff(tff_str)
    restored_fis.describe()

    # Evaluate accuracy
    print("Simple run score: ")

    y_pred_thresholded = round_to_cls(y_pred, n_classes=2)
    print("acc", accuracy_score(y_test, y_pred_thresholded))
    print(classification_report(y_test, y_pred_thresholded))
コード例 #8
0
def main():
    """Grid-search Trefle hyper-parameters on the breast cancer dataset.

    Executing this function is time consuming: one evolution is run for every
    parameter combination and every one of the 3 cross-validation folds.

    Prints the best parameters, the test accuracy of the best estimator and
    the best fuzzy system.
    """

    np.random.seed(0)
    random.seed(0)

    cancer = load_breast_cancer()

    X = cancer["data"]
    print(X.shape)
    # the output must be at least one column wide
    y = cancer["target"].reshape(-1, 1)

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33)

    def evaluate(y_true, y_pred):
        # y_pred holds floats in [0, n_classes-1]; round them to a class so
        # that the accuracy metric can be applied.
        return accuracy_score(y_true, round_to_cls(y_pred, n_classes=2))

    # Some arguments, such as n_rules, are mandatory. To grid search such a
    # parameter, set it to None here and list its candidate values below.
    base_settings = dict(
        n_rules=None,  # mandatory argument, tuned through the grid
        # a single consequent made of 2 classes
        n_classes_per_cons=[2],
        # 3 labels: LOW, MEDIUM, HIGH
        n_labels_per_mf=3,
        default_cons=None,
        # WBCD has 30 variables; capping the number of variables per rule
        # keeps the system interpretable
        n_max_vars_per_rule=3,
        n_generations=20,
        fitness_function=evaluate,
        verbose=True,
    )
    estimator = TrefleClassifier(**base_settings)

    def get_fitness_functions():
        # Candidate fitness functions: weighted means of recall and precision

        def get_recall_and_precision_score(y_true, y_pred):
            y_pred_bin = round_to_cls(y_pred, n_classes=2)
            return (
                recall_score(y_true, y_pred_bin),
                precision_score(y_true, y_pred_bin),
            )

        def ff1(y_true, y_pred):
            recall, precision = get_recall_and_precision_score(y_true, y_pred)
            return (1.0 * recall + 2.0 * precision) / 3.0

        def ff2(y_true, y_pred):
            recall, precision = get_recall_and_precision_score(y_true, y_pred)
            return (2.0 * recall + 1.0 * precision) / 3.0

        def ff3(y_true, y_pred):
            recall, precision = get_recall_and_precision_score(y_true, y_pred)
            return (1.0 * recall + 3.0 * precision) / 4.0

        return ff1, ff2, ff3

    rules_and_default = {"n_rules": [2, 5], "default_cons": [[0], [1]]}
    rules_default_and_vars = {
        "n_rules": [4, 5],
        "default_cons": [[0], [1]],
        "n_max_vars_per_rule": [3, 5, 6],
    }
    fitness_only = {
        "n_rules": [4],
        "default_cons": [[0]],
        "fitness_function": get_fitness_functions(),
    }
    tuned_parameters = [rules_and_default, rules_default_and_vars, fitness_only]

    # The scoring reuses evaluate(), but the two roles are distinct: the
    # fitness function compares models within one configuration/run, whereas
    # the scoring function compares the best individuals of different
    # configurations/runs. They do not have to be the same function.
    scorer = make_scorer(evaluate)
    clf = GridSearchCV(estimator, tuned_parameters, cv=3, scoring=scorer)

    # Run the search
    clf.fit(X_train, y_train)

    print("Best params: ")
    print(clf.best_params_)

    best_estimator = clf.best_estimator_
    y_pred_test = best_estimator.predict_classes(X_test)
    print(accuracy_score(y_true=y_test, y_pred=y_pred_test))
    best_estimator.print_best_fuzzy_system()