def main():
    """Train a classifier, export its best fuzzy system and score reloaded copies.

    The best fuzzy system is serialized as a tff string, printed, written to
    ``my_saved_model.tff`` and then loaded back twice (once from the file,
    once from the in-memory string). Each reloaded system predicts on the
    test set and its binarized predictions are passed to ``print_score``.
    """
    set_random_seed()

    X_test, X_train, y_test, y_train = get_dataset()
    clf = run_and_get_classifier(X_test, X_train, y_test, y_train)

    # tff (Trefle Fuzzy system File) is a json representation of the fuzzy
    # system. It is meant to be saved on disk or to be used by LFA Toolbox
    # (https://github.com/krypty/lfa_toolbox)
    tff = clf.get_best_fuzzy_system_as_tff()
    print(tff)

    model_path = "my_saved_model.tff"

    def report_score(fis):
        # In the future, it could be possible to call clf.predict_classes()
        # directly, see issue #1. Until then the raw outputs are binarized
        # by hand before scoring.
        raw_predictions = fis.predict(X_test)
        print_score(round_to_cls(raw_predictions, n_classes=2), y_test)

    # Export: save the fuzzy model to disk
    with open(model_path, mode="w") as f:
        f.write(tff)

    # Import from file
    report_score(TrefleFIS.from_tff_file(model_path))

    # Import from string
    report_score(TrefleFIS.from_tff(tff))
Example #2
0
    def predict_classes(self, X):
        """Predict on ``X`` and round every non-continuous consequent to a class.

        The raw output of ``self.predict(X)`` is modified column by column:
        column ``i`` is replaced by ``round_to_cls`` of itself whenever
        ``self.n_classes_per_cons[i]`` is positive. Other columns are treated
        as continuous variables and keep their raw values.

        :param X: input samples, forwarded unchanged to ``self.predict``
        :return: the prediction array with class columns rounded
        """
        predictions = self.predict(X)

        for col, n_cls in enumerate(self.n_classes_per_cons):
            if n_cls <= 0:
                # continuous variable: nothing to round
                continue
            predictions[:, col] = round_to_cls(predictions[:, col], n_cls)

        return predictions
def test_distribution_between_multiclass_output_should_be_equal():
    """12 evenly spaced raw outputs over 4 classes must yield 3 samples per class."""
    n_classes = 4
    n_samples = 12
    per_class = n_samples // n_classes

    # raw outputs span [0, n_classes - 1]
    raw_outputs = np.linspace(0, 1, n_samples) * (n_classes - 1)
    expected = [cls for cls in range(n_classes) for _ in range(per_class)]

    actual = round_to_cls(raw_outputs, n_classes=n_classes)

    assert_array_equal(actual, expected)
Example #4
0
 def fit(y_true, y_pred):
     """Fitness function: accuracy of the binarized predictions.

     y_pred holds floats in [0, n_classes-1], so it is binarized with
     round_to_cls() before the accuracy metric is applied.

     Warning /!\\ the targets have been one-hot-encoded, hence n_classes=2
     (and not n_classes=N_CLASSES): each consequent is a binary class.
     """
     return accuracy_score(y_true, round_to_cls(y_pred, n_classes=2))
Example #5
0
def main():
    """Train a TrefleClassifier on one-hot-encoded iris and print its test accuracy."""
    # Seed both random generators
    np.random.seed(0)
    random.seed(0)

    # Load the dataset and organize it
    iris = load_iris()
    features = iris["data"]
    # the target has shape (150,) and becomes (150, 3) once one-hot-encoded
    targets = create_one_hot_from_array(iris["target"])

    # Split our data
    X_train, X_test, y_train, y_test = train_test_split(
        features, targets, test_size=0.33
    )

    # Declare the fitness function we want to use
    def fit(y_true, y_pred):
        # y_pred are floats in [0, n_classes-1]. To use the accuracy metric
        # the output is binarized using round_to_cls().
        # Warning /!\ since the target has been one-hot-encoded, n_classes=2
        # is used instead of n_classes=N_CLASSES because each consequent is
        # a binary class
        return accuracy_score(y_true, round_to_cls(y_pred, n_classes=2))

    hyperparameters = {
        # the number of rules is raised to 3 because at least 1 rule per
        # class is needed in the case of a one-hot-encoded problem
        "n_rules": 3,
        # there are 3 consequents with 2 classes each
        "n_classes_per_cons": [2, 2, 2],
        # use 4 labels LOW, MEDIUM, HIGH, VERY HIGH
        "n_labels_per_mf": 4,
        # default rule yields the class 2
        "default_cons": [0, 0, 1],
        # let's use the 4 iris variables (PL, PW, SL, SW)
        "n_max_vars_per_rule": 4,
        "n_generations": 30,
        "fitness_function": fit,
        "verbose": True,
    }

    # Initialize and train our classifier
    clf = TrefleClassifier(**hyperparameters)
    clf.fit(X_train, y_train)

    # Make predictions: raw outputs first, then binarized per consequent
    y_pred = round_to_cls(clf.predict(X_test), n_classes=2)

    clf.print_best_fuzzy_system()

    # Evaluate accuracy
    # Important /!\ the fitness can be different than the scoring function
    score = accuracy_score(y_test, y_pred)
    print("Score on test set: {:.3f}".format(score))
Example #6
0
def test_predict_classes_should_return_same_results_as_predict_plus_manual_round():
    """Class predictions must equal the raw predictions rounded by hand."""
    X_train, X_test, y_train, y_test = get_sample_data()
    clf = get_trefle_classifier_instance(X_train, X_test, y_train, y_test)

    # reference: raw predictions rounded manually
    expected_classes = round_to_cls(clf.predict_X_test(), n_classes=3)

    # value under test: classes predicted directly by the classifier
    actual_classes = clf.predict_X_test_classes()
    print(actual_classes)

    assert_array_equal(actual_classes, expected_classes)
Example #7
0
def getConfusionMatrixValues(y_true, y_pred):
    """
    Return the four cells of the binary confusion matrix.

    ``y_pred`` is first binarized with ``round_to_cls(y_pred, n_classes=2)``
    and then compared against ``y_true``.

    The label set is pinned to ``[0, 1]``. Without it, ``confusion_matrix``
    infers the labels from the data and returns a 1x1 matrix when ``y_true``
    and the binarized predictions contain one and the same class, which made
    the 4-way unpacking below raise ``ValueError``.

    :param y_true: True labels (assumed to be encoded as 0/1)
    :param y_pred: Predictions of the algorithm, binarized here

    :type y_true: [[int]] - required
    :type y_pred: array-like - required

    :return: The confusion matrix cells ``(tn, fp, fn, tp)``
    :rtype: tuple
    """
    y_pred_bin = round_to_cls(y_pred, n_classes=2)
    matrix = confusion_matrix(y_true, y_pred_bin, labels=[0, 1])
    tn, fp, fn, tp = matrix.ravel()
    return tn, fp, fn, tp
Example #8
0
 def fit(y_true, y_pred):
     """Fitness function: accuracy of the predictions thresholded to 2 classes."""
     return accuracy_score(y_true, round_to_cls(y_pred, n_classes=2))
Example #9
0
def run():
    """Train a TrefleClassifier on the breast cancer dataset and report scores.

    Prints the target and feature names, the best fuzzy system (both in its
    printed form and as a tff string), the description of the system
    reloaded from that string, and finally the accuracy and the
    classification report computed on the test set.
    """
    import numpy as np
    import random

    np.random.seed(6)
    random.seed(6)

    # Load dataset
    dataset = load_breast_cancer()

    # Organize our data; the target is reshaped into a single column
    print("target names", dataset["target_names"])
    y = dataset["target"].reshape(-1, 1)
    print("features names", dataset["feature_names"])
    X = dataset["data"]

    # Split our data
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33)

    def fit(y_true, y_pred):
        # accuracy of the predictions thresholded to 2 classes
        return accuracy_score(y_true, round_to_cls(y_pred, n_classes=2))

    # Initialize our classifier
    clf = TrefleClassifier(
        n_rules=3,
        n_classes_per_cons=[2],
        default_cons=[1],
        n_max_vars_per_rule=3,
        n_generations=10,
        pop_size=100,
        n_labels_per_mf=3,
        verbose=True,
        dc_weight=1,
        n_lv_per_ind_sp1=40,
        fitness_function=fit,
    )

    # Train our classifier
    clf.fit(X_train, y_train)

    # Make predictions (raw outputs, thresholded further down)
    raw_predictions = clf.predict(X_test)

    # Show the best fuzzy system, then round-trip it through its tff string
    clf.print_best_fuzzy_system()
    tff_str = clf.get_best_fuzzy_system_as_tff()
    print(tff_str)

    reloaded_fis = TrefleFIS.from_tff(tff_str)
    reloaded_fis.describe()

    # Evaluate accuracy
    print("Simple run score: ")

    predicted_classes = round_to_cls(raw_predictions, n_classes=2)
    print("acc", accuracy_score(y_test, predicted_classes))
    print(classification_report(y_test, predicted_classes))
 def get_recall_and_precision_score(y_true, y_pred):
     """Return ``(recall, precision)`` of the predictions binarized to 2 classes."""
     binarized = round_to_cls(y_pred, n_classes=2)
     # recall is computed first, then precision, as in the returned tuple
     return recall_score(y_true, binarized), precision_score(y_true, binarized)
 def evaluate(y_true, y_pred):
     """Return the accuracy of the binarized predictions.

     y_pred holds floats in [0, n_classes-1]; it is binarized with
     round_to_cls() so that the accuracy metric can be used.
     """
     return accuracy_score(y_true, round_to_cls(y_pred, n_classes=2))
def test_distribution_between_binary_outputs_should_be_equal():
    """11 evenly spaced outputs in [0, 1] must split into six 0s and five 1s."""
    raw_outputs = np.linspace(0, 1, 11)
    expected = [0] * 6 + [1] * 5

    actual = round_to_cls(raw_outputs, n_classes=2)

    assert_array_equal(actual, expected)
        def _fitness(y_true, y_pred):
            """Weighted average of several binary-classification metrics.

            ``y_pred`` is binarized with ``round_to_cls(y_pred, n_classes=2)``
            for every metric except the MSE, which uses the raw ``y_pred``.
            Each metric is multiplied by its weight (``acc_w``, ``sen_w``,
            ``spe_w``, ``f1_w``, ``ppv_w``, ``npv_w``, ``fpr_w``, ``fnr_w``,
            ``fdr_w``, ``mse_w``; all read from the enclosing scope) and the
            weighted sum is divided by the sum of the weights.

            :param y_true: true labels (assumed to be encoded as 0/1)
            :param y_pred: raw predictions
            :return: ``fit / tot_w``, or 0 when ``abs(tot_w) < 1e-6``
            """
            # source of formulae:
            # https://en.wikipedia.org/wiki/Sensitivity_and_specificity
            tot_w = 0
            fit = 0

            y_pred_bin = round_to_cls(y_pred, n_classes=2)
            # Pin the labels so the matrix is always 2x2. Otherwise, when
            # y_true and y_pred_bin contain one and the same class, the
            # matrix is 1x1 and the 4-way unpacking raises ValueError.
            tn, fp, fn, tp = confusion_matrix(
                y_true, y_pred_bin, labels=[0, 1]
            ).ravel()

            # some metrics are set to 0 (np.nan_to_num) because we want to
            # avoid infinite numbers e.g. when dividing by 0. Oddly, we need
            # to filter "invalid" too to handle division errors. Be careful,
            # this only works if the metric is a "the higher the
            # better"-metric
            with np.errstate(divide="ignore", invalid="ignore"):
                # accuracy
                # no need to clip, denominator is >0
                acc = (tp + tn) / (tp + fp + fn + tn)
                fit += acc_w * acc
                tot_w += acc_w

                # sensitivity
                sen = np.nan_to_num(tp / (tp + fn))
                fit += sen_w * sen
                tot_w += sen_w

                # specificity
                spe = np.nan_to_num(tn / (tn + fp))
                fit += spe_w * spe
                tot_w += spe_w

                # f1score, ignore ill-defined value, it will be set to 0
                with catch_warnings():
                    filterwarnings("ignore", category=UndefinedMetricWarning)
                    f1 = f1_score(y_true, y_pred_bin)
                    fit += f1_w * f1
                    tot_w += f1_w

                # PPV
                # undefined (0/0) when nothing is predicted positive
                # (tp + fp == 0); nan_to_num then sets it to 0.
                ppv = np.nan_to_num(tp / (tp + fp))
                fit += ppv_w * ppv
                tot_w += ppv_w

                # NPV
                # undefined (0/0) when nothing is predicted negative
                # (tn + fn == 0); nan_to_num then sets it to 0.
                # note: we don't reuse PPV value because it could have been set
                # to 0 due to nan result.
                npv = np.nan_to_num(tn / (tn + fn))
                fit += npv_w * npv
                tot_w += npv_w

                # FPR
                fpr = 1 - spe
                fit += fpr_w * fpr
                tot_w += fpr_w

                # FNR
                fnr = 1 - sen
                fit += fnr_w * fnr
                tot_w += fnr_w

                # FDR
                fdr = 1 - ppv
                fit += fdr_w * fdr
                tot_w += fdr_w

                # MSE, negated so that a smaller error gives a higher value;
                # computed on the raw (non-binarized) predictions
                mse = -mean_squared_error(y_true, y_pred)
                fit += mse_w * mse
                tot_w += mse_w

            # handle zero-division
            return 0 if abs(tot_w) < 1e-6 else fit / tot_w