Code example #1
def faiss_KNORAE_knn(XTrain, YTrain, k, XTest, YTest):
    # time.perf_counter() replaces time.clock(), which was removed in Python 3.8
    start = time.perf_counter()
    knorae_sk = KNORAE(k=k, knn_classifier='faiss')
    knorae_sk.fit(XTrain, YTrain)
    score = knorae_sk.score(XTest, YTest)
    print("faiss_knn_knorae run_time: {}".format(time.perf_counter() - start))
    print("faiss_knn_knorae score: {}".format(score))
    print("faiss_knn_knorae score: {}".format(score))
Code example #2
File: test_integration_dfp.py Project: cg430/DESlib
def test_kne_proba():
    pool_classifiers, X_dsel, y_dsel, X_test, y_test = setup_classifiers()

    kne = KNORAE(pool_classifiers, DFP=True)
    kne.fit(X_dsel, y_dsel)
    probas = kne.predict_proba(X_test)
    expected = np.load('deslib/tests/expected_values/kne_proba_DFP.npy')
    assert np.allclose(probas, expected)
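Note: several of the test snippets on this page call a setup_classifiers() helper that is not reproduced here. Below is a minimal sketch of what such a helper could look like, assuming the breast cancer dataset, a 33% test split, a 50% DSEL split, and a bagged pool of calibrated Perceptrons; all of these choices are assumptions and the real helper in each project's test suite may differ.

import numpy as np
from sklearn.calibration import CalibratedClassifierCV
from sklearn.datasets import load_breast_cancer
from sklearn.ensemble import BaggingClassifier
from sklearn.linear_model import Perceptron
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler


def setup_classifiers():
    # Hypothetical helper; the real setup_classifiers() lives in each
    # project's test suite and may use a different dataset and pool.
    rng = np.random.RandomState(123456)
    data = load_breast_cancer()
    X_train, X_test, y_train, y_test = train_test_split(
        data.data, data.target, test_size=0.33, random_state=rng)

    # Normalize using statistics from the training portion only
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)

    # Half of the training data is reserved as the dynamic selection set (DSEL)
    X_train, X_dsel, y_train, y_dsel = train_test_split(
        X_train, y_train, test_size=0.5, random_state=rng)

    # Calibrated Perceptrons so that predict_proba is available to KNORA-E
    base = CalibratedClassifierCV(Perceptron(max_iter=100))
    pool_classifiers = BaggingClassifier(base, n_estimators=10,
                                         random_state=rng)
    pool_classifiers.fit(X_train, y_train)
    return pool_classifiers, X_dsel, y_dsel, X_test, y_test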
Code example #3
File: test_des_integration.py Project: postyear/DES
def test_kne_proba(knn_methods):
    pool_classifiers, X_dsel, y_dsel, X_test, y_test = setup_classifiers()

    kne = KNORAE(pool_classifiers, knn_classifier=knn_methods)
    kne.fit(X_dsel, y_dsel)
    probas = kne.predict_proba(X_test)
    expected = np.load(
        'deslib/tests/expected_values/kne_proba_integration.npy')
    assert np.allclose(probas, expected)
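Examples #3 and #14 parametrize the test through a knn_methods pytest fixture that is defined in the project's conftest.py and not shown on this page. A possible sketch of such a fixture, assuming it simply iterates over the knn_classifier backends that DESlib accepts as strings ('knn' for scikit-learn, 'faiss' for the Faiss library):

import pytest


# Hypothetical fixture; the real one lives in the project's conftest.py.
# 'faiss' only works when the faiss package is installed.
@pytest.fixture(params=['knn', 'faiss'])
def knn_methods(request):
    return request.param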
Code example #4
def test_estimate_competence(index, expected):
    query = np.atleast_2d([1, 1])

    knora_e_test = KNORAE(create_pool_classifiers())
    knora_e_test.fit(X_dsel_ex1, y_dsel_ex1)
    knora_e_test.DFP_mask = np.ones(knora_e_test.n_classifiers)
    knora_e_test.neighbors = neighbors_ex1[index, :]
    knora_e_test.distances = distances_ex1[index, :]
    competences = knora_e_test.estimate_competence(query)
    assert np.isclose(competences, expected, atol=0.01).all()
Code example #5
def run_knorae(pool_classifiers, X_DSEL, y_DSEL, X_test, y_test, knn_type):
    knorae = KNORAE(pool_classifiers=pool_classifiers, knn_classifier=knn_type)

    knorae.fit(X_DSEL, y_DSEL)

    # time.perf_counter() replaces time.clock(), which was removed in Python 3.8
    start = time.perf_counter()
    score = knorae.score(X_test, y_test)
    end = time.perf_counter() - start

    return score, end
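run_knorae above only times the scoring of an already-fitted pool. A possible way to call it, reusing the hypothetical setup_classifiers() helper sketched after example #2 (this call is illustrative, not part of the original project):

pool_classifiers, X_dsel, y_dsel, X_test, y_test = setup_classifiers()

# 'knn' selects scikit-learn's KNeighborsClassifier for the region of
# competence; 'faiss' could be passed instead when faiss is installed.
score, elapsed = run_knorae(pool_classifiers, X_dsel, y_dsel,
                            X_test, y_test, knn_type='knn')
print("KNORA-E accuracy: {:.3f} (scored in {:.4f} s)".format(score, elapsed))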
Code example #6
File: test_knorae.py Project: victorlorena/DESlib
def test_select(index, expected):
    query = np.atleast_2d([1, 1])

    knora_e_test = KNORAE(create_pool_classifiers())
    knora_e_test.fit(X_dsel_ex1, y_dsel_ex1)
    neighbors = neighbors_ex1[index, :].reshape(1, -1)
    competences = knora_e_test.estimate_competence(query, neighbors)
    selected = knora_e_test.select(competences)

    assert np.array_equal(selected, expected)
Code example #7
File: test_knorae.py Project: victorlorena/DESlib
def test_estimate_competence_batch():
    query = np.ones((3, 2))
    expected = np.array([[1.0, 0.0, 1.0], [2.0, 0.0, 2.0], [0.0, 3.0, 0.0]])

    knora_e_test = KNORAE(create_pool_classifiers())
    knora_e_test.fit(X_dsel_ex1, y_dsel_ex1)
    neighbors = neighbors_ex1
    distances = distances_ex1
    competences = knora_e_test.estimate_competence(query, neighbors, distances)
    assert np.allclose(competences, expected)
Code example #8
def test_select(index, expected):
    query = np.atleast_2d([1, 1])

    knora_e_test = KNORAE(create_pool_classifiers())
    knora_e_test.fit(X_dsel_ex1, y_dsel_ex1)
    knora_e_test.DFP_mask = np.ones(knora_e_test.n_classifiers)
    knora_e_test.neighbors = neighbors_ex1[index, :]
    knora_e_test.distances = distances_ex1[index, :]
    competences = knora_e_test.estimate_competence(query)
    selected = knora_e_test.select(competences)

    assert selected == expected
Code example #9
File: test_knorae.py Project: vishalbelsare/DESlib
def test_estimate_competence_batch(example_estimate_competence,
                                   create_pool_classifiers):
    X, y, neighbors, distances, _, _ = example_estimate_competence

    expected = np.array([[1.0, 0.0, 1.0], [2.0, 0.0, 2.0], [0.0, 3.0, 0.0]])

    knora_e_test = KNORAE(create_pool_classifiers)
    knora_e_test.fit(X, y)

    competences = knora_e_test.estimate_competence(neighbors,
                                                   distances=distances)
    assert np.allclose(competences, expected)
Code example #10
File: test_knorae.py Project: vishalbelsare/DESlib
def test_select(index, expected, create_pool_classifiers,
                example_estimate_competence):
    X, y, neighbors, distances, _, _ = example_estimate_competence

    knora_e_test = KNORAE(create_pool_classifiers)
    knora_e_test.fit(X, y)
    neighbors = neighbors[index, :].reshape(1, -1)
    distances = distances[index, :].reshape(1, -1)
    competences = knora_e_test.estimate_competence(neighbors,
                                                   distances=distances)
    selected = knora_e_test.select(competences)

    assert np.array_equal(selected, expected)
Code example #11
def test_select_none_competent():
    query = np.atleast_2d([1, 1])

    knora_e_test = KNORAE(create_pool_all_agree(2, 100))
    knora_e_test.fit(X_dsel_ex1, y_dsel_ex1)

    knora_e_test.neighbors = neighbors_ex1[0, :]
    knora_e_test.distances = distances_ex1[0, :]
    knora_e_test.DFP_mask = np.ones(knora_e_test.n_classifiers)

    competences = knora_e_test.estimate_competence(query)
    indices = knora_e_test.select(competences)

    assert indices == list(range(knora_e_test.n_classifiers))
Code example #12
    def DES(self, x_train, y_train, X_test, Y_test, dsel):
        pool_classifiers = BaggingClassifier(linear_model.Perceptron(max_iter=5), self.pool_size)
        pool_classifiers.fit(x_train, y_train)

        # Initialize the DES model
        knorae = KNORAE(pool_classifiers)
        knorau = KNORAU(pool_classifiers)

        # Fit the DES models on the Dynamic Selection dataset (DSEL).
        # fit() returns the estimator itself, so the scores must be
        # computed separately on the test set.
        knorae.fit(x_train[dsel], y_train[dsel])
        knorau.fit(x_train[dsel], y_train[dsel])

        score1 = knorae.score(X_test, Y_test)
        score2 = knorau.score(X_test, Y_test)
        return (score1, score2) + self.calc_metrics(X_test, Y_test)
Code example #13
    extra_clf = ExtraTreesClassifier(n_estimators=500,
                                     max_leaf_nodes=16,
                                     n_jobs=-1,
                                     random_state=42)
    svm_clf = SVC(probability=True, kernel="linear", C=float("inf"))

    # fit and predict
    rnd_clf.fit(X_train, y_train)
    extra_clf.fit(X_train, y_train)
    svm_clf.fit(X_train, y_train)

    hard_voting_clf = VotingClassifier(estimators=[('rf', rnd_clf),
                                                   ('ex', extra_clf),
                                                   ('svc', svm_clf)],
                                       voting='hard')  # hard voting

    soft_voting_clf = VotingClassifier(estimators=[('rf', rnd_clf),
                                                   ('ex', extra_clf),
                                                   ('svc', svm_clf)],
                                       voting='soft')  # soft voting

    # show each classifier's accuracy score
    for clf in (rnd_clf, extra_clf, svm_clf, hard_voting_clf, soft_voting_clf):
        clf.fit(X_train, y_train)
        y_pred = clf.predict(X_test)
        print(clf.__class__.__name__, accuracy_score(y_test, y_pred))

    # Exercise: 9

    knorae = KNORAE(rnd_clf)
    knorae.fit(X_dsel, y_dsel)
    # Predict with KNORA-E instead of reusing the last classifier's y_pred
    y_pred = knorae.predict(X_test)
    print(knorae.__class__.__name__, accuracy_score(y_test, y_pred))
Code example #14
File: test_des_integration.py Project: postyear/DES
def test_kne(knn_methods):
    pool_classifiers, X_dsel, y_dsel, X_test, y_test = setup_classifiers()

    kne = KNORAE(pool_classifiers, knn_classifier=knn_methods)
    kne.fit(X_dsel, y_dsel)
    assert np.isclose(kne.score(X_test, y_test), 0.973404255319148)
Code example #15
    # Training a random forest to be used as the pool of classifiers. We set the maximum depth of the tree so that it
    # can estimate probabilities
    pool_classifiers = RandomForestClassifier(n_estimators=10, max_depth=5)
    pool_classifiers.fit(X_train, y_train)

    # Initialize a DS technique. Here we specify the size of the region of competence (5 neighbors)
    knorau = KNORAU(pool_classifiers)
    kne = KNORAE(pool_classifiers, k=5)
    desp = DESP(pool_classifiers, k=5)
    ola = OLA(pool_classifiers, k=5)
    mcb = MCB(pool_classifiers, k=5)
    meta = METADES(pool_classifiers, k=5)

    # Fit the DS techniques
    knorau.fit(X_dsel, y_dsel)
    kne.fit(X_dsel, y_dsel)
    desp.fit(X_dsel, y_dsel)
    meta.fit(X_dsel, y_dsel)
    ola.fit(X_dsel, y_dsel)
    mcb.fit(X_dsel, y_dsel)

    # Calculate classification accuracy of each technique
    print('Classification accuracy RF: ',
          pool_classifiers.score(X_test, y_test))
    print('Evaluating DS techniques:')
    print('Classification accuracy KNORAU: ', knorau.score(X_test, y_test))
    print('Classification accuracy KNORA-Eliminate: ',
          kne.score(X_test, y_test))
    print('Classification accuracy DESP: ', desp.score(X_test, y_test))
    print('Classification accuracy OLA: ', ola.score(X_test, y_test))
    print('Classification accuracy MCB: ', mcb.score(X_test, y_test))
    print('Classification accuracy META-DES: ', meta.score(X_test, y_test))
Code example #16
File: des.py Project: fletch22/nba_win_predictor
def main():
    ###############################################################################
    # Preparing the dataset
    # ---------------------
    # In this part we load the breast cancer dataset from scikit-learn and
    # preprocess it in order to pass to the DS models. An important point here is
    # to normalize the data so that it has zero mean and unit variance, which is
    # a common requirement for many machine learning algorithms.
    # This step can be easily done using the StandardScaler class.

    rng = np.random.RandomState(123)
    data = load_breast_cancer()
    X = data.data
    y = data.target
    # split the data into training and test data
    X_train, X_test, y_train, y_test = train_test_split(X,
                                                        y,
                                                        test_size=0.33,
                                                        random_state=rng)

    # Scale the variables to have 0 mean and unit variance
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)

    # Split the data into training and DSEL for DS techniques
    X_train, X_dsel, y_train, y_dsel = train_test_split(X_train,
                                                        y_train,
                                                        test_size=0.5,
                                                        random_state=rng)

    # Train a pool of 100 base classifiers
    pool_classifiers = BaggingClassifier(Perceptron(max_iter=10),
                                         n_estimators=100,
                                         random_state=rng)
    pool_classifiers.fit(X_train, y_train)

    # Initialize the DS techniques
    knorau = KNORAU(pool_classifiers)
    kne = KNORAE(pool_classifiers)
    desp = DESP(pool_classifiers)
    ola = OLA(pool_classifiers)
    mcb = MCB(pool_classifiers)

    ###############################################################################
    # Calibrating base classifiers
    # -----------------------------
    # Some dynamic selection techniques require that the base classifiers
    # estimate probabilities in order to estimate their competence level.
    # Since the Perceptron model is not a probabilistic classifier (it does
    # not implement the predict_proba method), it needs to be calibrated for
    # probability estimation before being used by such DS techniques. This
    # step can be conducted using the CalibratedClassifierCV class from
    # scikit-learn. Note that in this example we pass a prefitted pool of
    # classifiers to the calibration method in order to use exactly the same
    # pool used in the other DS methods.
    calibrated_pool = []
    for clf in pool_classifiers:
        calibrated = CalibratedClassifierCV(base_estimator=clf, cv='prefit')
        calibrated.fit(X_dsel, y_dsel)
        calibrated_pool.append(calibrated)

    apriori = APriori(calibrated_pool)
    meta = METADES(calibrated_pool)

    knorau.fit(X_dsel, y_dsel)
    kne.fit(X_dsel, y_dsel)
    desp.fit(X_dsel, y_dsel)
    ola.fit(X_dsel, y_dsel)
    mcb.fit(X_dsel, y_dsel)
    apriori.fit(X_dsel, y_dsel)
    meta.fit(X_dsel, y_dsel)

    ###############################################################################
    # Evaluating the methods
    # -----------------------
    # Let's now evaluate the methods on the test set. We also use the performance
    # of Bagging (pool of classifiers without any selection) as a baseline
    # comparison. We can see that the majority of DS methods achieve higher
    # classification accuracy.

    print('Evaluating DS techniques:')
    print('Classification accuracy KNORA-Union: ',
          knorau.score(X_test, y_test))
    print('Classification accuracy KNORA-Eliminate: ',
          kne.score(X_test, y_test))
    print('Classification accuracy DESP: ', desp.score(X_test, y_test))
    print('Classification accuracy OLA: ', ola.score(X_test, y_test))
    print('Classification accuracy A priori: ', apriori.score(X_test, y_test))
    print('Classification accuracy MCB: ', mcb.score(X_test, y_test))
    print('Classification accuracy META-DES: ', meta.score(X_test, y_test))
    print('Classification accuracy Bagging: ',
          pool_classifiers.score(X_test, y_test))
Code example #17
File: test_integration_dfp.py Project: cg430/DESlib
def test_kne():
    pool_classifiers, X_dsel, y_dsel, X_test, y_test = setup_classifiers()

    kne = KNORAE(pool_classifiers, DFP=True)
    kne.fit(X_dsel, y_dsel)
    assert np.isclose(kne.score(X_test, y_test), 0.9)
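Examples #2 and #17 enable the DFP option, DESlib's implementation of Dynamic Frienemy Pruning (FIRE-DES), which removes from the pool, for each query, the base classifiers that cannot separate samples of different classes inside the region of competence. Below is a short sketch comparing KNORA-E with and without DFP, again reusing the hypothetical setup_classifiers() helper from above; the 0.9 score asserted in the test comes from the project's own fixtures, not from this sketch.

pool_classifiers, X_dsel, y_dsel, X_test, y_test = setup_classifiers()

kne_plain = KNORAE(pool_classifiers)           # standard KNORA-Eliminate
kne_fire = KNORAE(pool_classifiers, DFP=True)  # with Dynamic Frienemy Pruning

kne_plain.fit(X_dsel, y_dsel)
kne_fire.fit(X_dsel, y_dsel)

print('KNORA-E          :', kne_plain.score(X_test, y_test))
print('KNORA-E with DFP :', kne_fire.score(X_test, y_test))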
Code example #18
threshold = 0.4

# 20 train/test splits: 5 repetitions of 4-fold stratified cross-validation
for rep in range(1,6):
    skf = StratifiedKFold(n_splits=4)
    for train_index, test_index in skf.split(X, y):
        X_train, X_test = X[train_index], X[test_index]
        y_train, y_test = y[train_index], y[test_index]

        neigh = KNeighborsClassifier(n_neighbors=k_neigh)
        neigh.fit(X_train, y_train) 

        X_train, X_dsel, y_train, y_dsel = train_test_split(X_train, y_train, test_size=0.66)
        pool_classifiers = es.fit(X_train, y_train).estimators_
        # The model is KNORA-Eliminate, so name the variable accordingly
        knorae = KNORAE(pool_classifiers)
        knorae.fit(X_dsel, y_dsel)

        y_pred = []
        for instance in X_test:
            #use_des = select_classifier(threshold, k_neigh, X_dsel, y_dsel, instance)
            use_des = True
            if use_des:
                result = knorae.predict([instance])
                y_pred.append(result[0])
            else:
                result = neigh.predict([instance])
                y_pred.append(result[0])



        for name, metric in zip(['accuracy','roc_auc','gmean','f1'], [accuracy_score, roc_auc_score, gmean, f1_score]):