Ejemplo n.º 1
0
    def DCS(self, x_train, y_train, X_test, Y_test, dsel):
        """Fit LCA and OLA selectors on top of a bagged Perceptron pool.

        A ``BaggingClassifier`` of ``Perceptron(max_iter=5)`` estimators, sized
        by ``self.pool_size``, is trained on the whole training set. LCA and
        OLA are then fitted on the ``dsel``-indexed part of that same data.

        :param x_train: training samples
        :param y_train: training labels
        :param X_test: test samples, forwarded to ``self.calc_metrics``
        :param Y_test: test labels, forwarded to ``self.calc_metrics``
        :param dsel: index applied to ``x_train`` / ``y_train`` to obtain the
            dynamic selection dataset (DSEL)
        :return: the two ``fit`` results (LCA first, OLA second) followed by
            the items of the tuple returned by ``self.calc_metrics``
        """
        base_learner = linear_model.Perceptron(max_iter=5)
        pool_classifiers = BaggingClassifier(base_learner, self.pool_size)
        pool_classifiers.fit(x_train, y_train)

        # Both dynamic selection techniques share the same trained pool.
        selectors = (LCA(pool_classifiers), OLA(pool_classifiers))

        # NOTE(review): what is kept here is the return value of ``fit``, not
        # of ``score``; the original local names (score1/score2) suggest that
        # accuracy scores may have been intended -- confirm with callers.
        fit_results = tuple(
            selector.fit(x_train[dsel], y_train[dsel])
            for selector in selectors
        )

        # Depending on the dataset, the format may not be supported.
        return fit_results + self.calc_metrics(X_test, Y_test)
Ejemplo n.º 2
0
    def fit(self, x_sel, y_sel, P, k):
        '''
        Build the dynamic selection (DS) technique named by ``self.TYPE`` and
        fit it on the validation window.

        The fitted technique is stored in ``self.DS``.

        :param: x_sel: training data of the validation window
        :param: y_sel: labels of the validation window
        :param: P: pool of classifiers
        :param: k: neighbourhood size
        :raises ValueError: if ``self.TYPE`` is not one of 'knorae', 'knorau',
            'ola', 'lca', 'posteriori' or 'priori'
        '''

        # choosing the classifier selection technique
        if self.TYPE == 'knorae':
            DS = KNORAE(P, k)
        elif self.TYPE == 'knorau':
            DS = KNORAU(P, k)
        elif self.TYPE == 'ola':
            DS = OLA(P, k)
        elif self.TYPE == 'lca':
            DS = LCA(P, k)
        elif self.TYPE == 'posteriori':
            DS = APosteriori(P, k)
        elif self.TYPE == 'priori':
            DS = APriori(P, k)
        else:
            # Without this branch an unknown type left ``DS`` unbound and the
            # method failed later with an unrelated UnboundLocalError.
            raise ValueError(
                'Unknown dynamic selection type: {!r}'.format(self.TYPE))

        # A deep copy is stored, so the object kept in ``self.DS`` does not
        # share the pool ``P`` with the caller.
        self.DS = copy.deepcopy(DS)

        # finding the competent classifiers of the chosen DS technique
        self.DS.fit(x_sel, y_sel)
Ejemplo n.º 3
0
def escolher_modelo(nome, x_sel, y_sel, P, k):
    '''
    Build the dynamic selection (DS) technique called ``nome`` and fit it.

    :param: nome: technique name: 'OLA', 'LCA', 'KNORAE' or 'KNORAU'
    :param: x_sel: training data of the validation window
    :param: y_sel: labels of the validation window
    :param: P: pool of classifiers
    :param: k: neighbourhood size
    :return: tuple ``(DS, number_model)`` with the fitted technique and its
        numeric id (OLA=0, LCA=1, KNORAE=2, KNORAU=3)
    :raises ValueError: if ``nome`` is not one of the supported names
    '''

    # choosing the classifier selection technique
    if nome == 'OLA':
        DS = OLA(P, k)
        number_model = 0
    elif nome == 'LCA':
        DS = LCA(P, k)
        number_model = 1
    elif nome == 'KNORAE':
        DS = KNORAE(P, k)
        number_model = 2
    elif nome == 'KNORAU':
        DS = KNORAU(P, k)
        number_model = 3
    else:
        # Without this branch an unknown name left ``DS`` unbound and the
        # function failed below with an unrelated UnboundLocalError.
        raise ValueError(
            'Unknown dynamic selection technique: {!r}'.format(nome))

    # finding the competent classifiers of the chosen DS technique
    DS.fit(x_sel, y_sel)

    # returning the DS technique together with its numeric id
    return DS, number_model
Ejemplo n.º 4
0
 def __init__(
     self,
     name: str,
     model_params: Dict[str, Any],
     classifier_paths: Iterable[Tuple[str, str]],
 ) -> None:
     """Initialise the parent class, then create the LCA selector.

     All three arguments are forwarded unchanged to the parent initialiser.
     ``model_params`` is additionally expanded as keyword arguments for
     ``LCA``, which is built over ``self.classifiers`` and stored in
     ``self._selector``.
     """
     super().__init__(name, model_params, classifier_paths)
     # ``self.classifiers`` is read only after the parent initialiser ran.
     selector = LCA(self.classifiers, **model_params)
     self._selector = selector
Ejemplo n.º 5
0
def test_estimate_competence_woods(index, expected):
    """Check LCA competences for row ``index`` of the example neighbourhood.

    The selector state (processed DSEL, neighbours, distances, DFP mask and
    DSEL targets) is injected directly instead of calling ``fit``.
    """
    query = np.array([1, 1]).reshape(1, -1)

    selector = LCA(create_pool_classifiers())
    selector.processed_dsel = dsel_processed_ex1
    selector.DSEL_target = y_dsel_ex1
    selector.DFP_mask = [1, 1, 1]
    selector.neighbors = neighbors_ex1[index, :]
    selector.distances = distances_ex1[index, :]

    competences = selector.estimate_competence(query)
    matches = np.isclose(competences, expected)
    assert matches.all()
Ejemplo n.º 6
0
    def train(train_index, test_index):
        """Fit the pool on one train/test split and score its test predictions.

        ``X``, ``Y``, ``seed``, ``pool_classifiers``, ``hardness`` and ``args``
        are not parameters; they are read from the enclosing scope.

        The training part is split once more (70% / 30%): the larger share
        fits ``pool_classifiers``, the smaller one is passed through
        ``get_validation_data`` and is used to fit the dynamic selection
        technique when one is requested. Otherwise the pool is combined with
        ``voting``.

        :param train_index: index into ``X`` / ``Y`` selecting the training part
        :param test_index: index into ``X`` / ``Y`` selecting the test part
        :return: dict with the keys ``f1``, ``g1``, ``acc`` and ``roc``
        :raises ValueError: if dynamic selection is requested without an
            algorithm, or with an algorithm name that is not one of 'ola',
            'lca', 'mcb', 'knorau' or 'kne'
        """
        X_train, X_test = X[train_index], X[test_index]
        y_train, y_test = Y[train_index], Y[test_index]

        X_train, X_valid, y_train, y_valid = train_test_split(
            X_train, y_train, test_size=0.3, random_state=seed)
        pool_classifiers.fit(X_train, y_train)

        validation_data, validation_labels = get_validation_data(
            X_valid, y_valid, 0.5, hardness=hardness)

        # The explicit comparison is kept on purpose so that non-boolean flag
        # values are treated exactly as before.
        if args.dynamic_selection == True:  # noqa: E712
            algorithm = args.dynamic_algorithm
            if algorithm is None:
                raise ValueError(
                    'Dynamic selection requires you provide an algorithm.')
            elif algorithm == 'ola':
                ds_class = OLA
            elif algorithm == 'lca':
                ds_class = LCA
            elif algorithm == 'mcb':
                ds_class = MCB
            elif algorithm == 'knorau':
                ds_class = KNORAU
            elif algorithm == 'kne':
                ds_class = KNORAE
            else:
                # Previously an unknown name left the selector as ``None`` and
                # surfaced as an AttributeError on ``.fit``.
                raise ValueError(
                    'Unknown dynamic selection algorithm: {!r}'.format(
                        algorithm))

            dynamic_selection_algorithm = ds_class(pool_classifiers,
                                                   random_state=seed)
            dynamic_selection_algorithm.fit(validation_data,
                                            validation_labels)
            preds = dynamic_selection_algorithm.predict(X_test)
        else:
            # Static combination by voting
            preds = voting(X_test, pool_classifiers)

        acc = get_accuracy_score(y_test, preds)
        g1 = get_g1_score(y_test, preds, average='macro')
        f1 = get_f1_score(y_test, preds)
        roc = roc_auc_score(y_test, preds, average='macro')

        return dict(f1=f1, g1=g1, acc=acc, roc=roc)
Ejemplo n.º 7
0
def test_estimate_competence_batch(example_estimate_competence):
    """Check LCA competences for the whole example neighbourhood at once.

    The DSEL state is injected directly into an ``LCA`` built without a pool,
    and the same base predictions ``[0, 1, 0]`` are supplied for the batch.
    """
    _, y, neighbors, distances, dsel_processed, _ = example_estimate_competence

    expected = np.array([[0.75000000, 0.66666667, 0.75000000],
                         [0.80000000, 1.00000000, 0.80000000],
                         [1.00000000, 0.60000000, 0.50000000]])
    lca_test = LCA()
    lca_test.DSEL_processed_ = dsel_processed
    lca_test.DSEL_target_ = y

    # Already an ndarray: passed as-is, without a second ``np.array`` copy.
    predictions = np.array([[0, 1, 0]])
    competences = lca_test.estimate_competence(neighbors,
                                               distances=distances,
                                               predictions=predictions)

    assert np.isclose(competences, expected).all()
Ejemplo n.º 8
0
def test_estimate_competence_diff_target(index):
    """Check that every competence is 0 when all DSEL targets are set to 3.

    Row ``index`` of the example neighbours/distances is used as the region
    of competence, and the base predictions come from the pool itself.
    """
    query = np.atleast_2d([1, 1])
    expected = [0.0, 0.0, 0.0]

    selector = LCA(create_pool_classifiers())
    selector.DSEL_processed_ = dsel_processed_ex1
    selector.DSEL_target_ = np.ones(15, dtype=int) * 3

    region = neighbors_ex1[index, :].reshape(1, -1)
    region_distances = distances_ex1[index, :].reshape(1, -1)

    # First predicted label of each base classifier for the query.
    base_predictions = np.array(
        [clf.predict(query)[0] for clf in selector.pool_classifiers])

    competences = selector.estimate_competence(
        query,
        region,
        distances=region_distances,
        predictions=base_predictions)

    assert np.isclose(competences, expected).all()
Ejemplo n.º 9
0
def test_estimate_competence_diff_target(index, example_estimate_competence,
                                         create_pool_classifiers):
    """Check that every competence is 0 when all DSEL targets are set to 3.

    Row ``index`` of the fixture's neighbours/distances is used as the region
    of competence; the base predictions are fixed to ``[0, 1, 0]``.
    """
    _, _, all_neighbors, all_distances, dsel_processed, _ = (
        example_estimate_competence)

    query = np.atleast_2d([1, 1])
    expected = [0.0, 0.0, 0.0]

    selector = LCA(create_pool_classifiers)
    selector.DSEL_processed_ = dsel_processed
    selector.DSEL_target_ = np.ones(15, dtype=int) * 3

    region = all_neighbors[index, :].reshape(1, -1)
    region_distances = all_distances[index, :].reshape(1, -1)

    base_predictions = np.array([[0, 1, 0]])
    competences = selector.estimate_competence(
        query,
        region,
        distances=region_distances,
        predictions=np.array(base_predictions))

    assert np.isclose(competences, expected).all()
Ejemplo n.º 10
0
def test_estimate_competence_woods(index, expected):
    """Check LCA competences for row ``index`` of the example neighbourhood.

    The selector state is injected directly instead of calling ``fit``; the
    base predictions passed to ``estimate_competence`` come from the pool.
    """
    query = np.atleast_2d([1, 1])

    selector = LCA(create_pool_classifiers())
    selector.processed_dsel = dsel_processed_ex1
    selector.DSEL_target = y_dsel_ex1
    selector.DFP_mask = [1, 1, 1]
    selector.neighbors = neighbors_ex1[index, :]
    selector.distances = distances_ex1[index, :]

    # First predicted label of each base classifier for the query.
    base_predictions = np.array(
        [clf.predict(query)[0] for clf in selector.pool_classifiers])
    competences = selector.estimate_competence(
        query, predictions=base_predictions)

    assert np.allclose(competences, expected)
Ejemplo n.º 11
0
def test_estimate_competence_diff_target(index):
    """Check that every competence is 0 when all DSEL targets are set to 3.

    The selector state is injected directly instead of calling ``fit``; row
    ``index`` of the example neighbours/distances is the region used.
    """
    expected = [0.0, 0.0, 0.0]
    query = np.array([1, 1]).reshape(1, -1)

    selector = LCA(create_pool_classifiers())
    selector.processed_dsel = dsel_processed_ex1
    selector.DSEL_target = np.ones(15, dtype=int) * 3
    selector.DFP_mask = [1, 1, 1]
    selector.neighbors = neighbors_ex1[index, :]
    selector.distances = distances_ex1[index, :]

    competences = selector.estimate_competence(query)
    matches = np.isclose(competences, expected)
    assert matches.all()
Ejemplo n.º 12
0
def test_estimate_competence_batch():
    """Check LCA competences for a batch of three identical queries.

    The selector state is injected directly instead of calling ``fit``; the
    base predictions are the first label each pool member predicts.
    """
    query = np.ones((3, 2))
    expected = np.array([[0.75000000, 0.66666667, 0.75000000],
                         [0.80000000, 1.00000000, 0.80000000],
                         [1.00000000, 0.60000000, 0.50000000]])

    selector = LCA(create_pool_classifiers())
    selector.processed_dsel = dsel_processed_ex1
    selector.DSEL_target = y_dsel_ex1
    selector.DFP_mask = np.ones((3, 3))
    selector.neighbors = neighbors_ex1
    selector.distances = distances_ex1

    base_predictions = np.array(
        [clf.predict(query)[0] for clf in selector.pool_classifiers])
    competences = selector.estimate_competence(
        query, predictions=base_predictions)

    assert np.isclose(competences, expected).all()
Ejemplo n.º 13
0
def test_predict_proba():
    """Build an LCA over two references to one Perceptron fitted on the example DSEL."""
    perceptron = Perceptron()
    perceptron.fit(X_dsel_ex1, y_dsel_ex1)

    # The same fitted object appears twice in the pool.
    pool = [perceptron] * 2
    LCA(pool)
Ejemplo n.º 14
0
def test_lca(knn_methods):
    """Fit LCA with the given ``knn_classifier`` and compare its score to the reference value."""
    pool_classifiers, X_dsel, y_dsel, X_test, y_test = setup_classifiers()

    selector = LCA(pool_classifiers, knn_classifier=knn_methods)
    selector.fit(X_dsel, y_dsel)

    score = selector.score(X_test, y_test)
    assert np.isclose(score, 0.96808510638297873)
Ejemplo n.º 15
0
        X_train, y_train)
    model_linear_svm = CalibratedClassifierCV(LinearSVC()).fit(
        X_train, y_train)
    model_svc = SVC(probability=True).fit(X_train, y_train)
    model_bayes = GaussianNB().fit(X_train, y_train)
    model_tree = DecisionTreeClassifier().fit(X_train, y_train)
    model_knn = KNeighborsClassifier(n_neighbors=5).fit(X_train, y_train)
    pool_classifiers = [
        model_perceptron, model_linear_svm, model_svc, model_bayes, model_tree,
        model_knn
    ]

    # Initializing the DS techniques
    knop = KNOP(pool_classifiers)
    rrc = RRC(pool_classifiers)
    lca = LCA(pool_classifiers)
    mcb = MCB(pool_classifiers)
    aposteriori = APosteriori(pool_classifiers)

    # Fitting the techniques
    knop.fit(X_dsel, y_dsel)
    rrc.fit(X_dsel, y_dsel)
    lca.fit(X_dsel, y_dsel)
    mcb.fit(X_dsel, y_dsel)
    aposteriori.fit(X_dsel, y_dsel)

    # Calculate classification accuracy of each technique
    print('Evaluating DS techniques:')
    print('Classification accuracy KNOP: ', knop.score(X_test, y_test))
    print('Classification accuracy RRC: ', rrc.score(X_test, y_test))
    print('Classification accuracy LCA: ', lca.score(X_test, y_test))
Ejemplo n.º 16
0
def test_check_estimator():
    """Run ``check_estimator`` on an ``LCA`` built with default arguments."""
    estimator = LCA()
    check_estimator(estimator)
Ejemplo n.º 17
0
def test_lca(knne, expected):
    """Fit LCA with ``DFP=True`` and the given ``knne`` flag, then compare its score to ``expected``."""
    pool_classifiers, X_dsel, y_dsel, X_test, y_test = setup_classifiers()

    selector = LCA(pool_classifiers, DFP=True, knne=knne)
    selector.fit(X_dsel, y_dsel)

    score = selector.score(X_test, y_test)
    assert np.isclose(score, expected)
Ejemplo n.º 18
0
def test_lca(knn_methods):
    """Fit LCA with the given ``knn_classifier`` and compare its score to the reference value."""
    pool_classifiers, X_dsel, y_dsel, X_test, y_test = setup_classifiers()

    selector = LCA(pool_classifiers, knn_classifier=knn_methods)
    selector.fit(X_dsel, y_dsel)

    score = selector.score(X_test, y_test)
    assert np.isclose(score, 0.973404255319149)
Ejemplo n.º 19
0
# Split the data into training and DSEL for DS techniques
# (X_train, y_train and rng are defined before this excerpt; the training
# set is overwritten with its own half, the other half becomes the DSEL).
X_train, X_dsel, y_train, y_dsel = train_test_split(X_train,
                                                    y_train,
                                                    test_size=0.5,
                                                    random_state=rng)
# Considering a pool composed of 10 base classifiers
pool_classifiers = RandomForestClassifier(n_estimators=10,
                                          random_state=rng,
                                          max_depth=10)
pool_classifiers.fit(X_train, y_train)

# DS techniques without DFP
# Every technique is built over the same fitted random forest.
apriori = APriori(pool_classifiers)
aposteriori = APosteriori(pool_classifiers)
ola = OLA(pool_classifiers)
lca = LCA(pool_classifiers)
desp = DESP(pool_classifiers)
meta = METADES(pool_classifiers)

# Fit each technique on the DSEL half of the original training data.
apriori.fit(X_dsel, y_dsel)
aposteriori.fit(X_dsel, y_dsel)
ola.fit(X_dsel, y_dsel)
lca.fit(X_dsel, y_dsel)
desp.fit(X_dsel, y_dsel)
meta.fit(X_dsel, y_dsel)

# Report the score of each technique on the test set
# (X_test and y_test are defined before this excerpt).
print('Evaluating DS techniques:')
print('Classification accuracy of OLA: ', ola.score(X_test, y_test))
print('Classification accuracy of LCA: ', lca.score(X_test, y_test))
print('Classification accuracy of A priori: ', apriori.score(X_test, y_test))
print('Classification accuracy of A posteriori: ',
Ejemplo n.º 20
0
    def _generate_local_pool(self, query):
        """
        Local pool generation.

        This procedure populates the "pool_classifiers" based on the query sample's neighborhood.
        Thus, for each query sample, a different pool is created.

        In each iteration, the training samples near the query sample are singled out and a
        subpool is generated using the Self-Generating Hyperplanes (SGH) method.
        Then, the DCS technique selects the best classifier in the generated subpool and it is added to the local pool.
        In the following iteration, the neighborhood is increased and another SGH-generated subpool is obtained
        over the new neighborhood, and again the DCS technique singles out the best in it, which is then added to the local pool.
        This process is repeated until the pool reaches "n_classifiers", or until
        2 * "n_classifiers" iterations have been performed, whichever comes first.

        The neighbors and distances of the query are not computed here: they are
        read from "self.neighbors" and "self.distances".

        Parameters
        ----------
        query : array of shape = [n_features]
                The test sample.

        Returns
        -------
        self

        References
        ----------

        M. A. Souza, G. D. Cavalcanti, R. M. Cruz, R. Sabourin, On the characterization of the
        oracle for dynamic classifier selection, in: International Joint Conference on Neural Networks,
        IEEE, 2017, pp. 332-339.
        """
        n_samples, _ = self.DSEL_data.shape

        # The local pool is rebuilt from scratch on every call.
        self.pool_classifiers = []

        # n_err is incremented on every iteration (not only on failed ones), so
        # max_err bounds the total number of iterations of the loop below.
        n_err = 0
        max_err = 2 * self.n_classifiers

        # Current neighborhood size; grows by 2 after each iteration.
        curr_k = self.k

        # Classifier count
        n = 0

        while n < self.n_classifiers and n_err < max_err:

            subpool = SGH()

            # 0/1 mask over the DSEL samples: 1 marks a sample used to train
            # the subpool.
            included_samples = np.zeros((n_samples), int)

            if self.knne:
                idx_neighb = np.array([], dtype=int)

                # Obtain neighbors of each class individually
                for j in np.arange(0, self.n_classes):
                    # Obtain neighbors from the classes in the RoC
                    # (only classes present among the first curr_k neighbors
                    # contribute; each contributes at most curr_k samples).
                    if np.any(self.classes[j] == self.DSEL_target[
                            self.neighbors[0][np.arange(0, curr_k)]]):
                        nc = np.where(self.classes[j] == self.DSEL_target[
                            self.neighbors[0]])
                        idx_nc = self.neighbors[0][nc]
                        idx_nc = idx_nc[np.arange(
                            0, np.minimum(curr_k, len(idx_nc)))]
                        idx_neighb = np.concatenate((idx_neighb, idx_nc),
                                                    axis=0)

            else:
                # Plain neighborhood: the first curr_k stored neighbors.
                idx_neighb = np.asarray(self.neighbors)[0][np.arange(
                    0, curr_k)]

            # Indicate participating instances in the training of the subpool
            included_samples[idx_neighb] = 1

            curr_classes = np.unique(self.DSEL_target[idx_neighb])

            # If there are +1 classes in the local region
            if len(curr_classes) > 1:
                # Obtain SGH pool
                subpool.fit(self.DSEL_data, self.DSEL_target, included_samples)

                # Adjust chosen DCS technique parameters
                # NOTE(review): there is no fallback branch; an unrecognised
                # self.ds_tech leaves `ds` unassigned for the code below.
                if self.ds_tech == 'ola':
                    ds = OLA(subpool, k=len(idx_neighb))  # change for self.k
                elif self.ds_tech == 'lca':
                    ds = LCA(subpool, k=len(idx_neighb))
                elif self.ds_tech == 'mcb':
                    ds = MCB(subpool, k=len(idx_neighb))
                elif self.ds_tech == 'mla':
                    ds = MLA(subpool, k=len(idx_neighb))
                elif self.ds_tech == 'a_priori':
                    ds = APriori(subpool, k=len(idx_neighb))
                elif self.ds_tech == 'a_posteriori':
                    ds = APosteriori(subpool, k=len(idx_neighb))

                # Fit ds technique
                ds.fit(self.DSEL_data, self.DSEL_target)

                neighb = np.in1d(
                    self.neighbors,
                    idx_neighb)  # True/False vector of selected neighbors

                # Set distances and neighbors of the query sample (already calculated)
                ds.distances = np.asarray([self.distances[0][neighb]
                                           ])  # Neighborhood
                ds.neighbors = np.asarray([self.neighbors[0][neighb]
                                           ])  # Neighborhood

                # All-ones mask, one entry per classifier of the DS technique.
                ds.DFP_mask = np.ones(ds.n_classifiers)

                # Estimate competence
                comp = ds.estimate_competence(query, ds._predict_base(query))

                # Select best classifier in subpool
                sel_c = ds.select(comp)

                # Add to local pool
                # (a deep copy of the first selected subpool member is stored).
                self.pool_classifiers.append(copy.deepcopy(subpool[sel_c[0]]))

                n += 1
            # else:
            #     # Exception: fewer than 2 classes in the neighborhood
            #     print('OPS! Next!')

            # Increase neighborhood size
            curr_k += 2
            n_err += 1

        return self
Ejemplo n.º 21
0
def test_lca():
    """Fit LCA with ``DFP=True`` and compare its score to the reference value."""
    pool_classifiers, X_dsel, y_dsel, X_test, y_test = setup_classifiers()

    selector = LCA(pool_classifiers, DFP=True)
    selector.fit(X_dsel, y_dsel)

    score = selector.score(X_test, y_test)
    assert np.isclose(score, 0.88787878787878793)
Ejemplo n.º 22
0
def test_predict_proba(create_X_y):
    """Fit an LCA whose pool holds two references to one fitted Perceptron."""
    X, y = create_X_y

    perceptron = Perceptron()
    perceptron.fit(X, y)

    # The same fitted object appears twice in the pool.
    selector = LCA([perceptron] * 2)
    selector.fit(X, y)