Example #1
    def __init__(self,
                 K=10,
                 alpha=0.75,
                 base_classifier=None,
                 n_classifiers=100,
                 combination_rule='majority_vote',
                 diversity_metric='e',
                 positive_label=1):

        self.K = K
        self.alpha = alpha

        self.base_classifier = base_classifier
        self.n_classifiers = n_classifiers
        self.combination_rule = combination_rule
        self.positive_label = positive_label

        self.classifiers = None
        self.ensemble = None
        self.combiner = Combiner(rule=combination_rule)

        self.diversity_metric = diversity_metric
        self.diversity = Diversity(metric=diversity_metric)

        self.validation_X = None
        self.validation_y = None
Example #2
    def __init__(self,
                 base_classifier=None,
                 n_classifiers=100,
                 combination_rule='majority_vote'):

        self.base_classifier = base_classifier
        self.n_classifiers = n_classifiers
        self.ensemble = None
        self.combiner = Combiner(rule=combination_rule)
Example #3
    def __init__(self,
                 base_classifier=None,
                 n_classifiers=100,
                 combination_rule='majority_vote',
                 max_features=0.5):
        self.base_classifier = base_classifier
        self.n_classifiers = n_classifiers
        self.combiner = Combiner(rule=combination_rule)
        self.classifiers = None
        self.ensemble = None
        self.max_features = max_features
Example #4
File: base.py Project: viisar/brew
    def __init__(self, ensemble=None, selector=None, combiner=None):
        self.ensemble = ensemble
        self.selector = selector

        if combiner is None:
            self.combiner = Combiner(rule='majority_vote')
        elif isinstance(combiner, str):
            self.combiner = Combiner(rule=combiner)
        elif isinstance(combiner, Combiner):
            self.combiner = combiner
        else:
            raise ValueError('Invalid parameter combiner')
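
The constructor above accepts the combiner in three forms: omitted (defaulting to majority vote), as a rule-name string, or as a ready-made Combiner instance; anything else raises ValueError. A minimal sketch of the three equivalent spellings, using the brew imports that appear in later examples on this page (the pool members are placeholders):

from brew.base import Ensemble, EnsembleClassifier
from brew.combination.combiner import Combiner
from sklearn.naive_bayes import GaussianNB
from sklearn.tree import DecisionTreeClassifier

pool = Ensemble(classifiers=[DecisionTreeClassifier(), GaussianNB()])

# all three end up with a majority-vote Combiner
clf_a = EnsembleClassifier(ensemble=pool)                            # combiner=None -> default
clf_b = EnsembleClassifier(ensemble=pool, combiner='majority_vote')  # rule name as a string
clf_c = EnsembleClassifier(ensemble=pool,
                           combiner=Combiner(rule='majority_vote'))  # Combiner instance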
Example #5
    def __init__(self,
                 K=10,
                 alpha=0.75,
                 base_classifier=None,
                 n_classifiers=100,
                 combination_rule='majority_vote',
                 diversity_metric='e',
                 positive_label=1):

        self.K = K
        self.alpha = alpha

        self.base_classifier = base_classifier
        self.n_classifiers = n_classifiers
        self.combination_rule = combination_rule
        self.positive_label = positive_label

        self.classifiers = None
        self.ensemble = None
        self.combiner = Combiner(rule=combination_rule)

        self.diversity_metric = diversity_metric
        self.diversity = Diversity(metric=diversity_metric)

        self.validation_X = None
        self.validation_y = None
Example #6
class BaggingSK(PoolGenerator):
    '''
    This class should not be used, use brew.generation.bagging.Bagging instead.
    '''
    def __init__(self,
                 base_classifier=None,
                 n_classifiers=100,
                 combination_rule='majority_vote'):

        self.base_classifier = base_classifier
        self.n_classifiers = n_classifiers

        # using the sklearn implementation of bagging for now
        # (note: scikit-learn >= 1.2 renames base_estimator to estimator)
        self.sk_bagging = BaggingClassifier(base_estimator=base_classifier,
                                            n_estimators=n_classifiers,
                                            max_samples=1.0,
                                            max_features=1.0)

        self.ensemble = Ensemble()
        self.combiner = Combiner(rule=combination_rule)

    def fit(self, X, y):
        self.sk_bagging.fit(X, y)
        self.ensemble.add_classifiers(self.sk_bagging.estimators_)
        #self.classes_ = set(y)

    def predict(self, X):
        out = self.ensemble.output(X)
        return self.combiner.combine(out)
Example #7
class RandomSubspace(PoolGenerator):

    def __init__(self,
                 base_classifier=None,
                 n_classifiers=100,
                 combination_rule='majority_vote',
                 max_features=0.5):
        self.base_classifier = base_classifier
        self.n_classifiers = n_classifiers
        self.combiner = Combiner(rule=combination_rule)
        self.classifiers = None
        self.ensemble = None
        self.max_features = max_features

    def fit(self, X, y):
        self.ensemble = Ensemble()

        for i in range(self.n_classifiers):
            chosen_features = np.random.choice(X.shape[1], int(
                np.ceil(X.shape[1] * self.max_features)), replace=False)
            transformer = FeatureSubsamplingTransformer(
                features=chosen_features)

            classifier = BrewClassifier(classifier=sklearn.base.clone(
                self.base_classifier), transformer=transformer)
            classifier.fit(X, y)

            self.ensemble.add(classifier)

        return

    def predict(self, X):
        out = self.ensemble.output(X)
        return self.combiner.combine(out)
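
A usage sketch for the RandomSubspace class above, assuming it and its brew dependencies (FeatureSubsamplingTransformer, BrewClassifier, Ensemble, Combiner, numpy as np, sklearn) are in scope; the data and parameters are illustrative:

from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier

X, y = make_classification(n_samples=300, n_features=20, random_state=0)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

# each member tree is trained on a random half of the 20 features
rs = RandomSubspace(base_classifier=DecisionTreeClassifier(),
                    n_classifiers=25,
                    max_features=0.5)
rs.fit(X_train, y_train)
y_pred = rs.predict(X_test)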
Example #8
    def __init__(self, base_classifier=None, n_classifiers=100, combination_rule='majority_vote', max_features=0.5):
        self.base_classifier = base_classifier
        self.n_classifiers = n_classifiers
        self.combiner = Combiner(rule=combination_rule)
        self.classifiers = None
        self.ensemble = None
        self.max_features = max_features
Example #9
class RandomSubspace(PoolGenerator):

    def __init__(self, base_classifier=None, n_classifiers=100, combination_rule='majority_vote', max_features=0.5):
        self.base_classifier = base_classifier
        self.n_classifiers = n_classifiers
        self.combiner = Combiner(rule=combination_rule)
        self.classifiers = None
        self.ensemble = None
        self.max_features = max_features
       
    def fit(self, X, y):
        self.ensemble = Ensemble()

        for i in range(self.n_classifiers):
            chosen_features = np.random.choice(X.shape[1], int(np.ceil(X.shape[1]*self.max_features)), replace=False)
            transformer = FeatureSubsamplingTransformer(features=chosen_features)

            classifier = BrewClassifier(classifier=sklearn.base.clone(self.base_classifier), transformer=transformer)
            classifier.fit(X, y)
            
            self.ensemble.add(classifier)

        return

    def predict(self, X):
        out = self.ensemble.output(X)
        return self.combiner.combine(out)
Example #10
class BaggingSK(PoolGenerator):
    '''
    This class should not be used, use brew.generation.bagging.Bagging instead.
    '''

    def __init__(self, base_classifier=None, n_classifiers=100, combination_rule='majority_vote'):

        self.base_classifier = base_classifier
        self.n_classifiers = n_classifiers

        # using the sklearn implementation of bagging for now
        self.sk_bagging = BaggingClassifier(base_estimator=base_classifier,
                                            n_estimators=n_classifiers,
                                            max_samples=1.0,
                                            max_features=1.0)

        self.ensemble = Ensemble()
        self.combiner = Combiner(rule=combination_rule)

    def fit(self, X, y):
        self.sk_bagging.fit(X, y)
        self.ensemble.add_classifiers(self.sk_bagging.estimators_)
        #self.classes_ = set(y)

    def predict(self, X):
        out = self.ensemble.output(X)
        return self.combiner.combine(out)
Example #11
class Bagging(PoolGenerator):

    def __init__(self, base_classifier=None, n_classifiers=100, combination_rule='majority_vote'):

        self.base_classifier = base_classifier
        self.n_classifiers = n_classifiers
        self.ensemble = None
        self.combiner = Combiner(rule=combination_rule)

    def fit(self, X, y):
        self.ensemble = Ensemble()

        for _ in range(self.n_classifiers):
            # bootstrap
            idx = np.random.choice(X.shape[0], X.shape[0], replace=True)
            data, target = X[idx, :], y[idx]

            classifier = sklearn.base.clone(self.base_classifier)
            classifier.fit(data, target)
            
            self.ensemble.add(classifier)

        return


    def predict(self, X):
        out = self.ensemble.output(X)
        return self.combiner.combine(out)
Example #12
class Bagging(PoolGenerator):
    def __init__(self,
                 base_classifier=None,
                 n_classifiers=100,
                 combination_rule='majority_vote'):

        self.base_classifier = base_classifier
        self.n_classifiers = n_classifiers
        self.ensemble = None
        self.combiner = Combiner(rule=combination_rule)

    def fit(self, X, y):
        self.ensemble = Ensemble()

        for _ in range(self.n_classifiers):
            # bootstrap
            idx = np.random.choice(X.shape[0], X.shape[0], replace=True)
            data, target = X[idx, :], y[idx]

            classifier = sklearn.base.clone(self.base_classifier)
            classifier.fit(data, target)

            self.ensemble.add(classifier)

        return

    def predict(self, X):
        out = self.ensemble.output(X)
        return self.combiner.combine(out)
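
A usage sketch for the Bagging class above (assumes the class and its dependencies, numpy as np, sklearn, and brew's Ensemble and Combiner, are in scope; the data is synthetic). Each member is fit on a bootstrap sample the same size as X:

from sklearn.datasets import make_classification
from sklearn.tree import DecisionTreeClassifier

X, y = make_classification(n_samples=200, random_state=0)

bag = Bagging(base_classifier=DecisionTreeClassifier(), n_classifiers=50)
bag.fit(X, y)
print(bag.predict(X[:5]))  # majority vote across the 50 trees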
Example #13
    def __init__(self,
                 base_classifier=None,
                 n_classifiers=100,
                 combination_rule='majority_vote'):

        self.base_classifier = base_classifier
        self.n_classifiers = n_classifiers

        # using the sklearn implementation of bagging for now
        self.sk_bagging = BaggingClassifier(base_estimator=base_classifier,
                                            n_estimators=n_classifiers,
                                            max_samples=1.0,
                                            max_features=1.0)

        self.ensemble = Ensemble()
        self.combiner = Combiner(rule=combination_rule)
Example #14
    def test__arguments(self):

        c = MockClassifier()

        pool = Ensemble(classifiers=[c])
        combiner = Combiner(rule='majority_vote')

        model = EnsembleClassifier(ensemble=pool, combiner=combiner)
Example #15
    def __init__(self, ensemble=None, selector=None, combiner=None):
        self.ensemble = ensemble
        self.selector = selector

        if combiner is None:
            combiner = Combiner(rule='majority_vote')

        self.combiner = combiner
Example #16
    def __init__(self,
                 base_classifier=None,
                 n_classifiers=100,
                 combination_rule='majority_vote'):

        self.base_classifier = base_classifier
        self.n_classifiers = n_classifiers
        self.ensemble = None
        self.combiner = Combiner(rule=combination_rule)
Example #17
    def __init__(self, classifierList, combiningMethod):
        # each entry of classifierList is a pair; keep its second
        # element (the estimator) and drop the name
        classifiers = [None] * len(classifierList)
        for key, pair in enumerate(classifierList):
            classifiers[key] = pair[1]

        hybridEnsemble = Ensemble(classifiers=classifiers)
        hybridEnsembleClassifier = EnsembleClassifier(
            ensemble=hybridEnsemble, combiner=Combiner(combiningMethod))

        super().__init__(hybridEnsembleClassifier)
        self.name = "ensemble"
Example #18
    def __init__(self, base_classifier=None, n_classifiers=100, combination_rule='majority_vote'):

        self.base_classifier = base_classifier
        self.n_classifiers = n_classifiers

        # using the sklearn implementation of bagging for now
        self.sk_bagging = BaggingClassifier(base_estimator=base_classifier,
                                            n_estimators=n_classifiers,
                                            max_samples=1.0,
                                            max_features=1.0)

        self.ensemble = Ensemble()
        self.combiner = Combiner(rule=combination_rule)
Example #19
class EnsembleStackClassifier(object):
    def __init__(self, stack, combiner=None):
        self.stack = stack
        self.combiner = combiner
        if combiner is None:
            self.combiner = Combiner(rule='majority_vote')

    def fit(self, X, y):
        self.stack.fit(X, y)

    def predict(self, X):
        out = self.stack.output(X)
        return self.combiner.combine(out)

    def predict_proba(self, X):
        out = self.stack.output(X)
        return np.mean(out, axis=2)
Example #20
    def test_majority_vote(self):
        comb = Combiner(rule='majority_vote')
        assert comb.rule == majority_vote_rule
Example #21
class ICSBagging(PoolGenerator):


    def __init__(self, K=10, alpha=0.75, base_classifier=None, n_classifiers=100,
            combination_rule='majority_vote', diversity_metric='e', max_samples=1.0,
            positive_label=1):

        self.K = K
        self.alpha = alpha

        self.base_classifier = base_classifier
        self.n_classifiers = n_classifiers
        self.combination_rule = combination_rule
        self.positive_label = positive_label

        self.classifiers = None
        self.ensemble = None
        self.combiner = Combiner(rule=combination_rule)

        self.diversity_metric = diversity_metric
        self.diversity = Diversity(metric=diversity_metric)

        self.validation_X = None
        self.validation_y = None


    def set_validation(self, X, y):
        self.validation_X = X
        self.validation_y = y


    def fitness(self, classifier):
        '''
        #TODO normalize diversity metric.
        '''
        self.ensemble.add(classifier)
        out = self.ensemble.output(self.validation_X)
        y_pred = self.combiner.combine(out)
        y_true = self.validation_y

        auc = evaluation.auc_score(y_true, y_pred)
        div = self.diversity.calculate(self.ensemble,
                self.validation_X, self.validation_y)

        #diversity = entropy_measure_e(self.ensemble,
        #        self.validation_X, self.validation_y)

        self.ensemble.classifiers.pop()
        return self.alpha * auc + (1.0 - self.alpha) * div


    def _calc_pos_prob(self):
        y_pred = self.combiner.combine(self.ensemble.output(self.validation_X))
        mask = self.positive_label == self.validation_y
        pos_acc = float(sum(y_pred[mask] == self.validation_y[mask])) / len(
            self.validation_y[mask])
        neg_acc = float(sum(y_pred[~mask] == self.validation_y[~mask])) / len(
            self.validation_y[~mask])
        return 1.0 - (pos_acc / (pos_acc + neg_acc))


    def bootstrap_classifiers(self, X, y, K, pos_prob):
        mask = self.positive_label == y
        negative_label = y[~mask][0]

        clfs = []
        sets_cX, sets_cy = [], []
        for i in range(K):
            cX, cy = [], []
            for j in range(X.shape[0]):
                if np.random.random() < pos_prob:
                    # np.random.random_integers(0, n - 1) is gone from
                    # modern NumPy; np.random.randint(n) draws the same range
                    idx = np.random.randint(len(X[mask]))
                    cX = cX + [X[mask][idx]]
                    cy = cy + [self.positive_label]
                else:
                    idx = np.random.randint(len(X[~mask]))
                    cX = cX + [X[~mask][idx]]
                    cy = cy + [negative_label]
            if self.positive_label not in cy:
                idx_1 = np.random.randint(len(cX))
                idx_2 = np.random.randint(len(X[mask]))
                cX[idx_1] = X[mask][idx_2]
                cy[idx_1] = self.positive_label
            elif negative_label not in cy:
                idx_1 = np.random.randint(len(cX))
                idx_2 = np.random.randint(len(X[~mask]))
                cX[idx_1] = X[~mask][idx_2]
                cy[idx_1] = negative_label
            #print(len(cX), len(cy), X.shape[0], len(X), np.bincount(cy))

            sets_cX, sets_cy = sets_cX + [cX], sets_cy + [cy]
            clf = sklearn.base.clone(self.base_classifier)
            clfs = clfs + [clf.fit(cX, cy)]

        return clfs


    def fit(self, X, y):
        #if self.validation_X == None and self.validation_y == None:
        self.validation_X = X
        self.validation_y = y

        self.classes_ = set(y)
        self.ensemble = Ensemble()

        clfs = self.bootstrap_classifiers(X, y, self.K, 0.5)
        self.ensemble.add(np.random.choice(clfs))

        for i in range(1, self.n_classifiers):
            clfs = self.bootstrap_classifiers(X, y, self.K, self._calc_pos_prob())
            self.ensemble.add(max(clfs, key=lambda clf: self.fitness(clf)))

        self.validation_X = None
        self.validation_y = None
        
        return self


    def predict(self, X):
        out = self.ensemble.output(X)
        return self.combiner.combine(out)
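
A usage sketch for ICSBagging above. Its fitness function scores a candidate as alpha * AUC + (1 - alpha) * diversity, and fit() reuses the training data as its own validation set; brew's evaluation and Diversity helpers are assumed in scope, and the data here is illustrative:

from sklearn.datasets import make_classification
from sklearn.tree import DecisionTreeClassifier

# imbalanced binary problem; the minority class 1 is the positive label
X, y = make_classification(n_samples=400, weights=[0.9, 0.1], random_state=0)

ics = ICSBagging(K=5, alpha=0.75,  # fitness = 0.75 * AUC + 0.25 * diversity
                 base_classifier=DecisionTreeClassifier(max_depth=3),
                 n_classifiers=10,
                 positive_label=1)
ics.fit(X, y)
y_pred = ics.predict(X)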
Example #22
class ICSBaggingNew(PoolGenerator):


    def __init__(self, K=10, alpha=0.75, base_classifier=None, n_classifiers=100,
            combination_rule='majority_vote', diversity_metric='e', max_samples=1.0,
            positive_label=1):

        self.K = K
        self.alpha = alpha

        self.base_classifier = base_classifier
        self.n_classifiers = n_classifiers
        self.positive_label = positive_label

        self.ensemble = None
        self.combiner = Combiner(rule=combination_rule)

        self.diversity = Diversity(metric=diversity_metric)

        self.validation_X = None
        self.validation_y = None


    def set_validation(self, X, y):
        self.validation_X = X
        self.validation_y = y


    def fitness(self, classifier):
        '''
        #TODO normalize diversity metric.
        '''
        self.ensemble.add(classifier)

        y_pred = self.predict(self.validation_X)
        y_true = self.validation_y

        auc = evaluation.auc_score(y_true, y_pred)
        div = self.diversity.calculate(self.ensemble, self.validation_X, y_true)

        self.ensemble.classifiers.pop() # create interface for this later

        return self.alpha * auc + (1.0 - self.alpha) * div


    def _calc_pos_prob(self):
        y_pred = self.predict(self.validation_X)
        y_true = self.validation_y

        # obtaining recall scores for each label (assuming the labels are binary)
        pos_acc = recall_score(y_true, y_pred, average='binary', pos_label=self.positive_label)
        neg_acc = recall_score(y_true, y_pred, average='binary', pos_label=int(not self.positive_label))

        return neg_acc / (pos_acc + neg_acc)


    def bootstrap_classifiers(self, X, y, K, pos_prob):
        pos_idx = (y == self.positive_label)
        neg_idx = (y == int(not self.positive_label))

        X_pos, y_pos = X[pos_idx,:], y[pos_idx] # positive examples
        X_neg, y_neg = X[neg_idx,:], y[neg_idx] # negative examples

        classifiers = []
        for i in range(K):
            X_new = np.zeros(X.shape)
            y_new = np.zeros(y.shape)

            for j in range(X.shape[0]):
                
                if pos_prob > np.random.random():
                    # add a randomly chosen positive example
                    idx = np.random.randint(X_pos.shape[0])
                    X_new[j,:] = X_pos[idx,:]
                    y_new[j] = self.positive_label

                else:
                    # add a randomly chosen negative example
                    idx = np.random.randint(X_neg.shape[0])
                    X_new[j,:] = X_neg[idx,:]
                    y_new[j] = int(not self.positive_label)

            # if no positive example is present, make sure you insert at least one
            if not np.any(y_new == self.positive_label):
                idx_new = np.random.randint(X_new.shape[0]) # chosen spot for replacement on new array
                idx_pos = np.random.randint(X_pos.shape[0]) # chosen positive example index

                X_new[idx_new,:] = X_pos[idx_pos,:]
                y_new[idx_new] = self.positive_label
            
            # if no negative example is present, make sure you insert at least one
            elif not np.any(y_new == int(not self.positive_label)):
                idx_new = np.random.randint(X_new.shape[0]) # chosen spot for replacement on new array
                idx_neg = np.random.randint(X_neg.shape[0]) # chosen negative example index

                X_new[idx_new,:] = X_neg[idx_neg,:]
                y_new[idx_new] = int(not self.positive_label)

            # train classifier with the bootstrapped data
            clf = sklearn.base.clone(self.base_classifier)
            clf.fit(X_new, y_new)

            classifiers.append(clf)

        return classifiers


    def fit(self, X, y):
        #if self.validation_X == None and self.validation_y == None:
        self.validation_X = X
        self.validation_y = y

        self.classes_ = set(y)
        self.ensemble = Ensemble()

        clfs = self.bootstrap_classifiers(X, y, self.K, 0.5)
        self.ensemble.add(np.random.choice(clfs))

        for i in range(1, self.n_classifiers):
            clfs = self.bootstrap_classifiers(X, y, self.K, self._calc_pos_prob())
            self.ensemble.add(max(clfs, key=lambda clf: self.fitness(clf)))

        self.validation_X = None
        self.validation_y = None
        
        return self


    def predict(self, X):
        out = self.ensemble.output(X)
        return self.combiner.combine(out)
Example #23
import fris_stolp_test
clf4 = fris_stolp_test.SklearnHelper

# Creating Ensemble
ensemble = Ensemble([clf1, clf2, clf3, clf4])
eclf = EnsembleClassifier(ensemble=ensemble, combiner='mean')

# Creating Stacking
layer_1 = Ensemble([clf1, clf2, clf3])
layer_2 = Ensemble([sklearn.clone(clf1)])

stack = EnsembleStack(cv=3)

stack.add_layer(layer_1)
stack.add_layer(layer_2)

sclf = EnsembleStackClassifier(stack, combiner=Combiner('mean'))

sclf.fit(X_train.values, y_train.values)

y_pre = sclf.predict(X_test.values)

precision = precision_score(y_test, y_pre)
recall = recall_score(y_test, y_pre)
accuracy = accuracy_score(y_test, y_pre)
fmera = f1_score(y_test, y_pre)

if __name__ == '__main__':
    print("presicion ", precision, " recall ", recall, " fmera ", fmera,
          " accuracy ", accuracy)
Example #24
    def test_max(self):
        comb = Combiner(rule='max')
        assert comb.rule == max_rule
Example #25
    def __init__(self, stack, combiner=None):
        self.stack = stack
        self.combiner = combiner
        if combiner is None:
            self.combiner = Combiner(rule='majority_vote')
Example #26
    def test_median(self):
        comb = Combiner(rule='median')
        assert comb.rule == median_rule
Example #27
class ICSBaggingNew(PoolGenerator):
    def __init__(self,
                 K=10,
                 alpha=0.75,
                 base_classifier=None,
                 n_classifiers=100,
                 combination_rule='majority_vote',
                 diversity_metric='e',
                 positive_label=1):

        self.K = K
        self.alpha = alpha

        self.base_classifier = base_classifier
        self.n_classifiers = n_classifiers
        self.positive_label = positive_label

        self.ensemble = None
        self.combiner = Combiner(rule=combination_rule)

        self.diversity = Diversity(metric=diversity_metric)

        self.validation_X = None
        self.validation_y = None

    def set_validation(self, X, y):
        self.validation_X = X
        self.validation_y = y

    def fitness(self, classifier):
        '''
        #TODO normalize diversity metric.
        '''
        self.ensemble.add(classifier)

        y_pred = self.predict(self.validation_X)
        y_true = self.validation_y

        auc = evaluation.auc_score(y_true, y_pred)
        div = self.diversity.calculate(self.ensemble, self.validation_X,
                                       y_true)

        self.ensemble.classifiers.pop()  # create interface for this later

        return self.alpha * auc + (1.0 - self.alpha) * div

    def _calc_pos_prob(self):
        y_pred = self.predict(self.validation_X)
        y_true = self.validation_y

        # obtaining recall scores for each label (assuming the labels are binary)
        pos_acc = recall_score(y_true,
                               y_pred,
                               average='binary',
                               pos_label=self.positive_label)
        neg_acc = recall_score(y_true,
                               y_pred,
                               average='binary',
                               pos_label=int(not self.positive_label))

        return neg_acc / (pos_acc + neg_acc)

    def bootstrap_classifiers(self, X, y, K, pos_prob):
        pos_idx = (y == self.positive_label)
        neg_idx = (y == int(not self.positive_label))

        X_pos, y_pos = X[pos_idx, :], y[pos_idx]  # positive examples
        X_neg, y_neg = X[neg_idx, :], y[neg_idx]  # negative examples

        classifiers = []
        for i in range(K):
            X_new = np.zeros(X.shape)
            y_new = np.zeros(y.shape)

            for j in range(X.shape[0]):

                if pos_prob > np.random.random():
                    # add a randomly chosen positive example
                    idx = np.random.randint(X_pos.shape[0])
                    X_new[j, :] = X_pos[idx, :]
                    y_new[j] = self.positive_label

                else:
                    # add a randomly chosen negative example
                    idx = np.random.randint(X_neg.shape[0])
                    X_new[j, :] = X_neg[idx, :]
                    y_new[j] = int(not self.positive_label)

            # if no positive example is present, make sure you insert at least one
            if not np.any(y_new == self.positive_label):
                idx_new = np.random.randint(
                    X_new.shape[0])  # chosen spot for replacement on new array
                idx_pos = np.random.randint(
                    X_pos.shape[0])  # chosen positive example index

                X_new[idx_new, :] = X_pos[idx_pos, :]
                y_new[idx_new] = self.positive_label

            # if no negative example is present, make sure you insert at least one
            elif not np.any(y_new == int(not self.positive_label)):
                idx_new = np.random.randint(
                    X_new.shape[0])  # chosen spot for replacement on new array
                idx_neg = np.random.randint(
                    X_neg.shape[0])  # chosen negative example index

                X_new[idx_new, :] = X_neg[idx_neg, :]
                y_new[idx_new] = int(not self.positive_label)

            # train classifier with the bootstrapped data
            clf = sklearn.base.clone(self.base_classifier)
            clf.fit(X_new, y_new)

            classifiers.append(clf)

        return classifiers

    def fit(self, X, y):
        #if self.validation_X == None and self.validation_y == None:
        self.validation_X = X
        self.validation_y = y

        self.classes_ = set(y)
        self.ensemble = Ensemble()

        clfs = self.bootstrap_classifiers(X, y, self.K, 0.5)
        self.ensemble.add(np.random.choice(clfs))

        for i in range(1, self.n_classifiers):
            clfs = self.bootstrap_classifiers(X, y, self.K,
                                              self._calc_pos_prob())
            self.ensemble.add(max(clfs, key=lambda clf: self.fitness(clf)))

        self.validation_X = None
        self.validation_y = None

        return self

    def predict(self, X):
        out = self.ensemble.output(X)
        return self.combiner.combine(out)
Example #28
class ICSBagging(PoolGenerator):
    def __init__(self,
                 K=10,
                 alpha=0.75,
                 base_classifier=None,
                 n_classifiers=100,
                 combination_rule='majority_vote',
                 diversity_metric='e',
                 positive_label=1):

        self.K = K
        self.alpha = alpha

        self.base_classifier = base_classifier
        self.n_classifiers = n_classifiers
        self.combination_rule = combination_rule
        self.positive_label = positive_label

        self.classifiers = None
        self.ensemble = None
        self.combiner = Combiner(rule=combination_rule)

        self.diversity_metric = diversity_metric
        self.diversity = Diversity(metric=diversity_metric)

        self.validation_X = None
        self.validation_y = None

    def set_validation(self, X, y):
        self.validation_X = X
        self.validation_y = y

    def fitness(self, classifier):
        '''
        #TODO normalize diversity metric.
        '''
        self.ensemble.add(classifier)
        out = self.ensemble.output(self.validation_X)
        y_pred = self.combiner.combine(out)
        y_true = self.validation_y

        auc = evaluation.auc_score(y_true, y_pred)
        div = self.diversity.calculate(self.ensemble, self.validation_X,
                                       self.validation_y)

        #diversity = entropy_measure_e(self.ensemble,
        #        self.validation_X, self.validation_y)

        self.ensemble.classifiers.pop()
        return self.alpha * auc + (1.0 - self.alpha) * div

    def _calc_pos_prob(self):
        y_pred = self.combiner.combine(self.ensemble.output(self.validation_X))
        mask = self.positive_label == self.validation_y
        pos_acc = float(sum(y_pred[mask] == self.validation_y[mask])) / len(
            self.validation_y[mask])
        neg_acc = float(sum(y_pred[~mask] == self.validation_y[~mask])) / len(
            self.validation_y[~mask])
        return 1.0 - (pos_acc / (pos_acc + neg_acc))

    def bootstrap_classifiers(self, X, y, K, pos_prob):
        mask = self.positive_label == y
        negative_label = y[~mask][0]

        clfs = []
        sets_cX, sets_cy = [], []
        for i in range(K):
            cX, cy = [], []
            for j in range(X.shape[0]):
                if np.random.random() < pos_prob:
                    # np.random.random_integers(0, n - 1) is gone from
                    # modern NumPy; np.random.randint(n) draws the same range
                    idx = np.random.randint(len(X[mask]))
                    cX = cX + [X[mask][idx]]
                    cy = cy + [self.positive_label]
                else:
                    idx = np.random.randint(len(X[~mask]))
                    cX = cX + [X[~mask][idx]]
                    cy = cy + [negative_label]
            if self.positive_label not in cy:
                idx_1 = np.random.randint(len(cX))
                idx_2 = np.random.randint(len(X[mask]))
                cX[idx_1] = X[mask][idx_2]
                cy[idx_1] = self.positive_label
            elif negative_label not in cy:
                idx_1 = np.random.randint(len(cX))
                idx_2 = np.random.randint(len(X[~mask]))
                cX[idx_1] = X[~mask][idx_2]
                cy[idx_1] = negative_label
            #print(len(cX), len(cy), X.shape[0], len(X), np.bincount(cy))

            sets_cX, sets_cy = sets_cX + [cX], sets_cy + [cy]
            clf = sklearn.base.clone(self.base_classifier)
            clfs = clfs + [clf.fit(cX, cy)]

        return clfs

    def fit(self, X, y):
        #if self.validation_X == None and self.validation_y == None:
        self.validation_X = X
        self.validation_y = y

        self.classes_ = set(y)
        self.ensemble = Ensemble()

        clfs = self.bootstrap_classifiers(X, y, self.K, 0.5)
        self.ensemble.add(np.random.choice(clfs))

        for _ in range(1, self.n_classifiers):
            clfs = self.bootstrap_classifiers(X, y, self.K,
                                              self._calc_pos_prob())
            self.ensemble.add(max(clfs, key=lambda clf: self.fitness(clf)))

        self.validation_X = None
        self.validation_y = None

        return self

    def predict(self, X):
        out = self.ensemble.output(X)
        return self.combiner.combine(out)
Example #29
                      max_features=1.0),
    AdaBoostClassifier(DecisionTreeClassifier(max_depth=2),
                       n_estimators=600,
                       learning_rate=1),
    BaggingClassifier(ExtraTreesClassifier(criterion='entropy',
                                           max_depth=100,
                                           n_estimators=100),
                      max_samples=1.0,
                      max_features=1.0)
]
clfs = classifiers  # [clf1, clf2]
ens = Ensemble(classifiers=clfs)

# create your Combiner
# the rules can be 'majority_vote', 'max', 'min', 'mean' or 'median'
comb = Combiner(rule='max')

# now create your ensemble classifier
ensemble_clf = EnsembleClassifier(ensemble=ens, combiner=comb)
ensemble_clf = ensemble_clf.fit(X_train, Y_train)
y_tested = ensemble_clf.predict(X_test)

# for i in xrange(1,10):
#     clf = BaggingClassifier(DecisionTreeClassifier(criterion = 'entropy', max_depth = i + 100),max_samples=1.0, max_features=1.0)
#     clf = clf.fit(X_train, Y_train)
#     y_tested1 = clf.predict(X_test)
#     for a in range(len(y_tested)):
#         y_tested[a] = (y_tested[a] & y_tested1[a])
#     clf = BaggingClassifier(ExtraTreesClassifier(criterion = 'entropy', max_depth = i + 100,n_estimators=100+i),max_samples=1.0, max_features=1.0)
#     clf = clf.fit(X_train, Y_train)
#     y_tested2 = clf.predict(X_test)
Example #30
    def test_default_rule(self):
        comb = Combiner()
        assert comb.rule == majority_vote_rule
Example #31
class SmoteBagging(PoolGenerator):
    def __init__(self,
                 base_classifier=None,
                 n_classifiers=100,
                 combination_rule='majority_vote',
                 k=5):

        #self.b = b
        self.k = k
        self.n_classifiers = n_classifiers
        self.base_classifier = base_classifier

        self.ensemble = None
        self.combiner = Combiner(rule=combination_rule)

    def smote_bootstrap_sample(self, X, y, b, k):

        classes = np.unique(y)
        count = np.bincount(y)  # number of instances of each class

        majority_class = count.argmax()  # majority class label
        majority_count = count.max()  # number of majority-class instances

        data = np.empty((0, X.shape[1]))
        target = np.empty((0, ))

        for i in classes:

            class_data = X[(y == i), :]

            if i == majority_class:  # majority class
                # regular bootstrap (i.e. 100% sampling rate)
                idx = np.random.choice(majority_count, (majority_count, ))
                data = np.concatenate((data, class_data[idx, :]))
                target = np.concatenate((target, i * np.ones(
                    (majority_count, ))))
                #print('original class data = {}'.format(class_data.shape))
                #print('sampled class data = {}'.format(class_data[idx,:].shape))
                #print()

            else:  # minority classes
                # bootstrap the class data with defined sampling rate
                sample_rate = (majority_count / class_data.shape[0]) * (b / 100)
                idx = np.random.choice(
                    class_data.shape[0],
                    (int(sample_rate * class_data.shape[0]), ))
                sampled_class_data = class_data[idx, :]

                #print('original class data = {}'.format(class_data.shape))
                #print('majority_count = {}'.format(majority_count))
                #print('class data = {}'.format(class_data.shape))
                #print('b = {}'.format(b))
                #print('sample rate = {}'.format(sample_rate))
                #print('sampled class data = {}'.format(sampled_class_data.shape))

                # run smote on bootstrapped data to obtain synthetic samples
                # ceil to make sure N_smote is a multiple of 100, and the small value to avoid a zero
                N_smote = int(
                    np.ceil((majority_count / sampled_class_data.shape[0]) *
                            (1 - b / 100 + 10e-8)) * 100)
                #print(N_smote)

                #print('----------')
                #print('smote parameters:')
                #print('T : {}'.format(sampled_class_data.shape))
                #print('N : {}'.format(N_smote))
                synthetic = smote(sampled_class_data, N=N_smote, k=self.k)
                #print('synthetic data = {})'.format(synthetic.shape))
                #print(synthetic)

                # add synthetic samples to sampled class data
                n_missing = majority_count - sampled_class_data.shape[0]
                idx = np.random.choice(synthetic.shape[0], (n_missing, ))
                new_class_data = np.concatenate(
                    (sampled_class_data, synthetic[idx, :]))
                #print('new class data = {})'.format(new_class_data.shape))
                #print()
                data = np.concatenate((data, new_class_data))
                target = np.concatenate((target, i * np.ones(
                    (new_class_data.shape[0], ))))

        return data, target

    def fit(self, X, y):

        self.ensemble = Ensemble()

        # this parameter should change between [10, 100] with
        # increments of 10, for every classifier in the ensemble
        b = 10

        for i in range(self.n_classifiers):
            #print()
            #print('classifier : {}'.format(i))
            #print('------------------------')
            #print('b = {}'.format(b))
            data, target = self.smote_bootstrap_sample(X,
                                                       y,
                                                       b=float(b),
                                                       k=self.k)
            #print('data = {}'.format(data.shape))
            #print()

            classifier = sklearn.base.clone(self.base_classifier)
            classifier.fit(data, target)

            self.ensemble.add(classifier)

            if b >= 100:
                b = 10
            else:
                b += 10

        return

    def predict(self, X):
        out = self.ensemble.output(X)
        return self.combiner.combine(out)
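
A usage sketch for SmoteBagging above (assumes brew's smote helper, Ensemble, and Combiner are in scope). Labels must be non-negative integers because smote_bootstrap_sample calls np.bincount(y); the sampling rate b cycles through 10, 20, ..., 100 across the pool:

from sklearn.datasets import make_classification
from sklearn.tree import DecisionTreeClassifier

# imbalanced data; minority classes are topped up with SMOTE inside each bootstrap
X, y = make_classification(n_samples=500, weights=[0.85, 0.15], random_state=0)

sb = SmoteBagging(base_classifier=DecisionTreeClassifier(),
                  n_classifiers=10,
                  k=5)  # k nearest neighbours used by SMOTE
sb.fit(X, y)
y_pred = sb.predict(X)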
Example #32
# Create plot
plt.title("Learning Curve")
plt.xlabel("Training Set Size")
plt.ylabel("Accuracy Score")
plt.legend(loc="best")
plt.tight_layout()
plt.show()


# Merge two classifier Randomforest and KNN
from brew.base import Ensemble
from brew.base import EnsembleClassifier
from brew.combination.combiner import Combiner
# random undersampling to balance the training classes
# (fit_resample replaced the old fit_sample in imbalanced-learn 0.4)
X_resampled, y_resampled = RandomUnderSampler(random_state=0).fit_resample(X_train, y_train)
clfs = [classifier_rf, classifier_knn]
ens = Ensemble(classifiers=clfs)
comb = Combiner(rule='mean')
eclf = EnsembleClassifier(ensemble=ens, combiner=comb)
eclf.fit(X_resampled, y_resampled)
y_pred = eclf.predict(X_test)
cm = confusion_matrix(y_test, y_pred)
print(classification_report(y_test, y_pred))


# PCA Using feature reduction technique
# Check how many components needed in a way which will express maximum variance
from sklearn.decomposition import PCA
pca = PCA(n_components=None)
X_train_pca = pca.fit_transform(X_train)  # fit the PCA on training data only
X_test_pca = pca.transform(X_test)        # reuse that fit; do not refit on test data
explained_variance = pca.explained_variance_ratio_
Example #33
    def test_min(self):
        comb = Combiner(rule='min')
        assert comb.rule == min_rule
Example #34
class SmoteBagging(PoolGenerator):

    def __init__(self, base_classifier=None,
                 n_classifiers=100,
                 combination_rule='majority_vote', k=2):

        #self.b = b
        self.k = k
        self.n_classifiers = n_classifiers
        self.base_classifier = base_classifier

        self.ensemble = None
        self.combiner = Combiner(rule=combination_rule)


    def smote_bootstrap_sample(self, X, y, b, k):
        
        classes = np.unique(y)
        count = np.bincount(y) # number of instances of each class

        majority_class = count.argmax() # majority class label
        majority_count = count.max() # number of majority-class instances

        data = np.empty((0, X.shape[1]))
        target = np.empty((0,))

        for i in classes:

            class_data = X[(y==i),:]

            if i == majority_class: # majority class
                # regular bootstrap (i.e. 100% sampling rate)
                idx = np.random.choice(majority_count, (majority_count,))
                data = np.concatenate((data, class_data[idx,:]))
                target = np.concatenate((target, i * np.ones((majority_count,))))
                #print('original class data = {}'.format(class_data.shape))
                #print('sampled class data = {}'.format(class_data[idx,:].shape))
                #print()

            else: # minority classes
                # bootstrap the class data with defined sampling rate
                sample_rate = (majority_count / class_data.shape[0]) * (b/100)
                idx = np.random.choice(class_data.shape[0], (int(sample_rate * class_data.shape[0]),))
                sampled_class_data = class_data[idx,:]
                
                #print('original class data = {}'.format(class_data.shape))
                #print('majority_count = {}'.format(majority_count))
                #print('class data = {}'.format(class_data.shape))
                #print('b = {}'.format(b))
                #print('sample rate = {}'.format(sample_rate))
                #print('sampled class data = {}'.format(sampled_class_data.shape))


                # run smote on bootstrapped data to obtain synthetic samples
                # ceil to make sure N_smote is a multiple of 100, and the small value to avoid a zero
                N_smote = int( np.ceil((majority_count / sampled_class_data.shape[0]) * (1 - b/100 + 10e-8)) * 100 )
                #print(N_smote)

                #print('----------')
                #print('smote parameters:')
                #print('T : {}'.format(sampled_class_data.shape))
                #print('N : {}'.format(N_smote))
                synthetic = smote(sampled_class_data, N=N_smote, k=self.k)
                #print('synthetic data = {})'.format(synthetic.shape))
                #print(synthetic)
               
                # add synthetic samples to sampled class data
                n_missing = majority_count - sampled_class_data.shape[0]
                idx = np.random.choice(synthetic.shape[0], (n_missing,))
                new_class_data = np.concatenate((sampled_class_data, synthetic[idx,:]))
                #print('new class data = {})'.format(new_class_data.shape))
                #print()
                data = np.concatenate((data, new_class_data))
                target = np.concatenate((target, i * np.ones((new_class_data.shape[0],))))

        return data, target


    def fit(self, X, y):

        self.ensemble = Ensemble()

        # this parameter should change between [10, 100] with
        # increments of 10, for every classifier in the ensemble
        b = 10

        for i in range(self.n_classifiers):
            #print()
            #print('classifier : {}'.format(i))
            #print('------------------------')
            #print('b = {}'.format(b))
            data, target = self.smote_bootstrap_sample(X, y, b=b, k=self.k)
            #print('data = {}'.format(data.shape))
            #print()

            classifier = sklearn.base.clone(self.base_classifier)
            classifier.fit(data, target)
            
            self.ensemble.add(classifier)

            if b >= 100:
                b = 10
            else:
                b += 10

        return

    def predict(self, X):
        out = self.ensemble.output(X)
        return self.combiner.combine(out)
Example #35
# creating a new ensemble of ensembles
ens = Ensemble(classifiers=[clf1,ensemble_clf])
ensemble_ens = EnsembleClassifier(ensemble=ens, combiner=cmb)
 
# and you can use it in the same way as a regular ensemble
ensemble_ens.fit(X, y)
ensemble_ens.predict(X)
ensemble_ens.predict_proba(X)

# the other library

# create your Ensemble clf1 can be an EnsembleClassifier object too
ens = Ensemble(classifiers=[mode_9, mode_9])

# create your Combiner (combination rule)
# it can be 'min', 'max', 'majority_vote' ...
cmb = Combiner(rule='mean')

# and now, create your Ensemble Classifier
ensemble_clf = EnsembleClassifier(ensemble=ens, combiner=cmb)

# assuming you have a X, y data you can use
ensemble_clf.fit(val_path, val_path)

print("-----------d-----------")
ensemble_clf.predict(val_path)
Example #36
my_data = genfromtxt('/Users/samarth/Desktop/data.csv', delimiter=',')

for item in range(0, my_data.shape[0]):
    var = my_data[item][4]
    my_data[item][4] = int(range_scaler(5538, 600000, 100, 1000, var))
'''
if my_data[item][6] < 100 or my_data[item][6] > 1000 or (my_data[item][6] > my_data[item][4]):
    my_data = np.delete(my_data, (item), axis=0)
'''
# keep rows whose column-4 value lies in [100, 1000]
# (logical_or: a value cannot be < 100 and > 1000 at once, so logical_and never fires)
my_data = my_data[np.logical_not(
    np.logical_or(my_data[:, 4] < 100, my_data[:, 4] > 1000))]
my_data = my_data[np.logical_not(my_data[:, 4] > my_data[:, 6])]

ensemble = Ensemble([clf1, clf2, clf3])
eclf = EnsembleClassifier(ensemble=ensemble, combiner=Combiner('mean'))

layer_1 = Ensemble([clf1, clf2, clf3])
layer_2 = Ensemble([sklearn.clone(clf1)])

stack = EnsembleStack(cv=3)

stack.add_layer(layer_1)
stack.add_layer(layer_2)

sclf = EnsembleStackClassifier(stack)

clf_list = [clf1, clf2, clf3, eclf, sclf]
lbl_list = [
    'Logistic Regression', 'Random Forest', 'RBF kernel SVM', 'Ensemble',
    'Stacking'
]
Example #37
File: base.py Project: viisar/brew
class EnsembleClassifier(object):

    def __init__(self, ensemble=None, selector=None, combiner=None):
        self.ensemble = ensemble
        self.selector = selector

        if combiner is None:
            self.combiner = Combiner(rule='majority_vote')
        elif isinstance(combiner, str):
            self.combiner = Combiner(rule=combiner)
        elif isinstance(combiner, Combiner):
            self.combiner = combiner
        else:
            raise ValueError('Invalid parameter combiner')

    def fit(self, X, y):
        self.ensemble.fit(X, y)

    def predict(self, X):

        # TODO: warn the user if mode of ensemble
        # output excludes the chosen combiner?

        if self.selector is None:
            out = self.ensemble.output(X)
            y = self.combiner.combine(out)

        else:
            y = []

            for i in range(X.shape[0]):
                ensemble, weights = self.selector.select(
                    self.ensemble, X[i, :][np.newaxis, :])

                if weights is not None:  # use the ensemble with weights
                    if self.combiner.combination_rule == 'majority_vote':
                        out = ensemble.output(X[i, :][np.newaxis, :])
                    else:
                        out = ensemble.output(X[i, :][np.newaxis, :], mode='probs')

                    # apply weights, using a separate index so the outer
                    # per-sample loop variable i is not clobbered
                    for j in range(out.shape[2]):
                        out[:, :, j] = out[:, :, j] * weights[j]

                    [tmp] = self.combiner.combine(out)
                    y.append(tmp)

                else:  # use the ensemble, but ignore the weights
                    if self.combiner.combination_rule == 'majority_vote':
                        out = ensemble.output(X[i, :][np.newaxis, :])
                    else:
                        out = ensemble.output(X[i, :][np.newaxis, :], mode='probs')
                    [tmp] = self.combiner.combine(out)
                    y.append(tmp)

        return np.asarray(y)

    def predict_proba(self, X):

        # TODO: warn the user if mode of ensemble
        # output excludes the chosen combiner?

        if self.selector is None:
            out = self.ensemble.output(X, mode='probs')
            return np.mean(out, axis=2)

        else:
            out_full = []

            for i in range(X.shape[0]):
                ensemble, weights = self.selector.select(
                    self.ensemble, X[i, :][np.newaxis, :])

                if weights is not None:  # use the ensemble with weights
                    out = ensemble.output(X[i, :][np.newaxis, :])

                    # apply weights (separate index j, as in predict)
                    for j in range(out.shape[2]):
                        out[:, :, j] = out[:, :, j] * weights[j]

                    # [tmp] = self.combiner.combine(out)
                    out_full.extend(list(np.mean(out, axis=2)))

                else:  # use the ensemble, but ignore the weights
                    out = ensemble.output(X[i, :][np.newaxis, :])
                    out_full.extend(list(np.mean(out, axis=2)))

        # return np.asarray(y)
        return np.array(out_full)

    def score(self, X, y, sample_weight=None):
        return accuracy_score(y, self.predict(X), sample_weight=sample_weight)
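
A minimal end-to-end sketch of the EnsembleClassifier above without a selector, reusing the brew imports shown in earlier examples (the base models and data are placeholders):

from sklearn.datasets import make_classification
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier

X, y = make_classification(n_samples=300, random_state=0)

clfs = [DecisionTreeClassifier().fit(X, y),
        GaussianNB().fit(X, y),
        KNeighborsClassifier().fit(X, y)]

model = EnsembleClassifier(ensemble=Ensemble(classifiers=clfs),
                           combiner=Combiner(rule='majority_vote'))
print(model.predict(X[:5]))        # combined hard predictions
print(model.predict_proba(X[:5]))  # per-class probabilities averaged over the pool
print(model.score(X, y))           # accuracy via sklearn's accuracy_score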