Code example #1
import numpy as np
import matplotlib.pyplot as plt
from sklearn import tree
from sklearn.ensemble import BaggingClassifier


def decision_tree_bagging(Xtrain, Xtest, ytrain, ytest, ensemble_size=60):
    # bagging
    accuracies = []
    ensemble_sizes = []

    for i in range(1, ensemble_size + 1):  # evaluate ensemble sizes 1..ensemble_size
        bagging = BaggingClassifier(
            base_estimator=tree.DecisionTreeClassifier(),
            n_estimators=i,
            bootstrap=True,
            max_samples=1.0,
            max_features=1.0)

        bagging.fit(Xtrain, ytrain)

        ypred = bagging.predict(Xtest)
        accuracy = np.mean(ypred == ytest)

        ensemble_sizes.append(i)
        accuracies.append(accuracy)

    plt.plot(ensemble_sizes, accuracies)
    plt.xlabel('number of estimators')
    plt.ylabel('accuracy')
    plt.grid(True)
    plt.title('Decision tree (bagging)')
    plt.show()

    print('Highest accuracy of bagging = %f' % np.max(accuracies))
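A minimal way to exercise the function above, assuming scikit-learn's built-in breast-cancer data as a stand-in for the original dataset (the data and split are illustrative, not part of the original snippet):

from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split

# illustrative data; any binary classification dataset would do
X, y = load_breast_cancer(return_X_y=True)
Xtrain, Xtest, ytrain, ytest = train_test_split(X, y, test_size=0.3, random_state=0)
decision_tree_bagging(Xtrain, Xtest, ytrain, ytest, ensemble_size=30)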
Code example #2
class BaggingClassifierImpl:
    # Thin wrapper that delegates to scikit-learn's BaggingClassifier,
    # assumed to be imported as SKLModel in the surrounding module.
    def __init__(self, base_estimator=None, n_estimators=10, max_samples=1.0,
                 max_features=1.0, bootstrap=True, bootstrap_features=False,
                 oob_score=False, warm_start=False, n_jobs=None,
                 random_state=None, verbose=0):
        self._hyperparams = {
            'base_estimator': make_sklearn_compat(base_estimator),
            'n_estimators': n_estimators,
            'max_samples': max_samples,
            'max_features': max_features,
            'bootstrap': bootstrap,
            'bootstrap_features': bootstrap_features,
            'oob_score': oob_score,
            'warm_start': warm_start,
            'n_jobs': n_jobs,
            'random_state': random_state,
            'verbose': verbose}
        self._wrapped_model = SKLModel(**self._hyperparams)

    def fit(self, X, y=None):
        if y is not None:
            self._wrapped_model.fit(X, y)
        else:
            self._wrapped_model.fit(X)
        return self

    def predict(self, X):
        return self._wrapped_model.predict(X)

    def predict_proba(self, X):
        return self._wrapped_model.predict_proba(X)

    def decision_function(self, X):
        return self._wrapped_model.decision_function(X)
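For context, a minimal usage sketch of the wrapper above. SKLModel is assumed to be scikit-learn's BaggingClassifier and make_sklearn_compat an identity-like helper from the surrounding project; both are assumptions, and the sketch presumes a scikit-learn version that still accepts base_estimator:

# assumed module-level names used by BaggingClassifierImpl
from sklearn.ensemble import BaggingClassifier as SKLModel
from sklearn.tree import DecisionTreeClassifier
from sklearn.datasets import load_iris


def make_sklearn_compat(estimator):
    # hypothetical stand-in for the project's real helper
    return estimator


X, y = load_iris(return_X_y=True)
clf = BaggingClassifierImpl(base_estimator=DecisionTreeClassifier(),
                            n_estimators=25, random_state=0)
clf.fit(X, y)
print(clf.predict(X[:5]))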
Code example #3
            # 3.2.1. Bagging with DecisionTree ###########################################################

            # model number in the results table
            num_model = 0

            # the model
            bg = BaggingClassifier(base_estimator=DecisionTreeClassifier(),
                                   max_samples=pct_trainamento[i],
                                   max_features=1.0,
                                   n_estimators=qtd_modelos)
            # training the model
            bg.fit(x_train, y_train)

            # computing the prediction
            pred = bg.predict(x_test)

            # printing the results
            acuracia, auc, f1measure, gmean = printar_resultados(
                y_test, pred,
                nome_datasets[h] + '-pct-' + str(pct_trainamento[i]) +
                '- Bagging com DecisionTree [' + str(j) + ']')

            # writing out the obtained results
            tabela.Adicionar_Sheet_Linha(num_model, j,
                                         [acuracia, auc, f1measure, gmean])

            # 3.2.1. End ###################################################################################

            # 3.2.2. Bagging with Main ###############################################################
            # model number in the table
Code example #4
 num_model = 0
 
 # instantiating the classifier
 ensemble = BaggingClassifier(base_estimator=Perceptron(), 
                             max_samples=qtd_amostras, 
                             max_features=1.0, 
                             n_estimators = qtd_modelos)
     
 # training the model
 ensemble.fit(x_train, y_train)
     
 # pruning the ensemble (REP)
 ensemble = REP(x_val, y_val, ensemble)
             
 # computing the prediction
 pred = ensemble.predict(x_test)
             
 # computing the diversity of the ensemble
 q_statistic = MedidasDiversidade('q', x_val, y_val, ensemble)
 double_fault = MedidasDiversidade('disagreement', x_val, y_val, ensemble)
     
 # printing the results
 qtd_modelos, acuracia, auc, f1measure, gmean = printar_resultados(
     y_test, pred, ensemble,
     nome_datasets[h] + '-Bagging-REP-' + validacao[k] + '[' + str(j) + ']')
 
 # writing out the obtained results
 tabela.Adicionar_Sheet_Linha(
     num_model, j,
     [qtd_modelos, q_statistic, double_fault, acuracia, auc, f1measure, gmean])
 ###########################################################################################
     
     
 ########## instantiating the Bagging+OGOB model ##########################################
 # defining the model number in the table
Code example #5
File: ensemble.py  Project: afcarl/SciProjects
# from sklearn.ensemble import AdaBoostClassifier as Boost
from sklearn.ensemble import BaggingClassifier as Boost  # public path; sklearn.ensemble.bagging is private/removed
from sklearn.naive_bayes import GaussianNB

from csxdata import CData

from SciProjects.grapes import path, indepsn

if __name__ == '__main__':

    data = CData(path,
                 indepsn,
                 feature="evjarat",
                 headers=1,
                 cross_val=0.2,
                 lower=True)
    data.transformation = "std"
    model = Boost(GaussianNB(), n_estimators=100)

    model.fit(data.learning, data.lindeps)
    preds = model.predict(data.testing)
    eq = [left == right for left, right in zip(preds, data.tindeps)]
    print("Acc:", sum(eq) / len(eq))
Code example #6
import pandas as pd

from sklearn import tree
from sklearn.ensemble import BaggingClassifier

train = pd.read_csv("train.csv")

train.drop(['Cabin'], axis=1, inplace=True)

train = train.dropna()
y = train['Survived']
train.drop(['Survived', 'PassengerId', 'Name', 'Ticket'], axis=1, inplace=True)
train = train.fillna({'Age': 30})  # fillna is not in-place; assign the result back
X = pd.get_dummies(train)

bag_clf = BaggingClassifier(
    tree.DecisionTreeClassifier(),
    n_estimators=500,
    max_samples=200,
    bootstrap=True,  # True => bagging, False => pasting
    n_jobs=-1  # use all cores
)

bag_clf.fit(X, y)

test = pd.read_csv('test.csv')
ids = test[['PassengerId']]
test.drop(['PassengerId', 'Name', 'Ticket', 'Cabin'], axis=1, inplace=True)
test.fillna(2, inplace=True)
test = pd.get_dummies(test).reindex(columns=X.columns, fill_value=0)  # align test columns with the training dummies
predictions = bag_clf.predict(test)
results = ids.assign(Survived=predictions)
results.to_csv('titanic_result_bagging.csv', index=False)
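As a quick sanity check (not part of the original snippet), the classifier's generalization can be estimated with cross-validation on the training data before writing the submission file; a minimal sketch reusing the X, y and bag_clf defined above:

from sklearn.model_selection import cross_val_score

# 5-fold cross-validated accuracy on the training data (illustrative check only)
scores = cross_val_score(bag_clf, X, y, cv=5)
print('CV accuracy: %.3f +/- %.3f' % (scores.mean(), scores.std()))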