コード例 #1
0
        y_pred = np.zeros(X.shape[0], dtype=float)
        for m in self.models:
            y_pred += m.predict(X)
        y_pred /= len(self.models)
        return (y_pred >= 0.5).astype(int)


if __name__ == '__main__':

    import sys
    from os import path
    sys.path.append(path.dirname(path.dirname(path.abspath(__file__))))
    from kaggle_io.extract_inputs import extract_training_data, extract_testing_data

    print 'Reading data...'
    Id_train, X_train, y_train = extract_training_data(
        'data/kaggle_train_tf_idf.csv')
    Id_test, X_test = extract_testing_data('data/kaggle_test_tf_idf.csv')

    print 'Reading models...'
    H = []
    H.append(joblib.load('rf1/1.pkl'))
    H.append(joblib.load('rf1/2.pkl'))
    H.append(joblib.load('rf1/3.pkl'))
    H.append(joblib.load('ada1/1.pkl'))
    H.append(joblib.load('ada1/2.pkl'))

    print 'Hill climbing...'
    ensemble = Ensemble()
    ensemble.fit(X_train,
                 y_train,
                 H,
コード例 #2
0
ファイル: ada1.py プロジェクト: jkim-/KaggleCS155
import sys
from os import path
sys.path.append(path.dirname(path.dirname(path.abspath(__file__))))
from kaggle_io.extract_inputs import extract_training_data

from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import AdaBoostClassifier
from sklearn.externals import joblib
from CvModel import CvModel

# Train AdaBoost variants on the TF-IDF training set and persist each fitted
# CvModel to disk with joblib.
Id, X, y = extract_training_data('data/kaggle_train_tf_idf.csv')

n_folds = 5
# The scaler is fitted once on the full training matrix and the same fitted
# instance is handed to every CvModel below.
scaler = StandardScaler().fit(X)
ada = AdaBoostClassifier()

# One (n_estimators, output path) pair per model to train. CvModel is a
# project class; presumably it runs n_folds-fold cross-validation around the
# scaler + estimator -- TODO confirm against CvModel.py.
for n_estimators, model_path in ((10, 'ada1/1.pkl'), (50, 'ada1/2.pkl')):
    # Parenthesised single-argument print emits the same text on Python 2
    # and Python 3, unlike the Python-2-only print statement.
    print('Training AdaBoost with n_estimators=%d' % n_estimators)
    ada.set_params(n_estimators=n_estimators)
    cv_ada = CvModel(n_folds, scaler, ada)
    cv_ada.fit(X, y)
    # Persist before the next iteration: set_params reconfigures the single
    # shared `ada` instance in place.
    joblib.dump(cv_ada, model_path)