# --- Tail of Ensemble.predict: the enclosing `def` precedes this chunk, so
# --- this fragment is incomplete from here; tokens are preserved as-is.
# Soft-voting ensemble: average the base models' predictions, then threshold
# the mean at 0.5 to produce hard 0/1 class labels.
y_pred = np.zeros(X.shape[0], dtype=float)
for m in self.models:
    y_pred += m.predict(X)
y_pred /= len(self.models)
return (y_pred >= 0.5).astype(int)

if __name__ == '__main__':
    import sys
    from os import path
    # Make the repository root importable when this file is run as a script.
    sys.path.append(path.dirname(path.dirname(path.abspath(__file__))))
    from kaggle_io.extract_inputs import extract_training_data, extract_testing_data

    print 'Reading data...'
    Id_train, X_train, y_train = extract_training_data(
        'data/kaggle_train_tf_idf.csv')
    Id_test, X_test = extract_testing_data('data/kaggle_test_tf_idf.csv')

    print 'Reading models...'
    # Candidate pool H: three pickled random-forest models and two AdaBoost
    # models, produced by the companion training scripts.
    H = []
    H.append(joblib.load('rf1/1.pkl'))
    H.append(joblib.load('rf1/2.pkl'))
    H.append(joblib.load('rf1/3.pkl'))
    H.append(joblib.load('ada1/1.pkl'))
    H.append(joblib.load('ada1/2.pkl'))

    print 'Hill climbing...'
    ensemble = Ensemble()
    # NOTE(review): the remaining arguments of this call are cut off at the
    # end of this chunk — the rest of the file is not visible from here.
    ensemble.fit(X_train, y_train, H,
import sys from os import path sys.path.append(path.dirname(path.dirname(path.abspath(__file__)))) from kaggle_io.extract_inputs import extract_training_data from sklearn.preprocessing import StandardScaler from sklearn.ensemble import AdaBoostClassifier from sklearn.externals import joblib from CvModel import CvModel Id, X, y = extract_training_data('data/kaggle_train_tf_idf.csv') n_folds = 5 scaler = StandardScaler().fit(X) ada = AdaBoostClassifier() print 'Training AdaBoost with n_estimators=10' ada.set_params(n_estimators=10) cv_ada = CvModel(n_folds, scaler, ada) cv_ada.fit(X, y) joblib.dump(cv_ada, 'ada1/1.pkl') print 'Training AdaBoost with n_estimators=50' ada.set_params(n_estimators=50) cv_ada = CvModel(n_folds, scaler, ada) cv_ada.fit(X, y) joblib.dump(cv_ada, 'ada1/2.pkl')