def imputData(data):
    # Separate numeric and non-numeric columns
    def splitTypes(dframe):
        objectsName = []
        objects = pd.DataFrame()
        #dframeName = []
        for i in dframe.columns:
            # If column i is not float, treat it as non-numeric
            if not (dframe[i].dtype == np.float64):
                # Get its name
                objectsName.append(i)
                # Get the non-numeric column
                objects = pd.concat([objects, dframe[i]], axis=1)
                # Drop the non-numeric column
                dframe = dframe.drop(columns=objectsName[-1])
        # Get the remaining numeric column names
        dframeName = list(dframe.columns)
        return dframeName, dframe, objectsName, objects

    w, x, y, z = splitTypes(data)
    # Extra Trees regressor used as the imputation estimator
    impute_est = ETR(n_estimators=10, random_state=0)
    # Iterative imputer
    estimator = IterativeImputer(random_state=0, estimator=impute_est)
    # Fit and transform the numeric data
    impdf = estimator.fit_transform(x)
    # Concatenate the imputed data and the non-numeric columns
    imp_data = pd.concat([pd.DataFrame(impdf), z], axis=1)
    # Rename columns
    imp_data.columns = w + y
    return imp_data
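# A minimal usage sketch for imputData, assuming the aliases below match the
# ones used above (pd, np, ETR, IterativeImputer); the toy frame is hypothetical.
import numpy as np
import pandas as pd
from sklearn.experimental import enable_iterative_imputer  # noqa: F401
from sklearn.impute import IterativeImputer
from sklearn.ensemble import ExtraTreesRegressor as ETR

demo = pd.DataFrame({
    "a": [1.0, np.nan, 3.0, 4.0],
    "b": [2.0, 2.5, np.nan, 4.5],
    "label": ["x", "y", "x", "y"],  # non-float column, passed through untouched
})
print(imputData(demo))  # numeric NaNs filled by the ETR-based IterativeImputer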
def iterative_fit(self, X, y, n_iter=1, refit=False):
    from sklearn.ensemble import ExtraTreesRegressor as ETR

    if refit:
        self.estimator = None

    if self.estimator is None:
        num_features = X.shape[1]
        max_features = int(
            float(self.max_features) * (np.log(num_features) + 1))
        # Use at most half of the features
        max_features = max(1, min(int(X.shape[1] / 2), max_features))
        self.estimator = ETR(n_estimators=n_iter,
                             criterion=self.criterion,
                             max_depth=self.max_depth,
                             min_samples_split=self.min_samples_split,
                             min_samples_leaf=self.min_samples_leaf,
                             bootstrap=self.bootstrap,
                             max_features=max_features,
                             max_leaf_nodes=self.max_leaf_nodes,
                             oob_score=self.oob_score,
                             n_jobs=self.n_jobs,
                             verbose=self.verbose,
                             random_state=self.random_state,
                             warm_start=True)
    else:
        self.estimator.n_estimators += n_iter

    self.estimator.fit(X, y)
    return self
def iterative_fit(self, X, y, n_iter=1, refit=False):
    if refit:
        self.estimator = None

    if self.estimator is None:
        num_features = X.shape[1]
        max_features = int(
            float(self.max_features) * (np.log(num_features) + 1))
        # Use at most half of the features
        max_features = max(1, min(int(X.shape[1] / 2), max_features))
        self.estimator = ETR(n_estimators=0,
                             criterion=self.criterion,
                             max_depth=self.max_depth,
                             min_samples_split=self.min_samples_split,
                             min_samples_leaf=self.min_samples_leaf,
                             bootstrap=self.bootstrap,
                             max_features=max_features,
                             max_leaf_nodes=self.max_leaf_nodes,
                             oob_score=self.oob_score,
                             n_jobs=self.n_jobs,
                             verbose=self.verbose,
                             random_state=self.random_state,
                             warm_start=True)

    tmp = self.estimator  # TODO copy ?
    tmp.n_estimators += n_iter
    tmp.fit(X, y)
    self.estimator = tmp
    return self
def iterative_fit(self, X, y, sample_weight=None, n_iter=1, refit=False):
    from sklearn.ensemble import ExtraTreesRegressor as ETR

    if refit:
        self.estimator = None

    if self.estimator is None:
        max_features = int(X.shape[1]**float(self.max_features))
        self.estimator = ETR(
            n_estimators=n_iter,
            criterion=self.criterion,
            max_depth=self.max_depth,
            min_samples_split=self.min_samples_split,
            min_samples_leaf=self.min_samples_leaf,
            bootstrap=self.bootstrap,
            max_features=max_features,
            max_leaf_nodes=self.max_leaf_nodes,
            min_weight_fraction_leaf=self.min_weight_fraction_leaf,
            min_impurity_decrease=self.min_impurity_decrease,
            oob_score=self.oob_score,
            n_jobs=self.n_jobs,
            verbose=self.verbose,
            random_state=self.random_state,
            warm_start=True)
    else:
        self.estimator.n_estimators += n_iter
        self.estimator.n_estimators = min(self.estimator.n_estimators,
                                          self.n_estimators)

    self.estimator.fit(X, y, sample_weight=sample_weight)
    return self
def iterative_fit(self, X, y, n_iter=1, refit=False):
    from sklearn.ensemble import ExtraTreesRegressor as ETR

    if refit:
        self.estimator = None

    if self.estimator is None:
        self.n_estimators = int(self.n_estimators)
        if self.criterion not in ("mse", "friedman_mse", "mae"):
            raise ValueError("'criterion' is not in ('mse', 'friedman_mse', "
                             "'mae'): %s" % self.criterion)
        if check_none(self.max_depth):
            self.max_depth = None
        else:
            self.max_depth = int(self.max_depth)
        if check_none(self.max_leaf_nodes):
            self.max_leaf_nodes = None
        else:
            self.max_leaf_nodes = int(self.max_leaf_nodes)
        self.min_samples_leaf = int(self.min_samples_leaf)
        self.min_samples_split = int(self.min_samples_split)
        self.max_features = float(self.max_features)
        self.min_impurity_decrease = float(self.min_impurity_decrease)
        self.bootstrap = check_for_bool(self.bootstrap)
        self.n_jobs = int(self.n_jobs)
        self.verbose = int(self.verbose)

        self.estimator = ETR(
            n_estimators=n_iter,
            criterion=self.criterion,
            max_depth=self.max_depth,
            min_samples_split=self.min_samples_split,
            min_samples_leaf=self.min_samples_leaf,
            bootstrap=self.bootstrap,
            max_features=self.max_features,
            max_leaf_nodes=self.max_leaf_nodes,
            min_impurity_decrease=self.min_impurity_decrease,
            oob_score=self.oob_score,
            n_jobs=self.n_jobs,
            verbose=self.verbose,
            random_state=self.random_state,
            warm_start=True)
    else:
        self.estimator.n_estimators += n_iter
        self.estimator.n_estimators = min(self.estimator.n_estimators,
                                          self.n_estimators)

    self.estimator.fit(X, y)
    return self
def __init__(self,
             timeseries,
             dataname,
             n_estimators=100,
             criterion='mse',
             min_samples_leaf=1,
             min_samples_split=2,
             max_features=1,
             bootstrap=False,
             max_leaf_nodes='None',
             max_depth='None',
             min_impurity_decrease=0.0,
             Window_size=20,
             Difference=False,
             time_feature=True,
             tsfresh_feature=True,
             forecasting_steps=25,
             n_splits=5,
             max_train_size=None,
             NAN_threshold=0.05):
    if criterion not in ("mse", "friedman_mse", "mae"):
        raise ValueError("'criterion' is not in ('mse', 'friedman_mse', "
                         "'mae'): %s" % criterion)
    self.criterion = criterion
    self.n_estimators = int(n_estimators)
    self.min_samples_leaf = int(min_samples_leaf)
    self.min_samples_split = int(min_samples_split)
    self.max_features = float(max_features)
    self.bootstrap = bootstrap
    if max_leaf_nodes == "None" or max_leaf_nodes is None:
        self.max_leaf_nodes = None
    else:
        self.max_leaf_nodes = int(max_leaf_nodes)
    if max_depth == "None" or max_depth is None:
        self.max_depth = None
    else:
        self.max_depth = int(max_depth)
    self.min_impurity_decrease = float(min_impurity_decrease)

    self.estimator = ETR(n_estimators=self.n_estimators,
                         criterion=self.criterion,
                         max_depth=self.max_depth,
                         min_samples_split=self.min_samples_split,
                         min_samples_leaf=self.min_samples_leaf,
                         bootstrap=self.bootstrap,
                         max_features=self.max_features,
                         max_leaf_nodes=self.max_leaf_nodes,
                         min_impurity_decrease=self.min_impurity_decrease,
                         warm_start=True)
    super().__init__(timeseries, dataname, Window_size, time_feature,
                     Difference, tsfresh_feature, forecasting_steps, n_splits,
                     max_train_size, NAN_threshold)
def get_Qmin(X0, X1, u, cost, T):
    """
    Gets the Extra Trees regressor model after training through multiple
    fitted Q-iteration steps, i.e. the suboptimal Q-function approximation
    for the infinite-horizon problem.

    Parameters
    ----------
    X0 : numpy 2D array of shape (n_samples, n_features)
        Each row is one episode in the training data; the columns hold
        delta and omega, respectively.
    X1 : numpy 2D array of shape (n_samples, n_features)
        Each row is one episode in the training data; the columns hold
        delta and omega at the next step, respectively.
    u : numpy 2D array of shape (n_samples, 1)
        Each row holds the control applied in the corresponding episode.
    cost : numpy 1D array of shape (n_samples,)
        Each row holds the cost incurred in the corresponding episode.
    T : int
        Number of Q-iteration steps to perform.

    Returns
    -------
    REGRESSOR : ExtraTreesRegressor
        The regressor fitted at the final Q-iteration step.
    """
    gamma = 0.95
    n_samples = X0.shape[0]
    Xtrain = np.concatenate((X0, u), axis=1)
    for n in range(T):
        if n == 0:
            ytrain = cost
            REGRESSOR = ETR(n_estimators=50).fit(Xtrain, ytrain)
        else:
            Qdata = np.zeros(shape=(n_samples, 11))
            for i in range(11):
                udata = -i * 0.016 * np.ones(shape=(n_samples, 1))
                Xdata = np.concatenate((X1, udata), axis=1)
                Qdata[:, i] = REGRESSOR.predict(Xdata)
            ytrain = cost + gamma * np.amin(Qdata, axis=1)
            REGRESSOR = ETR(n_estimators=50).fit(Xtrain, ytrain)
    return REGRESSOR
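# A minimal sketch of calling get_Qmin and reading off a greedy control,
# assuming a 2-D state (delta, omega) and the same 11-point control grid
# 0, -0.016, ..., -0.16 used inside the function; the synthetic data below is
# only illustrative.
import numpy as np

rng = np.random.default_rng(0)
n = 500
X0 = rng.normal(size=(n, 2))                   # states at time t
u = -0.016 * rng.integers(0, 11, size=(n, 1))  # controls applied at time t
X1 = X0 + 0.1 * rng.normal(size=(n, 2))        # states at time t + 1
cost = (X0 ** 2).sum(axis=1)                   # per-step cost
Q = get_Qmin(X0, X1, u, cost, T=5)

# Evaluate Q over the 11 candidate controls at one query state and pick the best.
state = np.array([[0.3, -0.1]])
candidates = np.array([[-0.016 * i] for i in range(11)])
q_values = Q.predict(np.hstack([np.repeat(state, 11, axis=0), candidates]))
print("greedy control:", candidates[np.argmin(q_values), 0])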
def __init__(self, df, mode, pca):
    #self.scaler = MinMaxScaler()  #StandardScaler()
    y = df.y.values
    x = df[cols].values
    #self.means = x.mean()
    #x = x.fillna(self.means)
    #x = x.values
    self.decomp = False
    self.model = None
    #x = self.scaler.fit_transform(x)
    if pca:
        var_exp_tol = 0.9
        self.decomp = ipca()
        x_mod = self.decomp.fit_transform(x)
        cum_var_exp = self.decomp.explained_variance_ratio_.cumsum()
        self.n = np.arange(0, x.shape[1])[cum_var_exp >= var_exp_tol][0]
        self.n = min(self.n, int(np.sqrt(df.shape[0])))
        x = x_mod[:, 0:self.n]
        print("# PC used:", self.n, "variance explained:", var_exp_tol)
    if mode == 'A':
        self.model = 'linreg'
        self.regr = LinearRegression()
    elif mode == 'B':
        self.model = 'ridge'
        #self.regr = Ridge(alpha=200, random_state=52)
        self.regr = RidgeCV(alphas=np.arange(0.1, 1.1, 0.1) * 1E5,
                            store_cv_values=True)
                            #scoring='neg_mean_squared_error')
    elif mode == 'C':
        self.model = 'lasso'
        #self.regr = Lasso(alpha=1, selection='random', random_state=52)
        self.regr = LassoCV(alphas=np.array([0.1, 1, 10]) * 1E4)
    elif mode == 'D':
        self.model = 'elasticNet'
        #self.regr = ElasticNet(alpha=1, l1_ratio=0.2, selection='random', random_state=52)
        self.regr = ElasticNetCV(l1_ratio=np.arange(0.1, 1.1) * 0.1,
                                 n_alphas=3,
                                 alphas=np.array([0.01, 0.1, 1]) * 1E7)
    elif mode == 'E':
        self.model = 'ETR'
        self.regr = ETR(n_estimators=500,
                        max_depth=10,
                        max_features='auto',
                        bootstrap=True,
                        criterion='mse',
                        #verbose=1,
                        oob_score=True,
                        n_jobs=4,
                        random_state=50)
        #est = ETR(random_state=50, verbose=1, n_jobs=-1)
        tuned_parameters = [{
            'n_estimators': [10, 100],
            'max_depth': [5, 50]
        }]
        #self.regr = GridSearchCV(estimator=est,
        #                         param_grid=tuned_parameters,
        #                         verbose=1)
    self.regr.fit(x, y)
def __ensemble_test(type, X_train, X_test, y_train, y_test):
    if type.lower() == 'gbr':
        reg = GBR(n_estimators=100, random_state=1)
    elif type.lower() == 'rfr':
        reg = RFR(n_estimators=100, random_state=1)
    elif type.lower() == 'abr':
        reg = ABR(n_estimators=100, random_state=1)
    elif type.lower() == 'etr':
        reg = ETR(n_estimators=100, random_state=1)
    reg.fit(X_train, y_train)
    return reg, reg.score(X_test, y_test), reg.feature_importances_
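# A minimal sketch of calling the dispatcher above, assuming GBR/RFR/ABR/ETR
# are the usual scikit-learn ensemble regressors aliased elsewhere in this module.
from sklearn.datasets import make_regression
from sklearn.ensemble import (AdaBoostRegressor as ABR,
                              ExtraTreesRegressor as ETR,
                              GradientBoostingRegressor as GBR,
                              RandomForestRegressor as RFR)
from sklearn.model_selection import train_test_split

X, y = make_regression(n_samples=200, n_features=8, noise=0.1, random_state=1)
X_tr, X_te, y_tr, y_te = train_test_split(X, y, test_size=0.25, random_state=1)
for name in ("gbr", "rfr", "abr", "etr"):
    reg, r2, importances = __ensemble_test(name, X_tr, X_te, y_tr, y_te)
    print(name, round(r2, 3))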
def fit(self, X, y, sample_weight=None):
    from sklearn.ensemble import ExtraTreesRegressor as ETR

    max_features = int(X.shape[1]**float(self.max_features))
    self.estimator = ETR(
        n_estimators=self.get_max_iter(),
        criterion=self.criterion,
        max_depth=self.max_depth,
        min_samples_split=self.min_samples_split,
        min_samples_leaf=self.min_samples_leaf,
        bootstrap=self.bootstrap,
        max_features=max_features,
        max_leaf_nodes=self.max_leaf_nodes,
        min_weight_fraction_leaf=self.min_weight_fraction_leaf,
        min_impurity_decrease=self.min_impurity_decrease,
        oob_score=self.oob_score,
        n_jobs=self.n_jobs,
        verbose=self.verbose,
        random_state=self.random_state,
        warm_start=True)
    self.estimator.fit(X, y, sample_weight=sample_weight)
    return self
def regression(X, Y, X_cv, Y_cv, n_estimators=200, criterion='mse',
               max_depth=20):
    ####### ERT regressor trained on simulated data
    KK = X.shape[1]
    mse_train = []
    mse_cv = []
    importance = np.zeros(KK)
    regr = ETR(n_estimators=n_estimators,
               criterion=criterion,
               max_depth=max_depth)
    regr.fit(X, Y)
    Y_pred = regr.predict(X)
    mse_train.append(np.sqrt(mse(Y, Y_pred)))
    Y_pred = regr.predict(X_cv)
    mse_cv.append(np.sqrt(mse(Y_cv, Y_pred)))
    importance[:] = regr.feature_importances_
    return regr, mse_train, mse_cv
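# A minimal sketch of calling regression() above, assuming mse is
# sklearn.metrics.mean_squared_error and ETR is ExtraTreesRegressor (as in the
# other snippets), and a scikit-learn version that still accepts criterion='mse'.
from sklearn.datasets import make_regression
from sklearn.model_selection import train_test_split

X_all, Y_all = make_regression(n_samples=300, n_features=10, noise=0.5,
                               random_state=7)
X_tr, X_cv, Y_tr, Y_cv = train_test_split(X_all, Y_all, test_size=0.3,
                                          random_state=7)
model, rmse_train, rmse_cv = regression(X_tr, Y_tr, X_cv, Y_cv,
                                        n_estimators=100, max_depth=10)
# The returned lists each hold a single RMSE value (the square root of the MSE).
print("train RMSE:", rmse_train[0], "cv RMSE:", rmse_cv[0])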
def main():
    ### parsing and Data pre-processing
    # load the provided data
    train_features_path = os.path.join(data_path, 'dengue_features_train.csv')
    train_labels_path = os.path.join(data_path, 'dengue_labels_train.csv')
    ### pre-processing data
    sj_train, iq_train = preprocess_data(train_features_path,
                                         labels_path=train_labels_path)
    #print(sj_train.describe())
    #print(iq_train.describe())

    kf = KFold(n_splits=6)
    sj_model_list = []
    sj_err_list = []
    loop = 1
    # The index will be split into [train_index] and [val_index]
    for train_index, val_index in kf.split(sj_train):
        X_train, X_val = sj_train.ix[train_index], sj_train.ix[val_index]
        sj_etr = ETR(n_estimators=800, max_depth=4, random_state=0, verbose=1)
        sj_etr.fit(X_train.drop('total_cases', axis=1),
                   X_train['total_cases'])
        predictions = sj_etr.predict(X_val.drop('total_cases', axis=1))
        sj_err_list.append(
            eval_measures.meanabs(predictions, X_val.total_cases))
        sj_model_list.append(sj_etr)
        loop += 1
    print(sj_err_list)
    # Index of the fold with the smallest validation error
    argmax = sorted(range(len(sj_err_list)), key=lambda x: sj_err_list[x])[0]
    print(argmax)
    sj_best_model = sj_model_list[argmax]

    iq_model_list = []
    iq_err_list = []
    loop = 1
    for train_index, val_index in kf.split(iq_train):
        X_train, X_val = iq_train.ix[train_index], iq_train.ix[val_index]
        iq_etr = ETR(n_estimators=400, max_depth=4, random_state=0)
        iq_etr.fit(X_train.drop('total_cases', axis=1),
                   X_train['total_cases'])
        predictions = iq_etr.predict(X_val.drop('total_cases', axis=1))
        iq_err_list.append(
            eval_measures.meanabs(predictions, X_val.total_cases))
        iq_model_list.append(iq_etr)
        loop += 1
    print(iq_err_list)
    argmax = sorted(range(len(iq_err_list)), key=lambda x: iq_err_list[x])[0]
    print(argmax)
    iq_best_model = iq_model_list[argmax]

    ##Accessing testing data
    test_features_path = os.path.join(data_path, 'dengue_features_test.csv')
    sj_test, iq_test = preprocess_data(test_features_path)

    #Calculate the k-fold validation error
    sj_score = []
    for train_index, val_index in kf.split(sj_train):
        X_train, X_val = sj_train.ix[train_index], sj_train.ix[val_index]
        train_predict = np.array(
            sj_best_model.predict(X_val.drop('total_cases',
                                             axis=1))).astype(int)
        sj_score.append(eval_measures.meanabs(train_predict,
                                              X_val.total_cases))
    print("Mean of {} cross validation of sj_score is {} (+/- {})".format(
        kf.get_n_splits(sj_train), np.mean(sj_score), np.std(sj_score)))

    iq_score = []
    for train_index, val_index in kf.split(iq_train):
        X_train, X_val = iq_train.ix[train_index], iq_train.ix[val_index]
        train_predict = np.array(
            iq_best_model.predict(X_val.drop('total_cases',
                                             axis=1))).astype(int)
        iq_score.append(eval_measures.meanabs(train_predict,
                                              X_val.total_cases))
    print("Mean of {} cross validation of iq_score is {} (+/- {})".format(
        kf.get_n_splits(iq_train), np.mean(iq_score), np.std(iq_score)))

    ##Use the best models found above to predict the testing data
    print("Predicting testing data...")
    sj_predictions = sj_best_model.predict(sj_test)
    iq_predictions = iq_best_model.predict(iq_test)
    sj_predictions = np.array(sj_predictions).astype(int)
    iq_predictions = np.array(iq_predictions).astype(int)

    print("Creating submit file...")
    ##Use submission_format as template to write the answer
    sample_path = os.path.join(data_path, 'submission_format.csv')
    submission = pd.read_csv(sample_path, index_col=[0, 1, 2])
    submission.total_cases = np.concatenate([sj_predictions, iq_predictions])
    submission.to_csv("./data/ext_new.csv")
def main():
    ### parsing and Data pre-processing
    # load the provided data
    train_features_path = os.path.join(data_path, 'dengue_features_train.csv')
    train_labels_path = os.path.join(data_path, 'dengue_labels_train.csv')
    ### pre-processing data
    sj_train, iq_train = preprocess_data(train_features_path,
                                         labels_path=train_labels_path)
    #print(sj_train.describe())
    #print(iq_train.describe())

    ###Define the xgb parameters
    xgb_params = {
        'eta': 0.05,
        'max_depth': 5,
        'subsample': 0.7,
        'colsample_bytree': 0.7,
        'objective': 'reg:linear',
        'eval_metric': 'rmse',
        'silent': 1
    }
    num_boost_rounds = 1000

    ##Use K-fold to create cross validation data
    kf = KFold(n_splits=6)

    ##Do the stacking by adding six columns 'negbi', 'gb', 'xgb', 'abr',
    ##'etr', 'br' which store the training predictions
    sj_train = sj_train.assign(negbi=0)
    sj_train = sj_train.assign(gb=0)
    sj_train = sj_train.assign(xgb=0)
    sj_train = sj_train.assign(abr=0)
    sj_train = sj_train.assign(etr=0)
    sj_train = sj_train.assign(br=0)
    loop = 1
    # The index will be split into [train_index] and [val_index]
    for train_index, val_index in kf.split(sj_train):
        X_train, X_val = sj_train.ix[train_index], sj_train.ix[val_index]
        ###(1)neg_binomial method
        sj_neg_model = get_best_model(X_train, X_val, 'sj')
        predictions_neg = sj_neg_model.predict(X_val).astype(int)
        #Shift the prediction manually
        for i in range(predictions_neg.shape[0] - 1, 3, -1):
            predictions_neg.ix[i] = predictions_neg.ix[i - 4]
        ###(2)gradient boosting method
        sj_gb_model = gradient_boosting(
            X_train.drop(['negbi', 'gb', 'xgb', 'abr', 'etr', 'br'], axis=1),
            X_val.drop(['negbi', 'gb', 'xgb', 'abr', 'etr', 'br'], axis=1))
        predictions_gb = sj_gb_model.predict(
            X_val.drop(
                ['total_cases', 'negbi', 'gb', 'xgb', 'abr', 'etr', 'br'],
                axis=1)).astype(int)
        ###(3)xgboost method
        dtrain = xgb.DMatrix(
            X_train.drop(
                ['total_cases', 'negbi', 'gb', 'xgb', 'abr', 'etr', 'br'],
                axis=1), X_train['total_cases'])
        dval = xgb.DMatrix(
            X_val.drop(
                ['total_cases', 'negbi', 'gb', 'xgb', 'abr', 'etr', 'br'],
                axis=1))
        sj_xgb_model = xgb.train(dict(xgb_params, silent=0),
                                 dtrain,
                                 num_boost_round=num_boost_rounds)
        predictions_xgb = sj_xgb_model.predict(dval).astype(int)
        ###(4)Adaboost regressor method
        sj_abr_model = ABR(n_estimators=800,
                           learning_rate=0.08,
                           loss='linear',
                           random_state=0)
        sj_abr_model.fit(
            X_train.drop(
                ['total_cases', 'negbi', 'gb', 'xgb', 'abr', 'etr', 'br'],
                axis=1), X_train['total_cases'])
        predictions_abr = sj_abr_model.predict(
            X_val.drop(
                ['total_cases', 'negbi', 'gb', 'xgb', 'abr', 'etr', 'br'],
                axis=1))
        ###(5)Extra tree regressor method
        sj_etr_model = ETR(n_estimators=800,
                           max_depth=4,
                           random_state=0,
                           verbose=1)
        sj_etr_model.fit(
            X_train.drop(
                ['total_cases', 'negbi', 'gb', 'xgb', 'abr', 'etr', 'br'],
                axis=1), X_train['total_cases'])
        predictions_etr = sj_etr_model.predict(
            X_val.drop(
                ['total_cases', 'negbi', 'gb', 'xgb', 'abr', 'etr', 'br'],
                axis=1))
        ###(6) Bagging Regressor method
        sj_br_model = BR(n_estimators=800,
                         oob_score=False,
                         n_jobs=5,
                         random_state=0,
                         verbose=1)
        sj_br_model.fit(
            X_train.drop(
                ['total_cases', 'negbi', 'gb', 'xgb', 'abr', 'etr', 'br'],
                axis=1), X_train['total_cases'])
        predictions_br = sj_br_model.predict(
            X_val.drop(
                ['total_cases', 'negbi', 'gb', 'xgb', 'abr', 'etr', 'br'],
                axis=1))
        ###Store the results in sj_train: predictions_neg -> 'negbi',
        ###predictions_gb -> 'gb', and so on
        print("Adding the result of the predictions to sj training data({}/{})"
              .format(loop, 6))
        for idx, index in enumerate(val_index):
            sj_train['negbi'].ix[index] = predictions_neg.ix[idx]
            sj_train['gb'].ix[index] = predictions_gb[idx]
            sj_train['xgb'].ix[index] = predictions_xgb[idx]
            sj_train['abr'].ix[index] = predictions_abr[idx]
            sj_train['etr'].ix[index] = predictions_etr[idx]
            sj_train['br'].ix[index] = predictions_br[idx]
        loop += 1

    iq_train = iq_train.assign(negbi=0)
    iq_train = iq_train.assign(gb=0)
    iq_train = iq_train.assign(xgb=0)
    iq_train = iq_train.assign(abr=0)
    iq_train = iq_train.assign(etr=0)
    iq_train = iq_train.assign(br=0)
    loop = 1
    for train_index, val_index in kf.split(iq_train):
        X_train, X_val = iq_train.ix[train_index], iq_train.ix[val_index]
        ###(1)neg_binomial method
        iq_neg_model = get_best_model(X_train, X_val, 'iq')
        predictions_neg = iq_neg_model.predict(X_val).astype(int)
        #Shift the prediction manually
        for i in range(predictions_neg.shape[0] - 1, 0, -1):
            predictions_neg.ix[i] = predictions_neg.ix[i - 1]
        ###(2)gradient boosting method
        iq_gb_model = gradient_boosting(
            X_train.drop(['negbi', 'gb', 'xgb', 'abr', 'etr', 'br'], axis=1),
            X_val.drop(['negbi', 'gb', 'xgb', 'abr', 'etr', 'br'], axis=1))
        predictions_gb = iq_gb_model.predict(
            X_val.drop(
                ['total_cases', 'negbi', 'gb', 'xgb', 'abr', 'etr', 'br'],
                axis=1)).astype(int)
        ###(3)xgb method
        dtrain = xgb.DMatrix(
            X_train.drop(
                ['total_cases', 'negbi', 'gb', 'xgb', 'abr', 'etr', 'br'],
                axis=1), X_train['total_cases'])
        dval = xgb.DMatrix(
            X_val.drop(
                ['total_cases', 'negbi', 'gb', 'xgb', 'abr', 'etr', 'br'],
                axis=1))
        iq_xgb_model = xgb.train(dict(xgb_params, silent=0),
                                 dtrain,
                                 num_boost_round=num_boost_rounds)
        predictions_xgb = iq_xgb_model.predict(dval).astype(int)
        ###(4)Adaboost regressor method
        iq_abr_model = ABR(n_estimators=800,
                           learning_rate=0.08,
                           loss='linear',
                           random_state=0)
        iq_abr_model.fit(
            X_train.drop(
                ['total_cases', 'negbi', 'gb', 'xgb', 'abr', 'etr', 'br'],
                axis=1), X_train['total_cases'])
        predictions_abr = iq_abr_model.predict(
            X_val.drop(
                ['total_cases', 'negbi', 'gb', 'xgb', 'abr', 'etr', 'br'],
                axis=1))
        ###(5)Extra tree regressor method
        iq_etr_model = ETR(n_estimators=800,
                           max_depth=4,
                           random_state=0,
                           verbose=1)
        iq_etr_model.fit(
            X_train.drop(
                ['total_cases', 'negbi', 'gb', 'xgb', 'abr', 'etr', 'br'],
                axis=1), X_train['total_cases'])
        predictions_etr = iq_etr_model.predict(
            X_val.drop(
                ['total_cases', 'negbi', 'gb', 'xgb', 'abr', 'etr', 'br'],
                axis=1))
        ###(6) Bagging Regressor method
        iq_br_model = BR(n_estimators=800,
                         oob_score=False,
                         n_jobs=5,
                         random_state=0,
                         verbose=1)
        iq_br_model.fit(
            X_train.drop(
                ['total_cases', 'negbi', 'gb', 'xgb', 'abr', 'etr', 'br'],
                axis=1), X_train['total_cases'])
        predictions_br = iq_br_model.predict(
            X_val.drop(
                ['total_cases', 'negbi', 'gb', 'xgb', 'abr', 'etr', 'br'],
                axis=1))
        ###Store the results in iq_train: predictions_neg -> 'negbi',
        ###predictions_gb -> 'gb', and so on
        print("Adding the result of the predictions to iq training data({}/{})"
              .format(loop, 6))
        for idx, index in enumerate(val_index):
            iq_train['negbi'].ix[index] = predictions_neg.ix[idx]
            iq_train['gb'].ix[index] = predictions_gb[idx]
            iq_train['xgb'].ix[index] = predictions_xgb[idx]
            iq_train['abr'].ix[index] = predictions_abr[idx]
            iq_train['etr'].ix[index] = predictions_etr[idx]
            iq_train['br'].ix[index] = predictions_br[idx]
        loop += 1

    ###Now the training data looks like
    ###[features, total_cases, negbi, gb, xgb, abr, etr, br]
    ##Accessing testing data
    test_features_path = os.path.join(data_path, 'dengue_features_test.csv')
    sj_test, iq_test = preprocess_data(test_features_path)

    ##Like training, add the prediction columns to the testing dataframe
    sj_test = sj_test.assign(negbi=0)
    sj_test = sj_test.assign(gb=0)
    sj_test = sj_test.assign(xgb=0)
    sj_test = sj_test.assign(abr=0)
    sj_test = sj_test.assign(etr=0)
    sj_test = sj_test.assign(br=0)
    ##(1)neg_binomial prediction
    sj_predictions_neg = sj_neg_model.predict(sj_test).astype(int)
    for i in range(sj_predictions_neg.shape[0] - 1, 3, -1):
        sj_predictions_neg.ix[i] = sj_predictions_neg.ix[i - 4]
    ##(2)gradient boosting prediction
    sj_predictions_gb = sj_gb_model.predict(
        sj_test.drop(['negbi', 'gb', 'xgb', 'abr', 'etr', 'br'],
                     axis=1)).astype(int)
    ##(3)xgb prediction
    dtest = xgb.DMatrix(
        sj_test.drop(['negbi', 'gb', 'xgb', 'abr', 'etr', 'br'], axis=1))
    sj_predictions_xgb = sj_xgb_model.predict(dtest).astype(int)
    ###(4)Adaboost regressor method
    sj_predictions_abr = sj_abr_model.predict(
        sj_test.drop(['negbi', 'gb', 'xgb', 'abr', 'etr', 'br'],
                     axis=1)).astype(int)
    ###(5)extra tree regressor method
    sj_predictions_etr = sj_etr_model.predict(
        sj_test.drop(['negbi', 'gb', 'xgb', 'abr', 'etr', 'br'],
                     axis=1)).astype(int)
    ###(6)bagging regressor method
    sj_predictions_br = sj_br_model.predict(
        sj_test.drop(['negbi', 'gb', 'xgb', 'abr', 'etr', 'br'],
                     axis=1)).astype(int)
    print("Adding predictions as features to sj testing data...")
    #Add each prediction to the corresponding column
    for i in range(len(sj_test['negbi'])):
        sj_test['negbi'].ix[i] = sj_predictions_neg.ix[i]
        sj_test['gb'].ix[i] = sj_predictions_gb[i]
        sj_test['xgb'].ix[i] = sj_predictions_xgb[i]
        sj_test['abr'].ix[i] = sj_predictions_abr[i]
        sj_test['etr'].ix[i] = sj_predictions_etr[i]
        sj_test['br'].ix[i] = sj_predictions_br[i]

    ##Same process as for city sj
    iq_test = iq_test.assign(negbi=0)
    iq_test = iq_test.assign(gb=0)
    iq_test = iq_test.assign(xgb=0)
    iq_test = iq_test.assign(abr=0)
    iq_test = iq_test.assign(etr=0)
    iq_test = iq_test.assign(br=0)
    ###(1)neg_binomial prediction
    iq_predictions_neg = iq_neg_model.predict(iq_test).astype(int)
    for i in range(iq_predictions_neg.shape[0] - 1, 0, -1):
        iq_predictions_neg.ix[i] = iq_predictions_neg.ix[i - 1]
    ##(2)gradient boosting prediction
    iq_predictions_gb = iq_gb_model.predict(
        iq_test.drop(['negbi', 'gb', 'xgb', 'abr', 'etr', 'br'],
                     axis=1)).astype(int)
    ##(3)xgb prediction
    dtest = xgb.DMatrix(
        iq_test.drop(['negbi', 'gb', 'xgb', 'abr', 'etr', 'br'], axis=1))
    iq_predictions_xgb = iq_xgb_model.predict(dtest).astype(int)
    ###(4)Adaboost regressor method
    iq_predictions_abr = iq_abr_model.predict(
        iq_test.drop(['negbi', 'gb', 'xgb', 'abr', 'etr', 'br'],
                     axis=1)).astype(int)
    ###(5)extra tree regressor method
    iq_predictions_etr = iq_etr_model.predict(
        iq_test.drop(['negbi', 'gb', 'xgb', 'abr', 'etr', 'br'],
                     axis=1)).astype(int)
    ###(6)bagging regressor method
    iq_predictions_br = iq_br_model.predict(
        iq_test.drop(['negbi', 'gb', 'xgb', 'abr', 'etr', 'br'],
                     axis=1)).astype(int)
    print("Adding predictions as features to iq testing data...")
    for i in range(len(iq_test['negbi'])):
        iq_test['negbi'].ix[i] = iq_predictions_neg.ix[i]
        iq_test['gb'].ix[i] = iq_predictions_gb[i]
        iq_test['xgb'].ix[i] = iq_predictions_xgb[i]
        iq_test['abr'].ix[i] = iq_predictions_abr[i]
        iq_test['etr'].ix[i] = iq_predictions_etr[i]
        iq_test['br'].ix[i] = iq_predictions_br[i]

    ##Use the new information to run a linear regression
    print("Building linear regression model...")
    #The linear regression model trains on
    #(X = [features, negbi, gb, xgb, abr, etr, br], y = total_cases)
    sj_lr = LR()
    sj_lr.fit(sj_train.drop('total_cases', axis=1), sj_train['total_cases'])
    iq_lr = LR()
    iq_lr.fit(iq_train.drop('total_cases', axis=1), iq_train['total_cases'])

    #Calculate the k-fold validation error
    sj_score = []
    for train_index, val_index in kf.split(sj_train):
        X_train, X_val = sj_train.ix[train_index], sj_train.ix[val_index]
        train_predict = np.array(
            sj_lr.predict(X_val.drop('total_cases', axis=1))).astype(int)
        sj_score.append(eval_measures.meanabs(train_predict,
                                              X_val.total_cases))
    print("Mean of {} cross validation of sj_score is {} (+/- {})".format(
        kf.get_n_splits(sj_train), np.mean(sj_score), np.std(sj_score)))

    iq_score = []
    for train_index, val_index in kf.split(iq_train):
        X_train, X_val = iq_train.ix[train_index], iq_train.ix[val_index]
        train_predict = np.array(
            iq_lr.predict(X_val.drop('total_cases', axis=1))).astype(int)
        iq_score.append(eval_measures.meanabs(train_predict,
                                              X_val.total_cases))
    print("Mean of {} cross validation of iq_score is {} (+/- {})".format(
        kf.get_n_splits(iq_train), np.mean(iq_score), np.std(iq_score)))

    ##Use the models sj_lr and iq_lr trained before to predict the testing data
    print("Predicting testing data...")
    sj_predictions = sj_lr.predict(sj_test)
    iq_predictions = iq_lr.predict(iq_test)
    sj_predictions = np.array(sj_predictions).astype(int)
    iq_predictions = np.array(iq_predictions).astype(int)

    print("Creating submit file...")
    ##Use submission_format as template to write the answer
    sample_path = os.path.join(data_path, 'submission_format.csv')
    submission = pd.read_csv(sample_path, index_col=[0, 1, 2])
    submission.total_cases = np.concatenate([sj_predictions, iq_predictions])
    submission.to_csv("./data/stacking_6_less_feature.csv")
for min_samples_split in [2]:
    for min_samples_leaf in [1]:
        if algorithm_name == "rf":
            estimator_withParams = RFR(
                n_estimators=n_estimators,
                max_features="auto",
                min_samples_split=min_samples_split,
                min_samples_leaf=min_samples_leaf,
                oob_score=False,
                n_jobs=-1,
                random_state=2017)
        if algorithm_name == "etr":
            estimator_withParams = ETR(
                n_estimators=n_estimators,
                max_features="auto",
                min_samples_split=min_samples_split,
                min_samples_leaf=min_samples_leaf,
                oob_score=False,
                n_jobs=-1,
                random_state=2017)
        Model_for_competition(
            algorithm_name=algorithm_name,
            estimator_withParams=estimator_withParams,
            pickle_model_file_name="1.csv",
            fgfs=fgfs,
            ReportFolder=ReportFolder,
            source=source,
            predictors_type=predictors_type,
            predictors=predictors,
            target_variables=target_variables,
            estimator_output_length=estimator_output_length,
def testPCA(components):
    #pca_trans = PCA(n_components=components, random_state=1)
    pca_trans = tsvd(n_components=components, random_state=7, n_iter=10)
    pca_trans.fit(data)
    data2 = pca_trans.transform(data)
    #MinMax Normalizer
    scaler = MinMaxScaler()
    scaler.fit(data2)
    data2 = scaler.transform(data2)
    y["target"] = np.log1p(y["target"])
    #train test split
    x_train, x_test, y_train, y_test = tts(data2, y["target"], test_size=0.20)

    #######################----------Algos--------------------#######################
    ranfor = RFR(n_estimators=500, verbose=0, n_jobs=-1, random_state=7)
    extratrees = ETR(n_estimators=500, random_state=7)
    bagging = BR(ETR(n_estimators=10, random_state=1),
                 n_estimators=100,
                 random_state=7)

    """---XGBOOST---"""
    xgb_train = xgb.DMatrix(x_train, label=y_train)
    xgb_validate = xgb.DMatrix(x_test, label=y_test)
    xgb_test_pred = xgb.DMatrix(x_test)
    param = {}
    param['objective'] = 'reg:linear'
    param['eta'] = 0.001
    param['max_depth'] = 6
    param['alpha'] = 0.001
    param['colsample_bytree'] = 0.6
    param['subsample'] = 0.6
    param['silent'] = 0
    param['nthread'] = 4
    param['random_state'] = 42
    param['eval_metric'] = 'rmse'
    watchlist = [(xgb_train, 'train'), (xgb_validate, 'validation')]

    """-fit-"""
    ranfor.fit(x_train, y_train)
    extratrees.fit(x_train, y_train)
    bst = xgb.train(param,
                    xgb_train,
                    10000,
                    watchlist,
                    early_stopping_rounds=100,
                    verbose_eval=100,
                    maximize=False)
    y_pred = ranfor.predict(x_test)
    y_pred_ada = extratrees.predict(x_test)
    y_pred_xgb = bst.predict(xgb_test_pred, ntree_limit=bst.best_ntree_limit)

    #blending
    blending_X = pd.DataFrame()
    blending_X['xgb'] = bst.predict(xgb.DMatrix(x_train),
                                    ntree_limit=bst.best_ntree_limit)
    blending_X['ExtraTrees'] = extratrees.predict(x_train)
    blending_X['ranfor'] = ranfor.predict(x_train)
    bagging.fit(blending_X, y_train)
    blending_test = pd.DataFrame()
    blending_test['xgb'] = y_pred_xgb
    blending_test['ExtraTrees'] = y_pred_ada
    blending_test['ranfor'] = y_pred
    y_pred_grad = bagging.predict(blending_test)

    ###############################################
    y_pred_2best = (0.6 * y_pred_ada) + (0.4 * y_pred_xgb)
    print("PCA: %s --- Ranfor RMSE is : %s" %
          (components, np.sqrt(mse(y_test, y_pred))))
    print("PCA: %s --- ExtraTrees RMSE is : %s" %
          (components, np.sqrt(mse(y_test, y_pred_ada))))
    print("PCA: %s --- XGBoost RMSE is : %s" %
          (components, np.sqrt(mse(y_test, y_pred_xgb))))
    print("PCA: %s --- blended bagging RMSE is : %s" %
          (components, np.sqrt(mse(y_test, y_pred_grad))))
    print("PCA: %s --- XGBoost+ExtraTrees RMSE is : %s" %
          (components, np.sqrt(mse(y_test, y_pred_2best))))
    return {
        "pca": pca_trans,
        "scaler": scaler,
        "ranfor": ranfor,
        'extratrees': extratrees,
        'bagging': bagging,
        'xgboost': bst
    }
def main():
    ### parsing and Data pre-processing
    # load the provided data
    train_features_path = os.path.join(data_path, 'dengue_features_train.csv')
    train_labels_path = os.path.join(data_path, 'dengue_labels_train.csv')
    ### pre-processing data
    sj_train, iq_train = preprocess_data(train_features_path,
                                         labels_path=train_labels_path)
    #print(sj_train.describe())
    #print(iq_train.describe())

    choose = rand.sample(range(0, sj_train.shape[0] - 1), 800)
    val = [i for i in range(sj_train.shape[0]) if i not in choose]
    sj_train_subtrain = sj_train.ix[choose]
    sj_train_subtest = sj_train.ix[val]
    sj_etr = ETR(n_estimators=2000, max_depth=3, criterion='mae', verbose=1)
    sj_etr.fit(sj_train_subtrain.drop('total_cases', axis=1),
               sj_train_subtrain['total_cases'])

    ##The model generated by neg_binomial with the best alpha on the val_set chosen before
    kf = KFold(n_splits=12)
    sj_model_list = []
    sj_err_list = []
    loop = 1
    # The index will be split into [train_index] and [val_index]
    for train_index, val_index in kf.split(sj_train):
        X_train, X_val = sj_train.ix[train_index], sj_train.ix[val_index]
        #sj_etr = ETR(n_estimators=2000, max_depth=3, criterion='mae', verbose=1)
        #sj_etr.fit(X_train.drop(['station_avg_temp_c', 'total_cases'], axis=1),
        #           X_train['total_cases'])
        predictions = sj_etr.predict(X_val.drop('total_cases', axis=1))
        sj_err_list.append(
            eval_measures.meanabs(predictions, X_val.total_cases))
        #sj_model_list.append(sj_etr)
        loop += 1
    print(sj_err_list)
    #argmax = sorted(range(len(sj_err_list)), key=lambda x: sj_err_list[x])[0]
    #print(argmax)
    #sj_best_model = sj_model_list[argmax]
    sj_best_model = sj_etr
    #print(sj_best_model.feature_importances_)

    choose = rand.sample(range(0, iq_train.shape[0] - 1), 400)
    val = [i for i in range(iq_train.shape[0]) if i not in choose]
    iq_train_subtrain = iq_train.ix[choose]
    iq_train_subtest = iq_train.ix[val]
    iq_etr = ETR(n_estimators=2000, max_depth=3, criterion='mae', verbose=1)
    iq_etr.fit(iq_train_subtrain.drop('total_cases', axis=1),
               iq_train_subtrain['total_cases'])

    iq_model_list = []
    iq_err_list = []
    loop = 1
    for train_index, val_index in kf.split(iq_train):
        X_train, X_val = iq_train.ix[train_index], iq_train.ix[val_index]
        #iq_etr = ETR(n_estimators=2000, max_depth=3, criterion='mae', verbose=1)
        #iq_etr.fit(X_train.drop(['station_min_temp_c', 'total_cases'], axis=1),
        #           X_train['total_cases'])
        predictions = iq_etr.predict(X_val.drop('total_cases', axis=1))
        iq_err_list.append(
            eval_measures.meanabs(predictions, X_val.total_cases))
        #iq_model_list.append(iq_etr)
        loop += 1
    print(iq_err_list)
    #argmax = sorted(range(len(iq_err_list)), key=lambda x: iq_err_list[x])[0]
    #print(argmax)
    #iq_best_model = iq_model_list[argmax]
    iq_best_model = iq_etr
    #print(iq_best_model.feature_importances_)

    ##Accessing testing data
    test_features_path = os.path.join(data_path, 'dengue_features_test.csv')
    sj_test, iq_test = preprocess_data(test_features_path)

    #Calculate the k-fold validation error
    sj_score = []
    for train_index, val_index in kf.split(sj_train):
        X_train, X_val = sj_train.ix[train_index], sj_train.ix[val_index]
        train_predict = np.array(
            sj_best_model.predict(X_val.drop('total_cases',
                                             axis=1))).astype(int)
        sj_score.append(eval_measures.meanabs(train_predict,
                                              X_val.total_cases))
    print("Mean of {} cross validation of sj_score is {} (+/- {})".format(
        kf.get_n_splits(sj_train), np.mean(sj_score), np.std(sj_score)))

    iq_score = []
    for train_index, val_index in kf.split(iq_train):
        X_train, X_val = iq_train.ix[train_index], iq_train.ix[val_index]
        train_predict = np.array(
            iq_best_model.predict(X_val.drop('total_cases',
                                             axis=1))).astype(int)
        iq_score.append(eval_measures.meanabs(train_predict,
                                              X_val.total_cases))
    print("Mean of {} cross validation of iq_score is {} (+/- {})".format(
        kf.get_n_splits(iq_train), np.mean(iq_score), np.std(iq_score)))

    ##Use the best models trained before to predict the testing data
    print("Predicting testing data...")
    sj_predictions = sj_best_model.predict(sj_test)
    iq_predictions = iq_best_model.predict(iq_test)
    sj_predictions = np.round(sj_predictions).astype(int)
    iq_predictions = np.round(iq_predictions).astype(int)

    print("Creating submit file...")
    ##Use submission_format as template to write the answer
    sample_path = os.path.join(data_path, 'submission_format.csv')
    submission = pd.read_csv(sample_path, index_col=[0, 1, 2])
    submission.total_cases = np.concatenate([[28], [25], [34], sj_predictions,
                                             [8], [6], [10], iq_predictions])
    submission.to_csv("./data/ext_final_new.csv")
def main():
    ### parsing and Data pre-processing
    # load the provided data
    train_features_path = os.path.join(data_path, 'dengue_features_train.csv')
    train_labels_path = os.path.join(data_path, 'dengue_labels_train.csv')
    ### pre-processing data
    sj_train, iq_train = preprocess_data(train_features_path,
                                         labels_path=train_labels_path)
    #print(sj_train.describe())
    #print(iq_train.describe())

    ###Define the xgb parameters
    xgb_params = {
        'eta': 0.05,
        'max_depth': 5,
        'subsample': 0.7,
        'colsample_bytree': 0.7,
        'objective': 'reg:linear',
        'eval_metric': 'rmse',
        'silent': 1
    }
    num_boost_rounds = 1000

    ##Use K-fold to create cross validation data
    kf = KFold(n_splits=6)

    ##Do the stacking by adding three columns 'xgb', 'etr', 'br' which store
    ##the training predictions
    sj_train = sj_train.assign(xgb=0)
    sj_train = sj_train.assign(etr=0)
    sj_train = sj_train.assign(br=0)
    loop = 1
    # The index will be split into [train_index] and [val_index]
    for train_index, val_index in kf.split(sj_train):
        X_train, X_val = sj_train.ix[train_index], sj_train.ix[val_index]
        ###(1)xgboost method
        dtrain = xgb.DMatrix(
            X_train.drop(['total_cases', 'xgb', 'etr', 'br'], axis=1),
            X_train['total_cases'])
        dval = xgb.DMatrix(
            X_val.drop(['total_cases', 'xgb', 'etr', 'br'], axis=1))
        sj_xgb_model = xgb.train(dict(xgb_params, silent=0),
                                 dtrain,
                                 num_boost_round=num_boost_rounds)
        predictions_xgb = sj_xgb_model.predict(dval).astype(int)
        ###(2)Extra tree regressor method
        sj_etr_model = ETR(n_estimators=2000, max_depth=3, criterion='mae')
        sj_etr_model.fit(
            X_train.drop(['total_cases', 'xgb', 'etr', 'br'], axis=1),
            X_train['total_cases'])
        predictions_etr = sj_etr_model.predict(
            X_val.drop(['total_cases', 'xgb', 'etr', 'br'], axis=1))
        ###(3) Bagging Regressor method
        sj_br_model = BR(n_estimators=100, max_features=0.6, max_samples=0.6)
        sj_br_model.fit(
            X_train.drop(['total_cases', 'xgb', 'etr', 'br'], axis=1),
            X_train['total_cases'])
        predictions_br = sj_br_model.predict(
            X_val.drop(['total_cases', 'xgb', 'etr', 'br'], axis=1))
        ###Store the results in sj_train: predictions_xgb -> 'xgb',
        ###predictions_etr -> 'etr', predictions_br -> 'br'
        print("Adding the result of the predictions to sj training data({}/{})"
              .format(loop, 6))
        for idx, index in enumerate(val_index):
            sj_train['xgb'].ix[index] = predictions_xgb[idx]
            sj_train['etr'].ix[index] = predictions_etr[idx]
            sj_train['br'].ix[index] = predictions_br[idx]
        loop += 1

    iq_train = iq_train.assign(xgb=0)
    iq_train = iq_train.assign(etr=0)
    iq_train = iq_train.assign(br=0)
    loop = 1
    for train_index, val_index in kf.split(iq_train):
        X_train, X_val = iq_train.ix[train_index], iq_train.ix[val_index]
        ###(1)xgb method
        dtrain = xgb.DMatrix(
            X_train.drop(['total_cases', 'xgb', 'etr', 'br'], axis=1),
            X_train['total_cases'])
        dval = xgb.DMatrix(
            X_val.drop(['total_cases', 'xgb', 'etr', 'br'], axis=1))
        iq_xgb_model = xgb.train(dict(xgb_params, silent=0),
                                 dtrain,
                                 num_boost_round=num_boost_rounds)
        predictions_xgb = iq_xgb_model.predict(dval).astype(int)
        ###(2)Extra tree regressor method
        iq_etr_model = ETR(n_estimators=2000, max_depth=3, criterion='mae')
        iq_etr_model.fit(
            X_train.drop(['total_cases', 'xgb', 'etr', 'br'], axis=1),
            X_train['total_cases'])
        predictions_etr = iq_etr_model.predict(
            X_val.drop(['total_cases', 'xgb', 'etr', 'br'], axis=1))
        ###(3) Bagging Regressor method
        iq_br_model = BR(n_estimators=800, max_features=0.6, max_samples=0.6)
        iq_br_model.fit(
            X_train.drop(['total_cases', 'xgb', 'etr', 'br'], axis=1),
            X_train['total_cases'])
        predictions_br = iq_br_model.predict(
            X_val.drop(['total_cases', 'xgb', 'etr', 'br'], axis=1))
        ###Store the results in iq_train: predictions_xgb -> 'xgb',
        ###predictions_etr -> 'etr', predictions_br -> 'br'
        print("Adding the result of the predictions to iq training data({}/{})"
              .format(loop, 6))
        for idx, index in enumerate(val_index):
            iq_train['xgb'].ix[index] = predictions_xgb[idx]
            iq_train['etr'].ix[index] = predictions_etr[idx]
            iq_train['br'].ix[index] = predictions_br[idx]
        loop += 1

    ###Now the training data looks like [features, total_cases, xgb, etr, br]
    ##Accessing testing data
    test_features_path = os.path.join(data_path, 'dengue_features_test.csv')
    sj_test, iq_test = preprocess_data(test_features_path)

    ##Like training, add 'xgb', 'etr', 'br' to the testing dataframe
    sj_test = sj_test.assign(xgb=0)
    sj_test = sj_test.assign(etr=0)
    sj_test = sj_test.assign(br=0)
    ##(1)xgb prediction
    dtest = xgb.DMatrix(sj_test.drop(['xgb', 'etr', 'br'], axis=1))
    sj_predictions_xgb = sj_xgb_model.predict(dtest).astype(int)
    ###(2)extra tree regressor method
    sj_predictions_etr = sj_etr_model.predict(
        sj_test.drop(['xgb', 'etr', 'br'], axis=1)).astype(int)
    ###(3)bagging regressor method
    sj_predictions_br = sj_br_model.predict(
        sj_test.drop(['xgb', 'etr', 'br'], axis=1)).astype(int)
    print("Adding predictions as features to sj testing data...")
    #Add the prediction to the corresponding column
    for i in range(len(sj_test['xgb'])):
        sj_test['xgb'].ix[i] = sj_predictions_xgb[i]
        sj_test['etr'].ix[i] = sj_predictions_etr[i]
        sj_test['br'].ix[i] = sj_predictions_br[i]

    ##Same process for city iq
    iq_test = iq_test.assign(xgb=0)
    iq_test = iq_test.assign(etr=0)
    iq_test = iq_test.assign(br=0)
    ###(1)xgb prediction
    dtest = xgb.DMatrix(iq_test.drop(['xgb', 'etr', 'br'], axis=1))
    iq_predictions_xgb = iq_xgb_model.predict(dtest).astype(int)
    ###(2)extra tree regressor method
    iq_predictions_etr = iq_etr_model.predict(
        iq_test.drop(['xgb', 'etr', 'br'], axis=1)).astype(int)
    ###(3)bagging regressor method
    iq_predictions_br = iq_br_model.predict(
        iq_test.drop(['xgb', 'etr', 'br'], axis=1)).astype(int)
    print("Adding predictions as features to iq testing data...")
    for i in range(len(iq_test['xgb'])):
        iq_test['xgb'].ix[i] = iq_predictions_xgb[i]
        iq_test['etr'].ix[i] = iq_predictions_etr[i]
        iq_test['br'].ix[i] = iq_predictions_br[i]

    ##Use the new information to run a linear regression
    '''
    print("Building linear regression model...")
    #Now the linear regression model uses
    #(X = [features, negbi, gb, xgb, abr, etr, br], y = total_cases) to train (fit)
    sj_lr = LR()
    sj_lr.fit(sj_train.drop('total_cases', axis=1), sj_train['total_cases'])
    iq_lr = LR()
    iq_lr.fit(iq_train.drop('total_cases', axis=1), iq_train['total_cases'])

    #Calculate the k-fold validation error
    sj_score = []
    for train_index, val_index in kf.split(sj_train):
        X_train, X_val = sj_train.ix[train_index], sj_train.ix[val_index]
        train_predict = np.array(
            sj_lr.predict(X_val.drop('total_cases', axis=1))).astype(int)
        sj_score.append(eval_measures.meanabs(train_predict, X_val.total_cases))
    print("Mean of {} cross validation of sj_score is {} (+/- {})".format(
        kf.get_n_splits(sj_train), np.mean(sj_score), np.std(sj_score)))

    iq_score = []
    for train_index, val_index in kf.split(iq_train):
        X_train, X_val = iq_train.ix[train_index], iq_train.ix[val_index]
        train_predict = np.array(
            iq_lr.predict(X_val.drop('total_cases', axis=1))).astype(int)
        iq_score.append(eval_measures.meanabs(train_predict, X_val.total_cases))
    print("Mean of {} cross validation of iq_score is {} (+/- {})".format(
        kf.get_n_splits(iq_train), np.mean(iq_score), np.std(iq_score)))

    ##Use the model sj_lr and iq_lr trained before to predict the testing data
    print("Predicting testing data...")
    sj_predictions = sj_lr.predict(sj_test)
    iq_predictions = iq_lr.predict(iq_test)
    '''

    ##Average the three stacked predictions for each test row
    sj_predictions = []
    iq_predictions = []
    for i in range(len(sj_test['br'])):
        sj_predictions.append((
            #sj_test['negbi'].ix[i] +
            #sj_test['gb'].ix[i] +
            #sj_test['abr'].ix[i] +
            sj_test['xgb'].ix[i] +
            sj_test['etr'].ix[i] +
            sj_test['br'].ix[i]) / 3)
    for i in range(len(iq_test['br'])):
        iq_predictions.append((
            #iq_test['negbi'].ix[i] +
            #iq_test['gb'].ix[i] +
            #iq_test['abr'].ix[i] +
            iq_test['xgb'].ix[i] +
            iq_test['etr'].ix[i] +
            iq_test['br'].ix[i]) / 3)
    sj_predictions = np.round(sj_predictions).astype(int)
    iq_predictions = np.round(iq_predictions).astype(int)

    print("Creating submit file...")
    ##Use submission_format as template to write the answer
    sample_path = os.path.join(data_path, 'submission_format.csv')
    submission = pd.read_csv(sample_path, index_col=[0, 1, 2])
    submission.total_cases = np.concatenate([[28], [25], [34], sj_predictions,
                                             [7], [6], [8], iq_predictions])
    submission.to_csv("./data/stacking_final_new.csv")