# param_list = param_list[:11]

# --- labels: integer-encode the class strings, then one-hot encode ---
lbl_enc = preprocessing.LabelEncoder()
labels = lbl_enc.fit_transform(labels)
oneHotEnc = preprocessing.OneHotEncoder()
labels = oneHotEnc.fit_transform(labels.reshape((-1, 1)))
# OneHotEncoder yields a sparse matrix; densify for downstream math
labels = labels.toarray()

# --- load the per-model training predictions produced by an earlier run ---
train_name = "feat_sel_mult_train3"
i = 0
preds_train = []
for loss, ntree, param in param_list:
    # each CSV holds one base model's class probabilities on the train set
    part_train = pd.read_csv(otto_utils.get_train_preds_from_param(param, train_name, ntree), index_col=0)
    preds_train.append(np.asarray(part_train))
    i += 1

# --- hyperparameters for the blending/weight search ---
lr = 0.1          # learning rate
decay = 0.05      # learning-rate decay
max_epoch = 500   # iteration cap
tol = 0.00007     # convergence tolerance

search_param = True
if search_param:
    best_loss = None
    # split dataset: random permutation of row indices for train/validation parts
    c_preds_train = []
    c_preds_test = []
    num = len(preds_train[0])
    idx = np.random.permutation(num)
# Train one xgboost model per parameter set, persist the booster and its
# train/test predictions, and accumulate an averaged test prediction.
for loss, num_round, param in param_list:
    # merge the shared defaults with this configuration's overrides
    full_param = other.copy()
    full_param.update(param)
    plst = full_param.items()

    # train on the training set
    # NOTE(review): this unpacking implies a locally patched xgb.train that
    # returns (booster, loss, best_tree_count) — confirm against the xgb module.
    bst, loss, ntree = xgb.train(full_param, dtrain, num_round, watchlist)

    # persist the fitted booster under a parameter-derived name
    bst_dump_model = otto_utils.get_model_name_from_param(param, train_name, ntree)
    bst.save_model(bst_dump_model)

    # write per-model train predictions (used later for stacking/blending)
    preds = bst.predict(dtrain)
    preds = pd.DataFrame(preds, index=train_ids, columns=sample.columns[1:])
    preds.to_csv(otto_utils.get_train_preds_from_param(param, train_name, ntree), index_label='id')

    # write per-model test predictions and fold them into the running sum
    preds = bst.predict(dtest)
    preds = pd.DataFrame(preds, index=sample.id.values, columns=sample.columns[1:])
    preds.to_csv(otto_utils.get_test_preds_from_param(param, train_name, ntree), index_label='id')
    if avg_preds is None:
        avg_preds = preds
    else:
        avg_preds += preds

# turn the accumulated sum into a simple mean over all configurations
avg_preds /= len(param_list)

# create submission file
avg_preds = pd.DataFrame(avg_preds, index=sample.id.values, columns=sample.columns[1:])