def train_test(self, X, y, X_test):
    """Train a two-stage stacked model and return predictions for X_test.

    The training data is split once into two stratified halves.  A
    first-stage xgboost model and an isotonic-calibrated random forest are
    fit on the first half.  Their predictions on the second half and on
    ``X_test`` are standardized and appended to the original features as
    extra columns.  A second-stage xgboost model is trained on the
    augmented second half and used to predict the augmented test set.

    Side effect: ``self.num_round`` is overwritten with 450 before the
    second stage is trained (the first stage uses the value it had on
    entry).

    Returns the second-stage predictions reshaped to
    ``(X_test.shape[0], 9)``.
    """
    # A single split is requested, so the loop body runs once.
    # NOTE(review): the positional ``1`` is presumably ``n_iter`` of the
    # old sklearn.cross_validation API -- confirm against the imports.
    sss = StratifiedShuffleSplit(y, 1, test_size=0.5)
    for train_id, valid_id in sss:
        X0, X1 = X[train_id], X[valid_id]
        y0, y1 = y[train_id], y[valid_id]

    # ---- First stage: models fit on the first half ----
    w0 = np.array([self.w[int(label)] for label in y0], dtype=np.float64)
    xg0_train = DMatrix(X0, label=y0, weight=w0)
    xg0_test = DMatrix(X1, label=y1)
    xgt_test = DMatrix(X_test)
    bst0 = my_train_xgboost(self.param, xg0_train, self.num_round)
    y0_pred = bst0.predict(xg0_test).reshape(X1.shape[0], 9)
    yt_pred = bst0.predict(xgt_test).reshape(X_test.shape[0], 9)

    # Calibrated random forest, also fit on the first half only.
    rf = RandomForestClassifier(n_estimators=600, criterion='gini',
                                class_weight='auto', max_features='auto')
    cal = CalibratedClassifierCV(rf, method='isotonic', cv=3)
    cal.fit(X0, y0)
    y0_cal = cal.predict_proba(X1)
    yt_cal = cal.predict_proba(X_test)

    # ---- Second stage: stack first-stage outputs as extra features ----
    # Each scaler is fit on the held-out-half meta-features and then applied
    # unchanged to the test meta-features.  Re-fitting on the test set (as
    # the previous code did) standardizes train and test columns with
    # different statistics, so the second-stage model would see shifted
    # inputs at prediction time.
    pred_scaler = StandardScaler().fit(y0_pred)
    y0_pred = pred_scaler.transform(y0_pred)
    yt_pred = pred_scaler.transform(yt_pred)
    cal_scaler = StandardScaler().fit(y0_cal)
    y0_cal = cal_scaler.transform(y0_cal)
    yt_cal = cal_scaler.transform(yt_cal)

    X1 = np.hstack((X1, y0_pred, y0_cal))
    X_test = np.hstack((X_test, yt_pred, yt_cal))

    w1 = np.array([self.w[int(label)] for label in y1], dtype=np.float64)
    self.num_round = 450
    xg1_train = DMatrix(X1, label=y1, weight=w1)
    xg_test = DMatrix(X_test)
    bst1 = my_train_xgboost(self.param, xg1_train, self.num_round)
    y_pred = bst1.predict(xg_test).reshape(X_test.shape[0], 9)
    return y_pred
def train_validate(self, X_train, y_train, X_valid, y_valid):
    """Train a two-stage stacked model and return predictions for X_valid.

    The training data is split once into two stratified halves.  A
    first-stage xgboost model and an isotonic-calibrated random forest are
    fit on the first half.  Their predictions on the second half and on
    ``X_valid`` are standardized and appended to the original features.
    A second-stage xgboost model is trained on the augmented second half,
    with the augmented validation set on its watchlist.

    Side effect: ``self.num_round`` is overwritten with 450 before the
    second stage is trained (the first stage uses the value it had on
    entry).

    Returns the second-stage predictions reshaped to
    ``(X_valid.shape[0], 9)``.
    """
    # A single split is requested, so the loop body runs once.
    # NOTE(review): the positional ``1`` is presumably ``n_iter`` of the
    # old sklearn.cross_validation API -- confirm against the imports.
    sss = StratifiedShuffleSplit(y_train, 1, test_size=0.5)
    for train_id, valid_id in sss:
        X0_train, X1_train = X_train[train_id], X_train[valid_id]
        y0_train, y1_train = y_train[train_id], y_train[valid_id]

    # ---- First stage: models fit on the first half ----
    w0_train = np.array([self.w[int(label)] for label in y0_train],
                        dtype=np.float64)
    xg0_train = DMatrix(X0_train, label=y0_train, weight=w0_train)
    xg0_valid = DMatrix(X1_train, label=y1_train)
    xgv_valid = DMatrix(X_valid, label=y_valid)
    watchlist = [(xg0_train, 'train'), (xg0_valid, 'validation0')]
    bst0 = my_train_xgboost(self.param, xg0_train, self.num_round, watchlist)
    y0_pred = bst0.predict(xg0_valid).reshape(X1_train.shape[0], 9)
    yv_pred = bst0.predict(xgv_valid).reshape(X_valid.shape[0], 9)

    # Calibrated random forest, also fit on the first half only.
    rf = RandomForestClassifier(n_estimators=600, criterion='gini',
                                class_weight='auto', max_features='auto')
    cal = CalibratedClassifierCV(rf, method='isotonic', cv=3)
    cal.fit(X0_train, y0_train)
    y0_cal = cal.predict_proba(X1_train)
    yv_cal = cal.predict_proba(X_valid)

    # ---- Second stage: stack first-stage outputs as extra features ----
    # Each scaler is fit on the held-out-half meta-features and then applied
    # unchanged to the validation meta-features.  Re-fitting on the
    # validation set (as the previous code did) standardizes the two sets
    # with different statistics, so the validation score would not reflect
    # how the model is actually used.
    pred_scaler = StandardScaler().fit(y0_pred)
    y0_pred = pred_scaler.transform(y0_pred)
    yv_pred = pred_scaler.transform(yv_pred)
    cal_scaler = StandardScaler().fit(y0_cal)
    y0_cal = cal_scaler.transform(y0_cal)
    yv_cal = cal_scaler.transform(yv_cal)

    X1_train = np.hstack((X1_train, y0_pred, y0_cal))
    X_valid = np.hstack((X_valid, yv_pred, yv_cal))

    w1_train = np.array([self.w[int(label)] for label in y1_train],
                        dtype=np.float64)
    self.num_round = 450
    xg1_train = DMatrix(X1_train, label=y1_train, weight=w1_train)
    xg_valid = DMatrix(X_valid, label=y_valid)
    watchlist = [(xg1_train, 'train'), (xg_valid, 'validation')]
    bst1 = my_train_xgboost(self.param, xg1_train, self.num_round, watchlist)
    y_pred = bst1.predict(xg_valid).reshape(X_valid.shape[0], 9)
    return y_pred
def train_validate(self, X_train, y_train, X_valid, y_valid):
    """Train xgboost on a cluster-filtered training set and predict X_valid.

    For each label 0..8 the rows of that label are clustered with
    MiniBatchKMeans (10 clusters) and only the rows assigned to clusters
    0..6 are kept.  The kept rows are weighted per label via ``self.w``
    and used to train a booster, with the validation set on the watchlist.

    Returns the validation predictions reshaped to
    ``(X_valid.shape[0], 9)``.
    """
    n_clusters = 10
    clusterer = MiniBatchKMeans(n_clusters=n_clusters, batch_size=100)

    kept_features = []
    kept_labels = []
    for label in range(9):
        in_class = y_train == label
        class_features = X_train[in_class]
        class_labels = y_train[in_class]
        # The same estimator is re-fit for every label.
        assignments = clusterer.fit_predict(class_features)
        keep = assignments < 7
        kept_features.append(class_features[keep])
        kept_labels.append(class_labels[keep])

    X = np.vstack(kept_features)
    y = np.hstack(kept_labels)
    print(X.shape)

    sample_weights = np.array([self.w[int(label)] for label in y],
                              dtype=np.float64)
    dtrain = DMatrix(X, label=y, weight=sample_weights)
    dvalid = DMatrix(X_valid, label=y_valid)
    evals = [(dtrain, 'train'), (dvalid, 'validation')]
    booster = my_train_xgboost(self.param, dtrain, self.num_round, evals)
    return booster.predict(dvalid).reshape(X_valid.shape[0], 9)
def train_test(self, X, y, X_test):
    """Train on the full training set and return predictions for X_test.

    Both matrices are restricted to the columns selected by ``self.feats``.
    Rows are weighted per label via ``self.w``.  The number of boosting
    rounds is taken from ``self.best_n_iters`` and the seed from
    ``self.seed``.

    Side effect: ``self.num_round`` is set to ``self.best_n_iters``.

    Returns the predictions reshaped to ``(X_test.shape[0], 9)``.
    """
    train_features = X[:, self.feats]
    test_features = X_test[:, self.feats]

    sample_weights = np.array([self.w[int(label)] for label in y],
                              dtype=np.float64)
    dtrain = DMatrix(train_features, label=y, weight=sample_weights)
    dtest = DMatrix(test_features)

    self.num_round = self.best_n_iters
    booster = my_train_xgboost(self.param, dtrain, self.num_round,
                               seed=self.seed)
    n_rows = test_features.shape[0]
    return booster.predict(dtest).reshape(n_rows, 9)
def train_validate(self, X_train, y_train, X_valid, y_valid):
    """Train with a random feature subset and early stopping; predict X_valid.

    Three column indices are drawn at random (independently, so repeats
    are possible) and masked out; the mask is stored in ``self.feats``.
    A random seed is drawn and stored in ``self.seed``.  The booster is
    trained with ``early_stopping_rounds=100`` and its ``best_iteration``
    is stored in ``self.best_n_iters`` and printed.

    Returns the booster's predictions for the masked validation set,
    limited to ``self.best_n_iters`` trees.  No reshape is applied here.
    """
    n_features = X_train.shape[1]
    mask = np.ones(n_features, dtype=bool)
    dropped = np.random.randint(0, n_features, 3)
    mask[dropped] = False
    self.feats = mask

    train_features = X_train[:, self.feats]
    valid_features = X_valid[:, self.feats]

    sample_weights = np.array([self.w[int(label)] for label in y_train],
                              dtype=np.float64)
    dtrain = DMatrix(train_features, label=y_train, weight=sample_weights)
    dvalid = DMatrix(valid_features, label=y_valid)
    evals = [(dtrain, 'train'), (dvalid, 'validation')]

    self.seed = np.random.randint(0, 10000)
    booster = my_train_xgboost(self.param, dtrain, self.num_round, evals,
                               early_stopping_rounds=100, seed=self.seed)

    # NOTE(review): if best_iteration is zero-based (as in stock xgboost),
    # using it directly as ntree_limit may drop the best round -- confirm
    # against my_train_xgboost.
    self.best_n_iters = booster.best_iteration
    print(self.best_n_iters)
    return booster.predict(dvalid, ntree_limit=self.best_n_iters)
def train_validate(self, X_train, y_train, X_valid, y_valid):
    """Train a two-level hierarchy of boosters and predict X_valid.

    Classifier 0 separates labels {1, 2, 3} from all other labels.
    Classifier 1 is trained only on rows labelled 1, 2 or 3, and
    classifier 2 only on the remaining rows.  Each sub-problem's labels
    are re-encoded with a LabelEncoder, and each classifier has its own
    parameters, round count, weights and ``rt_*`` settings on ``self``.

    All three boosters predict the full validation set.  The final
    9-column output multiplies the group prediction from classifier 0
    with the within-group prediction from classifier 1 or 2.

    Returns an array of shape ``(len(y_valid), 9)``.
    """
    encoder = LabelEncoder()

    # ---- Training-side sub-problems ----
    group_train = (y_train == 1) | (y_train == 2) | (y_train == 3)
    rest_train = np.logical_not(group_train)

    y0_train = np.zeros(len(y_train), dtype=np.int32)
    y0_train[group_train] = 1
    X0_train = np.copy(X_train)
    y0_train = encoder.fit_transform(y0_train).astype(np.int32)

    X1_train = X_train[group_train]
    y1_train = encoder.fit_transform(y_train[group_train]).astype(np.int32)

    X2_train = X_train[rest_train]
    y2_train = encoder.fit_transform(y_train[rest_train]).astype(np.int32)

    # ---- Validation-side sub-problems (labels used by watchlists only) ----
    group_valid = (y_valid == 1) | (y_valid == 2) | (y_valid == 3)
    rest_valid = np.logical_not(group_valid)

    y0_valid = np.zeros(len(y_valid), dtype=np.int32)
    y0_valid[group_valid] = 1
    X0_valid = np.copy(X_valid)
    y0_valid = encoder.fit_transform(y0_valid).astype(np.int32)

    X1_valid = X_valid[group_valid]
    y1_valid = encoder.fit_transform(y_valid[group_valid]).astype(np.int32)

    X2_valid = X_valid[rest_valid]
    y2_valid = encoder.fit_transform(y_valid[rest_valid]).astype(np.int32)

    # Every booster predicts on the complete validation set.
    dvalid_all = DMatrix(X_valid)
    n_valid = y_valid.shape[0]

    # ---- Classifier 0: {1, 2, 3} versus the rest ----
    weights0 = np.array([self.w0[int(label)] for label in y0_train],
                        dtype=np.float64)
    dtrain0 = DMatrix(X0_train, label=y0_train, weight=weights0)
    dvalid0 = DMatrix(X0_valid, label=y0_valid)
    evals0 = [(dtrain0, 'train'), (dvalid0, 'validation')]
    booster0 = my_train_xgboost(self.param0, dtrain0, self.num_round0,
                                evals0,
                                rt_eta=self.rt0_eta, rt_ssp=self.rt0_ssp,
                                rt_clb=self.rt0_clb, rt_dpt=self.rt0_dpt)
    y0_pred = booster0.predict(dvalid_all).reshape(n_valid, 2)

    # ---- Classifier 1: within labels 1, 2, 3 ----
    weights1 = np.array([self.w1[int(label)] for label in y1_train],
                        dtype=np.float64)
    dtrain1 = DMatrix(X1_train, label=y1_train, weight=weights1)
    dvalid1 = DMatrix(X1_valid, label=y1_valid)
    evals1 = [(dtrain1, 'train'), (dvalid1, 'validation')]
    booster1 = my_train_xgboost(self.param1, dtrain1, self.num_round1,
                                evals1,
                                rt_eta=self.rt1_eta, rt_ssp=self.rt1_ssp,
                                rt_clb=self.rt1_clb, rt_dpt=self.rt1_dpt)
    y1_pred = booster1.predict(dvalid_all).reshape(n_valid, 3)

    # ---- Classifier 2: within the remaining labels ----
    weights2 = np.array([self.w2[int(label)] for label in y2_train],
                        dtype=np.float64)
    dtrain2 = DMatrix(X2_train, label=y2_train, weight=weights2)
    dvalid2 = DMatrix(X2_valid, label=y2_valid)
    evals2 = [(dtrain2, 'train'), (dvalid2, 'validation')]
    booster2 = my_train_xgboost(self.param2, dtrain2, self.num_round2,
                                evals2,
                                rt_eta=self.rt2_eta, rt_ssp=self.rt2_ssp,
                                rt_clb=self.rt2_clb, rt_dpt=self.rt2_dpt)
    y2_pred = booster2.predict(dvalid_all).reshape(n_valid, 6)

    # ---- Combine: group prediction times within-group prediction ----
    y_pred = np.zeros((y0_pred.shape[0], 9))
    # Output columns 1..3 come from classifier 1.
    for offset, column in enumerate((1, 2, 3)):
        y_pred[:, column] = y0_pred[:, 1] * y1_pred[:, offset]
    # Output columns 0 and 4..8 come from classifier 2.
    for offset, column in enumerate((0, 4, 5, 6, 7, 8)):
        y_pred[:, column] = y0_pred[:, 0] * y2_pred[:, offset]
    return y_pred
def train_test(self, X, y, X_test):
    """Train a two-level hierarchy of boosters and predict X_test.

    Classifier 0 separates labels {1, 2, 3} from all other labels.
    Classifier 1 is trained only on rows labelled 1, 2 or 3, and
    classifier 2 only on the remaining rows.  Each sub-problem's labels
    are re-encoded with a LabelEncoder, and each classifier has its own
    parameters, round count, weights and ``rt_*`` settings on ``self``.

    All three boosters predict the full test set.  The final 9-column
    output multiplies the group prediction from classifier 0 with the
    within-group prediction from classifier 1 or 2.

    Returns an array of shape ``(X_test.shape[0], 9)``.
    """
    encoder = LabelEncoder()

    # ---- Build the three training sub-problems ----
    in_group = (y == 1) | (y == 2) | (y == 3)
    out_of_group = np.logical_not(in_group)

    y0 = np.zeros(len(y), dtype=np.int32)
    y0[in_group] = 1
    X0 = np.copy(X)
    y0 = encoder.fit_transform(y0).astype(np.int32)

    X1 = X[in_group]
    y1 = encoder.fit_transform(y[in_group]).astype(np.int32)

    X2 = X[out_of_group]
    y2 = encoder.fit_transform(y[out_of_group]).astype(np.int32)

    dtest = DMatrix(X_test)
    n_test = X_test.shape[0]

    # ---- Classifier 0: {1, 2, 3} versus the rest ----
    weights0 = np.array([self.w0[int(label)] for label in y0],
                        dtype=np.float64)
    dtrain0 = DMatrix(X0, label=y0, weight=weights0)
    booster0 = my_train_xgboost(self.param0, dtrain0, self.num_round0,
                                rt_eta=self.rt0_eta, rt_ssp=self.rt0_ssp,
                                rt_clb=self.rt0_clb, rt_dpt=self.rt0_dpt)
    y0_pred = booster0.predict(dtest).reshape(n_test, 2)

    # ---- Classifier 1: within labels 1, 2, 3 ----
    weights1 = np.array([self.w1[int(label)] for label in y1],
                        dtype=np.float64)
    dtrain1 = DMatrix(X1, label=y1, weight=weights1)
    booster1 = my_train_xgboost(self.param1, dtrain1, self.num_round1,
                                rt_eta=self.rt1_eta, rt_ssp=self.rt1_ssp,
                                rt_clb=self.rt1_clb, rt_dpt=self.rt1_dpt)
    y1_pred = booster1.predict(dtest).reshape(n_test, 3)

    # ---- Classifier 2: within the remaining labels ----
    weights2 = np.array([self.w2[int(label)] for label in y2],
                        dtype=np.float64)
    dtrain2 = DMatrix(X2, label=y2, weight=weights2)
    booster2 = my_train_xgboost(self.param2, dtrain2, self.num_round2,
                                rt_eta=self.rt2_eta, rt_ssp=self.rt2_ssp,
                                rt_clb=self.rt2_clb, rt_dpt=self.rt2_dpt)
    y2_pred = booster2.predict(dtest).reshape(n_test, 6)

    # ---- Combine: group prediction times within-group prediction ----
    y_pred = np.zeros((y0_pred.shape[0], 9))
    # Output columns 1..3 come from classifier 1.
    for offset, column in enumerate((1, 2, 3)):
        y_pred[:, column] = y0_pred[:, 1] * y1_pred[:, offset]
    # Output columns 0 and 4..8 come from classifier 2.
    for offset, column in enumerate((0, 4, 5, 6, 7, 8)):
        y_pred[:, column] = y0_pred[:, 0] * y2_pred[:, offset]
    return y_pred