def func_mlogloss(w, Xs, y):
    """Ensemble-weight objective: multiclass log-loss of the blend.

    w  -- sequence of scalar weights, one per prediction matrix in Xs
    Xs -- list of (n_samples, 9) class-probability matrices
    y  -- true labels, in whatever form logloss_mc expects
    Returns logloss_mc(y, sum_i w[i] * Xs[i]).
    """
    blend = np.zeros((Xs[0].shape[0], 9))
    for idx in range(len(w)):
        blend += Xs[idx] * w[idx]
    return logloss_mc(y, blend)
def internal_processing(self, X, y, X_test):
    """Blend an EC ensemble with an isotonic-calibrated copy of it.

    X / X_test are assumed to be 5 horizontally stacked prediction
    blocks, one per base model -- TODO confirm against the caller.
    The test blocks are first calibrated against their training
    counterparts; the returned prediction is the mean of the raw EC
    ensemble output and the CalibratedClassifierCV output on the
    calibrated test stack.

    Fix: print written as a function call -- the single-argument form
    behaves identically on Python 2 and 3.
    """
    Xs = np.hsplit(X, 5)
    Xts = np.hsplit(X_test, 5)

    # calibrate each test block against its matching training block
    Xts_cal = []
    for i in range(len(Xs)):
        Xts_cal.append(calibrate(Xs[i], y, Xts[i]))
    XX_test = np.hstack(Xts_cal)

    ec = EC(n_preds=5)
    ec.fit(X, y)
    y_ens = ec.predict_proba(XX_test)

    # validation pass on the training stack itself
    yv = ec.predict_proba(X)
    print('Weights: %s' % (ec.w))
    print('Validation log-loss: %s' % (logloss_mc(y, yv)))

    cc = CalibratedClassifierCV(base_estimator=EC(n_preds=5),
                                method='isotonic', cv=10)
    cc.fit(X, y)
    y_cal = cc.predict_proba(XX_test)

    y_pred = (y_ens + y_cal) / 2.
    return y_pred
def train_validate(self, X_train, y_train, X_valid, y_valid):
    """Chain-train three nolearn nets, warm-starting each from the
    previous one's learned parameters, and return nn3's validation
    class probabilities.

    Fix: print written as a function call -- the single-argument form
    behaves identically on Python 2 and 3.
    """
    self.nn.max_epochs = 300
    self.nn.verbose = 1
    self.nn.fit(X_train, y_train, X_valid, y_valid)

    # warm-start nn2 from nn's weights, then nn3 from nn2's
    params = self.nn.get_all_params_values()
    self.nn2.load_params_from(params)
    self.nn2.fit(X_train, y_train, X_valid, y_valid)

    params2 = self.nn2.get_all_params_values()
    self.nn3.load_params_from(params2)
    self.nn3.fit(X_train, y_train, X_valid, y_valid)

    yp0 = self.nn3.predict_proba(X_valid)
    print('Nolearn log-loss: %s' % (logloss_mc(y_valid, yp0)))
    y_pred = yp0
    return y_pred
def func_mlogloss_2(w, Xs, y):
    """Per-column ensemble-weight objective.

    The weight vector w has one entry per (model, class) pair laid out
    model-major: w[i] weights column i % nc of prediction matrix
    Xs[i // nc], with nc = 9 classes per model.

    Fix: floor division (i // nc) for the list index -- under true
    division (Python 3 semantics) i / nc is a float and list indexing
    raises TypeError.
    """
    nc = 9
    sol = np.zeros((Xs[0].shape[0], 9))
    for i in range(len(w)):
        sol[:, i % nc] += Xs[i // nc][:, i % nc] * w[i]
    return logloss_mc(y, sol)
def train_validate(self, X_train, y_train, X_valid, y_valid):
    """Average a raw random forest with an isotonic-calibrated one and
    return the mean validation probabilities.

    Fix: print written as a function call -- the single-argument form
    behaves identically on Python 2 and 3.
    NOTE(review): class_weight="auto" is the legacy scikit-learn
    spelling (renamed "balanced" in 0.17) and StratifiedKFold(y, 3) is
    the old label-first signature -- left unchanged to preserve
    behaviour on the pinned sklearn version; confirm before upgrading.
    """
    rf = RandomForestClassifier(n_estimators=1500, class_weight="auto",
                                max_features=0.8)
    rf.fit(X_train, y_train)
    yp0 = rf.predict_proba(X_valid)
    print(logloss_mc(y_valid, yp0))

    # a fresh, unfitted forest for the calibration wrapper
    rf = RandomForestClassifier(n_estimators=1500, class_weight="auto",
                                max_features=0.8)
    cc = CalibratedClassifierCV(base_estimator=rf, method="isotonic",
                                cv=StratifiedKFold(y_train, 3))
    cc.fit(X_train, y_train)
    yp1 = cc.predict_proba(X_valid)
    print(logloss_mc(y_valid, yp1))

    y_pred = (yp0 + yp1) / 2.0
    return y_pred
def func_mlogloss(w, Xs, y):
    """L2-regularised ensemble-weight objective.

    The weights are taken in absolute value, the candidate prediction
    matrices in Xs are blended with them, and a small ridge penalty
    (0.001 * ||w||_2) is added to the multiclass log-loss.
    """
    w = np.abs(w)
    blend = np.zeros((Xs[0].shape[0], 9))
    for idx in range(len(w)):
        blend += Xs[idx] * w[idx]
    loss = logloss_mc(y, blend)
    penalty = np.sqrt(np.sum(w ** 2)) * 0.001
    return loss + penalty
def func_mlogloss_4(w, Xs, y):
    """Column-grouped ensemble-weight objective.

    Only the first half of w is iterated; for prediction matrix Xs[i],
    columns 1-3 are weighted by w[i + 1] and the remaining columns by
    w[i].  NOTE(review): the w[i] / w[i + 1] pairing looks suspicious
    (consecutive iterations reuse overlapping weights; perhaps
    w[i + len(w) // 2] was intended) -- confirm the weight layout
    before relying on this objective.  Behaviour preserved here.

    Fix: floor division for the loop bound -- len(w) / 2 is a float
    under true division and range() rejects it.
    """
    w = np.abs(w)
    sol = np.zeros((Xs[0].shape[0], 9))
    for i in range(len(w) // 2):
        sol[:, 0] += Xs[i][:, 0] * w[i]
        sol[:, 1] += Xs[i][:, 1] * w[i + 1]
        sol[:, 2] += Xs[i][:, 2] * w[i + 1]
        sol[:, 3] += Xs[i][:, 3] * w[i + 1]
        sol[:, 4] += Xs[i][:, 4] * w[i]
        sol[:, 5] += Xs[i][:, 5] * w[i]
        sol[:, 6] += Xs[i][:, 6] * w[i]
        sol[:, 7] += Xs[i][:, 7] * w[i]
        sol[:, 8] += Xs[i][:, 8] * w[i]
    return logloss_mc(y, sol)
def train_validate(self, X_train, y_train, X_valid, y_valid):
    """Fit both nets on the training split and return nn's validation
    probabilities.

    nn2's output is not used here; presumably it is consumed later via
    self.nn2 -- TODO confirm against the caller.

    Fixes: print written as a function call (single-argument form is
    identical on Python 2 and 3); a large body of commented-out
    calibration/ensembling experiments removed.
    """
    self.nn.max_epochs = 300
    self.nn.verbose = 1
    self.nn.fit(X_train, y_train, X_valid, y_valid)
    self.nn2.fit(X_train, y_train, X_valid, y_valid)

    yp0 = self.nn.predict_proba(X_valid)
    print('Nolearn log-loss: %s' % (logloss_mc(y_valid, yp0)))
    y_pred = yp0
    return y_pred
def train_validate(self, X_train, y_train, X_valid, y_valid):
    """Hierarchical 9-class model.

    nn0 decides whether a sample belongs to classes {1, 2, 3}; nn1
    discriminates within {1, 2, 3}; nn2 discriminates within the other
    six classes.  Joint class probabilities are the products
    P(branch) * P(class | branch).  A calibrated copy of the whole
    hierarchy (ClfCal) is then averaged with the raw prediction.

    Fix: print written as a function call -- the single-argument form
    behaves identically on Python 2 and 3.
    """
    le = LabelEncoder()

    # --- binary target: is the sample in classes {1, 2, 3}? ---
    id_123 = np.logical_or(np.logical_or(y_train == 1, y_train == 2),
                           y_train == 3)
    y0 = np.zeros(len(y_train), dtype=np.int32)
    y0[id_123] = 1
    X0 = np.copy(X_train)
    y0 = le.fit_transform(y0).astype(np.int32)

    # --- sub-problems: within {1,2,3} and within the remaining six ---
    X1 = X_train[id_123]
    y1 = y_train[id_123]
    y1 = le.fit_transform(y1).astype(np.int32)
    X2 = X_train[np.logical_not(id_123)]
    y2 = y_train[np.logical_not(id_123)]
    y2 = le.fit_transform(y2).astype(np.int32)

    # --- validation analogues of the three targets ---
    id_123_valid = np.logical_or(np.logical_or(y_valid == 1, y_valid == 2),
                                 y_valid == 3)
    y0_valid = np.zeros(len(y_valid), dtype=np.int32)
    y0_valid[id_123_valid] = 1
    X0_valid = np.copy(X_valid)
    y0_valid = le.fit_transform(y0_valid).astype(np.int32)
    X1_valid = X_valid[id_123_valid]
    y1_valid = y_valid[id_123_valid]
    y1_valid = le.fit_transform(y1_valid).astype(np.int32)
    X2_valid = X_valid[np.logical_not(id_123_valid)]
    y2_valid = y_valid[np.logical_not(id_123_valid)]
    y2_valid = le.fit_transform(y2_valid).astype(np.int32)

    # fit each net with its own validation set for early stopping
    self.nn0.max_epochs = 300
    self.nn0.verbose = 1
    self.nn0.fit(X0, y0, X0_valid, y0_valid)
    y0_pred = self.nn0.predict_proba(X_valid)

    self.nn1.max_epochs = 300
    self.nn1.verbose = 1
    self.nn1.fit(X1, y1, X1_valid, y1_valid)
    y1_pred = self.nn1.predict_proba(X_valid)

    self.nn2.max_epochs = 300
    self.nn2.verbose = 1
    self.nn2.fit(X2, y2, X2_valid, y2_valid)
    y2_pred = self.nn2.predict_proba(X_valid)

    # chain rule: columns 1-3 come from the {1,2,3} branch (nn1),
    # the rest from the complementary branch (nn2)
    y_pred = np.zeros((y0_pred.shape[0], 9))
    y_pred[:, 0] = y0_pred[:, 0] * y2_pred[:, 0]
    y_pred[:, 1] = y0_pred[:, 1] * y1_pred[:, 0]
    y_pred[:, 2] = y0_pred[:, 1] * y1_pred[:, 1]
    y_pred[:, 3] = y0_pred[:, 1] * y1_pred[:, 2]
    y_pred[:, 4] = y0_pred[:, 0] * y2_pred[:, 1]
    y_pred[:, 5] = y0_pred[:, 0] * y2_pred[:, 2]
    y_pred[:, 6] = y0_pred[:, 0] * y2_pred[:, 3]
    y_pred[:, 7] = y0_pred[:, 0] * y2_pred[:, 4]
    y_pred[:, 8] = y0_pred[:, 0] * y2_pred[:, 5]
    yp0 = y_pred
    print(logloss_mc(y_valid, yp0))

    # refit at each net's best epoch inside an isotonic calibration
    # wrapper around the whole hierarchy
    self.nn0.max_epochs = self.early_stopping0.best_valid_epoch
    self.nn0.verbose = 0
    self.nn1.max_epochs = self.early_stopping1.best_valid_epoch
    self.nn1.verbose = 0
    self.nn2.max_epochs = self.early_stopping2.best_valid_epoch
    self.nn2.verbose = 0
    clf = ClfCal(self.nn0, self.nn1, self.nn2)
    cc = CalibratedClassifierCV(base_estimator=clf, method='isotonic',
                                cv=StratifiedKFold(y_train, n_folds=3))
    cc.fit(X_train, y_train)
    yp1 = cc.predict_proba(X_valid)
    print('Calibrated log-loss: %s' % (logloss_mc(y_valid, yp1)))

    y_pred = (yp0 + yp1) / 2.
    print('Mean log-loss: %s' % (logloss_mc(y_valid, y_pred)))
    self.cal_clf = cc
    return y_pred
def internal_processing(self, X, y, X_test):
    """Cross-validated diagnostics for the EC / EC_2 ensemble weights.

    Runs 10-fold stratified CV on the stacked predictions X.  On each
    fold it fits EC (one weight per model) and EC_2 (one weight per
    (model, class) pair, seeded from EC's solution) and prints the fold
    log-losses, including tiny +/-1e-4 perturbations of the predicted
    probabilities to probe calibration sharpness.

    NOTE(review): this is experimental/diagnostic code.  ``y_pred`` is
    never assigned on the active path, so the final ``return y_pred``
    raises NameError at runtime; the code that used to produce a
    test-set prediction was commented out (dead experiments removed
    here).  Restore an actual prediction before using this in a
    pipeline.  X_test is currently unused.

    Fixes (targeting Python 3): print as a function, floor division in
    the weight-index mapping (ew0[i // 9] -- i / 9 is a float under
    true division and indexing fails).
    """
    sss = StratifiedKFold(y, n_folds=10)
    for train_id, valid_id in sss:
        X0, X1 = X[train_id], X[valid_id]
        y0, y1 = y[train_id], y[valid_id]

        # global per-model weights
        ec = EC(n_preds=self.n_preds)
        ec.fit(X0, y0)
        ew0 = ec.w
        y0_pred = ec.predict_proba(X1)
        print(1, logloss_mc(y1, y0_pred))

        # nudge predictions toward / away from the extremes to see how
        # sensitive the fold log-loss is to calibration
        pp4 = np.where(y0_pred > 0.5, y0_pred - 0.0001, y0_pred + 0.0001)
        pp5 = np.where(y0_pred > 0.5, y0_pred + 0.0001, y0_pred - 0.0001)
        print(3, logloss_mc(y1, pp4))
        print(4, logloss_mc(y1, pp5))

        # per-(model, class) weights initialised from the EC solution:
        # column i belongs to model i // 9 (9 classes per model)
        w20 = np.ones(X0.shape[1])
        for i in range(len(w20)):
            w20[i] = w20[i] * (ew0[i // 9])
        ec2 = EC_2(n_preds=self.n_preds, w0=w20)
        ec2.fit(X0, y0)
        y02_pred = ec2.predict_proba(X1)

        pp8 = np.where(y02_pred > 0.5, y02_pred - 0.0001, y02_pred + 0.0001)
        pp9 = np.where(y02_pred > 0.5, y02_pred + 0.0001, y02_pred - 0.0001)
        print(5, logloss_mc(y1, pp8))
        print(6, logloss_mc(y1, pp9))
        print('---\n')

    return y_pred  # NameError on the active path -- see docstring
def scoring_mlogloss(clf, X_test, y_test):
    """Scorer: multiclass log-loss of *clf*'s predicted probabilities
    on the given test set (lower is better)."""
    probabilities = clf.predict_proba(X_test)
    return logloss_mc(y_test, probabilities)