Ejemplo n.º 1
0
def func_mlogloss(w, Xs, y):
    """Objective for ensemble-weight optimization.

    Computes the multiclass log-loss of a weighted linear blend of
    candidate class-probability matrices.

    Parameters
    ----------
    w : sequence of float
        One blending weight per prediction matrix in ``Xs``.
    Xs : sequence of ndarray
        Class-probability matrices, all sharing the same shape
        ``(n_samples, n_classes)``.
    y : ndarray
        True class labels.

    Returns
    -------
    float
        ``logloss_mc`` of the blended prediction.
    """
    # Take the blend shape from the first matrix instead of hard-coding
    # 9 classes, so the objective works for any class count.
    sol = np.zeros(Xs[0].shape)
    for i in range(len(w)):
        sol += Xs[i] * w[i]
    return logloss_mc(y, sol)
Ejemplo n.º 2
0
    def internal_processing(self, X, y, X_test):
        """Blend a weight-optimized ensemble with a calibrated one.

        Splits ``X_test`` into 5 horizontal blocks, calibrates each block
        against the matching block of ``X`` via ``calibrate``, then averages
        the probabilities of an ``EC`` ensemble with those of a 10-fold
        isotonically calibrated ``EC``.

        Assumes ``X`` / ``X_test`` hold 5 prediction matrices stacked
        side by side (hence ``np.hsplit(..., 5)``) — TODO confirm with
        the caller that builds these arrays.
        """
        # Split the stacked predictions into the 5 individual model outputs.
        Xs = np.hsplit(X, 5)
        Xts = np.hsplit(X_test, 5)
        Xts_cal = []

        for i in range(len(Xs)):
            # Calibrate each test block using the matching training block.
            Xts_cal.append(calibrate(Xs[i], y, Xts[i]))

        XX_test = np.hstack(Xts_cal)

        ec = EC(n_preds=5)
        ec.fit(X, y)
        y_ens = ec.predict_proba(XX_test)
#        y_pred = ec.predict_proba(X_test)

        # Validation on the training data itself (optimistic estimate).
        yv = ec.predict_proba(X)
        print 'Weights: %s' %(ec.w)
        print 'Validation log-loss: %s' %(logloss_mc(y, yv))

        cc = CalibratedClassifierCV(base_estimator=EC(n_preds=5), 
                                    method='isotonic', cv=10)

        cc.fit(X, y)
        y_cal = cc.predict_proba(XX_test)

        # Final prediction: plain average of raw and calibrated ensembles.
        y_pred = (y_ens + y_cal)/2.

        return y_pred       
    def train_validate(self, X_train, y_train, X_valid, y_valid):
        """
        """
        self.nn.max_epochs = 300
        self.nn.verbose=1
        self.nn.fit(X_train, y_train, X_valid, y_valid)
        
        params = self.nn.get_all_params_values()
        
        self.nn2.load_params_from(params)
        
        self.nn2.fit(X_train, y_train, X_valid, y_valid)
        
        params2 = self.nn2.get_all_params_values()
        
        self.nn3.load_params_from(params2)
        
        self.nn3.fit(X_train, y_train, X_valid, y_valid)        
        
        yp0 = self.nn3.predict_proba(X_valid)
        print 'Nolearn log-loss: %s'%(logloss_mc(y_valid, yp0))
        
        y_pred = yp0
        
#        pdb.set_trace()
        
        return y_pred
Ejemplo n.º 4
0
def func_mlogloss_2(w, Xs, y):
    """Objective for per-class ensemble-weight optimization.

    Unlike ``func_mlogloss``, every class column of every prediction
    matrix gets its own weight: ``w`` is laid out flat so that
    ``w[j*nc + c]`` weights class ``c`` of matrix ``Xs[j]``.

    Parameters
    ----------
    w : sequence of float
        ``len(Xs) * nc`` per-class blending weights.
    Xs : sequence of ndarray
        Class-probability matrices of shape ``(n_samples, nc)``.
    y : ndarray
        True class labels.

    Returns
    -------
    float
        ``logloss_mc`` of the blended prediction.
    """
    nc = 9
    sol = np.zeros((Xs[0].shape[0], nc))
    for i in range(len(w)):
        # i // nc selects the prediction matrix, i % nc the class column.
        # Floor division keeps the index an int on both Python 2 and 3
        # (plain / yields a float index on Python 3 and would raise).
        sol[:, i % nc] += Xs[i // nc][:, i % nc] * w[i]
    return logloss_mc(y, sol)
Ejemplo n.º 5
0
    def train_validate(self, X_train, y_train, X_valid, y_valid):
        """Average an uncalibrated random forest with an isotonically
        calibrated one and return the blended validation probabilities.
        """
        forest_params = dict(n_estimators=1500, class_weight="auto",
                             max_features=0.8)

        # Plain forest.
        forest = RandomForestClassifier(**forest_params)
        forest.fit(X_train, y_train)
        proba_raw = forest.predict_proba(X_valid)
        print(logloss_mc(y_valid, proba_raw))

        # Fresh forest, isotonically calibrated on 3 stratified folds.
        calibrated = CalibratedClassifierCV(
            base_estimator=RandomForestClassifier(**forest_params),
            method="isotonic", cv=StratifiedKFold(y_train, 3))
        calibrated.fit(X_train, y_train)
        proba_cal = calibrated.predict_proba(X_valid)
        print(logloss_mc(y_valid, proba_cal))

        return (proba_raw + proba_cal) / 2.0
Ejemplo n.º 6
0
def func_mlogloss(w, Xs, y):
    """L2-regularized multiclass log-loss of a weighted blend.

    Weights are forced non-negative via ``abs``; a small ridge penalty
    (``0.001 * ||w||_2``) discourages large weights.
    """
    weights = np.abs(w)
    blend = np.zeros((Xs[0].shape[0], 9))
    for idx in range(len(weights)):
        blend += Xs[idx] * weights[idx]
    penalty = np.sqrt(np.sum(weights ** 2)) * 0.001
    return logloss_mc(y, blend) + penalty
Ejemplo n.º 7
0
def func_mlogloss_4(w, Xs, y):
    """Multiclass log-loss of a blend with two weights per matrix.

    Weights are forced non-negative.  For each prediction matrix
    ``Xs[i]`` (``i < len(w) // 2``), class columns 1-3 are scaled by
    ``w[i + 1]`` and the remaining columns by ``w[i]``.

    NOTE(review): the paired indexing (``w[i]`` / ``w[i + 1]``) overlaps
    between consecutive ``i`` — it looks intentional (classes 1-3 get a
    separate weight) but confirm against the optimizer that supplies
    ``w`` before changing it.

    Parameters
    ----------
    w : sequence of float
        Blending weights; only the first ``len(w) // 2 + 1`` are used.
    Xs : sequence of ndarray
        Class-probability matrices of shape ``(n_samples, 9)``.
    y : ndarray
        True class labels.

    Returns
    -------
    float
        ``logloss_mc`` of the blended prediction.
    """
    w = np.abs(w)
    sol = np.zeros((Xs[0].shape[0], 9))
    # len(w) // 2: explicit floor division so the loop bound stays an int
    # on Python 3 as well (plain / would make range() raise there).
    for i in range(len(w) // 2):
        for c in range(9):
            weight = w[i + 1] if c in (1, 2, 3) else w[i]
            sol[:, c] += Xs[i][:, c] * weight
    return logloss_mc(y, sol)
    def train_validate(self, X_train, y_train, X_valid, y_valid):
        """Train ``self.nn`` and ``self.nn2`` on the same split and return
        ``self.nn``'s validation probabilities.

        ``self.nn2`` is fitted but its predictions are unused in the
        active code path — presumably kept for the commented-out
        calibration experiments below; confirm before removing.
        """
        self.nn.max_epochs = 300
        self.nn.verbose=1
        self.nn.fit(X_train, y_train, X_valid, y_valid)
#        XX = np.vstack((X_train, X_valid[:len(X_valid)/2]))
#        yy = np.hstack((y_train, y_valid[:len(y_valid)/2]))
#        XXv = X_valid[len(X_valid)/2:]
#        yyv = y_valid[len(y_valid)/2:]
##        self.nn.dropouti_p=0.25
#        self.nn.fit(XX, yy, XXv, yyv)

        self.nn2.fit(X_train, y_train, X_valid, y_valid)

#        self.nn.fit(X_train, y_train)
        yp0 = self.nn.predict_proba(X_valid)
        print 'Nolearn log-loss: %s'%(logloss_mc(y_valid, yp0))

#        self.nn.max_epochs = self.early_stopping.best_valid_epoch
#        print self.early_stopping.best_valid_epoch
#        self.nn.verbose=0
#        
#        clf = ClfCal(self.nn)
#        cc = CalibratedClassifierCV(base_estimator=clf, method='isotonic',
#                                    cv=StratifiedKFold(y_train, n_folds=3))
#        cc.fit(X_train, y_train)
#        yp1= cc.predict_proba(X_valid)
#        print 'Calibrated log-loss: %s' %(logloss_mc(y_valid, yp1))
#        y_pred = (yp0+yp1)/2.
#        print 'Mean log-loss: %s' %(logloss_mc(y_valid, y_pred))
#        
#        self.cal_clf = cc
        y_pred = yp0

#        pdb.set_trace()

        return y_pred
    def train_validate(self, X_train, y_train, X_valid, y_valid):
        """Hierarchical three-network classification scheme.

        ``self.nn0`` separates the group of classes {1, 2, 3} from the
        rest (binary), ``self.nn1`` discriminates within {1, 2, 3}, and
        ``self.nn2`` discriminates within the remaining classes.  Final
        9-class probabilities are products of the stage outputs, then
        averaged with an isotonically calibrated version of the same
        three-stage classifier (``ClfCal``).
        """
        le = LabelEncoder()
        # Boolean mask of samples belonging to classes 1, 2 or 3.
        id_123 = np.logical_or(np.logical_or(y_train==1, y_train==2), 
                               y_train==3)  
        # Binary target: 1 if the sample is in the {1,2,3} group.
        y0 = np.zeros(len(y_train), dtype=np.int32)
        y0[id_123] = 1
        X0 = np.copy(X_train) 
        y0 = le.fit_transform(y0).astype(np.int32)
    
        # Within-group data: classes {1,2,3}, re-encoded to 0..2.
        X1 = X_train[id_123]
        y1 = y_train[id_123]
        y1 = le.fit_transform(y1).astype(np.int32)
    
        # Complement data: the remaining 6 classes, re-encoded to 0..5.
        X2 = X_train[np.logical_not(id_123)]
        y2 = y_train[np.logical_not(id_123)]    
        y2 = le.fit_transform(y2).astype(np.int32)    
        
        # Same three-way split for the validation set.
        id_123_valid = np.logical_or(np.logical_or(y_valid==1, y_valid==2), 
                               y_valid==3)  
        y0_valid = np.zeros(len(y_valid), dtype=np.int32)
        y0_valid[id_123_valid] = 1
        X0_valid = np.copy(X_valid) 
        y0_valid = le.fit_transform(y0_valid).astype(np.int32)
    
        X1_valid = X_valid[id_123_valid]
        y1_valid = y_valid[id_123_valid]
        y1_valid = le.fit_transform(y1_valid).astype(np.int32)
    
        X2_valid = X_valid[np.logical_not(id_123_valid)]
        y2_valid = y_valid[np.logical_not(id_123_valid)]    
        y2_valid = le.fit_transform(y2_valid).astype(np.int32) 
        
        # Stage 0: group-vs-rest binary network.
        self.nn0.max_epochs = 300
        self.nn0.verbose=1
        self.nn0.fit(X0, y0, X0_valid, y0_valid)
        y0_pred = self.nn0.predict_proba(X_valid)
        
        # Stage 1: within-{1,2,3} network.
        self.nn1.max_epochs = 300
        self.nn1.verbose=1
        self.nn1.fit(X1, y1, X1_valid, y1_valid)
        y1_pred = self.nn1.predict_proba(X_valid)        

        # Stage 2: within-complement network.
        self.nn2.max_epochs = 300
        self.nn2.verbose=1
        self.nn2.fit(X2, y2, X2_valid, y2_valid)
        y2_pred = self.nn2.predict_proba(X_valid)
           
        # Combine: P(class) = P(group) * P(class | group).
        # Columns 1-3 come from the {1,2,3} branch (y0_pred[:,1]);
        # all other columns come from the complement branch (y0_pred[:,0]).
        y_pred = np.zeros((y0_pred.shape[0], 9))
        y_pred[:,0] = y0_pred[:,0]*y2_pred[:,0]
        y_pred[:,1] = y0_pred[:,1]*y1_pred[:,0]
        y_pred[:,2] = y0_pred[:,1]*y1_pred[:,1]
        y_pred[:,3] = y0_pred[:,1]*y1_pred[:,2]
        y_pred[:,4] = y0_pred[:,0]*y2_pred[:,1]
        y_pred[:,5] = y0_pred[:,0]*y2_pred[:,2]
        y_pred[:,6] = y0_pred[:,0]*y2_pred[:,3]
        y_pred[:,7] = y0_pred[:,0]*y2_pred[:,4]
        y_pred[:,8] = y0_pred[:,0]*y2_pred[:,5]  
        yp0 = y_pred
        
        print logloss_mc(y_valid, yp0)
        
        # Refit budget for calibration: stop at each net's best epoch.
        self.nn0.max_epochs = self.early_stopping0.best_valid_epoch
        self.nn0.verbose=0
        self.nn1.max_epochs = self.early_stopping1.best_valid_epoch
        self.nn1.verbose=0
        self.nn2.max_epochs = self.early_stopping2.best_valid_epoch
        self.nn2.verbose=0  
        
        # Isotonic calibration of the whole three-stage classifier.
        clf = ClfCal(self.nn0, self.nn1, self.nn2)
        cc = CalibratedClassifierCV(base_estimator=clf, method='isotonic',
                                    cv=StratifiedKFold(y_train, n_folds=3))
        cc.fit(X_train, y_train)
        yp1= cc.predict_proba(X_valid)
        print 'Calibrated log-loss: %s' %(logloss_mc(y_valid, yp1))
        y_pred = (yp0+yp1)/2.
        print 'Mean log-loss: %s' %(logloss_mc(y_valid, y_pred))
        
        self.cal_clf = cc
        
        return y_pred
Ejemplo n.º 10
0
    def internal_processing(self, X, y, X_test):
        """Ensemble-combination experiment sandbox (mostly disabled).

        The only active code runs a 10-fold stratified CV loop comparing
        the log-loss of a weight-optimized ensemble (``EC``) against a
        per-class-weighted one (``EC_2`` seeded from ``EC``'s weights),
        plus small +/-0.0001 probability perturbations as a sanity check.
        Everything else is commented-out exploration.

        NOTE(review): no active statement assigns ``y_pred`` before the
        final ``return y_pred`` — calling this method as-is raises
        ``NameError``.  Confirm which commented-out branch was meant to
        produce the return value before re-enabling.
        """  
#        d_train = np.loadtxt('./data/X_train')
#        d_valid = np.loadtxt('./data/X_valid')
#        Xs = np.hsplit(X, 5)
#        Xs_cal = []
#        for i in range(len(Xs)):
#            cc = CalibratedClassifierCV(base_estimator=DumClf(), 
#                                        method='isotonic', cv=5)
#
#            cc.fit(Xs[i], y)
#            Xs_cal.append(cc.predict_proba(Xs[i]))
#        XX = np.hstack(Xs_cal)
        
#        Xts_cal = []
#        Xts = np.hsplit(X_test, 5)
#        for i in range(len(Xts)):
#            cc = CalibratedClassifierCV(base_estimator=DumClf(),
#                                        method='isotonic', cv=5)
#            cc.fit(Xts[i], (np.random.rand(len(Xts[i]))*10).astype(np.int32))
#            Xts_cal.append(cc.predict_proba(Xts[i]))
#        XX_test = np.hstack(Xts_cal)   
#        
#        print 'estoy aqui...'
#        
#        ec = EC(n_preds=self.n_preds)
#        ec.fit(X, y)
#        ew0 = ec.w
##        y_ens = ec.predict_proba(X_test)
#        
#        #validation
#        yv = ec.predict_proba(X)
#        print 'Weights: %s' %(ec.w)
#        print 'Validation log-loss: %s' %(logloss_mc(y, yv))
#        
#        
##        
##        cc = CalibratedClassifierCV(base_estimator=EC(n_preds=self.n_preds), 
##                                    method='isotonic', cv=10)
##                                    
##        cc.fit(X, y)
##        y_cal = cc.predict_proba(X_test)
##        
##        y1_pred = (y_ens + y_cal)/2.
##        
#        pdb.set_trace()
##        
###        
####      
#        w20 = np.ones(X.shape[1])
#        for i in range(len(w20)):
#            w20[i] = w20[i] * (ew0[i/9])
#        ec2 = EC_2(n_preds=self.n_preds, w0=w20)
#        ec2.fit(X, y)
##        y2_ens = ec2.predict_proba(X_test)
#        
#        #validation
#        yv2 = ec2.predict_proba(X)
#        print 'Weights: %s' %(ec2.w)
#        print 'Validation log-loss: %s' %(logloss_mc(y, yv2))
#        
#        pdb.set_trace()
        
#        cc2 = CalibratedClassifierCV(base_estimator=EC_2(n_preds=self.n_preds,
#                                                         w0=w20), 
#                                    method='isotonic', cv=10)
#                                    
#        cc2.fit(X, y)
#        y2_cal = cc2.predict_proba(X_test)
#        
##        y2_pred = (y2_ens + y2_cal)/2.
#        y2_pred = (y2_ens*2 + y2_cal*3)/5.
###        
###        
##        y_pred = (y1_pred + y2_pred)/2.
#        y_pred = y2_pred
##        
#        ec3 = EC_3(n_preds=self.n_preds)
#        ec3.fit(X, y)
##        y3_ens = ec4.predict_proba(X_test)
##        
##        #validation
#        yv3 = ec3.predict_proba(X)
##        print 'Weights: %s' %(ec4.w)
#        print 'Validation log-loss: %s' %(logloss_mc(y, yv3))
##        
#        cc3 = CalibratedClassifierCV(base_estimator=EC_3(n_preds=self.n_preds), 
#                                    method='isotonic', cv=10)
#                                    
#        cc3.fit(X, y)
#        y3_cal = cc3.predict_proba(X)
#        print 'Validation log-loss: %s' %(logloss_mc(y, y3_cal))
##        
#        y3_pred = (y3_ens + y3_cal)/2.
#        
#        y_pred = (y1_pred + y2_pred + y3_pred)/3.
#
##        
#        pdb.set_trace()
        
        
##        cc = CalibratedClassifierCV(base_estimator=ec, method='sigmoid', cv=5)
##        cc.fit(X, y)
##        y_pred = cc.predict_proba(X_test)
##        sss = StratifiedShuffleSplit(y, 1, test_size=0.5) 
        # --- Active code: 10-fold CV comparison of EC vs EC_2 blending ---
        sss = StratifiedKFold(y, n_folds=10)
        for train_id, valid_id in sss:
            X0, X1 = X[train_id], X[valid_id]
            y0, y1 = y[train_id], y[valid_id] 
#            d0, d1 = d_train[train_id], d_train[valid_id]
            ec = EC(n_preds = self.n_preds)
            ec.fit(X0, y0)
            ew0 = ec.w
#            print ec.w
            y0_pred = ec.predict_proba(X1)
            print 1,logloss_mc(y1, y0_pred)
            
#            pp2 = np.where(y0_pred+0.01>1,1,y0_pred+0.01)
#            pp3 = np.where(y0_pred+0.001>1,1,y0_pred+0.001)
#            pp4 = np.where(y0_pred-0.01<0,0,y0_pred-0.01)
            # Tiny perturbations toward/away from 0.5 to probe the
            # sensitivity of the log-loss.
            pp4 = np.where(y0_pred>0.5, y0_pred-0.0001, y0_pred+0.0001)
            pp5 = np.where(y0_pred>0.5, y0_pred+0.0001, y0_pred-0.0001)
#            print 2, logloss_mc(y1, pp2)
#            print 3, logloss_mc(y1, pp3)
            print 3, logloss_mc(y1, pp4)
            print 4, logloss_mc(y1, pp5)
            
            
            
##            
#            tt = CalibratedClassifierCV(base_estimator=EC(n_preds=self.n_preds), 
#                                        method='isotonic', cv=5)
#            tt.fit(X0, y0)
#            y0_tt = tt.predict_proba(X1)
#            
#            
#            print 2,logloss_mc(y1, y0_tt)
##            
#            ym0 = (y0_tt+y0_pred)/2.
###            
#            print 3,logloss_mc(y1, ym0)
#            
#            y4 = np.where(ym0<0.05,0,ym0)
#            print 4, logloss_mc(y1, y4)
#            
#            y5 = np.where(ym0<0.2, ym0/2, ym0)
#            print 5, logloss_mc(y1, y5)
#            
#            yy = np.zeros(y0_pred.shape)
#            for i,j in enumerate(y1):
#                yy[i,j] = 1.
#            
#            plt.plot(y0_pred.reshape(-1), 'r^-')
#            plt.plot(y0_tt.reshape(-1), 'g-')
#            plt.plot(ym0.reshape(-1), 'b*-')
#            
##            plt.plot(yy.reshape(-1), 'y-')
#            plt.show()
#            
#            pdb.set_trace() 
            
            
            
#            print 4, logloss_mc(y1, (2*y0_tt+y0_pred)/3.)
#            print 5, logloss_mc(y1, (y0_tt+2*y0_pred)/3.)
#            
##            jj = CalibratedClassifierCV(base_estimator= CalibratedClassifierCV(base_estimator=EC(n_preds=self.n_preds), 
##                                        method='isotonic', cv=5), method='isotonic',
##                                        cv=3)
##            jj.fit(X0, y0)
##            y0_jj = jj.predict_proba(X1) 
##            print 6,logloss_mc(y1, y0_jj)
##            print 7,logloss_mc(y1, (y0_pred+y0_jj)/2.)
#            
#            mn = np.min(y0_pred)
#            mx = np.max(y0_pred)
#            mms = MinMaxScaler(feature_range=(mn, mx))
#            ymm = mms.fit_transform(y0_tt)
#            print 6, logloss_mc(y1, ymm)
#            print 7, logloss_mc(y1, (y0_pred+ymm)/2.)
#            
#            
##
####            
#            print '-----'
            # Seed EC_2's per-column weights by repeating each of EC's
            # per-matrix weights across that matrix's 9 class columns.
            # NOTE: ew0[i/9] relies on Python 2 integer division (i // 9).
            w20 = np.ones(X0.shape[1])
            for i in range(len(w20)):
                w20[i] = w20[i] * (ew0[i/9])
                
            ec2 = EC_2(n_preds=self.n_preds, w0=w20)
            ec2.fit(X0, y0)
#            print ec2.w
            y02_pred = ec2.predict_proba(X1)
##            
##            
            print 4, logloss_mc(y1, y02_pred)
            # Same +/-0.0001 perturbation probe for the EC_2 predictions.
            pp8 = np.where(y02_pred>0.5, y02_pred-0.0001, y02_pred+0.0001)
            pp9 = np.where(y02_pred>0.5, y02_pred+0.0001, y02_pred-0.0001)
#            print 2, logloss_mc(y1, pp2)
#            print 3, logloss_mc(y1, pp3)
            print 5, logloss_mc(y1, pp8)
            print 6, logloss_mc(y1, pp9)
            
            print '---\n'
##            
##            gg = CalibratedClassifierCV(base_estimator=EC_2(n_preds=self.n_preds, w0=w20), 
##                            method='isotonic', cv=5)
##            gg.fit(X0, y0)
##            y0_gg = gg.predict_proba(X1)
##            
##            print 5, logloss_mc(y1, y0_gg)
##            
##            ym1 = (y0_gg+y02_pred)/2.
##            print 6, logloss_mc(y1, ym1)
###            
##            print 7, logloss_mc(y1, (ym0 + ym1)/2.)
#            
#            print '----' 
#           
#            ec3.fit(X0, y0)
#            y03_pred = ec3.predict_proba(X1)
#            print 4, logloss_mc(y1, y03_pred)
#            
#            hh = CalibratedClassifierCV(base_estimator=EC_3(n_preds=self.n_preds), 
#                method='isotonic', cv=5)
#            hh.fit(X0, y0)
#            y0_hh = hh.predict_proba(X1)
#            
#            print 5, logloss_mc(y1, y0_hh)
##            
#            ym3 = (y0_hh+y03_pred)/2.
#            print 6, logloss_mc(y1, ym3)
##            
#            print 7, logloss_mc(y1, (ym0+ym3)/2.)
#            print '------ ' 
#            print ' ' 
##            ecc = EC(n_preds=5)
###            y_aux = np.array([np.random.randint(9) for i in range(len(XX0))], dtype=np.int32)
###            pdb.set_trace()
##            
##            Xs0 = np.hsplit(X0, 5)
#            Xs1 = np.hsplit(X1, 5)
#            X0_cal = []
#            X1_cal = []
#            for i in range(len(Xs0)):
#                cc = CalibratedClassifierCV(base_estimator=DumClf(), 
#                                            method='isotonic', cv=5)
#    
#                cc.fit(d[i], y0)
#                X0_cal.append(cc.predict_proba(Xs0[i]))
#                X1_cal.append(cc.predict_proba(Xs1[i]))
#            XX0 = np.hstack(X0_cal) 
#            XX1 = np.hstack(X1_cal)
#            ecc.fit(XX0, y0)
#            y01_pred = ecc.predict_proba(XX1)
#            print 4,logloss_mc(y1, y01_pred)
#            
#            gg = CalibratedClassifierCV(base_estimator=EC(n_preds=5), 
#                                        method='isotonic', cv=5)
#            gg.fit(XX0, y0)
#            y0_gg = tt.predict_proba(XX1)
#            
#            print 5,logloss_mc(y1, y0_gg)
#            
#            print 6,logloss_mc(y1, (y0_gg+y01_pred)/2.)
#            
#            
#            
#            print ' '
            
#        pdb.set_trace()        
#        
#        Xs = np.hsplit(X, self.n_preds)
#        
#        #evaluating individual predictions
#        for i in range(len(Xs)):
#            print 'Solution %s, logloss: %s' %(i, logloss_mc(y, Xs[i]))
#        print ' '
#        
#        x0 = np.ones(self.n_preds) / float(self.n_preds)
#        
#        bounds = [(0,1)]*len(x0)
#    
#        res = minimize(func_mlogloss, x0, args=(Xs, y), 
#                       method='L-BFGS-B', bounds=bounds)
#        w = res.x
#        print res.message, res.success
#        print w, np.sum(w)
#        
#        ypv = np.zeros((X.shape[0], 9))
#        for i in range(len(w)):
#            ypv += Xs[i] * w[i]
#        print 'valid log-loss: %s' %(logloss_mc(y, ypv))
#        
#        
#        
##        pdb.set_trace()
#    
#        
#        Xs_test = np.hsplit(X_test, self.n_preds)
#        y_pred = np.zeros((X_test.shape[0], 9))
#        for i in range(len(w)):
#            y_pred += Xs_test[i] * w[i]
#            
#        pdb.set_trace()
         
        # NOTE(review): y_pred is undefined here in the active code path
        # (see docstring) — this raises NameError when executed.
        return y_pred       
Ejemplo n.º 11
0
def scoring_mlogloss(clf, X_test, y_test):
    """Scorer wrapper: multiclass log-loss of ``clf``'s predicted
    probabilities on the test set."""
    return logloss_mc(y_test, clf.predict_proba(X_test))