Example #1
    def build (self, X, Y, quantitative=False, autoscale=False, nestimators=0, features='', random=False, tune=False):
        """Build a new RF model with the X and Y numpy matrices

        """
        
        nobj, nvarx= np.shape(X)

        self.nobj  = nobj
        self.nvarx = nvarx

        self.quantitative = quantitative
        self.autoscale = autoscale
        self.estimators = nestimators
        self.features = features
        self.random = random
        
        self.X = X.copy()
        self.Y = Y.copy()

        if autoscale:
            self.X, self.mux = center(self.X)
            self.X, self.wgx = scale(self.X, autoscale)

        if random :
            RANDOM_STATE = None
        else:
            RANDOM_STATE = 1226 # no reason to pick this number

        if tune:
            self.estimators, self.features = self.optimize(self.X, self.Y)

            if self.features=='none':
                self.features = None
                
        #print self.estimators
            
        if self.quantitative:
            print "Building Quantitative RF model"
            self.clf = RandomForestRegressor(n_estimators = int(self.estimators),
                                            warm_start=False,
                                            max_features=self.features,
                                            oob_score=True,
                                            random_state=RANDOM_STATE)
        else:
            print "Building Qualitative RF_model"
            self.clf = RandomForestClassifier(n_estimators = int(self.estimators),
                                            warm_start=False,
                                            max_features=self.features,
                                            oob_score=True,
                                            random_state=RANDOM_STATE)
            
        self.clf.fit(self.X, self.Y)
            
        # Regenerate the X and Y, since they might have been centered/scaled
        self.X = X.copy()
        self.Y = Y.copy()
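
A minimal usage sketch, not part of the eTOXlab sources: it assumes build() belongs to an already-constructed RF model object, and the parameter values shown are illustrative assumptions only.

import numpy as np

# `model` stands for an already-constructed instance of the class owning build()
X = np.random.rand(50, 10)   # 50 objects, 10 descriptors
Y = np.random.rand(50)       # continuous endpoint, so a quantitative model is built
model.build(X, Y, quantitative=True, autoscale=True,
            nestimators=200, features='sqrt', random=False, tune=False)
oob_r2 = model.clf.oob_score_   # out-of-bag R^2 reported by scikit-learn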
Example #2
    def build (self, X, targetA, autoscale=False):
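        """Build a PCA model on X, extracting targetA principal components
           through successive extractPC / deflatePC calls
        """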

        nobj, nvar= np.shape(X)

        self.nobj = nobj
        self.nvar = nvar

        self.X = X

        X, mu = center(X)
        X, wg = scale (X, autoscale)

        self.mu = mu
        self.wg = wg
        self.autoscale = autoscale

        SSXac=0.0

        for a in range(targetA):
            # extracts LV
            t, p = self.extractPC(X)

            self.t.append(t)
            self.p.append(p)

            # deflates X
            X, SSX, SSXex = self.deflatePC(X,t,p)

            SSXac += SSXex
            
            self.SSXex.append(SSXex)
            self.SSXac.append(SSXac)
            
            if a==0:
                self.SSX = SSX

        self.A = targetA
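
For context, a minimal NIPALS-style extraction of one principal component, roughly what extractPC is expected to return and how deflatePC would remove it. This is an illustrative sketch, not the eTOXlab implementation:

import numpy as np

def nipals_pc(X, tol=1e-9, maxit=500):
    """Return scores t and normalized loadings p of the first principal component."""
    t = X[:, np.argmax(X.var(axis=0))].copy()   # start from the highest-variance column
    for _ in range(maxit):
        p = np.dot(X.T, t) / np.dot(t, t)       # loadings
        p /= np.linalg.norm(p)
        t_new = np.dot(X, p)                    # scores
        if np.linalg.norm(t_new - t) < tol * np.linalg.norm(t_new):
            t = t_new
            break
        t = t_new
    return t, p

# deflation: subtract the rank-one approximation given by this component
# X = X - np.outer(t, p)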
Example #3
    def getLOO (self, X, Y, Xout):   
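        """Fits a fresh RF model on the (X, Y) training fold, using the same settings
           as the parent model, and returns (predictions for Xout, out-of-bag score)
        """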
        clf = None
        if self.autoscale:
            X, mux = center(X)
            X, wgx = scale(X, self.autoscale)

        RANDOM_STATE = 1226 # no reason to pick this number

        if self.quantitative:
            clf = RandomForestRegressor(n_estimators = int(self.estimators),
                warm_start=False,
                max_features=self.features,
                oob_score=True,
                random_state=RANDOM_STATE)
        else:
            clf = RandomForestClassifier(n_estimators = int(self.estimators),
                warm_start=False,
                max_features=self.features,
                oob_score=True,
                random_state=RANDOM_STATE)
            
        clf.fit(X, Y)
          
        return ( clf.predict(Xout), clf.oob_score_ )
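
A sketch of how getLOO could be driven from a leave-one-out loop. This is illustrative only; `model` stands for an instance of the class above, with quantitative, estimators, features and autoscale already set:

import numpy as np

def loo_predictions(model, X, Y):
    """Collect one leave-one-out prediction per object using model.getLOO()."""
    nobj = X.shape[0]
    yp = np.zeros(nobj)
    for i in range(nobj):
        Xr = np.delete(X, i, axis=0)            # training fold without object i
        Yr = np.delete(Y, i)
        pred, oob = model.getLOO(Xr, Yr, X[i:i + 1, :])
        yp[i] = pred[0]
    return yp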
Example #4
File: pls.py  Project: tljm/eTOXlab
    def validateLOO(self, A, gui=False):
        """ Validates A dimensions of an already built PLS model, using Leave-One-Out cross-validation

            Returns nothing. The results of the cv (SSY, SDEP and Q2) are stored internally
        """

        if self.X is None or self.Y is None:
            return

        X = self.X
        Y = self.Y

        nobj, nvarx = np.shape(X)

        SSY0 = 0.0
        for i in range(nobj):
            SSY0 += np.square(Y[i] - np.mean(Y))

        SSY = np.zeros(A, dtype=np.float64)
        YP = np.zeros((nobj, A + 1), dtype=np.float64)

        if gui: updateProgress(0.0)

        for i in range(nobj):

            # build reduced X and Y matrices removing i object
            Xr = np.delete(X, i, axis=0)
            Yr = np.delete(Y, i)

            Xr, muxr = center(Xr)
            Xr, wgxr = scale(Xr, self.autoscale)

            Yr, muyr = center(Yr)

            xp = np.copy(X[i, :])

            xp -= muxr
            xp *= wgxr

            # predicts y for the i object, using A LV
            yp = self.getLOO(Xr, Yr, xp, A)
            yp += muyr

            # updates SSY with the object i errors
            YP[i, 0] = Y[i]

            for a in range(A):
                SSY[a] += np.square(yp[a] - Y[i])
                YP[i, a + 1] = yp[a]

            if gui: updateProgress(float(i) / float(nobj))

        if gui: print

        self.SSY = SSY
        self.SDEP = [np.sqrt(i / nobj) for i in SSY]
        self.Q2 = [1.00 - (i / SSY0) for i in SSY]

        self.Av = A

        return (YP)
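
For reference, the statistics stored at the end follow the usual PLS leave-one-out definitions, with SSY[a] playing the role of PRESS for the model using a+1 latent variables:

    PRESS_a = SSY[a] = sum_i ( yp_i(a) - y_i )^2           (LOO prediction errors)
    SDEP_a  = sqrt( PRESS_a / nobj )                        (standard deviation of the errors of prediction)
    Q2_a    = 1 - PRESS_a / SSY0,  with SSY0 = sum_i ( y_i - mean(y) )^2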
Example #5
File: pls.py  Project: tljm/eTOXlab
    def build(self, X, Y, targetA=0, targetSSX=0.0, autoscale=False):
        """Build a new PLS model with the X and Y numpy matrice provided using NIPALS algorithm

           The dimensionality of the model can be defined either providing
           1. directly the number of LV to extract (targetA)
           2. the fraction of SSX that the model will explain (targetSSX)

           The X and Y matrices are centered but no other scaling transform is applied

           Does not return anything, but updates internals vectors and variables
        """
        nobj, nvarx = np.shape(X)

        ##        for i in range (nobj):
        ##            for j in range (nvarx):
        ##                print X[i,j],
        ##            print

        self.nobj = nobj
        self.nvarx = nvarx
        self.X = X.copy()
        self.Y = Y.copy()

        self.X, self.mux = center(self.X)
        self.Y, self.muy = center(self.Y)
        self.X, self.wgx = scale(self.X, autoscale)

        ##        self.mux = mux
        ##        self.muy = muy
        ##        self.wgx = wgx

        self.autoscale = autoscale

        SSXac = 0.0
        SSYac = 0.0

        SSX0, SSY0, null = self.computeSS(self.X, self.Y)

        SSXold = SSX0
        SSYold = SSY0

        a = 0
        while True:
            t, p, w, c = self.extractLV(self.X, self.Y)

            self.t.append(t)
            self.p.append(p)
            self.w.append(w)
            self.c.append(c)

            self.X, self.Y = self.deflateLV(self.X, self.Y, t, p, c)

            SSXnew, SSYnew, dmodx = self.computeSS(self.X, self.Y)

            SSXex = (SSXold - SSXnew) / SSX0
            SSXac += SSXex

            SSYex = (SSYold - SSYnew) / SSY0
            SSYac += SSYex

            SDEC = np.sqrt(SSYnew / nobj)

            dof = nvarx - a
            if dof <= 0: dof = 1
            dmodx = [np.sqrt(d / dof) for d in dmodx]

            SSXold = SSXnew
            SSYold = SSYnew

            self.SSXex.append(SSXex)
            self.SSXac.append(SSXac)
            self.SSYex.append(SSYex)
            self.SSYac.append(SSYac)
            self.SDEC.append(SDEC)
            self.dmodx.append(dmodx)

            a += 1

            if targetA > 0:
                if a == targetA: break

            if targetSSX > 0.0:
                if SSXac > targetSSX: break
                # prevent extracting a meaningless number of LVs
                if a > min(20, nobj / 5): break

        self.Am = a

        # NIPALS is destructive, so we must retrieve X and Y from original data for validation
        self.X = X.copy()
        self.Y = Y.copy()

        self.cutoff = np.zeros(self.Am, dtype=np.float64)
        self.TP = np.zeros(self.Am)
        self.TN = np.zeros(self.Am)
        self.FP = np.zeros(self.Am)
        self.FN = np.zeros(self.Am)

        self.TPpred = np.zeros(self.Am)
        self.TNpred = np.zeros(self.Am)
        self.FPpred = np.zeros(self.Am)
        self.FNpred = np.zeros(self.Am)
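
For reference, a minimal single-y (PLS1) sketch of what one extractLV / deflateLV round is expected to do. This is an illustration under that assumption, not the eTOXlab implementation:

import numpy as np

def pls1_extract_lv(X, y):
    """One PLS1 round: weights w, scores t, X-loadings p and y-loading c."""
    w = np.dot(X.T, y)                 # covariance-based X weights
    w /= np.linalg.norm(w)
    t = np.dot(X, w)                   # X scores
    c = np.dot(y, t) / np.dot(t, t)    # y loading (a scalar for a single y)
    p = np.dot(X.T, t) / np.dot(t, t)  # X loadings
    return t, p, w, c

# deflation, removing what this LV explains:
# X = X - np.outer(t, p)
# y = y - t * c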
Example #6
    def build(self,
              X,
              Y,
              quantitative=False,
              autoscale=False,
              nestimators=0,
              features='',
              random=False,
              tune=False,
              class_weight="balanced",
              cv='loo',
              n=2,
              p=1,
              lc=True,
              vpath=''):
        """Build a new RF model with the X and Y numpy matrices

        """

        nobj, nvarx = np.shape(X)

        self.nobj = nobj
        self.nvarx = nvarx

        self.quantitative = quantitative
        self.autoscale = autoscale
        self.estimators = nestimators
        self.features = features
        self.random = random
        self.class_weight = class_weight
        self.learning_curve = lc
        self.n = n
        self.p = p
        self.cv = cv

        self.X = X.copy()
        self.Y = Y.copy()

        self.vpath = vpath

        #print self.vpath
        if autoscale:
            self.X, self.mux = center(self.X)
            self.X, self.wgx = scale(self.X, autoscale)

        if random:
            RANDOM_STATE = None
        else:
            RANDOM_STATE = 1226  # no reason to pick this number

        if self.cv:
            self.cv = getCrossVal(self.cv, RANDOM_STATE, self.n, self.p)

        if tune:
            self.estimators, self.features = self.optimize(self.X, self.Y)

            if self.features == 'none':
                self.features = None

        #print self.estimators

        if self.quantitative:
            print "Building Quantitative RF model"
            self.clf = RandomForestRegressor(n_estimators=int(self.estimators),
                                             warm_start=False,
                                             max_features=self.features,
                                             oob_score=True,
                                             random_state=RANDOM_STATE)
        else:
            print "Building Qualitative RF_model"
            self.clf = RandomForestClassifier(n_estimators=int(
                self.estimators),
                                              warm_start=False,
                                              max_features=self.features,
                                              oob_score=True,
                                              random_state=RANDOM_STATE,
                                              class_weight=self.class_weight)

        self.clf.fit(self.X, self.Y)

        if self.learning_curve:
            print 'Building Learning Curves'
            title = "Learning Curves (RF)"
            # 10 ShuffleSplit iterations keep the learning-curve CV reasonably cheap:
            cv = ShuffleSplit(n_splits=10, test_size=0.2, random_state=0)
            estimator = self.clf
            plot = plot_learning_curve(estimator,
                                       title,
                                       self.X,
                                       self.Y, (0.0, 1.01),
                                       cv=cv)
            plot.savefig(self.vpath + "/RF-learning_curves.png", format='png')
            plot.savefig("./RF-learning_curves.png", format='png')

        # Regenerate the X and Y, since they might have been centered/scaled
        self.X = X.copy()
        self.Y = Y.copy()
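
plot_learning_curve appears to follow the scikit-learn documentation's plot_learning_curve example; a minimal sketch of the underlying computation, assuming it wraps sklearn.model_selection.learning_curve (the helper itself is not shown in these examples):

import numpy as np
from sklearn.model_selection import ShuffleSplit, learning_curve

def learning_curve_scores(estimator, X, Y):
    """Mean train/test scores over 10 ShuffleSplit iterations and 5 training-set sizes."""
    cv = ShuffleSplit(n_splits=10, test_size=0.2, random_state=0)
    sizes, train_scores, test_scores = learning_curve(
        estimator, X, Y, cv=cv, train_sizes=np.linspace(0.1, 1.0, 5))
    return sizes, train_scores.mean(axis=1), test_scores.mean(axis=1)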
Example #7
File: RF.py  Project: manuelpastor/eTOXlab
    def build (self, X, Y, quantitative=False, autoscale=False,
               nestimators=0, features='', random=False, tune=False, class_weight="balanced",
               cv='loo', n=2, p=1, lc=True, vpath = ''):
        """Build a new RF model with the X and Y numpy matrices

        """

        nobj, nvarx= np.shape(X)

        self.nobj  = nobj
        self.nvarx = nvarx

        self.quantitative = quantitative
        self.autoscale = autoscale
        self.estimators = nestimators
        self.features = features
        self.random = random
        self.class_weight = class_weight
        self.learning_curve = lc
        self.n = n
        self.p = p
        self.cv = cv

        self.X = X.copy()
        self.Y = Y.copy()

        self.vpath = vpath

        #print self.vpath
        if autoscale:
            self.X, self.mux = center(self.X)
            self.X, self.wgx = scale(self.X, autoscale)

        if random :
            RANDOM_STATE = None
        else:
            RANDOM_STATE = 1226 # no reason to pick this number

        if self.cv:
            self.cv = getCrossVal(self.cv, RANDOM_STATE, self.n, self.p)
            
        if tune :
            self.estimators, self.features = self.optimize (self.X, self.Y)

            if self.features=='none':
                self.features = None

        #print self.estimators

        if self.quantitative:
            print "Building Quantitative RF model"
            self.clf = RandomForestRegressor(n_estimators = int(self.estimators),
                                            warm_start=False,
                                            max_features=self.features,
                                            oob_score=True,
                                            random_state=RANDOM_STATE)
        else:
            print "Building Qualitative RF_model"
            self.clf = RandomForestClassifier(n_estimators = int(self.estimators),
                                            warm_start=False,
                                            max_features=self.features,
                                            oob_score=True,
                                            random_state=RANDOM_STATE,
                                            class_weight=self.class_weight)

        self.clf.fit(self.X, self.Y)
        
        if self.learning_curve:
            print 'Building Learning Curves'
            title = "Learning Curves (RF)"
            # 10 ShuffleSplit iterations keep the learning-curve CV reasonably cheap:
            cv = ShuffleSplit(n_splits=10, test_size=0.2, random_state=0)
            estimator = self.clf
            plot = plot_learning_curve(estimator, title, self.X, self.Y, (0.0, 1.01), cv=cv)
            plot.savefig(self.vpath+"/RF-learning_curves.png", format='png')
            plot.savefig("./RF-learning_curves.png", format='png')


        # Regenerate the X and Y, since they might have been centered/scaled
        self.X = X.copy()
        self.Y = Y.copy()
Example #8
File: pls.py  Project: manuelpastor/eTOXlab
    def validateLOO (self, A, gui=False):
        """ Validates A dimensions of an already built PLS model, using Leave-One-Out cross-validation

            Returns nothing. The results of the cv (SSY, SDEP and Q2) are stored internally
        """

        if self.X is None or self.Y is None:
            return 
        
        X = self.X
        Y = self.Y     

        nobj,nvarx = np.shape (X)

        SSY0 = 0.0
        for i in range (nobj):
            SSY0+=np.square(Y[i]-np.mean(Y))

        SSY = np.zeros(A,dtype=np.float64)
        YP = np.zeros ((nobj,A+1),dtype=np.float64)

        if gui: updateProgress (0.0)
        
        for i in range (nobj):
            
            # build reduced X and Y matrices removing i object
            Xr = np.delete(X,i,axis=0)
            Yr = np.delete(Y,i)

            Xr,muxr = center(Xr)
            Xr,wgxr = scale (Xr, self.autoscale)
           
            Yr,muyr = center(Yr)

            xp = np.copy(X[i,:])
            
            xp -= muxr
            xp *= wgxr
            
            # predicts y for the i object, using A LV
            yp = self.getLOO(Xr,Yr,xp,A)      
            yp += muyr

            # updates SSY with the object i errors
            YP[i,0]=Y[i]
            
            for a in range(A):
                SSY[a]+= np.square(yp[a]-Y[i])
                YP[i,a+1]=yp[a]

            if gui : updateProgress (float(i)/float(nobj))

        if gui : print
        
        self.SSY  = SSY        
        self.SDEP = [np.sqrt(i/nobj) for i in SSY]
        self.Q2   = [1.00-(i/SSY0) for i in SSY]
        
        self.Av = A

        return (YP)
Example #9
File: pls.py  Project: manuelpastor/eTOXlab
    def build (self, X, Y, targetA=0, targetSSX=0.0, autoscale=False):
        """Build a new PLS model with the X and Y numpy matrice provided using NIPALS algorithm

           The dimensionality of the model can be defined either providing
           1. directly the number of LV to extract (targetA)
           2. the fraction of SSX that the model will explain (targetSSX)

           The X and Y matrices are centered but no other scaling transform is applied

           Does not return anything, but updates internals vectors and variables
        """
        nobj, nvarx= np.shape(X)

##        for i in range (nobj):
##            for j in range (nvarx):
##                print X[i,j],
##            print

        self.nobj = nobj
        self.nvarx = nvarx
        self.X = X.copy()
        self.Y = Y.copy()

        self.X, self.mux = center(self.X)
        self.Y, self.muy = center(self.Y)
        self.X, self.wgx = scale(self.X, autoscale)

##        self.mux = mux
##        self.muy = muy
##        self.wgx = wgx

        self.autoscale = autoscale
        
        SSXac=0.0
        SSYac=0.0

        SSX0,SSY0, null = self.computeSS(self.X,self.Y)
        
        SSXold=SSX0
        SSYold=SSY0

        

        a=0
        while True:
            t, p, w, c = self.extractLV(self.X, self.Y)
                
            self.t.append(t) 
            self.p.append(p)
            self.w.append(w)
            self.c.append(c)
            
            self.X, self.Y = self.deflateLV(self.X, self.Y, t, p, c)
            
            SSXnew, SSYnew, dmodx = self.computeSS(self.X, self.Y)

            SSXex = (SSXold-SSXnew)/SSX0
            SSXac+=SSXex

            SSYex = (SSYold-SSYnew)/SSY0
            SSYac+=SSYex

            SDEC = np.sqrt(SSYnew/nobj)

            dof = nvarx-a
            if dof <= 0 : dof = 1
            dmodx = [np.sqrt(d/dof) for d in dmodx] 

            SSXold=SSXnew
            SSYold=SSYnew

            self.SSXex.append(SSXex)
            self.SSXac.append(SSXac)
            self.SSYex.append(SSYex)
            self.SSYac.append(SSYac)
            self.SDEC.append(SDEC)
            self.dmodx.append(dmodx)
            
            a+=1
                
            if targetA>0:
                if a==targetA : break

            if targetSSX>0.0:
                if SSXac>targetSSX: break
                # prevent extracting a meaningless number of LVs
                if a > min (20,nobj/5) : break 

        self.Am=a
            
        # NIPALS is destructive, so we must retrieve X and Y from original data for validation
        self.X = X.copy()
        self.Y = Y.copy()
        
        self.cutoff = np.zeros(self.Am, dtype=np.float64)
        self.TP = np.zeros(self.Am)
        self.TN = np.zeros(self.Am)
        self.FP = np.zeros(self.Am)
        self.FN = np.zeros(self.Am)

        self.TPpred = np.zeros(self.Am)
        self.TNpred = np.zeros(self.Am)
        self.FPpred = np.zeros(self.Am)
        self.FNpred = np.zeros(self.Am)