def build(self, X, Y, quantitative=False, autoscale=False, nestimators=0,
          features='', random=False, tune=False):
    """Fit a random forest on the X and Y numpy matrices.

    X must be two-dimensional; its shape is stored as ``self.nobj`` and
    ``self.nvarx``.  Working copies of X and Y are kept in ``self.X`` and
    ``self.Y``; when ``autoscale`` is true the working X is passed through
    ``center`` and ``scale`` before fitting.

    Parameters:
        quantitative -- true selects RandomForestRegressor, false selects
                        RandomForestClassifier.
        autoscale    -- centre/scale the working copy of X before fitting.
        nestimators  -- number of trees (converted with int()).
        features     -- forwarded as ``max_features``; the string 'none'
                        is translated to None.
        random       -- true leaves the estimator unseeded, false uses a
                        fixed seed.
        tune         -- true replaces estimators/features with the values
                        returned by ``self.optimize(X, Y)`` (called with
                        the raw, unscaled arguments).

    Returns nothing.  The fitted estimator is stored in ``self.clf`` and
    ``self.X`` / ``self.Y`` are reset to fresh copies of the raw inputs.
    """
    n_rows, n_cols = np.shape(X)
    self.nobj = n_rows
    self.nvarx = n_cols

    self.quantitative = quantitative
    self.autoscale = autoscale
    self.estimators = nestimators
    self.features = features
    self.random = random

    self.X = X.copy()
    self.Y = Y.copy()

    if autoscale:
        self.X, self.mux = center(self.X)
        self.X, self.wgx = scale(self.X, autoscale)

    # no reason to pick this number
    seed = None if random else 1226

    if tune:
        self.estimators, self.features = self.optimize(X, Y)

    if self.features == 'none':
        self.features = None

    if self.quantitative:
        banner = "Building Quantitative RF model"
        forest_class = RandomForestRegressor
    else:
        banner = "Building Qualitative RF_model"
        forest_class = RandomForestClassifier

    print(banner)
    self.clf = forest_class(n_estimators=int(self.estimators),
                            warm_start=False,
                            max_features=self.features,
                            oob_score=True,
                            random_state=seed)
    self.clf.fit(self.X, self.Y)

    # Regenerate the X and Y, since they might have been centered/scaled
    self.X = X.copy()
    self.Y = Y.copy()
def build(self, X, targetA, autoscale=False):
    """Extract ``targetA`` components from the two-dimensional matrix X.

    The caller's X is stored as ``self.X`` (by reference, not copied) and
    a centred/scaled version is then deflated component by component.

    For every component the values returned by ``self.extractPC`` are
    appended to ``self.t`` and ``self.p``, and the per-component and
    running sums of the third value returned by ``self.deflatePC`` are
    appended to ``self.SSXex`` and ``self.SSXac``.  ``self.SSX`` keeps the
    second value returned by ``deflatePC`` for the first component only.

    Returns nothing; ``self.A`` is set to ``targetA``.
    """
    n_rows, n_cols = np.shape(X)
    self.nobj = n_rows
    self.nvar = n_cols
    self.X = X

    X, centre = center(X)
    X, weights = scale(X, autoscale)
    self.mu = centre
    self.wg = weights
    self.autoscale = autoscale

    accumulated = 0.0
    for component in range(targetA):
        # extracts LV
        scores, loadings = self.extractPC(X)
        self.t.append(scores)
        self.p.append(loadings)

        # deflates X
        X, ss_total, ss_explained = self.deflatePC(X, scores, loadings)
        accumulated += ss_explained
        self.SSXex.append(ss_explained)
        self.SSXac.append(accumulated)

        # only the value reported for the first component is kept
        if component == 0:
            self.SSX = ss_total

    self.A = targetA
def getLOO(self, X, Y, Xout):
    """Fit a temporary forest on (X, Y) and predict ``Xout`` with it.

    The estimator is configured from ``self.estimators``,
    ``self.features`` and ``self.quantitative`` and always uses the same
    fixed seed.  When ``self.autoscale`` is true X is passed through
    ``center`` and ``scale`` first.

    NOTE(review): ``Xout`` is used as given and is not transformed here;
    presumably the caller has already scaled it -- confirm.

    Returns a tuple ``(predictions for Xout, out-of-bag score)``.
    """
    if self.autoscale:
        X, _centre = center(X)
        X, _weights = scale(X, self.autoscale)

    seed = 1226  # no reason to pick this number

    if self.quantitative:
        forest_class = RandomForestRegressor
    else:
        forest_class = RandomForestClassifier

    model = forest_class(n_estimators=int(self.estimators),
                         warm_start=False,
                         max_features=self.features,
                         oob_score=True,
                         random_state=seed)
    model.fit(X, Y)

    predicted = model.predict(Xout)
    return (predicted, model.oob_score_)
def validateLOO(self, A, gui=False):
    """Leave-One-Out cross-validation of the first A dimensions.

    Each object in turn is removed from ``self.X`` / ``self.Y``; the
    reduced matrices are centred (X is also passed through ``scale``
    with ``self.autoscale``), the left-out object is transformed with the
    same centre and weights, and ``self.getLOO`` is asked for its
    predictions, which are read as ``yp[0] .. yp[A-1]``.

    Parameters:
        A   -- number of dimensions to validate.
        gui -- when true, progress is reported through ``updateProgress``.

    Side effects: stores ``self.SSY``, ``self.SDEP``, ``self.Q2`` and
    ``self.Av``.

    Returns the (nobj, A+1) matrix YP whose column 0 holds the observed Y
    and columns 1..A the predictions, or None when ``self.X`` or
    ``self.Y`` has not been set.
    """
    # Identity test: ``array == None`` is evaluated elementwise by NumPy
    # and the resulting array cannot be used as a truth value.
    if self.X is None or self.Y is None:
        return

    X = self.X
    Y = self.Y

    nobj, _ = np.shape(X)

    # The mean does not change inside the loop, so compute it once.
    y_mean = np.mean(Y)
    SSY0 = 0.0
    for i in range(nobj):
        SSY0 += np.square(Y[i] - y_mean)

    SSY = np.zeros(A, dtype=np.float64)
    YP = np.zeros((nobj, A + 1), dtype=np.float64)

    if gui:
        updateProgress(0.0)

    for i in range(nobj):
        # build reduced X and Y matrices removing i object
        Xr = np.delete(X, i, axis=0)
        Yr = np.delete(Y, i)

        Xr, muxr = center(Xr)
        Xr, wgxr = scale(Xr, self.autoscale)
        Yr, muyr = center(Yr)

        # apply the reduced-set centre and weights to the left-out object
        xp = np.copy(X[i, :])
        xp -= muxr
        xp *= wgxr

        # predicts y for the i object, using A LV
        yp = self.getLOO(Xr, Yr, xp, A)
        yp += muyr

        # updates SSY with the object i errors
        YP[i, 0] = Y[i]
        for a in range(A):
            SSY[a] += np.square(yp[a] - Y[i])
            YP[i, a + 1] = yp[a]

        if gui:
            updateProgress(float(i) / float(nobj))

    if gui:
        # emit a newline after the progress output
        print("")

    self.SSY = SSY
    self.SDEP = [np.sqrt(ss / nobj) for ss in SSY]
    self.Q2 = [1.00 - (ss / SSY0) for ss in SSY]
    self.Av = A

    return YP
def build(self, X, Y, targetA=0, targetSSX=0.0, autoscale=False):
    """Build a new PLS model from the X and Y numpy matrices (NIPALS).

    Latent variables are extracted one at a time until the first of these
    conditions holds:
      1. ``targetA`` > 0 and that many LV have been extracted
      2. ``targetSSX`` > 0 and the accumulated explained fraction of SSX
         exceeds it
      3. the number of LV exceeds min(20, nobj / 5)

    Working copies of X and Y are centred, and X is additionally passed
    through ``scale`` with the ``autoscale`` flag.  The centres and
    weights are kept in ``self.mux``, ``self.muy`` and ``self.wgx``.

    Does not return anything.  Per-LV results are appended to ``self.t``,
    ``self.p``, ``self.w``, ``self.c``, ``self.SSXex``, ``self.SSXac``,
    ``self.SSYex``, ``self.SSYac``, ``self.SDEC`` and ``self.dmodx``; the
    number of LV is stored in ``self.Am``; ``self.X`` / ``self.Y`` are
    reset to copies of the raw inputs; and the cutoff and confusion-count
    arrays are zero-initialised with ``self.Am`` entries each.
    """
    nobj, nvarx = np.shape(X)
    self.nobj = nobj
    self.nvarx = nvarx

    self.X = X.copy()
    self.Y = Y.copy()

    self.X, self.mux = center(self.X)
    self.Y, self.muy = center(self.Y)
    self.X, self.wgx = scale(self.X, autoscale)

    self.autoscale = autoscale

    cum_x = 0.0
    cum_y = 0.0

    ss_x0, ss_y0, _ = self.computeSS(self.X, self.Y)
    prev_x = ss_x0
    prev_y = ss_y0

    n_lv = 0
    while True:
        t, p, w, c = self.extractLV(self.X, self.Y)
        self.t.append(t)
        self.p.append(p)
        self.w.append(w)
        self.c.append(c)

        self.X, self.Y = self.deflateLV(self.X, self.Y, t, p, c)
        ss_x, ss_y, residuals = self.computeSS(self.X, self.Y)

        # fraction of the initial sums of squares removed by this LV
        frac_x = (prev_x - ss_x) / ss_x0
        cum_x += frac_x
        frac_y = (prev_y - ss_y) / ss_y0
        cum_y += frac_y

        sdec = np.sqrt(ss_y / nobj)

        # never divide by a non-positive number of degrees of freedom
        dof = max(nvarx - n_lv, 1)
        residuals = [np.sqrt(d / dof) for d in residuals]

        prev_x = ss_x
        prev_y = ss_y

        self.SSXex.append(frac_x)
        self.SSXac.append(cum_x)
        self.SSYex.append(frac_y)
        self.SSYac.append(cum_y)
        self.SDEC.append(sdec)
        self.dmodx.append(residuals)

        n_lv += 1

        if targetA > 0 and n_lv == targetA:
            break
        if targetSSX > 0.0 and cum_x > targetSSX:
            break
        # prevents to extract a meaningless number of LV
        if n_lv > min(20, nobj / 5):
            break

    self.Am = n_lv

    # NIPALS is destructive, so we must retrieve X and Y from original
    # data for validation
    self.X = X.copy()
    self.Y = Y.copy()

    self.cutoff = np.zeros(self.Am, dtype=np.float64)
    for counter in ('TP', 'TN', 'FP', 'FN',
                    'TPpred', 'TNpred', 'FPpred', 'FNpred'):
        setattr(self, counter, np.zeros(self.Am))
def build(self, X, Y, quantitative=False, autoscale=False, nestimators=0,
          features='', random=False, tune=False, class_weight="balanced",
          cv='loo', n=2, p=1, lc=True, vpath=''):
    """Build a new RF model with the X and Y numpy matrices.

    X must be two-dimensional.  Working copies of X and Y are stored in
    ``self.X`` / ``self.Y``; when ``autoscale`` is true the working X is
    passed through ``center`` and ``scale`` before fitting.

    Parameters:
        quantitative -- true selects RandomForestRegressor, false selects
                        RandomForestClassifier.
        autoscale    -- centre/scale the working copy of X.
        nestimators  -- number of trees (converted with int()).
        features     -- forwarded as ``max_features``; the string 'none'
                        is translated to None.
        random       -- true leaves the estimator unseeded, false uses a
                        fixed seed.
        tune         -- true replaces estimators/features with the values
                        returned by ``self.optimize`` on the working data.
        class_weight -- forwarded to the classifier only.
        cv, n, p     -- when ``cv`` is truthy, ``self.cv`` is replaced by
                        the result of ``getCrossVal(cv, seed, n, p)``.
        lc           -- true also renders and saves learning curves.
        vpath        -- directory receiving ``RF-learning_curves.png``; a
                        second copy is always written to the current
                        directory.

    Returns nothing.  The fitted estimator is stored in ``self.clf`` and
    ``self.X`` / ``self.Y`` are reset to copies of the raw inputs.
    """
    # Function-scope import so the block does not depend on the file header.
    import os

    nobj, nvarx = np.shape(X)

    self.nobj = nobj
    self.nvarx = nvarx

    self.quantitative = quantitative
    self.autoscale = autoscale
    self.estimators = nestimators
    self.features = features
    self.random = random
    self.class_weight = class_weight
    self.learning_curve = lc
    self.n = n
    self.p = p
    self.cv = cv

    self.X = X.copy()
    self.Y = Y.copy()
    self.vpath = vpath

    if autoscale:
        self.X, self.mux = center(self.X)
        self.X, self.wgx = scale(self.X, autoscale)

    if random:
        RANDOM_STATE = None
    else:
        RANDOM_STATE = 1226  # no reason to pick this number

    if self.cv:
        self.cv = getCrossVal(self.cv, RANDOM_STATE, self.n, self.p)

    if tune:
        self.estimators, self.features = self.optimize(self.X, self.Y)

    if self.features == 'none':
        self.features = None

    if self.quantitative:
        print("Building Quantitative RF model")
        self.clf = RandomForestRegressor(n_estimators=int(self.estimators),
                                         warm_start=False,
                                         max_features=self.features,
                                         oob_score=True,
                                         random_state=RANDOM_STATE)
    else:
        print("Building Qualitative RF_model")
        self.clf = RandomForestClassifier(n_estimators=int(self.estimators),
                                          warm_start=False,
                                          max_features=self.features,
                                          oob_score=True,
                                          random_state=RANDOM_STATE,
                                          class_weight=self.class_weight)

    self.clf.fit(self.X, self.Y)

    if self.learning_curve:
        # Announce the step only when it is actually carried out.
        print('Building Learning Curves')
        title = "Learning Curves (RF)"
        # 10 random 80/20 splits with a fixed seed
        splitter = ShuffleSplit(n_splits=10, test_size=0.2, random_state=0)
        plot = plot_learning_curve(self.clf, title, self.X, self.Y,
                                   (0.0, 1.01), cv=splitter)
        # os.path.join keeps an empty vpath relative; plain concatenation
        # with "/" would point at the filesystem root.
        plot.savefig(os.path.join(self.vpath, "RF-learning_curves.png"),
                     format='png')
        plot.savefig("./RF-learning_curves.png", format='png')

    # Regenerate the X and Y, since they might have been centered/scaled
    self.X = X.copy()
    self.Y = Y.copy()
def build(self, X, Y, quantitative=False, autoscale=False, nestimators=0,
          features='', random=False, tune=False, class_weight="balanced",
          cv='loo', n=2, p=1, lc=True, vpath=''):
    """Build a new RF model with the X and Y numpy matrices.

    X must be two-dimensional.  Working copies of X and Y are stored in
    ``self.X`` / ``self.Y``; when ``autoscale`` is true the working X is
    passed through ``center`` and ``scale`` before fitting.

    Parameters:
        quantitative -- true selects RandomForestRegressor, false selects
                        RandomForestClassifier.
        autoscale    -- centre/scale the working copy of X.
        nestimators  -- number of trees (converted with int()).
        features     -- forwarded as ``max_features``; the string 'none'
                        is translated to None.
        random       -- true leaves the estimator unseeded, false uses a
                        fixed seed.
        tune         -- true replaces estimators/features with the values
                        returned by ``self.optimize`` on the working data.
        class_weight -- forwarded to the classifier only.
        cv, n, p     -- when ``cv`` is truthy, ``self.cv`` is replaced by
                        the result of ``getCrossVal(cv, seed, n, p)``.
        lc           -- true also renders and saves learning curves.
        vpath        -- directory receiving ``RF-learning_curves.png``; a
                        second copy is always written to the current
                        directory.

    Returns nothing.  The fitted estimator is stored in ``self.clf`` and
    ``self.X`` / ``self.Y`` are reset to copies of the raw inputs.
    """
    # Function-scope import so the block does not depend on the file header.
    import os

    nobj, nvarx = np.shape(X)

    self.nobj = nobj
    self.nvarx = nvarx

    self.quantitative = quantitative
    self.autoscale = autoscale
    self.estimators = nestimators
    self.features = features
    self.random = random
    self.class_weight = class_weight
    self.learning_curve = lc
    self.n = n
    self.p = p
    self.cv = cv

    self.X = X.copy()
    self.Y = Y.copy()
    self.vpath = vpath

    if autoscale:
        self.X, self.mux = center(self.X)
        self.X, self.wgx = scale(self.X, autoscale)

    # no reason to pick this number
    seed = None if random else 1226

    if self.cv:
        self.cv = getCrossVal(self.cv, seed, self.n, self.p)

    if tune:
        self.estimators, self.features = self.optimize(self.X, self.Y)

    if self.features == 'none':
        self.features = None

    if self.quantitative:
        print("Building Quantitative RF model")
        self.clf = RandomForestRegressor(n_estimators=int(self.estimators),
                                         warm_start=False,
                                         max_features=self.features,
                                         oob_score=True,
                                         random_state=seed)
    else:
        print("Building Qualitative RF_model")
        self.clf = RandomForestClassifier(n_estimators=int(self.estimators),
                                          warm_start=False,
                                          max_features=self.features,
                                          oob_score=True,
                                          random_state=seed,
                                          class_weight=self.class_weight)

    self.clf.fit(self.X, self.Y)

    if self.learning_curve:
        # Announce the step only when it is actually carried out.
        print('Building Learning Curves')
        title = "Learning Curves (RF)"
        # 10 random 80/20 splits with a fixed seed
        splitter = ShuffleSplit(n_splits=10, test_size=0.2, random_state=0)
        plot = plot_learning_curve(self.clf, title, self.X, self.Y,
                                   (0.0, 1.01), cv=splitter)
        # os.path.join keeps an empty vpath relative; plain concatenation
        # with "/" would point at the filesystem root.
        curve_file = os.path.join(self.vpath, "RF-learning_curves.png")
        plot.savefig(curve_file, format='png')
        plot.savefig("./RF-learning_curves.png", format='png')

    # Regenerate the X and Y, since they might have been centered/scaled
    self.X = X.copy()
    self.Y = Y.copy()
def validateLOO(self, A, gui=False):
    """Leave-One-Out cross-validation of the first A dimensions.

    Each object in turn is removed from ``self.X`` / ``self.Y``; the
    reduced matrices are centred (X is also passed through ``scale``
    with ``self.autoscale``), the left-out object is transformed with the
    same centre and weights, and ``self.getLOO`` is asked for its
    predictions, which are read as ``yp[0] .. yp[A-1]``.

    Parameters:
        A   -- number of dimensions to validate.
        gui -- when true, progress is reported through ``updateProgress``.

    Side effects: stores ``self.SSY``, ``self.SDEP``, ``self.Q2`` and
    ``self.Av``.

    Returns the (nobj, A+1) matrix YP whose column 0 holds the observed Y
    and columns 1..A the predictions, or None when ``self.X`` or
    ``self.Y`` has not been set.
    """
    # Identity test: ``array == None`` is evaluated elementwise by NumPy
    # and the resulting array cannot be used as a truth value.
    if self.X is None or self.Y is None:
        return

    X = self.X
    Y = self.Y

    nobj, _ = np.shape(X)

    # The mean does not change inside the loop, so compute it once.
    y_mean = np.mean(Y)
    SSY0 = 0.0
    for i in range(nobj):
        SSY0 += np.square(Y[i] - y_mean)

    SSY = np.zeros(A, dtype=np.float64)
    YP = np.zeros((nobj, A + 1), dtype=np.float64)

    if gui:
        updateProgress(0.0)

    for i in range(nobj):
        # build reduced X and Y matrices removing i object
        Xr = np.delete(X, i, axis=0)
        Yr = np.delete(Y, i)

        Xr, muxr = center(Xr)
        Xr, wgxr = scale(Xr, self.autoscale)
        Yr, muyr = center(Yr)

        # apply the reduced-set centre and weights to the left-out object
        xp = np.copy(X[i, :])
        xp -= muxr
        xp *= wgxr

        # predicts y for the i object, using A LV
        yp = self.getLOO(Xr, Yr, xp, A)
        yp += muyr

        # updates SSY with the object i errors
        YP[i, 0] = Y[i]
        for a in range(A):
            SSY[a] += np.square(yp[a] - Y[i])
            YP[i, a + 1] = yp[a]

        if gui:
            updateProgress(float(i) / float(nobj))

    if gui:
        # emit a newline after the progress output
        print("")

    self.SSY = SSY
    self.SDEP = [np.sqrt(ss / nobj) for ss in SSY]
    self.Q2 = [1.00 - (ss / SSY0) for ss in SSY]
    self.Av = A

    return YP
def build(self, X, Y, targetA=0, targetSSX=0.0, autoscale=False):
    """Build a new PLS model from the X and Y numpy matrices (NIPALS).

    Latent variables are extracted one at a time until the first of these
    conditions holds:
      1. ``targetA`` > 0 and that many LV have been extracted
      2. ``targetSSX`` > 0 and the accumulated explained fraction of SSX
         exceeds it
      3. the number of LV exceeds min(20, nobj / 5)

    Working copies of X and Y are centred, and X is additionally passed
    through ``scale`` with the ``autoscale`` flag.  The centres and
    weights are kept in ``self.mux``, ``self.muy`` and ``self.wgx``.

    Does not return anything.  Per-LV results are appended to ``self.t``,
    ``self.p``, ``self.w``, ``self.c``, ``self.SSXex``, ``self.SSXac``,
    ``self.SSYex``, ``self.SSYac``, ``self.SDEC`` and ``self.dmodx``; the
    number of LV is stored in ``self.Am``; ``self.X`` / ``self.Y`` are
    reset to copies of the raw inputs; and the cutoff and confusion-count
    arrays are zero-initialised with ``self.Am`` entries each.
    """
    nobj, nvarx = np.shape(X)
    self.nobj = nobj
    self.nvarx = nvarx

    self.X = X.copy()
    self.Y = Y.copy()

    self.X, self.mux = center(self.X)
    self.Y, self.muy = center(self.Y)
    self.X, self.wgx = scale(self.X, autoscale)

    self.autoscale = autoscale

    explained_x_total = 0.0
    explained_y_total = 0.0

    initial_ssx, initial_ssy, _ = self.computeSS(self.X, self.Y)
    last_ssx, last_ssy = initial_ssx, initial_ssy

    count = 0
    while True:
        t, p, w, c = self.extractLV(self.X, self.Y)
        self.t.append(t)
        self.p.append(p)
        self.w.append(w)
        self.c.append(c)

        self.X, self.Y = self.deflateLV(self.X, self.Y, t, p, c)
        current_ssx, current_ssy, distances = self.computeSS(self.X, self.Y)

        # fraction of the initial sums of squares removed by this LV
        explained_x = (last_ssx - current_ssx) / initial_ssx
        explained_x_total += explained_x
        explained_y = (last_ssy - current_ssy) / initial_ssy
        explained_y_total += explained_y

        sdec = np.sqrt(current_ssy / nobj)

        # never divide by a non-positive number of degrees of freedom
        dof = max(nvarx - count, 1)
        distances = [np.sqrt(d / dof) for d in distances]

        last_ssx, last_ssy = current_ssx, current_ssy

        self.SSXex.append(explained_x)
        self.SSXac.append(explained_x_total)
        self.SSYex.append(explained_y)
        self.SSYac.append(explained_y_total)
        self.SDEC.append(sdec)
        self.dmodx.append(distances)

        count += 1

        reached_target_a = targetA > 0 and count == targetA
        if reached_target_a:
            break
        reached_target_ssx = targetSSX > 0.0 and explained_x_total > targetSSX
        if reached_target_ssx:
            break
        # prevents to extract a meaningless number of LV
        if count > min(20, nobj / 5):
            break

    self.Am = count

    # NIPALS is destructive, so we must retrieve X and Y from original
    # data for validation
    self.X = X.copy()
    self.Y = Y.copy()

    self.cutoff = np.zeros(self.Am, dtype=np.float64)
    for counter in ('TP', 'TN', 'FP', 'FN',
                    'TPpred', 'TNpred', 'FPpred', 'FNpred'):
        setattr(self, counter, np.zeros(self.Am))