def train(self, X, T,
          nIterations=100, weightPrecision=1e-10, errorPrecision=1e-10):
    if self.Xmeans is None:
        self.Xmeans = X.mean(axis=0)
        self.Xstds = X.std(axis=0)
    X = self.standardizeX(X)
    if T.ndim == 1:
        T = T.reshape((-1, 1))
    if self.Tmeans is None:
        self.Tmeans = T.mean(axis=0)
        self.Tstds = T.std(axis=0)
    T = self.standardizeT(T)
    scgresult = SCG.scg(self._pack(self.Vs, self.W),
                        self.objectiveF, self.gradF, X, T,
                        xPrecision=weightPrecision,
                        fPrecision=errorPrecision,
                        nIterations=nIterations,
                        iterationVariable=self.iteration,
                        ftracep=True)
    self._unpack(scgresult['x'])
    self.reason = scgresult['reason']
    self.errorTrace = scgresult['ftrace']
    self.numberOfIterations = len(self.errorTrace)
    self.trained.value = True
    return self
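# A minimal usage sketch for the train() method above. The class name
# NeuralNetwork and its constructor signature are assumptions suggested by
# self.ni / self.nhs / self.no in the surrounding code, not confirmed by it.
import numpy as np

X = np.linspace(0, 10, 50).reshape((-1, 1))
T = np.sin(X)
nnet = NeuralNetwork(1, [10], 1)   # hypothetical: 1 input, one hidden layer of 10, 1 output
nnet.train(X, T, nIterations=500)
print(nnet.reason, nnet.numberOfIterations)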
def train(self, X, T,
          nIterations=100, weightPrecision=0.0001, errorPrecision=0.0001):
    if self.Xmeans is None:
        self.Xmeans = X.mean(axis=0)
        self.Xstds = X.std(axis=0)
    X = self.standardizeX(X)
    X1 = self.addOnes(X)
    self.classes = np.unique(T)
    if self.no != len(self.classes) - 1:
        raise ValueError("In NeuralNetClassifier, the number of outputs must be one less than\n"
                         " the number of classes in the training data. The given number of outputs\n"
                         " is %d and number of classes is %d. Try changing the number of outputs in the\n"
                         " call to NeuralNetClassifier()." % (self.no, len(self.classes)))
    T = self.makeIndicatorVars(T)

    # Local functions used by gradientDescent.scg()

    def pack(V, W):
        return np.hstack((V.flat, W.flat))

    def unpack(w):
        self.V[:] = w[:(self.ni+1)*self.nh].reshape((self.ni+1, self.nh))
        self.W[:] = w[(self.ni+1)*self.nh:].reshape((self.nh+1, self.no))

    def objectiveF(w):
        unpack(w)
        Y = np.dot(self.addOnes(np.tanh(np.dot(X1, self.V))), self.W)
        expY = np.exp(Y)
        denom = 1 + np.sum(expY, axis=1).reshape((-1, 1))
        Y = np.hstack((expY / denom, 1 / denom))
        return -np.mean(T * np.log(Y))

    def gradF(w):
        unpack(w)
        Z = np.tanh(np.dot(X1, self.V))
        Z1 = self.addOnes(Z)
        Y = np.dot(Z1, self.W)
        expY = np.exp(Y)
        denom = 1 + np.sum(expY, axis=1).reshape((-1, 1))
        Y = np.hstack((expY / denom, 1.0 / denom))
        error = (Y[:, :-1] - T[:, :-1]) / (X1.shape[0] * T.shape[1])
        dV = np.dot(X1.T, np.dot(error, self.W[1:, :].T) * (1 - Z**2))
        dW = np.dot(Z1.T, error)
        return pack(dV, dW)

    scgresult = SCG.scg(pack(self.V, self.W), objectiveF, gradF,
                        xPrecision=weightPrecision,
                        fPrecision=errorPrecision,
                        nIterations=nIterations,
                        iterationVariable=self.iteration,
                        ftracep=True)
    unpack(scgresult['x'])
    self.reason = scgresult['reason']
    self.errorTrace = scgresult['ftrace']
    self.numberOfIterations = len(self.errorTrace)
    self.trained.value = True
    return self
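# A small standalone sketch of the softmax trick used in objectiveF above:
# probabilities for K classes are computed from K-1 linear outputs, with the
# last class as an implicit reference whose unnormalized score is exp(0) = 1.
# The values below are illustrative only.
import numpy as np

Ylin = np.array([[2.0, 0.5],       # two samples, K-1 = 2 linear outputs
                 [-1.0, 1.0]])
expY = np.exp(Ylin)
denom = 1 + np.sum(expY, axis=1).reshape((-1, 1))
probs = np.hstack((expY / denom, 1 / denom))
print(probs, probs.sum(axis=1))    # each row sums to 1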
def update(ignore):
    global x
    x = np.random.uniform(0, 10, 2)
    resultSCG = SCG.scg(x, f, df, center, S, xtracep=True, ftracep=True,
                        xPrecision=0, fPrecision=0, nIterations=20)
    for i in range(20):
        resultSteepest[i, :] = x
        x = x - 0.08 * df(x, center, S)
    scglines.set_data(resultSCG['xtrace'][:, 0], resultSCG['xtrace'][:, 1])
    steepestlines.set_data(resultSteepest[:, 0], resultSteepest[:, 1])
    return (scglines, steepestlines)
def train(self, X, T,
          nIterations=100, weightPrecision=0.0001, errorPrecision=0.0001):
    if self.Xmeans is None:
        self.Xmeans = X.mean(axis=0)
        self.Xstds = X.std(axis=0)
    X = self.standardizeX(X)
    X1 = self.addOnes(X)
    if T.ndim == 1:
        T = T.reshape((-1, 1))
    if self.Tmeans is None:
        self.Tmeans = T.mean(axis=0)
        self.Tstds = T.std(axis=0)
    T = self.standardizeT(T)

    # Local functions used by gradientDescent.scg()

    def pack(V, W):
        return np.hstack((V.flat, W.flat))

    def unpack(w):
        self.V[:] = w[:(self.ni+1)*self.nh].reshape((self.ni+1, self.nh))
        self.W[:] = w[(self.ni+1)*self.nh:].reshape((self.nh+1, self.no))

    def objectiveF(w):
        unpack(w)
        Y = np.dot(self.addOnes(np.tanh(np.dot(X1, self.V))), self.W)
        return 0.5 * np.mean((Y - T)**2)

    def gradF(w):
        unpack(w)
        Z = np.tanh(np.dot(X1, self.V))
        Z1 = self.addOnes(Z)
        Y = np.dot(Z1, self.W)
        error = (Y - T) / (X1.shape[0] * T.shape[1])
        dV = np.dot(X1.T, np.dot(error, self.W[1:, :].T) * (1 - Z**2))
        dW = np.dot(Z1.T, error)
        return pack(dV, dW)

    scgresult = SCG.scg(pack(self.V, self.W), objectiveF, gradF,
                        xPrecision=weightPrecision,
                        fPrecision=errorPrecision,
                        nIterations=nIterations,
                        iterationVariable=self.iteration,
                        ftracep=True)
    unpack(scgresult['x'])
    self.reason = scgresult['reason']
    self.errorTrace = scgresult['ftrace']
    self.numberOfIterations = len(self.errorTrace)
    self.trained.value = True
    return self
def train(self, X, R, Q, Y, gamma=1,
          nIterations=100, weightPrecision=1e-10, errorPrecision=1e-10):
    X = self.standardizeX(X)
    T = R + gamma * Q
    scgresult = SCG.scg(self._pack(self.Vs, self.W),
                        self.objectiveF, self.gradF, X, T,
                        xPrecision=weightPrecision,
                        fPrecision=errorPrecision,
                        nIterations=nIterations,
                        iterationVariable=self.iteration,
                        ftracep=True)
    self._unpack(scgresult['x'])
    self.reason = scgresult['reason']
    self.errorTrace = scgresult['ftrace']
    self.numberOfIterations = len(self.errorTrace)
    self.trained.value = True
    return self
def train(self, X, R, Q, Y, gamma=1,
          nIterations=100, weightPrecision=0.0001, errorPrecision=0.0001):
    X = self.standardizeX(X)
    X1 = self.addOnes(X)

    # Local functions used by gradientDescent.scg()

    def pack(V, W):
        return np.hstack((V.flat, W.flat))

    def unpack(w):
        self.V[:] = w[:(self.ni+1)*self.nh].reshape((self.ni+1, self.nh))
        self.W[:] = w[(self.ni+1)*self.nh:].reshape((self.nh+1, self.no))

    def objectiveF(w):
        unpack(w)
        Y = np.dot(self.addOnes(np.tanh(np.dot(X1, self.V))), self.W)
        return 0.5 * np.mean((R + gamma*Q - Y)**2)

    def gradF(w):
        unpack(w)
        Z = np.tanh(np.dot(X1, self.V))
        Z1 = self.addOnes(Z)
        Y = np.dot(Z1, self.W)
        nSamples = X1.shape[0]
        error = -(R + gamma * Q - Y) / nSamples
        dV = np.dot(X1.T, np.dot(error, self.W[1:, :].T) * (1 - Z**2))
        dW = np.dot(Z1.T, error)
        return pack(dV, dW)

    scgresult = SCG.scg(pack(self.V, self.W), objectiveF, gradF,
                        xPrecision=weightPrecision,
                        fPrecision=errorPrecision,
                        nIterations=nIterations,
                        iterationVariable=self.iteration,
                        ftracep=True)
    unpack(scgresult['x'])
    self.reason = scgresult['reason']
    self.errorTrace = scgresult['ftrace']
    self.numberOfIterations = len(self.errorTrace)
    self.trained.value = True
    return self
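# A small standalone sketch of the temporal-difference target fit by the
# reinforcement-learning train() variants above: the network output Y is
# regressed toward R + gamma * Q, where R is the immediate reinforcement and
# Q holds the network's own value estimates for the next states (a
# bootstrapped target). The values below are illustrative only.
import numpy as np

R = np.array([[0.0], [1.0], [0.0]])       # rewards for three transitions
Qnext = np.array([[0.5], [0.0], [0.2]])   # value estimates of next states
gamma = 0.9
T = R + gamma * Qnext                     # targets for the squared-error fit
print(T)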
def train(self, X, T,
          nIterations=100, weightPrecision=1e-10, errorPrecision=1e-10):
    if self.Xmeans is None:
        self.Xmeans = X.mean(axis=0)
        self.Xstds = X.std(axis=0)
    X = self.standardizeX(X)
    self.classes = np.unique(T)
    if self.no != len(self.classes) - 1:
        raise ValueError("In NeuralNetworkClassifier, the number of outputs must be one less than\n"
                         " the number of classes in the training data. The given number of outputs\n"
                         " is %d and number of classes is %d. Try changing the number of outputs in the\n"
                         " call to NeuralNetworkClassifier()." % (self.no, len(self.classes)))
    T = ml.makeIndicatorVars(T)
    scgresult = SCG.scg(self._pack(self.Vs, self.W),
                        self.objectiveF, self.gradF, X, T,
                        xPrecision=weightPrecision,
                        fPrecision=errorPrecision,
                        nIterations=nIterations,
                        iterationVariable=self.iteration,
                        ftracep=True)
    self._unpack(scgresult['x'])
    self.reason = scgresult['reason']
    self.errorTrace = scgresult['ftrace']
    self.numberOfIterations = len(self.errorTrace)
    self.trained.value = True
    return self
    Y = np.dot(Z1, W)
    nSamples = X1.shape[0]
    nOutputs = T.shape[1]
    error = (Y - T) / (nSamples * nOutputs)
    dV = np.dot(X1.T, np.dot(error, W[1:, :].T) * (1 - Z**2))
    dW = np.dot(Z1.T, error)
    return pack(dV, dW)

# Initialize weights to small uniformly distributed values between -0.1 and 0.1
V = np.random.uniform(-0.1, 0.1, (nInputs+1, nHiddens))
W = np.random.uniform(-0.1, 0.1, (1+nHiddens, nOutputs))

result = SCG.scg(pack(V, W), errorFunction, errorGradient,
                 xPrecision=1.e-8, fPrecision=1.e-12,
                 nIterations=2000, ftracep=True)
result

# Now plot everything and take a look
fig = plt.figure(figsize=(10, 15))
plt.subplot(3, 1, 1)
plt.plot(result['ftrace'])
plt.xlabel('Epochs')
plt.ylabel('Train RMSE')
plt.subplot(3, 1, 2)
Y = np.dot(addOnes(np.tanh(np.dot(X1, V))), W)
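# A finite-difference check of errorGradient against errorFunction is a
# useful sanity test before handing both to SCG.scg. This is a sketch that
# assumes the pack(), errorFunction(), and errorGradient() defined above.
import numpy as np

w = pack(V, W)
eps = 1e-6
approx = np.zeros_like(w)
for i in range(len(w)):
    wp, wm = w.copy(), w.copy()
    wp[i] += eps
    wm[i] -= eps
    approx[i] = (errorFunction(wp) - errorFunction(wm)) / (2 * eps)
print(np.max(np.abs(approx - errorGradient(w))))  # should be close to zero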
both = np.vstack((X.flat, Y.flat)).T
nall = n * n
Z = np.zeros(nall)
for i in range(n*n):
    Z[i] = f(both[i, :], center, S)
Z.resize((n, n))

# Initialize the figure and draw f(x)
fig = plt.figure(figsize=(10, 10))
ax = plt.gca()
ax.contourf(X, Y, Z, 20, alpha=0.3)
ax.axis('tight')

x = np.random.uniform(0, 10, 2)
resultSCG = SCG.scg(x, f, df, center, S, xtracep=True, ftracep=True,
                    xPrecision=0, fPrecision=0, nIterations=20)
resultSteepest = np.zeros((20, 2))
for i in range(20):
    resultSteepest[i, :] = x
    x = x - 0.1 * df(x, center, S)

scglines = ax.plot(resultSCG['xtrace'][:, 0], resultSCG['xtrace'][:, 1], 'go-')[0]
steepestlines = ax.plot(resultSteepest[:, 0], resultSteepest[:, 1], 'ro-')[0]
ax.legend(('SCG', 'Steepest'))
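# The update() function defined earlier matches matplotlib's FuncAnimation
# callback signature (one argument, returns the artists to redraw). A sketch
# of how it might be driven, assuming the fig and update from this demo:
from matplotlib.animation import FuncAnimation

anim = FuncAnimation(fig, update, frames=10, interval=1000, blit=True)
plt.show()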
def train(self, X, T,
          nIterations=100, weightPrecision=0.0001, errorPrecision=0.0001):
    if self.Xmeans is None:
        self.Xmeans = X.mean(axis=0)
        self.Xstds = X.std(axis=0)
    X = self.standardizeX(X)
    X1 = self.addOnes(X)
    if T.ndim == 1:
        T = T.reshape((-1, 1))
    if self.Tmeans is None:
        self.Tmeans = T.mean(axis=0)
        self.Tstds = T.std(axis=0)
    T = self.standardizeT(T)

    # Local functions used by gradientDescent.scg()

    def pack(Vs, W):
        return np.hstack([V.flat for V in Vs] + [W.flat])

    def unpack(w):
        first = 0
        numInThisLayer = self.ni
        for i in range(len(self.Vs)):
            self.Vs[i][:] = w[first:first+(numInThisLayer+1)*self.nhs[i]].reshape((numInThisLayer+1, self.nhs[i]))
            first += (numInThisLayer+1) * self.nhs[i]
            numInThisLayer = self.nhs[i]
        self.W[:] = w[first:].reshape((numInThisLayer+1, self.no))

    def objectiveF(w):
        unpack(w)
        Zprev = X
        for i in range(len(self.nhs)):
            V = self.Vs[i]
            Zprev = np.tanh(np.dot(Zprev, V[1:, :]) + V[0:1, :])
        Y = np.dot(Zprev, self.W[1:, :]) + self.W[0:1, :]
        return 0.5 * np.mean((Y - T)**2)

    def gradF(w):
        unpack(w)
        Zprev = X
        Z = [Zprev]
        for i in range(len(self.nhs)):
            V = self.Vs[i]
            Zprev = np.tanh(np.dot(Zprev, V[1:, :]) + V[0:1, :])
            Z.append(Zprev)
        Y = np.dot(Zprev, self.W[1:, :]) + self.W[0:1, :]
        delta = (Y - T) / (X1.shape[0] * T.shape[1])
        dW = np.vstack((np.dot(np.ones((1, delta.shape[0])), delta),
                        np.dot(Z[-1].T, delta)))
        dVs = []
        delta = (1 - Z[-1]**2) * np.dot(delta, self.W[1:, :].T)
        for Zi in range(len(self.nhs), 0, -1):
            Vi = Zi - 1  # because X is the first element of Z
            dV = np.vstack((np.dot(np.ones((1, delta.shape[0])), delta),
                            np.dot(Z[Zi-1].T, delta)))
            dVs.insert(0, dV)
            delta = np.dot(delta, self.Vs[Vi][1:, :].T) * (1 - Z[Zi-1]**2)
        return pack(dVs, dW)

    scgresult = SCG.scg(pack(self.Vs, self.W), objectiveF, gradF,
                        xPrecision=weightPrecision,
                        fPrecision=errorPrecision,
                        nIterations=nIterations,
                        iterationVariable=self.iteration,
                        ftracep=True)
    unpack(scgresult['x'])
    self.reason = scgresult['reason']
    self.errorTrace = scgresult['ftrace']
    self.numberOfIterations = len(self.errorTrace)
    self.trained.value = True
    return self
def train(self, X, R, Q, Y, gamma=1,
          nIterations=100, weightPrecision=0.0001, errorPrecision=0.0001):
    X = self.standardizeX(X)
    X1 = self.addOnes(X)

    # Local functions used by gradientDescent.scg()

    def pack(Vs, W):
        return np.hstack([V.flat for V in Vs] + [W.flat])

    def unpack(w):
        first = 0
        numInThisLayer = self.ni
        for i in range(len(self.Vs)):
            self.Vs[i][:] = w[first:first+(numInThisLayer+1)*self.nhs[i]].reshape((numInThisLayer+1, self.nhs[i]))
            first += (numInThisLayer+1) * self.nhs[i]
            numInThisLayer = self.nhs[i]
        self.W[:] = w[first:].reshape((numInThisLayer+1, self.no))

    def objectiveF(w):
        unpack(w)
        Zprev = X
        for i in range(len(self.nhs)):
            V = self.Vs[i]
            Zprev = np.tanh(np.dot(Zprev, V[1:, :]) + V[0:1, :])
        Y = np.dot(Zprev, self.W[1:, :]) + self.W[0:1, :]
        return 0.5 * np.mean((R + gamma*Q - Y)**2)

    def gradF(w):
        unpack(w)
        Zprev = X
        Z = [Zprev]
        for i in range(len(self.nhs)):
            V = self.Vs[i]
            Zprev = np.tanh(np.dot(Zprev, V[1:, :]) + V[0:1, :])
            Z.append(Zprev)
        Y = np.dot(Zprev, self.W[1:, :]) + self.W[0:1, :]
        nSamples = X1.shape[0]
        delta = -(R + gamma * Q - Y) / nSamples
        dW = np.vstack((np.dot(np.ones((1, delta.shape[0])), delta),
                        np.dot(Z[-1].T, delta)))
        dVs = []
        delta = (1 - Z[-1]**2) * np.dot(delta, self.W[1:, :].T)
        for Zi in range(len(self.nhs), 0, -1):
            Vi = Zi - 1  # because X is the first element of Z
            dV = np.vstack((np.dot(np.ones((1, delta.shape[0])), delta),
                            np.dot(Z[Zi-1].T, delta)))
            dVs.insert(0, dV)
            delta = np.dot(delta, self.Vs[Vi][1:, :].T) * (1 - Z[Zi-1]**2)
        return pack(dVs, dW)

    scgresult = SCG.scg(pack(self.Vs, self.W), objectiveF, gradF,
                        xPrecision=weightPrecision,
                        fPrecision=errorPrecision,
                        nIterations=nIterations,
                        iterationVariable=self.iteration,
                        ftracep=True)
    unpack(scgresult['x'])
    self.reason = scgresult['reason']
    self.errorTrace = scgresult['ftrace']
    self.numberOfIterations = len(self.errorTrace)
    self.trained.value = True
    return self
def train(self, X, T,
          nIterations=100, weightPrecision=0.0001, errorPrecision=0.0001):
    if self.Xmeans is None:
        self.Xmeans = X.mean(axis=0)
        self.Xstds = X.std(axis=0)
    X = self.standardizeX(X)
    X1 = self.addOnes(X)
    self.classes = np.unique(T)
    if self.no != len(self.classes) - 1:
        raise ValueError("In NeuralNetClassifier, the number of outputs must be one less than\n"
                         " the number of classes in the training data. The given number of outputs\n"
                         " is %d and number of classes is %d. Try changing the number of outputs in the\n"
                         " call to NeuralNetClassifier()." % (self.no, len(self.classes)))
    T = self.makeIndicatorVars(T)

    # Local functions used by gradientDescent.scg()

    def pack(Vs, W):
        return np.hstack([V.flat for V in Vs] + [W.flat])

    def unpack(w):
        first = 0
        numInThisLayer = self.ni
        for i in range(len(self.Vs)):
            self.Vs[i][:] = w[first:first+(numInThisLayer+1)*self.nhs[i]].reshape((numInThisLayer+1, self.nhs[i]))
            first += (numInThisLayer+1) * self.nhs[i]
            numInThisLayer = self.nhs[i]
        self.W[:] = w[first:].reshape((numInThisLayer+1, self.no))

    def objectiveF(w):
        unpack(w)
        Zprev = X
        for i in range(len(self.nhs)):
            V = self.Vs[i]
            Zprev = np.tanh(np.dot(Zprev, V[1:, :]) + V[0:1, :])
        Y = np.dot(Zprev, self.W[1:, :]) + self.W[0:1, :]
        expY = np.exp(Y)
        denom = 1 + np.sum(expY, axis=1).reshape((-1, 1))
        Y = np.hstack((expY / denom, 1 / denom))
        return -np.mean(T * np.log(Y))

    def gradF(w):
        unpack(w)
        Zprev = X
        Z = [Zprev]
        for i in range(len(self.nhs)):
            V = self.Vs[i]
            Zprev = np.tanh(np.dot(Zprev, V[1:, :]) + V[0:1, :])
            Z.append(Zprev)
        Y = np.dot(Zprev, self.W[1:, :]) + self.W[0:1, :]
        expY = np.exp(Y)
        denom = 1 + np.sum(expY, axis=1).reshape((-1, 1))
        Y = np.hstack((expY / denom, 1.0 / denom))
        delta = (Y[:, :-1] - T[:, :-1]) / (X1.shape[0] * (T.shape[1]-1))
        dW = np.vstack((np.dot(np.ones((1, delta.shape[0])), delta),
                        np.dot(Z[-1].T, delta)))
        dVs = []
        delta = (1 - Z[-1]**2) * np.dot(delta, self.W[1:, :].T)
        for Zi in range(len(self.nhs), 0, -1):
            Vi = Zi - 1  # because X is the first element of Z
            dV = np.vstack((np.dot(np.ones((1, delta.shape[0])), delta),
                            np.dot(Z[Zi-1].T, delta)))
            dVs.insert(0, dV)
            delta = np.dot(delta, self.Vs[Vi][1:, :].T) * (1 - Z[Zi-1]**2)
        return pack(dVs, dW)

    scgresult = SCG.scg(pack(self.Vs, self.W), objectiveF, gradF,
                        xPrecision=weightPrecision,
                        fPrecision=errorPrecision,
                        nIterations=nIterations,
                        iterationVariable=self.iteration,
                        ftracep=True)
    unpack(scgresult['x'])
    self.reason = scgresult['reason']
    self.errorTrace = scgresult['ftrace']
    self.numberOfIterations = len(self.errorTrace)
    self.trained.value = True
    return self
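# A minimal usage sketch for the multilayer classifier train() above. The
# class name NeuralNetClassifier appears in the method's own error message;
# the constructor signature below is a hypothetical, not confirmed by it.
import numpy as np

X = np.random.uniform(-1, 1, (100, 2))
T = (X[:, 0] * X[:, 1] > 0).astype(int).reshape((-1, 1))  # two classes: 0, 1
nnet = NeuralNetClassifier(2, [5], 1)  # hypothetical: 2 inputs, [5] hidden, K-1 = 1 output
nnet.train(X, T, nIterations=200)
print(nnet.reason)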