def compute_grad(self, x, t):
    # Backpropagation: fills self.dweights in place with the gradient of the
    # Euclidean cost on the minibatch (x, t).
    batch_size = nn.find_batch_size(x)
    # x = I[:,:,:,batch_size*l:batch_size*(l+1)]
    self.feedforward(x)

    wk, bk = self.weights[self.size - 1]
    dwk, dbk = self.dweights[self.size - 1]

    # Output delta: derivative of the Euclidean cost w.r.t. the top activations.
    # delta_ holds the matrix-shaped (dense) form, delta the tensor-shaped form.
    if (self.size - 1) in self.cfg.index_dense:
        delta_ = self.H[-1] - t
    else:
        delta = self.H[-1] - t

    # Walk the layers from the top down.
    for k in range(1, self.size)[::-1]:
        if k in self.cfg.index_dense:
            wk, bk = self.weights[k]
            dwk, dbk = self.dweights[k]
            dwk[:] = 1.0 / batch_size * nn.dot(self.H_[k - 1].T, delta_)
            dbk[:] = 1.0 / batch_size * nn.sum(delta_, axis=0)
            delta_ = nn.dot(delta_, wk.T)
            if k - 1 in self.cfg.index_pooling:
                delta = delta_.T.reshape(self.cfg[k - 1].shape[0],
                                         self.cfg[k - 1].shape[1],
                                         self.cfg[k - 1].shape[2],
                                         batch_size)
            elif k - 1 in self.cfg.index_convolution and k != 1:
                delta = delta_.T.reshape(self.cfg[k - 1].shape[0],
                                         self.cfg[k - 1].shape[1],
                                         self.cfg[k - 1].shape[2],
                                         batch_size)
                delta *= self.dH[k - 1]
            elif k - 1 in self.cfg.index_dense and k != 1:
                delta_ *= self.dH[k - 1]
        elif k in self.cfg.index_pooling:
            # Push the delta back through the pooling layer to the layer below.
            delta = self.cfg[k].applyPoolingUndo(self.H[k - 1], delta, self.H[k])
            delta *= self.dH[k - 1]
        elif k in self.cfg.index_convolution:
            wk, bk = self.weights[k]
            dwk, dbk = self.dweights[k]
            delta_ = delta.reshape(self.cfg[k].shape[0],
                                   self.cfg[k].shape[1] * self.cfg[k].shape[2] * batch_size).T
            dwk[:] = (1.0 / batch_size) * self.cfg[k].applyConvOut(self.H[k - 1], delta)
            dbk[:] = (1.0 / batch_size) * nn.sum(delta_, axis=0)
            if k != 1:
                delta = self.cfg[k].applyConvDown(delta, wk)  # convdown is unnecessary if k==1
            if k - 1 in self.cfg.index_convolution and k != 1:
                delta *= self.dH[k - 1]

    # tied weights
    if self.cfg.want_tied:
        for hidden_pairs in self.cfg.tied_list:
            self.dweights.make_tied(*hidden_pairs)

    # L2 penalty: add l2 * W to the gradient of each regularized layer.
    for k in range(1, len(self.cfg)):
        if self.cfg[k].l2 is not None:
            wk, bk = self.weights[k]
            dwk, dbk = self.dweights[k]
            dwk += self.cfg[k].l2 * wk
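
# Minimal usage sketch (an assumption, not part of the original code): one plain
# SGD update that consumes the gradients compute_grad writes into self.dweights.
# The (W, b) tuple layout of weights[k]/dweights[k] and the index_dense /
# index_convolution sets are taken from compute_grad above; the function name,
# the `model` argument and the learning rate `lr` are hypothetical.
def sgd_step(model, x, t, lr=0.01):
    model.compute_grad(x, t)  # fills model.dweights in place
    for k in range(1, model.size):
        if k in model.cfg.index_dense or k in model.cfg.index_convolution:
            wk, bk = model.weights[k]
            dwk, dbk = model.dweights[k]
            wk -= lr * dwk    # in-place parameter update
            bk -= lr * dbk
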
def compute_cost_euclidean(self, x, t):
    batch_size = nn.find_batch_size(x)
    # x = I[:,:,:,batch_size*l:batch_size*(l+1)]
    self.feedforward(x)
    return nn.sum((1.0 / batch_size) * (.5 * (t - self.H[-1]) ** 2), axis=None)
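
# Minimal sketch (an assumption, not part of the original code): a finite-difference
# check of compute_grad against compute_cost_euclidean for one weight entry. It
# assumes the weight arrays support element indexing in place, that no L2 penalty
# and no tied weights are configured (compute_grad adds those terms but the
# Euclidean cost above does not), and that layer k is a dense or convolution layer.
# The function name, `idx` and `eps` are illustrative.
def check_single_gradient(model, x, t, k=1, idx=(0, 0), eps=1e-5):
    model.compute_grad(x, t)                 # analytic gradient into model.dweights
    dwk, dbk = model.dweights[k]
    analytic = dwk[idx]

    wk, bk = model.weights[k]
    old = wk[idx]
    wk[idx] = old + eps
    cost_plus = model.compute_cost_euclidean(x, t)
    wk[idx] = old - eps
    cost_minus = model.compute_cost_euclidean(x, t)
    wk[idx] = old                            # restore the original weight

    numeric = (cost_plus - cost_minus) / (2.0 * eps)
    return abs(analytic - numeric)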