def compute_cost_log(self, x, t):
    """Return the cross-entropy cost of the network on a minibatch.

    Runs a forward pass on `x`, then computes the mean negative
    log-likelihood against targets `t`, plus any per-layer L2 weight
    penalties configured in `self.cfg`.

    x -- input minibatch (batch size is inferred via nn.find_batch_size)
    t -- target activations, same shape as the output layer self.H[-1]
    """
    batch_size = nn.find_batch_size(x)
    self.feedforward(x)
    # Mean cross-entropy over the minibatch.
    out = (1.0 / batch_size) * (-t * nn.log(self.H[-1])).sum()
    # Add 0.5 * l2 * ||W||^2 for every layer with an L2 penalty configured.
    for k in range(1, len(self.cfg)):
        if self.cfg[k].l2 is not None:
            wk, bk = self.weights[k]
            out += self.cfg[k].l2 * .5 * ((wk ** 2).sum())
    return out
def feedforward(self, x):
    """Run a forward pass, populating per-layer activation caches.

    After this call:
      self.H[k]  -- activation of layer k (4-D for conv/pool layers,
                    2-D for dense layers)
      self.H_[k] -- the same activation flattened to (batch, features),
                    as consumed by the next dense layer
      self.dH[k] -- derivative of the activation, used by compute_grad
      self.mask_matrix[k] -- sparsity/dropout mask applied at layer k,
                    or None if none was applied

    Layer k's type is determined by membership in cfg.index_convolution,
    cfg.index_pooling, or cfg.index_dense.
    """
    self.H = [None] * self.size
    self.H_ = [None] * self.size
    self.dH = [None] * self.size
    self.mask_matrix = [None] * self.size
    batch_size = nn.find_batch_size(x)
    self.H[0] = x
    # Flatten a convolutional input layer to (batch, features); dense
    # input is already in that layout.
    if 0 in self.cfg.index_convolution:
        self.H_[0] = self.H[0].reshape(
            self.cfg[0].shape[0] * self.cfg[0].shape[1] * self.cfg[0].shape[2],
            batch_size).T
    else:
        self.H_[0] = self.H[0]
    for k in range(1, self.size):
        f = self.cfg[k].activation
        df = self.cfg[k].activation_prime
        if k in self.cfg.index_convolution:
            wk, bk = self.weights[k]
            A = self.cfg[k].applyConvUp(self.H[k - 1], wk)
            # Broadcast one bias per output channel over the spatial dims
            # and the batch.
            A += bk.reshape(-1, 1, 1, 1)
            self.H[k] = f(A)
            self.dH[k] = df(A)
            # k-sparsity on conv layers is a train-time-only mask; it is
            # skipped in test mode.
            if (self.cfg.want_k_sparsity and (not self.test_mode)
                    and self.cfg[k].k_sparsity is not None):
                self.mask_matrix[k] = nn.mask_3d(self.H[k], self.cfg[k].k_sparsity)
                self.H[k] *= self.mask_matrix[k]
                self.dH[k] *= self.mask_matrix[k]
            self.H_[k] = self.H[k].reshape(
                self.cfg[k].shape[0] * self.cfg[k].shape[1] * self.cfg[k].shape[2],
                batch_size).T
        elif k in self.cfg.index_pooling:
            self.H[k] = self.cfg[k].applyPooling(self.H[k - 1])
            self.H_[k] = self.H[k].reshape(
                self.cfg[k].shape[0] * self.cfg[k].shape[1] * self.cfg[k].shape[2],
                batch_size).T
        elif k in self.cfg.index_dense:
            wk, bk = self.weights[k]
            A = nn.dot(self.H_[k - 1], wk)
            A += bk
            self.H[k] = f(A)
            self.dH[k] = df(A)
            # NOTE(review): unlike the convolutional branch above, this
            # k-sparsity mask is applied even in test mode — confirm this
            # asymmetry is intentional.
            if self.cfg.want_k_sparsity and self.cfg[k].k_sparsity is not None:
                self.mask_matrix[k] = nn.threshold_mask_hard(self.H[k],
                                                             self.cfg[k].k_sparsity)
                self.H[k] *= self.mask_matrix[k]
                self.dH[k] *= self.mask_matrix[k]
            if self.cfg.want_dropout and self.cfg[k].dropout is not None:
                if self.test_mode:
                    # Test time: scale activations by the keep probability
                    # instead of sampling a mask.
                    self.H[k] *= self.cfg[k].dropout
                else:
                    self.mask_matrix[k] = nn.mask(self.H[k], self.cfg[k].dropout)
                    self.H[k] *= self.mask_matrix[k]
                    self.dH[k] *= self.mask_matrix[k]
            self.H_[k] = self.H[k]
def compute_grad(self, x, t):
    """Backpropagate through the network, filling self.dweights in place.

    Runs a forward pass on `x`, then walks the layers from last to first
    accumulating weight/bias gradients for dense and convolutional layers
    and routing the error signal through pooling layers. Also applies
    tied-weight sharing and L2 gradient terms when configured.

    x -- input minibatch
    t -- targets; the output-layer error is self.H[-1] - t (the form the
         log/cross-entropy cost of compute_cost_log backpropagates to)
    """
    batch_size = nn.find_batch_size(x)
    self.feedforward(x)
    wk, bk = self.weights[self.size - 1]
    dwk, dbk = self.dweights[self.size - 1]
    # delta_ holds the error in flattened (batch, features) layout for
    # dense layers; delta holds the 4-D layout for conv/pool layers.
    if (self.size - 1) in self.cfg.index_dense:
        delta_ = self.H[-1] - t
    else:
        delta = self.H[-1] - t
    for k in reversed(range(1, self.size)):
        if k in self.cfg.index_dense:
            wk, bk = self.weights[k]
            dwk, dbk = self.dweights[k]
            dwk[:] = 1.0 / batch_size * nn.dot(self.H_[k - 1].T, delta_)
            dbk[:] = 1.0 / batch_size * nn.sum(delta_, axis=0)
            # Propagate the error to the previous layer's layout.
            delta_ = nn.dot(delta_, wk.T)
            if k - 1 in self.cfg.index_pooling:
                delta = delta_.T.reshape(self.cfg[k - 1].shape[0],
                                         self.cfg[k - 1].shape[1],
                                         self.cfg[k - 1].shape[2],
                                         batch_size)
            elif (k - 1 in self.cfg.index_convolution and k != 1):
                delta = delta_.T.reshape(self.cfg[k - 1].shape[0],
                                         self.cfg[k - 1].shape[1],
                                         self.cfg[k - 1].shape[2],
                                         batch_size)
                delta *= self.dH[k - 1]
            elif (k - 1 in self.cfg.index_dense and k != 1):
                delta_ *= self.dH[k - 1]
        elif k in self.cfg.index_pooling:
            # Route the error back through the pooling switches, then
            # apply the previous layer's activation derivative.
            delta = self.cfg[k].applyPoolingUndo(self.H[k - 1], delta, self.H[k])
            delta *= self.dH[k - 1]
        elif k in self.cfg.index_convolution:
            wk, bk = self.weights[k]
            dwk, dbk = self.dweights[k]
            delta_ = delta.reshape(
                self.cfg[k].shape[0],
                self.cfg[k].shape[1] * self.cfg[k].shape[2] * batch_size).T
            dwk[:] = (1.0 / batch_size) * self.cfg[k].applyConvOut(self.H[k - 1], delta)
            dbk[:] = (1.0 / batch_size) * nn.sum(delta_, axis=0)
            if k != 1:
                # convdown is unnecessary if k==1 (no layer below to feed).
                delta = self.cfg[k].applyConvDown(delta, wk)
            if (k - 1 in self.cfg.index_convolution and k != 1):
                delta *= self.dH[k - 1]
    # Tied weights: share gradients between configured layer pairs.
    if self.cfg.want_tied:
        for hidden_pairs in self.cfg.tied_list:
            self.dweights.make_tied(*hidden_pairs)
    # L2 penalty gradient: d/dW (0.5 * l2 * ||W||^2) = l2 * W.
    for k in range(1, len(self.cfg)):
        if self.cfg[k].l2 is not None:
            wk, bk = self.weights[k]
            dwk, dbk = self.dweights[k]
            dwk += self.cfg[k].l2 * wk
def compute_cost_euclidean(self, x, t):
    """Return the mean squared-error cost of the network on a minibatch.

    Performs a forward pass on `x` and measures half the squared
    Euclidean distance between the targets `t` and the output-layer
    activations, averaged over the batch.
    """
    batch_size = nn.find_batch_size(x)
    self.feedforward(x)
    residual = t - self.H[-1]
    cost_per_element = (1.0 / batch_size) * (.5 * residual ** 2)
    return nn.sum(cost_per_element, axis=None)