def gradient_check(self):
    backend_backup = nn.backend
    nn.set_backend("numpy")
    assert self.cfg.want_dropout == False
    # assert self.cfg[-1].activation == nn.softmax
    assert nn.backend == nn.NumpyBackend
    if self.cfg[0].type == "convolution":
        x = nn.randn((2, self.cfg[0].shape[0], self.cfg[0].shape[1], self.cfg[0].shape[2]))
    else:
        x = nn.randn((2, self.cfg[0].shape))
    if self.cfg[self.size - 1].type == "dense":
        t = nn.zeros((2, self.cfg[-1].shape))
        # The cross-entropy cost uses a trick to avoid NaN, so t values must be exactly 0 or 1.
        t[0, np.random.randint(self.cfg[-1].shape)] = 1
        t[1, np.random.randint(self.cfg[-1].shape)] = 1
        # row_sums = t.sum(axis=1); t = t / row_sums[:, np.newaxis]  # for softmax gradient checking, rows should sum to one.
    else:
        t = nn.randn((2, self.cfg[-1].shape[0], self.cfg[-1].shape[1], self.cfg[-1].shape[2]))
    epsilon = 1e-5
    for k in range(0, self.size):
        if self.cfg[k].type == "dense":
            self.weights.randn(.01)
            self.compute_grad(x, t)  # is it necessary to have this inside the loop? probably not.
            if k == 0:
                continue
            wk, bk = self.weights[k]
            dwk, dbk = self.dweights[k]
            # Perturb a single weight and compare the finite difference to the analytic gradient.
            f = self.feedforward(x, t)
            wk[0, 0] += epsilon
            f_new = self.feedforward(x, t)
            df = f_new - f
            print k, df / epsilon / dwk[0, 0]
            # Same check for a single bias.
            f = self.feedforward(x, t)
            bk[0, 0] += epsilon
            f_new = self.feedforward(x, t)
            df = f_new - f
            print k, df / epsilon / dbk[0, 0]
        if self.cfg[k].type in ("convolution", "deconvolution"):
            self.weights.randn(.01)
            self.compute_grad(x, t)
            if k == 0:
                continue
            wk, bk = self.weights[k]
            dwk, dbk = self.dweights[k]
            f = self.feedforward(x, t)
            wk[0, 0, 2, 0] += epsilon
            f_new = self.feedforward(x, t)
            df = f_new - f
            print k, df / epsilon / dwk[0, 0, 2, 0]
            f = self.feedforward(x, t)
            bk[0, 0] += epsilon
            f_new = self.feedforward(x, t)
            df = f_new - f
            print k, df / epsilon / dbk[0, 0]
    nn.backend = backend_backup
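A minimal, self-contained sketch of the same finite-difference check, assuming only numpy and a toy least-squares cost in place of the network's feedforward/compute_grad pair; the names loss and grad below are illustrative, not part of the nn module. As in gradient_check, the printed ratio of numerical to analytic gradient should be close to 1.

import numpy as np

def loss(w, x, t):
    # toy least-squares cost so the analytic gradient is easy to write down
    return 0.5 * np.sum((x.dot(w) - t) ** 2)

def grad(w, x, t):
    # analytic gradient of loss with respect to w
    return x.T.dot(x.dot(w) - t)

x = np.random.randn(5, 3)
t = np.random.randn(5)
w = np.random.randn(3)
dw = grad(w, x, t)
epsilon = 1e-5
for i in range(w.size):
    w_eps = w.copy()
    w_eps[i] += epsilon
    df = (loss(w_eps, x, t) - loss(w, x, t)) / epsilon
    print(df / dw[i])   # should be close to 1 for every coordinate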
def __init__(self, cfg, initial_weights=None):
    self.cfg = cfg
    self.size = len(cfg.num_weights)
    if initial_weights is None:
        # mem has to be allocated here because of `wk, bk = self.weights[k]` below.
        self.mem = nn.zeros(self.cfg.num_parameters)
    else:
        self.mem = initial_weights
    self.weights = [(None, None)]
    for k in range(1, len(cfg)):
        self.weights.append(self.get_weights(k))
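The constructor keeps all parameters in one flat mem buffer and exposes per-layer (weight, bias) views through get_weights. A rough standalone illustration of that layout, assuming made-up layer shapes and plain numpy views standing in for get_weights:

import numpy as np

# hypothetical per-layer shapes: (weight_shape, bias_shape); layer 0 is the input and holds no parameters
shapes = [None, ((3, 4), (4,)), ((4, 2), (2,))]
num_parameters = int(sum(np.prod(w) + np.prod(b) for w, b in shapes[1:]))

mem = np.zeros(num_parameters)   # one flat buffer, as in self.mem
weights = [(None, None)]         # placeholder for layer 0, as in self.weights
offset = 0
for w_shape, b_shape in shapes[1:]:
    w_size, b_size = int(np.prod(w_shape)), int(np.prod(b_shape))
    wk = mem[offset:offset + w_size].reshape(w_shape)   # a view into mem, not a copy
    offset += w_size
    bk = mem[offset:offset + b_size].reshape(b_shape)
    offset += b_size
    weights.append((wk, bk))

# writing through a view updates the flat buffer, so an optimizer can step on mem directly
weights[1][0][0, 0] = 1.0
print(mem[0])   # -> 1.0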
def __init__(self, shape, tied_list=None, tied_type="first"):
    self.shape = shape
    self.index = []
    tied_dict = {}
    if tied_list:
        for pairs in tied_list:
            i = pairs[0]
            j = pairs[1]
            if tied_type == "all":
                # tie every parameter of layer j to the corresponding one of layer i
                assert shape[i] == shape[j]
                for k in xrange(len(shape[i])):
                    tied_dict[(j, k)] = (i, k)
            elif tied_type == "first":
                # tie only the first (weight) entry of the two layers
                print shape[i][0]
                print shape[j][0]
                assert shape[i][0] == shape[j][0]
                tied_dict[(j, 0)] = (i, 0)
    i = 0
    for layer_index in xrange(len(self.shape)):
        layer = self.shape[layer_index]
        if layer in [None, [None], [None, None]]:
            self.index.append(None)
            continue
        self.index.append([None] * len(layer))
        for w_index in xrange(len(layer)):
            w = layer[w_index]
            tied = tied_dict.get((layer_index, w_index))
            if tied is None:
                self.index[layer_index][w_index] = (i, i + WeightSet.prod(w))
                i += WeightSet.prod(w)
            else:
                # tied entries reuse the source entry's slice of the flat buffer
                self.index[layer_index][w_index] = self.index[tied[0]][tied[1]]
    self.num_weights = self.index[-1][-1][-1]
    self.mem = nn.zeros(self.num_weights)
    self.weights = []
    for layer_index in xrange(len(self.shape)):
        self.weights.append(self.get_weights(layer_index))
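The tied-weight bookkeeping maps a (layer, weight) pair onto another pair's slice of the flat buffer, so tied parameters literally share storage. A rough standalone sketch of that index construction, with hypothetical shapes and a local prod helper standing in for WeightSet.prod (the transpose handling a real tied autoencoder would need is not shown):

import numpy as np

def prod(shape):
    # stand-in for WeightSet.prod: number of elements described by a shape tuple
    p = 1
    for s in shape:
        p *= s
    return p

# made-up shapes: layer 0 is the input (no parameters); layers 1 and 2 tie their first (weight) entry
shape = [None, [(3, 4), (4,)], [(3, 4), (3,)]]
tied_dict = {(2, 0): (1, 0)}   # layer 2's weight reuses layer 1's slice, as tied_type="first" would set up

index, i = [], 0
for layer_index in range(len(shape)):
    layer = shape[layer_index]
    if layer is None:
        index.append(None)
        continue
    index.append([None] * len(layer))
    for w_index in range(len(layer)):
        tied = tied_dict.get((layer_index, w_index))
        if tied is None:
            index[layer_index][w_index] = (i, i + prod(layer[w_index]))
            i += prod(layer[w_index])
        else:
            index[layer_index][w_index] = index[tied[0]][tied[1]]

mem = np.zeros(i)
w1 = mem[index[1][0][0]:index[1][0][1]].reshape(shape[1][0])
w2 = mem[index[2][0][0]:index[2][0][1]].reshape(shape[2][0])
w1[0, 0] = 7.0
print(w2[0, 0])   # -> 7.0, because the tied entries alias the same memory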