def alloc_params(self):
    """Allocate RNN parameters (cf. Ch. 2 pg. 10 of Sutskever's thesis)."""
    cfg = self.hps
    hidden = cfg.hidden_size
    out = cfg.output_size
    p = self.params

    # Initial hidden state: one column per hidden layer.
    p['h0'] = zeros((hidden, cfg.hidden_layers))

    # Input-to-hidden projection; bih is redundant when the first layer
    # is recurrent.
    # NOTE(review): second dim is output_size, not an input size --
    # presumably the input and output vocabularies coincide; confirm.
    p['Wih'] = vp_init((hidden, out))
    p['bih'] = zeros((hidden, 1))

    # Recurrent weight.
    # NOTE Initialization important for grad check, don't use vp_init?
    p['Whh'] = vp_init((hidden, hidden))
    p['bhh'] = zeros((hidden, 1))

    # Weights between stacked hidden layers (keys 'Wh1', 'bh1', ...).
    for layer in xrange(1, cfg.hidden_layers):
        p['Wh%d' % layer] = vp_init((hidden, hidden))
        p['bh%d' % layer] = zeros((hidden, 1))

    # Hidden-to-output projection.
    p['Who'] = vp_init((out, hidden))
    p['bho'] = zeros((out, 1))

    # Keep the last hidden state around so the RNN can resume from it.
    self.last_h = None
    self.count_params()
def alloc_params(self):
    """Allocate RNN parameters (see Ch. 2 pg. 10 of Sutskever's thesis)."""
    hps = self.hps
    n_h = hps.hidden_size
    n_o = hps.output_size

    # Initial hidden state, one column per hidden layer.
    self.params['h0'] = zeros((n_h, hps.hidden_layers))

    # Input to hidden; if the first layer is recurrent, bih is redundant.
    # NOTE(review): width is output_size rather than an input size --
    # assumes input and output dimensions match; verify against callers.
    self.params['Wih'] = vp_init((n_h, n_o))
    self.params['bih'] = zeros((n_h, 1))

    # Recurrent weight.
    # NOTE Initialization important for grad check, don't use vp_init?
    self.params['Whh'] = vp_init((n_h, n_h))
    self.params['bhh'] = zeros((n_h, 1))

    # Weights connecting successive hidden layers.
    for j in xrange(1, hps.hidden_layers):
        self.params['Wh%d' % j] = vp_init((n_h, n_h))
        self.params['bh%d' % j] = zeros((n_h, 1))

    # Hidden to output.
    self.params['Who'] = vp_init((n_o, n_h))
    self.params['bho'] = zeros((n_o, 1))

    # Keep the last hidden state so the RNN can be resumed from it.
    self.last_h = None
    self.count_params()
def alloc_params(self):
    """Allocate feed-forward network weights and biases."""
    hps = self.hps
    nh = hps.hidden_size

    # Input layer.
    self.params['Wih'] = vp_init((nh, hps.input_size))
    self.params['bih'] = zeros((nh, 1))

    # Hidden layers with 1-based keys ('W1', 'b1', ...,
    # 'W<hidden_layers-1>').
    for layer in xrange(1, hps.hidden_layers):
        self.params['W%d' % layer] = vp_init((nh, nh))
        self.params['b%d' % layer] = zeros((nh, 1))

    # Output layer.
    self.params['Who'] = vp_init((hps.output_size, nh))
    self.params['bho'] = zeros((hps.output_size, 1))

    self.count_params()
def alloc_params(self):
    """Allocate parameters for a stack of fully-connected layers."""
    hps = self.hps
    hidden_shape = (hps.hidden_size, hps.hidden_size)
    bias_shape = (hps.hidden_size, 1)

    # Input-to-hidden projection.
    self.params['Wih'] = vp_init((hps.hidden_size, hps.input_size))
    self.params['bih'] = zeros(bias_shape)

    # Intermediate hidden layers; parameter keys are 1-based.
    for idx in xrange(hps.hidden_layers - 1):
        tag = idx + 1
        self.params['W%d' % tag] = vp_init(hidden_shape)
        self.params['b%d' % tag] = zeros(bias_shape)

    # Hidden-to-output projection.
    self.params['Who'] = vp_init((hps.output_size, hps.hidden_size))
    self.params['bho'] = zeros((hps.output_size, 1))

    self.count_params()
def alloc_params(self):
    """Allocate network parameters and matching gradient buffers."""
    hps = self.hps
    nh = hps.hidden_size
    no = hps.output_size

    # Input-to-hidden projection plus source conditioning weights.
    # NOTE(review): Wsh presumably feeds the first hidden layer alongside
    # Wih -- confirm in the forward pass; it has no bias of its own here.
    self.params['Wih'] = vp_init((nh, hps.input_size))
    self.params['Wsh'] = vp_init((nh, hps.source_size))
    self.params['bih'] = zeros((nh, 1))

    # Intermediate hidden layers, keys 'W1'/'b1' upward.
    for layer in xrange(1, hps.hidden_layers):
        self.params['W%d' % layer] = vp_init((nh, nh))
        self.params['b%d' % layer] = zeros((nh, 1))

    # Hidden-to-output projection.
    self.params['Who'] = vp_init((no, nh))
    self.params['bho'] = zeros((no, 1))

    self.count_params()

    # One shape-matched gradient buffer per parameter.
    self.grads = {name: empty(self.params[name].shape)
                  for name in self.params}
    logger.info('Allocated gradients')