def __init__(self, in_size, out_size, lateral_init=None, upward_init=None,
             bias_init=0, forget_bias_init=0):
    """Create the upward and lateral linear links and initialize them.

    Both links produce ``4 * out_size`` outputs and are created with
    ``initialW=0``; the lateral link is created with ``nobias=True``.
    Their weight arrays are then initialized in consecutive blocks of
    ``out_size`` rows, and the upward bias is initialized slice by slice
    using the four pieces returned by ``lstm._extract_gates``.

    Args:
        in_size: Input size passed to the upward ``linear.Linear``.
        out_size: Stored as ``self.state_size``; also the row-block size.
        lateral_init: Initializer spec given to
            ``initializers.init_weight`` for each lateral weight block.
        upward_init: Same, for each upward weight block.
        bias_init: Initializer spec for the first, second and fourth
            bias slices.
        forget_bias_init: Initializer spec for the third bias slice.
    """
    upward_link = linear.Linear(in_size, 4 * out_size, initialW=0)
    lateral_link = linear.Linear(
        out_size, 4 * out_size, initialW=0, nobias=True)
    super(LSTMBase, self).__init__(upward=upward_link, lateral=lateral_link)
    self.state_size = out_size

    # Within each block the lateral weights are handled before the upward
    # ones; the interleaving is kept because the call order matters if an
    # initializer draws random numbers.
    for start in six.moves.range(0, 4 * out_size, out_size):
        rows = slice(start, start + out_size)
        initializers.init_weight(self.lateral.W.data[rows, :], lateral_init)
        initializers.init_weight(self.upward.W.data[rows, :], upward_init)

    # NOTE(review): presumably the extracted slices are views into the bias
    # array, so initializing them in place fills ``upward.b`` -- confirm.
    bias_3d = self.upward.b.data.reshape(1, 4 * out_size, 1)
    gate_a, gate_i, gate_f, gate_o = lstm._extract_gates(bias_3d)
    per_gate_init = (
        (gate_a, bias_init),
        (gate_i, bias_init),
        (gate_f, forget_bias_init),
        (gate_o, bias_init),
    )
    for gate, init_spec in per_gate_init:
        initializers.init_weight(gate, init_spec)
def _initialize_params(self):
    """Register the bias parameter ``b`` and initialize it slice by slice.

    ``b`` is added with ``4 * self.state_size`` entries and an
    ``initializers.Zero()`` initializer.  Its data is then reshaped to
    ``(1, 4 * state_size, 1)``, split with ``lstm._extract_gates``, and
    each of the four pieces is passed to ``initializers.init_weight``:
    the third with ``self.forget_bias_init``, the others with
    ``self.bias_init``.
    """
    zero_init = initializers.Zero()
    total_units = self.state_size * 4
    self.add_param('b', total_units, initializer=zero_init)

    # NOTE(review): presumably the extracted slices are views into ``b``,
    # so the in-place initialization below updates the parameter -- confirm.
    bias_3d = self.b.data.reshape(1, total_units, 1)
    gate_a, gate_i, gate_f, gate_o = lstm._extract_gates(bias_3d)
    for gate, attr_name in (
            (gate_a, 'bias_init'),
            (gate_i, 'bias_init'),
            (gate_f, 'forget_bias_init'),
            (gate_o, 'bias_init')):
        # The spec is looked up on ``self`` right before each call.
        initializers.init_weight(gate, getattr(self, attr_name))
def _initialize_params(self):
    """Initialize lateral/upward weights and the upward bias.

    The weight arrays of ``self.lateral`` and ``self.upward`` are walked in
    consecutive blocks of ``self.state_size`` rows; each block is passed to
    ``initializers.init_weight`` with ``self.lateral_init`` or
    ``self.upward_init`` respectively.  The upward bias is reshaped to
    ``(1, 4 * state_size, 1)``, split with ``lstm._extract_gates``, and the
    third piece is initialized with ``self.forget_bias_init`` while the
    other three use ``self.bias_init``.
    """
    size = self.state_size

    # Lateral before upward inside each block: the call order matters if
    # an initializer draws random numbers.
    for start in six.moves.range(0, 4 * size, size):
        rows = slice(start, start + size)
        lateral_block = self.lateral.W.data[rows, :]
        initializers.init_weight(lateral_block, self.lateral_init)
        upward_block = self.upward.W.data[rows, :]
        initializers.init_weight(upward_block, self.upward_init)

    # NOTE(review): presumably the extracted slices are views into the bias
    # array, so initializing them in place fills ``upward.b`` -- confirm.
    bias_3d = self.upward.b.data.reshape(1, 4 * size, 1)
    gate_a, gate_i, gate_f, gate_o = lstm._extract_gates(bias_3d)
    initializers.init_weight(gate_a, self.bias_init)
    initializers.init_weight(gate_i, self.bias_init)
    initializers.init_weight(gate_f, self.forget_bias_init)
    initializers.init_weight(gate_o, self.bias_init)
def _initialize_params(self):
    """Initialize lateral/upward weights and the upward bias in place.

    The four initializer specs stored on ``self`` are first resolved with
    ``initializers._get_initializer``.  The resolved callables are then
    applied to consecutive ``self.state_size``-row blocks of the lateral
    and upward weight arrays (``.array``), and to the four pieces that
    ``lstm._extract_gates`` produces from the reshaped upward bias.  The
    third bias piece uses the forget-bias initializer; the others use the
    plain bias initializer.
    """
    # Resolve in the same order as the attributes are listed here.
    init_lateral = initializers._get_initializer(self.lateral_init)
    init_upward = initializers._get_initializer(self.upward_init)
    init_bias = initializers._get_initializer(self.bias_init)
    init_forget_bias = initializers._get_initializer(self.forget_bias_init)

    size = self.state_size
    # Lateral before upward inside each block: the call order matters if
    # an initializer draws random numbers.
    for start in six.moves.range(0, 4 * size, size):
        rows = slice(start, start + size)
        init_lateral(self.lateral.W.array[rows, :])
        init_upward(self.upward.W.array[rows, :])

    # NOTE(review): presumably the extracted slices are views into the bias
    # array, so initializing them in place fills ``upward.b`` -- confirm.
    bias_3d = self.upward.b.array.reshape(1, 4 * size, 1)
    gate_a, gate_i, gate_f, gate_o = lstm._extract_gates(bias_3d)
    gate_plan = zip(
        (gate_a, gate_i, gate_f, gate_o),
        (init_bias, init_bias, init_forget_bias, init_bias),
    )
    for gate, init_fn in gate_plan:
        init_fn(gate)
def _initialize_params(self):
    """Initialize lateral/upward weights and the upward bias in place.

    Initializer specs on ``self`` are resolved through
    ``initializers._get_initializer`` and the resulting callables are
    applied to ``self.state_size``-row blocks of the lateral and upward
    weight data, then to the four pieces obtained by passing the reshaped
    upward bias data to ``lstm._extract_gates``.  The third bias piece
    receives the forget-bias initializer, the remaining three the regular
    bias initializer.
    """
    # Resolve in the same order as the attributes are listed here.
    apply_lateral = initializers._get_initializer(self.lateral_init)
    apply_upward = initializers._get_initializer(self.upward_init)
    apply_bias = initializers._get_initializer(self.bias_init)
    apply_forget_bias = initializers._get_initializer(self.forget_bias_init)

    block = self.state_size
    # Lateral before upward inside each block: the call order matters if
    # an initializer draws random numbers.
    for begin in six.moves.range(0, 4 * block, block):
        end = begin + block
        apply_lateral(self.lateral.W.data[begin:end, :])
        apply_upward(self.upward.W.data[begin:end, :])

    # NOTE(review): presumably the extracted slices are views into the bias
    # data, so initializing them in place fills ``upward.b`` -- confirm.
    reshaped_bias = self.upward.b.data.reshape(1, 4 * block, 1)
    gate_a, gate_i, gate_f, gate_o = lstm._extract_gates(reshaped_bias)
    apply_bias(gate_a)
    apply_bias(gate_i)
    apply_forget_bias(gate_f)
    apply_bias(gate_o)
def __init__(self, in_size, out_size, lateral_init=None, upward_init=None,
             bias_init=0, forget_bias_init=0):
    """Set up the two linear links of the LSTM base and initialize them.

    ``upward`` maps ``in_size`` to ``4 * out_size`` and ``lateral`` maps
    ``out_size`` to ``4 * out_size`` (created with ``nobias=True``); both
    are created with ``initialW=0``.  Afterwards each ``out_size``-row
    block of their weight data is passed to ``initializers.init_weight``,
    and the upward bias data is split with ``lstm._extract_gates`` and
    initialized piece by piece.

    Args:
        in_size: Input size of the upward link.
        out_size: Saved as ``self.state_size``; one quarter of the links'
            output size.
        lateral_init: Initializer spec for the lateral weight blocks.
        upward_init: Initializer spec for the upward weight blocks.
        bias_init: Initializer spec for the first, second and fourth bias
            pieces.
        forget_bias_init: Initializer spec for the third bias piece.
    """
    gates_width = 4 * out_size
    super(LSTMBase, self).__init__(
        upward=linear.Linear(in_size, gates_width, initialW=0),
        lateral=linear.Linear(
            out_size, gates_width, initialW=0, nobias=True),
    )
    self.state_size = out_size

    # Lateral before upward inside each block: the call order matters if
    # an initializer draws random numbers.
    for begin in six.moves.range(0, gates_width, out_size):
        end = begin + out_size
        lateral_block = self.lateral.W.data[begin:end, :]
        initializers.init_weight(lateral_block, lateral_init)
        upward_block = self.upward.W.data[begin:end, :]
        initializers.init_weight(upward_block, upward_init)

    # NOTE(review): presumably the extracted slices are views into the bias
    # data, so initializing them in place fills ``upward.b`` -- confirm.
    reshaped_bias = self.upward.b.data.reshape(1, gates_width, 1)
    gate_a, gate_i, gate_f, gate_o = lstm._extract_gates(reshaped_bias)
    initializers.init_weight(gate_a, bias_init)
    initializers.init_weight(gate_i, bias_init)
    initializers.init_weight(gate_f, forget_bias_init)
    initializers.init_weight(gate_o, bias_init)
def split_clstm(var):
    """Split ``var`` into a list of ``chainer.Variable`` objects.

    ``var.data`` is indexed with ``[None, :]`` and handed to
    ``_extract_gates``; element ``0`` of every returned piece is wrapped
    in a new ``chainer.Variable``.

    Args:
        var: Object exposing a ``data`` attribute that supports
            ``[None, :]`` indexing.

    Returns:
        A list with one ``chainer.Variable`` per piece returned by
        ``_extract_gates``, in the same order.
    """
    # NOTE(review): assumes array-style indexing, where ``None`` adds a
    # leading axis that ``piece[0]`` strips again below -- confirm.
    expanded = var.data[None, :]
    pieces = []
    for piece in _extract_gates(expanded):
        pieces.append(chainer.Variable(piece[0]))
    return pieces