def __init__(self,
                 in_channels,
                 out_channels,
                 ksize,
                 stride=1,
                 pad=0,
                 dilate=1,
                 wscale=1,
                 bias=0,
                 nobias=False,
                 use_cudnn=True,
                 initialW=None,
                 initial_bias=None):
        """Set up a dilated 2D convolution link.

        When ``in_channels`` is ``None`` the weight ``W`` is left
        uninitialized and is created later by ``_initialize_params``;
        otherwise it is created and initialized immediately.

        Args:
            in_channels: Number of input channels, or ``None`` for lazy
                weight creation.
            out_channels: Number of output channels.
            ksize: Kernel size (int or pair).
            stride: Stride of the filter application.
            pad: Spatial zero-padding width.
            dilate: Dilation factor of the filter.
            wscale: Legacy scale factor kept on the instance; the weight
                initializer applies ``sqrt(wscale)`` (see
                ``_initialize_params``).
            bias: Scalar bias value used when ``initial_bias`` is None.
            nobias: If True, no bias parameter is created.
            use_cudnn: Whether cuDNN may be used.
            initialW: Initial weight array or initializer.
            initial_bias: Initial bias array or initializer.
        """
        super(DilatedConvolution2D, self).__init__()
        self.ksize = ksize
        self.stride = _pair(stride)
        self.pad = _pair(pad)
        self.dilate = _pair(dilate)
        self.use_cudnn = use_cudnn
        self.out_channels = out_channels
        self.initialW = initialW
        self.wscale = wscale

        # Defer weight creation until the input channel count is known.
        if in_channels is None:
            self.add_uninitialized_param('W')
        else:
            self._initialize_params(in_channels)

        if nobias:
            self.b = None
        else:
            self.add_param('b', out_channels)
            if initial_bias is None:
                initial_bias = bias  # fall back to the scalar ``bias``
            initializers.init_weight(self.b.data, initial_bias)
Ejemplo n.º 2
0
    def __init__(self, in_channels, out_channels, ksize, stride=1, pad=0,
                 wscale=1, bias=0, nobias=False, use_cudnn=True,
                 initialW=None, initial_bias=None):
        """Set up a 2D convolution link with optional lazy weight creation.

        Args:
            in_channels: Number of input channels, or ``None`` to defer
                weight creation to ``_initialize_params``.
            out_channels: Number of output channels.
            ksize: Kernel size (int or pair).
            stride: Stride of the filter application.
            pad: Spatial zero-padding width.
            wscale: Legacy scale; the weight is scaled by ``sqrt(wscale)``.
            bias: Scalar bias value used when ``initial_bias`` is None.
            nobias: If True, no bias parameter is created.
            use_cudnn: Whether cuDNN may be used.
            initialW: Initial weight array or initializer.
            initial_bias: Initial bias array or initializer.
        """
        super(Convolution2D, self).__init__()
        self.ksize = ksize
        self.stride = _pair(stride)
        self.pad = _pair(pad)
        self.use_cudnn = use_cudnn
        self.out_channels = out_channels
        self.initialW = initialW
        self.wscale = wscale

        if in_channels is None:
            # Lazy path: W does not exist yet, so it must not be touched
            # below.  The previous code unconditionally called
            # init_weight(self.W.data, ...) and crashed here.
            self.add_uninitialized_param('W')
        else:
            self._initialize_params(in_channels)
            # For backward compatibility, the scale of weights is
            # proportional to the square root of wscale.  (The dead
            # W_shape computation and commented-out add_param were
            # removed; _initialize_params creates W.)
            initializers.init_weight(self.W.data, self.initialW,
                                     scale=math.sqrt(self.wscale))

        if nobias:
            self.b = None
        else:
            self.add_param('b', out_channels)
            if initial_bias is None:
                initial_bias = bias
            initializers.init_weight(self.b.data, initial_bias)
Ejemplo n.º 3
0
	def __init__(self, in_channels, out_channels, ksize, stride=1, pad=0,
				 wscale=1, bias=0, nobias=False, outsize=None, use_cudnn=True,
				 initialV=None, dtype=np.float32):
		"""Set up a weight-normalized 2D deconvolution link.

		The direction weight ``V`` of shape
		``(in_channels, out_channels, kh, kw)`` is created eagerly; the
		gain ``g`` and bias ``b`` are added as uninitialized parameters
		(presumably filled in on first forward pass — weight
		normalization style; confirm against the forward code).
		"""
		kh, kw = _pair(ksize)
		self.stride = _pair(stride)
		self.pad = _pair(pad)
		self.outsize = (None, None) if outsize is None else outsize
		self.use_cudnn = use_cudnn
		self.dtype = dtype
		self.nobias = nobias
		self.out_channels = out_channels
		self.in_channels = in_channels

		# Deconvolution weight layout: (in_channels, out_channels, kh, kw).
		V_shape = (in_channels, out_channels, kh, kw)
		super(Deconvolution2D, self).__init__(V=V_shape)

		if isinstance(initialV, (np.ndarray, cuda.ndarray)):
			assert initialV.shape == (in_channels, out_channels, kh, kw)
		initializers.init_weight(self.V.data, initialV, scale=math.sqrt(wscale))

		if nobias:
			self.b = None
		else:
			self.add_uninitialized_param("b")
		self.add_uninitialized_param("g")
Ejemplo n.º 4
0
    def __init__(self, in_channels, out_channels, ksize, stride=1, pad=0,
                 wscale=1, bias=0, nobias=False, outsize=None, use_cudnn=True,
                 initialW=None, initial_bias=None, deterministic=False):
        """Construct a 2D deconvolution link with an eagerly created weight.

        The weight has shape ``(in_channels, out_channels, kh, kw)`` and,
        for backward compatibility, is scaled by ``sqrt(wscale)``.
        """
        kernel_h, kernel_w = _pair(ksize)
        self.stride = _pair(stride)
        self.pad = _pair(pad)
        self.outsize = outsize if outsize is not None else (None, None)
        self.use_cudnn = use_cudnn
        self.deterministic = deterministic

        super(Deconvolution2D, self).__init__(
            W=(in_channels, out_channels, kernel_h, kernel_w))

        if isinstance(initialW, (numpy.ndarray, cuda.ndarray)):
            assert initialW.shape == (
                in_channels, out_channels, kernel_h, kernel_w)
        # Legacy behavior: effective weight scale is sqrt(wscale).
        initializers.init_weight(self.W.data, initialW,
                                 scale=math.sqrt(wscale))

        if nobias:
            self.b = None
            return
        self.add_param('b', out_channels)
        if isinstance(initial_bias, (numpy.ndarray, cuda.ndarray)):
            assert initial_bias.shape == (out_channels,)
        if initial_bias is None:
            initial_bias = bias
        initializers.init_weight(self.b.data, initial_bias)
Ejemplo n.º 5
0
    def __init__(self, in_channels, out_channels, ksize, stride=1, pad=0,
                 wscale=1, bias=0, nobias=False, outsize=None, use_cudnn=True,
                 initialW=None, initial_bias=None, deterministic=False):
        """Construct a 2D deconvolution link with lazy weight creation.

        If ``in_channels`` is ``None``, ``W`` is registered uninitialized
        and built later by ``_initialize_params``.
        """
        super(Deconvolution2D, self).__init__()
        self.ksize = ksize
        self.stride = _pair(stride)
        self.pad = _pair(pad)
        self.outsize = outsize if outsize is not None else (None, None)
        self.use_cudnn = use_cudnn
        self.initialW = initialW
        self.wscale = wscale
        self.out_channels = out_channels
        self.deterministic = deterministic

        # Create the weight now if the input channel count is known.
        if in_channels is not None:
            self._initialize_params(in_channels)
        else:
            self.add_uninitialized_param('W')

        if nobias:
            self.b = None
            return
        self.add_param('b', out_channels)
        if isinstance(initial_bias, (numpy.ndarray, cuda.ndarray)):
            assert initial_bias.shape == (out_channels,)
        if initial_bias is None:
            initial_bias = bias
        initializers.init_weight(self.b.data, initial_bias)
 def __init__(self,
              size,
              decay=0.9,
              eps=2e-5,
              dtype=numpy.float32,
              use_gamma=True,
              use_beta=True,
              initial_gamma=None,
              initial_beta=None):
     """Set up batch-normalization parameters and running statistics.

     Args:
         size: Size of the channel dimension being normalized.
         decay: Exponential-decay rate of the running statistics.
         eps: Small constant added to the variance for stability.
         dtype: Parameter and statistic dtype.
         use_gamma: If True, learn a scale parameter ``gamma``.
         use_beta: If True, learn a shift parameter ``beta``.
         initial_gamma: Initializer for ``gamma`` (defaults to ones).
         initial_beta: Initializer for ``beta`` (defaults to zeros).
     """
     super(BatchNormalization, self).__init__()
     if use_gamma:
         self.add_param('gamma', size, dtype=dtype)
         if initial_gamma is None:
             initial_gamma = initializers.One()
         initializers.init_weight(self.gamma.data, initial_gamma)
     if use_beta:
         self.add_param('beta', size, dtype=dtype)
         if initial_beta is None:
             initial_beta = initializers.Zero()
         initializers.init_weight(self.beta.data, initial_beta)
     # Running mean/variance and sample counter are persistent (saved
     # with the model) but not optimized.
     self.add_persistent('avg_mean', numpy.zeros(size, dtype=dtype))
     self.add_persistent('avg_var', numpy.zeros(size, dtype=dtype))
     self.add_persistent('N', 0)
     self.decay = decay
     self.eps = eps
Ejemplo n.º 7
0
 def _initialize_params(self, in_size):
     """Create and initialize the weight ``W`` once ``in_size`` is known."""
     shape = (self.out_size, in_size)
     self.add_param('W', shape)
     # Legacy behavior: weights are scaled by sqrt(wscale).
     initializers.init_weight(
         self.W.data, self.initialW, scale=math.sqrt(self.wscale))
    def __init__(self, in_channels, out_channels, ksize, stride=1, pad=0,
                 dilate=1, wscale=1, bias=0, nobias=False, use_cudnn=True,
                 initialW=None, initial_bias=None):
        """Construct a dilated 2D convolution link.

        ``W`` is created lazily when ``in_channels`` is ``None``;
        otherwise ``_initialize_params`` builds it immediately.
        """
        super(DilatedConvolution2D, self).__init__()
        self.ksize = ksize
        self.stride = _pair(stride)
        self.pad = _pair(pad)
        self.dilate = _pair(dilate)
        self.use_cudnn = use_cudnn
        self.out_channels = out_channels
        self.initialW = initialW
        self.wscale = wscale

        if in_channels is not None:
            self._initialize_params(in_channels)
        else:
            self.add_uninitialized_param('W')

        if nobias:
            self.b = None
            return
        self.add_param('b', out_channels)
        # The scalar ``bias`` is the fallback initial value.
        initializers.init_weight(
            self.b.data, bias if initial_bias is None else initial_bias)
Ejemplo n.º 9
0
    def __init__(self, in_size, out_size, wscale=1, bias=0, nobias=False,
                 initialW=None, initial_bias=None):
        """Construct a binary linear link with lazy weight creation.

        ``W`` is created by ``_initialize_params`` once ``in_size`` is
        known; the bias falls back to the scalar ``bias`` value.
        """
        super(BinaryLinear, self).__init__()
        self.initialW = initialW
        self.wscale = wscale
        self.out_size = out_size

        if in_size is not None:
            self._initialize_params(in_size)
        else:
            self.add_uninitialized_param('W')

        if nobias:
            self.b = None
            return
        self.add_param('b', out_size)
        initializers.init_weight(
            self.b.data, bias if initial_bias is None else initial_bias)
Ejemplo n.º 10
0
    def __init__(self, in_channels, out_channels, ksize, stride=1, pad=0,
                 wscale=1, bias=0, nobias=False, outsize=None, use_cudnn=True,
                 initialW=None, initial_bias=None, deterministic=False):
        """Set up a 2D deconvolution link with lazy weight creation.

        Args:
            in_channels: Number of input channels, or ``None`` to defer
                weight creation to ``_initialize_params``.
            out_channels: Number of output channels.
            ksize: Kernel size (int or pair).
            stride: Stride of the filter application.
            pad: Spatial zero-padding width.
            wscale: Legacy weight-scale factor stored on the instance.
            bias: Scalar bias value used when ``initial_bias`` is None.
            nobias: If True, no bias parameter is created.
            outsize: Expected output size, or ``None`` to infer it.
            use_cudnn: Whether cuDNN may be used.
            initialW: Initial weight array or initializer.
            initial_bias: Initial bias array or initializer.
            deterministic: Force deterministic cuDNN algorithms.
        """
        super(Deconvolution2D, self).__init__()
        self.ksize = ksize
        self.stride = _pair(stride)
        self.pad = _pair(pad)
        self.outsize = (None, None) if outsize is None else outsize
        self.use_cudnn = use_cudnn
        self.initialW = initialW
        self.wscale = wscale
        self.out_channels = out_channels
        self.deterministic = deterministic

        # Defer weight creation until the input channel count is known.
        if in_channels is None:
            self.add_uninitialized_param('W')
        else:
            self._initialize_params(in_channels)

        if nobias:
            self.b = None
        else:
            self.add_param('b', out_channels)
            if isinstance(initial_bias, (numpy.ndarray, cuda.ndarray)):
                assert initial_bias.shape == (out_channels,)
            if initial_bias is None:
                initial_bias = bias
            initializers.init_weight(self.b.data, initial_bias)
Ejemplo n.º 11
0
 def _initialize_params(self, in_channels):
     """Create the deconvolution weight once the input size is known."""
     kernel_h, kernel_w = _pair(self.ksize)
     # Deconvolution weight layout: (in_channels, out_channels, kh, kw).
     self.add_param(
         'W', (in_channels, self.out_channels, kernel_h, kernel_w))
     # Legacy behavior: weights are scaled by sqrt(wscale).
     initializers.init_weight(
         self.W.data, self.initialW, scale=math.sqrt(self.wscale))
 def _initialize_params(self, in_channels):
     """Create the convolution weight once the input size is known."""
     kernel_h, kernel_w = _pair(self.ksize)
     # Convolution weight layout: (out_channels, in_channels, kh, kw).
     self.add_param(
         'W', (self.out_channels, in_channels, kernel_h, kernel_w))
     # Legacy behavior: weights are scaled by sqrt(wscale).
     initializers.init_weight(
         self.W.data, self.initialW, scale=math.sqrt(self.wscale))
Ejemplo n.º 13
0
    def __init__(self, in_size, out_size,
                 lateral_init=None):
        """Set up the lateral connection of a grid-LSTM cell.

        A single ``lateral`` linear link holds the weights of all four
        LSTM gates stacked along the output axis; each ``out_size``-row
        slice is initialized independently with ``lateral_init``.
        """
        super(GridLSTMBase, self).__init__(
            lateral=linear.Linear(in_size, 4 * out_size,
                                  initialW=0, nobias=True),
        )
        self.state_size = out_size

        # Initialize each gate's weight slice separately.
        for i in six.moves.range(0, 4 * out_size, out_size):
            initializers.init_weight(
                self.lateral.W.data[i:i + out_size, :], lateral_init)
Ejemplo n.º 14
0
    def __init__(self, nc, dtype=np.float32):
        """Set up per-channel scale/shift parameters for instance norm.

        Args:
            nc: Number of channels.
            dtype: Parameter dtype.
        """
        super(InstanceNormalization, self).__init__()
        self.nc = nc
        self.dtype = dtype
        # Lazily-built normalization helper and a cache slot.
        self.bn = None
        self.prev_batch = None

        self.add_param('gamma', nc, dtype=dtype)
        # NOTE(review): gamma is drawn from U[0, 1) at construction, so
        # initialization is nondeterministic — confirm this is intended.
        initializers.init_weight(self.gamma.data, np.random.uniform(size=nc))

        self.add_param('beta', nc, dtype=dtype)
        initializers.init_weight(self.beta.data, initializers.Zero())
Ejemplo n.º 15
0
    def __init__(self,
                 children,
                 in_size,
                 out_size,
                 lateral_init=None,
                 upward_init=None,
                 bias_init=0,
                 forget_bias_init=0):
        """Set up an S-LSTM over a variable number of child states.

        The ``upward`` link holds the stacked weights of the four LSTM
        gates; per-child lateral links are added below using the
        ``U_*_H`` name templates (presumably class-level format strings
        — verify on the enclosing class).

        Args:
            children: Number of child states feeding this cell.
            in_size: Input vector size.
            out_size: Hidden/cell state size.
            lateral_init: Initializer for the per-child lateral weights.
            upward_init: Initializer for the upward (input) weights.
            bias_init: Initial value for the a/i/o gate biases.
            forget_bias_init: Initial value for the forget-gate bias.
        """
        super(SLSTM, self).__init__(
            upward=linear.Linear(in_size, 4 * out_size, initialW=0))
        self.state_size = out_size
        self.n_children = children

        # Initialize each gate's slice of the stacked upward weight.
        for i in range(0, 4 * out_size, out_size):
            initializers.init_weight(self.upward.W.data[i:i + out_size, :],
                                     upward_init)
        # Split the stacked bias into the four gates and initialize them;
        # only the forget gate gets forget_bias_init.
        a, i, f, o = numpy_extract_gates(
            self.upward.b.data.reshape(1, 4 * out_size, 1))
        initializers.init_weight(a, bias_init)
        initializers.init_weight(i, bias_init)
        initializers.init_weight(f, forget_bias_init)
        initializers.init_weight(o, bias_init)

        #hidden unit gates for each child
        for i in range(self.n_children):
            self.add_link(
                self.U_I_H.format(i),
                linear.Linear(out_size,
                              out_size,
                              initialW=lateral_init,
                              nobias=True))
            self.add_link(
                self.U_O_H.format(i),
                linear.Linear(out_size,
                              out_size,
                              initialW=lateral_init,
                              nobias=True))
            self.add_link(
                self.U_A_H.format(i),
                linear.Linear(out_size,
                              out_size,
                              initialW=lateral_init,
                              nobias=True))

            # Forget gates are pairwise: one per (child i, child j).
            for j in range(self.n_children):
                self.add_link(
                    self.U_F_H.format(i, j),
                    linear.Linear(out_size,
                                  out_size,
                                  initialW=lateral_init,
                                  nobias=True))
Ejemplo n.º 16
0
    def __init__(self, in_size, out_size, wscale=1, bias=0, nobias=False,
                 initialW=None, initial_bias=None):
        """Construct a linear link with an eagerly created weight.

        The weight has shape ``(out_size, in_size)`` and, for backward
        compatibility, is scaled by ``sqrt(wscale)``.
        """
        super(Linear, self).__init__(W=(out_size, in_size))

        initializers.init_weight(self.W.data, initialW,
                                 scale=math.sqrt(wscale))

        if nobias:
            self.b = None
            return
        self.add_param('b', out_size)
        # The scalar ``bias`` is the fallback initial value.
        initializers.init_weight(
            self.b.data, bias if initial_bias is None else initial_bias)
Ejemplo n.º 17
0
    def _initialize_params(self):
        """Create the stacked gate bias and initialize each gate slice."""

        bias_initializer = initializers.Zero()
        self.add_param('b', self.state_size*4, initializer=bias_initializer)
        # Split the stacked bias into the a/i/f/o gates; only the forget
        # gate uses forget_bias_init.
        a, i, f, o = lstm._extract_gates(self.b.data.reshape(1, 4 * self.state_size, 1))
        initializers.init_weight(a, self.bias_init)
        initializers.init_weight(i, self.bias_init)
        initializers.init_weight(f, self.forget_bias_init)
        initializers.init_weight(o, self.bias_init)
Ejemplo n.º 18
0
    def __init__(self, ndim, in_channels, out_channels, ksize, stride=1, pad=0,
                 initialW=None, initial_bias=None, use_cudnn=True,
                 cover_all=False):
        """Construct an N-dimensional convolution link.

        Unlike the 2D links, a bias is created only when ``initial_bias``
        is explicitly given.
        """
        ksize = conv_nd.as_tuple(ksize, ndim)
        self.stride = stride
        self.pad = pad
        self.use_cudnn = use_cudnn
        self.cover_all = cover_all

        super(ConvolutionND, self).__init__(
            W=(out_channels, in_channels) + ksize)
        initializers.init_weight(self.W.data, initialW)

        if initial_bias is None:
            self.b = None
            return
        self.add_param('b', out_channels)
        initializers.init_weight(self.b.data, initial_bias)
Ejemplo n.º 19
0
 def __init__(self, in_size, out_size, wscale=1, bias=0, nobias=True, initialW=None, initial_bias=None):
     """Construct a multi-linear link with lazy weight creation.

     ``W`` is built by ``_initialize_params`` once ``in_size`` is known;
     note ``nobias`` defaults to True for this link.
     """
     super(MultiLinear, self).__init__()
     self.initialW = initialW
     self.wscale = wscale
     self.out_size = out_size
     self.in_size = in_size
     # self.batch_size = batch_size
     if in_size is not None:
         self._initialize_params(in_size)
     else:
         self.add_uninitialized_param("W")
     if nobias:
         self.b = None
         return
     self.add_param("b", out_size)
     # The scalar ``bias`` is the fallback initial value.
     initializers.init_weight(
         self.b.data, bias if initial_bias is None else initial_bias)
Ejemplo n.º 20
0
 def __init__(self, size, decay=0.9, eps=2e-5, dtype=numpy.float32,
              use_gamma=True, use_beta=True,
              initial_gamma=None, initial_beta=None):
     """Set up batch-normalization parameters and running statistics.

     Args:
         size: Size of the channel dimension being normalized.
         decay: Exponential-decay rate of the running statistics.
         eps: Small constant added to the variance for stability.
         dtype: Parameter and statistic dtype.
         use_gamma: If True, learn a scale parameter ``gamma``.
         use_beta: If True, learn a shift parameter ``beta``.
         initial_gamma: Initializer for ``gamma`` (defaults to ones).
         initial_beta: Initializer for ``beta`` (defaults to zeros).
     """
     super(BatchNormalization, self).__init__()
     if use_gamma:
         self.add_param('gamma', size, dtype=dtype)
         if initial_gamma is None:
             initial_gamma = initializers.One()
         initializers.init_weight(self.gamma.data, initial_gamma)
     if use_beta:
         self.add_param('beta', size, dtype=dtype)
         if initial_beta is None:
             initial_beta = initializers.Zero()
         initializers.init_weight(self.beta.data, initial_beta)
     # Running mean/variance and sample counter are persistent (saved
     # with the model) but not optimized.
     self.add_persistent('avg_mean', numpy.zeros(size, dtype=dtype))
     self.add_persistent('avg_var', numpy.zeros(size, dtype=dtype))
     self.add_persistent('N', 0)
     self.decay = decay
     self.eps = eps
Ejemplo n.º 21
0
    def __init__(self, in_size, out_size, wscale=1, bias=0, nobias=False,
                 initialW=None, initial_bias=None):
        """Construct a linear link with lazy weight creation.

        ``W`` is built by ``_initialize_params`` once ``in_size`` is
        known.
        """
        super(Linear, self).__init__()
        self.initialW = initialW
        self.wscale = wscale
        self.out_size = out_size

        if in_size is not None:
            self._initialize_params(in_size)
        else:
            self.add_uninitialized_param('W')

        if nobias:
            self.b = None
            return
        self.add_param('b', out_size)
        # The scalar ``bias`` is the fallback initial value.
        initializers.init_weight(
            self.b.data, bias if initial_bias is None else initial_bias)
Ejemplo n.º 22
0
    def __init__(self,
                 in_channels,
                 out_channels,
                 ksize,
                 stride=1,
                 pad=0,
                 wscale=1,
                 bias=0,
                 nobias=False,
                 outsize=None,
                 use_cudnn=True,
                 initialW=None,
                 initial_bias=None,
                 deterministic=False):
        """Set up a 2D deconvolution link with an eagerly created weight.

        Args:
            in_channels: Number of input channels.
            out_channels: Number of output channels.
            ksize: Kernel size (int or pair).
            stride: Stride of the filter application.
            pad: Spatial zero-padding width.
            wscale: Legacy scale; the weight is scaled by ``sqrt(wscale)``.
            bias: Scalar bias value used when ``initial_bias`` is None.
            nobias: If True, no bias parameter is created.
            outsize: Expected output size, or ``None`` to infer it.
            use_cudnn: Whether cuDNN may be used.
            initialW: Initial weight array or initializer.
            initial_bias: Initial bias array or initializer.
            deterministic: Force deterministic cuDNN algorithms.
        """
        kh, kw = _pair(ksize)
        self.stride = _pair(stride)
        self.pad = _pair(pad)
        self.outsize = (None, None) if outsize is None else outsize
        self.use_cudnn = use_cudnn
        self.deterministic = deterministic

        # Deconvolution weight layout: (in_channels, out_channels, kh, kw).
        W_shape = (in_channels, out_channels, kh, kw)
        super(Deconvolution2D, self).__init__(W=W_shape)

        if isinstance(initialW, (numpy.ndarray, cuda.ndarray)):
            assert initialW.shape == (in_channels, out_channels, kh, kw)
        # For backward compatibility, the scale of weights is proportional to
        # the square root of wscale.
        initializers.init_weight(self.W.data,
                                 initialW,
                                 scale=math.sqrt(wscale))

        if nobias:
            self.b = None
        else:
            self.add_param('b', out_channels)
            if isinstance(initial_bias, (numpy.ndarray, cuda.ndarray)):
                assert initial_bias.shape == (out_channels, )
            if initial_bias is None:
                initial_bias = bias
            initializers.init_weight(self.b.data, initial_bias)
Ejemplo n.º 23
0
    def __init__(self, in_channels, out_channels, ksize, stride=1, pad=0,
                 wscale=1, bias=0, nobias=False, use_cudnn=True,
                 initialW=None, initial_bias=None):
        """Construct a 2D convolution link with an eagerly created weight.

        The weight has shape ``(out_channels, in_channels, kh, kw)`` and,
        for backward compatibility, is scaled by ``sqrt(wscale)``.
        """
        kernel_h, kernel_w = _pair(ksize)
        self.stride = _pair(stride)
        self.pad = _pair(pad)
        self.use_cudnn = use_cudnn

        super(Convolution2D, self).__init__(
            W=(out_channels, in_channels, kernel_h, kernel_w))

        initializers.init_weight(self.W.data, initialW,
                                 scale=math.sqrt(wscale))

        if nobias:
            self.b = None
            return
        self.add_param('b', out_channels)
        # The scalar ``bias`` is the fallback initial value.
        initializers.init_weight(
            self.b.data, bias if initial_bias is None else initial_bias)
Ejemplo n.º 24
0
    def __init__(self,
                 in_size,
                 out_size,
                 wscale=1,
                 bias=0,
                 nobias=False,
                 initialW=None,
                 initial_bias=None):
        """Set up a linear link with an eagerly created weight.

        Args:
            in_size: Input vector size.
            out_size: Output vector size.
            wscale: Legacy scale; the weight is scaled by ``sqrt(wscale)``.
            bias: Scalar bias value used when ``initial_bias`` is None.
            nobias: If True, no bias parameter is created.
            initialW: Initial weight array or initializer.
            initial_bias: Initial bias array or initializer.
        """
        super(Linear, self).__init__(W=(out_size, in_size))

        # For backward compatibility, the scale of weights is proportional to
        # the square root of wscale.
        initializers.init_weight(self.W.data,
                                 initialW,
                                 scale=math.sqrt(wscale))

        if nobias:
            self.b = None
        else:
            self.add_param('b', out_size)
            if initial_bias is None:
                initial_bias = bias
            initializers.init_weight(self.b.data, initial_bias)
Ejemplo n.º 25
0
    def __init__(self,
                 in_channels,
                 out_channels,
                 ksize,
                 stride=1,
                 pad=0,
                 wscale=1,
                 bias=0,
                 nobias=False,
                 outsize=None,
                 use_cudnn=True,
                 initialV=None,
                 dtype=np.float32):
        """Set up a weight-normalized 2D deconvolution link.

        The direction weight ``V`` of shape
        ``(in_channels, out_channels, kh, kw)`` is created eagerly; the
        gain ``g`` and bias ``b`` are registered uninitialized
        (presumably filled in on first forward — weight normalization
        style; confirm against the forward code).
        """
        kh, kw = _pair(ksize)
        self.stride = _pair(stride)
        self.pad = _pair(pad)
        self.outsize = (None, None) if outsize is None else outsize
        self.use_cudnn = use_cudnn
        self.dtype = dtype
        self.nobias = nobias
        self.out_channels = out_channels
        self.in_channels = in_channels

        # Deconvolution weight layout: (in_channels, out_channels, kh, kw).
        V_shape = (in_channels, out_channels, kh, kw)
        super(Deconvolution2D, self).__init__(V=V_shape)

        if isinstance(initialV, (np.ndarray, cuda.ndarray)):
            assert initialV.shape == (in_channels, out_channels, kh, kw)
        initializers.init_weight(self.V.data,
                                 initialV,
                                 scale=math.sqrt(wscale))

        if nobias:
            self.b = None
        else:
            self.add_uninitialized_param("b")
        self.add_uninitialized_param("g")
Ejemplo n.º 26
0
    def __init__(self,
                 left_size,
                 right_size,
                 out_size,
                 nobias=True,
                 initialW=None,
                 initial_bias=None):
        """Set up a bilinear-style retrieval link.

        The 3-D weight ``W`` has shape
        ``(left_size, right_size, out_size)``.  When ``nobias`` is
        False, linear terms ``V1``/``V2`` and a bias ``b`` are added;
        ``initial_bias`` must then be a ``(V1, V2, b)`` tuple or None.
        """
        super(Retrieval, self).__init__(W=(left_size, right_size, out_size))
        self.in_sizes = (left_size, right_size)
        self.nobias = nobias

        # TODO(Kenta OONO): I do not know appropriate way of
        # initializing weights in tensor network.
        # This initialization is a modification of
        # that of Linear function.
        '''
        if isinstance(initialW, (numpy.ndarray, cuda.ndarray)):
            assert initialW.shape == self.W.shape
        '''
        initializers.init_weight(self.W.data, initialW)

        if not self.nobias:
            self.add_param('V1', (left_size, out_size))
            self.add_param('V2', (right_size, out_size))
            self.add_param('b', out_size)

            if isinstance(initial_bias, tuple):
                V1, V2, b = initial_bias
            elif initial_bias is None:
                V1 = V2 = None
                b = 0
            else:
                raise ValueError('initial_bias must be tuple or None')

            # NOTE(review): these checks compare against self.V1.shape,
            # not self.V1.data.shape as the sibling Bilinear link does —
            # confirm the parameter object exposes .shape directly.
            if isinstance(V1, (numpy.ndarray, cuda.ndarray)):
                assert V1.shape == self.V1.shape
            if isinstance(V2, (numpy.ndarray, cuda.ndarray)):
                assert V2.shape == self.V2.shape
            if isinstance(b, (numpy.ndarray, cuda.ndarray)):
                assert b.shape == self.b.shape
            initializers.init_weight(self.V1.data, V1)
            initializers.init_weight(self.V2.data, V2)
            initializers.init_weight(self.b.data, b)
Ejemplo n.º 27
0
    def __init__(self, left_size, right_size, out_size, nobias=False,
                 initialW=None, initial_bias=None):
        """Set up a bilinear link.

        The 3-D weight ``W`` has shape
        ``(left_size, right_size, out_size)``.  When ``nobias`` is
        False, linear terms ``V1``/``V2`` and a bias ``b`` are added;
        ``initial_bias`` must then be a ``(V1, V2, b)`` tuple or None.
        """
        super(Bilinear, self).__init__(W=(left_size, right_size, out_size))
        self.in_sizes = (left_size, right_size)
        self.nobias = nobias

        # TODO(Kenta OONO): I do not know appropriate way of
        # initializing weights in tensor network.
        # This initialization is a modification of
        # that of Linear function.

        if isinstance(initialW, (numpy.ndarray, cuda.ndarray)):
            assert initialW.shape == self.W.data.shape
        initializers.init_weight(self.W.data, initialW)

        if not self.nobias:
            self.add_param('V1', (left_size, out_size))
            self.add_param('V2', (right_size, out_size))
            self.add_param('b', out_size)

            if isinstance(initial_bias, tuple):
                V1, V2, b = initial_bias
            elif initial_bias is None:
                V1 = V2 = None
                b = 0
            else:
                raise ValueError('initial_bias must be tuple or None')

            if isinstance(V1, (numpy.ndarray, cuda.ndarray)):
                assert V1.shape == self.V1.data.shape
            if isinstance(V2, (numpy.ndarray, cuda.ndarray)):
                assert V2.shape == self.V2.data.shape
            if isinstance(b, (numpy.ndarray, cuda.ndarray)):
                assert b.shape == self.b.data.shape
            initializers.init_weight(self.V1.data, V1)
            initializers.init_weight(self.V2.data, V2)
            initializers.init_weight(self.b.data, b)
Ejemplo n.º 28
0
 def __init__(self, in_size, out_size, initialW=None, ignore_label=None):
     """Create an ID-embedding table of shape ``(in_size, out_size)``."""
     super(EmbedID, self).__init__(W=(in_size, out_size))
     # Default initializer: samples from N(0, 1).
     weight_init = initializers.Normal(1.0) if initialW is None else initialW
     initializers.init_weight(self.W.data, weight_init)
     self.ignore_label = ignore_label
Ejemplo n.º 29
0
 def _initialize_params(self, size):
     """Create and initialize the scale (gamma) and shift (beta) params."""
     for name, init in (('gamma', self._gamma_initializer),
                        ('beta', self._beta_initializer)):
         self.add_param(name, size)
         initializers.init_weight(getattr(self, name).data, init)
Ejemplo n.º 30
0
 def _initialize_params(self, in_size):
     """Create and initialize the weight ``W`` once ``in_size`` is known."""
     self.add_param("W", (self.out_size, in_size))
     # For backward compatibility, the scale of weights is proportional to
     # the square root of wscale.
     initializers.init_weight(self.W.data, self.initialW, scale=math.sqrt(self.wscale))
Ejemplo n.º 31
0
 def initialize_LSTM(self, LSTM, initializer):
     """Apply ``initializer`` to both weight matrices of an LSTM link."""
     for weight in (LSTM.upward.W, LSTM.lateral.W):
         initializers.init_weight(weight.data, initializer)
Ejemplo n.º 32
0
 def _initialize_params(self, size):
     """Create and initialize the scale (gamma) and shift (beta) params."""
     self.add_param('gamma', size)
     initializers.init_weight(self.gamma.data, self._gamma_initializer)
     self.add_param('beta', size)
     initializers.init_weight(self.beta.data, self._beta_initializer)
Ejemplo n.º 33
0
    def initialize_parameters(self):
        """Re-initialize candidate weights and the LSTM with Glorot normal."""
        G_init = initializers.GlorotNormal()

        #initializers.init_weight(self.W_predict.W.data, G_init)
        initializers.init_weight(self.W_candidate.W.data, G_init)
        self.initialize_LSTM(self.LSTM, G_init)
Ejemplo n.º 34
0
    def __init__(self,
                 in_size,
                 out_size,
                 lateral_init=None,
                 upward_init=None,
                 bias_init=0,
                 forget_bias_init=0):
        """Set up an LSTM cell's upward/lateral links and gate biases.

        Both linear links stack the weights of the four gates along the
        output axis; each ``out_size``-row slice is initialized
        independently.  Only the forget gate bias uses
        ``forget_bias_init``.
        """
        super(LSTMBase, self).__init__(
            upward=linear.Linear(in_size, 4 * out_size, initialW=0),
            lateral=linear.Linear(out_size,
                                  4 * out_size,
                                  initialW=0,
                                  nobias=True),
        )
        self.state_size = out_size

        # Initialize each gate's weight slice separately.
        for i in six.moves.range(0, 4 * out_size, out_size):
            initializers.init_weight(self.lateral.W.data[i:i + out_size, :],
                                     lateral_init)
            initializers.init_weight(self.upward.W.data[i:i + out_size, :],
                                     upward_init)

        # Split the stacked bias into the a/i/f/o gates.
        a, i, f, o = lstm._extract_gates(
            self.upward.b.data.reshape(1, 4 * out_size, 1))
        initializers.init_weight(a, bias_init)
        initializers.init_weight(i, bias_init)
        initializers.init_weight(f, forget_bias_init)
        initializers.init_weight(o, bias_init)
Ejemplo n.º 35
0
    def initialize_parameters(self):
        """Re-initialize candidate weights and the LSTM with Glorot normal."""
        G_init = initializers.GlorotNormal()

        #initializers.init_weight(self.W_predict.W.data, G_init)
        initializers.init_weight(self.W_candidate.W.data, G_init)
        self.initialize_LSTM(self.LSTM, G_init)
Ejemplo n.º 36
0
    def __init__(self, children, in_size, out_size,
                 lateral_init=None, upward_init=None,
                 bias_init=0, forget_bias_init=0):
        """Set up a tree-LSTM with per-child lateral and forget weights.

        ``lateral`` has ``3 * children * out_size`` output rows and
        ``forget`` has ``children * children * out_size``; both take the
        concatenated child states as input.
        """
        super(FasterTreeLSTM, self).__init__(
            upward=L.Linear(in_size, 4 * out_size, initialW=0),
            lateral=L.Linear(children * out_size, 3 * children * out_size, initialW=0, nobias=True),
            forget=L.Linear(children * out_size, children * children * out_size, initialW=0, nobias=True)
        )
        self.state_size = out_size
        self.n_children = children

        # NOTE(review): the inner loops index lateral/forget rows up to
        # i + j with i < 4*out_size, which can exceed the row counts
        # declared above (3*children*out_size / children^2*out_size) for
        # small ``children``; out-of-range slices are silently empty —
        # confirm the intended bounds.
        for i in range(0, 4 * out_size, out_size):
            initializers.init_weight(self.upward.W.data[i:i + out_size, :], upward_init)
            for j in range(0, 4 * out_size, out_size):
                initializers.init_weight(self.lateral.W.data[i + j:i + j + out_size, :], lateral_init)
            for j in range(0, self.n_children * out_size, out_size):
                initializers.init_weight(self.forget.W.data[i + j:i + j + out_size, :], lateral_init)

        # Split the stacked upward bias into the a/i/f/o gates; only the
        # forget gate uses forget_bias_init.
        a, i, f, o = numpy_extract_gates(self.upward.b.data.reshape(1, 4 * out_size, 1))
        initializers.init_weight(a, bias_init)
        initializers.init_weight(i, bias_init)
        initializers.init_weight(f, forget_bias_init)
        initializers.init_weight(o, bias_init)
Ejemplo n.º 37
0
    def __init__(self, in_size, out_size,
                 lateral_init=None, upward_init=None,
                 bias_init=0, forget_bias_init=0):
        """Set up an LSTM cell's upward/lateral links and gate biases.

        Both linear links stack the four gates' weights along the output
        axis; each ``out_size``-row slice is initialized independently.
        Only the forget gate bias uses ``forget_bias_init``.
        """
        super(LSTMBase, self).__init__(
            upward=linear.Linear(in_size, 4 * out_size, initialW=0),
            lateral=linear.Linear(out_size, 4 * out_size,
                                  initialW=0, nobias=True),
        )
        self.state_size = out_size

        # Initialize each gate's weight slice separately.
        for i in six.moves.range(0, 4 * out_size, out_size):
            initializers.init_weight(
                self.lateral.W.data[i:i + out_size, :], lateral_init)
            initializers.init_weight(
                self.upward.W.data[i:i + out_size, :], upward_init)

        # Split the stacked bias into the a/i/f/o gates.
        a, i, f, o = lstm._extract_gates(
            self.upward.b.data.reshape(1, 4 * out_size, 1))
        initializers.init_weight(a, bias_init)
        initializers.init_weight(i, bias_init)
        initializers.init_weight(f, forget_bias_init)
        initializers.init_weight(o, bias_init)
Ejemplo n.º 38
0
 def initialize_LSTM(self, LSTM, initializer):
     """Apply ``initializer`` to both weight matrices of an LSTM link."""
     initializers.init_weight(LSTM.upward.W.data, initializer)
     initializers.init_weight(LSTM.lateral.W.data, initializer)
Ejemplo n.º 39
0
 def _initialize_params(self):
     """Create ``W`` with the shape of ``self.initialW`` and initialize it.

     Requires ``self.initialW`` to be an array-like exposing ``.shape``.
     """
     self.add_param('W', self.initialW.shape)
     # For backward compatibility, the scale of weights is proportional to
     # the square root of wscale.
     initializers.init_weight(self.W.data, self.initialW,
                              scale=math.sqrt(self.wscale))
Ejemplo n.º 40
0
 def _initialize_params(self, in_channels):
     """Create the convolution weight once the input channel count is known."""
     kernel_h, kernel_w = _pair(self.ksize)
     # Convolution weight layout: (out_channels, in_channels, kh, kw).
     self.add_param(
         'W', (self.out_channels, in_channels, kernel_h, kernel_w))
     initializers.init_weight(self.W.data, self.initialW)
Ejemplo n.º 41
0
    def _initialize_params(self):
        """Initialize per-gate weight slices and gate biases of the LSTM."""
        # Each gate occupies an out_size-row slice of the stacked weights.
        for i in six.moves.range(0, 4 * self.state_size, self.state_size):
            initializers.init_weight(
                self.lateral.W.data[i:i + self.state_size, :],
                self.lateral_init)
            initializers.init_weight(
                self.upward.W.data[i:i + self.state_size, :], self.upward_init)

        # Split the stacked bias into the a/i/f/o gates; only the forget
        # gate uses forget_bias_init.
        a, i, f, o = lstm._extract_gates(
            self.upward.b.data.reshape(1, 4 * self.state_size, 1))
        initializers.init_weight(a, self.bias_init)
        initializers.init_weight(i, self.bias_init)
        initializers.init_weight(f, self.forget_bias_init)
        initializers.init_weight(o, self.bias_init)
Ejemplo n.º 42
0
 def __init__(self, in_size, out_size, initialW=None, ignore_label=None):
     """Create an ID-embedding table of shape ``(in_size, out_size)``.

     Args:
         in_size: Vocabulary size (number of distinct IDs).
         out_size: Embedding vector size.
         initialW: Initial weight; defaults to samples from N(0, 1).
         ignore_label: ID treated as padding and skipped in lookup.
     """
     super(EmbedID, self).__init__(W=(in_size, out_size))
     if initialW is None:
         initialW = initializers.Normal(1.0)
     initializers.init_weight(self.W.data, initialW)
     self.ignore_label = ignore_label