Example no. 1
0
    def _init_params(self):
        shape_hh = (self.n_hids, self.n_hids)
        if self.with_attention:
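            # attention parameters (judging by their shapes and their use elsewhere in this
            # codebase): B_hp projects the decoder hidden state, D_pe (n_hids x 1) maps the
            # attention energies to a scalar score per source position, b_tt is the energy bias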
            self.B_hp = norm_weight(shape=shape_hh, name=_p(self.pname, 'B_hp'))
            self.b_tt = constant_weight(shape=(self.n_hids,), name=_p(self.pname, 'b_tt'))
            self.D_pe = norm_weight(shape=(self.n_hids, 1), name=_p(self.pname, 'D_pe'))

            self.params = [self.B_hp, self.b_tt, self.D_pe]
Example no. 2
0
    def _init_params(self):

        shape_i0o = (self.n_in_0, self.n_out)
        shape_i1o = (self.n_in_1, self.n_out)

        if self.orth:
            if self.n_in_0 != self.n_out or self.n_in_1 != self.n_out:
                raise ValueError('n_in must equal n_out when orth is required in FeedForward')
            self.W0 = ortho_weight(rng=self.rng, shape=shape_i0o, name=_p(self.pname, 'W0'))
            self.W1 = ortho_weight(rng=self.rng, shape=shape_i1o, name=_p(self.pname, 'W1'))
        else:
            self.W0 = norm_weight(rng=self.rng, shape=shape_i0o, name=_p(self.pname, 'W0'))
            self.W1 = norm_weight(rng=self.rng, shape=shape_i1o, name=_p(self.pname, 'W1'))
        self.b = constant_weight(shape=(self.n_out, ), name=_p(self.pname, 'b'))
        self.params = [self.W0, self.W1, self.b]
Example no. 3
0
    def run_pipeline(self, state_below, mask_below, context=None):

        hiddens = self.apply(state_below, mask_below, context=context)

        if self.with_context:
            n_in = self.n_in + self.n_hids + self.c_hids
            n_out = self.n_hids * 2
            n_times = K.shape(state_below)[0]
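            # replicate the fixed context vector across all n_times steps so it can be
            # concatenated with the per-step inputs and hidden states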
            r_context = ReplicateLayer(context, n_times)
            combine = K.concatenate([state_below, hiddens, r_context], axis=2)
        else:
            n_in = self.n_in + self.n_hids
            n_out = self.n_hids * 2  # for maxout
            combine = K.concatenate([state_below, hiddens], axis=2)

        self.W_m = norm_weight(shape=(n_in, n_out), name=_p(self.pname, 'W_m'))
        self.b_m = constant_weight(shape=(n_out, ), name=_p(self.pname, 'b_m'))

        self.params += [self.W_m, self.b_m]

        # maxout
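        # (pool size 2: the last dimension of merge_out is split into pairs and each pair
        #  is reduced to its maximum, halving n_out back down to n_hids)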
        merge_out = K.dot(combine, self.W_m) + self.b_m
        merge_out_shape = K.shape(merge_out)
        merge_max_out = K.max(K.reshape(merge_out,
                                        shape=(merge_out_shape[0],
                                               merge_out_shape[1],
                                               merge_out_shape[2] // 2, 2)),
                              axis=3)

        return merge_max_out * mask_below
Example no. 4
0
    def _init_params(self):

        shape_xh = (self.n_in, self.n_hids)
        shape_hh = (self.n_hids, self.n_hids)
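        # GRU parameters: W_x* project the input and W_h* the previous hidden state for
        # the update (z), reset (r) and candidate (h) computations; the b_* are constant
        # bias vectors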

        self.W_xz = norm_weight(shape=shape_xh, name=_p(self.pname, 'W_xz'))
        self.W_xr = norm_weight(shape=shape_xh, name=_p(self.pname, 'W_xr'))
        self.W_xh = norm_weight(shape=shape_xh, name=_p(self.pname, 'W_xh'))
        self.b_z = constant_weight(shape=(self.n_hids, ),
                                   name=_p(self.pname, 'b_z'))
        self.b_r = constant_weight(shape=(self.n_hids, ),
                                   name=_p(self.pname, 'b_r'))
        self.b_h = constant_weight(shape=(self.n_hids, ),
                                   name=_p(self.pname, 'b_h'))
        self.W_hz = ortho_weight(shape=shape_hh, name=_p(self.pname, 'W_hz'))
        self.W_hr = ortho_weight(shape=shape_hh, name=_p(self.pname, 'W_hr'))
        self.W_hh = ortho_weight(shape=shape_hh, name=_p(self.pname, 'W_hh'))

        self.params = [
            self.W_xz, self.W_xr, self.W_xh, self.W_hz, self.W_hr, self.W_hh,
            self.b_z, self.b_r, self.b_h
        ]

        if self.with_context:
            shape_ch = (self.c_hids, self.n_hids)
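            # projections of the fixed context into each gate, plus W_c_init, which
            # presumably initializes the hidden state from the context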
            self.W_cz = norm_weight(shape=shape_ch,
                                    name=_p(self.pname, 'W_cz'))
            self.W_cr = norm_weight(shape=shape_ch,
                                    name=_p(self.pname, 'W_cr'))
            self.W_ch = norm_weight(shape=shape_ch,
                                    name=_p(self.pname, 'W_ch'))
            self.W_c_init = norm_weight(shape=shape_ch,
                                        name=_p(self.pname, 'W_c_init'))

            self.params += [self.W_cz, self.W_cr, self.W_ch, self.W_c_init]
Example no. 5
0
    def __init__(self, vocab_size, embedding_size, name='embeddings'):

        self.vocab_size = vocab_size
        self.embedding_size = embedding_size

        # embedding matrix, initialized with norm_weight
        self.W = norm_weight(shape=(vocab_size, embedding_size), name=name)

        # parameters of the model
        self.params = [self.W]
Example no. 6
0
    def __init__(self, rng, n_in, n_out, name='LR'):

        # initialize the weights W as a matrix of shape (n_in, n_out)
        self.W = norm_weight(rng=rng, shape=(n_in, n_out), name=_p(name, 'W'))

        # initialize the biases b as a vector of n_out 0s
        self.b = constant_weight(shape=(n_out, ), name=_p(name, 'b'))

        # parameters of the model
        self.params = [self.W, self.b]
Example no. 7
0
    def _init_params(self):

        shape_xh = (self.n_in*3, self.n_hids)
        shape_hh = (self.n_hids*3, self.n_hids)
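        # the input and recurrent weights for the three GRU gates appear to be packed
        # into single matrices here (hence the factor of 3), as expected by the MKL GRU op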
        self.W_x = norm_weight(shape=shape_xh, name=_p(self.pname, 'W_x'))
        self.b = constant_weight(shape=(self.n_hids*3,), name=_p(self.pname, 'b'))
        self.W_h = ortho_weight(shape=shape_hh, name=_p(self.pname, 'W_h'))
        self.params = [self.W_x, self.W_h, self.b]
        self.GRU_op = mkl_gru.GRU(hid=self.n_hids, return_sequences=True)
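        # NOTE: the initial hidden state is hard-coded to shape (80, 1000), which
        # presumably corresponds to (batch size, n_hids)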
        self.h_init_state = numpy.zeros((80, 1000), numpy.float64)
Example no. 8
0
    def _init_params(self):

        shape_xh = (self.n_in, self.n_hids)
        shape_hh = (self.n_hids, self.n_hids)

        self.W_xz = norm_weight(rng=self.rng, shape=shape_xh, name=_p(self.pname, 'W_xz'))
        self.W_xr = norm_weight(rng=self.rng, shape=shape_xh, name=_p(self.pname, 'W_xr'))
        self.W_xh = norm_weight(rng=self.rng, shape=shape_xh, name=_p(self.pname, 'W_xh'))
        self.b_z = constant_weight(shape=(self.n_hids, ), name=_p(self.pname, 'b_z'))
        self.b_r = constant_weight(shape=(self.n_hids, ), name=_p(self.pname, 'b_r'))
        self.b_h = constant_weight(shape=(self.n_hids, ), name=_p(self.pname, 'b_h'))
        self.W_hz = ortho_weight(rng=self.rng, shape=shape_hh, name=_p(self.pname, 'W_hz'))
        self.W_hr = ortho_weight(rng=self.rng, shape=shape_hh, name=_p(self.pname, 'W_hr'))
        self.W_hh = ortho_weight(rng=self.rng, shape=shape_hh, name=_p(self.pname, 'W_hh'))

        self.params = [self.W_xz, self.W_xr, self.W_xh,
                       self.W_hz, self.W_hr, self.W_hh,
                       self.b_z, self.b_r, self.b_h]

        if self.with_layernorm:
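            # layer-normalization parameters: *_lnb are the additive biases (initialized
            # with scale_add) and *_lns the multiplicative gains (initialized with
            # scale_mul) for each pre-activation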
            self.W_xz_lnb = constant_weight(shape=(self.n_hids,), value=scale_add, name=_p(self.pname, 'xz_lnb'))
            self.W_xz_lns = constant_weight(shape=(self.n_hids,), value=scale_mul, name=_p(self.pname, 'xz_lns'))
            self.W_xr_lnb = constant_weight(shape=(self.n_hids,), value=scale_add, name=_p(self.pname, 'xr_lnb'))
            self.W_xr_lns = constant_weight(shape=(self.n_hids,), value=scale_mul, name=_p(self.pname, 'xr_lns'))
            self.W_xh_lnb = constant_weight(shape=(self.n_hids,), value=scale_add, name=_p(self.pname, 'xh_lnb'))
            self.W_xh_lns = constant_weight(shape=(self.n_hids,), value=scale_mul, name=_p(self.pname, 'xh_lns'))

            self.W_z_lnb = constant_weight(shape=(self.n_hids,), value=scale_add, name=_p(self.pname, 'z_lnb'))
            self.W_z_lns = constant_weight(shape=(self.n_hids,), value=scale_mul, name=_p(self.pname, 'z_lns'))
            self.W_r_lnb = constant_weight(shape=(self.n_hids,), value=scale_add, name=_p(self.pname, 'r_lnb'))
            self.W_r_lns = constant_weight(shape=(self.n_hids,), value=scale_mul, name=_p(self.pname, 'r_lns'))
            self.W_h_lnb = constant_weight(shape=(self.n_hids,), value=scale_add, name=_p(self.pname, 'h_lnb'))
            self.W_h_lns = constant_weight(shape=(self.n_hids,), value=scale_mul, name=_p(self.pname, 'h_lns'))

            self.params += [self.W_xz_lnb, self.W_xz_lns, self.W_xr_lnb, self.W_xr_lns, self.W_xh_lnb, self.W_xh_lns,
                            self.W_z_lnb, self.W_z_lns, self.W_r_lnb, self.W_r_lns, self.W_h_lnb, self.W_h_lns]


        if self.with_context:
            shape_ch = (self.c_hids, self.n_hids)
            self.W_cz = norm_weight(rng=self.rng, shape=shape_ch, name=_p(self.pname, 'W_cz'))
            self.W_cr = norm_weight(rng=self.rng, shape=shape_ch, name=_p(self.pname, 'W_cr'))
            self.W_ch = norm_weight(rng=self.rng, shape=shape_ch, name=_p(self.pname, 'W_ch'))
            self.W_c_init = norm_weight(rng=self.rng, shape=shape_ch, name=_p(self.pname, 'W_c_init'))

            self.params += [self.W_cz, self.W_cr, self.W_ch, self.W_c_init]

            if self.with_layernorm:
                self.W_cz_lnb = constant_weight(shape=(self.n_hids,), value=scale_add, name=_p(self.pname, 'cz_lnb'))
                self.W_cz_lns = constant_weight(shape=(self.n_hids,), value=scale_mul, name=_p(self.pname, 'cz_lns'))
                self.W_cr_lnb = constant_weight(shape=(self.n_hids,), value=scale_add, name=_p(self.pname, 'cr_lnb'))
                self.W_cr_lns = constant_weight(shape=(self.n_hids,), value=scale_mul, name=_p(self.pname, 'cr_lns'))
                self.W_ch_lnb = constant_weight(shape=(self.n_hids,), value=scale_add, name=_p(self.pname, 'ch_lnb'))
                self.W_ch_lns = constant_weight(shape=(self.n_hids,), value=scale_mul, name=_p(self.pname, 'ch_lns'))

                self.params += [self.W_cz_lnb, self.W_cz_lns, self.W_cr_lnb, self.W_cr_lns, self.W_ch_lnb, self.W_ch_lns]
Example no. 9
0
    def _init_params(self):

        shape_io = (self.n_in, self.n_out)

        if self.orth:
            if self.n_in != self.n_out :
                raise ValueError('n_in must equal n_out when orth is required in FeedForward')
            self.W = ortho_weight(rng=self.rng, shape=shape_io, name=_p(self.pname, 'W'))
        else:
            self.W = norm_weight(rng=self.rng, shape=shape_io, name=_p(self.pname, 'W'))
        self.b = constant_weight(shape=(self.n_out, ), name=_p(self.pname, 'b'))
        self.params = [self.W, self.b]
Example no. 10
0
    def _init_params(self):

        shape_io = (self.n_in_0, self.n_out)

        if self.orth:
            if self.n_in_0 != self.n_out:
                raise ValueError('n_in must equal n_out when orth is required in FeedForward')
            self.W = ortho_weight(rng=self.rng, shape=shape_io, name=_p(self.pname, 'W'))
        else:
            self.W = norm_weight(rng=self.rng, shape=shape_io, name=_p(self.pname, 'W'))
        self.params = [self.W]

        self.ff = FeedForward(self.n_in_1, self.n_out, orth=self.orth, rng=self.rng, name=_p(self.pname, 'FF_W'))
        self.params.extend(self.ff.params)
Example no. 11
0
    def _init_params(self):

        shape_xh = (self.n_in, self.n_hids)
        shape_xh4 = (self.n_in, 4*self.n_hids)
        shape_hh = (self.n_hids, self.n_hids)
        shape_hh4 = (self.n_hids, 4*self.n_hids)

        self.W_pre_x = norm_weight(rng=self.rng, shape=shape_xh4, name=_p(self.pname, 'W_pre_x'))
        self.W_h = multi_orth(rng=self.rng, size=shape_hh4, name=_p(self.pname, 'W_h'))
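        # the four LSTM gate biases (input, forget, output, cell) are built as plain
        # arrays (share=False), with the forget-gate bias set to 1, and then
        # concatenated into a single shared bias vector b_pre_x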

        b_i = constant_weight(share=False, shape=(self.n_hids, ), name=_p(self.pname, 'b_i'))
        b_f = constant_weight(share=False, value=1., shape=(self.n_hids, ), name=_p(self.pname, 'b_f'))
        b_o = constant_weight(share=False, shape=(self.n_hids, ), name=_p(self.pname, 'b_o'))
        b_c = constant_weight(share=False, shape=(self.n_hids, ), name=_p(self.pname, 'b_c'))
        b_ifoc = numpy.concatenate([b_i, b_f, b_o, b_c], axis=0)
        self.b_pre_x = theano.shared(value=b_ifoc, borrow=True, name=_p(self.pname, 'b_pre_x'))

        self.params += [self.W_pre_x, self.W_h, self.b_pre_x]

        if self.with_context:
            raise NotImplementedError

        if self.with_begin_tag:
            self.struct_begin_tag = constant_weight(shape=(self.n_hids,), value=0., name=_p(self.pname, 'struct_begin_tag'))
            self.params += [self.struct_begin_tag]

        if self.with_end_tag:
            self.struct_end_tag = constant_weight(shape=(self.n_in,), value=0., name=_p(self.pname, 'struct_end_tag'))
            self.params += [self.struct_end_tag]

        if self.n_att_ctx:
            self.lstm_combine_ctx_h = LSTM(self.n_att_ctx, self.n_hids, rng=self.rng, name=_p(self.pname, 'lstm_combine_ctx_h'))
            self.params.extend(self.lstm_combine_ctx_h.params)
            self.attention = ATTENTION(self.n_hids, self.rng, name=_p(self.pname, 'att_ctx'))
            self.params.extend(self.attention.params)
            if self.seq_pyramid:
                self.pyramid_on_seq = LSTM(self.n_att_ctx, self.n_att_ctx, rng=self.rng, name=_p(self.pname, 'pyramid_on_seq'))
                self.params.extend(self.pyramid_on_seq.params)
                self.ff_pyramid2ctx = FeedForward(self.n_att_ctx, self.n_hids, name=_p(self.pname, 'ff_pyramid2ctx'))
                self.params.extend(self.ff_pyramid2ctx.params)
Example no. 12
0
    def _init_params(self):
        shape_xh = (self.n_in, self.n_hids)
        shape_xh2 = (self.n_in, 2*self.n_hids)
        shape_hh = (self.n_hids, self.n_hids)
        shape_hh2 = (self.n_hids, 2*self.n_hids)
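        # the update (z) and reset (r) gates share fused projections W_xzr / W_hzr and a
        # fused bias b_zr; the candidate state keeps its own W_xh / W_hh / b_h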

        self.W_xzr = norm_weight(rng=self.rng, shape=shape_xh2, name=_p(self.pname, 'W_xzr'))
        self.W_xh  = norm_weight(rng=self.rng, shape=shape_xh, name=_p(self.pname, 'W_xh'))
        self.b_zr  = constant_weight(shape=(2*self.n_hids, ), name=_p(self.pname, 'b_zr'))
        self.b_h   = constant_weight(shape=(self.n_hids, ), name=_p(self.pname, 'b_h'))
        self.W_hzr = multi_orth(rng=self.rng, size=shape_hh2, name=_p(self.pname, 'W_hzr'))
        self.W_hh  = ortho_weight(rng=self.rng, shape=shape_hh, name=_p(self.pname, 'W_hh'))

        self.params += [self.W_xzr, self.W_xh,
                        self.W_hzr, self.W_hh,
                        self.b_zr,  self.b_h]

        if self.with_context:
            shape_ch = (self.c_hids, self.n_hids)
            self.W_cz = norm_weight(rng=self.rng, shape=shape_ch, name=_p(self.pname, 'W_cz'))
            self.W_cr = norm_weight(rng=self.rng, shape=shape_ch, name=_p(self.pname, 'W_cr'))
            self.W_ch = norm_weight(rng=self.rng, shape=shape_ch, name=_p(self.pname, 'W_ch'))
            self.W_c_init = norm_weight(rng=self.rng, shape=shape_ch, name=_p(self.pname, 'W_c_init'))

            self.params += [self.W_cz, self.W_cr, self.W_ch, self.W_c_init]

        if self.with_begin_tag:
            self.struct_begin_tag = constant_weight(shape=(self.n_hids,), value=0., name=_p(self.pname, 'struct_begin_tag'))
            self.params += [self.struct_begin_tag]

        if self.with_end_tag:
            self.struct_end_tag = constant_weight(shape=(self.n_in,), value=0., name=_p(self.pname, 'struct_end_tag'))
            self.params += [self.struct_end_tag]

        if self.n_att_ctx:
            self.gru_combine_ctx_h = GRU(self.n_att_ctx, self.n_hids, rng=self.rng, name=_p(self.pname, 'gru_combine_ctx_h'))
            self.params.extend(self.gru_combine_ctx_h.params)
            self.attention = ATTENTION(self.n_hids, self.rng, name=_p(self.pname, 'att_ctx'))
            self.params.extend(self.attention.params)
Example no. 13
0
    def __init__(self, n_in, n_out, name='LR'):
        self.W = norm_weight(shape=(n_in, n_out), name=_p(name, 'W'))
        self.b = constant_weight(shape=(n_out, ), name=_p(name, 'b'))
        self.params = [self.W, self.b]
        self.n_out = n_out
Example no. 14
0
    def _init_params(self):
        # generally, parameters with shape shape_ch = (self.n_cdim, self.n_hids) can use tied weights;
        # these, however, combine the last generated word with the decoder state,
        # and thus cannot use tied weights
        shape_xh = (self.n_in, self.n_hids)
        shape_hh = (self.n_hids, self.n_hids)

        self.W_xz = norm_weight(shape=shape_xh, name=_p(self.pname, 'W_xz'))
        self.W_xr = norm_weight(shape=shape_xh, name=_p(self.pname, 'W_xr'))
        self.W_xh = norm_weight(shape=shape_xh, name=_p(self.pname, 'W_xh'))
        self.b_z = constant_weight(shape=(self.n_hids, ),
                                   name=_p(self.pname, 'b_z'))
        self.b_r = constant_weight(shape=(self.n_hids, ),
                                   name=_p(self.pname, 'b_r'))
        self.b_h = constant_weight(shape=(self.n_hids, ),
                                   name=_p(self.pname, 'b_h'))
        self.W_hz = ortho_weight(shape=shape_hh, name=_p(self.pname, 'W_hz'))
        self.W_hr = ortho_weight(shape=shape_hh, name=_p(self.pname, 'W_hr'))
        self.W_hh = ortho_weight(shape=shape_hh, name=_p(self.pname, 'W_hh'))

        self.params = [
            self.W_xz, self.W_xr, self.W_xh, self.W_hz, self.W_hr, self.W_hh,
            self.b_z, self.b_r, self.b_h
        ]

        shape_ch = (self.n_cdim, self.n_hids)

        self.W_cz = norm_weight(shape=shape_ch, name=_p(self.pname, 'W_cz'))
        self.W_cr = norm_weight(shape=shape_ch, name=_p(self.pname, 'W_cr'))
        self.W_ch = norm_weight(shape=shape_ch, name=_p(self.pname, 'W_ch'))
        self.W_c_init = norm_weight(shape=shape_ch,
                                    name=_p(self.pname, 'W_c_init'))
        # we don't add the new params if we use tied_weights, since we reuse the weights in decoder
        self.params += [self.W_cz, self.W_cr, self.W_ch, self.W_c_init]

        self.b_c_init = constant_weight(shape=(self.n_hids, ),
                                        name=_p(self.pname, 'b_c_init'))
        self.params += [self.b_c_init]

        # we moved the parameters below here so that it works for both with_context and with_attention modes
        # modification in this version:
        # in the paper, e_{i,j} = a(s_{i-1}, h_j)
        # here, e_{i,j} = a(GRU(s_{i-1}, y_{i-1}), h_j), which takes the last generated target word into account
        # all the following parameters are for the introduced GRU
        # it is a reasonable modification
        self.W_n1_h = ortho_weight(shape=shape_hh,
                                   name=_p(self.pname, 'W_n1_h'))
        self.W_n1_r = ortho_weight(shape=shape_hh,
                                   name=_p(self.pname, 'W_n1_r'))
        self.W_n1_z = ortho_weight(shape=shape_hh,
                                   name=_p(self.pname, 'W_n1_z'))
        self.b_n1_h = constant_weight(shape=(self.n_hids, ),
                                      name=_p(self.pname, 'b_n1_h'))
        self.b_n1_r = constant_weight(shape=(self.n_hids, ),
                                      name=_p(self.pname, 'b_n1_r'))
        self.b_n1_z = constant_weight(shape=(self.n_hids, ),
                                      name=_p(self.pname, 'b_n1_z'))
        self.params += [
            self.W_n1_h, self.W_n1_r, self.W_n1_z, self.b_n1_h, self.b_n1_r,
            self.b_n1_z
        ]
        ###############################################

        if self.with_attention:
            self.A_cp = norm_weight(shape=shape_ch,
                                    name=_p(self.pname, 'A_cp'))
            self.params += [self.A_cp]

            self.B_hp = norm_weight(shape=shape_hh,
                                    name=_p(self.pname, 'B_hp'))
            self.b_tt = constant_weight(shape=(self.n_hids, ),
                                        name=_p(self.pname, 'b_tt'))
            self.D_pe = norm_weight(shape=(self.n_hids, 1),
                                    name=_p(self.pname, 'D_pe'))
            # self.c_tt = constant_weight(shape=(1,), name=_p(self.pname, 'c_tt'))
            self.params += [self.B_hp, self.b_tt, self.D_pe]

        # for the error on encoder states, we don't need the probability,
        # and thus would not need the readout, which costs a large number of parameters
        # for readout
        n_out = self.n_in * self.maxout_part
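        # the readout combines projections of the source context (W_o_c), the decoder
        # state (W_o_h) and the decoder input (W_o_e, presumably the previous target
        # embedding); n_out is widened by maxout_part for the subsequent maxout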
        self.W_o_c = norm_weight(shape=(self.n_cdim, n_out),
                                 name=_p(self.pname, 'W_out_c'))
        self.W_o_h = norm_weight(shape=(self.n_hids, n_out),
                                 name=_p(self.pname, 'W_out_h'))
        self.W_o_e = norm_weight(shape=(self.n_in, n_out),
                                 name=_p(self.pname, 'W_out_e'))
        self.b_o = constant_weight(shape=(n_out, ),
                                   name=_p(self.pname, 'b_out_o'))

        self.params += [self.W_o_c, self.W_o_h, self.W_o_e, self.b_o]
Example no. 15
0
    def _init_params(self):
        # added by Zhaopeng Tu, 2016-07-12
        # this for combining lastly generated words and decoder state,
        shape_xh = (self.n_in, self.n_hids)
        shape_hh = (self.n_hids, self.n_hids)

        self.W_xz = norm_weight(rng=self.rng, shape=shape_xh, name=_p(self.pname, 'W_xz'))
        self.W_xr = norm_weight(rng=self.rng, shape=shape_xh, name=_p(self.pname, 'W_xr'))
        self.W_xh = norm_weight(rng=self.rng, shape=shape_xh, name=_p(self.pname, 'W_xh'))
        self.b_z = constant_weight(shape=(self.n_hids, ), name=_p(self.pname, 'b_z'))
        self.b_r = constant_weight(shape=(self.n_hids, ), name=_p(self.pname, 'b_r'))
        self.b_h = constant_weight(shape=(self.n_hids, ), name=_p(self.pname, 'b_h'))
        self.W_hz = ortho_weight(rng=self.rng, shape=shape_hh, name=_p(self.pname, 'W_hz'))
        self.W_hr = ortho_weight(rng=self.rng, shape=shape_hh, name=_p(self.pname, 'W_hr'))
        self.W_hh = ortho_weight(rng=self.rng, shape=shape_hh, name=_p(self.pname, 'W_hh'))

        self.params = [self.W_xz, self.W_xr, self.W_xh,
                       self.W_hz, self.W_hr, self.W_hh,
                       self.b_z, self.b_r, self.b_h]

        shape_ch = (self.n_cdim, self.n_hids)
        self.W_cz = norm_weight(rng=self.rng, shape=shape_ch, name=_p(self.pname, 'W_cz'))
        self.W_cr = norm_weight(rng=self.rng, shape=shape_ch, name=_p(self.pname, 'W_cr'))
        self.W_ch = norm_weight(rng=self.rng, shape=shape_ch, name=_p(self.pname, 'W_ch'))
        self.W_c_init = norm_weight(rng=self.rng, shape=shape_ch, name=_p(self.pname, 'W_c_init'))
        # modified by Zhaopeng Tu, 2016-07-29
        # we don't add the new params if we use tied_weights, since we reuse the weights in decoder
        self.params += [self.W_cz, self.W_cr, self.W_ch, self.W_c_init]

        self.b_c_init = constant_weight(shape=(self.n_hids, ), name=_p(self.pname, 'b_c_init'))
        self.params += [self.b_c_init]
        
        # modified by Zhaopeng Tu, 2016-06-08
        # we moved the parameters below here so that it works for both with_context and with_attention modes
        # commented by Zhaopeng Tu, 2016-04-29
        # modification in this version:
        # in the paper, e_{i,j} = a(s_{i-1}, h_j)
        # here, e_{i,j} = a(GRU(s_{i-1}, y_{i-1}), h_j), which takes the last generated target word into account
        # all the following parameters are for the introduced GRU
        # it is a reasonable modification
        self.W_n1_h = ortho_weight(rng=self.rng, shape=shape_hh, name=_p(self.pname, 'W_n1_h'))
        self.W_n1_r = ortho_weight(rng=self.rng, shape=shape_hh, name=_p(self.pname, 'W_n1_r'))
        self.W_n1_z = ortho_weight(rng=self.rng, shape=shape_hh, name=_p(self.pname, 'W_n1_z'))
        self.b_n1_h = constant_weight(shape=(self.n_hids, ), name=_p(self.pname, 'b_n1_h'))
        self.b_n1_r = constant_weight(shape=(self.n_hids, ), name=_p(self.pname, 'b_n1_r'))
        self.b_n1_z = constant_weight(shape=(self.n_hids, ), name=_p(self.pname, 'b_n1_z'))
        self.params += [self.W_n1_h, self.W_n1_r, self.W_n1_z, self.b_n1_h, self.b_n1_r, self.b_n1_z]
        ###############################################

        if self.with_attention:
            self.A_cp = norm_weight(rng=self.rng, shape=shape_ch, name=_p(self.pname, 'A_cp'))
            self.B_hp = norm_weight(rng=self.rng, shape=shape_hh, name=_p(self.pname, 'B_hp'))
            self.b_tt = constant_weight(shape=(self.n_hids, ), name=_p(self.pname, 'b_tt'))
            self.D_pe = norm_weight(rng=self.rng, shape=(self.n_hids, 1), name=_p(self.pname, 'D_pe'))
            self.c_tt = constant_weight(shape=(1, ), name=_p(self.pname, 'c_tt'))
            self.params += [self.A_cp, self.B_hp, self.b_tt, self.D_pe, self.c_tt]


        # for readout
        n_out = self.n_in * self.maxout_part
        self.W_o_c = norm_weight(rng=self.rng, shape=(self.n_cdim, n_out), name=_p(self.pname, 'W_out_c'))
        self.W_o_h = norm_weight(rng=self.rng, shape=(self.n_hids, n_out), name=_p(self.pname, 'W_out_h'))
        self.W_o_e = norm_weight(rng=self.rng, shape=(self.n_in, n_out), name=_p(self.pname, 'W_out_e'))
        self.b_o = constant_weight(shape=(n_out, ), name=_p(self.pname, 'b_out_o'))

        self.params += [self.W_o_c, self.W_o_h, self.W_o_e, self.b_o]
Example no. 16
0
    def _init_params(self):

        shape_xh = (self.n_in, self.n_hids)
        shape_hh = (self.n_hids, self.n_hids)

        self.W_xz = norm_weight(shape=shape_xh, name=_p(self.pname, 'W_xz'))
        self.W_xr = norm_weight(shape=shape_xh, name=_p(self.pname, 'W_xr'))
        self.W_xh = norm_weight(shape=shape_xh, name=_p(self.pname, 'W_xh'))
        self.b_z = constant_weight(shape=(self.n_hids, ),
                                   name=_p(self.pname, 'b_z'))
        self.b_r = constant_weight(shape=(self.n_hids, ),
                                   name=_p(self.pname, 'b_r'))
        self.b_h = constant_weight(shape=(self.n_hids, ),
                                   name=_p(self.pname, 'b_h'))
        self.W_hz = ortho_weight(shape=shape_hh, name=_p(self.pname, 'W_hz'))
        self.W_hr = ortho_weight(shape=shape_hh, name=_p(self.pname, 'W_hr'))
        self.W_hh = ortho_weight(shape=shape_hh, name=_p(self.pname, 'W_hh'))

        self.params = [
            self.W_xz, self.W_xr, self.W_xh, self.W_hz, self.W_hr, self.W_hh,
            self.b_z, self.b_r, self.b_h
        ]

        shape_ch = (self.n_cdim, self.n_hids)
        self.W_cz = norm_weight(shape=shape_ch, name=_p(self.pname, 'W_cz'))
        self.W_cr = norm_weight(shape=shape_ch, name=_p(self.pname, 'W_cr'))
        self.W_ch = norm_weight(shape=shape_ch, name=_p(self.pname, 'W_ch'))
        self.W_c_init = norm_weight(shape=(self.n_cdim, self.n_hids),
                                    name=_p(self.pname, 'W_c_init'))
        self.b_c_init = constant_weight(shape=(self.n_hids, ),
                                        name=_p(self.pname, 'b_c_init'))

        self.params += [
            self.W_cz, self.W_cr, self.W_ch, self.W_c_init, self.b_c_init
        ]

        # we moved the parameters below here so that it works for both with_context and with_attention modes
        # modification in this version:
        # in the paper, e_{i,j} = a(s_{i-1}, h_j)
        # here, e_{i,j} = a(GRU(s_{i-1}, y_{i-1}), h_j), which takes the last generated target word into account
        # all the following parameters are for the introduced GRU
        # it is a reasonable modification
        self.W_n1_h = ortho_weight(shape=shape_hh,
                                   name=_p(self.pname, 'W_n1_h'))
        self.W_n1_r = ortho_weight(shape=shape_hh,
                                   name=_p(self.pname, 'W_n1_r'))
        self.W_n1_z = ortho_weight(shape=shape_hh,
                                   name=_p(self.pname, 'W_n1_z'))
        self.b_n1_h = constant_weight(shape=(self.n_hids, ),
                                      name=_p(self.pname, 'b_n1_h'))
        self.b_n1_r = constant_weight(shape=(self.n_hids, ),
                                      name=_p(self.pname, 'b_n1_r'))
        self.b_n1_z = constant_weight(shape=(self.n_hids, ),
                                      name=_p(self.pname, 'b_n1_z'))
        self.params += [
            self.W_n1_h, self.W_n1_r, self.W_n1_z, self.b_n1_h, self.b_n1_r,
            self.b_n1_z
        ]

        if self.with_attention:
            self.A_cp = norm_weight(shape=shape_ch,
                                    name=_p(self.pname, 'A_cp'))
            self.B_hp = norm_weight(shape=shape_hh,
                                    name=_p(self.pname, 'B_hp'))
            self.b_tt = constant_weight(shape=(self.n_hids, ),
                                        name=_p(self.pname, 'b_tt'))
            self.D_pe = norm_weight(shape=(self.n_hids, 1),
                                    name=_p(self.pname, 'D_pe'))

            self.params += [self.A_cp, self.B_hp, self.b_tt, self.D_pe]

            # coverage only works for attention model
            if self.with_coverage:
                shape_covh = (self.coverage_dim, self.n_hids)
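                # C_covp appears to project the per-source-position coverage vector into
                # the attention energies (coverage only affects the attention scores)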
                self.C_covp = norm_weight(shape=shape_covh,
                                          name=_p(self.pname, 'Cov_covp'))

                if self.coverage_type == 'linguistic':
                    # for linguistic coverage, a fertility model is necessary since it yields better translation and alignment quality
                    self.W_cov_fertility = norm_weight(shape=(self.n_cdim, 1),
                                                       name=_p(
                                                           self.pname,
                                                           'W_cov_fertility'))
                    self.b_cov_fertility = constant_weight(
                        shape=(1, ), name=_p(self.pname, 'b_cov_fertility'))
                    self.params += [self.W_cov_fertility, self.b_cov_fertility]
                else:
                    # for neural network based coverage, gating is necessary
                    shape_covcov = (self.coverage_dim, self.coverage_dim)
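                    # W_cov_h/r/z are the recurrent (coverage-to-coverage) weights of a
                    # GRU-style update of the coverage vector; the W_cov_p*, W_cov_c* and
                    # W_cov_h* matrices below feed the attention probability, the source
                    # annotations and the previous decoder state into the same gates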
                    self.W_cov_h = ortho_weight(shape=shape_covcov,
                                                name=_p(self.pname, 'W_cov_h'))
                    self.W_cov_r = ortho_weight(shape=shape_covcov,
                                                name=_p(self.pname, 'W_cov_r'))
                    self.W_cov_z = ortho_weight(shape=shape_covcov,
                                                name=_p(self.pname, 'W_cov_z'))
                    self.b_cov_h = constant_weight(shape=(self.coverage_dim, ),
                                                   name=_p(
                                                       self.pname, 'b_cov_h'))
                    self.b_cov_r = constant_weight(shape=(self.coverage_dim, ),
                                                   name=_p(
                                                       self.pname, 'b_cov_r'))
                    self.b_cov_z = constant_weight(shape=(self.coverage_dim, ),
                                                   name=_p(
                                                       self.pname, 'b_cov_z'))

                    self.params += [
                        self.W_cov_h, self.W_cov_r, self.W_cov_z, self.b_cov_h,
                        self.b_cov_r, self.b_cov_z
                    ]

                    # parameters for coverage inputs
                    # attention probability
                    self.W_cov_ph = norm_weight(shape=(1, self.coverage_dim),
                                                name=_p(
                                                    self.pname, 'W_cov_ph'))
                    self.W_cov_pr = norm_weight(shape=(1, self.coverage_dim),
                                                name=_p(
                                                    self.pname, 'W_cov_pr'))
                    self.W_cov_pz = norm_weight(shape=(1, self.coverage_dim),
                                                name=_p(
                                                    self.pname, 'W_cov_pz'))
                    # source annotations
                    self.W_cov_ch = norm_weight(
                        shape=(self.n_cdim, self.coverage_dim),
                        name=_p(self.pname, 'W_cov_ch'))
                    self.W_cov_cr = norm_weight(
                        shape=(self.n_cdim, self.coverage_dim),
                        name=_p(self.pname, 'W_cov_cr'))
                    self.W_cov_cz = norm_weight(
                        shape=(self.n_cdim, self.coverage_dim),
                        name=_p(self.pname, 'W_cov_cz'))
                    # previous decoding states
                    self.W_cov_hh = norm_weight(
                        shape=(self.n_hids, self.coverage_dim),
                        name=_p(self.pname, 'W_cov_hh'))
                    self.W_cov_hr = norm_weight(
                        shape=(self.n_hids, self.coverage_dim),
                        name=_p(self.pname, 'W_cov_hr'))
                    self.W_cov_hz = norm_weight(
                        shape=(self.n_hids, self.coverage_dim),
                        name=_p(self.pname, 'W_cov_hz'))

                    self.params += [
                        self.W_cov_ph, self.W_cov_pr, self.W_cov_pz,
                        self.W_cov_ch, self.W_cov_cr, self.W_cov_cz,
                        self.W_cov_hh, self.W_cov_hr, self.W_cov_hz
                    ]

        # for context gate, which works for both with_attention and with_context modes
        if self.with_context_gate:
            # parameters for the context gate
            # input from the target context
            self.W_ctx_h = norm_weight(shape=(self.n_hids, self.n_hids),
                                       name=_p(self.pname, 'W_ctx_h'))
            self.W_ctx_c = norm_weight(shape=(self.n_cdim, self.n_hids),
                                       name=_p(self.pname, 'W_ctx_c'))
            self.b_ctx = constant_weight(shape=(self.n_hids, ),
                                         name=_p(self.pname, 'b_ctx'))
            self.params += [self.W_ctx_h, self.W_ctx_c, self.b_ctx]

        # for readout
        n_out = self.n_in * self.maxout_part
        self.W_o_c = norm_weight(shape=(self.n_cdim, n_out),
                                 name=_p(self.pname, 'W_out_c'))
        self.W_o_h = norm_weight(shape=(self.n_hids, n_out),
                                 name=_p(self.pname, 'W_out_h'))
        self.W_o_e = norm_weight(shape=(self.n_in, n_out),
                                 name=_p(self.pname, 'W_out_e'))
        self.b_o = constant_weight(shape=(n_out, ),
                                   name=_p(self.pname, 'b_out_o'))

        self.params += [self.W_o_c, self.W_o_h, self.W_o_e, self.b_o]
Example no. 17
0
    def _init_params(self):
        shape_hh = (self.n_hids, self.n_hids)
        self.W_comb_att = norm_weight(rng=self.rng, shape=shape_hh, name=_p(self.pname, 'W_comb_att'))
        self.U_att = norm_weight(rng=self.rng, shape=(self.n_hids, 1), name=_p(self.pname, 'U_att'))
        self.c_att = constant_weight(shape=(1,), name=_p(self.pname, 'c_att'))
        self.params = [self.W_comb_att, self.U_att, self.c_att]