Beispiel #1
0
 def __init__(self, name, embedding, sent_encoder, classifier):
     """Wire the pre-built sub-modules together and initialize their weights."""
     super().__init__()
     self.name = name
     # Assign in the original order so module registration order stays stable.
     for attr, module in (('embedding', embedding),
                          ('sent_encoder', sent_encoder),
                          ('classifier', classifier)):
         setattr(self, attr, module)
     # In-place weight initialization over the assembled model.
     param_init(self)
Beispiel #2
0
    def __init__(self, n_in, n_out, prefix='logist', drop_rate=0.5):
        """Logistic output layer: weight W0, bias b, and a dropout rate."""
        self.n_in = n_in
        self.n_out = n_out
        self.drop_rate = drop_rate
        # Weight matrix (n_in, n_out) and bias vector (n_out,), with prefixed names.
        self.W0 = param_init().param((n_in, n_out), name=_p(prefix, 'W0'))
        self.b = param_init().param((n_out,), name=_p(prefix, 'b'))
        self.params = [self.W0, self.b]
Beispiel #3
0
    def __init__(self, n_in, n_out, nonlinearity=nonlinearities.rectify):
        """Dense layer: uniform-initialized W, constant bias b, optional nonlinearity."""
        # Weight matrix of shape (n_in, n_out), sampled uniformly.
        self.W = param_init().uniform((n_in, n_out))
        # Bias vector of length n_out (constant init).
        self.b = param_init().constant((n_out,))
        self.params = [self.W, self.b]
        # Fall back to the identity when no nonlinearity is supplied.
        if nonlinearity is None:
            self.nonlinearity = nonlinearities.identity
        else:
            self.nonlinearity = nonlinearity
Beispiel #4
0
    def __init__(self, s_in, t_in, prefix='Attention', **kwargs):
        """Attention parameters: alignment matrix Wa and scoring vector v."""
        self.s_in = s_in
        self.t_in = t_in
        # The alignment space is as wide as the target hidden state.
        self.align_size = t_in
        self.prefix = prefix

        self.Wa = param_init().param((self.t_in, self.align_size),
                                     name=_p(prefix, 'Wa'))
        self.v = param_init().param((self.align_size,), name=_p(prefix, 'v'))
        self.params = [self.Wa, self.v]
Beispiel #5
0
 def _init_params(self):
     """Create the fused recurrent parameters W, U, b and register them.

     W: (emb_size, n_hids) input projection; U: (n_hids, n_hids) recurrent
     projection — both built with init_type='mfunc' and m=4, i.e. four
     stacked blocks (presumably the four LSTM gates — confirm against
     param_init).  b is one fused bias of length n_hids * 4.
     """
     name = lambda s: _p(self.prefix, s)
     self.W = param_init().param((self.emb_size, self.n_hids),
                                 init_type='mfunc',
                                 m=4,
                                 name=name('W'))
     self.U = param_init().param((self.n_hids, self.n_hids),
                                 init_type='mfunc',
                                 m=4,
                                 name=name('U'))
     self.b = param_init().param((self.n_hids * 4,), name=name('b'))
     self.params.extend([self.W, self.U, self.b])
Beispiel #6
0
    def __init__(self, s_in, t_in, prefix='Attention', **kwargs):
        """Attention parameters: Wa maps target hidden states into the
        alignment space; v scores each aligned position."""
        self.s_in = s_in
        self.t_in = t_in
        self.align_size = t_in  # alignment width equals trg_nhids
        self.prefix = prefix

        wa_shape = (self.t_in, self.align_size)
        self.Wa = param_init().param(wa_shape, name=_p(prefix, 'Wa'))
        self.v = param_init().param((self.align_size,), name=_p(prefix, 'v'))
        self.params = [self.Wa, self.v]
Beispiel #7
0
    def __init__(self, n_in, lr_out, prefix='logist', **kwargs):
        """Softmax output layer mapping decoder states (n_in) to vocabulary
        logits (lr_out)."""
        self.n_in = n_in  # width of the decoder's merged output
        self.lr_out = lr_out  # target vocabulary size (number of output classes)
        # W0: (n_in, lr_out) projection.  b is built with scale=log(1/V),
        # presumably so the initial softmax is near-uniform — confirm against
        # param_init's handling of `scale`.
        self.W0 = param_init().param((n_in, lr_out), name=_p(prefix, 'W0'))
        self.b = param_init().param((lr_out,),
                                    name=_p(prefix, 'b'),
                                    scale=numpy.log(1. / lr_out))
        self.params = [self.W0, self.b]
        # Optional knobs consumed from **kwargs.
        for attr, key, default in (('drop_rate', 'dropout', 0.5),
                                   ('alpha', 'alpha', 0.0),
                                   ('use_mv', 'use_mv', 0)):
            setattr(self, attr, kwargs.pop(key, default))
Beispiel #8
0
 def __init__(self, embsize, vocab_size, prefix='Lookup_table'):
     """Embedding lookup table: one embsize-dim row per vocabulary entry."""
     self.vocab_size = vocab_size
     self.embsize = embsize
     # Shared weight matrix of shape (vocab_size, embsize), e.g. (30000, 620);
     # named '<prefix>_embed'.
     self.W = param_init().param((vocab_size, embsize),
                                 name=_p(prefix, 'embed'))
     self.params = [self.W]
Beispiel #9
0
def build_cnn_model(num_vocab,
                    dim_word,
                    dim_fc,
                    windows,
                    dim_feature,
                    dropout_emb=0.0,
                    dropout_fc=0.0,
                    embedding_type=None,
                    vectors=None,
                    freeze_emb=True,
                    device=None):
    """Build a text CNN: embedding -> parallel Conv2d windows -> classifier.

    Each window size w gets its own Conv2d(1, dim_feature, (w, dim_word));
    the classifier input width is len(windows) * dim_feature.
    """
    emb = get_embedding(num_vocab, dim_word, vectors, freeze_emb,
                        embedding_type, dropout_emb)
    # One convolution per window size, spanning the full word dimension.
    convs = nn.ModuleList(
        [nn.Conv2d(1, dim_feature, (w, dim_word)) for w in windows])
    head = get_classifier(len(convs) * dim_feature, dim_fc, dropout_fc)
    model = CNNModel(emb, convs, head)
    # Apply the project's weight-initialization scheme in place.
    param_init(model)
    return model.to(device)
Beispiel #10
0
    def interact(self, facts_rep, questions_rep):
        """Combine question and fact representations via a max-pooled interaction.

        Both inputs are projected with tanh(x . W + b); each question row is
        then added to every fact row and max-pooled over the fact axis inside
        a theano scan, yielding one interaction vector per question.

        NOTE(review): parameters are created here (not in __init__) and
        appended to self.params on every call — repeated calls would register
        duplicates; confirm this is invoked exactly once per graph build.
        """
        # Orthogonal projections and constant biases, sized by the hidden width.
        self.W_f = param_init().orth((self.n_hids, self.n_hids))
        self.W_q = param_init().orth((self.n_hids, self.n_hids))
        self.b_f = param_init().constant((self.n_hids,))
        self.b_q = param_init().constant((self.n_hids,))
        self.params += [self.W_f, self.W_q, self.b_f, self.b_q]

        questions_rep = T.tanh(theano.dot(questions_rep, self.W_q) + self.b_q)
        facts_rep = T.tanh(theano.dot(facts_rep, self.W_f) + self.b_f)

        def _one_step(question_rep, facts_rep):
            # Pad a lone question vector to a 1-row matrix so it broadcasts
            # against all fact rows.
            if question_rep.ndim == 1:
                question_rep = T.shape_padleft(question_rep, n_ones=1)
            # Element-wise sum, then max over the fact axis -> one vector.
            inter_rep = (question_rep + facts_rep).max(axis=0)
            return inter_rep

        # Map _one_step over questions; facts_rep is passed unchanged each step.
        inter_reps, updates = theano.scan(_one_step,
                                          sequences=questions_rep,
                                          outputs_info=None,
                                          non_sequences=facts_rep
                                          )
        return inter_reps
Beispiel #11
0
    def merge_out(self, state_below, mask_below, context=None):
        """Run the recurrent layer, then merge input, hiddens (and context)
        through a max-out projection.

        Returns a (time, batch, n_hids) tensor, masked by mask_below.

        NOTE(review): W_m/b_m are created on each call and appended to
        self.params — confirm merge_out is called once per graph build.
        """
        hiddens = self.apply(state_below, mask_below, context=context)
        if context is None:
            msize = self.n_in + self.n_hids
            osize = self.n_hids
            combine = T.concatenate([state_below, hiddens], axis=2)
        else:
            msize = self.n_in + self.n_hids + self.c_hids
            osize = self.n_hids
            n_times = state_below.shape[0]
            # Tile the fixed context along the time axis so it concatenates.
            m_context = repeat_x(context, n_times)
            combine = T.concatenate([state_below, hiddens, m_context], axis=2)

        self.W_m = param_init().uniform((msize, osize*2))
        self.b_m = param_init().constant((osize*2,))
        self.params += [self.W_m, self.b_m]

        merge_out = theano.dot(combine, self.W_m) + self.b_m
        # Max-out over pairs: fold the 2*osize features into (osize, 2) and
        # keep the max of each pair.  Floor division (//) keeps the reshape
        # dimension integral — true division (/) would yield a float under
        # Python 3 semantics and break the reshape.
        merge_max = merge_out.reshape((merge_out.shape[0],
                                       merge_out.shape[1],
                                       merge_out.shape[2] // 2,
                                       2), ndim=4).max(axis=3)
        return merge_max * mask_below[:, :, None]
Beispiel #12
0
    def __init__(self, input, n_in, n_out):
        """Softmax classifier: class probabilities and argmax prediction over
        `input` (2-D or 3-D symbolic tensor)."""
        # W drawn uniformly with shape (n_in, n_out); b from constant init.
        self.W = param_init().uniform((n_in, n_out))
        self.b = param_init().constant((n_out,))
        self.params = [self.W, self.b]

        # Linear scores for every class.
        scores = theano.dot(input, self.W) + self.b
        if scores.ndim == 3:
            # Normalize 3-D scores manually along the last axis, subtracting
            # the row max first for numerical stability.
            shifted = T.exp(scores - T.max(scores, 2, keepdims=True))
            self.p_y_given_x = shifted / shifted.sum(2, keepdims=True)
        else:
            self.p_y_given_x = T.nnet.softmax(scores)

        # Predicted class = index of the largest probability.
        self.y_pred = T.argmax(self.p_y_given_x, axis=-1)
Beispiel #13
0
    def _init_params2(self):
        """Second-layer GRU parameters: gate matrices, biases, and the
        context-to-hidden projection."""
        f = lambda name: _p(self.prefix, name)
        hh = (self.n_hids, self.n_hids)

        # Hidden-to-hidden gate weights (orthogonal init) and their biases.
        self.W_hz2 = param_init().param(hh, 'orth', name=f('W_hz2'))
        self.W_hr2 = param_init().param(hh, 'orth', name=f('W_hr2'))
        self.W_hh2 = param_init().param(hh, 'orth', name=f('W_hh2'))
        self.b_z2 = param_init().param((self.n_hids,), name=f('b_z2'))
        self.b_r2 = param_init().param((self.n_hids,), name=f('b_r2'))
        self.b_h2 = param_init().param((self.n_hids,), name=f('b_h2'))

        # Projection from the context vector into the hidden space.
        self.Ws = param_init().param((self.c_hids, self.n_hids), name=f('Ws'))
        self.bs = param_init().param((self.n_hids,), name=f('bs'))

        self.params.extend([self.W_hz2, self.W_hr2, self.W_hh2,
                            self.b_z2, self.b_r2, self.b_h2,
                            self.Ws, self.bs])
Beispiel #14
0
    def _init_params2(self):
        """Create the second GRU layer's parameters and register them in
        self.params: orthogonal hidden-to-hidden gate weights, their biases,
        and the context-to-hidden projection Ws/bs.
        """
        f = lambda name: _p(self.prefix, name)
        n_hids = self.n_hids
        size_hh = (n_hids, n_hids)

        # Hidden-to-hidden gate weights use orthogonal initialization.
        self.W_hz2 = param_init().param(size_hh, 'orth', name=f('W_hz2'))
        self.W_hr2 = param_init().param(size_hh, 'orth', name=f('W_hr2'))
        self.W_hh2 = param_init().param(size_hh, 'orth', name=f('W_hh2'))
        self.b_z2 = param_init().param((n_hids, ), name=f('b_z2'))
        self.b_r2 = param_init().param((n_hids, ), name=f('b_r2'))
        self.b_h2 = param_init().param((n_hids, ), name=f('b_h2'))

        # Context-to-hidden projection.
        self.Ws = param_init().param((self.c_hids, self.n_hids), name=f('Ws'))
        self.bs = param_init().param((self.n_hids, ), name=f('bs'))
        self.params += [
            self.W_hz2, self.W_hr2, self.W_hh2, self.b_z2, self.b_r2,
            self.b_h2, self.Ws, self.bs
        ]
        # A dangling triple-quote followed this method (an unterminated string
        # literal, apparently a truncated block comment); it was removed.
Beispiel #15
0
    def _init_params(self):
        """Build GRU parameters: uniform input weights, orthogonal recurrent
        weights (QR-based), constant biases, and optional context projections."""
        n_in = self.n_in
        n_hids = self.n_hids
        in_shape = (n_in, n_hids)
        rec_shape = (n_hids, n_hids)

        # Input-to-hidden weights for the z/r/h gates (uniform init).
        for gate in ('z', 'r', 'h'):
            setattr(self, 'W_x' + gate, param_init().uniform(in_shape))
        # Hidden-to-hidden weights (orthogonal init).
        for gate in ('z', 'r', 'h'):
            setattr(self, 'W_h' + gate, param_init().orth(rec_shape))
        # Gate biases (constant init).
        for gate in ('z', 'r', 'h'):
            setattr(self, 'b_' + gate, param_init().constant((n_hids,)))

        self.params = [self.W_xz, self.W_xr, self.W_xh,
                       self.W_hz, self.W_hr, self.W_hh,
                       self.b_z, self.b_r, self.b_h]

        if self.with_contex:
            # Context-to-hidden projections for each gate, plus one used to
            # derive the initial hidden state.
            ctx_shape = (self.c_hids, self.n_hids)
            for name in ('W_cz', 'W_cr', 'W_ch', 'W_c_init'):
                setattr(self, name, param_init().uniform(ctx_shape))
            self.params = self.params + [self.W_cz, self.W_cr,
                                         self.W_ch, self.W_c_init]
Beispiel #16
0
    def _init_params(self):
        """Create GRU weights/biases (plus optional context and merge
        parameters) and register them in self.params."""
        f = lambda name: _p(self.prefix, name)

        n_in, n_hids = self.n_in, self.n_hids
        size_xh = (n_in, n_hids)
        size_hh = (n_hids, n_hids)

        # Input-to-hidden gate weights (default init).
        self.W_xz = param_init().param(size_xh, name=f('W_xz'))
        self.W_xr = param_init().param(size_xh, name=f('W_xr'))
        self.W_xh = param_init().param(size_xh, name=f('W_xh'))

        # Hidden-to-hidden gate weights, orthogonally initialized.
        self.W_hz = param_init().param(size_hh, 'orth', name=f('W_hz'))
        self.W_hr = param_init().param(size_hh, 'orth', name=f('W_hr'))
        self.W_hh = param_init().param(size_hh, 'orth', name=f('W_hh'))

        # Gate biases.
        self.b_z = param_init().param((n_hids,), name=f('b_z'))
        self.b_r = param_init().param((n_hids,), name=f('b_r'))
        self.b_h = param_init().param((n_hids,), name=f('b_h'))

        self.params = [self.W_xz, self.W_xr, self.W_xh,
                       self.W_hz, self.W_hr, self.W_hh,
                       self.b_z, self.b_r, self.b_h]

        if self.with_contex:
            # Context-conditioned gates plus the initial-state projection.
            size_ch = (self.c_hids, self.n_hids)
            size_ch_ini = (self.c_hids, self.n_hids)
            self.W_cz = param_init().param(size_ch, name=f('W_cz'))
            self.W_cr = param_init().param(size_ch, name=f('W_cr'))
            self.W_ch = param_init().param(size_ch, name=f('W_ch'))
            self.W_c_init = param_init().param(size_ch_ini, name=f('W_c_init'))
            # NOTE(review): b_init is created but never added to self.params —
            # confirm it is registered (or deliberately untrained) elsewhere.
            self.b_init = param_init().param((self.n_hids,), name=f('b_init'))
            self.params += [self.W_cz, self.W_cr, self.W_ch, self.W_c_init]
            msize = self.n_in + self.n_hids + self.c_hids
        else:
            msize = self.n_in + self.n_hids

        if self.merge:
            osize = self.n_out
            # Max-out keeps pairs of units and takes their max, so it needs
            # twice the output width; otherwise a plain projection suffices.
            width = osize * 2 if self.max_out else osize
            self.W_m = param_init().param((msize, width), name=f('W_m'))
            self.b_m = param_init().param((width,), name=f('b_m'))
            self.params += [self.W_m, self.b_m]
Beispiel #17
0
 def __init__(self, embsize, vocab_size, prefix='Lookup_table'):
     """Word-embedding table: one row of size embsize per vocabulary id."""
     self.embsize = embsize
     self.vocab_size = vocab_size
     self.W = param_init().param((vocab_size, embsize),
                                 name=_p(prefix, 'embed'))
     self.params = [self.W]
Beispiel #18
0
 def __init__(self, embsize, vocab_size):
     """Uniformly initialized embedding table of shape (vocab_size, embsize)."""
     self.embsize = embsize
     self.vocab_size = vocab_size
     self.W = param_init().uniform((vocab_size, embsize))
     self.params = [self.W]
Beispiel #19
0
    def _init_params(self):
        """Create this GRU layer's parameters and register them in self.params:
        gate weights/biases, optional context projections, the merge/max-out
        projection, and optional layer-normalization gains/biases.
        """

        f = lambda name: _p(self.prefix, name
                            )  # prefix a parameter name, e.g. 'GRU_W_xz'

        n_in = self.n_in
        n_hids = self.n_hids
        size_xh = (n_in, n_hids)
        size_hh = (n_hids, n_hids)
        # The next three are weight matrices from the input layer to the
        # hidden layer: numpy.ndarray drawn from a normal distribution.
        self.W_xz = param_init().param(size_xh, name=f('W_xz'))
        self.W_xr = param_init().param(size_xh, name=f('W_xr'))
        self.W_xh = param_init().param(size_xh, name=f('W_xh'))

        # The next three are weight matrices from the hidden layer to itself:
        # standard-normal draws made orthogonal via QR factorization.
        self.W_hz = param_init().param(size_hh, 'orth', name=f('W_hz'))
        self.W_hr = param_init().param(size_hh, 'orth', name=f('W_hr'))
        self.W_hh = param_init().param(size_hh, 'orth', name=f('W_hh'))

        # The next three are the hidden-layer bias vectors (normal distribution).
        self.b_z = param_init().param((n_hids, ), name=f('b_z'))
        self.b_r = param_init().param((n_hids, ), name=f('b_r'))
        self.b_h = param_init().param((n_hids, ), name=f('b_h'))

        # Collect all parameter arrays into one list.
        self.params = [
            self.W_xz, self.W_xr, self.W_xh, self.W_hz, self.W_hr, self.W_hh,
            self.b_z, self.b_r, self.b_h
        ]

        if self.with_contex:  # default False
            size_ch = (self.c_hids, self.n_hids)  # (src_nhids*2, trg_nhids)
            # Weight matrices from the context hidden layer to the hidden layer.
            size_ch_ini = (self.c_hids, self.n_hids)
            self.W_cz = param_init().param(size_ch, name=f('W_cz'))
            self.W_cr = param_init().param(size_ch, name=f('W_cr'))
            self.W_ch = param_init().param(size_ch, name=f('W_ch'))
            self.W_c_init = param_init().param(size_ch_ini, name=f('W_c_init'))
            # NOTE(review): b_init is created but not appended to self.params
            # below — confirm it is registered elsewhere.
            self.b_init = param_init().param((self.n_hids, ), name=f('b_init'))

            self.params = self.params + [
                self.W_cz, self.W_cr, self.W_ch, self.W_c_init
            ]  # append the context parameters

            msize = self.n_in + self.n_hids + self.c_hids
        else:
            msize = self.n_in + self.n_hids

        if self.merge:  # default True
            osize = self.n_out  # defaults to the hidden width (n_hids == trg_nhids)
            # NOTE: the two branches differ only in the projection width
            # (max-out needs 2x units, then takes the max of each pair).
            if self.max_out:  # default True
                self.W_m = param_init().param((msize, osize * 2),
                                              name=_p(self.prefix, 'W_m'))
                self.b_m = param_init().param((osize * 2, ),
                                              name=_p(self.prefix, 'b_m'))
                self.params += [self.W_m, self.b_m]
            else:
                self.W_m = param_init().param((msize, osize),
                                              name=_p(self.prefix, 'W_m'))
                self.b_m = param_init().param((osize, ),
                                              name=_p(self.prefix, 'b_m'))
                self.params += [self.W_m, self.b_m]

        # Layer normalization parameters (self.ln defaults to False).
        if self.ln:
            mul_scale = 1.0  # init value for LN gains (g*)
            add_scale = 0.0  # init value for LN biases (b*)
            self.g1 = param_init().param((n_hids, ),
                                         scale=mul_scale,
                                         name=_p(self.prefix, 'ln_g1'))
            self.g2 = param_init().param((n_hids, ),
                                         scale=mul_scale,
                                         name=_p(self.prefix, 'ln_g2'))
            self.g3 = param_init().param((n_hids, ),
                                         scale=mul_scale,
                                         name=_p(self.prefix, 'ln_g3'))
            self.g4 = param_init().param((n_hids, ),
                                         scale=mul_scale,
                                         name=_p(self.prefix, 'ln_g4'))
            self.b1 = param_init().param((n_hids, ),
                                         scale=add_scale,
                                         name=_p(self.prefix, 'ln_b1'))
            self.b2 = param_init().param((n_hids, ),
                                         scale=add_scale,
                                         name=_p(self.prefix, 'ln_b2'))
            self.b3 = param_init().param((n_hids, ),
                                         scale=add_scale,
                                         name=_p(self.prefix, 'ln_b3'))
            self.b4 = param_init().param((n_hids, ),
                                         scale=add_scale,
                                         name=_p(self.prefix, 'ln_b4'))
            self.params += [
                self.g1, self.b1, self.g2, self.b2, self.g3, self.b3, self.g4,
                self.b4
            ]
            # NOTE(review): the LN biases below (bcz, bcr, bch) are created
            # with scale=mul_scale (1.0), unlike b1-b4 which use add_scale
            # (0.0) — this looks like a copy-paste slip; confirm the intended
            # initialization.  Also, within this excerpt gcz..bch are never
            # appended to self.params; confirm that happens later.
            if self.with_contex:
                self.gcz = param_init().param((self.n_hids, ),
                                              scale=mul_scale,
                                              name=_p(self.prefix, 'ln_gcz'))
                self.bcz = param_init().param((self.n_hids, ),
                                              scale=mul_scale,
                                              name=_p(self.prefix, 'ln_bcz'))
                self.gcr = param_init().param((self.n_hids, ),
                                              scale=mul_scale,
                                              name=_p(self.prefix, 'ln_gcr'))
                self.bcr = param_init().param((self.n_hids, ),
                                              scale=mul_scale,
                                              name=_p(self.prefix, 'ln_bcr'))
                self.gch = param_init().param((self.n_hids, ),
                                              scale=mul_scale,
                                              name=_p(self.prefix, 'ln_gch'))
                self.bch = param_init().param((self.n_hids, ),
                                              scale=mul_scale,
                                              name=_p(self.prefix, 'ln_bch'))