Example #1
import theano.tensor as T  # needed for T.concatenate in forward() below

class BiRNNLayer(StackLayer):
    def __init__(self,
                 input_dim,
                 output_dim,
                 n_layers,
                 unit_type,
                 connect_type,
                 drop_rate=0.0):
        name = 'BiRNNs-%d:(%dx%d)' % (n_layers, input_dim, output_dim)
        super(BiRNNLayer, self).__init__(name=name)

        self.input_dim = input_dim
        self.output_dim = output_dim
        self.n_layers = n_layers
        self.rnn_unit = self._set_rnn_unit(unit_type)
        self.connect_unit = self._set_connect_unit(connect_type)
        self.dropout = Dropout(drop_rate)

        self.layers = self._set_layers()
        self.params = self._set_params()

    def _set_layers(self):
        layers = []
        for i in range(self.n_layers):
            if i == 0:
                rnn_input_dim = self.input_dim
                connect_input_dim = self.input_dim + self.output_dim
            else:
                rnn_input_dim = self.output_dim
                connect_input_dim = self.output_dim * 2

            r_unit = self.rnn_unit(input_dim=rnn_input_dim,
                                   output_dim=self.output_dim)
            c_unit = self.connect_unit(input_dim=connect_input_dim,
                                       output_dim=self.output_dim,
                                       activation='relu')
            layers += [r_unit, c_unit]
        return layers

    def forward(self, x, mask=None, is_train=False):
        # self.layers alternates [rnn_unit, connect_unit, rnn_unit, connect_unit, ...]
        n_layers = len(self.layers) // 2
        for i in range(n_layers):
            if mask is None:
                h = self.layers[i * 2].forward(x=x)
                h = self.dropout.forward(x=h, is_train=is_train)
                # the connect unit combines the layer input with the RNN output
                x = self.layers[i * 2 + 1].forward(T.concatenate([x, h], axis=2))
            else:
                h = self.layers[i * 2].forward(x=x, mask=mask)
                h = self.dropout.forward(x=h, is_train=is_train)
                x = self.layers[i * 2 + 1].forward(T.concatenate([x, h], axis=2)) * mask
                mask = mask[::-1]
            # reverse the sequence so the next layer runs in the opposite direction
            x = x[::-1]
        # an odd number of reversals leaves the output backwards; flip it back
        if n_layers % 2 == 1:
            return x[::-1]
        return x
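A minimal usage sketch for Example #1, assuming that _set_rnn_unit and _set_connect_unit (not shown here) accept string keys such as 'lstm' and 'dense'; the actual keys and tensor layout depend on the surrounding codebase.

# Hypothetical usage sketch; 'lstm' and 'dense' are assumed keys for
# _set_rnn_unit / _set_connect_unit, which are defined elsewhere in the codebase.
import theano.tensor as T

birnn = BiRNNLayer(input_dim=100,
                   output_dim=100,
                   n_layers=2,
                   unit_type='lstm',
                   connect_type='dense',
                   drop_rate=0.1)

x = T.ftensor3('x')        # 3D input tensor; forward() reverses axis 0 between layers
mask = T.ftensor3('mask')  # same leading shape as x, marking valid positions
h = birnn.forward(x=x, mask=mask, is_train=True)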
Example #2
    def __init__(self, drop_rate=0.0, name=None):
        super(ElmoLayer, self).__init__(name=name if name else 'ElmoEmb')
        self.dropout = Dropout(drop_rate)

        self.gamma = theano.shared(value=np.asarray([[1.0]],
                                                    dtype=theano.config.floatX),
                                   name='gamma',
                                   borrow=True)
        self.scalar_mix = theano.shared(value=np.zeros(
            shape=(1, 3), dtype=theano.config.floatX),
                                        name='scalar_mix',
                                        borrow=True)
        self.params = [self.gamma, self.scalar_mix]
Example #3
import theano  # needed for theano.shared in _set_weight below

class Embedding(Unit):
    def __init__(self,
                 input_dim,
                 output_dim,
                 init_emb=None,
                 param_init='xavier',
                 param_fix=False,
                 drop_rate=0.0,
                 name=None):
        super(Embedding, self).__init__(name=name if name else 'Emb(%dx%d)' %
                                        (input_dim, output_dim))
        self.dropout = Dropout(drop_rate)

        self.W = self._set_weight(input_dim, output_dim, init_emb, param_init)
        if param_fix:
            self.params = []
        else:
            self.params = [self.W]

    def _set_weight(self, input_dim, output_dim, init_emb, param_init):
        if init_emb is None:
            return self._set_param(shape=(input_dim, output_dim),
                                   init_type=param_init,
                                   name='embedding')
        return theano.shared(init_emb)

    def forward(self, x, is_train=0):
        return self.dropout.forward(x=self.W[x], is_train=is_train)
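A short usage sketch for Example #3, assuming a pre-trained embedding matrix is available; the vocabulary size and dimensionality below are illustrative, not taken from the original code.

import numpy as np
import theano
import theano.tensor as T

# Hypothetical pre-trained vectors: 10,000-word vocabulary, 100-dim embeddings.
pretrained = np.random.randn(10000, 100).astype(theano.config.floatX)

emb = Embedding(input_dim=10000,
                output_dim=100,
                init_emb=pretrained,   # reuse existing vectors instead of random init
                param_fix=True,        # keep the embedding matrix frozen
                drop_rate=0.1)

word_ids = T.imatrix('word_ids')                 # 1D: batch_size, 2D: n_words
vectors = emb.forward(x=word_ids, is_train=1)    # 3D: batch_size x n_words x 100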
Example #4
import numpy as np
import theano
import theano.tensor as T

class ElmoLayer(Unit):
    def __init__(self, drop_rate=0.0, name=None):
        super(ElmoLayer, self).__init__(name=name if name else 'ElmoEmb')
        self.dropout = Dropout(drop_rate)

        self.gamma = theano.shared(value=np.asarray([[1.0]],
                                                    dtype=theano.config.floatX),
                                   name='gamma',
                                   borrow=True)
        self.scalar_mix = theano.shared(value=np.zeros(
            shape=(1, 3), dtype=theano.config.floatX),
                                        name='scalar_mix',
                                        borrow=True)
        self.params = [self.gamma, self.scalar_mix]

    def forward(self, x, is_train=0):
        """
        :param x: 1D: batch_size, 2D: n_words, 3D: n_layers, 4D: dim
        :param is_train: 0/1
        :return:
        """
        s = T.nnet.softmax(self.scalar_mix).dimshuffle('x', 'x', 1, 0)
        s = T.repeat(s, repeats=x.shape[3], axis=3)
        x = self.gamma[0, 0] * T.sum(s * x, axis=2)
        return self.dropout.forward(x=x, is_train=is_train)
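The forward pass in Example #4 is the standard ELMo scalar mix, out = gamma * sum_j softmax(scalar_mix)_j * x_j over the layer axis. A plain NumPy sketch of the same computation (with shapes taken from the docstring) may make the dimshuffle and broadcasting easier to follow; it is a reference re-implementation, not part of the original code.

import numpy as np

def elmo_mix_numpy(x, scalar_mix, gamma):
    # x: (batch_size, n_words, n_layers, dim); scalar_mix: (n_layers,); gamma: scalar
    s = np.exp(scalar_mix) / np.exp(scalar_mix).sum()   # softmax over the layer axis
    return gamma * np.einsum('l,bwld->bwd', s, x)       # weight each layer, then sum

# Toy check with the 3-layer setup the class assumes.
x = np.random.randn(2, 5, 3, 4)
out = elmo_mix_numpy(x, scalar_mix=np.zeros(3), gamma=1.0)
print(out.shape)  # (2, 5, 4)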
Example #5
    def __init__(self,
                 input_dim,
                 output_dim,
                 init_emb=None,
                 param_init='xavier',
                 param_fix=False,
                 drop_rate=0.0,
                 name=None):
        super(Embedding, self).__init__(name=name if name else 'Emb(%dx%d)' %
                                        (input_dim, output_dim))
        self.dropout = Dropout(drop_rate)

        self.W = self._set_weight(input_dim, output_dim, init_emb, param_init)
        if param_fix:
            self.params = []
        else:
            self.params = [self.W]
Example #6
    def __init__(self,
                 input_dim,
                 output_dim,
                 n_layers,
                 unit_type,
                 connect_type,
                 drop_rate=0.0):
        name = 'BiRNNs-%d:(%dx%d)' % (n_layers, input_dim, output_dim)
        super(BiRNNLayer, self).__init__(name=name)

        self.input_dim = input_dim
        self.output_dim = output_dim
        self.n_layers = n_layers
        self.rnn_unit = self._set_rnn_unit(unit_type)
        self.connect_unit = self._set_connect_unit(connect_type)
        self.dropout = Dropout(drop_rate)

        self.layers = self._set_layers()
        self.params = self._set_params()
Example #7
    def compile(self, **kwargs):
        self.dropout = Dropout(rate=kwargs['drop_rate'])
        self._set_layers(n_experts=kwargs['n_experts'],
                         hidden_dim=kwargs['feat_dim'],
                         output_dim=kwargs['output_dim'])
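A hypothetical call for Example #7; the keyword names come straight from the snippet, but the enclosing class and its _set_layers method are not shown, so the object and values below are illustrative only.

# 'model' stands in for an instance of the (unshown) class that defines compile().
model.compile(drop_rate=0.1,
              n_experts=4,
              feat_dim=128,
              output_dim=64)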