class BiRNNLayer(StackLayer):
    def __init__(self, input_dim, output_dim, n_layers, unit_type, connect_type, drop_rate=0.0):
        name = 'BiRNNs-%d:(%dx%d)' % (n_layers, input_dim, output_dim)
        super(BiRNNLayer, self).__init__(name=name)
        self.input_dim = input_dim
        self.output_dim = output_dim
        self.n_layers = n_layers
        self.rnn_unit = self._set_rnn_unit(unit_type)
        self.connect_unit = self._set_connect_unit(connect_type)
        self.dropout = Dropout(drop_rate)
        self.layers = self._set_layers()
        self.params = self._set_params()

    def _set_layers(self):
        layers = []
        for i in range(self.n_layers):
            if i == 0:
                rnn_input_dim = self.input_dim
                connect_input_dim = self.input_dim + self.output_dim
            else:
                rnn_input_dim = self.output_dim
                connect_input_dim = self.output_dim * 2
            r_unit = self.rnn_unit(input_dim=rnn_input_dim,
                                   output_dim=self.output_dim)
            c_unit = self.connect_unit(input_dim=connect_input_dim,
                                       output_dim=self.output_dim,
                                       activation='relu')
            layers += [r_unit, c_unit]
        return layers

    def forward(self, x, mask=None, is_train=False):
        n_layers = int(len(self.layers) / 2)
        for i in range(n_layers):
            # Each layer pair: an RNN unit followed by a connection unit fed with
            # [layer input; RNN output] concatenated on the feature axis.
            if mask is None:
                h = self.layers[i * 2].forward(x=x)
                h = self.dropout.forward(x=h, is_train=is_train)
                x = self.layers[i * 2 + 1].forward(T.concatenate([x, h], axis=2))
            else:
                h = self.layers[i * 2].forward(x=x, mask=mask)
                h = self.dropout.forward(x=h, is_train=is_train)
                x = self.layers[i * 2 + 1].forward(T.concatenate([x, h], axis=2)) * mask
                mask = mask[::-1]
            # Reverse the sequence so the next layer runs in the opposite direction.
            x = x[::-1]
        if (n_layers % 2) == 1:
            # Odd number of layers: reverse once more to restore the original order.
            return x[::-1]
        return x
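# --- Hedged illustration (not from the original code) ----------------------
# BiRNNLayer.forward stacks unidirectional RNN units and obtains a
# bidirectional reading by reversing the time axis after every layer, then
# reversing once more at the end when the layer count is odd. The numpy
# sketch below mimics only that reversal bookkeeping with a dummy per-step
# transform; the (n_words, batch_size, dim) layout is an assumption.
import numpy as np

def _dummy_rnn(x):
    return np.tanh(x)  # stand-in for one directional RNN layer

def alternating_stack(x, n_layers):
    for _ in range(n_layers):
        x = _dummy_rnn(x)
        x = x[::-1]        # flip time so the next layer runs in the opposite direction
    if n_layers % 2 == 1:
        x = x[::-1]        # odd depth: restore the original word order
    return x

x = np.random.randn(5, 2, 3).astype('float32')   # (n_words, batch_size, dim)
assert alternating_stack(x, n_layers=3).shape == x.shape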
class Embedding(Unit):
    def __init__(self, input_dim, output_dim, init_emb=None, param_init='xavier',
                 param_fix=False, drop_rate=0.0, name=None):
        super(Embedding, self).__init__(name=name if name else 'Emb(%dx%d)' % (input_dim, output_dim))
        self.dropout = Dropout(drop_rate)
        self.W = self._set_weight(input_dim, output_dim, init_emb, param_init)
        if param_fix:
            self.params = []
        else:
            self.params = [self.W]

    def _set_weight(self, input_dim, output_dim, init_emb, param_init):
        if init_emb is None:
            return self._set_param(shape=(input_dim, output_dim),
                                   init_type=param_init,
                                   name='embedding')
        return theano.shared(init_emb)

    def forward(self, x, is_train=0):
        return self.dropout.forward(x=self.W[x], is_train=is_train)
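# --- Hedged usage sketch (vocabulary size and dimensions are made up) -------
# Embedding.forward is a plain row lookup, W[x], where x contains integer
# word ids; dropout is then applied to the looked-up vectors. The numpy
# lines below show the lookup semantics only.
import numpy as np

W = np.random.randn(100, 8).astype('float32')    # (input_dim, output_dim)
ids = np.array([[1, 5, 7],
                [2, 0, 3]])                      # (batch_size, n_words) word ids
vecs = W[ids]                                    # (batch_size, n_words, output_dim)
assert vecs.shape == (2, 3, 8)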
class ElmoLayer(Unit):
    def __init__(self, drop_rate=0.0, name=None):
        super(ElmoLayer, self).__init__(name=name if name else 'ElmoEmb')
        self.dropout = Dropout(drop_rate)
        self.gamma = theano.shared(value=np.asarray([[1.0]], theano.config.floatX),
                                   name='gamma', borrow=True)
        self.scalar_mix = theano.shared(value=np.zeros(shape=(1, 3), dtype=theano.config.floatX),
                                        name='scalar_mix', borrow=True)
        self.params = [self.gamma, self.scalar_mix]

    def forward(self, x, is_train=0):
        """
        :param x: 1D: batch_size, 2D: n_words, 3D: n_layers, 4D: dim
        :param is_train: 0/1
        :return: 1D: batch_size, 2D: n_words, 3D: dim
        """
        s = T.nnet.softmax(self.scalar_mix).dimshuffle('x', 'x', 1, 0)
        s = T.repeat(s, repeats=x.shape[3], axis=3)
        x = self.gamma[0, 0] * T.sum(s * x, axis=2)
        return self.dropout.forward(x=x, is_train=is_train)
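# --- Hedged numpy sketch of the scalar mixing in ElmoLayer.forward ----------
# The three ELMo layers are combined as gamma * sum_j softmax(s)_j * h_j
# (the usual ELMo weighting); shapes and values here are illustrative only.
import numpy as np

def _softmax(v):
    e = np.exp(v - v.max())
    return e / e.sum()

batch_size, n_words, n_layers, dim = 2, 4, 3, 5
h = np.random.randn(batch_size, n_words, n_layers, dim).astype('float32')
scalar_mix = np.zeros(3, dtype='float32')        # zeros -> uniform layer weights
gamma = 1.0                                      # learned scaling factor

s = _softmax(scalar_mix)                         # (n_layers,)
mixed = gamma * np.einsum('bwld,l->bwd', h, s)   # weighted sum over the layer axis
assert mixed.shape == (batch_size, n_words, dim)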
def compile(self, **kwargs):
    self.dropout = Dropout(rate=kwargs['drop_rate'])
    self._set_layers(n_experts=kwargs['n_experts'],
                     hidden_dim=kwargs['feat_dim'],
                     output_dim=kwargs['output_dim'])
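# --- Hedged usage note (values are placeholders, not from the original) -----
# compile() reads its hyperparameters from keyword arguments, e.g.:
#   network.compile(drop_rate=0.1, n_experts=8, feat_dim=300, output_dim=64)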