Example #1
 def __call__(self, X):
     d_x = X.dim()[0][0]
     d_y = X.dim()[0][1]
     g = dy.ones((d_x, d_y))   # constant gain
     b = dy.zeros((d_x, d_y))  # constant bias
     # sum the outputs of all attention heads
     Y = []
     for attention in self.attention:
         Y.append(attention(X))
     Y = dy.esum(Y)
     # residual connection followed by layer normalization
     Y = dy.layer_norm(X + Y, g, b)
     # feed-forward sublayer (applied to the transposed tensor), again with residual + layer norm
     Y = dy.layer_norm(Y + dy.transpose(self.feedforward(dy.transpose(Y))),
                       g, b)
     return Y
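A note on the gain and bias: the example above builds them with dy.ones / dy.zeros, so they stay constant. A minimal sketch of the more common variant with learned gain and bias parameters (the dimension and parameter names below are placeholders, not taken from the example) could look like this:

 import dynet as dy

 pc = dy.ParameterCollection()
 hidden_dim = 512  # hypothetical model dimension
 p_g = pc.add_parameters(hidden_dim, init=dy.ConstInitializer(1.0))  # gain, initialized to 1
 p_b = pc.add_parameters(hidden_dim, init=dy.ConstInitializer(0.0))  # bias, initialized to 0

 def layer_norm(x):
     # normalize the vector x, then rescale and shift with the learned gain and bias
     return dy.layer_norm(x, dy.parameter(p_g), dy.parameter(p_b))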
Example #2
 def transduce(self, seq: ExpressionSequence) -> ExpressionSequence:
   seq_tensor = self.child.transduce(seq).as_tensor() + seq.as_tensor()
   if self.layer_norm:
     d = seq_tensor.dim()
     seq_tensor = dy.reshape(seq_tensor, (d[0][0],), batch_size=d[0][1]*d[1])
     seq_tensor = dy.layer_norm(seq_tensor, self.ln_g, self.ln_b)
     seq_tensor = dy.reshape(seq_tensor, d[0], batch_size=d[1])
   return ExpressionSequence(expr_tensor=seq_tensor)
Example #3
 def norm(x):
     """Layer Norm only handles a vector in dynet so fold extra dims into the batch."""
     shape, batchsz = x.dim()
     first = shape[0]
     fold = np.prod(shape[1:])
     x = dy.reshape(x, (first,), batch_size=batchsz * fold)
     # a and b are the layer-norm gain and bias expressions from the enclosing scope
     x = dy.layer_norm(x, a, b)
     return dy.reshape(x, shape, batch_size=batchsz)
Example #4
  def __call__(self, input_expr):
    g = dy.parameter(self.p_g)
    b = dy.parameter(self.p_b)

    (_, seq_len), batch_size = input_expr.dim()
    input = TimeDistributed()(input_expr)
    output = dy.layer_norm(input, g, b)
    return ReverseTimeDistributed()(output, seq_len, batch_size)
Example #5
 def norm(x):
     """Layer Norm only handles a vector in dynet so fold extra dims into the batch."""
     shape, batchsz = x.dim()
     first = shape[0]
     fold = np.prod(shape[1:])
     x = dy.reshape(x, (first,), batch_size=batchsz*fold)
     x = dy.layer_norm(x, a, b)
     return dy.reshape(x, shape, batch_size=batchsz)
Example #6
 def __call__(self, x):
     W = dy.parameter(self.W)
     b = dy.parameter(self.b)
     if self.ln:
         g = dy.parameter(self.g)
         y = dy.layer_norm(W * x, g, b)
         return self.act(y)
     else:
         y = dy.affine_transform([b, W, x])
         return self.act(y)
Example #7
 def __call__(self, x):
     if self.ln:
         return self.activation(
             layer_norm(
                 parameter(self.W) * x, parameter(self.ln_s),
                 parameter(self.b)))
     else:
         return self.activation(
             affine_transform([parameter(self.b),
                               parameter(self.W), x]))
Example #8
 def __call__(self, input, train_mode):
     for layer_idx in range(len(self.expressions)):
         layer = self.expressions[layer_idx]
         if layer_idx == 0:
             input = dy.layer_norm(input, layer[2], layer[3])
         input = dy.affine_transform([layer[0], layer[1], input])
         if layer_idx != len(self.expressions) - 1:
             input = self.act_fun(input)
             if train_mode:
                 input = dy.dropout(input, self.dropout_rate)
     return input
Example #9
    def test_layer_norm(self):
        dy.renew_cg()
        x = dy.inputTensor(self.v1)
        g = dy.inputTensor(self.v2)
        b = dy.inputTensor(self.v3)
        y = dy.layer_norm(x, g, b)
        l = dy.sum_elems(y)
        l_value = l.scalar_value()  # evaluating the sum runs the forward pass before backward()
        l.backward()

        y_np_value = self.v2 / self.v1.std() * (self.v1 - self.v1.mean()) + self.v3

        self.assertTrue(np.allclose(y.npvalue(), y_np_value))
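The test above checks dy.layer_norm against a NumPy reference but relies on the fixture values self.v1, self.v2 and self.v3. A self-contained sketch of the same check, with arbitrary placeholder values, might look like this:

 import dynet as dy
 import numpy as np

 dy.renew_cg()
 v1 = np.array([1.0, 2.0, 4.0, 7.0])  # arbitrary input vector
 v2 = np.ones(4)                      # gain
 v3 = np.zeros(4)                     # bias

 y = dy.layer_norm(dy.inputTensor(v1), dy.inputTensor(v2), dy.inputTensor(v3))

 # per the test's reference formula: layer_norm(x, g, b) = g * (x - mean(x)) / std(x) + b
 reference = v2 / v1.std() * (v1 - v1.mean()) + v3
 assert np.allclose(y.npvalue(), reference)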
Example #10
    def test_layer_norm(self):
        dy.renew_cg()
        x = dy.inputTensor(self.v1)
        g = dy.inputTensor(self.v2)
        b = dy.inputTensor(self.v3)
        y = dy.layer_norm(x, g, b)
        l = dy.sum_elems(y)
        l_value = l.scalar_value()
        l.backward()

        y_np_value = self.v2 / self.v1.std() * (self.v1 - self.v1.mean()) + self.v3

        self.assertTrue(np.allclose(y.npvalue(), y_np_value))
Example #11
    def test_layer_norm(self):
        dy.renew_cg()
        x = dy.inputTensor(self.v1)
        g = dy.inputTensor(self.v2)
        b = dy.inputTensor(self.v3)
        y = dy.layer_norm(x, g, b)
        loss = dy.sum_elems(y)

        loss.backward()

        centered_v1 = self.v1 - self.v1.mean()
        y_np_value = self.v2 / self.v1.std() * centered_v1 + self.v3

        self.assertTrue(np.allclose(y.npvalue(), y_np_value))
Example #12
    def __call__(self, obs, batched=False):
        out = obs if isinstance(obs, dy.Expression) else dy.inputTensor(obs, batched=batched)

        for i in range(self.n_layers):
            b, W = dy.parameter(self.bs[i]), dy.parameter(self.Ws[i])
            out = dy.affine_transform([b, W, out])
            if self.layer_norm and i != self.n_layers - 1:
                out = dy.layer_norm(out, self.ln_gs[i], self.ln_bs[i])
            if self.specified_activation:
                if self.activation[i] is not None:
                    out = self.activation[i](out)
            else:
                out = self.activation(out)
        return out
Example #13
    def __call__(self, obs, batched=False):
        out = obs if isinstance(obs, dy.Expression) else dy.inputTensor(
            obs, batched=batched)

        for i in range(self.n_layers):
            b, W = dy.parameter(self.bs[i]), dy.parameter(self.Ws[i])
            out = dy.affine_transform([b, W, out])
            if self.layer_norm and i != self.n_layers - 1:
                out = dy.layer_norm(out, self.ln_gs[i], self.ln_bs[i])
            if self.specified_activation:
                if self.activation[i] is not None:
                    out = self.activation[i](out)
            else:
                out = self.activation(out)
        return out
Example #14
    def transduce(
        self, seq: expression_seqs.ExpressionSequence
    ) -> expression_seqs.ExpressionSequence:

        if self.train and self.dropout > 0.0:
            seq_tensor = dy.dropout(
                self.child.transduce(seq).as_tensor(),
                self.dropout) + seq.as_tensor()
        else:
            seq_tensor = self.child.transduce(
                seq).as_tensor() + seq.as_tensor()
        if self.layer_norm:
            d = seq_tensor.dim()
            seq_tensor = dy.reshape(seq_tensor, (d[0][0], ),
                                    batch_size=d[0][1] * d[1])
            seq_tensor = dy.layer_norm(seq_tensor, self.ln_g, self.ln_b)
            seq_tensor = dy.reshape(seq_tensor, d[0], batch_size=d[1])
        return expression_seqs.ExpressionSequence(expr_tensor=seq_tensor)
Example #15
 def transform(self, x: tt.Tensor) -> tt.Tensor:
     g = dy.parameter(self.p_g)
     b = dy.parameter(self.p_b)
     return dy.layer_norm(x, g, b)
Example #16
def layer_norm(xs):
    head_shape, batch_size = xs[0].dim()
    g = dy.ones(head_shape)
    b = dy.zeros(head_shape)
    return [dy.layer_norm(x, g, b) for x in xs]
Example #17
 def transform(self, x: dy.Expression) -> dy.Expression:
     g = dy.parameter(self.p_g)
     b = dy.parameter(self.p_b)
     return dy.layer_norm(x, g, b)
Example #18
 def transform(self, x):
     g = self.p_g
     b = self.p_b
     return dy.layer_norm(x, g, b)
Example #19
 def __call__(self, x):
   g = dy.parameter(self.p_g)
   b = dy.parameter(self.p_b)
   return dy.layer_norm(x, g, b)
Example #20
 def transform(self, x):
   g = dy.parameter(self.p_g)
   b = dy.parameter(self.p_b)
   return dy.layer_norm(x, g, b)
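Several of the examples above reshape their input before the call because dy.layer_norm only normalizes a single vector: extra dimensions such as the sequence length are folded into the batch dimension, each column is normalized, and the original shape is restored. A standalone sketch of that reshape pattern, with arbitrary placeholder sizes, could be:

 import dynet as dy
 import numpy as np

 dy.renew_cg()
 pc = dy.ParameterCollection()
 d_model, seq_len = 8, 5  # hypothetical sizes
 p_g = pc.add_parameters(d_model, init=dy.ConstInitializer(1.0))
 p_b = pc.add_parameters(d_model, init=dy.ConstInitializer(0.0))

 x = dy.inputTensor(np.random.randn(d_model, seq_len))  # dim() == ((d_model, seq_len), 1)

 shape, batch = x.dim()
 # fold the sequence dimension into the batch, normalize each column, then restore the shape
 folded = dy.reshape(x, (shape[0],), batch_size=shape[1] * batch)
 folded = dy.layer_norm(folded, dy.parameter(p_g), dy.parameter(p_b))
 y = dy.reshape(folded, shape, batch_size=batch)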