Example #1
0
 def embed_sent(
         self,
         sent_len: numbers.Integral) -> expression_seqs.ExpressionSequence:
     """Return the first ``sent_len`` position embeddings as a sequence.

     Slices an (emb_dim, sent_len) window out of the embedding parameter
     matrix (stride 1 in both dimensions, starting at the origin).

     Args:
       sent_len: number of positions (columns) to take from the table.
     Returns:
       An unmasked expression sequence wrapping the sliced tensor.
     """
     table = dy.parameter(self.embeddings)
     window = dy.strided_select(table, [1, 1], [0, 0],
                                [self.emb_dim, sent_len])
     return expression_seqs.ExpressionSequence(expr_tensor=window,
                                               mask=None)
Example #2
0
 def __call__(self, expr):
     """Return a strided slice of ``expr`` over its first two dimensions.

     The argument list appears to pack one (from, to, stride) triple per
     dimension: dims 0 and 1 are trimmed by ``self.margin`` on both ends
     and subsampled by ``self.stride``; dim 2 and the batch dimension are
     kept whole with stride 1.

     NOTE(review): everything is passed as one flat list rather than the
     separate ``strides`` / ``range_from`` / ``range_to`` lists that
     current DyNet's ``strided_select`` takes — presumably this targets an
     older DyNet signature; confirm against the installed DyNet version.
     """
     return dy.strided_select(expr, [
         self.margin[0],
         expr.dim()[0][0] - self.margin[0], self.stride[0], self.margin[1],
         expr.dim()[0][1] - self.margin[1], self.stride[1], 0,
         expr.dim()[0][2], 1, 0,
         expr.dim()[1], 1
     ])
Example #3
0
 def transduce(self, src: ExpressionSequence) -> ExpressionSequence:
   """Combine positional embeddings with the input sequence.

   Slices the first ``len(src)`` columns from the positional-embedding
   table and merges them with the input tensor according to ``self.op``.

   Args:
     src: input expression sequence.
   Returns:
     Expression sequence with positional information applied, carrying the
     input's mask.
   Raises:
     ValueError: if ``self.op`` is neither ``'sum'`` nor ``'concat'``.
   """
   sent_len = len(src)
   embeddings = dy.strided_select(dy.parameter(self.embedder), [1, 1], [0, 0],
                                  [self.input_dim, sent_len])
   if self.op == 'sum':
     output = embeddings + src.as_tensor()
   elif self.op == 'concat':
     output = dy.concatenate([embeddings, src.as_tensor()])
   else:
     # Bug fix: previously referenced the undefined name `op` here, which
     # raised NameError instead of the intended ValueError.
     raise ValueError(f'Illegal op {self.op} in PositionalTransducer (options are "sum"/"concat")')
   output_seq = ExpressionSequence(expr_tensor=output, mask=src.mask)
   self._final_states = [FinalTransducerState(output_seq[-1])]
   return output_seq
Example #4
0
    def transduce(
        self, expr_seq: expression_seqs.ExpressionSequence
    ) -> expression_seqs.ExpressionSequence:
        """Transduce the sequence, applying masks if given (masked timesteps
        simply copy the previous h / c).

        Gate pre-activations for every timestep are computed in one strided
        convolution, then combined by a sequential pooling loop.
        NOTE(review): the f/o/z gating with c_t = f*c + (1-f)*z resembles
        quasi-RNN-style fo-pooling — confirm against the model's reference.

        Args:
          expr_seq: expression sequence, or list of expression sequences
            (accessed via as_tensor)
        Return:
          expression sequence of hidden states h_1..h_T
        """

        # Reshape the input to a (seq_len, 1, input_dim) "image" per batch
        # element so one 2D convolution projects all timesteps at once.
        if isinstance(expr_seq, list):
            mask_out = expr_seq[0].mask
            seq_len = len(expr_seq[0])
            batch_size = expr_seq[0].dim()[1]
            tensors = [e.as_tensor() for e in expr_seq]
            input_tensor = dy.reshape(dy.concatenate(tensors),
                                      (seq_len, 1, self.input_dim),
                                      batch_size=batch_size)
        else:
            mask_out = expr_seq.mask
            seq_len = len(expr_seq)
            batch_size = expr_seq.dim()[1]
            input_tensor = dy.reshape(dy.transpose(expr_seq.as_tensor()),
                                      (seq_len, 1, self.input_dim),
                                      batch_size=batch_size)

        if self.dropout > 0.0 and self.train:
            input_tensor = dy.dropout(input_tensor, self.dropout)

        # One convolution yields all three gate pre-activations (f, o, z)
        # stacked along the channel axis: 3 * hidden_dim channels.
        proj_inp = dy.conv2d_bias(input_tensor,
                                  dy.parameter(self.p_f),
                                  dy.parameter(self.p_b),
                                  stride=(self.stride, 1),
                                  is_valid=False)
        # A stride > 1 convolution shortens the sequence; read the new length
        # back from the output rather than recomputing it.
        reduced_seq_len = proj_inp.dim()[0][0]
        proj_inp = dy.transpose(
            dy.reshape(proj_inp, (reduced_seq_len, self.hidden_dim * 3),
                       batch_size=batch_size))
        # proj_inp dims: (hidden, 1, seq_len), batch_size
        if self.stride > 1 and mask_out is not None:
            # Subsample the mask so it matches the shortened sequence.
            mask_out = mask_out.lin_subsampled(trg_len=reduced_seq_len)

        # h[0]/c[0] are zero initial states; only h[1:] is returned below.
        h = [dy.zeroes(dim=(self.hidden_dim, 1), batch_size=batch_size)]
        c = [dy.zeroes(dim=(self.hidden_dim, 1), batch_size=batch_size)]
        for t in range(reduced_seq_len):
            # Slice this timestep's forget (f), output (o) and candidate (z)
            # gates out of the stacked projection.
            f_t = dy.logistic(
                dy.strided_select(proj_inp, [], [0, t],
                                  [self.hidden_dim, t + 1]))
            o_t = dy.logistic(
                dy.strided_select(proj_inp, [], [self.hidden_dim, t],
                                  [self.hidden_dim * 2, t + 1]))
            z_t = dy.tanh(
                dy.strided_select(proj_inp, [], [self.hidden_dim * 2, t],
                                  [self.hidden_dim * 3, t + 1]))

            if self.dropout > 0.0 and self.train:
                # Zoneout-style regularization: randomly force some forget-gate
                # units to 1 so those units keep their previous cell state.
                retention_rate = 1.0 - self.dropout
                dropout_mask = dy.random_bernoulli((self.hidden_dim, 1),
                                                   retention_rate,
                                                   batch_size=batch_size)
                f_t = 1.0 - dy.cmult(
                    dropout_mask, 1.0 - f_t
                )  # TODO: would be easy to make a zoneout dynet operation to save memory

            # Input gate is coupled to the forget gate (i = 1 - f).
            i_t = 1.0 - f_t

            if t == 0:
                # No previous cell state at the first step.
                c_t = dy.cmult(i_t, z_t)
            else:
                c_t = dy.cmult(f_t, c[-1]) + dy.cmult(i_t, z_t)
            h_t = dy.cmult(
                o_t, c_t)  # note: LSTM would use dy.tanh(c_t) instead of c_t
            # Fast path: skip mask blending when nothing is masked at step t
            # (the mask column sums to zero).
            if mask_out is None or np.isclose(
                    np.sum(mask_out.np_arr[:, t:t + 1]), 0.0):
                c.append(c_t)
                h.append(h_t)
            else:
                # Masked batch elements copy the previous state instead of
                # taking the freshly computed one.
                c.append(
                    mask_out.cmult_by_timestep_expr(c_t, t, True) +
                    mask_out.cmult_by_timestep_expr(c[-1], t, False))
                h.append(
                    mask_out.cmult_by_timestep_expr(h_t, t, True) +
                    mask_out.cmult_by_timestep_expr(h[-1], t, False))

        # Expose the last hidden/cell state (flattened to vectors) for
        # downstream consumers such as decoder initialization.
        self._final_states = [transducers.FinalTransducerState(dy.reshape(h[-1], (self.hidden_dim,), batch_size=batch_size), \
                                                               dy.reshape(c[-1], (self.hidden_dim,),
                                                                          batch_size=batch_size))]
        return expression_seqs.ExpressionSequence(expr_list=h[1:],
                                                  mask=mask_out)
Example #5
0
 def embed_sent(self, sent_len):
   """Slice the first ``sent_len`` position embeddings into a sequence.

   Args:
     sent_len: number of columns to take from the embedding table.
   Returns:
     An unmasked ExpressionSequence over an (emb_dim, sent_len) tensor.
   """
   table = dy.parameter(self.embeddings)
   window = dy.strided_select(table, [1, 1], [0, 0], [self.emb_dim, sent_len])
   return ExpressionSequence(expr_tensor=window, mask=None)