Code Example #1
File: self_attention.py  Project: seeledu/xnmt-devel
    def transduce(self, expr_seq: ExpressionSequence) -> ExpressionSequence:
        """
    transduce the sequence

    Args:
      expr_seq: expression sequence or list of expression sequences (where each inner list will be concatenated)
    Returns:
      expression sequence
    """

        Wq, Wk, Wv, Wo = [
            dy.parameter(x) for x in (self.pWq, self.pWk, self.pWv, self.pWo)
        ]
        bq, bk, bv, bo = [
            dy.parameter(x) for x in (self.pbq, self.pbk, self.pbv, self.pbo)
        ]

        # Start with a [(length, model_size) x batch] tensor
        x = expr_seq.as_transposed_tensor()
        x_len = x.dim()[0][0]
        x_batch = x.dim()[1]
        # Get the query key and value vectors
        # TODO: do we need bias broadcasting in DyNet?
        # q = dy.affine_transform([bq, x, Wq])
        # k = dy.affine_transform([bk, x, Wk])
        # v = dy.affine_transform([bv, x, Wv])
        q = bq + x * Wq
        k = bk + x * Wk
        v = bv + x * Wv

        # Split to batches [(length, head_dim) x batch * num_heads] tensor
        q, k, v = [
            dy.reshape(x, (x_len, self.head_dim),
                       batch_size=x_batch * self.num_heads) for x in (q, k, v)
        ]

        # Do scaled dot product [(length, length) x batch * num_heads], rows are queries, columns are keys
        attn_score = q * dy.transpose(k) / sqrt(self.head_dim)
        if expr_seq.mask is not None:
            mask = dy.inputTensor(np.repeat(
                expr_seq.mask.np_arr, self.num_heads, axis=0).transpose(),
                                  batched=True) * -1e10
            attn_score = attn_score + mask
        attn_prob = dy.softmax(attn_score, d=1)
        # Reduce using attention and resize to match [(length, model_size) x batch]
        o = dy.reshape(attn_prob * v, (x_len, self.input_dim),
                       batch_size=x_batch)
        # Final transformation
        # o = dy.affine_transform([bo, attn_prob * v, Wo])
        o = bo + o * Wo

        expr_seq = ExpressionSequence(expr_transposed_tensor=o,
                                      mask=expr_seq.mask)

        self._final_states = [FinalTransducerState(expr_seq[-1], None)]

        return expr_seq
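
For reference, the head-splitting and scaled dot-product above can be written as a minimal NumPy sketch (the function names and shape conventions here are illustrative assumptions, not part of xnmt; biases are omitted, and heads are split by reshaping rather than folded into the batch dimension as DyNet does):

import numpy as np

def softmax(x, axis=-1):
    x = x - x.max(axis=axis, keepdims=True)
    e = np.exp(x)
    return e / e.sum(axis=axis, keepdims=True)

def multi_head_self_attention(x, Wq, Wk, Wv, Wo, num_heads, mask=None):
    # x: (length, model_size); Wq/Wk/Wv/Wo: (model_size, model_size)
    # mask: optional (length,) vector with 1 at padded positions
    length, model_size = x.shape
    head_dim = model_size // num_heads
    q, k, v = x @ Wq, x @ Wk, x @ Wv
    # split heads: (num_heads, length, head_dim)
    split = lambda t: t.reshape(length, num_heads, head_dim).transpose(1, 0, 2)
    q, k, v = split(q), split(k), split(v)
    # scaled dot product: rows are queries, columns are keys
    scores = q @ k.transpose(0, 2, 1) / np.sqrt(head_dim)
    if mask is not None:
        scores = scores + mask[None, None, :] * -1e10  # hide padded keys
    probs = softmax(scores, axis=-1)
    o = (probs @ v).transpose(1, 0, 2).reshape(length, model_size)
    return o @ Wo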
Code Example #2
  def transduce(self, es: expression_seqs.ExpressionSequence) -> expression_seqs.ExpressionSequence:
    mask = es.mask
    sent_len = len(es)
    es_expr = es.as_transposed_tensor()
    batch_size = es_expr.dim()[1]

    es_chn = dy.reshape(es_expr, (sent_len, self.freq_dim, self.chn_dim), batch_size=batch_size)

    h_out = {}
    for direction in ["fwd", "bwd"]:
      # input convolutions
      gates_xt_bias = dy.conv2d_bias(es_chn, dy.parameter(self.params["x2all_" + direction]),
                                     dy.parameter(self.params["b_" + direction]), stride=(1, 1), is_valid=False)
      gates_xt_bias_list = [dy.pick_range(gates_xt_bias, i, i + 1) for i in range(sent_len)]

      h = []
      c = []
      for input_pos in range(sent_len):
        directional_pos = input_pos if direction == "fwd" else sent_len - input_pos - 1
        gates_t = gates_xt_bias_list[directional_pos]
        if input_pos > 0:
          # recurrent convolutions
          gates_h_t = dy.conv2d(h[-1], dy.parameter(self.params["h2all_" + direction]), stride=(1, 1), is_valid=False)
          gates_t += gates_h_t

        # standard LSTM logic
        if len(c) == 0:
          c_tm1 = dy.zeros((self.freq_dim * self.num_filters,), batch_size=batch_size)
        else:
          c_tm1 = c[-1]
        gates_t_reshaped = dy.reshape(gates_t, (4 * self.freq_dim * self.num_filters,), batch_size=batch_size)
        c_t = dy.reshape(dy.vanilla_lstm_c(c_tm1, gates_t_reshaped), (self.freq_dim * self.num_filters,),
                         batch_size=batch_size)
        h_t = dy.vanilla_lstm_h(c_t, gates_t_reshaped)
        h_t = dy.reshape(h_t, (1, self.freq_dim, self.num_filters,), batch_size=batch_size)

        if mask is None or np.isclose(np.sum(mask.np_arr[:, input_pos:input_pos + 1]), 0.0):
          c.append(c_t)
          h.append(h_t)
        else:
          c.append(
            mask.cmult_by_timestep_expr(c_t, input_pos, True) + mask.cmult_by_timestep_expr(c[-1], input_pos, False))
          h.append(
            mask.cmult_by_timestep_expr(h_t, input_pos, True) + mask.cmult_by_timestep_expr(h[-1], input_pos, False))

      h_out[direction] = h
    ret_expr = []
    for state_i in range(len(h_out["fwd"])):
      state_fwd = h_out["fwd"][state_i]
      state_bwd = h_out["bwd"][-1 - state_i]
      output_dim = (state_fwd.dim()[0][1] * state_fwd.dim()[0][2],)
      fwd_reshape = dy.reshape(state_fwd, output_dim, batch_size=batch_size)
      bwd_reshape = dy.reshape(state_bwd, output_dim, batch_size=batch_size)
      ret_expr.append(dy.concatenate([fwd_reshape, bwd_reshape], d=0 if self.reshape_output else 2))
    return expression_seqs.ExpressionSequence(expr_list=ret_expr, mask=mask)

  # TODO: implement get_final_states()
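
The masked update at the end of the time loop is the usual device for batching variable-length sequences: positions that are padding in a given sequence keep their previous state rather than taking the freshly computed one. A minimal NumPy sketch of that selection (assuming, as the check on mask.np_arr above suggests, a mask that stores 1 at padded positions):

import numpy as np

def masked_state_update(new_state, prev_state, mask_t):
    # new_state, prev_state: (batch, hidden); mask_t: (batch,) with 1 at padded positions
    keep_new = (1.0 - mask_t)[:, None]   # 1 where the token is real, 0 where it is padding
    return keep_new * new_state + (1.0 - keep_new) * prev_state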
Code Example #3
File: fixed_size_att.py  Project: msperber/misc
  def transduce(self, x: expression_seqs.ExpressionSequence) -> expression_seqs.ExpressionSequence:
    x_T = x.as_transposed_tensor()
    scores = x_T * dy.parameter(self.W)
    if x.mask is not None:
      scores = x.mask.add_to_tensor_expr(scores, multiplicator=-100.0, time_first=True)
    if self.pos_enc_max:
      seq_len = x_T.dim()[0][0]
      pos_enc = self.pos_enc[:seq_len, :]
      scores = dy.cmult(scores, dy.inputTensor(pos_enc))
    attention = dy.softmax(scores)
    output_expr = x.as_tensor() * attention
    return expression_seqs.ExpressionSequence(expr_tensor=output_expr, mask=None)
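
The effect of this transducer is to compress a variable-length sequence into a fixed number of vectors: each column of W scores every time step, the softmax normalizes over time, and the weighted sum yields one output vector per column. A minimal NumPy sketch of that computation (names and shapes are illustrative assumptions, not xnmt's API):

import numpy as np

def fixed_size_attention(x, W):
    # x: (model_dim, length) sequence tensor; W: (model_dim, num_outputs)
    scores = x.T @ W                                      # (length, num_outputs)
    scores = scores - scores.max(axis=0, keepdims=True)
    weights = np.exp(scores) / np.exp(scores).sum(axis=0, keepdims=True)  # softmax over time
    return x @ weights                                    # (model_dim, num_outputs): fixed size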
Code Example #4
File: convolution.py  Project: yzhen-li/xnmt
 def transduce(
     self, x: expression_seqs.ExpressionSequence
 ) -> expression_seqs.ExpressionSequence:
     expr = x.as_transposed_tensor()
     batch_size, hidden_dim, seq_len = expr.size()
     expr = expr.view((batch_size, self.in_channels,
                       hidden_dim // self.in_channels, seq_len))
     expr = self.cnn_layer(expr)
     if self.use_pooling:
         expr = self.pooling_layer(expr)
     expr = self.activation_fct(expr)
     batch_size, out_chn, out_h, seq_len = expr.size()
     expr = expr.view((batch_size, out_chn * out_h, seq_len))
     output_seq = expression_seqs.ExpressionSequence(
         expr_transposed_tensor=expr,
         mask=x.mask.lin_subsampled(trg_len=seq_len) if x.mask else None)
     self._final_states = [transducers.FinalTransducerState(output_seq[-1])]
     return output_seq
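
Since this variant runs on the PyTorch backend, the channel-splitting and reshaping can be traced with a few tensor operations. A minimal sketch with illustrative sizes (the layer hyperparameters below are assumptions, not the project's configuration):

import torch
import torch.nn as nn

batch_size, hidden_dim, seq_len = 4, 40, 17
in_channels, out_channels = 2, 32            # hidden_dim must be divisible by in_channels

x = torch.randn(batch_size, hidden_dim, seq_len)                        # transposed sequence tensor
x = x.view(batch_size, in_channels, hidden_dim // in_channels, seq_len)
cnn = nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=(2, 2), padding=1)
pool = nn.MaxPool2d(kernel_size=2, stride=2, ceil_mode=True)
y = torch.relu(pool(cnn(x)))                                            # (batch, out_chn, out_h, out_len)
b, out_chn, out_h, out_len = y.size()
y = y.view(b, out_chn * out_h, out_len)                                 # back to (batch, features, time)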