def step(
        self, Ybar_t: torch.Tensor, dec_state: Tuple[torch.Tensor, torch.Tensor],
        enc_hiddens: torch.Tensor, enc_hiddens_proj: torch.Tensor,
        enc_masks: torch.Tensor
) -> Tuple[Tuple, torch.Tensor, torch.Tensor]:
    """Compute one forward step of the LSTM decoder, including the attention computation.

    @param Ybar_t (Tensor): Concatenated Tensor of [Y_t o_prev], with shape (b, e + h). The input for the decoder,
                            where b = batch size, e = embedding size, h = hidden size.
    @param dec_state (tuple(Tensor, Tensor)): Tuple of tensors both with shape (b, h), where b = batch size, h = hidden size.
            First tensor is decoder's prev hidden state, second tensor is decoder's prev cell.
    @param enc_hiddens (Tensor): Encoder hidden states Tensor, with shape (b, src_len, h * 2), where b = batch size,
                                src_len = maximum source length, h = hidden size.
    @param enc_hiddens_proj (Tensor): Encoder hidden states Tensor, projected from (h * 2) to h. Tensor is with shape (b, src_len, h),
                                where b = batch size, src_len = maximum source length, h = hidden size.
    @param enc_masks (Tensor): Tensor of sentence masks shape (b, src_len), where b = batch size,
                                src_len is maximum source length. Non-zero entries mark positions whose
                                attention score is forced to -inf. May be None to skip masking.

    @returns dec_state (tuple (Tensor, Tensor)): Tuple of tensors both shape (b, h), where b = batch size, h = hidden size.
            First tensor is decoder's new hidden state, second tensor is decoder's new cell.
    @returns combined_output (Tensor): Combined output Tensor at timestep t, shape (b, h), where b = batch size, h = hidden size.
    @returns e_t (Tensor): Tensor of shape (b, src_len) holding the (masked) attention scores.
                            Returned only so that callers can sanity check the implementation.
    """
    # h_t^{dec}, c_t^{dec} = decoder(\overline{y_t}, h_{t-1}^{dec}, c_{t-1}^{dec})
    dec_state = self.decoder(Ybar_t, dec_state)
    dec_hidden, _dec_cell = dec_state

    # e_{t, i} = (h_t^{dec})^T W_{attProj} h_i^{enc}
    # (b, src_len, h) x (b, h, 1) -> (b, src_len, 1) -> (b, src_len)
    e_t = enc_hiddens_proj.bmm(dec_hidden.unsqueeze(2)).squeeze(2)

    # Set e_t to -inf where enc_masks is non-zero so those positions get zero
    # attention weight. NOTE(review): the original statement was truncated; an
    # out-of-place masked_fill with a boolean mask is used here — confirm
    # against the upstream implementation.
    if enc_masks is not None:
        e_t = e_t.masked_fill(enc_masks.bool(), -float('inf'))

    # \alpha_t = Softmax(e_t); (b, src_len) -> (b, 1, src_len)
    alpha_t = F.softmax(e_t, dim=1).unsqueeze(1)
    # a_t = \sum_i \alpha_{t, i} h_i^{enc}
    # (b, 1, src_len) x (b, src_len, 2*h) -> (b, 1, 2*h) -> (b, 2*h)
    a_t = torch.bmm(alpha_t, enc_hiddens).squeeze(1)
    # u_t = [a_t; h_t^{dec}]
    U_t = torch.cat((a_t, dec_hidden), dim=1)
    # v_t = W_u u_t
    V_t = self.combined_output_projection(U_t)
    # o_t = Dropout(Tanh(v_t))
    combined_output = self.dropout(torch.tanh(V_t))

    return dec_state, combined_output, e_t
    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Compute one attention weight (or raw logit) per item of ``x``.

        x: shape [bs, nums, ebd], where ebd must equal ``self._input_dim``.
        out: shape [bs, nums]; softmax-normalized over ``nums`` when
            ``self._normalize`` is truthy, raw logits otherwise.

        Raises ConfigurationError when the last dimension of ``x`` does not
        match ``self._input_dim``.
        """
        if x.shape[-1] != self._input_dim:
            raise ConfigurationError("""The last dim of input must equals to
                the construction parameter 'input_dim'. Please check.""")

        if self._need_mlp:
            # Flatten batch and item axes so the MLP sees one item per row,
            # then restore the [bs, nums, *] layout.
            bs, nums, ebd = x.shape
            x_combine = x.reshape(bs * nums, ebd)
            x_combine = self._activation(self._mlp(x_combine))
            x = x_combine.reshape(bs, nums, -1)

        bs = x.shape[0]
        w = self._weight.unsqueeze(0).unsqueeze(2)  # shape [1, ebd, 1]
        w = w.expand(bs, -1, -1)  # shape [bs, ebd, 1]

        logits = x.bmm(w).squeeze(-1)  # shape [bs, nums]

        if self._normalize:
            return F.softmax(logits, dim=1)
        # Previously this return was nested under the branch above and was
        # unreachable, so the un-normalized path returned None.
        return logits
def inverse_depth_to_camera_coords(
        inverse_depth: torch.Tensor,
        intrinsics_inv: torch.Tensor) -> torch.Tensor:
    """Transform coordinates in the pixel frame to the camera frame.

    Args:
        inverse_depth: inverse depth map -- [B,1,H,W]
        intrinsics_inv: inverse intrinsics matrix for each element of the
            batch -- [B,3,3]

    Returns:
        array of (X,Y,Z) cam coordinates -- [B,3,H,W]

    Note:
        No guard against zero inverse depth is applied; zero entries produce
        inf/nan in the result.
    """
    b, _, h, w = inverse_depth.shape

    # compose homogeneous pixel coordinates (x = column index, y = row index)
    i_range = torch.arange(h).view(1, h, 1).expand(1, h, w).type_as(
        inverse_depth)  # [1, H, W]
    j_range = torch.arange(w).view(1, 1, w).expand(1, h, w).type_as(
        inverse_depth)  # [1, H, W]
    ones = torch.ones(1, h, w).type_as(inverse_depth)
    pixel_coords = torch.stack((j_range, i_range, ones), dim=1)  # [1, 3, H, W]

    # expand to batch
    pixel_coords = pixel_coords.expand(b, 3, h, w)  # [B, 3, H, W]
    pixel_coords_flat = pixel_coords.view(*pixel_coords.shape[:2], -1)
    camera_coords = intrinsics_inv.bmm(pixel_coords_flat).view_as(pixel_coords)

    # scale by depth: multiplying by depth == dividing by inverse depth
    # (the [B,1,H,W] map broadcasts over the three coordinate channels)
    return camera_coords / inverse_depth
Beispiel #5
def weighted_sum(matrix: torch.Tensor,
                 attention: torch.Tensor) -> torch.Tensor:
    """
    Takes a matrix of vectors and a set of weights over the rows in the matrix (which we call an
    "attention" vector), and returns a weighted sum of the rows in the matrix.  This is the typical
    computation performed after an attention mechanism.

    Note that while we call this a "matrix" of vectors and an attention "vector", we also handle
    higher-order tensors.  We always sum over the second-to-last dimension of the "matrix", and we
    assume that all dimensions in the "matrix" prior to the last dimension are matched in the
    "vector".  Non-matched dimensions in the "vector" must be `directly after the batch dimension`.

    For example, say I have a "matrix" with dimensions `(batch_size, num_queries, num_words,
    embedding_dim)`.  The attention "vector" then must have at least those dimensions, and could
    have more. Both:

        - `(batch_size, num_queries, num_words)` (distribution over words for each query)
        - `(batch_size, num_documents, num_queries, num_words)` (distribution over words in a
          query for each document)

    are valid input "vectors", producing tensors of shape:
    `(batch_size, num_queries, embedding_dim)` and
    `(batch_size, num_documents, num_queries, embedding_dim)` respectively.
    """
    # We'll special-case a few settings here, where there are efficient (but poorly-named)
    # operations in pytorch that already do the computation we need.
    if attention.dim() == 2 and matrix.dim() == 3:
        return attention.unsqueeze(1).bmm(matrix).squeeze(1)
    if attention.dim() == 3 and matrix.dim() == 3:
        return attention.bmm(matrix)
    if matrix.dim() - 1 < attention.dim():
        # Insert the attention-only dimensions right after the batch dimension
        # so that `matrix` lines up with `attention` for the broadcasted product.
        expanded_size = list(matrix.size())
        for i in range(attention.dim() - matrix.dim() + 1):
            matrix = matrix.unsqueeze(1)
            expanded_size.insert(i + 1, attention.size(i + 1))
        matrix = matrix.expand(*expanded_size)
    intermediate = attention.unsqueeze(-1).expand_as(matrix) * matrix
    return intermediate.sum(dim=-2)
Beispiel #6
    def forward(self, inputA: Tensor, inputB: Tensor) -> Tensor:
        """Bilinear-pool two feature tensors into one L2-normalized vector per sample.

        Both inputs are flattened to (dim0, dim1, -1); their batched product
        over the flattened axis is flattened again per sample, passed through
        a signed square root, and L2-normalized along the last dimension.
        """
        flat_a = inputA.view(inputA.shape[0], inputA.shape[1], -1)
        flat_b = inputB.view(inputB.shape[0], inputB.shape[1], -1)

        # (B, Ca, N) x (B, N, Cb) -> (B, Ca, Cb) -> (B, Ca * Cb)
        pooled = flat_a.bmm(flat_b.transpose(1, 2))
        pooled = pooled.view(pooled.shape[0], -1)

        # Signed square root keeps the sign while compressing the magnitude.
        signed_root = pooled.sign() * pooled.abs().sqrt()
        return F.normalize(signed_root, p=2, dim=-1)
def scaled_dot_product_attention(query: torch.Tensor,
                                 key: torch.Tensor,
                                 value: torch.Tensor,
                                 mask=None) -> torch.Tensor:
    """Batched scaled dot-product attention with an optional keep-mask.

    Scores are ``query @ key^T`` divided by the square root of the query's
    last dimension. Where ``mask == 0`` the score is replaced with ``-1e9``
    before the softmax over the last axis; the result weights ``value``.
    """
    scores = torch.bmm(query, key.permute(0, 2, 1)) / (query.size(-1)**0.5)

    if mask is not None:
        # Large negative fill so masked positions receive ~zero weight.
        scores = scores.masked_fill(mask == 0, -1e9)

    weights = torch.nn.functional.softmax(scores, dim=-1)
    return torch.bmm(weights, value)
Beispiel #8
def transform_points_Rt(points: torch.Tensor,
                        viewpoint: torch.Tensor,
                        inverse: bool = False):
    """Apply a batched rigid transform ``[R | t]`` (or its inverse) to point sets.

    Args:
        points: batched row-vector points; the code multiplies them with
            ``bmm`` against a (N, 3, 3) matrix, so (N, num_points, 3) is
            expected.
        viewpoint: (N, 3, 4) tensor whose first three columns are R and whose
            last column is t.
        inverse: when False, return ``R @ x + t`` for every point; when True,
            return ``R^{-1} @ (x - t)``.

    Returns:
        The transformed points, same shape as ``points``.
    """
    N, H, W = viewpoint.shape
    assert H == 3 and W == 4, "Rt is B x 3 x 4 "
    t = viewpoint[:, :, 3]
    r = viewpoint[:, :, 0:3]

    # transpose r to handle the fact that P in num_points x 3
    # yT = (RX)T = XT @ RT
    r = r.transpose(1, 2).contiguous()

    if inverse:
        # Undo the translation first, then the rotation.
        points = points - t[:, None, :]
        points = points.bmm(r.inverse())
    else:
        # The forward branch had lost its `else:`, which made the forward
        # transform a no-op and the inverse transform cancel itself out.
        points = points.bmm(r)
        points = points + t[:, None, :]

    return points
Beispiel #9
def weighted_sum(matrix: torch.Tensor, attention: torch.Tensor) -> torch.Tensor:
    """Return the attention-weighted sum over the second-to-last axis of ``matrix``.

    2-D/3-D attention over a 3-D matrix is delegated to ``bmm``. Otherwise,
    any attention dimensions that ``matrix`` lacks (located directly after the
    batch dimension) are broadcast into ``matrix`` before the weighted
    reduction.
    """
    attn_rank = attention.dim()
    mat_rank = matrix.dim()

    # Fast paths backed by batched matrix multiplication.
    if mat_rank == 3:
        if attn_rank == 2:
            return attention.unsqueeze(1).bmm(matrix).squeeze(1)
        if attn_rank == 3:
            return attention.bmm(matrix)

    missing = attn_rank - mat_rank + 1
    if missing > 0:
        base_shape = list(matrix.size())
        # Sizes of the attention-only axes, inserted right after the batch axis.
        inserted = [attention.size(axis) for axis in range(1, missing + 1)]
        for _ in inserted:
            matrix = matrix.unsqueeze(1)
        matrix = matrix.expand(*(base_shape[:1] + inserted + base_shape[1:]))

    weights = attention.unsqueeze(-1).expand_as(matrix)
    return (weights * matrix).sum(dim=-2)
Beispiel #10
 def forward(self, memory: torch.Tensor,
             answer: torch.Tensor) -> torch.Tensor:
     """Score each (memory, answer) pair with a per-row bilinear-style product.

     Args:
         memory: tensor of shape bsz * hdim
         answer: tensor of shape bsz * hdim

     Returns:
         prob: tensor of shape bsz, the dot product of each memory row with
         the corresponding row of ``self.combination(answer)``.
     """
     Ua = self.combination(answer)
     # (bsz, 1, hdim) x (bsz, hdim, 1) -> (bsz, 1, 1) -> (bsz,)
     memory = memory.unsqueeze(1)
     Ua = Ua.unsqueeze(1)
     mUa = memory.bmm(Ua.transpose(1, 2))
     return mUa.view(-1)
Beispiel #11
 def _forward_internal(self, matrix1: torch.Tensor, matrix2: torch.Tensor
                       ) -> torch.Tensor:
     """Compute pairwise scores between the rows of two batched matrices.

     Args:
         matrix1 : Tensor of shape (batch_size, seq_len1, hdim1)
         matrix2 : Tensor of shape (batch_size, seq_len2, hdim2)

     Returns:
         alpha : Tensor of shape (batch_size, seq_len1, seq_len2)
     """
     # Project matrix2 so its last dim matches matrix1.
     # Shape : (batch_size, seq_len_2, hdim1)
     Wy = self._weights(matrix2)
     # Shape : (batch_size, seq_len_1, seq_len_2)
     alpha = matrix1.bmm(Wy.transpose(-2, -1))
     return alpha
Beispiel #12
def dot_product_score(queries: Tensor, keys: Tensor, scaled: bool = False):
    """Compute (optionally scaled) dot-product scores between queries and keys.

    Args:
        - queries: [B, T, A]
        - keys: [B, S, A]
        - scaled: when True, queries are divided by sqrt(A) before the product.

    Returns:
        - score: [B, T, S]
    """
    if scaled:
        attn_dim = queries.size(-1)
        queries = queries / (attn_dim**0.5)
    # [B,T,A] x [B,A,S] = [B,T,S]
    score = queries.bmm(keys.transpose(1, 2))
    return score
Beispiel #13
 def forward(self, p_seq: torch.Tensor, q: torch.Tensor, p_mask: torch.Tensor):
     """Attend over a sequence with a (optionally projected) query vector.

     Args:
         p_seq: batch_size * p_seq_len * p_hidden_dim
         q: batch_size * q_hidden_dim
         p_mask: batch_size * p_seq_len (1 for padding, 0 for true)

     Returns:
         attn_scores: batch_size * p_seq_len. Softmax-normalized when
         ``self.normalize`` is truthy, otherwise ``exp`` of the raw scores.
         Padding positions get a score of zero either way.
     """
     Wq = self.linear(q) if self.linear is not None else q
     # (batch, len, hdim) x (batch, hdim, 1) -> (batch, len)
     pWq = p_seq.bmm(Wq.unsqueeze(2)).squeeze(2)
     # Padding positions are pushed to -inf so they contribute exp(-inf) == 0.
     # NOTE(review): the original masking statement was truncated; this is the
     # reconstruction implied by the p_mask contract above — confirm upstream.
     pWq = pWq.masked_fill(p_mask.bool(), -float("inf"))
     attn_scores = F.softmax(pWq, dim=-1) if self.normalize else pWq.exp()
     return attn_scores
Beispiel #14
def scaled_dot_product_attention(query: Tensor, key: Tensor,
                                 value: Tensor) -> Tensor:
    """Compute batched scaled dot-product self-attention.

    Args:
        query = <X, W_q> => (L, d_k) per batch element
        key = <X, W_k> => (L, d_k) per batch element
        value = <X, W_v> => (L, d_v) per batch element

    All three are consumed through ``bmm``, so a leading batch dimension is
    required.

    Returns:
        Self Attention Matrix (L, d_v) per batch element
    """
    temp = query.bmm(key.transpose(1, 2))
    # Scale by sqrt(d_k) before the softmax over the key axis.
    scale = query.size(-1)**0.5
    softmax = f.softmax(temp / scale, dim=-1)
    return softmax.bmm(value)
def scaled_dot_product_attention(query: Tensor,
                                 key: Tensor,
                                 value: Tensor,
                                 mask: Union[None, Tensor] = None) -> Tensor:
    """Batched scaled dot-product attention with an optional exclusion mask.

    Positions where ``mask`` is True are set to ``-inf`` before the scores are
    divided by sqrt of the query's last dimension and soft-maxed over the last
    axis; the resulting weights are applied to ``value``.
    """
    attn_logits = query.bmm(key.permute(0, 2, 1))

    if mask is not None:
        attn_logits = attn_logits.masked_fill(mask, float('-inf'))

    attn_logits = attn_logits / query.size(-1) ** 0.5
    weights = F.softmax(attn_logits, dim=-1)
    return weights.bmm(value)
Beispiel #16
def attention(query: torch.Tensor,
              key: torch.Tensor,
              value: torch.Tensor,
              enc_masks: torch.Tensor = None) -> "Tuple[torch.Tensor, torch.Tensor]":
    """Dot-product attention of a single query vector over a key/value sequence.

    Args:
        query: query vectors; unsqueezed on dim 2 and multiplied against
            ``key`` with ``bmm``, so (b, h) is expected.
        key: keys, expected (b, src_len, h).
        value: values, expected (b, src_len, d).
        enc_masks: optional (b, src_len) mask; non-zero entries are excluded
            from the attention distribution.

    Returns:
        A pair ``(attention_weights, context_vector)`` where the weights have
        shape (b, 1, src_len) and the context vector has shape (b, d).
    """
    # (b, src_len, h) x (b, h, 1) -> (b, src_len, 1) -> (b, src_len)
    query_unsqueezed = query.unsqueeze(dim=2)
    score = key.bmm(query_unsqueezed)
    score = score.squeeze(dim=2)

    if enc_masks is not None:
        # NOTE(review): the original masking statement was truncated; masked
        # positions are set to -inf so softmax assigns them zero weight.
        score = score.masked_fill(enc_masks.bool(), -float('inf'))

    attention_weights = softmax(score, dim=1)
    attention_weights = attention_weights.unsqueeze(dim=1)

    # (b, 1, src_len) x (b, src_len, d) -> (b, 1, d) -> (b, d)
    context_vector = attention_weights.bmm(value)
    context_vector = context_vector.squeeze(dim=1)

    return attention_weights, context_vector
Beispiel #17
Beispiel #22
    def forward(self, input: Tensor) -> Tensor:
        """Apply ``mat_log_sym`` to each sample's self-interaction (Gram) matrix.

        The input is flattened to (dim0, dim1, -1), multiplied by its own
        transpose per sample, and every resulting matrix is passed through
        ``mat_log_sym`` before being stacked back along the batch axis.
        """
        flat = input.view(input.shape[0], input.shape[1], -1)
        # (B, C, N) x (B, N, C) -> (B, C, C)
        gram = flat.bmm(flat.transpose(1, 2))

        per_sample_logs = []
        for sample_matrix in gram.unbind(dim=0):
            per_sample_logs.append(mat_log_sym(sample_matrix))

        return torch.stack(per_sample_logs, dim=0)
Beispiel #23
 def forward(self, matrix_1: torch.Tensor, matrix_2: torch.Tensor) -> torch.Tensor:
     """Return the batched product of ``matrix_1`` with ``matrix_2`` transposed on its last two axes."""
     matrix_2_t = matrix_2.transpose(1, 2)
     return torch.bmm(matrix_1, matrix_2_t)
 def _forward_internal(self, vector: torch.Tensor,
                       matrix: torch.Tensor) -> torch.Tensor:
     """Scaled dot-product similarity between ``vector`` and each row of ``matrix``.

     Args:
         vector: query vectors; unsqueezed on the last axis and fed to
             ``bmm``, so (batch, dim) is expected.
         matrix: candidate rows, expected (batch, num_rows, dim).

     Returns:
         (batch, num_rows) scores: dot products divided by sqrt(dim).
     """
     sim_score = matrix.bmm(vector.unsqueeze(-1)).squeeze(-1)
     # Scale by the feature dimension. The previous code divided by
     # sqrt(sim_score.size(-1)), i.e. by the number of rows, which made the
     # scores depend on sequence/padding length rather than on vector size.
     sim_score_scaled = sim_score / math.sqrt(matrix.size(-1))
     return sim_score_scaled
Beispiel #26
def scaled_dot_product_attention(query: Tensor, key: Tensor, value: Tensor):
    """Batched scaled dot-product attention: softmax(Q K^T / sqrt(d)) V, with d the query's last dimension."""
    key_t = key.permute(0, 2, 1)
    scaled_scores = query.bmm(key_t) / query.size(-1)**0.5
    weights = F.softmax(scaled_scores, dim=-1)
    return weights.bmm(value)
 def _forward_internal(self, vector: torch.Tensor, matrix: torch.Tensor) -> torch.Tensor:
     """Return the dot product of ``vector`` with every row of ``matrix``, per batch element."""
     column = vector.unsqueeze(-1)
     scores = torch.bmm(matrix, column)
     return scores.squeeze(-1)
Beispiel #29
Beispiel #30
    def _forward_internal(self, vector: torch.Tensor,
                          matrix: torch.Tensor) -> torch.Tensor:
        """Transform ``vector`` with ``self.ll`` and dot it against every row of ``matrix``."""
        projected_column = self.ll(vector).unsqueeze(-1)
        scores = matrix.bmm(projected_column)
        return scores.squeeze(-1)
Beispiel #31
def scaled_dot_product_attention(query: torch.Tensor, key: torch.Tensor, value: torch.Tensor) -> torch.Tensor:
    """Batched scaled dot-product attention without masking.

    The query/key scores are divided by the square root of the query's last
    dimension, soft-maxed over the key axis, and used to mix ``value``.
    """
    scale = query.size(-1) ** 0.5
    scores = torch.bmm(query, key.permute(0, 2, 1)) / scale
    weights = torch.nn.functional.softmax(scores, dim=-1)
    return torch.bmm(weights, value)
 def _forward_internal(self, vector: torch.Tensor,
                       matrix: torch.Tensor) -> torch.Tensor:
     """Compute, per batch element, the dot product between ``vector`` and each row of ``matrix``."""
     as_column = vector.unsqueeze(-1)
     return torch.bmm(matrix, as_column).squeeze(-1)
Beispiel #33
 def forward(self, matrix_1: torch.Tensor,
             matrix_2: torch.Tensor) -> torch.Tensor:
     """Batched similarity: multiply ``matrix_1`` by ``matrix_2`` with its last two axes swapped."""
     swapped = matrix_2.permute(0, 2, 1)
     return torch.bmm(matrix_1, swapped)