Code Example #1
File: abcnn2.py Project: codykala/ABCNN
    def forward(self, x1, x2):
        """ Computes the forward pass for the attention layer of the ABCNN-2
            Block.

            Args:
                x1, x2: torch.Tensors of shape (batch_size, 1, max_length + width - 1, output_size)
                    The outputs from the convolutional layer.

            Returns:
                w1, w2: torch.Tensors of shape (batch_size, 1, max_length, output_size)
                    The outputs from the attention layer. This layer takes
                    the place of the Average Pooling layer seen in the BCNN and ABCNN-1
                    models.
        """
        # Compute attention matrix for outputs of convolutional layer
        A = compute_attention_matrix(x1, x2, self.match_score)

        # Initialize outputs for attention layer
        batch_size = x1.shape[0]
        output_size = x1.shape[3]
        w1 = torch.zeros((batch_size, 1, self.max_length, output_size))
        w2 = torch.zeros((batch_size, 1, self.max_length, output_size))
        w1 = w1.cuda() if x1.is_cuda else w1
        w2 = w2.cuda() if x2.is_cuda else w2

        # Compute the outputs
        for j in range(self.max_length):
            for k in range(j, j + self.width):
                row_sum = torch.sum(A[:, :, :, k], dim=2, keepdim=True)
                col_sum = torch.sum(A[:, :, k, :], dim=2, keepdim=True)
                row_sum = row_sum.cuda() if x1.is_cuda else row_sum
                col_sum = col_sum.cuda() if x2.is_cuda else col_sum
                w1[:, :, j, :] += row_sum * x1[:, :, k, :]
                w2[:, :, j, :] += col_sum * x2[:, :, k, :]
        return w1, w2
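The forward pass above relies on a compute_attention_matrix helper that is not reproduced on this page. The sketch below is a minimal guess at that helper, assuming the convention A[..., i, j] = match_score(i-th unit of x1, j-th unit of x2) and a Manhattan-based match score (the visualization example further down passes a function named manhattan); the project's actual implementation may differ.

import torch

def manhattan(v1, v2):
    # Assumed match score from the ABCNN paper: 1 / (1 + L1 distance).
    return 1.0 / (1.0 + (v1 - v2).abs().sum(dim=-1))

def compute_attention_matrix(x1, x2, match_score):
    """ x1: (batch, 1, len1, size), x2: (batch, 1, len2, size).
        Returns A of shape (batch, 1, len1, len2) with pairwise match scores. """
    v1 = x1.unsqueeze(3)          # (batch, 1, len1, 1, size)
    v2 = x2.unsqueeze(2)          # (batch, 1, 1, len2, size)
    return match_score(v1, v2)    # broadcasts over all (i, j) pairs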
Code Example #2
    def forward(self, x1, x2):
        """ Computes the forward pass for the attention layer of the ABCNN-1
            Block.

            Args:
                x1, x2: torch.Tensors of shape (batch_size, 1, max_length, input_size)
                    The inputs to the ABCNN-1 Block.

            Returns:
                attn1, attn2: torch.Tensors of shape (batch_size, 2, max_length, input_size)
                    The output of the attention layer for the ABCNN-1 Block.
        """
        # Get attention matrix and its transpose
        A = compute_attention_matrix(x1, x2, self.match_score)
        A = A.cuda() if self.W1.is_cuda else A
        A_t = A.permute(0, 1, 3, 2)

        # Compute attention feature maps
        a1 = torch.matmul(A, self.W1)
        a2 = torch.matmul(A_t, self.W2)

        # Stack attention feature maps with inputs
        attn1 = torch.cat([x1, a1], dim=1)
        attn2 = torch.cat([x2, a2], dim=1)
        return attn1, attn2
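As a quick sanity check on the shapes in this forward pass, the placeholder tensors below walk through the matmul and cat operations. The values are random and the W1/W2 shapes are inferred from how they are multiplied against A, not taken from the project's constructor.

import torch

batch_size, max_length, input_size = 2, 40, 300

A  = torch.rand(batch_size, 1, max_length, max_length)   # attention matrix for two max_length inputs
W1 = torch.rand(max_length, input_size)                   # shape inferred from torch.matmul(A, W1)
W2 = torch.rand(max_length, input_size)

a1 = torch.matmul(A, W1)                       # (batch, 1, max_length, input_size)
a2 = torch.matmul(A.permute(0, 1, 3, 2), W2)   # (batch, 1, max_length, input_size)

x1 = torch.rand(batch_size, 1, max_length, input_size)
x2 = torch.rand(batch_size, 1, max_length, input_size)
attn1 = torch.cat([x1, a1], dim=1)             # (batch, 2, max_length, input_size)
attn2 = torch.cat([x2, a2], dim=1)
assert attn1.shape == attn2.shape == (batch_size, 2, max_length, input_size)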
Code Example #3
File: visualization.py Project: codykala/ABCNN
        # Create directory to store plots
        prefix = "example{}".format(i)
        plot_dir = os.path.join(args.output_dir, prefix)
        if not os.path.exists(plot_dir):
            os.mkdir(plot_dir)
        
        # Get features for each question
        x0 = features[0].view(1, 1, max_length, embeddings_size)
        x1 = features[1].view(1, 1, max_length, embeddings_size)

        # Store all-ap outputs for input layer
        outputs0.append(all_ap(x0))
        outputs1.append(all_ap(x1))

        # Generate initial attention distribution
        A = compute_attention_matrix(x0, x1, manhattan)
        A = A.squeeze().cpu().numpy()
        filename = "{}_input_attn.png".format(prefix)
        filepath = os.path.join(plot_dir, filename)
        plot_attention_matrix(A, example[0], example[1], filepath)

        # Generate attention distribution for blocks
        for j, block in enumerate(blocks):
            
            # Get outputs for next block
            x0, x1 = x0.detach(), x1.detach()
            x0, x1, a0, a1 = block(x0, x1)

            # Sanity check
            assert not torch.isnan(x0).any()
            assert not torch.isnan(x1).any()
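The plot_attention_matrix helper called above is also not shown on this page. Below is a minimal sketch, assuming a matplotlib heatmap labeled with the two questions' tokens; the function name and argument order come from the call site, and everything else is an assumption rather than the project's actual code.

import matplotlib
matplotlib.use("Agg")  # render to image files without a display
import matplotlib.pyplot as plt

def plot_attention_matrix(A, tokens1, tokens2, filepath):
    """ Saves a heatmap of the attention matrix A to filepath, labeling the
        rows with the tokens of the first question and the columns with the
        tokens of the second. """
    fig, ax = plt.subplots()
    im = ax.imshow(A, cmap="viridis")
    ax.set_xticks(range(len(tokens2)))
    ax.set_xticklabels(tokens2, rotation=90)
    ax.set_yticks(range(len(tokens1)))
    ax.set_yticklabels(tokens1)
    fig.colorbar(im, ax=ax)
    fig.tight_layout()
    fig.savefig(filepath)
    plt.close(fig)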