Example 1
    def forward(self, input):

        input_shape = pytorch_utils.get_shape(input)
        assert len(input_shape) == 5

        dim = self.dim

        # permute to put the required dimension in the 2nd dimension
        if dim == 1:
            x = input
        elif dim == 2:
            x = input.permute(0, 2, 1, 3, 4)
        elif dim == 3:
            x = input.permute(0, 3, 2, 1, 4)
        elif dim == 4:
            x = input.permute(0, 4, 2, 3, 1)

        # apply batch_norm
        num_features = pytorch_utils.get_shape(x)[1]
        assert num_features == self.num_features
        x = self.layer(x)

        # permute back to the original view
        if dim == 2:
            x = x.permute(0, 2, 1, 3, 4)
        elif dim == 3:
            x = x.permute(0, 3, 2, 1, 4)
        elif dim == 4:
            x = x.permute(0, 4, 2, 3, 1)

        x_shape = pytorch_utils.get_shape(x)
        assert input_shape == x_shape

        return x
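
The class around this forward() is not shown; below is a minimal standalone sketch of the same permute -> normalize -> permute pattern, assuming nn.BatchNorm3d as the wrapped self.layer (an assumption, since the layer type is not visible here):

    import torch
    import torch.nn as nn

    x = torch.randn(2, 8, 10, 7, 7)      # (B, C, T, H, W)
    bn = nn.BatchNorm3d(10)              # normalize dim=2 (T), so T acts as channels

    y = bn(x.permute(0, 2, 1, 3, 4))     # move dim 2 into the channel position
    y = y.permute(0, 2, 1, 3, 4)         # permute back to the original layout
    assert y.shape == x.shape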
Example 2
    def get_context_class(self, x_cs, x_so, B):

        x_cs_class = []

        # loop on multi_contexts
        for idx_context in range(self.n_contexts):
            # embedding of context
            x_c = x_cs[idx_context]  # (B, C, N)

            x_c = x_c.permute(0, 2, 1)  # (B, N, C)

            # hide N dimension
            B, N, C = pytorch_utils.get_shape(x_c)
            x_c = x_c.contiguous().view(B * N, C)  # (B*N, C)

            x_c = torch.cat((x_so, x_c), dim=1)

            layer = self.classifier_layers
            x_c = layer(x_c)

            _, C = pytorch_utils.get_shape(x_c)
            x_c = x_c.view(B, N, C)  # (B, N, C)

            # append to list of context class predictions
            x_cs_class.append(x_c.view(1, B, self.N,
                                       self.n_classes))  # (1, B, N, C)

        # stack predictions from all contexts
        x_cs_class = torch.stack(x_cs_class, dim=0).view(
            -1, B, self.N, self.n_classes)  # (n_contexts, B, N, C)
        return x_cs_class
Example 3
    def forward(self, input):

        input_shape = pytorch_utils.get_shape(input)
        assert len(input_shape) == 2

        dim = self.dim

        # permute to put the required dimension in the 2nd dimension
        if dim == 0:
            x = input.permute(1, 0)
        else:
            x = input

        # apply batch_norm
        num_features = pytorch_utils.get_shape(x)[1]
        assert num_features == self.num_features
        x = self.layer(x)

        # permute back to the original view
        if dim == 0:
            x = x.permute(1, 0)

        x_shape = pytorch_utils.get_shape(x)
        assert input_shape == x_shape

        return x
Example 4
    def forward(self, input):
        # input is of shape (None, H, W, N, T)

        input_shape = pytorch_utils.get_shape(input)
        b, h, w, n, t = input_shape

        assert len(input_shape) == 5

        # reshape
        tensor = input.permute(0, 1, 2, 4, 3)  # (None, H, W, T, N)
        tensor = tensor.contiguous().view(b * h * w * t, n)  # (None*H*W*T, N)

        # sample gumbel noise
        gumbel_shape = tensor.size()
        gumbel_noise = self.gumbel_sampler.sample(gumbel_shape).cuda()
        # gumbel_noise = self.sample_gumbel(gumbel_shape)

        # gumbel sigmoid trick
        tensor = (tensor + gumbel_noise) / self.temperature
        tensor = self.sigmoid(tensor)

        # get original size and permutation
        tensor = tensor.view(b, h, w, t, n)  # (None, H, W, T, N)
        tensor = tensor.permute(0, 1, 2, 4, 3)  # (None, H, W, N, T)

        return tensor
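
self.gumbel_sampler is not shown in this snippet; here is a self-contained sketch of the same Gumbel-sigmoid relaxation, assuming standard Gumbel(0, 1) noise sampled by inverse transform:

    import torch

    def gumbel_sigmoid(logits, temperature=0.5, eps=1e-20):
        u = torch.rand_like(logits)
        noise = -torch.log(-torch.log(u + eps) + eps)  # Gumbel(0, 1) noise
        return torch.sigmoid((logits + noise) / temperature)

    y = gumbel_sigmoid(torch.randn(2, 7, 7, 4, 10))    # (None, H, W, N, T)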
Example 5
    def forward(self, x):
        """
        :param x: (B, C, T, H, W)
        :return:
        """

        K = self.window_size

        # pad the input
        x_padded = self.padding(x)

        # get how many local windows or slices (S)
        B, C, T, H, W = pytorch_utils.get_shape(x_padded)
        S = T - K + 1
        N = self.n_heads

        tensors = []

        # loop on windows, and get them
        for idx_slice in range(S):
            idx_start = idx_slice
            idx_stop = idx_start + K

            # slice to get the window
            x_window = x_padded[:, :, idx_start:idx_stop]
            tensors.append(x_window)

        # now that you get the windows, stack them into a new dimension
        y = torch.stack(tensors, dim=1)  # (B, S, C, T, H, W)

        # reshape to hide the slices inside the batch dimension
        y = y.view(B * S, C, K, H, W)  # (B*S, C, K, H, W)

        z = []

        # feed to the local-attention blocks, one per head
        for idx_head in range(N):
            head_num = idx_head + 1
            attention_head_name = 'attention_head_%d' % (head_num)
            attention_head = getattr(self, attention_head_name)
            z_head = attention_head(y)  # (B*S, C, T, H, W)
            z.append(z_head)

        # concat
        z = torch.cat(z, dim=1)  # (B*S, C, H, W)

        # reshape to get back slices
        z = z.view(B, S, C, H, W)  # (B, S, C, H, W)

        # permute to put slices in the temporal dimension
        z = z.permute(0, 2, 1, 3, 4)  # (B, C, S, H, W)

        # residual
        z += x

        return z
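
The window-extraction loop above is equivalent to torch.Tensor.unfold on the temporal dimension; a shape-only sketch (the attention heads themselves are not shown in the source):

    import torch

    B, C, T, H, W, K = 2, 8, 10, 7, 7, 3
    x_padded = torch.randn(B, C, T, H, W)

    # unfold dim=2 into S = T - K + 1 windows of length K, stride 1
    windows = x_padded.unfold(2, K, 1)            # (B, C, S, H, W, K)
    windows = windows.permute(0, 2, 1, 5, 3, 4)   # (B, S, C, K, H, W)
    assert windows.shape == (B, T - K + 1, C, K, H, W)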
Example 6
    def forward(self, input):

        input_shape = pytorch_utils.get_shape(input)
        assert len(input_shape) == 5

        dim = self.dim

        # permute to put the required dimension in the 2nd dimension
        if dim == 4:
            x = input
        elif dim == 3:
            x = input.permute(0, 1, 2, 4, 3)
        elif dim == 2:
            x = input.permute(0, 1, 4, 3, 2)
        elif dim == 1:
            x = input.permute(0, 4, 2, 3, 1)
        else:
            raise ValueError('unexpected dim: %d' % dim)

        B, d1, d2, d3, d4 = pytorch_utils.get_shape(x)
        assert d4 == self.num_features

        # reshape
        x = x.view(B, d1 * d2 * d3, d4)

        # apply layer_norm
        x = self.layer(x)

        # reshape back to the original view
        x = x.view(B, d1, d2, d3, d4)

        # permute back to the original view
        if dim == 3:
            x = x.permute(0, 1, 2, 4, 3)
        elif dim == 2:
            x = x.permute(0, 1, 4, 3, 2)
        elif dim == 1:
            x = x.permute(0, 4, 2, 3, 1)

        x_shape = pytorch_utils.get_shape(x)
        assert input_shape == x_shape

        return x
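
A minimal sketch, assuming nn.LayerNorm as the wrapped self.layer (the layer type is not shown). nn.LayerNorm normalizes the trailing dimension of input of any rank, so the intermediate view is only needed if the wrapped layer expects 3-D input:

    import torch
    import torch.nn as nn

    x = torch.randn(2, 8, 4, 4, 6)   # normalize dim=4, num_features=6
    ln = nn.LayerNorm(6)

    y = ln(x)                        # dim=4 is already last: no permute needed
    assert y.shape == x.shape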
Example 7
    def forward(self, *input):
        """
        input is two features: subject-object feature and context feature
        :param x_so: pairattn feature (B, C, N, H, W)
        :param x_c: scene feature (B, C, N, H, W)
        :return:
        """

        # compute x_so embeddings
        x_so = input[0]
        x_so = self.dense_so(x_so)

        B, C, N, _, _ = pytorch_utils.get_shape(x_so)

        x_cs = input[1:]

        # compute context embeddings
        x_c = self.get_context_embeddings(x_cs, B)

        x = x_so
        # spatial pooling
        x = self.spatial_pooling(x)  # (B, C, N)
        x = x.permute(0, 2, 1)  # (B, N, C)

        # hide N dimension
        B, N, C = pytorch_utils.get_shape(x)
        x_action = x.contiguous().view(B * N, C)  # (B*N, C)

        # compute context categories
        x_cs_classes = self.get_context_class(x_c, x_action,
                                              B)  # (nco, B, N, C)

        x, _ = self.modulate_context_classifier(x_so, x_c, x_cs_classes,
                                                B)  # (B, N, 600)

        # Add modulated response to human-object classifier and max-pool over N

        x, _ = torch.max(x, dim=1)  # (B, C)
        x = torch.sigmoid(x)

        return x
Example 8
    def __save_values_for_debugging(self, f, alpha):
        is_training = self.training
        if is_training:
            return

        self.f_mean = torch.mean(f)
        self.f_std = torch.std(f)

        # fraction of active gating values: sum of alpha over its element count
        non_zero = torch.sum(alpha).item()
        total = np.prod(pytorch_utils.get_shape(alpha))
        ratio = non_zero / total
        self.alpha_ratio = ratio
Example 9
    def forward(self, x):
        """
        :param x: (B, C, T, H, W)
        :return:
        """

        batch_size = x.size(0)
        x_shape = pytorch_utils.get_shape(x)
        B, C, T, H, W = x_shape

        # key embedding
        key = self.key_embedding(x)  # (B, C, T, H, W)
        key = key.view(batch_size, self.n_channels_inter, -1)  # (B, C, T*H*W)
        key = key.permute(0, 2, 1)  # (B, T*H*W, C)

        # query embedding
        query = self.query_embedding(x)  # (B, C, T, H, W)
        query = query.view(batch_size, self.n_channels_inter,
                           -1)  # (B, C, T*H*W)

        # value embedding
        value = self.value_embedding(x)  # (B, C, T, H, W)
        value = value.view(batch_size, self.n_channels_inter,
                           -1)  # (B, C, T*H*W)
        value = value.permute(0, 2, 1)  # (B, T*H*W, C)

        # attention
        alpha = torch.matmul(key, query)  # (B, T*H*W, T*H*W)

        # normalize over timesteps
        alpha = alpha / float(T)

        # use softmax or sigmoid
        if self.is_softmax_activation:
            alpha = F.softmax(alpha, dim=-1)  # (B, T*H*W, T*H*W)
        else:
            alpha = alpha / alpha.size(-1)  # (B, T*H*W, T*H*W)
            alpha = torch.sigmoid(alpha)  # (B, T*H*W, T*H*W)

        # multiply alpha with values
        y = torch.matmul(alpha, value)  # (B, T*H*W, C)
        y = y.permute(0, 2, 1).contiguous()  # (B, C, T*H*W)
        y = y.view(batch_size, self.n_channels_inter, T, H,
                   W)  # (B, C, T, H, W)

        # output embedding
        y = self.output_embedding(y)

        # residual connection
        y += x

        return y
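
The embedding layers (self.key_embedding and friends) are not shown; the core attention arithmetic, with random tensors standing in for the already-embedded key/query/value:

    import torch
    import torch.nn.functional as F

    B, C, T, H, W = 2, 8, 4, 7, 7
    key = torch.randn(B, T * H * W, C)     # (B, T*H*W, C)
    query = torch.randn(B, C, T * H * W)   # (B, C, T*H*W)
    value = torch.randn(B, T * H * W, C)   # (B, T*H*W, C)

    alpha = torch.matmul(key, query) / float(T)  # (B, T*H*W, T*H*W)
    alpha = F.softmax(alpha, dim=-1)
    y = torch.matmul(alpha, value)               # (B, T*H*W, C)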
Example 10
    def forward(self, x_window):
        """
        :param x: (B, C, T, H, W)
        :return:
        """

        B, C, T, H, W = pytorch_utils.get_shape(x_window)
        batch_size = x_window.size(0)
        assert T % 2 == 1

        # get middle item of the window
        idx_item = T // 2
        x_item = x_window[:, :, idx_item:idx_item + 1]  # (B, C, 1, H, W)

        # query embedding
        query = self.query_embedding(x_item)  # (B, C, 1, H, W)
        query = query.view(batch_size, self.n_channels_inter,
                           -1)  # (B, C, 1*H*W)

        # key embedding
        key = self.key_embedding(x_window)  # (B, C, T, H, W)
        key = key.view(batch_size, self.n_channels_inter, -1)  # (B, C, T*H*W)
        key = key.permute(0, 2, 1)  # (B, T*H*W, C)

        # value embedding
        value = self.value_embedding(x_window)  # (B, C, T, H, W)
        value = value.view(batch_size, self.n_channels_inter,
                           -1)  # (B, C, T*H*W)
        value = value.permute(0, 2, 1)  # (B, T*H*W, C)

        # attention
        alpha = torch.matmul(key, query)  # (B, T*H*W, 1*H*W)
        alpha = alpha.permute(0, 2, 1)  # (B, 1*H*W, T*H*W)
        alpha = F.softmax(alpha, dim=-1)  # (B, 1*H*W, T*H*W)

        # scale over channels or over the timesteps
        # alpha = alpha / np.sqrt(self.n_channels_inter)  # (B, 1*H*W, T*H*W)
        # alpha = alpha / alpha.size(-1)  # (B, 1*H*W, T*H*W)

        # use sigmoid instead of softmax
        # alpha = F.sigmoid(alpha)  # (B, 1*H*W, T*H*W)

        # multiply alpha with values
        y = torch.matmul(alpha, value)  # (B, 1*H*W, C)
        y = y.permute(0, 2, 1).contiguous()  # (B, C, 1*H*W)
        y = y.view(batch_size, self.n_channels_inter, 1, H,
                   W)  # (B, C, 1, H, W)

        # output embedding
        y = self.output_embedding(y)

        return y
Example 11
    def forward(self, input):
        # input is of shape (None, H, W, N, T)

        input_shape = pytorch_utils.get_shape(input)
        b, h, w, n, t = input_shape

        assert len(input_shape) == 5

        # sample gumbel noise
        gumbel_shape = input.size()
        gumbel_noise = self.gumbel_sampler.sample(gumbel_shape).cuda()

        # gumbel sigmoid trick
        tensor = (input + gumbel_noise) / self.temperature
        tensor = self.sigmoid(tensor)

        return tensor
Example 12
    def get_context_relevance(self, x_so, x_cs):

        x_cs_value = []
        B, C, N, _, _ = pytorch_utils.get_shape(x_so)

        # loop on multi_contexts
        for idx_context in range(self.n_contexts):
            # embedding of context
            x_c = x_cs[idx_context]
            x_c = x_c.view(B, C, N, 1, 1)

            x_c = self.feature_selection(x_so, x_c)  # (B, N)
            x_cs_value.append(x_c.view(1, B, N))  # (1, B, N)

        x_cs_value = torch.stack(x_cs_value,
                                 dim=0).view(self.n_contexts, B,
                                             N)  # (n_contexts, B, N)
        return x_cs_value
Example 13
    def forward(self, x):
        x_shape = pytorch_utils.get_shape(x)  # (None, 2)
        assert len(x_shape) == 2
        assert x_shape[1] == 2

        # x_hard starts as all zeros
        x_hard = torch.zeros_like(x)

        # find index of max value
        _, idx = torch.max(x, dim=1, keepdim=True)

        # set max value to one
        x_hard.scatter_(1, idx, 1)

        # set gradients to be w.r.t. x instead of w.r.t. x_hard (straight-through)
        y = (x_hard - x).detach() + x

        return y
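
The last line is the straight-through estimator: the forward pass returns the hard one-hot vector while gradients flow as if the output were x. A quick check:

    import torch

    x = torch.tensor([[0.2, 0.8]], requires_grad=True)
    x_hard = torch.zeros_like(x)
    x_hard.scatter_(1, x.argmax(dim=1, keepdim=True), 1.0)
    y = (x_hard - x).detach() + x

    y.sum().backward()
    print(y)       # tensor([[0., 1.]], grad_fn=...)  hard one-hot values
    print(x.grad)  # tensor([[1., 1.]])  gradient w.r.t. x, not x_hard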
Example 14
    def forward(self, x_so, x_c):
        # pairwise interaction between x_so and x_c

        f = torch.cat((x_so, x_c), dim=1)  # (B, C, N, H, W)

        # gating
        f = self.f_layers(f)  # (B, N)

        alpha = f

        # save values for debugging
        self.__save_values_for_debugging(f, alpha)

        # reshape the gating value so it can broadcast over the context feature
        B, N = pytorch_utils.get_shape(alpha)
        alpha = alpha.view(B, N, 1)  # (B, N, 1)

        return alpha
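
The returned (B, N, 1) shape suggests the gate multiplies a per-N feature downstream; how the caller applies it is not shown, but a plausible broadcast (with assumed shapes) looks like:

    import torch

    B, C, N = 2, 8, 4
    alpha = torch.rand(B, N).view(B, N, 1)   # gate, as returned above
    x_c = torch.randn(B, C, N)               # pooled context feature (assumed)

    gated = x_c * alpha.permute(0, 2, 1)     # (B, C, N) * (B, 1, N)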
Example 15
    def forward(ctx, input):
        """
        # input shape (B, T). Hardmax on the node dimension (dim=1)
        """

        input_shape = pytorch_utils.get_shape(input)
        B, T = input_shape
        rng = torch.arange(B)

        # find idx of max
        idx = torch.argmax(input, dim=1)

        # set all but max to zero, set max to 1
        mask = torch.zeros_like(input)  # (B, T)
        mask[rng, idx] = 1.0

        # save for backward pass
        ctx.mask = mask

        output = input.clone()  # copy input
        output = output * mask  # (B, T)

        return output
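
This forward() belongs to a torch.autograd.Function subclass; the matching backward() is not shown in this snippet. A plausible sketch of the full Function, assuming gradients should flow only through the entry the mask keeps:

    import torch

    class Hardmax(torch.autograd.Function):
        @staticmethod
        def forward(ctx, input):
            mask = torch.zeros_like(input)                       # (B, T)
            mask[torch.arange(input.shape[0]),
                 torch.argmax(input, dim=1)] = 1.0
            ctx.mask = mask
            return input * mask

        @staticmethod
        def backward(ctx, grad_output):
            # pass gradients only where the forward mask kept the input
            return grad_output * ctx.mask

    y = Hardmax.apply(torch.randn(4, 6, requires_grad=True))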
Example 16
    def forward(self, input):
        # input is of shape (None, C, T, H, W)

        input_shape = pytorch_utils.get_shape(input)
        n, c, t, h, w = input_shape

        assert len(input_shape) == 5

        # transpose and reshape to hide the spatial dimension, only expose the temporal dimension for depthwise conv
        tensor = input.permute(0, 3, 4, 1, 2)  # (None, H, W, C, T)
        tensor = tensor.contiguous().view(n * h * w, c, t)  # (None*H*W, C, T)

        # depthwise conv on the temporal dimension
        tensor = self.padding(tensor)
        tensor = self.depthwise_conv(tensor)  # (None*H*W, C, T)

        # reshape to get the spatial dimensions
        tensor = tensor.view(n, h, w, c, t)  # (None, H, W, C, T)

        # finally, transpose to get the desired output shape
        tensor = tensor.permute(0, 3, 4, 1, 2)  # (None, C, T, H, W)

        return tensor
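
self.padding and self.depthwise_conv are not shown; a sketch of a consistent pair, where groups == channels makes the Conv1d depthwise and the padding keeps T unchanged (the padding type is an assumption):

    import torch
    import torch.nn as nn

    c, t, k = 8, 10, 3
    padding = nn.ReplicationPad1d(k // 2)            # assumed 'same' padding
    depthwise_conv = nn.Conv1d(c, c, kernel_size=k, groups=c)

    x = torch.randn(2 * 7 * 7, c, t)                 # (None*H*W, C, T)
    y = depthwise_conv(padding(x))                   # (None*H*W, C, T)
    assert y.shape == x.shape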
Example 17
    def forward(self, input):
        B = pytorch_utils.get_shape(input)[0]
        new_shape = [B] + list(self.shape)
        output = input.view(*new_shape)

        return output
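
This is a Reshape layer that preserves the batch dimension; self.shape would hold the fixed per-sample target shape (presumably set in an __init__ that is not shown). Usage sketch:

    import torch

    x = torch.randn(4, 30)                 # batch of flat vectors
    shape = (2, 3, 5)                      # stands in for self.shape
    y = x.view(*([x.shape[0]] + list(shape)))
    assert y.shape == (4, 2, 3, 5)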