Example #1
    def forward(self, x):
        residual = x

        out = self.conv1(x)
        out = self.bn1(out)
        out = F.relu_(out)

        out = self.conv2(out)
        out = self.bn2(out)
        out = F.relu_(out)

        out0 = self.conv3(out)
        out = self.bn3(out0)

        if self.downsample is not None:
            residual = self.downsample(x)

        out += residual
        out = F.relu_(out)

        return out
Example #2
    def forward(self, x):

        x = self.conv1(x)
        x = self.bn1(x)
        x = F.relu_(x)

        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)

        x = F.avg_pool2d(x, x.size()[3])
        x = torch.flatten(x, 1)
        x = self.fc(x)

        return x
Example #3
    def forward(self, x):
        c1 = self.layer0(x)
        c1 = F.relu_(c1)
        c1 = self.maxpool(c1)
        c2 = self.layer1(c1)
        c3 = self.layer2(c2)
        c4 = self.layer3(c3)
        c5 = self.layer4(c4)
        c6 = self.layer5(c5)
        c7 = self.layer6(F.relu_(c6))

        p7 = c7

        p6 = c6

        p5 = self.latlayer1(c5)

        p4 = self._upsample(p5, self.latlayer2(c4))
        p4 = self.toplayer1(p4)

        p3 = self._upsample(p4, self.latlayer3(c3))
        p3 = self.toplayer2(p3)

        return p3, p4, p5, p6, p7
Example #4
def nl_relu(x: Tensor, beta: float = 1., inplace: bool = False) -> Tensor:
    """Implements the natural logarithm ReLU activation function

    Args:
        x: input tensor
        beta: beta used for NReLU
        inplace: whether the operation should be performed inplace
    Returns:
        output tensor
    """

    if inplace:
        return torch.log(F.relu_(x).mul_(beta).add_(1), out=x)
    else:
        return torch.log(1 + beta * F.relu(x))
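Both branches of nl_relu compute log(1 + beta * relu(x)); the inplace path additionally overwrites its input buffer. A quick sanity check of the two paths (a standalone sketch, assuming the nl_relu defined above is in scope):

import torch

x = torch.randn(8)
out = nl_relu(x.clone(), beta=2.0)                        # out-of-place: the input is untouched
out_inplace = nl_relu(x.clone(), beta=2.0, inplace=True)  # in-place: the cloned buffer is overwritten
assert torch.allclose(out, out_inplace)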
Example #5
    def forward(self, x):
        num_branch = self.num_branch if self.training or self.test_branch_idx == -1 else 1
        if not isinstance(x, list):
            x = [x] * num_branch

        out = self.conv1(x)
        out = [F.relu_(b) for b in out]
        out = self.conv2(out)

        if self.shortcut is not None:
            shortcut = [self.shortcut(b) for b in x]
        else:
            shortcut = x

        out = [out_b + shortcut_b for out_b, shortcut_b in zip(out, shortcut)]
        out = [F.relu_(b) for b in out]

        if self.has_pool:
            out = [p(b) for p, b in zip(self.list_pool, out)]

        if self.concat_output:
            out = torch.cat(out)

        return out
Example #6
    def forward(self, x):
        identity = x

        out = self.conv1(x)
        out = self.bn1(out)
        out = F.relu_(out)

        out = self.conv2(out)
        out = self.bn2(out)
        out = F.relu_(out)

        out0 = self.conv3(out)
        out = self.bn3(out0)

        if self.config['se_block']:
            out = self.domain_attention(out)

        if self.downsample is not None:
            identity = self.downsample(x)

        out += identity
        out = F.relu_(out)

        return out
Example #7
    def forward(self, input):
        """
        Take a mini-batch of character embeddings of each word and compute word embeddings
        :param input (Tensor): shape (batch_size, char_embed_size, max_word_length)
        :return (Tensor): shape (batch_size, word_embed_size), word embedding of each word in batch
        """
        print('Embedding size: ', self.char_embed_size)
        print('X_reshaped size: ', input.size())

        x = self.conv1d(input) # (batch_size, word_embed_size, max_word_length - kernel_size + 1)
        x = F.relu_(x)
        print('x_size after conv with out_channels: ', self.num_filters, ',  ', x.size())
        x = self.max_pool_1d(x).squeeze()  # (batch_size, word_embed_size)
        print('after pool x.size(): ', x.size())
        return x
Example #8
 def forward(self, x):
     b, c, height, width = x.size()
     C = self.pool(x)
     H = self.pool(x.permute(0, 3, 1, 2).contiguous())
     W = self.pool(x.permute(0, 2, 3, 1).contiguous())
     self.lam = F.softmax(self.lam, -1)
     lam = torch.chunk(self.lam, dim=0, chunks=self.rank)
     terms = []  # avoid shadowing the builtin `list`
     for i in range(0, self.rank):
         terms.append(lam[i] * self.TukerReconstruction(
             b, self.h, self.ps[0], self.conv1_1[i](C), self.conv1_2[i](H),
             self.conv1_3[i](W)))
     tensor1 = sum(terms)
     tensor1 = torch.cat((x, F.relu_(x * tensor1)), 1)
     return tensor1
Example #9
def nl_relu(x, beta=1., inplace=False):
    """Implements the natural logarithm ReLU activation function

    Args:
        x (torch.Tensor): input tensor
        beta (float): beta used for NReLU
        inplace (bool): whether the operation should be performed inplace
    Returns:
        torch.Tensor[x.size()]: output tensor
    """

    if inplace:
        return torch.log(F.relu_(x).mul_(beta).add_(1), out=x)
    else:
        return torch.log(1 + beta * F.relu(x))
Example #10
    def forward(self, x):
        residual = x

        x = self.body(x)

        if self.se:
            w = F.adaptive_avg_pool2d(x, output_size=1)
            w = self.se(w)
            x = x * w

        if self.downsample:
            residual = self.downsample(residual)

        x = F.relu_(x + residual)
        return x
Example #11
    def forward(self, x):
        identity = x

        out = self.conv1(x)
        out = self.bn1(out)
        out = F.relu_(out)

        out = self.conv2(out)
        out = self.bn2(out)
        out = F.relu_(out)

        out0 = self.conv3(out)
        out = self.bn3(out0)

        if self.with_ct:
            out = self.context_block(out)

        if self.downsample is not None:
            identity = self.downsample(x)

        out += identity
        out = F.relu_(out)

        return out
Example #12
    def forward(self, input, pool_size=(2, 2), pool_type='avg'):
        x = input
        x = F.relu_(self.bn1(self.conv1(x)))
        if pool_type == 'max':
            x = F.max_pool2d(x, kernel_size=pool_size)
        elif pool_type == 'avg':
            x = F.avg_pool2d(x, kernel_size=pool_size)
        elif pool_type == 'avg+max':
            x1 = F.avg_pool2d(x, kernel_size=pool_size)
            x2 = F.max_pool2d(x, kernel_size=pool_size)
            x = x1 + x2
        else:
            raise ValueError('Unknown pool_type: {}'.format(pool_type))

        return x
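The block above references self.conv1 and self.bn1 that are defined elsewhere. A minimal __init__ consistent with that forward might look like this (a sketch: the class name and the conv hyperparameters are assumptions; only the attribute names conv1 and bn1 are dictated by the code above):

import torch.nn as nn

class ConvBlock(nn.Module):
    def __init__(self, in_channels, out_channels):
        super().__init__()
        # kernel size, padding, and bias choice are assumptions here
        self.conv1 = nn.Conv2d(in_channels, out_channels,
                               kernel_size=3, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(out_channels)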
Example #13
    def forward(self, x):

        if isinstance(x, tuple):
            x, prev_dp = x
        else:
            prev_dp = None

        out = self.conv1(x)
        out = F.relu_(out)

        out = self.conv2(out)
        out = F.relu_(out)

        out = self.conv3(out)

        if self.shortcut is not None:
            shortcut = self.shortcut(x)
        else:
            shortcut = x

        dp = None
        if self.module is not None:
            out = self.module(out)
            if isinstance(out, tuple):
                out, dp = out
                if prev_dp is not None:
                    dp = prev_dp + dp

        out += shortcut
        out = F.relu_(out)

        if dp is None:
            return out
        else:
            # diff loss
            return out, dp
Example #14
    def forward(self, x):
        x = F.relu(x)

        out = self.conv1(x)
        out = F.relu_(out)
        out = self.conv2(out)

        if self.shortcut is not None:
            shortcut = self.shortcut(x)
        else:
            shortcut = x

        out += shortcut
        # out = F.relu_(out)
        return out
Example #15
 def forward(self, x, y):
     activations = []
     for lay, (conv,
               bn) in enumerate(zip(self.convs[:-1], self.bns[:-1])):
         if lay == 0:
             x = F.relu_(bn(conv(self.preconvbn(self.preconv(x)))))
             y = F.relu_(self.lblbn(self.lblconv(y)))
             x = torch.cat([x, y], 1)
             # activations.append(x)
         else:
             new_size = (x.shape[2] - 1) * 2
             up = nn.Upsample(size=new_size,
                              mode='trilinear',
                              align_corners=False)
             x_res = up(x)
             oc = conv.out_channels
             x = F.relu_(bn(conv(x) + x_res[:, :oc]))
             # activations.append(x)
     up = nn.Upsample(size=2 * x.size()[2] - 2,
                      mode='trilinear',
                      align_corners=False)
     out = torch.softmax(self.postconv(up(x)), 1)
     # activations.append(out)
     return out
Example #16
    def forward(self, img, x_coord, x_adj, get_att_weights=False):
        img = self.cnn(img)
        img = img.unsqueeze(1).repeat(1, x_adj.shape[1], 1)
        input_sequence = torch.cat([x_coord, x_adj, img], dim=2)
        input_sequence = F.relu_(self.linear_att1(input_sequence))
        input_sequence = self.linear_att2(input_sequence)
        mask = self.softmax(input_sequence)
        masked_img = mask * img

        # optionally, return the attention weights of the context attention
        if get_att_weights:
            mask = mask[0].reshape(1, 30, 30)
            return masked_img, mask[0]

        return masked_img
Example #17
def correlate(input1, input2):
    out_corr = spatial_correlation_sample(
        input1,
        input2,
        kernel_size=1,
        patch_size=21,
        stride=1,
        padding=0,
        dilation_patch=2,
    )
    # collate dimensions 1 and 2 in order to be treated as a
    # regular 4D tensor
    b, ph, pw, h, w = out_corr.size()
    out_corr = out_corr.view(b, ph * pw, h, w) / input1.size(1)
    return F.relu_(out_corr)
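With kernel_size=1, stride=1, and padding=0 the spatial size is preserved, and patch_size=21 yields 21 * 21 = 441 correlation channels after the view. A hedged usage sketch (assumes the correlate function above is in scope and the spatial_correlation_sampler extension it relies on is installed):

import torch

a = torch.randn(2, 64, 32, 32)
b = torch.randn(2, 64, 32, 32)
corr = correlate(a, b)
print(corr.shape)  # torch.Size([2, 441, 32, 32]); values are clamped to >= 0 by F.relu_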
Example #18
    def forward(self, x):
        identity = x

        out = self.conv1(x)
        out = self.bn1(out)
        out = F.relu_(out)

        out = self.conv2(out)
        out = self.bn2(out)
        out = F.relu_(out)

        out0 = self.conv3(out)
        out = self.bn3(out0)

        if self.downsample is not None:
            identity = self.downsample(x)

        out += identity
        out = F.relu_(out)

        return out
Example #19
    def forward(self, input, spec_aug=False, mixup_lambda=None):
        #print(input.type())                                                     # Input : (16, 144000)
        x = self.spectrogram_extractor(input.float())                            # Output : (batch_size, 1, time_steps, n_fft + 1) : (16, 1, 696, 513)
        x = self.logmel_extractor(x)                                             # Output : (batch_size, 1, time_steps, mel_bins)     : (16, 1 , 696, 128)
        frames_num = x.shape[2]
        
        if self.training:
            x = self.spec_augmenter(x)
        
        # Mixup on spectrogram
        if mixup_lambda is not None:
            x = do_mixup(x, mixup_lambda)
        
        x = x.transpose(1, 3)
        x = self.batch_norm(x)
        x = x.transpose(1, 3)
                                                                                  # (16, 1, 2087, 128)
        x = self.encoder.forward_features(x)                                     # output : (batch_size, n_features, 66, 4)
        # Aggregate in time axis
        x = torch.mean(x, dim=3)                                                 # (16, 2048, 22) : (batch_size, n_features, _)
        x1 = F.max_pool1d(x, kernel_size=3, stride=1, padding=1)
        x2 = F.avg_pool1d(x, kernel_size=3, stride=1, padding=1)
        x = x1 + x2                                                              # (16, 2048, 22)

        x = F.dropout(x, p=0.5, training=self.training)
        x = x.transpose(1, 2)                                                    # (batch_size, 22, n_features)
        #x = self.encoder.classifier(x)                                                    # (16, 22, 2048) : (batch_size, time, n_features)
        x = F.relu_(self.encoder.fc(x))                                          # (16, 22, 2048)
        x = x.transpose(1, 2)                                                    # (16, 2048, 22)
        x = F.dropout(x, p=0.5, training=self.training)

        (clipwise_output, norm_att, segmentwise_output) = self.att_head(x)
        segmentwise_output = segmentwise_output.transpose(1, 2)
        #print("clipwise_output.size : {}".format(clipwise_output.size()))        #(16, 24)     : (batch_sizes, n_features)
        #print("norm_att.size : {}".format(norm_att.size()))                      #(16, 24, 22) : (batch_sizes, n_features, time)
        #print("segmentwise_output.size : {}".format(segmentwise_output.size()))  #(16, 24, 22) : (batch_sizes, n_features, time)
        
        #Upscale back to original size
        framewise_output = interpolate(segmentwise_output,
                                       self.interpolate_ratio)                    # (16,696, 24) : (batch_sizes x time x num_classes)

        framewise_output = pad_framewise_output(framewise_output, frames_num)     # (16,696, 24) : (batch_sizes x time x num_classes)
        output_dict = {
            'framewise_output': framewise_output,
            'clipwise_output': clipwise_output
        }

        return output_dict
Example #20
 def forward(self, x):
     x = F.pad(
         x,
         ([
             self.padding_size[1],
             self.padding_size[1],
             self.padding_size[0],
             self.padding_size[0],
         ]),
     )
     x = self.conv(x)
     x = self.batch_norm(x)
     # x = self.pair_norm(x)
     if self.activation is not None:
         x = F.relu_(x)
     return x
Example #21
 def forward(self, x_reshaped: torch.Tensor) -> torch.Tensor:
     '''
     Pass character embeddings through the Conv1d layer, ReLU, and max-pool.
     @param x_reshaped (Tensor): tensor of character-level embeddings
     @returns x_conv (Tensor): tensor of word embeddings of size (batch_size, embed_size)
     '''
     print('Embedding size: ', self.embed_size)
     print('X_reshaped size: ', x_reshaped.size())
     x = self.conv1(x_reshaped)
     x = F.relu_(x)
     print('x_size after conv with out_channels: ', self.out_channels, ',  ', x.size())
     x_conv = self.pool(x).squeeze()
     print('x_conv_size: ', x_conv.size())
     
     return x_conv
Example #22
    def forward(self, input, mixup_lambda=None):
        """
        Input: (batch_size, data_length)"""

        x = self.spectrogram_extractor(
            input)  # (batch_size, 1, time_steps, freq_bins)
        x = self.logmel_extractor(x)  # (batch_size, 1, time_steps, mel_bins)

        x = x.transpose(1, 3)
        x = self.bn0(x)
        x = x.transpose(1, 3)

        if self.training:
            x = self.spec_augmenter(x)

        # Mixup on spectrogram
        if self.training and mixup_lambda is not None:
            x = do_mixup(x, mixup_lambda)

        x = self.conv_block1(x, pool_size=(2, 2), pool_type='avg')
        x = F.dropout(x, p=0.2, training=self.training)
        x = self.conv_block2(x, pool_size=(2, 2), pool_type='avg')
        x = F.dropout(x, p=0.2, training=self.training)
        x = self.conv_block3(x, pool_size=(2, 2), pool_type='avg')
        x = F.dropout(x, p=0.2, training=self.training)
        x = self.conv_block4(x, pool_size=(2, 2), pool_type='avg')
        x = F.dropout(x, p=0.2, training=self.training)
        x = self.conv_block5(x, pool_size=(2, 2), pool_type='avg')
        x = F.dropout(x, p=0.2, training=self.training)
        x = self.conv_block6(x, pool_size=(1, 1), pool_type='avg')
        x = F.dropout(x, p=0.2, training=self.training)
        x = torch.mean(x, dim=3)

        (x1, _) = torch.max(x, dim=2)
        x2 = torch.mean(x, dim=2)
        x = x1 + x2
        x = F.dropout(x, p=0.5, training=self.training)
        x = F.relu_(self.fc1(x))
        embedding = F.dropout(x, p=0.5, training=self.training)
        clipwise_output = torch.sigmoid(self.fc_audioset(x))

        output_dict = {
            'clipwise_output': clipwise_output,
            'embedding': embedding
        }

        return output_dict
Example #23
    def forward(self, x, y=None):
        """
        Input: (batch_size, data_length)"""

        x = self.spectrogram_extractor(
            x)  # (batch_size, 1, time_steps, freq_bins)
        x = self.logmel_extractor(x)  # (batch_size, 1, time_steps, mel_bins)

        frames_num = x.shape[2]

        x = x.transpose(1, 3)
        x = self.bn0(x)
        x = x.transpose(1, 3)

        if self.training:
            x = self.spec_augmenter(x)

        # Mixup on spectrogram
        alpha = 1.0
        if self.training:
            x, y = do_mixup(x, y, alpha)

        x = torch.cat([x, x, x], dim=1)

        x = self.fe(x)

        x = torch.mean(x, dim=3)  # averaging across frequency dimension

        stride = 1

        x1 = F.max_pool1d(x, kernel_size=3, stride=stride, padding=1)
        x2 = F.avg_pool1d(x, kernel_size=3, stride=stride, padding=1)
        x = x1 + x2

        x = F.dropout(x, p=CONFIG.p, training=self.training)
        x = x.transpose(1, 2)
        x = F.relu_(self.fc1(x))
        x = x.transpose(1, 2)
        x = F.dropout(x, p=CONFIG.p, training=self.training)

        clipwise, weights, framewise = self.att_block(x)

        if self.training:
            return clipwise, y

        return torch.max(framewise, dim=-1)[0]
Example #24
    def forward(self, input_data):
        """Input: (batch_size, data_length)"""
        # input_x, mixup_lambda = input_data
        input_x = input_data
        mixup_lambda = None
        b, c, s = input_x.shape
        input_x = input_x.reshape(b * c, s)
        x, frames_num = self.preprocess(input_x, mixup_lambda=mixup_lambda)
        if mixup_lambda is not None:
            b = (b * c) // 2
            c = 1
        # Output shape (batch size, channels, time, frequency)
        x = x.expand(x.shape[0], 3, x.shape[2], x.shape[3])
        x = self.cnn_feature_extractor(x)

        # Aggregate in frequency axis
        x = torch.mean(x, dim=3)

        x1 = F.max_pool1d(x, kernel_size=3, stride=1, padding=1)
        x2 = F.avg_pool1d(x, kernel_size=3, stride=1, padding=1)
        x = x1 + x2

        x = F.dropout(x, p=0.5, training=self.training)
        x = x.transpose(1, 2)
        x = F.relu_(self.fc1(x))
        x = x.transpose(1, 2)
        x = F.dropout(x, p=0.5, training=self.training)

        (clipwise_output, norm_att, segmentwise_output) = self.att_block(x)
        segmentwise_output = segmentwise_output.transpose(1, 2)

        # Get framewise output
        framewise_output = interpolate(segmentwise_output,
                                       self.interpolate_ratio)
        framewise_output = pad_framewise_output(framewise_output, frames_num)
        frame_shape = framewise_output.shape
        clip_shape = clipwise_output.shape
        output_dict = {
            'framewise_output':
            framewise_output.reshape(b, c, frame_shape[1], frame_shape[2]),
            'clipwise_output':
            clipwise_output.reshape(b, c, clip_shape[1]),
        }

        return output_dict
Example #25
    def forward(self, x):
        x = x.transpose(2, 3)

        frames_num = x.shape[2]

        x = x.transpose(1, 3)
        x = self.bn0(x)
        x = x.transpose(1, 3)

        x = self.conv_block1(x, pool_size=(2, 2), pool_type="avg")
        x = F.dropout(x, p=0.2, training=self.training)
        x = self.conv_block2(x, pool_size=(2, 2), pool_type="avg")
        x = F.dropout(x, p=0.2, training=self.training)
        x = self.conv_block3(x, pool_size=(2, 2), pool_type="avg")
        x = F.dropout(x, p=0.2, training=self.training)
        x = self.conv_block4(x, pool_size=(2, 2), pool_type="avg")
        x = F.dropout(x, p=0.2, training=self.training)
        x = self.conv_block5(x, pool_size=(2, 2), pool_type="avg")
        x = F.dropout(x, p=0.2, training=self.training)
        x = self.conv_block6(x, pool_size=(1, 1), pool_type="avg")
        x = F.dropout(x, p=0.2, training=self.training)
        x = torch.mean(x, dim=3)

        x1 = F.max_pool1d(x, kernel_size=3, stride=1, padding=1)
        x2 = F.avg_pool1d(x, kernel_size=3, stride=1, padding=1)
        x = x1 + x2
        x = F.dropout(x, p=0.5, training=self.training)
        x = x.transpose(1, 2)
        x = F.relu_(self.fc1(x))
        x = x.transpose(1, 2)
        x = F.dropout(x, p=0.5, training=self.training)
        (clipwise_output, _, segmentwise_output) = self.att_block(x)
        segmentwise_output = segmentwise_output.transpose(1, 2)

        # Get framewise output
        framewise_output = interpolate(segmentwise_output,
                                       self.interpolate_ratio)
        framewise_output = pad_framewise_output(framewise_output, frames_num)

        return clipwise_output, framewise_output
Example #26
    def forward(self, x_conv_out: torch.Tensor) -> torch.Tensor:
        """
        Map from x_conv_out to x_highway
        :param x_conv_out: Tensor output from cnn layer. Input size (batch_size, embedding_size)
        :return: x_highway: Tensor output from Highway network. Output size (batch_size, embedding_size)
        """
        # In the comments we’ll describe the dimensions for a single example (not a batch).
        # Then, sent_len and batch_size should be taken into account.
        # Highway layer.
        # x_proj = ReLU(W_proj x_conv_out + b_proj); ∈ R e_{word}
        # x_gate = σ(W_gate x_conv_out + b_gate); ∈ R e_{word}
        # x_highway = x_gate ⊙ x_proj + (1 − x_gate) ⊙ x_conv_out; ∈ R e_{word}
        x_projection = F.relu_(self.projection(x_conv_out))
        x_gate = torch.sigmoid(self.gate(x_conv_out))

        x_highway = x_gate * x_projection + (1 - x_gate) * x_conv_out
        return x_highway
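The commented formulas fix both affine maps to stay in R^{e_word}, so the gate can interpolate elementwise between x_projection and the skip connection. A minimal enclosing module consistent with this forward (a sketch: the class name and constructor are assumptions; only the attribute names projection and gate come from the code above):

import torch.nn as nn

class Highway(nn.Module):
    def __init__(self, word_embed_size):
        super().__init__()
        # both maps preserve the embedding size e_word
        self.projection = nn.Linear(word_embed_size, word_embed_size)
        self.gate = nn.Linear(word_embed_size, word_embed_size)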
Example #27
    def forward(self, input):
        '''
        :param input: (batch_size, time_steps, mel_bins)
        :return: (batch_size, class_num)
        '''
        x = self.feature(input)  #(batch_size, 512, T/16, mel_bins/16)
        x = torch.mean(x, dim=3)  #(batch_size, 512, T/16)
        (x1, _) = torch.max(x, dim=2)
        x2 = torch.mean(x, dim=2)
        x = x1 + x2
        x = F.dropout(x, p=0.2, training=self.training)
        x = F.relu_(self.fc1(x))
        #(batch_size,class_num)
        output = torch.sigmoid(self.fc(x))
        # output = self.fc(x)

        return output
Example #28
    def forward(self, x_reshaped):
        """
        Compute word embedding
        @param input (Tensor): shape (batch_size, char_embed_size, max_word_length)
        @return (Tensor): shape (batch_size, embed_size), word embedding of each word in batch
        """
        # In the comments we’ll describe the dimensions for a single example (not a batch).
        # Then, sent_len and batch_size should be taken into account to reshape the tensor before the
        # convolutional stage and after the dropout layer.

        # Convolutional network.
        # x_conv = Conv1D(x_reshaped); ∈ R e_{word}x(m_{word}−k+1)
        # x_conv_out = MaxPool(ReLU(xconv)); ∈ R e_{word}
        # in our implementation e_{word} is equal to the number of filters f.
        x_conv = self.conv1d(x_reshaped)
        x_conv_out = self.max_pool_1d(F.relu_(x_conv)).squeeze()
        return x_conv_out
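Per the comments, e_word equals the number of filters f and x_conv has length m_word - k + 1, so max-pooling over that whole window leaves one value per filter. A matching __init__ (a sketch: the class name and the kernel_size default are assumptions; the attribute names conv1d and max_pool_1d come from the forward above):

import torch.nn as nn

class CNN(nn.Module):
    def __init__(self, char_embed_size, embed_size, max_word_length, kernel_size=5):
        super().__init__()
        self.conv1d = nn.Conv1d(char_embed_size, embed_size, kernel_size)
        # the pooling window spans the entire conv output length
        self.max_pool_1d = nn.MaxPool1d(max_word_length - kernel_size + 1)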
Example #29
 def forward(self, x):
     outputs = []
     # stem
     x = self.conv1(x)
     x = self.bn1(x)
     x = F.relu_(x)
     x = F.max_pool2d(x, kernel_size=3, stride=2, padding=1)
     # blocks
     x = self.layer1(x)
     outputs.append(x)
     x = self.layer2(x)
     outputs.append(x)
     x = self.layer3(x)
     outputs.append(x)
     x = self.layer4(x)
     outputs.append(x)
     return outputs
Example #30
    def forward(self, input_data):
        x = input_data  # (batch_size, 1, time_steps, mel_bins)
        frames_num = x.shape[2]

        x = x.transpose(1, 3)
        x = self.bn0(x)
        x = x.transpose(1, 3)

        if self.training:
            if random.random() < 0.25:
                x = self.spec_augmenter(x)

        x = x.transpose(2, 3)

        x = self.encoder(x)

        # Aggregate in frequency axis
        x = torch.mean(x, dim=3)

        x1 = F.max_pool1d(x, kernel_size=3, stride=1, padding=1)
        x2 = F.avg_pool1d(x, kernel_size=3, stride=1, padding=1)
        x = x1 + x2

        x = F.dropout(x, p=0.5, training=self.training)
        x = x.transpose(1, 2)
        x = F.relu_(self.fc1(x))
        x = x.transpose(1, 2)
        x = F.dropout(x, p=0.5, training=self.training)

        (clipwise_output, norm_att, segmentwise_output) = self.att_block(x)
        segmentwise_output = segmentwise_output.transpose(1, 2)

        interpolate_ratio = frames_num // segmentwise_output.size(1)

        # Get framewise output
        framewise_output = interpolate(segmentwise_output, interpolate_ratio)
        framewise_output = pad_framewise_output(framewise_output, frames_num)
        output_dict = {
            'framewise_output': framewise_output,
            'clipwise_output': clipwise_output,
        }

        return output_dict
Example #31
    def forward(self, X: torch.FloatTensor) -> torch.FloatTensor:
        """
        Making a forward pass of the 2D-convolution block.

        Arg types:
            * **X** (PyTorch Float Tensor) - Input tensor, with shape (batch_size, num_his, num_nodes, input_dims).

        Return types:
            * **X** (PyTorch Float Tensor) - Output tensor, with shape (batch_size, num_his, num_nodes, output_dims).
        """
        X = X.permute(0, 3, 2, 1)
        X = F.pad(X, ([self._padding_size[1], self._padding_size[1],
                       self._padding_size[0], self._padding_size[0]]))
        X = self._conv2d(X)
        X = self._batch_norm(X)
        if self._activation is not None:
            X = F.relu_(X)
        return X.permute(0, 3, 2, 1)
Example #32
 def forward(self, x):
     x = self.conv1(x)
     x = self.bn1(x)
     x = F.relu_(x)
     x = F.max_pool2d(x, kernel_size=3, stride=2, padding=1)
     return x