Esempio n. 1
0
    def __init__(self,
                 bottleneck2d,
                 time_dim=1,
                 time_padding=0,
                 frame=0,
                 num_segments=4):
        """Build a 3D residual block by inflating the layers of a 2D block.

        Args:
            bottleneck2d: 2D block to inflate. This constructor reads its
                ``conv1``, ``bn1``, ``conv2``, ``bn2``, ``downsample`` and
                ``stride`` attributes.
            time_dim: Forwarded as ``time_dim`` to ``inflate.inflate_conv``
                for both convolutions.
            time_padding: Forwarded as ``time_padding`` to the same calls.
            frame: Stored unchanged on ``self.frame``.
            num_segments: Stored unchanged on ``self.num_segments``.
        """
        super(BasicBlock3d, self).__init__()

        self.frame, self.num_segments = frame, num_segments

        # Stride of the 2D block's second convolution; only handed to
        # ``inflate_downsample`` below as its ``time_stride`` argument.
        spatial_stride = bottleneck2d.conv2.stride[0]

        # Keyword arguments shared by both inflated convolutions.
        shared_conv_kwargs = dict(time_dim=time_dim,
                                  time_padding=time_padding,
                                  center=False)

        # Submodules are assigned in a fixed order (conv1, bn1, conv2, bn2,
        # relu, downsample) so the module registration order is stable.
        self.conv1 = inflate.inflate_conv(bottleneck2d.conv1,
                                          **shared_conv_kwargs)
        self.bn1 = inflate.inflate_batch_norm(bottleneck2d.bn1)

        # The second convolution is always inflated with a temporal stride
        # of 1.
        self.conv2 = inflate.inflate_conv(bottleneck2d.conv2,
                                          time_stride=1,
                                          **shared_conv_kwargs)
        self.bn2 = inflate.inflate_batch_norm(bottleneck2d.bn2)

        self.relu = torch.nn.ReLU(inplace=True)

        # Inflate the shortcut branch only when the 2D block has one.
        shortcut2d = bottleneck2d.downsample
        self.downsample = (
            None if shortcut2d is None
            else inflate_downsample(shortcut2d, time_stride=spatial_stride))

        self.stride = bottleneck2d.stride
Esempio n. 2
0
def inflate_downsample(downsample2d, time_stride=1):
    """Inflate a 2D downsample (shortcut) branch into its 3D counterpart.

    Args:
        downsample2d: Indexable container whose element 0 is passed to
            ``inflate.inflate_conv`` and whose element 1 is passed to
            ``inflate.inflate_batch_norm``.
        time_stride: Accepted for interface compatibility but not used; the
            convolution is always inflated with ``time_stride=1``.

    Returns:
        A ``torch.nn.Sequential`` holding the inflated convolution followed
        by the inflated batch norm.
    """
    # Temporal kernel size 1 and temporal stride 1, regardless of the
    # ``time_stride`` argument.
    conv3d = inflate.inflate_conv(downsample2d[0],
                                  time_dim=1,
                                  time_stride=1,
                                  center=False)
    norm3d = inflate.inflate_batch_norm(downsample2d[1])
    return torch.nn.Sequential(conv3d, norm3d)
Esempio n. 3
0
    def __init__(self,
                 resnet2d,
                 frame_nb=16,
                 class_nb=1000,
                 conv_class=False,
                 num_segments=4,
                 test_mode=False,
                 fast_implementation=0):
        """Inflate a 2D ResNet into a 3D network.

        Args:
            resnet2d: 2D network to inflate. This constructor reads its
                ``conv1``, ``bn1``, ``maxpool``, ``layer1`` .. ``layer4``
                and (when ``conv_class`` is true) ``avgpool`` attributes.
            frame_nb: Stored on ``self.frame`` and used as the temporal
                extent of the average pool when ``conv_class`` is false.
            class_nb: Number of output channels of the convolutional
                classifier (only used when ``conv_class`` is true).
            conv_class: Whether to use convolutional layer as classifier to
                adapt to various number of frames.
            num_segments: Stored on ``self.num_segments`` and forwarded to
                the ``layer2`` inflation.
            test_mode: When ``conv_class`` is false, selects an 8x8 spatial
                pooling window instead of 7x7.
            fast_implementation: Forwarded to the ``layer2`` inflation.
        """
        super(I3ResNet, self).__init__()
        self.conv_class = conv_class
        self.num_segments = num_segments
        self.frame = frame_nb

        # Stem: every layer keeps a temporal extent of 1.
        self.conv1 = inflate.inflate_conv(
            resnet2d.conv1, time_dim=1, time_padding=0, center=False)
        self.bn1 = inflate.inflate_batch_norm(resnet2d.bn1)
        self.relu = torch.nn.ReLU(inplace=True)
        self.maxpool = inflate.inflate_pool(
            resnet2d.maxpool, time_dim=1, time_padding=0, time_stride=1)

        # Residual stages. Only layer2 receives the R4D-related arguments;
        # layer3 and layer4 share the same temporal kernel and padding.
        self.layer1 = inflate_reslayer(resnet2d.layer1)
        self.layer2 = inflate_reslayer(
            resnet2d.layer2,
            num_R4D=2,
            in_channels=512,
            fast_implementation=fast_implementation,
            num_segments=num_segments)
        deep_stage_kwargs = dict(time_dim=3, time_padding=1)
        self.layer3 = inflate_reslayer(resnet2d.layer3, **deep_stage_kwargs)
        self.layer4 = inflate_reslayer(resnet2d.layer4, **deep_stage_kwargs)

        if conv_class:
            # Convolutional head: inflated 2D pool plus a 1x1x1 classifier.
            self.avgpool = inflate.inflate_pool(resnet2d.avgpool, time_dim=1)
            self.classifier = torch.nn.Conv3d(in_channels=2048,
                                              out_channels=class_nb,
                                              kernel_size=(1, 1, 1),
                                              bias=True)
        else:
            # NOTE: not read again in the visible part of this constructor.
            final_time_dim = int(math.ceil(frame_nb))
            # Pool over all frames; the spatial window depends on test_mode.
            spatial_extent = 8 if test_mode else 7
            self.avgpool = nn.AvgPool3d(
                (frame_nb, spatial_extent, spatial_extent))
Esempio n. 4
0
    def __init__(self,
                 resnet2d,
                 frame_nb=16,
                 class_nb=1000,
                 conv_class=False,
                 num_segments=1,
                 gtsn=False):
        """Inflate a 2D ResNet-18/34 style network into a 3D network.

        Args:
            resnet2d: 2D network to inflate. This constructor reads its
                ``conv1``, ``bn1``, ``maxpool``, ``layer1`` .. ``layer4``
                and ``avgpool`` attributes, and ``fc.in_features`` when
                present.
            frame_nb: Only used to compute ``final_time_dim`` in the
                non-convolutional branch.
            class_nb: Number of output channels of the convolutional
                classifier (only used when ``conv_class`` is true).
            conv_class: Whether to use convolutional layer as classifier to
                adapt to various number of frames.
            num_segments: Stored unchanged on ``self.num_segments``.
            gtsn: Stored unchanged on ``self.gtsn``.
        """
        super(I3ResNet_18_34, self).__init__()
        self.num_segments = num_segments
        self.conv_class = conv_class
        self.gtsn = gtsn

        # Stem: every layer keeps a temporal extent of 1.
        self.conv1 = inflate.inflate_conv(resnet2d.conv1,
                                          time_dim=1,
                                          time_padding=0,
                                          center=False)
        self.bn1 = inflate.inflate_batch_norm(resnet2d.bn1)
        self.relu = torch.nn.ReLU(inplace=True)
        self.maxpool = inflate.inflate_pool(resnet2d.maxpool,
                                            time_dim=1,
                                            time_padding=0,
                                            time_stride=1)

        # Residual stages; layer2 and layer3 receive the R4D arguments.
        self.layer1 = inflate_reslayer_18_34(resnet2d.layer1)
        self.layer2 = inflate_reslayer_18_34(resnet2d.layer2,
                                             num_R4D=3,
                                             in_channels=128)

        self.layer3 = inflate_reslayer_18_34(resnet2d.layer3,
                                             time_dim=3,
                                             time_padding=1,
                                             num_R4D=3,
                                             in_channels=256)
        self.layer4 = inflate_reslayer_18_34(resnet2d.layer4,
                                             time_dim=3,
                                             time_padding=1)

        if conv_class:
            # ResNet-18/34 end with 512 feature channels, not the 2048 of
            # the bottleneck variants. Prefer the width reported by the 2D
            # model's ``fc`` layer and fall back to 512 when it is absent.
            # Assumes the classifier consumes the pooled layer4 features.
            feature_dim = getattr(getattr(resnet2d, "fc", None),
                                  "in_features", 512)
            self.avgpool = inflate.inflate_pool(resnet2d.avgpool, time_dim=1)
            self.classifier = torch.nn.Conv3d(in_channels=feature_dim,
                                              out_channels=class_nb,
                                              kernel_size=(1, 1, 1),
                                              bias=True)
        else:
            # NOTE: not read again in the visible part of this constructor.
            final_time_dim = int(math.ceil(frame_nb))

            self.avgpool = inflate.inflate_pool(resnet2d.avgpool, time_dim=4)