Example 1
    def __init__(self, bottleneck2d):
        super(Bottleneck3d, self).__init__()

        spatial_stride = bottleneck2d.conv2.stride[0]

        self.conv1 = inflate.inflate_conv(bottleneck2d.conv1,
                                          time_dim=1,
                                          center=True)
        self.bn1 = inflate.inflate_batch_norm(bottleneck2d.bn1)

        self.conv2 = inflate.inflate_conv(bottleneck2d.conv2,
                                          time_dim=3,
                                          time_padding=1,
                                          time_stride=spatial_stride,
                                          center=True)
        self.bn2 = inflate.inflate_batch_norm(bottleneck2d.bn2)

        self.conv3 = inflate.inflate_conv(bottleneck2d.conv3,
                                          time_dim=1,
                                          center=True)
        self.bn3 = inflate.inflate_batch_norm(bottleneck2d.bn3)

        self.relu = torch.nn.ReLU(inplace=True)

        if bottleneck2d.downsample is not None:
            self.downsample = inflate_downsample(bottleneck2d.downsample,
                                                 time_stride=spatial_stride)
        else:
            self.downsample = None

        self.stride = bottleneck2d.stride
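
The inflate.inflate_conv helper used throughout these examples is not shown in the excerpts. Below is a minimal sketch of what such a helper typically does, assuming the standard I3D-style inflation (the actual inflate module may differ in details such as dilation or grouped convolutions): the pretrained 2D kernel is repeated along a new time axis and rescaled by time_dim, or, when center=True, copied only into the central temporal slice.

import torch
import torch.nn as nn

def inflate_conv(conv2d, time_dim=1, time_padding=0, time_stride=1, center=False):
    # Build a Conv3d whose spatial hyper-parameters mirror the 2D layer and
    # whose temporal kernel/stride/padding come from the time_* arguments.
    conv3d = nn.Conv3d(conv2d.in_channels,
                       conv2d.out_channels,
                       kernel_size=(time_dim, *conv2d.kernel_size),
                       padding=(time_padding, *conv2d.padding),
                       stride=(time_stride, *conv2d.stride),
                       bias=conv2d.bias is not None)
    weight2d = conv2d.weight.data  # (out_channels, in_channels, kh, kw)
    if center:
        # keep the pretrained filter only in the central temporal slice
        weight3d = torch.zeros(*weight2d.shape[:2], time_dim, *weight2d.shape[2:])
        weight3d[:, :, time_dim // 2] = weight2d
    else:
        # tile over time and rescale so a static clip reproduces the 2D activations
        weight3d = weight2d.unsqueeze(2).repeat(1, 1, time_dim, 1, 1) / time_dim
    conv3d.weight.data = weight3d
    if conv2d.bias is not None:
        conv3d.bias.data = conv2d.bias.data
    return conv3d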
Example 2
    def __init__(self, bottleneck2d):
        super(Bottleneck3d, self).__init__()

        self.conv1 = inflate.inflate_conv(bottleneck2d.conv1, time_dim=1)
        self.bn1 = inflate.inflate_batch_norm(bottleneck2d.bn1)
        self.conv2 = inflate.inflate_conv(bottleneck2d.conv2, time_dim=1)
        self.bn2 = inflate.inflate_batch_norm(bottleneck2d.bn2)
        self.conv3 = inflate.inflate_conv(bottleneck2d.conv3, time_dim=1)
        self.bn3 = inflate.inflate_batch_norm(bottleneck2d.bn3)
        self.relu = nn.ReLU(inplace=True)

        if bottleneck2d.downsample is not None:
            self.downsample = self._inflate_downsample(bottleneck2d.downsample)
        else:
            self.downsample = None
Example 3
    def _inflate_downsample(self, downsample2d, time_stride=1):
        downsample3d = nn.Sequential(
            inflate.inflate_conv(downsample2d[0],
                                 time_dim=1,
                                 time_stride=time_stride),
            inflate.inflate_batch_norm(downsample2d[1]))
        return downsample3d
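
inflate_batch_norm is simpler: batch normalisation acts per channel, so inflation only swaps the module class while carrying over the learned affine parameters and running statistics. A hedged sketch of what the helper might look like:

import torch.nn as nn

def inflate_batch_norm(bn2d):
    # BatchNorm is pointwise over channels, so only the expected input rank
    # changes; weights, biases and running statistics transfer unchanged.
    bn3d = nn.BatchNorm3d(bn2d.num_features,
                          eps=bn2d.eps,
                          momentum=bn2d.momentum,
                          affine=bn2d.affine,
                          track_running_stats=bn2d.track_running_stats)
    bn3d.load_state_dict(bn2d.state_dict())
    return bn3d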
Example 4
    def __init__(self, num_classes, block, c3d_idx, nl_idx, temperature=4, contrastive_att=True, **kwargs):
        super(ResNet503D, self).__init__()

        self.block = block
        self.temperature = temperature
        self.contrastive_att = contrastive_att

        resnet2d = torchvision.models.resnet50(pretrained=True)
        resnet2d.layer4[0].conv2.stride = (1, 1)
        resnet2d.layer4[0].downsample[0].stride = (1, 1)

        self.conv1 = inflate.inflate_conv(resnet2d.conv1, time_dim=1)
        self.bn1 = inflate.inflate_batch_norm(resnet2d.bn1)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = inflate.inflate_pool(resnet2d.maxpool, time_dim=1)

        self.layer1 = self._inflate_reslayer(resnet2d.layer1, c3d_idx=c3d_idx[0],
                                             nonlocal_idx=nl_idx[0], nonlocal_channels=256)
        self.layer2 = self._inflate_reslayer(resnet2d.layer2, c3d_idx=c3d_idx[1],
                                             nonlocal_idx=nl_idx[1], nonlocal_channels=512)
        self.layer3 = self._inflate_reslayer(resnet2d.layer3, c3d_idx=c3d_idx[2],
                                             nonlocal_idx=nl_idx[2], nonlocal_channels=1024)
        self.layer4 = self._inflate_reslayer(resnet2d.layer4, c3d_idx=c3d_idx[3],
                                             nonlocal_idx=nl_idx[3], nonlocal_channels=2048)

        self.bn = nn.BatchNorm1d(2048)
        self.bn.apply(weights_init_kaiming)

        self.classifier = nn.Linear(2048, num_classes)
        self.classifier.apply(weights_init_classifier)
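
The _inflate_reslayer method called above is not part of the excerpt. A plausible sketch of its behaviour, assuming it builds on the Bottleneck3D constructor shown in Example 8 (NonLocalBlock3D is a hypothetical name for the non-local block module this class presumably inserts):

    def _inflate_reslayer(self, reslayer2d, c3d_idx, nonlocal_idx=(), nonlocal_channels=0):
        # Wrap every 2D bottleneck in a Bottleneck3D; conv2 is inflated over time
        # only for the block indices listed in c3d_idx, and a non-local block is
        # appended after each index listed in nonlocal_idx.
        layers = []
        for i, bottleneck2d in enumerate(reslayer2d):
            layers.append(Bottleneck3D(bottleneck2d, self.block,
                                       inflate_time=(i in c3d_idx),
                                       temperature=self.temperature,
                                       contrastive_att=self.contrastive_att))
            if i in nonlocal_idx:
                layers.append(NonLocalBlock3D(nonlocal_channels))  # hypothetical module
        return nn.Sequential(*layers)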
Example 5
    def __init__(self, num_classes, use_gpu, loss={'xent'}):
        super(TCLNet, self).__init__()
        self.loss = loss
        self.use_gpu = use_gpu
        resnet2d = resnet50_s1(pretrained=True)

        self.conv1 = inflate.inflate_conv(resnet2d.conv1, time_dim=1)
        self.bn1 = inflate.inflate_batch_norm(resnet2d.bn1)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = inflate.inflate_pool(resnet2d.maxpool, time_dim=1)
        
        self.layer1 = self._inflate_reslayer(resnet2d.layer1)
        self.layer2 = self._inflate_reslayer(resnet2d.layer2, enhance_idx=[3], channels=512)
        self.layer3 = self._inflate_reslayer(resnet2d.layer3)
        layer4 = nn.Sequential(resnet2d.layer4[0], resnet2d.layer4[1])

        branch = nn.ModuleList([CloneBottleneck(resnet2d.layer4[-1]) for _ in range(2)])

        self.TSE_Module = TSE(layer4=layer4, branch=branch, use_gpu=use_gpu)

        bn = []
        for _ in range(2):
            add_block = nn.BatchNorm1d(2048)
            add_block.apply(weights_init_kaiming)
            bn.append(add_block)
        self.bn = nn.ModuleList(bn)

        classifier = []
        for _ in range(2):
            add_block = nn.Linear(2048, num_classes)
            add_block.apply(weights_init_classifier)
            classifier.append(add_block)
        self.classifier = nn.ModuleList(classifier)
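
inflate_pool, used for the stem max-pooling in several of these examples, is likewise not shown. A minimal sketch covering only the MaxPool2d case (the I3ResNet examples further down also pass average-pooling layers, which would need an analogous branch):

import torch.nn as nn

def inflate_pool(pool2d, time_dim=1, time_padding=0, time_stride=None, time_dilation=1):
    # MaxPool2d -> MaxPool3d with an extra leading (temporal) dimension.
    if time_stride is None:
        time_stride = time_dim
    return nn.MaxPool3d(kernel_size=(time_dim, pool2d.kernel_size, pool2d.kernel_size),
                        stride=(time_stride, pool2d.stride, pool2d.stride),
                        padding=(time_padding, pool2d.padding, pool2d.padding),
                        dilation=(time_dilation, pool2d.dilation, pool2d.dilation))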
Example 6
def inflate_downsample(downsample2d, time_stride=1):
    downsample3d = torch.nn.Sequential(
        inflate.inflate_conv(downsample2d[0],
                             time_dim=1,
                             time_stride=time_stride,
                             center=True),
        inflate.inflate_batch_norm(downsample2d[1]))
    return downsample3d
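
A hypothetical usage of the module-level inflate_downsample above, applied to the 1x1 projection shortcut of a torchvision bottleneck (downsample is a Sequential of Conv2d followed by BatchNorm2d, which matches the [0]/[1] indexing):

import torchvision

resnet2d = torchvision.models.resnet50(pretrained=True)
# layer2[0] halves the spatial resolution; inflating its shortcut with a matching
# temporal stride keeps the residual branch and the shortcut aligned.
downsample3d = inflate_downsample(resnet2d.layer2[0].downsample, time_stride=2)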
Example 7
    def __init__(self, num_classes, use_gpu, loss={'xent'}, vis=False, transformer_num_heads=6, transformer_num_layers=1):
        super(ResNetSingleCMHAL3MFMCSPAP, self).__init__()
        self.loss = loss
        self.use_gpu = use_gpu
        self.vis = vis
        resnet2d = resnet50_s1(pretrained=True)

        self.conv1 = inflate.inflate_conv(resnet2d.conv1, time_dim=1)
        self.bn1 = inflate.inflate_batch_norm(resnet2d.bn1)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = inflate.inflate_pool(resnet2d.maxpool, time_dim=1)
        
        self.layer1 = self._inflate_reslayer(resnet2d.layer1)
        
        #self.layer2 = self._inflate_reslayer(resnet2d.layer2, enhance_idx=[3], channels=512)
        self.layer2 = self._inflate_reslayer(resnet2d.layer2)
        
        #layer3 = nn.Sequential(resnet2d.layer3[0], resnet2d.layer3[1], resnet2d.layer3[2], resnet2d.layer3[3], resnet2d.layer3[4])
        #branch3 = nn.ModuleList([CloneBottleneck(resnet2d.layer3[-1]) for _ in range(2)])
        #self.TSE_Module_3 = TSEBranching(layer=layer3, branch=branch3, use_gpu=use_gpu, in_channels=512, out_channels=1024)
        self.layer3 = self._inflate_reslayer(resnet2d.layer3)
        
        #layer4 = nn.Sequential(resnet2d.layer4[0], resnet2d.layer4[1])
        #branch4 = nn.ModuleList([CloneBottleneck(resnet2d.layer4[-1]) for _ in range(2)])
        #self.TSE_Module_4 = TSEBranching(layer=layer4, branch=branch4, use_gpu=use_gpu, in_channels=1024, out_channels=2048)
        self.layer4 = self._inflate_reslayer(resnet2d.layer4)
        
        stt_config = get_single_transformer_config()
        stt_config.transformer.num_heads = transformer_num_heads
        stt_config.transformer.num_layers = transformer_num_layers
        w = 8
        h = 16
        
        self.cmha = CMHALayer(config=stt_config,
                              fmap_size=(h, w),
                              in_channels=1024,
                              vis=self.vis,
                              num_frames=4)

        stream_feature_size = 2048
        self.feature_size = []
        bn = []
        for _ in range(1):
            add_block = nn.BatchNorm1d(stream_feature_size)
            add_block.apply(weights_init_kaiming)
            bn.append(add_block)
            self.feature_size.append(stream_feature_size)
        self.bn = nn.ModuleList(bn)

        if self.loss == {'htri'}:
            return
            
        classifier = []
        for _ in range(1):
            add_block = nn.Linear(2048, num_classes)
            add_block.apply(weights_init_classifier)
            classifier.append(add_block)
        self.classifier = nn.ModuleList(classifier)
Example 8
    def __init__(self, bottleneck2d, block, inflate_time=False, temperature=4, contrastive_att=True):
        super(Bottleneck3D, self).__init__()

        self.conv1 = inflate.inflate_conv(bottleneck2d.conv1, time_dim=1)
        self.bn1 = inflate.inflate_batch_norm(bottleneck2d.bn1)
        if inflate_time:
            self.conv2 = block(bottleneck2d.conv2, temperature=temperature, contrastive_att=contrastive_att)
        else:
            self.conv2 = inflate.inflate_conv(bottleneck2d.conv2, time_dim=1)
        self.bn2 = inflate.inflate_batch_norm(bottleneck2d.bn2)
        self.conv3 = inflate.inflate_conv(bottleneck2d.conv3, time_dim=1)
        self.bn3 = inflate.inflate_batch_norm(bottleneck2d.bn3)
        self.relu = nn.ReLU(inplace=True)

        if bottleneck2d.downsample is not None:
            self.downsample = self._inflate_downsample(bottleneck2d.downsample)
        else:
            self.downsample = None
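
The excerpts only show constructors. A typical forward pass for these inflated bottlenecks (a sketch, not taken from the source) is the standard residual pattern, applied to 5-D video tensors:

    def forward(self, x):
        # x: (batch, channels, time, height, width)
        residual = x
        out = self.relu(self.bn1(self.conv1(x)))
        out = self.relu(self.bn2(self.conv2(out)))
        out = self.bn3(self.conv3(out))
        if self.downsample is not None:
            residual = self.downsample(x)
        return self.relu(out + residual)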
Example 9
    def __init__(self, num_classes, use_gpu, loss={'xent'}):
        super(ResNet2AP, self).__init__()
        self.loss = loss
        self.use_gpu = use_gpu
        resnet2d = resnet50_s1(pretrained=True)

        self.conv1 = inflate.inflate_conv(resnet2d.conv1, time_dim=1)
        self.bn1 = inflate.inflate_batch_norm(resnet2d.bn1)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = inflate.inflate_pool(resnet2d.maxpool, time_dim=1)

        self.layer1 = self._inflate_reslayer(resnet2d.layer1)

        #self.layer2 = self._inflate_reslayer(resnet2d.layer2, enhance_idx=[3], channels=512)
        self.layer2 = self._inflate_reslayer(resnet2d.layer2)

        #layer3 = nn.Sequential(resnet2d.layer3[0], resnet2d.layer3[1], resnet2d.layer3[2], resnet2d.layer3[3], resnet2d.layer3[4])
        #branch3 = nn.ModuleList([CloneBottleneck(resnet2d.layer3[-1]) for _ in range(2)])
        #self.TSE_Module_3 = TSEBranching(layer=layer3, branch=branch3, use_gpu=use_gpu, in_channels=512, out_channels=1024)
        self.layer3 = self._inflate_reslayer(resnet2d.layer3)

        #layer4 = nn.Sequential(resnet2d.layer4[0], resnet2d.layer4[1])
        #branch4 = nn.ModuleList([CloneBottleneck(resnet2d.layer4[-1]) for _ in range(2)])
        #self.TSE_Module_4 = TSEBranching(layer=layer4, branch=branch4, use_gpu=use_gpu, in_channels=1024, out_channels=2048)
        self.layer4 = self._inflate_reslayer(resnet2d.layer4)

        stream_feature_size = 1024
        self.feature_size = []
        bn = []
        for _ in range(2):
            add_block = nn.BatchNorm1d(stream_feature_size)
            add_block.apply(weights_init_kaiming)
            bn.append(add_block)
            self.feature_size.append(stream_feature_size)
        stream_feature_size = 2048
        for _ in range(2):
            add_block = nn.BatchNorm1d(stream_feature_size)
            add_block.apply(weights_init_kaiming)
            bn.append(add_block)
            self.feature_size.append(stream_feature_size)
        self.bn = nn.ModuleList(bn)

        if self.loss == {'htri'}:
            return

        classifier = []
        for _ in range(2):
            add_block = nn.Linear(1024, num_classes)
            add_block.apply(weights_init_classifier)
            classifier.append(add_block)
        for _ in range(2):
            add_block = nn.Linear(2048, num_classes)
            add_block.apply(weights_init_classifier)
            classifier.append(add_block)
        self.classifier = nn.ModuleList(classifier)
Example 10
    def __init__(self,
                 num_classes,
                 use_gpu,
                 loss={'xent'},
                 attention_flatness=False):
        super(ResNetSingleSTTwoBN, self).__init__()
        self.loss = loss
        self.use_gpu = use_gpu
        resnet2d = resnet50_s1(pretrained=True)

        self.conv1 = inflate.inflate_conv(resnet2d.conv1, time_dim=1)
        self.bn1 = inflate.inflate_batch_norm(resnet2d.bn1)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = inflate.inflate_pool(resnet2d.maxpool, time_dim=1)

        self.layer1 = self._inflate_reslayer(resnet2d.layer1)

        #self.layer2 = self._inflate_reslayer(resnet2d.layer2, enhance_idx=[3], channels=512)
        self.layer2 = self._inflate_reslayer(resnet2d.layer2)

        #layer3 = nn.Sequential(resnet2d.layer3[0], resnet2d.layer3[1], resnet2d.layer3[2], resnet2d.layer3[3], resnet2d.layer3[4])
        #branch3 = nn.ModuleList([CloneBottleneck(resnet2d.layer3[-1]) for _ in range(2)])
        #self.TSE_Module_3 = TSEBranching(layer=layer3, branch=branch3, use_gpu=use_gpu, in_channels=512, out_channels=1024)
        self.layer3 = self._inflate_reslayer(resnet2d.layer3)

        #layer4 = nn.Sequential(resnet2d.layer4[0], resnet2d.layer4[1])
        #branch4 = nn.ModuleList([CloneBottleneck(resnet2d.layer4[-1]) for _ in range(2)])
        #self.TSE_Module_4 = TSEBranching(layer=layer4, branch=branch4, use_gpu=use_gpu, in_channels=1024, out_channels=2048)
        self.layer4 = self._inflate_reslayer(resnet2d.layer4)

        stt_config = get_single_transformer_config()
        self.single_stt = ResTransformer(config=stt_config,
                                         fmap_size=(16, 8),
                                         in_channels=2048,
                                         vis=True)

        if self.loss == {'htri'}:
            return

        stream_feature_size = stt_config.hidden_size
        classifier = []
        for _ in range(2):
            add_block = nn.Linear(stream_feature_size, num_classes)
            add_block.apply(weights_init_classifier)
            classifier.append(add_block)
        self.classifier = nn.ModuleList(classifier)

        self.attention_flatness = attention_flatness
        if self.attention_flatness:
            #self.position_embeddings = nn.Parameter(torch.zeros(1, n_patches+1, config.hidden_size))
            self.target_distribution = nn.Parameter(torch.ones(16 * 8 + 1))
            self.kl_loss = nn.KLDivLoss(reduction='batchmean', log_target=True)
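
The excerpt stops before the flatness term is actually applied. A hypothetical use, consistent with log_target=True (both arguments given as log-probabilities), would compare the model's attention distribution against the learnable target:

        # Hypothetical flatness penalty inside forward(); attn is assumed to have
        # shape (batch, 16 * 8 + 1), i.e. one weight per spatial token plus the
        # class token.
        # flatness = self.kl_loss(
        #     attn.log_softmax(dim=-1),
        #     self.target_distribution.log_softmax(dim=-1).expand_as(attn))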
Example 11
    def __init__(self,
                 resnet2d,
                 block_func,
                 expansion=4,
                 sample_size=112,
                 frame_nb=16,
                 class_nb=1000,
                 conv_class=False):
        """
        Args:
            conv_class: Whether to use convolutional layer as classifier to
                adapt to various number of frames
        """
        super(I3ResNet, self).__init__()
        self.conv_class = conv_class

        self.conv1 = inflate.inflate_conv(resnet2d.conv1,
                                          time_dim=3,
                                          time_padding=1,
                                          center=True)
        self.bn1 = inflate.inflate_batch_norm(resnet2d.bn1)
        self.relu = torch.nn.ReLU(inplace=True)
        self.maxpool = inflate.inflate_pool(resnet2d.maxpool,
                                            time_dim=3,
                                            time_padding=1,
                                            time_stride=2)

        self.layer1 = inflate_reslayer(resnet2d.layer1, block_func)
        self.layer2 = inflate_reslayer(resnet2d.layer2, block_func)
        self.layer3 = inflate_reslayer(resnet2d.layer3, block_func)
        self.layer4 = inflate_reslayer(resnet2d.layer4, block_func)

        if conv_class:
            self.avgpool = inflate.inflate_pool(resnet2d.avgpool, time_dim=1)
            self.classifier = torch.nn.Conv3d(in_channels=2048,
                                              out_channels=class_nb,
                                              kernel_size=(1, 1, 1),
                                              bias=True)
        else:
            # kept only for the commented-out fixed-size AvgPool3d variant below
            last_duration = int(math.ceil(frame_nb / 16))
            last_size = int(math.ceil(sample_size / 32))

            self.avgpool = torch.nn.AdaptiveAvgPool3d((1, 1, 1))
            # self.avgpool = torch.nn.AvgPool3d(
            #     (last_duration, last_size, last_size), stride=1)
            self.fc = torch.nn.Linear(512 * expansion, class_nb)
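
A hypothetical construction of the I3ResNet above (its forward pass is not part of the excerpt; inflated ResNets conventionally take 5-D input of shape (batch, channels, time, height, width), and block_func is assumed to be the Bottleneck3d class from Example 1):

import torch
import torchvision

resnet2d = torchvision.models.resnet50(pretrained=True)
model = I3ResNet(resnet2d, block_func=Bottleneck3d,
                 sample_size=224, frame_nb=16, class_nb=400)
clip = torch.randn(2, 3, 16, 224, 224)  # 2 clips x 3 channels x 16 frames x 224 x 224
logits = model(clip)                    # expected: (2, 400) when conv_class=False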
Example 12
    def __init__(self, **kwargs):
        super(VidNonLocalResNet50, self).__init__()

        resnet2d = torchvision.models.resnet50(pretrained=True)
        resnet2d.layer4[0].conv2.stride = (1, 1)
        resnet2d.layer4[0].downsample[0].stride = (1, 1)

        self.conv1 = inflate.inflate_conv(resnet2d.conv1, time_dim=1)
        self.bn1 = inflate.inflate_batch_norm(resnet2d.bn1)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = inflate.inflate_pool(resnet2d.maxpool, time_dim=1)

        self.layer1 = self._inflate_reslayer(resnet2d.layer1)
        self.layer2 = self._inflate_reslayer(resnet2d.layer2,
                                             nonlocal_idx=[1, 3],
                                             nonlocal_channels=512)
        self.layer3 = self._inflate_reslayer(resnet2d.layer3,
                                             nonlocal_idx=[1, 3, 5],
                                             nonlocal_channels=1024)
        self.layer4 = self._inflate_reslayer(resnet2d.layer4)
Example 13
    def __init__(self, resnet2d, frame_nb=16, class_nb=1000, conv_class=False):
        """
        Args:
            conv_class: Whether to use convolutional layer as classifier to
                adapt to various number of frames
        """
        super(I3ResNet, self).__init__()
        self.conv_class = conv_class

        self.conv1 = inflate.inflate_conv(resnet2d.conv1,
                                          time_dim=3,
                                          time_padding=1,
                                          center=True)
        self.bn1 = inflate.inflate_batch_norm(resnet2d.bn1)
        self.relu = torch.nn.ReLU(inplace=True)
        self.maxpool = inflate.inflate_pool(resnet2d.maxpool,
                                            time_dim=3,
                                            time_padding=1,
                                            time_stride=2)

        self.layer1 = inflate_reslayer(resnet2d.layer1)
        self.layer2 = inflate_reslayer(resnet2d.layer2)
        self.layer3 = inflate_reslayer(resnet2d.layer3)
        self.layer4 = inflate_reslayer(resnet2d.layer4)

        if conv_class:
            self.avgpool = inflate.inflate_pool(resnet2d.avgpool, time_dim=1)
            self.classifier = torch.nn.Conv3d(in_channels=2048,
                                              out_channels=class_nb,
                                              kernel_size=(1, 1, 1),
                                              bias=True)
        else:
            final_time_dim = int(math.ceil(frame_nb / 16))
            self.avgpool = inflate.inflate_pool(resnet2d.avgpool,
                                                time_dim=final_time_dim)
            self.fc = inflate.inflate_linear(resnet2d.fc, 1)
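
Finally, inflate_linear (used here for the classifier) can be sketched as tiling the 2D weight matrix over the concatenated time steps, again rescaled so a static clip reproduces the 2D logits. This is an assumption about the inflate module, not its verbatim source:

import torch.nn as nn

def inflate_linear(linear2d, time_dim):
    # The pooled 3D features concatenate time_dim time steps, so the weight
    # matrix is repeated along the input dimension and rescaled by time_dim.
    linear3d = nn.Linear(linear2d.in_features * time_dim, linear2d.out_features)
    linear3d.weight.data = linear2d.weight.data.repeat(1, time_dim) / time_dim
    linear3d.bias.data = linear2d.bias.data
    return linear3d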