def __init__(self, num_classes, use_gpu, loss={'xent'}):
    """Assemble the TCLNet backbone, TSE module and two BN/classifier heads.

    Args:
        num_classes: output width of each of the two linear classifiers.
        use_gpu: stored on ``self.use_gpu`` and forwarded to ``TSE``.
        loss: stored on ``self.loss``; not otherwise read in this constructor.
    """
    super(TCLNet, self).__init__()
    self.loss = loss
    self.use_gpu = use_gpu

    backbone = resnet50_s1(pretrained=True)

    # Stem: the 2D layers are converted through the ``inflate`` helpers.
    self.conv1 = inflate.inflate_conv(backbone.conv1, time_dim=1)
    self.bn1 = inflate.inflate_batch_norm(backbone.bn1)
    self.relu = nn.ReLU(inplace=True)
    self.maxpool = inflate.inflate_pool(backbone.maxpool, time_dim=1)

    # Residual stages 1-3; only stage 2 requests an enhance block.
    self.layer1 = self._inflate_reslayer(backbone.layer1)
    self.layer2 = self._inflate_reslayer(
        backbone.layer2, enhance_idx=[3], channels=512)
    self.layer3 = self._inflate_reslayer(backbone.layer3)

    # Stage 4 is split: its first two blocks form the shared trunk and the
    # last block is cloned once per branch.
    shared_trunk = nn.Sequential(backbone.layer4[0], backbone.layer4[1])
    branches = nn.ModuleList(
        [CloneBottleneck(backbone.layer4[-1]) for _ in range(2)])
    self.TSE_Module = TSE(layer4=shared_trunk, branch=branches, use_gpu=use_gpu)

    # ``Module.apply`` returns the module itself, so construction and
    # initialisation can be chained; each head is built then initialised
    # before the next one is created.
    self.bn = nn.ModuleList(
        [nn.BatchNorm1d(2048).apply(weights_init_kaiming) for _ in range(2)])
    self.classifier = nn.ModuleList(
        [nn.Linear(2048, num_classes).apply(weights_init_classifier)
         for _ in range(2)])
def __init__(self, num_classes, block, c3d_idx, nl_idx, temperature=4, contrastive_att=True, **kwargs):
    """Build a 3D ResNet-50 from an ImageNet-pretrained torchvision ResNet-50.

    Args:
        num_classes: output width of the final linear classifier.
        block: stored on ``self.block``.
        c3d_idx: indexable with 0..3; entry ``i`` is passed as ``c3d_idx``
            when inflating residual stage ``i + 1``.
        nl_idx: indexable with 0..3; entry ``i`` is passed as
            ``nonlocal_idx`` when inflating residual stage ``i + 1``.
        temperature: stored on ``self.temperature``.
        contrastive_att: stored on ``self.contrastive_att``.
        **kwargs: accepted and ignored.
    """
    super(ResNet503D, self).__init__()
    self.block = block
    self.temperature = temperature
    self.contrastive_att = contrastive_att

    base = torchvision.models.resnet50(pretrained=True)
    # Set the stride of the first block of stage 4 to (1, 1), on both the
    # main path and the downsample path.
    first_block = base.layer4[0]
    first_block.conv2.stride = (1, 1)
    first_block.downsample[0].stride = (1, 1)

    self.conv1 = inflate.inflate_conv(base.conv1, time_dim=1)
    self.bn1 = inflate.inflate_batch_norm(base.bn1)
    self.relu = nn.ReLU(inplace=True)
    self.maxpool = inflate.inflate_pool(base.maxpool, time_dim=1)

    # Stage i + 1 is given the i-th entries of c3d_idx / nl_idx and this
    # per-stage ``nonlocal_channels`` value.
    stage_channels = (256, 512, 1024, 2048)
    for stage_pos, channels in enumerate(stage_channels):
        stage_name = 'layer%d' % (stage_pos + 1)
        inflated = self._inflate_reslayer(
            getattr(base, stage_name),
            c3d_idx=c3d_idx[stage_pos],
            nonlocal_idx=nl_idx[stage_pos],
            nonlocal_channels=channels,
        )
        setattr(self, stage_name, inflated)

    self.bn = nn.BatchNorm1d(2048)
    self.bn.apply(weights_init_kaiming)

    self.classifier = nn.Linear(2048, num_classes)
    self.classifier.apply(weights_init_classifier)
def __init__(self, resnet2d, block_func, expansion=4, sample_size=112, frame_nb=16, class_nb=1000, conv_class=False):
    """Build an inflated 3D ResNet from the layers of a 2D ResNet.

    Args:
        resnet2d: 2D ResNet providing ``conv1``, ``bn1``, ``maxpool``,
            ``layer1``..``layer4`` and ``avgpool``.
        block_func: forwarded to ``inflate_reslayer`` for every stage.
        expansion: channel expansion of the residual block; the classifier
            input width is ``512 * expansion`` in both classifier modes.
        sample_size: not used by this constructor; kept so existing callers
            keep working.
        frame_nb: not used by this constructor; kept so existing callers
            keep working.
        class_nb: number of output classes.
        conv_class: Whether to use convolutional layer as classifier to
            adapt to various number of frames
    """
    super(I3ResNet, self).__init__()
    self.conv_class = conv_class

    self.conv1 = inflate.inflate_conv(
        resnet2d.conv1, time_dim=3, time_padding=1, center=True)
    self.bn1 = inflate.inflate_batch_norm(resnet2d.bn1)
    self.relu = torch.nn.ReLU(inplace=True)
    self.maxpool = inflate.inflate_pool(
        resnet2d.maxpool, time_dim=3, time_padding=1, time_stride=2)

    self.layer1 = inflate_reslayer(resnet2d.layer1, block_func)
    self.layer2 = inflate_reslayer(resnet2d.layer2, block_func)
    self.layer3 = inflate_reslayer(resnet2d.layer3, block_func)
    self.layer4 = inflate_reslayer(resnet2d.layer4, block_func)

    # Both classifier variants consume the same feature width. It used to be
    # hard-coded to 2048 for the conv classifier, which only matches the
    # default expansion of 4.
    feature_dim = 512 * expansion

    if conv_class:
        self.avgpool = inflate.inflate_pool(resnet2d.avgpool, time_dim=1)
        self.classifier = torch.nn.Conv3d(
            in_channels=feature_dim,
            out_channels=class_nb,
            kernel_size=(1, 1, 1),
            bias=True)
    else:
        # Adaptive pooling reduces every input to 1x1x1, so no pooling
        # window has to be derived from frame_nb / sample_size.
        self.avgpool = torch.nn.AdaptiveAvgPool3d((1, 1, 1))
        self.fc = torch.nn.Linear(feature_dim, class_nb)
def __init__(self, num_classes, use_gpu, loss={'xent'}, vis=False, transformer_num_heads=6, transformer_num_layers=1):
    """Build the inflated ResNet-50 backbone, the CMHA layer and one head.

    Args:
        num_classes: output width of the linear classifier.
        use_gpu: stored on ``self.use_gpu``.
        loss: stored on ``self.loss``; when it equals ``{'htri'}`` no
            classifier is created.
        vis: stored on ``self.vis`` and forwarded to ``CMHALayer``.
        transformer_num_heads: written to ``config.transformer.num_heads``.
        transformer_num_layers: written to ``config.transformer.num_layers``.
    """
    super(ResNetSingleCMHAL3MFMCSPAP, self).__init__()
    self.loss = loss
    self.use_gpu = use_gpu
    self.vis = vis

    backbone = resnet50_s1(pretrained=True)

    self.conv1 = inflate.inflate_conv(backbone.conv1, time_dim=1)
    self.bn1 = inflate.inflate_batch_norm(backbone.bn1)
    self.relu = nn.ReLU(inplace=True)
    self.maxpool = inflate.inflate_pool(backbone.maxpool, time_dim=1)

    # All four residual stages are inflated with the default arguments of
    # ``_inflate_reslayer`` (no enhance blocks, no TSE branching).
    self.layer1 = self._inflate_reslayer(backbone.layer1)
    self.layer2 = self._inflate_reslayer(backbone.layer2)
    self.layer3 = self._inflate_reslayer(backbone.layer3)
    self.layer4 = self._inflate_reslayer(backbone.layer4)

    # Transformer configuration for the CMHA layer.
    cmha_config = get_single_transformer_config()
    cmha_config.transformer.num_heads = transformer_num_heads
    cmha_config.transformer.num_layers = transformer_num_layers
    fmap_height, fmap_width = 16, 8
    self.cmha = CMHALayer(
        config=cmha_config,
        fmap_size=(fmap_height, fmap_width),
        in_channels=1024,
        vis=self.vis,
        num_frames=4)

    # Single stream: one BN head, its width recorded in ``feature_size``.
    stream_feature_size = 2048
    self.feature_size = [stream_feature_size]
    self.bn = nn.ModuleList(
        [nn.BatchNorm1d(stream_feature_size).apply(weights_init_kaiming)])

    # When loss is exactly {'htri'} the constructor stops here and
    # ``self.classifier`` is never defined.
    triplet_only = self.loss == {'htri'}
    if not triplet_only:
        self.classifier = nn.ModuleList(
            [nn.Linear(2048, num_classes).apply(weights_init_classifier)])
def __init__(self, num_classes, use_gpu, loss={'xent'}):
    """Build the inflated ResNet-50 backbone with four BN/classifier heads.

    Two heads are 1024 wide and two are 2048 wide, in that order.

    Args:
        num_classes: output width of every linear classifier.
        use_gpu: stored on ``self.use_gpu``.
        loss: stored on ``self.loss``; when it equals ``{'htri'}`` no
            classifiers are created.
    """
    super(ResNet2AP, self).__init__()
    self.loss = loss
    self.use_gpu = use_gpu

    backbone = resnet50_s1(pretrained=True)

    self.conv1 = inflate.inflate_conv(backbone.conv1, time_dim=1)
    self.bn1 = inflate.inflate_batch_norm(backbone.bn1)
    self.relu = nn.ReLU(inplace=True)
    self.maxpool = inflate.inflate_pool(backbone.maxpool, time_dim=1)

    # All four residual stages are inflated with the default arguments of
    # ``_inflate_reslayer`` (no enhance blocks, no TSE branching).
    self.layer1 = self._inflate_reslayer(backbone.layer1)
    self.layer2 = self._inflate_reslayer(backbone.layer2)
    self.layer3 = self._inflate_reslayer(backbone.layer3)
    self.layer4 = self._inflate_reslayer(backbone.layer4)

    # Width of each head, in registration order.
    head_widths = (1024, 1024, 2048, 2048)
    self.feature_size = list(head_widths)
    self.bn = nn.ModuleList(
        [nn.BatchNorm1d(width).apply(weights_init_kaiming)
         for width in head_widths])

    # When loss is exactly {'htri'} the constructor stops here and
    # ``self.classifier`` is never defined.
    triplet_only = self.loss == {'htri'}
    if not triplet_only:
        # One linear layer per head; each is created and initialised before
        # the next one is built.
        self.classifier = nn.ModuleList(
            [nn.Linear(width, num_classes).apply(weights_init_classifier)
             for width in head_widths])
def __init__(self, num_classes, use_gpu, loss={'xent'}, attention_flatness=False):
    """Build the inflated ResNet-50 backbone, a ResTransformer and two heads.

    Args:
        num_classes: output width of each linear classifier.
        use_gpu: stored on ``self.use_gpu``.
        loss: stored on ``self.loss``; when it equals ``{'htri'}`` the
            constructor returns right after the transformer is created.
        attention_flatness: when truthy, a learnable target distribution
            and a KL-divergence loss module are also created.
    """
    super(ResNetSingleSTTwoBN, self).__init__()
    self.loss = loss
    self.use_gpu = use_gpu

    backbone = resnet50_s1(pretrained=True)

    self.conv1 = inflate.inflate_conv(backbone.conv1, time_dim=1)
    self.bn1 = inflate.inflate_batch_norm(backbone.bn1)
    self.relu = nn.ReLU(inplace=True)
    self.maxpool = inflate.inflate_pool(backbone.maxpool, time_dim=1)

    # All four residual stages are inflated with the default arguments of
    # ``_inflate_reslayer`` (no enhance blocks, no TSE branching).
    self.layer1 = self._inflate_reslayer(backbone.layer1)
    self.layer2 = self._inflate_reslayer(backbone.layer2)
    self.layer3 = self._inflate_reslayer(backbone.layer3)
    self.layer4 = self._inflate_reslayer(backbone.layer4)

    transformer_cfg = get_single_transformer_config()
    self.single_stt = ResTransformer(
        config=transformer_cfg, fmap_size=(16, 8), in_channels=2048, vis=True)

    if self.loss == {'htri'}:
        # NOTE(review): returning here leaves ``self.classifier`` and
        # ``self.attention_flatness`` undefined -- confirm that no other
        # method reads them when loss is {'htri'}.
        return

    head_width = transformer_cfg.hidden_size
    self.classifier = nn.ModuleList(
        [nn.Linear(head_width, num_classes).apply(weights_init_classifier)
         for _ in range(2)])

    self.attention_flatness = attention_flatness
    if attention_flatness:
        # One entry per cell of the 16 x 8 map plus one extra entry
        # (presumably a class token -- TODO confirm).
        self.target_distribution = nn.Parameter(torch.ones(16 * 8 + 1))
        self.kl_loss = nn.KLDivLoss(reduction='batchmean', log_target=True)
def __init__(self, resnet2d, frame_nb=16, class_nb=1000, conv_class=False):
    """Build an inflated 3D ResNet from the layers of a 2D ResNet.

    Args:
        resnet2d: 2D ResNet providing ``conv1``, ``bn1``, ``maxpool``,
            ``layer1``..``layer4``, ``avgpool`` and ``fc``.
        frame_nb: number of frames; only used in the non-conv classifier
            path, where ``ceil(frame_nb / 16)`` is the pooling ``time_dim``.
        class_nb: number of output channels of the conv classifier; not
            used in the non-conv path, where ``resnet2d.fc`` is inflated.
        conv_class: Whether to use convolutional layer as classifier to
            adapt to various number of frames
    """
    super(I3ResNet, self).__init__()
    self.conv_class = conv_class

    self.conv1 = inflate.inflate_conv(
        resnet2d.conv1, time_dim=3, time_padding=1, center=True)
    self.bn1 = inflate.inflate_batch_norm(resnet2d.bn1)
    self.relu = torch.nn.ReLU(inplace=True)
    self.maxpool = inflate.inflate_pool(
        resnet2d.maxpool, time_dim=3, time_padding=1, time_stride=2)

    # Inflate the four residual stages in order, keeping their names.
    for stage_name in ('layer1', 'layer2', 'layer3', 'layer4'):
        setattr(self, stage_name,
                inflate_reslayer(getattr(resnet2d, stage_name)))

    if not conv_class:
        # Pooling plus the inflated fully connected layer of the 2D model.
        self.avgpool = inflate.inflate_pool(
            resnet2d.avgpool, time_dim=int(math.ceil(frame_nb / 16)))
        self.fc = inflate.inflate_linear(resnet2d.fc, 1)
    else:
        # Pooling with time_dim=1 followed by a 1x1x1 conv classifier.
        self.avgpool = inflate.inflate_pool(resnet2d.avgpool, time_dim=1)
        self.classifier = torch.nn.Conv3d(
            in_channels=2048,
            out_channels=class_nb,
            kernel_size=(1, 1, 1),
            bias=True)
def __init__(self, **kwargs):
    """Build the video ResNet-50 with non-local blocks in stages 2 and 3.

    Args:
        **kwargs: accepted and ignored.
    """
    super(VidNonLocalResNet50, self).__init__()

    base = torchvision.models.resnet50(pretrained=True)
    # Set the stride of the first block of stage 4 to (1, 1), on both the
    # main path and the downsample path.
    first_block = base.layer4[0]
    first_block.conv2.stride = (1, 1)
    first_block.downsample[0].stride = (1, 1)

    self.conv1 = inflate.inflate_conv(base.conv1, time_dim=1)
    self.bn1 = inflate.inflate_batch_norm(base.bn1)
    self.relu = nn.ReLU(inplace=True)
    self.maxpool = inflate.inflate_pool(base.maxpool, time_dim=1)

    # Extra keyword arguments handed to ``_inflate_reslayer`` per stage;
    # stages 1 and 4 use its defaults.
    stage_options = (
        ('layer1', {}),
        ('layer2', {'nonlocal_idx': [1, 3], 'nonlocal_channels': 512}),
        ('layer3', {'nonlocal_idx': [1, 3, 5], 'nonlocal_channels': 1024}),
        ('layer4', {}),
    )
    for stage_name, options in stage_options:
        setattr(self, stage_name,
                self._inflate_reslayer(getattr(base, stage_name), **options))