def c2d50(pretrained=False, num_classes=1000, **kwargs):
    """Constructs a C2D ResNet-50 model."""
    model = C2D(BottleneckC2D, [3, 4, 6, 3], num_classes=num_classes, **kwargs)
    if pretrained:
        logging.info("Loading ImageNet weights and inflating to 3D")
        pretrained_model = os.path.join(
            os.path.dirname(os.path.realpath(__file__)),
            'pretrained/resnet50-19c8e357.pth')
        _pretrained = torch.load(pretrained_model)
        initializer.init_3d_from_2d_dict(model, _pretrained,
                                         method='inflation',
                                         contains_nl=False)
    return model
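# Usage sketch (illustrative, not part of the original source). It assumes the
# C2D network consumes NCTHW clips; the clip length of 8 frames and the 224x224
# crop are guesses, not values confirmed by this file.
if __name__ == '__main__':
    net = c2d50(pretrained=False, num_classes=400)
    clip = torch.randn(2, 3, 8, 224, 224)  # hypothetical (N, C, T, H, W) batch
    logits = net(clip)
    print(logits.shape)  # expected: torch.Size([2, 400])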
def __init__(self, num_classes, pretrained=False, **kwargs):
    super(MFNET_FIVEP_LINEAR15_FRAME, self).__init__()

    groups = 16
    k_sec = {2: 3, 3: 4, 4: 6, 5: 3}

    # conv1 - x224 (x16)
    conv1_num_out = 16
    self.conv1 = nn.Sequential(OrderedDict([
        ('conv', nn.Conv3d(3, conv1_num_out, kernel_size=(3, 5, 5),
                           padding=(1, 2, 2), stride=(1, 2, 2), bias=False)),
        ('bn', nn.BatchNorm3d(conv1_num_out)),
        ('relu', nn.ReLU(inplace=True)),
    ]))
    self.maxpool = nn.MaxPool3d(kernel_size=(1, 3, 3), stride=(1, 2, 2),
                                padding=(0, 1, 1))

    # conv2 - x56 (x8)
    num_mid = 96
    conv2_num_out = 96
    self.conv2 = nn.Sequential(OrderedDict([
        ("B%02d" % i, MF_UNIT(num_in=conv1_num_out if i == 1 else conv2_num_out,
                              num_mid=num_mid,
                              num_out=conv2_num_out,
                              stride=(2, 1, 1) if i == 1 else (1, 1, 1),
                              g=groups,
                              first_block=(i == 1)))
        for i in range(1, k_sec[2] + 1)
    ]))

    # conv3 - x28 (x8)
    num_mid *= 2
    conv3_num_out = 2 * conv2_num_out
    self.conv3 = nn.Sequential(OrderedDict([
        ("B%02d" % i, MF_UNIT(num_in=conv2_num_out if i == 1 else conv3_num_out,
                              num_mid=num_mid,
                              num_out=conv3_num_out,
                              stride=(1, 2, 2) if i == 1 else (1, 1, 1),
                              g=groups,
                              first_block=(i == 1)))
        for i in range(1, k_sec[3] + 1)
    ]))

    # conv4 - x14 (x8)
    num_mid *= 2
    conv4_num_out = 2 * conv3_num_out
    self.conv4 = nn.Sequential(OrderedDict([
        ("B%02d" % i, MF_UNIT(num_in=conv3_num_out if i == 1 else conv4_num_out,
                              num_mid=num_mid,
                              num_out=conv4_num_out,
                              stride=(1, 2, 2) if i == 1 else (1, 1, 1),
                              g=groups,
                              first_block=(i == 1)))
        for i in range(1, k_sec[4] + 1)
    ]))

    # conv5 - x7 (x8)
    num_mid *= 2
    conv5_num_out = 2 * conv4_num_out
    self.conv5 = nn.Sequential(OrderedDict([
        ("B%02d" % i, MF_UNIT(num_in=conv4_num_out if i == 1 else conv5_num_out,
                              num_mid=num_mid,
                              num_out=conv5_num_out,
                              stride=(1, 2, 2) if i == 1 else (1, 1, 1),
                              g=groups,
                              first_block=(i == 1)))
        for i in range(1, k_sec[5] + 1)
    ]))

    # Motion extractor applied after conv4
    self.motion_exctractor = Motion_Exctractor_MEAN(inplanes=conv4_num_out,
                                                    iterplanes=conv3_num_out,
                                                    outplanes=conv2_num_out,
                                                    num_embedding=20)

    # final
    self.tail = nn.Sequential(OrderedDict([
        ('bn', nn.BatchNorm3d(conv5_num_out)),
        ('relu', nn.ReLU(inplace=True)),
    ]))
    # self.globalpool = nn.Sequential(OrderedDict([
    #     ('avg', nn.AvgPool3d(kernel_size=(8, 7, 7), stride=(1, 1, 1))),
    #     # ('dropout', nn.Dropout(p=0.5)),  # only for fine-tuning
    # ]))
    # self.classifier = nn.Linear(conv5_num_out, num_classes)

    # Position-related linear layers (input is one-hot; Dec-5)
    self.emb_prepool = nn.Sequential(OrderedDict([
        ('avg', nn.AvgPool3d(kernel_size=(1, 7, 7), stride=(1, 1, 1))),
        # ('dropout', nn.Dropout(p=0.5)),  # only for fine-tuning
    ]))
    self.emb_postpool = nn.AvgPool1d(kernel_size=8)
    self.classifier = nn.Linear(conv5_num_out + conv2_num_out * 7, num_classes)

    #############
    # Initialization
    initializer.xavier(net=self)

    if pretrained:
        pretrained_model = os.path.join(
            os.path.dirname(os.path.realpath(__file__)),
            'pretrained/MFNet2D_ImageNet1k-0000.pth')
        logging.info("Network:: graph initialized, loading pretrained model: "
                     "`{}'".format(pretrained_model))
        assert os.path.exists(pretrained_model), \
            "cannot locate: `{}'".format(pretrained_model)
        state_dict_2d = torch.load(pretrained_model)
        initializer.init_3d_from_2d_dict(net=self,
                                         state_dict=state_dict_2d,
                                         method='inflation')
    else:
        logging.info("Network:: graph initialized, use random initialization!")
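    # Classifier-width sketch (derived from the constants above, not stated in
    # the source): with conv5_num_out = 768 and conv2_num_out = 96, the
    # classifier consumes 768 + 96 * 7 = 1440 features — the pooled conv5
    # descriptor concatenated with seven 96-d motion-extractor embeddings (the
    # meaning of the factor 7 is not documented in this file).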
def __init__(self, num_classes, pretrained=False, **kwargs):
    super(MFNET_BASE, self).__init__()

    groups = 16
    k_sec = {2: 3, 3: 4, 4: 6, 5: 3}

    # conv1 - x224 (x16)
    conv1_num_out = 16
    self.conv1 = nn.Sequential(OrderedDict([
        ('conv', nn.Conv3d(3, conv1_num_out, kernel_size=(3, 5, 5),
                           padding=(1, 2, 2), stride=(1, 2, 2), bias=False)),
        ('bn', nn.BatchNorm3d(conv1_num_out)),
        ('relu', nn.ReLU(inplace=True)),
    ]))
    self.maxpool = nn.MaxPool3d(kernel_size=(1, 3, 3), stride=(1, 2, 2),
                                padding=(0, 1, 1))

    # conv2 - x56 (x8)
    num_mid = 96
    conv2_num_out = 96
    self.conv2 = nn.Sequential(OrderedDict([
        ("B%02d" % i, MF_UNIT(num_in=conv1_num_out if i == 1 else conv2_num_out,
                              num_mid=num_mid,
                              num_out=conv2_num_out,
                              stride=(2, 1, 1) if i == 1 else (1, 1, 1),
                              g=groups,
                              first_block=(i == 1)))
        for i in range(1, k_sec[2] + 1)
    ]))

    # conv3 - x28 (x8)
    num_mid *= 2
    conv3_num_out = 2 * conv2_num_out
    self.conv3 = nn.Sequential(OrderedDict([
        ("B%02d" % i, MF_UNIT(num_in=conv2_num_out if i == 1 else conv3_num_out,
                              num_mid=num_mid,
                              num_out=conv3_num_out,
                              stride=(1, 2, 2) if i == 1 else (1, 1, 1),
                              g=groups,
                              first_block=(i == 1)))
        for i in range(1, k_sec[3] + 1)
    ]))

    # conv4 - x14 (x8)
    num_mid *= 2
    conv4_num_out = 2 * conv3_num_out
    self.conv4 = nn.Sequential(OrderedDict([
        ("B%02d" % i, MF_UNIT(num_in=conv3_num_out if i == 1 else conv4_num_out,
                              num_mid=num_mid,
                              num_out=conv4_num_out,
                              stride=(1, 2, 2) if i == 1 else (1, 1, 1),
                              g=groups,
                              first_block=(i == 1)))
        for i in range(1, k_sec[4] + 1)
    ]))

    # conv5 - x7 (x8)
    num_mid *= 2
    conv5_num_out = 2 * conv4_num_out
    self.conv5 = nn.Sequential(OrderedDict([
        ("B%02d" % i, MF_UNIT(num_in=conv4_num_out if i == 1 else conv5_num_out,
                              num_mid=num_mid,
                              num_out=conv5_num_out,
                              stride=(1, 2, 2) if i == 1 else (1, 1, 1),
                              g=groups,
                              first_block=(i == 1)))
        for i in range(1, k_sec[5] + 1)
    ]))

    # final
    self.tail = nn.Sequential(OrderedDict([
        ('bn', nn.BatchNorm3d(conv5_num_out)),
        ('relu', nn.ReLU(inplace=True)),
    ]))
    self.globalpool = nn.Sequential(OrderedDict([
        ('avg', nn.AvgPool3d(kernel_size=(8, 7, 7), stride=(1, 1, 1))),
        # ('dropout', nn.Dropout(p=0.5)),  # only for fine-tuning
    ]))
    self.classifier = nn.Linear(conv5_num_out, num_classes)

    #############
    # Initialization
    initializer.xavier(net=self)

    if pretrained:
        import torch
        load_method = 'inflation'  # 'random', 'inflation'
        pretrained_model = os.path.join(
            os.path.dirname(os.path.realpath(__file__)),
            'pretrained', 'MFNet2D_ImageNet1k-0000.pth')
        logging.info("Network:: graph initialized, loading pretrained model: "
                     "`{}'".format(pretrained_model))
        assert os.path.exists(pretrained_model), \
            "cannot locate: `{}'".format(pretrained_model)
        state_dict_2d = torch.load(pretrained_model, map_location='cpu')
        initializer.init_3d_from_2d_dict(net=self,
                                         state_dict=state_dict_2d,
                                         method=load_method)
    else:
        logging.info("Network:: graph initialized, use random initialization!")
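    # Shape trace for a 3 x 16 x 224 x 224 input (a sketch derived from the
    # strides above, assuming MF_UNIT preserves size when its stride is 1; the
    # (8, 7, 7) pooling kernel is consistent with this trace):
    #   conv1   (1,2,2) -> 16  x 16 x 112 x 112
    #   maxpool (1,2,2) -> 16  x 16 x  56 x  56
    #   conv2   (2,1,1) -> 96  x  8 x  56 x  56
    #   conv3   (1,2,2) -> 192 x  8 x  28 x  28
    #   conv4   (1,2,2) -> 384 x  8 x  14 x  14
    #   conv5   (1,2,2) -> 768 x  8 x   7 x   7
    #   globalpool (8,7,7) -> 768-d vector fed to the classifier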
def __init__(self, num_classes, pretrained=False, **kwargs):
    super(RESNET101_3D_GCN_X5, self).__init__()

    groups = 1
    k_sec = {2: 3, 3: 4, 4: 23, 5: 3}

    # conv1 - x112 (x16)
    conv1_num_out = 32
    self.conv1 = nn.Sequential(OrderedDict([
        ('conv', nn.Conv3d(3, conv1_num_out, kernel_size=(3, 5, 5),
                           padding=(1, 2, 2), stride=(1, 2, 2), bias=False)),
        ('bn', nn.BatchNorm3d(conv1_num_out, eps=1e-04)),
        ('relu', nn.ReLU(inplace=True)),
        ('max_pool', nn.MaxPool3d(kernel_size=(1, 3, 3), padding=(0, 1, 1),
                                  stride=(1, 2, 2))),
    ]))

    # conv2 - x56 (x16)
    num_mid = 64
    conv2_num_out = 256
    self.conv2 = nn.Sequential(OrderedDict([
        ("B%02d" % i, RESIDUAL_BLOCK(num_in=conv1_num_out if i == 1 else conv2_num_out,
                                     num_mid=num_mid,
                                     num_out=conv2_num_out,
                                     stride=(1, 1, 1),  # no downsampling in conv2
                                     g=groups,
                                     first_block=(i == 1)))
        for i in range(1, k_sec[2] + 1)
    ]))

    # conv3 - x28 (x8)
    num_mid *= 2
    conv3_num_out = 2 * conv2_num_out
    blocks = []
    for i in range(1, k_sec[3] + 1):
        use_3d = bool(i % 2)  # alternate 3D and 2D residual blocks
        blocks.append(("B%02d" % i,
                       RESIDUAL_BLOCK(num_in=conv2_num_out if i == 1 else conv3_num_out,
                                      num_mid=num_mid,
                                      num_out=conv3_num_out,
                                      stride=(2, 2, 2) if i == 1 else (1, 1, 1),
                                      use_3d=use_3d,
                                      g=groups,
                                      first_block=(i == 1))))
        if i in [1, 3]:
            blocks.append(("B%02d_extra" % i,
                           GloRe_Unit(num_in=conv3_num_out, num_mid=num_mid)))
    self.conv3 = nn.Sequential(OrderedDict(blocks))

    # conv4 - x14 (x8)
    num_mid *= 2
    conv4_num_out = 2 * conv3_num_out
    blocks = []
    for i in range(1, k_sec[4] + 1):
        use_3d = bool(i % 2)  # alternate 3D and 2D residual blocks
        blocks.append(("B%02d" % i,
                       RESIDUAL_BLOCK(num_in=conv3_num_out if i == 1 else conv4_num_out,
                                      num_mid=num_mid,
                                      num_out=conv4_num_out,
                                      stride=(1, 2, 2) if i == 1 else (1, 1, 1),
                                      use_3d=use_3d,
                                      g=groups,
                                      first_block=(i == 1))))
        if i in [6, 12, 18]:
            blocks.append(("B%02d_extra" % i,
                           GloRe_Unit(num_in=conv4_num_out, num_mid=num_mid)))
    self.conv4 = nn.Sequential(OrderedDict(blocks))

    # conv5 - x7 (x4)
    num_mid *= 2
    conv5_num_out = 2 * conv4_num_out
    self.conv5 = nn.Sequential(OrderedDict([
        ("B%02d" % i, RESIDUAL_BLOCK(num_in=conv4_num_out if i == 1 else conv5_num_out,
                                     num_mid=num_mid,
                                     num_out=conv5_num_out,
                                     stride=(1, 2, 2) if i == 1 else (1, 1, 1),
                                     g=groups,
                                     use_3d=(i == 2),
                                     first_block=(i == 1)))
        for i in range(1, k_sec[5] + 1)
    ]))

    # final
    self.tail = nn.Sequential(OrderedDict([
        ('bn', nn.BatchNorm3d(conv5_num_out, eps=1e-04)),
        ('relu', nn.ReLU(inplace=True)),
    ]))
    self.globalpool = nn.Sequential(OrderedDict([
        ('avg', nn.AvgPool3d(kernel_size=(4, 7, 7), stride=(1, 1, 1))),
        ('dropout', nn.Dropout(p=0.5)),
    ]))
    self.classifier = nn.Linear(conv5_num_out, num_classes)

    #############
    # Initialization
    initializer.xavier(net=self)

    if pretrained:
        import torch
        load_method = 'inflation'  # 'random', 'inflation'
        pretrained_model = os.path.join(
            os.path.dirname(os.path.realpath(__file__)),
            'pretrained/resnet101-lite.pth')
        logging.info("Network:: symbol initialized, using pretrained model: "
                     "`{}'".format(pretrained_model))
        assert os.path.exists(pretrained_model), \
            "cannot locate: `{}'".format(pretrained_model)
        state_dict_2d = torch.load(pretrained_model)
        initializer.init_3d_from_2d_dict(net=self,
                                         state_dict=state_dict_2d,
                                         method=load_method)
    else:
        logging.info("Network:: symbol initialized, use random initialization!")

    blocker_name_list = []
    for name, param in self.state_dict().items():
        if name.endswith('blocker.weight'):
            blocker_name_list.append(name)
            param[:] = 0.
    if len(blocker_name_list) > 0:
        logging.info("Network:: set params of the following layers to zeros: "
                     "{}".format(blocker_name_list))
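    # Reading of the zeroing loop above (an interpretation, not stated in the
    # source): parameters named '*.blocker.weight' gate the output of each
    # GloRe_Unit, so starting them at zero makes every unit act as an identity
    # residual at initialization; training then learns how much global
    # reasoning to mix back in.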
def __init__(self, use_fau, num_classes, pretrained=False, **kwargs):
    super(MFNET_3D, self).__init__()

    groups = 16
    k_sec = {2: 3, 3: 4, 4: 6, 5: 3}

    # conv1 - x224 (x16)
    conv1_num_out = 16
    self.conv1 = nn.Sequential(OrderedDict([
        ('conv', nn.Conv3d(3, conv1_num_out, kernel_size=(3, 5, 5),
                           padding=(1, 2, 2), stride=(1, 2, 2), bias=False)),
        ('bn', nn.BatchNorm3d(conv1_num_out)),
        ('relu', nn.ReLU(inplace=True)),
    ]))
    self.maxpool = nn.MaxPool3d(kernel_size=(1, 3, 3), stride=(1, 2, 2),
                                padding=(0, 1, 1))

    # conv2 - x56 (x8)
    num_mid = 96
    conv2_num_out = 96
    self.conv2 = nn.Sequential(OrderedDict([
        ("B%02d" % i, MF_UNIT(num_in=conv1_num_out if i == 1 else conv2_num_out,
                              num_mid=num_mid,
                              num_out=conv2_num_out,
                              stride=(2, 1, 1) if i == 1 else (1, 1, 1),
                              g=groups,
                              first_block=(i == 1)))
        for i in range(1, k_sec[2] + 1)
    ]))

    # conv3 - x28 (x8)
    num_mid *= 2
    conv3_num_out = 2 * conv2_num_out
    self.conv3 = nn.Sequential(OrderedDict([
        ("B%02d" % i, MF_UNIT(num_in=conv2_num_out if i == 1 else conv3_num_out,
                              num_mid=num_mid,
                              num_out=conv3_num_out,
                              stride=(1, 2, 2) if i == 1 else (1, 1, 1),
                              g=groups,
                              first_block=(i == 1)))
        for i in range(1, k_sec[3] + 1)
    ]))

    # conv4 - x14 (x8)
    num_mid *= 2
    conv4_num_out = 2 * conv3_num_out
    self.conv4 = nn.Sequential(OrderedDict([
        ("B%02d" % i, MF_UNIT(num_in=conv3_num_out if i == 1 else conv4_num_out,
                              num_mid=num_mid,
                              num_out=conv4_num_out,
                              stride=(1, 2, 2) if i == 1 else (1, 1, 1),
                              g=groups,
                              first_block=(i == 1)))
        for i in range(1, k_sec[4] + 1)
    ]))

    # conv5 - x7 (x8)
    num_mid *= 2
    conv5_num_out = 2 * conv4_num_out
    self.conv5 = nn.Sequential(OrderedDict([
        ("B%02d" % i, MF_UNIT(num_in=conv4_num_out if i == 1 else conv5_num_out,
                              num_mid=num_mid,
                              num_out=conv5_num_out,
                              stride=(1, 2, 2) if i == 1 else (1, 1, 1),
                              g=groups,
                              first_block=(i == 1)))
        for i in range(1, k_sec[5] + 1)
    ]))

    # final
    self.tail = nn.Sequential(OrderedDict([
        ('bn', nn.BatchNorm3d(conv5_num_out)),
        ('relu', nn.ReLU(inplace=True)),
    ]))
    self.globalpool = nn.Sequential(OrderedDict([
        ('avg', nn.AvgPool3d(kernel_size=(8, 7, 7), stride=(1, 1, 1))),
        # ('dropout', nn.Dropout(p=0.5)),  # only for fine-tuning
    ]))
    self.classifier = nn.Linear(conv5_num_out, num_classes)

    self.use_fau = use_fau
    if use_fau > -1:
        c1 = c2 = 32
        d1 = 64
        d2 = 32
        d3 = 256
        kernel2 = FAUKernel_3d(c1, c2, latent_dim1=d1, latent_dim2=d2)  # 28x28 -> 64; t*d1 = 8*64 -> 32
        kernel1 = FAUKernel_thw(c1, latent_dim=d3)  # 8x28x28 = 6272
        ks = [kernel1, kernel2]
        k = ks[use_fau]
        logging.debug(f'using kernel[{use_fau}] {k}')
        self.faul = FAULayer_3d(in_channels=192, kernel=k, kq_channels=c1)

    #############
    # Initialization
    initializer.xavier(net=self)

    if pretrained:
        import torch
        load_method = 'inflation'  # 'random', 'inflation'
        pretrained_model = os.path.join(
            os.path.dirname(os.path.realpath(__file__)),
            'pretrained', 'MFNet2D_ImageNet1k-0000.pth')
        logging.info("Network:: graph initialized, loading pretrained model: "
                     "`{}'".format(pretrained_model))
        assert os.path.exists(pretrained_model), \
            "cannot locate: `{}'".format(pretrained_model)
        state_dict_2d = torch.load(pretrained_model)
        initializer.init_3d_from_2d_dict(net=self,
                                         state_dict=state_dict_2d,
                                         method=load_method)
    else:
        logging.info("Network:: graph initialized, use random initialization!")
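    # Kernel-selection sketch (derived from the branch above): use_fau picks
    # which FAU kernel is attached via FAULayer_3d (in_channels=192 matches
    # conv3_num_out, so the layer presumably operates on conv3 features).
    #   use_fau = -1 -> no FAU layer
    #   use_fau =  0 -> FAUKernel_thw over the full 8x28x28 volume
    #   use_fau =  1 -> FAUKernel_3d with factorized latent dims 64 and 32
    # e.g. net = MFNET_3D(use_fau=1, num_classes=400, pretrained=False)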
                              first_block=(i == 1)))
    ]))

    #############
    # Initialization
    initializer.xavier(net=self)

    if pretrained:
        import torch
        load_method = 'inflation'  # 'random', 'inflation'
        pretrained_model = os.path.join(
            os.path.dirname(os.path.realpath(__file__)),
            'pretrained', 'MFNet2D_ImageNet1k-0000.pth')
        logging.info("Network:: graph initialized, loading pretrained model: "
                     "`{}'".format(pretrained_model))
        assert os.path.exists(pretrained_model), \
            "cannot locate: `{}'".format(pretrained_model)
        state_dict_2d = torch.load(pretrained_model, map_location='cpu')
        initializer.init_3d_from_2d_dict(net=self,
                                         state_dict=state_dict_2d,
                                         method=load_method)
    else:
        logging.info("Network:: graph initialized, use random initialization!")

def forward(self, x):
    assert x.shape[2] == 16  # the network expects 16-frame clips

    h = self.conv1(x)    # x224 -> x112
    h = self.maxpool(h)  # x112 -> x56

    h = self.conv2(h)    # x56 -> x56
    h = self.conv3(h)    # x56 -> x28
    h = self.conv4(h)    # x28 -> x14
    h = self.conv5(h)    # x14 -> x7
def __init__(self, num_classes, pretrained=False, **kwargs):
    super(MFBPNET_3D_LATERAL_STABLE, self).__init__()

    groups = 16
    k_sec = {2: 3, 3: 4, 4: 6, 5: 3}

    # conv1 - x224 (x16)
    conv1_num_out = 16
    self.conv1 = nn.Sequential(OrderedDict([
        ('conv', nn.Conv3d(3, conv1_num_out, kernel_size=(3, 5, 5),
                           padding=(1, 2, 2), stride=(1, 2, 2), bias=False)),
        ('bn', nn.BatchNorm3d(conv1_num_out)),
        ('relu', nn.ReLU(inplace=True)),
    ]))
    self.maxpool = nn.MaxPool3d(kernel_size=(1, 3, 3), stride=(1, 2, 2),
                                padding=(0, 1, 1))

    # conv2 - x56 (x8)
    num_mid = 96
    conv2_num_out = 96
    self.conv2 = nn.Sequential(OrderedDict([
        ("B%02d" % i, MF_UNIT(num_in=conv1_num_out if i == 1 else conv2_num_out,
                              num_mid=num_mid,
                              num_out=conv2_num_out,
                              stride=(2, 1, 1) if i == 1 else (1, 1, 1),
                              g=groups,
                              first_block=(i == 1)))
        for i in range(1, k_sec[2] + 1)
    ]))

    # conv3 - x28 (x8)
    num_mid *= 2
    conv3_num_out = 2 * conv2_num_out
    self.conv3 = nn.Sequential(OrderedDict([
        ("B%02d" % i, MF_UNIT(num_in=conv2_num_out if i == 1 else conv3_num_out,
                              num_mid=num_mid,
                              num_out=conv3_num_out,
                              stride=(1, 2, 2) if i == 1 else (1, 1, 1),
                              g=groups,
                              first_block=(i == 1)))
        for i in range(1, k_sec[3] + 1)
    ]))

    # conv4 - x14 (x8)
    num_mid *= 2
    conv4_num_out = 2 * conv3_num_out
    self.conv4 = nn.Sequential(OrderedDict([
        ("B%02d" % i, MF_UNIT(num_in=conv3_num_out if i == 1 else conv4_num_out,
                              num_mid=num_mid,
                              num_out=conv4_num_out,
                              stride=(1, 2, 2) if i == 1 else (1, 1, 1),
                              g=groups,
                              first_block=(i == 1)))
        for i in range(1, k_sec[4] + 1)
    ]))

    # conv5 - x7 (x8)
    num_mid *= 2
    conv5_num_out = 2 * conv4_num_out
    self.conv5 = nn.Sequential(OrderedDict([
        ("B%02d" % i, MF_UNIT(num_in=conv4_num_out if i == 1 else conv5_num_out,
                              num_mid=num_mid,
                              num_out=conv5_num_out,
                              stride=(1, 2, 2) if i == 1 else (1, 1, 1),
                              g=groups,
                              first_block=(i == 1)))
        for i in range(1, k_sec[5] + 1)
    ]))

    # final
    self.tail = nn.Sequential(OrderedDict([
        ('bn', nn.BatchNorm3d(conv5_num_out)),
        ('relu', nn.ReLU(inplace=True)),
    ]))
    self.globalpool = nn.Sequential(OrderedDict([
        ('avg', nn.AvgPool3d(kernel_size=(8, 7, 7), stride=(1, 1, 1))),
        # ('dropout', nn.Dropout(p=0.5)),  # only for fine-tuning
    ]))

    # Classifies directly after concatenating the two pooled features
    # (Run ID 8, 9, 12, 13)
    self.concat_classifier = nn.Linear(2 * conv5_num_out, num_classes)

    # The following replaces the original global pooling with a combination of
    # compact bilinear pooling and average pooling, followed by multiple
    # linear layers.
    self.cbp_out = 5 * conv5_num_out  # number of bilinear channels
    # Switch the CP/CP_attn import for the stable/attention versions;
    # CP_attn is used for ablation study ATN 4.
    self.combinedpool = CP.CombinedPooling(num_in=conv5_num_out,
                                           num_out=self.cbp_out,
                                           num_mid1=4 * conv5_num_out,
                                           num_mid2=2 * conv5_num_out,
                                           kernel_s=(1, 7, 7),
                                           kernel_t=(7, 1, 1),
                                           pad=0, stride=1)

    # Sigmoid for attention (comment out for Run ID < 54)
    self.sigmoid = nn.Sigmoid()

    #############
    # Initialization
    initializer.xavier(net=self)

    if pretrained:
        import torch
        load_method = 'inflation'  # 'random', 'inflation'
        pretrained_model = os.path.join(
            os.path.dirname(os.path.realpath(__file__)),
            'pretrained/MFNet2D_ImageNet1k-0000.pth')
        logging.info("Network:: graph initialized, loading pretrained model: "
                     "`{}'".format(pretrained_model))
        assert os.path.exists(pretrained_model), \
            "cannot locate: `{}'".format(pretrained_model)
        state_dict_2d = torch.load(pretrained_model)
        initializer.init_3d_from_2d_dict(net=self,
                                         state_dict=state_dict_2d,
                                         method=load_method)
    else:
        logging.info("Network:: graph initialized, use random initialization!")
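    # Head-width sketch (derived from the constants above): with
    # conv5_num_out = 768, concat_classifier consumes 2 * 768 = 1536 features
    # (two pooled branches concatenated), while the combined-pooling branch
    # expands to cbp_out = 5 * 768 = 3840 bilinear features.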
def __init__(self, num_classes, pretrained=False, **kwargs):
    super(MFNET_3D, self).__init__()

    groups = 16  # group count passed to each MF_UNIT
    k_sec = {2: 3, 3: 4, 4: 6, 5: 3}

    # conv1 - x224 (x16)
    conv1_num_out = 16
    self.conv1 = nn.Sequential(OrderedDict([
        ('conv', nn.Conv3d(3, conv1_num_out, kernel_size=(3, 5, 5),
                           padding=(1, 2, 2), stride=(1, 2, 2), bias=False)),
        ('bn', nn.BatchNorm3d(conv1_num_out)),
        ('relu', nn.ReLU(inplace=True)),
    ]))
    self.maxpool = nn.MaxPool3d(kernel_size=(1, 3, 3), stride=(1, 2, 2),
                                padding=(0, 1, 1))

    # first block of MF units in our video model
    # conv2 - x56 (x16)
    num_mid = 96  # feature-map width; it doubles in the next stage
    conv2_num_out = 96
    self.conv2 = nn.Sequential(OrderedDict([
        ("B%02d" % i, MF_UNIT(num_in=conv1_num_out if i == 1 else conv2_num_out,
                              num_mid=num_mid,
                              num_out=conv2_num_out,
                              stride=(1, 1, 1),  # keep the temporal dimension (the first stride entry controls it)
                              g=groups,
                              first_block=(i == 1)))
        for i in range(1, k_sec[2] + 1)
    ]))

    # second block of MF units in our video model
    # conv3 - x28 (x16)
    num_mid *= 2
    conv3_num_out = 2 * conv2_num_out
    self.conv3 = nn.Sequential(OrderedDict([
        ("B%02d" % i, MF_UNIT(num_in=conv2_num_out if i == 1 else conv3_num_out,
                              num_mid=num_mid,
                              num_out=conv3_num_out,
                              stride=(1, 2, 2) if i == 1 else (1, 1, 1),  # keep the temporal dimension, halve the spatial one
                              g=groups,
                              first_block=(i == 1)))
        for i in range(1, k_sec[3] + 1)
    ]))

    # final
    self.tail = nn.Sequential(OrderedDict([
        ('bn', nn.BatchNorm3d(conv3_num_out)),
        ('relu', nn.ReLU(inplace=True)),
    ]))
    self.globalpool = nn.Sequential(OrderedDict([
        ('avg', nn.AvgPool3d(kernel_size=(16, 28, 28), stride=(1, 1, 1))),
        # temporal kernel: the author notes this is half of the input length
        ('dropout', nn.Dropout(p=0.5)),  # only to counter overfitting
    ]))
    self.fc = nn.Linear(conv3_num_out, num_classes)  # pooled 192-d feature -> num_classes

    #############
    # Initialization
    initializer.xavier(net=self)

    if pretrained:
        import torch
        load_method = 'inflation'  # 'random', 'inflation'
        pretrained_model = os.path.join(
            os.path.dirname(os.path.realpath(__file__)),
            'pretrained/MFNet2D_ImageNet1k-0000.pth')
        logging.info("Network:: graph initialized, loading pretrained model: "
                     "`{}'".format(pretrained_model))
        assert os.path.exists(pretrained_model), \
            "cannot locate: `{}'".format(pretrained_model)
        state_dict_2d = torch.load(pretrained_model)
        initializer.init_3d_from_2d_dict(net=self,
                                         state_dict=state_dict_2d,
                                         method=load_method)
    else:
        logging.info("Network:: graph initialized, use random initialization!")
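    # Shape sketch (an assumption: every temporal stride above is 1, so the
    # (16, 28, 28) pooling kernel must equal the clip length reaching conv3;
    # the "half of the input length" note suggests frames may be subsampled
    # before entering the network):
    #   conv1 + maxpool -> 16  x 16 x 56 x 56
    #   conv2           -> 96  x 16 x 56 x 56
    #   conv3           -> 192 x 16 x 28 x 28
    #   globalpool      -> 192-d vector -> fc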
def __init__(self, hash_bit, pretrained=False, **kwargs):
    super(MFNET_3D, self).__init__()
    self.hash_bit = hash_bit

    groups = 16
    k_sec = {2: 3, 3: 4, 4: 6, 5: 3}

    # conv1 - x224 (x16)
    conv1_num_out = 16
    self.conv1 = nn.Sequential(OrderedDict([
        ('conv', nn.Conv3d(3, conv1_num_out, kernel_size=(3, 5, 5),
                           padding=(1, 2, 2), stride=(1, 2, 2), bias=False)),
        ('bn', nn.BatchNorm3d(conv1_num_out)),
        ('relu', nn.ReLU(inplace=True)),
    ]))
    self.maxpool = nn.MaxPool3d(kernel_size=(1, 3, 3), stride=(1, 2, 2),
                                padding=(0, 1, 1))

    # conv2 - x56 (x8)
    num_mid = 96
    conv2_num_out = 96
    self.conv2 = nn.Sequential(OrderedDict([
        ("B%02d" % i, MF_UNIT(num_in=conv1_num_out if i == 1 else conv2_num_out,
                              num_mid=num_mid,
                              num_out=conv2_num_out,
                              stride=(2, 1, 1) if i == 1 else (1, 1, 1),
                              g=groups,
                              first_block=(i == 1)))
        for i in range(1, k_sec[2] + 1)
    ]))

    # conv3 - x28 (x8)
    num_mid *= 2
    conv3_num_out = 2 * conv2_num_out
    self.conv3 = nn.Sequential(OrderedDict([
        ("B%02d" % i, MF_UNIT(num_in=conv2_num_out if i == 1 else conv3_num_out,
                              num_mid=num_mid,
                              num_out=conv3_num_out,
                              stride=(1, 2, 2) if i == 1 else (1, 1, 1),
                              g=groups,
                              first_block=(i == 1)))
        for i in range(1, k_sec[3] + 1)
    ]))

    # conv4 - x14 (x8)
    num_mid *= 2
    conv4_num_out = 2 * conv3_num_out
    self.conv4 = nn.Sequential(OrderedDict([
        ("B%02d" % i, MF_UNIT(num_in=conv3_num_out if i == 1 else conv4_num_out,
                              num_mid=num_mid,
                              num_out=conv4_num_out,
                              stride=(1, 2, 2) if i == 1 else (1, 1, 1),
                              g=groups,
                              first_block=(i == 1)))
        for i in range(1, k_sec[4] + 1)
    ]))

    # conv5 - x7 (x8)
    num_mid *= 2
    conv5_num_out = 2 * conv4_num_out
    self.conv5 = nn.Sequential(OrderedDict([
        ("B%02d" % i, MF_UNIT(num_in=conv4_num_out if i == 1 else conv5_num_out,
                              num_mid=num_mid,
                              num_out=conv5_num_out,
                              stride=(1, 2, 2) if i == 1 else (1, 1, 1),
                              g=groups,
                              first_block=(i == 1)))
        for i in range(1, k_sec[5] + 1)
    ]))

    # final
    self.tail = nn.Sequential(OrderedDict([
        ('bn', nn.BatchNorm3d(conv5_num_out)),
        ('relu', nn.ReLU(inplace=True)),
    ]))
    self.globalpool = nn.Sequential(OrderedDict([
        ('avg', nn.AvgPool3d(kernel_size=(8, 7, 7), stride=(1, 1, 1))),
        # ('dropout', nn.Dropout(p=0.5)),  # only for fine-tuning
    ]))

    # self.classifier = nn.Linear(conv5_num_out, num_classes)
    self.fc1 = nn.Linear(conv5_num_out, conv5_num_out)
    self.activation1 = nn.ReLU()
    self.fc2 = nn.Linear(conv5_num_out, conv5_num_out)
    self.activation2 = nn.ReLU()
    self.fc3 = nn.Linear(conv5_num_out, self.hash_bit)
    self.last_layer = nn.Tanh()
    self.hash_layer = nn.Sequential(self.fc1, self.activation1,
                                    self.fc2, self.activation2,
                                    self.fc3, self.last_layer)

    #############
    # Initialization
    print("\n=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=")
    print("Initializer:: Info")
    initializer.xavier(net=self)
    if pretrained:
        import torch
        load_method = 'inflation'  # 'random', 'inflation'
        pretrained_model = os.path.join(
            os.path.dirname(os.path.realpath(__file__)),
            'pretrained/MFNet2D_ImageNet1k-0000.pth')
        assert os.path.exists(pretrained_model), \
            "cannot locate: `{}'".format(pretrained_model)
        state_dict_2d = torch.load(pretrained_model)
        initializer.init_3d_from_2d_dict(net=self,
                                         state_dict=state_dict_2d,
                                         method=load_method)
    print("=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=")
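    # Retrieval sketch (illustrative, not from the original source): the Tanh
    # head emits codes in (-1, 1); a common follow-up is to binarize with
    # torch.sign() at index/query time, e.g.
    #   feats = ...                                # pooled 768-d descriptors
    #   codes = torch.sign(net.hash_layer(feats))  # entries in {-1, +1}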
def __init__(self, num_classes, pretrained=False, **kwargs):
    super(MFNET_3D, self).__init__()

    groups = 16
    k_sec = {2: 3, 3: 4, 4: 6, 5: 3}

    # conv1 - x224 (x16)
    conv1_num_out = 16
    self.conv1 = nn.Sequential(OrderedDict([
        ('conv', nn.Conv3d(3, conv1_num_out, kernel_size=(3, 5, 5),
                           padding=(1, 2, 2), stride=(1, 2, 2), bias=False)),
        ('bn', nn.BatchNorm3d(conv1_num_out)),
        ('relu', nn.ReLU(inplace=True)),
    ]))
    self.maxpool = nn.MaxPool3d(kernel_size=(1, 3, 3), stride=(1, 2, 2),
                                padding=(0, 1, 1))

    # conv2 - x56 (x8)
    num_mid = 96
    conv2_num_out = 96
    self.conv2 = nn.Sequential(OrderedDict([
        ("B%02d" % i, MF_UNIT(num_in=conv1_num_out if i == 1 else conv2_num_out,
                              num_mid=num_mid,
                              num_out=conv2_num_out,
                              stride=(2, 1, 1) if i == 1 else (1, 1, 1),
                              g=groups,
                              first_block=(i == 1)))
        for i in range(1, k_sec[2] + 1)
    ]))

    # conv3 - x28 (x8)
    num_mid *= 2
    conv3_num_out = 2 * conv2_num_out
    self.conv3 = nn.Sequential(OrderedDict([
        ("B%02d" % i, MF_UNIT(num_in=conv2_num_out if i == 1 else conv3_num_out,
                              num_mid=num_mid,
                              num_out=conv3_num_out,
                              stride=(1, 2, 2) if i == 1 else (1, 1, 1),
                              g=groups,
                              first_block=(i == 1)))
        for i in range(1, k_sec[3] + 1)
    ]))

    # conv4 - x14 (x8)
    num_mid *= 2
    conv4_num_out = 2 * conv3_num_out
    self.conv4 = nn.Sequential(OrderedDict([
        ("B%02d" % i, MF_UNIT(num_in=conv3_num_out if i == 1 else conv4_num_out,
                              num_mid=num_mid,
                              num_out=conv4_num_out,
                              stride=(1, 2, 2) if i == 1 else (1, 1, 1),
                              g=groups,
                              first_block=(i == 1)))
        for i in range(1, k_sec[4] + 1)
    ]))

    # conv5 - x7 (x8)
    num_mid *= 2
    conv5_num_out = 2 * conv4_num_out
    self.conv5 = nn.Sequential(OrderedDict([
        ("B%02d" % i, MF_UNIT(num_in=conv4_num_out if i == 1 else conv5_num_out,
                              num_mid=num_mid,
                              num_out=conv5_num_out,
                              stride=(1, 2, 2) if i == 1 else (1, 1, 1),
                              g=groups,
                              first_block=(i == 1)))
        for i in range(1, k_sec[5] + 1)
    ]))

    # final
    self.tail = nn.Sequential(OrderedDict([
        ('bn', nn.BatchNorm3d(conv5_num_out)),
        ('relu', nn.ReLU(inplace=True)),
    ]))
    self.globalpool = nn.Sequential(OrderedDict([
        ('avg', nn.AvgPool3d(kernel_size=(8, 7, 7), stride=(1, 1, 1))),
        # ('dropout', nn.Dropout(p=0.5)),  # only for fine-tuning
    ]))

    # self.classifier = nn.Linear(conv5_num_out, num_classes)
    # new classifier: metric-learning fc layer
    self.classifier = metric.ArcFace(conv5_num_out, num_classes)
    # Alternative metric heads, selected by CONFIG.metric:
    # if CONFIG.metric == 'adacos':
    #     metric_fc = metric.AdaCos(in_features, CONFIG.n_classes)
    # elif CONFIG.metric == 'arcface':
    #     metric_fc = metric.ArcFace(in_features, CONFIG.n_classes)
    # elif CONFIG.metric == 'l2constrained':
    #     metric_fc = metric.L2ConstrainedLinear(in_features, CONFIG.n_classes)
    # else:
    #     metric_fc = nn.Linear(in_features, CONFIG.n_classes)  # plain fc

    self.tam = TemporalAttentionModule()
    self.cam = ChannelAttentionModule()

    #############
    # Initialization
    initializer.xavier(net=self)

    if pretrained:
        import torch
        load_method = 'inflation'  # 'random', 'inflation'
        pretrained_model = os.path.join(
            os.path.dirname(os.path.realpath(__file__)),
            'pretrained/MFNet2D_ImageNet1k-0000.pth')
        logging.info("Network:: graph initialized, loading pretrained model: "
                     "`{}'".format(pretrained_model))
        assert os.path.exists(pretrained_model), \
            "cannot locate: `{}'".format(pretrained_model)
        state_dict_2d = torch.load(pretrained_model)
        initializer.init_3d_from_2d_dict(net=self,
                                         state_dict=state_dict_2d,
                                         method=load_method)
    else:
        logging.info("Network:: graph initialized, use random initialization!")
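    # Note on the metric head (an assumption; metric.ArcFace is defined
    # elsewhere and its signature is not shown here): ArcFace-style layers
    # typically need the ground-truth labels in the forward pass to apply the
    # angular margin, e.g.
    #   logits = self.classifier(pooled_features, labels)  # training
    # whereas plain cosine logits are used at inference time.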