def __init__(self, num_classes, pretrained=True, pool_first=True, **kwargs):
    """Wrap torchvision's R3D-18 video backbone.

    Args:
        num_classes: size of the final classification layer.
        pretrained: if True, load a local checkpoint from the
            ``pretrained/`` directory next to this file (no download).
        pool_first: accepted for interface compatibility; not used here.
        **kwargs: forwarded to torchvision's ``r3d_18`` constructor.
    """
    super(RESNET18, self).__init__()
    # Always build with random weights; local checkpoint is applied below.
    self.resnet = torchvision.models.video.r3d_18(pretrained=False,
                                                  progress=False,
                                                  num_classes=num_classes,
                                                  **kwargs)
    #############
    # Initialization
    initializer.xavier(net=self)
    if pretrained:
        pretrained_model = os.path.join(
            os.path.dirname(os.path.realpath(__file__)),
            'pretrained/r3d_18-b3b3357e.pth')
        logging.info(
            "Network:: graph initialized, loading pretrained model: `{}'".
            format(pretrained_model))
        assert os.path.exists(
            pretrained_model), "cannot locate: `{}'".format(pretrained_model)
        # Use a distinct name for the loaded weights instead of shadowing
        # the boolean `pretrained` parameter.
        checkpoint = torch.load(pretrained_model)
        # load_state(self.resnet, checkpoint['state_dict'])
        load_state(self.resnet, checkpoint)
    else:
        logging.info("Network:: graph initialized, use random inilization!")
def __init__(self, num_classes, pretrained=False, **kwargs):
    """Build an MFNet-style 3D backbone with a motion extractor after
    conv4 and a classifier that concatenates pooled conv5 features with
    position-embedding features.

    Args:
        num_classes: size of the final classification layer.
        pretrained: if True, inflate 2D ImageNet MFNet weights into 3D.
        **kwargs: accepted for interface compatibility; not used here.
    """
    super(MFNET_FIVEP_LINEAR15_FRAME, self).__init__()
    groups = 16  # group cardinality passed to each MF_UNIT
    k_sec = {2: 3, \
             3: 4, \
             4: 6, \
             5: 3}  # number of MF_UNIT blocks per stage (conv2..conv5)

    # conv1 - x224 (x16)
    conv1_num_out = 16
    self.conv1 = nn.Sequential(
        OrderedDict([('conv',
                      nn.Conv3d(3,
                                conv1_num_out,
                                kernel_size=(3, 5, 5),
                                padding=(1, 2, 2),
                                stride=(1, 2, 2),
                                bias=False)),
                     ('bn', nn.BatchNorm3d(conv1_num_out)),
                     ('relu', nn.ReLU(inplace=True))]))
    self.maxpool = nn.MaxPool3d(kernel_size=(1, 3, 3),
                                stride=(1, 2, 2),
                                padding=(0, 1, 1))

    # conv2 - x56 (x8); first block halves the temporal dimension
    num_mid = 96
    conv2_num_out = 96
    self.conv2 = nn.Sequential(
        OrderedDict([
            ("B%02d" % i,
             MF_UNIT(num_in=conv1_num_out if i == 1 else conv2_num_out,
                     num_mid=num_mid,
                     num_out=conv2_num_out,
                     stride=(2, 1, 1) if i == 1 else (1, 1, 1),
                     g=groups,
                     first_block=(i == 1))) for i in range(1, k_sec[2] + 1)
        ]))

    # conv3 - x28 (x8); channels double, first block downsamples spatially
    num_mid *= 2
    conv3_num_out = 2 * conv2_num_out
    self.conv3 = nn.Sequential(
        OrderedDict([
            ("B%02d" % i,
             MF_UNIT(num_in=conv2_num_out if i == 1 else conv3_num_out,
                     num_mid=num_mid,
                     num_out=conv3_num_out,
                     stride=(1, 2, 2) if i == 1 else (1, 1, 1),
                     g=groups,
                     first_block=(i == 1))) for i in range(1, k_sec[3] + 1)
        ]))

    # conv4 - x14 (x8)
    num_mid *= 2
    conv4_num_out = 2 * conv3_num_out
    self.conv4 = nn.Sequential(
        OrderedDict([
            ("B%02d" % i,
             MF_UNIT(num_in=conv3_num_out if i == 1 else conv4_num_out,
                     num_mid=num_mid,
                     num_out=conv4_num_out,
                     stride=(1, 2, 2) if i == 1 else (1, 1, 1),
                     g=groups,
                     first_block=(i == 1))) for i in range(1, k_sec[4] + 1)
        ]))

    # conv5 - x7 (x8)
    num_mid *= 2
    conv5_num_out = 2 * conv4_num_out
    self.conv5 = nn.Sequential(
        OrderedDict([
            ("B%02d" % i,
             MF_UNIT(num_in=conv4_num_out if i == 1 else conv5_num_out,
                     num_mid=num_mid,
                     num_out=conv5_num_out,
                     stride=(1, 2, 2) if i == 1 else (1, 1, 1),
                     g=groups,
                     first_block=(i == 1))) for i in range(1, k_sec[5] + 1)
        ]))

    # Define motion extractor after conv4
    # (sic: attribute name keeps the original "exctractor" spelling)
    self.motion_exctractor = Motion_Exctractor_MEAN(
        inplanes=conv4_num_out,
        iterplanes=conv3_num_out,
        outplanes=conv2_num_out,
        num_embedding=20)

    # final BN+ReLU tail applied to conv5 output
    self.tail = nn.Sequential(
        OrderedDict([('bn', nn.BatchNorm3d(conv5_num_out)),
                     ('relu', nn.ReLU(inplace=True))]))

    # self.globalpool = nn.Sequential(OrderedDict([
    #                 ('avg', nn.AvgPool3d(kernel_size=(8,7,7),  stride=(1,1,1))),
    #                 ('dropout', nn.Dropout(p=0.5)), only for fine-tuning
    #                 ]))
    # self.classifier = nn.Linear(conv5_num_out, num_classes)

    # Position related Linear Layers (input one-hot Dec-5)
    self.emb_prepool = nn.Sequential(
        OrderedDict([
            ('avg', nn.AvgPool3d(kernel_size=(1, 7, 7), stride=(1, 1, 1))),
            # ('dropout', nn.Dropout(p=0.5)), only for fine-tuning
        ]))
    self.emb_postpool = nn.AvgPool1d(kernel_size=8)
    # Classifier input = pooled conv5 features + 7 embedding feature
    # slots of conv2 width (presumably 7 positions — TODO confirm with
    # the forward pass, which is not visible here).
    self.classifier = nn.Linear(conv5_num_out + conv2_num_out * 7,
                                num_classes)

    #############
    # Initialization
    initializer.xavier(net=self)
    if pretrained:
        pretrained_model = os.path.join(
            os.path.dirname(os.path.realpath(__file__)),
            'pretrained/MFNet2D_ImageNet1k-0000.pth')
        logging.info(
            "Network:: graph initialized, loading pretrained model: `{}'".
            format(pretrained_model))
        assert os.path.exists(
            pretrained_model), "cannot locate: `{}'".format(pretrained_model)
        # Inflate the 2D ImageNet weights into this 3D network.
        state_dict_2d = torch.load(pretrained_model)
        initializer.init_3d_from_2d_dict(net=self,
                                         state_dict=state_dict_2d,
                                         method='inflation')
    else:
        logging.info("Network:: graph initialized, use random inilization!")
def __init__(
        self,
        depth=50,
        pretrained=True,
        # pretrained2d=True,
        num_stages=4,
        spatial_strides=(1, 2, 2, 2),
        temporal_strides=(1, 1, 1, 1),
        dilations=(1, 1, 1, 1),
        out_indices=[3],
        conv1_kernel_t=5,
        conv1_stride_t=2,
        pool1_kernel_t=1,
        pool1_stride_t=2,
        style='pytorch',
        frozen_stages=-1,
        inflate_freq=((1, 1, 1), (1, 0, 1, 0), (1, 0, 1, 0, 1, 0),
                      (0, 1, 0)),  # For C2D baseline, this is set to -1.
        inflate_stride=(1, 1, 1, 1),
        inflate_style='3x1x1',
        nonlocal_stages=(-1, ),
        nonlocal_freq=(0, 0, 0, 0),
        # nonlocal_freq=(0, 1, 1, 0),  # Default setting
        nonlocal_cfg=None,
        bn_eval=False,
        bn_frozen=False,
        partial_bn=False,
        with_cp=False,
        num_classes=11):
    """Build an I3D-style 3D ResNet with optional non-local blocks.

    depth selects the architecture from ``self.arch_settings``; the
    per-stage strides/dilations/inflation settings must all have
    ``num_stages`` entries.  NOTE(review): ``inflate_stride`` is
    accepted but never stored or used below — confirm whether it is
    dead or consumed elsewhere.
    """
    super(Res_I3D, self).__init__()
    if depth not in self.arch_settings:
        raise KeyError('invalid depth {} for resnet'.format(depth))
    self.depth = depth
    self.pretrained = pretrained
    # self.pretrained2d = pretrained2d
    self.num_stages = num_stages
    assert num_stages >= 1 and num_stages <= 4
    self.spatial_strides = spatial_strides
    self.temporal_strides = temporal_strides
    self.dilations = dilations
    assert len(spatial_strides) == len(temporal_strides) == len(
        dilations) == num_stages
    self.out_indices = out_indices
    assert max(out_indices) < num_stages
    self.style = style
    self.frozen_stages = frozen_stages
    # An int inflate_freq/nonlocal_freq is broadcast to every stage.
    self.inflate_freqs = inflate_freq if not isinstance(
        inflate_freq, int) else (inflate_freq, ) * num_stages
    self.inflate_style = inflate_style
    self.nonlocal_stages = nonlocal_stages
    self.nonlocal_freqs = nonlocal_freq if not isinstance(
        nonlocal_freq, int) else (nonlocal_freq, ) * num_stages
    self.nonlocal_cfg = nonlocal_cfg
    self.bn_eval = bn_eval
    self.bn_frozen = bn_frozen
    self.partial_bn = partial_bn
    self.with_cp = with_cp

    self.block, stage_blocks = self.arch_settings[depth]
    self.stage_blocks = stage_blocks[:num_stages]
    self.inplanes = 64

    # Stem: conv1 + BN + ReLU + temporal/spatial max-pool.
    self.conv1 = nn.Conv3d(3,
                           64,
                           kernel_size=(conv1_kernel_t, 7, 7),
                           stride=(conv1_stride_t, 2, 2),
                           padding=((conv1_kernel_t - 1) // 2, 3, 3),
                           bias=False)
    self.bn1 = nn.BatchNorm3d(64)
    self.relu = nn.ReLU(inplace=True)
    self.maxpool = nn.MaxPool3d(kernel_size=(pool1_kernel_t, 3, 3),
                                stride=(pool1_stride_t, 2, 2),
                                padding=(pool1_kernel_t // 2, 1, 1))
    #TODO: Check whether pad=0 differs a lot
    self.pool2 = nn.MaxPool3d(kernel_size=(2, 1, 1),
                              stride=(2, 1, 1),
                              padding=(0, 0, 0))
    self.avgpool = SimpleSpatialTemporalModule(spatial_type='avg',
                                               temporal_size=2,
                                               spatial_size=7)
    self.cls_head = ClsHead(with_avg_pool=False,
                            temporal_feature_size=1,
                            spatial_feature_size=1,
                            dropout_ratio=0.5,
                            in_channels=2048,
                            num_classes=num_classes)

    # Build residual stages layer1..layer{num_stages}; channel width
    # doubles each stage (64, 128, 256, ...).
    self.res_layers = []
    for i, num_blocks in enumerate(self.stage_blocks):
        spatial_stride = spatial_strides[i]
        temporal_stride = temporal_strides[i]
        dilation = dilations[i]
        planes = 64 * 2**i
        res_layer = make_res_layer(self.block,
                                   self.inplanes,
                                   planes,
                                   num_blocks,
                                   spatial_stride=spatial_stride,
                                   temporal_stride=temporal_stride,
                                   dilation=dilation,
                                   style=self.style,
                                   inflate_freq=self.inflate_freqs[i],
                                   inflate_style=self.inflate_style,
                                   nonlocal_freq=self.nonlocal_freqs[i],
                                   nonlocal_cfg=self.nonlocal_cfg
                                   if i in self.nonlocal_stages else None,
                                   with_cp=with_cp)
        self.inplanes = planes * self.block.expansion
        layer_name = 'layer{}'.format(i + 1)
        self.add_module(layer_name, res_layer)
        self.res_layers.append(layer_name)

    self.feat_dim = self.block.expansion * 64 * 2**(
        len(self.stage_blocks) - 1)

    #############
    # Initialization
    initializer.xavier(net=self)
    if pretrained:
        pretrained_model = os.path.join(
            os.path.dirname(os.path.realpath(__file__)),
            'pretrained/i3d_kinetics_rgb_r50_c3d.pth')
        logging.info(
            "Network:: graph initialized, loading pretrained model: `{}'".
            format(pretrained_model))
        assert os.path.exists(
            pretrained_model), "cannot locate: `{}'".format(pretrained_model)
        # NOTE(review): this rebinding shadows the boolean `pretrained`
        # parameter with the loaded checkpoint dict.
        pretrained = torch.load(pretrained_model)
        load_state(self, pretrained['state_dict'])
    else:
        logging.info("Network:: graph initialized, use random inilization!")
def __init__(self, num_classes, pretrained=False, **kwargs):
    """Build the baseline MFNet 3D backbone (conv1..conv5 + classifier).

    Args:
        num_classes: size of the final classification layer.
        pretrained: if True, inflate 2D ImageNet MFNet weights into 3D.
        **kwargs: accepted for interface compatibility; not used here.
    """
    super(MFNET_BASE, self).__init__()
    groups = 16  # group cardinality passed to each MF_UNIT
    k_sec = {2: 3, \
             3: 4, \
             4: 6, \
             5: 3}  # number of MF_UNIT blocks per stage (conv2..conv5)

    # conv1 - x224 (x16)
    conv1_num_out = 16
    self.conv1 = nn.Sequential(OrderedDict([
        ('conv', nn.Conv3d(3, conv1_num_out, kernel_size=(3, 5, 5),
                           padding=(1, 2, 2), stride=(1, 2, 2), bias=False)),
        ('bn', nn.BatchNorm3d(conv1_num_out)),
        ('relu', nn.ReLU(inplace=True))
    ]))
    self.maxpool = nn.MaxPool3d(kernel_size=(1, 3, 3),
                                stride=(1, 2, 2),
                                padding=(0, 1, 1))

    # conv2 - x56 (x8); first block halves the temporal dimension
    num_mid = 96
    conv2_num_out = 96
    self.conv2 = nn.Sequential(OrderedDict([
        ("B%02d" % i,
         MF_UNIT(num_in=conv1_num_out if i == 1 else conv2_num_out,
                 num_mid=num_mid,
                 num_out=conv2_num_out,
                 stride=(2, 1, 1) if i == 1 else (1, 1, 1),
                 g=groups,
                 first_block=(i == 1))) for i in range(1, k_sec[2] + 1)
    ]))

    # conv3 - x28 (x8); channels double, first block downsamples spatially
    num_mid *= 2
    conv3_num_out = 2 * conv2_num_out
    self.conv3 = nn.Sequential(OrderedDict([
        ("B%02d" % i,
         MF_UNIT(num_in=conv2_num_out if i == 1 else conv3_num_out,
                 num_mid=num_mid,
                 num_out=conv3_num_out,
                 stride=(1, 2, 2) if i == 1 else (1, 1, 1),
                 g=groups,
                 first_block=(i == 1))) for i in range(1, k_sec[3] + 1)
    ]))

    # conv4 - x14 (x8)
    num_mid *= 2
    conv4_num_out = 2 * conv3_num_out
    self.conv4 = nn.Sequential(OrderedDict([
        ("B%02d" % i,
         MF_UNIT(num_in=conv3_num_out if i == 1 else conv4_num_out,
                 num_mid=num_mid,
                 num_out=conv4_num_out,
                 stride=(1, 2, 2) if i == 1 else (1, 1, 1),
                 g=groups,
                 first_block=(i == 1))) for i in range(1, k_sec[4] + 1)
    ]))

    # conv5 - x7 (x8)
    num_mid *= 2
    conv5_num_out = 2 * conv4_num_out
    self.conv5 = nn.Sequential(OrderedDict([
        ("B%02d" % i,
         MF_UNIT(num_in=conv4_num_out if i == 1 else conv5_num_out,
                 num_mid=num_mid,
                 num_out=conv5_num_out,
                 stride=(1, 2, 2) if i == 1 else (1, 1, 1),
                 g=groups,
                 first_block=(i == 1))) for i in range(1, k_sec[5] + 1)
    ]))

    # final BN+ReLU tail, global average pooling and linear classifier
    self.tail = nn.Sequential(OrderedDict([
        ('bn', nn.BatchNorm3d(conv5_num_out)),
        ('relu', nn.ReLU(inplace=True))
    ]))
    self.globalpool = nn.Sequential(OrderedDict([
        ('avg', nn.AvgPool3d(kernel_size=(8, 7, 7), stride=(1, 1, 1))),
        # ('dropout', nn.Dropout(p=0.5)), only for fine-tuning
    ]))
    self.classifier = nn.Linear(conv5_num_out, num_classes)

    #############
    # Initialization
    initializer.xavier(net=self)
    if pretrained:
        import torch
        load_method = 'inflation'  # 'random', 'inflation'
        # Fix: build the checkpoint path with os.path.join instead of a
        # hard-coded Windows backslash, so it also resolves on POSIX.
        pretrained_model = os.path.join(
            os.path.dirname(os.path.realpath(__file__)),
            'pretrained', 'MFNet2D_ImageNet1k-0000.pth')
        logging.info(
            "Network:: graph initialized, loading pretrained model: `{}'".
            format(pretrained_model))
        assert os.path.exists(
            pretrained_model), "cannot locate: `{}'".format(pretrained_model)
        state_dict_2d = torch.load(pretrained_model, map_location='cpu')
        initializer.init_3d_from_2d_dict(net=self,
                                         state_dict=state_dict_2d,
                                         method=load_method)
    else:
        logging.info("Network:: graph initialized, use random inilization!")
def __init__(self, use_fau, num_classes, pretrained=False, **kwargs):
    """Build an MFNet 3D backbone with an optional FAU attention layer.

    Args:
        use_fau: -1 disables FAU; 0 or 1 selects which FAU kernel
            variant to attach after the 192-channel stage.
        num_classes: size of the final classification layer.
        pretrained: if True, inflate 2D ImageNet MFNet weights into 3D.
        **kwargs: accepted for interface compatibility; not used here.
    """
    super(MFNET_3D, self).__init__()
    groups = 16  # group cardinality passed to each MF_UNIT
    k_sec = {2: 3, \
             3: 4, \
             4: 6, \
             5: 3}  # number of MF_UNIT blocks per stage (conv2..conv5)

    # conv1 - x224 (x16)
    conv1_num_out = 16
    self.conv1 = nn.Sequential(
        OrderedDict([('conv',
                      nn.Conv3d(3,
                                conv1_num_out,
                                kernel_size=(3, 5, 5),
                                padding=(1, 2, 2),
                                stride=(1, 2, 2),
                                bias=False)),
                     ('bn', nn.BatchNorm3d(conv1_num_out)),
                     ('relu', nn.ReLU(inplace=True))]))
    self.maxpool = nn.MaxPool3d(kernel_size=(1, 3, 3),
                                stride=(1, 2, 2),
                                padding=(0, 1, 1))

    # conv2 - x56 (x8); first block halves the temporal dimension
    num_mid = 96
    conv2_num_out = 96
    self.conv2 = nn.Sequential(
        OrderedDict([
            ("B%02d" % i,
             MF_UNIT(num_in=conv1_num_out if i == 1 else conv2_num_out,
                     num_mid=num_mid,
                     num_out=conv2_num_out,
                     stride=(2, 1, 1) if i == 1 else (1, 1, 1),
                     g=groups,
                     first_block=(i == 1))) for i in range(1, k_sec[2] + 1)
        ]))

    # conv3 - x28 (x8); channels double, first block downsamples spatially
    num_mid *= 2
    conv3_num_out = 2 * conv2_num_out
    self.conv3 = nn.Sequential(
        OrderedDict([
            ("B%02d" % i,
             MF_UNIT(num_in=conv2_num_out if i == 1 else conv3_num_out,
                     num_mid=num_mid,
                     num_out=conv3_num_out,
                     stride=(1, 2, 2) if i == 1 else (1, 1, 1),
                     g=groups,
                     first_block=(i == 1))) for i in range(1, k_sec[3] + 1)
        ]))

    # conv4 - x14 (x8)
    num_mid *= 2
    conv4_num_out = 2 * conv3_num_out
    self.conv4 = nn.Sequential(
        OrderedDict([
            ("B%02d" % i,
             MF_UNIT(num_in=conv3_num_out if i == 1 else conv4_num_out,
                     num_mid=num_mid,
                     num_out=conv4_num_out,
                     stride=(1, 2, 2) if i == 1 else (1, 1, 1),
                     g=groups,
                     first_block=(i == 1))) for i in range(1, k_sec[4] + 1)
        ]))

    # conv5 - x7 (x8)
    num_mid *= 2
    conv5_num_out = 2 * conv4_num_out
    self.conv5 = nn.Sequential(
        OrderedDict([
            ("B%02d" % i,
             MF_UNIT(num_in=conv4_num_out if i == 1 else conv5_num_out,
                     num_mid=num_mid,
                     num_out=conv5_num_out,
                     stride=(1, 2, 2) if i == 1 else (1, 1, 1),
                     g=groups,
                     first_block=(i == 1))) for i in range(1, k_sec[5] + 1)
        ]))

    # final BN+ReLU tail, global average pooling and linear classifier
    self.tail = nn.Sequential(
        OrderedDict([('bn', nn.BatchNorm3d(conv5_num_out)),
                     ('relu', nn.ReLU(inplace=True))]))
    self.globalpool = nn.Sequential(
        OrderedDict([
            ('avg', nn.AvgPool3d(kernel_size=(8, 7, 7), stride=(1, 1, 1))),
            # ('dropout', nn.Dropout(p=0.5)), only for fine-tuning
        ]))
    self.classifier = nn.Linear(conv5_num_out, num_classes)

    # Optional FAU attention layer; kernel variant chosen by use_fau.
    self.use_fau = use_fau
    if use_fau > -1:
        c1 = c2 = 32   # key/query channel widths
        d1 = 64
        d2 = 32
        d3 = 256
        kernel2 = FAUKernel_3d(c1, c2, latent_dim1=d1,
                               latent_dim2=d2)  # 28x28 - 64, td1=8x64 - 32
        kernel1 = FAUKernel_thw(c1, latent_dim=d3)  # 8x28x28=6400
        ks = [kernel1, kernel2]
        k = ks[use_fau]
        logging.debug(f'using kernel[{use_fau}] {k}')
        self.faul = FAULayer_3d(in_channels=192, kernel=k, kq_channels=c1)

    #############
    # Initialization
    initializer.xavier(net=self)
    if pretrained:
        import torch
        load_method = 'inflation'  # 'random', 'inflation'
        pretrained_model = os.path.join(
            os.path.dirname(os.path.realpath(__file__)),
            os.path.join('pretrained', 'MFNet2D_ImageNet1k-0000.pth'))
        logging.info(
            "Network:: graph initialized, loading pretrained model: `{}'".
            format(pretrained_model))
        assert os.path.exists(
            pretrained_model), "cannot locate: `{}'".format(pretrained_model)
        # Fix: load onto CPU explicitly (consistent with the sibling
        # constructors) so loading works on CPU-only machines.
        state_dict_2d = torch.load(pretrained_model, map_location='cpu')
        initializer.init_3d_from_2d_dict(net=self,
                                         state_dict=state_dict_2d,
                                         method=load_method)
    else:
        logging.info("Network:: graph initialized, use random inilization!")
def __init__(self, num_classes, pretrained=False, **kwargs):
    """Build a 3D ResNet-101 with GloRe (graph reasoning) units inserted
    into conv3 and conv4, and zero-initialized "blocker" weights.

    Args:
        num_classes: size of the final classification layer.
        pretrained: if True, inflate 2D ResNet-101 weights into 3D.
        **kwargs: accepted for interface compatibility; not used here.
    """
    super(RESNET101_3D_GCN_X5, self).__init__()
    groups = 1  # plain (non-grouped) residual blocks
    k_sec = {2: 3, \
             3: 4, \
             4: 23, \
             5: 3}  # ResNet-101 block counts per stage

    # conv1 - x112 (x16): stem includes its own max-pool
    conv1_num_out = 32
    self.conv1 = nn.Sequential(
        OrderedDict([
            ('conv',
             nn.Conv3d(3,
                       conv1_num_out,
                       kernel_size=(3, 5, 5),
                       padding=(1, 2, 2),
                       stride=(1, 2, 2),
                       bias=False)),
            ('bn', nn.BatchNorm3d(conv1_num_out, eps=1e-04)),
            ('relu', nn.ReLU(inplace=True)),
            ('max_pool',
             nn.MaxPool3d(kernel_size=(1, 3, 3),
                          padding=(0, 1, 1),
                          stride=(1, 2, 2))),
        ]))

    # conv2 - x56 (x16)
    num_mid = 64
    conv2_num_out = 256
    self.conv2 = nn.Sequential(
        OrderedDict([
            ("B%02d" % i,
             RESIDUAL_BLOCK(
                 num_in=conv1_num_out if i == 1 else conv2_num_out,
                 num_mid=num_mid,
                 num_out=conv2_num_out,
                 # NOTE(review): both branches are identical here —
                 # looks like a leftover from a template; stride is
                 # effectively always (1, 1, 1).
                 stride=(1, 1, 1) if i == 1 else (1, 1, 1),
                 g=groups,
                 first_block=(i == 1))) for i in range(1, k_sec[2] + 1)
        ]))

    # conv3 - x28 (x8): first block downsamples in time and space;
    # every odd block uses 3D convs, and GloRe units are appended after
    # blocks 1 and 3.
    num_mid *= 2
    conv3_num_out = 2 * conv2_num_out
    blocks = []
    for i in range(1, k_sec[3] + 1):
        use_3d = bool(i % 2)
        blocks.append(
            ("B%02d" % i,
             RESIDUAL_BLOCK(num_in=conv2_num_out if i == 1 else conv3_num_out,
                            num_mid=num_mid,
                            num_out=conv3_num_out,
                            stride=(2, 2, 2) if i == 1 else (1, 1, 1),
                            use_3d=use_3d,
                            g=groups,
                            first_block=(i == 1))))
        if i in [1, 3]:
            blocks.append(("B%02d_extra" % i,
                           GloRe_Unit(num_in=conv3_num_out, num_mid=num_mid)))
    self.conv3 = nn.Sequential(OrderedDict(blocks))

    # conv4 - x14 (x8): GloRe units appended after blocks 6, 12 and 18.
    num_mid *= 2
    conv4_num_out = 2 * conv3_num_out
    blocks = []
    for i in range(1, k_sec[4] + 1):
        use_3d = bool(i % 2)
        blocks.append(
            ("B%02d" % i,
             RESIDUAL_BLOCK(num_in=conv3_num_out if i == 1 else conv4_num_out,
                            num_mid=num_mid,
                            num_out=conv4_num_out,
                            stride=(1, 2, 2) if i == 1 else (1, 1, 1),
                            use_3d=use_3d,
                            g=groups,
                            first_block=(i == 1))))
        if i in [6, 12, 18]:
            blocks.append(("B%02d_extra" % i,
                           GloRe_Unit(num_in=conv4_num_out, num_mid=num_mid)))
    self.conv4 = nn.Sequential(OrderedDict(blocks))

    # conv5 - x7 (x4): only the middle block uses 3D convs
    num_mid *= 2
    conv5_num_out = 2 * conv4_num_out
    self.conv5 = nn.Sequential(
        OrderedDict([
            ("B%02d" % i,
             RESIDUAL_BLOCK(num_in=conv4_num_out if i == 1 else conv5_num_out,
                            num_mid=num_mid,
                            num_out=conv5_num_out,
                            stride=(1, 2, 2) if i == 1 else (1, 1, 1),
                            g=groups,
                            use_3d=(i == 2),
                            first_block=(i == 1))) for i in range(1, k_sec[5] + 1)
        ]))

    # final BN+ReLU tail, pooling with dropout, and linear classifier
    self.tail = nn.Sequential(
        OrderedDict([('bn', nn.BatchNorm3d(conv5_num_out, eps=1e-04)),
                     ('relu', nn.ReLU(inplace=True))]))
    self.globalpool = nn.Sequential(
        OrderedDict([
            ('avg', nn.AvgPool3d(kernel_size=(4, 7, 7), stride=(1, 1, 1))),
            ('dropout', nn.Dropout(p=0.5)),
        ]))
    self.classifier = nn.Linear(conv5_num_out, num_classes)

    #############
    # Initialization
    initializer.xavier(net=self)
    if pretrained:
        import torch
        load_method = 'inflation'  # 'random', 'inflation'
        pretrained_model = os.path.join(
            os.path.dirname(os.path.realpath(__file__)),
            'pretrained/resnet101-lite.pth')
        logging.info(
            "Network:: symbol initialized, use pretrained model: `{}'".
            format(pretrained_model))
        assert os.path.exists(
            pretrained_model), "cannot locate: `{}'".format(pretrained_model)
        state_dict_2d = torch.load(pretrained_model)
        initializer.init_3d_from_2d_dict(net=self,
                                         state_dict=state_dict_2d,
                                         method=load_method)
    else:
        logging.info("Network:: symbol initialized, use random inilization!")

    # Zero out every '*blocker.weight' parameter so the corresponding
    # branches start as identity/no-op contributions.
    blocker_name_list = []
    for name, param in self.state_dict().items():
        if name.endswith('blocker.weight'):
            blocker_name_list.append(name)
            param[:] = 0.
    if len(blocker_name_list) > 0:
        logging.info(
            "Network:: change params of the following layer be zeros: {}".
            format(blocker_name_list))
def __init__(self, num_classes, pretrained=True):
    """Build the classic C3D architecture (Tran et al.).

    Args:
        num_classes: size of the final fc8 layer.
        pretrained: if True, load a local C3D checkpoint and remap its
            layer names via ``__load_pretrained_weights``.
    """
    super(C3D, self).__init__()

    # Five conv groups with 3x3x3 kernels; pooling gradually collapses
    # the temporal dimension (pool1 keeps time, pool2..pool5 halve it).
    self.conv1 = nn.Conv3d(3, 64, kernel_size=(3, 3, 3), padding=(1, 1, 1))
    self.pool1 = nn.MaxPool3d(kernel_size=(1, 2, 2), stride=(1, 2, 2))

    self.conv2 = nn.Conv3d(64, 128, kernel_size=(3, 3, 3), padding=(1, 1, 1))
    self.pool2 = nn.MaxPool3d(kernel_size=(2, 2, 2), stride=(2, 2, 2))

    self.conv3a = nn.Conv3d(128, 256, kernel_size=(3, 3, 3), padding=(1, 1, 1))
    self.conv3b = nn.Conv3d(256, 256, kernel_size=(3, 3, 3), padding=(1, 1, 1))
    self.pool3 = nn.MaxPool3d(kernel_size=(2, 2, 2), stride=(2, 2, 2))

    self.conv4a = nn.Conv3d(256, 512, kernel_size=(3, 3, 3), padding=(1, 1, 1))
    self.conv4b = nn.Conv3d(512, 512, kernel_size=(3, 3, 3), padding=(1, 1, 1))
    self.pool4 = nn.MaxPool3d(kernel_size=(2, 2, 2), stride=(2, 2, 2))

    self.conv5a = nn.Conv3d(512, 512, kernel_size=(3, 3, 3), padding=(1, 1, 1))
    self.conv5b = nn.Conv3d(512, 512, kernel_size=(3, 3, 3), padding=(1, 1, 1))
    # Extra spatial padding keeps the fc6 input at 8192 features.
    self.pool5 = nn.MaxPool3d(kernel_size=(2, 2, 2),
                              stride=(2, 2, 2),
                              padding=(0, 1, 1))

    self.fc6 = nn.Linear(8192, 4096)
    self.fc7 = nn.Linear(4096, 4096)
    self.fc8 = nn.Linear(4096, num_classes)

    self.dropout = nn.Dropout(p=0.5)
    self.relu = nn.ReLU()

    #############
    # Initialization
    initializer.xavier(net=self)
    if pretrained:
        pretrained_model = os.path.join(
            os.path.dirname(os.path.realpath(__file__)),
            'pretrained/c3d_pretrained.pth')
        logging.info(
            "Network:: graph initialized, loading pretrained model: `{}'".
            format(pretrained_model))
        assert os.path.exists(
            pretrained_model), "cannot locate: `{}'".format(pretrained_model)
        # Use a distinct name for the loaded checkpoint instead of
        # shadowing the boolean `pretrained` parameter.
        checkpoint = torch.load(pretrained_model)
        corresp_name = self.__load_pretrained_weights(
            pretrained_model=pretrained_model)
        load_state(self,
                   state_dict=checkpoint['state_dict'],
                   corresp_name=corresp_name)
    else:
        logging.info("Network:: graph initialized, use random inilization!")
g=groups, first_block=(i==1))) ])) self.conv8 = nn.Sequential(OrderedDict([ ("B%02d"%i, MF_UNIT(num_in=conv7_num_out if i==1 else conv8_num_out, num_mid=num_mid, num_out=conv8_num_out, stride=(1,1,1) if i==1 else (1,1,1), #??? g=groups, first_block=(i==1))) ])) ############# # Initialization initializer.xavier(net=self) if pretrained: import torch load_method='inflation' # 'random', 'inflation' pretrained_model=os.path.join(os.path.dirname(os.path.realpath(__file__)), 'pretrained\\MFNet2D_ImageNet1k-0000.pth') logging.info("Network:: graph initialized, loading pretrained model: `{}'".format(pretrained_model)) assert os.path.exists(pretrained_model), "cannot locate: `{}'".format(pretrained_model) state_dict_2d = torch.load(pretrained_model, map_location='cpu') initializer.init_3d_from_2d_dict(net=self, state_dict=state_dict_2d, method=load_method) else: logging.info("Network:: graph initialized, use random inilization!") def forward(self, x): assert x.shape[2] == 16
def __init__(self, num_classes, pretrained=False, **kwargs):
    """Build an MFNet 3D backbone with a combined (compact-bilinear +
    average) pooling head for two-stream lateral fusion experiments.

    Args:
        num_classes: size of the final classification layer.
        pretrained: if True, inflate 2D ImageNet MFNet weights into 3D.
        **kwargs: accepted for interface compatibility; not used here.
    """
    super(MFBPNET_3D_LATERAL_STABLE, self).__init__()
    groups = 16  # group cardinality passed to each MF_UNIT
    k_sec = {2: 3, \
             3: 4, \
             4: 6, \
             5: 3}  # number of MF_UNIT blocks per stage (conv2..conv5)

    # conv1 - x224 (x16)
    conv1_num_out = 16
    self.conv1 = nn.Sequential(OrderedDict([
        ('conv', nn.Conv3d(3, conv1_num_out, kernel_size=(3, 5, 5),
                           padding=(1, 2, 2), stride=(1, 2, 2), bias=False)),
        ('bn', nn.BatchNorm3d(conv1_num_out)),
        ('relu', nn.ReLU(inplace=True))
    ]))
    self.maxpool = nn.MaxPool3d(kernel_size=(1, 3, 3),
                                stride=(1, 2, 2),
                                padding=(0, 1, 1))

    # conv2 - x56 (x8); first block halves the temporal dimension
    num_mid = 96
    conv2_num_out = 96
    self.conv2 = nn.Sequential(OrderedDict([
        ("B%02d" % i,
         MF_UNIT(num_in=conv1_num_out if i == 1 else conv2_num_out,
                 num_mid=num_mid,
                 num_out=conv2_num_out,
                 stride=(2, 1, 1) if i == 1 else (1, 1, 1),
                 g=groups,
                 first_block=(i == 1))) for i in range(1, k_sec[2] + 1)
    ]))

    # conv3 - x28 (x8); channels double, first block downsamples spatially
    num_mid *= 2
    conv3_num_out = 2 * conv2_num_out
    self.conv3 = nn.Sequential(OrderedDict([
        ("B%02d" % i,
         MF_UNIT(num_in=conv2_num_out if i == 1 else conv3_num_out,
                 num_mid=num_mid,
                 num_out=conv3_num_out,
                 stride=(1, 2, 2) if i == 1 else (1, 1, 1),
                 g=groups,
                 first_block=(i == 1))) for i in range(1, k_sec[3] + 1)
    ]))

    # conv4 - x14 (x8)
    num_mid *= 2
    conv4_num_out = 2 * conv3_num_out
    self.conv4 = nn.Sequential(OrderedDict([
        ("B%02d" % i,
         MF_UNIT(num_in=conv3_num_out if i == 1 else conv4_num_out,
                 num_mid=num_mid,
                 num_out=conv4_num_out,
                 stride=(1, 2, 2) if i == 1 else (1, 1, 1),
                 g=groups,
                 first_block=(i == 1))) for i in range(1, k_sec[4] + 1)
    ]))

    # conv5 - x7 (x8)
    num_mid *= 2
    conv5_num_out = 2 * conv4_num_out
    self.conv5 = nn.Sequential(OrderedDict([
        ("B%02d" % i,
         MF_UNIT(num_in=conv4_num_out if i == 1 else conv5_num_out,
                 num_mid=num_mid,
                 num_out=conv5_num_out,
                 stride=(1, 2, 2) if i == 1 else (1, 1, 1),
                 g=groups,
                 first_block=(i == 1))) for i in range(1, k_sec[5] + 1)
    ]))

    # final BN+ReLU tail and global average pooling
    self.tail = nn.Sequential(OrderedDict([
        ('bn', nn.BatchNorm3d(conv5_num_out)),
        ('relu', nn.ReLU(inplace=True))
    ]))
    self.globalpool = nn.Sequential(OrderedDict([
        ('avg', nn.AvgPool3d(kernel_size=(8, 7, 7), stride=(1, 1, 1))),
        # ('dropout', nn.Dropout(p=0.5)), only for fine-tuning
    ]))

    self.concat_classifier = nn.Linear(2 * conv5_num_out, num_classes)  # This attempts to classify directly after concatenating two pooled features (Run ID 8, 9, 12, 13)
    """ The following would replace the original global pooling method with a combination of compact bilinear pooling and average pooling with multiple linear layers """
    self.cbp_out = 5 * conv5_num_out  # change bilinear channels
    # Change between CP/CP_attn to import different file (stable/attn ver), CP_attn is used for Ablation Study ATN 4
    self.combinedpool = CP.CombinedPooling(num_in=conv5_num_out,
                                           num_out=self.cbp_out,
                                           num_mid1=4 * conv5_num_out,
                                           num_mid2=2 * conv5_num_out,
                                           kernel_s=(1, 7, 7),
                                           kernel_t=(7, 1, 1),
                                           pad=0,
                                           stride=1)
    # Sigmoid for attention (Comment for Run ID < 54)
    self.sigmoid = nn.Sigmoid()

    #############
    # Initialization
    initializer.xavier(net=self)
    if pretrained:
        import torch
        load_method = 'inflation'  # 'random', 'inflation'
        pretrained_model = os.path.join(
            os.path.dirname(os.path.realpath(__file__)),
            'pretrained/MFNet2D_ImageNet1k-0000.pth')
        logging.info(
            "Network:: graph initialized, loading pretrained model: `{}'".
            format(pretrained_model))
        assert os.path.exists(
            pretrained_model), "cannot locate: `{}'".format(pretrained_model)
        state_dict_2d = torch.load(pretrained_model)
        initializer.init_3d_from_2d_dict(net=self,
                                         state_dict=state_dict_2d,
                                         method=load_method)
    else:
        logging.info("Network:: graph initialized, use random inilization!")
def __init__(self, num_classes, pretrained=False, **kwargs):
    """Build a truncated MFNet 3D variant: only conv1..conv3 stages,
    followed by global pooling over the full (16, 28, 28) feature map
    and a single fc layer.

    Args:
        num_classes: size of the final fc layer.
        pretrained: if True, inflate 2D ImageNet MFNet weights into 3D.
        **kwargs: accepted for interface compatibility; not used here.
    """
    super(MFNET_3D, self).__init__()
    groups = 16  # number of fb_unit.
    k_sec = {2: 3, \
             3: 4, \
             4: 6, \
             5: 3}  # stage block counts (only stages 2 and 3 are built)

    # conv1 - x224 (x16)
    conv1_num_out = 16
    self.conv1 = nn.Sequential(
        OrderedDict([('conv',
                      nn.Conv3d(3,
                                conv1_num_out,
                                kernel_size=(3, 5, 5),
                                padding=(1, 2, 2),
                                stride=(1, 2, 2),
                                bias=False)),
                     ('bn', nn.BatchNorm3d(conv1_num_out)),
                     ('relu', nn.ReLU(inplace=True))]))
    self.maxpool = nn.MaxPool3d(kernel_size=(1, 3, 3),
                                stride=(1, 2, 2),
                                padding=(0, 1, 1))

    # add first fb_unit with our video model
    #conv2 - x56 (x16)
    num_mid = 96  # size of f_map. next unit is double of now.
    conv2_num_out = 96
    self.conv2 = nn.Sequential(
        OrderedDict([
            (
                "B%02d" % i,
                MF_UNIT(
                    num_in=conv1_num_out if i == 1 else conv2_num_out,
                    num_mid=num_mid,
                    num_out=conv2_num_out,
                    # Both branches are (1, 1, 1): the temporal dimension
                    # is deliberately kept full-size in this variant
                    # (stride controls the temporal dimension).
                    stride=(1, 1, 1) if i == 1 else (1, 1, 1),
                    g=groups,
                    first_block=(i == 1))) for i in range(1, k_sec[2] + 1)
        ]))

    # add second fb_unit with our video model
    #conv3 - x28 (x16)
    num_mid *= 2
    conv3_num_out = 2 * conv2_num_out
    self.conv3 = nn.Sequential(
        OrderedDict([
            (
                "B%02d" % i,
                MF_UNIT(
                    num_in=conv2_num_out if i == 1 else conv3_num_out,
                    num_mid=num_mid,
                    num_out=conv3_num_out,
                    # First block downsamples spatially only; temporal
                    # size is preserved (stride controls the temporal
                    # dimension).
                    stride=(1, 2, 2) if i == 1 else (1, 1, 1),
                    g=groups,
                    first_block=(i == 1))) for i in range(1, k_sec[3] + 1)
        ]))

    # final BN+ReLU tail
    self.tail = nn.Sequential(
        OrderedDict([('bn', nn.BatchNorm3d(conv3_num_out)),
                     ('relu', nn.ReLU(inplace=True))]))
    self.globalpool = nn.Sequential(
        OrderedDict([
            ('avg',
             nn.AvgPool3d(kernel_size=(16, 28, 28), stride=(1, 1, 1))
             ),  # temporal size 16 is half the input length; avgpool3d
            ('dropout', nn.Dropout(p=0.5)),  # only to reduce overfitting
        ]))
    # conv3_num_out (= 192) features -> num_classes logits
    self.fc = nn.Linear(conv3_num_out, num_classes)

    #############
    # Initialization
    initializer.xavier(net=self)
    if pretrained:
        import torch
        load_method = 'inflation'  # 'random', 'inflation'
        pretrained_model = os.path.join(
            os.path.dirname(os.path.realpath(__file__)),
            'pretrained/MFNet2D_ImageNet1k-0000.pth')
        logging.info(
            "Network:: graph initialized, loading pretrained model: `{}'".
            format(pretrained_model))
        assert os.path.exists(
            pretrained_model), "cannot locate: `{}'".format(pretrained_model)
        state_dict_2d = torch.load(pretrained_model)
        initializer.init_3d_from_2d_dict(net=self,
                                         state_dict=state_dict_2d,
                                         method=load_method)
    else:
        logging.info("Network:: graph initialized, use random inilization!")
def __init__(self, hash_bit, pretrained=False, **kwargs):
    """Build an MFNet 3D backbone with a deep-hashing head: three fc
    layers ending in Tanh that map conv5 features to ``hash_bit`` codes.

    Args:
        hash_bit: length of the output hash code.
        pretrained: if True, inflate 2D ImageNet MFNet weights into 3D.
        **kwargs: accepted for interface compatibility; not used here.
    """
    super(MFNET_3D, self).__init__()
    self.hash_bit = hash_bit
    groups = 16  # group cardinality passed to each MF_UNIT
    k_sec = {2: 3, \
             3: 4, \
             4: 6, \
             5: 3}  # number of MF_UNIT blocks per stage (conv2..conv5)

    # conv1 - x224 (x16)
    conv1_num_out = 16
    self.conv1 = nn.Sequential(
        OrderedDict([('conv',
                      nn.Conv3d(3,
                                conv1_num_out,
                                kernel_size=(3, 5, 5),
                                padding=(1, 2, 2),
                                stride=(1, 2, 2),
                                bias=False)),
                     ('bn', nn.BatchNorm3d(conv1_num_out)),
                     ('relu', nn.ReLU(inplace=True))]))
    self.maxpool = nn.MaxPool3d(kernel_size=(1, 3, 3),
                                stride=(1, 2, 2),
                                padding=(0, 1, 1))

    # conv2 - x56 (x8); first block halves the temporal dimension
    num_mid = 96
    conv2_num_out = 96
    self.conv2 = nn.Sequential(
        OrderedDict([
            ("B%02d" % i,
             MF_UNIT(num_in=conv1_num_out if i == 1 else conv2_num_out,
                     num_mid=num_mid,
                     num_out=conv2_num_out,
                     stride=(2, 1, 1) if i == 1 else (1, 1, 1),
                     g=groups,
                     first_block=(i == 1))) for i in range(1, k_sec[2] + 1)
        ]))

    # conv3 - x28 (x8); channels double, first block downsamples spatially
    num_mid *= 2
    conv3_num_out = 2 * conv2_num_out
    self.conv3 = nn.Sequential(
        OrderedDict([
            ("B%02d" % i,
             MF_UNIT(num_in=conv2_num_out if i == 1 else conv3_num_out,
                     num_mid=num_mid,
                     num_out=conv3_num_out,
                     stride=(1, 2, 2) if i == 1 else (1, 1, 1),
                     g=groups,
                     first_block=(i == 1))) for i in range(1, k_sec[3] + 1)
        ]))

    # conv4 - x14 (x8)
    num_mid *= 2
    conv4_num_out = 2 * conv3_num_out
    self.conv4 = nn.Sequential(
        OrderedDict([
            ("B%02d" % i,
             MF_UNIT(num_in=conv3_num_out if i == 1 else conv4_num_out,
                     num_mid=num_mid,
                     num_out=conv4_num_out,
                     stride=(1, 2, 2) if i == 1 else (1, 1, 1),
                     g=groups,
                     first_block=(i == 1))) for i in range(1, k_sec[4] + 1)
        ]))

    # conv5 - x7 (x8)
    num_mid *= 2
    conv5_num_out = 2 * conv4_num_out
    self.conv5 = nn.Sequential(
        OrderedDict([
            ("B%02d" % i,
             MF_UNIT(num_in=conv4_num_out if i == 1 else conv5_num_out,
                     num_mid=num_mid,
                     num_out=conv5_num_out,
                     stride=(1, 2, 2) if i == 1 else (1, 1, 1),
                     g=groups,
                     first_block=(i == 1))) for i in range(1, k_sec[5] + 1)
        ]))

    # final BN+ReLU tail and global average pooling
    self.tail = nn.Sequential(
        OrderedDict([('bn', nn.BatchNorm3d(conv5_num_out)),
                     ('relu', nn.ReLU(inplace=True))]))
    self.globalpool = nn.Sequential(
        OrderedDict([
            ('avg', nn.AvgPool3d(kernel_size=(8, 7, 7), stride=(1, 1, 1))),
            #('dropout', nn.Dropout(p=0.5)), #only for fine-tuning
        ]))

    #self.classifier = nn.Linear(conv5_num_out, num_classes)
    # Hashing head: fc -> ReLU -> fc -> ReLU -> fc -> Tanh,
    # mapping conv5 features (2048) to hash_bit values in (-1, 1).
    self.fc1 = nn.Linear(conv5_num_out, conv5_num_out)
    self.activation1 = nn.ReLU()
    self.fc2 = nn.Linear(conv5_num_out, conv5_num_out)
    self.activation2 = nn.ReLU()
    self.fc3 = nn.Linear(conv5_num_out, self.hash_bit)
    self.last_layer = nn.Tanh()
    self.hash_layer = nn.Sequential(self.fc1, self.activation1, self.fc2,
                                    self.activation2, self.fc3,
                                    self.last_layer)

    #############
    # Initialization
    print("\n=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=")
    print("Initializer:: Info")
    initializer.xavier(net=self)
    if pretrained:
        import torch
        load_method = 'inflation'  # 'random', 'inflation'
        pretrained_model = os.path.join(
            os.path.dirname(os.path.realpath(__file__)),
            'pretrained/MFNet2D_ImageNet1k-0000.pth')
        assert os.path.exists(
            pretrained_model), "cannot locate: `{}'".format(pretrained_model)
        state_dict_2d = torch.load(pretrained_model)
        initializer.init_3d_from_2d_dict(net=self,
                                         state_dict=state_dict_2d,
                                         method=load_method)
    print("=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=")
def __init__(self, num_classes, pretrained=False, **kwargs):
    """Build an MFNet 3D backbone with an ArcFace metric-learning
    classifier and temporal/channel attention modules.

    Args:
        num_classes: size of the final classification layer.
        pretrained: if True, inflate 2D ImageNet MFNet weights into 3D.
        **kwargs: accepted for interface compatibility; not used here.
    """
    super(MFNET_3D, self).__init__()
    groups = 16  # group cardinality passed to each MF_UNIT
    k_sec = {2: 3, \
             3: 4, \
             4: 6, \
             5: 3}  # number of MF_UNIT blocks per stage (conv2..conv5)

    # conv1 - x224 (x16)
    conv1_num_out = 16
    self.conv1 = nn.Sequential(OrderedDict([
        ('conv', nn.Conv3d(3, conv1_num_out, kernel_size=(3, 5, 5),
                           padding=(1, 2, 2), stride=(1, 2, 2), bias=False)),
        ('bn', nn.BatchNorm3d(conv1_num_out)),
        ('relu', nn.ReLU(inplace=True))
    ]))
    self.maxpool = nn.MaxPool3d(kernel_size=(1, 3, 3),
                                stride=(1, 2, 2),
                                padding=(0, 1, 1))

    # conv2 - x56 (x8); first block halves the temporal dimension
    num_mid = 96
    conv2_num_out = 96
    self.conv2 = nn.Sequential(OrderedDict([
        ("B%02d" % i,
         MF_UNIT(num_in=conv1_num_out if i == 1 else conv2_num_out,
                 num_mid=num_mid,
                 num_out=conv2_num_out,
                 stride=(2, 1, 1) if i == 1 else (1, 1, 1),
                 g=groups,
                 first_block=(i == 1))) for i in range(1, k_sec[2] + 1)
    ]))

    # conv3 - x28 (x8); channels double, first block downsamples spatially
    num_mid *= 2
    conv3_num_out = 2 * conv2_num_out
    self.conv3 = nn.Sequential(OrderedDict([
        ("B%02d" % i,
         MF_UNIT(num_in=conv2_num_out if i == 1 else conv3_num_out,
                 num_mid=num_mid,
                 num_out=conv3_num_out,
                 stride=(1, 2, 2) if i == 1 else (1, 1, 1),
                 g=groups,
                 first_block=(i == 1))) for i in range(1, k_sec[3] + 1)
    ]))

    # conv4 - x14 (x8)
    num_mid *= 2
    conv4_num_out = 2 * conv3_num_out
    self.conv4 = nn.Sequential(OrderedDict([
        ("B%02d" % i,
         MF_UNIT(num_in=conv3_num_out if i == 1 else conv4_num_out,
                 num_mid=num_mid,
                 num_out=conv4_num_out,
                 stride=(1, 2, 2) if i == 1 else (1, 1, 1),
                 g=groups,
                 first_block=(i == 1))) for i in range(1, k_sec[4] + 1)
    ]))

    # conv5 - x7 (x8)
    num_mid *= 2
    conv5_num_out = 2 * conv4_num_out
    self.conv5 = nn.Sequential(OrderedDict([
        ("B%02d" % i,
         MF_UNIT(num_in=conv4_num_out if i == 1 else conv5_num_out,
                 num_mid=num_mid,
                 num_out=conv5_num_out,
                 stride=(1, 2, 2) if i == 1 else (1, 1, 1),
                 g=groups,
                 first_block=(i == 1))) for i in range(1, k_sec[5] + 1)
    ]))

    # final BN+ReLU tail and global average pooling
    self.tail = nn.Sequential(OrderedDict([
        ('bn', nn.BatchNorm3d(conv5_num_out)),
        ('relu', nn.ReLU(inplace=True))
    ]))
    self.globalpool = nn.Sequential(OrderedDict([
        ('avg', nn.AvgPool3d(kernel_size=(8, 7, 7), stride=(1, 1, 1))),
        # ('dropout', nn.Dropout(p=0.5)), only for fine-tuning
    ]))

    # self.classifier = nn.Linear(conv5_num_out, num_classes)
    # add new classifier
    self.classifier = metric.ArcFace(conv5_num_out, num_classes)  # define metric fc layer
    # if CONFIG.metric == 'adacos':
    #     print('Adacos will be used in fc layer.')
    #     metric_fc = metric.AdaCos(in_features, CONFIG.n_classes)
    # elif CONFIG.metric == 'arcface':
    #     print('Adacos will be used in fc layer.')
    #     metric_fc = metric.ArcFace(in_features, CONFIG.n_classes)
    # elif CONFIG.metric == 'l2constrained':
    #     print('L2-Constrained fc layer will be used.')
    #     metric_fc = metric.L2ConstrainedLinear(in_features, CONFIG.n_classes)
    # else:
    #     print('Use fc layer without metric learning method.')
    #     metric_fc = nn.Linear(in_features, CONFIG.n_classes)

    # Temporal and channel attention modules (applied in forward,
    # which is not visible here).
    self.tam = TemporalAttentionModule()
    self.cam = ChannelAttentionModule()

    #############
    # Initialization
    initializer.xavier(net=self)
    if pretrained:
        import torch
        load_method = 'inflation'  # 'random', 'inflation'
        pretrained_model = os.path.join(
            os.path.dirname(os.path.realpath(__file__)),
            'pretrained/MFNet2D_ImageNet1k-0000.pth')
        logging.info(
            "Network:: graph initialized, loading pretrained model: `{}'".
            format(pretrained_model))
        assert os.path.exists(
            pretrained_model), "cannot locate: `{}'".format(pretrained_model)
        state_dict_2d = torch.load(pretrained_model)
        initializer.init_3d_from_2d_dict(net=self,
                                         state_dict=state_dict_2d,
                                         method=load_method)
    else:
        logging.info("Network:: graph initialized, use random inilization!")
def __init__(self,
             block,
             layers,
             block_inplanes=[64, 128, 256, 512],
             n_input_channels=3,
             conv1_t_size=7,
             conv1_t_stride=1,
             no_max_pool=False,
             shortcut_type='B',
             widen_factor=1.0,
             num_classes=400,
             pretrained=True):
    """Build a generic 3D ResNet.

    Args:
        block: residual block class (provides ``expansion``).
        layers: number of blocks per stage (4 entries).
        block_inplanes: base channel widths per stage, scaled by
            ``widen_factor``.
        n_input_channels: input channels (3 for RGB).
        conv1_t_size / conv1_t_stride: temporal kernel/stride of conv1.
        no_max_pool: stored; presumably skips maxpool in forward —
            TODO confirm against the forward pass (not visible here).
        shortcut_type: downsample shortcut variant passed to _make_layer.
        widen_factor: channel width multiplier.
        num_classes: size of the final fc layer.
        pretrained: if True, load a local Kinetics checkpoint.
    """
    super().__init__()

    block_inplanes = [int(x * widen_factor) for x in block_inplanes]

    self.in_planes = block_inplanes[0]
    self.no_max_pool = no_max_pool

    self.conv1 = nn.Conv3d(n_input_channels,
                           self.in_planes,
                           kernel_size=(conv1_t_size, 7, 7),
                           stride=(conv1_t_stride, 2, 2),
                           padding=(conv1_t_size // 2, 3, 3),
                           bias=False)
    self.bn1 = nn.BatchNorm3d(self.in_planes)
    self.relu = nn.ReLU(inplace=True)
    self.maxpool = nn.MaxPool3d(kernel_size=3, stride=2, padding=1)
    self.layer1 = self._make_layer(block, block_inplanes[0], layers[0],
                                   shortcut_type)
    self.layer2 = self._make_layer(block,
                                   block_inplanes[1],
                                   layers[1],
                                   shortcut_type,
                                   stride=2)
    self.layer3 = self._make_layer(block,
                                   block_inplanes[2],
                                   layers[2],
                                   shortcut_type,
                                   stride=2)
    self.layer4 = self._make_layer(block,
                                   block_inplanes[3],
                                   layers[3],
                                   shortcut_type,
                                   stride=2)

    self.avgpool = nn.AdaptiveAvgPool3d((1, 1, 1))
    self.fc = nn.Linear(block_inplanes[3] * block.expansion, num_classes)

    # Kaiming init for convs, constant init for BN.
    # NOTE(review): initializer.xavier below runs afterwards and may
    # overwrite (some of) these weights — confirm this is intentional.
    for m in self.modules():
        if isinstance(m, nn.Conv3d):
            nn.init.kaiming_normal_(m.weight,
                                    mode='fan_out',
                                    nonlinearity='relu')
        elif isinstance(m, nn.BatchNorm3d):
            nn.init.constant_(m.weight, 1)
            nn.init.constant_(m.bias, 0)

    # Initialization
    initializer.xavier(net=self)
    if pretrained:
        pretrained_model = os.path.join(
            os.path.dirname(os.path.realpath(__file__)),
            'pretrained/resnet-101-kinetics.pth')
        logging.info(
            "Network:: graph initialized, loading pretrained model: `{}'".
            format(pretrained_model))
        assert os.path.exists(
            pretrained_model), "cannot locate: `{}'".format(pretrained_model)
        # NOTE(review): this rebinding shadows the boolean `pretrained`
        # parameter with the loaded checkpoint dict.
        pretrained = torch.load(pretrained_model)
        load_state(self, pretrained['state_dict'])
    else:
        logging.info("Network:: graph initialized, use random inilization!")