Esempio n. 1
0
    def _prepare_base_model(self, base_model):
        """Create the backbone selected by ``base_model`` and its input stats.

        On success ``self.base_model``, ``self.input_size``,
        ``self.input_mean`` and ``self.input_std`` are populated; the
        mean/std lists are rewritten when ``self.modality`` is ``'Flow'`` or
        ``'RGBDiff'``.

        Args:
            base_model: Backbone name. A name containing ``'resnet'`` is
                resolved as an attribute of ``torchvision.models``;
                ``'BNInception'`` is built from ``archs.bn_inception``.

        Raises:
            ValueError: If ``base_model`` matches neither family.
        """
        print('=> base model: {}'.format(base_model))

        if 'resnet' in base_model:
            resnet_ctor = getattr(torchvision.models, base_model)
            # The constructor always receives a positional ``True`` here.
            self.base_model = resnet_ctor(True)

            # Optional add-ons: each helper is handed the freshly built
            # network and its return value is not used.
            if self.is_shift:
                print('Adding temporal shift...')
                from ops.temporal_shift import make_temporal_shift
                make_temporal_shift(
                    self.base_model,
                    self.num_segments,
                    n_div=self.shift_div,
                    place=self.shift_place,
                    temporal_pool=self.temporal_pool,
                )
            if self.non_local:
                print('Adding non-local module...')
                from ops.non_local import make_non_local
                make_non_local(self.base_model, self.num_segments)
            if self.cca3d:
                print('Adding CCA-3D module...')
                from ops.cca3d import make_cca3d
                make_cca3d(self.base_model, self.num_segments)

            self.base_model.last_layer_name = 'fc'
            self.input_size = 224
            self.input_mean = [0.485, 0.456, 0.406]
            self.input_std = [0.229, 0.224, 0.225]

            # Swap the pooling layer for one with a fixed 1x1 output.
            self.base_model.avgpool = nn.AdaptiveAvgPool2d(1)

            modality = self.modality
            if modality == 'Flow':
                # Single-entry statistics: fixed mean, averaged RGB std.
                self.input_mean = [0.5]
                self.input_std = [np.mean(self.input_std)]
            elif modality == 'RGBDiff':
                # Keep the RGB triple and append 3 * new_length entries.
                self.input_mean = (
                    [0.485, 0.456, 0.406] + [0] * 3 * self.new_length)
                rgb_std = self.input_std
                self.input_std = (
                    rgb_std + [np.mean(rgb_std) * 2] * 3 * self.new_length)
            return

        if base_model == 'BNInception':
            from archs.bn_inception import bninception
            self.base_model = bninception(pretrained=self.pretrain)
            # This backbone carries its own preprocessing attributes.
            self.input_size = self.base_model.input_size
            self.input_mean = self.base_model.mean
            self.input_std = self.base_model.std
            self.base_model.last_layer_name = 'fc'

            modality = self.modality
            if modality == 'Flow':
                self.input_mean = [128]
            elif modality == 'RGBDiff':
                self.input_mean = self.input_mean * (1 + self.new_length)

            if self.is_shift:
                print('Adding temporal shift...')
                self.base_model.build_temporal_ops(
                    self.num_segments,
                    is_temporal_shift=self.shift_place,
                    shift_div=self.shift_div,
                )
            return

        raise ValueError('Unknown base model: {}'.format(base_model))
    def _prepare_base_model(self, base_model):
        """Build the requested backbone and configure input normalisation.

        Populates ``self.base_model``, ``self.input_size``,
        ``self.input_mean`` and ``self.input_std``. When ``self.is_shift`` or
        ``self.non_local`` are set, the matching ``ops`` helpers are applied
        to the new network.

        Args:
            base_model: ``'BNInception'``, ``'mobilenetv2'`` or a name
                containing ``'resnet'`` (looked up in ``torchvision.models``).

        Raises:
            ValueError: For any other ``base_model``.
        """
        print('=> base model: {}'.format(base_model))

        def set_rgb_defaults():
            # Input size and RGB mean/std shared by the branches below.
            self.input_size = 224
            self.input_mean = [0.485, 0.456, 0.406]
            self.input_std = [0.229, 0.224, 0.225]

        def adapt_stats_to_modality():
            # Flow collapses to single-entry statistics; RGBDiff keeps the
            # RGB triple and appends 3 * new_length further entries.
            if self.modality == 'Flow':
                self.input_mean = [0.5]
                self.input_std = [np.mean(self.input_std)]
            elif self.modality == 'RGBDiff':
                self.input_mean = (
                    [0.485, 0.456, 0.406] + [0] * 3 * self.new_length)
                rgb_std = self.input_std
                self.input_std = (
                    rgb_std + [np.mean(rgb_std) * 2] * 3 * self.new_length)

        if 'resnet' in base_model:
            resnet_ctor = getattr(torchvision.models, base_model)
            # Pretrained weights are requested only for pretrain == 'imagenet'.
            self.base_model = resnet_ctor(self.pretrain == 'imagenet')

            if self.is_shift:
                print('Adding temporal shift...')
                from ops.temporal_shift import make_temporal_shift
                make_temporal_shift(
                    self.base_model,
                    self.num_segments,
                    n_div=self.shift_div,
                    place=self.shift_place,
                    temporal_pool=self.temporal_pool,
                )

            if self.non_local:
                print('Adding non-local module...')
                from ops.non_local import make_non_local
                make_non_local(self.base_model, self.num_segments)

            self.base_model.last_layer_name = 'fc'
            set_rgb_defaults()
            self.base_model.avgpool = nn.AdaptiveAvgPool2d(1)
            adapt_stats_to_modality()
            return

        if base_model == 'mobilenetv2':
            from archs.mobilenet_v2 import mobilenet_v2, InvertedResidual
            self.base_model = mobilenet_v2(self.pretrain == 'imagenet')

            self.base_model.last_layer_name = 'classifier'
            set_rgb_defaults()
            self.base_model.avgpool = nn.AdaptiveAvgPool2d(1)

            if self.is_shift:
                from ops.temporal_shift import TemporalShift
                for block in self.base_model.modules():
                    # Only residual InvertedResidual blocks whose ``conv``
                    # has exactly 8 entries get their first entry wrapped.
                    if not isinstance(block, InvertedResidual):
                        continue
                    if len(block.conv) != 8 or not block.use_res_connect:
                        continue
                    if self.print_spec:
                        print('Adding temporal shift... {}'.format(
                            block.use_res_connect))
                    block.conv[0] = TemporalShift(
                        block.conv[0],
                        n_segment=self.num_segments,
                        n_div=self.shift_div,
                    )

            adapt_stats_to_modality()
            return

        if base_model == 'BNInception':
            from archs.bn_inception import bninception
            self.base_model = bninception(pretrained=self.pretrain)
            # This backbone carries its own preprocessing attributes.
            self.input_size = self.base_model.input_size
            self.input_mean = self.base_model.mean
            self.input_std = self.base_model.std
            self.base_model.last_layer_name = 'fc'

            if self.modality == 'Flow':
                self.input_mean = [128]
            elif self.modality == 'RGBDiff':
                self.input_mean = self.input_mean * (1 + self.new_length)

            if self.is_shift:
                print('Adding temporal shift...')
                self.base_model.build_temporal_ops(
                    self.num_segments,
                    is_temporal_shift=self.shift_place,
                    shift_div=self.shift_div,
                )
            return

        raise ValueError('Unknown base model: {}'.format(base_model))
Esempio n. 3
0
    def _prepare_base_model(self, base_model):
        """Build the requested backbone and configure input normalisation.

        Populates ``self.base_model``, ``self.input_size``,
        ``self.input_mean`` and ``self.input_std``, applying the optional
        shift / prune / non-local helpers selected by the instance flags.

        Args:
            base_model: A name containing ``'resnet'`` (built through
                ``archs.Resnet_ann_v3``), a name containing
                ``'efficientnet'``, ``'mobilenetv2'`` or ``'BNInception'``.

        Raises:
            ValueError: For any other ``base_model``.
        """
        print('=> base model: {}'.format(base_model))

        def set_rgb_defaults():
            # Input size and RGB mean/std shared by the branches below.
            self.input_size = 224
            self.input_mean = [0.485, 0.456, 0.406]
            self.input_std = [0.229, 0.224, 0.225]

        def adapt_stats_to_modality():
            # Flow collapses to single-entry statistics; RGBDiff keeps the
            # RGB triple and appends 3 * new_length further entries.
            if self.modality == 'Flow':
                self.input_mean = [0.5]
                self.input_std = [np.mean(self.input_std)]
            elif self.modality == 'RGBDiff':
                self.input_mean = (
                    [0.485, 0.456, 0.406] + [0] * 3 * self.new_length)
                rgb_std = self.input_std
                self.input_std = (
                    rgb_std + [np.mean(rgb_std) * 2] * 3 * self.new_length)

        if 'resnet' in base_model:
            from archs.Resnet_ann_v3 import __resnet__
            self.base_model = __resnet__(
                base_model,
                self.pretrain == 'imagenet',
                self.extra_temporal_modeling,
                self.num_segments,
                self.num_class,
            )

            if self.is_shift:
                print('Adding temporal shift...')
                from ops.temporal_shift import make_temporal_shift
                # Input-side pruning is enabled for 'input' and 'inout';
                # only then is the first prune list forwarded.
                prune_inputs = self.is_prune in ['input', 'inout']
                make_temporal_shift(
                    self.base_model,
                    self.num_segments,
                    n_div=self.shift_div,
                    place=self.shift_place,
                    temporal_pool=self.temporal_pool,
                    concat=self.concat,
                    prune_list=self.prune_list[0] if prune_inputs else {},
                    prune=prune_inputs,
                )

            if self.is_prune in ['output', 'inout']:
                print('prune from conv1 and conv2...')
                from ops.prune import make_prune_conv
                make_prune_conv(self.base_model, place=self.shift_place)

            if self.non_local:
                print('Adding non-local module...')
                from ops.non_local import make_non_local
                make_non_local(self.base_model, self.num_segments)

            self.base_model.last_layer_name = 'fc'
            set_rgb_defaults()
            self.base_model.avgpool = nn.AdaptiveAvgPool2d(1)
            adapt_stats_to_modality()
            return

        if 'efficientnet' in base_model:
            from efficientnet_pytorch import EfficientNet
            self.base_model = EfficientNet.from_pretrained(
                base_model,
                temporal_modeling=self.extra_temporal_modeling,
                segment=self.num_segments,
                num_class=self.num_class,
            )

            if self.is_shift:
                print('Adding temporal shift...')
                from ops.temporal_shift import make_efficientnet_shift
                make_efficientnet_shift(
                    self.base_model,
                    self.num_segments,
                    n_div=self.shift_div,
                    place=self.shift_place,
                    temporal_pool=self.temporal_pool,
                    concat=self.concat,
                )

            if self.non_local:
                print('Adding non-local module...')
                from ops.non_local import make_non_local
                make_non_local(self.base_model, self.num_segments)

            # Both attributes are stored as plain strings on the backbone.
            self.base_model.dropout = '_dropout'
            self.base_model.last_layer_name = '_fc'
            set_rgb_defaults()
            adapt_stats_to_modality()
            return

        if base_model == 'mobilenetv2':
            from archs.mobilenet_v2 import mobilenet_v2, InvertedResidual
            self.base_model = mobilenet_v2(self.pretrain == 'imagenet')

            self.base_model.last_layer_name = 'classifier'
            set_rgb_defaults()
            self.base_model.avgpool = nn.AdaptiveAvgPool2d(1)

            if self.is_shift:
                from ops.temporal_shift import TemporalShift
                for block in self.base_model.modules():
                    # Only residual InvertedResidual blocks whose ``conv``
                    # has exactly 8 entries get their first entry wrapped.
                    if not isinstance(block, InvertedResidual):
                        continue
                    if len(block.conv) != 8 or not block.use_res_connect:
                        continue
                    if self.print_spec:
                        print('Adding temporal shift... {}'.format(
                            block.use_res_connect))
                    block.conv[0] = TemporalShift(
                        block.conv[0],
                        n_segment=self.num_segments,
                        n_div=self.shift_div,
                    )

            adapt_stats_to_modality()
            return

        if base_model == 'BNInception':
            from archs.bn_inception import bninception
            self.base_model = bninception(pretrained=self.pretrain)
            # This backbone carries its own preprocessing attributes.
            self.input_size = self.base_model.input_size
            self.input_mean = self.base_model.mean
            self.input_std = self.base_model.std
            self.base_model.last_layer_name = 'fc'

            if self.modality == 'Flow':
                self.input_mean = [128]
            elif self.modality == 'RGBDiff':
                self.input_mean = self.input_mean * (1 + self.new_length)

            if self.is_shift:
                print('Adding temporal shift...')
                self.base_model.build_temporal_ops(
                    self.num_segments,
                    is_temporal_shift=self.shift_place,
                    shift_div=self.shift_div,
                )
            return

        raise ValueError('Unknown base model: {}'.format(base_model))
Esempio n. 4
0
    def _prepare_base_model(self, base_model):
        """Build the requested backbone and configure input normalisation.

        Populates ``self.base_model``, ``self.input_size``,
        ``self.input_mean`` and ``self.input_std``. No temporal modules are
        attached in this variant.

        Args:
            base_model: A name containing ``'tiny_resnet'`` (checked first),
                a name containing ``'resnet'``, ``'mobilenetv2'`` or
                ``'BNInception'``.

        Raises:
            ValueError: For any other ``base_model``.
        """
        print('=> base model: {}'.format(base_model))

        def set_rgb_defaults():
            # Input size and RGB mean/std shared by the branches below.
            self.input_size = 224
            self.input_mean = [0.485, 0.456, 0.406]
            self.input_std = [0.229, 0.224, 0.225]

        def adapt_stats_to_modality():
            # Flow and RGBDiff inputs need different statistics from RGB.
            if self.modality == 'Flow':
                self.input_mean = [0.5]
                self.input_std = [np.mean(self.input_std)]
            elif self.modality == 'RGBDiff':
                self.input_mean = (
                    [0.485, 0.456, 0.406] + [0] * 3 * self.new_length)
                rgb_std = self.input_std
                self.input_std = (
                    rgb_std + [np.mean(rgb_std) * 2] * 3 * self.new_length)

        # For now only the tiny ResNet-50 is supported on this path. It must
        # be tested before the generic 'resnet' substring check below.
        if 'tiny_resnet' in base_model:
            from archs.resnet import ResNet
            # Use the plain, original ResNet here without any extra tricks.
            plain_resnet_cfg = dict(
                pretrained=None,
                num_stages=4,
                strides=(1, 2, 2, 2),
                dilations=(1, 1, 1, 1),
                out_indices=(0, 1, 2, 3),
                style='pytorch',
                frozen_stages=-1,
                bn_eval=False,
                bn_frozen=False,
                partial_bn=False,
                with_cp=False,
            )
            self.base_model = ResNet(50, **plain_resnet_cfg)

            set_rgb_defaults()
            adapt_stats_to_modality()
            return

        # ResNet-family backbones are taken directly from torchvision.models.
        if 'resnet' in base_model:
            resnet_ctor = getattr(torchvision.models, base_model)
            self.base_model = resnet_ctor(self.pretrain == 'imagenet')
            self.base_model.last_layer_name = 'fc'

            set_rgb_defaults()
            adapt_stats_to_modality()
            return

        # MobileNet and BNInception are not taken from torchvision here, so
        # they are imported from the local ``archs`` package instead.
        if base_model == 'mobilenetv2':
            from archs.mobilenet_v2 import mobilenet_v2, InvertedResidual
            self.base_model = mobilenet_v2(self.pretrain == 'imagenet')
            self.base_model.last_layer_name = 'classifier'

            set_rgb_defaults()
            adapt_stats_to_modality()
            return

        if base_model == 'BNInception':
            from archs.bn_inception import bninception
            self.base_model = bninception(pretrained=self.pretrain)
            # This backbone carries its own preprocessing attributes.
            self.input_size = self.base_model.input_size
            self.input_mean = self.base_model.mean
            self.input_std = self.base_model.std
            self.base_model.last_layer_name = 'fc'

            if self.modality == 'Flow':
                self.input_mean = [128]
            elif self.modality == 'RGBDiff':
                self.input_mean = self.input_mean * (1 + self.new_length)
            return

        raise ValueError('Unknown base model: {}'.format(base_model))
    def _prepare_base_model(self, base_model):
        """Build the requested backbone and configure input normalisation.

        Populates ``self.base_model``, ``self.input_size``,
        ``self.input_mean`` and ``self.input_std``, and applies the optional
        temporal-shift / non-local helpers selected by the instance flags.

        Args:
            base_model: A name containing ``'resnet'`` (looked up in
                ``torchvision.models``), a name containing ``'repvgg'``
                together with ``'A0'`` or ``'B1g2'``, ``'mobilenetv2'`` or
                ``'BNInception'``.

        Raises:
            ValueError: If ``base_model`` is not supported. This includes a
                ``repvgg`` name that is neither an ``A0`` nor a ``B1g2``
                variant, which previously fell through without ever
                assigning ``self.base_model``.
        """
        print('=> base model: {}'.format(base_model))

        def set_rgb_defaults():
            # Input size and RGB mean/std shared by the branches below.
            self.input_size = 224
            self.input_mean = [0.485, 0.456, 0.406]
            self.input_std = [0.229, 0.224, 0.225]

        def adapt_stats_to_modality():
            # Flow collapses to single-entry statistics; RGBDiff keeps the
            # RGB triple and appends 3 * new_length further entries.
            if self.modality == 'Flow':
                self.input_mean = [0.5]
                self.input_std = [np.mean(self.input_std)]
            elif self.modality == 'RGBDiff':
                self.input_mean = (
                    [0.485, 0.456, 0.406] + [0] * 3 * self.new_length)
                rgb_std = self.input_std
                self.input_std = (
                    rgb_std + [np.mean(rgb_std) * 2] * 3 * self.new_length)

        if 'resnet' in base_model:
            resnet_ctor = getattr(torchvision.models, base_model)
            self.base_model = resnet_ctor(self.pretrain == 'imagenet')

            if self.is_shift:
                print('Adding temporal shift...')
                from ops.temporal_shift import make_temporal_shift
                make_temporal_shift(self.base_model,
                                    self.num_segments,
                                    n_div=self.shift_div,
                                    place=self.shift_place,
                                    temporal_pool=self.temporal_pool,
                                    soft=self.is_softshift,
                                    init_mode=self.shift_init_mode)

            if self.non_local:
                print('Adding non-local module...')
                from ops.non_local import make_non_local
                make_non_local(self.base_model, self.num_segments)

            self.base_model.last_layer_name = 'fc'
            set_rgb_defaults()
            self.base_model.avgpool = nn.AdaptiveAvgPool2d(1)
            adapt_stats_to_modality()

        elif 'repvgg' in base_model:
            # Resolve the variant before doing anything else so that an
            # unsupported name fails immediately with a clear error instead
            # of leaving self.base_model unassigned.
            if 'A0' in base_model:  # compare with res18
                variant = 'A0'
            elif 'B1g2' in base_model:  # compare with res50
                variant = 'B1g2'
            else:
                raise ValueError(
                    'Unknown base model: {}'.format(base_model))

            from archs.repvgg import repvgg_A0, repvgg_B1g2
            build_repvgg = repvgg_A0 if variant == 'A0' else repvgg_B1g2
            self.base_model = build_repvgg(self.pretrain == 'imagenet',
                                           deploy=self.deploy)

            if self.is_shift:
                print('Adding temporal shift...')
                from ops.temporal_shift import make_temporal_shift
                make_temporal_shift(self.base_model,
                                    self.num_segments,
                                    n_div=self.shift_div,
                                    place=self.shift_place,
                                    temporal_pool=self.temporal_pool,
                                    deploy=self.deploy,
                                    soft=self.is_softshift,
                                    init_mode=self.shift_init_mode)

            self.base_model.last_layer_name = 'linear'
            set_rgb_defaults()
            # The pooling attribute is named ``gap`` on this backbone.
            self.base_model.gap = nn.AdaptiveAvgPool2d(1)
            adapt_stats_to_modality()

        elif base_model == 'mobilenetv2':
            from archs.mobilenet_v2 import mobilenet_v2, InvertedResidual
            self.base_model = mobilenet_v2(self.pretrain == 'imagenet')

            self.base_model.last_layer_name = 'classifier'
            set_rgb_defaults()
            self.base_model.avgpool = nn.AdaptiveAvgPool2d(1)

            if self.is_shift:
                from ops.temporal_shift import TemporalShift
                for m in self.base_model.modules():
                    # Only residual InvertedResidual blocks whose ``conv``
                    # has exactly 8 entries get their first entry wrapped.
                    if (isinstance(m, InvertedResidual)
                            and len(m.conv) == 8 and m.use_res_connect):
                        if self.print_spec:
                            print('Adding temporal shift... {}'.format(
                                m.use_res_connect))
                        m.conv[0] = TemporalShift(
                            m.conv[0],
                            n_segment=self.num_segments,
                            n_div=self.shift_div,
                            soft=self.is_softshift,
                            init_mode=self.shift_init_mode)

            adapt_stats_to_modality()

        elif base_model == 'BNInception':
            from archs.bn_inception import bninception
            self.base_model = bninception(pretrained=self.pretrain)
            # This backbone carries its own preprocessing attributes.
            self.input_size = self.base_model.input_size
            self.input_mean = self.base_model.mean
            self.input_std = self.base_model.std
            self.base_model.last_layer_name = 'fc'
            if self.modality == 'Flow':
                self.input_mean = [128]
            elif self.modality == 'RGBDiff':
                self.input_mean = self.input_mean * (1 + self.new_length)
            if self.is_shift:
                print('Adding temporal shift...')
                self.base_model.build_temporal_ops(
                    self.num_segments,
                    is_temporal_shift=self.shift_place,
                    shift_div=self.shift_div)

        else:
            raise ValueError('Unknown base model: {}'.format(base_model))
Esempio n. 6
0
    def _prepare_base_model(self, base_model):
        """Build the requested backbone and configure input normalisation.

        Populates ``self.base_model``, ``self.input_size``,
        ``self.input_mean`` and ``self.input_std``, and applies the optional
        temporal modules selected by the instance flags.

        Args:
            base_model: A name containing ``'resnet'`` (looked up in
                ``torchvision.models``), ``'mobilenetv2'``,
                ``'BNInception'``, ``'R2plus1D'`` or ``'X3D'``.

        Raises:
            ValueError: For any other ``base_model``.
        """
        print('=> base model: {}'.format(base_model))

        def set_rgb_defaults():
            # Input size and RGB mean/std shared by the 2D backbones below.
            self.input_size = 224
            self.input_mean = [0.485, 0.456, 0.406]
            self.input_std = [0.229, 0.224, 0.225]

        def adapt_stats_to_modality():
            # Flow collapses to single-entry statistics; RGBDiff keeps the
            # RGB triple and appends 3 * new_length further entries.
            if self.modality == 'Flow':
                self.input_mean = [0.5]
                self.input_std = [np.mean(self.input_std)]
            elif self.modality == 'RGBDiff':
                self.input_mean = (
                    [0.485, 0.456, 0.406] + [0] * 3 * self.new_length)
                rgb_std = self.input_std
                self.input_std = (
                    rgb_std + [np.mean(rgb_std) * 2] * 3 * self.new_length)

        if 'resnet' in base_model:
            resnet_ctor = getattr(torchvision.models, base_model)
            self.base_model = resnet_ctor(self.pretrain == 'imagenet')

            # Each enabled flag applies its helper to the new network, in
            # this fixed order; the helpers' return values are not used.
            if self.is_sTSA:
                print('Adding spatial aware temporal selective aggregation...')
                from ops.sTSA import make_sTSA
                make_sTSA(self.base_model,
                          self.num_segments,
                          n_div=self.shift_div,
                          temporal_pool=self.temporal_pool)
            if self.is_TSA:
                print("Adding temporal selective aggregation...")
                from ops.TSA import make_TSA
                make_TSA(self.base_model,
                         self.num_segments,
                         n_div=self.shift_div,
                         temporal_pool=self.temporal_pool,
                         shift_groups=self.shift_groups,
                         shift_diffs=self.shift_diff)
            if self.is_shift:
                print('Adding temporal shift...')
                from ops.temporal_shift import make_temporal_shift
                make_temporal_shift(self.base_model,
                                    self.num_segments,
                                    n_div=self.shift_div,
                                    temporal_pool=self.temporal_pool)

            if self.is_ME:
                print("Adding temporal ME...")
                from ops.ME import make_temporal_me
                make_temporal_me(self.base_model)

            if self.is_tTSA:
                print("Adding tTSA module...")
                from ops.tTSA import make_tTSA
                make_tTSA(self.base_model,
                          self.num_segments,
                          n_div=self.shift_div,
                          temporal_pool=self.temporal_pool)

            if self.non_local:
                print('Adding non-local module...')
                from ops.non_local import make_non_local
                make_non_local(self.base_model, self.num_segments)

            self.base_model.last_layer_name = 'fc'
            set_rgb_defaults()
            self.base_model.avgpool = nn.AdaptiveAvgPool2d(1)
            adapt_stats_to_modality()

        elif base_model == 'mobilenetv2':
            from archs.mobilenet_v2 import mobilenet_v2, InvertedResidual
            self.base_model = mobilenet_v2(self.pretrain == 'imagenet')

            self.base_model.last_layer_name = 'classifier'
            set_rgb_defaults()
            self.base_model.avgpool = nn.AdaptiveAvgPool2d(1)

            if self.is_shift:
                from ops.adaptive_temporal_shift import TemporalShift
                for m in self.base_model.modules():
                    # Only residual InvertedResidual blocks whose ``conv``
                    # has exactly 8 entries get their first entry wrapped.
                    if (isinstance(m, InvertedResidual)
                            and len(m.conv) == 8 and m.use_res_connect):
                        if self.print_spec:
                            print('Adding temporal shift... {}'.format(
                                m.use_res_connect))
                        m.conv[0] = TemporalShift(m.conv[0],
                                                  n_segment=self.num_segments,
                                                  n_div=self.shift_div)

            adapt_stats_to_modality()

        elif base_model == 'BNInception':
            from archs.bn_inception import bninception
            self.base_model = bninception(pretrained=self.pretrain)
            # This backbone carries its own preprocessing attributes.
            self.input_size = self.base_model.input_size
            self.input_mean = self.base_model.mean
            self.input_std = self.base_model.std
            self.base_model.last_layer_name = 'fc'
            if self.modality == 'Flow':
                self.input_mean = [128]
            elif self.modality == 'RGBDiff':
                self.input_mean = self.input_mean * (1 + self.new_length)
            if self.is_shift:
                print('Adding temporal shift...')
                self.base_model.build_temporal_ops(
                    self.num_segments,
                    is_temporal_shift=self.shift_place,
                    shift_div=self.shift_div)

        elif base_model == "R2plus1D":
            from archs.R2plus1D import r2plus1d_34
            self.base_model = r2plus1d_34("r2plus1d_34_32_kinetics")

            if self.is_ME:
                print("Adding temporal ME...")
                from ops.ME import make_temporal_me
                make_temporal_me(self.base_model)

            # This branch uses its own input size and mean/std constants.
            self.input_size = 112
            self.input_mean = [0.43216, 0.394666, 0.37645]
            self.input_std = [0.22803, 0.22145, 0.216989]
            self.base_model.last_layer_name = 'fc'

        elif base_model == "X3D":
            from archs.X3D import build_model
            self.base_model = build_model(self.cfg_file)
            # NOTE(review): the checkpoint path is hard-coded to one
            # machine's filesystem.
            # Load onto the CPU so the checkpoint can be read on hosts that
            # lack the device it was saved from; load_state_dict then copies
            # the values into the model's existing parameters.
            checkpoint = torch.load("/data/yijunq/models/TSA/x3d_l.pyth",
                                    map_location='cpu')
            self.base_model.load_state_dict(checkpoint["model_state"])
            self.input_size = 312
            self.input_mean = [0.485, 0.456, 0.406]
            self.input_std = [0.229, 0.224, 0.225]
            self.base_model.last_layer_name = 'projection'

        else:
            raise ValueError('Unknown base model: {}'.format(base_model))
Esempio n. 7
0
File: models.py Progetto: zymale/TIN
    def _prepare_base_model(self, base_model, config={}):
        """Build the backbone named by ``base_model`` and set input statistics.

        Populates ``self.base_model``, ``self.input_size``,
        ``self.input_mean`` and ``self.input_std``; the mean/std lists are
        adjusted when ``self.modality`` is ``'Flow'`` or ``'RGBDiff'``.

        Args:
            base_model: Backbone name. Names starting with ``'resnet'`` are
                looked up in ``torchvision.models``; ``'BNInception'`` is
                built from ``archs.bn_inception``.
            config: Not read anywhere in the visible body.
                NOTE(review): this is a mutable default argument (``{}``);
                it is only safe while the dict is never mutated.

        NOTE(review): no fallback branch for other ``base_model`` values is
        visible in this excerpt — confirm how unknown names are handled.
        """
        print('=> base model: {}'.format(base_model))

        if base_model.startswith('resnet'):

            # The torchvision constructor always receives a positional
            # ``True`` here; ``self.pretrain`` is not consulted on this path.
            self.base_model = getattr(torchvision.models, base_model)(True)

            # Optional add-ons: each helper is handed the freshly built
            # network and its return value is not used.
            if self.is_shift:
                print('Adding temporal shift...')
                from ops.temporal_shift import make_temporal_shift
                make_temporal_shift(self.base_model,
                                    self.num_segments,
                                    n_div=self.shift_div,
                                    place=self.shift_place,
                                    temporal_pool=self.temporal_pool,
                                    two_path=True)

            if self.tin:
                # The log line says "temporal deformable conv"; the helper
                # actually called is make_temporal_interlace.
                print('Adding temporal deformable conv...')
                from ops.temporal_interlace import make_temporal_interlace
                make_temporal_interlace(self.base_model,
                                        self.num_segments,
                                        shift_div=self.shift_div)

            if self.non_local:
                print('Adding non-local module...')
                from ops.non_local import make_non_local
                make_non_local(self.base_model, self.num_segments)

            self.base_model.last_layer_name = 'fc'
            self.input_size = 224
            self.input_mean = [0.485, 0.456, 0.406]
            self.input_std = [0.229, 0.224, 0.225]

            # Swap the pooling layer for one with a fixed 1x1 output.
            self.base_model.avgpool = nn.AdaptiveAvgPool2d(1)

            if self.modality == 'Flow':
                # Single-entry statistics: fixed mean, averaged RGB std.
                self.input_mean = [0.5]
                self.input_std = [np.mean(self.input_std)]
            elif self.modality == 'RGBDiff':
                # Keep the RGB triple and append 3 * new_length entries
                # (zeros for the mean, twice the averaged std for the std).
                self.input_mean = [0.485, 0.456, 0.406
                                   ] + [0] * 3 * self.new_length
                self.input_std = self.input_std + [
                    np.mean(self.input_std) * 2
                ] * 3 * self.new_length

        elif base_model == 'BNInception':
            from archs.bn_inception import bninception
            self.base_model = bninception(pretrained=self.pretrain)
            # This backbone carries its own preprocessing attributes.
            self.input_size = self.base_model.input_size
            self.input_mean = self.base_model.mean
            self.input_std = self.base_model.std
            self.base_model.last_layer_name = 'fc'
            if self.modality == 'Flow':
                self.input_mean = [128]
            elif self.modality == 'RGBDiff':
                # Repeat the mean list (1 + new_length) times.
                self.input_mean = self.input_mean * (1 + self.new_length)
            if self.is_shift:
                print('Adding temporal shift...')
                self.base_model.build_temporal_ops(
                    self.num_segments,
                    is_temporal_shift=self.shift_place,
                    shift_div=self.shift_div)