Ejemplo n.º 1
0
    def _prepare_base_model(self, base_model):
        print('=> base model: {}'.format(base_model))

        if 'resnet' in base_model:
            self.base_model = getattr(torchvision.models, base_model)(True if self.pretrain == 'imagenet' else False)  # 获取torchvision.models.[base_model]()属性,
            if self.is_shift:  # 即从torchvision中导入base_model
                print('Adding temporal shift...')
                from ops.temporal_shift import make_temporal_shift
                make_temporal_shift(self.base_model, self.num_segments,
                                    n_div=self.shift_div, place=self.shift_place, temporal_pool=self.temporal_pool)

            if self.non_local:
                print('Adding non-local module...')
                from ops.non_local import make_non_local
                make_non_local(self.base_model, self.num_segments)

            self.base_model.last_layer_name = 'fc'
            self.input_size = 224
            self.input_mean = [0.485, 0.456, 0.406]
            self.input_std = [0.229, 0.224, 0.225]

            self.base_model.avgpool = nn.AdaptiveAvgPool2d(1)

            if self.modality == 'Flow':
                self.input_mean = [0.5]
                self.input_std = [np.mean(self.input_std)]
            elif self.modality == 'RGBDiff':
                self.input_mean = [0.485, 0.456, 0.406] + [0] * 3 * self.new_length
                self.input_std = self.input_std + [np.mean(self.input_std) * 2] * 3 * self.new_length

        else:
            raise ValueError('Unknown base model: {}'.format(base_model))
Ejemplo n.º 2
0
Archivo: models.py Proyecto: CVIR/TCL
    def _prepare_base_model(self, base_model):
        print('=> base model: {}'.format(base_model))

        if 'resnet' in base_model:
            self.base_model = resnet_dict[base_model](True if self.pretrain ==
                                                      'imagenet' else False)
            if self.is_shift:
                print('Adding temporal shift...')
                from ops.temporal_shift import make_temporal_shift
                make_temporal_shift(self.base_model,
                                    self.num_segments,
                                    second_segments=self.second_segments,
                                    n_div=self.shift_div,
                                    place=self.shift_place,
                                    temporal_pool=self.temporal_pool)

            if self.non_local:
                print('Adding non-local module...')
                from ops.non_local import make_non_local
                make_non_local(self.base_model, self.num_segments)

            self.base_model.last_layer_name = 'fc'
            self.input_size = 224
            self.input_mean = [0.485, 0.456, 0.406]
            self.input_std = [0.229, 0.224, 0.225]

            self.base_model.avgpool = nn.AdaptiveAvgPool2d(1)
        else:
            raise ValueError('Unknown base model: {}'.format(base_model))
Ejemplo n.º 3
0
    def _prepare_base_model(self, base_model):
        print('=> base model: {}'.format(base_model))

        if 'resnet' in base_model:
            self.base_model = getattr(torchvision.models, base_model)(True)
            if self.is_shift:
                print('Adding temporal shift...')
                from ops.temporal_shift import make_temporal_shift
                make_temporal_shift(self.base_model,
                                    self.num_segments,
                                    n_div=self.shift_div,
                                    place=self.shift_place,
                                    temporal_pool=self.temporal_pool)

            if self.non_local:
                print('Adding non-local module...')
                from ops.non_local import make_non_local
                make_non_local(self.base_model, self.num_segments)
            if self.cca3d:
                print('Adding CCA-3D module...')
                from ops.cca3d import make_cca3d
                make_cca3d(self.base_model, self.num_segments)

            self.base_model.last_layer_name = 'fc'
            self.input_size = 224
            self.input_mean = [0.485, 0.456, 0.406]
            self.input_std = [0.229, 0.224, 0.225]

            self.base_model.avgpool = nn.AdaptiveAvgPool2d(1)

            if self.modality == 'Flow':
                self.input_mean = [0.5]
                self.input_std = [np.mean(self.input_std)]
            elif self.modality == 'RGBDiff':
                self.input_mean = [0.485, 0.456, 0.406
                                   ] + [0] * 3 * self.new_length
                self.input_std = self.input_std + [
                    np.mean(self.input_std) * 2
                ] * 3 * self.new_length

        elif base_model == 'BNInception':
            from archs.bn_inception import bninception
            self.base_model = bninception(pretrained=self.pretrain)
            self.input_size = self.base_model.input_size
            self.input_mean = self.base_model.mean
            self.input_std = self.base_model.std
            self.base_model.last_layer_name = 'fc'
            if self.modality == 'Flow':
                self.input_mean = [128]
            elif self.modality == 'RGBDiff':
                self.input_mean = self.input_mean * (1 + self.new_length)
            if self.is_shift:
                print('Adding temporal shift...')
                self.base_model.build_temporal_ops(
                    self.num_segments,
                    is_temporal_shift=self.shift_place,
                    shift_div=self.shift_div)
        else:
            raise ValueError('Unknown base model: {}'.format(base_model))
Ejemplo n.º 4
0
 def _make_a_shift(self, base_model):
     """Apply ``make_temporal_shift`` to ``self.base_model``.

     Note that the ``base_model`` argument is not used by this method; the
     shift is applied to the backbone already stored on the instance, with
     the segment count taken from ``self.args.num_segments``.
     """
     print('Adding temporal shift...')
     from ops.temporal_shift import make_temporal_shift
     backbone = self.base_model
     segment_count = self.args.num_segments
     make_temporal_shift(
         backbone,
         segment_count,
         n_div=self.shift_div,
         place=self.shift_place,
         temporal_pool=self.temporal_pool,
     )
def _prepare_base_model(self):
    """Build a Bottleneck ResNet and return it, optionally with temporal shift.

    The network is created as ``ResNet(Bottleneck, [3, 4, 6, 3])``. When
    ``self.pretrain == 'imagenet'`` the state dict downloaded from
    ``model_urls['resnet50']`` is loaded into it. When ``self.shift`` is
    truthy, ``make_temporal_shift`` is applied before returning.

    Returns:
        The constructed model.
    """
    print('=> base model: {}'.format('resnet'))

    backbone = ResNet(Bottleneck, [3, 4, 6, 3])
    if self.pretrain == 'imagenet':
        checkpoint = model_zoo.load_url(model_urls['resnet50'])
        backbone.load_state_dict(checkpoint)

    # Nothing more to do when temporal shift is disabled.
    if not self.shift:
        return backbone

    print('Adding temporal shift...')
    from ops.temporal_shift import make_temporal_shift
    make_temporal_shift(
        backbone,
        self.n_segments,
        n_div=self.shift_div,
        place=self.shift_place,
    )
    return backbone
Ejemplo n.º 6
0
    def _prepare_base_model(self, base_model):
        """Create the torchvision ResNet backbone and set input statistics.

        The backbone is stored on ``self.base_model``. ``self.input_size``,
        ``self.input_mean`` and ``self.input_std`` are set and, for the
        ``"Flow"`` modality, replaced by one-entry statistics.

        Args:
            base_model: Name of a constructor in ``torchvision.models``; must
                contain ``"resnet"``.

        Raises:
            ValueError: If ``base_model`` does not contain ``"resnet"``.
        """
        LOG.info(f"base model: {base_model}")

        # Only the exact value "imagenet" is forwarded to the constructor;
        # every other setting results in ``pretrained=None``.
        if self.pretrained == "imagenet":
            backbone_pretrained = "imagenet"
        else:
            backbone_pretrained = None

        has_backbone_weights = backbone_pretrained is not None
        if self.pretrained and has_backbone_weights:
            LOG.info(
                f"Loading backbone model with {backbone_pretrained} weights")
        elif self.pretrained is None and not has_backbone_weights:
            LOG.info("Randomly initialising backbone")

        # Guard clause: only ResNet variants are supported here.
        if "resnet" not in base_model:
            raise ValueError(f"Unknown base model: {base_model!r}")

        constructor = getattr(torchvision.models, base_model)
        backbone = constructor(pretrained=backbone_pretrained)
        self.base_model = backbone

        if self.is_shift:
            LOG.info("Adding temporal shift...")

            make_temporal_shift(
                backbone,
                self.num_segments,
                n_div=self.shift_div,
                place=self.shift_place,
                temporal_pool=self.temporal_pool,
            )

        if self.non_local:
            LOG.info("Adding non-local module...")
            from ..ops.non_local import make_non_local

            make_non_local(backbone, self.num_segments)

        backbone.last_layer_name = "fc"
        # Set ``avgpool`` to an adaptive average pool with output size 1.
        backbone.avgpool = nn.AdaptiveAvgPool2d(1)

        # NOTE(review): these look like the usual ImageNet RGB statistics --
        # confirm.
        self.input_size = 224
        self.input_mean = [0.485, 0.456, 0.406]
        self.input_std = [0.229, 0.224, 0.225]

        if self.modality == "Flow":
            # One-entry statistics; the std is the mean of the RGB stds.
            self.input_mean = [0.5]
            self.input_std = [np.mean(self.input_std)]
Ejemplo n.º 7
0
def _prepare_base_model(self):
    """Build a Bottleneck ResNet, configure it and return it.

    The network is created as ``ResNet(Bottleneck, [3, 4, 6, 3])``. When
    ``self.pretrain == 'imagenet'`` the state dict downloaded from
    ``model_urls['resnet50']`` is loaded into it. ``self.input_size``,
    ``self.input_mean`` and ``self.input_std`` are set as a side effect.

    Returns:
        The constructed model.
    """
    print('=> base model: {}'.format('resnet'))

    backbone = ResNet(Bottleneck, [3, 4, 6, 3])
    if self.pretrain == 'imagenet':
        checkpoint = model_zoo.load_url(model_urls['resnet50'])
        backbone.load_state_dict(checkpoint)

    if self.shift:
        print('Adding temporal shift...')
        from ops.temporal_shift import make_temporal_shift
        make_temporal_shift(
            backbone,
            self.n_segments,
            n_div=self.shift_div,
            place=self.shift_place,
        )

    backbone.last_layer_name = 'fc'
    # Set ``avgpool`` to an adaptive average pool with output size 1.
    backbone.avgpool = nn.AdaptiveAvgPool2d(1)

    # The input size is stored as a 2-tuple in this variant.
    self.input_size = (600, 600)
    # NOTE(review): these look like the usual ImageNet RGB statistics --
    # confirm.
    self.input_mean = [0.485, 0.456, 0.406]
    self.input_std = [0.229, 0.224, 0.225]

    return backbone
    def _prepare_base_model(self, base_model):
        print('=> base model: {}'.format(base_model))

        if 'resnet' in base_model:
            self.base_model = getattr(
                torchvision.models,
                base_model)(True if self.pretrain == 'imagenet' else False)
            if self.is_shift:
                print('Adding temporal shift...')
                from ops.temporal_shift import make_temporal_shift
                make_temporal_shift(self.base_model,
                                    self.num_segments,
                                    n_div=self.shift_div,
                                    place=self.shift_place,
                                    temporal_pool=self.temporal_pool)

            if self.non_local:
                print('Adding non-local module...')
                from ops.non_local import make_non_local
                make_non_local(self.base_model, self.num_segments)

            self.base_model.last_layer_name = 'fc'
            self.input_size = 224
            self.input_mean = [0.485, 0.456, 0.406]
            self.input_std = [0.229, 0.224, 0.225]

            self.base_model.avgpool = nn.AdaptiveAvgPool2d(1)

            if self.modality == 'Flow':
                self.input_mean = [0.5]
                self.input_std = [np.mean(self.input_std)]
            elif self.modality == 'RGBDiff':
                self.input_mean = [0.485, 0.456, 0.406
                                   ] + [0] * 3 * self.new_length
                self.input_std = self.input_std + [
                    np.mean(self.input_std) * 2
                ] * 3 * self.new_length

        elif base_model == 'mobilenetv2':
            from archs.mobilenet_v2 import mobilenet_v2, InvertedResidual
            self.base_model = mobilenet_v2(True if self.pretrain ==
                                           'imagenet' else False)

            self.base_model.last_layer_name = 'classifier'
            self.input_size = 224
            self.input_mean = [0.485, 0.456, 0.406]
            self.input_std = [0.229, 0.224, 0.225]

            self.base_model.avgpool = nn.AdaptiveAvgPool2d(1)
            if self.is_shift:
                from ops.temporal_shift import TemporalShift
                for m in self.base_model.modules():
                    if isinstance(m, InvertedResidual) and len(
                            m.conv) == 8 and m.use_res_connect:
                        if self.print_spec:
                            print('Adding temporal shift... {}'.format(
                                m.use_res_connect))
                        m.conv[0] = TemporalShift(m.conv[0],
                                                  n_segment=self.num_segments,
                                                  n_div=self.shift_div)
            if self.modality == 'Flow':
                self.input_mean = [0.5]
                self.input_std = [np.mean(self.input_std)]
            elif self.modality == 'RGBDiff':
                self.input_mean = [0.485, 0.456, 0.406
                                   ] + [0] * 3 * self.new_length
                self.input_std = self.input_std + [
                    np.mean(self.input_std) * 2
                ] * 3 * self.new_length

        elif base_model == 'BNInception':
            from archs.bn_inception import bninception
            self.base_model = bninception(pretrained=self.pretrain)
            self.input_size = self.base_model.input_size
            self.input_mean = self.base_model.mean
            self.input_std = self.base_model.std
            self.base_model.last_layer_name = 'fc'
            if self.modality == 'Flow':
                self.input_mean = [128]
            elif self.modality == 'RGBDiff':
                self.input_mean = self.input_mean * (1 + self.new_length)
            if self.is_shift:
                print('Adding temporal shift...')
                self.base_model.build_temporal_ops(
                    self.num_segments,
                    is_temporal_shift=self.shift_place,
                    shift_div=self.shift_div)
        else:
            raise ValueError('Unknown base model: {}'.format(base_model))
Ejemplo n.º 9
0
    def _prepare_base_model(self, base_model):
        print('=> base model: {}'.format(base_model))

        if 'resnet' in base_model:
            from archs.Resnet_ann_v3 import __resnet__
            self.base_model = __resnet__(
                base_model, True if self.pretrain == 'imagenet' else False,
                self.extra_temporal_modeling, self.num_segments,
                self.num_class)
            if self.is_shift:
                #summary(self.base_model,(3,256,256))
                #print(self.base_model)
                print('Adding temporal shift...')
                from ops.temporal_shift import make_temporal_shift
                make_temporal_shift(
                    self.base_model,
                    self.num_segments,
                    n_div=self.shift_div,
                    place=self.shift_place,
                    temporal_pool=self.temporal_pool,
                    concat=self.concat,
                    prune_list=self.prune_list[0]
                    if self.is_prune in ['input', 'inout'] else {},
                    prune=True
                    if self.is_prune in ['input', 'inout'] else False)
            if self.is_prune in ['output', 'inout']:
                print('prune from conv1 and conv2...')
                from ops.prune import make_prune_conv
                make_prune_conv(self.base_model, place=self.shift_place)

            if self.non_local:
                print('Adding non-local module...')
                from ops.non_local import make_non_local
                make_non_local(self.base_model, self.num_segments)

            self.base_model.last_layer_name = 'fc'

            self.input_size = 224
            self.input_mean = [0.485, 0.456, 0.406]
            self.input_std = [0.229, 0.224, 0.225]

            self.base_model.avgpool = nn.AdaptiveAvgPool2d(1)

            if self.modality == 'Flow':
                self.input_mean = [0.5]
                self.input_std = [np.mean(self.input_std)]
            elif self.modality == 'RGBDiff':
                self.input_mean = [0.485, 0.456, 0.406
                                   ] + [0] * 3 * self.new_length
                self.input_std = self.input_std + [
                    np.mean(self.input_std) * 2
                ] * 3 * self.new_length

        elif 'efficientnet' in base_model:
            from efficientnet_pytorch import EfficientNet
            self.base_model = EfficientNet.from_pretrained(
                base_model,
                temporal_modeling=self.extra_temporal_modeling,
                segment=self.num_segments,
                num_class=self.num_class)
            if self.is_shift:
                print('Adding temporal shift...')
                from ops.temporal_shift import make_efficientnet_shift
                make_efficientnet_shift(self.base_model,
                                        self.num_segments,
                                        n_div=self.shift_div,
                                        place=self.shift_place,
                                        temporal_pool=self.temporal_pool,
                                        concat=self.concat)

            if self.non_local:
                print('Adding non-local module...')
                from ops.non_local import make_non_local
                make_non_local(self.base_model, self.num_segments)

            self.base_model.dropout = '_dropout'
            self.base_model.last_layer_name = '_fc'
            self.input_size = 224
            self.input_mean = [0.485, 0.456, 0.406]
            self.input_std = [0.229, 0.224, 0.225]

            if self.modality == 'Flow':
                self.input_mean = [0.5]
                self.input_std = [np.mean(self.input_std)]
            elif self.modality == 'RGBDiff':
                self.input_mean = [0.485, 0.456, 0.406
                                   ] + [0] * 3 * self.new_length
                self.input_std = self.input_std + [
                    np.mean(self.input_std) * 2
                ] * 3 * self.new_length

        elif base_model == 'mobilenetv2':
            from archs.mobilenet_v2 import mobilenet_v2, InvertedResidual
            self.base_model = mobilenet_v2(True if self.pretrain ==
                                           'imagenet' else False)

            self.base_model.last_layer_name = 'classifier'
            self.input_size = 224
            self.input_mean = [0.485, 0.456, 0.406]
            self.input_std = [0.229, 0.224, 0.225]

            self.base_model.avgpool = nn.AdaptiveAvgPool2d(1)
            if self.is_shift:
                from ops.temporal_shift import TemporalShift
                for m in self.base_model.modules():
                    if isinstance(m, InvertedResidual) and len(
                            m.conv) == 8 and m.use_res_connect:
                        if self.print_spec:
                            print('Adding temporal shift... {}'.format(
                                m.use_res_connect))
                        m.conv[0] = TemporalShift(m.conv[0],
                                                  n_segment=self.num_segments,
                                                  n_div=self.shift_div)
            if self.modality == 'Flow':
                self.input_mean = [0.5]
                self.input_std = [np.mean(self.input_std)]
            elif self.modality == 'RGBDiff':
                self.input_mean = [0.485, 0.456, 0.406
                                   ] + [0] * 3 * self.new_length
                self.input_std = self.input_std + [
                    np.mean(self.input_std) * 2
                ] * 3 * self.new_length

        elif base_model == 'BNInception':
            from archs.bn_inception import bninception
            self.base_model = bninception(pretrained=self.pretrain)
            self.input_size = self.base_model.input_size
            self.input_mean = self.base_model.mean
            self.input_std = self.base_model.std
            self.base_model.last_layer_name = 'fc'
            if self.modality == 'Flow':
                self.input_mean = [128]
            elif self.modality == 'RGBDiff':
                self.input_mean = self.input_mean * (1 + self.new_length)
            if self.is_shift:
                print('Adding temporal shift...')
                self.base_model.build_temporal_ops(
                    self.num_segments,
                    is_temporal_shift=self.shift_place,
                    shift_div=self.shift_div)
        else:
            raise ValueError('Unknown base model: {}'.format(base_model))
Ejemplo n.º 10
0
    def _prepare_base_model(self, base_model):
        print('=> base model: {}'.format(base_model))

        if 'resnet' in base_model:
            # for 0.5 res18 >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
            self.base_model = getattr(
                torchvision.models,
                base_model)(True if self.pretrain == 'imagenet' else False)
            # if '18' in base_model:
            #     from .half_res18 import resnet18
            #     self.base_model = resnet18(pretrained=False)
            # else:
            #     self.base_model = getattr(torchvision.models, base_model)(
            #         True if self.pretrain == 'imagenet' else False)
            # for 0.5 res18 <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<

            if self.is_shift:
                print('Adding temporal shift...')
                from ops.temporal_shift import make_temporal_shift
                # from ops.spatial_transform import make_spatial_transform
                # make_spatial_transform(self.base_model)
                make_temporal_shift(self.base_model,
                                    self.num_segments,
                                    n_div=self.shift_div,
                                    place=self.shift_place,
                                    temporal_pool=self.temporal_pool,
                                    soft=self.is_softshift,
                                    init_mode=self.shift_init_mode)

            if self.non_local:
                print('Adding non-local module...')
                from ops.non_local import make_non_local
                make_non_local(self.base_model, self.num_segments)

            self.base_model.last_layer_name = 'fc'
            self.input_size = 224
            self.input_mean = [0.485, 0.456, 0.406]
            self.input_std = [0.229, 0.224, 0.225]

            self.base_model.avgpool = nn.AdaptiveAvgPool2d(1)

            if self.modality == 'Flow':
                self.input_mean = [0.5]
                self.input_std = [np.mean(self.input_std)]
            elif self.modality == 'RGBDiff':
                self.input_mean = [0.485, 0.456, 0.406
                                   ] + [0] * 3 * self.new_length
                self.input_std = self.input_std + [
                    np.mean(self.input_std) * 2
                ] * 3 * self.new_length

        elif 'repvgg' in base_model:
            from archs.repvgg import repvgg_A0, repvgg_B1g2
            # imagenet pretrained, deploy MODE
            if 'A0' in base_model:  # compare with res18
                self.base_model = repvgg_A0(
                    True if self.pretrain == 'imagenet' else False,
                    deploy=self.deploy)
            elif 'B1g2' in base_model:  # compare with res50
                self.base_model = repvgg_B1g2(
                    True if self.pretrain == 'imagenet' else False,
                    deploy=self.deploy)

            if self.is_shift:
                print('Adding temporal shift...')
                from ops.temporal_shift import make_temporal_shift
                make_temporal_shift(self.base_model,
                                    self.num_segments,
                                    n_div=self.shift_div,
                                    place=self.shift_place,
                                    temporal_pool=self.temporal_pool,
                                    deploy=self.deploy,
                                    soft=self.is_softshift,
                                    init_mode=self.shift_init_mode)

            self.base_model.last_layer_name = 'linear'
            self.input_size = 224
            self.input_mean = [0.485, 0.456, 0.406]
            self.input_std = [0.229, 0.224, 0.225]

            self.base_model.gap = nn.AdaptiveAvgPool2d(1)

            if self.modality == 'Flow':
                self.input_mean = [0.5]
                self.input_std = [np.mean(self.input_std)]
            elif self.modality == 'RGBDiff':
                self.input_mean = [0.485, 0.456, 0.406
                                   ] + [0] * 3 * self.new_length
                self.input_std = self.input_std + [
                    np.mean(self.input_std) * 2
                ] * 3 * self.new_length

        elif base_model == 'mobilenetv2':
            from archs.mobilenet_v2 import mobilenet_v2, InvertedResidual
            self.base_model = mobilenet_v2(True if self.pretrain ==
                                           'imagenet' else False)
            # import pdb; pdb.set_trace()
            self.base_model.last_layer_name = 'classifier'
            self.input_size = 224
            self.input_mean = [0.485, 0.456, 0.406]
            self.input_std = [0.229, 0.224, 0.225]

            self.base_model.avgpool = nn.AdaptiveAvgPool2d(1)
            if self.is_shift:
                from ops.temporal_shift import TemporalShift
                for m in self.base_model.modules():
                    if isinstance(m, InvertedResidual) and len(
                            m.conv) == 8 and m.use_res_connect:
                        if self.print_spec:
                            print('Adding temporal shift... {}'.format(
                                m.use_res_connect))
                        m.conv[0] = TemporalShift(
                            m.conv[0],
                            n_segment=self.num_segments,
                            n_div=self.shift_div,
                            soft=self.is_softshift,
                            init_mode=self.shift_init_mode)
            if self.modality == 'Flow':
                self.input_mean = [0.5]
                self.input_std = [np.mean(self.input_std)]
            elif self.modality == 'RGBDiff':
                self.input_mean = [0.485, 0.456, 0.406
                                   ] + [0] * 3 * self.new_length
                self.input_std = self.input_std + [
                    np.mean(self.input_std) * 2
                ] * 3 * self.new_length

        elif base_model == 'BNInception':
            from archs.bn_inception import bninception
            self.base_model = bninception(pretrained=self.pretrain)
            self.input_size = self.base_model.input_size
            self.input_mean = self.base_model.mean
            self.input_std = self.base_model.std
            self.base_model.last_layer_name = 'fc'
            if self.modality == 'Flow':
                self.input_mean = [128]
            elif self.modality == 'RGBDiff':
                self.input_mean = self.input_mean * (1 + self.new_length)
            if self.is_shift:
                print('Adding temporal shift...')
                self.base_model.build_temporal_ops(
                    self.num_segments,
                    is_temporal_shift=self.shift_place,
                    shift_div=self.shift_div)

        else:
            raise ValueError('Unknown base model: {}'.format(base_model))
    def _prepare_base_model(self, base_model):
        """Create the backbone named by ``base_model`` and set input stats.

        Matching is case-insensitive: names containing ``"resnet"`` are built
        from the ``pretrainedmodels`` package, and ``"bninception"`` from
        ``archs``. The backbone is stored on ``self.base_model``;
        ``self.input_size``, ``self.input_mean`` and ``self.input_std`` are set
        and adjusted for ``self.modality``.

        Args:
            base_model: Backbone identifier.

        Raises:
            ValueError: If ``base_model`` matches neither supported family.
        """
        LOG.info("=> base model: {}".format(base_model))

        # Only the exact value "imagenet" is forwarded to the constructors;
        # every other setting results in ``pretrained=None``.
        if self.pretrained == "imagenet":
            backbone_pretrained = "imagenet"
        else:
            backbone_pretrained = None

        normalized_name = base_model.lower()

        if "resnet" in normalized_name:
            # The attribute lookup uses the name exactly as given, not the
            # lower-cased form.
            constructor = getattr(pretrainedmodels, base_model)
            backbone = constructor(pretrained=backbone_pretrained)
            self.base_model = backbone

            if self.is_shift:
                LOG.info("Adding temporal shift...")
                from ops.temporal_shift import make_temporal_shift

                make_temporal_shift(
                    backbone,
                    self.num_segments,
                    n_div=self.shift_div,
                    place=self.shift_place,
                    temporal_pool=self.temporal_pool,
                )

            if self.non_local:
                LOG.info("Adding non-local module...")
                from ops.non_local import make_non_local

                make_non_local(backbone, self.num_segments)

            backbone.last_layer_name = "last_linear"
            # Set ``avgpool`` to an adaptive average pool with output size 1.
            backbone.avgpool = nn.AdaptiveAvgPool2d(1)

            # NOTE(review): these look like the usual ImageNet RGB
            # statistics -- confirm.
            self.input_size = 224
            self.input_mean = [0.485, 0.456, 0.406]
            self.input_std = [0.229, 0.224, 0.225]

            modality = self.modality
            if modality == "Flow":
                # One-entry statistics; std is the mean of the RGB stds.
                self.input_mean = [0.5]
                self.input_std = [np.mean(self.input_std)]
            elif modality == "RGBDiff":
                # Append 3 * new_length entries: zeros for the mean, twice
                # the average RGB std for the std.
                zero_means = [0] * 3 * self.new_length
                self.input_mean = [0.485, 0.456, 0.406] + zero_means
                extra_stds = [np.mean(self.input_std) * 2
                              ] * 3 * self.new_length
                self.input_std = self.input_std + extra_stds
            return

        if normalized_name != "bninception":
            raise ValueError("Unknown base model: {}".format(base_model))

        # BNInception: input size and statistics are read off the backbone.
        from archs import bninception

        backbone = bninception(pretrained=backbone_pretrained)
        self.base_model = backbone
        self.input_size = backbone.input_size
        self.input_mean = backbone.mean
        self.input_std = backbone.std
        backbone.last_layer_name = "fc"

        modality = self.modality
        if modality == "Flow":
            self.input_mean = [128]
        elif modality == "RGBDiff":
            # Repeat the mean list (1 + new_length) times.
            self.input_mean = self.input_mean * (1 + self.new_length)

        if self.is_shift:
            LOG.info("Adding temporal shift...")
            backbone.build_temporal_ops(
                self.num_segments,
                is_temporal_shift=self.shift_place,
                shift_div=self.shift_div,
            )
Ejemplo n.º 12
0
    def _prepare_base_model(self, base_model):
        print('=> base model: {}'.format(base_model))

        if 'resnet' in base_model:
            self.base_model = getattr(
                torchvision.models,
                base_model)(True if self.pretrain == 'imagenet' else False)
            if self.is_sTSA:
                print('Adding spatial aware temporal selective aggregation...')
                from ops.sTSA import make_sTSA
                make_sTSA(self.base_model,
                          self.num_segments,
                          n_div=self.shift_div,
                          temporal_pool=self.temporal_pool)
            if self.is_TSA:
                print("Adding temporal selective aggregation...")
                from ops.TSA import make_TSA
                make_TSA(self.base_model,
                         self.num_segments,
                         n_div=self.shift_div,
                         temporal_pool=self.temporal_pool,
                         shift_groups=self.shift_groups,
                         shift_diffs=self.shift_diff)
            if self.is_shift:
                print('Adding temporal shift...')
                from ops.temporal_shift import make_temporal_shift
                make_temporal_shift(self.base_model,
                                    self.num_segments,
                                    n_div=self.shift_div,
                                    temporal_pool=self.temporal_pool)

            if self.is_ME:
                print("Adding temporal ME...")
                from ops.ME import make_temporal_me
                make_temporal_me(self.base_model)

            if self.is_tTSA:
                print("Adding tTSA module...")
                from ops.tTSA import make_tTSA
                make_tTSA(self.base_model,
                          self.num_segments,
                          n_div=self.shift_div,
                          temporal_pool=self.temporal_pool)

            if self.non_local:
                print('Adding non-local module...')
                from ops.non_local import make_non_local
                make_non_local(self.base_model, self.num_segments)

            self.base_model.last_layer_name = 'fc'
            self.input_size = 224
            self.input_mean = [0.485, 0.456, 0.406]
            self.input_std = [0.229, 0.224, 0.225]

            self.base_model.avgpool = nn.AdaptiveAvgPool2d(1)

            if self.modality == 'Flow':
                self.input_mean = [0.5]
                self.input_std = [np.mean(self.input_std)]
            elif self.modality == 'RGBDiff':
                self.input_mean = [0.485, 0.456, 0.406
                                   ] + [0] * 3 * self.new_length
                self.input_std = self.input_std + [
                    np.mean(self.input_std) * 2
                ] * 3 * self.new_length

        elif base_model == 'mobilenetv2':
            from archs.mobilenet_v2 import mobilenet_v2, InvertedResidual
            self.base_model = mobilenet_v2(True if self.pretrain ==
                                           'imagenet' else False)

            self.base_model.last_layer_name = 'classifier'
            self.input_size = 224
            self.input_mean = [0.485, 0.456, 0.406]
            self.input_std = [0.229, 0.224, 0.225]

            self.base_model.avgpool = nn.AdaptiveAvgPool2d(1)
            if self.is_shift:
                from ops.adaptive_temporal_shift import TemporalShift
                for m in self.base_model.modules():
                    if isinstance(m, InvertedResidual) and len(
                            m.conv) == 8 and m.use_res_connect:
                        if self.print_spec:
                            print('Adding temporal shift... {}'.format(
                                m.use_res_connect))
                        m.conv[0] = TemporalShift(m.conv[0],
                                                  n_segment=self.num_segments,
                                                  n_div=self.shift_div)
            if self.modality == 'Flow':
                self.input_mean = [0.5]
                self.input_std = [np.mean(self.input_std)]
            elif self.modality == 'RGBDiff':
                self.input_mean = [0.485, 0.456, 0.406
                                   ] + [0] * 3 * self.new_length
                self.input_std = self.input_std + [
                    np.mean(self.input_std) * 2
                ] * 3 * self.new_length

        elif base_model == 'BNInception':
            from archs.bn_inception import bninception
            self.base_model = bninception(pretrained=self.pretrain)
            self.input_size = self.base_model.input_size
            self.input_mean = self.base_model.mean
            self.input_std = self.base_model.std
            self.base_model.last_layer_name = 'fc'
            if self.modality == 'Flow':
                self.input_mean = [128]
            elif self.modality == 'RGBDiff':
                self.input_mean = self.input_mean * (1 + self.new_length)
            if self.is_shift:
                print('Adding temporal shift...')
                self.base_model.build_temporal_ops(
                    self.num_segments,
                    is_temporal_shift=self.shift_place,
                    shift_div=self.shift_div)

        elif base_model == "R2plus1D":
            from archs.R2plus1D import r2plus1d_34
            self.base_model = r2plus1d_34("r2plus1d_34_32_kinetics")

            if self.is_ME:
                print("Adding temporal ME...")
                from ops.ME import make_temporal_me
                make_temporal_me(self.base_model)

            self.input_size = 112
            self.input_mean = [0.43216, 0.394666, 0.37645]
            self.input_std = [0.22803, 0.22145, 0.216989]
            self.base_model.last_layer_name = 'fc'

        elif base_model == "X3D":
            from archs.X3D import X3D, build_model
            self.base_model = build_model(self.cfg_file)
            checkpoint = torch.load("/data/yijunq/models/TSA/x3d_l.pyth")
            self.base_model.load_state_dict(checkpoint["model_state"])
            self.input_size = 312
            self.input_mean = [0.485, 0.456, 0.406]
            self.input_std = [0.229, 0.224, 0.225]
            self.base_model.last_layer_name = 'projection'
        else:
            raise ValueError('Unknown base model: {}'.format(base_model))
Ejemplo n.º 13
0
Archivo: models.py Proyecto: zymale/TIN
    def _prepare_base_model(self, base_model, config={}):
        """Build the backbone named by ``base_model`` and record its input settings.

        On the branches below this sets ``self.base_model``,
        ``self.input_size``, ``self.input_mean`` and ``self.input_std``, and
        tags the backbone with ``last_layer_name``.

        * Names starting with ``'resnet'`` are looked up as attributes of
          ``torchvision.models``. Depending on ``self.is_shift``, ``self.tin``
          and ``self.non_local`` the backbone is then passed to
          ``make_temporal_shift``, ``make_temporal_interlace`` and
          ``make_non_local`` respectively.
        * ``'BNInception'`` is built with ``archs.bn_inception.bninception``
          and takes its input size / mean / std from the built model.

        Args:
            base_model: Name of the backbone architecture (a string).
            config: Not referenced in the code below.
                NOTE(review): the default is a mutable ``{}`` shared across
                calls -- confirm nothing mutates it before relying on it.
        """
        print('=> base model: {}'.format(base_model))

        if base_model.startswith('resnet'):

            # Resolve the constructor by name from torchvision.models.
            # NOTE(review): the positional ``True`` is presumably the
            # ``pretrained`` flag -- confirm against the installed torchvision.
            # Unlike the BNInception branch, ``self.pretrain`` is not consulted.
            self.base_model = getattr(torchvision.models, base_model)(True)

            if self.is_shift:
                print('Adding temporal shift...')
                # Local import: ops.temporal_shift is only loaded when this
                # branch runs.
                from ops.temporal_shift import make_temporal_shift
                make_temporal_shift(self.base_model,
                                    self.num_segments,
                                    n_div=self.shift_div,
                                    place=self.shift_place,
                                    temporal_pool=self.temporal_pool,
                                    two_path=True)

            if self.tin:
                # The log message says "deformable conv", but the helper that
                # is called is make_temporal_interlace.
                print('Adding temporal deformable conv...')
                from ops.temporal_interlace import make_temporal_interlace
                make_temporal_interlace(self.base_model,
                                        self.num_segments,
                                        shift_div=self.shift_div)

            if self.non_local:
                print('Adding non-local module...')
                from ops.non_local import make_non_local
                make_non_local(self.base_model, self.num_segments)

            # Record the name of the final layer and the default RGB input
            # size and per-channel mean / std.
            self.base_model.last_layer_name = 'fc'
            self.input_size = 224
            self.input_mean = [0.485, 0.456, 0.406]
            self.input_std = [0.229, 0.224, 0.225]

            # Replace the backbone's avgpool with adaptive pooling to a 1x1
            # output.
            self.base_model.avgpool = nn.AdaptiveAvgPool2d(1)

            if self.modality == 'Flow':
                # Single-entry statistics: mean 0.5, std = average of the
                # three RGB std values set above.
                self.input_mean = [0.5]
                self.input_std = [np.mean(self.input_std)]
            elif self.modality == 'RGBDiff':
                # Keep the three RGB entries and append 3 * new_length extra
                # entries: zeros for the mean, twice the average RGB std for
                # the std.
                self.input_mean = [0.485, 0.456, 0.406
                                   ] + [0] * 3 * self.new_length
                self.input_std = self.input_std + [
                    np.mean(self.input_std) * 2
                ] * 3 * self.new_length

        elif base_model == 'BNInception':
            from archs.bn_inception import bninception
            self.base_model = bninception(pretrained=self.pretrain)
            # Input size and normalisation statistics come from the model
            # object itself.
            self.input_size = self.base_model.input_size
            self.input_mean = self.base_model.mean
            self.input_std = self.base_model.std
            self.base_model.last_layer_name = 'fc'
            # Only input_mean is adjusted per modality here; input_std is
            # left as provided by the model.
            if self.modality == 'Flow':
                self.input_mean = [128]
            elif self.modality == 'RGBDiff':
                # Repeat the mean sequence (1 + new_length) times.
                self.input_mean = self.input_mean * (1 + self.new_length)
            if self.is_shift:
                print('Adding temporal shift...')
                # Note: self.shift_place is what gets passed as the
                # ``is_temporal_shift`` argument.
                self.base_model.build_temporal_ops(
                    self.num_segments,
                    is_temporal_shift=self.shift_place,
                    shift_div=self.shift_div)