Example #1
def r2plus1d_resnet34_kinetics400_custom(nclass=400,
                                         pretrained=False,
                                         pretrained_base=True,
                                         use_kinetics_pretrain=True,
                                         root='~/.mxnet/models',
                                         num_segments=1,
                                         num_crop=1,
                                         feat_ext=False,
                                         ctx=cpu(),
                                         **kwargs):

    r"""R2Plus1D with ResNet34 backbone, with the classifier head resized to
    `nclass` and all other layers optionally warm-started from the Kinetics400
    pretrained model."""
    from .model_zoo import get_model
    model = R2Plus1D(nclass=nclass,
                     block=BasicBlock,
                     layers=[3, 4, 6, 3],
                     num_segments=num_segments,
                     num_crop=num_crop,
                     feat_ext=feat_ext,
                     ctx=ctx,
                     **kwargs)
    model.initialize(init.MSRAPrelu(), ctx=ctx)

    if use_kinetics_pretrain and not pretrained:
        kinetics_model = get_model('r2plus1d_resnet34_kinetics400',
                                   nclass=400,
                                   pretrained=True)
        source_params = kinetics_model.collect_params()
        target_params = model.collect_params()
        assert len(source_params.keys()) == len(target_params.keys())

        pretrained_weights = []
        for layer_name in source_params.keys():
            pretrained_weights.append(source_params[layer_name].data())

        for i, layer_name in enumerate(target_params.keys()):
            if i + 2 == len(source_params.keys()):
                # stop before the final Dense layer (weight and bias), whose
                # shape differs whenever nclass != 400
                break
            target_params[layer_name].set_data(pretrained_weights[i])

    elif pretrained:
        # Loading full pretrained weights for this custom variant is not
        # implemented; the model keeps its MSRAPrelu initialization.
        pass
    else:
        model.initialize(init.MSRAPrelu(), ctx=ctx)
    model.collect_params().reset_ctx(ctx)

    return model
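A minimal usage sketch for the factory above (hedged: it assumes the same
gluoncv-style package so that R2Plus1D, BasicBlock and get_model resolve).
It builds a 101-class model whose backbone is warm-started from Kinetics400
while the final Dense head stays randomly initialized.

from mxnet import cpu

# hypothetical call: nclass=101 yields a fresh classifier head; the copy loop
# above fills every other layer from the pretrained Kinetics400 model
net = r2plus1d_resnet34_kinetics400_custom(nclass=101,
                                           use_kinetics_pretrain=True,
                                           ctx=cpu())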
Example #2
    def __init__(self, depth, ctx, pretrained=True, num_features=0, num_classes=0, num_parts=1):
        super(ResNet, self).__init__()
        self.num_classes = num_classes
        self.num_parts = num_parts

        with self.name_scope():
            model = ResNet.__factory[depth](pretrained=pretrained, ctx=ctx).features[:-1]
            # set the last stage's stride to 1 to keep a larger spatial feature map
            model[-1][0].body[0]._kwargs['stride'] = (1, 1)
            model[-1][0].downsample[0]._kwargs['stride'] = (1, 1)
            self.base = nn.HybridSequential()
            for m in model:
                self.base.add(m)

            # local: per-part feature and classifier branches
            self.feat = nn.HybridSequential()
            self.classify = nn.HybridSequential()
            for _ in range(num_parts):
                tmp = nn.HybridSequential()
                tmp.add(nn.GlobalMaxPool2D())
                feat = nn.Conv2D(channels=num_features, kernel_size=1, use_bias=False)
                feat.initialize(init.MSRAPrelu('in', 0), ctx=ctx)
                tmp.add(feat)
                bn = nn.BatchNorm()
                bn.initialize(init=init.Zero(), ctx=ctx)
                tmp.add(bn)
                tmp.add(nn.Flatten())
                self.feat.add(tmp)

                classifier = nn.Dense(num_classes, use_bias=False)
                classifier.initialize(init=init.Normal(0.001), ctx=ctx)
                self.classify.add(classifier)

            # global: single whole-image branch
            self.g_feat = nn.HybridSequential()
            self.g_classify = nn.HybridSequential()
            for _ in range(1):
                tmp = nn.HybridSequential()
                tmp.add(nn.GlobalAvgPool2D())
                feat = nn.Conv2D(channels=num_features, kernel_size=1, use_bias=False)
                feat.initialize(init.MSRAPrelu('in', 0), ctx=ctx)
                tmp.add(feat)
                bn = nn.BatchNorm(center=False, scale=False)
                bn.initialize(init=init.Zero(), ctx=ctx)
                tmp.add(bn)
                tmp.add(nn.Flatten())
                self.g_feat.add(tmp)

                classifier = nn.Dense(num_classes, use_bias=False)
                classifier.initialize(init=init.Normal(0.001), ctx=ctx)
                self.g_classify.add(classifier)
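A hedged construction sketch for the part-based ResNet above (depth, feature
size and class count are illustrative; the class's __factory dict is assumed
to map depth integers to gluon resnet constructors):

import mxnet as mx

# e.g. 6 horizontal parts with 256-d part embeddings for a 751-identity
# re-identification dataset such as Market-1501
net = ResNet(depth=50, ctx=mx.cpu(), pretrained=True,
             num_features=256, num_classes=751, num_parts=6)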
Example #3
def r2plus1d_resnet152_kinetics400(nclass=400,
                                   pretrained=False,
                                   pretrained_base=True,
                                   root='~/.mxnet/models',
                                   num_segments=1,
                                   num_crop=1,
                                   feat_ext=False,
                                   ctx=cpu(),
                                   **kwargs):
    r"""R2Plus1D with ResNet152 backbone trained on Kinetics400 dataset.

    Parameters
    ----------
    nclass : int.
        Number of categories in the dataset.
    pretrained : bool or str.
        Boolean value controls whether to load the default pretrained weights for model.
        String value represents the hashtag for a certain version of pretrained weights.
    pretrained_base : bool or str, optional, default is True.
        Load pretrained base network, the extra layers are randomized. Note that
        if pretrained is `True`, this has no effect.
    ctx : Context, default CPU.
        The context in which to load the pretrained weights.
    root : str, default $MXNET_HOME/models
        Location for keeping the model parameters.
    num_segments : int, default is 1.
        Number of segments used to evenly divide a video.
    num_crop : int, default is 1.
        Number of crops used during evaluation, choices are 1, 3 or 10.
    feat_ext : bool.
        Whether to extract features before dense classification layer or
        do a complete forward pass.
    """

    model = R2Plus1D(nclass=nclass,
                     block=Bottleneck,
                     layers=[3, 8, 36, 3],
                     num_segments=num_segments,
                     num_crop=num_crop,
                     feat_ext=feat_ext,
                     ctx=ctx,
                     **kwargs)
    model.initialize(init.MSRAPrelu(), ctx=ctx)

    if pretrained:
        from ..model_store import get_model_file
        model.load_parameters(get_model_file('r2plus1d_resnet152_kinetics400',
                                             tag=pretrained,
                                             root=root),
                              ctx=ctx)
        from ...data import Kinetics400Attr
        attrib = Kinetics400Attr()
        model.classes = attrib.classes
    model.collect_params().reset_ctx(ctx)

    return model
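A quick shape smoke test for the network above (a sketch: the 16-frame
112x112 clip size is a common R(2+1)D convention assumed here, not a
documented requirement of this implementation):

import mxnet as mx

net = r2plus1d_resnet152_kinetics400(pretrained=False)
clip = mx.nd.random.uniform(shape=(1, 3, 16, 112, 112))  # (N, C, T, H, W)
logits = net(clip)
print(logits.shape)  # expected: (1, 400)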
Example #4
def c3d_kinetics400(nclass=400,
                    pretrained=False,
                    ctx=cpu(),
                    root='~/.mxnet/models',
                    num_segments=1,
                    num_crop=1,
                    feat_ext=False,
                    **kwargs):
    r"""The Convolutional 3D network (C3D) trained on Kinetics400 dataset.
    Learning Spatiotemporal Features with 3D Convolutional Networks.
    ICCV, 2015. https://arxiv.org/abs/1412.0767

    Parameters
    ----------
    nclass : int.
        Number of categories in the dataset.
    pretrained : bool or str.
        Boolean value controls whether to load the default pretrained weights for model.
        String value represents the hashtag for a certain version of pretrained weights.
    ctx : Context, default CPU.
        The context in which to load the pretrained weights.
    root : str, default $MXNET_HOME/models
        Location for keeping the model parameters.
    num_segments : int, default is 1.
        Number of segments used to evenly divide a video.
    num_crop : int, default is 1.
        Number of crops used during evaluation, choices are 1, 3 or 10.
    feat_ext : bool.
        Whether to extract features before dense classification layer or
        do a complete forward pass.
    """

    model = C3D(nclass=nclass,
                ctx=ctx,
                num_segments=num_segments,
                num_crop=num_crop,
                feat_ext=feat_ext,
                **kwargs)
    model.initialize(init.MSRAPrelu(), ctx=ctx)

    if pretrained:
        from ..model_store import get_model_file
        model.load_parameters(get_model_file('c3d_kinetics400',
                                             tag=pretrained,
                                             root=root),
                              ctx=ctx)
        from ...data import Kinetics400Attr
        attrib = Kinetics400Attr()
        model.classes = attrib.classes
    model.collect_params().reset_ctx(ctx)

    return model
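The same smoke test works for C3D (hedged: C3D is conventionally fed 16-frame
112x112 clips, which is assumed here):

import mxnet as mx

net = c3d_kinetics400(pretrained=False)
clip = mx.nd.random.uniform(shape=(1, 3, 16, 112, 112))  # (N, C, T, H, W)
print(net(clip).shape)  # expected: (1, 400)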
Example #5
def r2plus1d_resnet34_tranconv_lateral(nelength=16, pretrained=False, ctx=cpu(), **kwargs):
    r"""R2Plus1D ResNet34 with transposed-conv lateral connections."""
    model = R2Plus1D_TranConv_lateral()
    if pretrained:
        modelfile = '0.9315-ucf101-r2plus1d_resnet34_tranconv_lateral-079-best.params'
        root = '/home/hp/lcx/Action-Recognition/logs/param_rgb_r2plus1d_resnet18_kinetics400_custom_ucf101_nlength16_lateral_1'
        filepath = os.path.expanduser(os.path.join(root, modelfile))
        print(filepath)
        # load from the resolved path; the original passed `modelfile`, which
        # only resolves when the file sits in the working directory
        model.load_parameters(filepath, ctx=ctx, allow_missing=True)
    else:
        model.initialize(init.MSRAPrelu(), ctx=ctx)

    return model
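Since the checkpoint path above is hard-coded, a defensive loading helper is
worth sketching (a hypothetical helper, not part of the original code; it
keeps the same load_parameters call):

import os

def load_checkpoint(model, filepath, ctx):
    # fail fast with a readable message instead of a deep MXNet error
    filepath = os.path.expanduser(filepath)
    if not os.path.isfile(filepath):
        raise FileNotFoundError('checkpoint not found: {}'.format(filepath))
    model.load_parameters(filepath, ctx=ctx, allow_missing=True)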
Example #6
def train_enhance_net(train_dataloader, test_dataloader, logger):
    net = get_model(name=config.MODEL_NAME, num_classes=config.NUM_CLASSES)
    net.initialize(init.MSRAPrelu(), ctx=config.CTX)
    net.collect_params().reset_ctx(config.CTX)
    net.load_parameters(config.BASE_MODEL_PATH, ctx=config.CTX, allow_missing=True)
    net.hybridize()
    if config.DTYPE != 'float32':
        net.cast(config.DTYPE)
    for k, v in net.collect_params('.*beta|.*gamma|.*bias').items():
        v.wd_mult = 0.0
    train_mixup(net=net,
                train_dataloader=train_dataloader,
                valid_dataloader=test_dataloader,
                num_epochs=config.NUM_EPOCHES,
                batch_size=config.BATCH_SIZE,
                lr=config.LR,
                wd=config.WEIGHT_DECAY,
                ctx=config.CTX,
                dtype=config.DTYPE,
                logger=logger)
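The regex loop above is the usual "no weight decay on normalization and bias
parameters" trick. A self-contained sketch of what it selects:

import mxnet as mx
from mxnet.gluon import nn

net = nn.HybridSequential()
net.add(nn.Dense(8), nn.BatchNorm())
net.initialize()
# collect_params accepts a regex; this matches BatchNorm beta/gamma and the
# Dense bias, leaving weights subject to weight decay
for name, param in net.collect_params('.*beta|.*gamma|.*bias').items():
    param.wd_mult = 0.0
    print(name)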
Example #7
def get_model(args):
    '''Setup network'''
    ctx = [mxnet.gpu(gpu_id) for gpu_id in args.gpu]

    net = MVRNN(cnn_arch='vgg11_bn',
                cnn_feature_length=4096,
                num_views=args.num_views,
                num_class=args.num_classes,
                pretrained=True,
                pretrained_cnn=args.pretrained_cnn,
                ctx=ctx)
    if args.checkpoint:
        net.load_parameters(args.checkpoint, ctx=ctx)
    else:
        net.initialize(init=init.MSRAPrelu(), ctx=ctx)
    net.hybridize()
    net.collect_params().setattr('grad_req', 'add')
    net._cnn2.collect_params().setattr('lr_mult', args.output_lr_mult)

    return net
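With grad_req set to 'add', gradients accumulate across backward passes, so
the training loop has to zero them explicitly after each update. A hedged
sketch of that loop (train_data, loss_fun, trainer and batch_size are
placeholders, not names from the original code):

from mxnet import autograd

for views, label in train_data:       # hypothetical loader
    with autograd.record():
        loss = loss_fun(net(views), label)
    loss.backward()                   # gradients are added, not overwritten
    trainer.step(batch_size)
    for param in net.collect_params().values():
        param.zero_grad()             # reset the accumulated gradients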
Example #8
def r2plus1d_resnet18_kinetics400_custom(nclass=400,
                                         pretrained=False,
                                         pretrained_base=True,
                                         use_kinetics_pretrain=True,
                                         root='~/.mxnet/models',
                                         num_segments=1,
                                         num_crop=1,
                                         use_lateral=False,
                                         feat_ext=False,
                                         ctx=cpu(),
                                         **kwargs):
    r"""R2Plus1D with ResNet18 backbone trained on Kinetics400 dataset.

    Parameters
    ----------
    nclass : int.
        Number of categories in the dataset.
    pretrained : bool or str.
        Boolean value controls whether to load the default pretrained weights for model.
        String value represents the hashtag for a certain version of pretrained weights.
    pretrained_base : bool or str, optional, default is True.
        Load pretrained base network, the extra layers are randomized. Note that
        if pretrained is `True`, this has no effect.
    ctx : Context, default CPU.
        The context in which to load the pretrained weights.
    root : str, default $MXNET_HOME/models
        Location for keeping the model parameters.
    num_segments : int, default is 1.
        Number of segments used to evenly divide a video.
    num_crop : int, default is 1.
        Number of crops used during evaluation, choices are 1, 3 or 10.
    feat_ext : bool.
        Whether to extract features before dense classification layer or
        do a complete forward pass.
    """

    """
    model = R2Plus1D(nclass=nclass,
                     block=BasicBlock,
                     layers=[2, 2, 2, 2],
                     num_segments=num_segments,
                     num_crop=num_crop,
                     feat_ext=feat_ext,
                     ctx=ctx,
                     **kwargs)
    """
    from .model_zoo import get_model
    #model = get_model('r2plus1d_resnet18_kinetics400', nclass=nclass,num_crop=num_crop,
    #                  feat_ext=feat_ext,num_segments=num_segments,ctx=ctx,pretrained=False) 
    
    model = R2Plus1D(nclass=nclass,
                     block=BasicBlock,
                     layers=[2, 2, 2, 2],
                     num_segments=num_segments,
                     num_crop=num_crop,
                     feat_ext=feat_ext,
                     ctx=ctx,
                     use_lateral=use_lateral,
                     **kwargs)    
    model.initialize(init.MSRAPrelu(), ctx=ctx)
    
    if use_kinetics_pretrain and not pretrained:
        print('use_kinetics_pretrain == True')
        kinetics_model = get_model('r2plus1d_resnet18_kinetics400', nclass=400, pretrained=True)
        source_params = kinetics_model.collect_params()
        target_params = model.collect_params()
        assert len(source_params.keys()) == len(target_params.keys())

        pretrained_weights = []
        for layer_name in source_params.keys():
            pretrained_weights.append(source_params[layer_name].data())

        for i, layer_name in enumerate(target_params.keys()):
            if i + 2 == len(source_params.keys()):
                # stop before the final Dense layer (weight and bias), whose
                # shape differs whenever nclass != 400
                break
            target_params[layer_name].set_data(pretrained_weights[i])
    elif pretrained:
        # Loading full pretrained weights for this custom variant is not
        # implemented; the model keeps its MSRAPrelu initialization.
        pass
    else:
        print('use_kinetics_pretrain == False')
    model.collect_params().reset_ctx(ctx)

    return model
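A small sanity check for the copy loop above (a sketch with an illustrative
nclass): the "i + 2" break leaves exactly the last two entries of
collect_params, the new head's Dense weight and bias, untouched.

net = r2plus1d_resnet18_kinetics400_custom(nclass=101)
names = list(net.collect_params().keys())
print(names[-2:])  # the randomly initialized classifier head (weight, bias)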
Example #9
    def __init__(self,
                 in_channels=1024,
                 nonlocal_type="gaussian",
                 dim=3,
                 embed=True,
                 embed_dim=None,
                 sub_sample=True,
                 use_bn=True,
                 norm_layer=BatchNorm,
                 norm_kwargs=None,
                 ctx=None,
                 **kwargs):
        super(NonLocal, self).__init__()

        assert nonlocal_type in ['gaussian', 'dot', 'concat']
        self.nonlocal_type = nonlocal_type
        self.embed = embed
        self.embed_dim = embed_dim if embed_dim is not None else in_channels // 2
        self.sub_sample = sub_sample
        self.use_bn = use_bn

        with self.name_scope():
            if self.embed:
                if dim == 2:
                    self.theta = nn.Conv2D(in_channels=in_channels,
                                           channels=self.embed_dim,
                                           kernel_size=(1, 1),
                                           strides=(1, 1),
                                           padding=(0, 0),
                                           weight_initializer=init.MSRAPrelu())
                    self.phi = nn.Conv2D(in_channels=in_channels,
                                         channels=self.embed_dim,
                                         kernel_size=(1, 1),
                                         strides=(1, 1),
                                         padding=(0, 0),
                                         weight_initializer=init.MSRAPrelu())
                    self.g = nn.Conv2D(in_channels=in_channels,
                                       channels=self.embed_dim,
                                       kernel_size=(1, 1),
                                       strides=(1, 1),
                                       padding=(0, 0),
                                       weight_initializer=init.MSRAPrelu())
                elif dim == 3:
                    self.theta = nn.Conv3D(in_channels=in_channels,
                                           channels=self.embed_dim,
                                           kernel_size=(1, 1, 1),
                                           strides=(1, 1, 1),
                                           padding=(0, 0, 0),
                                           weight_initializer=init.MSRAPrelu())
                    self.phi = nn.Conv3D(in_channels=in_channels,
                                         channels=self.embed_dim,
                                         kernel_size=(1, 1, 1),
                                         strides=(1, 1, 1),
                                         padding=(0, 0, 0),
                                         weight_initializer=init.MSRAPrelu())
                    self.g = nn.Conv3D(in_channels=in_channels,
                                       channels=self.embed_dim,
                                       kernel_size=(1, 1, 1),
                                       strides=(1, 1, 1),
                                       padding=(0, 0, 0),
                                       weight_initializer=init.MSRAPrelu())

            if self.nonlocal_type == 'concat':
                if dim == 2:
                    self.concat_proj = nn.HybridSequential()
                    self.concat_proj.add(
                        nn.Conv2D(in_channels=self.embed_dim * 2,
                                  channels=1,
                                  kernel_size=(1, 1),
                                  strides=(1, 1),
                                  padding=(0, 0),
                                  weight_initializer=init.MSRAPrelu()))
                    self.concat_proj.add(nn.Activation('relu'))
                elif dim == 3:
                    self.concat_proj = nn.HybridSequential()
                    self.concat_proj.add(
                        nn.Conv3D(in_channels=self.embed_dim * 2,
                                  channels=1,
                                  kernel_size=(1, 1, 1),
                                  strides=(1, 1, 1),
                                  padding=(0, 0, 0),
                                  weight_initializer=init.MSRAPrelu()))
                    self.concat_proj.add(nn.Activation('relu'))

            if sub_sample:
                if dim == 2:
                    self.max_pool = nn.MaxPool2D(pool_size=(2, 2))
                elif dim == 3:
                    self.max_pool = nn.MaxPool3D(pool_size=(1, 2, 2))
                self.sub_phi = nn.HybridSequential()
                self.sub_phi.add(self.phi)
                self.sub_phi.add(self.max_pool)
                self.sub_g = nn.HybridSequential()
                self.sub_g.add(self.g)
                self.sub_g.add(self.max_pool)

            if dim == 2:
                self.W = nn.Conv2D(in_channels=self.embed_dim,
                                   channels=in_channels,
                                   kernel_size=(1, 1),
                                   strides=(1, 1),
                                   padding=(0, 0),
                                   weight_initializer=init.MSRAPrelu())
            elif dim == 3:
                self.W = nn.Conv3D(in_channels=self.embed_dim,
                                   channels=in_channels,
                                   kernel_size=(1, 1, 1),
                                   strides=(1, 1, 1),
                                   padding=(0, 0, 0),
                                   weight_initializer=init.MSRAPrelu())

            if use_bn:
                self.bn = norm_layer(
                    in_channels=in_channels,
                    gamma_initializer='zeros',
                    **({} if norm_kwargs is None else norm_kwargs))
                self.W_bn = nn.HybridSequential()
                self.W_bn.add(self.W)
                self.W_bn.add(self.bn)
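A hedged instantiation sketch for the block above. Only __init__ is shown
here, so this simply builds the sub-layers and checks the default embedding
width; the hybrid_forward that wires theta, phi and g together is not
reproduced.

block = NonLocal(in_channels=1024, nonlocal_type='gaussian', dim=3,
                 sub_sample=True, use_bn=True)
block.initialize()
print(block.embed_dim)  # 512, i.e. in_channels // 2 by default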
Example #10
    def __init__(self, args):
        self.args = args

        filehandler = logging.FileHandler(args.logging_file)
        streamhandler = logging.StreamHandler()

        self.logger = logging.getLogger('')
        self.logger.setLevel(logging.INFO)
        self.logger.addHandler(filehandler)
        self.logger.addHandler(streamhandler)

        self.logger.info(args)


        # image transform
        input_transform = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize([.485, .456, .406], [.229, .224, .225]),  # Default mean and std
        ])

        ################################# dataset and dataloader #################################
        if platform.system() == "Darwin":
            data_root = os.path.join('~', 'Nutstore Files', 'Dataset')  # Mac
        elif platform.system() == "Linux":
            data_root = os.path.join('~', 'datasets')  # Laplace or HPC
            if args.colab:
                data_root = '/content/datasets'  # Colab
        else:
            raise ValueError('Notice Dataset Path')

        data_kwargs = {'base_size': args.base_size, 'transform': input_transform,
                       'crop_size': args.crop_size, 'root': data_root,
                       'base_dir': args.dataset}
        trainset = IceContrast(split=args.train_split, mode='train', **data_kwargs)
        valset = IceContrast(split=args.val_split, mode='testval', **data_kwargs)
        self.train_data = gluon.data.DataLoader(trainset, args.batch_size, shuffle=True,
                                                last_batch='rollover', num_workers=args.workers)
        self.eval_data = gluon.data.DataLoader(valset, args.test_batch_size,
                                               last_batch='rollover', num_workers=args.workers)

        layers = [args.blocks] * 3
        channels = [x * args.channel_times for x in [8, 16, 32, 64]]
        if args.model == 'ResNetFPN':
            model = ASKCResNetFPN(layers=layers, channels=channels, fuse_mode=args.fuse_mode,
                                  tiny=args.tiny, classes=trainset.NUM_CLASS)
        elif args.model == 'ResUNet':
            model = ASKCResUNet(layers=layers, channels=channels, fuse_mode=args.fuse_mode,
                                tiny=args.tiny, classes=trainset.NUM_CLASS)
        else:
            raise ValueError("Unknown model '{}'".format(args.model))
        print("layers: ", layers)
        print("channels: ", channels)
        print("fuse_mode: ", args.fuse_mode)
        print("tiny: ", args.tiny)
        print("classes: ", trainset.NUM_CLASS)

        if args.host == 'xxx':
            self.host_name = socket.gethostname()  # automatic
        else:
            self.host_name = args.host             # Puma needs to be specified
        self.save_prefix = '_'.join([args.model, args.fuse_mode, args.dataset,
                                     self.host_name, 'GPU', args.gpus])

        model.cast(args.dtype)
        # self.logger.info(model)

        # resume checkpoint if needed
        if args.resume is not None:
            if os.path.isfile(args.resume):
                model.load_parameters(args.resume, ctx=args.ctx)
            else:
                raise RuntimeError("=> no checkpoint found at '{}'".format(args.resume))
        else:
            model.initialize(init=init.MSRAPrelu(), ctx=args.ctx, force_reinit=True)
            print("Model Initializing")
            print("args.ctx: ", args.ctx)

        self.net = model
        if args.summary:
            summary(self.net, mx.nd.zeros((1, 3, args.crop_size, args.crop_size), ctx=args.ctx[0]))
            sys.exit()

        # create criterion
        self.criterion = SoftIoULoss()

        # optimizer and lr scheduling
        self.lr_scheduler = LRSequential([
                LRScheduler('linear', base_lr=0, target_lr=args.lr,
                            nepochs=args.warmup_epochs, iters_per_epoch=len(self.train_data)),
                LRScheduler(mode='poly', base_lr=args.lr,
                            nepochs=args.epochs-args.warmup_epochs,
                            iters_per_epoch=len(self.train_data),
                            power=0.9)
            ])
        kv = mx.kv.create(args.kvstore)

        if args.optimizer == 'sgd':
            optimizer_params = {'lr_scheduler': self.lr_scheduler,
                                'wd': args.weight_decay,
                                'momentum': args.momentum,
                                'learning_rate': args.lr}
        elif args.optimizer == 'adam':
            optimizer_params = {'lr_scheduler': self.lr_scheduler,
                                'wd': args.weight_decay,
                                'learning_rate': args.lr}
        elif args.optimizer == 'adagrad':
            optimizer_params = {
                'wd': args.weight_decay,
                'learning_rate': args.lr
            }
        else:
            raise ValueError('Unsupported optimizer {} used'.format(args.optimizer))

        if args.dtype == 'float16':
            optimizer_params['multi_precision'] = True

        if args.no_wd:
            for k, v in self.net.collect_params('.*beta|.*gamma|.*bias').items():
                v.wd_mult = 0.0

        self.optimizer = gluon.Trainer(self.net.collect_params(), args.optimizer,
                                       optimizer_params, kvstore=kv)

        ################################# evaluation metrics #################################

        self.iou_metric = SigmoidMetric(1)
        self.nIoU_metric = SamplewiseSigmoidMetric(1, score_thresh=self.args.score_thresh)
        self.best_iou = 0
        self.best_nIoU = 0
        self.is_best = False
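A worked sketch of the warmup-plus-poly schedule built above, with small
illustrative numbers (it assumes gluoncv's LRSequential, which is callable
with the global iteration index, the same way a Trainer queries it):

from gluoncv.utils import LRScheduler, LRSequential

sched = LRSequential([
    LRScheduler('linear', base_lr=0, target_lr=0.1,
                nepochs=5, iters_per_epoch=100),   # 500-iteration linear warmup
    LRScheduler(mode='poly', base_lr=0.1,
                nepochs=95, iters_per_epoch=100, power=0.9),
])
for it in (0, 250, 500, 5000, 9999):
    print(it, sched(it))  # ramps 0 -> 0.1, then decays polynomially toward 0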
Example #11
    def __init__(self,
                 nclass,
                 block=Bottleneck,
                 layers=None,
                 pretrained=False,
                 pretrained_base=False,
                 num_segments=1,
                 num_crop=1,
                 bn_eval=True,
                 bn_frozen=False,
                 partial_bn=False,
                 frozen_stages=-1,
                 dropout_ratio=0.5,
                 init_std=0.01,
                 alpha=8,
                 beta_inv=8,
                 fusion_conv_channel_ratio=2,
                 fusion_kernel_size=5,
                 width_per_group=64,
                 num_groups=1,
                 slow_temporal_stride=16,
                 fast_temporal_stride=2,
                 slow_frames=4,
                 fast_frames=32,
                 norm_layer=BatchNorm,
                 norm_kwargs=None,
                 ctx=None,
                 **kwargs):
        super(SlowFast, self).__init__()
        self.num_segments = num_segments
        self.num_crop = num_crop
        self.dropout_ratio = dropout_ratio
        self.init_std = init_std
        self.alpha = alpha
        self.beta_inv = beta_inv
        self.fusion_conv_channel_ratio = fusion_conv_channel_ratio
        self.fusion_kernel_size = fusion_kernel_size
        self.width_per_group = width_per_group
        self.num_groups = num_groups
        self.dim_inner = self.num_groups * self.width_per_group
        self.out_dim_ratio = self.beta_inv // self.fusion_conv_channel_ratio
        self.slow_temporal_stride = slow_temporal_stride
        self.fast_temporal_stride = fast_temporal_stride
        self.slow_frames = slow_frames
        self.fast_frames = fast_frames

        with self.name_scope():
            # build fast pathway
            fast = nn.HybridSequential(prefix='fast_')
            with fast.name_scope():
                self.fast_conv1 = nn.Conv3D(in_channels=3, channels=self.width_per_group // self.beta_inv,
                                            kernel_size=(5, 7, 7), strides=(1, 2, 2), padding=(2, 3, 3), use_bias=False)
                self.fast_bn1 = norm_layer(in_channels=self.width_per_group // self.beta_inv,
                                           **({} if norm_kwargs is None else norm_kwargs))
                self.fast_relu = nn.Activation('relu')
                self.fast_maxpool = nn.MaxPool3D(pool_size=(1, 3, 3), strides=(1, 2, 2), padding=(0, 1, 1))
            self.fast_res2 = self._make_layer_fast(inplanes=self.width_per_group // self.beta_inv,
                                                   planes=self.dim_inner // self.beta_inv,
                                                   num_blocks=layers[0],
                                                   head_conv=3,
                                                   norm_layer=norm_layer,
                                                   norm_kwargs=norm_kwargs,
                                                   layer_name='fast_res2_')
            self.fast_res3 = self._make_layer_fast(inplanes=self.width_per_group * 4 // self.beta_inv,
                                                   planes=self.dim_inner * 2 // self.beta_inv,
                                                   num_blocks=layers[1],
                                                   strides=2,
                                                   head_conv=3,
                                                   norm_layer=norm_layer,
                                                   norm_kwargs=norm_kwargs,
                                                   layer_name='fast_res3_')
            self.fast_res4 = self._make_layer_fast(inplanes=self.width_per_group * 8 // self.beta_inv,
                                                   planes=self.dim_inner * 4 // self.beta_inv,
                                                   num_blocks=layers[2],
                                                   strides=2,
                                                   head_conv=3,
                                                   norm_layer=norm_layer,
                                                   norm_kwargs=norm_kwargs,
                                                   layer_name='fast_res4_')
            self.fast_res5 = self._make_layer_fast(inplanes=self.width_per_group * 16 // self.beta_inv,
                                                   planes=self.dim_inner * 8 // self.beta_inv,
                                                   num_blocks=layers[3],
                                                   strides=2,
                                                   head_conv=3,
                                                   norm_layer=norm_layer,
                                                   norm_kwargs=norm_kwargs,
                                                   layer_name='fast_res5_')

            # build lateral connections
            self.lateral_p1 = nn.HybridSequential(prefix='lateral_p1_')
            with self.lateral_p1.name_scope():
                self.lateral_p1.add(nn.Conv3D(in_channels=self.width_per_group // self.beta_inv,
                                              channels=self.width_per_group // self.beta_inv * self.fusion_conv_channel_ratio,
                                              kernel_size=(self.fusion_kernel_size, 1, 1),
                                              strides=(self.alpha, 1, 1),
                                              padding=(self.fusion_kernel_size // 2, 0, 0),
                                              use_bias=False))
                self.lateral_p1.add(norm_layer(in_channels=self.width_per_group // self.beta_inv * self.fusion_conv_channel_ratio,
                                               **({} if norm_kwargs is None else norm_kwargs)))
                self.lateral_p1.add(nn.Activation('relu'))

            self.lateral_res2 = nn.HybridSequential(prefix='lateral_res2_')
            with self.lateral_res2.name_scope():
                self.lateral_res2.add(nn.Conv3D(in_channels=self.width_per_group * 4 // self.beta_inv,
                                                channels=self.width_per_group * 4 // self.beta_inv * self.fusion_conv_channel_ratio,
                                                kernel_size=(self.fusion_kernel_size, 1, 1),
                                                strides=(self.alpha, 1, 1),
                                                padding=(self.fusion_kernel_size // 2, 0, 0),
                                                use_bias=False))
                self.lateral_res2.add(norm_layer(in_channels=self.width_per_group * 4 // self.beta_inv * self.fusion_conv_channel_ratio,
                                                 **({} if norm_kwargs is None else norm_kwargs)))
                self.lateral_res2.add(nn.Activation('relu'))

            self.lateral_res3 = nn.HybridSequential(prefix='lateral_res3_')
            with self.lateral_res3.name_scope():
                self.lateral_res3.add(nn.Conv3D(in_channels=self.width_per_group * 8 // self.beta_inv,
                                                channels=self.width_per_group * 8 // self.beta_inv * self.fusion_conv_channel_ratio,
                                                kernel_size=(self.fusion_kernel_size, 1, 1),
                                                strides=(self.alpha, 1, 1),
                                                padding=(self.fusion_kernel_size // 2, 0, 0),
                                                use_bias=False))
                self.lateral_res3.add(norm_layer(in_channels=self.width_per_group * 8 // self.beta_inv * self.fusion_conv_channel_ratio,
                                                 **({} if norm_kwargs is None else norm_kwargs)))
                self.lateral_res3.add(nn.Activation('relu'))

            self.lateral_res4 = nn.HybridSequential(prefix='lateral_res4_')
            with self.lateral_res4.name_scope():
                self.lateral_res4.add(nn.Conv3D(in_channels=self.width_per_group * 16 // self.beta_inv,
                                                channels=self.width_per_group * 16 // self.beta_inv * self.fusion_conv_channel_ratio,
                                                kernel_size=(self.fusion_kernel_size, 1, 1),
                                                strides=(self.alpha, 1, 1),
                                                padding=(self.fusion_kernel_size // 2, 0, 0),
                                                use_bias=False))
                self.lateral_res4.add(norm_layer(in_channels=self.width_per_group * 16 // self.beta_inv * self.fusion_conv_channel_ratio,
                                                 **({} if norm_kwargs is None else norm_kwargs)))
                self.lateral_res4.add(nn.Activation('relu'))

            # build slow pathway
            slow = nn.HybridSequential(prefix='slow_')
            with slow.name_scope():
                self.slow_conv1 = nn.Conv3D(in_channels=3, channels=self.width_per_group,
                                            kernel_size=(1, 7, 7), strides=(1, 2, 2), padding=(0, 3, 3), use_bias=False)
                self.slow_bn1 = norm_layer(in_channels=self.width_per_group,
                                           **({} if norm_kwargs is None else norm_kwargs))
                self.slow_relu = nn.Activation('relu')
                self.slow_maxpool = nn.MaxPool3D(pool_size=(1, 3, 3), strides=(1, 2, 2), padding=(0, 1, 1))
            self.slow_res2 = self._make_layer_slow(inplanes=self.width_per_group + self.width_per_group // self.out_dim_ratio,
                                                   planes=self.dim_inner,
                                                   num_blocks=layers[0],
                                                   head_conv=1,
                                                   norm_layer=norm_layer,
                                                   norm_kwargs=norm_kwargs,
                                                   layer_name='slow_res2_')
            self.slow_res3 = self._make_layer_slow(inplanes=self.width_per_group * 4 + self.width_per_group * 4 // self.out_dim_ratio,
                                                   planes=self.dim_inner * 2,
                                                   num_blocks=layers[1],
                                                   strides=2,
                                                   head_conv=1,
                                                   norm_layer=norm_layer,
                                                   norm_kwargs=norm_kwargs,
                                                   layer_name='slow_res3_')
            self.slow_res4 = self._make_layer_slow(inplanes=self.width_per_group * 8 + self.width_per_group * 8 // self.out_dim_ratio,
                                                   planes=self.dim_inner * 4,
                                                   num_blocks=layers[2],
                                                   strides=2,
                                                   head_conv=3,
                                                   norm_layer=norm_layer,
                                                   norm_kwargs=norm_kwargs,
                                                   layer_name='slow_res4_')
            self.slow_res5 = self._make_layer_slow(inplanes=self.width_per_group * 16 + self.width_per_group * 16 // self.out_dim_ratio,
                                                   planes=self.dim_inner * 8,
                                                   num_blocks=layers[3],
                                                   strides=2,
                                                   head_conv=3,
                                                   norm_layer=norm_layer,
                                                   norm_kwargs=norm_kwargs,
                                                   layer_name='slow_res5_')

            # build classifier
            self.avg = nn.GlobalAvgPool3D()
            self.dp = nn.Dropout(rate=self.dropout_ratio)
            self.feat_dim = self.width_per_group * 32 // self.beta_inv + self.width_per_group * 32
            self.fc = nn.Dense(in_units=self.feat_dim, units=nclass, weight_initializer=init.Normal(sigma=self.init_std), use_bias=True)

            self.initialize(init.MSRAPrelu(), ctx=ctx)
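The frame bookkeeping behind the defaults above, as a worked example (plain
arithmetic; the 64-frame input clip is the convention from the SlowFast
paper, assumed here):

clip_len = 64
fast_frames = clip_len // 2     # fast_temporal_stride=2  -> 32 frames
slow_frames = clip_len // 16    # slow_temporal_stride=16 -> 4 frames
alpha = fast_frames // slow_frames   # 8, matching the alpha=8 default
assert (fast_frames, slow_frames, alpha) == (32, 4, 8)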
Example #12
    def __init__(self,
                 nclass,
                 base_model='resnet18_v1b',
                 pretrained_base=True,
                 num_segments=8,
                 num_temporal=1,
                 ifTSN=True,
                 input_channel=3,
                 batch_normal=True,
                 dropout_ratio=0.8,
                 init_std=0.001,
                 **kwargs):
        super(ECO, self).__init__()
        self.nclass = nclass
        self.dropout_ratio = dropout_ratio
        self.init_std = init_std
        self.num_segments = num_segments
        self.ifTSN = ifTSN
        self.input_shape = 224
        self.base_model = base_model  # e.g. 'resnet18_v1b', 'resnet18_v2', 'resnet18_v1b_kinetics400'

        # self.expansion == 4 for the resnet50/101/152 bottleneck variants

        if ('resnet50_v1b' in self.base_model or 'resnet101_v1b' in self.base_model
                or 'resnet152_v1b' in self.base_model):
            self.expansion = 4
        else:
            self.expansion = 1

        # channel dimension produced by the 2D feature extractor
        self.feat_dim_2d = 128 * self.expansion

        # num_temporal defaults to 1: following the paper, the temporal
        # dimension is not reduced in the first 3D stage
        if self.num_segments in (8, 16, 32):
            self.num_temporal = num_temporal
        else:
            self.num_temporal = 1

        # input dimension of the final fc layer
        if self.ifTSN:
            self.feat_dim_3d = 512
        else:  # Flatten
            tmppara = self.num_segments // 4
            tmppara = tmppara // (self.num_temporal if tmppara > 1 else 1)
            self.feat_dim_3d = 512 * tmppara

        pretrained_model = get_model(self.base_model,
                                     pretrained=pretrained_base)

        with self.name_scope():
            # input x has shape (N * num_segments, 3, 224, 224)
            # 2D feature extractor
            if self.base_model == 'resnet18_v2':
                self.feature2d = pretrained_model.features
            else:  # resnet18_v1b-style backbones
                self.conv1 = pretrained_model.conv1
                self.bn1 = pretrained_model.bn1
                self.relu = pretrained_model.relu
                self.maxpool = pretrained_model.maxpool
                self.layer1 = pretrained_model.layer1
                self.layer2 = pretrained_model.layer2

            #3D feature
            self.features_3d = nn.HybridSequential(prefix='')
            # conv3_x
            self.features_3d.add(
                BasicBlock(in_channel=self.feat_dim_2d,
                           out_channel=128,
                           spatial_stride=1,
                           temporal_stride=self.num_temporal))
            self.features_3d.add(
                BasicBlock(in_channel=128,
                           out_channel=128,
                           spatial_stride=1,
                           temporal_stride=1))
            # conv4_x
            self.features_3d.add(
                BasicBlock(in_channel=128,
                           out_channel=256,
                           spatial_stride=2,
                           temporal_stride=2))
            self.features_3d.add(
                BasicBlock(in_channel=256,
                           out_channel=256,
                           spatial_stride=1,
                           temporal_stride=1))
            # conv5_x
            self.features_3d.add(
                BasicBlock(in_channel=256,
                           out_channel=512,
                           spatial_stride=2,
                           temporal_stride=2))
            self.features_3d.add(
                BasicBlock(in_channel=512,
                           out_channel=512,
                           spatial_stride=1,
                           temporal_stride=1))
            self.features_3d.add(nn.AvgPool3D(pool_size=(1, 7, 7)))
            self.dropout = nn.Dropout(rate=self.dropout_ratio)
            self.output = nn.HybridSequential(prefix='')
            if self.ifTSN:
                self.output.add(
                    nn.Dense(
                        units=self.nclass,
                        in_units=512,
                        weight_initializer=init.Normal(sigma=self.init_std)))
            else:
                self.output.add(
                    nn.Dense(
                        units=512,
                        in_units=self.feat_dim_3d,
                        weight_initializer=init.Normal(sigma=self.init_std)),
                    nn.Dense(
                        units=self.nclass,
                        in_units=512,
                        weight_initializer=init.Normal(sigma=self.init_std)))
            # init
            if pretrained_base:
                self.features_3d.initialize(init.MSRAPrelu())
                self.output.initialize(init.MSRAPrelu())
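A worked example of the feat_dim_3d computation above for the Flatten branch
(ifTSN=False), using num_segments=16 and num_temporal=2:

num_segments, num_temporal = 16, 2
tmppara = num_segments // 4                    # two stride-2 3D stages: 16 -> 4
tmppara = tmppara // (num_temporal if tmppara > 1 else 1)  # conv3_x stride: 4 -> 2
feat_dim_3d = 512 * tmppara
assert feat_dim_3d == 1024                     # Flatten feeds 1024 units to the head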
Example #13
def main():
    # Parse config and mkdir output
    logger, final_Model_path = create_logger(config)
    config.final_Model_path = final_Model_path
    gen_config(os.path.join(final_Model_path, 'hyperParams.yaml'))
    logger.info('Training config:{}\n'.format(pprint.pformat(config)))

    # define context
    if config.useGPU:
        ctx = [mx.gpu(int(i)) for i in config.gpu.split(',')]
    else:
        ctx = mx.cpu()
    logger.info("Using context:", ctx)

    # dataset, generate trainset/ validation set
    train_imdbs = []
    valid_imdbs = []
    for i in range(len(config.DATASET.train_image_set)):
        logger.info("Construct Dataset:", config.DATASET.dbname[i],
                    ", Dataset Path:", config.DATASET.dataset_path[i])
        train_imdbs.append(
            eval(config.DATASET.dbname[i])(config.DATASET.train_image_set[i],
                                           config.DATASET.root_path[i],
                                           config.DATASET.dataset_path[i]))
        valid_imdbs.append(
            eval(config.DATASET.dbname[i])(config.DATASET.valid_image_set[i],
                                           config.DATASET.root_path[i],
                                           config.DATASET.dataset_path[i],
                                           config.final_Model_path))
    data_names = ['hm36data']
    label_names = ['hm36label']
    train_data_iter = JointsDataIter(train_imdbs[0],
                                     runmode=0,
                                     data_names=data_names,
                                     label_names=label_names,
                                     shuffle=config.TRAIN.SHUFFLE,
                                     batch_size=len(ctx) *
                                     config.TRAIN.batchsize,
                                     logger=logger)
    valid_data_iter = JointsDataIter(valid_imdbs[0],
                                     runmode=1,
                                     data_names=data_names,
                                     label_names=label_names,
                                     shuffle=False,
                                     batch_size=len(ctx) *
                                     config.TEST.batchsize,
                                     logger=logger)

    assert (train_data_iter.get_meanstd()['mean3d'] ==
            valid_data_iter.get_meanstd()['mean3d']).all()

    # network
    net = get_net(config)
    if config.resume:
        ckp_path = config.resumeckp
        net.collect_params().load(ckp_path, ctx=ctx)
    else:
        net.initialize(init=init.MSRAPrelu(), ctx=ctx)

    if config.NETWORK.hybrid:
        net.hybridize()

    logger.info(net)

    # define loss and metric
    mean3d = train_data_iter.get_meanstd()['mean3d']
    std3d = train_data_iter.get_meanstd()['std3d']
    train_metric = MPJPEMetric('train_metric', mean3d, std3d)
    eval_metric = MPJPEMetric('valid_metric', mean3d, std3d)
    loss = MeanSquareLoss()

    # optimizer
    optimizer, optimizer_params = get_optimizer(config, ctx)

    # train and valid
    TrainDBsize = train_data_iter.get_size()
    ValidDBsize = valid_data_iter.get_size()
    logger.info("Train DB size:", TrainDBsize, "Valid DB size:", ValidDBsize)

    if not isinstance(train_data_iter, mx.io.PrefetchingIter):
        train_data_iter = mx.io.PrefetchingIter(train_data_iter)

    trainer = gluon.Trainer(net.collect_params(), optimizer, optimizer_params)
    for epoch in range(config.TRAIN.begin_epoch, config.TRAIN.end_epoch):
        trainNet(net,
                 trainer,
                 train_data_iter,
                 loss,
                 train_metric,
                 epoch,
                 config,
                 logger=logger,
                 ctx=ctx)
        validNet(net,
                 valid_data_iter,
                 loss,
                 eval_metric,
                 epoch,
                 config,
                 logger=logger,
                 ctx=ctx)

    logger.kill()
Example #14
def main(args):
    '''create dir'''
    experiment_dir = Path('./experiment/')
    experiment_dir.mkdir(exist_ok=True)
    checkpoints_dir = Path('./experiment/checkpoints/')
    checkpoints_dir.mkdir(exist_ok=True)
    log_dir = Path('./experiment/logs/')
    log_dir.mkdir(exist_ok=True)

    ctx = [mxnet.gpu(gpu_id) for gpu_id in args.gpu]
    '''initialize the network'''
    net = MVRNN(cnn_arch='vgg11_bn',
                cnn_feature_length=4096,
                num_views=args.num_views,
                num_class=args.num_classes,
                pretrained=True,
                pretrained_cnn=args.pretrained_cnn,
                ctx=ctx)
    if args.checkpoint:
        net.load_parameters(args.checkpoint, ctx=ctx)
    else:
        net.initialize(init=init.MSRAPrelu(), ctx=ctx)
    net.hybridize()
    '''set grad_req to 'add' to manually aggregate gradients'''
    net.collect_params().setattr('grad_req', 'add')
    net._cnn2.collect_params().setattr('lr_mult', args.output_lr_mult)
    '''Setup loss function'''
    loss_fun = gluon.loss.SoftmaxCrossEntropyLoss(
        sparse_label=not args.label_smoothing)
    '''Loading dataset'''
    train_ds = MultiViewImageDataset(os.path.join(args.dataset_path, 'train'),
                                     args.num_views,
                                     transform=Compose([
                                         ToTensor(),
                                         Normalize(mean=(0.485, 0.456, 0.406),
                                                   std=(0.229, 0.224, 0.225))
                                     ]))
    test_ds = MultiViewImageDataset(os.path.join(args.dataset_path, 'test'),
                                    args.num_views,
                                    transform=Compose([
                                        ToTensor(),
                                        Normalize(mean=(0.485, 0.456, 0.406),
                                                  std=(0.229, 0.224, 0.225))
                                    ]))
    loader = gluon.data.DataLoader
    train_data = loader(train_ds,
                        args.batch_size,
                        shuffle=True,
                        last_batch='keep',
                        num_workers=4)
    test_data = loader(test_ds,
                       args.batch_size,
                       shuffle=False,
                       last_batch='keep',
                       num_workers=4)

    current_time = datetime.datetime.now()
    time_str = '%d-%d-%d--%d-%d-%d' % (
        current_time.year, current_time.month, current_time.day,
        current_time.hour, current_time.minute, current_time.second)
    log_filename = time_str + '.txt'
    checkpoint_name = 'checkpoint_' + time_str
    checkpoint_dir = Path(os.path.join(checkpoints_dir, checkpoint_name))
    checkpoint_dir.mkdir(exist_ok=True)

    with open(os.path.join(
            log_dir,
            log_filename,
    ), 'w') as log_out:
        try:
            kv = mxnet.kv.create('device')
            utils.log_string(log_out, sys.argv[0])
            utils.train(net, train_data, test_data, loss_fun, kv, log_out,
                        str(checkpoint_dir), args)
        except Exception as e:
            raise e
Example #15
    def __init__(self, args):
        self.args = args
        # image transform
        input_transform = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize([.485, .456, .406], [.229, .224, .225]), # Default mean and std
            # transforms.Normalize([.418, .447, .571], [.091, .078, .076]),   # Iceberg mean and std
        ])
        ################################# dataset and dataloader #################################
        if platform.system() == "Darwin":
            data_root = os.path.join('~', 'Nutstore Files', 'Dataset')
        elif platform.system() == "Linux":
            data_root = os.path.join('~', 'datasets')
            if args.colab:
                # data_root = '/content/gdrive/My Drive/Colab Notebooks/datasets'
                data_root = '/content/datasets'
        else:
            raise ValueError('Notice Dataset Path')

        data_kwargs = {'base_size': args.base_size, 'transform': input_transform,
                       'crop_size': args.crop_size, 'root': data_root,
                       'base_dir' : args.dataset}
        trainset = IceContrast(split=args.train_split, mode='train', **data_kwargs)
        valset = IceContrast(split=args.val_split, mode='testval', **data_kwargs)

        self.train_data = gluon.data.DataLoader(trainset, args.batch_size, shuffle=True,
            last_batch='rollover', num_workers=args.workers)
        self.eval_data = gluon.data.DataLoader(valset, args.test_batch_size,
            last_batch='rollover', num_workers=args.workers)

        # net_choice options: ResNetFPN, PCMNet, MPCMNet, LayerwiseMPCMNet
        net_choice = self.args.net_choice
        print("net_choice: ", net_choice)

        if net_choice == 'MPCMResNetFPN':
            r = self.args.r
            layers = [self.args.blocks] * 3
            channels = [8, 16, 32, 64]
            shift = self.args.shift
            pyramid_mode = self.args.pyramid_mode
            scale_mode = self.args.scale_mode
            pyramid_fuse = self.args.pyramid_fuse

            model = MPCMResNetFPN(layers=layers, channels=channels, shift=shift,
                                  pyramid_mode=pyramid_mode, scale_mode=scale_mode,
                                  pyramid_fuse=pyramid_fuse, r=r, classes=trainset.NUM_CLASS)
            print("net_choice: ", net_choice)
            print("scale_mode: ", scale_mode)
            print("pyramid_fuse: ", pyramid_fuse)
            print("r: ", r)
            print("layers: ", layers)
            print("channels: ", channels)
            print("shift: ", shift)


        self.host_name = socket.gethostname()
        self.save_prefix = self.host_name + '_' + net_choice + '_scale-mode_' + args.scale_mode + \
                           '_pyramid-fuse_' + args.pyramid_fuse + '_b_' + str(args.blocks)
        if args.net_choice == 'ResNetFCN':
            self.save_prefix = self.host_name + '_' + net_choice + '_b_' + str(args.blocks)

        # resume checkpoint if needed
        if args.resume is not None:
            if os.path.isfile(args.resume):
                model.load_parameters(args.resume, ctx=args.ctx)
            else:
                raise RuntimeError("=> no checkpoint found at '{}'".format(args.resume))
        else:
            model.initialize(init=init.MSRAPrelu(), ctx=args.ctx, force_reinit=True)
            print("Model Initializing")
            print("args.ctx: ", args.ctx)

        self.net = model

        if args.summary:
            self.net.summary(mx.nd.zeros((1, 3, 480, 480), self.args.ctx[0]))
            sys.exit()

        # create criterion
        self.criterion = SoftIoULoss()

        # optimizer and lr scheduling
        self.lr_scheduler = LRScheduler(mode='poly', base_lr=args.lr,
                                        nepochs=args.epochs,
                                        iters_per_epoch=len(self.train_data),
                                        power=0.9)
        kv = mx.kv.create(args.kvstore)

        # Adagrad is used below, so no momentum and no external lr_scheduler
        # are passed in the optimizer parameters.
        optimizer_params = {
            'wd': args.weight_decay,
            'learning_rate': args.lr
        }

        if args.dtype == 'float16':
            optimizer_params['multi_precision'] = True

        if args.no_wd:
            for k, v in self.net.collect_params('.*beta|.*gamma|.*bias').items():
                v.wd_mult = 0.0

        self.optimizer = gluon.Trainer(self.net.collect_params(), 'adagrad',
                                       optimizer_params, kvstore=kv)

        ################################# evaluation metrics #################################

        self.iou_metric = SigmoidMetric(1)
        self.nIoU_metric = SamplewiseSigmoidMetric(1, score_thresh=self.args.score_thresh)
        self.best_metric = 0
        self.best_iou = 0
        self.best_nIoU = 0
        self.is_best = False
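To show how the two metrics and the best_* fields fit together, here is a
hedged companion method (a sketch, not the authors' code; it assumes the
metrics follow the usual update(pred, label) / get() interface):

    def validation(self, epoch):
        self.iou_metric.reset()
        self.nIoU_metric.reset()
        for data, labels in self.eval_data:
            preds = self.net(data)
            self.iou_metric.update(preds, labels)
            self.nIoU_metric.update(preds, labels)
        _, iou = self.iou_metric.get()
        _, niou = self.nIoU_metric.get()
        # track the best epoch for checkpointing
        self.is_best = (iou > self.best_iou) or (niou > self.best_nIoU)
        self.best_iou = max(iou, self.best_iou)
        self.best_nIoU = max(niou, self.best_nIoU)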