def r2plus1d_resnet34_kinetics400_custom(nclass=400, pretrained=False, pretrained_base=True,
                                         use_kinetics_pretrain=True, root='~/.mxnet/models',
                                         num_segments=1, num_crop=1, feat_ext=False,
                                         ctx=cpu(), **kwargs):
    r"""R2Plus1D with ResNet34 backbone, optionally warm-started from Kinetics400 weights.

    Parameters
    ----------
    nclass : int.
        Number of categories in the target dataset.
    pretrained : bool or str.
        Whether to load full pretrained weights for this custom variant.
        Currently a no-op placeholder (see the ``elif`` branch below).
    pretrained_base : bool.
        Kept for interface compatibility; unused here.
    use_kinetics_pretrain : bool.
        If True (and ``pretrained`` is False), copy every parameter except the
        final dense classifier from the Kinetics400-pretrained model.
    root : str, default $MXNET_HOME/models
        Location for keeping the model parameters.
    num_segments : int, default is 1.
        Number of segments used to evenly divide a video.
    num_crop : int, default is 1.
        Number of crops used during evaluation, choices are 1, 3 or 10.
    feat_ext : bool.
        Whether to extract features before the dense classification layer
        or do a complete forward pass.
    ctx : Context, default CPU.
        The context in which to load the weights.
    """
    from .model_zoo import get_model

    model = R2Plus1D(nclass=nclass,
                     block=BasicBlock,
                     layers=[3, 4, 6, 3],
                     num_segments=num_segments,
                     num_crop=num_crop,
                     feat_ext=feat_ext,
                     ctx=ctx,
                     **kwargs)
    model.initialize(init.MSRAPrelu(), ctx=ctx)

    if use_kinetics_pretrain and not pretrained:
        kinetics_model = get_model('r2plus1d_resnet34_kinetics400', nclass=400, pretrained=True)
        source_params = kinetics_model.collect_params()
        target_params = model.collect_params()
        assert len(source_params.keys()) == len(target_params.keys())

        # The two models share the same architecture, so their parameters
        # line up one-to-one in collect_params() iteration order.
        pretrained_weights = []
        for layer_name in source_params.keys():
            pretrained_weights.append(source_params[layer_name].data())

        for i, layer_name in enumerate(target_params.keys()):
            if i + 2 == len(source_params.keys()):
                # skip the last dense layer (its weight and bias), since the
                # target dataset has a different number of classes
                break
            target_params[layer_name].set_data(pretrained_weights[i])
    elif pretrained:
        # Loading fully pretrained weights for this custom variant is not
        # implemented yet; the random initialization above is kept.
        pass
    # NOTE: the model is already initialized unconditionally above; the old
    # else-branch re-initialization was redundant (it only produced Gluon
    # "already initialized" warnings) and has been removed.

    model.collect_params().reset_ctx(ctx)
    return model
def __init__(self, depth, ctx, pretrained=True, num_features=0, num_classes=0, num_parts=1):
    """Part-based re-identification head on top of a truncated ResNet backbone.

    Parameters
    ----------
    depth : int
        ResNet depth; used as a key into ``ResNet.__factory``.
    ctx : Context or list of Context
        Device(s) on which the newly created head layers are initialized.
    pretrained : bool, default True
        Whether the backbone factory loads pretrained weights.
    num_features : int
        Output channels of the 1x1 embedding conv applied to pooled features.
    num_classes : int
        Number of identity classes for each classifier head.
    num_parts : int
        Number of local (part) branches to build.
    """
    super(ResNet, self).__init__()
    self.num_classes = num_classes
    self.num_parts = num_parts
    with self.name_scope():
        # Backbone: all pretrained feature stages except the last entry
        # (drops the trailing global pooling from .features).
        model = ResNet.__factory[depth](pretrained=pretrained, ctx=ctx).features[:-1]
        # Force stride 1 in the last stage's first block (both the residual
        # body and its downsample shortcut) to keep a larger feature map —
        # a common re-ID trick.
        model[-1][0].body[0]._kwargs['stride'] = (1, 1)
        model[-1][0].downsample[0]._kwargs['stride'] = (1, 1)
        self.base = nn.HybridSequential()
        for m in model:
            self.base.add(m)

        # local part branches: GlobalMaxPool -> 1x1 conv embed -> BN -> Flatten,
        # each paired with its own Dense classifier.
        self.feat = nn.HybridSequential()
        self.classify = nn.HybridSequential()
        for _ in range(num_parts):
            tmp = nn.HybridSequential()
            tmp.add(nn.GlobalMaxPool2D())
            feat = nn.Conv2D(channels=num_features, kernel_size=1, use_bias=False)
            feat.initialize(init.MSRAPrelu('in', 0), ctx=ctx)
            tmp.add(feat)
            bn = nn.BatchNorm()
            bn.initialize(init=init.Zero(), ctx=ctx)
            tmp.add(bn)
            tmp.add(nn.Flatten())
            self.feat.add(tmp)
            classifier = nn.Dense(num_classes, use_bias=False)
            classifier.initialize(init=init.Normal(0.001), ctx=ctx)
            self.classify.add(classifier)

        # global branch: same structure but GlobalAvgPool and a BatchNorm
        # without affine parameters (center=False, scale=False).
        self.g_feat = nn.HybridSequential()
        self.g_classify = nn.HybridSequential()
        for _ in range(1):
            tmp = nn.HybridSequential()
            tmp.add(nn.GlobalAvgPool2D())
            feat = nn.Conv2D(channels=num_features, kernel_size=1, use_bias=False)
            feat.initialize(init.MSRAPrelu('in', 0), ctx=ctx)
            tmp.add(feat)
            bn = nn.BatchNorm(center=False, scale=False)
            bn.initialize(init=init.Zero(), ctx=ctx)
            tmp.add(bn)
            tmp.add(nn.Flatten())
            self.g_feat.add(tmp)
            classifier = nn.Dense(num_classes, use_bias=False)
            classifier.initialize(init=init.Normal(0.001), ctx=ctx)
            self.g_classify.add(classifier)
def r2plus1d_resnet152_kinetics400(nclass=400, pretrained=False, pretrained_base=True,
                                   root='~/.mxnet/models', num_segments=1, num_crop=1,
                                   feat_ext=False, ctx=cpu(), **kwargs):
    r"""R2Plus1D with ResNet152 backbone trained on Kinetics400 dataset.

    Parameters
    ----------
    nclass : int.
        Number of categories in the dataset.
    pretrained : bool or str.
        Load the default pretrained weights when True; a string selects a
        specific tagged version of the weights.
    pretrained_base : bool or str, optional, default is True.
        Load pretrained base network, the extra layers are randomized.
        Has no effect when ``pretrained`` is True.
    ctx : Context, default CPU.
        The context in which to load the pretrained weights.
    root : str, default $MXNET_HOME/models
        Location for keeping the model parameters.
    num_segments : int, default is 1.
        Number of segments used to evenly divide a video.
    num_crop : int, default is 1.
        Number of crops used during evaluation, choices are 1, 3 or 10.
    feat_ext : bool.
        Whether to extract features before the dense classification layer
        or do a complete forward pass.
    """
    net = R2Plus1D(nclass=nclass,
                   block=Bottleneck,
                   layers=[3, 8, 36, 3],
                   num_segments=num_segments,
                   num_crop=num_crop,
                   feat_ext=feat_ext,
                   ctx=ctx,
                   **kwargs)
    net.initialize(init.MSRAPrelu(), ctx=ctx)

    if pretrained:
        from ..model_store import get_model_file
        param_file = get_model_file('r2plus1d_resnet152_kinetics400', tag=pretrained, root=root)
        net.load_parameters(param_file, ctx=ctx)
        from ...data import Kinetics400Attr
        net.classes = Kinetics400Attr().classes

    net.collect_params().reset_ctx(ctx)
    return net
def c3d_kinetics400(nclass=400, pretrained=False, ctx=cpu(), root='~/.mxnet/models',
                    num_segments=1, num_crop=1, feat_ext=False, **kwargs):
    r"""The Convolutional 3D network (C3D) trained on Kinetics400 dataset.
    Learning Spatiotemporal Features with 3D Convolutional Networks.
    ICCV, 2015. https://arxiv.org/abs/1412.0767

    Parameters
    ----------
    nclass : int.
        Number of categories in the dataset.
    pretrained : bool or str.
        Load the default pretrained weights when True; a string selects a
        specific tagged version of the weights.
    ctx : Context, default CPU.
        The context in which to load the pretrained weights.
    root : str, default $MXNET_HOME/models
        Location for keeping the model parameters.
    num_segments : int, default is 1.
        Number of segments used to evenly divide a video.
    num_crop : int, default is 1.
        Number of crops used during evaluation, choices are 1, 3 or 10.
    feat_ext : bool.
        Whether to extract features before the dense classification layer
        or do a complete forward pass.
    """
    net = C3D(nclass=nclass,
              ctx=ctx,
              num_segments=num_segments,
              num_crop=num_crop,
              feat_ext=feat_ext,
              **kwargs)
    net.initialize(init.MSRAPrelu(), ctx=ctx)

    if pretrained:
        from ..model_store import get_model_file
        weight_file = get_model_file('c3d_kinetics400', tag=pretrained, root=root)
        net.load_parameters(weight_file, ctx=ctx)
        from ...data import Kinetics400Attr
        net.classes = Kinetics400Attr().classes

    net.collect_params().reset_ctx(ctx)
    return net
def r2plus1d_resnet34_tranconv_lateral(nelength=16, pretrained=False, ctx=cpu(), **kwargs):
    """R2Plus1D-ResNet34 with transposed-conv lateral connections.

    Parameters
    ----------
    nelength : int, default 16
        Clip length; kept for interface compatibility (unused here).
    pretrained : bool, default False
        If True, load the best UCF101 checkpoint from the hard-coded log
        directory; otherwise initialize randomly with MSRAPrelu.
    ctx : Context, default CPU
        Context on which to initialize / load the parameters.
    """
    model = R2Plus1D_TranConv_lateral()
    if pretrained:
        modelfile = '0.9315-ucf101-r2plus1d_resnet34_tranconv_lateral-079-best.params'
        root = '/home/hp/lcx/Action-Recognition/logs/param_rgb_r2plus1d_resnet18_kinetics400_custom_ucf101_nlength16_lateral_1'
        filepath = os.path.expanduser(os.path.join(root, modelfile))
        # BUG FIX: previously load_parameters() was given the bare
        # ``modelfile`` name, which only resolved when the CWD happened to
        # be ``root``; load the fully-resolved path instead.
        model.load_parameters(filepath, ctx=ctx, allow_missing=True)
        print(filepath)
    else:
        model.initialize(init.MSRAPrelu(), ctx=ctx)
    return model
def train_enhance_net(train_dataloader, test_dataloader, logger):
    """Build the configured model, load base weights, and run mixup training."""
    model = get_model(name=config.MODEL_NAME, num_classes=config.NUM_CLASSES)
    model.initialize(init.MSRAPrelu(), ctx=config.CTX)
    model.collect_params().reset_ctx(config.CTX)
    # warm-start from the base checkpoint; extra/missing params are tolerated
    model.load_parameters(config.BASE_MODEL_PATH, allow_missing=True)
    model.hybridize()

    if config.DTYPE != 'float32':
        model.cast('float16')

    # disable weight decay on BN parameters and biases
    for param in model.collect_params('.*beta|.*gamma|.*bias').values():
        param.wd_mult = 0.0

    train_mixup(net=model,
                train_dataloader=train_dataloader,
                valid_dataloader=test_dataloader,
                num_epochs=config.NUM_EPOCHES,
                batch_size=config.BATCH_SIZE,
                lr=config.LR,
                wd=config.WEIGHT_DECAY,
                ctx=config.CTX,
                dtype=config.DTYPE,
                logger=logger)
def get_model(args):
    '''Setup network: build the multi-view CNN and prepare it for training.'''
    devices = [mxnet.gpu(gpu_id) for gpu_id in args.gpu]
    net = MVRNN(cnn_arch='vgg11_bn',
                cnn_feature_length=4096,
                num_views=args.num_views,
                num_class=args.num_classes,
                pretrained=True,
                pretrained_cnn=args.pretrained_cnn,
                ctx=devices)

    # resume from checkpoint when given, otherwise random init
    if args.checkpoint:
        net.load_parameters(args.checkpoint, ctx=devices)
    else:
        net.initialize(init=init.MSRAPrelu(), ctx=devices)

    net.hybridize()
    # accumulate gradients manually across views
    net.collect_params().setattr('grad_req', 'add')
    net._cnn2.collect_params().setattr('lr_mult', args.output_lr_mult)
    return net
def r2plus1d_resnet18_kinetics400_custom(nclass=400, pretrained=False, pretrained_base=True,
                                         use_kinetics_pretrain=True, root='~/.mxnet/models',
                                         num_segments=1, num_crop=1, use_lateral=False,
                                         feat_ext=False, ctx=cpu(), **kwargs):
    r"""R2Plus1D with ResNet18 backbone, optionally warm-started from Kinetics400.

    Parameters
    ----------
    nclass : int.
        Number of categories in the target dataset.
    pretrained : bool or str.
        Whether to load full pretrained weights for this custom variant.
        Currently a no-op placeholder (see the ``elif`` branch below).
    pretrained_base : bool.
        Kept for interface compatibility; unused here.
    use_kinetics_pretrain : bool.
        If True (and ``pretrained`` is False), copy every parameter except the
        final dense classifier from the Kinetics400-pretrained model.
    root : str, default $MXNET_HOME/models
        Location for keeping the model parameters.
    num_segments : int, default is 1.
        Number of segments used to evenly divide a video.
    num_crop : int, default is 1.
        Number of crops used during evaluation, choices are 1, 3 or 10.
    use_lateral : bool, default False.
        Passed through to the R2Plus1D constructor.
    feat_ext : bool.
        Whether to extract features before the dense classification layer
        or do a complete forward pass.
    ctx : Context, default CPU.
        The context in which to load the weights.
    """
    from .model_zoo import get_model

    model = R2Plus1D(nclass=nclass,
                     block=BasicBlock,
                     layers=[2, 2, 2, 2],
                     num_segments=num_segments,
                     num_crop=num_crop,
                     feat_ext=feat_ext,
                     ctx=ctx,
                     use_lateral=use_lateral,
                     **kwargs)
    model.initialize(init.MSRAPrelu(), ctx=ctx)

    if use_kinetics_pretrain and not pretrained:
        print('use_kinetics_pretrain == True')
        kinetics_model = get_model('r2plus1d_resnet18_kinetics400', nclass=400, pretrained=True)
        source_params = kinetics_model.collect_params()
        target_params = model.collect_params()
        # NOTE(review): with use_lateral=True the custom model may own extra
        # parameters, which would trip this assert — confirm against R2Plus1D.
        assert len(source_params.keys()) == len(target_params.keys())

        # Parameters of the two models line up one-to-one in collect_params()
        # iteration order; copy everything except the classifier.
        pretrained_weights = []
        for layer_name in source_params.keys():
            pretrained_weights.append(source_params[layer_name].data())

        for i, layer_name in enumerate(target_params.keys()):
            if i + 2 == len(source_params.keys()):
                # skip the last dense layer (its weight and bias), since the
                # target dataset has a different number of classes
                break
            target_params[layer_name].set_data(pretrained_weights[i])
    elif pretrained:
        # Loading fully pretrained weights for this custom variant is not
        # implemented; the random initialization above is kept.
        pass
    else:
        print('use_kinetics_pretrain == False')

    model.collect_params().reset_ctx(ctx)
    return model
def __init__(self, in_channels=1024, nonlocal_type="gaussian", dim=3, embed=True,
             embed_dim=None, sub_sample=True, use_bn=True, norm_layer=BatchNorm,
             norm_kwargs=None, ctx=None, **kwargs):
    """Non-local block over 2-D or 3-D feature maps.

    Parameters
    ----------
    in_channels : int
        Channels of the input feature map.
    nonlocal_type : str, one of {'gaussian', 'dot', 'concat'}
        Pairwise similarity function variant.
    dim : int, 2 or 3
        Build 2-D (spatial) or 3-D (spatio-temporal) convolutions.
    embed : bool
        Whether to project inputs with 1x1(x1) theta/phi/g embeddings.
    embed_dim : int or None
        Embedding channels; defaults to ``in_channels // 2``.
    sub_sample : bool
        Max-pool phi/g to reduce the attention matrix size.
    use_bn : bool
        Append a BatchNorm after the output projection W.
    norm_layer : Block
        Normalization layer class to instantiate.
    norm_kwargs : dict or None
        Extra kwargs forwarded to ``norm_layer``.
    """
    super(NonLocal, self).__init__()
    assert nonlocal_type in ['gaussian', 'dot', 'concat']
    self.nonlocal_type = nonlocal_type
    self.embed = embed
    # default embedding width is half the input channels
    self.embed_dim = embed_dim if embed_dim is not None else in_channels // 2
    self.sub_sample = sub_sample
    self.use_bn = use_bn

    with self.name_scope():
        if self.embed:
            # theta/phi/g: pointwise projections into the embedding space
            if dim == 2:
                self.theta = nn.Conv2D(in_channels=in_channels, channels=self.embed_dim,
                                       kernel_size=(1, 1), strides=(1, 1), padding=(0, 0),
                                       weight_initializer=init.MSRAPrelu())
                self.phi = nn.Conv2D(in_channels=in_channels, channels=self.embed_dim,
                                     kernel_size=(1, 1), strides=(1, 1), padding=(0, 0),
                                     weight_initializer=init.MSRAPrelu())
                self.g = nn.Conv2D(in_channels=in_channels, channels=self.embed_dim,
                                   kernel_size=(1, 1), strides=(1, 1), padding=(0, 0),
                                   weight_initializer=init.MSRAPrelu())
            elif dim == 3:
                self.theta = nn.Conv3D(in_channels=in_channels, channels=self.embed_dim,
                                       kernel_size=(1, 1, 1), strides=(1, 1, 1), padding=(0, 0, 0),
                                       weight_initializer=init.MSRAPrelu())
                self.phi = nn.Conv3D(in_channels=in_channels, channels=self.embed_dim,
                                     kernel_size=(1, 1, 1), strides=(1, 1, 1), padding=(0, 0, 0),
                                     weight_initializer=init.MSRAPrelu())
                self.g = nn.Conv3D(in_channels=in_channels, channels=self.embed_dim,
                                   kernel_size=(1, 1, 1), strides=(1, 1, 1), padding=(0, 0, 0),
                                   weight_initializer=init.MSRAPrelu())

        if self.nonlocal_type == 'concat':
            # 'concat' similarity: project concatenated theta/phi pairs to a scalar
            if dim == 2:
                self.concat_proj = nn.HybridSequential()
                self.concat_proj.add(
                    nn.Conv2D(in_channels=self.embed_dim * 2, channels=1,
                              kernel_size=(1, 1), strides=(1, 1), padding=(0, 0),
                              weight_initializer=init.MSRAPrelu()))
                self.concat_proj.add(nn.Activation('relu'))
            elif dim == 3:
                self.concat_proj = nn.HybridSequential()
                self.concat_proj.add(
                    nn.Conv3D(in_channels=self.embed_dim * 2, channels=1,
                              kernel_size=(1, 1, 1), strides=(1, 1, 1), padding=(0, 0, 0),
                              weight_initializer=init.MSRAPrelu()))
                self.concat_proj.add(nn.Activation('relu'))

        if sub_sample:
            # downsample phi/g spatially to shrink the attention matrix
            # (temporal dim untouched in the 3-D case)
            if dim == 2:
                self.max_pool = nn.MaxPool2D(pool_size=(2, 2))
            elif dim == 3:
                self.max_pool = nn.MaxPool3D(pool_size=(1, 2, 2))
            # NOTE(review): these reference self.phi / self.g, which only
            # exist when embed=True — confirm sub_sample is never used with
            # embed=False.
            self.sub_phi = nn.HybridSequential()
            self.sub_phi.add(self.phi)
            self.sub_phi.add(self.max_pool)
            self.sub_g = nn.HybridSequential()
            self.sub_g.add(self.g)
            self.sub_g.add(self.max_pool)

        # W: project the aggregated embedding back to in_channels
        if dim == 2:
            self.W = nn.Conv2D(in_channels=self.embed_dim, channels=in_channels,
                               kernel_size=(1, 1), strides=(1, 1), padding=(0, 0),
                               weight_initializer=init.MSRAPrelu())
        elif dim == 3:
            self.W = nn.Conv3D(in_channels=self.embed_dim, channels=in_channels,
                               kernel_size=(1, 1, 1), strides=(1, 1, 1), padding=(0, 0, 0),
                               weight_initializer=init.MSRAPrelu())

        if use_bn:
            # gamma is zero-initialized — presumably so the non-local branch
            # contributes nothing at the start of training; confirm in the
            # forward pass that W_bn's output is added residually.
            self.bn = norm_layer(in_channels=in_channels, gamma_initializer='zeros',
                                 **({} if norm_kwargs is None else norm_kwargs))
            self.W_bn = nn.HybridSequential()
            self.W_bn.add(self.W)
            self.W_bn.add(self.bn)
def __init__(self, args):
    """Set up logging, data, model, loss, LR schedule, and metrics for training.

    Parameters
    ----------
    args : argparse.Namespace
        Parsed command-line options (dataset, model, optimizer, ctx, ...).
    """
    self.args = args
    # logging: write to file and to the console via the root logger
    filehandler = logging.FileHandler(args.logging_file)
    streamhandler = logging.StreamHandler()
    self.logger = logging.getLogger('')
    self.logger.setLevel(logging.INFO)
    self.logger.addHandler(filehandler)
    self.logger.addHandler(streamhandler)
    self.logger.info(args)

    # image transform
    input_transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize([.485, .456, .406], [.229, .224, .225]),  # Default mean and std
    ])

    ################################# dataset and dataloader #################################
    # dataset root is picked per machine
    if platform.system() == "Darwin":
        data_root = os.path.join('~', 'Nutstore Files', 'Dataset')  # Mac
    elif platform.system() == "Linux":
        data_root = os.path.join('~', 'datasets')  # Laplace or HPC
        if args.colab:
            data_root = '/content/datasets'  # Colab
    else:
        raise ValueError('Notice Dataset Path')

    data_kwargs = {'base_size': args.base_size, 'transform': input_transform,
                   'crop_size': args.crop_size, 'root': data_root,
                   'base_dir': args.dataset}
    trainset = IceContrast(split=args.train_split, mode='train', **data_kwargs)
    valset = IceContrast(split=args.val_split, mode='testval', **data_kwargs)
    self.train_data = gluon.data.DataLoader(trainset, args.batch_size, shuffle=True,
                                            last_batch='rollover', num_workers=args.workers)
    self.eval_data = gluon.data.DataLoader(valset, args.test_batch_size,
                                           last_batch='rollover', num_workers=args.workers)

    # network: width/depth scaled by CLI options
    layers = [args.blocks] * 3
    channels = [x * args.channel_times for x in [8, 16, 32, 64]]
    if args.model == 'ResNetFPN':
        model = ASKCResNetFPN(layers=layers, channels=channels, fuse_mode=args.fuse_mode,
                              tiny=args.tiny, classes=trainset.NUM_CLASS)
    elif args.model == 'ResUNet':
        model = ASKCResUNet(layers=layers, channels=channels, fuse_mode=args.fuse_mode,
                            tiny=args.tiny, classes=trainset.NUM_CLASS)
    print("layers: ", layers)
    print("channels: ", channels)
    print("fuse_mode: ", args.fuse_mode)
    print("tiny: ", args.tiny)
    print("classes: ", trainset.NUM_CLASS)

    if args.host == 'xxx':
        self.host_name = socket.gethostname()  # automatic
    else:
        self.host_name = args.host  # Puma needs to be specified
    self.save_prefix = '_'.join([args.model, args.fuse_mode, args.dataset,
                                 self.host_name, 'GPU', args.gpus])
    model.cast(args.dtype)
    # self.logger.info(model)

    # resume checkpoint if needed, otherwise fresh MSRAPrelu init
    if args.resume is not None:
        if os.path.isfile(args.resume):
            model.load_parameters(args.resume, ctx=args.ctx)
        else:
            raise RuntimeError("=> no checkpoint found at '{}'".format(args.resume))
    else:
        model.initialize(init=init.MSRAPrelu(), ctx=args.ctx, force_reinit=True)
        print("Model Initializing")
        print("args.ctx: ", args.ctx)
    self.net = model

    # optionally print a parameter summary and exit
    if args.summary:
        summary(self.net, mx.nd.zeros((1, 3, args.crop_size, args.crop_size), ctx=args.ctx[0]))
        sys.exit()

    # create criterion
    self.criterion = SoftIoULoss()

    # optimizer and lr scheduling: linear warmup followed by poly decay
    self.lr_scheduler = LRSequential([
        LRScheduler('linear', base_lr=0, target_lr=args.lr,
                    nepochs=args.warmup_epochs, iters_per_epoch=len(self.train_data)),
        LRScheduler(mode='poly', base_lr=args.lr,
                    nepochs=args.epochs - args.warmup_epochs,
                    iters_per_epoch=len(self.train_data), power=0.9)
    ])
    kv = mx.kv.create(args.kvstore)
    if args.optimizer == 'sgd':
        optimizer_params = {'lr_scheduler': self.lr_scheduler, 'wd': args.weight_decay,
                            'momentum': args.momentum, 'learning_rate': args.lr}
    elif args.optimizer == 'adam':
        optimizer_params = {'lr_scheduler': self.lr_scheduler, 'wd': args.weight_decay,
                            'learning_rate': args.lr}
    elif args.optimizer == 'adagrad':
        optimizer_params = {'wd': args.weight_decay, 'learning_rate': args.lr}
    else:
        raise ValueError('Unsupported optimizer {} used'.format(args.optimizer))
    if args.dtype == 'float16':
        optimizer_params['multi_precision'] = True

    # optionally disable weight decay on BN params and biases
    if args.no_wd:
        for k, v in self.net.collect_params('.*beta|.*gamma|.*bias').items():
            v.wd_mult = 0.0
    self.optimizer = gluon.Trainer(self.net.collect_params(), args.optimizer,
                                   optimizer_params, kvstore=kv)

    ################################# evaluation metrics #################################
    self.iou_metric = SigmoidMetric(1)
    self.nIoU_metric = SamplewiseSigmoidMetric(1, score_thresh=self.args.score_thresh)
    self.best_iou = 0
    self.best_nIoU = 0
    self.is_best = False
def __init__(self, nclass, block=Bottleneck, layers=None, pretrained=False,
             pretrained_base=False, num_segments=1, num_crop=1, bn_eval=True,
             bn_frozen=False, partial_bn=False, frozen_stages=-1,
             dropout_ratio=0.5, init_std=0.01, alpha=8, beta_inv=8,
             fusion_conv_channel_ratio=2, fusion_kernel_size=5,
             width_per_group=64, num_groups=1, slow_temporal_stride=16,
             fast_temporal_stride=2, slow_frames=4, fast_frames=32,
             norm_layer=BatchNorm, norm_kwargs=None, ctx=None, **kwargs):
    """SlowFast video network: a heavy 'slow' pathway plus a lightweight
    'fast' pathway (channels reduced by ``beta_inv``), fused via strided
    lateral Conv3D connections, followed by a global-pool + dropout + dense
    classifier over the concatenated features.

    Parameters
    ----------
    nclass : int
        Number of output classes.
    layers : list of int
        Residual block counts for stages res2..res5.
    alpha : int
        Temporal stride of the lateral connections (fast -> slow).
    beta_inv : int
        Inverse channel ratio of the fast pathway.
    fusion_conv_channel_ratio, fusion_kernel_size : int
        Channel expansion and temporal kernel of the lateral convs.
    norm_layer, norm_kwargs
        Normalization layer class and its extra kwargs.
    """
    super(SlowFast, self).__init__()
    self.num_segments = num_segments
    self.num_crop = num_crop
    self.dropout_ratio = dropout_ratio
    self.init_std = init_std
    self.alpha = alpha
    self.beta_inv = beta_inv
    self.fusion_conv_channel_ratio = fusion_conv_channel_ratio
    self.fusion_kernel_size = fusion_kernel_size
    self.width_per_group = width_per_group
    self.num_groups = num_groups
    self.dim_inner = self.num_groups * self.width_per_group
    # ratio controlling how many lateral channels the slow path receives
    self.out_dim_ratio = self.beta_inv // self.fusion_conv_channel_ratio
    self.slow_temporal_stride = slow_temporal_stride
    self.fast_temporal_stride = fast_temporal_stride
    self.slow_frames = slow_frames
    self.fast_frames = fast_frames

    with self.name_scope():
        # build fast pathway (channels divided by beta_inv, temporal kernel 5)
        fast = nn.HybridSequential(prefix='fast_')
        with fast.name_scope():
            self.fast_conv1 = nn.Conv3D(in_channels=3, channels=self.width_per_group // self.beta_inv,
                                        kernel_size=(5, 7, 7), strides=(1, 2, 2), padding=(2, 3, 3),
                                        use_bias=False)
            self.fast_bn1 = norm_layer(in_channels=self.width_per_group // self.beta_inv,
                                       **({} if norm_kwargs is None else norm_kwargs))
            self.fast_relu = nn.Activation('relu')
            self.fast_maxpool = nn.MaxPool3D(pool_size=(1, 3, 3), strides=(1, 2, 2), padding=(0, 1, 1))
            self.fast_res2 = self._make_layer_fast(inplanes=self.width_per_group // self.beta_inv,
                                                   planes=self.dim_inner // self.beta_inv,
                                                   num_blocks=layers[0], head_conv=3,
                                                   norm_layer=norm_layer, norm_kwargs=norm_kwargs,
                                                   layer_name='fast_res2_')
            self.fast_res3 = self._make_layer_fast(inplanes=self.width_per_group * 4 // self.beta_inv,
                                                   planes=self.dim_inner * 2 // self.beta_inv,
                                                   num_blocks=layers[1], strides=2, head_conv=3,
                                                   norm_layer=norm_layer, norm_kwargs=norm_kwargs,
                                                   layer_name='fast_res3_')
            self.fast_res4 = self._make_layer_fast(inplanes=self.width_per_group * 8 // self.beta_inv,
                                                   planes=self.dim_inner * 4 // self.beta_inv,
                                                   num_blocks=layers[2], strides=2, head_conv=3,
                                                   norm_layer=norm_layer, norm_kwargs=norm_kwargs,
                                                   layer_name='fast_res4_')
            self.fast_res5 = self._make_layer_fast(inplanes=self.width_per_group * 16 // self.beta_inv,
                                                   planes=self.dim_inner * 8 // self.beta_inv,
                                                   num_blocks=layers[3], strides=2, head_conv=3,
                                                   norm_layer=norm_layer, norm_kwargs=norm_kwargs,
                                                   layer_name='fast_res5_')

        # build lateral connections: strided temporal Conv3D + BN + ReLU that
        # carry fast-pathway features into the slow pathway at each stage
        self.lateral_p1 = nn.HybridSequential(prefix='lateral_p1_')
        with self.lateral_p1.name_scope():
            self.lateral_p1.add(nn.Conv3D(in_channels=self.width_per_group // self.beta_inv,
                                          channels=self.width_per_group // self.beta_inv * self.fusion_conv_channel_ratio,
                                          kernel_size=(self.fusion_kernel_size, 1, 1),
                                          strides=(self.alpha, 1, 1),
                                          padding=(self.fusion_kernel_size // 2, 0, 0),
                                          use_bias=False))
            self.lateral_p1.add(norm_layer(in_channels=self.width_per_group // self.beta_inv * self.fusion_conv_channel_ratio,
                                           **({} if norm_kwargs is None else norm_kwargs)))
            self.lateral_p1.add(nn.Activation('relu'))

        self.lateral_res2 = nn.HybridSequential(prefix='lateral_res2_')
        with self.lateral_res2.name_scope():
            self.lateral_res2.add(nn.Conv3D(in_channels=self.width_per_group * 4 // self.beta_inv,
                                            channels=self.width_per_group * 4 // self.beta_inv * self.fusion_conv_channel_ratio,
                                            kernel_size=(self.fusion_kernel_size, 1, 1),
                                            strides=(self.alpha, 1, 1),
                                            padding=(self.fusion_kernel_size // 2, 0, 0),
                                            use_bias=False))
            self.lateral_res2.add(norm_layer(in_channels=self.width_per_group * 4 // self.beta_inv * self.fusion_conv_channel_ratio,
                                             **({} if norm_kwargs is None else norm_kwargs)))
            self.lateral_res2.add(nn.Activation('relu'))

        self.lateral_res3 = nn.HybridSequential(prefix='lateral_res3_')
        with self.lateral_res3.name_scope():
            self.lateral_res3.add(nn.Conv3D(in_channels=self.width_per_group * 8 // self.beta_inv,
                                            channels=self.width_per_group * 8 // self.beta_inv * self.fusion_conv_channel_ratio,
                                            kernel_size=(self.fusion_kernel_size, 1, 1),
                                            strides=(self.alpha, 1, 1),
                                            padding=(self.fusion_kernel_size // 2, 0, 0),
                                            use_bias=False))
            self.lateral_res3.add(norm_layer(in_channels=self.width_per_group * 8 // self.beta_inv * self.fusion_conv_channel_ratio,
                                             **({} if norm_kwargs is None else norm_kwargs)))
            self.lateral_res3.add(nn.Activation('relu'))

        self.lateral_res4 = nn.HybridSequential(prefix='lateral_res4_')
        with self.lateral_res4.name_scope():
            self.lateral_res4.add(nn.Conv3D(in_channels=self.width_per_group * 16 // self.beta_inv,
                                            channels=self.width_per_group * 16 // self.beta_inv * self.fusion_conv_channel_ratio,
                                            kernel_size=(self.fusion_kernel_size, 1, 1),
                                            strides=(self.alpha, 1, 1),
                                            padding=(self.fusion_kernel_size // 2, 0, 0),
                                            use_bias=False))
            self.lateral_res4.add(norm_layer(in_channels=self.width_per_group * 16 // self.beta_inv * self.fusion_conv_channel_ratio,
                                             **({} if norm_kwargs is None else norm_kwargs)))
            self.lateral_res4.add(nn.Activation('relu'))

        # build slow pathway (full width, temporal kernel 1 in the stem;
        # inplanes include the concatenated lateral channels)
        slow = nn.HybridSequential(prefix='slow_')
        with slow.name_scope():
            self.slow_conv1 = nn.Conv3D(in_channels=3, channels=self.width_per_group,
                                        kernel_size=(1, 7, 7), strides=(1, 2, 2), padding=(0, 3, 3),
                                        use_bias=False)
            self.slow_bn1 = norm_layer(in_channels=self.width_per_group,
                                       **({} if norm_kwargs is None else norm_kwargs))
            self.slow_relu = nn.Activation('relu')
            self.slow_maxpool = nn.MaxPool3D(pool_size=(1, 3, 3), strides=(1, 2, 2), padding=(0, 1, 1))
            self.slow_res2 = self._make_layer_slow(inplanes=self.width_per_group + self.width_per_group // self.out_dim_ratio,
                                                   planes=self.dim_inner,
                                                   num_blocks=layers[0], head_conv=1,
                                                   norm_layer=norm_layer, norm_kwargs=norm_kwargs,
                                                   layer_name='slow_res2_')
            self.slow_res3 = self._make_layer_slow(inplanes=self.width_per_group * 4 + self.width_per_group * 4 // self.out_dim_ratio,
                                                   planes=self.dim_inner * 2,
                                                   num_blocks=layers[1], strides=2, head_conv=1,
                                                   norm_layer=norm_layer, norm_kwargs=norm_kwargs,
                                                   layer_name='slow_res3_')
            self.slow_res4 = self._make_layer_slow(inplanes=self.width_per_group * 8 + self.width_per_group * 8 // self.out_dim_ratio,
                                                   planes=self.dim_inner * 4,
                                                   num_blocks=layers[2], strides=2, head_conv=3,
                                                   norm_layer=norm_layer, norm_kwargs=norm_kwargs,
                                                   layer_name='slow_res4_')
            self.slow_res5 = self._make_layer_slow(inplanes=self.width_per_group * 16 + self.width_per_group * 16 // self.out_dim_ratio,
                                                   planes=self.dim_inner * 8,
                                                   num_blocks=layers[3], strides=2, head_conv=3,
                                                   norm_layer=norm_layer, norm_kwargs=norm_kwargs,
                                                   layer_name='slow_res5_')

        # build classifier over concatenated slow + fast features
        self.avg = nn.GlobalAvgPool3D()
        self.dp = nn.Dropout(rate=self.dropout_ratio)
        self.feat_dim = self.width_per_group * 32 // self.beta_inv + self.width_per_group * 32
        self.fc = nn.Dense(in_units=self.feat_dim, units=nclass,
                           weight_initializer=init.Normal(sigma=self.init_std),
                           use_bias=True)

    self.initialize(init.MSRAPrelu(), ctx=ctx)
def __init__(self, nclass, base_model='resnet18_v1b', pretrained_base=True, num_segments=8,
             num_temporal=1, ifTSN=True, input_channel=3, batch_normal=True,
             dropout_ratio=0.8, init_std=0.001, **kwargs):
    """ECO-style video network: a 2-D ResNet stem (through layer2) per frame,
    followed by a 3-D BasicBlock stack over the stacked frame features and a
    dense classifier head.

    Parameters
    ----------
    nclass : int
        Number of output classes.
    base_model : str
        Name of the 2-D backbone to load from the model zoo.
    pretrained_base : bool
        Load pretrained backbone weights; also triggers init of the new layers.
    num_segments : int
        Number of frames sampled per clip.
    num_temporal : int
        Temporal stride of the first 3-D block (see selection logic below).
    ifTSN : bool
        If True, use a single 512 -> nclass head (TSN-style consensus);
        otherwise flatten the 3-D features through a 512-unit bottleneck.
    """
    super(ECO, self).__init__()
    self.nclass = nclass
    self.dropout_ratio = dropout_ratio
    self.init_std = init_std
    self.num_segments = num_segments
    self.ifTSN = ifTSN
    self.input_shape = 224
    self.base_model = base_model
    #['resnet18_v1b','resnet18_v2','resnet18_v1b_kinetics400','resnet18_v1b_k400_ucf101'][1]
    # resnet50/101/152 backbones use self.expansion == 4
    #self.expansion = 4 if ('resnet50_v1b' in self.base_model)or('resnet101_v1b' in self.base_model)or('resnet152_v1b' in self.base_model) else 1
    if 'resnet18_v1b' in self.base_model:
        self.expansion = 1
    elif 'resnet34_v1b' in self.base_model:
        self.expansion = 1
    elif 'resnet50_v1b' in self.base_model:
        self.expansion = 4
    elif 'resnet101_v1b' in self.base_model:
        self.expansion = 4
    elif 'resnet152_v1b' in self.base_model:
        self.expansion = 4
    else:
        self.expansion = 1
    # channel dim coming out of the 2-D backbone (after layer2)
    self.feat_dim_2d = 128 * self.expansion
    # num_temporal defaults to 1: per the paper the temporal dimension is not
    # reduced at the start; only honored for 8/16/32 segments
    self.num_temporal = num_temporal
    if self.num_segments == 4:
        self.num_temporal = 1
    elif self.num_segments == 8:
        self.num_temporal = num_temporal
    elif self.num_segments == 16:
        self.num_temporal = num_temporal
    elif self.num_segments == 32:
        self.num_temporal = num_temporal
    else:
        self.num_temporal = 1
    # input dim of the fc head
    if self.ifTSN == True:
        self.feat_dim_3d = 512
    else:
        # Flatten: remaining temporal extent after the 3-D stack's strides
        tmppara = self.num_segments // 4
        tmppara = tmppara // (self.num_temporal if tmppara > 1 else 1)
        self.feat_dim_3d = 512 * tmppara
    pretrained_model = get_model(self.base_model, pretrained=pretrained_base)
    with self.name_scope():
        # x = nd.zeros(shape=(7x8,3,224,224))
        # 2D feature extractor (per-frame)
        if self.base_model == 'resnet18_v2':
            self.feature2d = pretrained_model.features
        else:  # 'resnet18_v1b' in self.base_model:
            self.conv1 = pretrained_model.conv1
            self.bn1 = pretrained_model.bn1
            self.relu = pretrained_model.relu
            # NOTE(review): duplicate assignment of self.conv1 (harmless but
            # redundant — same attribute assigned above)
            self.conv1 = pretrained_model.conv1
            self.maxpool = pretrained_model.maxpool
            self.layer1 = pretrained_model.layer1
            self.layer2 = pretrained_model.layer2
        # 3D feature stack over the stacked per-frame features
        self.features_3d = nn.HybridSequential(prefix='')
        # conv3_x
        self.features_3d.add(
            BasicBlock(in_channel=self.feat_dim_2d, out_channel=128,
                       spatial_stride=1, temporal_stride=self.num_temporal))
        self.features_3d.add(
            BasicBlock(in_channel=128, out_channel=128,
                       spatial_stride=1, temporal_stride=1))
        # conv4_x
        self.features_3d.add(
            BasicBlock(in_channel=128, out_channel=256,
                       spatial_stride=2, temporal_stride=2))
        self.features_3d.add(
            BasicBlock(in_channel=256, out_channel=256,
                       spatial_stride=1, temporal_stride=1))
        # conv5_x
        self.features_3d.add(
            BasicBlock(in_channel=256, out_channel=512,
                       spatial_stride=2, temporal_stride=2))
        self.features_3d.add(
            BasicBlock(in_channel=512, out_channel=512,
                       spatial_stride=1, temporal_stride=1))
        self.features_3d.add(nn.AvgPool3D(pool_size=(1, 7, 7)))
        self.dropout = nn.Dropout(rate=self.dropout_ratio)
        # classifier head
        self.output = nn.HybridSequential(prefix='')
        if self.ifTSN == True:
            self.output.add(
                nn.Dense(
                    units=self.nclass,
                    in_units=512,
                    weight_initializer=init.Normal(sigma=self.init_std)))
        else:
            self.output.add(
                nn.Dense(
                    units=512,
                    in_units=self.feat_dim_3d,
                    weight_initializer=init.Normal(sigma=self.init_std)),
                nn.Dense(
                    units=self.nclass,
                    in_units=512,
                    weight_initializer=init.Normal(sigma=self.init_std)))
    # init: only the newly added layers — the 2-D backbone keeps its
    # pretrained weights
    if pretrained_base:
        self.features_3d.initialize(init.MSRAPrelu())
        self.output.initialize(init.MSRAPrelu())
def main():
    """Train and validate the pose-estimation network described by ``config``."""
    # Parse config and mkdir output
    logger, final_Model_path = create_logger(config)
    config.final_Model_path = final_Model_path
    gen_config(os.path.join(final_Model_path, 'hyperParams.yaml'))
    logger.info('Training config:{}\n'.format(pprint.pformat(config)))

    # define context
    if config.useGPU:
        ctx = [mx.gpu(int(i)) for i in config.gpu.split(',')]
    else:
        ctx = mx.cpu()
    # BUG FIX: logging.Logger.info uses %-style formatting; the old
    # print-style calls (extra positional args without placeholders) raised
    # "not all arguments converted" formatting errors.
    logger.info("Using context: %s", ctx)

    # dataset, generate trainset / validation set
    train_imdbs = []
    valid_imdbs = []
    for i in range(len(config.DATASET.train_image_set)):
        logger.info("Construct Dataset: %s, Dataset Path: %s",
                    config.DATASET.dbname[i], config.DATASET.dataset_path[i])
        # NOTE(review): eval() on a config-supplied class name — fine for a
        # trusted local config, but do not feed untrusted input here.
        train_imdbs.append(
            eval(config.DATASET.dbname[i])(config.DATASET.train_image_set[i],
                                           config.DATASET.root_path[i],
                                           config.DATASET.dataset_path[i]))
        valid_imdbs.append(
            eval(config.DATASET.dbname[i])(config.DATASET.valid_image_set[i],
                                           config.DATASET.root_path[i],
                                           config.DATASET.dataset_path[i],
                                           config.final_Model_path))

    data_names = ['hm36data']
    label_names = ['hm36label']
    # NOTE(review): len(ctx) assumes ctx is a list — on the CPU path ctx is a
    # single Context; confirm the CPU path is actually exercised.
    train_data_iter = JointsDataIter(train_imdbs[0], runmode=0,
                                     data_names=data_names, label_names=label_names,
                                     shuffle=config.TRAIN.SHUFFLE,
                                     batch_size=len(ctx) * config.TRAIN.batchsize,
                                     logger=logger)
    valid_data_iter = JointsDataIter(valid_imdbs[0], runmode=1,
                                     data_names=data_names, label_names=label_names,
                                     shuffle=False,
                                     batch_size=len(ctx) * config.TEST.batchsize,
                                     logger=logger)
    # NOTE(review): .all() reduces each mean to a single boolean, so this only
    # checks both are all-nonzero (or both not) — it does NOT verify the two
    # mean vectors are equal. Kept as-is to preserve behavior.
    assert train_data_iter.get_meanstd()['mean3d'].all(
    ) == valid_data_iter.get_meanstd()['mean3d'].all()

    # network
    net = get_net(config)
    if config.resume:
        ckp_path = os.path.join(config.resumeckp)
        net.collect_params().load(ckp_path, ctx=ctx)
    else:
        net.initialize(init=init.MSRAPrelu(), ctx=ctx)
    if config.NETWORK.hybrid:
        net.hybridize()
    logger.info(net)

    # define loss and metric (metrics de-normalize with the train statistics)
    mean3d = train_data_iter.get_meanstd()['mean3d']
    std3d = train_data_iter.get_meanstd()['std3d']
    train_metric = MPJPEMetric('train_metric', mean3d, std3d)
    eval_metric = MPJPEMetric('valid_metric', mean3d, std3d)
    loss = MeanSquareLoss()

    # optimizer
    optimizer, optimizer_params = get_optimizer(config, ctx)

    # train and valid
    TrainDBsize = train_data_iter.get_size()
    ValidDBsize = valid_data_iter.get_size()
    logger.info("Train DB size: %s, Valid DB size: %s", TrainDBsize, ValidDBsize)

    if not isinstance(train_data_iter, mx.io.PrefetchingIter):
        train_data_iter = mx.io.PrefetchingIter(train_data_iter)

    trainer = gluon.Trainer(net.collect_params(), optimizer, optimizer_params)
    for epoch in range(config.TRAIN.begin_epoch, config.TRAIN.end_epoch):
        trainNet(net, trainer, train_data_iter, loss, train_metric, epoch,
                 config, logger=logger, ctx=ctx)
        validNet(net, valid_data_iter, loss, eval_metric, epoch,
                 config, logger=logger, ctx=ctx)

    # NOTE(review): standard logging.Logger has no kill(); presumably
    # create_logger returns a custom wrapper — confirm.
    logger.kill()
def main(args):
    """Train an MVRNN multi-view classifier per the parsed CLI `args`.

    Side effects: creates ./experiment/{checkpoints,logs}/, a timestamped
    checkpoint directory and log file, then runs `utils.train`.
    """
    # Create output directories (idempotent).
    experiment_dir = Path('./experiment/')
    experiment_dir.mkdir(exist_ok=True)
    checkpoints_dir = Path('./experiment/checkpoints/')
    checkpoints_dir.mkdir(exist_ok=True)
    log_dir = Path('./experiment/logs/')
    log_dir.mkdir(exist_ok=True)

    ctx = [mxnet.gpu(gpu_id) for gpu_id in args.gpu]

    # Initialize the network; resume from a checkpoint when provided.
    net = MVRNN(cnn_arch='vgg11_bn', cnn_feature_length=4096,
                num_views=args.num_views, num_class=args.num_classes,
                pretrained=True, pretrained_cnn=args.pretrained_cnn, ctx=ctx)
    if args.checkpoint:
        net.load_parameters(args.checkpoint, ctx=ctx)
    else:
        net.initialize(init=init.MSRAPrelu(), ctx=ctx)
    net.hybridize()

    # Set grad_req to 'add' so gradients from per-view passes are manually
    # aggregated before the update step.
    net.collect_params().setattr('grad_req', 'add')
    net._cnn2.collect_params().setattr('lr_mult', args.output_lr_mult)

    # Loss: dense (soft) labels when label smoothing is enabled.
    loss_fun = gluon.loss.SoftmaxCrossEntropyLoss(
        sparse_label=not args.label_smoothing)

    # Datasets and loaders (ImageNet normalization statistics).
    train_ds = MultiViewImageDataset(
        os.path.join(args.dataset_path, 'train'), args.num_views,
        transform=Compose([
            ToTensor(),
            Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225))
        ]))
    test_ds = MultiViewImageDataset(
        os.path.join(args.dataset_path, 'test'), args.num_views,
        transform=Compose([
            ToTensor(),
            Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225))
        ]))
    loader = gluon.data.DataLoader
    train_data = loader(train_ds, args.batch_size, shuffle=True,
                        last_batch='keep', num_workers=4)
    test_data = loader(test_ds, args.batch_size, shuffle=False,
                       last_batch='keep', num_workers=4)

    # Timestamped names for this run's log file and checkpoint directory.
    current_time = datetime.datetime.now()
    time_str = '%d-%d-%d--%d-%d-%d' % (
        current_time.year, current_time.month, current_time.day,
        current_time.hour, current_time.minute, current_time.second)
    log_filename = time_str + '.txt'
    checkpoint_name = 'checkpoint_' + time_str
    checkpoint_dir = Path(os.path.join(checkpoints_dir, checkpoint_name))
    checkpoint_dir.mkdir(exist_ok=True)

    # BUG FIX: the original wrapped the body in
    # `try: ... except Exception as e: raise e`, a no-op that only truncated
    # the traceback origin; exceptions now propagate naturally.
    with open(os.path.join(log_dir, log_filename), 'w') as log_out:
        kv = mxnet.kv.create('device')
        utils.log_string(log_out, sys.argv[0])
        utils.train(net, train_data, test_data, loss_fun, kv, log_out,
                    str(checkpoint_dir), args)
def __init__(self, args):
    """Build the full training state (data, model, loss, optimizer, metrics)
    for infrared small-target segmentation from parsed CLI `args`.
    """
    self.args = args

    # Image transform: tensor conversion + ImageNet mean/std normalization.
    input_transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize([.485, .456, .406], [.229, .224, .225]),  # Default mean and std
        # transforms.Normalize([.418, .447, .571], [.091, .078, .076]),  # Iceberg mean and std
    ])

    ################################# dataset and dataloader #################################
    # Pick the dataset root by host platform; Colab overrides the Linux path.
    if platform.system() == "Darwin":
        data_root = os.path.join('~', 'Nutstore Files', 'Dataset')
    elif platform.system() == "Linux":
        data_root = os.path.join('~', 'datasets')
        if args.colab:
            # data_root = '/content/gdrive/My Drive/Colab Notebooks/datasets'
            data_root = '/content/datasets'
    else:
        raise ValueError('Notice Dataset Path')

    data_kwargs = {'base_size': args.base_size,
                   'transform': input_transform,
                   'crop_size': args.crop_size,
                   'root': data_root,
                   'base_dir': args.dataset}
    trainset = IceContrast(split=args.train_split, mode='train', **data_kwargs)
    valset = IceContrast(split=args.val_split, mode='testval', **data_kwargs)
    self.train_data = gluon.data.DataLoader(trainset, args.batch_size,
                                            shuffle=True, last_batch='rollover',
                                            num_workers=args.workers)
    self.eval_data = gluon.data.DataLoader(valset, args.test_batch_size,
                                           last_batch='rollover',
                                           num_workers=args.workers)

    # net_choice = 'PCMNet'  # ResNetFPN, PCMNet, MPCMNet, LayerwiseMPCMNet
    net_choice = self.args.net_choice
    print("net_choice: ", net_choice)

    # NOTE(review): `model` is only bound inside this branch; any other
    # net_choice value reaches `self.net = model` below with `model`
    # undefined (NameError) — confirm all supported choices are handled.
    if net_choice == 'MPCMResNetFPN':
        r = self.args.r
        layers = [self.args.blocks] * 3
        channels = [8, 16, 32, 64]
        shift = self.args.shift
        pyramid_mode = self.args.pyramid_mode
        scale_mode = self.args.scale_mode
        pyramid_fuse = self.args.pyramid_fuse

        model = MPCMResNetFPN(layers=layers, channels=channels, shift=shift,
                              pyramid_mode=pyramid_mode, scale_mode=scale_mode,
                              pyramid_fuse=pyramid_fuse, r=r,
                              classes=trainset.NUM_CLASS)
        print("net_choice: ", net_choice)
        print("scale_mode: ", scale_mode)
        print("pyramid_fuse: ", pyramid_fuse)
        print("r: ", r)
        print("layers: ", layers)
        print("channels: ", channels)
        print("shift: ", shift)

    # Checkpoint/save prefix encodes host, architecture, and key hyper-params.
    self.host_name = socket.gethostname()
    self.save_prefix = self.host_name + '_' + net_choice + '_scale-mode_' + args.scale_mode + \
        '_pyramid-fuse_' + args.pyramid_fuse + '_b_' + str(args.blocks)

    if args.net_choice == 'ResNetFCN':
        self.save_prefix = self.host_name + '_' + net_choice + '_b_' + str(args.blocks)

    # Resume checkpoint if needed; otherwise re-initialize from scratch.
    if args.resume is not None:
        if os.path.isfile(args.resume):
            model.load_parameters(args.resume, ctx=args.ctx)
        else:
            raise RuntimeError("=> no checkpoint found at '{}'".format(args.resume))
    else:
        # model.initialize(init=init.Xavier(), ctx=args.ctx, force_reinit=True)
        model.initialize(init=init.MSRAPrelu(), ctx=args.ctx, force_reinit=True)
        print("Model Initializing")
        print("args.ctx: ", args.ctx)

    self.net = model

    # Optional one-shot summary, then exit (no training run).
    # self.net.summary(mx.nd.zeros((1, 3, 480, 480)))
    if args.summary:
        self.net.summary(mx.nd.zeros((1, 3, 480, 480), self.args.ctx[0]))
        sys.exit()

    # Create criterion.
    self.criterion = SoftIoULoss()

    # Optimizer and lr scheduling (poly decay over all iterations).
    self.lr_scheduler = LRScheduler(mode='poly', base_lr=args.lr,
                                    nepochs=args.epochs,
                                    iters_per_epoch=len(self.train_data),
                                    power=0.9)
    kv = mx.kv.create(args.kvstore)

    # For SGD
    # optimizer_params = {'lr_scheduler': self.lr_scheduler,
    #                     'wd': args.weight_decay,
    #                     'momentum': args.momentum,
    #                     'learning_rate': args.lr
    #                     }
    optimizer_params = {
        # 'lr_scheduler': self.lr_scheduler,
        'wd': args.weight_decay,
        'learning_rate': args.lr
    }

    # For Adam
    if args.dtype == 'float16':
        # Keep a float32 master copy of weights under half precision.
        optimizer_params['multi_precision'] = True

    # Optionally exempt norm/bias parameters from weight decay.
    if args.no_wd:
        for k, v in self.net.collect_params('.*beta|.*gamma|.*bias').items():
            v.wd_mult = 0.0

    # self.optimizer = gluon.Trainer(self.net.collect_params(), 'sgd',
    #                                optimizer_params, kvstore=kv)
    # self.optimizer = gluon.Trainer(self.net.collect_params(), 'adam',
    #                                optimizer_params, kvstore=kv)
    self.optimizer = gluon.Trainer(self.net.collect_params(), 'adagrad',
                                   optimizer_params, kvstore=kv)
    # self.optimizer = gluon.Trainer(self.net.collect_params(), 'nag',
    #                                optimizer_params, kvstore=kv)

    ################################# evaluation metrics #################################
    self.iou_metric = SigmoidMetric(1)
    self.nIoU_metric = SamplewiseSigmoidMetric(1, score_thresh=self.args.score_thresh)
    # self.metric = Seg2DetVOC07MApMetric(iou_thresh=self.args.iou_thresh,
    #                                     sparsity=self.args.sparsity,
    #                                     score_thresh=self.args.score_thresh)

    # Best-so-far trackers updated during validation.
    self.best_metric = 0
    self.best_iou = 0
    self.best_nIoU = 0
    self.is_best = False