help='Number of bits for binarization/quantization') parser.add_argument( '--log-interval', type=int, default=100, metavar='N', help='how many batches to wait before logging training status') opt = parser.parse_args() num_channels_conv = 64 act = 'tanh' num_fc = 1000 num_outputs = 10 # define network net = nn.HybridSequential(prefix="") with net.name_scope(): if opt.bits == 1: net.add(gluon.nn.Conv2D(channels=num_channels_conv, kernel_size=5)) net.add(gluon.nn.Activation(activation=act)) net.add(gluon.nn.MaxPool2D(pool_size=2, strides=2)) net.add(gluon.nn.BatchNorm(axis=1, center=True, scale=True)) net.add(gluon.nn.QActivation()) net.add(gluon.nn.QConv2D(channels=num_channels_conv, kernel_size=5)) net.add(gluon.nn.BatchNorm(axis=1, center=True, scale=True)) net.add(gluon.nn.MaxPool2D(pool_size=2, strides=2)) # The Flatten layer collapses all axis, except the first one, into one axis. net.add(gluon.nn.Flatten())
def __init__(self, alpha=1.0, beta=1.0, dropout_rate=0.0, classes=1000, **kwargs):
    """Assemble the feature stack, optional dropout and prediction head.

    Parameters
    ----------
    alpha : float
        Multiplier on every stage's repeat count; the product is rounded
        up with ``ceil``.
    beta : float
        Multiplier on channel counts; the product is truncated with ``int``.
    dropout_rate : float
        A dropout layer is created only when this is greater than 0,
        otherwise ``self.dropout`` is ``None``.
    classes : int
        Number of channels of the final 1x1 prediction convolution.
    **kwargs
        Forwarded unchanged to the parent constructor.
    """
    super(EfficientNet, self).__init__(**kwargs)

    # Base (unscaled) per-stage settings.
    stage_repeats = [1, 2, 2, 3, 3, 4, 1]
    stage_channels = [16, 24, 40, 80, 112, 192, 320]
    stage_kernels = [3, 3, 5, 3, 5, 5, 3]
    stage_ts = [1, 6, 6, 6, 6, 6, 6]
    stage_first_strides = [1, 2, 2, 1, 2, 2, 1]

    def scaled(rep):
        # Number of MBConv blocks for a stage after applying alpha.
        return int(ceil(alpha * rep))

    # Input width of each block: the previous stage's (scaled) width,
    # with the stem width (32) feeding the first stage.
    block_inputs = []
    for rep, width in zip([1] + stage_repeats[:-1], [32] + stage_channels[:-1]):
        block_inputs.extend([int(width * beta)] * scaled(rep))

    # (channels, t, kernel, stride) for every block; only the first block
    # of a stage uses the stage's stride, the rest use stride 1.
    block_cfgs = []
    for rep, width, kernel, t, first_stride in zip(
            stage_repeats, stage_channels, stage_kernels, stage_ts,
            stage_first_strides):
        for idx in range(scaled(rep)):
            stride = first_stride if idx == 0 else 1
            block_cfgs.append((int(width * beta), t, kernel, stride))

    with self.name_scope():
        self.features = nn.HybridSequential(prefix='features_')
        with self.features.name_scope():
            # stem conv
            _add_conv(self.features, int(32 * beta), kernel=3, stride=2, pad=1)

            # MBConv layers
            for in_c, (out_c, t, kernel, stride) in zip(block_inputs, block_cfgs):
                self.features.add(
                    MBConv(in_channels=in_c, channels=out_c, t=t,
                           kernel=kernel, stride=stride))

            # head layers: the head is only widened, never narrowed
            if beta > 1.0:
                last_channels = int(1280 * beta)
            else:
                last_channels = 1280
            _add_conv(self.features, last_channels)
            self.features.add(nn.GlobalAvgPool2D())

        # features dropout
        if dropout_rate > 0.0:
            self.dropout = nn.Dropout(dropout_rate)
        else:
            self.dropout = None

        # output layer
        self.output = nn.HybridSequential(prefix='output_')
        with self.output.name_scope():
            self.output.add(
                nn.Conv2D(classes, 1, use_bias=False, prefix='pred_'),
                nn.Flatten())
def make_res_layer(block, inplanes, planes, blocks, spatial_stride=1,
                   temporal_stride=1, dilation=1, inflate_freq=1,
                   inflate_style='3x1x1', nonlocal_freq=1, nonlocal_cfg=None,
                   norm_layer=BatchNorm, norm_kwargs=None, layer_name=''):
    """Stack ``blocks`` instances of ``block`` into one HybridSequential.

    The first block receives the requested spatial/temporal strides and,
    when the spatial stride is not 1 or the channel count changes, a
    1x1x1 Conv3D + norm ``downsample`` branch.  Every following block uses
    stride 1 and no downsample.

    ``inflate_freq`` and ``nonlocal_freq`` may be a single int (applied to
    every block) or a sequence with exactly ``blocks`` entries; a block is
    inflated / made non-local when its entry equals 1.

    Returns
    -------
    nn.HybridSequential
        Container prefixed with ``layer_name`` holding the blocks.
    """
    # Broadcast scalar frequencies to one entry per block.
    if isinstance(inflate_freq, int):
        inflate_freq = (inflate_freq, ) * blocks
    if isinstance(nonlocal_freq, int):
        nonlocal_freq = (nonlocal_freq, ) * blocks
    assert len(inflate_freq) == blocks
    assert len(nonlocal_freq) == blocks

    downsample = None
    if spatial_stride != 1 or inplanes != planes * block.expansion:
        extra_norm_args = {} if norm_kwargs is None else norm_kwargs
        downsample = nn.HybridSequential(prefix=layer_name+'downsample_')
        with downsample.name_scope():
            downsample.add(nn.Conv3D(
                in_channels=inplanes,
                channels=planes * block.expansion,
                kernel_size=1,
                strides=(temporal_stride, spatial_stride, spatial_stride),
                use_bias=False))
            downsample.add(norm_layer(in_channels=planes * block.expansion,
                                      **extra_norm_args))

    layers = nn.HybridSequential(prefix=layer_name)
    with layers.name_scope():
        # First block: carries the strides and the optional downsample.
        layers.add(block(inplanes=inplanes,
                         planes=planes,
                         spatial_stride=spatial_stride,
                         temporal_stride=temporal_stride,
                         dilation=dilation,
                         downsample=downsample,
                         if_inflate=(inflate_freq[0] == 1),
                         inflate_style=inflate_style,
                         if_nonlocal=(nonlocal_freq[0] == 1),
                         nonlocal_cfg=nonlocal_cfg,
                         layer_name='%d_' % 0))

        # Remaining blocks consume the expanded channel count.
        inplanes = planes * block.expansion
        for idx in range(1, blocks):
            layers.add(block(inplanes=inplanes,
                             planes=planes,
                             spatial_stride=1,
                             temporal_stride=1,
                             dilation=dilation,
                             if_inflate=(inflate_freq[idx] == 1),
                             inflate_style=inflate_style,
                             if_nonlocal=(nonlocal_freq[idx] == 1),
                             nonlocal_cfg=nonlocal_cfg,
                             layer_name='%d_' % idx))
    return layers
def __init__(self, args, config): super(Segmentator, self).__init__() # self.actions = actions self.pms = config['list_puncuation_marks'] self.config = config self.num_layers = 6 self.num_heads = 12 self.hidden_size = 512 self.max_seq_length = config['int_max_length'] self.units = 768 self.args = args self.beam_size = 5 if args.decoder or args is None: self.error_type = ['correct', 'R', 'S', 'M', 'W', '[START]', '[END]'] else: self.error_type = ['correct', 'R', 'S', 'M', 'W'] if self.args.dataset == 'CGED16': with self.name_scope(): self.vocab_tgt = None self.encoder, self.vocab_src = nlp.model.get_model('bert_12_768_12', dataset_name = 'wiki_cn_cased', use_classifier = False, use_decoder = False, pretrained = False); if args.decoder: self.emb_tgt = nn.HybridSequential() self.emb_tgt.add(nn.Embedding(len(self.error_type), self.units)) self.emb_tgt.add(nn.Dropout(0.5)) self.decoder = trans.TransformerDecoder(attention_cell = 'multi_head', num_layers = self.num_layers, units = self.units, hidden_size = self.hidden_size, max_length = self.max_seq_length, num_heads = self.num_heads, scaled=True, dropout=0.1, use_residual = True, output_attention=False, weight_initializer=None, bias_initializer='zeros', scale_embed=True, prefix=None, params=None) self.beam_scorer = nlp.model.BeamSearchScorer() self.beam_sampler = nlp.model.BeamSearchSampler(beam_size = self.beam_size, decoder = self._decode_step_CGED, eos_id = self.error_type.index('[END]'), scorer = self.beam_scorer, max_length = self.max_seq_length) self.seq_sampler = nlp.model.SequenceSampler(beam_size = self.beam_size, decoder = self._decode_step_CGED, eos_id = self.error_type.index('[END]'), max_length = self.max_seq_length, temperature = 0.97) # vocab_size = len(self.error_type)) self.tokenizer = nlp.data.BERTTokenizer(self.vocab_src, lower = False); self.transformer = nlp.data.BERTSentenceTransform(self.tokenizer, max_seq_length = self.max_seq_length, pair = False, pad = True); self.dropout = nn.Dropout(0.5) 
self.fc_error = nn.Dense(len(self.error_type), flatten = False) # self.fc_start = nn.Dense(2, flatten = False) # Binary # self.fc_end = nn.Dense(2, flatten = False) # Binary else: with self.name_scope(): self.encoder, self.vocab_src = nlp.model.get_model('bert_12_768_12', dataset_name = 'wiki_cn_cased', use_classifier = False, use_decoder = False, pretrained = True); # self.encoder = trans.TransformerEncoder(attention_cell='multi_head', # num_layers=2, units=300, hidden_size=2048, # max_length=150, num_heads=4, scaled=True, dropout=0.0, use_residual=True, output_attention=False, weight_initializer=None, bias_initializer='zeros', prefix=None, params=None) # self.encoder2 = nlp.model.TransformerEncoder(attention_cell='multi_head', # num_layers=2, units=768, hidden_size=2048, # max_length = self.max_seq_length, # num_heads = 8, # scaled=True, # dropout=0.1, # use_residual=True, # output_attention=False, # weight_initializer=None, # bias_initializer='zeros', prefix=None, params=None) # if (self.args.use_tc): # keys = self.vocab_src.token_to_idx.keys() # key_to_check = ['E', 'e', 'e', '1', '1'] # for k in key_to_check: # print('{} => {}'.format(k, k in keys)) # raise self.counter_tgt = nlp.data.count_tokens(self.config['str_character_target']) self.vocab_tgt = nlp.vocab.BERTVocab(self.counter_tgt) # keys = self.vocab_tgt.token_to_idx.keys() # key_to_check = ['E', 'e', 'e', '1', '1'] # for k in key_to_check: # print('{} => {}'.format(k, k in keys)) # raise self.dropout = nn.Dropout(0.5) self.decoder = trans.TransformerDecoder(attention_cell = 'multi_head', num_layers = self.num_layers, units = self.units, hidden_size = self.hidden_size, max_length = self.max_seq_length, num_heads = self.num_heads, scaled=True, dropout=0.1, use_residual = True, output_attention=False, weight_initializer=None, bias_initializer='zeros', scale_embed=True, prefix=None, params=None) # self.decoder_action = trans.TransformerDecoder(attention_cell = 'multi_head', # num_layers = self.num_layers, 
# units = self.units, hidden_size = self.hidden_size, max_length = self.max_seq_length, # num_heads = self.num_heads, scaled=True, dropout=0.1, # use_residual = True, output_attention=False, # weight_initializer=None, bias_initializer='zeros', # scale_embed=True, prefix=None, params=None) # self.fc_actions = nn.Dense(len(self.actions), flatten = False) # self.fc_pms = nn.Dense(len(self.pms), flatten = False) self.fc_proj = nn.Dense(len(self.vocab_tgt), flatten = False, in_units = 768) self.emb_tgt = nn.HybridSequential() self.fc_pm_error = nn.Dense(2, flatten = False, in_units = 768) self.fc_pm_remove = nn.Dense(2, flatten = False, in_units = 768) self.fc_pm_add = nn.Dense(2, flatten = False, in_units = 768) # self.fc_error = nn.Dense(len(self.error_type), flatten = False, in_units = 768) # self.fc_correction = nn.Dense(len(self.vocab_tgt) + 1, flatten = False, in_units = 768) self.emb_tgt.add(nn.Embedding(len(self.vocab_tgt), self.units)) self.emb_tgt.add(nn.Dropout(0.5)) # self.emb_actions = (nn.Embedding(input_dim = len(self.actions), output_dim = self.units)) # self.emb_pms = (nn.Embedding(input_dim = len(self.pms), output_dim = self.units)) # self.emb self.tokenizer = nlp.data.BERTTokenizer(self.vocab_src, lower = True); self.transformer = nlp.data.BERTSentenceTransform(self.tokenizer, max_seq_length = self.max_seq_length, pair = False, pad = True); self.beam_scorer = nlp.model.BeamSearchScorer() self.beam_sampler = nlp.model.BeamSearchSampler(beam_size = self.beam_size, decoder = self._decode_step, eos_id = self.vocab_tgt.token_to_idx[self.vocab_tgt.sep_token], scorer = self.beam_scorer, max_length = self.max_seq_length)
def __init__(
    self,
    context_length: int,
    prediction_length: int,
    d_hidden: int,
    d_var: int,
    n_head: int,
    dropout: float = 0.0,
    **kwargs,
):
    """Create the enrichment, attention and feed-forward sub-networks.

    Parameters
    ----------
    context_length, prediction_length
        Stored on the instance and forwarded to ``SelfAttention``.
    d_hidden
        Hidden width used by every sub-layer built here.
    d_var
        Passed to the enrichment ``GatedResidualNetwork`` as ``d_static``.
    n_head
        Number of heads passed to ``SelfAttention``.
    dropout
        Dropout rate shared by all sub-layers that accept one.
    **kwargs
        Forwarded to the parent constructor.
    """
    super(TemporalFusionDecoder, self).__init__(**kwargs)
    self.context_length = context_length
    self.prediction_length = prediction_length

    def gated_projection():
        # Dense layer doubling the width, followed by a GatedLinearUnit
        # over the last axis; used by both att_net and ff_net.
        dense = nn.Dense(
            units=d_hidden * 2,
            in_units=d_hidden,
            flatten=False,
            weight_initializer=init.Xavier(),
        )
        glu = GatedLinearUnit(axis=-1, nonlinear=False)
        return dense, glu

    with self.name_scope():
        self.enrich = GatedResidualNetwork(
            d_hidden=d_hidden,
            d_static=d_var,
            dropout=dropout,
        )
        self.attention = SelfAttention(
            context_length=context_length,
            prediction_length=prediction_length,
            d_hidden=d_hidden,
            n_head=n_head,
            share_values=True,
            dropout=dropout,
        )

        # Post-attention path: dropout -> dense -> GLU, then layer norm.
        self.att_net = nn.HybridSequential(prefix="attention_")
        self.att_net.add(nn.Dropout(dropout), *gated_projection())
        self.att_lnorm = nn.LayerNorm(axis=-1, in_channels=d_hidden)

        # Feed-forward path: GRN -> dense -> GLU, then layer norm.
        self.ff_net = nn.HybridSequential()
        self.ff_net.add(
            GatedResidualNetwork(d_hidden, dropout=dropout),
            *gated_projection()
        )
        self.ff_lnorm = nn.LayerNorm(axis=-1, in_channels=d_hidden)
def transition_block(channels):
    """Return BatchNorm -> ReLU -> 1x1 Conv2D(channels) -> 2x2 AvgPool (stride 2)."""
    block = nn.HybridSequential()
    block.add(nn.BatchNorm())
    block.add(nn.Activation('relu'))
    block.add(nn.Conv2D(channels, kernel_size=1))
    block.add(nn.AvgPool2D(pool_size=2, strides=2))
    return block
def __init__(self,
             in_channels,
             channels,
             strides=1,
             dilation=1,
             groups=1,
             norm_act=bnrelu,
             dropout=None,
             dist_bn=False
             ):
    """Configurable identity-mapping residual block

    Parameters
    ----------
    in_channels : int
        Number of input channels.
    channels : list of int
        Number of channels in the internal feature maps. Must have two or
        three elements: with two, a residual block with two `3 x 3`
        convolutions is built; with three, a bottleneck block with
        `1 x 1`, then `3 x 3`, then `1 x 1` convolutions is built.
    strides : int
        Stride of the first convolution of the block, and of the `1 x 1`
        projection convolution when one is created.
    dilation : int
        Dilation to apply to the `3 x 3` convolutions.
    groups : int
        Number of convolution groups. This is used to create ResNeXt-style
        blocks and is only compatible with bottleneck blocks.
    norm_act : callable
        Function to create normalization / activation Module.
    dropout: callable
        Function to create Dropout Module.
    dist_bn: Boolean
        A variable to enable or disable use of distributed BN

    Raises
    ------
    ValueError
        If `channels` does not have two or three elements, or if
        `groups != 1` is requested for a two-element (non-bottleneck) block.
    """
    super(IdentityResidualBlock, self).__init__()
    self.dist_bn = dist_bn

    # Check parameters for inconsistencies
    if len(channels) not in (2, 3):
        raise ValueError("channels must contain either two or three values")
    if len(channels) == 2 and groups != 1:
        raise ValueError("groups > 1 are only valid if len(channels) == 3")

    is_bottleneck = len(channels) == 3
    # A projection shortcut is needed whenever the output shape differs
    # from the input shape (stride or channel count changes).
    need_proj_conv = strides != 1 or in_channels != channels[-1]

    self.bn1 = norm_act(in_channels)
    if not is_bottleneck:
        layers = [
            ("conv1", nn.Conv2D(in_channels=in_channels,
                                channels=channels[0],
                                kernel_size=3,
                                strides=strides,
                                padding=dilation,
                                use_bias=False,
                                dilation=dilation)),
            ("bn2", norm_act(channels[0])),
            ("conv2", nn.Conv2D(in_channels=channels[0],
                                channels=channels[1],
                                kernel_size=3,
                                strides=1,
                                padding=dilation,
                                use_bias=False,
                                dilation=dilation))
        ]
        if dropout is not None:
            # Dropout goes right before the last convolution.
            layers.insert(2, ("dropout", dropout()))
    else:
        layers = [
            ("conv1", nn.Conv2D(in_channels=in_channels,
                                channels=channels[0],
                                kernel_size=1,
                                strides=strides,
                                padding=0,
                                use_bias=False)),
            ("bn2", norm_act(channels[0])),
            ("conv2", nn.Conv2D(in_channels=channels[0],
                                channels=channels[1],
                                kernel_size=3,
                                strides=1,
                                padding=dilation,
                                use_bias=False,
                                groups=groups,
                                dilation=dilation)),
            ("bn3", norm_act(channels[1])),
            ("conv3", nn.Conv2D(in_channels=channels[1],
                                channels=channels[2],
                                kernel_size=1,
                                strides=1,
                                padding=0,
                                use_bias=False))
        ]
        if dropout is not None:
            # Dropout goes right before the last convolution.
            layers.insert(4, ("dropout", dropout()))

    # The names above only document each layer's role; the layers are
    # added to the container in list order.
    self.convs = nn.HybridSequential(prefix='')
    for _, layer in layers:
        self.convs.add(layer)

    if need_proj_conv:
        self.proj_conv = nn.Conv2D(in_channels=in_channels,
                                   channels=channels[-1],
                                   kernel_size=1,
                                   strides=strides,
                                   padding=0,
                                   use_bias=False)
def faster_rcnn_resnet101_v1d_custom(classes, transfer=None, pretrained_base=True,
                                     pretrained=False, **kwargs):
    r"""Build a Faster RCNN with a resnet101_v1d backbone for a custom dataset.

    Parameters
    ----------
    classes : iterable of str
        Names of custom foreground classes. `len(classes)` is the number of
        foreground classes.
    transfer : str or None
        When not `None`, the model ``'faster_rcnn_resnet101_v1d_' + transfer``
        is loaded with pretrained weights and its classes are reset to
        `classes`, reusing weights for class names present in both.
    pretrained_base : bool or str
        Passed as ``pretrained`` to the resnet101_v1d base network
        (only used when `transfer` is `None`).
    pretrained : bool
        Custom models have no pretrained weights; a true value only
        triggers a warning.
    **kwargs
        Forwarded to the base network and to ``get_faster_rcnn`` when
        `transfer` is `None`, otherwise to ``get_model``.

    Returns
    -------
    mxnet.gluon.HybridBlock
        Hybrid faster RCNN network.
    """
    if pretrained:
        warnings.warn(
            "Custom models don't provide `pretrained` weights, ignored.")

    # Transfer path: start from an existing detector and swap its classes.
    if transfer is not None:
        from ...model_zoo import get_model
        net = get_model('faster_rcnn_resnet101_v1d_' + str(transfer),
                        pretrained=True, **kwargs)
        reuse_classes = [x for x in classes if x in net.classes]
        net.reset_class(classes, reuse_weights=reuse_classes)
        return net

    # From-scratch path: split the backbone into shared and top features.
    from ..resnetv1b import resnet101_v1d
    base_network = resnet101_v1d(pretrained=pretrained_base,
                                 dilated=False,
                                 use_global_stats=True,
                                 **kwargs)
    features = nn.HybridSequential()
    top_features = nn.HybridSequential()
    shared_stages = ('conv1', 'bn1', 'relu', 'maxpool',
                     'layer1', 'layer2', 'layer3')
    for stage_name in shared_stages:
        features.add(getattr(base_network, stage_name))
    top_features.add(base_network.layer4)

    train_patterns = '|'.join(
        ['.*dense', '.*rpn', '.*down(2|3|4)_conv', '.*layers(2|3|4)_conv'])
    return get_faster_rcnn(
        name='resnet101_v1d', dataset='custom', pretrained=pretrained,
        features=features, top_features=top_features, classes=classes,
        short=600, max_size=1000, train_patterns=train_patterns,
        nms_thresh=0.3, nms_topk=400, post_nms=100,
        roi_mode='align', roi_size=(14, 14), stride=16, clip=None,
        rpn_channel=1024, base_size=16, scales=(2, 4, 8, 16, 32),
        ratios=(0.5, 1, 2), alloc_size=(128, 128), rpn_nms_thresh=0.7,
        rpn_train_pre_nms=12000, rpn_train_post_nms=2000,
        rpn_test_pre_nms=6000, rpn_test_post_nms=300, rpn_min_size=16,
        num_sample=128, pos_iou_thresh=0.5, pos_ratio=0.25, max_num_gt=300,
        **kwargs)
def body():
    """Return the body network: three ``down_sample`` stages (16, 32, 64 filters)."""
    net = nn.HybridSequential()
    filter_counts = (16, 32, 64)
    net.add(*[down_sample(count) for count in filter_counts])
    return net
def __init__(self, block, layers, cardinality=1, bottleneck_width=64,
             classes=1000, dilated=False, dilation=1, norm_layer=BatchNorm,
             norm_kwargs=None, last_gamma=False, deep_stem=False, stem_width=32,
             avg_down=False, final_drop=0.0, use_global_stats=False,
             name_prefix='', dropblock_prob=0, input_size=224,
             use_splat=False, radix=2, avd=False, avd_first=False,
             split_drop_ratio=0):
    """Build the stem, four residual stages, pooling and classifier.

    Parameters
    ----------
    block : class
        Residual block type; ``block.expansion`` sets the classifier input
        width (``512 * block.expansion``).
    layers : sequence of int
        Number of blocks in each of the four stages.
    cardinality, bottleneck_width, radix, avd_first, split_drop_ratio
        Stored on the instance for ``_make_layer`` to forward to each block.
    classes : int
        Number of units of the final Dense layer.
    dilated : bool
        If true (or ``dilation == 4``), layer3 and layer4 keep stride 1 and
        use dilation 2 and 4.
    dilation : int
        Alternative dilation selector: 3 and 2 choose the other
        stride/dilation combinations below; anything else gives the plain
        stride-2 stages.
    norm_layer : class
        Normalization layer constructor.
    norm_kwargs : dict or None
        Extra keyword arguments for ``norm_layer``.  The dict is copied, so
        the caller's object is never modified.
    deep_stem : bool
        Use three 3x3 convolutions (widths ``stem_width``, ``stem_width``,
        ``stem_width * 2``) instead of a single 7x7 convolution.
    final_drop : float
        A Dropout layer is created only when this is greater than 0.
    use_global_stats : bool
        When true, ``use_global_stats=True`` is added to the norm kwargs.
    name_prefix : str
        Prefix passed to the parent constructor.
    last_gamma, avg_down, dropblock_prob, input_size, use_splat, avd
        Forwarded to ``_make_layer``; ``input_size`` is updated with
        ``_update_input_size`` after each stride-2 step.
    """
    self.cardinality = cardinality
    self.bottleneck_width = bottleneck_width
    self.inplanes = stem_width*2 if deep_stem else 64
    self.radix = radix
    self.split_drop_ratio = split_drop_ratio
    self.avd_first = avd_first
    super(ResNest, self).__init__(prefix=name_prefix)
    # Copy so that adding 'use_global_stats' below cannot leak into a dict
    # the caller may share with other layers or models.
    norm_kwargs = dict(norm_kwargs) if norm_kwargs is not None else {}
    if use_global_stats:
        norm_kwargs['use_global_stats'] = True
    self.norm_kwargs = norm_kwargs

    # Keyword arguments shared by every stage.
    stage_kwargs = dict(avg_down=avg_down, norm_layer=norm_layer,
                        last_gamma=last_gamma, use_splat=use_splat, avd=avd)

    with self.name_scope():
        if not deep_stem:
            self.conv1 = nn.Conv2D(channels=64, kernel_size=7, strides=2,
                                   padding=3, use_bias=False, in_channels=3)
        else:
            self.conv1 = nn.HybridSequential(prefix='conv1')
            self.conv1.add(nn.Conv2D(channels=stem_width, kernel_size=3, strides=2,
                                     padding=1, use_bias=False, in_channels=3))
            self.conv1.add(norm_layer(in_channels=stem_width, **norm_kwargs))
            self.conv1.add(nn.Activation('relu'))
            self.conv1.add(nn.Conv2D(channels=stem_width, kernel_size=3, strides=1,
                                     padding=1, use_bias=False, in_channels=stem_width))
            self.conv1.add(norm_layer(in_channels=stem_width, **norm_kwargs))
            self.conv1.add(nn.Activation('relu'))
            self.conv1.add(nn.Conv2D(channels=stem_width*2, kernel_size=3, strides=1,
                                     padding=1, use_bias=False, in_channels=stem_width))
        # Stem stride 2.
        input_size = _update_input_size(input_size, 2)
        self.bn1 = norm_layer(in_channels=64 if not deep_stem else stem_width*2,
                              **norm_kwargs)
        self.relu = nn.Activation('relu')
        self.maxpool = nn.MaxPool2D(pool_size=3, strides=2, padding=1)
        # Max-pool stride 2.
        input_size = _update_input_size(input_size, 2)
        self.layer1 = self._make_layer(1, block, 64, layers[0], **stage_kwargs)
        self.layer2 = self._make_layer(2, block, 128, layers[1], strides=2,
                                       **stage_kwargs)
        # layer2 stride 2.
        input_size = _update_input_size(input_size, 2)
        if dilated or dilation == 4:
            self.layer3 = self._make_layer(3, block, 256, layers[2], strides=1,
                                           dilation=2,
                                           dropblock_prob=dropblock_prob,
                                           input_size=input_size, **stage_kwargs)
            self.layer4 = self._make_layer(4, block, 512, layers[3], strides=1,
                                           dilation=4, pre_dilation=2,
                                           dropblock_prob=dropblock_prob,
                                           input_size=input_size, **stage_kwargs)
        elif dilation == 3:
            # special
            self.layer3 = self._make_layer(3, block, 256, layers[2], strides=1,
                                           dilation=2,
                                           dropblock_prob=dropblock_prob,
                                           input_size=input_size, **stage_kwargs)
            self.layer4 = self._make_layer(4, block, 512, layers[3], strides=2,
                                           dilation=2, pre_dilation=2,
                                           dropblock_prob=dropblock_prob,
                                           input_size=input_size, **stage_kwargs)
        elif dilation == 2:
            self.layer3 = self._make_layer(3, block, 256, layers[2], strides=2,
                                           dropblock_prob=dropblock_prob,
                                           input_size=input_size, **stage_kwargs)
            self.layer4 = self._make_layer(4, block, 512, layers[3], strides=1,
                                           dilation=2,
                                           dropblock_prob=dropblock_prob,
                                           input_size=input_size, **stage_kwargs)
        else:
            self.layer3 = self._make_layer(3, block, 256, layers[2], strides=2,
                                           dropblock_prob=dropblock_prob,
                                           input_size=input_size, **stage_kwargs)
            input_size = _update_input_size(input_size, 2)
            self.layer4 = self._make_layer(4, block, 512, layers[3], strides=2,
                                           dropblock_prob=dropblock_prob,
                                           input_size=input_size, **stage_kwargs)
            input_size = _update_input_size(input_size, 2)
        self.avgpool = nn.GlobalAvgPool2D()
        self.flat = nn.Flatten()
        self.drop = None
        if final_drop > 0.0:
            self.drop = nn.Dropout(final_drop)
        self.fc = nn.Dense(in_units=512 * block.expansion, units=classes)
def _make_layer(self, stage_index, block, planes, blocks, strides=1, dilation=1,
                pre_dilation=1, avg_down=False, norm_layer=None,
                last_gamma=False, dropblock_prob=0, input_size=224,
                use_splat=False, avd=False):
    """Build one residual stage as a HybridSequential of ``blocks`` blocks.

    The first block receives ``strides``, ``dilation=pre_dilation`` and an
    optional downsample branch; the remaining blocks use ``dilation`` and
    the default stride.  ``self.inplanes`` is updated to
    ``planes * block.expansion`` after the first block.

    Raises
    ------
    RuntimeError
        If ``dilation`` is not 1, 2 or 4.
    """
    downsample = None
    if strides != 1 or self.inplanes != planes * block.expansion:
        downsample = nn.HybridSequential(prefix='down%d_'%stage_index)
        with downsample.name_scope():
            conv_strides = strides
            if avg_down:
                # Average-pool does the striding; the 1x1 conv then runs
                # at stride 1.
                if pre_dilation == 1:
                    pool_cfg = dict(pool_size=strides, strides=strides)
                elif strides == 1:
                    pool_cfg = dict(pool_size=1, strides=1)
                else:
                    pool_cfg = dict(pool_size=pre_dilation*strides,
                                    strides=strides, padding=1)
                downsample.add(nn.AvgPool2D(ceil_mode=True,
                                            count_include_pad=False,
                                            **pool_cfg))
                conv_strides = 1
            downsample.add(nn.Conv2D(channels=planes * block.expansion,
                                     kernel_size=1, strides=conv_strides,
                                     use_bias=False,
                                     in_channels=self.inplanes))
            downsample.add(norm_layer(in_channels=planes * block.expansion,
                                      **self.norm_kwargs))

    # Arguments that are identical for every block of the stage.
    shared = dict(cardinality=self.cardinality,
                  bottleneck_width=self.bottleneck_width,
                  norm_layer=norm_layer, norm_kwargs=self.norm_kwargs,
                  last_gamma=last_gamma, dropblock_prob=dropblock_prob,
                  use_splat=use_splat, avd=avd, avd_first=self.avd_first,
                  radix=self.radix,
                  split_drop_ratio=self.split_drop_ratio)

    layers = nn.HybridSequential(prefix='layers%d_'%stage_index)
    with layers.name_scope():
        if dilation not in (1, 2, 4):
            raise RuntimeError("=> unknown dilation size: {}".format(dilation))
        # First block of the stage.
        layers.add(block(planes, strides=strides, dilation=pre_dilation,
                         downsample=downsample, previous_dilation=dilation,
                         input_size=input_size, in_channels=self.inplanes,
                         **shared))

        input_size = _update_input_size(input_size, strides)
        self.inplanes = planes * block.expansion
        for _ in range(1, blocks):
            layers.add(block(planes, dilation=dilation,
                             previous_dilation=dilation,
                             input_size=input_size,
                             in_channels=self.inplanes, **shared))
    return layers
def __init__(self, dilations, stages, channels, anchors, strides, classes,
             alloc_size=(128, 128), nms_thresh=0.45, nms_topk=400, post_nms=100,
             pos_iou_thresh=1.0, ignore_iou_thresh=0.7,
             num_sync_bn_devices=-1, **kwargs):
    """Set up stages, transitions, detection blocks and output layers.

    One ``YOLODetectionBlockV4`` and one ``YOLOOutputV3`` is created per
    stage; every stage except the first also gets a 1x1 transition conv.
    ``anchors``, ``strides`` and ``dilations`` are consumed in reverse order.

    Raises
    ------
    NotImplementedError
        If ``pos_iou_thresh`` is not >= 1.
    """
    # NOTE(review): the parent call passes `stages` twice and fixed values
    # for post_nms / pos_iou_thresh / num_sync_bn_devices; the attributes
    # assigned below use the actual arguments. Kept exactly as written.
    super(YOLOV4, self).__init__(stages, stages, channels, anchors, strides,
                                 classes, alloc_size=alloc_size,
                                 nms_thresh=nms_thresh, nms_topk=nms_topk,
                                 post_nms=100, pos_iou_thresh=1.0,
                                 ignore_iou_thresh=ignore_iou_thresh,
                                 num_sync_bn_devices=-1, **kwargs)
    self._classes = classes
    self.nms_thresh = nms_thresh
    self.nms_topk = nms_topk
    self.post_nms = post_nms
    self._pos_iou_thresh = pos_iou_thresh
    self._ignore_iou_thresh = ignore_iou_thresh

    if not pos_iou_thresh >= 1:
        raise NotImplementedError(
            "pos_iou_thresh({}) < 1.0 is not implemented!".format(
                pos_iou_thresh))
    self._target_generator = YOLOV3TargetMerger(
        len(classes), ignore_iou_thresh)
    self._loss = YOLOV3Loss()

    with self.name_scope():
        self.stages = nn.HybridSequential()
        self.transitions = nn.HybridSequential()
        self.yolo_blocks = nn.HybridSequential()
        self.yolo_outputs = nn.HybridSequential()
        # note that anchors and strides and dilations should be used in reverse order
        per_stage = zip(stages, channels, anchors[::-1], strides[::-1],
                        dilations[::-1])
        for idx, (stage, channel, anchor, stride, dilation) in enumerate(per_stage):
            self.stages.add(stage)
            self.yolo_blocks.add(
                YOLODetectionBlockV4(channel, dilation, num_sync_bn_devices))
            self.yolo_outputs.add(
                YOLOOutputV3(idx, len(classes), anchor, stride,
                             alloc_size=alloc_size))
            if idx > 0:
                self.transitions.add(
                    _conv2d(channel, 1, 0, 1, num_sync_bn_devices))
def test_DeformableConvolution():
    """Run forward/backward through a stack of DeformableConvolution layers
    covering several argument combinations; returns early when no GPU
    context can be created.
    """
    # (channels, kernel_size, extra keyword arguments) for each layer.
    layer_specs = [
        (10, (3, 3), {}),
        (10, (3, 2), dict(activation='relu', offset_use_bias=False,
                          use_bias=False)),
        (10, (3, 2), dict(activation='relu', offset_use_bias=False)),
        (10, (3, 2), dict(activation='relu', use_bias=False)),
        (10, (3, 2), dict(offset_use_bias=False, use_bias=False)),
        (10, (3, 2), dict(offset_use_bias=False)),
        (12, (3, 2), dict(use_bias=False)),
        (12, (3, 2), dict(use_bias=False, num_deformable_group=4)),
    ]
    net = nn.HybridSequential()
    net.add(*[
        DeformableConvolution(out_channels, kernel_size=kernel, strides=1,
                              padding=0, **extra)
        for out_channels, kernel, extra in layer_specs
    ])

    # Probe for a usable GPU by allocating a tiny array on it.
    try:
        ctx = mx.gpu()
        _ = mx.nd.array([0], ctx=ctx)
    except mx.base.MXNetError:
        print("deformable_convolution only supports GPU")
        return

    net.initialize(force_reinit=True, ctx=ctx)
    net.hybridize()

    data = mx.nd.random.uniform(shape=(8, 5, 30, 31), ctx=ctx)
    with mx.autograd.record():
        out = net(data)
        out.backward()
def get_net():
    """Return a HybridSequential MLP: Dense(256, relu) -> Dense(128, relu) -> Dense(2)."""
    mlp = nn.HybridSequential()  # Here we use the class HybridSequential.
    mlp.add(nn.Dense(256, activation='relu'))
    mlp.add(nn.Dense(128, activation='relu'))
    mlp.add(nn.Dense(2))
    return mlp
def conv_block(channels):
    """Return BatchNorm -> ReLU -> 3x3 Conv2D(channels, padding=1)."""
    block = nn.HybridSequential()
    block.add(nn.BatchNorm())
    block.add(nn.Activation('relu'))
    block.add(nn.Conv2D(channels, kernel_size=3, padding=1))
    return block
class SwapAxes(nn.HybridBlock): def __init__(self, dim1, dim2): super(SwapAxes, self).__init__() self.dim1 = dim1 self.dim2 = dim2 # def forward(self, x): # return nd.swapaxes(x, self.dim1, self.dim2) def hybrid_forward(self, F, x, *args, **kwargs): return F.swapaxes(x, self.dim1, self.dim2) with mx.Context(mx.cpu(0)): model = nn.HybridSequential() model.add( SwapAxes(1, 2), CBR(40, 1), CBR(40), CBR(40), nn.MaxPool1D(2), CBR(80, 1), CBR(80), CBR(80), nn.MaxPool1D(2), CBR(160, 1), nn.Dropout(0.3), CBR(160), CBR(160), CBR(160),
def __init__(self, layers, growth_rate, **kwargs):
    """Create ``self.net`` holding ``layers`` conv blocks of ``growth_rate`` channels.

    ``**kwargs`` is forwarded to the parent constructor.
    """
    super(DenseBlock, self).__init__(**kwargs)
    self.net = nn.HybridSequential()
    remaining = layers
    while remaining > 0:
        self.net.add(conv_block(growth_rate))
        remaining -= 1
def __init__(self, network, base_size, features, num_filters, sizes, ratios, steps, classes, use_1x1_transition=True, use_bn=True, reduce_ratio=1.0, min_depth=128, global_pool=False, pretrained=False, stds=(0.1, 0.1, 0.2, 0.2), anchor_alloc_size=128, nms_overlap_thresh=0.5, nms_topk=200, nms_valid_thresh=0.0, post_nms=200, norm_layer=GroupBatchNorm, fuse_bn_relu=True, fuse_bn_add_relu=True, bn_fp16=False, norm_kwargs=None, predictors_kernel=(3, 3), predictors_pad=(1, 1), ctx=mx.cpu(), layout='NCHW', **kwargs):
    """Validate the anchor configuration and build features, predictors and decoders.

    Parameters (as used by the visible code)
    ----------------------------------------
    network : str or None
        When ``None``, ``features`` is called as a factory for the feature
        block; otherwise a ``FeatureExpander`` is built on ``network``.
    features, num_filters, global_pool
        Determine the number of layers when ``network`` is given and are
        forwarded to ``FeatureExpander``.
    sizes : list
        Must have ``num_layers + 1`` entries; consecutive pairs are zipped.
    ratios : list
        Must be a list; a flat list is repeated ``num_layers`` times.
    classes
        Stored as ``self.classes``.
    stds
        Passed to ``NormalizedBoxCenterDecoder``.
    predictors_kernel, predictors_pad, layout
        Passed to every ``ConvPredictor``.
    **kwargs
        Forwarded to the parent constructor.

    ``base_size``, ``steps`` and ``anchor_alloc_size`` are accepted but not
    referenced in this constructor.

    NOTE(review): the nesting below was reconstructed from a source whose
    indentation was lost — confirm which statements sit inside
    ``with self.name_scope()`` against the original file.
    """
    super(SSD, self).__init__(**kwargs)
    if norm_kwargs is None:
        norm_kwargs = {}
    if network is None:
        num_layers = len(ratios)
    else:
        num_layers = len(features) + len(num_filters) + int(global_pool)
    assert len(sizes) == num_layers + 1
    # Turn [s0, s1, ..., sn] into [(s0, s1), (s1, s2), ...].
    sizes = list(zip(sizes[:-1], sizes[1:]))
    assert isinstance(ratios, list), "Must provide ratios as list or list of list"
    # NOTE(review): for a flat list this yields len(ratios) * num_layers
    # scalars, so the length assertion below only passes when the flat list
    # has exactly one element — confirm whether `[ratios] * num_layers` was
    # intended.
    if not isinstance(ratios[0], (tuple, list)):
        ratios = ratios * num_layers  # propagate to all layers if use same ratio
    assert num_layers == len(sizes) == len(ratios), \
        f"Mismatched (number of layers) vs (sizes) vs (ratios): {num_layers}, {len(sizes)}, {len(ratios)}."
    assert num_layers > 0, "SSD require at least one layer, suggest multiple."
    self._num_layers = num_layers
    self.classes = classes
    self.nms_overlap_thresh = nms_overlap_thresh
    self.nms_topk = nms_topk
    self.nms_valid_thresh = nms_valid_thresh
    self.post_nms = post_nms
    self.layout = layout
    self.reduce_ratio = reduce_ratio
    self._bn_fp16 = bn_fp16
    # Defaults to 1 when 'bn_group' is absent from norm_kwargs.
    self._bn_group = norm_kwargs.get('bn_group', 1)

    logging.info(f'[SSD] network: {network}')
    logging.info(f'[SSD] norm layer: {norm_layer}')
    logging.info(f'[SSD] fuse bn relu: {fuse_bn_relu}')
    logging.info(f'[SSD] fuse bn add relu: {fuse_bn_add_relu}')
    logging.info(f'[SSD] bn group: {self._bn_group}')

    with self.name_scope():
        if network is None:
            # use fine-grained manually designed block as features
            self.features = features(pretrained=pretrained, ctx=ctx, norm_layer=norm_layer, fuse_bn_relu=fuse_bn_relu, fuse_bn_add_relu=fuse_bn_add_relu, bn_fp16=bn_fp16, norm_kwargs=norm_kwargs)
        else:
            self.features = FeatureExpander(
                network=network, outputs=features, num_filters=num_filters, use_1x1_transition=use_1x1_transition, use_bn=use_bn, reduce_ratio=reduce_ratio, min_depth=min_depth, global_pool=global_pool, pretrained=pretrained, ctx=ctx, norm_layer=norm_layer, fuse_bn_relu=fuse_bn_relu, fuse_bn_add_relu=fuse_bn_add_relu, bn_fp16=bn_fp16, norm_kwargs=norm_kwargs, layout=layout)

        # use a single ConvPredictor for conf and loc predictors (head fusion),
        # but they are treated as two different segments
        self.predictors = nn.HybridSequential()
        # NOTE(review): hard-coded to six entries, independent of
        # num_layers; one predictor is created per entry.
        self.num_defaults = [4, 6, 6, 6, 4, 4]
        padding_channels_to = 8
        self.padding_amounts = [
        ]  # We keep track of padding to slice conf/loc correctly
        self.predictor_offsets = [
        ]  # We keep track of offset to initialize conf/loc correctly
        for nd in self.num_defaults:
            # keep track of beginning/ending offsets for all segments
            offsets = [0]
            # self.num_classes is not assigned in this constructor —
            # presumably a property defined elsewhere on the class.
            n = nd * (self.num_classes + 1
                      )  # output channels for conf predictors
            offsets.append(n)
            n = n + nd * 4  # output channels for both conf and loc predictors
            offsets.append(n)
            # padding if necessary
            padding_amt = 0
            # manually pad to get HMMA kernels for NHWC layout
            if (self.layout == 'NHWC') and (n % padding_channels_to):
                # Round n up to the next multiple of padding_channels_to.
                padding_amt = padding_channels_to - (n % padding_channels_to)
                n = n + padding_amt
            if padding_amt:
                # A third segment boundary marks the end of the padding.
                offsets.append(n)
            self.predictors.add(
                ConvPredictor(n, kernel=predictors_kernel, pad=predictors_pad, layout=layout))
            self.predictor_offsets.append(offsets)
            self.padding_amounts.append(padding_amt)
        self.bbox_decoder = NormalizedBoxCenterDecoder(stds)
        self.cls_decoder = MultiPerClassDecoder(self.num_classes + 1, thresh=0)
def __init__(self,
             structure,
             norm_act=bnrelu,
             classes=0,
             dilation=False,
             dist_bn=False):
    """Assemble the stem convolution and the six residual modules (mod2..mod7).

    Parameters
    ----------
    structure : sequence of int
        Number of residual blocks in each of the six modules.
    norm_act : callable
        Accepted for interface compatibility; see the review note in the body.
    classes : int, default 0
        When non-zero, a global-average-pool + dense classifier is attached.
    dilation : bool, default False
        When true, only module index 2 strides and module indices >= 3 use
        dilation 2 / 4; otherwise module indices 2..4 stride by 2.
    dist_bn : bool, default False
        Forwarded to every ``IdentityResidualBlock``.

    Raises
    ------
    ValueError
        If ``structure`` does not have exactly six entries.
    """
    super(WiderResNetA2, self).__init__()
    self.dist_bn = dist_bn
    # NOTE(review): the caller-supplied ``norm_act`` is overwritten here, so the
    # argument currently has no effect -- confirm whether this is intended.
    norm_act = bnrelu
    self.structure = structure
    self.dilation = dilation

    if len(structure) != 6:
        raise ValueError("Expected a structure with six values")

    self.mod1 = nn.HybridSequential(prefix='mod1')
    self.mod1.add(nn.Conv2D(in_channels=3, channels=64, kernel_size=3,
                            strides=1, padding=1, use_bias=False))

    # Groups of residual blocks
    stage_widths = [(128, 128), (256, 256), (512, 512), (512, 1024),
                    (512, 1024, 2048), (1024, 2048, 4096)]
    # Dropout rate per module index; modules not listed run without dropout.
    dropout_rate_of = {4: 0.3, 5: 0.5}
    in_channels = 64

    for mod_id, num in enumerate(structure):
        widths = stage_widths[mod_id]
        if dilation:
            striding_mods = (2,)
            dil = 2 if mod_id == 3 else (4 if mod_id > 3 else 1)
        else:
            striding_mods = (2, 3, 4)
            dil = 1

        blocks = []
        for block_id in range(num):
            # Only the first block of a striding module downsamples.
            strides = 2 if (block_id == 0 and mod_id in striding_mods) else 1
            rate = dropout_rate_of.get(mod_id)
            drop = None if rate is None else partial(nn.Dropout, rate=rate)
            blocks.append((
                "block%d" % (block_id + 1),
                IdentityResidualBlock(in_channels=in_channels,
                                      channels=widths,
                                      norm_act=norm_act,
                                      strides=strides,
                                      dilation=dil,
                                      dropout=drop,
                                      dist_bn=self.dist_bn)
            ))
            # The next block consumes this block's last output width.
            in_channels = widths[-1]

        # Register the module as mod2..mod7; the first two modules are
        # preceded by a max-pool registered as pool2 / pool3.
        suffix = mod_id + 2
        if mod_id < 2:
            setattr(self, 'pool%d' % suffix,
                    nn.MaxPool2D(pool_size=3, strides=2, padding=1))
        module = nn.HybridSequential(prefix='mod%d' % suffix)
        setattr(self, 'mod%d' % suffix, module)
        for _, residual_block in blocks:
            module.add(residual_block)

    # Pooling and predictor
    self.bn_out = norm_act(in_channels)
    if classes != 0:
        self.classifier = nn.HybridSequential(prefix='classifier')
        self.classifier.add(nn.GlobalAvgPool2D())
        self.classifier.add(nn.Dense(in_units=in_channels, units=classes))
def __init__(self, block, layers, classes=1000, dilated=False, norm_layer=BatchNorm,
             norm_kwargs=None, last_gamma=False, deep_stem=False, stem_width=32,
             avg_down=False, final_drop=0.0, use_global_stats=False, **kwargs):
    """Build the stem, the four residual stages and the classification head.

    Parameters
    ----------
    block : class
        Residual block type; must expose an ``expansion`` attribute.
    layers : sequence of int
        Number of blocks in each of the four stages.
    classes : int, default 1000
        Number of output units of the final dense layer.
    dilated : bool, default False
        When true, stages 3 and 4 use dilation 2 / 4 with stride 1.
    norm_layer : class, default BatchNorm
        Normalization layer constructor.
    norm_kwargs : dict or None, default None
        Extra keyword arguments for ``norm_layer``. ``None`` means no extra
        arguments. The dict is copied, so the caller's object is never modified.
    last_gamma : bool, default False
        Forwarded to every block via ``_make_layer``.
    deep_stem : bool, default False
        Use three 3x3 convolutions instead of a single 7x7 stem convolution.
    stem_width : int, default 32
        Width of the first two deep-stem convolutions.
    avg_down : bool, default False
        Forwarded to ``_make_layer``.
    final_drop : float, default 0.0
        Dropout rate before the dense layer; no dropout layer when <= 0.
    use_global_stats : bool, default False
        When true, ``use_global_stats=True`` is added to the norm kwargs.
    **kwargs
        Accepted but not used by this constructor.
    """
    self.inplanes = stem_width * 2 if deep_stem else 64
    super(ResNetV1b, self).__init__()
    # Work on a private copy: a shared ``{}`` default (or the caller's dict)
    # must not pick up ``use_global_stats`` from one instance and leak it
    # into the next one.
    norm_kwargs = dict(norm_kwargs) if norm_kwargs is not None else {}
    if use_global_stats:
        norm_kwargs['use_global_stats'] = True
    self.norm_kwargs = norm_kwargs

    with self.name_scope():
        if not deep_stem:
            self.conv1 = nn.Conv2D(channels=64, kernel_size=7, strides=2,
                                   padding=3, use_bias=False)
        else:
            self.conv1 = nn.HybridSequential(prefix='conv1')
            self.conv1.add(
                nn.Conv2D(channels=stem_width, kernel_size=3, strides=2,
                          padding=1, use_bias=False))
            self.conv1.add(norm_layer(**norm_kwargs))
            self.conv1.add(nn.Activation('relu'))
            self.conv1.add(
                nn.Conv2D(channels=stem_width, kernel_size=3, strides=1,
                          padding=1, use_bias=False))
            self.conv1.add(norm_layer(**norm_kwargs))
            self.conv1.add(nn.Activation('relu'))
            self.conv1.add(
                nn.Conv2D(channels=stem_width * 2, kernel_size=3, strides=1,
                          padding=1, use_bias=False))
        self.bn1 = norm_layer(**norm_kwargs)
        self.relu = nn.Activation('relu')
        self.maxpool = nn.MaxPool2D(pool_size=3, strides=2, padding=1)

        self.layer1 = self._make_layer(1, block, 64, layers[0],
                                       avg_down=avg_down, norm_layer=norm_layer,
                                       last_gamma=last_gamma)
        self.layer2 = self._make_layer(2, block, 128, layers[1], strides=2,
                                       avg_down=avg_down, norm_layer=norm_layer,
                                       last_gamma=last_gamma)
        if dilated:
            self.layer3 = self._make_layer(3, block, 256, layers[2], strides=1,
                                           dilation=2, avg_down=avg_down,
                                           norm_layer=norm_layer,
                                           last_gamma=last_gamma)
            self.layer4 = self._make_layer(4, block, 512, layers[3], strides=1,
                                           dilation=4, avg_down=avg_down,
                                           norm_layer=norm_layer,
                                           last_gamma=last_gamma)
        else:
            self.layer3 = self._make_layer(3, block, 256, layers[2], strides=2,
                                           avg_down=avg_down,
                                           norm_layer=norm_layer,
                                           last_gamma=last_gamma)
            # NOTE(review): stage 4 keeps strides=1 in the non-dilated branch
            # too, so it never downsamples -- confirm this is intended.
            self.layer4 = self._make_layer(4, block, 512, layers[3], strides=1,
                                           avg_down=avg_down,
                                           norm_layer=norm_layer,
                                           last_gamma=last_gamma)

        self.avgpool = nn.GlobalAvgPool2D()
        self.flat = nn.Flatten()
        self.drop = None
        if final_drop > 0.0:
            self.drop = nn.Dropout(final_drop)
        self.fc = nn.Dense(in_units=512 * block.expansion, units=classes)
def custom_rcnn_fpn(pretrained_base=True, base_network_name='resnet18_v1b',
                    norm_layer=nn.BatchNorm, norm_kwargs=None, sym_norm_layer=None,
                    sym_norm_kwargs=None, num_fpn_filters=256, num_box_head_conv=4,
                    num_box_head_conv_filters=256, num_box_head_dense_filters=1024):
    r"""Generate custom RCNN model with resnet base network w/FPN.

    Parameters
    ----------
    pretrained_base : bool or str
        Boolean value controls whether to load the default pretrained weights for model.
        String value represents the hashtag for a certain version of pretrained weights.
    base_network_name : str, default 'resnet18_v1b'
        Base network for mask RCNN. Currently supported: 'resnet18_v1b', 'resnet50_v1b',
        'resnet101_v1d', 'resnest50' and 'resnest101'.
    norm_layer : nn.HybridBlock, default nn.BatchNorm
        Gluon normalization layer to use. With the default ``nn.BatchNorm`` the
        backbone is built with ``use_global_stats=True`` (frozen batch normalization).
    norm_kwargs : dict
        Keyword arguments for gluon normalization layer. ``None`` is treated as
        "no extra arguments" when building the box head.
    sym_norm_layer : nn.SymbolBlock, default `None`
        Symbol normalization layer to use in FPN. This is due to FPN being implemented
        using SymbolBlock. Default is `None`, meaning no normalization layer will be
        used in FPN.
    sym_norm_kwargs : dict
        Keyword arguments for symbol normalization layer used in FPN.
    num_fpn_filters : int, default 256
        Number of filters for FPN output layers.
    num_box_head_conv : int, default 4
        Number of convolution layers to use in box head if batch normalization is not
        frozen.
    num_box_head_conv_filters : int, default 256
        Number of filters for convolution layers in box head.
        Only applicable if batch normalization is not frozen.
    num_box_head_dense_filters : int, default 1024
        Number of hidden units for the last fully connected layer in box head.

    Returns
    -------
    SymbolBlock or HybridBlock
        Base feature extractor eg. resnet w/ FPN.
    None or HybridBlock
        R-CNN feature before each task heads.
    HybridBlock
        Box feature extractor

    Raises
    ------
    NotImplementedError
        If ``base_network_name`` is not one of the supported backbones.
    """
    use_global_stats = norm_layer is nn.BatchNorm

    # Pick the backbone factory and the internal outputs that feed the FPN.
    # Imports stay inside the branches so that only the selected backbone
    # module is loaded.
    if base_network_name == 'resnet18_v1b':
        from ...model_zoo.resnetv1b import resnet18_v1b as build_base_network
        fpn_inputs_names = [
            'layers1_relu3_fwd', 'layers2_relu3_fwd', 'layers3_relu3_fwd',
            'layers4_relu3_fwd'
        ]
    elif base_network_name == 'resnet50_v1b':
        from ...model_zoo.resnetv1b import resnet50_v1b as build_base_network
        fpn_inputs_names = [
            'layers1_relu8_fwd', 'layers2_relu11_fwd', 'layers3_relu17_fwd',
            'layers4_relu8_fwd'
        ]
    elif base_network_name == 'resnet101_v1d':
        from ...model_zoo.resnetv1b import resnet101_v1d as build_base_network
        fpn_inputs_names = [
            'layers1_relu8_fwd', 'layers2_relu11_fwd', 'layers3_relu68_fwd',
            'layers4_relu8_fwd'
        ]
    elif base_network_name == 'resnest50':
        from ...model_zoo.resnest import resnest50 as build_base_network
        fpn_inputs_names = [
            'layers1_relu11_fwd', 'layers2_relu15_fwd', 'layers3_relu23_fwd',
            'layers4_relu11_fwd'
        ]
    elif base_network_name == 'resnest101':
        from ...model_zoo.resnest import resnest101 as build_base_network
        fpn_inputs_names = [
            'layers1_relu11_fwd', 'layers2_relu15_fwd', 'layers3_relu91_fwd',
            'layers4_relu11_fwd'
        ]
    else:
        raise NotImplementedError('Unsupported network', base_network_name)

    base_network = build_base_network(pretrained=pretrained_base,
                                      dilated=False,
                                      use_global_stats=use_global_stats,
                                      norm_layer=norm_layer,
                                      norm_kwargs=norm_kwargs)

    features = FPNFeatureExpander(network=base_network,
                                  outputs=fpn_inputs_names,
                                  num_filters=[num_fpn_filters] * len(fpn_inputs_names),
                                  use_1x1=True,
                                  use_upsample=True,
                                  use_elewadd=True,
                                  use_p6=True,
                                  no_bias=not use_global_stats,
                                  pretrained=pretrained_base,
                                  norm_layer=sym_norm_layer,
                                  norm_kwargs=sym_norm_kwargs)
    top_features = None

    box_features = nn.HybridSequential()
    box_features.add(nn.AvgPool2D(pool_size=(3, 3), strides=2, padding=1))  # reduce to 7x7
    if use_global_stats:
        box_features.add(
            nn.Dense(num_box_head_dense_filters, weight_initializer=mx.init.Normal(0.01)),
            nn.Activation('relu'))
    else:
        # ``norm_kwargs`` defaults to None; unpacking None would raise TypeError.
        head_norm_kwargs = {} if norm_kwargs is None else norm_kwargs
        for _ in range(num_box_head_conv):
            box_features.add(
                nn.Conv2D(num_box_head_conv_filters, 3, padding=1, use_bias=False),
                norm_layer(**head_norm_kwargs),
                nn.Activation('relu'))
    box_features.add(
        nn.Dense(num_box_head_dense_filters, weight_initializer=mx.init.Normal(0.01)),
        nn.Activation('relu'))
    return features, top_features, box_features
def _make_layer(self, stage_index, block, planes, blocks, strides=1, dilation=1,
                avg_down=False, norm_layer=None, last_gamma=False):
    """Build one residual stage as a ``HybridSequential`` of ``blocks`` units.

    Parameters
    ----------
    stage_index : int
        Used in the ``down%d_`` / ``layers%d_`` name prefixes.
    block : class
        Residual block type exposing ``expansion``.
    planes : int
        Base width; the stage outputs ``planes * block.expansion`` channels.
    blocks : int
        Number of residual units in the stage.
    strides : int, default 1
        Stride of the first unit (and of the shortcut projection).
    dilation : int, default 1
        Must be 1, 2 or 4.
    avg_down : bool, default False
        Put an average pool in front of a stride-1 shortcut projection.
    norm_layer : class
        Normalization layer constructor, called with ``self.norm_kwargs``.
    last_gamma : bool, default False
        Forwarded to every unit.

    Returns
    -------
    nn.HybridSequential
        The assembled stage. ``self.inplanes`` is updated to the stage's
        output width as a side effect.

    Raises
    ------
    RuntimeError
        If ``dilation`` is not 1, 2 or 4.
    """
    out_channels = planes * block.expansion

    # A shortcut projection is needed whenever the first unit changes the
    # resolution or the channel count.
    downsample = None
    if strides != 1 or self.inplanes != out_channels:
        downsample = nn.HybridSequential(prefix='down%d_' % stage_index)
        with downsample.name_scope():
            if avg_down:
                pool = strides if dilation == 1 else 1
                downsample.add(nn.AvgPool2D(pool_size=pool, strides=pool))
                proj_strides = 1
            else:
                proj_strides = strides
            downsample.add(
                nn.Conv2D(channels=out_channels, kernel_size=1,
                          strides=proj_strides, use_bias=False))
            downsample.add(norm_layer(**self.norm_kwargs))

    layers = nn.HybridSequential(prefix='layers%d_' % stage_index)
    with layers.name_scope():
        # The first unit runs at a reduced dilation relative to the stage.
        if dilation == 4:
            first_dilation = 2
        elif dilation in (1, 2):
            first_dilation = 1
        else:
            raise RuntimeError(
                "=> unknown dilation size: {}".format(dilation))
        layers.add(
            block(planes, strides, dilation=first_dilation,
                  downsample=downsample, previous_dilation=dilation,
                  norm_layer=norm_layer, norm_kwargs=self.norm_kwargs,
                  last_gamma=last_gamma))

        self.inplanes = out_channels
        for _ in range(1, blocks):
            layers.add(
                block(planes, dilation=dilation, previous_dilation=dilation,
                      norm_layer=norm_layer, norm_kwargs=self.norm_kwargs,
                      last_gamma=last_gamma))
    return layers
def faster_rcnn_resnet50_v1b_coco(pretrained=False, pretrained_base=True, **kwargs):
    r"""Faster RCNN model with a ResNet-50 v1b backbone for COCO, from the paper
    "Ren, S., He, K., Girshick, R., & Sun, J. (2015). Faster r-cnn: Towards
    real-time object detection with region proposal networks"

    Parameters
    ----------
    pretrained : bool, optional, default is False
        Load pretrained weights.
    pretrained_base : bool, optional, default is True
        Load pretrained base network, the extra layers are randomized. Note that
        if pretrained is `True`, this has no effect.
    **kwargs
        Forwarded unchanged to ``get_faster_rcnn`` (for example ``ctx``, the
        context in which to load the pretrained weights, and ``root``, the
        location for keeping the model parameters).

    Examples
    --------
    >>> model = faster_rcnn_resnet50_v1b_coco(pretrained=True)
    >>> print(model)
    """
    from ..resnetv1b import resnet50_v1b
    from ...data import COCODetection

    classes = COCODetection.CLASSES
    # Full pretrained weights make separately loading the backbone redundant.
    if pretrained:
        pretrained_base = False
    base_network = resnet50_v1b(pretrained=pretrained_base, dilated=False,
                                use_global_stats=True)

    # Everything up to layer3 is the shared feature extractor; layer4 is
    # split off as the ``top_features`` block.
    features = nn.HybridSequential()
    for attr in ('conv1', 'bn1', 'relu', 'maxpool', 'layer1', 'layer2', 'layer3'):
        features.add(getattr(base_network, attr))
    top_features = nn.HybridSequential()
    for attr in ('layer4',):
        top_features.add(getattr(base_network, attr))

    trainable = ['.*dense', '.*rpn', '.*down(2|3|4)_conv', '.*layers(2|3|4)_conv']
    train_patterns = '|'.join(trainable)

    return get_faster_rcnn(
        name='resnet50_v1b', dataset='coco', pretrained=pretrained,
        features=features, top_features=top_features, classes=classes,
        short=800, max_size=1333, train_patterns=train_patterns,
        nms_thresh=0.5, nms_topk=-1, post_nms=-1,
        roi_mode='align', roi_size=(14, 14), stride=16, clip=4.42,
        rpn_channel=1024, base_size=16, scales=(2, 4, 8, 16, 32),
        ratios=(0.5, 1, 2), alloc_size=(128, 128), rpn_nms_thresh=0.7,
        rpn_train_pre_nms=12000, rpn_train_post_nms=2000,
        rpn_test_pre_nms=6000, rpn_test_post_nms=1000, rpn_min_size=0,
        num_sample=128, pos_iou_thresh=0.5, pos_ratio=0.25,
        **kwargs)
def __init__(self, askc_type, channels, cardinality, bottleneck_width, stride,
             downsample=False, last_gamma=False, use_se=False, avg_down=True,
             norm_layer=BatchNorm, norm_kwargs=None, **kwargs):
    """Build the bottleneck body, optional SE / shortcut branches and the fusion op.

    Parameters
    ----------
    askc_type : str
        One of 'DirectAdd', 'ResGlobLocaforGlobLocaCha' or 'ASKCFuse'.
    channels : int
        Base width; the block outputs ``channels * 4`` channels.
    cardinality : int
        Number of groups of the 3x3 convolution.
    bottleneck_width : int
        Width of each group relative to a 64-channel base.
    stride : int
        Stride of the 3x3 convolution (and of the shortcut, if any).
    downsample : bool, default False
        Whether to build a projection shortcut.
    last_gamma : bool, default False
        Selects the initializer of the last norm layer (see note in the body).
    use_se : bool, default False
        Whether to build the squeeze-and-excitation branch.
    avg_down : bool, default True
        Use average pooling followed by a stride-1 projection in the shortcut.
    norm_layer : class, default BatchNorm
        Normalization layer constructor.
    norm_kwargs : dict or None
        Extra keyword arguments for ``norm_layer``.

    Raises
    ------
    ValueError
        If ``askc_type`` is not recognised.
    """
    super(AFFResNeXtBlock, self).__init__(**kwargs)
    bn_kwargs = {} if norm_kwargs is None else norm_kwargs
    out_channels = channels * 4
    D = int(math.floor(channels * (bottleneck_width / 64)))
    group_width = cardinality * D

    # 1x1 reduce -> grouped 3x3 -> 1x1 expand
    self.body = nn.HybridSequential(prefix='')
    self.body.add(nn.Conv2D(group_width, kernel_size=1, use_bias=False))
    self.body.add(norm_layer(**bn_kwargs))
    self.body.add(nn.Activation('relu'))
    self.body.add(
        nn.Conv2D(group_width, kernel_size=3, strides=stride, padding=1,
                  groups=cardinality, use_bias=False))
    self.body.add(norm_layer(**bn_kwargs))
    self.body.add(nn.Activation('relu'))
    self.body.add(nn.Conv2D(out_channels, kernel_size=1, use_bias=False))
    # NOTE(review): gamma is zero-initialised when ``last_gamma`` is False,
    # which reads inverted relative to the flag's name -- confirm before
    # changing, since it affects default initialisation.
    if last_gamma:
        final_norm = norm_layer(**bn_kwargs)
    else:
        final_norm = norm_layer(gamma_initializer='zeros', **bn_kwargs)
    self.body.add(final_norm)

    if not use_se:
        self.se = None
    else:
        self.se = nn.HybridSequential(prefix='')
        self.se.add(nn.Conv2D(channels // 4, kernel_size=1, padding=0))
        self.se.add(nn.Activation('relu'))
        self.se.add(nn.Conv2D(out_channels, kernel_size=1, padding=0))
        self.se.add(nn.Activation('sigmoid'))

    if not downsample:
        self.downsample = None
    else:
        self.downsample = nn.HybridSequential(prefix='')
        if avg_down:
            self.downsample.add(
                nn.AvgPool2D(pool_size=stride, strides=stride,
                             ceil_mode=True, count_include_pad=False))
            self.downsample.add(
                nn.Conv2D(channels=out_channels, kernel_size=1, strides=1,
                          use_bias=False))
        else:
            self.downsample.add(
                nn.Conv2D(out_channels, kernel_size=1, strides=stride,
                          use_bias=False))
        self.downsample.add(norm_layer(**bn_kwargs))

    # Fusion operator that merges the body output with the shortcut.
    if askc_type == 'DirectAdd':
        self.attention = DirectAddFuse()
    elif askc_type == 'ResGlobLocaforGlobLocaCha':
        self.attention = ResGlobLocaforGlobLocaChaFuse(channels=out_channels, r=16)
    elif askc_type == 'ASKCFuse':
        self.attention = ASKCFuse(channels=out_channels, r=16)
    else:
        raise ValueError('Unknown askc_type')
# Script fragment: builds the VOC data loaders and an FCN on top of a
# pretrained ResNet-18 v2 backbone. `voc_train`, `crop_size`, `colormap2label`,
# `num_workers`, `num_classes`, `ctx` and `args` are defined outside this span.
# NOTE(review): all paths below are hard-coded to one user's home directory.
voc_test = VOCSegDataset(False, crop_size,
                         "/home/lizh/learn-gluon/data/VOC2012", colormap2label)
# Both loaders drop the final incomplete batch; only training is shuffled.
train_iter = gdata.DataLoader(voc_train, args.batch_size, shuffle=True,
                              last_batch="discard", num_workers=num_workers)
test_iter = gdata.DataLoader(voc_test, args.batch_size, last_batch="discard",
                             num_workers=num_workers)
pretrained_net = model_zoo.vision.resnet18_v2(
    pretrained=True, root="/home/lizh/learn-gluon/models")
net = nn.HybridSequential()
# Reuse every backbone feature layer except the last two.
for layer in pretrained_net.features[:-2]:
    net.add(layer)
# Head: 1x1 conv to per-class scores, then a stride-32 transposed conv.
net.add(
    nn.Conv2D(num_classes, kernel_size=1),
    nn.Conv2DTranspose(num_classes, kernel_size=64, padding=16, strides=32))
net[-2].initialize(init.Xavier())
# The transposed conv starts from the constant returned by bilinear_kernel.
net[-1].initialize(
    init.Constant(bilinear_kernel(num_classes, num_classes, 64)))
net.collect_params().reset_ctx(ctx)
if args.train:
    # Optionally start from previously saved parameters.
    if args.load_parameters:
        net.load_parameters("/home/lizh/learn-gluon/models/fcn.params")
    num_epochs = args.num_epochs
def __init__(self, nclass=1000, norm_layer=BatchNorm, num_segments=1, norm_kwargs=None,
             partial_bn=False, pretrained_base=True, dropout_ratio=0.5, init_std=0.01,
             ctx=None, **kwargs):
    """Build the 3D Inception-v3 trunk and head, optionally inflating 2D weights.

    Parameters
    ----------
    nclass : int, default 1000
        Number of output units of the final dense layer.
    norm_layer : class, default BatchNorm
        Normalization layer constructor handed to every building block.
    num_segments : int, default 1
        Stored as ``self.num_segments``.
    norm_kwargs : dict or None
        Extra keyword arguments for ``norm_layer``. Never modified in place.
    partial_bn : bool, default False
        When true, every block after the first convolution is built with
        ``use_global_stats=True`` in its norm kwargs.
    pretrained_base : bool, default True
        When true, weights of a pretrained 2D ``inception_v3`` are copied
        (batch norm) or inflated along the temporal axis (convolutions).
    dropout_ratio : float, default 0.5
        Dropout rate in front of the dense layer.
    init_std : float, default 0.01
        Sigma of the normal initializer of the dense layer.
    ctx : Context or None
        Context passed to ``initialize`` for the trunk and the head.
    **kwargs
        Forwarded to the parent constructor.
    """
    super(I3D_InceptionV3, self).__init__(**kwargs)

    self.num_segments = num_segments
    self.feat_dim = 2048
    self.dropout_ratio = dropout_ratio
    self.init_std = init_std

    with self.name_scope():
        self.features = nn.HybridSequential(prefix='')
        self.features.add(
            _make_basic_conv(in_channels=3, channels=32, kernel_size=3, strides=2,
                             padding=(1, 0, 0), norm_layer=norm_layer,
                             norm_kwargs=norm_kwargs))
        if partial_bn:
            # Copy before adding the flag so the caller's dict is left
            # untouched; only the first conv above keeps trainable statistics.
            norm_kwargs = dict(norm_kwargs) if norm_kwargs is not None else {}
            norm_kwargs['use_global_stats'] = True

        self.features.add(
            _make_basic_conv(in_channels=32, channels=32, kernel_size=3,
                             padding=(1, 0, 0), norm_layer=norm_layer,
                             norm_kwargs=norm_kwargs))
        self.features.add(
            _make_basic_conv(in_channels=32, channels=64, kernel_size=3, padding=1,
                             norm_layer=norm_layer, norm_kwargs=norm_kwargs))
        self.features.add(
            nn.MaxPool3D(pool_size=3, strides=(1, 2, 2), padding=(1, 0, 0)))
        self.features.add(
            _make_basic_conv(in_channels=64, channels=80, kernel_size=1,
                             norm_layer=norm_layer, norm_kwargs=norm_kwargs))
        self.features.add(
            _make_basic_conv(in_channels=80, channels=192, kernel_size=3,
                             padding=(1, 0, 0), norm_layer=norm_layer,
                             norm_kwargs=norm_kwargs))
        self.features.add(
            nn.MaxPool3D(pool_size=3, strides=(1, 2, 2), padding=(1, 0, 0)))
        self.features.add(_make_A(192, 32, 'A1_', norm_layer, norm_kwargs))
        self.features.add(_make_A(256, 64, 'A2_', norm_layer, norm_kwargs))
        self.features.add(_make_A(288, 64, 'A3_', norm_layer, norm_kwargs))
        self.features.add(_make_B('B_', norm_layer, norm_kwargs))
        self.features.add(_make_C(768, 128, 'C1_', norm_layer, norm_kwargs))
        self.features.add(_make_C(768, 160, 'C2_', norm_layer, norm_kwargs))
        self.features.add(_make_C(768, 160, 'C3_', norm_layer, norm_kwargs))
        self.features.add(_make_C(768, 192, 'C4_', norm_layer, norm_kwargs))
        self.features.add(_make_D('D_', norm_layer, norm_kwargs))
        self.features.add(_make_E(1280, 'E1_', norm_layer, norm_kwargs))
        self.features.add(_make_E(2048, 'E2_', norm_layer, norm_kwargs))
        self.features.add(nn.GlobalAvgPool3D())

        self.head = nn.HybridSequential(prefix='')
        self.head.add(nn.Dropout(rate=self.dropout_ratio))
        self.output = nn.Dense(
            units=nclass, in_units=self.feat_dim,
            weight_initializer=init.Normal(sigma=self.init_std))
        self.head.add(self.output)

        self.features.initialize(ctx=ctx)
        self.head.initialize(ctx=ctx)

        if pretrained_base:
            inceptionv3_2d = inception_v3(pretrained=True)
            weights2d = inceptionv3_2d.collect_params()
            weights3d = self.collect_params()
            assert len(weights2d.keys()) == len(
                weights3d.keys()), 'Number of parameters should be same.'

            # Parameters are paired purely by position: the i-th 2D parameter
            # initialises the i-th 3D parameter.
            cnt = 0
            for key2d, key3d in zip(list(weights2d.keys()), list(weights3d.keys())):
                if 'conv' in key3d:
                    # Repeat the 2D kernel along the new temporal axis and
                    # divide by its length so the summed response is kept.
                    temporal_dim = weights3d[key3d].shape[2]
                    temporal_2d = nd.expand_dims(weights2d[key2d].data(), axis=2)
                    inflated_2d = nd.broadcast_to(
                        temporal_2d,
                        shape=[0, 0, temporal_dim, 0, 0]) / temporal_dim
                    assert inflated_2d.shape == weights3d[key3d].shape, \
                        'the shape of %s and %s does not match. ' % (key2d, key3d)
                    weights3d[key3d].set_data(inflated_2d)
                    cnt += 1
                    print('%s is done with shape: ' % (key3d), weights3d[key3d].shape)
                if 'batchnorm' in key3d:
                    assert weights2d[key2d].shape == weights3d[key3d].shape, \
                        'the shape of %s and %s does not match. ' % (key2d, key3d)
                    weights3d[key3d].set_data(weights2d[key2d].data())
                    cnt += 1
                    print('%s is done with shape: ' % (key3d), weights3d[key3d].shape)
                if 'dense' in key3d:
                    # The classifier keeps its fresh initialisation.
                    cnt += 1
                    print('%s is skipped with shape: ' % (key3d), weights3d[key3d].shape)

            assert cnt == len(
                weights2d.keys()
            ), 'Not all parameters have been ported, check the initialization.'
def __init__(self, channels, init_block_channels, final_block_channels,
             exp_kernel_counts, conv1_kernel_counts, conv2_kernel_counts,
             exp_factors, se_factors, bn_use_global_stats=False, in_channels=3,
             in_size=(224, 224), classes=1000, **kwargs):
    """Build the MixNet feature extractor and the dense classifier.

    Parameters
    ----------
    channels : list of list of int
        Output channels of every unit, grouped by stage.
    init_block_channels : int
        Output channels of the initial block.
    final_block_channels : int
        Output channels of the final 1x1 block.
    exp_kernel_counts, conv1_kernel_counts, conv2_kernel_counts : list of list of int
        Per-unit kernel counts, indexed ``[stage][unit]``.
    exp_factors, se_factors : list of list of int
        Per-unit expansion / squeeze-excitation factors, indexed ``[stage][unit]``.
    bn_use_global_stats : bool, default False
        Forwarded to every block.
    in_channels : int, default 3
        Number of input channels.
    in_size : tuple of two ints, default (224, 224)
        Stored as ``self.in_size``.
    classes : int, default 1000
        Number of output units.
    **kwargs
        Forwarded to the parent constructor.
    """
    super(MixNet, self).__init__(**kwargs)
    self.in_size = in_size
    self.classes = classes

    with self.name_scope():
        self.features = nn.HybridSequential(prefix="")
        self.features.add(MixInitBlock(
            in_channels=in_channels,
            out_channels=init_block_channels,
            bn_use_global_stats=bn_use_global_stats))
        in_channels = init_block_channels

        for i, channels_per_stage in enumerate(channels):
            # Stage index 3 downsamples at its middle unit; every other stage
            # downsamples at its first unit.
            downsample_at = len(channels_per_stage) // 2 if i == 3 else 0
            stage = nn.HybridSequential(prefix="stage{}_".format(i + 1))
            with stage.name_scope():
                for j, out_channels in enumerate(channels_per_stage):
                    if j == downsample_at:
                        strides = 2
                    else:
                        strides = 1
                    activation = "swish" if i != 0 else "relu"
                    stage.add(MixUnit(
                        in_channels=in_channels,
                        out_channels=out_channels,
                        strides=strides,
                        exp_kernel_count=exp_kernel_counts[i][j],
                        conv1_kernel_count=conv1_kernel_counts[i][j],
                        conv2_kernel_count=conv2_kernel_counts[i][j],
                        exp_factor=exp_factors[i][j],
                        se_factor=se_factors[i][j],
                        bn_use_global_stats=bn_use_global_stats,
                        activation=activation))
                    in_channels = out_channels
            self.features.add(stage)

        # ``activation`` is whatever the last built unit used.
        self.features.add(conv1x1_block(
            in_channels=in_channels,
            out_channels=final_block_channels,
            bn_use_global_stats=bn_use_global_stats,
            activation=activation))
        in_channels = final_block_channels
        self.features.add(nn.AvgPool2D(
            pool_size=7,
            strides=1))

        self.output = nn.HybridSequential(prefix="")
        self.output.add(nn.Flatten())
        self.output.add(nn.Dense(
            units=classes,
            in_units=in_channels))
def body():
    """Return a HybridSequential of three ``down_sample`` blocks (16, 32, 64 filters)."""
    stack = nn.HybridSequential()
    for num_filters in (16, 32, 64):
        stage = down_sample(num_filters)
        stack.add(stage)
    return stack
def __init__(self, nclass, depth, num_stages=4, pretrained=False, pretrained_base=True,
             num_segments=1, spatial_strides=(1, 2, 2, 2), temporal_strides=(1, 1, 1, 1),
             dilations=(1, 1, 1, 1), out_indices=(0, 1, 2, 3), conv1_kernel_t=5,
             conv1_stride_t=2, pool1_kernel_t=1, pool1_stride_t=2,
             inflate_freq=(1, 1, 1, 1), inflate_stride=(1, 1, 1, 1),
             inflate_style='3x1x1', nonlocal_stages=(-1, ), nonlocal_freq=(0, 1, 1, 0),
             nonlocal_cfg=None, bn_eval=True, bn_frozen=False, partial_bn=False,
             frozen_stages=-1, dropout_ratio=0.5, init_std=0.01, norm_layer=BatchNorm,
             norm_kwargs=None, ctx=None, **kwargs):
    """Build the I3D ResNet stem, residual stages and classification head.

    Most arguments are stored verbatim as same-named attributes. The ones
    that shape the network in this constructor are:

    Parameters
    ----------
    nclass : int
        Number of output units of the final dense layer.
    depth : int
        Key into ``self.arch_settings`` selecting block type and stage sizes.
    num_stages : int, default 4
        Number of residual stages to build.
    spatial_strides, temporal_strides, dilations : sequence of int
        Per-stage settings; each must have ``num_stages`` entries.
    out_indices : sequence of int
        Stored; its maximum must be smaller than ``num_stages``.
    conv1_kernel_t, conv1_stride_t : int
        Temporal kernel size / stride of the stem convolution.
    pool1_kernel_t, pool1_stride_t : int
        Temporal kernel size / stride of the stem max-pool.
    inflate_freq, nonlocal_freq : int or sequence of int
        Per-stage values; a single int is repeated ``num_stages`` times.
    nonlocal_stages : sequence of int
        Stage indices that receive ``nonlocal_cfg``.
    norm_layer, norm_kwargs
        Normalization layer constructor and its extra keyword arguments.
    ctx : Context or None
        Passed to ``self.init_weights``.

    ``inflate_stride`` and ``**kwargs`` are not referenced in this constructor.

    Raises
    ------
    KeyError
        If ``depth`` is not a key of ``self.arch_settings``.
    """
    super(I3D_ResNetV1, self).__init__()

    if depth not in self.arch_settings:
        raise KeyError('invalid depth {} for resnet'.format(depth))

    self.nclass = nclass
    self.depth = depth
    self.num_stages = num_stages
    self.pretrained = pretrained
    self.pretrained_base = pretrained_base
    self.num_segments = num_segments
    self.spatial_strides = spatial_strides
    self.temporal_strides = temporal_strides
    self.dilations = dilations
    assert len(spatial_strides) == len(temporal_strides) == len(dilations) == num_stages
    self.out_indices = out_indices
    assert max(out_indices) < num_stages

    # A scalar frequency applies to every stage.
    if isinstance(inflate_freq, int):
        self.inflate_freqs = (inflate_freq, ) * num_stages
    else:
        self.inflate_freqs = inflate_freq
    self.inflate_style = inflate_style
    self.nonlocal_stages = nonlocal_stages
    if isinstance(nonlocal_freq, int):
        self.nonlocal_freqs = (nonlocal_freq, ) * num_stages
    else:
        self.nonlocal_freqs = nonlocal_freq
    self.nonlocal_cfg = nonlocal_cfg
    self.bn_eval = bn_eval
    self.bn_frozen = bn_frozen
    self.partial_bn = partial_bn
    self.frozen_stages = frozen_stages
    self.dropout_ratio = dropout_ratio
    self.init_std = init_std

    self.block, stage_blocks = self.arch_settings[depth]
    self.stage_blocks = stage_blocks[:num_stages]
    self.inplanes = 64

    # Stem: conv -> norm -> relu -> max-pool
    stem_norm_kwargs = {} if norm_kwargs is None else norm_kwargs
    self.first_stage = nn.HybridSequential(prefix='')
    self.first_stage.add(
        nn.Conv3D(in_channels=3, channels=64,
                  kernel_size=(conv1_kernel_t, 7, 7),
                  strides=(conv1_stride_t, 2, 2),
                  padding=((conv1_kernel_t - 1)//2, 3, 3),
                  use_bias=False))
    self.first_stage.add(norm_layer(in_channels=64, **stem_norm_kwargs))
    self.first_stage.add(nn.Activation('relu'))
    self.first_stage.add(
        nn.MaxPool3D(pool_size=(pool1_kernel_t, 3, 3),
                     strides=(pool1_stride_t, 2, 2),
                     padding=(pool1_kernel_t//2, 1, 1)))

    self.pool2 = nn.MaxPool3D(pool_size=(2, 1, 1), strides=(2, 1, 1), padding=(0, 0, 0))

    self.res_layers = nn.HybridSequential(prefix='')
    for i, num_blocks in enumerate(self.stage_blocks):
        planes = 64 * 2**i
        # Non-local blocks are configured only for the selected stages.
        stage_nonlocal_cfg = self.nonlocal_cfg if i in self.nonlocal_stages else None
        res_layer = make_res_layer(self.block,
                                   self.inplanes,
                                   planes,
                                   num_blocks,
                                   spatial_stride=spatial_strides[i],
                                   temporal_stride=temporal_strides[i],
                                   dilation=dilations[i],
                                   inflate_freq=self.inflate_freqs[i],
                                   inflate_style=self.inflate_style,
                                   nonlocal_freq=self.nonlocal_freqs[i],
                                   nonlocal_cfg=stage_nonlocal_cfg,
                                   norm_layer=norm_layer,
                                   norm_kwargs=norm_kwargs,
                                   layer_name='layer{}_'.format(i + 1))
        self.inplanes = planes * self.block.expansion
        self.res_layers.add(res_layer)

    self.feat_dim = self.block.expansion * 64 * 2**(len(self.stage_blocks) - 1)

    # We use ``GlobalAvgPool3D`` here for simplicity. Otherwise the input size must be fixed.
    # You can also use ``AvgPool3D`` and specify the arguments on your own, e.g.
    # self.st_avg = nn.AvgPool3D(pool_size=(4, 7, 7), strides=1, padding=0)
    # ``AvgPool3D`` is 10% faster, but ``GlobalAvgPool3D`` makes the code cleaner.
    self.st_avg = nn.GlobalAvgPool3D()

    self.head = nn.HybridSequential(prefix='')
    self.head.add(nn.Dropout(rate=self.dropout_ratio))
    self.fc = nn.Dense(in_units=self.feat_dim, units=nclass,
                       weight_initializer=init.Normal(sigma=self.init_std))
    self.head.add(self.fc)

    self.init_weights(ctx)
def __init__(self, in_channels_list, out_channels_list, num_branches, num_subblocks,
             bn_use_global_stats, **kwargs):
    """Build the parallel branches and, for more than one branch, the fuse layers.

    Parameters
    ----------
    in_channels_list : list of int
        Input channels per branch. The list is stored by reference and each
        entry is overwritten in place with that branch's output channels.
    out_channels_list : list of int
        Output channels per branch.
    num_branches : int
        Number of parallel branches.
    num_subblocks : list of int
        Number of ``ResUnit`` sub-blocks per branch.
    bn_use_global_stats : bool
        Forwarded to every block.
    **kwargs
        Forwarded to the parent constructor.
    """
    super(HRBlock, self).__init__(**kwargs)
    self.in_channels_list = in_channels_list
    self.num_branches = num_branches

    with self.name_scope():
        self.branches = nn.HybridSequential(prefix="")
        for i in range(num_branches):
            branch = nn.HybridSequential(prefix="branch{}_".format(i + 1))
            width_in = self.in_channels_list[i]
            width_out = out_channels_list[i]
            for _ in range(num_subblocks[i]):
                branch.add(
                    ResUnit(in_channels=width_in,
                            out_channels=width_out,
                            strides=1,
                            bottleneck=False,
                            bn_use_global_stats=bn_use_global_stats))
                width_in = width_out
            # Same object as ``in_channels_list``: the fuse layers below read
            # the updated widths.
            self.in_channels_list[i] = width_out
            self.branches.add(branch)

        if num_branches > 1:
            self.fuse_layers = nn.HybridSequential(prefix="")
            for i in range(num_branches):
                fuse_layer = nn.HybridSequential(
                    prefix="fuse_layer{}_".format(i + 1))
                for j in range(num_branches):
                    if j == i:
                        fuse_layer.add(Identity())
                    elif j > i:
                        fuse_layer.add(
                            UpSamplingBlock(
                                in_channels=in_channels_list[j],
                                out_channels=in_channels_list[i],
                                bn_use_global_stats=bn_use_global_stats,
                                scale_factor=2**(j - i)))
                    else:
                        # i - j stride-2 convolutions; only the last one
                        # changes the channel count and it has no activation.
                        conv3x3_seq = nn.HybridSequential(
                            prefix="conv3x3_seq{}_".format(j + 1))
                        num_steps = i - j
                        for k in range(num_steps):
                            is_last_step = (k == num_steps - 1)
                            if is_last_step:
                                step = conv3x3_block(
                                    in_channels=in_channels_list[j],
                                    out_channels=in_channels_list[i],
                                    strides=2,
                                    activation=None,
                                    bn_use_global_stats=bn_use_global_stats)
                            else:
                                step = conv3x3_block(
                                    in_channels=in_channels_list[j],
                                    out_channels=in_channels_list[j],
                                    strides=2,
                                    bn_use_global_stats=bn_use_global_stats)
                            conv3x3_seq.add(step)
                        fuse_layer.add(conv3x3_seq)
                self.fuse_layers.add(fuse_layer)
            self.activ = nn.Activation("relu")