Example #1
parser.add_argument(
    '--bits',
    type=int,
    help='Number of bits for binarization/quantization')
parser.add_argument(
    '--log-interval',
    type=int,
    default=100,
    metavar='N',
    help='how many batches to wait before logging training status')
opt = parser.parse_args()

num_channels_conv = 64
act = 'tanh'
num_fc = 1000
num_outputs = 10

# define network
net = nn.HybridSequential(prefix="")
with net.name_scope():
    if opt.bits == 1:
        net.add(gluon.nn.Conv2D(channels=num_channels_conv, kernel_size=5))
        net.add(gluon.nn.Activation(activation=act))
        net.add(gluon.nn.MaxPool2D(pool_size=2, strides=2))
        net.add(gluon.nn.BatchNorm(axis=1, center=True, scale=True))

        net.add(gluon.nn.QActivation())
        net.add(gluon.nn.QConv2D(channels=num_channels_conv, kernel_size=5))
        net.add(gluon.nn.BatchNorm(axis=1, center=True, scale=True))
        net.add(gluon.nn.MaxPool2D(pool_size=2, strides=2))

        # The Flatten layer collapses all axes except the first one into a single axis.
        net.add(gluon.nn.Flatten())
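The snippet stops after Flatten, but once the remaining layers are added, a HybridSequential built this way is initialized, hybridized, and called like any other Gluon block. A minimal usage sketch (assuming the definition above is completed; the MNIST-like input shape is illustrative only):

import mxnet as mx

# minimal usage sketch, assuming the network definition above is complete
net.initialize(mx.init.Xavier(), ctx=mx.cpu())
net.hybridize()  # compile/fuse the graph for faster execution

x = mx.nd.random.uniform(shape=(64, 1, 28, 28))  # hypothetical MNIST-like batch
print(net(x).shape)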
Example #2
    def __init__(self,
                 alpha=1.0,
                 beta=1.0,
                 dropout_rate=0.0,
                 classes=1000,
                 **kwargs):
        super(EfficientNet, self).__init__(**kwargs)
        with self.name_scope():
            self.features = nn.HybridSequential(prefix='features_')
            with self.features.name_scope():
                # stem conv
                _add_conv(self.features,
                          int(32 * beta),
                          kernel=3,
                          stride=2,
                          pad=1)

                # base model settings
                repeats = [1, 2, 2, 3, 3, 4, 1]
                channels_num = [16, 24, 40, 80, 112, 192, 320]
                kernels_num = [3, 3, 5, 3, 5, 5, 3]
                t_num = [1, 6, 6, 6, 6, 6, 6]
                strides_first = [1, 2, 2, 1, 2, 2, 1]

                # determine params of MBConv layers
                in_channels_group = []
                for rep, ch_num in zip([1] + repeats[:-1],
                                       [32] + channels_num[:-1]):
                    in_channels_group += [int(ch_num * beta)] * int(
                        ceil(alpha * rep))
                channels_group, kernels, ts, strides = [], [], [], []
                for rep, ch, kernel, t, s in zip(repeats, channels_num,
                                                 kernels_num, t_num,
                                                 strides_first):
                    rep = int(ceil(alpha * rep))
                    channels_group += [int(ch * beta)] * rep
                    kernels += [kernel] * rep
                    ts += [t] * rep
                    strides += [s] + [1] * (rep - 1)

                # add MBConv layers
                for in_c, c, t, k, s in zip(in_channels_group, channels_group,
                                            ts, kernels, strides):
                    self.features.add(
                        MBConv(in_channels=in_c,
                               channels=c,
                               t=t,
                               kernel=k,
                               stride=s))

                # head layers
                last_channels = int(1280 * beta) if beta > 1.0 else 1280
                _add_conv(self.features, last_channels)
                self.features.add(nn.GlobalAvgPool2D())

            # features dropout
            self.dropout = nn.Dropout(
                dropout_rate) if dropout_rate > 0.0 else None

            # output layer
            self.output = nn.HybridSequential(prefix='output_')
            with self.output.name_scope():
                self.output.add(
                    nn.Conv2D(classes, 1, use_bias=False, prefix='pred_'),
                    nn.Flatten())
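Only the constructor is shown above; the forward pass for this layout would presumably run the feature extractor, apply the optional dropout, and finish with the 1x1-conv classifier head. A hedged sketch following the standard Gluon HybridBlock pattern (not verbatim code from the original repository):

    def hybrid_forward(self, F, x):
        # plausible forward flow inferred from the constructor above
        x = self.features(x)      # stem conv + MBConv stages + GlobalAvgPool2D
        if self.dropout is not None:
            x = self.dropout(x)
        x = self.output(x)        # 1x1 conv classifier head + Flatten
        return x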
Example #3
def make_res_layer(block,
                   inplanes,
                   planes,
                   blocks,
                   spatial_stride=1,
                   temporal_stride=1,
                   dilation=1,
                   inflate_freq=1,
                   inflate_style='3x1x1',
                   nonlocal_freq=1,
                   nonlocal_cfg=None,
                   norm_layer=BatchNorm,
                   norm_kwargs=None,
                   layer_name=''):

    inflate_freq = inflate_freq if not isinstance(inflate_freq, int) else (inflate_freq, ) * blocks
    nonlocal_freq = nonlocal_freq if not isinstance(nonlocal_freq, int) else (nonlocal_freq, ) * blocks
    assert len(inflate_freq) == blocks
    assert len(nonlocal_freq) == blocks

    downsample = None
    if spatial_stride != 1 or inplanes != planes * block.expansion:
        downsample = nn.HybridSequential(prefix=layer_name+'downsample_')
        with downsample.name_scope():
            downsample.add(nn.Conv3D(in_channels=inplanes,
                                     channels=planes * block.expansion,
                                     kernel_size=1,
                                     strides=(temporal_stride, spatial_stride, spatial_stride),
                                     use_bias=False))
            downsample.add(norm_layer(in_channels=planes * block.expansion, **({} if norm_kwargs is None else norm_kwargs)))


    layers = nn.HybridSequential(prefix=layer_name)
    cnt = 0
    with layers.name_scope():
        layers.add(block(inplanes=inplanes,
                         planes=planes,
                         spatial_stride=spatial_stride,
                         temporal_stride=temporal_stride,
                         dilation=dilation,
                         downsample=downsample,
                         if_inflate=(inflate_freq[0] == 1),
                         inflate_style=inflate_style,
                         if_nonlocal=(nonlocal_freq[0] == 1),
                         nonlocal_cfg=nonlocal_cfg,
                         layer_name='%d_' % cnt))

        cnt += 1
        inplanes = planes * block.expansion
        for i in range(1, blocks):
            layers.add(block(inplanes=inplanes,
                             planes=planes,
                             spatial_stride=1,
                             temporal_stride=1,
                             dilation=dilation,
                             if_inflate=(inflate_freq[i] == 1),
                             inflate_style=inflate_style,
                             if_nonlocal=(nonlocal_freq[i] == 1),
                             nonlocal_cfg=nonlocal_cfg,
                             layer_name='%d_' % cnt))
            cnt += 1
    return layers
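A purely hypothetical call sketch; `Bottleneck3D` stands in for whatever residual block class (exposing an `expansion` attribute) the surrounding code defines, and the argument values are illustrative:

# hypothetical usage sketch; Bottleneck3D is a stand-in for the real block class
res2 = make_res_layer(Bottleneck3D,
                      inplanes=64,
                      planes=64,
                      blocks=3,
                      spatial_stride=1,
                      temporal_stride=1,
                      inflate_freq=(1, 0, 1),
                      layer_name='res2_')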
Example #4
 def __init__(self, args, config):
 
   super(Segmentator, self).__init__()
   
   # self.actions = actions
   self.pms = config['list_puncuation_marks']
   self.config = config
   self.num_layers = 6
   self.num_heads = 12
   self.hidden_size = 512
   self.max_seq_length = config['int_max_length']
   self.units = 768
   self.args = args
   self.beam_size = 5
    if args is None or args.decoder:
     self.error_type = ['correct', 'R', 'S', 'M', 'W', '[START]', '[END]']
   else:
     self.error_type = ['correct', 'R', 'S', 'M', 'W']
   
   if self.args.dataset == 'CGED16':
     with self.name_scope():
       self.vocab_tgt = None
        self.encoder, self.vocab_src = nlp.model.get_model(
            'bert_12_768_12', dataset_name='wiki_cn_cased',
            use_classifier=False, use_decoder=False, pretrained=False)
       if args.decoder:
         self.emb_tgt = nn.HybridSequential()
       
         self.emb_tgt.add(nn.Embedding(len(self.error_type), self.units))
         self.emb_tgt.add(nn.Dropout(0.5))
         self.decoder = trans.TransformerDecoder(attention_cell = 'multi_head', 
                                               num_layers = self.num_layers,
                                               units = self.units, hidden_size = self.hidden_size, max_length = self.max_seq_length,
                                               num_heads = self.num_heads, scaled=True, dropout=0.1,
                                               use_residual = True, output_attention=False,
                                               weight_initializer=None, bias_initializer='zeros',
                                               scale_embed=True, prefix=None, params=None)
         self.beam_scorer = nlp.model.BeamSearchScorer()
         self.beam_sampler = nlp.model.BeamSearchSampler(beam_size = self.beam_size,
                                           decoder = self._decode_step_CGED,
                                          eos_id = self.error_type.index('[END]'),
                                          scorer = self.beam_scorer,
                                          max_length = self.max_seq_length)
                                          
         
         self.seq_sampler = nlp.model.SequenceSampler(beam_size = self.beam_size,
                                       decoder = self._decode_step_CGED,
                                       eos_id = self.error_type.index('[END]'),
                                       max_length = self.max_seq_length,
                                       temperature = 0.97)
                                         #  vocab_size = len(self.error_type))
        self.tokenizer = nlp.data.BERTTokenizer(self.vocab_src, lower=False)
        self.transformer = nlp.data.BERTSentenceTransform(
            self.tokenizer, max_seq_length=self.max_seq_length, pair=False, pad=True)
       
       self.dropout = nn.Dropout(0.5) 
       self.fc_error = nn.Dense(len(self.error_type), flatten = False)
       # self.fc_start = nn.Dense(2, flatten = False) # Binary
       # self.fc_end = nn.Dense(2, flatten = False) # Binary
   else:
     with self.name_scope():
        self.encoder, self.vocab_src = nlp.model.get_model(
            'bert_12_768_12', dataset_name='wiki_cn_cased',
            use_classifier=False, use_decoder=False, pretrained=True)
       # self.encoder = trans.TransformerEncoder(attention_cell='multi_head',
       # num_layers=2, units=300, hidden_size=2048,
       # max_length=150, num_heads=4, scaled=True, dropout=0.0,     use_residual=True, output_attention=False, weight_initializer=None, bias_initializer='zeros', prefix=None, params=None)
       # self.encoder2 = nlp.model.TransformerEncoder(attention_cell='multi_head',
       #   num_layers=2, units=768, hidden_size=2048,
       #   max_length = self.max_seq_length,
       #   num_heads = 8,
       #   scaled=True,
       #   dropout=0.1,
       #   use_residual=True,
       #   output_attention=False,
       #   weight_initializer=None,
       #   bias_initializer='zeros', prefix=None, params=None)
       # if (self.args.use_tc):
       # keys = self.vocab_src.token_to_idx.keys()
       
       # key_to_check = ['E', 'e', 'e', '1', '1']
       
       # for k in key_to_check:
       
       #   print('{} => {}'.format(k, k in keys))
       
       # raise
       self.counter_tgt = nlp.data.count_tokens(self.config['str_character_target'])
       self.vocab_tgt = nlp.vocab.BERTVocab(self.counter_tgt)
     
       # keys = self.vocab_tgt.token_to_idx.keys()
       
       # key_to_check = ['E', 'e', 'e', '1', '1']
       
       # for k in key_to_check:
       
       #   print('{} => {}'.format(k, k in keys))
       
       # raise
       self.dropout = nn.Dropout(0.5) 
       self.decoder = trans.TransformerDecoder(attention_cell = 'multi_head', 
                                               num_layers = self.num_layers,
                                               units = self.units, hidden_size = self.hidden_size, max_length = self.max_seq_length,
                                               num_heads = self.num_heads, scaled=True, dropout=0.1,
                                               use_residual = True, output_attention=False,
                                               weight_initializer=None, bias_initializer='zeros',
                                               scale_embed=True, prefix=None, params=None)
       
       # self.decoder_action = trans.TransformerDecoder(attention_cell = 'multi_head', 
       #                                         num_layers = self.num_layers,
       #                                         units = self.units, hidden_size = self.hidden_size, max_length = self.max_seq_length,
       #                                         num_heads = self.num_heads, scaled=True, dropout=0.1,
       #                                         use_residual = True, output_attention=False,
       #                                         weight_initializer=None, bias_initializer='zeros',
       #                                         scale_embed=True, prefix=None, params=None)
                                               
       # self.fc_actions = nn.Dense(len(self.actions), flatten = False)
       # self.fc_pms = nn.Dense(len(self.pms), flatten = False)
       self.fc_proj = nn.Dense(len(self.vocab_tgt), flatten = False, in_units = 768)
       self.emb_tgt = nn.HybridSequential()
       self.fc_pm_error = nn.Dense(2, flatten = False, in_units = 768)
       self.fc_pm_remove = nn.Dense(2, flatten = False, in_units = 768)
       self.fc_pm_add = nn.Dense(2, flatten = False, in_units = 768)
       # self.fc_error = nn.Dense(len(self.error_type), flatten = False, in_units = 768)
       # self.fc_correction = nn.Dense(len(self.vocab_tgt) + 1, flatten = False, in_units = 768) 
       
       self.emb_tgt.add(nn.Embedding(len(self.vocab_tgt), self.units))
       self.emb_tgt.add(nn.Dropout(0.5))
       
       # self.emb_actions = (nn.Embedding(input_dim = len(self.actions), output_dim = self.units))
       # self.emb_pms = (nn.Embedding(input_dim = len(self.pms), output_dim = self.units))
       # self.emb 
        self.tokenizer = nlp.data.BERTTokenizer(self.vocab_src, lower=True)
        self.transformer = nlp.data.BERTSentenceTransform(
            self.tokenizer, max_seq_length=self.max_seq_length, pair=False, pad=True)
     self.beam_scorer = nlp.model.BeamSearchScorer()
     self.beam_sampler = nlp.model.BeamSearchSampler(beam_size = self.beam_size,
                                           decoder = self._decode_step,
                                          eos_id = self.vocab_tgt.token_to_idx[self.vocab_tgt.sep_token],
                                          scorer = self.beam_scorer,
                                          max_length = self.max_seq_length)
Example #5
    def __init__(
        self,
        context_length: int,
        prediction_length: int,
        d_hidden: int,
        d_var: int,
        n_head: int,
        dropout: float = 0.0,
        **kwargs,
    ):
        super(TemporalFusionDecoder, self).__init__(**kwargs)
        self.context_length = context_length
        self.prediction_length = prediction_length

        with self.name_scope():
            self.enrich = GatedResidualNetwork(
                d_hidden=d_hidden,
                d_static=d_var,
                dropout=dropout,
            )
            self.attention = SelfAttention(
                context_length=context_length,
                prediction_length=prediction_length,
                d_hidden=d_hidden,
                n_head=n_head,
                share_values=True,
                dropout=dropout,
            )
            self.att_net = nn.HybridSequential(prefix="attention_")
            self.att_net.add(nn.Dropout(dropout))
            self.att_net.add(
                nn.Dense(
                    units=d_hidden * 2,
                    in_units=d_hidden,
                    flatten=False,
                    weight_initializer=init.Xavier(),
                ))
            self.att_net.add(GatedLinearUnit(
                axis=-1,
                nonlinear=False,
            ))
            self.att_lnorm = nn.LayerNorm(
                axis=-1,
                in_channels=d_hidden,
            )
            self.ff_net = nn.HybridSequential()
            self.ff_net.add(GatedResidualNetwork(
                d_hidden,
                dropout=dropout,
            ))
            self.ff_net.add(
                nn.Dense(
                    units=d_hidden * 2,
                    in_units=d_hidden,
                    flatten=False,
                    weight_initializer=init.Xavier(),
                ))
            self.ff_net.add(GatedLinearUnit(
                axis=-1,
                nonlinear=False,
            ))
            self.ff_lnorm = nn.LayerNorm(axis=-1, in_channels=d_hidden)
Example #6
def transition_block(channels):
    out = nn.HybridSequential()
    out.add(nn.BatchNorm(), nn.Activation('relu'),
            nn.Conv2D(channels, kernel_size=1),
            nn.AvgPool2D(pool_size=2, strides=2))
    return out
Example #7
    def __init__(self,
                 in_channels,
                 channels,
                 strides=1,
                 dilation=1,
                 groups=1,
                 norm_act=bnrelu,
                 dropout=None,
                 dist_bn=False
                 ):
        """Configurable identity-mapping residual block

        Parameters
        ----------
        in_channels : int
            Number of input channels.
        channels : list of int
            Number of channels in the internal feature maps.
            Can either have two or three elements: with two, construct
            a residual block with two `3 x 3` convolutions; with three,
            construct a bottleneck block with `1 x 1`, then `3 x 3`,
            then `1 x 1` convolutions.
        strides : int
            Stride of the first `3 x 3` convolution
        dilation : int
            Dilation to apply to the `3 x 3` convolutions.
        groups : int
            Number of convolution groups.
            This is used to create ResNeXt-style blocks and is only compatible with
            bottleneck blocks.
        norm_act : callable
            Function to create normalization / activation Module.
        dropout : callable
            Function to create a Dropout Module.
        dist_bn : bool
            Whether to enable distributed batch normalization.
        """
        super(IdentityResidualBlock, self).__init__()
        self.dist_bn = dist_bn

        # Check parameters for inconsistencies
        if len(channels) != 2 and len(channels) != 3:
            raise ValueError("channels must contain either two or three values")
        if len(channels) == 2 and groups != 1:
            raise ValueError("groups > 1 are only valid if len(channels) == 3")

        is_bottleneck = len(channels) == 3
        need_proj_conv = strides != 1 or in_channels != channels[-1]

        self.bn1 = norm_act(in_channels)
        if not is_bottleneck:
            layers = [
                ("conv1", nn.Conv2D(in_channels=in_channels,
                                    channels=channels[0],
                                    kernel_size=3,
                                    strides=strides,
                                    padding=dilation,
                                    use_bias=False,
                                    dilation=dilation)),
                ("bn2", norm_act(channels[0])),
                ("conv2", nn.Conv2D(in_channels=channels[0],
                                    channels=channels[1],
                                    kernel_size=3,
                                    strides=1,
                                    padding=dilation,
                                    use_bias=False,
                                    dilation=dilation))
            ]
            if dropout is not None:
                layers = layers[0:2] + [("dropout", dropout())] + layers[2:]
        else:
            layers = [
                ("conv1",
                 nn.Conv2D(in_channels=in_channels,
                           channels=channels[0],
                           kernel_size=1,
                           strides=strides,
                           padding=0,
                           use_bias=False)),
                ("bn2", norm_act(channels[0])),
                ("conv2", nn.Conv2D(in_channels=channels[0],
                                    channels=channels[1],
                                    kernel_size=3,
                                    strides=1,
                                    padding=dilation,
                                    use_bias=False,
                                    groups=groups,
                                    dilation=dilation)),
                ("bn3", norm_act(channels[1])),
                ("conv3", nn.Conv2D(in_channels=channels[1],
                                    channels=channels[2],
                                    kernel_size=1,
                                    strides=1,
                                    padding=0,
                                    use_bias=False))
            ]
            if dropout is not None:
                layers = layers[0:4] + [("dropout", dropout())] + layers[4:]

        layer_dict = OrderedDict(layers)
        self.convs = nn.HybridSequential(prefix='')
        for key in layer_dict.keys():
            self.convs.add(layer_dict[key])

        if need_proj_conv:
            self.proj_conv = nn.Conv2D(in_channels=in_channels,
                                       channels=channels[-1],
                                       kernel_size=1,
                                       strides=strides,
                                       padding=0,
                                       use_bias=False)
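The constructor above only builds the branches; the companion forward method is not shown. A hedged sketch of the usual pre-activation residual flow implied by `bn1`, `convs`, and the optional `proj_conv` (the original method may differ in detail):

    def hybrid_forward(self, F, x):
        # shared pre-activation feeds both the residual branch and the shortcut
        bn1 = self.bn1(x)
        if hasattr(self, 'proj_conv'):
            shortcut = self.proj_conv(bn1)  # projection shortcut when shape changes
        else:
            shortcut = x                    # plain identity shortcut
        return self.convs(bn1) + shortcut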
Example #8
def faster_rcnn_resnet101_v1d_custom(classes,
                                     transfer=None,
                                     pretrained_base=True,
                                     pretrained=False,
                                     **kwargs):
    r"""Faster RCNN model with resnet101_v1d base network on custom dataset.

    Parameters
    ----------
    classes : iterable of str
        Names of custom foreground classes. `len(classes)` is the number of foreground classes.
    transfer : str or None
        If not `None`, will try to reuse pre-trained weights from faster RCNN networks trained
        on other datasets.
    pretrained_base : bool or str
        Boolean value controls whether to load the default pretrained weights for model.
        String value represents the hashtag for a certain version of pretrained weights.
    ctx : Context, default CPU
        The context in which to load the pretrained weights.
    root : str, default '~/.mxnet/models'
        Location for keeping the model parameters.

    Returns
    -------
    mxnet.gluon.HybridBlock
        Hybrid faster RCNN network.
    """
    if pretrained:
        warnings.warn(
            "Custom models don't provide `pretrained` weights, ignored.")
    if transfer is None:
        from ..resnetv1b import resnet101_v1d
        base_network = resnet101_v1d(pretrained=pretrained_base,
                                     dilated=False,
                                     use_global_stats=True,
                                     **kwargs)
        features = nn.HybridSequential()
        top_features = nn.HybridSequential()
        for layer in [
                'conv1', 'bn1', 'relu', 'maxpool', 'layer1', 'layer2', 'layer3'
        ]:
            features.add(getattr(base_network, layer))
        for layer in ['layer4']:
            top_features.add(getattr(base_network, layer))
        train_patterns = '|'.join(
            ['.*dense', '.*rpn', '.*down(2|3|4)_conv', '.*layers(2|3|4)_conv'])
        return get_faster_rcnn(name='resnet101_v1d',
                               dataset='custom',
                               pretrained=pretrained,
                               features=features,
                               top_features=top_features,
                               classes=classes,
                               short=600,
                               max_size=1000,
                               train_patterns=train_patterns,
                               nms_thresh=0.3,
                               nms_topk=400,
                               post_nms=100,
                               roi_mode='align',
                               roi_size=(14, 14),
                               stride=16,
                               clip=None,
                               rpn_channel=1024,
                               base_size=16,
                               scales=(2, 4, 8, 16, 32),
                               ratios=(0.5, 1, 2),
                               alloc_size=(128, 128),
                               rpn_nms_thresh=0.7,
                               rpn_train_pre_nms=12000,
                               rpn_train_post_nms=2000,
                               rpn_test_pre_nms=6000,
                               rpn_test_post_nms=300,
                               rpn_min_size=16,
                               num_sample=128,
                               pos_iou_thresh=0.5,
                               pos_ratio=0.25,
                               max_num_gt=300,
                               **kwargs)
    else:
        from ...model_zoo import get_model
        net = get_model('faster_rcnn_resnet101_v1d_' + str(transfer),
                        pretrained=True,
                        **kwargs)
        reuse_classes = [x for x in classes if x in net.classes]
        net.reset_class(classes, reuse_weights=reuse_classes)
    return net
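A hedged usage sketch for the `transfer=None` path (the class names are placeholders; with a non-None transfer tag the function instead reuses weights from a matching pretrained Faster RCNN, assuming such a model exists in the model zoo):

# hedged usage sketch; 'person' and 'dog' are placeholder class names
net = faster_rcnn_resnet101_v1d_custom(classes=['person', 'dog'],
                                       transfer=None,
                                       pretrained_base=True)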
Example #9
def body():
    """return the body network"""
    out = nn.HybridSequential()
    for nfilters in [16, 32, 64]:
        out.add(down_sample(nfilters))
    return out
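body() relies on a down_sample helper that is not shown here. A plausible definition, stated as an assumption (two 3x3 conv-BN-ReLU blocks followed by 2x2 max pooling, the usual pattern in SSD-style tutorials):

def down_sample(num_filters):
    # assumed helper: two conv-BN-ReLU blocks followed by 2x2 max pooling
    out = nn.HybridSequential()
    for _ in range(2):
        out.add(nn.Conv2D(num_filters, 3, strides=1, padding=1),
                nn.BatchNorm(in_channels=num_filters),
                nn.Activation('relu'))
    out.add(nn.MaxPool2D(2))
    return out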
Example #10
 def __init__(self, block, layers, cardinality=1, bottleneck_width=64,
              classes=1000, dilated=False, dilation=1, norm_layer=BatchNorm,
              norm_kwargs=None, last_gamma=False, deep_stem=False, stem_width=32,
              avg_down=False, final_drop=0.0, use_global_stats=False,
              name_prefix='', dropblock_prob=0, input_size=224,
              use_splat=False, radix=2, avd=False, avd_first=False, split_drop_ratio=0):
     self.cardinality = cardinality
     self.bottleneck_width = bottleneck_width
     self.inplanes = stem_width*2 if deep_stem else 64
     self.radix = radix
     self.split_drop_ratio = split_drop_ratio
     self.avd_first = avd_first
     super(ResNest, self).__init__(prefix=name_prefix)
     norm_kwargs = norm_kwargs if norm_kwargs is not None else {}
     if use_global_stats:
         norm_kwargs['use_global_stats'] = True
     self.norm_kwargs = norm_kwargs
     with self.name_scope():
         if not deep_stem:
             self.conv1 = nn.Conv2D(channels=64, kernel_size=7, strides=2,
                                    padding=3, use_bias=False, in_channels=3)
         else:
             self.conv1 = nn.HybridSequential(prefix='conv1')
             self.conv1.add(nn.Conv2D(channels=stem_width, kernel_size=3, strides=2,
                                      padding=1, use_bias=False, in_channels=3))
             self.conv1.add(norm_layer(in_channels=stem_width, **norm_kwargs))
             self.conv1.add(nn.Activation('relu'))
             self.conv1.add(nn.Conv2D(channels=stem_width, kernel_size=3, strides=1,
                                      padding=1, use_bias=False, in_channels=stem_width))
             self.conv1.add(norm_layer(in_channels=stem_width, **norm_kwargs))
             self.conv1.add(nn.Activation('relu'))
             self.conv1.add(nn.Conv2D(channels=stem_width*2, kernel_size=3, strides=1,
                                      padding=1, use_bias=False, in_channels=stem_width))
         input_size = _update_input_size(input_size, 2)
         self.bn1 = norm_layer(in_channels=64 if not deep_stem else stem_width*2,
                               **norm_kwargs)
         self.relu = nn.Activation('relu')
         self.maxpool = nn.MaxPool2D(pool_size=3, strides=2, padding=1)
         input_size = _update_input_size(input_size, 2)
         self.layer1 = self._make_layer(1, block, 64, layers[0], avg_down=avg_down,
                                        norm_layer=norm_layer, last_gamma=last_gamma, use_splat=use_splat,
                                        avd=avd)
         self.layer2 = self._make_layer(2, block, 128, layers[1], strides=2, avg_down=avg_down,
                                        norm_layer=norm_layer, last_gamma=last_gamma, use_splat=use_splat,
                                        avd=avd)
         input_size = _update_input_size(input_size, 2)
         if dilated or dilation==4:
             self.layer3 = self._make_layer(3, block, 256, layers[2], strides=1, dilation=2,
                                            avg_down=avg_down, norm_layer=norm_layer,
                                            last_gamma=last_gamma, dropblock_prob=dropblock_prob,
                                            input_size=input_size, use_splat=use_splat, avd=avd)
             self.layer4 = self._make_layer(4, block, 512, layers[3], strides=1, dilation=4, pre_dilation=2,
                                            avg_down=avg_down, norm_layer=norm_layer,
                                            last_gamma=last_gamma, dropblock_prob=dropblock_prob,
                                            input_size=input_size, use_splat=use_splat, avd=avd)
         elif dilation==3:
             # special
             self.layer3 = self._make_layer(3, block, 256, layers[2], strides=1, dilation=2,
                                            avg_down=avg_down, norm_layer=norm_layer,
                                            last_gamma=last_gamma, dropblock_prob=dropblock_prob,
                                            input_size=input_size, use_splat=use_splat, avd=avd)
             self.layer4 = self._make_layer(4, block, 512, layers[3], strides=2, dilation=2, pre_dilation=2,
                                            avg_down=avg_down, norm_layer=norm_layer,
                                            last_gamma=last_gamma, dropblock_prob=dropblock_prob,
                                            input_size=input_size, use_splat=use_splat, avd=avd)
         elif dilation==2:
             self.layer3 = self._make_layer(3, block, 256, layers[2], strides=2,
                                            avg_down=avg_down, norm_layer=norm_layer,
                                            last_gamma=last_gamma, dropblock_prob=dropblock_prob,
                                            input_size=input_size, use_splat=use_splat, avd=avd)
             self.layer4 = self._make_layer(4, block, 512, layers[3], strides=1, dilation=2,
                                            avg_down=avg_down, norm_layer=norm_layer,
                                            last_gamma=last_gamma, dropblock_prob=dropblock_prob,
                                            input_size=input_size, use_splat=use_splat, avd=avd)
         else:
             self.layer3 = self._make_layer(3, block, 256, layers[2], strides=2,
                                            avg_down=avg_down, norm_layer=norm_layer,
                                            last_gamma=last_gamma, dropblock_prob=dropblock_prob,
                                            input_size=input_size, use_splat=use_splat, avd=avd)
             input_size = _update_input_size(input_size, 2)
             self.layer4 = self._make_layer(4, block, 512, layers[3], strides=2,
                                            avg_down=avg_down, norm_layer=norm_layer,
                                            last_gamma=last_gamma, dropblock_prob=dropblock_prob,
                                            input_size=input_size, use_splat=use_splat, avd=avd)
             input_size = _update_input_size(input_size, 2)
         self.avgpool = nn.GlobalAvgPool2D()
         self.flat = nn.Flatten()
         self.drop = None
         if final_drop > 0.0:
             self.drop = nn.Dropout(final_drop)
         self.fc = nn.Dense(in_units=512 * block.expansion, units=classes)
Example #11
    def _make_layer(self, stage_index, block, planes, blocks, strides=1, dilation=1,
                    pre_dilation=1, avg_down=False, norm_layer=None,
                    last_gamma=False,
                    dropblock_prob=0, input_size=224, use_splat=False, avd=False):
        downsample = None
        if strides != 1 or self.inplanes != planes * block.expansion:
            downsample = nn.HybridSequential(prefix='down%d_'%stage_index)
            with downsample.name_scope():
                if avg_down:
                    if pre_dilation == 1:
                        downsample.add(nn.AvgPool2D(pool_size=strides, strides=strides,
                                                    ceil_mode=True, count_include_pad=False))
                    elif strides==1:
                        downsample.add(nn.AvgPool2D(pool_size=1, strides=1,
                                                    ceil_mode=True, count_include_pad=False))
                    else:
                        downsample.add(nn.AvgPool2D(pool_size=pre_dilation*strides, strides=strides, padding=1,
                                                    ceil_mode=True, count_include_pad=False))
                    downsample.add(nn.Conv2D(channels=planes * block.expansion, kernel_size=1,
                                             strides=1, use_bias=False, in_channels=self.inplanes))
                    downsample.add(norm_layer(in_channels=planes * block.expansion,
                                              **self.norm_kwargs))
                else:
                    downsample.add(nn.Conv2D(channels=planes * block.expansion,
                                             kernel_size=1, strides=strides, use_bias=False,
                                             in_channels=self.inplanes))
                    downsample.add(norm_layer(in_channels=planes * block.expansion,
                                              **self.norm_kwargs))

        layers = nn.HybridSequential(prefix='layers%d_'%stage_index)
        with layers.name_scope():
            if dilation in (1, 2):
                layers.add(block(planes, cardinality=self.cardinality,
                                 bottleneck_width=self.bottleneck_width,
                                 strides=strides, dilation=pre_dilation,
                                 downsample=downsample, previous_dilation=dilation,
                                 norm_layer=norm_layer, norm_kwargs=self.norm_kwargs,
                                 last_gamma=last_gamma, dropblock_prob=dropblock_prob,
                                 input_size=input_size, use_splat=use_splat, avd=avd, avd_first=self.avd_first,
                                 radix=self.radix, in_channels=self.inplanes,
                                 split_drop_ratio=self.split_drop_ratio))
            elif dilation == 4:
                layers.add(block(planes, cardinality=self.cardinality,
                                 bottleneck_width=self.bottleneck_width,
                                 strides=strides, dilation=pre_dilation,
                                 downsample=downsample, previous_dilation=dilation,
                                 norm_layer=norm_layer, norm_kwargs=self.norm_kwargs,
                                 last_gamma=last_gamma, dropblock_prob=dropblock_prob,
                                 input_size=input_size, use_splat=use_splat, avd=avd, avd_first=self.avd_first,
                                 radix=self.radix, in_channels=self.inplanes,
                                 split_drop_ratio=self.split_drop_ratio))
            else:
                raise RuntimeError("=> unknown dilation size: {}".format(dilation))

            input_size = _update_input_size(input_size, strides)
            self.inplanes = planes * block.expansion
            for i in range(1, blocks):
                layers.add(block(planes, cardinality=self.cardinality,
                                 bottleneck_width=self.bottleneck_width, dilation=dilation,
                                 previous_dilation=dilation, norm_layer=norm_layer,
                                 norm_kwargs=self.norm_kwargs, last_gamma=last_gamma,
                                 dropblock_prob=dropblock_prob, input_size=input_size,
                                 use_splat=use_splat, avd=avd, avd_first=self.avd_first,
                                 radix=self.radix, in_channels=self.inplanes,
                                 split_drop_ratio=self.split_drop_ratio))

        return layers
Example #12
 def __init__(self,
              dilations,
              stages,
              channels,
              anchors,
              strides,
              classes,
              alloc_size=(128, 128),
              nms_thresh=0.45,
              nms_topk=400,
              post_nms=100,
              pos_iou_thresh=1.0,
              ignore_iou_thresh=0.7,
              num_sync_bn_devices=-1,
              **kwargs):
      super(YOLOV4, self).__init__(stages,
                                   channels,
                                   anchors,
                                   strides,
                                   classes,
                                   alloc_size=alloc_size,
                                   nms_thresh=nms_thresh,
                                   nms_topk=nms_topk,
                                   post_nms=post_nms,
                                   pos_iou_thresh=pos_iou_thresh,
                                   ignore_iou_thresh=ignore_iou_thresh,
                                   num_sync_bn_devices=num_sync_bn_devices,
                                   **kwargs)
     self._classes = classes
     self.nms_thresh = nms_thresh
     self.nms_topk = nms_topk
     self.post_nms = post_nms
     self._pos_iou_thresh = pos_iou_thresh
     self._ignore_iou_thresh = ignore_iou_thresh
     if pos_iou_thresh >= 1:
         self._target_generator = YOLOV3TargetMerger(
             len(classes), ignore_iou_thresh)
     else:
         raise NotImplementedError(
             "pos_iou_thresh({}) < 1.0 is not implemented!".format(
                 pos_iou_thresh))
     self._loss = YOLOV3Loss()
     with self.name_scope():
         self.stages = nn.HybridSequential()
         self.transitions = nn.HybridSequential()
         self.yolo_blocks = nn.HybridSequential()
         self.yolo_outputs = nn.HybridSequential()
         # note that anchors and strides and dilations should be used in reverse order
         for i, stage, channel, anchor, stride, dilation in zip(
                 range(len(stages)), stages, channels, anchors[::-1],
                 strides[::-1], dilations[::-1]):
             self.stages.add(stage)
             block = YOLODetectionBlockV4(channel, dilation,
                                          num_sync_bn_devices)
             self.yolo_blocks.add(block)
             output = YOLOOutputV3(i,
                                   len(classes),
                                   anchor,
                                   stride,
                                   alloc_size=alloc_size)
             self.yolo_outputs.add(output)
             if i > 0:
                 self.transitions.add(
                     _conv2d(channel, 1, 0, 1, num_sync_bn_devices))
Example #13
def test_DeformableConvolution():
    """test of the deformable convolution layer with possible combinations of arguments,
    currently this layer only supports gpu
    """
    net = nn.HybridSequential()
    net.add(
        DeformableConvolution(10, kernel_size=(3, 3), strides=1, padding=0),
        DeformableConvolution(10,
                              kernel_size=(3, 2),
                              strides=1,
                              padding=0,
                              activation='relu',
                              offset_use_bias=False,
                              use_bias=False),
        DeformableConvolution(10,
                              kernel_size=(3, 2),
                              strides=1,
                              padding=0,
                              activation='relu',
                              offset_use_bias=False),
        DeformableConvolution(10,
                              kernel_size=(3, 2),
                              strides=1,
                              padding=0,
                              activation='relu',
                              use_bias=False),
        DeformableConvolution(10,
                              kernel_size=(3, 2),
                              strides=1,
                              padding=0,
                              offset_use_bias=False,
                              use_bias=False),
        DeformableConvolution(10,
                              kernel_size=(3, 2),
                              strides=1,
                              padding=0,
                              offset_use_bias=False),
        DeformableConvolution(12,
                              kernel_size=(3, 2),
                              strides=1,
                              padding=0,
                              use_bias=False),
        DeformableConvolution(12,
                              kernel_size=(3, 2),
                              strides=1,
                              padding=0,
                              use_bias=False,
                              num_deformable_group=4),
    )

    try:
        ctx = mx.gpu()
        _ = mx.nd.array([0], ctx=ctx)
    except mx.base.MXNetError:
        print("deformable_convolution only supports GPU")
        return

    net.initialize(force_reinit=True, ctx=ctx)
    net.hybridize()

    x = mx.nd.random.uniform(shape=(8, 5, 30, 31), ctx=ctx)
    with mx.autograd.record():
        y = net(x)
        y.backward()
Example #14
 def get_net():
     net = nn.HybridSequential()  # Here we use the class HybridSequential.
     net.add(nn.Dense(256, activation='relu'),
             nn.Dense(128, activation='relu'), nn.Dense(2))
     return net
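A hedged sketch of the usual hybridize-and-export workflow for such a network (the input shape and export name are illustrative, not from the original tutorial):

import mxnet as mx

# minimal usage sketch; input shape and file name are illustrative
net = get_net()
net.initialize()
net.hybridize()                          # switch to symbolic execution
x = mx.nd.random.normal(shape=(1, 512))
net(x)                                   # first call traces and caches the graph
net.export('hybrid_mlp')                 # writes hybrid_mlp-symbol.json / -0000.params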
Example #15
def conv_block(channels):
    out = nn.HybridSequential()
    out.add(nn.BatchNorm(), nn.Activation('relu'),
            nn.Conv2D(channels, kernel_size=3, padding=1))
    return out
Example #16
class SwapAxes(nn.HybridBlock):
    def __init__(self, dim1, dim2):
        super(SwapAxes, self).__init__()
        self.dim1 = dim1
        self.dim2 = dim2

    # def forward(self, x):
    #     return nd.swapaxes(x, self.dim1, self.dim2)

    def hybrid_forward(self, F, x, *args, **kwargs):
        return F.swapaxes(x, self.dim1, self.dim2)


with mx.Context(mx.cpu(0)):
    model = nn.HybridSequential()
    model.add(
        SwapAxes(1, 2),
        CBR(40, 1),
        CBR(40),
        CBR(40),
        nn.MaxPool1D(2),
        CBR(80, 1),
        CBR(80),
        CBR(80),
        nn.MaxPool1D(2),
        CBR(160, 1),
        nn.Dropout(0.3),
        CBR(160),
        CBR(160),
        CBR(160),
Example #17
 def __init__(self, layers, growth_rate, **kwargs):
     super(DenseBlock, self).__init__(**kwargs)
     self.net = nn.HybridSequential()
     for i in range(layers):
         self.net.add(conv_block(growth_rate))
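The constructor only stacks conv_block instances; the dense connectivity comes from concatenating each block's output with its input along the channel axis. A hedged sketch of the companion forward method (the standard DenseNet pattern, not necessarily the original file's exact code):

 def hybrid_forward(self, F, x):
     # concatenate each block's output with its input on the channel axis
     for blk in self.net:
         y = blk(x)
         x = F.concat(x, y, dim=1)
     return x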
Example #18
    def __init__(self,
                 network,
                 base_size,
                 features,
                 num_filters,
                 sizes,
                 ratios,
                 steps,
                 classes,
                 use_1x1_transition=True,
                 use_bn=True,
                 reduce_ratio=1.0,
                 min_depth=128,
                 global_pool=False,
                 pretrained=False,
                 stds=(0.1, 0.1, 0.2, 0.2),
                 anchor_alloc_size=128,
                 nms_overlap_thresh=0.5,
                 nms_topk=200,
                 nms_valid_thresh=0.0,
                 post_nms=200,
                 norm_layer=GroupBatchNorm,
                 fuse_bn_relu=True,
                 fuse_bn_add_relu=True,
                 bn_fp16=False,
                 norm_kwargs=None,
                 predictors_kernel=(3, 3),
                 predictors_pad=(1, 1),
                 ctx=mx.cpu(),
                 layout='NCHW',
                 **kwargs):
        super(SSD, self).__init__(**kwargs)
        if norm_kwargs is None:
            norm_kwargs = {}
        if network is None:
            num_layers = len(ratios)
        else:
            num_layers = len(features) + len(num_filters) + int(global_pool)
        assert len(sizes) == num_layers + 1
        sizes = list(zip(sizes[:-1], sizes[1:]))
        assert isinstance(ratios,
                          list), "Must provide ratios as list or list of list"
        if not isinstance(ratios[0], (tuple, list)):
            ratios = ratios * num_layers  # propagate to all layers if use same ratio
        assert num_layers == len(sizes) == len(ratios), \
            f"Mismatched (number of layers) vs (sizes) vs (ratios): {num_layers}, {len(sizes)}, {len(ratios)}."
        assert num_layers > 0, "SSD require at least one layer, suggest multiple."
        self._num_layers = num_layers
        self.classes = classes
        self.nms_overlap_thresh = nms_overlap_thresh
        self.nms_topk = nms_topk
        self.nms_valid_thresh = nms_valid_thresh
        self.post_nms = post_nms
        self.layout = layout
        self.reduce_ratio = reduce_ratio
        self._bn_fp16 = bn_fp16
        self._bn_group = norm_kwargs.get('bn_group', 1)

        logging.info(f'[SSD] network: {network}')
        logging.info(f'[SSD] norm layer: {norm_layer}')
        logging.info(f'[SSD] fuse bn relu: {fuse_bn_relu}')
        logging.info(f'[SSD] fuse bn add relu: {fuse_bn_add_relu}')
        logging.info(f'[SSD] bn group: {self._bn_group}')

        with self.name_scope():
            if network is None:
                # use fine-grained manually designed block as features
                self.features = features(pretrained=pretrained,
                                         ctx=ctx,
                                         norm_layer=norm_layer,
                                         fuse_bn_relu=fuse_bn_relu,
                                         fuse_bn_add_relu=fuse_bn_add_relu,
                                         bn_fp16=bn_fp16,
                                         norm_kwargs=norm_kwargs)
            else:
                self.features = FeatureExpander(
                    network=network,
                    outputs=features,
                    num_filters=num_filters,
                    use_1x1_transition=use_1x1_transition,
                    use_bn=use_bn,
                    reduce_ratio=reduce_ratio,
                    min_depth=min_depth,
                    global_pool=global_pool,
                    pretrained=pretrained,
                    ctx=ctx,
                    norm_layer=norm_layer,
                    fuse_bn_relu=fuse_bn_relu,
                    fuse_bn_add_relu=fuse_bn_add_relu,
                    bn_fp16=bn_fp16,
                    norm_kwargs=norm_kwargs,
                    layout=layout)

            # use a single ConvPredictor for conf and loc predictors (head fusion),
            # but they are treated as two different segments
            self.predictors = nn.HybridSequential()
            self.num_defaults = [4, 6, 6, 6, 4, 4]
            padding_channels_to = 8
            # keep track of padding so conf/loc can be sliced correctly
            self.padding_amounts = []
            # keep track of offsets so conf/loc can be initialized correctly
            self.predictor_offsets = []
            for nd in self.num_defaults:
                # keep track of beginning/ending offsets for all segments
                offsets = [0]
                n = nd * (self.num_classes + 1)  # output channels for conf predictors
                offsets.append(n)
                n = n + nd * 4  # output channels for both conf and loc predictors
                offsets.append(n)
                # padding if necessary
                padding_amt = 0
                # manually pad to get HMMA kernels for NHWC layout
                if (self.layout == 'NHWC') and (n % padding_channels_to):
                    padding_amt = padding_channels_to - (n % padding_channels_to)
                    n = n + padding_amt
                    if padding_amt:
                        offsets.append(n)
                self.predictors.add(
                    ConvPredictor(n,
                                  kernel=predictors_kernel,
                                  pad=predictors_pad,
                                  layout=layout))
                self.predictor_offsets.append(offsets)
                self.padding_amounts.append(padding_amt)

            self.bbox_decoder = NormalizedBoxCenterDecoder(stds)
            self.cls_decoder = MultiPerClassDecoder(self.num_classes + 1,
                                                    thresh=0)
Example #19
    def __init__(self,
                 structure,
                 norm_act=bnrelu,
                 classes=0,
                 dilation=False,
                 dist_bn=False
                 ):
        super(WiderResNetA2, self).__init__()
        self.dist_bn = dist_bn

        norm_act = bnrelu
        self.structure = structure
        self.dilation = dilation

        if len(structure) != 6:
            raise ValueError("Expected a structure with six values")

        self.mod1 = nn.HybridSequential(prefix='mod1')
        self.mod1.add(nn.Conv2D(in_channels=3, channels=64,
                                kernel_size=3, strides=1, padding=1, use_bias=False))

        # Groups of residual blocks
        in_channels = 64
        channels = [(128, 128), (256, 256), (512, 512), (512, 1024), (512, 1024, 2048),
                    (1024, 2048, 4096)]
        for mod_id, num in enumerate(structure):
            # Create blocks for module
            blocks = []
            for block_id in range(num):
                if not dilation:
                    dil = 1
                    strides = 2 if block_id == 0 and 2 <= mod_id <= 4 else 1
                else:
                    if mod_id == 3:
                        dil = 2
                    elif mod_id > 3:
                        dil = 4
                    else:
                        dil = 1
                    strides = 2 if block_id == 0 and mod_id == 2 else 1

                if mod_id == 4:
                    drop = partial(nn.Dropout, rate=0.3)
                elif mod_id == 5:
                    drop = partial(nn.Dropout, rate=0.5)
                else:
                    drop = None

                blocks.append((
                    "block%d" % (block_id + 1),
                    IdentityResidualBlock(in_channels=in_channels,
                                          channels=channels[mod_id],
                                          norm_act=norm_act,
                                          strides=strides,
                                          dilation=dil,
                                          dropout=drop,
                                          dist_bn=self.dist_bn)
                ))

                # Update channels and p_keep
                in_channels = channels[mod_id][-1]

            # Create module
            if mod_id == 0:
                self.pool2 = nn.MaxPool2D(pool_size=3, strides=2, padding=1)
                blocks_dict = OrderedDict(blocks)
                self.mod2 = nn.HybridSequential(prefix='mod2')
                for key in blocks_dict.keys():
                    self.mod2.add(blocks_dict[key])

            if mod_id == 1:
                self.pool3 = nn.MaxPool2D(pool_size=3, strides=2, padding=1)
                blocks_dict = OrderedDict(blocks)
                self.mod3 = nn.HybridSequential(prefix='mod3')
                for key in blocks_dict.keys():
                    self.mod3.add(blocks_dict[key])


            if mod_id == 2:
                blocks_dict = OrderedDict(blocks)
                self.mod4 = nn.HybridSequential(prefix='mod4')
                for key in blocks_dict.keys():
                    self.mod4.add(blocks_dict[key])

            if mod_id == 3:
                blocks_dict = OrderedDict(blocks)
                self.mod5 = nn.HybridSequential(prefix='mod5')
                for key in blocks_dict.keys():
                    self.mod5.add(blocks_dict[key])

            if mod_id == 4:
                blocks_dict = OrderedDict(blocks)
                self.mod6 = nn.HybridSequential(prefix='mod6')
                for key in blocks_dict.keys():
                    self.mod6.add(blocks_dict[key])

            if mod_id == 5:
                blocks_dict = OrderedDict(blocks)
                self.mod7 = nn.HybridSequential(prefix='mod7')
                for key in blocks_dict.keys():
                    self.mod7.add(blocks_dict[key])

        # Pooling and predictor
        self.bn_out = norm_act(in_channels)
        if classes != 0:
            self.classifier = nn.HybridSequential(prefix='classifier')
            self.classifier.add(nn.GlobalAvgPool2D())
            self.classifier.add(nn.Dense(in_units=in_channels, units=classes))
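Only the constructor is shown; a hedged sketch of the forward flow implied by the modules defined above (pooling between the first module groups, then the output BN/activation and optional classifier):

    def hybrid_forward(self, F, x):
        # hedged sketch of the flow implied by the constructor above
        out = self.mod1(x)
        out = self.mod2(self.pool2(out))
        out = self.mod3(self.pool3(out))
        out = self.mod4(out)
        out = self.mod5(out)
        out = self.mod6(out)
        out = self.mod7(out)
        out = self.bn_out(out)
        if hasattr(self, 'classifier'):
            out = self.classifier(out)
        return out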
Example #20
 def __init__(self,
              block,
              layers,
              classes=1000,
              dilated=False,
              norm_layer=BatchNorm,
              norm_kwargs=None,
              last_gamma=False,
              deep_stem=False,
              stem_width=32,
              avg_down=False,
              final_drop=0.0,
              use_global_stats=False,
              **kwargs):
     self.inplanes = stem_width * 2 if deep_stem else 64
     super(ResNetV1b, self).__init__()
      norm_kwargs = norm_kwargs if norm_kwargs is not None else {}
      self.norm_kwargs = norm_kwargs
     if use_global_stats:
         self.norm_kwargs['use_global_stats'] = True
     with self.name_scope():
         if not deep_stem:
             self.conv1 = nn.Conv2D(channels=64,
                                    kernel_size=7,
                                    strides=2,
                                    padding=3,
                                    use_bias=False)
         else:
             self.conv1 = nn.HybridSequential(prefix='conv1')
             self.conv1.add(
                 nn.Conv2D(channels=stem_width,
                           kernel_size=3,
                           strides=2,
                           padding=1,
                           use_bias=False))
             self.conv1.add(norm_layer(**norm_kwargs))
             self.conv1.add(nn.Activation('relu'))
             self.conv1.add(
                 nn.Conv2D(channels=stem_width,
                           kernel_size=3,
                           strides=1,
                           padding=1,
                           use_bias=False))
             self.conv1.add(norm_layer(**norm_kwargs))
             self.conv1.add(nn.Activation('relu'))
             self.conv1.add(
                 nn.Conv2D(channels=stem_width * 2,
                           kernel_size=3,
                           strides=1,
                           padding=1,
                           use_bias=False))
         self.bn1 = norm_layer(**norm_kwargs)
         self.relu = nn.Activation('relu')
         self.maxpool = nn.MaxPool2D(pool_size=3, strides=2, padding=1)
         self.layer1 = self._make_layer(1,
                                        block,
                                        64,
                                        layers[0],
                                        avg_down=avg_down,
                                        norm_layer=norm_layer,
                                        last_gamma=last_gamma)
         self.layer2 = self._make_layer(2,
                                        block,
                                        128,
                                        layers[1],
                                        strides=2,
                                        avg_down=avg_down,
                                        norm_layer=norm_layer,
                                        last_gamma=last_gamma)
         if dilated:
             self.layer3 = self._make_layer(3,
                                            block,
                                            256,
                                            layers[2],
                                            strides=1,
                                            dilation=2,
                                            avg_down=avg_down,
                                            norm_layer=norm_layer,
                                            last_gamma=last_gamma)
             self.layer4 = self._make_layer(4,
                                            block,
                                            512,
                                            layers[3],
                                            strides=1,
                                            dilation=4,
                                            avg_down=avg_down,
                                            norm_layer=norm_layer,
                                            last_gamma=last_gamma)
         else:
             self.layer3 = self._make_layer(3,
                                            block,
                                            256,
                                            layers[2],
                                            strides=2,
                                            avg_down=avg_down,
                                            norm_layer=norm_layer,
                                            last_gamma=last_gamma)
             self.layer4 = self._make_layer(4,
                                            block,
                                            512,
                                            layers[3],
                                            strides=1,
                                            avg_down=avg_down,
                                            norm_layer=norm_layer,
                                            last_gamma=last_gamma)
         self.avgpool = nn.GlobalAvgPool2D()
         self.flat = nn.Flatten()
         self.drop = None
         if final_drop > 0.0:
             self.drop = nn.Dropout(final_drop)
         self.fc = nn.Dense(in_units=512 * block.expansion, units=classes)
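
A minimal usage sketch (not part of the original snippet): building a ResNet-50-style backbone
from this constructor, assuming the companion BottleneckV1b block class and the hybrid_forward
method defined elsewhere in the same resnetv1b module.

import mxnet as mx

# Hypothetical usage; the layer counts [3, 4, 6, 3] are the standard ResNet-50 configuration.
net = ResNetV1b(BottleneckV1b, layers=[3, 4, 6, 3], classes=1000)
net.initialize(ctx=mx.cpu())
out = net(mx.nd.random.uniform(shape=(1, 3, 224, 224)))  # -> (1, 1000) logits
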
Example 21
def custom_rcnn_fpn(pretrained_base=True,
                    base_network_name='resnet18_v1b',
                    norm_layer=nn.BatchNorm,
                    norm_kwargs=None,
                    sym_norm_layer=None,
                    sym_norm_kwargs=None,
                    num_fpn_filters=256,
                    num_box_head_conv=4,
                    num_box_head_conv_filters=256,
                    num_box_head_dense_filters=1024):
    r"""Generate custom RCNN model with resnet base network w/FPN.

    Parameters
    ----------
    pretrained_base : bool or str
        Boolean value controls whether to load the default pretrained weights for model.
        String value represents the hashtag for a certain version of pretrained weights.
    base_network_name : str, default 'resnet18_v1b'
        Base network for the R-CNN model. Currently supported: 'resnet18_v1b',
        'resnet50_v1b', 'resnet101_v1d', 'resnest50', and 'resnest101'.
    norm_layer : nn.HybridBlock, default nn.BatchNorm
        Gluon normalization layer to use. With the default ``nn.BatchNorm``, batch
        statistics are frozen (``use_global_stats=True``).
    norm_kwargs : dict
        Keyword arguments for gluon normalization layer
    sym_norm_layer : nn.SymbolBlock, default `None`
        Symbol normalization layer to use in FPN. This is due to FPN being implemented using
        SymbolBlock. Default is `None`, meaning no normalization layer will be used in FPN.
    sym_norm_kwargs : dict
        Keyword arguments for symbol normalization layer used in FPN.
    num_fpn_filters : int, default 256
        Number of filters for FPN output layers.
    num_box_head_conv : int, default 4
        Number of convolution layers to use in box head if batch normalization is not frozen.
    num_box_head_conv_filters : int, default 256
        Number of filters for convolution layers in box head.
        Only applicable if batch normalization is not frozen.
    num_box_head_dense_filters : int, default 1024
        Number of hidden units for the last fully connected layer in box head.

    Returns
    -------
    SymbolBlock or HybridBlock
        Base feature extractor, e.g. a ResNet with FPN.
    None or HybridBlock
        R-CNN features before the task-specific heads (``None`` when FPN is used).
    HybridBlock
        Box feature extractor.
    """
    use_global_stats = norm_layer is nn.BatchNorm
    if base_network_name == 'resnet18_v1b':
        from ...model_zoo.resnetv1b import resnet18_v1b
        base_network = resnet18_v1b(pretrained=pretrained_base,
                                    dilated=False,
                                    use_global_stats=use_global_stats,
                                    norm_layer=norm_layer,
                                    norm_kwargs=norm_kwargs)
        fpn_inputs_names = [
            'layers1_relu3_fwd', 'layers2_relu3_fwd', 'layers3_relu3_fwd',
            'layers4_relu3_fwd'
        ]
    elif base_network_name == 'resnet50_v1b':
        from ...model_zoo.resnetv1b import resnet50_v1b
        base_network = resnet50_v1b(pretrained=pretrained_base,
                                    dilated=False,
                                    use_global_stats=use_global_stats,
                                    norm_layer=norm_layer,
                                    norm_kwargs=norm_kwargs)
        fpn_inputs_names = [
            'layers1_relu8_fwd', 'layers2_relu11_fwd', 'layers3_relu17_fwd',
            'layers4_relu8_fwd'
        ]
    elif base_network_name == 'resnet101_v1d':
        from ...model_zoo.resnetv1b import resnet101_v1d
        base_network = resnet101_v1d(pretrained=pretrained_base,
                                     dilated=False,
                                     use_global_stats=use_global_stats,
                                     norm_layer=norm_layer,
                                     norm_kwargs=norm_kwargs)
        fpn_inputs_names = [
            'layers1_relu8_fwd', 'layers2_relu11_fwd', 'layers3_relu68_fwd',
            'layers4_relu8_fwd'
        ]
    elif base_network_name == 'resnest50':
        from ...model_zoo.resnest import resnest50
        base_network = resnest50(pretrained=pretrained_base,
                                 dilated=False,
                                 use_global_stats=use_global_stats,
                                 norm_layer=norm_layer,
                                 norm_kwargs=norm_kwargs)
        fpn_inputs_names = [
            'layers1_relu11_fwd', 'layers2_relu15_fwd', 'layers3_relu23_fwd',
            'layers4_relu11_fwd'
        ]
    elif base_network_name == 'resnest101':
        from ...model_zoo.resnest import resnest101
        base_network = resnest101(pretrained=pretrained_base,
                                  dilated=False,
                                  use_global_stats=use_global_stats,
                                  norm_layer=norm_layer,
                                  norm_kwargs=norm_kwargs)
        fpn_inputs_names = [
            'layers1_relu11_fwd', 'layers2_relu15_fwd', 'layers3_relu91_fwd',
            'layers4_relu11_fwd'
        ]
    else:
        raise NotImplementedError('Unsupported network', base_network_name)
    features = FPNFeatureExpander(network=base_network,
                                  outputs=fpn_inputs_names,
                                  num_filters=[num_fpn_filters] *
                                  len(fpn_inputs_names),
                                  use_1x1=True,
                                  use_upsample=True,
                                  use_elewadd=True,
                                  use_p6=True,
                                  no_bias=not use_global_stats,
                                  pretrained=pretrained_base,
                                  norm_layer=sym_norm_layer,
                                  norm_kwargs=sym_norm_kwargs)
    top_features = None
    box_features = nn.HybridSequential()
    box_features.add(nn.AvgPool2D(pool_size=(3, 3), strides=2,
                                  padding=1))  # reduce to 7x7
    if use_global_stats:
        box_features.add(
            nn.Dense(num_box_head_dense_filters,
                     weight_initializer=mx.init.Normal(0.01)),
            nn.Activation('relu'))
    else:
        for _ in range(num_box_head_conv):
            box_features.add(
                nn.Conv2D(num_box_head_conv_filters,
                          3,
                          padding=1,
                          use_bias=False),
                norm_layer(**({} if norm_kwargs is None else norm_kwargs)),
                nn.Activation('relu'))
    box_features.add(
        nn.Dense(num_box_head_dense_filters,
                 weight_initializer=mx.init.Normal(0.01)),
        nn.Activation('relu'))
    return features, top_features, box_features
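
A hedged usage sketch (assuming this helper lives inside GluonCV's R-CNN model zoo so that the
relative imports above resolve); the three returned blocks feed the generic R-CNN constructor.

# Hypothetical call; all argument values below are illustrative defaults.
features, top_features, box_features = custom_rcnn_fpn(
    pretrained_base=True,
    base_network_name='resnet50_v1b',
    num_fpn_filters=256,
    num_box_head_conv=4,
    num_box_head_conv_filters=256,
    num_box_head_dense_filters=1024)
# features: FPN feature expander; top_features: None; box_features: box-head trunk.
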
Example 22
    def _make_layer(self,
                    stage_index,
                    block,
                    planes,
                    blocks,
                    strides=1,
                    dilation=1,
                    avg_down=False,
                    norm_layer=None,
                    last_gamma=False):
        downsample = None
        if strides != 1 or self.inplanes != planes * block.expansion:
            downsample = nn.HybridSequential(prefix='down%d_' % stage_index)
            with downsample.name_scope():
                if avg_down:
                    if dilation == 1:
                        downsample.add(
                            nn.AvgPool2D(pool_size=strides, strides=strides))
                    else:
                        downsample.add(nn.AvgPool2D(pool_size=1, strides=1))
                    downsample.add(
                        nn.Conv2D(channels=planes * block.expansion,
                                  kernel_size=1,
                                  strides=1,
                                  use_bias=False))
                    downsample.add(norm_layer(**self.norm_kwargs))
                else:
                    downsample.add(
                        nn.Conv2D(channels=planes * block.expansion,
                                  kernel_size=1,
                                  strides=strides,
                                  use_bias=False))
                    downsample.add(norm_layer(**self.norm_kwargs))

        layers = nn.HybridSequential(prefix='layers%d_' % stage_index)
        with layers.name_scope():
            if dilation in (1, 2):
                layers.add(
                    block(planes,
                          strides,
                          dilation=1,
                          downsample=downsample,
                          previous_dilation=dilation,
                          norm_layer=norm_layer,
                          norm_kwargs=self.norm_kwargs,
                          last_gamma=last_gamma))
            elif dilation == 4:
                layers.add(
                    block(planes,
                          strides,
                          dilation=2,
                          downsample=downsample,
                          previous_dilation=dilation,
                          norm_layer=norm_layer,
                          norm_kwargs=self.norm_kwargs,
                          last_gamma=last_gamma))
            else:
                raise RuntimeError(
                    "=> unknown dilation size: {}".format(dilation))

            self.inplanes = planes * block.expansion
            for i in range(1, blocks):
                layers.add(
                    block(planes,
                          dilation=dilation,
                          previous_dilation=dilation,
                          norm_layer=norm_layer,
                          norm_kwargs=self.norm_kwargs,
                          last_gamma=last_gamma))

        return layers
Example 23
def faster_rcnn_resnet50_v1b_coco(pretrained=False,
                                  pretrained_base=True,
                                  **kwargs):
    r"""Faster RCNN model from the paper
    "Ren, S., He, K., Girshick, R., & Sun, J. (2015). Faster r-cnn: Towards
    real-time object detection with region proposal networks"

    Parameters
    ----------
    pretrained : bool, optional, default is False
        Load pretrained weights.
    pretrained_base : bool, optional, default is True
        Load the pretrained base network; the extra layers are initialized randomly.
        Note that if `pretrained` is `True`, this has no effect.
    ctx : Context, default CPU
        The context in which to load the pretrained weights.
    root : str, default '~/.mxnet/models'
        Location for keeping the model parameters.

    Examples
    --------
    >>> model = faster_rcnn_resnet50_v1b_coco(pretrained=True)
    >>> print(model)
    """
    from ..resnetv1b import resnet50_v1b
    from ...data import COCODetection
    classes = COCODetection.CLASSES
    pretrained_base = False if pretrained else pretrained_base
    base_network = resnet50_v1b(pretrained=pretrained_base,
                                dilated=False,
                                use_global_stats=True)
    features = nn.HybridSequential()
    top_features = nn.HybridSequential()
    for layer in [
            'conv1', 'bn1', 'relu', 'maxpool', 'layer1', 'layer2', 'layer3'
    ]:
        features.add(getattr(base_network, layer))
    for layer in ['layer4']:
        top_features.add(getattr(base_network, layer))
    train_patterns = '|'.join(
        ['.*dense', '.*rpn', '.*down(2|3|4)_conv', '.*layers(2|3|4)_conv'])
    return get_faster_rcnn(name='resnet50_v1b',
                           dataset='coco',
                           pretrained=pretrained,
                           features=features,
                           top_features=top_features,
                           classes=classes,
                           short=800,
                           max_size=1333,
                           train_patterns=train_patterns,
                           nms_thresh=0.5,
                           nms_topk=-1,
                           post_nms=-1,
                           roi_mode='align',
                           roi_size=(14, 14),
                           stride=16,
                           clip=4.42,
                           rpn_channel=1024,
                           base_size=16,
                           scales=(2, 4, 8, 16, 32),
                           ratios=(0.5, 1, 2),
                           alloc_size=(128, 128),
                           rpn_nms_thresh=0.7,
                           rpn_train_pre_nms=12000,
                           rpn_train_post_nms=2000,
                           rpn_test_pre_nms=6000,
                           rpn_test_post_nms=1000,
                           rpn_min_size=0,
                           num_sample=128,
                           pos_iou_thresh=0.5,
                           pos_ratio=0.25,
                           **kwargs)
Example 24
    def __init__(self,
                 askc_type,
                 channels,
                 cardinality,
                 bottleneck_width,
                 stride,
                 downsample=False,
                 last_gamma=False,
                 use_se=False,
                 avg_down=True,
                 norm_layer=BatchNorm,
                 norm_kwargs=None,
                 **kwargs):
        super(AFFResNeXtBlock, self).__init__(**kwargs)
        D = int(math.floor(channels * (bottleneck_width / 64)))
        group_width = cardinality * D

        self.body = nn.HybridSequential(prefix='')
        self.body.add(nn.Conv2D(group_width, kernel_size=1, use_bias=False))
        self.body.add(
            norm_layer(**({} if norm_kwargs is None else norm_kwargs)))
        self.body.add(nn.Activation('relu'))
        self.body.add(
            nn.Conv2D(group_width,
                      kernel_size=3,
                      strides=stride,
                      padding=1,
                      groups=cardinality,
                      use_bias=False))
        self.body.add(
            norm_layer(**({} if norm_kwargs is None else norm_kwargs)))
        self.body.add(nn.Activation('relu'))
        self.body.add(nn.Conv2D(channels * 4, kernel_size=1, use_bias=False))
        if last_gamma:
            self.body.add(
                norm_layer(**({} if norm_kwargs is None else norm_kwargs)))
        else:
            self.body.add(
                norm_layer(gamma_initializer='zeros',
                           **({} if norm_kwargs is None else norm_kwargs)))

        if use_se:
            self.se = nn.HybridSequential(prefix='')
            self.se.add(nn.Conv2D(channels // 4, kernel_size=1, padding=0))
            self.se.add(nn.Activation('relu'))
            self.se.add(nn.Conv2D(channels * 4, kernel_size=1, padding=0))
            self.se.add(nn.Activation('sigmoid'))
        else:
            self.se = None

        if downsample:
            self.downsample = nn.HybridSequential(prefix='')
            if avg_down:
                self.downsample.add(
                    nn.AvgPool2D(pool_size=stride,
                                 strides=stride,
                                 ceil_mode=True,
                                 count_include_pad=False))
                self.downsample.add(
                    nn.Conv2D(channels=channels * 4,
                              kernel_size=1,
                              strides=1,
                              use_bias=False))
            else:
                self.downsample.add(
                    nn.Conv2D(channels * 4,
                              kernel_size=1,
                              strides=stride,
                              use_bias=False))
            self.downsample.add(
                norm_layer(**({} if norm_kwargs is None else norm_kwargs)))
        else:
            self.downsample = None

        if askc_type == 'DirectAdd':
            self.attention = DirectAddFuse()
        elif askc_type == 'ResGlobLocaforGlobLocaCha':
            self.attention = ResGlobLocaforGlobLocaChaFuse(
                channels=channels * 4, r=16)
        elif askc_type == 'ASKCFuse':
            self.attention = ASKCFuse(channels=channels * 4, r=16)
        else:
            raise ValueError('Unknown askc_type')
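
A hedged instantiation sketch (the fusion blocks DirectAddFuse/ASKCFuse and this block's
hybrid_forward come from the same attentional-feature-fusion code base and are not shown here).

# Hypothetical usage with illustrative hyperparameters.
block = AFFResNeXtBlock(askc_type='ASKCFuse', channels=64, cardinality=32,
                        bottleneck_width=4, stride=1, downsample=True)
block.initialize()
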
Example 25
    voc_test = VOCSegDataset(False, crop_size,
                             "/home/lizh/learn-gluon/data/VOC2012",
                             colormap2label)
    train_iter = gdata.DataLoader(voc_train,
                                  args.batch_size,
                                  shuffle=True,
                                  last_batch="discard",
                                  num_workers=num_workers)
    test_iter = gdata.DataLoader(voc_test,
                                 args.batch_size,
                                 last_batch="discard",
                                 num_workers=num_workers)

    pretrained_net = model_zoo.vision.resnet18_v2(
        pretrained=True, root="/home/lizh/learn-gluon/models")
    net = nn.HybridSequential()
    for layer in pretrained_net.features[:-2]:
        net.add(layer)
    net.add(
        nn.Conv2D(num_classes, kernel_size=1),
        nn.Conv2DTranspose(num_classes, kernel_size=64, padding=16,
                           strides=32))
    net[-2].initialize(init.Xavier())
    net[-1].initialize(
        init.Constant(bilinear_kernel(num_classes, num_classes, 64)))
    net.collect_params().reset_ctx(ctx)

    if args.train:
        if args.load_parameters:
            net.load_parameters("/home/lizh/learn-gluon/models/fcn.params")
        num_epochs = args.num_epochs
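
The snippet above initializes the transposed convolution with bilinear_kernel, which is defined
elsewhere in the original script. A common implementation, the standard bilinear-upsampling
initializer used in the Gluon FCN tutorials, is sketched here as an assumption:

import numpy as np
from mxnet import nd

def bilinear_kernel(in_channels, out_channels, kernel_size):
    """Weights for a Conv2DTranspose layer that performs bilinear upsampling."""
    factor = (kernel_size + 1) // 2
    center = factor - 1 if kernel_size % 2 == 1 else factor - 0.5
    og = (np.arange(kernel_size).reshape(-1, 1),
          np.arange(kernel_size).reshape(1, -1))
    filt = (1 - abs(og[0] - center) / factor) * (1 - abs(og[1] - center) / factor)
    weight = np.zeros((in_channels, out_channels, kernel_size, kernel_size),
                      dtype='float32')
    weight[range(in_channels), range(out_channels), :, :] = filt
    return nd.array(weight)
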
Example 26
    def __init__(self,
                 nclass=1000,
                 norm_layer=BatchNorm,
                 num_segments=1,
                 norm_kwargs=None,
                 partial_bn=False,
                 pretrained_base=True,
                 dropout_ratio=0.5,
                 init_std=0.01,
                 ctx=None,
                 **kwargs):
        super(I3D_InceptionV3, self).__init__(**kwargs)
        self.num_segments = num_segments
        self.feat_dim = 2048
        self.dropout_ratio = dropout_ratio
        self.init_std = init_std

        with self.name_scope():
            self.features = nn.HybridSequential(prefix='')
            self.features.add(
                _make_basic_conv(in_channels=3,
                                 channels=32,
                                 kernel_size=3,
                                 strides=2,
                                 padding=(1, 0, 0),
                                 norm_layer=norm_layer,
                                 norm_kwargs=norm_kwargs))
            if partial_bn:
                if norm_kwargs is not None:
                    norm_kwargs['use_global_stats'] = True
                else:
                    norm_kwargs = {}
                    norm_kwargs['use_global_stats'] = True

            self.features.add(
                _make_basic_conv(in_channels=32,
                                 channels=32,
                                 kernel_size=3,
                                 padding=(1, 0, 0),
                                 norm_layer=norm_layer,
                                 norm_kwargs=norm_kwargs))
            self.features.add(
                _make_basic_conv(in_channels=32,
                                 channels=64,
                                 kernel_size=3,
                                 padding=1,
                                 norm_layer=norm_layer,
                                 norm_kwargs=norm_kwargs))
            self.features.add(
                nn.MaxPool3D(pool_size=3, strides=(1, 2, 2),
                             padding=(1, 0, 0)))
            self.features.add(
                _make_basic_conv(in_channels=64,
                                 channels=80,
                                 kernel_size=1,
                                 norm_layer=norm_layer,
                                 norm_kwargs=norm_kwargs))
            self.features.add(
                _make_basic_conv(in_channels=80,
                                 channels=192,
                                 kernel_size=3,
                                 padding=(1, 0, 0),
                                 norm_layer=norm_layer,
                                 norm_kwargs=norm_kwargs))
            self.features.add(
                nn.MaxPool3D(pool_size=3, strides=(1, 2, 2),
                             padding=(1, 0, 0)))
            self.features.add(_make_A(192, 32, 'A1_', norm_layer, norm_kwargs))
            self.features.add(_make_A(256, 64, 'A2_', norm_layer, norm_kwargs))
            self.features.add(_make_A(288, 64, 'A3_', norm_layer, norm_kwargs))
            self.features.add(_make_B('B_', norm_layer, norm_kwargs))
            self.features.add(_make_C(768, 128, 'C1_', norm_layer,
                                      norm_kwargs))
            self.features.add(_make_C(768, 160, 'C2_', norm_layer,
                                      norm_kwargs))
            self.features.add(_make_C(768, 160, 'C3_', norm_layer,
                                      norm_kwargs))
            self.features.add(_make_C(768, 192, 'C4_', norm_layer,
                                      norm_kwargs))
            self.features.add(_make_D('D_', norm_layer, norm_kwargs))
            self.features.add(_make_E(1280, 'E1_', norm_layer, norm_kwargs))
            self.features.add(_make_E(2048, 'E2_', norm_layer, norm_kwargs))
            self.features.add(nn.GlobalAvgPool3D())

            self.head = nn.HybridSequential(prefix='')
            self.head.add(nn.Dropout(rate=self.dropout_ratio))
            self.output = nn.Dense(
                units=nclass,
                in_units=self.feat_dim,
                weight_initializer=init.Normal(sigma=self.init_std))
            self.head.add(self.output)

            self.features.initialize(ctx=ctx)
            self.head.initialize(ctx=ctx)

            if pretrained_base:
                inceptionv3_2d = inception_v3(pretrained=True)
                weights2d = inceptionv3_2d.collect_params()
                weights3d = self.collect_params()
                assert len(weights2d.keys()) == len(
                    weights3d.keys()), 'Number of parameters should be same.'

                dict2d = {}
                for key_id, key_name in enumerate(weights2d.keys()):
                    dict2d[key_id] = key_name

                dict3d = {}
                for key_id, key_name in enumerate(weights3d.keys()):
                    dict3d[key_id] = key_name

                dict_transform = {}
                for key_id, key_name in dict3d.items():
                    dict_transform[dict2d[key_id]] = key_name

                cnt = 0
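                # Inflate each 2-D conv kernel along the temporal axis and divide by the
                # temporal dimension, so the 3-D filter produces the same response as the
                # original 2-D filter on a temporally constant clip (standard I3D inflation).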
                for key2d, key3d in dict_transform.items():
                    if 'conv' in key3d:
                        temporal_dim = weights3d[key3d].shape[2]
                        temporal_2d = nd.expand_dims(weights2d[key2d].data(),
                                                     axis=2)
                        inflated_2d = nd.broadcast_to(
                            temporal_2d,
                            shape=[0, 0, temporal_dim, 0, 0]) / temporal_dim
                        assert inflated_2d.shape == weights3d[key3d].shape, \
                            'the shape of %s and %s does not match. ' % (key2d, key3d)
                        weights3d[key3d].set_data(inflated_2d)
                        cnt += 1
                        print('%s is done with shape: ' % (key3d),
                              weights3d[key3d].shape)
                    if 'batchnorm' in key3d:
                        assert weights2d[key2d].shape == weights3d[key3d].shape, \
                            'the shape of %s and %s does not match. ' % (key2d, key3d)
                        weights3d[key3d].set_data(weights2d[key2d].data())
                        cnt += 1
                        print('%s is done with shape: ' % (key3d),
                              weights3d[key3d].shape)
                    if 'dense' in key3d:
                        cnt += 1
                        print('%s is skipped with shape: ' % (key3d),
                              weights3d[key3d].shape)

                assert cnt == len(weights2d.keys()), \
                    'Not all parameters have been ported, check the initialization.'
Example 27
    def __init__(self,
                 channels,
                 init_block_channels,
                 final_block_channels,
                 exp_kernel_counts,
                 conv1_kernel_counts,
                 conv2_kernel_counts,
                 exp_factors,
                 se_factors,
                 bn_use_global_stats=False,
                 in_channels=3,
                 in_size=(224, 224),
                 classes=1000,
                 **kwargs):
        super(MixNet, self).__init__(**kwargs)
        self.in_size = in_size
        self.classes = classes

        with self.name_scope():
            self.features = nn.HybridSequential(prefix="")
            self.features.add(MixInitBlock(
                in_channels=in_channels,
                out_channels=init_block_channels,
                bn_use_global_stats=bn_use_global_stats))
            in_channels = init_block_channels
            for i, channels_per_stage in enumerate(channels):
                stage = nn.HybridSequential(prefix="stage{}_".format(i + 1))
                with stage.name_scope():
                    for j, out_channels in enumerate(channels_per_stage):
                        strides = 2 if ((j == 0) and (i != 3)) or\
                                       ((j == len(channels_per_stage) // 2) and (i == 3)) else 1
                        exp_kernel_count = exp_kernel_counts[i][j]
                        conv1_kernel_count = conv1_kernel_counts[i][j]
                        conv2_kernel_count = conv2_kernel_counts[i][j]
                        exp_factor = exp_factors[i][j]
                        se_factor = se_factors[i][j]
                        activation = "relu" if i == 0 else "swish"
                        stage.add(MixUnit(
                            in_channels=in_channels,
                            out_channels=out_channels,
                            strides=strides,
                            exp_kernel_count=exp_kernel_count,
                            conv1_kernel_count=conv1_kernel_count,
                            conv2_kernel_count=conv2_kernel_count,
                            exp_factor=exp_factor,
                            se_factor=se_factor,
                            bn_use_global_stats=bn_use_global_stats,
                            activation=activation))
                        in_channels = out_channels
                self.features.add(stage)
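            # Final 1x1 conv block; note that ``activation`` still holds the value left
            # over from the last stage loop ("swish" when there is more than one stage).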
            self.features.add(conv1x1_block(
                in_channels=in_channels,
                out_channels=final_block_channels,
                bn_use_global_stats=bn_use_global_stats,
                activation=activation))
            in_channels = final_block_channels
            self.features.add(nn.AvgPool2D(
                pool_size=7,
                strides=1))

            self.output = nn.HybridSequential(prefix="")
            self.output.add(nn.Flatten())
            self.output.add(nn.Dense(
                units=classes,
                in_units=in_channels))
Example 28
def body():
    out = nn.HybridSequential()
    for nfilters in [16, 32, 64]:
        out.add(down_sample(nfilters))
    return out
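
body() relies on down_sample, which is not shown in this excerpt. A plausible definition, the
resolution-halving block used in the Gluon toy-SSD tutorial that this snippet appears to follow,
is included here as an assumption:

from mxnet.gluon import nn

def down_sample(num_filters):
    """Two 3x3 conv-BN-ReLU blocks followed by a 2x2 max pool that halves the feature map."""
    out = nn.HybridSequential()
    for _ in range(2):
        out.add(nn.Conv2D(num_filters, 3, strides=1, padding=1))
        out.add(nn.BatchNorm(in_channels=num_filters))
        out.add(nn.Activation('relu'))
    out.add(nn.MaxPool2D(2))
    return out
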
Example 29
    def __init__(self,
                 nclass,
                 depth,
                 num_stages=4,
                 pretrained=False,
                 pretrained_base=True,
                 num_segments=1,
                 spatial_strides=(1, 2, 2, 2),
                 temporal_strides=(1, 1, 1, 1),
                 dilations=(1, 1, 1, 1),
                 out_indices=(0, 1, 2, 3),
                 conv1_kernel_t=5,
                 conv1_stride_t=2,
                 pool1_kernel_t=1,
                 pool1_stride_t=2,
                 inflate_freq=(1, 1, 1, 1),
                 inflate_stride=(1, 1, 1, 1),
                 inflate_style='3x1x1',
                 nonlocal_stages=(-1, ),
                 nonlocal_freq=(0, 1, 1, 0),
                 nonlocal_cfg=None,
                 bn_eval=True,
                 bn_frozen=False,
                 partial_bn=False,
                 frozen_stages=-1,
                 dropout_ratio=0.5,
                 init_std=0.01,
                 norm_layer=BatchNorm,
                 norm_kwargs=None,
                 ctx=None,
                 **kwargs):
        super(I3D_ResNetV1, self).__init__()

        if depth not in self.arch_settings:
            raise KeyError('invalid depth {} for resnet'.format(depth))

        self.nclass = nclass
        self.depth = depth
        self.num_stages = num_stages
        self.pretrained = pretrained
        self.pretrained_base = pretrained_base
        self.num_segments = num_segments
        self.spatial_strides = spatial_strides
        self.temporal_strides = temporal_strides
        self.dilations = dilations
        assert len(spatial_strides) == len(temporal_strides) == len(dilations) == num_stages
        self.out_indices = out_indices
        assert max(out_indices) < num_stages
        self.inflate_freqs = inflate_freq if not isinstance(inflate_freq, int) else (inflate_freq, ) * num_stages
        self.inflate_style = inflate_style
        self.nonlocal_stages = nonlocal_stages
        self.nonlocal_freqs = nonlocal_freq if not isinstance(nonlocal_freq, int) else (nonlocal_freq, ) * num_stages
        self.nonlocal_cfg = nonlocal_cfg
        self.bn_eval = bn_eval
        self.bn_frozen = bn_frozen
        self.partial_bn = partial_bn
        self.frozen_stages = frozen_stages
        self.dropout_ratio = dropout_ratio
        self.init_std = init_std

        self.block, stage_blocks = self.arch_settings[depth]
        self.stage_blocks = stage_blocks[:num_stages]
        self.inplanes = 64

        self.first_stage = nn.HybridSequential(prefix='')
        self.first_stage.add(nn.Conv3D(in_channels=3, channels=64, kernel_size=(conv1_kernel_t, 7, 7),
                                       strides=(conv1_stride_t, 2, 2), padding=((conv1_kernel_t - 1)//2, 3, 3), use_bias=False))
        self.first_stage.add(norm_layer(in_channels=64, **({} if norm_kwargs is None else norm_kwargs)))
        self.first_stage.add(nn.Activation('relu'))
        self.first_stage.add(nn.MaxPool3D(pool_size=(pool1_kernel_t, 3, 3), strides=(pool1_stride_t, 2, 2), padding=(pool1_kernel_t//2, 1, 1)))

        self.pool2 = nn.MaxPool3D(pool_size=(2, 1, 1), strides=(2, 1, 1), padding=(0, 0, 0))

        self.res_layers = nn.HybridSequential(prefix='')
        for i, num_blocks in enumerate(self.stage_blocks):
            spatial_stride = spatial_strides[i]
            temporal_stride = temporal_strides[i]
            dilation = dilations[i]
            planes = 64 * 2**i
            layer_name = 'layer{}_'.format(i + 1)

            res_layer = make_res_layer(self.block,
                                       self.inplanes,
                                       planes,
                                       num_blocks,
                                       spatial_stride=spatial_stride,
                                       temporal_stride=temporal_stride,
                                       dilation=dilation,
                                       inflate_freq=self.inflate_freqs[i],
                                       inflate_style=self.inflate_style,
                                       nonlocal_freq=self.nonlocal_freqs[i],
                                       nonlocal_cfg=self.nonlocal_cfg if i in self.nonlocal_stages else None,
                                       norm_layer=norm_layer,
                                       norm_kwargs=norm_kwargs,
                                       layer_name=layer_name)
            self.inplanes = planes * self.block.expansion
            self.res_layers.add(res_layer)

        self.feat_dim = self.block.expansion * 64 * 2**(len(self.stage_blocks) - 1)

        # We use ``GlobalAvgPool3D`` here for simplicity. Otherwise the input size must be fixed.
        # You can also use ``AvgPool3D`` and specify the arguments on your own, e.g.
        # self.st_avg = nn.AvgPool3D(pool_size=(4, 7, 7), strides=1, padding=0)
        # ``AvgPool3D`` is 10% faster, but ``GlobalAvgPool3D`` makes the code cleaner.
        self.st_avg = nn.GlobalAvgPool3D()

        self.head = nn.HybridSequential(prefix='')
        self.head.add(nn.Dropout(rate=self.dropout_ratio))
        self.fc = nn.Dense(in_units=self.feat_dim, units=nclass, weight_initializer=init.Normal(sigma=self.init_std))
        self.head.add(self.fc)

        self.init_weights(ctx)
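
A hedged instantiation sketch (arch_settings, make_res_layer and init_weights are class members
defined elsewhere in the original I3D code base and are not shown above):

import mxnet as mx

# Hypothetical usage for a Kinetics-400-style model; depth must be a key of arch_settings.
net = I3D_ResNetV1(nclass=400, depth=50, num_segments=1, ctx=mx.cpu())
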
Example 30
    def __init__(self, in_channels_list, out_channels_list, num_branches,
                 num_subblocks, bn_use_global_stats, **kwargs):
        super(HRBlock, self).__init__(**kwargs)
        self.in_channels_list = in_channels_list
        self.num_branches = num_branches

        with self.name_scope():
            self.branches = nn.HybridSequential(prefix="")
            for i in range(num_branches):
                layers = nn.HybridSequential(prefix="branch{}_".format(i + 1))
                in_channels_i = self.in_channels_list[i]
                out_channels_i = out_channels_list[i]
                for j in range(num_subblocks[i]):
                    layers.add(
                        ResUnit(in_channels=in_channels_i,
                                out_channels=out_channels_i,
                                strides=1,
                                bottleneck=False,
                                bn_use_global_stats=bn_use_global_stats))
                    in_channels_i = out_channels_i
                self.in_channels_list[i] = out_channels_i
                self.branches.add(layers)

            if num_branches > 1:
                self.fuse_layers = nn.HybridSequential(prefix="")
                for i in range(num_branches):
                    fuse_layer = nn.HybridSequential(
                        prefix="fuse_layer{}_".format(i + 1))
                    for j in range(num_branches):
                        if j > i:
                            fuse_layer.add(
                                UpSamplingBlock(
                                    in_channels=in_channels_list[j],
                                    out_channels=in_channels_list[i],
                                    bn_use_global_stats=bn_use_global_stats,
                                    scale_factor=2**(j - i)))
                        elif j == i:
                            fuse_layer.add(Identity())
                        else:
                            conv3x3_seq = nn.HybridSequential(
                                prefix="conv3x3_seq{}_".format(j + 1))
                            for k in range(i - j):
                                if k == i - j - 1:
                                    conv3x3_seq.add(conv3x3_block(
                                        in_channels=in_channels_list[j],
                                        out_channels=in_channels_list[i],
                                        strides=2,
                                        activation=None,
                                        bn_use_global_stats=bn_use_global_stats))
                                else:
                                    conv3x3_seq.add(conv3x3_block(
                                        in_channels=in_channels_list[j],
                                        out_channels=in_channels_list[j],
                                        strides=2,
                                        bn_use_global_stats=bn_use_global_stats))
                            fuse_layer.add(conv3x3_seq)
                    self.fuse_layers.add(fuse_layer)
                self.activ = nn.Activation("relu")