Example #1
    def __init__(self,
                 num_classes,
                 depth,
                 pretrained=None,
                 pretrained_base=True,
                 feat_ext=False,
                 num_segments=1,
                 num_crop=1,
                 num_stages=4,
                 spatial_strides=(1, 2, 2, 2),
                 temporal_strides=(1, 1, 1, 1),
                 dilations=(1, 1, 1, 1),
                 out_indices=(0, 1, 2, 3),
                 conv1_kernel_t=1,
                 conv1_stride_t=1,
                 pool1_kernel_t=1,
                 pool1_stride_t=1,
                 frozen_stages=-1,
                 inflate_freq=(0, 0, 1, 1),
                 inflate_stride=(1, 1, 1, 1),
                 inflate_style='3x1x1',
                 nonlocal_stages=(-1, ),
                 nonlocal_freq=(0, 0, 0, 0),
                 nonlocal_cfg=None,
                 bn_eval=False,
                 bn_frozen=False,
                 partial_bn=False,
                 dropout_ratio=0.5,
                 init_std=0.01,
                 norm_layer=BatchNorm,
                 norm_kwargs=None,
                 ctx=None,
                 **kwargs):
        super(ResNet_SlowFast, self).__init__()

        if depth not in self.arch_settings:
            raise KeyError('invalid depth {} for resnet'.format(depth))

        self.num_classes = num_classes
        self.depth = depth
        self.pretrained = pretrained
        self.pretrained_base = pretrained_base
        self.feat_ext = feat_ext
        self.num_segments = num_segments
        self.num_crop = num_crop
        self.num_stages = num_stages
        assert 1 <= num_stages <= 4
        self.spatial_strides = spatial_strides
        self.temporal_strides = temporal_strides
        self.dilations = dilations
        assert len(spatial_strides) == len(temporal_strides) == len(
            dilations) == num_stages
        self.out_indices = out_indices
        assert max(out_indices) < num_stages
        self.frozen_stages = frozen_stages
        self.inflate_freqs = inflate_freq if not isinstance(
            inflate_freq, int) else (inflate_freq, ) * num_stages
        self.inflate_style = inflate_style
        self.nonlocal_stages = nonlocal_stages
        self.nonlocal_freqs = nonlocal_freq if not isinstance(
            nonlocal_freq, int) else (nonlocal_freq, ) * num_stages
        self.nonlocal_cfg = nonlocal_cfg
        self.bn_eval = bn_eval
        self.bn_frozen = bn_frozen
        self.partial_bn = partial_bn

        self.dropout_ratio = dropout_ratio
        self.init_std = init_std

        self.block, stage_blocks = self.arch_settings[depth]
        self.stage_blocks = stage_blocks[:num_stages]
        self.inplanes = 64

        self.first_stage = nn.HybridSequential(prefix='')
        self.first_stage.add(
            nn.Conv3D(in_channels=3,
                      channels=64,
                      kernel_size=(conv1_kernel_t, 7, 7),
                      strides=(conv1_stride_t, 2, 2),
                      padding=((conv1_kernel_t - 1) // 2, 3, 3),
                      use_bias=False))
        self.first_stage.add(
            norm_layer(in_channels=64,
                       **({} if norm_kwargs is None else norm_kwargs)))
        self.first_stage.add(nn.Activation('relu'))
        self.first_stage.add(
            nn.MaxPool3D(pool_size=(pool1_kernel_t, 3, 3),
                         strides=(pool1_stride_t, 2, 2),
                         padding=(pool1_kernel_t // 2, 1, 1)))

        self.res_layers = nn.HybridSequential(prefix='')
        for i, num_blocks in enumerate(self.stage_blocks):
            spatial_stride = spatial_strides[i]
            temporal_stride = temporal_strides[i]
            dilation = dilations[i]
            planes = 64 * 2**i
            layer_name = 'layer{}_'.format(i + 1)

            res_layer = make_res_layer(self.block,
                                       self.inplanes,
                                       planes,
                                       num_blocks,
                                       spatial_stride=spatial_stride,
                                       temporal_stride=temporal_stride,
                                       dilation=dilation,
                                       inflate_freq=self.inflate_freqs[i],
                                       inflate_style=self.inflate_style,
                                       nonlocal_freq=self.nonlocal_freqs[i],
                                       nonlocal_cfg=self.nonlocal_cfg
                                       if i in self.nonlocal_stages else None,
                                       norm_layer=norm_layer,
                                       norm_kwargs=norm_kwargs,
                                       layer_name=layer_name)
            self.inplanes = planes * self.block.expansion
            self.res_layers.add(res_layer)

        self.feat_dim = self.block.expansion * 64 * 2**(
            len(self.stage_blocks) - 1)

        self.st_avg = nn.GlobalAvgPool3D()

        self.head = nn.HybridSequential(prefix='')
        self.head.add(nn.Dropout(rate=self.dropout_ratio))
        self.fc = nn.Dense(in_units=self.feat_dim,
                           units=num_classes,
                           weight_initializer=init.Normal(sigma=self.init_std))
        self.head.add(self.fc)

        self.init_weights(ctx)
Example #2
        # Save the updated moving_mean and moving_var
        Y, self.moving_mean, self.moving_var = batch_norm(X,
                                                          self.gamma.data(),
                                                          self.beta.data(),
                                                          self.moving_mean,
                                                          self.moving_var,
                                                          eps=1e-5,
                                                          momentum=0.9)
        return Y


# In[3]:

net = nn.Sequential()
net.add(nn.Conv2D(6, kernel_size=5), BatchNorm(6, num_dims=4),
        nn.Activation('sigmoid'), nn.MaxPool2D(pool_size=2, strides=2),
        nn.Conv2D(16, kernel_size=5), BatchNorm(16, num_dims=4),
        nn.Activation('sigmoid'), nn.MaxPool2D(pool_size=2, strides=2),
        nn.Dense(120), BatchNorm(120, num_dims=2), nn.Activation('sigmoid'),
        nn.Dense(84), BatchNorm(84, num_dims=2), nn.Activation('sigmoid'),
        nn.Dense(10))

# In[ ]:

lr, num_epochs, batch_size, ctx = 1.0, 5, 256, d2l.try_gpu()
net.initialize(ctx=ctx, init=init.Xavier())
trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': lr})
train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size)
d2l.train_ch5(net, train_iter, test_iter, batch_size, trainer, ctx, num_epochs)
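
The custom BatchNorm blocks above call a batch_norm helper that this snippet does not show; below is a minimal sketch in the d2l style (assuming "from mxnet import autograd, nd"), not necessarily the author's exact code:

def batch_norm(X, gamma, beta, moving_mean, moving_var, eps, momentum):
    if not autograd.is_training():
        # Prediction mode: normalize with the running statistics
        X_hat = (X - moving_mean) / nd.sqrt(moving_var + eps)
    else:
        assert len(X.shape) in (2, 4)
        if len(X.shape) == 2:
            # Dense layer: statistics over the batch dimension
            mean = X.mean(axis=0)
            var = ((X - mean) ** 2).mean(axis=0)
        else:
            # Conv layer: statistics over (N, H, W), one value per channel
            mean = X.mean(axis=(0, 2, 3), keepdims=True)
            var = ((X - mean) ** 2).mean(axis=(0, 2, 3), keepdims=True)
        X_hat = (X - mean) / nd.sqrt(var + eps)
        # Update the running statistics
        moving_mean = momentum * moving_mean + (1.0 - momentum) * mean
        moving_var = momentum * moving_var + (1.0 - momentum) * var
    Y = gamma * X_hat + beta  # scale and shift
    return Y, moving_mean, moving_var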

Example #3
 def conv2d_block(num_filters, kernel_size):
     block = nn.HybridSequential()
     block.add(nn.Conv2D(num_filters, (kernel_size, kernel_size)))
     block.add(nn.BatchNorm())
     block.add(nn.Activation(activation='relu'))
     return block
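
A minimal usage sketch (assuming "import mxnet as mx"; conv2d_block is treated here as a standalone helper):

blk = conv2d_block(num_filters=32, kernel_size=3)
blk.initialize()
x = mx.nd.random.uniform(shape=(1, 3, 28, 28))
print(blk(x).shape)  # (1, 32, 26, 26): no padding, so each spatial dim shrinks by 2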
Example #4
    def __init__(self,
                 channels,
                 init_block_channels,
                 stem_blocks_channels,
                 in_channels=3,
                 classes=1000,
                 **kwargs):
        super(NASNet, self).__init__(**kwargs)

        with self.name_scope():
            self.features = nasnet_dual_path_sequential(
                return_two=False,
                first_ordinals=1,
                last_ordinals=2,
                prefix='')
            self.features.add(NASNetInitBlock(
                in_channels=in_channels,
                out_channels=init_block_channels))
            in_channels = init_block_channels

            out_channels = stem_blocks_channels[0]
            self.features.add(Stem1Unit(
                in_channels=in_channels,
                out_channels=out_channels))
            prev_in_channels = in_channels
            in_channels = out_channels

            out_channels = stem_blocks_channels[1]
            self.features.add(Stem2Unit(
                in_channels=in_channels,
                prev_in_channels=prev_in_channels,
                out_channels=out_channels))
            prev_in_channels = in_channels
            in_channels = out_channels

            for i, channels_per_stage in enumerate(channels):
                stage = nasnet_dual_path_sequential(prefix='stage{}_'.format(i + 1))
                with stage.name_scope():
                    for j, out_channels in enumerate(channels_per_stage):
                        if (j == 0) and (i != 0):
                            unit = ReductionUnit
                        elif ((i == 0) and (j == 0)) or ((i != 0) and (j == 1)):
                            unit = FirstUnit
                        else:
                            unit = NormalUnit
                        stage.add(unit(
                            in_channels=in_channels,
                            prev_in_channels=prev_in_channels,
                            out_channels=out_channels))
                        prev_in_channels = in_channels
                        in_channels = out_channels
                self.features.add(stage)

            self.features.add(nn.Activation('relu'))
            self.features.add(nn.AvgPool2D(
                pool_size=7,
                strides=1))

            self.output = nn.HybridSequential(prefix='')
            self.output.add(nn.Flatten())
            self.output.add(nn.Dropout(rate=0.5))
            self.output.add(nn.Dense(
                units=classes,
                in_units=in_channels))
Example #5
def conv_block(num_channels):
    blk = nn.Sequential()
    blk.add(nn.BatchNorm(), nn.Activation('relu'),
            nn.Conv2D(num_channels, kernel_size=3, padding=1))
    return blk
Example #6
def Act():
    if config.net_act == 'prelu':
        return nn.PReLU()
    else:
        return nn.Activation(config.net_act)
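
A hedged usage sketch; config is assumed to be a module-level settings object exposing a net_act string (the stand-in below is hypothetical):

from types import SimpleNamespace
import mxnet as mx

config = SimpleNamespace(net_act='relu')  # hypothetical stand-in for the real config
act = Act()
print(act(mx.nd.array([-1.0, 2.0])))  # [0. 2.]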
Example #7
    netD.add(nn.BatchNorm())
    netD.add(nn.LeakyReLU(.2))
    netD.add(nn.Conv2D(channels=512, kernel_size=6, strides=2, padding=2))
    netD.add(nn.BatchNorm())
    netD.add(nn.LeakyReLU(.2))
    netD.add(nn.Conv2D(channels=1024, kernel_size=6, strides=2, padding=2))
    netD.add(nn.BatchNorm())
    netD.add(nn.LeakyReLU(.2))
    netD.add(nn.Conv2D(channels=1, kernel_size=6))

#===============generator==================
netG = nn.Sequential()
with netG.name_scope():
    netG.add(nn.Conv2DTranspose(channels=1024, kernel_size=6))
    netG.add(nn.BatchNorm())
    netG.add(nn.Activation(activation='relu'))
    netG.add(
        nn.Conv2DTranspose(channels=512, kernel_size=6, strides=2, padding=2))
    netG.add(nn.BatchNorm())
    netG.add(nn.Activation(activation='relu'))
    netG.add(
        nn.Conv2DTranspose(channels=256, kernel_size=6, strides=2, padding=2))
    netG.add(nn.BatchNorm())
    netG.add(nn.Activation(activation='relu'))
    netG.add(
        nn.Conv2DTranspose(channels=128, kernel_size=6, strides=2, padding=2))
    netG.add(nn.BatchNorm())
    netG.add(nn.Activation(activation='relu'))
    netG.add(
        nn.Conv2DTranspose(channels=3, kernel_size=6, strides=2, padding=2))
    netG.add(nn.BatchNorm())
Example #8
    def __init__(self,
                 levels,
                 channels,
                 classes=1000,
                 block=BasicBlock,
                 momentum=0.9,
                 norm_layer=BatchNorm,
                 norm_kwargs=None,
                 residual_root=False,
                 linear_root=False,
                 use_feature=False,
                 **kwargs):
        super(DLA, self).__init__(**kwargs)
        if norm_kwargs is None:
            norm_kwargs = {}
        norm_kwargs['momentum'] = momentum
        self._use_feature = use_feature
        self.channels = channels
        self.base_layer = nn.HybridSequential('base')
        self.base_layer.add(
            nn.Conv2D(in_channels=3,
                      channels=channels[0],
                      kernel_size=7,
                      strides=1,
                      padding=3,
                      use_bias=False))
        self.base_layer.add(norm_layer(in_channels=channels[0], **norm_kwargs))
        self.base_layer.add(nn.Activation('relu'))

        self.level0 = self._make_conv_level(channels[0], channels[0],
                                            levels[0], norm_layer, norm_kwargs)
        self.level1 = self._make_conv_level(channels[0],
                                            channels[1],
                                            levels[1],
                                            norm_layer,
                                            norm_kwargs,
                                            stride=2)
        self.level2 = Tree(levels[2],
                           block,
                           channels[1],
                           channels[2],
                           2,
                           level_root=False,
                           root_residual=residual_root,
                           norm_layer=norm_layer,
                           norm_kwargs=norm_kwargs,
                           prefix='level2_')
        self.level3 = Tree(levels[3],
                           block,
                           channels[2],
                           channels[3],
                           2,
                           level_root=True,
                           root_residual=residual_root,
                           norm_layer=norm_layer,
                           norm_kwargs=norm_kwargs,
                           prefix='level3_')
        self.level4 = Tree(levels[4],
                           block,
                           channels[3],
                           channels[4],
                           2,
                           level_root=True,
                           root_residual=residual_root,
                           norm_layer=norm_layer,
                           norm_kwargs=norm_kwargs,
                           prefix='level4_')
        self.level5 = Tree(levels[5],
                           block,
                           channels[4],
                           channels[5],
                           2,
                           level_root=True,
                           root_residual=residual_root,
                           norm_layer=norm_layer,
                           norm_kwargs=norm_kwargs,
                           prefix='level5_')

        if not self._use_feature:
            self.global_avg_pool = nn.GlobalAvgPool2D()
            self.fc = nn.Dense(units=classes)
Example #9
def conv_block(channels):
    out = nn.Sequential()
    out.add(nn.BatchNorm(), nn.Activation('relu'),
            nn.Conv2D(channels, kernel_size=3, padding=1))
    return out
Example #10

def conv_1x1_bn(channels, activation=nn.Activation('relu')):
    out = nn.HybridSequential()
    out.add(nn.Conv2D(channels, 1, 1, 0, use_bias=False),
            nn.BatchNorm(scale=True), activation)
    return out
Example #11
 def __init__(self, in_channels, bn_use_global_stats=False, **kwargs):
     super(PreActivation, self).__init__(**kwargs)
     with self.name_scope():
         self.bn = nn.BatchNorm(in_channels=in_channels,
                                use_global_stats=bn_use_global_stats)
         self.activ = nn.Activation("relu")
Example #12

def conv_bn(channels, filter_size, stride, activation=nn.Activation('relu')):
    out = nn.HybridSequential()
    out.add(nn.Conv2D(channels, 3, stride, 1, use_bias=False),
            nn.BatchNorm(scale=True), activation)
    return out
Example #13
    def __init__(self, features, top_features, classes, box_features=None,
                 short=600, max_size=1000, min_stage=4, max_stage=4, train_patterns=None,
                 nms_thresh=0.3, nms_topk=400, post_nms=100, roi_mode='align', roi_size=(14, 14), strides=16,
                 clip=None, rpn_channel=1024, base_size=16, scales=(8, 16, 32),
                 ratios=(0.5, 1, 2), alloc_size=(128, 128), rpn_nms_thresh=0.7,
                 rpn_train_pre_nms=12000, rpn_train_post_nms=2000, rpn_test_pre_nms=6000,
                 rpn_test_post_nms=300, rpn_min_size=16, per_device_batch_size=1, num_sample=128,
                 pos_iou_thresh=0.5, pos_ratio=0.25, max_num_gt=300, additional_output=False,
                 force_nms=False, minimal_opset=False, **kwargs):
        super(DoubleHeadRCNN, self).__init__(
            features=features, top_features=top_features, classes=classes,
            box_features=box_features, short=short, max_size=max_size,
            train_patterns=train_patterns, nms_thresh=nms_thresh, nms_topk=nms_topk, post_nms=post_nms,
            roi_mode=roi_mode, roi_size=roi_size, strides=strides, clip=clip, force_nms=force_nms,
            minimal_opset=minimal_opset, **kwargs)
        if max_stage - min_stage > 1 and isinstance(strides, (int, float)):
            raise ValueError('Multi level detected but strides is of a single number:', strides)

        if rpn_train_post_nms > rpn_train_pre_nms:
            rpn_train_post_nms = rpn_train_pre_nms
        if rpn_test_post_nms > rpn_test_pre_nms:
            rpn_test_post_nms = rpn_test_pre_nms

        self.ashape = alloc_size[0]
        self._min_stage = min_stage
        self._max_stage = max_stage
        self.num_stages = max_stage - min_stage + 1
        if self.num_stages > 1:
            assert len(scales) == len(strides) == self.num_stages, \
                "The num_stages (%d) must match number of scales (%d) and strides (%d)" \
                % (self.num_stages, len(scales), len(strides))
        self._batch_size = per_device_batch_size
        self._num_sample = num_sample
        self._rpn_test_post_nms = rpn_test_post_nms
        if minimal_opset:
            self._target_generator = None
        else:
            self._target_generator = lambda: RCNNTargetGenerator(self.num_class,
                                                                 int(num_sample * pos_ratio),
                                                                 self._batch_size)

        self._additional_output = additional_output
        with self.name_scope():
            self.rpn = RPN(
                channels=rpn_channel, strides=strides, base_size=base_size,
                scales=scales, ratios=ratios, alloc_size=alloc_size,
                clip=clip, nms_thresh=rpn_nms_thresh, train_pre_nms=rpn_train_pre_nms,
                train_post_nms=rpn_train_post_nms, test_pre_nms=rpn_test_pre_nms,
                test_post_nms=rpn_test_post_nms, min_size=rpn_min_size,
                multi_level=self.num_stages > 1, per_level_nms=False,
                minimal_opset=minimal_opset)
            self.sampler = RCNNTargetSampler(num_image=self._batch_size,
                                             num_proposal=rpn_train_post_nms, num_sample=num_sample,
                                             pos_iou_thresh=pos_iou_thresh, pos_ratio=pos_ratio,
                                             max_num_gt=max_num_gt)
            # double head branch with class and box
            self.class_features = nn.HybridSequential(prefix='double_fc_')
            with self.class_features.name_scope():
                for _ in range(2):
                    self.class_features.add(nn.Dense(1024, weight_initializer=mx.init.Normal(0.01)))
                    self.class_features.add(nn.Activation('relu'))

            self.newbox_features = nn.HybridSequential(prefix='double_')
            with self.newbox_features.name_scope():
                for _ in range(2):
                    self.newbox_features.add(BottleneckV1(channels=1024, stride=1))
Example #14
    def __init__(self,
                 inplanes,
                 planes,
                 spatial_stride=1,
                 temporal_stride=1,
                 dilation=1,
                 downsample=None,
                 if_inflate=True,
                 inflate_style='3x1x1',
                 if_nonlocal=True,
                 nonlocal_cfg=None,
                 norm_layer=BatchNorm,
                 norm_kwargs=None,
                 layer_name='',
                 **kwargs):

        super(Bottleneck, self).__init__()
        assert inflate_style in ['3x1x1', '3x3x3']
        self.inplanes = inplanes
        self.planes = planes
        self.conv1_stride = 1
        self.conv2_stride = spatial_stride
        self.conv1_stride_t = 1
        self.conv2_stride_t = temporal_stride
        self.layer_name = layer_name

        if if_inflate:
            if inflate_style == '3x1x1':
                self.conv1 = nn.Conv3D(in_channels=inplanes,
                                       channels=planes,
                                       kernel_size=(3, 1, 1),
                                       strides=(self.conv1_stride_t,
                                                self.conv1_stride,
                                                self.conv1_stride),
                                       padding=(1, 0, 0),
                                       use_bias=False)
                self.conv2 = nn.Conv3D(in_channels=planes,
                                       channels=planes,
                                       kernel_size=(1, 3, 3),
                                       strides=(self.conv2_stride_t,
                                                self.conv2_stride,
                                                self.conv2_stride),
                                       padding=(0, dilation, dilation),
                                       dilation=(1, dilation, dilation),
                                       use_bias=False)
            else:
                self.conv1 = nn.Conv3D(in_channels=inplanes,
                                       channels=planes,
                                       kernel_size=1,
                                       strides=(self.conv1_stride_t,
                                                self.conv1_stride,
                                                self.conv1_stride),
                                       use_bias=False)
                self.conv2 = nn.Conv3D(in_channels=planes,
                                       channels=planes,
                                       kernel_size=3,
                                       strides=(self.conv2_stride_t,
                                                self.conv2_stride,
                                                self.conv2_stride),
                                       padding=(1, dilation, dilation),
                                       dilation=(1, dilation, dilation),
                                       use_bias=False)
        else:
            self.conv1 = nn.Conv3D(in_channels=inplanes,
                                   channels=planes,
                                   kernel_size=1,
                                   strides=(1, self.conv1_stride,
                                            self.conv1_stride),
                                   use_bias=False)
            self.conv2 = nn.Conv3D(in_channels=planes,
                                   channels=planes,
                                   kernel_size=(1, 3, 3),
                                   strides=(1, self.conv2_stride,
                                            self.conv2_stride),
                                   padding=(0, dilation, dilation),
                                   dilation=(1, dilation, dilation),
                                   use_bias=False)

        self.bn1 = norm_layer(in_channels=planes,
                              **({} if norm_kwargs is None else norm_kwargs))
        self.bn2 = norm_layer(in_channels=planes,
                              **({} if norm_kwargs is None else norm_kwargs))
        self.conv3 = nn.Conv3D(in_channels=planes,
                               channels=planes * self.expansion,
                               kernel_size=1,
                               use_bias=False)
        self.bn3 = norm_layer(in_channels=planes * self.expansion,
                              **({} if norm_kwargs is None else norm_kwargs))
        self.relu = nn.Activation('relu')

        self.downsample = downsample
        self.spatial_stride = spatial_stride
        self.temporal_stride = temporal_stride
        self.dilation = dilation

        if if_nonlocal and nonlocal_cfg is not None:
            nonlocal_cfg_ = nonlocal_cfg.copy()
            nonlocal_cfg_['in_channels'] = planes * self.expansion
            self.nonlocal_block = build_nonlocal_block(nonlocal_cfg_)
        else:
            self.nonlocal_block = None
Example #15
use_gpu = True
ctx = mx.gpu() if use_gpu else mx.cpu()

#%%
# build the generator
nc = 3  # number of channels
ngf = 64  # base number of generator filters (the generated image is 64 x 64)
netG = nn.Sequential()  # sequential model

# simple generator. Use any model, but it should upscale the latent
# variable (random vectors) to a 64 * 64 * 3-channel image
with netG.name_scope():
    # input is random_z (batch_size x 150 x 1 x 1), going into a transposed convolution
    netG.add(nn.Conv2DTranspose(ngf * 8, 4, 1, 0))
    netG.add(nn.BatchNorm())
    netG.add(nn.Activation('relu'))
    # output size. (ngf*8) x 4 x 4
    netG.add(nn.Conv2DTranspose(ngf * 4, 4, 2, 1))
    netG.add(nn.BatchNorm())
    netG.add(nn.Activation('relu'))
    # output size. (ngf*4) x 8 x 8
    netG.add(nn.Conv2DTranspose(ngf * 2, 4, 2, 1))
    netG.add(nn.BatchNorm())
    netG.add(nn.Activation('relu'))
    # output size. (ngf*2) x 16 x 16
    netG.add(nn.Conv2DTranspose(ngf, 4, 2, 1))
    netG.add(nn.BatchNorm())
    netG.add(nn.Activation('relu'))
    # output size. (ngf) x 32 x 32
    netG.add(nn.Conv2DTranspose(nc, 4, 2, 1))
    # the original snippet is truncated here; a tanh output activation
    # (the usual DCGAN choice) would typically close the generator:
    netG.add(nn.Activation('tanh'))
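
A hedged smoke test of this generator (run on CPU here for simplicity; the 150-dim latent size follows the comment above):

z = mx.nd.random.normal(shape=(4, 150, 1, 1))  # batch of 4 latent vectors
netG.initialize(ctx=mx.cpu())
fake = netG(z)
print(fake.shape)  # expected: (4, 3, 64, 64)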
Example #16
 def __init__(self,
              block,
              layers,
              classes=1000,
              dilated=False,
              norm_layer=BatchNorm,
              norm_kwargs=None,
              last_gamma=False,
              deep_stem=False,
              stem_width=32,
              avg_down=False,
              final_drop=0.0,
              use_global_stats=False,
              name_prefix='',
              **kwargs):
     self.inplanes = stem_width * 2 if deep_stem else 64
     super(ResNetV1b, self).__init__(prefix=name_prefix)
     norm_kwargs = norm_kwargs if norm_kwargs is not None else {}
     if use_global_stats:
         norm_kwargs['use_global_stats'] = True
     self.norm_kwargs = norm_kwargs
     with self.name_scope():
         if not deep_stem:
             self.conv1 = nn.Conv2D(channels=64,
                                    kernel_size=7,
                                    strides=2,
                                    padding=3,
                                    use_bias=False)
         else:
             self.conv1 = nn.HybridSequential(prefix='conv1')
             self.conv1.add(
                 nn.Conv2D(channels=stem_width,
                           kernel_size=3,
                           strides=2,
                           padding=1,
                           use_bias=False))
             self.conv1.add(
                 norm_layer(in_channels=stem_width, **norm_kwargs))
             self.conv1.add(nn.Activation('relu'))
             self.conv1.add(
                 nn.Conv2D(channels=stem_width,
                           kernel_size=3,
                           strides=1,
                           padding=1,
                           use_bias=False))
             self.conv1.add(
                 norm_layer(in_channels=stem_width, **norm_kwargs))
             self.conv1.add(nn.Activation('relu'))
             self.conv1.add(
                 nn.Conv2D(channels=stem_width * 2,
                           kernel_size=3,
                           strides=1,
                           padding=1,
                           use_bias=False))
         self.bn1 = norm_layer(
             in_channels=64 if not deep_stem else stem_width * 2,
             **norm_kwargs)
         self.relu = nn.Activation('relu')
         self.maxpool = nn.MaxPool2D(pool_size=3, strides=2, padding=1)
         self.layer1 = self._make_layer(1,
                                        block,
                                        64,
                                        layers[0],
                                        avg_down=avg_down,
                                        norm_layer=norm_layer,
                                        last_gamma=last_gamma)
         self.layer2 = self._make_layer(2,
                                        block,
                                        128,
                                        layers[1],
                                        strides=2,
                                        avg_down=avg_down,
                                        norm_layer=norm_layer,
                                        last_gamma=last_gamma)
         if dilated:
             self.layer3 = self._make_layer(3,
                                            block,
                                            256,
                                            layers[2],
                                            strides=1,
                                            dilation=2,
                                            avg_down=avg_down,
                                            norm_layer=norm_layer,
                                            last_gamma=last_gamma)
             self.layer4 = self._make_layer(4,
                                            block,
                                            512,
                                            layers[3],
                                            strides=1,
                                            dilation=4,
                                            avg_down=avg_down,
                                            norm_layer=norm_layer,
                                            last_gamma=last_gamma)
         else:
             self.layer3 = self._make_layer(3,
                                            block,
                                            256,
                                            layers[2],
                                            strides=2,
                                            avg_down=avg_down,
                                            norm_layer=norm_layer,
                                            last_gamma=last_gamma)
             self.layer4 = self._make_layer(4,
                                            block,
                                            512,
                                            layers[3],
                                            strides=2,
                                            avg_down=avg_down,
                                            norm_layer=norm_layer,
                                            last_gamma=last_gamma)
         self.avgpool = nn.GlobalAvgPool2D()
         self.flat = nn.Flatten()
         self.drop = None
         if final_drop > 0.0:
             self.drop = nn.Dropout(final_drop)
Example #17

    def __init__(self,
                 bits,
                 bits_a,
                 num_init_features,
                 growth_rate,
                 block_config,
                 reduction,
                 bn_size,
                 modifier=[],
                 thumbnail=False,
                 dropout=0,
                 classes=1000,
                 **kwargs):
        assert len(modifier) == 0

        super(DenseNetX, self).__init__(**kwargs)
        with self.name_scope():
            self.fp_features = nn.HybridSequential(prefix='')
            if thumbnail:
                self.fp_features.add(
                    nn.Conv2D(num_init_features,
                              kernel_size=3,
                              strides=1,
                              padding=1,
                              in_channels=0,
                              use_bias=False))
            else:
                self.fp_features.add(
                    nn.Conv2D(num_init_features,
                              kernel_size=7,
                              strides=2,
                              padding=3,
                              use_bias=False))
                self.fp_features.add(nn.BatchNorm())
                self.fp_features.add(nn.Activation('relu'))
                self.fp_features.add(
                    nn.MaxPool2D(pool_size=3, strides=2, padding=1))
            # Add dense blocks
            num_features = num_init_features

            self.features1 = nn.HybridSequential(prefix='')
            self.features2 = nn.HybridSequential(prefix='')
            add_to = self.features1
            for i, num_layers in enumerate(block_config):
                add_to.add(
                    _make_dense_block(bits, bits_a, num_layers, bn_size,
                                      growth_rate, dropout, i + 1))
                num_features = num_features + num_layers * growth_rate
                if i != len(block_config) - 1:
                    features_after_transition = num_features // reduction[i]
                    # make it to be multiples of 32
                    features_after_transition = int(
                        round(features_after_transition / 32)) * 32
                    if i == 0:
                        add_to.add(nn.BatchNorm())
                        add_to.add(nn.QActivation(bits=bits_a))
                        add_to.add(
                            nn.QConv2D(features_after_transition,
                                       bits=bits,
                                       kernel_size=1))
                        add_to = self.features2
                        add_to.add(nn.AvgPool2D(pool_size=2, strides=2))
                    else:
                        add_to.add(nn.BatchNorm())
                        add_to.add(nn.QActivation(bits=bits_a))
                        add_to.add(
                            nn.QConv2D(features_after_transition,
                                       bits=bits,
                                       kernel_size=1))
                        add_to.add(nn.AvgPool2D(pool_size=2, strides=2))
                    num_features = features_after_transition
            add_to.add(nn.BatchNorm())
            add_to.add(nn.Activation('relu'))
            add_to.add(nn.AvgPool2D(pool_size=4 if thumbnail else 7))
            add_to.add(nn.Flatten())

            self.output = nn.Dense(classes)
Example #18
    def __init__(self,
                 units,
                 act=nn.Activation('tanh'),
                 normalized=False,
                 dropout=0.0,
                 weight_initializer=None,
                 bias_initializer='zeros',
                 prefix=None,
                 params=None):
        # Define a temporary class to implement the normalized version
        # TODO(sxjscience) Find a better solution
        class _NormalizedScoreProj(HybridBlock):
            def __init__(self,
                         in_units,
                         weight_initializer=None,
                         prefix=None,
                         params=None):
                super(_NormalizedScoreProj, self).__init__(prefix=prefix,
                                                           params=params)
                self.g = self.params.get('g',
                                         shape=(1, ),
                                         init=mx.init.Constant(
                                             1.0 / math.sqrt(in_units)),
                                         allow_deferred_init=True)
                self.v = self.params.get('v',
                                         shape=(1, in_units),
                                         init=weight_initializer,
                                         allow_deferred_init=True)

            def hybrid_forward(self, F, x, g, v):  # pylint: disable=arguments-differ
                v = F.broadcast_div(v, F.sqrt(F.dot(v, v, transpose_b=True)))
                weight = F.broadcast_mul(g, v)
                out = F.FullyConnected(x,
                                       weight,
                                       None,
                                       no_bias=True,
                                       num_hidden=1,
                                       flatten=False,
                                       name='fwd')
                return out

        super(MLPAttentionCell, self).__init__(prefix=prefix, params=params)
        self._units = units
        self._act = act
        self._normalized = normalized
        self._dropout = dropout
        with self.name_scope():
            self._dropout_layer = nn.Dropout(dropout)
            self._query_mid_layer = nn.Dense(
                units=self._units,
                flatten=False,
                use_bias=True,
                weight_initializer=weight_initializer,
                bias_initializer=bias_initializer,
                prefix='query_')
            self._key_mid_layer = nn.Dense(
                units=self._units,
                flatten=False,
                use_bias=False,
                weight_initializer=weight_initializer,
                prefix='key_')
            if self._normalized:
                self._attention_score = \
                    _NormalizedScoreProj(in_units=units,
                                         weight_initializer=weight_initializer,
                                         prefix='score_')
            else:
                self._attention_score = nn.Dense(
                    units=1,
                    in_units=self._units,
                    flatten=False,
                    use_bias=False,
                    weight_initializer=weight_initializer,
                    prefix='score_')
Example #19
def _make_basic_conv(in_channels, channels, norm_layer=BatchNorm, norm_kwargs=None, **kwargs):
    out = nn.HybridSequential(prefix='')
    out.add(nn.Conv3D(in_channels=in_channels, channels=channels, use_bias=False, **kwargs))
    out.add(norm_layer(in_channels=channels, epsilon=0.001, **({} if norm_kwargs is None else norm_kwargs)))
    out.add(nn.Activation('relu'))
    return out
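
A minimal usage sketch (assuming "import mxnet as mx" and that BatchNorm is mxnet.gluon.nn.BatchNorm):

blk = _make_basic_conv(in_channels=3, channels=16,
                       kernel_size=(3, 3, 3), padding=(1, 1, 1))
blk.initialize()
clip = mx.nd.random.uniform(shape=(1, 3, 8, 32, 32))  # (N, C, T, H, W)
print(blk(clip).shape)  # (1, 16, 8, 32, 32)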
Example #20
    def __init__(self,
                 scale,
                 m,
                 classes=10,
                 norm_layer=BatchNorm,
                 norm_kwargs=None,
                 **kwargs):
        super(cifar_ResidualAttentionModel, self).__init__(**kwargs)
        assert len(scale) == 3 and len(m) == 3
        m1, m2, m3 = m
        with self.name_scope():
            self.conv1 = nn.HybridSequential()
            with self.conv1.name_scope():
                self.conv1.add(
                    nn.Conv2D(32,
                              kernel_size=3,
                              strides=1,
                              padding=1,
                              use_bias=False))
                self.conv1.add(
                    norm_layer(**({} if norm_kwargs is None else norm_kwargs)))
                self.conv1.add(nn.Activation('relu'))
            # 32 x 32
            # self.mpool1 = nn.MaxPool2D(pool_size=2, strides=2, padding=0)

            self.residual_block1 = ResidualBlock(128, in_channels=32)
            self.attention_module1 = nn.HybridSequential()
            _add_block(self.attention_module1,
                       AttentionModule_stage2,
                       m1,
                       128,
                       size1=32,
                       size2=16,
                       scale=scale,
                       norm_layer=norm_layer,
                       norm_kwargs=norm_kwargs)
            self.residual_block2 = ResidualBlock(256,
                                                 in_channels=128,
                                                 stride=2)
            self.attention_module2 = nn.HybridSequential()
            _add_block(self.attention_module2,
                       AttentionModule_stage3,
                       m2,
                       256,
                       size1=16,
                       scale=scale,
                       norm_layer=norm_layer,
                       norm_kwargs=norm_kwargs)

            self.residual_block3 = ResidualBlock(512,
                                                 in_channels=256,
                                                 stride=2)
            self.attention_module3 = nn.HybridSequential()
            _add_block(self.attention_module3,
                       AttentionModule_stage4,
                       m3,
                       512,
                       scale=scale,
                       norm_layer=norm_layer,
                       norm_kwargs=norm_kwargs)

            self.residual_block4 = ResidualBlock(1024, in_channels=512)
            self.residual_block5 = ResidualBlock(1024)
            self.residual_block6 = ResidualBlock(1024)
            self.mpool2 = nn.HybridSequential()
            with self.mpool2.name_scope():
                self.mpool2.add(
                    norm_layer(**({} if norm_kwargs is None else norm_kwargs)))
                self.mpool2.add(nn.Activation('relu'))
                self.mpool2.add(nn.AvgPool2D(pool_size=8, strides=1))
            self.fc = nn.Conv2D(classes, kernel_size=1)
Example #21
    def __init__(self, **kwargs):
        super(GRP_DSOD320, self).__init__(**kwargs)
        growth_rate = 48
        dropout = 0
        nchannels = 128
        with self.name_scope():
            self.net0 = nn.HybridSequential()
            self.net0.add(
                nn.Conv2D(64,
                          3,
                          strides=2,
                          padding=1,
                          use_bias=False,
                          weight_initializer='xavier'),
                nn.BatchNorm(epsilon=1e-4), nn.Activation('relu'),
                nn.Conv2D(64,
                          3,
                          strides=1,
                          padding=1,
                          use_bias=False,
                          weight_initializer='xavier'),
                nn.BatchNorm(epsilon=1e-4), nn.Activation('relu'),
                nn.Conv2D(128,
                          3,
                          strides=1,
                          padding=1,
                          use_bias=False,
                          weight_initializer='xavier'),
                nn.BatchNorm(epsilon=1e-4), nn.Activation('relu'))
            self.net1 = nn.HybridSequential()
            self.net1.add(
                nn.MaxPool2D(pool_size=(2, 2), strides=(2, 2), ceil_mode=True))

            times = 1
            for i in range(6):
                self.net1.add(bl_layer_block(growth_rate, dropout, 4))
                nchannels += growth_rate
            nchannels = int(nchannels / times)
            self.net1.add(transition_w_o_block(nchannels, dropout))

            self.net2 = nn.HybridSequential()
            self.net2.add(
                nn.MaxPool2D(pool_size=(2, 2), strides=(2, 2), ceil_mode=True))
            for i in range(8):
                self.net2.add(bl_layer_block(growth_rate, dropout, 4))
                nchannels += growth_rate
            nchannels = int(nchannels / times)
            self.net2.add(transition_w_o_block(nchannels, dropout))

            self.extra0 = nn.HybridSequential()
            self.extra0.add(
                nn.MaxPool2D(pool_size=(4, 4), strides=(4, 4), ceil_mode=True),
                conv_block(kernel_size=1,
                           channels=128,
                           stride=1,
                           pad=0,
                           dropout=dropout))

            self.extra1 = nn.HybridSequential()
            self.extra1.add(
                nn.MaxPool2D(pool_size=(2, 2), strides=(2, 2), ceil_mode=True),
                conv_block(kernel_size=1,
                           channels=128,
                           stride=1,
                           pad=0,
                           dropout=dropout))

            self.net3 = nn.HybridSequential()
            self.net3.add(
                nn.MaxPool2D(pool_size=(2, 2), strides=(2, 2), ceil_mode=True))
            for i in range(8):
                self.net3.add(bl_layer_block(growth_rate, dropout, 4))
                nchannels += growth_rate
            nchannels = int(nchannels / times)
            self.net3.add(transition_w_o_block(nchannels, dropout))
            for i in range(8):
                self.net3.add(bl_layer_block(growth_rate, dropout, 4))
                nchannels += growth_rate
            self.net3.add(transition_w_o_block(171, dropout))

            self.first = nn.HybridSequential()
            self.first.add(
                nn.MaxPool2D(pool_size=(2, 2), strides=(2, 2), ceil_mode=True),
                conv_block(kernel_size=1,
                           channels=171,
                           stride=1,
                           pad=0,
                           dropout=dropout))

            self.net4 = nn.HybridSequential()
            self.net4.add(bl_layer_block2(86, dropout, 1))

            self.net5 = nn.HybridSequential()
            self.net5.add(bl_layer_block2(86, dropout, 1))

            self.net6 = nn.HybridSequential()
            self.net6.add(bl_layer_block2(86, dropout, 1))

            self.net7 = nn.HybridSequential()
            self.net7.add(bl_layer_block2(128, dropout, 1))

            self.Recurrent1 = nn.HybridSequential()
            self.Recurrent1.add(recurrent_layer(2, 128, dropout))

            self.Recurrent2 = nn.HybridSequential()
            self.Recurrent2.add(recurrent_layer(2, 171, dropout))

            self.Recurrent3 = nn.HybridSequential()
            self.Recurrent3.add(recurrent_layer(2, 86, dropout))

            self.Recurrent4 = nn.HybridSequential()
            self.Recurrent4.add(recurrent_layer(1, 86, dropout))

            self.Recurrent5 = nn.HybridSequential()
            self.Recurrent5.add(recurrent_layer(1, 86, dropout))
Example #22
    def __init__(
        self,
        bin_values: mx.nd.NDArray,
        n_residue: int,
        n_skip: int,
        dilation_depth: int,
        n_stacks: int,
        act_type: str,
        cardinality: List[int],
        embedding_dimension: int,
        pred_length: int,
        **kwargs,
    ):

        super().__init__(**kwargs)

        self.dilation_depth = dilation_depth
        self.pred_length = pred_length

        self.mu = len(bin_values)
        self.dilations = WaveNet._get_dilations(
            dilation_depth=dilation_depth, n_stacks=n_stacks
        )
        self.receptive_field = WaveNet.get_receptive_field(
            dilation_depth=dilation_depth, n_stacks=n_stacks
        )
        self.trim_lengths = [
            sum(self.dilations) - sum(self.dilations[: i + 1])
            for i, _ in enumerate(self.dilations)
        ]

        with self.name_scope():
            self.feature_embedder = FeatureEmbedder(
                cardinalities=cardinality,
                embedding_dims=[embedding_dimension for _ in cardinality],
            )

            self.post_transform = LookupValues(bin_values)
            self.target_embed = nn.Embedding(
                input_dim=self.mu, output_dim=n_residue
            )
            self.residuals = nn.HybridSequential()
            for i, d in enumerate(self.dilations):
                is_not_last = i + 1 < len(self.dilations)
                self.residuals.add(
                    CausalDilatedResidue(
                        n_residue=n_residue,
                        n_skip=n_skip,
                        dilation=d,
                        return_dense_out=is_not_last,
                        kernel_size=2,
                    )
                )

            # heuristic assuming ~5 features
            std = 1.0 / math.sqrt(n_residue + 5)
            self.conv_project = nn.Conv1D(
                channels=n_residue,
                kernel_size=1,
                use_bias=True,
                weight_initializer=mx.init.Uniform(std),
                bias_initializer="zero",
            )

            self.conv1 = conv1d(
                in_channels=n_skip, channels=n_skip, kernel_size=1
            )

            self.conv2 = conv1d(
                in_channels=n_skip, channels=self.mu, kernel_size=1
            )
            self.output_act = (
                nn.ELU()
                if act_type == "elu"
                else nn.Activation(act_type=act_type)
            )
            self.cross_entropy_loss = gluon.loss.SoftmaxCrossEntropyLoss()
Example #23
        return X
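
The dangling return X above is the tail of the DenseBlock used further down; the full class is not shown in this snippet. A minimal sketch in the d2l style, assuming the conv_block helper from Examples #5/#25 and "from mxnet import nd":

class DenseBlock(nn.Block):
    def __init__(self, num_convs, num_channels, **kwargs):
        super(DenseBlock, self).__init__(**kwargs)
        self.net = nn.Sequential()
        for _ in range(num_convs):
            self.net.add(conv_block(num_channels))

    def forward(self, X):
        for blk in self.net:
            Y = blk(X)
            # Concatenate input and output along the channel dimension
            X = nd.concat(X, Y, dim=1)
        return X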


# Transition block to reduce complexity: a 1x1 conv shrinks the channels and
# average pooling halves the height and width
def transition_block(num_channels):
    blk = nn.Sequential()
    blk.add(nn.BatchNorm(), nn.Activation('relu'),
            nn.Conv2D(num_channels, kernel_size=1),
            nn.AvgPool2D(pool_size=2, strides=2))
    return blk


# Build DenseNet
net = nn.Sequential()
net.add(nn.Conv2D(64, kernel_size=7, strides=2, padding=3), nn.BatchNorm(),
        nn.Activation('relu'), nn.MaxPool2D(pool_size=3, strides=2, padding=1))

num_channels, growth_rate = 64, 32  # current number of channels, growth rate
num_convs_in_dense_blocks = [4, 4, 4, 4]

for i, num_convs in enumerate(num_convs_in_dense_blocks):
    net.add(DenseBlock(num_convs, growth_rate))
    # Record channels of last block
    num_channels += num_convs * growth_rate
    # Add transition block between dense blocks
    if i != len(num_convs_in_dense_blocks) - 1:
        num_channels //= 2
        net.add(transition_block(num_channels))

net.add(nn.BatchNorm(), nn.Activation('relu'), nn.GlobalAvgPool2D(),
        nn.Dense(10))
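
A quick shape walk over the assembled network (assumes the DenseBlock sketch above, the conv_block helper from Examples #5/#25, and "import mxnet as mx"):

X = mx.nd.random.uniform(shape=(1, 1, 96, 96))
net.initialize()
for blk in net:
    X = blk(X)
    print(blk.name, 'output shape:\t', X.shape)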
Example #24
    def __init__(self,
                 repeat=6,
                 penultimate_filters=4032,
                 stem_filters=96,
                 filters_multiplier=2,
                 classes=1000,
                 use_aux=True):
        super(NASNetALarge, self).__init__()

        filters = penultimate_filters // 24

        self.conv0 = nn.HybridSequential(prefix='')
        self.conv0.add(
            nn.Conv2D(stem_filters, 3, padding=0, strides=2, use_bias=False))
        self.conv0.add(nn.BatchNorm(momentum=0.1, epsilon=0.001))

        self.cell_stem_0 = CellStem0(stem_filters,
                                     num_filters=filters //
                                     (filters_multiplier**2))
        self.cell_stem_1 = CellStem1(num_filters=filters // filters_multiplier)

        self.norm_1 = nn.HybridSequential(prefix='')
        self.norm_1.add(
            FirstCell(out_channels_left=filters // 2,
                      out_channels_right=filters))
        for _ in range(repeat - 1):
            self.norm_1.add(
                NormalCell(out_channels_left=filters,
                           out_channels_right=filters))

        self.reduction_cell_0 = ReductionCell0(out_channels_left=2 * filters,
                                               out_channels_right=2 * filters)

        self.norm_2 = nn.HybridSequential(prefix='')
        self.norm_2.add(
            FirstCell(out_channels_left=filters,
                      out_channels_right=2 * filters))
        for _ in range(repeat - 1):
            self.norm_2.add(
                NormalCell(out_channels_left=2 * filters,
                           out_channels_right=2 * filters))

        if use_aux:
            self.out_aux = nn.HybridSequential(prefix='')
            self.out_aux.add(
                nn.Conv2D(filters // 3, kernel_size=1, use_bias=False))
            self.out_aux.add(nn.BatchNorm(epsilon=0.001))
            self.out_aux.add(nn.Activation('relu'))
            self.out_aux.add(
                nn.Conv2D(2 * filters, kernel_size=5, use_bias=False))
            self.out_aux.add(nn.BatchNorm(epsilon=0.001))
            self.out_aux.add(nn.Activation('relu'))
            self.out_aux.add(nn.Dense(classes))
        else:
            self.out_aux = None

        self.reduction_cell_1 = ReductionCell1(out_channels_left=4 * filters,
                                               out_channels_right=4 * filters)

        self.norm_3 = nn.HybridSequential(prefix='')
        self.norm_3.add(
            FirstCell(out_channels_left=2 * filters,
                      out_channels_right=4 * filters))
        for _ in range(repeat - 1):
            self.norm_3.add(
                NormalCell(out_channels_left=4 * filters,
                           out_channels_right=4 * filters))

        self.out = nn.HybridSequential(prefix='')
        self.out.add(nn.Activation('relu'))
        self.out.add(nn.GlobalAvgPool2D())
        self.out.add(nn.Dropout(0.5))
        self.out.add(nn.Dense(classes))
Example #25
def transition_block(num_channels):
    blk = nn.Sequential()
    blk.add(nn.BatchNorm(), nn.Activation('relu'),
            nn.Conv2D(num_channels, kernel_size=1),
            nn.AvgPool2D(pool_size=2, strides=2))
    return blk
Example #26
 def __init__(self, channels, **kwargs):
     super(PreActivation, self).__init__(**kwargs)
     with self.name_scope():
         self.bn = dpn_batch_norm(channels=channels)
         self.activ = nn.Activation('relu')
Example #27
    def forward(self, X):
        # If X is not on the same device, copy moving_mean and moving_var to the device where X lives
        if self.moving_mean.context != X.context:
            self.moving_mean = self.moving_mean.copyto(X.context)
            self.moving_var = self.moving_var.copyto(X.context)
        # Save the updated moving_mean and moving_var
        Y, self.moving_mean, self.moving_var = batch_norm(X,
                                                          self.gamma.data(),
                                                          self.beta.data(),
                                                          self.moving_mean,
                                                          self.moving_var,
                                                          eps=1e-5,
                                                          momentum=0.9)
        return Y


net = nn.Sequential()
net.add(nn.Conv2D(6, kernel_size=5), BatchNorm(6, num_dims=4),
        nn.Activation('sigmoid'), nn.MaxPool2D(pool_size=2, strides=2),
        nn.Conv2D(16, kernel_size=5), BatchNorm(16, num_dims=4),
        nn.Activation('sigmoid'), nn.MaxPool2D(pool_size=2, strides=2),
        nn.Dense(120), BatchNorm(120, num_dims=2), nn.Activation('sigmoid'),
        nn.Dense(84), BatchNorm(84, num_dims=2), nn.Activation('sigmoid'),
        nn.Dense(10))

lr, num_epochs, batch_size, ctx = 1.0, 5, 256, d2l.try_gpu()
net.initialize(ctx=ctx, init=init.Xavier())
trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': lr})
train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size)
d2l.train_ch5(net, train_iter, test_iter, batch_size, trainer, ctx, num_epochs)
Example #28
def _make_basic_conv(**kwargs):
    out = nn.HybridSequential(prefix='')
    out.add(nn.QConv2D(use_bias=False, **kwargs))
    out.add(nn.BatchNorm(epsilon=0.001))
    out.add(nn.Activation('relu'))
    return out
Example #29
    def __init__(self,
                 num_filter,
                 stride,
                 dim_match,
                 isBin=False,
                 prefix='',
                 **kwargs):
        super(ResidualUnit, self).__init__(**kwargs)

        self.dim_match = dim_match
        self.features = nn.HybridSequential()
        self.bn1 = nn.BatchNorm(use_global_stats=use_global_stats,
                                prefix=prefix + '_nb1_')
        self.act1 = nn.Activation('relu')

        self.scale = nn.Conv2D(channels=num_filter,
                               kernel_size=(1, 1),
                               strides=stride,
                               use_bias=False,
                               prefix=prefix + '_sc_')
        if isBin:
            self.features.add(
                nn.QConv2D(channels=int(num_filter * 0.25),
                           kernel_size=(1, 1),
                           strides=(1, 1),
                           padding=(0, 0),
                           use_bias=False,
                           apply_scaling=True,
                           prefix=prefix + '_conv1_'))
            self.features.add(
                nn.BatchNorm(use_global_stats=use_global_stats,
                             prefix=prefix + '_nb2_'))
            self.features.add(nn.Activation('relu'))
            self.features.add(
                nn.QConv2D(channels=int(num_filter * 0.25),
                           kernel_size=(3, 3),
                           strides=stride,
                           padding=(1, 1),
                           use_bias=False,
                           apply_scaling=True,
                           prefix=prefix + '_conv2_'))
            self.features.add(
                nn.BatchNorm(use_global_stats=use_global_stats,
                             prefix=prefix + '_nb3_'))
            self.features.add(nn.Activation('relu'))
            self.features.add(
                nn.QConv2D(channels=num_filter,
                           kernel_size=(1, 1),
                           strides=(1, 1),
                           padding=(0, 0),
                           use_bias=False,
                           apply_scaling=True,
                           prefix=prefix + '_conv3_'))
        else:
            self.features.add(
                nn.Conv2D(channels=int(num_filter * 0.25),
                          kernel_size=(1, 1),
                          strides=(1, 1),
                          padding=(0, 0),
                          use_bias=False,
                          prefix=prefix + '_conv1_'))
            self.features.add(
                nn.BatchNorm(use_global_stats=use_global_stats,
                             prefix=prefix + '_nb2_'))
            self.features.add(nn.Activation('relu'))
            self.features.add(
                nn.Conv2D(channels=int(num_filter * 0.25),
                          kernel_size=(3, 3),
                          strides=stride,
                          padding=(1, 1),
                          use_bias=False,
                          prefix=prefix + '_conv2_'))
            self.features.add(
                nn.BatchNorm(use_global_stats=use_global_stats,
                             prefix=prefix + '_nb3_'))
            self.features.add(nn.Activation('relu'))
            self.features.add(
                nn.Conv2D(channels=num_filter,
                          kernel_size=(1, 1),
                          strides=(1, 1),
                          padding=(0, 0),
                          use_bias=False,
                          prefix=prefix + '_conv3_'))
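
    # Hypothetical sketch (the forward pass is not shown in this snippet): a
    # hybrid_forward consistent with the layers built above, i.e. pre-activation
    # BN + ReLU, the bottleneck branch in self.features, and the 1x1 "scale"
    # convolution applied to the shortcut when input and output dims differ.
    def hybrid_forward(self, F, x):
        out = self.act1(self.bn1(x))
        shortcut = x if self.dim_match else self.scale(out)
        return self.features(out) + shortcut
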
    def __init__(
        self,
        num_scenes,
        num_actions,
        model_depth,
        final_spatial_kernel=7,
        final_temporal_kernel=2,
        with_bias=False,
    ):
        super(R2Plus2D_MT, self).__init__()
        self.comp_count = 0
        self.base = nn.Sequential(prefix='base_')
        with self.base.name_scope():
            self.base.add(
                nn.Conv3D(channels=45,
                          kernel_size=(1, 7, 7),
                          strides=(1, 2, 2),
                          padding=(0, 3, 3),
                          use_bias=with_bias), nn.BatchNorm(),
                nn.Activation(activation='relu'),
                nn.Conv3D(channels=64,
                          kernel_size=(3, 1, 1),
                          strides=(1, 1, 1),
                          padding=(1, 0, 0),
                          use_bias=with_bias), nn.BatchNorm(),
                nn.Activation(activation='relu'))

        self.base_name = self.set_base_name()
        (n2, n3, n4, n5) = BLOCK_CONFIG[model_depth]

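        # self.conv2: n2 residual blocks at 64 channels, no downsampling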
        self.conv2_name = []
        self.conv2 = nn.Sequential(prefix='conv2_')
        with self.conv2.name_scope():
            for _ in range(n2):
                self.conv2_name.extend(
                    self.add_comp_count_index(change_channels=False,
                                              comp_index=self.comp_count,
                                              prefix=self.conv2.prefix))
                self.conv2.add(
                    R3DBlock(input_filter=64,
                             num_filter=64,
                             comp_index=self.comp_count,
                             use_bias=with_bias))
                self.comp_count += 1

        # self.conv3
        self.conv3_name = []
        self.conv3 = nn.Sequential(prefix='conv3_')
        with self.conv3.name_scope():
            print("this in conv3 comp_count is ", self.comp_count)
            self.conv3_name.extend(
                self.add_comp_count_index(change_channels=True,
                                          downsampling=True,
                                          comp_index=self.comp_count))
            self.conv3.add(
                R3DBlock(input_filter=64,
                         num_filter=128,
                         comp_index=self.comp_count,
                         downsampling=True,
                         use_bias=with_bias))

            self.comp_count += 1
            for _ in range(n3 - 1):
                self.conv3_name.extend(
                    self.add_comp_count_index(change_channels=False,
                                              downsampling=False,
                                              comp_index=self.comp_count))
                self.conv3.add(
                    R3DBlock(input_filter=128,
                             num_filter=128,
                             comp_index=self.comp_count,
                             use_bias=with_bias))
                self.comp_count += 1

        # self.conv4
        self.conv4_name = []
        self.conv4 = nn.Sequential(prefix='conv4_')
        with self.conv4.name_scope():
            self.conv4_name.extend(
                self.add_comp_count_index(change_channels=True,
                                          downsampling=True,
                                          comp_index=self.comp_count))
            self.conv4.add(
                R3DBlock(128,
                         256,
                         comp_index=self.comp_count,
                         downsampling=True,
                         use_bias=with_bias))
            self.comp_count += 1

            for _ in range(n4 - 1):
                self.conv4_name.extend(
                    self.add_comp_count_index(change_channels=False,
                                              downsampling=False,
                                              comp_index=self.comp_count))
                self.conv4.add(
                    R3DBlock(256,
                             256,
                             comp_index=self.comp_count,
                             use_bias=with_bias))
                self.comp_count += 1

        # self.conv5
        self.conv5_name = []
        self.conv5 = nn.Sequential(prefix='conv5_')
        with self.conv5.name_scope():
            self.conv5_name.extend(
                self.add_comp_count_index(change_channels=True,
                                          downsampling=True,
                                          comp_index=self.comp_count))
            self.conv5.add(
                R3DBlock(256,
                         512,
                         comp_index=self.comp_count,
                         downsampling=True,
                         use_bias=with_bias))
            self.comp_count += 1
            for _ in range(n5 - 1):
                self.conv5_name.extend(
                    self.add_comp_count_index(comp_index=self.comp_count))
                self.conv5.add(
                    R3DBlock(512, 512, self.comp_count, use_bias=with_bias))
                self.comp_count += 1

        # final output of conv5 is [512,t/8,7,7] #512x1x7x7
        # for static scene tagging
        self.scene_conv = nn.Sequential()
        self.scene_conv.add(
            nn.Conv3D(256, kernel_size=(1, 3, 3), strides=(1, 2, 2)),
            nn.BatchNorm(),
            nn.Activation('relu'))  # shape 256*1*2*2 # reshape(1024)
        self.scene_drop = nn.Dropout(rate=0.3)
        self.scene_output = nn.Dense(num_scenes)

        # for action classification
        self.action_conv = nn.Sequential()
        self.action_conv.add(
            nn.Conv3D(512,
                      kernel_size=(1, 3, 3),
                      strides=(1, 1, 1),
                      padding=(0, 1, 1)), nn.BatchNorm(),
            nn.Activation('relu'))
        self.action_avg = nn.AvgPool3D(pool_size=(final_temporal_kernel,
                                                  final_spatial_kernel,
                                                  final_spatial_kernel),
                                       strides=(1, 1, 1),
                                       padding=(0, 0, 0))

        self.action_output = nn.Dense(units=num_actions)
        self.dense0_name = ['final_fc_weight', 'final_fc_bias']
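
# Hypothetical usage sketch (constructor only; R3DBlock, BLOCK_CONFIG, the forward
# pass and the helper methods referenced above live elsewhere in the original
# source, and the argument values below are illustrative):
# net = R2Plus2D_MT(num_scenes=365, num_actions=400, model_depth=18)
# net.initialize(init=init.Xavier(), ctx=mx.gpu(0))
# # a clip batch of shape (N, 3, T, H, W) would then yield scene and action logits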