Example #1
 def __init__(self, in_channel, **kwargs):
     super(TCL, self).__init__()
     self.branch1 = nn.HybridSequential()
     self.branch1.add(
         nn.Conv3D(in_channels=in_channel,
                   channels=32,
                   kernel_size=(3, 1, 1),
                   strides=(1, 1, 1),
                   padding=(1, 0, 0),
                   weight_initializer=init.Xavier(),
                   bias_initializer='zero'),
         nn.Activation('relu'),
         #                nn.BatchNorm(),
         nn.MaxPool3D(pool_size=(2, 1, 1), strides=(2, 1, 1)))
     self.branch2 = nn.HybridSequential()
     self.branch2.add(
         nn.Conv3D(in_channels=in_channel,
                   channels=32,
                   kernel_size=(5, 1, 1),
                   strides=(1, 1, 1),
                   padding=(2, 0, 0),
                   weight_initializer=init.Xavier(),
                   bias_initializer='zero'),
         nn.Activation('relu'),
         #                nn.BatchNorm(),
         nn.MaxPool3D(pool_size=(2, 1, 1), strides=(2, 1, 1)))
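Both branches preserve the temporal length through the convolution (kernel 3 with padding 1, kernel 5 with padding 2) and then halve it with the (2, 1, 1) max pool. A minimal shape check of one branch, with assumed dummy sizes:

import mxnet as mx
from mxnet.gluon import nn

branch = nn.HybridSequential()
branch.add(nn.Conv3D(in_channels=16, channels=32, kernel_size=(3, 1, 1),
                     strides=(1, 1, 1), padding=(1, 0, 0)),
           nn.Activation('relu'),
           nn.MaxPool3D(pool_size=(2, 1, 1), strides=(2, 1, 1)))
branch.initialize()
x = mx.nd.zeros((1, 16, 8, 7, 7))  # NCDHW: 8 frames of 7x7 feature maps
print(branch(x).shape)             # (1, 32, 4, 7, 7): conv keeps T=8, pool halves it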
Example #2
 def __init__(self,**kwargs):
     super(Refiner_hybrid,self).__init__(**kwargs)
     self.layer1 = nn.HybridSequential()
     self.layer1.add(
         nn.Conv3D(32, kernel_size=4, padding=2),
         nn.BatchNorm(in_channels=32),
         nn.LeakyReLU(.2),
         nn.MaxPool3D(pool_size=2)
     )
     self.layer2 = nn.HybridSequential()
     self.layer2.add(
         nn.Conv3D(64, kernel_size=4, padding=2),
         nn.BatchNorm(in_channels=64),
         nn.LeakyReLU(.2),
         nn.MaxPool3D(pool_size=2)
     )
     self.layer3 = nn.HybridSequential()
     self.layer3.add(
         nn.Conv3D(128, kernel_size=4, padding=2),
         nn.BatchNorm(in_channels=128),
         nn.LeakyReLU(.2),
         nn.MaxPool3D(pool_size=2)
     )
     self.layer4 = nn.HybridSequential()
     self.layer4.add(
         nn.Dense(2048,activation = 'relu')
     )
     self.layer5 = nn.HybridSequential()
     self.layer5.add(
         nn.Dense(8192,activation='relu')
     )
     self.layer6 = nn.HybridSequential()
     self.layer6.add(
         nn.Conv3DTranspose(64, kernel_size=4, strides=2, padding=1, use_bias=False ),
         nn.BatchNorm(in_channels = 64),
         nn.Activation('relu')
     )
     self.layer7 = nn.HybridSequential()
     self.layer7.add(
         nn.Conv3DTranspose(32, kernel_size=4, strides=2, padding=1, use_bias=False),
         nn.BatchNorm(in_channels =32),
         nn.Activation('relu')
     )
     self.layer8 = nn.HybridSequential()
     self.layer8.add(
         nn.Conv3DTranspose(1, kernel_size=4, strides=2, padding=1, use_bias=False),
         nn.Activation('sigmoid')
     )
Example #3
    def __init__(self,
                 block,
                 layers,
                 channels,
                 classes=1000,
                 thumbnail=False,
                 caption_length=50,
                 **kwargs):
        super(ResNetV1, self).__init__(**kwargs)
        assert len(layers) == len(channels) - 1
        with self.name_scope():
            self.caption_length = caption_length
            self.features = nn.HybridSequential(prefix='')
            if thumbnail:
                self.features.add(_conv3x3(channels[0], 1, 0))
            else:
                self.features.add(
                    nn.Conv3D(channels[0], 7, 2, 3, use_bias=False))
                self.features.add(nn.BatchNorm())
                self.features.add(nn.Activation('relu'))
                self.features.add(nn.MaxPool3D(3, 2, 1))

            for i, num_layer in enumerate(layers):
                stride = 1 if i == 0 else 2
                self.features.add(
                    self._make_layer(block,
                                     num_layer,
                                     channels[i + 1],
                                     stride,
                                     i + 1,
                                     in_channels=channels[i]))
            self.features.add(nn.GlobalAvgPool3D())
            #self.features.add(nn.Dense(classes, in_units=in_channels))
            self.output = nn.Dense(caption_length * caption_length)
Example #4
    def _make_3d_feature(self, config_3d_conv, config_3d_pool, batch_normal):
        featurizer = nn.HybridSequential(prefix='')
        conv_layer, conv_channels = config_3d_conv
        pool_size, pool_stride, pool_padding = config_3d_pool
        assert len(conv_layer) == len(conv_channels) == len(pool_size) == len(
            pool_stride) == len(pool_padding)

        for i, num in enumerate(conv_layer):
            for _ in range(num):
                featurizer.add(
                    nn.Conv3D(channels=conv_channels[i],
                              kernel_size=(3, 3, 3),
                              strides=(1, 1, 1),
                              padding=(1, 1, 1),
                              weight_initializer=init.Xavier(
                                  rnd_type='gaussian',
                                  factor_type='out',
                                  magnitude=2),
                              bias_initializer='zero'))
                if batch_normal:
                    featurizer.add(nn.BatchNorm())
                featurizer.add(nn.Activation('relu'))
            featurizer.add(
                nn.MaxPool3D(pool_size=pool_size[i],
                             strides=pool_stride[i],
                             padding=pool_padding[i]))
        # flatten to (N, 8192)
        featurizer.add(nn.Flatten())
        return featurizer
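The two config arguments must be index-aligned lists of equal length (hence the chained assert). A hypothetical configuration in the C3D/VGG style, with assumed values, just to show the expected structure:

config_3d_conv = ([1, 1, 2], [64, 128, 256])          # convs per stage, channels per stage
config_3d_pool = ([(1, 2, 2), (2, 2, 2), (2, 2, 2)],  # pool_size per stage
                  [(1, 2, 2), (2, 2, 2), (2, 2, 2)],  # pool_stride per stage
                  [(0, 0, 0), (0, 0, 0), (0, 0, 0)])  # pool_padding per stage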
Example #5
def test_pool():
    layers1d = [
        nn.MaxPool1D(),
        nn.MaxPool1D(3),
        nn.MaxPool1D(3, 2),
        nn.AvgPool1D(),
        nn.AvgPool1D(count_include_pad=False),
        nn.GlobalAvgPool1D(),
        ]
    for layer in layers1d:
        check_layer_forward(layer, (1, 2, 10))


    layers2d = [
        nn.MaxPool2D(),
        nn.MaxPool2D((3, 3)),
        nn.MaxPool2D(3, 2),
        nn.AvgPool2D(),
        nn.AvgPool2D(count_include_pad=False),
        nn.GlobalAvgPool2D(),
        ]
    for layer in layers2d:
        check_layer_forward(layer, (1, 2, 10, 10))

    layers3d = [
        nn.MaxPool3D(),
        nn.MaxPool3D((3, 3, 3)),
        nn.MaxPool3D(3, 2),
        nn.AvgPool3D(),
        nn.AvgPool3D(count_include_pad=False),
        nn.GlobalAvgPool3D(),
        ]
    for layer in layers3d:
        check_layer_forward(layer, (1, 2, 10, 10, 10))

    # test ceil_mode
    x = mx.nd.zeros((2, 2, 10, 10))

    layer = nn.MaxPool2D(3, ceil_mode=False)
    layer.collect_params().initialize()
    assert (layer(x).shape==(2, 2, 3, 3))

    layer = nn.MaxPool2D(3, ceil_mode=True)
    layer.collect_params().initialize()
    assert (layer(x).shape==(2, 2, 4, 4))
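The two ceil_mode assertions follow from the usual pooling arithmetic: with input size n = 10, window k = 3, stride s = 3 (MXNet defaults the stride to the window size) and no padding, floor((10 - 3) / 3) + 1 = 3 while ceil((10 - 3) / 3) + 1 = 4. The same check as a standalone helper:

import math

def pool_out(n, k, s, p=0, ceil_mode=False):
    # one-axis pooling output length: floor/ceil((n + 2p - k) / s) + 1
    f = math.ceil if ceil_mode else math.floor
    return f((n + 2 * p - k) / s) + 1

print(pool_out(10, 3, 3))                  # 3 -> shape (2, 2, 3, 3)
print(pool_out(10, 3, 3, ceil_mode=True))  # 4 -> shape (2, 2, 4, 4)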
Example #6
 def __init__(self, dr_rate, **kwargs):
     super(LipNet, self).__init__(**kwargs)
     with self.name_scope():
         self.conv1 = nn.Conv3D(32, kernel_size=(3, 5, 5), strides=(1, 2, 2), padding=(1, 2, 2))
         self.bn1 = nn.InstanceNorm(in_channels=32)
         self.dr1 = nn.Dropout(dr_rate, axes=(1, 2))
         self.pool1 = nn.MaxPool3D((1, 2, 2), (1, 2, 2))
         self.conv2 = nn.Conv3D(64, kernel_size=(3, 5, 5), strides=(1, 1, 1), padding=(1, 2, 2))
         self.bn2 = nn.InstanceNorm(in_channels=64)
         self.dr2 = nn.Dropout(dr_rate, axes=(1, 2))
         self.pool2 = nn.MaxPool3D((1, 2, 2), (1, 2, 2))
         self.conv3 = nn.Conv3D(96, kernel_size=(3, 3, 3), strides=(1, 1, 1), padding=(1, 2, 2))
         self.bn3 = nn.InstanceNorm(in_channels=96)
         self.dr3 = nn.Dropout(dr_rate, axes=(1, 2))
         self.pool3 = nn.MaxPool3D((1, 2, 2), (1, 2, 2))
         self.gru1 = rnn.GRU(256, bidirectional=True)
         self.gru2 = rnn.GRU(256, bidirectional=True)
         self.dense = nn.Dense(27+1, flatten=False)
Example #7
 def __init__(self, out_channels, **kwargs):
     super(TransitionBlockDown, self).__init__(**kwargs)
     self.ops = nn.HybridSequential()
     self.ops.add(
         nn.BatchNorm(), nn.Activation(activation='relu'),
         nn.Conv3D(channels=out_channels,
                   kernel_size=1,
                   strides=1,
                   use_bias=False), nn.MaxPool3D(pool_size=2, strides=2))
Example #8
 def __init__(self, c1, c2):
     '''
         3 convolutions, which go from channels=c1->c2
     '''
     super(Reduction, self).__init__()
     self.conv1 = BasicConv(c1)
     self.conv2 = BasicConv(c2)
     self.conv3 = BasicConv(c2)
     self.pool = nn.MaxPool3D(pool_size=2, strides=1)
Example #9
    def __init__(self,
                 nclass,
                 input_channel=3,
                 batch_normal=True,
                 dropout_ratio=0.8,
                 init_std=0.001,
                 **kwargs):
        super(P3D, self).__init__()
        self.nclass = nclass
        self.dropout_ratio = dropout_ratio
        self.init_std = init_std
        self.expansion = 1

        with self.name_scope():
            self.conv1 = nn.Conv3D(in_channels=input_channel,
                                   channels=64,
                                   kernel_size=(1, 7, 7),
                                   strides=(1, 2, 2),
                                   padding=(0, 3, 3),
                                   use_bias=False)
            self.bn1 = nn.BatchNorm(in_channels=64)
            self.relu = nn.Activation('relu')
            self.maxpool = nn.MaxPool3D(pool_size=(1, 3, 3),
                                        strides=(1, 2, 2),
                                        padding=(0, 1, 1))
            self.conv2 = nn.HybridSequential()
            self.conv2.add(
                P3D_block('A', 64, 64 * self.expansion, 2),
                P3D_block('B', 64 * self.expansion, 64 * self.expansion),
                P3D_block('C', 64 * self.expansion, 64 * self.expansion))
            self.conv3 = nn.HybridSequential()
            self.conv3.add(
                P3D_block('A', 64 * self.expansion, 128 * self.expansion, 2),
                P3D_block('B', 128 * self.expansion, 128 * self.expansion),
                P3D_block('C', 128 * self.expansion, 128 * self.expansion),
                P3D_block('A', 128 * self.expansion, 128 * self.expansion))
            self.conv4 = nn.HybridSequential()
            self.conv4.add(
                P3D_block('B', 128 * self.expansion, 256 * self.expansion, 2),
                P3D_block('C', 256 * self.expansion, 256 * self.expansion),
                P3D_block('A', 256 * self.expansion, 256 * self.expansion),
                P3D_block('B', 256 * self.expansion, 256 * self.expansion),
                P3D_block('C', 256 * self.expansion, 256 * self.expansion),
                P3D_block('A', 256 * self.expansion, 256 * self.expansion))
            self.conv5 = nn.HybridSequential()
            self.conv5.add(
                P3D_block('B', 256 * self.expansion, 512 * self.expansion, 2),
                P3D_block('C', 512 * self.expansion, 512 * self.expansion),
                P3D_block('A', 512 * self.expansion, 512))
            self.avg_pool = nn.AvgPool3D(pool_size=(1, 3, 3))
            self.output = nn.Dense(
                in_units=512,
                units=nclass,
                weight_initializer=init.Normal(sigma=init_std))
Example #10
def _make_branch(use_pool=None, *conv_settings):
    out = nn.HybridSequential(prefix='')
    if use_pool == 'max':
        # 1 is for the depth dimension for video inflation
        out.add(
            nn.MaxPool3D(pool_size=(3, 3, 3),
                         strides=(1, 1, 1),
                         padding=(1, 1, 1)))
    setting_names = ['channels', 'kernel_size', 'strides', 'padding']
    for setting in conv_settings:
        kwargs = {}
        for i, value in enumerate(setting):
            if value is not None:
                kwargs[setting_names[i]] = value
        out.add(_make_unit3d(**kwargs))

    return out
Example #11
def _make_branch(use_pool, norm_layer, norm_kwargs, *conv_settings):
    out = nn.HybridSequential(prefix='')
    if use_pool == 'avg':
        out.add(nn.AvgPool3D(pool_size=3, strides=1, padding=1))
    elif use_pool == 'max':
        out.add(nn.MaxPool3D(pool_size=3, strides=1, padding=1))
    setting_names = ['in_channels', 'channels', 'kernel_size', 'strides', 'padding']
    for setting in conv_settings:
        kwargs = {}
        for i, value in enumerate(setting):
            if value is not None:
                if setting_names[i] == 'in_channels':
                    in_channels = value
                elif setting_names[i] == 'channels':
                    channels = value
                else:
                    kwargs[setting_names[i]] = value
        out.add(_make_basic_conv(in_channels, channels, norm_layer, norm_kwargs, **kwargs))
    return out
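In both _make_branch helpers a None entry in a settings tuple means "fall back to the layer default"; only non-None positions become keyword arguments. A minimal sketch of that mapping with a hypothetical tuple:

setting_names = ['in_channels', 'channels', 'kernel_size', 'strides', 'padding']
setting = (64, 96, 1, None, None)  # hypothetical branch setting
kwargs = {setting_names[i]: v for i, v in enumerate(setting) if v is not None}
print(kwargs)  # {'in_channels': 64, 'channels': 96, 'kernel_size': 1}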
Example #12
 def __init__(self,
              nclass,
              input_channel=3,
              batch_normal=True,
              dropout_ratio=0.8,
              init_std=0.001,
              **kwargs):
     super(FstCN, self).__init__()
     self.nclass = nclass
     self.new_length = 16 + 1
     #self.feat_dim = 4096
     self.dropout_ratio = dropout_ratio
     self.init_std = init_std
     #        self.config_3d_layer = [2,2,2,2]
     #        self.config_3d_temporal_stride = [1,2,2,2]
     with self.name_scope():
         self.SCL1 = nn.HybridSequential()
         self.SCL1.add(
             nn.Conv3D(in_channels=3,
                       channels=96,
                       kernel_size=(1, 7, 7),
                       strides=(1, 2, 2),
                       padding=(0, 3, 3),
                       weight_initializer=init.Xavier(),
                       bias_initializer='zero'), nn.Activation('relu'),
             nn.BatchNorm(),
             nn.MaxPool3D(pool_size=(1, 3, 3), strides=(1, 2, 2)))
         self.SCL2 = nn.HybridSequential()
         self.SCL2.add(
             nn.Conv3D(in_channels=96,
                       channels=256,
                       kernel_size=(1, 5, 5),
                       strides=(1, 2, 2),
                       padding=(0, 2, 2),
                       weight_initializer=init.Xavier(),
                       bias_initializer='zero'), nn.Activation('relu'),
             nn.BatchNorm(),
             nn.MaxPool3D(pool_size=(1, 3, 3), strides=(1, 2, 2)))
         self.SCL3 = nn.HybridSequential()
         self.SCL3.add(
             nn.Conv3D(in_channels=256,
                       channels=512,
                       kernel_size=(1, 3, 3),
                       strides=(1, 1, 1),
                       padding=(0, 1, 1),
                       weight_initializer=init.Xavier(),
                       bias_initializer='zero'), nn.Activation('relu'),
             nn.BatchNorm())
         self.SCL4 = nn.HybridSequential()
         self.SCL4.add(
             nn.Conv3D(in_channels=512,
                       channels=512,
                       kernel_size=(1, 3, 3),
                       strides=(1, 1, 1),
                       padding=(0, 1, 1),
                       weight_initializer=init.Xavier(),
                       bias_initializer='zero'), nn.Activation('relu'),
             nn.BatchNorm())
         self.Parallel_temporal = nn.HybridSequential()
         self.Parallel_temporal.add(
             nn.Conv3D(in_channels=512,
                       channels=128,
                       kernel_size=(1, 3, 3),
                       strides=(1, 1, 1),
                       padding=(0, 1, 1),
                       weight_initializer=init.Xavier(),
                       bias_initializer='zero'), nn.Activation('relu'),
             nn.BatchNorm(),
             nn.MaxPool3D(pool_size=(1, 3, 3), strides=(1, 3, 3)), TCL(128))
         self.Parallel_spatial = nn.HybridSequential()
         self.Parallel_spatial.add(
             nn.Conv2D(in_channels=512,
                       channels=128,
                       kernel_size=(3, 3),
                       strides=(1, 1),
                       padding=(1, 1),
                       weight_initializer=init.Xavier(),
                       bias_initializer='zero'), nn.Activation('relu'),
             nn.MaxPool2D(pool_size=(3, 3), strides=(3, 3)))
         self.tem_fc = nn.HybridSequential()
         self.tem_fc.add(
             nn.Dense(in_units=8192,
                      units=4096,
                      weight_initializer=init.Normal(sigma=init_std)),
             nn.Dropout(rate=dropout_ratio),
             nn.Dense(in_units=4096,
                      units=2048,
                      weight_initializer=init.Normal(sigma=init_std)),
         )
         self.spa_fc = nn.HybridSequential()
         self.spa_fc.add(
             nn.Dense(in_units=2048,
                      units=4096,
                      weight_initializer=init.Normal(sigma=init_std)),
             nn.Dropout(rate=dropout_ratio),
             nn.Dense(in_units=4096,
                      units=2048,
                      weight_initializer=init.Normal(sigma=init_std)),
         )
         self.fc = nn.Dense(in_units=4096,
                            units=2048,
                            weight_initializer=init.Normal(sigma=init_std))
         self.out = nn.Dense(in_units=2048,
                             units=nclass,
                             weight_initializer=init.Normal(sigma=init_std))
Example #13
    def __init__(self,
                 nclass,
                 block=Bottleneck,
                 layers=None,
                 pretrained=False,
                 pretrained_base=False,
                 num_segments=1,
                 num_crop=1,
                 bn_eval=True,
                 bn_frozen=False,
                 partial_bn=False,
                 frozen_stages=-1,
                 dropout_ratio=0.5,
                 init_std=0.01,
                 alpha=8,
                 beta_inv=8,
                 fusion_conv_channel_ratio=2,
                 fusion_kernel_size=5,
                 width_per_group=64,
                 num_groups=1,
                 slow_temporal_stride=16,
                 fast_temporal_stride=2,
                 slow_frames=4,
                 fast_frames=32,
                 norm_layer=BatchNorm,
                 norm_kwargs=None,
                 ctx=None,
                 **kwargs):
        super(SlowFast, self).__init__()
        self.num_segments = num_segments
        self.num_crop = num_crop
        self.dropout_ratio = dropout_ratio
        self.init_std = init_std
        self.alpha = alpha
        self.beta_inv = beta_inv
        self.fusion_conv_channel_ratio = fusion_conv_channel_ratio
        self.fusion_kernel_size = fusion_kernel_size
        self.width_per_group = width_per_group
        self.num_groups = num_groups
        self.dim_inner = self.num_groups * self.width_per_group
        self.out_dim_ratio = self.beta_inv // self.fusion_conv_channel_ratio
        self.slow_temporal_stride = slow_temporal_stride
        self.fast_temporal_stride = fast_temporal_stride
        self.slow_frames = slow_frames
        self.fast_frames = fast_frames

        with self.name_scope():
            # build fast pathway
            fast = nn.HybridSequential(prefix='fast_')
            with fast.name_scope():
                self.fast_conv1 = nn.Conv3D(in_channels=3, channels=self.width_per_group // self.beta_inv,
                                            kernel_size=(5, 7, 7), strides=(1, 2, 2), padding=(2, 3, 3), use_bias=False)
                self.fast_bn1 = norm_layer(in_channels=self.width_per_group // self.beta_inv,
                                           **({} if norm_kwargs is None else norm_kwargs))
                self.fast_relu = nn.Activation('relu')
                self.fast_maxpool = nn.MaxPool3D(pool_size=(1, 3, 3), strides=(1, 2, 2), padding=(0, 1, 1))
            self.fast_res2 = self._make_layer_fast(inplanes=self.width_per_group // self.beta_inv,
                                                   planes=self.dim_inner // self.beta_inv,
                                                   num_blocks=layers[0],
                                                   head_conv=3,
                                                   norm_layer=norm_layer,
                                                   norm_kwargs=norm_kwargs,
                                                   layer_name='fast_res2_')
            self.fast_res3 = self._make_layer_fast(inplanes=self.width_per_group * 4 // self.beta_inv,
                                                   planes=self.dim_inner * 2 // self.beta_inv,
                                                   num_blocks=layers[1],
                                                   strides=2,
                                                   head_conv=3,
                                                   norm_layer=norm_layer,
                                                   norm_kwargs=norm_kwargs,
                                                   layer_name='fast_res3_')
            self.fast_res4 = self._make_layer_fast(inplanes=self.width_per_group * 8 // self.beta_inv,
                                                   planes=self.dim_inner * 4 // self.beta_inv,
                                                   num_blocks=layers[2],
                                                   strides=2,
                                                   head_conv=3,
                                                   norm_layer=norm_layer,
                                                   norm_kwargs=norm_kwargs,
                                                   layer_name='fast_res4_')
            self.fast_res5 = self._make_layer_fast(inplanes=self.width_per_group * 16 // self.beta_inv,
                                                   planes=self.dim_inner * 8 // self.beta_inv,
                                                   num_blocks=layers[3],
                                                   strides=2,
                                                   head_conv=3,
                                                   norm_layer=norm_layer,
                                                   norm_kwargs=norm_kwargs,
                                                   layer_name='fast_res5_')

            # build lateral connections
            self.lateral_p1 = nn.HybridSequential(prefix='lateral_p1_')
            with self.lateral_p1.name_scope():
                self.lateral_p1.add(nn.Conv3D(in_channels=self.width_per_group // self.beta_inv,
                                              channels=self.width_per_group // self.beta_inv * self.fusion_conv_channel_ratio,
                                              kernel_size=(self.fusion_kernel_size, 1, 1),
                                              strides=(self.alpha, 1, 1),
                                              padding=(self.fusion_kernel_size // 2, 0, 0),
                                              use_bias=False))
                self.lateral_p1.add(norm_layer(in_channels=self.width_per_group // self.beta_inv * self.fusion_conv_channel_ratio,
                                               **({} if norm_kwargs is None else norm_kwargs)))
                self.lateral_p1.add(nn.Activation('relu'))

            self.lateral_res2 = nn.HybridSequential(prefix='lateral_res2_')
            with self.lateral_res2.name_scope():
                self.lateral_res2.add(nn.Conv3D(in_channels=self.width_per_group * 4 // self.beta_inv,
                                                channels=self.width_per_group * 4 // self.beta_inv * self.fusion_conv_channel_ratio,
                                                kernel_size=(self.fusion_kernel_size, 1, 1),
                                                strides=(self.alpha, 1, 1),
                                                padding=(self.fusion_kernel_size // 2, 0, 0),
                                                use_bias=False))
                self.lateral_res2.add(norm_layer(in_channels=self.width_per_group * 4 // self.beta_inv * self.fusion_conv_channel_ratio,
                                                 **({} if norm_kwargs is None else norm_kwargs)))
                self.lateral_res2.add(nn.Activation('relu'))

            self.lateral_res3 = nn.HybridSequential(prefix='lateral_res3_')
            with self.lateral_res3.name_scope():
                self.lateral_res3.add(nn.Conv3D(in_channels=self.width_per_group * 8 // self.beta_inv,
                                                channels=self.width_per_group * 8 // self.beta_inv * self.fusion_conv_channel_ratio,
                                                kernel_size=(self.fusion_kernel_size, 1, 1),
                                                strides=(self.alpha, 1, 1),
                                                padding=(self.fusion_kernel_size // 2, 0, 0),
                                                use_bias=False))
                self.lateral_res3.add(norm_layer(in_channels=self.width_per_group * 8 // self.beta_inv * self.fusion_conv_channel_ratio,
                                                 **({} if norm_kwargs is None else norm_kwargs)))
                self.lateral_res3.add(nn.Activation('relu'))

            self.lateral_res4 = nn.HybridSequential(prefix='lateral_res4_')
            with self.lateral_res4.name_scope():
                self.lateral_res4.add(nn.Conv3D(in_channels=self.width_per_group * 16 // self.beta_inv,
                                                channels=self.width_per_group * 16 // self.beta_inv * self.fusion_conv_channel_ratio,
                                                kernel_size=(self.fusion_kernel_size, 1, 1),
                                                strides=(self.alpha, 1, 1),
                                                padding=(self.fusion_kernel_size // 2, 0, 0),
                                                use_bias=False))
                self.lateral_res4.add(norm_layer(in_channels=self.width_per_group * 16 // self.beta_inv * self.fusion_conv_channel_ratio,
                                                 **({} if norm_kwargs is None else norm_kwargs)))
                self.lateral_res4.add(nn.Activation('relu'))

            # build slow pathway
            slow = nn.HybridSequential(prefix='slow_')
            with slow.name_scope():
                self.slow_conv1 = nn.Conv3D(in_channels=3, channels=self.width_per_group,
                                            kernel_size=(1, 7, 7), strides=(1, 2, 2), padding=(0, 3, 3), use_bias=False)
                self.slow_bn1 = norm_layer(in_channels=self.width_per_group,
                                           **({} if norm_kwargs is None else norm_kwargs))
                self.slow_relu = nn.Activation('relu')
                self.slow_maxpool = nn.MaxPool3D(pool_size=(1, 3, 3), strides=(1, 2, 2), padding=(0, 1, 1))
            self.slow_res2 = self._make_layer_slow(inplanes=self.width_per_group + self.width_per_group // self.out_dim_ratio,
                                                   planes=self.dim_inner,
                                                   num_blocks=layers[0],
                                                   head_conv=1,
                                                   norm_layer=norm_layer,
                                                   norm_kwargs=norm_kwargs,
                                                   layer_name='slow_res2_')
            self.slow_res3 = self._make_layer_slow(inplanes=self.width_per_group * 4 + self.width_per_group * 4 // self.out_dim_ratio,
                                                   planes=self.dim_inner * 2,
                                                   num_blocks=layers[1],
                                                   strides=2,
                                                   head_conv=1,
                                                   norm_layer=norm_layer,
                                                   norm_kwargs=norm_kwargs,
                                                   layer_name='slow_res3_')
            self.slow_res4 = self._make_layer_slow(inplanes=self.width_per_group * 8 + self.width_per_group * 8 // self.out_dim_ratio,
                                                   planes=self.dim_inner * 4,
                                                   num_blocks=layers[2],
                                                   strides=2,
                                                   head_conv=3,
                                                   norm_layer=norm_layer,
                                                   norm_kwargs=norm_kwargs,
                                                   layer_name='slow_res4_')
            self.slow_res5 = self._make_layer_slow(inplanes=self.width_per_group * 16 + self.width_per_group * 16 // self.out_dim_ratio,
                                                   planes=self.dim_inner * 8,
                                                   num_blocks=layers[3],
                                                   strides=2,
                                                   head_conv=3,
                                                   norm_layer=norm_layer,
                                                   norm_kwargs=norm_kwargs,
                                                   layer_name='slow_res5_')

            # build classifier
            self.avg = nn.GlobalAvgPool3D()
            self.dp = nn.Dropout(rate=self.dropout_ratio)
            self.feat_dim = self.width_per_group * 32 // self.beta_inv + self.width_per_group * 32
            self.fc = nn.Dense(in_units=self.feat_dim, units=nclass, weight_initializer=init.Normal(sigma=self.init_std), use_bias=True)

            self.initialize(init.MSRAPrelu(), ctx=ctx)
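The lateral convolutions are what keep the two pathways time-aligned: a temporal stride of alpha (8 by default) downsamples the fast pathway's frame axis to the slow pathway's before the features are fused. A minimal sketch with the default alpha = 8, fusion_kernel_size = 5, and assumed channel counts:

import mxnet as mx
from mxnet.gluon import nn

lateral = nn.Conv3D(in_channels=8, channels=16, kernel_size=(5, 1, 1),
                    strides=(8, 1, 1), padding=(2, 0, 0), use_bias=False)
lateral.initialize()
x = mx.nd.zeros((1, 8, 32, 7, 7))  # 32 fast-pathway frames
print(lateral(x).shape)            # (1, 16, 4, 7, 7): 32 frames -> 4, matching the slow pathway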
Example #14
    def __init__(self,
                 nclass,
                 block,
                 layers,
                 shortcut_type='B',
                 block_design=('A', 'B', 'C'),
                 dropout_ratio=0.5,
                 num_segments=1,
                 num_crop=1,
                 feat_ext=False,
                 init_std=0.001,
                 ctx=None,
                 partial_bn=False,
                 norm_layer=BatchNorm,
                 norm_kwargs=None,
                 **kwargs):
        super(P3D, self).__init__()
        self.shortcut_type = shortcut_type
        self.block_design = block_design
        self.partial_bn = partial_bn
        self.dropout_ratio = dropout_ratio
        self.init_std = init_std
        self.num_segments = num_segments
        self.num_crop = num_crop
        self.feat_ext = feat_ext
        self.inplanes = 64
        self.feat_dim = 512 * block.expansion

        with self.name_scope():
            self.conv1 = nn.Conv3D(in_channels=3,
                                   channels=64,
                                   kernel_size=(1, 7, 7),
                                   strides=(1, 2, 2),
                                   padding=(0, 3, 3),
                                   use_bias=False)
            self.bn1 = norm_layer(
                in_channels=64, **({} if norm_kwargs is None else norm_kwargs))
            self.relu = nn.Activation('relu')
            self.pool = nn.MaxPool3D(pool_size=(2, 3, 3),
                                     strides=2,
                                     padding=(0, 1, 1))
            self.pool2 = nn.MaxPool3D(pool_size=(2, 1, 1),
                                      strides=(2, 1, 1),
                                      padding=0)

            if self.partial_bn:
                if norm_kwargs is not None:
                    norm_kwargs['use_global_stats'] = True
                else:
                    norm_kwargs = {}
                    norm_kwargs['use_global_stats'] = True

            # 3D layers are only for (layers1, layers2 and layers3), layers4 is C2D
            self.depth_3d = sum(layers[:3])
            self.layer_cnt = 0

            self.layer1 = self._make_res_layer(block=block,
                                               planes=64,
                                               blocks=layers[0],
                                               layer_name='layer1_')
            self.layer2 = self._make_res_layer(block=block,
                                               planes=128,
                                               blocks=layers[1],
                                               spatial_stride=2,
                                               layer_name='layer2_')
            self.layer3 = self._make_res_layer(block=block,
                                               planes=256,
                                               blocks=layers[2],
                                               spatial_stride=2,
                                               layer_name='layer3_')
            self.layer4 = self._make_res_layer(block=block,
                                               planes=512,
                                               blocks=layers[3],
                                               spatial_stride=2,
                                               layer_name='layer4_')

            self.avgpool = nn.GlobalAvgPool2D()
            self.dropout = nn.Dropout(rate=self.dropout_ratio)
            self.fc = nn.Dense(
                in_units=self.feat_dim,
                units=nclass,
                weight_initializer=init.Normal(sigma=self.init_std))
Example #15
    def __init__(self, nclass=1000, norm_layer=BatchNorm, num_segments=1,
                 norm_kwargs=None, partial_bn=False, pretrained_base=True,
                 dropout_ratio=0.5, init_std=0.01,
                 ctx=None, **kwargs):
        super(I3D_InceptionV1, self).__init__(**kwargs)

        self.num_segments = num_segments
        self.feat_dim = 1024
        self.dropout_ratio = dropout_ratio
        self.init_std = init_std

        with self.name_scope():
            self.features = nn.HybridSequential(prefix='')

            self.features.add(_make_basic_conv(in_channels=3, channels=64, kernel_size=7, strides=2, padding=3, norm_layer=norm_layer, norm_kwargs=norm_kwargs))
            self.features.add(nn.MaxPool3D(pool_size=(1, 3, 3), strides=(1, 2, 2), padding=(0, 1, 1)))

            if partial_bn:
                if norm_kwargs is not None:
                    norm_kwargs['use_global_stats'] = True
                else:
                    norm_kwargs = {}
                    norm_kwargs['use_global_stats'] = True

            self.features.add(_make_basic_conv(in_channels=64, channels=64, kernel_size=1, norm_layer=norm_layer, norm_kwargs=norm_kwargs))
            self.features.add(_make_basic_conv(in_channels=64, channels=192, kernel_size=3, padding=(1, 1, 1), norm_layer=norm_layer, norm_kwargs=norm_kwargs))
            self.features.add(nn.MaxPool3D(pool_size=(1, 3, 3), strides=(1, 2, 2), padding=(0, 1, 1)))

            self.features.add(_make_Mixed_3a(192, 32, 'Mixed_3a_', norm_layer, norm_kwargs))
            self.features.add(_make_Mixed_3b(256, 64, 'Mixed_3b_', norm_layer, norm_kwargs))
            self.features.add(nn.MaxPool3D(pool_size=3, strides=(2, 2, 2), padding=(1, 1, 1)))

            self.features.add(_make_Mixed_4a(480, 64, 'Mixed_4a_', norm_layer, norm_kwargs))
            self.features.add(_make_Mixed_4b(512, 64, 'Mixed_4b_', norm_layer, norm_kwargs))
            self.features.add(_make_Mixed_4c(512, 64, 'Mixed_4c_', norm_layer, norm_kwargs))
            self.features.add(_make_Mixed_4d(512, 64, 'Mixed_4d_', norm_layer, norm_kwargs))
            self.features.add(_make_Mixed_4e(528, 128, 'Mixed_4e_', norm_layer, norm_kwargs))
            self.features.add(nn.MaxPool3D(pool_size=2, strides=(2, 2, 2)))

            self.features.add(_make_Mixed_5a(832, 128, 'Mixed_5a_', norm_layer, norm_kwargs))
            self.features.add(_make_Mixed_5b(832, 128, 'Mixed_5b_', norm_layer, norm_kwargs))
            self.features.add(nn.GlobalAvgPool3D())

            self.head = nn.HybridSequential(prefix='')
            self.head.add(nn.Dropout(rate=self.dropout_ratio))
            self.output = nn.Dense(units=nclass, in_units=self.feat_dim, weight_initializer=init.Normal(sigma=self.init_std))
            self.head.add(self.output)

            self.features.initialize(ctx=ctx)
            self.head.initialize(ctx=ctx)

            if pretrained_base:
                inceptionv1_2d = googlenet(pretrained=True)
                weights2d = inceptionv1_2d.collect_params()
                weights3d = self.collect_params()
                assert len(weights2d.keys()) == len(weights3d.keys()), 'Number of parameters should be same.'

                dict2d = {}
                for key_id, key_name in enumerate(weights2d.keys()):
                    dict2d[key_id] = key_name

                dict3d = {}
                for key_id, key_name in enumerate(weights3d.keys()):
                    dict3d[key_id] = key_name

                dict_transform = {}
                for key_id, key_name in dict3d.items():
                    dict_transform[dict2d[key_id]] = key_name

                cnt = 0
                for key2d, key3d in dict_transform.items():
                    if 'conv' in key3d:
                        temporal_dim = weights3d[key3d].shape[2]
                        temporal_2d = nd.expand_dims(weights2d[key2d].data(), axis=2)
                        inflated_2d = nd.broadcast_to(temporal_2d, shape=[0, 0, temporal_dim, 0, 0]) / temporal_dim
                        assert inflated_2d.shape == weights3d[key3d].shape, 'the shape of %s and %s does not match. ' % (key2d, key3d)
                        weights3d[key3d].set_data(inflated_2d)
                        cnt += 1
                        print('%s is done with shape: ' % (key3d), weights3d[key3d].shape)
                    if 'batchnorm' in key3d:
                        assert weights2d[key2d].shape == weights3d[key3d].shape, 'the shape of %s and %s does not match. ' % (key2d, key3d)
                        weights3d[key3d].set_data(weights2d[key2d].data())
                        cnt += 1
                        print('%s is done with shape: ' % (key3d), weights3d[key3d].shape)
                    if 'dense' in key3d:
                        cnt += 1
                        print('%s is skipped with shape: ' % (key3d), weights3d[key3d].shape)

                assert cnt == len(weights2d.keys()), 'Not all parameters have been ported, check the initialization.'
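The inflation loop implements the standard I3D recipe: each 2D kernel is repeated along a new temporal axis and divided by the temporal size, so a temporally constant input produces the same activation through the 3D filter as a single frame did through the 2D one. The same arithmetic as a standalone sketch with assumed shapes:

from mxnet import nd

w2d = nd.random.uniform(shape=(64, 3, 7, 7))        # 2D conv weight: (out, in, H, W)
temporal_dim = 5
w3d = nd.broadcast_to(nd.expand_dims(w2d, axis=2),  # repeat along a new time axis ...
                      shape=(64, 3, temporal_dim, 7, 7)) / temporal_dim  # ... then rescale
print(w3d.shape)                                    # (64, 3, 5, 7, 7)
# summing over the time axis recovers the original 2D kernel:
print(nd.sum(w3d, axis=2)[0, 0, 0, 0].asscalar(), w2d[0, 0, 0, 0].asscalar())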
Example #16
train_data = readingDataset('train')
valid_data = readingDataset('test')

for a,b in train_data:
    print(a.shape)
    print(b.shape)
    break



net = nn.Sequential()
with net.name_scope():
    net.add(
        nn.Conv3D(channels=32, kernel_size=(5,5,1), activation='relu'),
        nn.MaxPool3D(pool_size=2, strides=(2,2,1)),
        nn.Conv3D(channels=64, kernel_size=(5,5,1), activation='relu'),
        nn.MaxPool3D(pool_size=2, strides=(2,2,1)),
        # nn.Flatten(),
        nn.Dense(120, activation="relu"),
        nn.Dense(84, activation="relu"),
        nn.Dense(4)
    )

net.initialize(init=init.Xavier())
print(net)
softmax_cross_entropy = gluon.loss.SoftmaxCrossEntropyLoss()
trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': 0.1})

from mxnet import autograd  # needed for the training loop

num_epoch = 10
for epoch in range(num_epoch):
    # standard Gluon training step; the body is a sketch and assumes
    # train_data yields (data, label) batches
    cumulative_loss = 0.
    for data, label in train_data:
        with autograd.record():
            output = net(data)
            loss = softmax_cross_entropy(output, label)
        loss.backward()
        trainer.step(data.shape[0])
        cumulative_loss += loss.mean().asscalar()
    print('epoch %d, loss %.4f' % (epoch, cumulative_loss))
Example #17
    def __init__(self, nclass=1000, pretrained=False, pretrained_base=True,
                 num_segments=1, num_crop=1, feat_ext=False,
                 dropout_ratio=0.5, init_std=0.01, partial_bn=False,
                 ctx=None, norm_layer=BatchNorm, norm_kwargs=None, **kwargs):
        super(I3D_InceptionV3, self).__init__(**kwargs)

        self.num_segments = num_segments
        self.num_crop = num_crop
        self.feat_dim = 2048
        self.dropout_ratio = dropout_ratio
        self.init_std = init_std
        self.feat_ext = feat_ext

        with self.name_scope():
            self.features = nn.HybridSequential(prefix='')
            self.features.add(_make_basic_conv(in_channels=3, channels=32, kernel_size=3, strides=2, padding=(1, 0, 0),
                                               norm_layer=norm_layer, norm_kwargs=norm_kwargs))
            if partial_bn:
                if norm_kwargs is not None:
                    norm_kwargs['use_global_stats'] = True
                else:
                    norm_kwargs = {}
                    norm_kwargs['use_global_stats'] = True

            self.features.add(_make_basic_conv(in_channels=32, channels=32, kernel_size=3, padding=(1, 0, 0),
                                               norm_layer=norm_layer, norm_kwargs=norm_kwargs))
            self.features.add(_make_basic_conv(in_channels=32, channels=64, kernel_size=3, padding=1,
                                               norm_layer=norm_layer, norm_kwargs=norm_kwargs))
            self.features.add(nn.MaxPool3D(pool_size=3, strides=(1, 2, 2), padding=(1, 0, 0)))
            self.features.add(_make_basic_conv(in_channels=64, channels=80, kernel_size=1,
                                               norm_layer=norm_layer, norm_kwargs=norm_kwargs))
            self.features.add(_make_basic_conv(in_channels=80, channels=192, kernel_size=3, padding=(1, 0, 0),
                                               norm_layer=norm_layer, norm_kwargs=norm_kwargs))
            self.features.add(nn.MaxPool3D(pool_size=3, strides=(1, 2, 2), padding=(1, 0, 0)))
            self.features.add(_make_A(192, 32, 'A1_', norm_layer, norm_kwargs))
            self.features.add(_make_A(256, 64, 'A2_', norm_layer, norm_kwargs))
            self.features.add(_make_A(288, 64, 'A3_', norm_layer, norm_kwargs))
            self.features.add(_make_B('B_', norm_layer, norm_kwargs))
            self.features.add(_make_C(768, 128, 'C1_', norm_layer, norm_kwargs))
            self.features.add(_make_C(768, 160, 'C2_', norm_layer, norm_kwargs))
            self.features.add(_make_C(768, 160, 'C3_', norm_layer, norm_kwargs))
            self.features.add(_make_C(768, 192, 'C4_', norm_layer, norm_kwargs))
            self.features.add(_make_D('D_', norm_layer, norm_kwargs))
            self.features.add(_make_E(1280, 'E1_', norm_layer, norm_kwargs))
            self.features.add(_make_E(2048, 'E2_', norm_layer, norm_kwargs))
            self.features.add(nn.GlobalAvgPool3D())

            self.head = nn.HybridSequential(prefix='')
            self.head.add(nn.Dropout(rate=self.dropout_ratio))
            self.output = nn.Dense(units=nclass, in_units=self.feat_dim, weight_initializer=init.Normal(sigma=self.init_std))
            self.head.add(self.output)

            self.features.initialize(ctx=ctx)
            self.head.initialize(ctx=ctx)

            if pretrained_base and not pretrained:
                inceptionv3_2d = inception_v3(pretrained=True)
                weights2d = inceptionv3_2d.collect_params()
                weights3d = self.collect_params()
                assert len(weights2d.keys()) == len(weights3d.keys()), 'Number of parameters should be same.'

                dict2d = {}
                for key_id, key_name in enumerate(weights2d.keys()):
                    dict2d[key_id] = key_name

                dict3d = {}
                for key_id, key_name in enumerate(weights3d.keys()):
                    dict3d[key_id] = key_name

                dict_transform = {}
                for key_id, key_name in dict3d.items():
                    dict_transform[dict2d[key_id]] = key_name

                cnt = 0
                for key2d, key3d in dict_transform.items():
                    if 'conv' in key3d:
                        temporal_dim = weights3d[key3d].shape[2]
                        temporal_2d = nd.expand_dims(weights2d[key2d].data(), axis=2)
                        inflated_2d = nd.broadcast_to(temporal_2d, shape=[0, 0, temporal_dim, 0, 0]) / temporal_dim
                        assert inflated_2d.shape == weights3d[key3d].shape, 'the shape of %s and %s does not match. ' % (key2d, key3d)
                        weights3d[key3d].set_data(inflated_2d)
                        cnt += 1
                        print('%s is done with shape: ' % (key3d), weights3d[key3d].shape)
                    if 'batchnorm' in key3d:
                        assert weights2d[key2d].shape == weights3d[key3d].shape, 'the shape of %s and %s does not match. ' % (key2d, key3d)
                        weights3d[key3d].set_data(weights2d[key2d].data())
                        cnt += 1
                        print('%s is done with shape: ' % (key3d), weights3d[key3d].shape)
                    if 'dense' in key3d:
                        cnt += 1
                        print('%s is skipped with shape: ' % (key3d), weights3d[key3d].shape)

                assert cnt == len(weights2d.keys()), 'Not all parameters have been ported, check the initialization.'
Example #18
    def __init__(self,
                 nclass,
                 depth,
                 num_stages=4,
                 pretrained_base=True,
                 num_segments=1,
                 spatial_strides=(1, 2, 2, 2),
                 temporal_strides=(1, 1, 1, 1),
                 dilations=(1, 1, 1, 1),
                 out_indices=(0, 1, 2, 3),
                 conv1_kernel_t=5,
                 conv1_stride_t=2,
                 pool1_kernel_t=1,
                 pool1_stride_t=2,
                 inflate_freq=(1, 1, 1, 1),
                 inflate_stride=(1, 1, 1, 1),
                 inflate_style='3x1x1',
                 nonlocal_stages=(-1, ),
                 nonlocal_freq=(0, 1, 1, 0),
                 nonlocal_cfg=None,
                 bn_eval=True,
                 bn_frozen=False,
                 partial_bn=False,
                 frozen_stages=-1,
                 dropout_ratio=0.5,
                 init_std=0.01,
                 norm_layer=BatchNorm,
                 norm_kwargs=None,
                 ctx=None,
                 **kwargs):
        super(I3D_ResNetV1, self).__init__()

        if depth not in self.arch_settings:
            raise KeyError('invalid depth {} for resnet'.format(depth))

        self.nclass = nclass
        self.depth = depth
        self.num_stages = num_stages
        self.pretrained_base = pretrained_base
        self.num_segments = num_segments
        self.spatial_strides = spatial_strides
        self.temporal_strides = temporal_strides
        self.dilations = dilations
        assert len(spatial_strides) == len(temporal_strides) == len(
            dilations) == num_stages
        self.out_indices = out_indices
        assert max(out_indices) < num_stages
        self.inflate_freqs = inflate_freq if not isinstance(
            inflate_freq, int) else (inflate_freq, ) * num_stages
        self.inflate_style = inflate_style
        self.nonlocal_stages = nonlocal_stages
        self.nonlocal_freqs = nonlocal_freq if not isinstance(
            nonlocal_freq, int) else (nonlocal_freq, ) * num_stages
        self.nonlocal_cfg = nonlocal_cfg
        self.bn_eval = bn_eval
        self.bn_frozen = bn_frozen
        self.partial_bn = partial_bn
        self.frozen_stages = frozen_stages
        self.dropout_ratio = dropout_ratio
        self.init_std = init_std

        self.block, stage_blocks = self.arch_settings[depth]
        self.stage_blocks = stage_blocks[:num_stages]
        self.inplanes = 64

        self.first_stage = nn.HybridSequential(prefix='')
        self.first_stage.add(
            nn.Conv3D(in_channels=3,
                      channels=64,
                      kernel_size=(conv1_kernel_t, 7, 7),
                      strides=(conv1_stride_t, 2, 2),
                      padding=((conv1_kernel_t - 1) // 2, 3, 3),
                      use_bias=False))
        self.first_stage.add(
            norm_layer(in_channels=64,
                       **({} if norm_kwargs is None else norm_kwargs)))
        self.first_stage.add(nn.Activation('relu'))
        self.first_stage.add(
            nn.MaxPool3D(pool_size=(pool1_kernel_t, 3, 3),
                         strides=(pool1_stride_t, 2, 2),
                         padding=(pool1_kernel_t // 2, 1, 1)))

        self.pool2 = nn.MaxPool3D(pool_size=(2, 1, 1),
                                  strides=(2, 1, 1),
                                  padding=(0, 0, 0))

        self.res_layers = nn.HybridSequential(prefix='')
        for i, num_blocks in enumerate(self.stage_blocks):
            spatial_stride = spatial_strides[i]
            temporal_stride = temporal_strides[i]
            dilation = dilations[i]
            planes = 64 * 2**i
            layer_name = 'layer{}_'.format(i + 1)

            res_layer = make_res_layer(self.block,
                                       self.inplanes,
                                       planes,
                                       num_blocks,
                                       spatial_stride=spatial_stride,
                                       temporal_stride=temporal_stride,
                                       dilation=dilation,
                                       inflate_freq=self.inflate_freqs[i],
                                       inflate_style=self.inflate_style,
                                       nonlocal_freq=self.nonlocal_freqs[i],
                                       nonlocal_cfg=self.nonlocal_cfg
                                       if i in self.nonlocal_stages else None,
                                       norm_layer=norm_layer,
                                       norm_kwargs=norm_kwargs,
                                       layer_name=layer_name)
            self.inplanes = planes * self.block.expansion
            self.res_layers.add(res_layer)

        self.feat_dim = self.block.expansion * 64 * 2**(
            len(self.stage_blocks) - 1)

        # We use ``GlobalAvgPool3D`` here for simplicity. Otherwise the input size must be fixed.
        # You can also use ``AvgPool3D`` and specify the arguments on your own, e.g.
        # self.st_avg = nn.AvgPool3D(pool_size=(4, 7, 7), strides=1, padding=0)
        # ``AvgPool3D`` is 10% faster, but ``GlobalAvgPool3D`` makes the code cleaner.
        self.st_avg = nn.GlobalAvgPool3D()

        self.head = nn.HybridSequential(prefix='')
        self.head.add(nn.Dropout(rate=self.dropout_ratio))
        self.fc = nn.Dense(in_units=self.feat_dim,
                           units=nclass,
                           weight_initializer=init.Normal(sigma=self.init_std))
        self.head.add(self.fc)

        self.init_weights()
Example #19
    def __init__(self, classes=4, dropout_keep_prob=0.5, **kwargs):
        """400 classes in the Kinetics dataset."""
        super(InceptionI3d, self).__init__(**kwargs)
        self._num_classes = classes
        self.dropout_keep_prob = dropout_keep_prob

        # this is the main classifier
        with self.name_scope():
            self.features = nn.HybridSequential(prefix='')

            # the input shape is `batch_size` x `num_frames` x 224 x 224 x `num_channels` in tf code
            # but gluon is NCDHW
            # input shape is 1, 3, 79, 224, 224

            self.features.add(
                _make_unit3d(channels=64,
                             kernel_size=(7, 7, 7),
                             strides=(2, 2, 2)))
            # shape is (1, 64, 37, 109, 109)

            self.features.add(
                nn.MaxPool3D(pool_size=(1, 3, 3),
                             strides=(1, 2, 2),
                             padding=(0, 55, 55))
            )  # here should be 'same' padding; hard code for now.
            # shape is (1, 64, 37, 109, 109)

            self.features.add(_make_unit3d(channels=64, kernel_size=(1, 1, 1)))
            # shape (1, 64, 37, 109, 109)

            self.features.add(_make_unit3d(channels=192,
                                           kernel_size=(3, 3, 3)))
            # shape (1, 192, 35, 107, 107)

            self.features.add(
                nn.MaxPool3D(pool_size=(1, 3, 3),
                             strides=(1, 2, 2),
                             padding=(0, 54, 54)))  # padding same
            # shape (1, 192, 35, 107, 107)

            self.features.add(_make_mixed_3b('mixed_3b'))

            self.features.add(_make_mixed_3c('mixed_3c'))
            #(1, 480, 35, 107, 107)

            self.features.add(
                nn.MaxPool3D(pool_size=(3, 3, 3),
                             strides=(2, 2, 2),
                             padding=(18, 54, 54)))  # padding is same here

            self.features.add(_make_mixed_4b('mixed_4b'))
            #
            self.features.add(_make_mixed_4c('mixed_4c'))

            self.features.add(_make_mixed_4d('mixed_4d'))

            self.features.add(_make_mixed_4e('mixed_4e'))

            self.features.add(_make_mixed_4f('mixed_4f'))
            # (1, 384, 35, 107, 107)

            self.features.add(
                nn.MaxPool3D(pool_size=(2, 2, 2),
                             strides=(2, 2, 2),
                             padding=(18, 54, 54)))

            self.features.add(_make_mixed_5b('mixed_5b'))

            self.features.add(_make_mixed_5c('mixed_5c'))

            self.features.add(nn.AvgPool3D(pool_size=(2, 7, 7)))

            self.features.add(nn.Dropout(self.dropout_keep_prob))

            self.features.add(
                _make_unit3d(channels=self._num_classes,
                             kernel_size=(1, 1, 1)))

            # logits/main classifier outputs endpoint
            self.output = nn.HybridSequential(prefix='')
            self.output.add(nn.Flatten())
            self.output.add(nn.Dense(self._num_classes))
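The hard-coded paddings above stand in for TensorFlow-style 'same' padding, which MXNet pooling layers do not offer directly. For input length n, window k, and stride s, 'same' needs a total padding of max((ceil(n / s) - 1) * s + k - n, 0), split across the two sides. A small helper sketch (hypothetical, not part of the original model):

import math

def same_pad_total(n, k, s):
    # total one-axis padding TensorFlow's 'SAME' mode would apply
    return max((math.ceil(n / s) - 1) * s + k - n, 0)

print(same_pad_total(109, 3, 2))  # 2 -> pad 1 on each side of a 109-wide map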
Example #20
    def __init__(self,
                 nclass,
                 dropout_ratio=0.5,
                 num_segments=1,
                 num_crop=1,
                 feat_ext=False,
                 init_std=0.001,
                 ctx=None,
                 **kwargs):
        super(C3D, self).__init__()
        self.num_segments = num_segments
        self.num_crop = num_crop
        self.feat_ext = feat_ext
        self.feat_dim = 8192

        with self.name_scope():
            self.conv1 = nn.Conv3D(in_channels=3,
                                   channels=64,
                                   kernel_size=(3, 3, 3),
                                   padding=(1, 1, 1))
            self.pool1 = nn.MaxPool3D(pool_size=(1, 2, 2), strides=(1, 2, 2))

            self.conv2 = nn.Conv3D(in_channels=64,
                                   channels=128,
                                   kernel_size=(3, 3, 3),
                                   padding=(1, 1, 1))
            self.pool2 = nn.MaxPool3D(pool_size=(2, 2, 2), strides=(2, 2, 2))

            self.conv3a = nn.Conv3D(in_channels=128,
                                    channels=256,
                                    kernel_size=(3, 3, 3),
                                    padding=(1, 1, 1))
            self.conv3b = nn.Conv3D(in_channels=256,
                                    channels=256,
                                    kernel_size=(3, 3, 3),
                                    padding=(1, 1, 1))
            self.pool3 = nn.MaxPool3D(pool_size=(2, 2, 2), strides=(2, 2, 2))

            self.conv4a = nn.Conv3D(in_channels=256,
                                    channels=512,
                                    kernel_size=(3, 3, 3),
                                    padding=(1, 1, 1))
            self.conv4b = nn.Conv3D(in_channels=512,
                                    channels=512,
                                    kernel_size=(3, 3, 3),
                                    padding=(1, 1, 1))
            self.pool4 = nn.MaxPool3D(pool_size=(2, 2, 2), strides=(2, 2, 2))

            self.conv5a = nn.Conv3D(in_channels=512,
                                    channels=512,
                                    kernel_size=(3, 3, 3),
                                    padding=(1, 1, 1))
            self.conv5b = nn.Conv3D(in_channels=512,
                                    channels=512,
                                    kernel_size=(3, 3, 3),
                                    padding=(1, 1, 1))
            self.pool5 = nn.MaxPool3D(pool_size=(2, 2, 2),
                                      strides=(2, 2, 2),
                                      padding=(0, 1, 1))

            self.fc6 = nn.Dense(in_units=8192,
                                units=4096,
                                weight_initializer=init.Normal(sigma=init_std))
            self.fc7 = nn.Dense(in_units=4096,
                                units=4096,
                                weight_initializer=init.Normal(sigma=init_std))
            self.fc8 = nn.Dense(in_units=4096,
                                units=nclass,
                                weight_initializer=init.Normal(sigma=init_std))
            self.dropout = nn.Dropout(rate=dropout_ratio)
            self.relu = nn.Activation('relu')
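The pooling schedule above is what makes feat_dim = 8192: on the canonical 3 x 16 x 112 x 112 C3D clip, the five max pools shrink the volume to 1 x 4 x 4 at 512 channels, and 512 * 1 * 4 * 4 = 8192. A minimal shape trace (pooling layers are parameter-free, so they can be called directly; the channel count is arbitrary here because pooling ignores it):

import mxnet as mx
from mxnet.gluon import nn

x = mx.nd.zeros((1, 512, 16, 112, 112))  # NCDHW; the padded 3x3x3 convs keep D/H/W
for pool in [nn.MaxPool3D((1, 2, 2), (1, 2, 2)),
             nn.MaxPool3D((2, 2, 2), (2, 2, 2)),
             nn.MaxPool3D((2, 2, 2), (2, 2, 2)),
             nn.MaxPool3D((2, 2, 2), (2, 2, 2)),
             nn.MaxPool3D((2, 2, 2), (2, 2, 2), padding=(0, 1, 1))]:
    x = pool(x)
print(x.shape)  # (1, 512, 1, 4, 4) -> nn.Flatten would give (1, 8192)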
    def __init__(self,
                 in_channels=1024,
                 nonlocal_type="gaussian",
                 dim=3,
                 embed=True,
                 embed_dim=None,
                 sub_sample=True,
                 use_bn=True,
                 norm_layer=BatchNorm,
                 norm_kwargs=None,
                 ctx=None,
                 **kwargs):
        super(NonLocal, self).__init__()

        assert nonlocal_type in ['gaussian', 'dot', 'concat']
        self.nonlocal_type = nonlocal_type
        self.embed = embed
        self.embed_dim = embed_dim if embed_dim is not None else in_channels // 2
        self.sub_sample = sub_sample
        self.use_bn = use_bn

        with self.name_scope():
            if self.embed:
                if dim == 2:
                    self.theta = nn.Conv2D(in_channels=in_channels,
                                           channels=self.embed_dim,
                                           kernel_size=(1, 1),
                                           strides=(1, 1),
                                           padding=(0, 0),
                                           weight_initializer=init.MSRAPrelu())
                    self.phi = nn.Conv2D(in_channels=in_channels,
                                         channels=self.embed_dim,
                                         kernel_size=(1, 1),
                                         strides=(1, 1),
                                         padding=(0, 0),
                                         weight_initializer=init.MSRAPrelu())
                    self.g = nn.Conv2D(in_channels=in_channels,
                                       channels=self.embed_dim,
                                       kernel_size=(1, 1),
                                       strides=(1, 1),
                                       padding=(0, 0),
                                       weight_initializer=init.MSRAPrelu())
                elif dim == 3:
                    self.theta = nn.Conv3D(in_channels=in_channels,
                                           channels=self.embed_dim,
                                           kernel_size=(1, 1, 1),
                                           strides=(1, 1, 1),
                                           padding=(0, 0, 0),
                                           weight_initializer=init.MSRAPrelu())
                    self.phi = nn.Conv3D(in_channels=in_channels,
                                         channels=self.embed_dim,
                                         kernel_size=(1, 1, 1),
                                         strides=(1, 1, 1),
                                         padding=(0, 0, 0),
                                         weight_initializer=init.MSRAPrelu())
                    self.g = nn.Conv3D(in_channels=in_channels,
                                       channels=self.embed_dim,
                                       kernel_size=(1, 1, 1),
                                       strides=(1, 1, 1),
                                       padding=(0, 0, 0),
                                       weight_initializer=init.MSRAPrelu())

            if self.nonlocal_type == 'concat':
                if dim == 2:
                    self.concat_proj = nn.HybridSequential()
                    self.concat_proj.add(
                        nn.Conv2D(in_channels=self.embed_dim * 2,
                                  channels=1,
                                  kernel_size=(1, 1),
                                  strides=(1, 1),
                                  padding=(0, 0),
                                  weight_initializer=init.MSRAPrelu()))
                    self.concat_proj.add(nn.Activation('relu'))
                elif dim == 3:
                    self.concat_proj = nn.HybridSequential()
                    self.concat_proj.add(
                        nn.Conv3D(in_channels=self.embed_dim * 2,
                                  channels=1,
                                  kernel_size=(1, 1, 1),
                                  strides=(1, 1, 1),
                                  padding=(0, 0, 0),
                                  weight_initializer=init.MSRAPrelu()))
                    self.concat_proj.add(nn.Activation('relu'))

            if sub_sample:
                if dim == 2:
                    self.max_pool = nn.MaxPool2D(pool_size=(2, 2))
                elif dim == 3:
                    self.max_pool = nn.MaxPool3D(pool_size=(1, 2, 2))
                self.sub_phi = nn.HybridSequential()
                self.sub_phi.add(self.phi)
                self.sub_phi.add(self.max_pool)
                self.sub_g = nn.HybridSequential()
                self.sub_g.add(self.g)
                self.sub_g.add(self.max_pool)

            if dim == 2:
                self.W = nn.Conv2D(in_channels=self.embed_dim,
                                   channels=in_channels,
                                   kernel_size=(1, 1),
                                   strides=(1, 1),
                                   padding=(0, 0),
                                   weight_initializer=init.MSRAPrelu())
            elif dim == 3:
                self.W = nn.Conv3D(in_channels=self.embed_dim,
                                   channels=in_channels,
                                   kernel_size=(1, 1, 1),
                                   strides=(1, 1, 1),
                                   padding=(0, 0, 0),
                                   weight_initializer=init.MSRAPrelu())

            if use_bn:
                self.bn = norm_layer(
                    in_channels=in_channels,
                    gamma_initializer='zeros',
                    **({} if norm_kwargs is None else norm_kwargs))
                self.W_bn = nn.HybridSequential()
                self.W_bn.add(self.W)
                self.W_bn.add(self.bn)
	net.add(nn.Flatten())
	net.add(nn.Dense(4096, activation='relu'))
	net.add(nn.Dropout(.5))

	# Stage 5
	net.add(nn.Dense(4096, activation='relu'))
	net.add(nn.Dropout(.5))

	# Stage 6
	net.add(nn.Dense(10))
net_new = gluon.nn.Sequential()
with net_new.name_scope():
	# Stage 1
	net_new.add(nn.Conv3D(
		channels=96, kernel_size=(1,11,11), strides=(1,4,4), activation='relu'))
	net_new.add(nn.MaxPool3D(pool_size=(1,3,3), strides=(1,2,2)))
	# Stage 2
	net_new.add(nn.Conv3D(
		channels=32, kernel_size=(16,5,5), strides=(8,1,1), padding=(0,2,2), activation='relu'))
	net_new.add(nn.MaxPool3D(pool_size=(1,3,3), strides=(1,2,2)))
	# Stage 3
	net_new.add(nn.Conv3D(
		channels=...))  # channel count left unspecified here
def transform(data, label):
	# Resize from 28 x 28 to 224 x 224
	data = image.imresize(data, 224, 224)
	return utils.transform_mnist(data, label)
batch_size = 64
train_data, test_data = utils.load_data_fashion_mnist(batch_size, transform)
ctx = utils.try_gpu()