Example #1
    def __init__(self, pretrained_model='auto', n_layers=16):
        super(VGGLayers, self).__init__()
        if pretrained_model:
            # As a sampling process is time-consuming,
            # we employ a zero initializer for faster computation.
            init = constant.Zero()
            kwargs = {'initialW': init, 'initial_bias': init}
        else:
            # employ default initializers used in the original paper
            kwargs = {
                'initialW': normal.Normal(0.01),
                'initial_bias': constant.Zero(),
            }

        if n_layers not in [16, 19]:
            raise ValueError(
                'The n_layers argument should be either 16 or 19, '
                'but {} was given.'.format(n_layers)
            )

        with self.init_scope():
            self.conv1_1 = Convolution2D(3, 64, 3, 1, 1, **kwargs)
            self.conv1_2 = Convolution2D(64, 64, 3, 1, 1, **kwargs)
            self.conv2_1 = Convolution2D(64, 128, 3, 1, 1, **kwargs)
            self.conv2_2 = Convolution2D(128, 128, 3, 1, 1, **kwargs)
            self.conv3_1 = Convolution2D(128, 256, 3, 1, 1, **kwargs)
            self.conv3_2 = Convolution2D(256, 256, 3, 1, 1, **kwargs)
            self.conv3_3 = Convolution2D(256, 256, 3, 1, 1, **kwargs)
            self.conv4_1 = Convolution2D(256, 512, 3, 1, 1, **kwargs)
            self.conv4_2 = Convolution2D(512, 512, 3, 1, 1, **kwargs)
            self.conv4_3 = Convolution2D(512, 512, 3, 1, 1, **kwargs)
            self.conv5_1 = Convolution2D(512, 512, 3, 1, 1, **kwargs)
            self.conv5_2 = Convolution2D(512, 512, 3, 1, 1, **kwargs)
            self.conv5_3 = Convolution2D(512, 512, 3, 1, 1, **kwargs)
            self.fc6 = Linear(512 * 7 * 7, 4096, **kwargs)
            self.fc7 = Linear(4096, 4096, **kwargs)
            self.fc8 = Linear(4096, 1000, **kwargs)
            if n_layers == 19:
                self.conv3_4 = Convolution2D(256, 256, 3, 1, 1, **kwargs)
                self.conv4_4 = Convolution2D(512, 512, 3, 1, 1, **kwargs)
                self.conv5_4 = Convolution2D(512, 512, 3, 1, 1, **kwargs)

        if pretrained_model == 'auto':
            if n_layers == 16:
                _retrieve(
                    'VGG_ILSVRC_16_layers.npz',
                    'https://www.robots.ox.ac.uk/%7Evgg/software/very_deep/'
                    'caffe/VGG_ILSVRC_16_layers.caffemodel',
                    self)
            else:
                _retrieve(
                    'VGG_ILSVRC_19_layers.npz',
                    'http://www.robots.ox.ac.uk/%7Evgg/software/very_deep/'
                    'caffe/VGG_ILSVRC_19_layers.caffemodel',
                    self)
        elif pretrained_model:
            npz.load_npz(pretrained_model, self)
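For orientation, a minimal usage sketch of the link above. This assumes Chainer is installed and that the class can be called with a layers argument, as chainer.links.VGG16Layers can; the input is a dummy batch, not a real image.

    import numpy as np
    import chainer

    # Sketch only: assumes VGGLayers defines a forward pass that
    # accepts layers=..., mirroring chainer.links.VGG16Layers.
    model = VGGLayers(pretrained_model='auto', n_layers=16)
    x = np.zeros((1, 3, 224, 224), dtype=np.float32)  # dummy RGB batch
    with chainer.using_config('train', False):
        fc7 = model(x, layers=['fc7'])['fc7']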
Example #2
    def __init__(self, pretrained_model='auto'):
        if pretrained_model:
            # As a sampling process is time-consuming,
            # we employ a zero initializer for faster computation.
            init = constant.Zero()
            conv_kwargs = {'initialW': init, 'initial_bias': init}
            fc_kwargs = conv_kwargs
        else:
            # employ default initializers used in the original paper
            conv_kwargs = {
                'initialW': normal.Normal(0.01),
                'initial_bias': constant.Zero(),
            }
            fc_kwargs = {
                'initialW': normal.Normal(0.005),
                'initial_bias': constant.One(),
            }
        super(C3DVersion1, self).__init__(
            conv1a=ConvolutionND(3, 3, 64, 3, 1, 1, **conv_kwargs),
            conv2a=ConvolutionND(3, 64, 128, 3, 1, 1, **conv_kwargs),
            conv3a=ConvolutionND(3, 128, 256, 3, 1, 1, **conv_kwargs),
            conv3b=ConvolutionND(3, 256, 256, 3, 1, 1, **conv_kwargs),
            conv4a=ConvolutionND(3, 256, 512, 3, 1, 1, **conv_kwargs),
            conv4b=ConvolutionND(3, 512, 512, 3, 1, 1, **conv_kwargs),
            conv5a=ConvolutionND(3, 512, 512, 3, 1, 1, **conv_kwargs),
            conv5b=ConvolutionND(3, 512, 512, 3, 1, 1, **conv_kwargs),
            fc6=Linear(512 * 4 * 4, 4096, **fc_kwargs),
            fc7=Linear(4096, 4096, **fc_kwargs),
            fc8=Linear(4096, 101, **fc_kwargs),
        )
        if pretrained_model == 'auto':
            _retrieve(
                'conv3d_deepnetA_ucf.npz', 'http://vlg.cs.dartmouth.edu/c3d/'
                'c3d_ucf101_finetune_whole_iter_20000', self)
        elif pretrained_model:
            npz.load_npz(pretrained_model, self)

        self.functions = collections.OrderedDict([
            ('conv1a', [self.conv1a, relu]),
            ('pool1', [_max_pooling_2d]),
            ('conv2a', [self.conv2a, relu]),
            ('pool2', [_max_pooling_3d]),
            ('conv3a', [self.conv3a, relu]),
            ('conv3b', [self.conv3b, relu]),
            ('pool3', [_max_pooling_3d]),
            ('conv4a', [self.conv4a, relu]),
            ('conv4b', [self.conv4b, relu]),
            ('pool4', [_max_pooling_3d]),
            ('conv5a', [self.conv5a, relu]),
            ('conv5b', [self.conv5b, relu]),
            ('pool5', [_max_pooling_3d]),
            ('fc6', [self.fc6, relu, dropout]),
            ('fc7', [self.fc7, relu, dropout]),
            ('fc8', [self.fc8]),
            ('prob', [softmax]),
        ])
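The functions OrderedDict above makes a layer-by-layer forward pass a simple loop. A minimal sketch of such a helper; this extract method is illustrative and not part of the original class.

    def extract(self, x, layer='fc7'):
        # Apply each stage in registration order and stop once the
        # requested layer has been computed.
        h = x
        for name, funcs in self.functions.items():
            for f in funcs:
                h = f(h)
            if name == layer:
                return h
        raise ValueError('Unknown layer: {}'.format(layer))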
Example #3
    def __init__(self, in_channels, out_channels, ksize, stride=1, pad=0,
                 nobias=False, initialW=None, initial_bias=None):
        super(DeformableConvolution2DSampler, self).__init__()

        self.ksize = ksize
        self.stride = _pair(stride)
        self.pad = _pair(pad)
        self.out_channels = out_channels
        self.initialW = initialW

        if initialW is None:
            initialW = constant.Zero()

        with self.init_scope():
            W_initializer = initializers._get_initializer(initialW)
            self.W = variable.Parameter(W_initializer)

            if nobias:
                self.b = None
            else:
                if initial_bias is None:
                    initial_bias = initializers.Constant(0)
                bias_initializer = initializers._get_initializer(initial_bias)
                self.b = variable.Parameter(bias_initializer)

        if in_channels is not None:
            self._initialize_params(in_channels)
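When in_channels is None, the sampler above defers weight allocation; otherwise it calls self._initialize_params, which is not shown. A plausible sketch of that method, assuming Chainer's Parameter.initialize API:

    def _initialize_params(self, in_channels):
        # Allocate W (and b) now that the input channel count is known;
        # the shape follows the usual (out_channels, in_channels, kh, kw).
        kh, kw = _pair(self.ksize)
        self.W.initialize((self.out_channels, in_channels, kh, kw))
        if self.b is not None:
            self.b.initialize((self.out_channels,))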
Example #4
    def __init__(self, pretrained_model, n_layers):
        super(ResNetLayers, self).__init__()

        if pretrained_model:
            # As a sampling process is time-consuming,
            # we employ a zero initializer for faster computation.
            kwargs = {'initialW': constant.Zero()}
        else:
            # employ default initializers used in the original paper
            kwargs = {'initialW': normal.HeNormal(scale=1.0)}

        if n_layers == 50:
            block = [3, 4, 6, 3]
        elif n_layers == 101:
            block = [3, 4, 23, 3]
        elif n_layers == 152:
            block = [3, 8, 36, 3]
        else:
            raise ValueError('The n_layers argument should be either 50, 101,'
                             ' or 152, but {} was given.'.format(n_layers))

        with self.init_scope():
            self.conv1 = Convolution2D(3, 64, 7, 2, 3, **kwargs)
            self.bn1 = BatchNormalization(64)
            self.res2 = BuildingBlock(block[0], 64, 64, 256, 1, **kwargs)
            self.res3 = BuildingBlock(block[1], 256, 128, 512, 2, **kwargs)
            self.res4 = BuildingBlock(block[2], 512, 256, 1024, 2, **kwargs)
            self.res5 = BuildingBlock(block[3], 1024, 512, 2048, 2, **kwargs)
            self.fc6 = Linear(2048, 1000)

        if pretrained_model and pretrained_model.endswith('.caffemodel'):
            _retrieve(n_layers, 'ResNet-{}-model.npz'.format(n_layers),
                      pretrained_model, self)
        elif pretrained_model:
            npz.load_npz(pretrained_model, self)
Example #5
    def __init__(self, pretrained_model='auto', n_layers=50):
        super(ExtractorResNet, self).__init__()
        print('Extractor ResNet', n_layers, 'initialization')
        kwargs = {'initialW': constant.Zero()}
        if n_layers == 50:
            block = [3, 4, 6, 3]
        elif n_layers == 101:
            block = [3, 4, 23, 3]
        else:
            raise ValueError('The n_layers argument should be either 50 or'
                             ' 101, but {} was given.'.format(n_layers))
        if pretrained_model == 'auto':
            pretrained_model = 'ResNet-{}-model.caffemodel'.format(n_layers)
        with self.init_scope():
            self.conv1 = Convolution2D(3, 64, 7, 2, 3, **kwargs)
            self.bn1 = BatchNormalization(64)
            self.res2 = BuildingBlock(block[0], 64, 64, 256, 1, **kwargs)
            self.res3 = BuildingBlock(block[1], 256, 128, 512, 2, **kwargs)
            self.res4 = BuildingBlock(block[2], 512, 256, 1024, 2, **kwargs)
            self.res5 = BuildingBlock(block[3], 1024, 512, 2048, 1, **kwargs)
            self.fc6 = Linear(2048, 1000)
        if pretrained_model and pretrained_model.endswith('.caffemodel'):
            _retrieve(n_layers, 'ResNet-{}-model.npz'.format(n_layers),
                      pretrained_model, self)
        elif pretrained_model:
            npz.load_npz(pretrained_model, self)
        del self.fc6
Example #6
    def __init__(self, num_inter, num_out):
        kwargs = {
            'initialW': normal.Normal(0.01),
            'initial_bias': constant.Zero(),
        }
        self.num_out = num_out
        super(VGG16, self).__init__()

        with self.init_scope():
            self.conv1_1 = Convolution2D(3, 64, 3, 1, 1, **kwargs)
            self.conv1_2 = Convolution2D(64, 64, 3, 1, 1, **kwargs)
            self.conv2_1 = Convolution2D(64, 128, 3, 1, 1, **kwargs)
            self.conv2_2 = Convolution2D(128, 128, 3, 1, 1, **kwargs)
            self.conv3_1 = Convolution2D(128, 256, 3, 1, 1, **kwargs)
            self.conv3_2 = Convolution2D(256, 256, 3, 1, 1, **kwargs)
            self.conv3_3 = Convolution2D(256, 256, 3, 1, 1, **kwargs)
            self.conv4_1 = Convolution2D(256, 512, 3, 1, 1, **kwargs)
            self.conv4_2 = Convolution2D(512, 512, 3, 1, 1, **kwargs)
            self.conv4_3 = Convolution2D(512, 512, 3, 1, 1, **kwargs)
            self.conv5_1 = Convolution2D(512, 512, 3, 1, 1, **kwargs)
            self.conv5_2 = Convolution2D(512, 512, 3, 1, 1, **kwargs)
            self.conv5_3 = Convolution2D(512, 512, 3, 1, 1, **kwargs)
            self.fc6 = Linear(512 * 7 * 7, num_inter, **kwargs)
            self.fc7 = Linear(num_inter, num_inter, **kwargs)
            self.fc8 = Linear(num_inter, num_out, **kwargs)
Example #7
 def __init__(self, num_inter, num_out):
     kwargs = {
         'initialW': normal.Normal(0.01),
         'initial_bias': constant.Zero(),
     }
     super(VGG16BN, self).__init__()
     with self.init_scope():
         self.block1_1 = Block(64, 3)
         self.block1_2 = Block(64, 3)
         self.block2_1 = Block(128, 3)
         self.block2_2 = Block(128, 3)
         self.block3_1 = Block(256, 3)
         self.block3_2 = Block(256, 3)
         self.block3_3 = Block(256, 3)
         self.block4_1 = Block(512, 3)
         self.block4_2 = Block(512, 3)
         self.block4_3 = Block(512, 3)
         self.block5_1 = Block(512, 3)
         self.block5_2 = Block(512, 3)
         self.block5_3 = Block(512, 3)
         self.fc1 = L.Linear(None, num_inter, **kwargs)
         self.bn_fc1 = L.BatchNormalization(num_inter)
         self.fc2 = L.Linear(None, num_inter, **kwargs)
         self.bn_fc2 = L.BatchNormalization(num_inter)
         self.fc3 = L.Linear(None, num_out, **kwargs)
Example #8
    def __init__(self, num_inter, num_out, dropout_ratio=.5):
        kwargs = {
            'initialW': normal.Normal(0.01),
            'initial_bias': constant.Zero(),
        }
        super(VGG16BNFC3, self).__init__()

        with self.init_scope():
            self.conv1_1 = Convolution2D(3, 64, 3, 1, 1, **kwargs)
            self.conv1_2 = Convolution2D(64, 64, 3, 1, 1, **kwargs)
            self.conv2_1 = Convolution2D(64, 128, 3, 1, 1, **kwargs)
            self.conv2_2 = Convolution2D(128, 128, 3, 1, 1, **kwargs)
            self.conv3_1 = Convolution2D(128, 256, 3, 1, 1, **kwargs)
            self.conv3_2 = Convolution2D(256, 256, 3, 1, 1, **kwargs)
            self.conv3_3 = Convolution2D(256, 256, 3, 1, 1, **kwargs)
            self.conv4_1 = Convolution2D(256, 512, 3, 1, 1, **kwargs)
            self.conv4_2 = Convolution2D(512, 512, 3, 1, 1, **kwargs)
            self.conv4_3 = Convolution2D(512, 512, 3, 1, 1, **kwargs)
            self.conv5_1 = Convolution2D(512, 512, 3, 1, 1, **kwargs)
            self.conv5_2 = Convolution2D(512, 512, 3, 1, 1, **kwargs)
            self.conv5_3 = Convolution2D(512, 512, 3, 1, 1, **kwargs)
            self.bn1 = L.BatchNormalization(512)
            self.fc6 = Linear(512 * 7 * 7, num_inter, **kwargs)
            self.bn2 = L.BatchNormalization(num_inter)
            self.fc7 = Linear(num_inter, num_inter, **kwargs)
            self.bn3 = L.BatchNormalization(num_inter)
            self.fc8 = Linear(num_inter, num_out, **kwargs)
        self.dropout_ratio = dropout_ratio
Example #9
 def __init__(self, pretrained_model='auto'):
     if pretrained_model:
         # As a sampling process is time-consuming,
         # we employ a zero initializer for faster computation.
         kwargs = {'initialW': constant.Zero()}
     else:
         # employ default initializers used in the original paper
         kwargs = {'initialW': normal.HeNormal(scale=1.0)}
     super(ResNet50Layers, self).__init__(
         conv1=Convolution2D(3, 64, 7, 2, 3, **kwargs),
         bn1=BatchNormalization(64),
         res2=BuildingBlock(3, 64, 64, 256, 1, **kwargs),
         res3=BuildingBlock(4, 256, 128, 512, 2, **kwargs),
         res4=BuildingBlock(6, 512, 256, 1024, 2, **kwargs),
         res5=BuildingBlock(3, 1024, 512, 2048, 2, **kwargs),
         fc6=Linear(2048, 1000),
     )
     if pretrained_model == 'auto':
         _retrieve(
             'ResNet-50-model.npz', 'ResNet-50-model.caffemodel', self)
     elif pretrained_model:
         npz.load_npz(pretrained_model, self)
     self.functions = OrderedDict([
         ('conv1', [self.conv1, self.bn1, relu]),
         ('pool1', [lambda x: max_pooling_2d(x, ksize=3, stride=2)]),
         ('res2', [self.res2]),
         ('res3', [self.res3]),
         ('res4', [self.res4]),
         ('res5', [self.res5]),
         ('pool5', [_global_average_pooling_2d]),
         ('fc6', [self.fc6]),
         ('prob', [softmax]),
     ])
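_global_average_pooling_2d is referenced but not defined in the snippet; the conventional definition, essentially what Chainer's own ResNet implementation uses, pools over the full spatial extent:

    from chainer.functions import average_pooling_2d

    def _global_average_pooling_2d(x):
        # Average over the whole feature map, yielding an (N, C) matrix.
        n, channel, rows, cols = x.shape
        h = average_pooling_2d(x, (rows, cols), stride=1)
        return h.reshape(n, channel)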
Example #10
    def __init__(self,
                 n_class=None, pretrained_model=None, mean=None,
                 initialW=None, initial_bias=None):
        if n_class is None:
            if pretrained_model in self._models:
                n_class = self._models[pretrained_model]['n_class']
            else:
                n_class = 1000

        if mean is None:
            if pretrained_model in self._models:
                mean = self._models[pretrained_model]['mean']
            else:
                mean = _imagenet_mean
        self.mean = mean

        if initialW is None:
            # Employ default initializers used in the original paper.
            initialW = normal.Normal(0.01)
        if pretrained_model:
            # As a sampling process is time-consuming,
            # we employ a zero initializer for faster computation.
            initialW = constant.Zero()
        kwargs = {'initialW': initialW, 'initial_bias': initial_bias}

        super(VGG16, self).__init__()
        with self.init_scope():
            self.conv1_1 = Conv2DActiv(None, 64, 3, 1, 1, **kwargs)
            self.conv1_2 = Conv2DActiv(None, 64, 3, 1, 1, **kwargs)
            self.pool1 = _max_pooling_2d
            self.conv2_1 = Conv2DActiv(None, 128, 3, 1, 1, **kwargs)
            self.conv2_2 = Conv2DActiv(None, 128, 3, 1, 1, **kwargs)
            self.pool2 = _max_pooling_2d
            self.conv3_1 = Conv2DActiv(None, 256, 3, 1, 1, **kwargs)
            self.conv3_2 = Conv2DActiv(None, 256, 3, 1, 1, **kwargs)
            self.conv3_3 = Conv2DActiv(None, 256, 3, 1, 1, **kwargs)
            self.pool3 = _max_pooling_2d
            self.conv4_1 = Conv2DActiv(None, 512, 3, 1, 1, **kwargs)
            self.conv4_2 = Conv2DActiv(None, 512, 3, 1, 1, **kwargs)
            self.conv4_3 = Conv2DActiv(None, 512, 3, 1, 1, **kwargs)
            self.pool4 = _max_pooling_2d
            self.conv5_1 = Conv2DActiv(None, 512, 3, 1, 1, **kwargs)
            self.conv5_2 = Conv2DActiv(None, 512, 3, 1, 1, **kwargs)
            self.conv5_3 = Conv2DActiv(None, 512, 3, 1, 1, **kwargs)
            self.pool5 = _max_pooling_2d
            self.fc6 = Linear(None, 4096, **kwargs)
            self.fc6_relu = relu
            self.fc6_dropout = dropout
            self.fc7 = Linear(None, 4096, **kwargs)
            self.fc7_relu = relu
            self.fc7_dropout = dropout
            self.fc8 = Linear(None, n_class, **kwargs)
            self.prob = softmax

        if pretrained_model in self._models:
            path = download_model(self._models[pretrained_model]['url'])
            chainer.serializers.load_npz(path, self)
        elif pretrained_model:
            chainer.serializers.load_npz(pretrained_model, self)
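Conv2DActiv above comes from ChainerCV and fuses a convolution with its activation. A simplified stand-in, for illustration only (the real class supports more options):

    import chainer
    from chainer.functions import relu
    from chainer.links import Convolution2D

    class Conv2DActivSketch(chainer.Chain):
        # Simplified stand-in for chainercv.links.Conv2DActiv.
        def __init__(self, in_channels, out_channels, ksize,
                     stride=1, pad=0, activ=relu, **kwargs):
            super(Conv2DActivSketch, self).__init__()
            self.activ = activ
            with self.init_scope():
                self.conv = Convolution2D(in_channels, out_channels,
                                          ksize, stride, pad, **kwargs)

        def __call__(self, x):
            return self.activ(self.conv(x))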
Example #11
    def __init__(self, pretrained_model, n_layers, n_class, class_weight=None):
        super(ResNetLayersFCN32, self).__init__()
        self.n_class = n_class
        if class_weight is not None:
            assert class_weight.shape == (self.n_class,)
            self.class_weight = class_weight
        else:
            self.class_weight = None

        if pretrained_model:
            # As a sampling process is time-consuming,
            # we employ a zero initializer for faster computation.
            kwargs = {'initialW': constant.Zero()}

        else:
            # employ default initializers used in the original paper
            kwargs = {'initialW': normal.HeNormal(scale=1.0)}

        kwargs2 = {
            'initialW': chainer.initializers.Zero(),
            'initial_bias': chainer.initializers.Zero(),
            }

        if n_layers == 50:
            block = [3, 4, 6, 3]
        elif n_layers == 101:
            block = [3, 4, 23, 3]
        elif n_layers == 152:
            block = [3, 8, 36, 3]
        else:
            raise ValueError('The n_layers argument should be either 50, 101,'
                             ' or 152, but {} was given.'.format(n_layers))

        with self.init_scope():  # the comments give the sizes (for default 224x224 images) AFTER the corresponding layer
            self.conv1 = Convolution2D(3, 64, 7, 2, 3, **kwargs)                #112x112
            self.bn1 = BatchNormalization(64)
            self.res2 = BuildingBlock(block[0], 64, 64, 256, 1, **kwargs)       #56x56
            self.res3 = BuildingBlock(block[1], 256, 128, 512, 2, **kwargs)     #28x28
            self.res4 = BuildingBlock(block[2], 512, 256, 1024, 2, **kwargs)    #14x14
            self.res5 = BuildingBlock(block[3], 1024, 512, 2048, 2, **kwargs)   #7x7
            #self.fc6 = Linear(2048, 1000)
            self.score_fr = L.Convolution2D(2048, n_class, 1, 1, 0, **kwargs2)
            self.upscore = L.Deconvolution2D(n_class, n_class, 64, 32, 0, nobias=True, initialW=initializers.UpsamplingDeconvWeight()) #224x224

        if pretrained_model and pretrained_model.endswith('.caffemodel'):  #default resnet model
            originalresnet = ResNetLayers(pretrained_model, n_layers)
            if n_layers == 50:
                _transfer_resnet50(originalresnet, self)
            elif n_layers == 101:
                _transfer_resnet101(originalresnet, self)
            elif n_layers == 152:
                _transfer_resnet152(originalresnet, self)
            else:
                raise ValueError('The n_layers argument should be either 50, 101,'
                                 ' or 152, but {} was given.'.format(n_layers))

        elif pretrained_model:
            npz.load_npz(pretrained_model, self)
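initializers.UpsamplingDeconvWeight, used for upscore, is not shown; FCN-style deconvolutions are conventionally initialized to a bilinear upsampling kernel. A sketch of that computation in plain NumPy (the initializer wrapper itself is assumed):

    import numpy as np

    def bilinear_upsampling_kernel(ksize):
        # Classic FCN bilinear kernel: each tap falls off linearly
        # with its distance from the kernel center.
        factor = (ksize + 1) // 2
        center = factor - 1.0 if ksize % 2 == 1 else factor - 0.5
        og = np.ogrid[:ksize, :ksize]
        return ((1 - abs(og[0] - center) / factor) *
                (1 - abs(og[1] - center) / factor))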
Example #12
    def __init__(self, pretrained_model='auto'):
        if pretrained_model:
            # As a sampling process is time-consuming,
            # we employ a zero initializer for faster computation.
            init = constant.Zero()
            kwargs = {'initialW': init, 'initial_bias': init}
        else:
            # employ default initializers used in the original paper
            kwargs = {
                'initialW': normal.Normal(0.01),
                'initial_bias': constant.Zero(),
            }
        super(VGG16Layers, self).__init__()

        with self.init_scope():
            self.conv1_1 = Convolution2D(3, 64, 3, 1, 1, **kwargs)
            self.conv1_2 = Convolution2D(64, 64, 3, 1, 1, **kwargs)
            self.conv2_1 = Convolution2D(64, 128, 3, 1, 1, **kwargs)
            self.conv2_2 = Convolution2D(128, 128, 3, 1, 1, **kwargs)
            self.conv3_1 = Convolution2D(128, 256, 3, 1, 1, **kwargs)
            self.conv3_2 = Convolution2D(256, 256, 3, 1, 1, **kwargs)
            self.conv3_3 = Convolution2D(256, 256, 3, 1, 1, **kwargs)
            self.conv4_1 = Convolution2D(256, 512, 3, 1, 1, **kwargs)
            self.conv4_2 = Convolution2D(512, 512, 3, 1, 1, **kwargs)
            self.conv4_3 = Convolution2D(512, 512, 3, 1, 1, **kwargs)
            self.conv5_1 = Convolution2D(512, 512, 3, 1, 1, **kwargs)
            self.conv5_2 = Convolution2D(512, 512, 3, 1, 1, **kwargs)
            self.conv5_3 = Convolution2D(512, 512, 3, 1, 1, **kwargs)
            self.fc6 = Linear(512 * 7 * 7, 4096, **kwargs)
            self.fc7 = Linear(4096, 4096, **kwargs)
            self.fc8 = Linear(4096, 1000, **kwargs)

        if pretrained_model == 'auto':
            _retrieve(
                'VGG_ILSVRC_16_layers.npz',
                'http://www.robots.ox.ac.uk/%7Evgg/software/very_deep/'
                'caffe/VGG_ILSVRC_16_layers.caffemodel',
                self)
        elif pretrained_model:
            npz.load_npz(pretrained_model, self)
Example #13
    def __init__(self,
                 n_class,
                 pretrained_model=None,
                 mean=None,
                 initialW=None,
                 initial_bias=None,
                 googlenetbn_trainedmodel=None):
        self.n_class = n_class
        self.mean = mean
        self.initial_bias = initial_bias
        self.googlenetbn_trainedmodel = googlenetbn_trainedmodel

        self.insize = 224

        if mean is None:
            # ImageNet mean pixel values
            self.mean = np.array([123.68, 116.779, 103.939],
                                 dtype=np.float32)[:, np.newaxis, np.newaxis]

        if initialW is None:
            # employ default initializers used in BVLC. For more detail, see
            # https://github.com/chainer/chainer/pull/2424#discussion_r109642209
            self.initialW = uniform.LeCunUniform(scale=1.0)
        else:
            self.initialW = initialW

        if pretrained_model:
            # As a sampling process is time-consuming,
            # we employ a zero initializer for faster computation.
            self.initialW = constant.Zero()

        super(MultiscaleNet, self).__init__()
        with self.init_scope():
            # Deep layers: batch-normalized GoogLeNet
            self.googlenetbn = GoogleNetBN(n_class=n_class)

            # Shallow layers
            self.conv_s1 = L.Convolution2D(None,
                                           96,
                                           3,
                                           stride=4,
                                           pad=1,
                                           initialW=0.02 * np.sqrt(3 * 3 * 3))
            self.norm_s1 = L.BatchNormalization(96)
            self.conv_s2 = L.Convolution2D(None,
                                           96,
                                           3,
                                           stride=4,
                                           pad=1,
                                           initialW=0.02 * np.sqrt(3 * 3 * 3))
            self.norm_s2 = L.BatchNormalization(96)

            # Final layers
            self.fc4_1 = L.Linear(None, 4096)
            self.fc4_2 = L.Linear(None, self.n_class)
Example #14
 def __init__(self, out_channels, ksize, pad=1):
     kwargs = {
         'initialW': normal.Normal(0.01),
         'initial_bias': constant.Zero(),
     }
     super(Block, self).__init__()
     with self.init_scope():
         self.conv = L.Convolution2D(None,
                                     out_channels,
                                     ksize,
                                     pad=pad,
                                     **kwargs)
         self.bn = L.BatchNormalization(out_channels)
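Block registers a convolution and a batch normalization but its forward pass is not shown; VGG-BN style blocks typically chain conv, BN, and ReLU. A sketch of the assumed forward:

    import chainer.functions as F

    def __call__(self, x):
        # conv -> batch norm -> ReLU, the usual VGG-BN ordering.
        return F.relu(self.bn(self.conv(x)))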
Example #15
    def __init__(self, n_class=36, pretrained_model=None, output_scale=1.0):
        if pretrained_model:
            # As a sampling process is time-consuming,
            # we employ a zero initializer for faster computation.
            kwargs = {'initialW': constant.Zero()}
        else:
            print "train resblock : True"
            # employ default initializers used in the original paper
            kwargs = {'initialW': normal.HeNormal(scale=1.0)}
        self.n_class = n_class
        self.output_scale = output_scale
        super(DualCenterProposalNetworkRes50FCN, self).__init__(
            # resnet50
            conv1=L.Convolution2D(3, 64, 7, 2, 3, **kwargs),
            bn1=L.BatchNormalization(64),
            res2=R.BuildingBlock(3, 64, 64, 256, 1, **kwargs),
            res3=R.BuildingBlock(4, 256, 128, 512, 2, **kwargs),
            res4=R.BuildingBlock(6, 512, 256, 1024, 2, **kwargs),
            res5=R.BuildingBlock(3, 1024, 512, 2048, 2, **kwargs),
            upscore32=L.Deconvolution2D(2048, 512, 8, stride=4, pad=2),
            bn_up32=L.BatchNormalization(512),
            upscore16=L.Deconvolution2D(1024, 512, 4, stride=2, pad=1),
            bn_up16=L.BatchNormalization(512),
            concat_conv=L.Convolution2D(512 * 3, 512 * 3, 3, stride=1, pad=1),
            bn_concat=L.BatchNormalization(512 * 3),
            score_pool=L.Convolution2D(512 * 3, n_class, 1, stride=1, pad=0),
            upscore_final=L.Deconvolution2D(self.n_class,
                                            self.n_class,
                                            16,
                                            stride=8,
                                            pad=4),
            conv_cp1=L.Convolution2D(512 * 3, 1024, 3, stride=1, pad=1),
            bn_cp1=L.BatchNormalization(1024),

            # center pose network
            conv_cp2=L.Convolution2D(1024, 512, 3, stride=1, pad=1),
            bn_cp2=L.BatchNormalization(512),
            upscore_cp1=L.Deconvolution2D(512, 16, 8, stride=4, pad=2),
            bn_cp3=L.BatchNormalization(16),
            upscore_cp2=L.Deconvolution2D(16, 3, 4, stride=2, pad=1),

            # origin center pose network
            conv_ocp2=L.Convolution2D(1024, 512, 3, stride=1, pad=1),
            bn_ocp2=L.BatchNormalization(512),
            upscore_ocp1=L.Deconvolution2D(512, 16, 8, stride=4, pad=2),
            bn_ocp3=L.BatchNormalization(16),
            upscore_ocp2=L.Deconvolution2D(16, 3, 4, stride=2, pad=1),
        )
Example #16
    def __init__(self, pretrained_model, n_layers, n_class=3):
        super(ResNetLayersFCN, self).__init__()

        self.n_class = n_class

        if pretrained_model:
            # As a sampling process is time-consuming,
            # we employ a zero initializer for faster computation.
            kwargs = {'initialW': constant.Zero()}
        else:
            # employ default initializers used in the original paper
            kwargs = {'initialW': normal.HeNormal(scale=1.0)}

        if n_layers == 50:
            block = [3, 4, 6, 3]
        elif n_layers == 101:
            block = [3, 4, 23, 3]
        elif n_layers == 152:
            block = [3, 8, 36, 3]
        else:
            raise ValueError('The n_layers argument should be either 50, 101,'
                             ' or 152, but {} was given.'.format(n_layers))

        with self.init_scope():  # the comments give the sizes (for default 224x224 images) AFTER the corresponding layer
            self.conv1 = Convolution2D(3, 64, 7, 2, 3, **kwargs)                #112x112
            self.bn1 = BatchNormalization(64)
            self.res2 = BuildingBlock(block[0], 64, 64, 256, 1, **kwargs)       #56x56
            self.res3 = BuildingBlock(block[1], 256, 128, 512, 2, **kwargs)     #28x28
            self.res4 = BuildingBlock(block[2], 512, 256, 1024, 2, **kwargs)    #14x14
            self.res5 = BuildingBlock(block[3], 1024, 512, 2048, 2, **kwargs)   #7x7
            #self.fc6 = Linear(2048, 1000)
            self.score_fr = L.Convolution2D(2048, n_class, 1, 1, 0, **kwargs)
            self.upscore = L.Deconvolution2D(
                n_class, n_class, 64, 32, 0, nobias=True,
                initialW=initializers.UpsamplingDeconvWeight())                 #224x224

        # if pretrained_model and pretrained_model.endswith('.caffemodel'):
        #     _retrieve(n_layers, 'ResNet-{}-model.npz'.format(n_layers),
        #               pretrained_model, self)
        if pretrained_model in ['ResNet-101-model.caffemodel']:
            # later maybe add 50 and 152 here: open the default ResNet
            # and extract the weights from it
            resnet101 = ResNetLayers(pretrained_model, 101)
            init_from_resnet101(resnet101)

        elif pretrained_model:
            npz.load_npz(pretrained_model, self)
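init_from_resnet101 is called above but never defined in the snippet; since it is invoked as a bare name with a single argument, its exact signature is unknown. A plausible sketch that takes the target model explicitly and copies the shared backbone weights with Chainer's Link.copyparams:

    def init_from_resnet101(model, resnet101):
        # Copy the parameters of every sub-link the FCN shares with
        # the pretrained ResNet-101 backbone.
        for name in ['conv1', 'bn1', 'res2', 'res3', 'res4', 'res5']:
            getattr(model, name).copyparams(getattr(resnet101, name))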
Example #17
    def __init__(self, pretrained_model, n_layers):
        super(ResNet, self).__init__()
        if pretrained_model:
            # As a sampling process is time-consuming,
            # we employ a zero initializer for faster computation.
            kwargs = {'initialW': constant.Zero()}
        else:
            # employ default initializers used in the original paper
            kwargs = {'initialW': normal.HeNormal(scale=1.0)}

        if n_layers == 50:
            block = [3, 4, 6, 3]
        elif n_layers == 101:
            block = [3, 4, 23, 3]
        elif n_layers == 152:
            block = [3, 8, 36, 3]
        else:
            raise ValueError('The n_layers argument should be either 50, 101,'
                             ' or 152, but {} was given.'.format(n_layers))

        with self.init_scope():
            self.conv1 = L.Convolution2D(3, 64, 7, 2, 3, **kwargs)
            self.bn1 = L.BatchNormalization(64)
            self.res2 = BuildingBlock(block[0], 64, 64, 256, 1, **kwargs)
            self.res3 = BuildingBlock(block[1], 256, 128, 512, 2, **kwargs)
            self.res4 = BuildingBlock(block[2], 512, 256, 1024, 2, **kwargs)
            self.res5 = BuildingBlock(block[3], 1024, 512, 2048, 2, **kwargs)
            self.fc6 = L.Linear(2048, 1000)

        if pretrained_model and pretrained_model.endswith('.caffemodel'):
            _retrieve(n_layers, 'ResNet-{}-model.npz'.format(n_layers),
                      pretrained_model, self)
        elif pretrained_model:
            npz.load_npz(pretrained_model, self)

        self.functions = collections.OrderedDict([
            ('conv1', [self.conv1, self.bn1, F.relu]),
            ('pool1', [lambda x: F.max_pooling_2d(x, ksize=3, stride=2)]),
            ('res2', [self.res2]),
            ('res3', [self.res3]),
            ('res4', [self.res4]),
            ('res5', [self.res5]),
            ('pool5', [_global_average_pooling_2d]),
            ('fc6', [self.fc6]),
            ('prob', [F.softmax]),
        ])
Example #18
 def __init__(self, pretrained_model='auto'):
     if pretrained_model:
         # As a sampling process is time-consuming,
         # we employ a zero initializer for faster computation.
         kwargs = {'initialW': constant.Zero()}
     else:
         # employ default initializers used in BVLC. For more detail, see
         # https://github.com/chainer/chainer/pull/2424#discussion_r109642209
         kwargs = {'initialW': uniform.LeCunUniform(scale=1.0)}
     super(GoogLeNet,
           self).__init__(conv1=Convolution2D(3,
                                              64,
                                              7,
                                              stride=2,
                                              pad=3,
                                              **kwargs),
                          conv2_reduce=Convolution2D(64, 64, 1, **kwargs),
                          conv2=Convolution2D(64,
                                              192,
                                              3,
                                              stride=1,
                                              pad=1,
                                              **kwargs),
                          inc3a=Inception(192, 64, 96, 128, 16, 32, 32),
                          inc3b=Inception(256, 128, 128, 192, 32, 96, 64),
                          inc4a=Inception(480, 192, 96, 208, 16, 48, 64),
                          inc4b=Inception(512, 160, 112, 224, 24, 64, 64),
                          inc4c=Inception(512, 128, 128, 256, 24, 64, 64),
                          inc4d=Inception(512, 112, 144, 288, 32, 64, 64),
                          inc4e=Inception(528, 256, 160, 320, 32, 128, 128),
                          inc5a=Inception(832, 256, 160, 320, 32, 128, 128),
                          inc5b=Inception(832, 384, 192, 384, 48, 128, 128),
                          loss3_fc=Linear(1024, 1000, **kwargs),
                          loss1_conv=Convolution2D(512, 128, 1, **kwargs),
                          loss1_fc1=Linear(2048, 1024, **kwargs),
                          loss1_fc2=Linear(1024, 1000, **kwargs),
                          loss2_conv=Convolution2D(528, 128, 1, **kwargs),
                          loss2_fc1=Linear(2048, 1024, **kwargs),
                          loss2_fc2=Linear(1024, 1000, **kwargs))
     if pretrained_model == 'auto':
         _retrieve(
             'bvlc_googlenet.npz',
             'http://dl.caffe.berkeleyvision.org/bvlc_googlenet.caffemodel',
             self)
     elif pretrained_model:
         npz.load_npz(pretrained_model, self)
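_retrieve, used throughout these examples, is Chainer's caching download-and-convert helper: it fetches a caffemodel once, converts it with CaffeFunction, and caches the result as an .npz file. A condensed sketch of the conversion step, close to Chainer's internal _make_npz (details vary between versions):

    from chainer.dataset import download
    from chainer.links.caffe.caffe_function import CaffeFunction
    from chainer.serializers import npz

    def _make_npz(path_npz, url, model):
        # Download the caffemodel, convert it to Chainer's npz format,
        # cache it on disk, and load the weights into `model`.
        path_caffemodel = download.cached_download(url)
        caffemodel = CaffeFunction(path_caffemodel)
        npz.save_npz(path_npz, caffemodel, compression=False)
        npz.load_npz(path_npz, model)
        return model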
Example #19
 def __init__(self, n_class=36, pretrained_model=None):
     if pretrained_model:
         # As a sampling process is time-consuming,
         # we employ a zero initializer for faster computation.
         kwargs = {'initialW': constant.Zero()}
     else:
         # employ default initializers used in the original paper
         kwargs = {'initialW': normal.HeNormal(scale=1.0)}
     self.n_class = n_class
     super(DepthInvariantNetworkRes50FCN, self).__init__(
         # resnet50
         conv1=L.Convolution2D(3, 64, 7, 2, 3, **kwargs),
         bn1=L.BatchNormalization(64),
         res2=R.BuildingBlock(3, 64, 64, 256, 1,
                              **kwargs),  # resblock 1/2 -> 1/4
         res3=R.BuildingBlock(4, 256, 128, 512, 2,
                              **kwargs),  # resblock 1/4 ->1/8
         res4=R.BuildingBlock(6, 512, 256, 1024, 2,
                              **kwargs),  # resblock 1/8 -> 1/16
         res5=R.BuildingBlock(3, 1024, 512, 2048, 2,
                              **kwargs),  # resblock 1/16 -> 1/32
         upscore1=L.Deconvolution2D(2048, 512, 16, stride=8, pad=4),
         upscore2=L.Deconvolution2D(1024, 512, 8, stride=4, pad=2),
         upscore3=L.Deconvolution2D(512, 512, 4, stride=2, pad=1),
         bn_upscore=L.BatchNormalization(512 * 3),
         concat_conv=L.Convolution2D(512 * 3, 1024, 3, stride=1, pad=1),
         pool_roi_conv=L.Convolution2D(1024, 1024, 5, stride=5, pad=0),
         conv_after_croip=L.Convolution2D(1024, 512, 3, stride=1, pad=1),
         bn_croip1=L.BatchNormalization(1024),
         bn_croip2=L.BatchNormalization(512),
         score_pool=L.Convolution2D(512, n_class, 1, stride=1, pad=0),
         upscore_final=L.Deconvolution2D(self.n_class,
                                         self.n_class,
                                         8,
                                         stride=4,
                                         pad=2),
     )
Example #20
    def __init__(self, pretrained_model="auto"):
        if pretrained_model:
            kwargs = {'initialW': constant.Zero()}
        else:
            kwargs = {'initialW': normal.HeNormal(scale=1.0)}

        super(ResNet, self).__init__(
            conv1=L.Convolution2D(3, 64, 7, 2, 3, **kwargs),
            bn1=L.BatchNormalization(64),
            res2=Block(3, 64, 64, 256, 1, **kwargs),
            res3=Block(4, 256, 128, 512, 2, **kwargs),
            res4=Block(6, 512, 256, 1024, 2, **kwargs),
            res5=Block(3, 1024, 512, 2048, 2, **kwargs),
            fc6=L.Linear(None, 1000),
        )
        if pretrained_model == 'auto':
            print("[ PREPROCESS ] Use caffe model of ResNet.")
            _retrieve('ResNet-50-model.npz', 'ResNet-50-model.caffemodel',
                      self)
            self.fc6 = L.Linear(None, 25)
        elif pretrained_model:
            npz.load_npz(pretrained_model, self)

        self.train = True
Example #21
    def __init__(self, n_class=36, pretrained_model=None):
        if pretrained_model:
            # As a sampling process is time-consuming,
            # we employ a zero initializer for faster computation.
            kwargs = {'initialW': constant.Zero()}
        else:
            # employ default initializers used in the original paper
            kwargs = {'initialW': normal.HeNormal(scale=1.0)}
        self.n_class = n_class
        super(CenterProposalNetworkRes50FCN, self).__init__(
            # resnet50
            conv1=L.Convolution2D(3, 64, 7, 2, 3, **kwargs),
            bn1=L.BatchNormalization(64),
            res2=R.BuildingBlock(3, 64, 64, 256, 1, **kwargs),
            res3=R.BuildingBlock(4, 256, 128, 512, 2, **kwargs),
            res4=R.BuildingBlock(6, 512, 256, 1024, 2, **kwargs),
            res5=R.BuildingBlock(3, 1024, 512, 2048, 2, **kwargs),
            upscore32=L.Deconvolution2D(2048,
                                        512,
                                        8,
                                        stride=4,
                                        pad=2,
                                        use_cudnn=False),
            upscore16=L.Deconvolution2D(1024,
                                        512,
                                        4,
                                        stride=2,
                                        pad=1,
                                        use_cudnn=False),
            concat_conv=L.Convolution2D(512 * 3, 512 * 3, 3, stride=1, pad=1),
            score_pool=L.Convolution2D(512 * 3, n_class, 1, stride=1, pad=0),
            cls_pool=L.Convolution2D(512 * 3, 128, 1, stride=1, pad=0),
            upscore_final=L.Deconvolution2D(self.n_class,
                                            self.n_class,
                                            16,
                                            stride=8,
                                            pad=4,
                                            use_cudnn=False),

            # depth network
            conv_d1_1=L.Convolution2D(1, 64, 3, stride=1, pad=1),
            bn_d1_1=L.BatchNormalization(64),
            conv_d1_2=L.Convolution2D(64, 64, 3, stride=1, pad=1),
            bn_d1_2=L.BatchNormalization(64),
            conv_d2=L.Convolution2D(64, 128, 3, stride=1, pad=1),
            bn_d2=L.BatchNormalization(128),
            conv_d3=L.Convolution2D(128, 256, 3, stride=1, pad=1),
            bn_d3=L.BatchNormalization(256),

            # center pose network
            conv_cp_1=L.Convolution2D(256 + 512 + 128,
                                      1024,
                                      3,
                                      stride=1,
                                      pad=1),
            bn_cp_1=L.BatchNormalization(1024),
            conv_cp_2=L.Convolution2D(1024, 1024, 3, stride=1, pad=1),
            bn_cp_2=L.BatchNormalization(1024),
            upscore_cp1=L.Deconvolution2D(1024,
                                          512,
                                          8,
                                          stride=4,
                                          pad=2,
                                          use_cudnn=False),
            bn_cp_3=L.BatchNormalization(512),
            upscore_cp2=L.Deconvolution2D(512,
                                          3,
                                          4,
                                          stride=2,
                                          pad=1,
                                          use_cudnn=False),

            # rotation network
            conv_rot_1=L.Convolution2D(256 + 512 + 128,
                                       1024,
                                       3,
                                       stride=1,
                                       pad=1),
            bn_rot_1=L.BatchNormalization(1024),
            conv_rot_2=L.Convolution2D(1024, 1024, 3, stride=1, pad=1),
            bn_rot_2=L.BatchNormalization(1024),
            upscore_rot1=L.Deconvolution2D(1024,
                                           512,
                                           8,
                                           stride=4,
                                           pad=2,
                                           use_cudnn=False),
            bn_rot_3=L.BatchNormalization(512),
            upscore_rot2=L.Deconvolution2D(512,
                                           5,
                                           4,
                                           stride=2,
                                           pad=1,
                                           use_cudnn=False),
        )
Example #22
 def __init__(self, pretrained_model='auto'):
     if pretrained_model:
         # As a sampling process is time-consuming,
         # we employ a zero initializer for faster computation.
         kwargs = {'initialW': constant.Zero()}
     else:
         # employ default initializers used in the original paper
         kwargs = {'initialW': uniform.GlorotUniform(scale=1.0)}
     super(GoogLeNet,
           self).__init__(conv1=Convolution2D(3,
                                              64,
                                              7,
                                              stride=2,
                                              pad=3,
                                              **kwargs),
                          conv2_reduce=Convolution2D(64, 64, 1, **kwargs),
                          conv2=Convolution2D(64,
                                              192,
                                              3,
                                              stride=1,
                                              pad=1,
                                              **kwargs),
                          inc3a=Inception(192, 64, 96, 128, 16, 32, 32),
                          inc3b=Inception(256, 128, 128, 192, 32, 96, 64),
                          inc4a=Inception(480, 192, 96, 208, 16, 48, 64),
                          inc4b=Inception(512, 160, 112, 224, 24, 64, 64),
                          inc4c=Inception(512, 128, 128, 256, 24, 64, 64),
                          inc4d=Inception(512, 112, 144, 288, 32, 64, 64),
                          inc4e=Inception(528, 256, 160, 320, 32, 128, 128),
                          inc5a=Inception(832, 256, 160, 320, 32, 128, 128),
                          inc5b=Inception(832, 384, 192, 384, 48, 128, 128),
                          loss3_fc=Linear(1024, 1000, **kwargs),
                          loss1_conv=Convolution2D(512, 128, 1, **kwargs),
                          loss1_fc1=Linear(2048, 1024, **kwargs),
                          loss1_fc2=Linear(1024, 1000, **kwargs),
                          loss2_conv=Convolution2D(528, 128, 1, **kwargs),
                          loss2_fc1=Linear(2048, 1024, **kwargs),
                          loss2_fc2=Linear(1024, 1000, **kwargs))
     if pretrained_model == 'auto':
         _retrieve(
             'bvlc_googlenet.npz',
             'http://dl.caffe.berkeleyvision.org/bvlc_googlenet.caffemodel',
             self)
     elif pretrained_model:
         npz.load_npz(pretrained_model, self)
     self.functions = OrderedDict([
         ('conv1', [self.conv1, relu]),
         ('pool1', [_max_pooling_2d, _local_response_normalization]),
         ('conv2_reduce', [self.conv2_reduce, relu]),
         ('conv2', [self.conv2, relu, _local_response_normalization]),
         ('pool2', [_max_pooling_2d]),
         ('inception_3a', [self.inc3a]),
         ('inception_3b', [self.inc3b]),
         ('pool3', [_max_pooling_2d]),
         ('inception_4a', [self.inc4a]),
         ('inception_4b', [self.inc4b]),
         ('inception_4c', [self.inc4c]),
         ('inception_4d', [self.inc4d]),
         ('inception_4e', [self.inc4e]),
         ('pool4', [_max_pooling_2d]),
         ('inception_5a', [self.inc5a]),
         ('inception_5b', [self.inc5b]),
         ('pool5', [_average_pooling_2d_k7]),
         ('loss3_fc', [_dropout, self.loss3_fc]),
         ('prob', [softmax]),
         # Since usually the following outputs are not used, they are put
         # after 'prob' to be skipped for efficiency.
         ('loss1_fc2', [
             _average_pooling_2d_k5, self.loss1_conv, relu, self.loss1_fc1,
             relu, self.loss1_fc2
         ]),
         ('loss2_fc2', [
             _average_pooling_2d_k5, self.loss2_conv, relu, self.loss2_fc1,
             relu, self.loss2_fc2
         ])
     ])
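The pooling helpers referenced above (_max_pooling_2d, _average_pooling_2d_k5, _average_pooling_2d_k7) are not shown; plausible definitions matching GoogLeNet's pooling geometry would be:

    import chainer.functions as F

    def _max_pooling_2d(x):
        return F.max_pooling_2d(x, ksize=3, stride=2)

    def _average_pooling_2d_k5(x):
        # 5x5 average pooling in front of the auxiliary heads.
        return F.average_pooling_2d(x, ksize=5, stride=3)

    def _average_pooling_2d_k7(x):
        # Global 7x7 average pooling before the final classifier.
        return F.average_pooling_2d(x, ksize=7, stride=1)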
Example #23
    def __init__(self, pretrained_model='auto'):
        if pretrained_model:
            # As a sampling process is time-consuming,
            # we employ a zero initializer for faster computation.
            init = constant.Zero()
            kwargs = {'initialW': init, 'initial_bias': init}
        else:
            # employ default initializers used in the original paper
            kwargs = {
                'initialW': normal.Normal(0.01),
                'initial_bias': constant.Zero(),
            }
        super(VGG16Layers, self).__init__(
            conv1_1=Convolution2D(3, 64, 3, 1, 1, **kwargs),
            conv1_2=Convolution2D(64, 64, 3, 1, 1, **kwargs),
            conv2_1=Convolution2D(64, 128, 3, 1, 1, **kwargs),
            conv2_2=Convolution2D(128, 128, 3, 1, 1, **kwargs),
            conv3_1=Convolution2D(128, 256, 3, 1, 1, **kwargs),
            conv3_2=Convolution2D(256, 256, 3, 1, 1, **kwargs),
            conv3_3=Convolution2D(256, 256, 3, 1, 1, **kwargs),
            conv4_1=Convolution2D(256, 512, 3, 1, 1, **kwargs),
            conv4_2=Convolution2D(512, 512, 3, 1, 1, **kwargs),
            conv4_3=Convolution2D(512, 512, 3, 1, 1, **kwargs),
            conv5_1=Convolution2D(512, 512, 3, 1, 1, **kwargs),
            conv5_2=Convolution2D(512, 512, 3, 1, 1, **kwargs),
            conv5_3=Convolution2D(512, 512, 3, 1, 1, **kwargs),
            fc6=Linear(512 * 7 * 7, 4096, **kwargs),
            fc7=Linear(4096, 4096, **kwargs),
            fc8=Linear(4096, 1000, **kwargs),
        )
        if pretrained_model == 'auto':
            _retrieve(
                'VGG_ILSVRC_16_layers.npz',
                'http://www.robots.ox.ac.uk/%7Evgg/software/very_deep/'
                'caffe/VGG_ILSVRC_16_layers.caffemodel', self)
        elif pretrained_model:
            npz.load_npz(pretrained_model, self)

        self.functions = collections.OrderedDict([
            ('conv1_1', [self.conv1_1, relu]),
            ('conv1_2', [self.conv1_2, relu]),
            ('pool1', [_max_pooling_2d]),
            ('conv2_1', [self.conv2_1, relu]),
            ('conv2_2', [self.conv2_2, relu]),
            ('pool2', [_max_pooling_2d]),
            ('conv3_1', [self.conv3_1, relu]),
            ('conv3_2', [self.conv3_2, relu]),
            ('conv3_3', [self.conv3_3, relu]),
            ('pool3', [_max_pooling_2d]),
            ('conv4_1', [self.conv4_1, relu]),
            ('conv4_2', [self.conv4_2, relu]),
            ('conv4_3', [self.conv4_3, relu]),
            ('pool4', [_max_pooling_2d]),
            ('conv5_1', [self.conv5_1, relu]),
            ('conv5_2', [self.conv5_2, relu]),
            ('conv5_3', [self.conv5_3, relu]),
            ('pool5', [_max_pooling_2d]),
            ('fc6', [self.fc6, relu, dropout]),
            ('fc7', [self.fc7, relu, dropout]),
            ('fc8', [self.fc8]),
            ('prob', [softmax]),
        ])
Example #24
    def __init__(self,
                 pretrained_model='auto',
                 n_channels=3,
                 n_outputs=101,
                 mean_path='datasets/models/mean2.npz'):
        super(C3DVersion1UCF101, self).__init__()
        if pretrained_model:
            # As a sampling process is time-consuming,
            # we employ a zero initializer for faster computation.
            init = constant.Zero()
            conv_kwargs = {'initialW': init, 'initial_bias': init}
            fc_kwargs = conv_kwargs
        else:
            # employ default initializers used in the original paper
            conv_kwargs = {
                'initialW': normal.Normal(0.01),
                'initial_bias': constant.Zero(),
            }
            fc_kwargs = {
                'initialW': normal.Normal(0.005),
                'initial_bias': constant.One(),
            }
        with self.init_scope():
            self.conv1a = ConvolutionND(3, n_channels, 64, 3, 1, 1,
                                        **conv_kwargs)
            self.conv2a = ConvolutionND(3, 64, 128, 3, 1, 1, **conv_kwargs)
            self.conv3a = ConvolutionND(3, 128, 256, 3, 1, 1, **conv_kwargs)
            self.conv3b = ConvolutionND(3, 256, 256, 3, 1, 1, **conv_kwargs)
            self.conv4a = ConvolutionND(3, 256, 512, 3, 1, 1, **conv_kwargs)
            self.conv4b = ConvolutionND(3, 512, 512, 3, 1, 1, **conv_kwargs)
            self.conv5a = ConvolutionND(3, 512, 512, 3, 1, 1, **conv_kwargs)
            self.conv5b = ConvolutionND(3, 512, 512, 3, 1, 1, **conv_kwargs)
            self.fc6 = Linear(512 * 4 * 4, 4096, **fc_kwargs)
            self.fc7 = Linear(4096, 4096, **fc_kwargs)
            self.fc8 = Linear(4096, n_outputs, **fc_kwargs)
        if pretrained_model == 'auto':
            _retrieve(
                'conv3d_deepnetA_ucf.npz', 'http://vlg.cs.dartmouth.edu/c3d/'
                'c3d_ucf101_finetune_whole_iter_20000', self)
        elif pretrained_model:
            npz.load_npz(pretrained_model, self)

        self.pre = ConvolutionND(3,
                                 n_channels,
                                 n_channels,
                                 1,
                                 1,
                                 0,
                                 nobias=True,
                                 **conv_kwargs)
        self.pre.W.data[:] = 0
        self.pre.W.data[[0, 1, 2], [2, 1, 0]] = 128
        # self.pre.b.data[:] = 128 - numpy.array([90.25164795, 97.65701294, 101.4083252])
        self.mean = Bias(shape=(3, 16, 112, 112))
        mean = numpy.load(mean_path)['mean']
        self.mean.b.data[:] = 128 - mean[:, :, 8:8 + 112, 8:8 + 112]
        self.functions = collections.OrderedDict([
            ('pre', [self.pre, _resize, self.mean]),
            ('conv1a', [self.conv1a, relu]),
            ('pool1', [_max_pooling_2d]),
            ('conv2a', [self.conv2a, relu]),
            ('pool2', [_max_pooling_3d]),
            ('conv3a', [self.conv3a, relu]),
            ('conv3b', [self.conv3b, relu]),
            ('pool3', [_max_pooling_3d]),
            ('conv4a', [self.conv4a, relu]),
            ('conv4b', [self.conv4b, relu]),
            ('pool4', [_max_pooling_3d]),
            ('conv5a', [self.conv5a, relu]),
            ('conv5b', [self.conv5b, relu]),
            ('pool5', [_max_pooling_3d, dropout]),
            ('fc6', [self.fc6, relu, dropout]),
            ('fc7', [self.fc7, relu, dropout]),
            ('fc8', [self.fc8]),
            ('prob', [softmax]),
        ])
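The pre layer above is a fixed 1x1 convolution: writing W[[0, 1, 2], [2, 1, 0]] = 128 routes input channel 2 to output channel 0 and vice versa, i.e. it converts RGB input to the BGR order the Caffe weights expect while scaling by 128. A quick NumPy check of that claim:

    import numpy as np

    W = np.zeros((3, 3), dtype=np.float32)
    W[[0, 1, 2], [2, 1, 0]] = 128
    rgb = np.array([0.1, 0.2, 0.3], dtype=np.float32)
    print(W.dot(rgb))  # [38.4 25.6 12.8] == 128 * reversed input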
Example #25
    def __init__(self,
                 n_class=None,
                 pretrained_model=None,
                 mean=None,
                 initialW=None,
                 initial_bias=None):
        self.n_class = n_class
        self.mean = mean
        self.initial_bias = initial_bias

        self.insize = 224

        if n_class is None:
            self.n_class = 100

        if mean is None:
            # ImageNet mean pixel values
            self.mean = np.array([123.68, 116.779, 103.939],
                                 dtype=np.float32)[:, np.newaxis, np.newaxis]

        if initialW is None:
            # employ default initializers used in BVLC. For more detail, see
            # https://github.com/chainer/chainer/pull/2424#discussion_r109642209
            self.initialW = uniform.LeCunUniform(scale=1.0)
        else:
            self.initialW = initialW

        if pretrained_model:
            # As a sampling process is time-consuming,
            # we employ a zero initializer for faster computation.
            self.initialW = constant.Zero()

        super(GoogleNetBN, self).__init__()
        with self.init_scope():
            # Deep layers: batch-normalized GoogLeNet
            self.conv1 = L.Convolution2D(None,
                                         64,
                                         7,
                                         stride=2,
                                         pad=3,
                                         nobias=True)
            self.norm1 = L.BatchNormalization(64)
            self.conv2 = L.Convolution2D(None,
                                         192,
                                         3,
                                         stride=1,
                                         pad=1,
                                         nobias=True)
            self.norm2 = L.BatchNormalization(192)

            self.inc3a = L.InceptionBN(None, 64, 64, 64, 64, 96, "avg", 32)
            self.inc3b = L.InceptionBN(None, 64, 64, 96, 64, 96, "avg", 64)
            self.inc3c = L.InceptionBN(None,
                                       0,
                                       128,
                                       160,
                                       64,
                                       96,
                                       "max",
                                       stride=2)

            self.inc4a = L.InceptionBN(None, 224, 64, 96, 96, 128, "avg", 128)
            self.inc4b = L.InceptionBN(None, 192, 96, 128, 96, 128, "avg", 128)
            self.inc4c = L.InceptionBN(None, 128, 128, 160, 128, 160, "avg",
                                       128)
            self.inc4d = L.InceptionBN(None, 64, 128, 192, 160, 192, "avg",
                                       128)
            self.inc4e = L.InceptionBN(None,
                                       0,
                                       128,
                                       192,
                                       192,
                                       256,
                                       "max",
                                       stride=2)

            self.inc5a = L.InceptionBN(None, 352, 192, 320, 160, 224, "avg",
                                       128)
            self.inc5b = L.InceptionBN(None, 352, 192, 320, 192, 224, "max",
                                       128)
            self.loss3_fc = L.Linear(None,
                                     self.n_class,
                                     initialW=self.initialW)

            self.loss1_conv = L.Convolution2D(None,
                                              128,
                                              1,
                                              initialW=self.initialW,
                                              nobias=True)
            self.norma = L.BatchNormalization(128)
            self.loss1_fc1 = L.Linear(None,
                                      1024,
                                      initialW=self.initialW,
                                      nobias=True)
            self.norma2 = L.BatchNormalization(1024)
            self.loss1_fc2 = L.Linear(None,
                                      self.n_class,
                                      initialW=self.initialW)

            self.loss2_conv = L.Convolution2D(None,
                                              128,
                                              1,
                                              initialW=self.initialW,
                                              nobias=True)
            self.normb = L.BatchNormalization(128)
            self.loss2_fc1 = L.Linear(None,
                                      1024,
                                      initialW=self.initialW,
                                      nobias=True)
            self.normb2 = L.BatchNormalization(1024)
            self.loss2_fc2 = L.Linear(None,
                                      self.n_class,
                                      initialW=self.initialW)
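The loss1_* and loss2_* layers above are GoogLeNet-style auxiliary classifiers. How the three heads are combined is not part of this snippet; the usual scheme from the GoogLeNet paper weights the auxiliary losses by 0.3, roughly:

    import chainer.functions as F

    def googlenet_loss(main_logits, aux1_logits, aux2_logits, t):
        # Main softmax loss plus down-weighted auxiliary losses.
        loss = F.softmax_cross_entropy(main_logits, t)
        loss += 0.3 * F.softmax_cross_entropy(aux1_logits, t)
        loss += 0.3 * F.softmax_cross_entropy(aux2_logits, t)
        return loss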