def __init__(self, pretrained_model='auto'):
        """Create the GoogLeNet links and optionally load saved weights.

        Args:
            pretrained_model: Selects where the weights come from.
                ``'auto'`` (default) passes this model to ``_retrieve``
                along with the ``bvlc_googlenet.npz`` file name and the
                BVLC caffemodel URL.  Any other truthy value is given to
                ``npz.load_npz`` as the file to load.  A falsy value skips
                loading, and the directly created convolution and linear
                links use ``LeCunUniform(scale=1.0)`` instead of zeros.
        """
        super(GoogLeNet, self).__init__()

        # Zero initialization avoids the time-consuming sampling step when
        # pretrained weights are requested.  Otherwise use the BVLC default
        # initializer; for details see
        # https://github.com/chainer/chainer/pull/2424#discussion_r109642209
        w_init = (constant.Zero() if pretrained_model
                  else uniform.LeCunUniform(scale=1.0))

        def conv(n_in, n_out, ksize, **extra):
            return Convolution2D(n_in, n_out, ksize, initialW=w_init, **extra)

        def fc(n_in, n_out):
            return Linear(n_in, n_out, initialW=w_init)

        # (attribute name, positional arguments) for each Inception module,
        # in registration order.  These are built without ``initialW``.
        inception_specs = (
            ('inc3a', (192, 64, 96, 128, 16, 32, 32)),
            ('inc3b', (256, 128, 128, 192, 32, 96, 64)),
            ('inc4a', (480, 192, 96, 208, 16, 48, 64)),
            ('inc4b', (512, 160, 112, 224, 24, 64, 64)),
            ('inc4c', (512, 128, 128, 256, 24, 64, 64)),
            ('inc4d', (512, 112, 144, 288, 32, 64, 64)),
            ('inc4e', (528, 256, 160, 320, 32, 128, 128)),
            ('inc5a', (832, 256, 160, 320, 32, 128, 128)),
            ('inc5b', (832, 384, 192, 384, 48, 128, 128)),
        )

        with self.init_scope():
            self.conv1 = conv(3, 64, 7, stride=2, pad=3)
            self.conv2_reduce = conv(64, 64, 1)
            self.conv2 = conv(64, 192, 3, stride=1, pad=1)

            for attr, spec in inception_specs:
                setattr(self, attr, Inception(*spec))

            self.loss3_fc = fc(1024, 1000)

            # First auxiliary branch (loss1_*).
            self.loss1_conv = conv(512, 128, 1)
            self.loss1_fc1 = fc(2048, 1024)
            self.loss1_fc2 = fc(1024, 1000)

            # Second auxiliary branch (loss2_*).
            self.loss2_conv = conv(528, 128, 1)
            self.loss2_fc1 = fc(2048, 1024)
            self.loss2_fc2 = fc(1024, 1000)

        if pretrained_model == 'auto':
            weights_url = (
                'http://dl.caffe.berkeleyvision.org/bvlc_googlenet.caffemodel')
            _retrieve('bvlc_googlenet.npz', weights_url, self)
        elif pretrained_model:
            npz.load_npz(pretrained_model, self)
Exemple #2
0
 def __init__(self, pretrained_model='auto'):
     """Create the GoogLeNet links, load weights, and build ``functions``.

     Args:
         pretrained_model: ``'auto'`` (default) passes this model to
             ``_retrieve`` along with the ``bvlc_googlenet.npz`` file name
             and the BVLC caffemodel URL.  Any other truthy value is given
             to ``npz.load_npz`` as the file to load.  A falsy value skips
             loading, and the directly created convolution and linear
             links use ``GlorotUniform(scale=1.0)`` instead of zeros.
     """
     # Zero initialization avoids the time-consuming sampling step when
     # pretrained weights are requested; otherwise use the default
     # initializer from the original paper.
     w_init = (constant.Zero() if pretrained_model
               else uniform.GlorotUniform(scale=1.0))
     init_args = {'initialW': w_init}

     # Child links keyed by attribute name; the parent constructor
     # receives them as keyword arguments.
     links = dict(
         conv1=Convolution2D(3, 64, 7, stride=2, pad=3, **init_args),
         conv2_reduce=Convolution2D(64, 64, 1, **init_args),
         conv2=Convolution2D(64, 192, 3, stride=1, pad=1, **init_args),
         inc3a=Inception(192, 64, 96, 128, 16, 32, 32),
         inc3b=Inception(256, 128, 128, 192, 32, 96, 64),
         inc4a=Inception(480, 192, 96, 208, 16, 48, 64),
         inc4b=Inception(512, 160, 112, 224, 24, 64, 64),
         inc4c=Inception(512, 128, 128, 256, 24, 64, 64),
         inc4d=Inception(512, 112, 144, 288, 32, 64, 64),
         inc4e=Inception(528, 256, 160, 320, 32, 128, 128),
         inc5a=Inception(832, 256, 160, 320, 32, 128, 128),
         inc5b=Inception(832, 384, 192, 384, 48, 128, 128),
         loss3_fc=Linear(1024, 1000, **init_args),
         loss1_conv=Convolution2D(512, 128, 1, **init_args),
         loss1_fc1=Linear(2048, 1024, **init_args),
         loss1_fc2=Linear(1024, 1000, **init_args),
         loss2_conv=Convolution2D(528, 128, 1, **init_args),
         loss2_fc1=Linear(2048, 1024, **init_args),
         loss2_fc2=Linear(1024, 1000, **init_args),
     )
     super(GoogLeNet, self).__init__(**links)

     if pretrained_model == 'auto':
         weights_url = (
             'http://dl.caffe.berkeleyvision.org/bvlc_googlenet.caffemodel')
         _retrieve('bvlc_googlenet.npz', weights_url, self)
     elif pretrained_model:
         npz.load_npz(pretrained_model, self)

     def aux_head(conv, fc1, fc2):
         # Callable sequence for one auxiliary classifier output.
         return [_average_pooling_2d_k5, conv, relu, fc1, relu, fc2]

     # Ordered mapping from layer name to the callables listed for it.
     stages = [
         ('conv1', [self.conv1, relu]),
         ('pool1', [_max_pooling_2d, _local_response_normalization]),
         ('conv2_reduce', [self.conv2_reduce, relu]),
         ('conv2', [self.conv2, relu, _local_response_normalization]),
         ('pool2', [_max_pooling_2d]),
         ('inception_3a', [self.inc3a]),
         ('inception_3b', [self.inc3b]),
         ('pool3', [_max_pooling_2d]),
         ('inception_4a', [self.inc4a]),
         ('inception_4b', [self.inc4b]),
         ('inception_4c', [self.inc4c]),
         ('inception_4d', [self.inc4d]),
         ('inception_4e', [self.inc4e]),
         ('pool4', [_max_pooling_2d]),
         ('inception_5a', [self.inc5a]),
         ('inception_5b', [self.inc5b]),
         ('pool5', [_average_pooling_2d_k7]),
         ('loss3_fc', [_dropout, self.loss3_fc]),
         ('prob', [softmax]),
         # The auxiliary outputs are usually unused, so they are listed
         # after 'prob' to be skipped for efficiency.
         ('loss1_fc2',
          aux_head(self.loss1_conv, self.loss1_fc1, self.loss1_fc2)),
         ('loss2_fc2',
          aux_head(self.loss2_conv, self.loss2_fc1, self.loss2_fc2)),
     ]
     self.functions = OrderedDict(stages)