# Assumed imports for this snippet:
#   import numpy as np
#   import chainer.links as L
#   from chainer.initializers import constant, uniform
def __init__(self, n_class, pretrained_model=None, mean=None,
             initialW=None, initialbias=None,
             googlenetbn_trainedmodel=None):
    self.n_class = n_class
    self.mean = mean
    self.initialbias = initialbias
    self.googlenetbn_trainedmodel = googlenetbn_trainedmodel
    self.insize = 224
    self.initialW = initialW
    if mean is None:
        # ImageNet mean values
        self.mean = np.array(
            [123.68, 116.779, 103.939],
            dtype=np.float32)[:, np.newaxis, np.newaxis]
    if initialW is None:
        # Employ default initializers used in BVLC. For more detail, see
        # https://github.com/chainer/chainer/pull/2424#discussion_r109642209
        self.initialW = uniform.LeCunUniform(scale=1.0)
    if pretrained_model:
        # As a sampling process is time-consuming,
        # we employ a zero initializer for faster computation.
        self.initialW = constant.Zero()
    super(MultiscaleNet, self).__init__()
    with self.init_scope():
        # Deep layers: GoogLeNet (BatchNormalization version)
        self.googlenetbn = GoogleNetBN(n_class=n_class)

        # Shallow layers
        self.conv_s1 = L.Convolution2D(
            None, 96, 3, stride=4, pad=1,
            initialW=0.02 * np.sqrt(3 * 3 * 3))
        self.norm_s1 = L.BatchNormalization(96)
        self.conv_s2 = L.Convolution2D(
            None, 96, 3, stride=4, pad=1,
            initialW=0.02 * np.sqrt(3 * 3 * 3))
        self.norm_s2 = L.BatchNormalization(96)

        # Final layers
        self.fc4_1 = L.Linear(None, 4096)
        self.fc4_2 = L.Linear(None, self.n_class)
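# Usage sketch (an assumption, not part of the original source): construct the
# model and push a dummy batch through the shallow branch defined above. The
# full multiscale forward pass (deep + shallow feature fusion) is defined
# elsewhere in this repo; n_class=10 is arbitrary.
import numpy as np

model = MultiscaleNet(n_class=10)
x = np.zeros((1, 3, model.insize, model.insize), dtype=np.float32)
h = model.conv_s1(x)   # input channels inferred lazily; output (1, 96, 56, 56)
h = model.norm_s1(h)   # uses batch statistics; wrap in
                       # chainer.using_config('train', False) at test time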
def __init__(self, pretrained_model='auto'):
    if pretrained_model:
        # As a sampling process is time-consuming,
        # we employ a zero initializer for faster computation.
        kwargs = {'initialW': constant.Zero()}
    else:
        # Employ default initializers used in BVLC. For more detail, see
        # https://github.com/chainer/chainer/pull/2424#discussion_r109642209
        kwargs = {'initialW': uniform.LeCunUniform(scale=1.0)}
    super(GoogLeNet, self).__init__(
        conv1=Convolution2D(3, 64, 7, stride=2, pad=3, **kwargs),
        conv2_reduce=Convolution2D(64, 64, 1, **kwargs),
        conv2=Convolution2D(64, 192, 3, stride=1, pad=1, **kwargs),
        inc3a=Inception(192, 64, 96, 128, 16, 32, 32),
        inc3b=Inception(256, 128, 128, 192, 32, 96, 64),
        inc4a=Inception(480, 192, 96, 208, 16, 48, 64),
        inc4b=Inception(512, 160, 112, 224, 24, 64, 64),
        inc4c=Inception(512, 128, 128, 256, 24, 64, 64),
        inc4d=Inception(512, 112, 144, 288, 32, 64, 64),
        inc4e=Inception(528, 256, 160, 320, 32, 128, 128),
        inc5a=Inception(832, 256, 160, 320, 32, 128, 128),
        inc5b=Inception(832, 384, 192, 384, 48, 128, 128),
        loss3_fc=Linear(1024, 1000, **kwargs),
        loss1_conv=Convolution2D(512, 128, 1, **kwargs),
        loss1_fc1=Linear(2048, 1024, **kwargs),
        loss1_fc2=Linear(1024, 1000, **kwargs),
        loss2_conv=Convolution2D(528, 128, 1, **kwargs),
        loss2_fc1=Linear(2048, 1024, **kwargs),
        loss2_fc2=Linear(1024, 1000, **kwargs))
    if pretrained_model == 'auto':
        _retrieve(
            'bvlc_googlenet.npz',
            'http://dl.caffe.berkeleyvision.org/bvlc_googlenet.caffemodel',
            self)
    elif pretrained_model:
        npz.load_npz(pretrained_model, self)
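# Usage sketch (assumptions: network access, and Chainer's Caffe converter
# available for the 'auto' path). With 'auto' the constructor downloads and
# converts the BVLC caffemodel once (cached under the Chainer dataset root),
# which is why the zero-initialized weights above are only cheap placeholders.
pretrained = GoogLeNet(pretrained_model='auto')  # downloads/converts on first use
scratch = GoogLeNet(pretrained_model=None)       # keeps LeCunUniform weights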
def __init__(self, pretrained_model='auto'):
    if pretrained_model:
        # As a sampling process is time-consuming,
        # we employ a zero initializer for faster computation.
        kwargs = {'initialW': constant.Zero()}
    else:
        # Employ default initializers used in BVLC. For more detail, see
        # https://github.com/pfnet/chainer/pull/2424#discussion_r109642209
        kwargs = {'initialW': uniform.LeCunUniform(scale=1.0)}
    super(GoogLeNet, self).__init__(
        conv1=Convolution2D(3, 64, 7, stride=2, pad=3, **kwargs),
        conv2_reduce=Convolution2D(64, 64, 1, **kwargs),
        conv2=Convolution2D(64, 192, 3, stride=1, pad=1, **kwargs),
        inc3a=Inception(192, 64, 96, 128, 16, 32, 32),
        inc3b=Inception(256, 128, 128, 192, 32, 96, 64),
        inc4a=Inception(480, 192, 96, 208, 16, 48, 64),
        inc4b=Inception(512, 160, 112, 224, 24, 64, 64),
        inc4c=Inception(512, 128, 128, 256, 24, 64, 64),
        inc4d=Inception(512, 112, 144, 288, 32, 64, 64),
        inc4e=Inception(528, 256, 160, 320, 32, 128, 128),
        inc5a=Inception(832, 256, 160, 320, 32, 128, 128),
        inc5b=Inception(832, 384, 192, 384, 48, 128, 128),
        loss3_fc=Linear(1024, 1000, **kwargs),
        loss1_conv=Convolution2D(512, 128, 1, **kwargs),
        loss1_fc1=Linear(2048, 1024, **kwargs),
        loss1_fc2=Linear(1024, 1000, **kwargs),
        loss2_conv=Convolution2D(528, 128, 1, **kwargs),
        loss2_fc1=Linear(2048, 1024, **kwargs),
        loss2_fc2=Linear(1024, 1000, **kwargs))
    if pretrained_model == 'auto':
        _retrieve(
            'bvlc_googlenet.npz',
            'http://dl.caffe.berkeleyvision.org/bvlc_googlenet.caffemodel',
            self)
    elif pretrained_model:
        npz.load_npz(pretrained_model, self)
    # The '_'-prefixed names below (_max_pooling_2d, _average_pooling_2d_k5,
    # _average_pooling_2d_k7, _dropout, _local_response_normalization) are
    # module-level wrapper functions defined alongside this class.
    self.functions = OrderedDict([
        ('conv1', [self.conv1, relu]),
        ('pool1', [_max_pooling_2d, _local_response_normalization]),
        ('conv2_reduce', [self.conv2_reduce, relu]),
        ('conv2', [self.conv2, relu, _local_response_normalization]),
        ('pool2', [_max_pooling_2d]),
        ('inception_3a', [self.inc3a]),
        ('inception_3b', [self.inc3b]),
        ('pool3', [_max_pooling_2d]),
        ('inception_4a', [self.inc4a]),
        ('inception_4b', [self.inc4b]),
        ('inception_4c', [self.inc4c]),
        ('inception_4d', [self.inc4d]),
        ('inception_4e', [self.inc4e]),
        ('pool4', [_max_pooling_2d]),
        ('inception_5a', [self.inc5a]),
        ('inception_5b', [self.inc5b]),
        ('pool5', [_average_pooling_2d_k7]),
        ('loss3_fc', [_dropout, self.loss3_fc]),
        ('prob', [softmax]),
        # Since usually the following outputs are not used, they are put
        # after 'prob' to be skipped for efficiency.
        ('loss1_fc2', [_average_pooling_2d_k5, self.loss1_conv,
                       relu, self.loss1_fc1, relu, self.loss1_fc2]),
        ('loss2_fc2', [_average_pooling_2d_k5, self.loss2_conv,
                       relu, self.loss2_fc1, relu, self.loss2_fc2]),
    ])
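# Sketch (an assumption, modeled on the extract-style forward pass used by
# Chainer's vision models, not copied from this file): how self.functions can
# drive a partial forward pass. Note the auxiliary heads 'loss1_fc2' and
# 'loss2_fc2' branch off cached inception_4a / inception_4d activations
# rather than continuing from the softmax output.
def run_layers(model, x, layers=('prob',)):
    h = x
    activations = {}
    remaining = set(layers)
    cache = {}
    for key, funcs in model.functions.items():
        if not remaining:
            break  # all requested outputs computed; skip the rest
        if key == 'loss1_fc2':
            h = cache['inception_4a']  # auxiliary classifier 1 input
        elif key == 'loss2_fc2':
            h = cache['inception_4d']  # auxiliary classifier 2 input
        for func in funcs:
            h = func(h)
        if key in remaining:
            activations[key] = h
            remaining.remove(key)
        if key in ('inception_4a', 'inception_4d'):
            cache[key] = h  # saved for the auxiliary heads
    return activations

# e.g. run_layers(model, x, layers=('pool5',)) stops after global pooling.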
def __init__(self, n_class=None, pretrained_model=None, mean=None,
             initialW=None, initialBias=None):
    self.n_class = n_class
    self.mean = mean
    self.initialbias = initialBias
    self.insize = 224
    if n_class is None:
        self.n_class = 100
    if mean is None:
        # ImageNet mean values
        self.mean = np.array(
            [123.68, 116.779, 103.939],
            dtype=np.float32)[:, np.newaxis, np.newaxis]
    self.initialW = initialW
    if initialW is None:
        # Employ default initializers used in BVLC. For more detail, see
        # https://github.com/chainer/chainer/pull/2424#discussion_r109642209
        self.initialW = uniform.LeCunUniform(scale=1.0)
    if pretrained_model is not None:
        # As a sampling process is time-consuming,
        # we employ a zero initializer for faster computation.
        self.initialW = constant.Zero()
    super(GoogleNetBN, self).__init__()
    with self.init_scope():
        # Deep layers: GoogLeNet (BatchNormalization version)
        self.conv1 = L.Convolution2D(None, 64, 7, stride=2, pad=3,
                                     nobias=True)
        self.norm1 = L.BatchNormalization(64)
        self.conv2 = L.Convolution2D(None, 192, 3, stride=1, pad=1,
                                     nobias=True)
        self.norm2 = L.BatchNormalization(192)
        self.inc3a = L.InceptionBN(None, 64, 64, 64, 64, 96, "avg", 32)
        self.inc3b = L.InceptionBN(None, 64, 64, 96, 64, 96, "avg", 64)
        self.inc3c = L.InceptionBN(None, 0, 128, 160, 64, 96, "max", stride=2)
        self.inc4a = L.InceptionBN(None, 224, 64, 96, 96, 128, "avg", 128)
        self.inc4b = L.InceptionBN(None, 192, 96, 128, 96, 128, "avg", 128)
        self.inc4c = L.InceptionBN(None, 128, 128, 160, 128, 160, "avg", 128)
        self.inc4d = L.InceptionBN(None, 64, 128, 192, 160, 192, "avg", 128)
        self.inc4e = L.InceptionBN(None, 0, 128, 192, 192, 256, "max",
                                   stride=2)
        self.inc5a = L.InceptionBN(None, 352, 192, 320, 160, 224, "avg", 128)
        self.inc5b = L.InceptionBN(None, 352, 192, 320, 192, 224, "max", 128)
        self.loss3_fc = L.Linear(None, self.n_class, initialW=self.initialW)

        # Auxiliary classifier 1
        self.loss1_conv = L.Convolution2D(None, 128, 1,
                                          initialW=self.initialW, nobias=True)
        self.norma = L.BatchNormalization(128)
        self.loss1_fc1 = L.Linear(None, 1024,
                                  initialW=self.initialW, nobias=True)
        self.norma2 = L.BatchNormalization(1024)
        self.loss1_fc2 = L.Linear(None, self.n_class, initialW=self.initialW)

        # Auxiliary classifier 2
        self.loss2_conv = L.Convolution2D(None, 128, 1,
                                          initialW=self.initialW, nobias=True)
        self.normb = L.BatchNormalization(128)
        self.loss2_fc1 = L.Linear(None, 1024,
                                  initialW=self.initialW, nobias=True)
        self.normb2 = L.BatchNormalization(1024)
        self.loss2_fc2 = L.Linear(None, self.n_class, initialW=self.initialW)
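# Usage sketch (an assumption; 'trained.npz' is a hypothetical path). The
# zero initializer above is only a placeholder: the caller is expected to
# load real weights afterwards, e.g. with chainer.serializers.load_npz.
import chainer

model = GoogleNetBN(n_class=1000, pretrained_model='trained.npz')
chainer.serializers.load_npz('trained.npz', model)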