def __init__(self, num_inter, num_out, dropout_ratio=.5):
    """Register thirteen convolutions and a batch-normalised three-layer FC head.

    Args:
        num_inter: Output size of ``fc6`` and ``fc7`` (also the size given
            to ``bn2`` and ``bn3``).
        num_out: Output size of ``fc8``.
        dropout_ratio: Stored unchanged on ``self.dropout_ratio``.
    """
    init_args = {
        'initialW': normal.Normal(0.01),
        'initial_bias': constant.Zero(),
    }

    def conv(n_in, n_out):
        # All convolutions share the trailing positional arguments 3, 1, 1
        # and the same initializer objects.
        return Convolution2D(n_in, n_out, 3, 1, 1, **init_args)

    super(VGG16BNFC3, self).__init__()
    with self.init_scope():
        self.conv1_1 = conv(3, 64)
        self.conv1_2 = conv(64, 64)
        self.conv2_1 = conv(64, 128)
        self.conv2_2 = conv(128, 128)
        self.conv3_1 = conv(128, 256)
        self.conv3_2 = conv(256, 256)
        self.conv3_3 = conv(256, 256)
        self.conv4_1 = conv(256, 512)
        self.conv4_2 = conv(512, 512)
        self.conv4_3 = conv(512, 512)
        self.conv5_1 = conv(512, 512)
        self.conv5_2 = conv(512, 512)
        self.conv5_3 = conv(512, 512)
        self.bn1 = L.BatchNormalization(512)
        self.fc6 = Linear(512 * 7 * 7, num_inter, **init_args)
        self.bn2 = L.BatchNormalization(num_inter)
        self.fc7 = Linear(num_inter, num_inter, **init_args)
        self.bn3 = L.BatchNormalization(num_inter)
        self.fc8 = Linear(num_inter, num_out, **init_args)
    self.dropout_ratio = dropout_ratio
def __init__(self, pretrained_model='auto', n_layers=16):
    """Register the convolution and fully-connected links for 16 or 19 layers.

    Args:
        pretrained_model: Accepted by the signature but not read anywhere
            in this constructor as visible here.
        n_layers: ``16`` or ``19``. With ``19`` the extra links
            ``conv3_4``, ``conv4_4`` and ``conv5_4`` are registered.

    Raises:
        ValueError: If ``n_layers`` is neither 16 nor 19.
    """
    super(VGGLayers, self).__init__()
    # No initializer overrides: every link uses its library defaults.
    kwargs = {}

    if n_layers not in [16, 19]:
        # The two literals are concatenated, so the first one must end with
        # a space to keep the message readable.
        raise ValueError(
            'The n_layers argument should be either 16 or 19, '
            'but {} was given.'.format(n_layers)
        )

    with self.init_scope():
        self.conv1_1 = Convolution2D(3, 64, 3, 1, 1, **kwargs)
        self.conv1_2 = Convolution2D(64, 64, 3, 1, 1, **kwargs)
        self.conv2_1 = Convolution2D(64, 128, 3, 1, 1, **kwargs)
        self.conv2_2 = Convolution2D(128, 128, 3, 1, 1, **kwargs)
        self.conv3_1 = Convolution2D(128, 256, 3, 1, 1, **kwargs)
        self.conv3_2 = Convolution2D(256, 256, 3, 1, 1, **kwargs)
        self.conv3_3 = Convolution2D(256, 256, 3, 1, 1, **kwargs)
        self.conv4_1 = Convolution2D(256, 512, 3, 1, 1, **kwargs)
        self.conv4_2 = Convolution2D(512, 512, 3, 1, 1, **kwargs)
        self.conv4_3 = Convolution2D(512, 512, 3, 1, 1, **kwargs)
        self.conv5_1 = Convolution2D(512, 512, 3, 1, 1, **kwargs)
        self.conv5_2 = Convolution2D(512, 512, 3, 1, 1, **kwargs)
        self.conv5_3 = Convolution2D(512, 512, 3, 1, 1, **kwargs)
        self.fc6 = Linear(512 * 7 * 7, 4096, **kwargs)
        self.fc7 = Linear(4096, 4096, **kwargs)
        self.fc8 = Linear(4096, 1000, **kwargs)
        if n_layers == 19:
            self.conv3_4 = Convolution2D(256, 256, 3, 1, 1, **kwargs)
            self.conv4_4 = Convolution2D(512, 512, 3, 1, 1, **kwargs)
            self.conv5_4 = Convolution2D(512, 512, 3, 1, 1, **kwargs)
def __init__(self, num_inter, num_out):
    """Register thirteen convolutions followed by three fully-connected links.

    Args:
        num_inter: Output size of ``fc6`` and ``fc7``.
        num_out: Output size of ``fc8``; also kept on ``self.num_out``.
    """
    init_args = {
        'initialW': normal.Normal(0.01),
        'initial_bias': constant.Zero(),
    }
    # Assigned before the base-class initializer runs, exactly as before.
    self.num_out = num_out
    super(VGG16, self).__init__()

    # (attribute name, input channels, output channels) in registration order.
    conv_layout = (
        ('conv1_1', 3, 64),
        ('conv1_2', 64, 64),
        ('conv2_1', 64, 128),
        ('conv2_2', 128, 128),
        ('conv3_1', 128, 256),
        ('conv3_2', 256, 256),
        ('conv3_3', 256, 256),
        ('conv4_1', 256, 512),
        ('conv4_2', 512, 512),
        ('conv4_3', 512, 512),
        ('conv5_1', 512, 512),
        ('conv5_2', 512, 512),
        ('conv5_3', 512, 512),
    )
    with self.init_scope():
        for attr, n_in, n_out in conv_layout:
            setattr(self, attr,
                    Convolution2D(n_in, n_out, 3, 1, 1, **init_args))
        self.fc6 = Linear(512 * 7 * 7, num_inter, **init_args)
        self.fc7 = Linear(num_inter, num_inter, **init_args)
        self.fc8 = Linear(num_inter, num_out, **init_args)
def __init__(self, pretrained_model='auto', n_layers=16):
    """Register the 16- or 19-layer links and optionally load weights.

    Args:
        pretrained_model: ``'auto'`` passes the npz name and caffemodel URL
            for the chosen depth to ``_retrieve``; any other truthy value is
            given to ``npz.load_npz`` as a path; a falsy value skips loading
            and keeps the random initializers.
        n_layers: ``16`` or ``19``. With ``19`` the extra links
            ``conv3_4``, ``conv4_4`` and ``conv5_4`` are registered.

    Raises:
        ValueError: If ``n_layers`` is neither 16 nor 19.
    """
    super(VGGLayers, self).__init__()

    if pretrained_model:
        # As a sampling process is time-consuming,
        # we employ a zero initializer for faster computation.
        init = constant.Zero()
        kwargs = {'initialW': init, 'initial_bias': init}
    else:
        # employ default initializers used in the original paper
        kwargs = {
            'initialW': normal.Normal(0.01),
            'initial_bias': constant.Zero(),
        }

    if n_layers not in [16, 19]:
        raise ValueError(
            'The n_layers argument should be either 16 or 19, '
            'but {} was given.'.format(n_layers)
        )

    with self.init_scope():
        self.conv1_1 = Convolution2D(3, 64, 3, 1, 1, **kwargs)
        self.conv1_2 = Convolution2D(64, 64, 3, 1, 1, **kwargs)
        self.conv2_1 = Convolution2D(64, 128, 3, 1, 1, **kwargs)
        self.conv2_2 = Convolution2D(128, 128, 3, 1, 1, **kwargs)
        self.conv3_1 = Convolution2D(128, 256, 3, 1, 1, **kwargs)
        self.conv3_2 = Convolution2D(256, 256, 3, 1, 1, **kwargs)
        self.conv3_3 = Convolution2D(256, 256, 3, 1, 1, **kwargs)
        self.conv4_1 = Convolution2D(256, 512, 3, 1, 1, **kwargs)
        self.conv4_2 = Convolution2D(512, 512, 3, 1, 1, **kwargs)
        self.conv4_3 = Convolution2D(512, 512, 3, 1, 1, **kwargs)
        self.conv5_1 = Convolution2D(512, 512, 3, 1, 1, **kwargs)
        self.conv5_2 = Convolution2D(512, 512, 3, 1, 1, **kwargs)
        self.conv5_3 = Convolution2D(512, 512, 3, 1, 1, **kwargs)
        self.fc6 = Linear(512 * 7 * 7, 4096, **kwargs)
        self.fc7 = Linear(4096, 4096, **kwargs)
        self.fc8 = Linear(4096, 1000, **kwargs)
        if n_layers == 19:
            self.conv3_4 = Convolution2D(256, 256, 3, 1, 1, **kwargs)
            self.conv4_4 = Convolution2D(512, 512, 3, 1, 1, **kwargs)
            self.conv5_4 = Convolution2D(512, 512, 3, 1, 1, **kwargs)

    if pretrained_model == 'auto':
        # Both depths are fetched over HTTPS from the same host; the
        # 19-layer URL previously used plain HTTP.
        if n_layers == 16:
            _retrieve(
                'VGG_ILSVRC_16_layers.npz',
                'https://www.robots.ox.ac.uk/%7Evgg/software/very_deep/'
                'caffe/VGG_ILSVRC_16_layers.caffemodel',
                self)
        else:
            _retrieve(
                'VGG_ILSVRC_19_layers.npz',
                'https://www.robots.ox.ac.uk/%7Evgg/software/very_deep/'
                'caffe/VGG_ILSVRC_19_layers.caffemodel',
                self)
    elif pretrained_model:
        npz.load_npz(pretrained_model, self)
def __init__(self, pretrained_model='auto'):
    """Create the C3D links, optionally load weights, and build ``functions``.

    Args:
        pretrained_model: ``'auto'`` hands the npz name and download URL to
            ``_retrieve``; any other truthy value is passed to
            ``npz.load_npz`` as a path; a falsy value skips loading.
    """
    if not pretrained_model:
        # No weights will be loaded: use the random initializers.
        conv_kwargs = {
            'initialW': normal.Normal(0.01),
            'initial_bias': constant.Zero(),
        }
        fc_kwargs = {
            'initialW': normal.Normal(0.005),
            'initial_bias': constant.One(),
        }
    else:
        # Weights get overwritten by the load below, so skip the costly
        # random sampling and start from zeros.
        zero = constant.Zero()
        conv_kwargs = {'initialW': zero, 'initial_bias': zero}
        fc_kwargs = conv_kwargs

    def conv3d(n_in, n_out):
        return ConvolutionND(3, n_in, n_out, 3, 1, 1, **conv_kwargs)

    def dense(n_in, n_out):
        return Linear(n_in, n_out, **fc_kwargs)

    super(C3DVersion1, self).__init__(
        conv1a=conv3d(3, 64),
        conv2a=conv3d(64, 128),
        conv3a=conv3d(128, 256),
        conv3b=conv3d(256, 256),
        conv4a=conv3d(256, 512),
        conv4b=conv3d(512, 512),
        conv5a=conv3d(512, 512),
        conv5b=conv3d(512, 512),
        fc6=dense(512 * 4 * 4, 4096),
        fc7=dense(4096, 4096),
        fc8=dense(4096, 101),
    )

    if pretrained_model == 'auto':
        _retrieve(
            'conv3d_deepnetA_ucf.npz',
            'http://vlg.cs.dartmouth.edu/c3d/'
            'c3d_ucf101_finetune_whole_iter_20000',
            self)
    elif pretrained_model:
        npz.load_npz(pretrained_model, self)

    # Ordered mapping from stage name to the callables applied in sequence.
    stages = collections.OrderedDict()
    stages['conv1a'] = [self.conv1a, relu]
    stages['pool1'] = [_max_pooling_2d]
    stages['conv2a'] = [self.conv2a, relu]
    stages['pool2'] = [_max_pooling_3d]
    stages['conv3a'] = [self.conv3a, relu]
    stages['conv3b'] = [self.conv3b, relu]
    stages['pool3'] = [_max_pooling_3d]
    stages['conv4a'] = [self.conv4a, relu]
    stages['conv4b'] = [self.conv4b, relu]
    stages['pool4'] = [_max_pooling_3d]
    stages['conv5a'] = [self.conv5a, relu]
    stages['conv5b'] = [self.conv5b, relu]
    stages['pool5'] = [_max_pooling_3d]
    stages['fc6'] = [self.fc6, relu, dropout]
    stages['fc7'] = [self.fc7, relu, dropout]
    stages['fc8'] = [self.fc8]
    stages['prob'] = [softmax]
    self.functions = stages
def __init__(self, pretrained_model='auto'):
    """Create the GoogLeNet links, optionally load weights, build ``functions``.

    Args:
        pretrained_model: ``'auto'`` hands the npz name and caffemodel URL
            to ``_retrieve``; any other truthy value is passed to
            ``npz.load_npz`` as a path; a falsy value skips loading.
    """
    super(GoogLeNet, self).__init__(
        conv1=Convolution2D(3, 64, 7, stride=2, pad=3),
        conv2_reduce=Convolution2D(64, 64, 1),
        conv2=Convolution2D(64, 192, 3, stride=1, pad=1),
        inc3a=Inception(192, 64, 96, 128, 16, 32, 32),
        inc3b=Inception(256, 128, 128, 192, 32, 96, 64),
        inc4a=Inception(480, 192, 96, 208, 16, 48, 64),
        inc4b=Inception(512, 160, 112, 224, 24, 64, 64),
        inc4c=Inception(512, 128, 128, 256, 24, 64, 64),
        inc4d=Inception(512, 112, 144, 288, 32, 64, 64),
        inc4e=Inception(528, 256, 160, 320, 32, 128, 128),
        inc5a=Inception(832, 256, 160, 320, 32, 128, 128),
        inc5b=Inception(832, 384, 192, 384, 48, 128, 128),
        loss3_fc=Linear(1024, 1000),
        loss1_conv=Convolution2D(512, 128, 1),
        loss1_fc1=Linear(4 * 4 * 128, 1024),
        loss1_fc2=Linear(1024, 1000),
        loss2_conv=Convolution2D(528, 128, 1),
        loss2_fc1=Linear(4 * 4 * 128, 1024),
        loss2_fc2=Linear(1024, 1000),
    )

    if pretrained_model == 'auto':
        _retrieve(
            'bvlc_googlenet.npz',
            'http://dl.caffe.berkeleyvision.org/bvlc_googlenet.caffemodel',
            self)
    elif pretrained_model:
        npz.load_npz(pretrained_model, self)

    # Parameter-free steps used by the stage table below.
    def max_pool(x):
        return max_pooling_2d(x, ksize=3, stride=2)

    def lrn(x):
        return local_response_normalization(x, n=5)

    def avg_pool(x):
        return average_pooling_2d(x, ksize=7, stride=1)

    def drop(x):
        return dropout(x, ratio=0.4)

    self.functions = OrderedDict([
        ('conv1', [self.conv1, relu]),
        # pool1 pools then normalises; pool2 normalises then pools.
        ('pool1', [max_pool, lrn]),
        ('conv2_reduce', [self.conv2_reduce, relu]),
        ('conv2', [self.conv2, relu]),
        ('pool2', [lrn, max_pool]),
        ('inc3a', [self.inc3a]),
        ('inc3b', [self.inc3b]),
        ('pool3', [max_pool]),
        ('inc4a', [self.inc4a]),
        ('inc4b', [self.inc4b]),
        ('inc4c', [self.inc4c]),
        ('inc4d', [self.inc4d]),
        ('inc4e', [self.inc4e]),
        ('pool4', [max_pool]),
        ('inc5a', [self.inc5a]),
        ('inc5b', [self.inc5b]),
        ('pool6', [avg_pool]),
        ('prob', [drop, self.loss3_fc]),
    ])
def __init__(self, pretrained_model, n_layers):
    """Register the ResNet links for the given depth and optionally load weights.

    Args:
        pretrained_model: A path ending in ``'.caffemodel'`` is handed to
            ``_retrieve``; any other truthy value is passed to
            ``npz.load_npz``; a falsy value skips loading.
        n_layers: ``50``, ``101`` or ``152``; selects the four block counts.

    Raises:
        ValueError: If ``n_layers`` is not one of the supported depths.
    """
    super(ResNetLayers, self).__init__()

    if not pretrained_model:
        # Nothing will be loaded: use the random initializer.
        kwargs = {'initialW': normal.HeNormal(scale=1.0)}
    else:
        # Weights are overwritten by the load below, so start from zeros
        # rather than paying for random sampling.
        kwargs = {'initialW': constant.Zero()}

    # Block counts for res2..res5, keyed by depth.
    layouts = (
        (50, [3, 4, 6, 3]),
        (101, [3, 4, 23, 3]),
        (152, [3, 8, 36, 3]),
    )
    for depth, counts in layouts:
        if n_layers == depth:
            block = counts
            break
    else:
        raise ValueError('The n_layers argument should be either 50, 101,'
                         ' or 152, but {} was given.'.format(n_layers))

    n2, n3, n4, n5 = block[0], block[1], block[2], block[3]
    with self.init_scope():
        self.conv1 = Convolution2D(3, 64, 7, 2, 3, **kwargs)
        self.bn1 = BatchNormalization(64)
        self.res2 = BuildingBlock(n2, 64, 64, 256, 1, **kwargs)
        self.res3 = BuildingBlock(n3, 256, 128, 512, 2, **kwargs)
        self.res4 = BuildingBlock(n4, 512, 256, 1024, 2, **kwargs)
        self.res5 = BuildingBlock(n5, 1024, 512, 2048, 2, **kwargs)
        self.fc6 = Linear(2048, 1000)

    if pretrained_model:
        if pretrained_model.endswith('.caffemodel'):
            npz_name = 'ResNet-{}-model.npz'.format(n_layers)
            _retrieve(n_layers, npz_name, pretrained_model, self)
        else:
            npz.load_npz(pretrained_model, self)
def __init__(self, pretrained_model='auto'):
    """Create the ResNet-50 links, optionally load weights, build ``functions``.

    Args:
        pretrained_model: ``'auto'`` hands the npz and caffemodel file names
            to ``_retrieve``; any other truthy value is passed to
            ``npz.load_npz`` as a path; a falsy value skips loading.
    """
    if not pretrained_model:
        # Nothing will be loaded: use the random initializer.
        kwargs = {'initialW': normal.HeNormal(scale=1.0)}
    else:
        # Weights are overwritten by the load below, so zeros avoid the
        # cost of random sampling.
        kwargs = {'initialW': constant.Zero()}

    def stage(n_blocks, n_in, n_mid, n_out, stride):
        return BuildingBlock(n_blocks, n_in, n_mid, n_out, stride, **kwargs)

    super(ResNet50Layers, self).__init__(
        conv1=Convolution2D(3, 64, 7, 2, 3, **kwargs),
        bn1=BatchNormalization(64),
        res2=stage(3, 64, 64, 256, 1),
        res3=stage(4, 256, 128, 512, 2),
        res4=stage(6, 512, 256, 1024, 2),
        res5=stage(3, 1024, 512, 2048, 2),
        fc6=Linear(2048, 1000),
    )

    if pretrained_model == 'auto':
        _retrieve(
            'ResNet-50-model.npz', 'ResNet-50-model.caffemodel', self)
    elif pretrained_model:
        npz.load_npz(pretrained_model, self)

    def stem_pool(x):
        return max_pooling_2d(x, ksize=3, stride=2)

    # Ordered mapping from stage name to the callables applied in sequence.
    table = OrderedDict()
    table['conv1'] = [self.conv1, self.bn1, relu]
    table['pool1'] = [stem_pool]
    table['res2'] = [self.res2]
    table['res3'] = [self.res3]
    table['res4'] = [self.res4]
    table['res5'] = [self.res5]
    table['pool5'] = [_global_average_pooling_2d]
    table['fc6'] = [self.fc6]
    table['prob'] = [softmax]
    self.functions = table
def __init__(self, pretrained_model='auto', n_layers=50):
    """Register a ResNet feature extractor and optionally load weights.

    ``fc6`` is created so that weight files containing it can be loaded,
    then deleted at the end of the constructor.

    Args:
        pretrained_model: ``'auto'`` selects the caffemodel file name for
            the chosen depth. A value ending in ``'.caffemodel'`` is handed
            to ``_retrieve``; any other truthy value is passed to
            ``npz.load_npz``; a falsy value skips loading.
        n_layers: ``50`` or ``101``; selects the four block counts.

    Raises:
        ValueError: If ``n_layers`` is neither 50 nor 101.
    """
    super(ExtractorResNet, self).__init__()
    print('Extractor ResNet', n_layers, ' initialization')
    # NOTE(review): weights are zero-initialized even when nothing is
    # loaded (falsy pretrained_model) - confirm that is intended.
    kwargs = {'initialW': constant.Zero()}

    # The block layout depends only on the depth. It used to be bound only
    # inside the 'auto' branch, so an explicit weights path or any other
    # depth failed with UnboundLocalError further down.
    if n_layers == 50:
        block = [3, 4, 6, 3]
    elif n_layers == 101:
        block = [3, 4, 23, 3]
    else:
        raise ValueError('The n_layers argument should be either 50 or 101,'
                         ' but {} was given.'.format(n_layers))

    if pretrained_model == 'auto':
        pretrained_model = 'ResNet-{}-model.caffemodel'.format(n_layers)

    with self.init_scope():
        self.conv1 = Convolution2D(3, 64, 7, 2, 3, **kwargs)
        self.bn1 = BatchNormalization(64)
        self.res2 = BuildingBlock(block[0], 64, 64, 256, 1, **kwargs)
        self.res3 = BuildingBlock(block[1], 256, 128, 512, 2, **kwargs)
        self.res4 = BuildingBlock(block[2], 512, 256, 1024, 2, **kwargs)
        # The last stage is built with stride 1 here, not 2.
        self.res5 = BuildingBlock(block[3], 1024, 512, 2048, 1, **kwargs)
        self.fc6 = Linear(2048, 1000)

    if pretrained_model and pretrained_model.endswith('.caffemodel'):
        _retrieve(n_layers, 'ResNet-{}-model.npz'.format(n_layers),
                  pretrained_model, self)
    elif pretrained_model:
        npz.load_npz(pretrained_model, self)

    # The classifier was only needed so the weight file could be loaded.
    del self.fc6
def __init__(self, pretrained_model='auto'):
    """Create the GoogLeNet links and optionally load weights.

    Args:
        pretrained_model: ``'auto'`` hands the npz name and caffemodel URL
            to ``_retrieve``; any other truthy value is passed to
            ``npz.load_npz`` as a path; a falsy value skips loading.
    """
    if not pretrained_model:
        # employ default initializers used in BVLC. For more detail, see
        # https://github.com/chainer/chainer/pull/2424#discussion_r109642209
        kwargs = {'initialW': uniform.LeCunUniform(scale=1.0)}
    else:
        # Weights are overwritten by the load below, so zeros avoid the
        # cost of random sampling.
        kwargs = {'initialW': constant.Zero()}

    def conv(*args, **options):
        options.update(kwargs)
        return Convolution2D(*args, **options)

    def dense(n_in, n_out):
        return Linear(n_in, n_out, **kwargs)

    super(GoogLeNet, self).__init__(
        conv1=conv(3, 64, 7, stride=2, pad=3),
        conv2_reduce=conv(64, 64, 1),
        conv2=conv(64, 192, 3, stride=1, pad=1),
        # Inception modules are built without the initializer overrides.
        inc3a=Inception(192, 64, 96, 128, 16, 32, 32),
        inc3b=Inception(256, 128, 128, 192, 32, 96, 64),
        inc4a=Inception(480, 192, 96, 208, 16, 48, 64),
        inc4b=Inception(512, 160, 112, 224, 24, 64, 64),
        inc4c=Inception(512, 128, 128, 256, 24, 64, 64),
        inc4d=Inception(512, 112, 144, 288, 32, 64, 64),
        inc4e=Inception(528, 256, 160, 320, 32, 128, 128),
        inc5a=Inception(832, 256, 160, 320, 32, 128, 128),
        inc5b=Inception(832, 384, 192, 384, 48, 128, 128),
        loss3_fc=dense(1024, 1000),
        loss1_conv=conv(512, 128, 1),
        loss1_fc1=dense(2048, 1024),
        loss1_fc2=dense(1024, 1000),
        loss2_conv=conv(528, 128, 1),
        loss2_fc1=dense(2048, 1024),
        loss2_fc2=dense(1024, 1000),
    )

    if pretrained_model == 'auto':
        _retrieve(
            'bvlc_googlenet.npz',
            'http://dl.caffe.berkeleyvision.org/bvlc_googlenet.caffemodel',
            self)
    elif pretrained_model:
        npz.load_npz(pretrained_model, self)
def __init__(self, n_out, entropy_weight):
    """Wrap a ``ResNet50Layers`` base with a new linear layer of ``n_out`` outputs.

    Args:
        n_out: Output size of ``fc7``; also kept on ``self.n_out``.
        entropy_weight: Stored unchanged on ``self.entropy_weight``.
    """
    super(ResNet50Layers_transfer, self).__init__()

    backbone = ResNet50Layers(pretrained_model='auto')
    # Input size is None, so it is left for Linear to determine.
    head = Linear(None, n_out)

    with self.init_scope():
        self.base = backbone
        self.fc7 = head

    self.entropy_weight = entropy_weight
    self.n_out = n_out
def __init__(self, pretrained_model='auto'):
    """Register the VGG16 links and optionally load weights.

    Args:
        pretrained_model: ``'auto'`` hands the npz name and caffemodel URL
            to ``_retrieve``; any other truthy value is passed to
            ``npz.load_npz`` as a path; a falsy value skips loading.
    """
    if not pretrained_model:
        # Nothing will be loaded: use the random initializers.
        kwargs = {
            'initialW': normal.Normal(0.01),
            'initial_bias': constant.Zero(),
        }
    else:
        # Weights are overwritten by the load below, so zeros avoid the
        # cost of random sampling.
        zero = constant.Zero()
        kwargs = {'initialW': zero, 'initial_bias': zero}

    super(VGG16Layers, self).__init__()

    # (attribute name, input channels, output channels) in registration order.
    conv_layout = (
        ('conv1_1', 3, 64),
        ('conv1_2', 64, 64),
        ('conv2_1', 64, 128),
        ('conv2_2', 128, 128),
        ('conv3_1', 128, 256),
        ('conv3_2', 256, 256),
        ('conv3_3', 256, 256),
        ('conv4_1', 256, 512),
        ('conv4_2', 512, 512),
        ('conv4_3', 512, 512),
        ('conv5_1', 512, 512),
        ('conv5_2', 512, 512),
        ('conv5_3', 512, 512),
    )
    with self.init_scope():
        for attr, n_in, n_out in conv_layout:
            setattr(self, attr, Convolution2D(n_in, n_out, 3, 1, 1, **kwargs))
        self.fc6 = Linear(512 * 7 * 7, 4096, **kwargs)
        self.fc7 = Linear(4096, 4096, **kwargs)
        self.fc8 = Linear(4096, 1000, **kwargs)

    if pretrained_model == 'auto':
        _retrieve(
            'VGG_ILSVRC_16_layers.npz',
            'http://www.robots.ox.ac.uk/%7Evgg/software/very_deep/'
            'caffe/VGG_ILSVRC_16_layers.caffemodel',
            self)
    elif pretrained_model:
        npz.load_npz(pretrained_model, self)
def __init__(self, n_joints):
    """Register thirteen convolutions and a batch-normalised three-layer FC head.

    Args:
        n_joints: ``fc8`` is given an output size of ``n_joints * 2``.
    """
    super(VGG16_conv3_3, self).__init__()

    def conv(n_in, n_out):
        # All convolutions share the trailing positional arguments 3, 1, 1.
        return Convolution2D(n_in, n_out, 3, 1, 1)

    with self.init_scope():
        self.conv1_1 = conv(3, 64)
        self.conv1_2 = conv(64, 64)
        self.conv2_1 = conv(64, 128)
        self.conv2_2 = conv(128, 128)
        self.conv3_1 = conv(128, 256)
        self.conv3_2 = conv(256, 256)
        self.conv3_3 = conv(256, 256)
        self.conv4_1 = conv(256, 512)
        self.conv4_2 = conv(512, 512)
        self.conv4_3 = conv(512, 512)
        self.conv5_1 = conv(512, 512)
        self.conv5_2 = conv(512, 512)
        self.conv5_3 = conv(512, 512)
        self.bn1 = BatchNormalization(512)
        # Input size is None, so it is left for Linear to determine.
        self.fc6 = Linear(None, 4096)
        self.bn2 = BatchNormalization(4096)
        self.fc7 = Linear(4096, 4096)
        self.bn3 = BatchNormalization(4096)
        self.fc8 = Linear(4096, n_joints * 2)
def __init__(self, pretrained_model, n_layers):
    """Register the ResNet links, optionally load weights, build ``functions``.

    Args:
        pretrained_model: A path ending in ``'.caffemodel'`` is handed to
            ``_retrieve``; any other truthy value is passed to
            ``npz.load_npz``; a falsy value skips loading.
        n_layers: ``50``, ``101`` or ``152``; selects the four block counts.

    Raises:
        ValueError: If ``n_layers`` is not one of the supported depths.
    """
    super(ResNetLayers, self).__init__()

    if not pretrained_model:
        # Nothing will be loaded: use the random initializer.
        kwargs = {'initialW': normal.HeNormal(scale=1.0)}
    else:
        # Weights are overwritten by the load below, so zeros avoid the
        # cost of random sampling.
        kwargs = {'initialW': constant.Zero()}

    # Block counts for res2..res5, keyed by depth.
    layouts = (
        (50, [3, 4, 6, 3]),
        (101, [3, 4, 23, 3]),
        (152, [3, 8, 36, 3]),
    )
    for depth, counts in layouts:
        if n_layers == depth:
            block = counts
            break
    else:
        raise ValueError('The n_layers argument should be either 50, 101,'
                         ' or 152, but {} was given.'.format(n_layers))

    def stage(index, n_in, n_mid, n_out, stride):
        return BuildingBlock(
            block[index], n_in, n_mid, n_out, stride, **kwargs)

    with self.init_scope():
        self.conv1 = Convolution2D(3, 64, 7, 2, 3, **kwargs)
        self.bn1 = BatchNormalization(64)
        self.res2 = stage(0, 64, 64, 256, 1)
        self.res3 = stage(1, 256, 128, 512, 2)
        self.res4 = stage(2, 512, 256, 1024, 2)
        self.res5 = stage(3, 1024, 512, 2048, 2)
        self.fc6 = Linear(2048, 1000)

    if pretrained_model:
        if pretrained_model.endswith('.caffemodel'):
            npz_name = 'ResNet-{}-model.npz'.format(n_layers)
            _retrieve(n_layers, npz_name, pretrained_model, self)
        else:
            npz.load_npz(pretrained_model, self)

    def stem_pool(x):
        return max_pooling_2d(x, ksize=3, stride=2)

    # Ordered mapping from stage name to the callables applied in sequence.
    table = collections.OrderedDict()
    table['conv1'] = [self.conv1, self.bn1, relu]
    table['pool1'] = [stem_pool]
    table['res2'] = [self.res2]
    table['res3'] = [self.res3]
    table['res4'] = [self.res4]
    table['res5'] = [self.res5]
    table['pool5'] = [_global_average_pooling_2d]
    table['fc6'] = [self.fc6]
    table['prob'] = [softmax]
    self.functions = table
def __init__(self, pretrained_model='auto'):
    """Create the GoogLeNet links, optionally load weights, build ``functions``.

    Args:
        pretrained_model: ``'auto'`` hands the npz name and caffemodel URL
            to ``_retrieve``; any other truthy value is passed to
            ``npz.load_npz`` as a path; a falsy value skips loading.
    """
    if not pretrained_model:
        # Nothing will be loaded: use the random initializer.
        kwargs = {'initialW': uniform.GlorotUniform(scale=1.0)}
    else:
        # Weights are overwritten by the load below, so zeros avoid the
        # cost of random sampling.
        kwargs = {'initialW': constant.Zero()}

    def conv(*args, **options):
        options.update(kwargs)
        return Convolution2D(*args, **options)

    def dense(n_in, n_out):
        return Linear(n_in, n_out, **kwargs)

    super(GoogLeNet, self).__init__(
        conv1=conv(3, 64, 7, stride=2, pad=3),
        conv2_reduce=conv(64, 64, 1),
        conv2=conv(64, 192, 3, stride=1, pad=1),
        # Inception modules are built without the initializer overrides.
        inc3a=Inception(192, 64, 96, 128, 16, 32, 32),
        inc3b=Inception(256, 128, 128, 192, 32, 96, 64),
        inc4a=Inception(480, 192, 96, 208, 16, 48, 64),
        inc4b=Inception(512, 160, 112, 224, 24, 64, 64),
        inc4c=Inception(512, 128, 128, 256, 24, 64, 64),
        inc4d=Inception(512, 112, 144, 288, 32, 64, 64),
        inc4e=Inception(528, 256, 160, 320, 32, 128, 128),
        inc5a=Inception(832, 256, 160, 320, 32, 128, 128),
        inc5b=Inception(832, 384, 192, 384, 48, 128, 128),
        loss3_fc=dense(1024, 1000),
        loss1_conv=conv(512, 128, 1),
        loss1_fc1=dense(2048, 1024),
        loss1_fc2=dense(1024, 1000),
        loss2_conv=conv(528, 128, 1),
        loss2_fc1=dense(2048, 1024),
        loss2_fc2=dense(1024, 1000),
    )

    if pretrained_model == 'auto':
        _retrieve(
            'bvlc_googlenet.npz',
            'http://dl.caffe.berkeleyvision.org/bvlc_googlenet.caffemodel',
            self)
    elif pretrained_model:
        npz.load_npz(pretrained_model, self)

    # Both auxiliary heads share the same shape of pipeline.
    aux_head_1 = [
        _average_pooling_2d_k5, self.loss1_conv, relu,
        self.loss1_fc1, relu, self.loss1_fc2,
    ]
    aux_head_2 = [
        _average_pooling_2d_k5, self.loss2_conv, relu,
        self.loss2_fc1, relu, self.loss2_fc2,
    ]

    table = OrderedDict()
    table['conv1'] = [self.conv1, relu]
    table['pool1'] = [_max_pooling_2d, _local_response_normalization]
    table['conv2_reduce'] = [self.conv2_reduce, relu]
    table['conv2'] = [self.conv2, relu, _local_response_normalization]
    table['pool2'] = [_max_pooling_2d]
    table['inception_3a'] = [self.inc3a]
    table['inception_3b'] = [self.inc3b]
    table['pool3'] = [_max_pooling_2d]
    table['inception_4a'] = [self.inc4a]
    table['inception_4b'] = [self.inc4b]
    table['inception_4c'] = [self.inc4c]
    table['inception_4d'] = [self.inc4d]
    table['inception_4e'] = [self.inc4e]
    table['pool4'] = [_max_pooling_2d]
    table['inception_5a'] = [self.inc5a]
    table['inception_5b'] = [self.inc5b]
    table['pool5'] = [_average_pooling_2d_k7]
    table['loss3_fc'] = [_dropout, self.loss3_fc]
    table['prob'] = [softmax]
    # Since usually the following outputs are not used, they are put
    # after 'prob' to be skipped for efficiency.
    table['loss1_fc2'] = aux_head_1
    table['loss2_fc2'] = aux_head_2
    self.functions = table
def __init__(self):
    """Register an input convolution, seven conv/batch-norm pairs and two linear links."""
    super().__init__()

    def conv(n_in, n_out, stride):
        # Each convolution gets its own Normal(0.02) initializer instance.
        return Convolution2D(n_in, n_out, ksize=3, stride=stride, pad=0,
                             initialW=Normal(0.02))

    # (input channels, output channels, stride) for c1..c7; each bnN is
    # sized to the output channels of the matching cN.
    pairs = (
        (64, 64, 2),
        (64, 128, 1),
        (128, 128, 2),
        (128, 256, 1),
        (256, 256, 2),
        (256, 512, 1),
        (512, 512, 2),
    )

    with self.init_scope():
        self.conv_input = conv(3, 64, 1)
        for idx, (n_in, n_out, stride) in enumerate(pairs, 1):
            setattr(self, 'c{}'.format(idx), conv(n_in, n_out, stride))
            setattr(self, 'bn{}'.format(idx), BatchNormalization(n_out))
        self.linear1 = Linear(in_size=4608, out_size=1024)
        # in_size is None, so it is left for Linear to determine.
        self.linear2 = Linear(in_size=None, out_size=2)
def __init__(self, pretrained_model='auto', n_channels=3, n_outputs=101,
             mean_path='datasets/models/mean2.npz'):
    """Create the C3D links, load weights, and add preprocessing links.

    Args:
        pretrained_model: ``'auto'`` hands the npz name and download URL to
            ``_retrieve``; any other truthy value is passed to
            ``npz.load_npz`` as a path; a falsy value skips loading.
        n_channels: Input channels of ``conv1a`` and both channel counts of
            the 1x1x1 ``pre`` convolution.
        n_outputs: Output size of ``fc8``.
        mean_path: Path of an ``.npz`` archive with a ``'mean'`` array; its
            ``[:, :, 8:120, 8:120]`` crop is used to fill the ``mean`` bias.
    """
    super(C3DVersion1UCF101, self).__init__()

    if pretrained_model:
        # As a sampling process is time-consuming,
        # we employ a zero initializer for faster computation.
        init = constant.Zero()
        conv_kwargs = {'initialW': init, 'initial_bias': init}
        fc_kwargs = conv_kwargs
    else:
        # employ default initializers used in the original paper
        conv_kwargs = {
            'initialW': normal.Normal(0.01),
            'initial_bias': constant.Zero(),
        }
        fc_kwargs = {
            'initialW': normal.Normal(0.005),
            'initial_bias': constant.One(),
        }

    with self.init_scope():
        self.conv1a = ConvolutionND(3, n_channels, 64, 3, 1, 1, **conv_kwargs)
        self.conv2a = ConvolutionND(3, 64, 128, 3, 1, 1, **conv_kwargs)
        self.conv3a = ConvolutionND(3, 128, 256, 3, 1, 1, **conv_kwargs)
        self.conv3b = ConvolutionND(3, 256, 256, 3, 1, 1, **conv_kwargs)
        self.conv4a = ConvolutionND(3, 256, 512, 3, 1, 1, **conv_kwargs)
        self.conv4b = ConvolutionND(3, 512, 512, 3, 1, 1, **conv_kwargs)
        self.conv5a = ConvolutionND(3, 512, 512, 3, 1, 1, **conv_kwargs)
        self.conv5b = ConvolutionND(3, 512, 512, 3, 1, 1, **conv_kwargs)
        self.fc6 = Linear(512 * 4 * 4, 4096, **fc_kwargs)
        self.fc7 = Linear(4096, 4096, **fc_kwargs)
        self.fc8 = Linear(4096, n_outputs, **fc_kwargs)

    if pretrained_model == 'auto':
        _retrieve(
            'conv3d_deepnetA_ucf.npz',
            'http://vlg.cs.dartmouth.edu/c3d/'
            'c3d_ucf101_finetune_whole_iter_20000',
            self)
    elif pretrained_model:
        npz.load_npz(pretrained_model, self)

    # NOTE(review): `pre` and `mean` are created after the weights are
    # loaded and, as reconstructed here, outside init_scope, so they would
    # not be registered as child links - confirm this is intended.
    self.pre = ConvolutionND(3, n_channels, n_channels, 1, 1, 0,
                             nobias=True, **conv_kwargs)
    # Weight 128 at (out, in) = (0, 2), (1, 1), (2, 0), zero elsewhere:
    # reverses the order of the first three channels and scales by 128.
    self.pre.W.data[:] = 0
    self.pre.W.data[[0, 1, 2], [2, 1, 0]] = 128
    # Earlier per-channel alternative, kept for reference:
    # self.pre.b.data[:] = 128 - numpy.array([90.25164795, 97.65701294, 101.4083252])

    self.mean = Bias(shape=(3, 16, 112, 112))
    # Close the archive once the array has been read; numpy.load keeps the
    # underlying file open for .npz inputs until the NpzFile is closed.
    with numpy.load(mean_path) as archive:
        mean = archive['mean']
    self.mean.b.data[:] = 128 - mean[:, :, 8:8 + 112, 8:8 + 112]

    self.functions = collections.OrderedDict([
        ('pre', [self.pre, _resize, self.mean]),
        ('conv1a', [self.conv1a, relu]),
        ('pool1', [_max_pooling_2d]),
        ('conv2a', [self.conv2a, relu]),
        ('pool2', [_max_pooling_3d]),
        ('conv3a', [self.conv3a, relu]),
        ('conv3b', [self.conv3b, relu]),
        ('pool3', [_max_pooling_3d]),
        ('conv4a', [self.conv4a, relu]),
        ('conv4b', [self.conv4b, relu]),
        ('pool4', [_max_pooling_3d]),
        ('conv5a', [self.conv5a, relu]),
        ('conv5b', [self.conv5b, relu]),
        ('pool5', [_max_pooling_3d, dropout]),
        ('fc6', [self.fc6, relu, dropout]),
        ('fc7', [self.fc7, relu, dropout]),
        ('fc8', [self.fc8]),
        ('prob', [softmax]),
    ])
def __init__(self, pretrained_model='auto'):
    """Create the VGG16 links, optionally load weights, build ``functions``.

    Args:
        pretrained_model: ``'auto'`` hands the npz name and caffemodel URL
            to ``_retrieve``; any other truthy value is passed to
            ``npz.load_npz`` as a path; a falsy value skips loading.
    """
    if pretrained_model:
        # As a sampling process is time-consuming,
        # we employ a zero initializer for faster computation.
        init = constant.Zero()
        kwargs = {'initialW': init, 'initial_bias': init}
    else:
        # employ default initializers used in the original paper
        kwargs = {
            'initialW': normal.Normal(0.01),
            'initial_bias': constant.Zero(),
        }

    super(VGG16Layers, self).__init__(
        conv1_1=Convolution2D(3, 64, 3, 1, 1, **kwargs),
        conv1_2=Convolution2D(64, 64, 3, 1, 1, **kwargs),
        conv2_1=Convolution2D(64, 128, 3, 1, 1, **kwargs),
        conv2_2=Convolution2D(128, 128, 3, 1, 1, **kwargs),
        conv3_1=Convolution2D(128, 256, 3, 1, 1, **kwargs),
        conv3_2=Convolution2D(256, 256, 3, 1, 1, **kwargs),
        conv3_3=Convolution2D(256, 256, 3, 1, 1, **kwargs),
        conv4_1=Convolution2D(256, 512, 3, 1, 1, **kwargs),
        conv4_2=Convolution2D(512, 512, 3, 1, 1, **kwargs),
        conv4_3=Convolution2D(512, 512, 3, 1, 1, **kwargs),
        conv5_1=Convolution2D(512, 512, 3, 1, 1, **kwargs),
        conv5_2=Convolution2D(512, 512, 3, 1, 1, **kwargs),
        conv5_3=Convolution2D(512, 512, 3, 1, 1, **kwargs),
        fc6=Linear(512 * 7 * 7, 4096, **kwargs),
        fc7=Linear(4096, 4096, **kwargs),
        fc8=Linear(4096, 1000, **kwargs),
    )

    if pretrained_model == 'auto':
        _retrieve(
            'VGG_ILSVRC_16_layers.npz',
            'http://www.robots.ox.ac.uk/%7Evgg/software/very_deep/'
            'caffe/VGG_ILSVRC_16_layers.caffemodel',
            self)
    elif pretrained_model:
        npz.load_npz(pretrained_model, self)

    self.functions = collections.OrderedDict([
        ('conv1_1', [self.conv1_1, relu]),
        ('conv1_2', [self.conv1_2, relu]),
        ('pool1', [_max_pooling_2d]),
        ('conv2_1', [self.conv2_1, relu]),
        ('conv2_2', [self.conv2_2, relu]),
        ('pool2', [_max_pooling_2d]),
        ('conv3_1', [self.conv3_1, relu]),
        ('conv3_2', [self.conv3_2, relu]),
        ('conv3_3', [self.conv3_3, relu]),
        ('pool3', [_max_pooling_2d]),
        ('conv4_1', [self.conv4_1, relu]),
        ('conv4_2', [self.conv4_2, relu]),
        ('conv4_3', [self.conv4_3, relu]),
        ('pool4', [_max_pooling_2d]),
        ('conv5_1', [self.conv5_1, relu]),
        ('conv5_2', [self.conv5_2, relu]),
        ('conv5_3', [self.conv5_3, relu]),
        ('pool5', [_max_pooling_2d]),
        ('fc6', [self.fc6, relu, dropout]),
        ('fc7', [self.fc7, relu, dropout]),
        # fc8 produces the class scores and feeds softmax directly. A relu
        # here would clip every negative score to zero before 'prob' and
        # does not match the published VGG-16 network definition.
        ('fc8', [self.fc8]),
        ('prob', [softmax]),
    ])