def test_global_pooling_repr(self):
    """
    Check the string representation of ``GlobalPooling`` for both a
    named pooling function and an arbitrary callable.
    """
    # Pooling function identified by its string name.
    named_pooling = layers.GlobalPooling('max')
    self.assertEqual(
        "GlobalPooling('max', name='global-pooling-1')",
        str(named_pooling),
    )

    # Pooling function passed as a callable. The function's repr is
    # not stable, so only the overall pattern is checked.
    callable_pooling = layers.GlobalPooling(lambda x: x)
    expected_pattern = (
        r"GlobalPooling\(<function .+>, name='global-pooling-2'\)"
    )
    self.assertRegexpMatches(str(callable_pooling), expected_pattern)
def test_global_pooling_output_shape(self):
    """
    Output shape of the pooling layer is unknown until the layer gets
    connected to an input, after which only the last dimension of
    the ``(8, 8, 3)`` input is expected to remain.
    """
    image_input = layers.Input((8, 8, 3))
    pooling = layers.GlobalPooling('avg')

    # Before the connection is made there is no shape information.
    self.assertEqual(pooling.output_shape, None)

    layers.join(image_input, pooling)

    self.assertEqual(pooling.output_shape, (3, ))
def squeezenet():
    """
    Build SqueezeNet architecture with randomly initialized parameters.
    Pre-trained parameters can be loaded with the ``neupy.storage``
    module.

    SqueezeNet has roughly 1.2 million parameters, which is almost
    50 times less than in AlexNet. Parameters can be stored as 5Mb
    file.

    Examples
    --------
    >>> from neupy import architectures
    >>> squeezenet = architectures.squeezenet()
    >>> squeezenet
    (?, 227, 227, 3) -> [... 67 layers ...] -> (?, 1000)
    >>>
    >>> from neupy import algorithms
    >>> optimizer = algorithms.Momentum(squeezenet)

    See Also
    --------
    :architecture:`vgg16` : VGG16 network
    :architecture:`vgg19` : VGG19 network
    :architecture:`resnet50` : ResNet50 network

    References
    ----------
    SqueezeNet: AlexNet-level accuracy with 50x fewer parameters
    and <0.5MB model size
    https://arxiv.org/abs/1602.07360
    """
    # Layers are created in exactly the order in which they are
    # connected, so that any construction-order dependent state stays
    # the same as with a direct ``layers.join(...)`` call.
    architecture = [
        layers.Input((227, 227, 3)),

        layers.Convolution(
            (7, 7, 96),
            stride=(2, 2),
            padding='VALID',
            name='conv1',
        ),
        layers.Relu(),
        layers.MaxPooling((3, 3), stride=(2, 2)),

        Fire(16, 64, 64, name='fire2'),
        Fire(16, 64, 64, name='fire3'),
        Fire(32, 128, 128, name='fire4'),
        layers.MaxPooling((2, 2)),

        Fire(32, 128, 128, name='fire5'),
        Fire(48, 192, 192, name='fire6'),
        Fire(48, 192, 192, name='fire7'),
        Fire(64, 256, 256, name='fire8'),
        layers.MaxPooling((2, 2)),

        Fire(64, 256, 256, name='fire9'),
        layers.Dropout(0.5),

        # Classification head: 1x1 convolution with 1000 filters,
        # followed by the global average pooling and softmax.
        layers.Convolution((1, 1, 1000), name='conv10'),
        layers.GlobalPooling('avg'),
        layers.Reshape(),
        layers.Softmax(),
    ]
    return layers.join(*architecture)
def test_global_pooling_other_function(self):
    """
    A custom reduction (``tf.reduce_sum``) can be used as the pooling
    function: an all-ones ``(2, 4, 5, 3)`` input has to produce a
    ``(2, 3)`` output filled with ``20``.
    """
    ones_input = asfloat(np.ones((2, 4, 5, 3)))
    sum_pooling = layers.GlobalPooling(function=tf.reduce_sum)

    pooled = self.eval(sum_pooling.output(ones_input))

    self.assertEqual(pooled.shape, (2, 3))
    np.testing.assert_array_equal(np.full((2, 3), 20.0), pooled)
def test_global_pooling(self):
    """
    Global average pooling over an all-ones ``(2, 4, 5, 3)`` input
    has to produce an all-ones ``(2, 3)`` output.
    """
    ones_input = asfloat(np.ones((2, 4, 5, 3)))
    mean_pooling = layers.GlobalPooling('avg')

    pooled = self.eval(mean_pooling.output(ones_input))

    self.assertEqual(pooled.shape, (2, 3))
    np.testing.assert_array_equal(np.ones((2, 3)), pooled)
def test_global_pooling_late_shape_init(self):
    """
    Output shape of the pooling layer has to be resolved lazily, once
    an input layer is attached in front of the network.
    """
    convolution = layers.Convolution((3, 3, 12))
    pooling = layers.GlobalPooling('max')

    # Without an input layer every output dimension is unknown.
    headless_network = layers.join(convolution, pooling)
    self.assertShapesEqual(headless_network.output_shape, (None, None))

    # With the input attached, the number of convolution filters (12)
    # is expected as the last output dimension.
    complete_network = layers.join(
        layers.Input((10, 10, 1)),
        headless_network,
    )
    self.assertShapesEqual(complete_network.output_shape, (None, 12))
def test_global_pooling_output_shape(self):
    """
    Network made of an ``(8, 8, 3)`` input and a global average
    pooling layer has to report ``(None, 8, 8, 3)`` as the input shape
    and ``(None, 3)`` as the output shape.
    """
    image_input = layers.Input((8, 8, 3))
    pooling = layers.GlobalPooling('avg')
    network = layers.join(image_input, pooling)

    expected_shapes = (
        (network.input_shape, (None, 8, 8, 3)),
        (network.output_shape, (None, 3)),
    )
    for actual_shape, expected_shape in expected_shapes:
        self.assertShapesEqual(actual_shape, expected_shape)
def test_global_pooling_other_function(self):
    """
    A custom reduction (``T.sum``) can be used as the pooling
    function: an all-ones ``(2, 3, 4, 5)`` input has to produce a
    ``(2, 3)`` output filled with ``20``.
    """
    ones_input = asfloat(np.ones((2, 3, 4, 5)))
    sum_pooling = layers.GlobalPooling(function=T.sum)

    # Symbolic 4D variable that gets substituted with the real input
    # during the evaluation.
    symbolic_input = T.tensor4()
    symbolic_output = sum_pooling.output(symbolic_input)
    pooled = symbolic_output.eval({symbolic_input: ones_input})

    self.assertEqual(pooled.shape, (2, 3))
    np.testing.assert_array_equal(np.full((2, 3), 20.0), pooled)
]], layers.Concatenate(), ) googlenet = layers.join( layers.Input((3, None, None)), layers.Convolution((64, 7, 7), padding='half', stride=2), layers.Relu(), layers.MaxPooling((3, 3), stride=2), layers.LocalResponseNorm(alpha=0.00002, k=1), layers.Convolution((64, 1, 1)) > layers.Relu(), layers.Convolution((192, 3, 3), padding='half') > layers.Relu(), layers.LocalResponseNorm(alpha=0.00002, k=1), layers.MaxPooling((3, 3), stride=2), Inception((32, 64, 96, 128, 16, 32)), Inception((64, 128, 128, 192, 32, 96)), layers.MaxPooling((3, 3), stride=2), Inception((64, 192, 96, 208, 16, 48)), Inception((64, 160, 112, 224, 24, 64)), Inception((64, 128, 128, 256, 24, 64)), Inception((64, 112, 144, 288, 32, 64)), Inception((128, 256, 160, 320, 32, 128)), layers.MaxPooling((3, 3), stride=2), Inception((128, 256, 160, 320, 32, 128)), Inception((128, 384, 192, 384, 48, 128)), layers.GlobalPooling(function=T.mean), layers.Softmax(1000), ) plots.layer_structure(googlenet)
) resnet50 = layers.join( layers.Input((3, 224, 224)), layers.Convolution((64, 7, 7), stride=2, padding=3), layers.BatchNorm(), layers.Relu(), layers.MaxPooling((3, 3), stride=(2, 2), ignore_border=False), ResidualUnit(64, 256, stride=1, has_branch=True), ResidualUnit(64, 256, stride=1), ResidualUnit(64, 256, stride=1), ResidualUnit(128, 512, stride=2, has_branch=True), ResidualUnit(128, 512, stride=1), ResidualUnit(128, 512, stride=1), ResidualUnit(128, 512, stride=1), ResidualUnit(256, 1024, stride=2, has_branch=True), ResidualUnit(256, 1024, stride=1), ResidualUnit(256, 1024, stride=1), ResidualUnit(256, 1024, stride=1), ResidualUnit(256, 1024, stride=1), ResidualUnit(256, 1024, stride=1), ResidualUnit(512, 2048, stride=2, has_branch=True), ResidualUnit(512, 2048, stride=1), ResidualUnit(512, 2048, stride=1), layers.GlobalPooling(), layers.Reshape(), layers.Softmax(1000), ) plots.layer_structure(resnet50)
def resnet50(input_shape=(224, 224, 3), include_global_pool=True,
             in_out_ratio=32):
    """
    ResNet50 network architecture with random parameters. Parameters
    can be loaded using ``neupy.storage`` module.

    ResNet50 has roughly 25.5 million parameters.

    Parameters
    ----------
    input_shape : tuple
        Network's input shape. Defaults to ``(224, 224, 3)``.

    include_global_pool : bool
        Specifies if returned output should include global pooling
        layer (together with the final softmax layer). Defaults
        to ``True``.

    in_out_ratio : {4, 8, 16, 32}
        Every layer that applies strides reduces height and width of
        every image. There are 5 of these layers in ResNet and at the
        end each dimension gets reduced by a factor of ``32``. For
        example, 224x224 image will be reduced to 7x7 image patches.
        This parameter specifies what level of reduction we want to
        obtain after we've propagated network through all the
        convolution layers. Defaults to ``32``.

    Raises
    ------
    ValueError
        In case if ``in_out_ratio`` is not one of the supported values.

    Notes
    -----
    Because of the global pooling layer, ResNet50 can be applied to
    the images with variable sizes. The only limitation is that image
    size should be bigger than 32x32, otherwise network won't be able
    to apply all transformations to the image.

    Examples
    --------
    ResNet-50 for ImageNet classification

    >>> from neupy import architectures, algorithms
    >>>
    >>> resnet = architectures.resnet50()
    >>> resnet
    (?, 224, 224, 3) -> [... 187 layers ...] -> (?, 1000)
    >>>
    >>> optimizer = algorithms.Momentum(resnet)

    ResNet-50 for custom classification task

    >>> from neupy import architectures
    >>> resnet = architectures.resnet50(include_global_pool=False)
    >>> resnet
    (?, 224, 224, 3) -> [... 185 layers ...] -> (?, 7, 7, 2048)
    >>>
    >>> from neupy.layers import *
    >>> resnet = resnet >> GlobalPooling('avg') >> Softmax(21)
    >>> resnet
    (?, 224, 224, 3) -> [... 187 layers ...] -> (?, 21)

    ResNet-50 for image segmentation

    >>> from neupy import architectures
    >>> resnet = architectures.resnet50(
    ...     include_global_pool=False,
    ...     in_out_ratio=8,
    ... )
    >>> resnet
    (?, 224, 224, 3) -> [... 185 layers ...] -> (?, 28, 28, 2048)

    See Also
    --------
    :architecture:`vgg16` : VGG16 network
    :architecture:`vgg19` : VGG19 network
    :architecture:`squeezenet` : SqueezeNet network

    References
    ----------
    Deep Residual Learning for Image Recognition.
    https://arxiv.org/abs/1512.03385
    """
    # For every supported reduction ratio there are strides and
    # dilation rates for the last three groups of residual units.
    # Each stride that gets replaced with 1 is paired with larger
    # rates in the configuration.
    in_out_configs = {
        4: {'strides': [1, 1, 1], 'rates': [2, 4, 8]},
        8: {'strides': [2, 1, 1], 'rates': [1, 2, 4]},
        16: {'strides': [2, 2, 1], 'rates': [1, 1, 2]},
        32: {'strides': [2, 2, 2], 'rates': [1, 1, 1]},
    }

    if in_out_ratio not in in_out_configs:
        # Sorted list keeps the message deterministic and avoids
        # the ``dict_keys([...])`` wrapper in the Python 3 output.
        raise ValueError(
            "Expected one of the following in_out_ratio values: {}, got "
            "{} instead.".format(sorted(in_out_configs), in_out_ratio))

    strides = in_out_configs[in_out_ratio]['strides']
    rates = in_out_configs[in_out_ratio]['rates']

    resnet = layers.join(
        layers.Input(input_shape),

        # Convolutional layer reduces image's height and width by a
        # factor of 2 (because of the stride). For the default input
        # shape, from (224, 224, 3) to (112, 112, 64)
        layers.Convolution(
            (7, 7, 64), stride=2, bias=None,
            padding='same', name='conv1'
        ),
        layers.BatchNorm(name='bn_conv1'),
        layers.Relu(),

        # Stride equal to 2 reduces image size by a factor of two.
        # For the default input shape, from (112, 112, 64)
        # to (56, 56, 64)
        layers.MaxPooling((3, 3), stride=2, padding="same"),

        # The branch option applies extra convolution + batch
        # normalization transformations to the residual
        ResidualUnit(64, name='2a', has_branch=True),
        ResidualUnit(64, name='2b'),
        ResidualUnit(64, name='2c'),

        # When stride=2 reduces width and height by factor of 2
        ResidualUnit(128, stride=strides[0], name='3a', has_branch=True),
        ResidualUnit(128, rate=rates[0], name='3b'),
        ResidualUnit(128, rate=rates[0], name='3c'),
        ResidualUnit(128, rate=rates[0], name='3d'),

        # When stride=2 reduces width and height by factor of 2
        # NOTE(review): the first unit of the group uses the rate of
        # the previous group - presumably intentional; confirm.
        ResidualUnit(256, rate=rates[0], name='4a',
                     stride=strides[1], has_branch=True),
        ResidualUnit(256, rate=rates[1], name='4b'),
        ResidualUnit(256, rate=rates[1], name='4c'),
        ResidualUnit(256, rate=rates[1], name='4d'),
        ResidualUnit(256, rate=rates[1], name='4e'),
        ResidualUnit(256, rate=rates[1], name='4f'),

        # When stride=2 reduces width and height by factor of 2
        ResidualUnit(512, rate=rates[1], name='5a',
                     stride=strides[2], has_branch=True),
        ResidualUnit(512, rate=rates[2], name='5b'),
        ResidualUnit(512, rate=rates[2], name='5c'),
    )

    if include_global_pool:
        resnet = layers.join(
            resnet,

            # Since the final residual unit has 2048 output filters,
            # global pooling will replace every output image with
            # single average value. Despite input image size, output
            # from this layer always will be a vector with 2048 values.
            layers.GlobalPooling('avg'),
            layers.Softmax(1000, name='fc1000'),
        )

    return resnet
def test_global_pooling_for_lower_dimensions(self):
    """
    Two-dimensional input has to pass through the global pooling
    layer without any changes.
    """
    pooling = layers.GlobalPooling('max')
    matrix = np.ones((1, 5))

    pooled = pooling.output(matrix)

    np.testing.assert_array_equal(matrix, pooled)
def test_global_pooling_unknown_option(self):
    """
    Unknown name of the pooling function has to trigger
    ``ValueError`` during the layer's construction.
    """
    self.assertRaises(ValueError, layers.GlobalPooling, 'unknown')
def test_global_pooling_for_lower_dimensions(self):
    """
    Two-dimensional input has to keep its ``(4, 5)`` shape after
    the global pooling layer.
    """
    pooling = layers.GlobalPooling('max')
    matrix = np.random.random((4, 5))

    pooled = pooling.output(matrix)

    self.assertShapesEqual(pooled.shape, (4, 5))
def resnet50():
    """
    Build ResNet50 architecture with randomly initialized parameters.
    Pre-trained parameters can be loaded with the ``neupy.storage``
    module.

    ResNet50 has roughly 25.5 million parameters.

    Notes
    -----
    Because of the global pooling layer, ResNet50 can be applied to
    the images with variable sizes. The only limitation is that image
    size should be bigger than 32x32, otherwise network won't be able
    to apply all transformations to the image.

    Examples
    --------
    >>> from neupy import architectures
    >>> resnet50 = architectures.resnet50()
    >>> resnet50
    (3, 224, 224) -> [... 187 layers ...] -> 1000
    >>>
    >>> from neupy import algorithms
    >>> network = algorithms.Momentum(resnet50)

    See Also
    --------
    :architecture:`vgg16` : VGG16 network
    :architecture:`squeezenet` : SqueezeNet network
    :architecture:`alexnet` : AlexNet network

    References
    ----------
    Deep Residual Learning for Image Recognition.
    https://arxiv.org/abs/1512.03385
    """
    # Groups below are defined in the same order in which layers get
    # connected, which keeps the order of layer construction the same
    # as in a single ``layers.join(...)`` call.
    stem = [
        layers.Input((3, 224, 224)),

        # Convolutional layer reduces image's height and width by a
        # factor of 2 (because of the stride)
        # from (3, 224, 224) to (64, 112, 112)
        layers.Convolution((64, 7, 7), stride=2, padding=3, name='conv1'),
        layers.BatchNorm(name='bn_conv1'),
        layers.Relu(),

        # Stride equal to 2 reduces image size by a factor of two
        # from (64, 112, 112) to (64, 56, 56)
        layers.MaxPooling((3, 3), stride=2, ignore_border=False),
    ]

    # The branch option applies extra convolution + batch
    # normalization transformations to the residual
    group_2 = [
        ResidualUnit(64, 256, stride=1, name='2a', has_branch=True),
        ResidualUnit(64, 256, stride=1, name='2b'),
        ResidualUnit(64, 256, stride=1, name='2c'),
    ]

    # Another stride=2 reduces width and height by factor of 2
    group_3 = [
        ResidualUnit(128, 512, stride=2, name='3a', has_branch=True),
        ResidualUnit(128, 512, stride=1, name='3b'),
        ResidualUnit(128, 512, stride=1, name='3c'),
        ResidualUnit(128, 512, stride=1, name='3d'),
    ]

    # Another stride=2 reduces width and height by factor of 2
    group_4 = [
        ResidualUnit(256, 1024, stride=2, name='4a', has_branch=True),
        ResidualUnit(256, 1024, stride=1, name='4b'),
        ResidualUnit(256, 1024, stride=1, name='4c'),
        ResidualUnit(256, 1024, stride=1, name='4d'),
        ResidualUnit(256, 1024, stride=1, name='4e'),
        ResidualUnit(256, 1024, stride=1, name='4f'),
    ]

    # Another stride=2 reduces width and height by factor of 2
    group_5 = [
        ResidualUnit(512, 2048, stride=2, name='5a', has_branch=True),
        ResidualUnit(512, 2048, stride=1, name='5b'),
        ResidualUnit(512, 2048, stride=1, name='5c'),
    ]

    # Since the final residual unit has 2048 output filters, global
    # pooling will replace every output image with single average
    # value. Despite input image size, output from this layer always
    # will be a vector with 2048 values.
    head = [
        layers.GlobalPooling(),
        layers.Softmax(1000, name='fc1000'),
    ]

    return layers.join(
        *(stem + group_2 + group_3 + group_4 + group_5 + head)
    )
layers.Input((UNKNOWN, UNKNOWN, 3)), layers.Convolution((7, 7, 64), padding='SAME', stride=2), layers.Relu(), layers.MaxPooling((3, 3), stride=2), layers.LocalResponseNorm(alpha=0.00002, k=1), layers.Convolution((1, 1, 64)) > layers.Relu(), layers.Convolution((3, 3, 192), padding='SAME') > layers.Relu(), layers.LocalResponseNorm(alpha=0.00002, k=1), layers.MaxPooling((3, 3), stride=2), Inception((32, 64, 96, 128, 16, 32)), Inception((64, 128, 128, 192, 32, 96)), layers.MaxPooling((3, 3), stride=2), Inception((64, 192, 96, 208, 16, 48)), Inception((64, 160, 112, 224, 24, 64)), Inception((64, 128, 128, 256, 24, 64)), Inception((64, 112, 144, 288, 32, 64)), Inception((128, 256, 160, 320, 32, 128)), layers.MaxPooling((3, 3), stride=2), Inception((128, 256, 160, 320, 32, 128)), Inception((128, 384, 192, 384, 48, 128)), layers.GlobalPooling('avg'), layers.Softmax(1000), ) plots.network_structure(googlenet)