def resnet_v1(inputs, training=False):
    """Build the ResNet v1 variant selected by ``flags.layers``.

    Deep Residual Networks family of models
    https://arxiv.org/abs/1512.03385

    Args:
        inputs: Network input; forwarded unchanged to
            ``inference_resnet_v1_impl``.
        training: Forwarded to ``nvutils.LayerBuilder``.

    Returns:
        Whatever ``inference_resnet_v1_impl`` returns for the chosen depth.

    Raises:
        ValueError: If ``flags.layers`` is not one of 18, 34, 50, 101, 152.
    """
    builder = nvutils.LayerBuilder(tf.nn.relu, args['image_format'], training,
                                   use_batch_norm=True)

    # depth -> (list handed to the impl as its third argument,
    #           extra keyword arguments for the impl)
    variants = {
        18: ([2, 2, 2, 2], {'basic': True}),
        34: ([3, 4, 6, 3], {'basic': True}),
        50: ([3, 4, 6, 3], {}),
        101: ([3, 4, 23, 3], {}),
        152: ([3, 8, 36, 3], {}),
    }

    depth = flags.layers
    variant = variants.get(depth)
    if variant is None:
        raise ValueError(
            "Invalid layer count (%i); must be one of: 18,34,50,101,152" %
            depth)

    layer_counts, extra_kwargs = variant
    return inference_resnet_v1_impl(builder, inputs, layer_counts,
                                    **extra_kwargs)
def googlenet(inputs, training=False):
    """Assemble the GoogLeNet model.

    GoogLeNet model
    https://arxiv.org/abs/1409.4842

    Args:
        inputs: Network input; fed to the first convolution.
        training: Forwarded to ``nvutils.LayerBuilder``.

    Returns:
        The result of ``builder.spatial_average2d`` applied after the last
        inception module.
    """
    builder = nvutils.LayerBuilder(tf.nn.relu, args['image_format'], training)

    def inception_block(net, k, l, m, n, p, q):
        # Four parallel columns: 1x1, 1x1->3x3, 1x1->5x5, max-pool->1x1.
        branches = [
            [('conv2d', k, 1, 1, 'SAME')],
            [('conv2d', l, 1, 1, 'SAME'), ('conv2d', m, 3, 1, 'SAME')],
            [('conv2d', n, 1, 1, 'SAME'), ('conv2d', p, 5, 1, 'SAME')],
            [('mpool2d', 3, 1, 'SAME'), ('conv2d', q, 1, 1, 'SAME')],
        ]
        return builder.inception_module(net, 'incept_v1', branches)

    # Each stage is preceded by a 3x3/stride-2 max pool and consists of the
    # listed inception blocks, given as (k, l, m, n, p, q).
    stages = (
        (
            (64, 96, 128, 16, 32, 32),
            (128, 128, 192, 32, 96, 64),
        ),
        (
            (192, 96, 208, 16, 48, 64),
            (160, 112, 224, 24, 64, 64),
            (128, 128, 256, 24, 64, 64),
            (112, 144, 288, 32, 64, 64),
            (256, 160, 320, 32, 128, 128),
        ),
        (
            (256, 160, 320, 32, 128, 128),
            (384, 192, 384, 48, 128, 128),
        ),
    )

    net = builder.conv2d(inputs, 64, 7, 2, 'SAME')
    net = builder.max_pooling2d(net, 3, 2, 'SAME')
    net = builder.conv2d(net, 64, 1, 1, 'SAME')
    net = builder.conv2d(net, 192, 3, 1, 'SAME')
    for stage in stages:
        net = builder.max_pooling2d(net, 3, 2, 'SAME')
        for widths in stage:
            net = inception_block(net, *widths)
    return builder.spatial_average2d(net)
def trivial(inputs, training=False):
    """A trivial model for benchmarking input pipeline performance.

    Args:
        inputs: Network input; passed to ``builder.flatten2d``.
        training: Forwarded to ``nvutils.LayerBuilder``.

    Returns:
        The result of ``builder.dense_linear`` with a single output unit.
    """
    builder = nvutils.LayerBuilder(tf.nn.relu, args['image_format'], training)
    flattened = builder.flatten2d(inputs)
    return builder.dense_linear(flattened, 1)
def vgg(inputs, training=False):
    """Build the VGG variant selected by ``flags.layers``.

    Visual Geometry Group's family of models
    https://arxiv.org/abs/1409.1556

    Args:
        inputs: Network input; forwarded unchanged to ``inference_vgg_impl``.
        training: Forwarded to ``nvutils.LayerBuilder``.

    Returns:
        Whatever ``inference_vgg_impl`` returns for the chosen depth.

    Raises:
        ValueError: If ``flags.layers`` is not one of 11, 13, 16, 19.
    """
    builder = nvutils.LayerBuilder(tf.nn.relu, args['image_format'], training)

    # depth -> list handed to the impl as its third argument
    configurations = {
        11: [1, 1, 2, 2, 2],  # A
        13: [2, 2, 2, 2, 2],  # B
        16: [2, 2, 3, 3, 3],  # D
        19: [2, 2, 4, 4, 4],  # E
    }

    depth = flags.layers
    layer_counts = configurations.get(depth)
    if layer_counts is None:
        raise ValueError("Invalid nlayer (%i); must be one of: 11,13,16,19" %
                         depth)
    return inference_vgg_impl(builder, inputs, layer_counts)
def xception(inputs, training=False):
    """Assemble the Xception model from separable-convolution residual blocks.

    https://arxiv.org/abs/1610.02357

    Args:
        inputs: Network input; fed to the first convolution.
        training: Forwarded to ``nvutils.LayerBuilder``.

    Returns:
        The result of the final ``builder.dropout`` call.
    """
    builder = nvutils.LayerBuilder(tf.nn.relu, args['image_format'], training,
                                   use_batch_norm=True)
    builder.batch_norm_config = {'decay': 0.99, 'epsilon': 1e-5, 'scale': True}

    def make_xception_entry(nout, activate_first=True):
        """Return an entry block body producing ``nout`` channels."""
        def xception_entry(inputs):
            net = builder.activate(inputs) if activate_first else inputs
            net = builder.separable_conv2d(net, nout, 3, 1, 'SAME')
            net = builder.separable_conv2d_linear(net, nout, 3, 1, 'SAME')
            return builder.max_pooling2d(net, 3, 2, 'SAME')
        return xception_entry

    def xception_middle(inputs):
        net = builder.activate(inputs)
        net = builder.separable_conv2d(net, 728, 3, 1, 'SAME')
        net = builder.separable_conv2d(net, 728, 3, 1, 'SAME')
        return builder.separable_conv2d_linear(net, 728, 3, 1, 'SAME')

    def xception_exit(inputs):
        net = builder.activate(inputs)
        net = builder.separable_conv2d(net, 728, 3, 1, 'SAME')
        net = builder.separable_conv2d_linear(net, 1024, 3, 1, 'SAME')
        return builder.max_pooling2d(net, 3, 2, 'SAME')

    # Stem convolutions.
    net = builder.conv2d(inputs, 32, 3, 2, 'VALID')
    net = builder.conv2d(net, 64, 3, 1, 'VALID')

    # Entry blocks: only the first one skips the leading activation.
    net = builder.residual2d(net, make_xception_entry(128, False), 128)
    for width in (256, 728):
        net = builder.residual2d(net, make_xception_entry(width), width)

    # Eight identical middle blocks.
    for _ in range(8):
        net = builder.residual2d(net, xception_middle)

    # Exit block and classifier head.
    net = builder.residual2d(net, xception_exit, 1024)
    net = builder.separable_conv2d(net, 1536, 3, 1, 'SAME')
    net = builder.separable_conv2d(net, 2048, 3, 1, 'SAME')
    net = builder.spatial_average2d(net)
    # Note: Optional FC layer not included
    return builder.dropout(net, 0.5)
def overfeat(inputs, training=False):
    """Assemble the OverFeat model.

    https://arxiv.org/abs/1312.6229

    Args:
        inputs: Network input; fed to the first convolution.
        training: Forwarded to ``nvutils.LayerBuilder``.

    Returns:
        The result of the final 4096-unit ``builder.dense`` call.
    """
    builder = nvutils.LayerBuilder(tf.nn.relu, args['image_format'], training)

    # Convolutional feature extractor.
    net = builder.conv2d(inputs, 96, 11, 4, 'VALID')
    net = builder.max_pooling2d(net, 2, 2, 'VALID')
    net = builder.conv2d(net, 256, 5, 1, 'VALID')
    net = builder.max_pooling2d(net, 2, 2, 'VALID')
    net = builder.conv2d(net, 512, 3, 1, 'SAME')
    net = builder.conv2d(net, 1024, 3, 1, 'SAME')
    net = builder.conv2d(net, 1024, 3, 1, 'SAME')
    net = builder.max_pooling2d(net, 2, 2, 'VALID')

    # Fully connected head.
    net = builder.flatten2d(net)
    net = builder.dense(net, 3072)
    return builder.dense(net, 4096)
def alexnet_owt(inputs, training=False):
    """Alexnet One Weird Trick model
    https://arxiv.org/abs/1404.5997

    Args:
        inputs: Network input; fed to the first convolution.
        training: Forwarded to ``nvutils.LayerBuilder``. Defaults to
            ``False`` so the signature matches the other model builders in
            this file (e.g. ``resnet_v1``, ``vgg``); existing callers that
            pass it explicitly are unaffected.

    Returns:
        The result of the final ``builder.dropout`` call.
    """
    builder = nvutils.LayerBuilder(tf.nn.relu, args['image_format'], training)
    # Note: VALID requires padding the images by 3 in width and height
    x = inputs
    x = builder.conv2d(x, 64, 11, 4, 'VALID')
    x = builder.max_pooling2d(x, 3, 2)
    x = builder.conv2d(x, 192, 5, 1, 'SAME')
    x = builder.max_pooling2d(x, 3, 2)
    x = builder.conv2d(x, 384, 3, 1, 'SAME')
    x = builder.conv2d(x, 256, 3, 1, 'SAME')
    x = builder.conv2d(x, 256, 3, 1, 'SAME')
    x = builder.max_pooling2d(x, 3, 2)
    x = builder.flatten2d(x)
    # Two dense layers, each followed by dropout with the builder's default
    # dropout arguments.
    x = builder.dense(x, 4096)
    x = builder.dropout(x)
    x = builder.dense(x, 4096)
    x = builder.dropout(x)
    return x
def inception_v3(inputs, training=False):
    """Assemble Google's Inception v3 model.

    https://arxiv.org/abs/1512.00567

    Args:
        inputs: Network input; fed to the first convolution.
        training: Forwarded to ``nvutils.LayerBuilder``.

    Returns:
        The result of ``builder.spatial_average2d`` applied after the last
        inception module.
    """
    def module_a(builder, net, n):
        branches = [
            [('conv2d', 64, 1, 1, 'SAME')],
            [('conv2d', 48, 1, 1, 'SAME'),
             ('conv2d', 64, 5, 1, 'SAME')],
            [('conv2d', 64, 1, 1, 'SAME'),
             ('conv2d', 96, 3, 1, 'SAME'),
             ('conv2d', 96, 3, 1, 'SAME')],
            [('apool2d', 3, 1, 'SAME'),
             ('conv2d', n, 1, 1, 'SAME')],
        ]
        return builder.inception_module(net, 'incept_v3_a', branches)

    def module_b(builder, net):
        branches = [
            [('conv2d', 64, 1, 1, 'SAME'),
             ('conv2d', 96, 3, 1, 'SAME'),
             ('conv2d', 96, 3, 2, 'VALID')],
            [('conv2d', 384, 3, 2, 'VALID')],
            [('mpool2d', 3, 2, 'VALID')],
        ]
        return builder.inception_module(net, 'incept_v3_b', branches)

    def module_c(builder, net, n):
        branches = [
            [('conv2d', 192, 1, 1, 'SAME')],
            [('conv2d', n, 1, 1, 'SAME'),
             ('conv2d', n, (1, 7), 1, 'SAME'),
             ('conv2d', 192, (7, 1), 1, 'SAME')],
            [('conv2d', n, 1, 1, 'SAME'),
             ('conv2d', n, (7, 1), 1, 'SAME'),
             ('conv2d', n, (1, 7), 1, 'SAME'),
             ('conv2d', n, (7, 1), 1, 'SAME'),
             ('conv2d', 192, (1, 7), 1, 'SAME')],
            [('apool2d', 3, 1, 'SAME'),
             ('conv2d', 192, 1, 1, 'SAME')],
        ]
        return builder.inception_module(net, 'incept_v3_c', branches)

    def module_d(builder, net):
        branches = [
            [('conv2d', 192, 1, 1, 'SAME'),
             ('conv2d', 320, 3, 2, 'VALID')],
            [('conv2d', 192, 1, 1, 'SAME'),
             ('conv2d', 192, (1, 7), 1, 'SAME'),
             ('conv2d', 192, (7, 1), 1, 'SAME'),
             ('conv2d', 192, 3, 2, 'VALID')],
            [('mpool2d', 3, 2, 'VALID')],
        ]
        return builder.inception_module(net, 'incept_v3_d', branches)

    def module_e(builder, net, pooltype):
        # Raises KeyError for any pooltype other than 'AVG' or 'MAX'.
        pool_op = {'AVG': 'apool2d', 'MAX': 'mpool2d'}[pooltype]
        branches = [
            [('conv2d', 320, 1, 1, 'SAME')],
            [('conv2d', 384, 1, 1, 'SAME'),
             ('conv2d', 384, (1, 3), 1, 'SAME')],
            [('share', ),
             ('conv2d', 384, (3, 1), 1, 'SAME')],
            [('conv2d', 448, 1, 1, 'SAME'),
             ('conv2d', 384, 3, 1, 'SAME'),
             ('conv2d', 384, (1, 3), 1, 'SAME')],
            [('share', ),
             ('share', ),
             ('conv2d', 384, (3, 1), 1, 'SAME')],
            [(pool_op, 3, 1, 'SAME'),
             ('conv2d', 192, 1, 1, 'SAME')],
        ]
        return builder.inception_module(net, 'incept_v3_e', branches)

    # TODO: This does not include the extra 'arm' that forks off
    #       from before the 3rd-last module (the arm is designed
    #       to speed up training in the early stages).
    builder = nvutils.LayerBuilder(tf.nn.relu, args['image_format'], training,
                                   use_batch_norm=True)

    # Stem.
    net = builder.conv2d(inputs, 32, 3, 2, 'VALID')
    net = builder.conv2d(net, 32, 3, 1, 'VALID')
    net = builder.conv2d(net, 64, 3, 1, 'SAME')
    net = builder.max_pooling2d(net, 3, 2, 'VALID')
    net = builder.conv2d(net, 80, 1, 1, 'VALID')
    net = builder.conv2d(net, 192, 3, 1, 'VALID')
    net = builder.max_pooling2d(net, 3, 2, 'VALID')

    # Inception modules.
    for pool_proj in (32, 64, 64):
        net = module_a(builder, net, pool_proj)
    net = module_b(builder, net)
    for width in (128, 160, 160, 192):
        net = module_c(builder, net, width)
    net = module_d(builder, net)
    for pooltype in ('AVG', 'MAX'):
        net = module_e(builder, net, pooltype)

    return builder.spatial_average2d(net)
def inception_v4(inputs, training=False):
    """Google's Inception v4 model
    https://arxiv.org/abs/1602.07261

    Args:
        inputs: Network input; fed to the first convolution.
        training: Forwarded to ``nvutils.LayerBuilder``. Defaults to
            ``False`` so the signature matches the other model builders in
            this file (e.g. ``inception_v3``); callers that pass it
            explicitly are unaffected.

    Returns:
        The result of the final ``builder.dropout`` call.
    """
    # Stem functions
    def inception_v4_sa(builder, x):
        cols = [[('mpool2d', 3, 2, 'VALID')],
                [('conv2d', 96, 3, 2, 'VALID')]]
        return builder.inception_module(x, 'incept_v4_sa', cols)

    def inception_v4_sb(builder, x):
        cols = [[('conv2d', 64, 1, 1, 'SAME'),
                 ('conv2d', 96, 3, 1, 'VALID')],
                [('conv2d', 64, 1, 1, 'SAME'),
                 ('conv2d', 64, (7, 1), 1, 'SAME'),
                 ('conv2d', 64, (1, 7), 1, 'SAME'),
                 ('conv2d', 96, 3, 1, 'VALID')]]
        return builder.inception_module(x, 'incept_v4_sb', cols)

    def inception_v4_sc(builder, x):
        cols = [[('conv2d', 192, 3, 2, 'VALID')],
                [('mpool2d', 3, 2, 'VALID')]]
        return builder.inception_module(x, 'incept_v4_sc', cols)

    # Reduction functions
    def inception_v4_ra(builder, x, k, l, m, n):
        cols = [[('mpool2d', 3, 2, 'VALID')],
                [('conv2d', n, 3, 2, 'VALID')],
                [('conv2d', k, 1, 1, 'SAME'),
                 ('conv2d', l, 3, 1, 'SAME'),
                 ('conv2d', m, 3, 2, 'VALID')]]
        return builder.inception_module(x, 'incept_v4_ra', cols)

    def inception_v4_rb(builder, x):
        cols = [[('mpool2d', 3, 2, 'VALID')],
                [('conv2d', 192, 1, 1, 'SAME'),
                 ('conv2d', 192, 3, 2, 'VALID')],
                [('conv2d', 256, 1, 1, 'SAME'),
                 ('conv2d', 256, (1, 7), 1, 'SAME'),
                 ('conv2d', 320, (7, 1), 1, 'SAME'),
                 ('conv2d', 320, 3, 2, 'VALID')]]
        return builder.inception_module(x, 'incept_v4_rb', cols)

    # NOTE: the previously nested ``inception_resnet_v2_rb`` helper was never
    # called by this network and has been removed as dead code.

    # Inception module functions
    def inception_v4_a(builder, x):
        cols = [[('apool2d', 3, 1, 'SAME'),
                 ('conv2d', 96, 1, 1, 'SAME')],
                [('conv2d', 96, 1, 1, 'SAME')],
                [('conv2d', 64, 1, 1, 'SAME'),
                 ('conv2d', 96, 3, 1, 'SAME')],
                [('conv2d', 64, 1, 1, 'SAME'),
                 ('conv2d', 96, 3, 1, 'SAME'),
                 ('conv2d', 96, 3, 1, 'SAME')]]
        return builder.inception_module(x, 'incept_v4_a', cols)

    def inception_v4_b(builder, x):
        cols = [[('apool2d', 3, 1, 'SAME'),
                 ('conv2d', 128, 1, 1, 'SAME')],
                [('conv2d', 384, 1, 1, 'SAME')],
                [('conv2d', 192, 1, 1, 'SAME'),
                 ('conv2d', 224, (1, 7), 1, 'SAME'),
                 ('conv2d', 256, (7, 1), 1, 'SAME')],
                [('conv2d', 192, 1, 1, 'SAME'),
                 ('conv2d', 192, (1, 7), 1, 'SAME'),
                 ('conv2d', 224, (7, 1), 1, 'SAME'),
                 ('conv2d', 224, (1, 7), 1, 'SAME'),
                 ('conv2d', 256, (7, 1), 1, 'SAME')]]
        return builder.inception_module(x, 'incept_v4_b', cols)

    def inception_v4_c(builder, x):
        cols = [[('apool2d', 3, 1, 'SAME'),
                 ('conv2d', 256, 1, 1, 'SAME')],
                [('conv2d', 256, 1, 1, 'SAME')],
                [('conv2d', 384, 1, 1, 'SAME'),
                 ('conv2d', 256, (1, 3), 1, 'SAME')],
                [('share', ),
                 ('conv2d', 256, (3, 1), 1, 'SAME')],
                [('conv2d', 384, 1, 1, 'SAME'),
                 ('conv2d', 448, (1, 3), 1, 'SAME'),
                 ('conv2d', 512, (3, 1), 1, 'SAME'),
                 ('conv2d', 256, (3, 1), 1, 'SAME')],
                [('share', ),
                 ('share', ),
                 ('share', ),
                 ('conv2d', 256, (1, 3), 1, 'SAME')]]
        return builder.inception_module(x, 'incept_v4_c', cols)

    builder = nvutils.LayerBuilder(tf.nn.relu, args['image_format'], training,
                                   use_batch_norm=True)
    x = inputs
    # Stem
    x = builder.conv2d(x, 32, 3, 2, 'VALID')
    x = builder.conv2d(x, 32, 3, 1, 'VALID')
    x = builder.conv2d(x, 64, 3, 1, 'SAME')
    x = inception_v4_sa(builder, x)
    x = inception_v4_sb(builder, x)
    x = inception_v4_sc(builder, x)
    # 4 x A modules, reduction, 7 x B modules, reduction, 3 x C modules
    for _ in range(4):
        x = inception_v4_a(builder, x)
    x = inception_v4_ra(builder, x, 192, 224, 256, 384)
    for _ in range(7):
        x = inception_v4_b(builder, x)
    x = inception_v4_rb(builder, x)
    for _ in range(3):
        x = inception_v4_c(builder, x)
    x = builder.spatial_average2d(x)
    # NOTE(review): 0.8 is passed as dropout's second argument; whether that
    # is a keep-probability or a drop-rate depends on nvutils - confirm.
    x = builder.dropout(x, 0.8)
    return x
def inception_resnet_v2(inputs, training=False):
    """Google's Inception-Resnet v2 model
    https://arxiv.org/abs/1602.07261

    Args:
        inputs: Network input; fed to the first convolution.
        training: Forwarded to ``nvutils.LayerBuilder``. Defaults to
            ``False`` so the signature matches the other model builders in
            this file (e.g. ``inception_v3``); callers that pass it
            explicitly are unaffected.

    Returns:
        The result of the final ``builder.dropout`` call.
    """
    builder = nvutils.LayerBuilder(tf.nn.relu, args['image_format'], training,
                                   use_batch_norm=True)

    # Stem functions
    def inception_v4_sa(x):
        cols = [[('mpool2d', 3, 2, 'VALID')],
                [('conv2d', 96, 3, 2, 'VALID')]]
        return builder.inception_module(x, 'incept_v4_sa', cols)

    def inception_v4_sb(x):
        cols = [[('conv2d', 64, 1, 1, 'SAME'),
                 ('conv2d', 96, 3, 1, 'VALID')],
                [('conv2d', 64, 1, 1, 'SAME'),
                 ('conv2d', 64, (7, 1), 1, 'SAME'),
                 ('conv2d', 64, (1, 7), 1, 'SAME'),
                 ('conv2d', 96, 3, 1, 'VALID')]]
        return builder.inception_module(x, 'incept_v4_sb', cols)

    def inception_v4_sc(x):
        cols = [[('conv2d', 192, 3, 2, 'VALID')],
                [('mpool2d', 3, 2, 'VALID')]]
        return builder.inception_module(x, 'incept_v4_sc', cols)

    # Reduction functions
    def inception_v4_ra(x, k, l, m, n):
        cols = [[('mpool2d', 3, 2, 'VALID')],
                [('conv2d', n, 3, 2, 'VALID')],
                [('conv2d', k, 1, 1, 'SAME'),
                 ('conv2d', l, 3, 1, 'SAME'),
                 ('conv2d', m, 3, 2, 'VALID')]]
        return builder.inception_module(x, 'incept_v4_ra', cols)

    # NOTE: the previously nested ``inception_v4_rb`` helper was never called
    # by this network (``inception_resnet_v2_rb`` is used instead) and has
    # been removed as dead code.

    def inception_resnet_v2_rb(x):
        cols = [
            [('mpool2d', 3, 2, 'VALID')],
            # Note: These match Facebook's Torch implem
            [('conv2d', 256, 1, 1, 'SAME'),
             ('conv2d', 384, 3, 2, 'VALID')],
            [('conv2d', 256, 1, 1, 'SAME'),
             ('conv2d', 256, 3, 2, 'VALID')],
            [('conv2d', 256, 1, 1, 'SAME'),
             ('conv2d', 256, 3, 1, 'SAME'),
             ('conv2d', 256, 3, 2, 'VALID')]
        ]
        return builder.inception_module(x, 'incept_resnet_v2_rb', cols)

    # Residual branch bodies: an inception module followed by a linear 1x1
    # convolution; these are handed to builder.residual2d below.
    def inception_resnet_v2_a(x):
        cols = [[('conv2d', 32, 1, 1, 'SAME')],
                [('conv2d', 32, 1, 1, 'SAME'),
                 ('conv2d', 32, 3, 1, 'SAME')],
                [('conv2d', 32, 1, 1, 'SAME'),
                 ('conv2d', 48, 3, 1, 'SAME'),
                 ('conv2d', 64, 3, 1, 'SAME')]]
        x = builder.inception_module(x, 'incept_resnet_v2_a', cols)
        x = builder.conv2d_linear(x, 384, 1, 1, 'SAME')
        return x

    def inception_resnet_v2_b(x):
        cols = [[('conv2d', 192, 1, 1, 'SAME')],
                [('conv2d', 128, 1, 1, 'SAME'),
                 ('conv2d', 160, (1, 7), 1, 'SAME'),
                 ('conv2d', 192, (7, 1), 1, 'SAME')]]
        x = builder.inception_module(x, 'incept_resnet_v2_b', cols)
        x = builder.conv2d_linear(x, 1152, 1, 1, 'SAME')
        return x

    def inception_resnet_v2_c(x):
        cols = [[('conv2d', 192, 1, 1, 'SAME')],
                [('conv2d', 192, 1, 1, 'SAME'),
                 ('conv2d', 224, (1, 3), 1, 'SAME'),
                 ('conv2d', 256, (3, 1), 1, 'SAME')]]
        x = builder.inception_module(x, 'incept_resnet_v2_c', cols)
        x = builder.conv2d_linear(x, 2048, 1, 1, 'SAME')
        return x

    # Passed as ``scale`` to every residual2d call below.
    residual_scale = 0.2
    x = inputs
    # Stem
    x = builder.conv2d(x, 32, 3, 2, 'VALID')
    x = builder.conv2d(x, 32, 3, 1, 'VALID')
    x = builder.conv2d(x, 64, 3, 1, 'SAME')
    x = inception_v4_sa(x)
    x = inception_v4_sb(x)
    x = inception_v4_sc(x)
    # 5 x A blocks, reduction, 10 x B blocks, reduction, 5 x C blocks
    for _ in range(5):
        x = builder.residual2d(x, inception_resnet_v2_a, scale=residual_scale)
    x = inception_v4_ra(x, 256, 256, 384, 384)
    for _ in range(10):
        x = builder.residual2d(x, inception_resnet_v2_b, scale=residual_scale)
    x = inception_resnet_v2_rb(x)
    for _ in range(5):
        x = builder.residual2d(x, inception_resnet_v2_c, scale=residual_scale)
    x = builder.spatial_average2d(x)
    # NOTE(review): 0.8 is passed as dropout's second argument; whether that
    # is a keep-probability or a drop-rate depends on nvutils - confirm.
    x = builder.dropout(x, 0.8)
    return x