Example No. 1
def _traced_module(network, n_filters, n_layers):
  group = []

  for index in range(n_layers):
    identity = network

    residual = _normalized_convolution(network, n_filters=n_filters)
    residual = _normalized_convolution(residual, n_filters=n_filters)

    # Tap the residual branch through terminate_gradient so its flattened
    # trace can be inspected without back-propagating through the tap.
    trace = layers.terminate_gradient(residual)
    trace = layers.ReLU(trace)
    trace = layers.flatten(trace)
    group.append(trace)

    network = identity + residual

  network = layers.batch_normalization(network, fix_gamma=False)
  network = layers.ReLU(network)

  network = layers.pooling(X=network, mode='average', kernel_shape=(8, 8), stride=(1, 1), pad=(0, 0))
  network = layers.flatten(network)
  network = layers.batch_normalization(network, fix_gamma=False)
  network = layers.fully_connected(X=network, n_hidden_units=10)
  network = layers.terminate_gradient(network)
  group.append(network)

  return layers.group(group)
Example No. 2
def build_network(n_layers):
    network = layers.variable('data')
    network = _convolution(X=network, n_filters=16)

    for n_filters in (16, 32):
        network = _module(network, n_filters, n_layers)
        network = _transit(network, n_filters * 2)

    network = _module(network, 64, n_layers)
    network = layers.batch_normalization(network, fix_gamma=False)
    network = layers.ReLU(network)

    network = layers.pooling(X=network,
                             mode='average',
                             kernel_shape=(8, 8),
                             stride=(1, 1),
                             pad=(0, 0))
    network = layers.flatten(network)
    network = layers.batch_normalization(network, fix_gamma=False)
    network = layers.fully_connected(X=network, n_hidden_units=10)
    network = layers.softmax_loss(prediction=network,
                                  normalization='batch',
                                  id='softmax')

    return network
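A minimal usage sketch (assumes the module's layers wrapper and the _convolution, _module and _transit helpers are in scope; the depth value is illustrative):

network = build_network(n_layers=3)  # three residual blocks per 16/32/64-filter stage, 10-way output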
Example No. 3
def _lstm_attention_module(network, settings):
  global _n_lstm_attention_module
  prefix = 'lstm_attention_module%d' % _n_lstm_attention_module

  n_filters = settings['convolution_settings']['n_filters']
  memory_settings = {'n_filters' : n_filters}
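  # The 4 * n_filters output channels presumably pack the four ConvLSTM gate
  # pre-activations (input, forget, output and cell candidate) in one tensor.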
  X_weight = layers.variable('%s_X_weight' % prefix, shape=(4 * n_filters, n_filters, 3, 3))
  h_weight = layers.variable('%s_h_weight' % prefix, shape=(4 * n_filters, n_filters, 3, 3))
  lstm_bias = layers.variable('%s_lstm_bias' % prefix, shape=(1, 4 * n_filters, 1, 1))
  lstm_parameters = (X_weight, h_weight, lstm_bias)
  memory = (0, 0)

  kwargs = dict(settings['convolution_settings'])  # copy: the weight/bias keys added below must not leak into settings
  if settings['weight_sharing']:
    kwargs['weight'] = layers.variable('%s_weight' % prefix)
    kwargs['bias'] = layers.variable('%s_bias' % prefix)
  network = layers.batch_normalization(network)
  for index in range(settings['n_layers']):
    memory = _write(network, memory_settings, lstm_parameters, memory)
    network = _read(memory_settings, memory)
    network = _normalized_convolution(X=network, **kwargs)
    memory = _write(network, memory_settings, lstm_parameters, memory)
    network = _read(memory_settings, memory)
    network = _normalized_convolution(X=network, **kwargs)

  _n_lstm_attention_module += 1
  return network
Example No. 4
def build_network(n_layers):
    network = layers.variable('data')
    network = _convolution(X=network, n_filters=16)

    convolution_settings = {'n_filters': None}
    settings = {
        'convolution_settings': convolution_settings,
        'n_layers': n_layers,
        'weight_sharing': False
    }

    for n_filters in (16, 32):
        convolution_settings['n_filters'] = n_filters
        network = _rnn_attention_module(network, settings)
        network = _transit(network, n_filters * 2)

    convolution_settings['n_filters'] = 64
    network = _rnn_attention_module(network, settings)

    network = layers.pooling(X=network,
                             mode='average',
                             kernel_shape=(8, 8),
                             stride=(1, 1),
                             pad=(0, 0))
    network = layers.flatten(network)
    network = layers.batch_normalization(network, fix_gamma=False)
    network = layers.fully_connected(X=network, n_hidden_units=10)
    network = layers.softmax_loss(prediction=network,
                                  normalization='batch',
                                  id='softmax')

    return network
Example No. 5
def rnn_attention_network(settings):
    network = layers.variable('data')
    network = layers.batch_normalization(network)
    for module_settings in settings:
        if module_settings['operator'] == 'rnn_attention_module':
            network = _rnn_attention_module(network,
                                            module_settings['settings'])
        elif module_settings['operator'] == 'transit':
            network = _transit(network, module_settings['n_filters'])
        else:
            args = module_settings.get('args', tuple())
            kwargs = dict(module_settings.get('kwargs', {}))
            if args:
                args = (network,) + args
            else:
                kwargs['X'] = network
            network = getattr(layers, module_settings['operator'])(*args,
                                                                   **kwargs)
    network = layers.pooling(X=network,
                             mode='average',
                             global_pool=True,
                             kernel_shape=(1, 1),
                             stride=(1, 1),
                             pad=(1, 1))
    network = layers.flatten(network)
    network = layers.fully_connected(X=network, n_hidden_units=10)
    network = layers.softmax_loss(prediction=network,
                                  normalization='batch',
                                  id='softmax')
    return network
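A hypothetical settings list matching the dispatch loop above (the keys and operator names are inferred from this code, not taken from the original project):

settings = [
    {'operator': 'rnn_attention_module',
     'settings': {'convolution_settings': {'n_filters': 16},
                  'n_layers': 3,
                  'weight_sharing': False}},
    {'operator': 'transit', 'n_filters': 32},
    {'operator': 'pooling',
     'kwargs': {'mode': 'maximum', 'kernel_shape': (2, 2),
                'stride': (2, 2), 'pad': (0, 0)}},
]
network = rnn_attention_network(settings)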
Example No. 6
def _normalized_pooling(network, kernel_shape, stride, pad):
    network = layers.pooling(X=network,
                             mode='maximum',
                             kernel_shape=kernel_shape,
                             stride=stride,
                             pad=pad)
    network = layers.batch_normalization(network)
    return network
Example No. 7
def _normalized_convolution(network, kernel_shape, n_filters, stride, pad, weight=None, bias=None):
  args = {'X' : network, 'kernel_shape' : kernel_shape, 'n_filters' : n_filters, 'stride' : stride, 'pad' : pad}
  if weight is not None: args['weight'] = weight
  if bias is not None: args['bias'] = bias
  network = layers.convolution(**args)
  network = layers.batch_normalization(network)
  network = layers.ReLU(network)
  return network
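A sketch of the weight sharing this signature allows (the variable names are illustrative): two calls reuse the same convolution parameters.

shared_weight = layers.variable('shared_weight')
shared_bias = layers.variable('shared_bias')
network = _normalized_convolution(network, (3, 3), 16, (1, 1), (1, 1),
                                  weight=shared_weight, bias=shared_bias)
network = _normalized_convolution(network, (3, 3), 16, (1, 1), (1, 1),
                                  weight=shared_weight, bias=shared_bias)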
Example No. 8
def _normalized_convolution(network, kernel_shape, n_filters, stride, pad):
    network = layers.convolution(X=network,
                                 kernel_shape=kernel_shape,
                                 n_filters=n_filters,
                                 stride=stride,
                                 pad=pad)
    network = layers.batch_normalization(network)
    network = layers.ReLU(network)
    return network
Example No. 9
def _normalized_convolution(network, kernel_shape, n_filters, stride, pad):
    args = {
        'X': network,
        'kernel_shape': kernel_shape,
        'n_filters': n_filters,
        'stride': stride,
        'pad': pad
    }
    network = layers.convolution(**args)
    network = layers.batch_normalization(network)
    network = layers.ReLU(network)
    return network
Example No. 10
def _normalized_unweighted_convolution(network, kernel_shape, n_filters, stride, pad):
  network = unweighted_convolution(
    X            = network,
    kernel_shape = kernel_shape,
    n_filters    = n_filters,
    stride       = stride,
    pad          = pad,
    data_shape   = (1, 3, 32, 32),
  )
  network = layers.batch_normalization(network)
  network = layers.ReLU(network)
  return network
Example No. 11
def unattentioned_network(times, function=average, n_classes=10):
  # TODO simplify network structure
  network = layers.variable('data')
  cache = []
  for time in range(times):
    network = _normalized_convolution(network, (3, 3), 16, (1, 1), (1, 1))
    cache.append(network)
  network = layers.batch_normalization(function(cache))
  network = _normalized_convolution(network, (3, 3), 16, (2, 2), (1, 1))
  network = _normalized_convolution(network, (3, 3), 16, (2, 2), (1, 1))
  network = layers.pooling(X=network, mode='average', kernel_shape=(8, 8), stride=(1, 1), pad=(0, 0))
  network = layers.fully_connected(X=network, n_hidden_units=n_classes)
  network = layers.softmax_loss(network, normalization='batch')
  return network
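A plausible definition of the average aggregator used as the default above (an assumption; the original defines it elsewhere):

def average(symbols):
    # element-wise mean of the cached per-step activations
    return sum(symbols) / len(symbols)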
Example No. 12
shared_weight = layers.variable('shared_weight')  # referenced by _convolution below; assumed definition
shared_gamma = layers.variable('shared_gamma')
shared_beta = layers.variable('shared_beta')

_convolution = lambda X: layers.convolution(
    X=X,
    n_filters=16,
    kernel_shape=(3, 3),
    stride=(1, 1),
    pad=(1, 1),
    weight=shared_weight,
    no_bias=True
    # X=X, n_filters=16, kernel_shape=(5, 5), stride=(1, 1), pad=(2, 2), weight=shared_weight, no_bias=True
)
network = layers.variable('data')  # assumed entry point; the excerpt begins mid-script

for index in range(args.n_residual_layers):
    network = layers.batch_normalization(network,
                                         beta=shared_beta,
                                         gamma=shared_gamma,
                                         fix_gamma=False)
    network = layers.ReLU(network)
    network += _convolution(network)

network = layers.pooling(X=network,
                         mode='average',
                         kernel_shape=(7, 7),
                         stride=(1, 1),
                         pad=(0, 0))
# network = layers.pooling(X=network, mode='average', kernel_shape=(14, 14), stride=(1, 1), pad=(0, 0))
network = layers.flatten(network)
network = layers.fully_connected(X=network, n_hidden_units=10)
network = layers.softmax_loss(prediction=network,
                              normalization='batch',
                              id='softmax')
Example No. 13
def _normalized_convolution(**args):
  network = layers.convolution(**args)
  network = layers.batch_normalization(network)
  network = layers.ReLU(network)
  return network
Example No. 14
def _write(X, memory):
  # accumulate the new activation into the memory and renormalize
  memory = layers.batch_normalization(memory + X)
  return memory
Example No. 15
def _normalized_convolution(**kwargs):
    network = layers.convolution(no_bias=True, **kwargs)
    network = layers.batch_normalization(network, fix_gamma=False)
    network = layers.ReLU(network)
    return network
Example No. 16
def _normalized_convolution(network, **kwargs):
    network = layers.batch_normalization(network, fix_gamma=False)
    network = layers.ReLU(network)
    network = _convolution(X=network, **kwargs)
    return network
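Note the ordering here: unlike Examples No. 8 and No. 9, which convolve first and then normalize and activate, this variant applies batch normalization and ReLU before the convolution, i.e. the pre-activation ordering commonly paired with identity-shortcut residual blocks.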
Example No. 17
def _transit(network, n_filters, mode):
  left = _normalized_convolution(X=network, n_filters=n_filters // 2, kernel_shape=(3, 3), stride=(2, 2), pad=(1, 1))
  left = _normalized_convolution(X=left, n_filters=n_filters, kernel_shape=(3, 3), stride=(1, 1), pad=(1, 1))
  right = layers.pooling(X=network, mode='average', kernel_shape=(2, 2), stride=(2, 2), pad=(0, 0))
  pad_width = (0, 0, 0, n_filters // 2, 0, 0, 0, 0)
  right = layers.pad(right, pad_width, 'constant')
  if mode == 'left': return left
  elif mode == 'right': return right
  elif mode == 'left-right': return left + right

N = int(sys.argv[1])
mode = sys.argv[2]

network = layers.variable('data')
network = layers.batch_normalization(network)

network = _normalized_convolution(X=network, n_filters=16, kernel_shape=(3, 3), stride=(1, 1), pad=(1, 1))
for index in range(N):
  identity = network
  residual = _normalized_convolution(X=network, n_filters=16, kernel_shape=(3, 3), stride=(1, 1), pad=(1, 1))
  residual = _normalized_convolution(X=residual, n_filters=16, kernel_shape=(3, 3), stride=(1, 1), pad=(1, 1))
  network = identity + residual

network = _transit(network, 32, mode)
for index in range(N):
  identity = network
  residual = _normalized_convolution(X=network, n_filters=32, kernel_shape=(3, 3), stride=(1, 1), pad=(1, 1))
  residual = _normalized_convolution(X=residual, n_filters=32, kernel_shape=(3, 3), stride=(1, 1), pad=(1, 1))
  network = identity + residual
Example No. 18
def _transit(network, n_filters):
  network = layers.pooling(X=network, mode='average', kernel_shape=(2, 2), stride=(2, 2), pad=(0, 0))
  network = layers.batch_normalization(network)
  pad_width = (0, 0, 0, n_filters // 2, 0, 0, 0, 0)
  network = layers.pad(network, pad_width, 'constant')
  return network
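A shape walk-through of this transit block, emulated with NumPy under an assumed NCHW layout and n_filters=32 (pad_width is read as (before, after) pairs for the batch, channel, height and width axes):

import numpy as np

x = np.zeros((1, 16, 32, 32))                               # stage input with 16 channels
pooled = x[:, :, ::2, ::2]                                  # stride-2 pooling halves H and W
padded = np.pad(pooled, ((0, 0), (0, 16), (0, 0), (0, 0)))  # append n_filters // 2 = 16 zero channels
assert padded.shape == (1, 32, 16, 16)                      # matches the next stage's filter count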