def _traced_module(network, n_filters, n_layers):
    group = []
    for index in range(n_layers):
        identity = network
        residual = _normalized_convolution(network, n_filters=n_filters)
        residual = _normalized_convolution(residual, n_filters=n_filters)
        # Tap the residual branch for inspection; terminate_gradient stops
        # backpropagation through the trace, so training is unaffected.
        trace = layers.terminate_gradient(residual)
        trace = layers.ReLU(trace)
        trace = layers.flatten(trace)
        group.append(trace)
        network = identity + residual
    network = layers.batch_normalization(network, fix_gamma=False)
    network = layers.ReLU(network)
    network = layers.pooling(X=network, mode='average', kernel_shape=(8, 8), stride=(1, 1), pad=(0, 0))
    network = layers.flatten(network)
    network = layers.batch_normalization(network, fix_gamma=False)
    network = layers.fully_connected(X=network, n_hidden_units=10)
    network = layers.terminate_gradient(network)
    group.append(network)
    return layers.group(group)
def build_network(n_layers):
    network = layers.variable('data')
    network = _convolution(X=network, n_filters=16)
    for n_filters in (16, 32):
        network = _module(network, n_filters, n_layers)
        network = _transit(network, n_filters * 2)
    network = _module(network, 64, n_layers)
    network = layers.batch_normalization(network, fix_gamma=False)
    network = layers.ReLU(network)
    network = layers.pooling(X=network, mode='average', kernel_shape=(8, 8), stride=(1, 1), pad=(0, 0))
    network = layers.flatten(network)
    network = layers.batch_normalization(network, fix_gamma=False)
    network = layers.fully_connected(X=network, n_hidden_units=10)
    network = layers.softmax_loss(prediction=network, normalization='batch', id='softmax')
    return network
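# Hedged usage sketch for build_network: assuming _module stacks two 3x3
# convolutions per residual block (as in _traced_module above) and _transit is
# the pooling-and-padding variant that adds no weighted layers, the three
# stages contribute 6 * n_layers convolutions; with the stem convolution and
# the final fully connected layer this is the classic 6n + 2 CIFAR-10 depth.
network = build_network(n_layers=3)  # hypothetical call: a 20-layer network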
def _lstm_attention_module(network, settings):
    global _n_lstm_attention_module
    prefix = 'lstm_attention_module%d' % _n_lstm_attention_module
    n_filters = settings['convolution_settings']['n_filters']
    memory_settings = {'n_filters': n_filters}
    X_weight = layers.variable('%s_X_weight' % prefix, shape=(4 * n_filters, n_filters, 3, 3))
    h_weight = layers.variable('%s_h_weight' % prefix, shape=(4 * n_filters, n_filters, 3, 3))
    lstm_bias = layers.variable('%s_lstm_bias' % prefix, shape=(1, 4 * n_filters, 1, 1))
    lstm_parameters = (X_weight, h_weight, lstm_bias)
    memory = (0, 0)
    # Copy the shared settings so the weight-sharing branch does not mutate them.
    kwargs = dict(settings['convolution_settings'])
    if settings['weight_sharing']:
        kwargs['weight'] = layers.variable('%s_weight' % prefix)
        kwargs['bias'] = layers.variable('%s_bias' % prefix)
    network = layers.batch_normalization(network)
    for index in range(settings['n_layers']):
        memory = _write(network, memory_settings, lstm_parameters, memory)
        network = _read(memory_settings, memory)
        network = _normalized_convolution(X=network, **kwargs)
        memory = _write(network, memory_settings, lstm_parameters, memory)
        network = _read(memory_settings, memory)
        network = _normalized_convolution(X=network, **kwargs)
    _n_lstm_attention_module += 1
    return network
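# The 4 * n_filters leading dimension above matches the convolutional-LSTM
# convention of stacking the input, forget, output, and candidate gate kernels
# into one convolution; the gate split itself presumably happens inside _write,
# whose four-argument variant is not shown here. Worked shapes under that
# assumption, with n_filters = 16 and NCHW layout:
#   X_weight, h_weight: (64, 16, 3, 3) -> 64 output channels, i.e. four
#                       16-channel gate pre-activations per position
#   lstm_bias:          (1, 64, 1, 1)  -> one bias per gate channel,
#                       broadcast over batch and spatial axes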
def build_network(n_layers):
    network = layers.variable('data')
    network = _convolution(X=network, n_filters=16)
    convolution_settings = {'n_filters': None}
    settings = {
        'convolution_settings': convolution_settings,
        'n_layers': n_layers,
        'weight_sharing': False,
    }
    for n_filters in (16, 32):
        convolution_settings['n_filters'] = n_filters
        network = _rnn_attention_module(network, settings)
        network = _transit(network, n_filters * 2)
    convolution_settings['n_filters'] = 64
    network = _rnn_attention_module(network, settings)
    network = layers.pooling(X=network, mode='average', kernel_shape=(8, 8), stride=(1, 1), pad=(0, 0))
    network = layers.flatten(network)
    network = layers.batch_normalization(network, fix_gamma=False)
    network = layers.fully_connected(X=network, n_hidden_units=10)
    network = layers.softmax_loss(prediction=network, normalization='batch', id='softmax')
    return network
def rnn_attention_network(settings):
    network = layers.variable('data')
    network = layers.batch_normalization(network)
    for module_settings in settings:
        if module_settings['operator'] == 'rnn_attention_module':
            network = _rnn_attention_module(network, module_settings['settings'])
        elif module_settings['operator'] == 'transit':
            network = _transit(network, module_settings['n_filters'])
        else:
            # Generic dispatch: look the operator up on `layers` and thread the
            # running symbol through as the first positional or the X keyword.
            args = module_settings.get('args', tuple())
            kwargs = dict(module_settings.get('kwargs', {}))
            if args:
                args = (network,) + args
            else:
                kwargs['X'] = network
            network = getattr(layers, module_settings['operator'])(*args, **kwargs)
    network = layers.pooling(X=network, mode='average', global_pool=True, kernel_shape=(1, 1), stride=(1, 1), pad=(1, 1))
    network = layers.flatten(network)
    network = layers.fully_connected(X=network, n_hidden_units=10)
    network = layers.softmax_loss(prediction=network, normalization='batch', id='softmax')
    return network
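# A hedged sketch of a settings list rnn_attention_network could consume; the
# hyperparameter values are illustrative, not taken from the original code.
settings = [
    {'operator': 'rnn_attention_module',
     'settings': {'convolution_settings': {'n_filters': 16},
                  'n_layers': 3,
                  'weight_sharing': False}},
    {'operator': 'transit', 'n_filters': 32},
    # Fallback branch: resolved via getattr(layers, 'pooling'); with no
    # positional 'args', the running symbol is passed as the keyword X.
    {'operator': 'pooling',
     'kwargs': {'mode': 'maximum', 'kernel_shape': (2, 2), 'stride': (2, 2), 'pad': (0, 0)}},
]
network = rnn_attention_network(settings)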
def _normalized_pooling(network, kernel_shape, stride, pad):
    network = layers.pooling(X=network, mode='maximum', kernel_shape=kernel_shape, stride=stride, pad=pad)
    network = layers.batch_normalization(network)
    return network
def _normalized_convolution(network, kernel_shape, n_filters, stride, pad, weight=None, bias=None):
    args = {
        'X': network,
        'kernel_shape': kernel_shape,
        'n_filters': n_filters,
        'stride': stride,
        'pad': pad,
    }
    if weight is not None:
        args['weight'] = weight
    if bias is not None:
        args['bias'] = bias
    network = layers.convolution(**args)
    network = layers.batch_normalization(network)
    network = layers.ReLU(network)
    return network
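# The optional weight/bias arguments above enable parameter sharing across
# convolutions. A minimal hypothetical usage, mirroring the weight-shared
# residual script later in this collection:
shared_weight = layers.variable('shared_weight')
network = _normalized_convolution(network, (3, 3), 16, (1, 1), (1, 1), weight=shared_weight)
network = _normalized_convolution(network, (3, 3), 16, (1, 1), (1, 1), weight=shared_weight)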
def _normalized_convolution(network, kernel_shape, n_filters, stride, pad):
    network = layers.convolution(X=network, kernel_shape=kernel_shape, n_filters=n_filters, stride=stride, pad=pad)
    network = layers.batch_normalization(network)
    network = layers.ReLU(network)
    return network
def _normalized_convolution(network, kernel_shape, n_filters, stride, pad):
    args = {
        'X': network,
        'kernel_shape': kernel_shape,
        'n_filters': n_filters,
        'stride': stride,
        'pad': pad,
    }
    network = layers.convolution(**args)
    network = layers.batch_normalization(network)
    network = layers.ReLU(network)
    return network
def _normalized_unweighted_convolution(network, kernel_shape, n_filters, stride, pad):
    network = unweighted_convolution(
        X=network,
        kernel_shape=kernel_shape,
        n_filters=n_filters,
        stride=stride,
        pad=pad,
        data_shape=(1, 3, 32, 32),
    )
    network = layers.batch_normalization(network)
    network = layers.ReLU(network)
    return network
def unattentioned_network(times, function=average, n_classes=10):
    # TODO simplify network structure
    network = layers.variable('data')
    cache = []
    for time in range(times):
        network = _normalized_convolution(network, (3, 3), 16, (1, 1), (1, 1))
        cache.append(network)
    network = layers.batch_normalization(function(cache))
    network = _normalized_convolution(network, (3, 3), 16, (2, 2), (1, 1))
    network = _normalized_convolution(network, (3, 3), 16, (2, 2), (1, 1))
    network = layers.pooling(X=network, mode='average', kernel_shape=(8, 8), stride=(1, 1), pad=(0, 0))
    network = layers.fully_connected(X=network, n_hidden_units=n_classes)
    network = layers.softmax_loss(network, normalization='batch')
    return network
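# `average` is not defined in this snippet. A plausible sketch of the default
# merge function, assuming symbols support elementwise + (used for residual
# sums elsewhere) and division by a Python scalar:
def average(cache):
    return sum(cache) / len(cache)

network = unattentioned_network(times=4)  # hypothetical call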
# `network`, `shared_weight`, and `args` are assumed to be defined earlier in
# this script (e.g. network = layers.variable('data') and a parsed argparse
# namespace); only the weight-shared residual tail is shown here.
shared_gamma = layers.variable('shared_gamma')
shared_beta = layers.variable('shared_beta')
_convolution = lambda X: layers.convolution(
    X=X, n_filters=16, kernel_shape=(3, 3), stride=(1, 1), pad=(1, 1), weight=shared_weight, no_bias=True
    # X=X, n_filters=16, kernel_shape=(5, 5), stride=(1, 1), pad=(2, 2), weight=shared_weight, no_bias=True
)
for index in range(args.n_residual_layers):
    # Every iteration reuses the same convolution weight, gamma, and beta.
    network = layers.batch_normalization(network, beta=shared_beta, gamma=shared_gamma, fix_gamma=False)
    network = layers.ReLU(network)
    network += _convolution(network)
network = layers.pooling(X=network, mode='average', kernel_shape=(7, 7), stride=(1, 1), pad=(0, 0))
# network = layers.pooling(X=network, mode='average', kernel_shape=(14, 14), stride=(1, 1), pad=(0, 0))
network = layers.flatten(network)
network = layers.fully_connected(X=network, n_hidden_units=10)
network = layers.softmax_loss(prediction=network, normalization='batch', id='softmax')
def _normalized_convolution(**kwargs):
    network = layers.convolution(**kwargs)
    network = layers.batch_normalization(network)
    network = layers.ReLU(network)
    return network
def _write(X, memory):
    # Accumulate the input into the running memory and renormalize.
    memory = layers.batch_normalization(memory + X)
    return memory
def _normalized_convolution(**kwargs):
    network = layers.convolution(no_bias=True, **kwargs)
    network = layers.batch_normalization(network, fix_gamma=False)
    network = layers.ReLU(network)
    return network
def _normalized_convolution(network, **kwargs):
    # Pre-activation ordering: batch norm and ReLU precede the convolution.
    network = layers.batch_normalization(network, fix_gamma=False)
    network = layers.ReLU(network)
    network = _convolution(X=network, **kwargs)
    return network
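# A hedged sketch of a full pre-activation residual block built from the
# variant above; the kernel arguments assume _convolution accepts the usual
# n_filters/kernel_shape/stride/pad keywords.
identity = network
residual = _normalized_convolution(network, n_filters=16, kernel_shape=(3, 3), stride=(1, 1), pad=(1, 1))
residual = _normalized_convolution(residual, n_filters=16, kernel_shape=(3, 3), stride=(1, 1), pad=(1, 1))
network = identity + residual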
import sys


def _transit(network, n_filters, mode):
    # Left branch: strided convolutions that learn the downsampling.
    left = _normalized_convolution(X=network, n_filters=n_filters // 2, kernel_shape=(3, 3), stride=(2, 2), pad=(1, 1))
    left = _normalized_convolution(X=left, n_filters=n_filters, kernel_shape=(3, 3), stride=(1, 1), pad=(1, 1))
    # Right branch: parameter-free average pooling plus zero-padded channels.
    right = layers.pooling(X=network, mode='average', kernel_shape=(2, 2), stride=(2, 2), pad=(0, 0))
    pad_width = (0, 0, 0, n_filters // 2, 0, 0, 0, 0)
    right = layers.pad(right, pad_width, 'constant')
    if mode == 'left':
        return left
    elif mode == 'right':
        return right
    elif mode == 'left-right':
        return left + right


N = int(sys.argv[1])
mode = sys.argv[2]
network = layers.variable('data')
network = layers.batch_normalization(network)
network = _normalized_convolution(X=network, n_filters=16, kernel_shape=(3, 3), stride=(1, 1), pad=(1, 1))
for index in range(N):
    identity = network
    residual = _normalized_convolution(X=network, n_filters=16, kernel_shape=(3, 3), stride=(1, 1), pad=(1, 1))
    residual = _normalized_convolution(X=residual, n_filters=16, kernel_shape=(3, 3), stride=(1, 1), pad=(1, 1))
    network = identity + residual
network = _transit(network, 32, mode)
for index in range(N):
    identity = network
    residual = _normalized_convolution(X=network, n_filters=32, kernel_shape=(3, 3), stride=(1, 1), pad=(1, 1))
    residual = _normalized_convolution(X=residual, n_filters=32, kernel_shape=(3, 3), stride=(1, 1), pad=(1, 1))
    network = identity + residual
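# Usage sketch for the ablation script above (the filename is hypothetical):
#   python transit_ablation.py 3 left-right
# N = 3 residual blocks per stage; the second argument selects which _transit
# branch survives: 'left' (strided convolutions), 'right' (average pooling
# plus zero-padded channels), or 'left-right' (their sum).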
def _transit(network, n_filters):
    network = layers.pooling(X=network, mode='average', kernel_shape=(2, 2), stride=(2, 2), pad=(0, 0))
    network = layers.batch_normalization(network)
    # Zero-pad the channel axis from n_filters // 2 up to n_filters channels.
    pad_width = (0, 0, 0, n_filters // 2, 0, 0, 0, 0)
    network = layers.pad(network, pad_width, 'constant')
    return network
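# Worked shapes for this _transit, assuming NCHW layout as in the
# (1, 3, 32, 32) data_shape used earlier and n_filters = 32:
#   input (batch, 16, 32, 32)
#     -> 2x2/2 average pooling       -> (batch, 16, 16, 16)
#     -> zero-pad channels by 32 // 2 -> (batch, 32, 16, 16)
network = _transit(network, n_filters=32)  # hypothetical call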