def _transit(network, mode):
    """Apply the downsampling transition selected by ``mode``.

    Parameters
    ----------
    network : symbol
        Input feature map.
    mode : str
        One of ``'convolution + dropout'``, ``'pooling + dropout'`` or
        ``'stochastic_pooling'``.  Any other value returns ``network``
        unchanged.

    Returns
    -------
    symbol
        The (possibly) downsampled network.
    """
    # BUG FIX: the original compared strings with ``is``, which tests object
    # identity and only "works" when CPython happens to intern both strings.
    # ``==`` is the correct, guaranteed comparison.
    if mode == 'convolution + dropout':
        network = _activated_convolution(X=network,
                                         kernel_shape=(2, 2),
                                         stride=(2, 2),
                                         pad=(0, 0))
        network = layers.dropout(network, 0.5)
    elif mode == 'pooling + dropout':
        network = layers.pooling(X=network,
                                 mode='average',
                                 kernel_shape=(2, 2),
                                 stride=(2, 2),
                                 pad=(0, 0))
        network = layers.dropout(network, 0.5)
    elif mode == 'stochastic_pooling':
        # NOTE(review): despite the name, this branch is a strided convolution
        # followed by average pooling, not true stochastic pooling — confirm
        # whether that is intentional.
        network = _activated_convolution(X=network,
                                         kernel_shape=(2, 2),
                                         stride=(2, 2),
                                         pad=(0, 0))
        network = layers.pooling(X=network,
                                 mode='average',
                                 kernel_shape=(2, 2),
                                 stride=(2, 2),
                                 pad=(0, 0))
    return network
def _extract_representations(network, parameters, batch_size):
    """Run three (convolution, max-pool) stages over ``network``.

    Each stage's convolution uses ``parameters[i]['weight']`` when that
    weight has been generated (hypernetwork mode); otherwise it falls back
    to an ordinary learned convolution.
    """
    # Mutable one-element list lets the nested function advance the layer
    # counter without ``nonlocal`` (keeps Python 2 compatibility).
    counter = [0]

    def next_convolution(*conv_args):
        index = counter[0]
        generated_weight = parameters[index]['weight']
        if generated_weight is None:
            result = _normalized_convolution(*conv_args)
        else:
            result = _normalized_batch_convolution(
                *(conv_args + ((batch_size, 3, 32, 32), generated_weight)))
        counter[0] += 1
        return result

    # Three identical conv + max-pool stages.
    for _ in range(3):
        network = next_convolution(network, (3, 3), 16, (1, 1), (1, 1))
        network = layers.pooling(X=network,
                                 mode='maximum',
                                 kernel_shape=(2, 2),
                                 stride=(2, 2),
                                 pad=(0, 0))
    return network
def build_network(n_layers):
    """Build an RNN-attention classification network (10 classes).

    Parameters
    ----------
    n_layers : int
        Number of layers per attention module.

    Returns
    -------
    symbol
        The softmax-loss output symbol.
    """
    network = layers.variable('data')
    network = _convolution(X=network, n_filters=16)
    # ``convolution_settings`` is shared by reference with ``settings`` and
    # mutated before each module, so every module sees the current value.
    convolution_settings = {'n_filters': None}
    settings = {
        'convolution_settings': convolution_settings,
        # BUG FIX: the original read ``args.n_layers`` from an outer-scope
        # ``args`` object, silently ignoring the ``n_layers`` parameter.
        'n_layers': n_layers,
        'weight_sharing': False,
    }
    for n_filters in (16, 32):
        convolution_settings['n_filters'] = n_filters
        network = _rnn_attention_module(network, settings)
        network = _transit(network, n_filters * 2)
    convolution_settings['n_filters'] = 64
    network = _rnn_attention_module(network, settings)
    network = layers.pooling(X=network,
                             mode='average',
                             kernel_shape=(8, 8),
                             stride=(1, 1),
                             pad=(0, 0))
    network = layers.flatten(network)
    network = layers.batch_normalization(network, fix_gamma=False)
    network = layers.fully_connected(X=network, n_hidden_units=10)
    network = layers.softmax_loss(prediction=network,
                                  normalization='batch',
                                  id='softmax')
    return network
def _transit(network, n_filters):
    """Downsampling transition: strided convolution branch + pooled shortcut.

    The convolution branch halves the spatial size once (stride 2 on the
    first convolution); the shortcut average-pools by 2 and zero-pads its
    channel dimension to match ``n_filters``.  The two branches are summed.
    """
    left = _normalized_convolution(X=network,
                                   n_filters=n_filters,
                                   kernel_shape=(3, 3),
                                   stride=(2, 2),
                                   pad=(1, 1))
    # BUG FIX: the second convolution also used stride (2, 2), downsampling
    # this branch twice while the pooled branch downsamples once — the two
    # branches could never be summed.  Stride (1, 1) matches the parallel
    # transition used elsewhere in this file.
    left = _normalized_convolution(X=left,
                                   n_filters=n_filters,
                                   kernel_shape=(3, 3),
                                   stride=(1, 1),
                                   pad=(1, 1))
    right = layers.pooling(X=network,
                           mode='average',
                           kernel_shape=(2, 2),
                           stride=(2, 2),
                           pad=(0, 0))
    # BUG FIX: ``n_filters / 2`` is a float under Python 3; pad widths must
    # be integers.  ``//`` is identical for Python 2 ints as well.
    pad_width = (0, 0, 0, n_filters // 2, 0, 0, 0, 0)
    right = layers.pad(right, pad_width, 'constant')
    # BUG FIX: the original returned ``right + right``, discarding the whole
    # convolution branch.
    return left + right
def attended_memory_network(settings):
    """Assemble a network from a declarative list of module settings.

    Each entry either names the special ``'attended_memory_module'`` or any
    attribute of ``layers``; in the latter case the current network is
    threaded in as the first positional arg (when ``args`` given) or as the
    ``X`` keyword.
    """
    network = layers.variable('data')
    for module_settings in settings:
        # BUG FIX: ``is`` compared string identity (interning-dependent);
        # ``==`` is the correct comparison.
        if module_settings['operator'] == 'attended_memory_module':
            network = _attended_memory_module(network,
                                              module_settings['settings'])
        else:
            args = module_settings.get('args', tuple())
            # Shallow-copy so the caller's settings dict is never mutated.
            kwargs = dict(module_settings.get('kwargs', {}))
            if args:
                args = (network,) + args
            else:
                kwargs['X'] = network
            network = getattr(layers, module_settings['operator'])(*args,
                                                                   **kwargs)
    # NOTE(review): pad=(1, 1) with global_pool=True looks unusual but is
    # preserved — several networks in this file do the same; confirm the
    # pooling op ignores pad in global mode.
    network = layers.pooling(X=network,
                             mode='average',
                             global_pool=True,
                             kernel_shape=(1, 1),
                             stride=(1, 1),
                             pad=(1, 1))
    network = layers.flatten(network)
    network = layers.fully_connected(X=network, n_hidden_units=10)
    network = layers.softmax_loss(prediction=network,
                                  normalization='batch',
                                  id='softmax')
    return network
def recurrent_hypernetwork(T, batch_size):
    """Unroll a hypernetwork for ``T`` steps and sum the per-step losses.

    At each step the features are extracted with the *current* generated
    weights (all ``None`` at step 0, i.e. plain learned convolutions), a
    classification loss is accumulated, and new convolution weights are
    generated from the step's features for use at the next step.
    """
    X = layers.variable('data')
    label = layers.variable('softmax_label')
    loss = 0
    # One weight/bias slot per convolution stage; mutated in-place below so
    # step t+1 sees the weights generated at step t.
    parameters = ({
        'weight': None,
        'bias': None
    }, {
        'weight': None,
        'bias': None
    }, {
        'weight': None,
        'bias': None
    })
    # First stage maps 3 input channels to 16; the next two map 16 to 16.
    KERNEL_SHAPES = ((3, 3, 3 * 16), ) + ((3, 3, 16 * 16), ) * 2
    for time in range(T):
        network = _extract_representations(X, parameters, batch_size)
        prediction = layers.pooling(X=network,
                                    mode='average',
                                    global_pool=True,
                                    kernel_shape=(1, 1),
                                    stride=(1, 1),
                                    pad=(0, 0))
        prediction = layers.flatten(prediction)
        prediction = layers.fully_connected(X=prediction, n_hidden_units=10)
        loss += layers.softmax_loss(prediction=prediction, label=label)
        # Generate next step's convolution weights from this step's features.
        for index, weight in enumerate(
                _generate_parameters(network, KERNEL_SHAPES)):
            parameters[index]['weight'] = weight
    return loss
def _traced_module(network, n_filters, n_layers):
    """Residual stack that also exposes a gradient-stopped "trace" of every
    residual branch, grouped together with the final (also gradient-stopped)
    logits.

    Returns a ``layers.group`` of ``n_layers`` flattened traces followed by
    the detached 10-way linear output.
    """
    traced_outputs = []
    for _ in range(n_layers):
        shortcut = network
        branch = _normalized_convolution(network, n_filters=n_filters)
        branch = _normalized_convolution(branch, n_filters=n_filters)
        # Detach the residual branch so the trace is observation-only.
        snapshot = layers.terminate_gradient(branch)
        snapshot = layers.ReLU(snapshot)
        traced_outputs.append(layers.flatten(snapshot))
        network = shortcut + branch
    network = layers.batch_normalization(network, fix_gamma=False)
    network = layers.ReLU(network)
    network = layers.pooling(X=network,
                             mode='average',
                             kernel_shape=(8, 8),
                             stride=(1, 1),
                             pad=(0, 0))
    network = layers.flatten(network)
    network = layers.batch_normalization(network, fix_gamma=False)
    network = layers.fully_connected(X=network, n_hidden_units=10)
    # The classifier output is detached as well.
    traced_outputs.append(layers.terminate_gradient(network))
    return layers.group(traced_outputs)
def nin(settings):
    """Network-in-Network for 10-class classification.

    Three mlpconv stages (3x3 conv followed by two 1x1 convs), separated by
    transitions selected by ``settings['transition_mode']``, then global
    average pooling straight into the softmax loss.
    """
    def mlpconv(net, n_first, n_mid, n_last):
        # One NiN stage: spatial 3x3 conv, then two 1x1 "MLP" convs.
        net = _activated_convolution(X=net,
                                     kernel_shape=(3, 3),
                                     n_filters=n_first,
                                     stride=(1, 1),
                                     pad=(1, 1))
        net = _activated_convolution(X=net,
                                     kernel_shape=(1, 1),
                                     n_filters=n_mid,
                                     stride=(1, 1),
                                     pad=(0, 0))
        net = _activated_convolution(X=net,
                                     kernel_shape=(1, 1),
                                     n_filters=n_last,
                                     stride=(1, 1),
                                     pad=(0, 0))
        return net

    network = layers.variable('data')
    network = mlpconv(network, 192, 160, 96)
    network = _transit(network, settings['transition_mode'])
    network = mlpconv(network, 192, 192, 192)
    network = _transit(network, settings['transition_mode'])
    # Last stage ends in 10 channels: one per class.
    network = mlpconv(network, 192, 192, 10)
    network = layers.pooling(X=network,
                             mode='average',
                             global_pool=True,
                             kernel_shape=(1, 1),
                             stride=(1, 1),
                             pad=(0, 0))
    network = layers.flatten(network)
    network = layers.softmax_loss(prediction=network,
                                  normalization='batch',
                                  id='softmax')
    return network
def build_network(args):
    """Build a residual network whose last module is a traced/RNN variant.

    ``args`` must provide ``n_layers`` and ``rnn`` (passed through to
    ``_traced_module``).
    """
    network = layers.variable('data')
    network = _convolution(X=network, n_filters=16)
    for n_filters in (16, 32):
        network = _module(network, n_filters, args.n_layers)
        network = _transit(network, n_filters * 2)
    # network = _module(network, 64, args.n_layers)
    # NOTE(review): this 4-argument ``_traced_module(network, args.rnn, 64,
    # args.n_layers)`` returning ``(_, rnn_cache)`` does not match the
    # 3-argument, single-return ``_traced_module`` defined elsewhere in this
    # file — they appear to come from different revisions; confirm which
    # definition is in scope here.
    _, rnn_cache = _traced_module(network, args.rnn, 64, args.n_layers)
    # network = layers.batch_normalization(network, fix_gamma=False)
    network = layers.batch_normalization(rnn_cache['h'], fix_gamma=False, id='BN')
    network = layers.ReLU(network)
    # NOTE(review): the BN + ReLU results above are discarded — this pooling
    # reads ``rnn_cache['h']`` again, not ``network``.  Possibly intentional
    # (the commented-out lines suggest experimentation), but verify.
    network = layers.pooling(X=rnn_cache['h'], mode='average', kernel_shape=(8, 8), stride=(1, 1), pad=(0, 0))
    network = layers.flatten(network)
    network = layers.fully_connected(X=network, n_hidden_units=10, id='linear')
    network = layers.softmax_loss(prediction=network, normalization='batch', id='softmax')
    return network
def _normalized_pooling(network, kernel_shape, stride, pad):
    """Max-pool ``network`` and batch-normalize the pooled result."""
    pooled = layers.pooling(X=network,
                            mode='maximum',
                            kernel_shape=kernel_shape,
                            stride=stride,
                            pad=pad)
    return layers.batch_normalization(pooled)
def naive_network(n_layers, weight_sharing):
    """Plain convolutional classifier with optional cross-layer weight sharing.

    When ``weight_sharing`` is true, every body convolution reuses one
    shared weight/bias pair (8 filters); otherwise each layer has its own
    16-filter convolution.
    """
    network = layers.variable('data')
    network = _normalized_convolution(X=network,
                                      n_filters=8,
                                      kernel_shape=(5, 5),
                                      stride=(1, 1),
                                      pad=(2, 2))
    network = layers.pooling(X=network,
                             mode='maximum',
                             kernel_shape=(2, 2),
                             stride=(2, 2),
                             pad=(0, 0))
    if weight_sharing:
        shared_weight = layers.variable('shared_weight')
        shared_bias = layers.variable('shared_bias')
    for _ in range(n_layers):
        if weight_sharing:
            network = _normalized_convolution(X=network,
                                              n_filters=8,
                                              kernel_shape=(3, 3),
                                              stride=(1, 1),
                                              pad=(1, 1),
                                              weight=shared_weight,
                                              bias=shared_bias)
        else:
            network = _normalized_convolution(X=network,
                                              n_filters=16,
                                              kernel_shape=(3, 3),
                                              stride=(1, 1),
                                              pad=(1, 1))
    network = layers.pooling(X=network,
                             mode='average',
                             global_pool=True,
                             kernel_shape=(1, 1),
                             stride=(1, 1),
                             pad=(1, 1))
    network = layers.flatten(network)
    network = layers.fully_connected(X=network, n_hidden_units=10)
    network = layers.softmax_loss(prediction=network,
                                  normalization='batch',
                                  id='softmax')
    return network
def _transit(network, module_index):
    """Downsampling transition between residual modules.

    Module 0 uses a single strided convolution.  Later modules sum a
    two-convolution strided branch with an average-pooled, channel-padded
    shortcut.
    """
    n_filters = {0: 16, 1: 32, 2: 64}[module_index]
    if module_index == 0:
        return _normalized_weighted_convolution(network, (3, 3), n_filters,
                                                (2, 2), (1, 1))
    P = _normalized_weighted_convolution(network, (3, 3), n_filters,
                                         (2, 2), (1, 1))
    P = _normalized_weighted_convolution(P, (3, 3), n_filters,
                                         (1, 1), (1, 1))
    Q = layers.pooling(X=network,
                       mode='average',
                       kernel_shape=(2, 2),
                       stride=(2, 2),
                       pad=(0, 0))
    # BUG FIX: ``n_filters / 2`` is a float under Python 3; pad widths must
    # be integers (``//`` is identical for Python 2 ints).
    Q = layers.pad(Q, (0, 0) + (0, n_filters // 2) + (0, 0) * 2, 'constant')
    return P + Q
def _transit(network, n_filters):
    """Downsampling residual transition.

    Shortcut branch: 2x2 max-pool then a 1x1 projection to ``n_filters``.
    Main branch: two normalized convolutions, the first with stride 2.
    The two halved-resolution branches are summed.
    """
    shortcut = layers.pooling(X=network,
                              mode='maximum',
                              kernel_shape=(2, 2),
                              stride=(2, 2),
                              pad=(0, 0))
    shortcut = _convolution(X=shortcut,
                            n_filters=n_filters,
                            kernel_shape=(1, 1),
                            pad=(0, 0))
    main = _normalized_convolution(network, n_filters=n_filters,
                                   stride=(2, 2))
    main = _normalized_convolution(main, n_filters=n_filters)
    return shortcut + main
def unattentioned_network(times, function=average, n_classes=10):
    """Baseline (no attention): ``times`` stacked convolutions whose outputs
    are combined by ``function`` (default: ``average``), then two strided
    convolutions and a classifier head.
    """
    # TODO simplify network structure
    network = layers.variable('data')
    stage_outputs = []
    for _ in range(times):
        network = _normalized_convolution(network, (3, 3), 16, (1, 1), (1, 1))
        stage_outputs.append(network)
    network = layers.batch_normalization(function(stage_outputs))
    network = _normalized_convolution(network, (3, 3), 16, (2, 2), (1, 1))
    network = _normalized_convolution(network, (3, 3), 16, (2, 2), (1, 1))
    network = layers.pooling(X=network,
                             mode='average',
                             kernel_shape=(8, 8),
                             stride=(1, 1),
                             pad=(0, 0))
    network = layers.fully_connected(X=network, n_hidden_units=n_classes)
    network = layers.softmax_loss(network, normalization='batch')
    return network
def _generate_parameters(network, kernel_shapes):
    """Generate one convolution-weight symbol per entry of ``kernel_shapes``.

    Each weight is produced by average-pooling ``network`` and projecting it
    with a 1x1 convolution to ``depth`` output channels.

    Parameters
    ----------
    network : symbol
        Feature map to derive the weights from.
    kernel_shapes : iterable of (width, height, depth)
        Only ``depth`` is consumed here; width/height describe the target
        kernel and are presumably handled downstream — TODO confirm.
    """
    weights = []
    for shape in kernel_shapes:
        # Only the channel count is used; the spatial dims are ignored here.
        _, _, depth = shape
        weight = layers.pooling(X=network,
                                mode='average',
                                kernel_shape=(2, 2),
                                stride=(2, 2),
                                pad=(1, 1))
        weight = _normalized_convolution(weight, (1, 1), depth, (1, 1), (0, 0))
        weights.append(weight)
    return weights
def dual_activation_network(n_layers):
    """Network whose every body layer sums a private convolution with one
    that reuses a single shared weight/bias pair across all layers."""
    shared_weight = layers.variable('shared_weight')
    shared_bias = layers.variable('shared_bias')
    network = layers.variable('data')
    network = _normalized_convolution(network, (3, 3), 16, (1, 1), (1, 1))
    for _ in range(n_layers):
        # Per-layer learned branch.
        private = _normalized_convolution(network, (3, 3), 16, (1, 1), (1, 1))
        # Branch tied to the one shared parameter set.
        shared = _normalized_convolution(network, (3, 3), 16, (1, 1), (1, 1),
                                         weight=shared_weight,
                                         bias=shared_bias)
        network = private + shared
    network = layers.pooling(X=network,
                             mode='average',
                             global_pool=True,
                             kernel_shape=(1, 1),
                             stride=(1, 1),
                             pad=(1, 1))
    network = layers.flatten(network)
    network = layers.fully_connected(X=network,
                                     n_hidden_units=10,
                                     name='linear_transition')
    network = layers.softmax_loss(prediction=network,
                                  normalization='batch',
                                  id='softmax')
    return network
def _attention_network(network):
    """Reduce ``network`` to a single scalar attention score per sample:
    conv -> global average pool -> flatten -> 1-unit linear layer."""
    score = _normalized_convolution(X=network,
                                    n_filters=8,
                                    kernel_shape=(3, 3),
                                    stride=(1, 1),
                                    pad=(1, 1))
    score = layers.pooling(X=score,
                           mode='average',
                           global_pool=True,
                           kernel_shape=(1, 1),
                           stride=(1, 1),
                           pad=(1, 1))
    score = layers.flatten(score)
    return layers.fully_connected(X=score, n_hidden_units=1)
def dense_network(settings, n_classes=10):
    """DenseNet-style classifier: a stem convolution, one dense module per
    entry of ``settings``, then global average pooling into the loss."""
    network = layers.variable('data')
    network = _normalized_convolution(network, (3, 3), 16, (1, 1), (1, 1))
    for module_settings in settings:
        network = _dense_module(network, module_settings)
    network = layers.pooling(X=network,
                             mode='average',
                             kernel_shape=(1, 1),
                             stride=(1, 1),
                             pad=(0, 0),
                             global_pool=True)
    network = layers.flatten(network)
    network = layers.fully_connected(X=network, n_hidden_units=n_classes)
    network = layers.softmax_loss(network, normalization='batch')
    return network
def residual_network(procedures):
    """Build a residual network from (transit, recur) callable pairs.

    Each pair receives the running network and its stage index; ``transit``
    runs first, then ``recur``.
    """
    network = layers.variable('data')
    for stage, (transit, recur) in enumerate(procedures):
        network = transit(network, stage)
        network = recur(network, stage)
    network = layers.pooling(X=network,
                             mode='average',
                             global_pool=True,
                             kernel_shape=(1, 1),
                             stride=(1, 1),
                             pad=(1, 1))
    network = layers.flatten(network)
    network = layers.fully_connected(X=network,
                                     n_hidden_units=10,
                                     name='linear_transition')
    network = layers.softmax_loss(prediction=network,
                                  normalization='batch',
                                  id='softmax')
    return network
def simplifed_identity_network(N):
    """Simplified identity-mapping network: ``N`` residual blocks of two
    convolutions each, then a global-average-pool classifier head.

    (The typo in the function name is kept for caller compatibility.)
    """
    # Consistency fix: use ``layers.variable`` like every other builder in
    # this file (the original called a bare ``variable``).
    network = layers.variable('data')
    for _ in range(N):
        identity = network
        residual = _normalized_convolution(network, (3, 3), 16, (1, 1), (1, 1))
        # BUG FIX: the second convolution originally consumed ``network``
        # again, so the first convolution's output was silently discarded;
        # a residual branch must chain its convolutions.
        residual = _normalized_convolution(residual, (3, 3), 16, (1, 1), (1, 1))
        network = identity + residual
    network = layers.pooling(X=network,
                             mode='average',
                             global_pool=True,
                             kernel_shape=(1, 1),
                             stride=(1, 1),
                             pad=(1, 1))
    network = layers.flatten(network)
    network = layers.fully_connected(X=network,
                                     n_hidden_units=10,
                                     name='linear_transition')
    network = layers.softmax_loss(prediction=network,
                                  normalization='batch',
                                  id='softmax')
    return network
# Script section: CLI arguments, a 3-stage conv/ReLU/pool stem, and shared
# parameters for the residual body that follows.
parser.add_argument('--n_residual_layers', type=int, required=True)
parser.add_argument('--postfix', type=str, default='')
args = parser.parse_args()
# TODO calculate receptive field
_convolution = lambda X: layers.convolution(
    X=X, n_filters=16, kernel_shape=(5, 5), stride=(1, 1), pad=(2, 2))
network = layers.variable('data')
# NOTE(review): the loop extent below is inferred from the collapsed source;
# confirm that conv, ReLU and pooling all belong inside the loop.
for index in range(3):
    network = _convolution(network)
    network = layers.ReLU(network)
    network = layers.pooling(X=network,
                             mode='maximum',
                             kernel_shape=(2, 2),
                             stride=(2, 2),
                             pad=(0, 0))
# Parameters shared by every residual-body convolution / batch norm.
shared_weight = layers.variable('shared_weight')
shared_gamma = layers.variable('shared_gamma')
shared_beta = layers.variable('shared_beta')
# ``_convolution`` is rebound for the residual body: 3x3, shared weight,
# no bias.  NOTE(review): this lambda is truncated in the visible source
# (no closing parenthesis here); the remainder lies outside this view.
_convolution = lambda X: layers.convolution(
    X=X, n_filters=16, kernel_shape=(3, 3), stride=(1, 1), pad=(1, 1),
    weight=shared_weight, no_bias=True
# Script section: a pre-activation-style residual body built from one shared
# 5x5 convolution lambda, then the classifier head and optimizer settings.
parser.add_argument('--postfix', type=str, required=True)
args = parser.parse_args()
_convolution = lambda network : \
    layers.convolution(X=network, n_filters=16, kernel_shape=(5, 5),
                       stride=(1, 1), pad=(2, 2), no_bias=True)
network = layers.variable('data')
for index in range(args.n_residual_layers):
    network = layers.batch_normalization(network)
    network = layers.ReLU(network)
    # NOTE(review): the residual is added to the *post*-BN/ReLU tensor, not
    # to the pre-activation input as in standard pre-act ResNets — confirm
    # this is intentional.
    network += _convolution(network)
# 56x56 average pooling collapses the (presumably 56x56 — TODO confirm)
# spatial dims before the linear classifier.
network = layers.pooling(X=network,
                         mode='average',
                         kernel_shape=(56, 56),
                         stride=(1, 1),
                         pad=(0, 0))
network = layers.flatten(network)
network = layers.fully_connected(X=network, n_hidden_units=10)
network = layers.softmax_loss(prediction=network,
                              normalization='batch',
                              id='softmax')
# SGD with momentum, initial learning rate 0.1.
optimizer_settings = {
    'args': {
        'momentum': 0.9
    },
    'initial_lr': 0.1,
    'optimizer': 'SGD'
}
def _normalized_convolution(**args):
    """Convolution -> batch normalization -> ReLU, forwarding all keyword
    arguments to ``layers.convolution``."""
    network = layers.convolution(**args)
    network = layers.batch_normalization(network)
    network = layers.ReLU(network)
    return network

# Script section: 5x5 stem convolution and max-pool, then a stack of 3x3
# convolutions whose intermediate outputs are collected in ``history``.
network = layers.variable('data')
network = _normalized_convolution(X=network,
                                  n_filters=16,
                                  kernel_shape=(5, 5),
                                  stride=(1, 1),
                                  pad=(2, 2))
network = layers.pooling(X=network,
                         mode='maximum',
                         kernel_shape=(2, 2),
                         stride=(2, 2),
                         pad=(0, 0))
# reduction of generalization performance
'''
network = _normalized_convolution(X=network,
                                  n_filters=16,
                                  kernel_shape=(5, 5),
                                  stride=(1, 1),
                                  pad=(2, 2))
network = layers.pooling(X=network,
                         mode='maximum',
                         kernel_shape=(2, 2),
                         stride=(2, 2),
                         pad=(0, 0))
'''
history = []
# NOTE(review): the loop body appears to continue past the visible source;
# only its first statement is shown here.
for index in range(5):
    network = _normalized_convolution(X=network,
                                      n_filters=16,
                                      kernel_shape=(3, 3),
                                      stride=(1, 1),
                                      pad=(1, 1))
# Script section: moment-module network plus training hyper-parameters.
# NOTE(review): ``settings`` and ``_previous_n_moment_module`` are defined
# outside this view; the trailing ``optimizer_settings`` dict is truncated.
network = layers.variable('data')
network = _normalized_convolution(X=network,
                                  n_filters=16,
                                  kernel_shape=(3, 3),
                                  stride=(1, 1),
                                  pad=(1, 1))
network = _previous_n_moment_module(network, settings)
for n_filters in (32, 64):
    # Widen the module's convolutions before each transition.
    settings['convolution_args']['n_filters'] = n_filters
    network = _transit(network, n_filters)
    network = _previous_n_moment_module(network, settings)
network = layers.pooling(X=network,
                         mode='average',
                         global_pool=True,
                         kernel_shape=(8, 8),
                         stride=(1, 1),
                         pad=(1, 1))
network = layers.flatten(network)
network = layers.fully_connected(X=network, n_hidden_units=10)
network = layers.softmax_loss(prediction=network,
                              normalization='batch',
                              id='softmax')
BATCH_SIZE = 128
lr = 0.1
# Step the learning rate down 10x at iterations 32k and 48k.
lr_table = {32000: lr * 0.1, 48000: lr * 0.01}
lr_scheduler = AtIterationScheduler(lr, lr_table)
optimizer_settings = {
    'args': {
def _transit(network, n_filters):
    """Parameter-free downsampling transition.

    Average-pools by 2, batch-normalizes, then zero-pads the channel
    dimension by ``n_filters // 2`` so the result matches the next module's
    ``n_filters`` channels.
    """
    network = layers.pooling(X=network,
                             mode='average',
                             kernel_shape=(2, 2),
                             stride=(2, 2),
                             pad=(0, 0))
    network = layers.batch_normalization(network)
    # BUG FIX: ``n_filters / 2`` yields a float under Python 3; pad widths
    # must be integers (``//`` is identical for Python 2 ints).
    pad_width = (0, 0, 0, n_filters // 2, 0, 0, 0, 0)
    network = layers.pad(network, pad_width, 'constant')
    return network