def _traced_module(network, n_filters, n_layers):
    """Stack ``n_layers`` residual units, collecting a gradient-stopped trace
    of each residual branch, then finish with a BN/ReLU/pool/linear head whose
    gradient is also stopped.  All traces plus the head are returned as one
    group.
    """
    outputs = []
    for _ in range(n_layers):
        shortcut = network
        branch = _normalized_convolution(network, n_filters=n_filters)
        branch = _normalized_convolution(branch, n_filters=n_filters)
        # Detached, flattened snapshot of the residual branch for inspection.
        snapshot = layers.flatten(layers.ReLU(layers.terminate_gradient(branch)))
        outputs.append(snapshot)
        network = shortcut + branch
    network = layers.batch_normalization(network, fix_gamma=False)
    network = layers.ReLU(network)
    network = layers.pooling(X=network, mode='average', kernel_shape=(8, 8),
                             stride=(1, 1), pad=(0, 0))
    network = layers.flatten(network)
    network = layers.batch_normalization(network, fix_gamma=False)
    network = layers.fully_connected(X=network, n_hidden_units=10)
    # The classifier head is also detached from the backward pass.
    outputs.append(layers.terminate_gradient(network))
    return layers.group(outputs)
def _fully_connected(network, batch_size, n_hidden_units, mode, p):
    """Mix a learned fully-connected path with a parameter-free mean path,
    gated element-wise by a random binary mask of shape
    ``(batch_size, n_hidden_units)`` drawn with probability ``p``.

    NOTE(review): ``mode`` is accepted but never read inside this function —
    kept for signature compatibility with callers.
    """
    wide = layers.ReLU(layers.fully_connected(X=network,
                                              n_hidden_units=n_hidden_units))
    # Parameter-free path: per-sample mean, broadcast back to the hidden width.
    narrow = layers.mean(network, axis=1)
    narrow = layers.reshape(narrow, (0, 1))
    narrow = layers.broadcast(narrow, (0, n_hidden_units))
    narrow = layers.ReLU(narrow)
    mask = _random_gate(p, (batch_size, n_hidden_units))
    return mask * wide + (1 - mask) * narrow
def build_network(args):
    """Assemble the full training graph: conv stem, two module/transit stages,
    a traced module, then a BN/ReLU/pool/linear/softmax head.

    NOTE(review): this block reads oddly in several places — see inline notes.
    """
    network = layers.variable('data')
    network = _convolution(X=network, n_filters=16)
    for n_filters in (16, 32):
        network = _module(network, n_filters, args.n_layers)
        network = _transit(network, n_filters * 2)
    # network = _module(network, 64, args.n_layers)
    # NOTE(review): unpacks two values here, and passes args.rnn — this calls a
    # _traced_module variant other than the one defined elsewhere in this file
    # (which takes 3 args and returns a single group); verify the import.
    _, rnn_cache = _traced_module(network, args.rnn, 64, args.n_layers)
    # network = layers.batch_normalization(network, fix_gamma=False)
    network = layers.batch_normalization(rnn_cache['h'], fix_gamma=False, id='BN')
    network = layers.ReLU(network)
    # NOTE(review): the BN('BN')/ReLU result above is immediately discarded —
    # pooling re-reads rnn_cache['h']. Possibly intentional (e.g. to register
    # the 'BN' parameters) — confirm; otherwise pooling should take `network`.
    network = layers.pooling(X=rnn_cache['h'], mode='average',
                             kernel_shape=(8, 8), stride=(1, 1), pad=(0, 0))
    network = layers.flatten(network)
    network = layers.fully_connected(X=network, n_hidden_units=10, id='linear')
    network = layers.softmax_loss(prediction=network, normalization='batch',
                                  id='softmax')
    return network
def _normalized_convolution(network, kernel_shape, n_filters, stride, pad,
                            weight=None, bias=None):
    """Convolution -> batch normalization -> ReLU.

    ``weight`` and ``bias`` are forwarded to the convolution only when given,
    allowing parameter sharing with other layers.
    """
    conv_kwargs = dict(X=network, kernel_shape=kernel_shape,
                       n_filters=n_filters, stride=stride, pad=pad)
    for name, value in (('weight', weight), ('bias', bias)):
        if value is not None:
            conv_kwargs[name] = value
    out = layers.convolution(**conv_kwargs)
    out = layers.batch_normalization(out)
    return layers.ReLU(out)
def _normalized_convolution(network, kernel_shape, n_filters, stride, pad):
    """Convolution -> batch normalization -> ReLU."""
    conv = layers.convolution(X=network, kernel_shape=kernel_shape,
                              n_filters=n_filters, stride=stride, pad=pad)
    return layers.ReLU(layers.batch_normalization(conv))
def _normalized_convolution(network, kernel_shape, n_filters, stride, pad):
    """Convolution -> batch normalization -> ReLU."""
    # Keywords passed directly rather than via an intermediate dict.
    out = layers.convolution(X=network, kernel_shape=kernel_shape,
                             n_filters=n_filters, stride=stride, pad=pad)
    out = layers.batch_normalization(out)
    out = layers.ReLU(out)
    return out
def _normalized_unweighted_convolution(network, kernel_shape, n_filters,
                                       stride, pad,
                                       data_shape=(1, 3, 32, 32)):
    """Unweighted convolution -> batch normalization -> ReLU.

    ``data_shape`` was previously hard-coded to the CIFAR-style
    ``(1, 3, 32, 32)``; it is now a parameter with that same default, so all
    existing callers behave identically while other input geometries become
    usable.
    """
    network = unweighted_convolution(
        X=network,
        kernel_shape=kernel_shape,
        n_filters=n_filters,
        stride=stride,
        pad=pad,
        data_shape=data_shape,
    )
    network = layers.batch_normalization(network)
    network = layers.ReLU(network)
    return network
# NOTE(review): this fragment is truncated — the final lambda assignment ends
# mid-call (`stride=(1, 1),` with no closing paren), so it cannot be
# reformatted or reviewed without the missing tail.  Left byte-identical;
# recover the rest of the statement before editing.
from argparse import ArgumentParser parser = ArgumentParser() parser.add_argument('--gpu_index', type=int, default=0) parser.add_argument('--n_residual_layers', type=int, required=True) parser.add_argument('--postfix', type=str, default='') args = parser.parse_args() # TODO calculate receptive field _convolution = lambda X: layers.convolution( X=X, n_filters=16, kernel_shape=(5, 5), stride=(1, 1), pad=(2, 2)) network = layers.variable('data') for index in range(3): network = _convolution(network) network = layers.ReLU(network) network = layers.pooling(X=network, mode='maximum', kernel_shape=(2, 2), stride=(2, 2), pad=(0, 0)) shared_weight = layers.variable('shared_weight') shared_gamma = layers.variable('shared_gamma') shared_beta = layers.variable('shared_beta') _convolution = lambda X: layers.convolution( X=X, n_filters=16, kernel_shape=(3, 3), stride=(1, 1),
def _normalized_convolution(**kwargs):
    """Forward all keywords to a convolution, then batch-normalize and ReLU."""
    out = layers.convolution(**kwargs)
    out = layers.batch_normalization(out)
    return layers.ReLU(out)
def _normalized_convolution(**kwargs):
    """Bias-free convolution -> batch normalization (learned gamma) -> ReLU.

    The convolution bias is redundant when followed by batch normalization,
    hence ``no_bias=True``.
    """
    conv = layers.convolution(no_bias=True, **kwargs)
    normed = layers.batch_normalization(conv, fix_gamma=False)
    return layers.ReLU(normed)
def _normalized_convolution(network, **kwargs):
    """Pre-activation unit: batch normalization -> ReLU -> convolution.

    Remaining keywords are forwarded to the module-level ``_convolution``.
    """
    pre = layers.batch_normalization(network, fix_gamma=False)
    pre = layers.ReLU(pre)
    return _convolution(X=pre, **kwargs)
def _activated_convolution(**kwargs):
    """Convolution followed by a ReLU activation (no normalization)."""
    return layers.ReLU(X=layers.convolution(**kwargs))
def _fully_connected(network, n_hidden_units, p):
    """Fully-connected layer -> ReLU -> dropout with probability ``p``."""
    hidden = layers.fully_connected(X=network, n_hidden_units=n_hidden_units)
    activated = layers.ReLU(hidden)
    return layers.dropout(activated, p)