import minpy.nn.model_builder as builder  # assumed import; these snippets use `builder` throughout


def MultiLayerPerceptron(*args, **kwargs):
    '''
    positional arguments:
      the number of hidden units of each layer
    keyword arguments (optional):
      activation:         name of a builder activation class, 'ReLU' by default
      affine_monitor:     bool, export the output of every affine layer
      activation_monitor: bool, export the output of every activation
      storage:            dict, required when either monitor is enabled
    '''
    assert all(isinstance(arg, int) for arg in args)
    try:
        activation_name = kwargs.pop('activation', 'ReLU')
        activation = getattr(builder, activation_name)()
    except AttributeError:
        raise ValueError('unsupported activation function: %s' % activation_name)
    affine_monitor = kwargs.pop('affine_monitor', False)
    activation_monitor = kwargs.pop('activation_monitor', False)
    if affine_monitor or activation_monitor:
        try:
            storage = kwargs['storage']
        except KeyError:
            raise ValueError('storage required to monitor intermediate results')

    network = builder.Sequential()
    # hidden layers: affine transform + activation, each optionally exported
    for i, arg in enumerate(args[:-1]):
        network.append(builder.Affine(arg))
        if affine_monitor:
            network.append(builder.Export('affine%d' % i, storage))
        network.append(activation)
        if activation_monitor:
            network.append(builder.Export('activation%d' % i, storage))
    network.append(builder.Affine(args[-1]))  # output layer, no activation

    return network
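
A minimal usage sketch (layer sizes here are illustrative; the storage dict collects the exported intermediates during a forward pass):

storage = {}
mlp = MultiLayerPerceptron(512, 512, 10,
                           activation='ReLU',
                           affine_monitor=True,
                           storage=storage)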
Example #2
def main(args):
    # Define a convolutional neural network the same as above.
    # Assumed imports for this snippet: minpy.nn.model_builder as builder,
    # NDArrayIter from minpy.nn.io, Solver from minpy.nn.solver.
    # batch_size, flattened_input_size, hidden_size and num_classes are
    # module-level constants defined elsewhere in the original script.
    net = builder.Sequential(
        builder.Convolution((7, 7), 32),
        builder.ReLU(),
        builder.Pooling('max', (2, 2), (2, 2)),
        builder.Reshape((flattened_input_size,)),
        builder.Affine(hidden_size),
        builder.Affine(num_classes),
    )

    # Cast the definition to a model compatible with minpy solver
    model = builder.Model(net, 'softmax', (3 * 32 * 32,))

    data = get_CIFAR10_data(args.data_dir)

    train_dataiter = NDArrayIter(data['X_train'],
                                 data['y_train'],
                                 batch_size=batch_size,
                                 shuffle=True)

    test_dataiter = NDArrayIter(data['X_test'],
                                data['y_test'],
                                batch_size=batch_size,
                                shuffle=False)

    solver = Solver(model,
                    train_dataiter,
                    test_dataiter,
                    num_epochs=10,
                    init_rule='gaussian',
                    init_config={
                        'stdvar': 0.001
                    },
                    update_rule='sgd_momentum',
                    optim_config={
                        'learning_rate': 1e-3,
                        'momentum': 0.9
                    },
                    verbose=True,
                    print_every=20)
    solver.init()
    solver.train()
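
For reference, the constants used above are defined elsewhere in the original script; one consistent set of values, assuming the 7x7 convolution uses stride 1 and no padding on 3x32x32 CIFAR-10 inputs:

conv_side = 32 - 7 + 1                 # 26: output side of the 7x7 convolution
pool_side = (conv_side - 2) // 2 + 1   # 13: output side of 2x2, stride-2 max pooling
flattened_input_size = pool_side * pool_side * 32   # 5408 units into the first Affine
hidden_size = 512    # assumed
num_classes = 10     # CIFAR-10
batch_size = 100     # assumed, matching Example #3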
Example #3
def main(args):
    # Define a 2-layer perceptron
    MLP = builder.Sequential(
        builder.Affine(512),
        builder.ReLU(),
        builder.Affine(10)
    )

    # Cast the definition to a model compatible with minpy solver
    model = builder.Model(MLP, 'softmax', (3 * 32 * 32,))

    data = get_CIFAR10_data(args.data_dir)
    data['X_train'] = data['X_train'].reshape([data['X_train'].shape[0], 3 * 32 * 32])
    data['X_val'] = data['X_val'].reshape([data['X_val'].shape[0], 3 * 32 * 32])
    data['X_test'] = data['X_test'].reshape([data['X_test'].shape[0], 3 * 32 * 32])

    train_dataiter = NDArrayIter(data['X_train'],
                                 data['y_train'],
                                 batch_size=100,
                                 shuffle=True)

    test_dataiter = NDArrayIter(data['X_test'],
                                data['y_test'],
                                batch_size=100,
                                shuffle=False)

    solver = Solver(model,
                    train_dataiter,
                    test_dataiter,
                    num_epochs=10,
                    init_rule='gaussian',
                    init_config={
                        'stdvar': 0.001
                    },
                    update_rule='sgd_momentum',
                    optim_config={
                        'learning_rate': 1e-5,
                        'momentum': 0.9
                    },
                    verbose=True,
                    print_every=20)
    solver.init()
    solver.train()
Example #4
def residual_network(n):
  '''
  n: the network contains 6 * n + 2 layers: 6 * n + 1 convolution layers
     and 1 affine layer; please refer to the ResNet paper for details
  '''
  def normalized_convolution(kernel_shape, kernel_number, stride, pad, activate=None):
    module = builder.Sequential(
      builder.Convolution(kernel_shape, kernel_number, stride, pad),
      builder.SpatialBatchNormalization()
    )
    if activate:
      module.append(getattr(builder, activate)())
    return module

  def residual(kernel_number, project=False):
    if project:
      # downsampling block: a stride-2 convolution on the main path,
      # average pooling plus a 1x1 convolution on the shortcut
      module = builder.Add(
        builder.Sequential(
          normalized_convolution((3, 3), kernel_number, (2, 2), (1, 1), 'ReLU'),
          normalized_convolution((3, 3), kernel_number, (1, 1), (1, 1))
        ),
        builder.Sequential(
          builder.Pooling('avg', (2, 2), (2, 2)),
          builder.Convolution((1, 1), kernel_number)
        )
      )
    else:
      # identity block: the shortcut passes the input through unchanged
      module = builder.Add(
        builder.Sequential(
          normalized_convolution((3, 3), kernel_number, (1, 1), (1, 1), 'ReLU'),
          normalized_convolution((3, 3), kernel_number, (1, 1), (1, 1))
        ),
        builder.Identity()
      )
    return module

  network = builder.Sequential(
    builder.Reshape((3, 32, 32)),
    normalized_convolution((3, 3), 16, (1, 1), (1, 1), 'ReLU')
  )
  for i in range(n):
    network.append(residual(16))
  network.append(residual(32, project=True))
  for i in range(n-1):
    network.append(residual(32))
  network.append(residual(64, project=True))
  for i in range(n-1):
    network.append(residual(64))
  network.append(builder.Pooling('avg', (8, 8)))
  network.append(builder.Reshape((64,)))
  network.append(builder.Affine(10))

  return network
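
Per the docstring, residual_network(3) yields the 20-layer variant (6 * 3 + 2). A minimal sketch of turning it into a trainable model, mirroring the builder.Model calls in the earlier examples:

resnet20 = residual_network(3)
model = builder.Model(resnet20, 'softmax', (3 * 32 * 32,))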
Example #5
import sys

from facility import *
from solver_primitives import *

sys.path.append('../')
from utilities.data_utility import load_cifar10
data = load_cifar10(path='../utilities/cifar/', center=True, rescale=True)
X = data[0][:16]

hidden_layers = 4
shapes = (1024, ) * hidden_layers + (10, )
activation = builder.ReLU
storage = {}
mlp = builder.Sequential()

for i, shape in enumerate(shapes[:-1]):
    mlp.append(builder.Affine(shape))
    mlp.append(builder.Export('affine%d' % i, storage))
    mlp.append(activation())
mlp.append(builder.Affine(shapes[-1]))

model = builder.Model(mlp, 'softmax', (3072, ))
'''
for key, value in model.param_configs.items():
  if 'weight' in key:
    value['init_rule'] = 'gaussian'
    value['init_config'] = {'stdvar' : 1}
'''

initialize(model)
for key, value in model.params.items():
    if 'weight' in key:
        pass  # assumed placeholder: the loop body is not included in this snippet
Example #6
from solver_primitives import *
from utilities.data_utility import load_cifar10

from GPU_utility import GPU_availability
from minpy.context import set_context, gpu
set_context(gpu(GPU_availability()[0]))

ACTIVATION = 'ReLU'
SHAPE = (1024, ) * 3 + (10, )
BATCH_SIZE = 64

X_SHAPE = (3072, )
activation = getattr(builder, ACTIVATION)
mlp = builder.Sequential()
for shape in SHAPE[:-1]:
    mlp.append(builder.Affine(shape))
    mlp.append(activation())
mlp.append(builder.Affine(SHAPE[-1]))
model = builder.Model(mlp, 'softmax', X_SHAPE)
initialize(model)
updater = Updater(model, 'sgd', {'learning_rate': 0.01})

training_X, training_Y, validation_X, validation_Y, test_X, test_Y = \
    load_cifar10(path='../../cifar10/utilities/cifar/', center=True, rescale=True)
X_batches = Batches(training_X, BATCH_SIZE)
Y_batches = Batches(training_Y, BATCH_SIZE)

ITERATIONS = 20000
LOGGING_INTERVAL = 10
VALIDATION_INTERVAL = 50
loss_table = []
Example #7
  # tail of a helper defined earlier in the original script: it gathers all
  # discriminator and generator parameters and applies the gradient-and-loss
  # function `gl` to them
  parameters = list(dmodel.params.values()) + list(gmodel.params.values())
  return gl(*parameters)

N, D = 50000, 16
data, p = generate_data(N, D)
BATCH_SIZE = 100
X_batches = Batches(data, BATCH_SIZE)
p_batches = Batches(p.reshape((N, 1)), BATCH_SIZE)

ACTIVATION = 'ReLU'
activation = getattr(builder, ACTIVATION)

DSHAPE = (16,) * 4 + (1,)
dmlp = builder.Sequential()
for shape in DSHAPE[:-1]:
  dmlp.append(builder.Affine(shape))
  dmlp.append(activation())
dmlp.append(builder.Affine(DSHAPE[-1]))
dmodel = builder.Model(dmlp, 'l2', (D,))
initialize(dmodel)
dupdater = Updater(dmodel, 'sgd', {'learning_rate': -0.01})  # negative rate: SGD steps up the gradient (ascent)

GSHAPE = (16,) * 4 + (D,)
gmlp = builder.Sequential()
for shape in GSHAPE[:-1]:
  gmlp.append(builder.Affine(shape))
  gmlp.append(activation())
gmlp.append(builder.Affine(GSHAPE[-1]))
gmodel = builder.Model(gmlp, 'l2', (D,))
initialize(gmodel)
gupdater = Updater(gmodel, 'sgd', {'learning_rate': 0.01})