def MultiLayerPerceptron(*args, **kwargs):
    """Build a multi-layer perceptron as a ``builder.Sequential`` network.

    Positional arguments:
        the number of hidden units of each layer (each must be an int);
        the last value is the output layer's size.
    Keyword arguments (optional):
        activation: name of a builder activation class, 'ReLU' by default.
        affine_monitor: bool, export every affine output into ``storage``.
        activation_monitor: bool, export every activation output into ``storage``.
        storage: dict, required when either monitor flag is set.

    Returns the assembled ``builder.Sequential`` network.
    Raises ``Exception`` for an unknown activation name or a missing
    ``storage`` when monitoring is requested.
    """
    assert all(isinstance(arg, int) for arg in args)
    activation = kwargs.pop('activation', 'ReLU')
    try:
        # Resolve the activation class by name on the builder module.
        # NOTE(review): a single instance is appended after every hidden
        # layer; builder activations appear stateless, but confirm sharing
        # one module instance across layers is intended.
        activation = getattr(builder, activation)()
    except AttributeError:
        # Fixed: was a bare `except:` that also swallowed KeyboardInterrupt etc.
        raise Exception('unsupported activation function')
    affine_monitor = kwargs.pop('affine_monitor', False)
    activation_monitor = kwargs.pop('activation_monitor', False)
    if affine_monitor or activation_monitor:
        try:
            storage = kwargs['storage']
        except KeyError:
            # Fixed: was a bare `except:`; only a missing key is an error here.
            raise Exception('storage required to monitor intermediate result')
    network = builder.Sequential()
    for i, arg in enumerate(args[:-1]):
        network.append(builder.Affine(arg))
        if affine_monitor:
            network.append(builder.Export('affine%d' % i, storage))
        network.append(activation)
        if activation_monitor:
            network.append(builder.Export('activation%d' % i, storage))
    # Output layer has no activation or monitor attached.
    network.append(builder.Affine(args[-1]))
    return network
def main(args):
    """Train a small convolutional network on CIFAR-10 with the minpy Solver.

    args: namespace with a ``data_dir`` attribute pointing at the CIFAR-10 data.
    Relies on module-level constants ``flattened_input_size``, ``hidden_size``,
    ``num_classes`` and ``batch_size`` — presumably defined elsewhere in this
    file; TODO confirm.
    """
    # Define a convolutional neural network the same as above
    net = builder.Sequential(
        builder.Convolution((7, 7), 32),
        builder.ReLU(),
        builder.Pooling('max', (2, 2), (2, 2)),
        builder.Reshape((flattened_input_size,)),  # fixed: missing comma was a syntax error
        builder.Affine(hidden_size),
        builder.Affine(num_classes),
    )
    # Cast the definition to a model compatible with minpy solver
    model = builder.Model(net, 'softmax', (3 * 32 * 32,))
    data = get_CIFAR10_data(args.data_dir)
    train_dataiter = NDArrayIter(data['X_train'], data['y_train'],
                                 batch_size=batch_size, shuffle=True)
    test_dataiter = NDArrayIter(data['X_test'], data['y_test'],
                                batch_size=batch_size, shuffle=False)
    solver = Solver(model, train_dataiter, test_dataiter,
                    num_epochs=10,
                    init_rule='gaussian',
                    init_config={'stdvar': 0.001},
                    update_rule='sgd_momentum',
                    optim_config={'learning_rate': 1e-3, 'momentum': 0.9},
                    verbose=True,
                    print_every=20)
    solver.init()
    solver.train()
def main(args):
    """Train a 2-layer perceptron on flattened CIFAR-10 images.

    args: namespace with a ``data_dir`` attribute pointing at the CIFAR-10 data.
    """
    # Two-layer perceptron: 3072 -> 512 -> ReLU -> 10
    perceptron = builder.Sequential(
        builder.Affine(512),
        builder.ReLU(),
        builder.Affine(10)
    )
    # Wrap the network so the minpy Solver can drive it
    model = builder.Model(perceptron, 'softmax', (3 * 32 * 32,))
    data = get_CIFAR10_data(args.data_dir)
    # Flatten every image split to (N, 3 * 32 * 32) rows
    for split in ('X_train', 'X_val', 'X_test'):
        data[split] = data[split].reshape([data[split].shape[0], 3 * 32 * 32])
    train_dataiter = NDArrayIter(data['X_train'], data['y_train'],
                                 batch_size=100, shuffle=True)
    test_dataiter = NDArrayIter(data['X_test'], data['y_test'],
                                batch_size=100, shuffle=False)
    solver = Solver(model, train_dataiter, test_dataiter,
                    num_epochs=10,
                    init_rule='gaussian',
                    init_config={'stdvar': 0.001},
                    update_rule='sgd_momentum',
                    optim_config={'learning_rate': 1e-5, 'momentum': 0.9},
                    verbose=True,
                    print_every=20)
    solver.init()
    solver.train()
def residual_network(n):
    """Build a CIFAR-style residual network of 6 * n + 2 layers.

    n: depth parameter — the network contains 6 * n + 1 convolution layers
       plus one final affine layer; please refer to the paper for details.
    """
    def conv_bn(kernel_shape, kernel_number, stride, pad, activate=None):
        # Convolution followed by spatial batch normalization, optionally
        # capped with an activation layer named by `activate`.
        layers = builder.Sequential(
            builder.Convolution(kernel_shape, kernel_number, stride, pad),
            builder.SpatialBatchNormalization()
        )
        if activate:
            layers.append(getattr(builder, activate)())
        return layers

    def residual(kernel_number, project=False):
        # Residual block: two 3x3 conv/BN stages summed with a shortcut.
        # A projecting block downsamples (stride 2 on the first conv) and
        # matches channel count via avg-pool + 1x1 convolution; a plain
        # block keeps shape and uses the identity shortcut.
        first_stride = (2, 2) if project else (1, 1)
        main_branch = builder.Sequential(
            conv_bn((3, 3), kernel_number, first_stride, (1, 1), 'ReLU'),
            conv_bn((3, 3), kernel_number, (1, 1), (1, 1))
        )
        if project:
            shortcut = builder.Sequential(
                builder.Pooling('avg', (2, 2), (2, 2)),
                builder.Convolution((1, 1), kernel_number)
            )
        else:
            shortcut = builder.Identity()
        return builder.Add(main_branch, shortcut)

    # Stem: reshape flat input to CHW and apply the first conv/BN/ReLU.
    network = builder.Sequential(
        builder.Reshape((3, 32, 32)),
        conv_bn((3, 3), 16, (1, 1), (1, 1), 'ReLU')
    )
    # Stage 1: n blocks at 16 channels.
    for _ in range(n):
        network.append(residual(16))
    # Stage 2: downsample to 32 channels, then n - 1 plain blocks.
    network.append(residual(32, project=True))
    for _ in range(n - 1):
        network.append(residual(32))
    # Stage 3: downsample to 64 channels, then n - 1 plain blocks.
    network.append(residual(64, project=True))
    for _ in range(n - 1):
        network.append(residual(64))
    # Head: global average pooling, flatten, 10-way affine classifier.
    network.append(builder.Pooling('avg', (8, 8)))
    network.append(builder.Reshape((64,)))
    network.append(builder.Affine(10))
    return network
from facility import * from solver_primitives import * sys.path.append('../') from utilities.data_utility import load_cifar10 data = load_cifar10(path='../utilities/cifar/', center=True, rescale=True) X = data[0][:16] hidden_layers = 4 shapes = (1024, ) * hidden_layers + (10, ) activation = builder.ReLU storage = {} mlp = builder.Sequential() for i, shape in enumerate(shapes[:-1]): mlp.append(builder.Affine(shape)) mlp.append(builder.Export('affine%d' % i, storage)) mlp.append(activation()) mlp.append(builder.Affine(shapes[-1])) model = builder.Model(mlp, 'softmax', (3072, )) ''' for key, value in model.param_configs.items(): if 'weight' in key: value['init_rule'] = 'gaussian' value['init_config'] = {'stdvar' : 1} ''' initialize(model) for key, value in model.params.items(): if 'weight' in key:
from solver_primitives import *
from utilities.data_utility import load_cifar10
from GPU_utility import GPU_availability
from minpy.context import set_context, gpu

# Run everything on the first available GPU.
set_context(gpu(GPU_availability()[0]))

# Network configuration: three 1024-unit hidden layers and a 10-way output
# over flattened 32x32x3 CIFAR-10 images (3072 inputs).
ACTIVATION = 'ReLU'
SHAPE = (1024, ) * 3 + (10, )
BATCH_SIZE = 64
X_SHAPE = (3072, )

# Resolve the activation class by name on the builder module
# (builder is presumably re-exported by solver_primitives — TODO confirm).
activation = getattr(builder, ACTIVATION)

# Assemble the MLP: Affine + activation per hidden layer, bare Affine output.
mlp = builder.Sequential()
for shape in SHAPE[:-1]:
    mlp.append(builder.Affine(shape))
    mlp.append(activation())
mlp.append(builder.Affine(SHAPE[-1]))

# Wrap as a softmax-loss model, initialize parameters, and set up plain SGD.
model = builder.Model(mlp, 'softmax', X_SHAPE)
initialize(model)
updater = Updater(model, 'sgd', {'learning_rate': 0.01})

# Load centered/rescaled CIFAR-10 splits and batch the training set.
training_X, training_Y, validation_X, validation_Y, test_X, test_Y, = \
    load_cifar10(path='../../cifar10/utilities/cifar/', center=True, rescale=True)
X_batches = Batches(training_X, BATCH_SIZE)
Y_batches = Batches(training_Y, BATCH_SIZE)

# Training-loop settings and loss accumulator (loop continues past this chunk).
ITERATIONS = 20000
LOGGING_INTERVAL = 10
VALIDATION_INTERVAL = 50
loss_table = []
parameters = list(dmodel.params.values()) + list(gmodel.params.values()) return gl(*parameters) N, D = 50000, 16 data, p = generate_data(N, D) BATCH_SIZE = 100 X_batches = Batches(data, BATCH_SIZE) p_batches = Batches(p.reshape((N, 1)), BATCH_SIZE) ACTIVATION = 'ReLU' activation = getattr(builder, ACTIVATION) DSHAPE = (16,) * 4 + (1,) dmlp = builder.Sequential() for shape in DSHAPE[:-1]: dmlp.append(builder.Affine(shape)) dmlp.append(activation()) dmlp.append(builder.Affine(DSHAPE[-1])) dmodel = builder.Model(dmlp, 'l2', (D,)) initialize(dmodel) dupdater = Updater(dmodel, 'sgd', {'learning_rate' : -0.01}) GSHAPE = (16,) * 4 + (D,) gmlp = builder.Sequential() for shape in GSHAPE[:-1]: gmlp.append(builder.Affine(shape)) gmlp.append(activation()) gmlp.append(builder.Affine(GSHAPE[-1])) gmodel = builder.Model(gmlp, 'l2', (D,)) initialize(gmodel) gupdater = Updater(gmodel, 'sgd', {'learning_rate' : 0.01})