def MultiLayerPerceptron(*args, **kwargs):
    '''Build a multi-layer perceptron as a builder.Sequential network.

    positional arguments:
        the number of hidden units of each layer (all ints; at least one)
    keyword arguments (optional):
        activation: name of a builder activation class, 'ReLU' by default
        affine_monitor: bool -- export each affine output into storage
        activation_monitor: bool -- export each activation output into storage
        storage: dictionary -- required when either monitor flag is set

    Raises:
        TypeError: if any layer width is not an int
        ValueError: if no widths are given, the activation name is not found
            on builder, or monitoring is requested without a storage dict
    '''
    # raise instead of assert: asserts are stripped under `python -O`
    if not args:
        raise ValueError('at least one layer width is required')
    if not all(isinstance(arg, int) for arg in args):
        raise TypeError('layer widths must all be ints')

    activation_name = kwargs.pop('activation', 'ReLU')
    try:
        # NOTE(review): a single activation instance is appended after every
        # hidden layer, matching the original behavior -- presumably builder
        # activations are stateless; confirm if they ever carry parameters.
        activation = getattr(builder, activation_name)()
    except AttributeError as err:
        # narrow except: only a missing builder attribute means "unsupported"
        raise ValueError(
            'unsupported activation function: %s' % activation_name) from err

    affine_monitor = kwargs.pop('affine_monitor', False)
    activation_monitor = kwargs.pop('activation_monitor', False)
    if affine_monitor or activation_monitor:
        try:
            storage = kwargs['storage']
        except KeyError as err:
            raise ValueError(
                'storage required to monitor intermediate result') from err

    network = builder.Sequential()
    for i, arg in enumerate(args[:-1]):
        network.append(builder.Affine(arg))
        if affine_monitor:
            network.append(builder.Export('affine%d' % i, storage))
        network.append(activation)
        if activation_monitor:
            network.append(builder.Export('activation%d' % i, storage))
    # final affine has no activation (typically feeds a softmax loss)
    network.append(builder.Affine(args[-1]))
    return network
def normalized_convolution(kernel_shape, kernel_number, stride, pad, activate=None):
    '''Convolution followed by spatial batch normalization.

    If `activate` names a builder activation class (e.g. 'ReLU'), an
    instance of it is appended after the batch normalization.
    '''
    module = builder.Sequential(
        builder.Convolution(kernel_shape, kernel_number, stride, pad),
        builder.SpatialBatchNormalization(),
    )
    if activate:
        activation_cls = getattr(builder, activate)
        module.append(activation_cls())
    return module
def residual(kernel_number, project=False):
    '''Residual block: two 3x3 normalized convolutions plus a shortcut.

    With project=True the trunk downsamples (first convolution has stride
    2) and the shortcut is an average pool followed by a 1x1 convolution;
    otherwise the shortcut is the identity.
    '''
    first_stride = (2, 2) if project else (1, 1)
    trunk = builder.Sequential(
        normalized_convolution((3, 3), kernel_number, first_stride, (1, 1), 'ReLU'),
        normalized_convolution((3, 3), kernel_number, (1, 1), (1, 1)),
    )
    if project:
        shortcut = builder.Sequential(
            builder.Pooling('avg', (2, 2), (2, 2)),
            builder.Convolution((1, 1), kernel_number),
        )
    else:
        shortcut = builder.Identity()
    return builder.Add(trunk, shortcut)
def residual_network(n):
    '''Build a residual network with 6 * n + 2 layers for 3x32x32 input.

    n: the network contains 6 * n + 1 convolution layers and 1 affine
    layer; please refer to the paper for details.
    '''
    def normalized_convolution(kernel_shape, kernel_number, stride, pad, activate=None):
        # convolution + spatial batch-norm, optionally followed by activation
        module = builder.Sequential(
            builder.Convolution(kernel_shape, kernel_number, stride, pad),
            builder.SpatialBatchNormalization(),
        )
        if activate:
            module.append(getattr(builder, activate)())
        return module

    def residual(kernel_number, project=False):
        # two 3x3 convolutions plus a shortcut; project=True downsamples
        # via a stride-2 first convolution and a pooled 1x1-conv shortcut
        first_stride = (2, 2) if project else (1, 1)
        trunk = builder.Sequential(
            normalized_convolution((3, 3), kernel_number, first_stride, (1, 1), 'ReLU'),
            normalized_convolution((3, 3), kernel_number, (1, 1), (1, 1)),
        )
        if project:
            shortcut = builder.Sequential(
                builder.Pooling('avg', (2, 2), (2, 2)),
                builder.Convolution((1, 1), kernel_number),
            )
        else:
            shortcut = builder.Identity()
        return builder.Add(trunk, shortcut)

    # stem: reshape flat input to CHW and apply one normalized convolution
    network = builder.Sequential(
        builder.Reshape((3, 32, 32)),
        normalized_convolution((3, 3), 16, (1, 1), (1, 1), 'ReLU'),
    )
    # first stage: n plain residual blocks at width 16
    for _ in range(n):
        network.append(residual(16))
    # each later stage opens with a projecting (downsampling) block,
    # followed by n - 1 plain blocks at the new width
    for width in (32, 64):
        network.append(residual(width, project=True))
        for _ in range(n - 1):
            network.append(residual(width))
    # global average pool, flatten, and a 10-way affine classifier head
    network.append(builder.Pooling('avg', (8, 8)))
    network.append(builder.Reshape((64,)))
    network.append(builder.Affine(10))
    return network
def main(args):
    '''Train a 2-layer perceptron on flattened CIFAR-10 images.'''
    # Define a 2-layer perceptron
    mlp = builder.Sequential(
        builder.Affine(512),
        builder.ReLU(),
        builder.Affine(10),
    )
    # Cast the definition to a model compatible with minpy solver
    model = builder.Model(mlp, 'softmax', (3 * 32 * 32,))

    data = get_CIFAR10_data(args.data_dir)
    # flatten each image split to shape (num_samples, 3072)
    flat_dim = 3 * 32 * 32
    for split in ('X_train', 'X_val', 'X_test'):
        data[split] = data[split].reshape([data[split].shape[0], flat_dim])

    train_dataiter = NDArrayIter(data['X_train'], data['y_train'],
                                 batch_size=100, shuffle=True)
    test_dataiter = NDArrayIter(data['X_test'], data['y_test'],
                                batch_size=100, shuffle=False)

    solver = Solver(model,
                    train_dataiter,
                    test_dataiter,
                    num_epochs=10,
                    init_rule='gaussian',
                    init_config={'stdvar': 0.001},
                    update_rule='sgd_momentum',
                    optim_config={'learning_rate': 1e-5, 'momentum': 0.9},
                    verbose=True,
                    print_every=20)
    solver.init()
    solver.train()
def main(args):
    '''Train a small convolutional network on CIFAR-10.'''
    # Define a convolutional neural network the same as above.
    # NOTE(review): hidden_size, num_classes and batch_size are assumed to
    # be module-level constants defined elsewhere in this file -- confirm.
    net = builder.Sequential(
        builder.Convolution((7, 7), 32),
        builder.ReLU(),
        builder.Pooling('max', (2, 2), (2, 2)),
        builder.Flatten(),
        builder.Affine(hidden_size),
        builder.Affine(num_classes),
    )
    # Cast the definition to a model compatible with minpy solver
    model = builder.Model(net, 'softmax', (3 * 32 * 32, ))

    data = get_CIFAR10_data(args.data_dir)
    train_dataiter = NDArrayIter(data['X_train'], data['y_train'],
                                 batch_size=batch_size, shuffle=True)
    test_dataiter = NDArrayIter(data['X_test'], data['y_test'],
                                batch_size=batch_size, shuffle=False)

    solver = Solver(model,
                    train_dataiter,
                    test_dataiter,
                    num_epochs=10,
                    init_rule='gaussian',
                    init_config={'stdvar': 0.001},
                    update_rule='sgd_momentum',
                    optim_config={'learning_rate': 1e-3, 'momentum': 0.9},
                    verbose=True,
                    print_every=20)
    solver.init()
    solver.train()
# Script setup: make the project's nn helpers and data utilities importable.
import sys
sys.path.append('../../nn')
from custom_layers import *
from facility import *
from solver_primitives import *
sys.path.append('../')
from utilities.data_utility import load_cifar10

# Load preprocessed CIFAR-10 (centered and rescaled) and keep the first 16
# training images -- presumably a small probe batch for inspecting the
# intermediate outputs exported below; TODO confirm against later code.
data = load_cifar10(path='../utilities/cifar/', center=True, rescale=True)
X = data[0][:16]

# Architecture: 4 hidden layers of 1024 units each, then a 10-way output.
hidden_layers = 4
shapes = (1024, ) * hidden_layers + (10, )
activation = builder.ReLU
storage = {}  # populated by the Export layers with per-layer results

# Each hidden layer is Affine -> Export('affine<i>') -> ReLU; the Export
# layer presumably records its input into `storage` under the given key.
mlp = builder.Sequential()
for i, shape in enumerate(shapes[:-1]):
    mlp.append(builder.Affine(shape))
    mlp.append(builder.Export('affine%d' % i, storage))
    mlp.append(activation())
mlp.append(builder.Affine(shapes[-1]))
# flattened CIFAR-10 images have 3 * 32 * 32 = 3072 features
model = builder.Model(mlp, 'softmax', (3072, ))

'''
for key, value in model.param_configs.items():
    if 'weight' in key:
        value['init_rule'] = 'gaussian'
        value['init_config'] = {'stdvar' : 1}
'''
Min Lin, Qiang Chen, Shuicheng Yan, Network In Network ''' network_in_network = builder.Sequential( builder.Reshape((3, 32, 32)), builder.Convolution((5, 5), 192, pad=(2, 2)), builder.ReLU(), builder.Convolution((1, 1), 160), builder.ReLU(), builder.Convolution((1, 1), 96), builder.ReLU(), builder.Pooling('max', (3, 3), (2, 2), (1, 1)), builder.Dropout(0.5), builder.Convolution((5, 5), 192, pad=(2, 2)), builder.ReLU(), builder.Convolution((1, 1), 192), builder.ReLU(), builder.Convolution((1, 1), 192), builder.ReLU(), builder.Pooling('avg', (3, 3), (2, 2), (1, 1)), builder.Dropout(0.5), builder.Convolution((3, 3), 192, pad=(1, 1)), builder.ReLU(), builder.Convolution((1, 1), 192), builder.ReLU(), builder.Convolution((1, 1), 10), builder.ReLU(), builder.Pooling('avg', (8, 8)), builder.Reshape((10,)) ) '''