# weight initialization
init_norm = Gaussian(loc=0.0, scale=0.01)

# setup model layers
relu = Rectlin()
conv_params = {'strides': 1,
               'padding': 1,
               'init': Xavier(local=True),
               'bias': Constant(0),
               'activation': relu}

vgg_layers = []

# set up conv stacks with an increasing number of filters
vgg_layers.append(Conv((7, 7, 32), **conv_params))
vgg_layers.append(Pooling(2, strides=2))
vgg_layers.append(Conv((3, 3, 64), **conv_params))
vgg_layers.append(Conv((3, 3, 128), **conv_params))
vgg_layers.append(Pooling(2, strides=2))
vgg_layers.append(Conv((3, 3, 256), **conv_params))
vgg_layers.append(Conv((3, 3, 256), **conv_params))
vgg_layers.append(Conv((3, 3, 256), **conv_params))
vgg_layers.append(Pooling(2, strides=2))
vgg_layers.append(Dropout(keep=0.5))
vgg_layers.append(Linear(nout=4, init=Gaussian(loc=0.0, scale=0.01)))

model = Model(layers=vgg_layers)

# regression output (nout=4), so use a squared-error cost instead of cross-entropy
# cost = GeneralizedCost(costfunc=CrossEntropyBinary())
cost = GeneralizedCost(costfunc=SumSquared())
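# Sketch (not part of the original snippet): a typical way to train the model
# defined above. The optimizer settings, `train_set`, and `args.epochs` are
# assumptions; they are not defined in this snippet.
optimizer = GradientDescentMomentum(0.01, momentum_coef=0.9)
callbacks = Callbacks(model, **args.callback_args)
model.fit(train_set, optimizer=optimizer, num_epochs=args.epochs,
          cost=cost, callbacks=callbacks)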
layers.append(
    Conv((3, 3, 256), init=init_uni, activation=relu, strides=2, padding=1))  # 23->12
layers.append(
    Conv((3, 3, 384), init=init_uni, activation=relu, strides=1, padding=1))
layers.append(Conv((1, 1, 384), init=init_uni, activation=relu, strides=1))
layers.append(
    Conv((3, 3, 384), init=init_uni, activation=relu, strides=2, padding=1))  # 12->6
layers.append(Dropout(keep=0.5))
layers.append(
    Conv((3, 3, 1024), init=init_uni, activation=relu, strides=1, padding=1))
layers.append(Conv((1, 1, 1024), init=init_uni, activation=relu, strides=1))
layers.append(Conv((1, 1, 1000), init=init_uni, activation=relu, strides=1))
layers.append(Pooling(6, op='avg'))
layers.append(Activation(Softmax()))

cost = GeneralizedCost(costfunc=CrossEntropyMulti())
mlp = Model(layers=layers)

if args.model_file:
    import os
    assert os.path.exists(args.model_file), '%s not found' % args.model_file
    mlp.load_params(args.model_file)

# configure callbacks
callbacks = Callbacks(mlp, train, eval_set=test, **args.callback_args)

if args.deconv:
    # assumption: the truncated branch registers the deconv visualization callback
    callbacks.add_deconv_callback(train, test)
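# Sketch (not part of the original snippet): the training call that would
# typically follow. The optimizer settings and `args.epochs` are assumptions;
# the original example configures its own optimizer elsewhere.
opt = GradientDescentMomentum(0.01, momentum_coef=0.9, wdecay=0.0005)
mlp.fit(train, optimizer=opt, num_epochs=args.epochs, cost=cost,
        callbacks=callbacks)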
                    lshape=(3, 32, 32), name='test')

init_uni = Uniform(low=-0.1, high=0.1)

if args.datatype in [np.float32, np.float64]:
    opt_gdm = GradientDescentMomentum(learning_rate=0.01,
                                      momentum_coef=0.9,
                                      stochastic_round=args.rounding)
elif args.datatype in [np.float16]:
    opt_gdm = GradientDescentMomentum(learning_rate=0.01 / cost_scale,
                                      momentum_coef=0.9,
                                      stochastic_round=args.rounding)

layers = [Conv((5, 5, 16), init=init_uni, activation=Rectlin(), batch_norm=True),
          Pooling((2, 2)),
          Conv((5, 5, 32), init=init_uni, activation=Rectlin(), batch_norm=True),
          Pooling((2, 2)),
          Affine(nout=500, init=init_uni, activation=Rectlin(), batch_norm=True),
          Affine(nout=10, init=init_uni, activation=Softmax())]

if args.datatype in [np.float32, np.float64]:
    cost = GeneralizedCost(costfunc=CrossEntropyMulti())
elif args.datatype in [np.float16]:
    cost = GeneralizedCost(costfunc=CrossEntropyMulti(scale=cost_scale))

model = Model(layers=layers)

# configure callbacks
callbacks = Callbacks(model, eval_set=test, **args.callback_args)
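# Sketch (not part of the original snippet): fitting with the optimizer and
# cost selected above. The training iterator name `train` and `args.epochs`
# are assumptions; only the test iterator is visible in this snippet.
model.fit(train, optimizer=opt_gdm, num_epochs=args.epochs, cost=cost,
          callbacks=callbacks)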
train_set_x, train_set_y, valid_set_x, valid_set_y = load_data()

train_set = ArrayIterator(train_set_x, train_set_y, nclass=10, lshape=(1, 28, 28))
valid_set = ArrayIterator(valid_set_x, valid_set_y, nclass=10, lshape=(1, 28, 28))

init_uni = Gaussian(loc=0.0, scale=0.01)

# setup model layers
layers = [Conv(fshape=(5, 5, 32), init=init_uni, activation=Rectlin()),
          Pooling(fshape=2, strides=2),
          Conv(fshape=(5, 5, 32), init=init_uni, activation=Rectlin()),
          Pooling(fshape=2, strides=2),
          Dropout(),
          Affine(nout=500, init=init_uni, activation=Rectlin()),
          Dropout(),
          Affine(nout=10, init=init_uni, activation=Softmax())]

# setup cost function as CrossEntropy
cost = GeneralizedCost(costfunc=CrossEntropyBinary())

# setup optimizer
optimizer = GradientDescentMomentum(0.1, momentum_coef=0.9,
                                    stochastic_round=args.rounding)
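# Sketch (not part of the original snippet): the model construction, training,
# and evaluation steps that typically follow this setup. `args.epochs` is an
# assumption, and Misclassification comes from neon.transforms.
model = Model(layers=layers)
callbacks = Callbacks(model, eval_set=valid_set, **args.callback_args)
model.fit(train_set, optimizer=optimizer, num_epochs=args.epochs,
          cost=cost, callbacks=callbacks)
error_rate = model.eval(valid_set, metric=Misclassification())
print('Misclassification error = %.1f%%' % (error_rate * 100))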
                  strides=2)

layers = [Dropout(keep=.8),
          Conv((3, 3, 96), **convp1),
          Conv((3, 3, 96), **convp1),
          Conv((3, 3, 96), **convp1s2),
          Dropout(keep=.5),
          Conv((3, 3, 192), **convp1),
          Conv((3, 3, 192), **convp1),
          Conv((3, 3, 192), **convp1s2),
          Dropout(keep=.5),
          Conv((3, 3, 192), **convp1),
          Conv((1, 1, 192), **conv),
          Conv((1, 1, 16), **conv),
          Pooling(8, op="avg"),
          Activation(Softmax())]

cost = GeneralizedCost(costfunc=CrossEntropyMulti())

mlp = Model(layers=layers)

if args.model_file:
    import os
    assert os.path.exists(args.model_file), '%s not found' % args.model_file
    mlp.load_weights(args.model_file)

# configure callbacks
callbacks = Callbacks(mlp, train_set, eval_set=valid_set, **args.callback_args)
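# Sketch (not part of the original snippet): training the all-convolutional
# model above. The optimizer settings and `args.epochs` are assumptions; the
# original example defines its own learning-rate schedule.
opt = GradientDescentMomentum(0.05, momentum_coef=0.9, wdecay=0.001)
mlp.fit(train_set, optimizer=opt, num_epochs=args.epochs, cost=cost,
        callbacks=callbacks)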
    module = [MergeSum([mainpath, sidepath]),
              Activation(Rectlin())]
    return module


# Structure of the deep residual part of the network:
# args.depth modules of 2 convolutional layers each, at feature map depths
# of 16, 32, 64
nfms = [2**(stage + 4) for stage in sorted(list(range(3)) * args.depth)]
strides = [1] + [1 if cur == prev else 2
                 for cur, prev in zip(nfms[1:], nfms[:-1])]

# Now construct the network
layers = [Conv(**conv_params(3, 16))]
for nfm, stride in zip(nfms, strides):
    layers.append(module_factory(nfm, stride))
layers.append(Pooling('all', op='avg'))
layers.append(Affine(10, init=Kaiming(local=False),
                     batch_norm=True, activation=Softmax()))

model = Model(layers=layers)
opt = GradientDescentMomentum(0.1, 0.9, wdecay=0.0001,
                              schedule=Schedule([90, 135], 0.1))

# configure callbacks
valmetric = Misclassification()
callbacks = Callbacks(model, eval_set=valid_set,  # assumption: evaluation iterator
                      metric=valmetric, **args.callback_args)
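# Sketch (not part of the original snippet): the cost and training call that
# typically complete this residual-network setup. `train_set`, `valid_set`,
# and `args.epochs` are assumptions.
cost = GeneralizedCost(costfunc=CrossEntropyMulti())
model.fit(train_set, optimizer=opt, num_epochs=args.epochs, cost=cost,
          callbacks=callbacks)
print('Misclassification error = %.1f%%' %
      (model.eval(valid_set, metric=valmetric) * 100))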
                   repo_dir=data_dir, **common)
test = DataLoader(set_name=test_set, media_params=test_params,
                  index_file=test_idx, repo_dir=test_dir, **common)

gauss = Gaussian(scale=0.01)
glorot = GlorotUniform()
tiny = dict(str_h=1, str_w=1)
small = dict(str_h=1, str_w=2)
big = dict(str_h=1, str_w=4)
common = dict(batch_norm=True, activation=Rectlin())  # name reused: now layer defaults

layers = [Conv((3, 5, 64), init=gauss, activation=Rectlin(), strides=big),
          Pooling(2, strides=2),
          Conv((3, 3, 128), init=gauss, strides=small, **common),
          Pooling(2, strides=2),
          Conv((3, 3, 256), init=gauss, strides=small, **common),
          Conv((2, 2, 512), init=gauss, strides=tiny, **common),
          DeepBiRNN(128, init=glorot, reset_cells=True, depth=3, **common),
          RecurrentMean(),
          Affine(nout=2, init=gauss, activation=Softmax())]

model = Model(layers=layers)
opt = Adagrad(learning_rate=0.001)
callbacks = Callbacks(model, eval_set=test, **args.callback_args)
cost = GeneralizedCost(costfunc=CrossEntropyBinary())

# train on the training DataLoader; fit arguments beyond it are assumed
model.fit(train, optimizer=opt, num_epochs=args.epochs, cost=cost,
          callbacks=callbacks)