def run(be, fake_dilation, fsz, stride, pad, dilation):
    K = 8
    strides = stride
    padding = pad
    be.rng = be.gen_rng(be.rng_seed)

    in_shape = 16
    while out_shape(in_shape, fsz, stride, dilation, pad) < 3:
        in_shape *= 2
    train_shape = (1, in_shape, in_shape)

    inp = be.array(be.rng.randn(np.prod(train_shape), be.bsz))
    init = Gaussian()
    layers = [Conv((5, 5, K), init=init),
              Conv((fsz, fsz, K), strides=strides, padding=padding, init=init,
                   dilation=dict(dil_d=1, dil_h=dilation, dil_w=dilation)),
              Conv((3, 3, K), init=init),
              Affine(nout=1, init=init)]
    model = Model(layers=layers)
    cost = GeneralizedCost(costfunc=CrossEntropyBinary())
    model.initialize(train_shape, cost)

    if fake_dilation:
        # Perform regular convolution with an expanded filter.
        weights = save(model)
        new_layers = layers
        # Replace the middle layers.
        new_fsz = dilated_fsz(fsz, dilation)
        new_layers[1] = Conv((new_fsz, new_fsz, K), strides=strides,
                             padding=padding, init=init)
        model = Model(layers=new_layers)
        cost = GeneralizedCost(costfunc=CrossEntropyBinary())
        model.initialize(train_shape, cost)
        load(weights, model, K, fsz, dilation)

    print(model)
    model.optimizer = GradientDescentMomentum(learning_rate=0.01, momentum_coef=0.9)
    outputs = fprop(model, inp)
    weights = bprop(model, outputs)
    model.optimizer.optimize(model.layers_to_optimize, epoch=0)
    return outputs.get(), weights.get()
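# Sketch of the two helpers referenced above (assumptions; their definitions are
# not shown in this excerpt). They follow standard dilated-convolution
# arithmetic: the effective filter size is dilation * (fsz - 1) + 1, and the
# spatial output extent is (input + 2 * pad - effective_fsz) // stride + 1.
def dilated_fsz(fsz, dil):
    # Effective (expanded) size of a dilated filter.
    return (fsz - 1) * dil + 1


def out_shape(in_shape, fsz, stride, dil, pad):
    # Output spatial extent of a dilated convolution over a square input.
    return (in_shape + 2 * pad - dilated_fsz(fsz, dil)) // stride + 1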
def test_concat_sequence_l1_l1(backend_default, allrand_args, deltas_buffer):
    # test two linear layers that are merged with concat
    dtypeu = np.float32
    w_rng, rngmax = allrand_args
    # Diff size input steps
    nin = 128
    steps = [32, 64]
    nout = 256
    batch_size = 16
    NervanaObject.be.bsz = batch_size
    be = NervanaObject.be

    init_unif = Uniform(low=w_rng[0], high=w_rng[1])
    layers = [Sequential(Affine(nout=nout, init=init_unif)) for _ in (0, 1)]
    inputs = [be.array(dtypeu(np.random.random((nin, batch_size * step))))
              for step in steps]
    merge = MergeMultistream(layers, merge="recurrent")
    assert len(inputs) == len(layers)
    merge.configure(inputs)
    merge.allocate()

    merge.allocate_deltas(deltas_buffer)
    deltas_buffer.allocate_buffers()
    merge.set_deltas(deltas_buffer)

    out = merge.fprop(inputs).get()

    sublayers = [s.layers[0] for s in layers]
    weights = [layer.W.get() for layer in sublayers]
    out_exp = np.concatenate([np.dot(w, inp.get()) for (w, inp) in zip(weights, inputs)],
                             axis=1)
    assert allclose_with_out(out, out_exp, atol=1e-3)

    err_lst = [dtypeu(np.random.random((nout, batch_size * step))) for step in steps]
    err_concat = be.array(np.concatenate(err_lst, axis=1))
    merge.bprop(err_concat)
    dW_exp_lst = [np.dot(err, inp.get().T) for (err, inp) in zip(err_lst, inputs)]

    for layer, dW_exp in zip(sublayers, dW_exp_lst):
        assert allclose_with_out(layer.dW.get(), dW_exp)
    return
def layers(self):
    init_uni = Uniform(low=-0.1, high=0.1)
    bn = False
    return [Conv((5, 5, 16), init=init_uni, activation=Rectlin(), batch_norm=bn),
            Pooling((2, 2)),
            Conv((5, 5, 32), init=init_uni, activation=Rectlin(), batch_norm=bn),
            Pooling((2, 2)),
            Affine(nout=500, init=init_uni, activation=Rectlin(), batch_norm=bn),
            Affine(nout=self.noutputs, init=init_uni, bias=Constant(0),
                   activation=Softmax() if self.use_softmax else Logistic(shortcut=True))]
def test_model_get_outputs(backend_default, data):
    (X_train, y_train), (X_test, y_test), nclass = load_mnist(path=data)
    train_set = ArrayIterator(X_train[:backend_default.bsz * 3])

    init_norm = Gaussian(loc=0.0, scale=0.1)

    layers = [Affine(nout=20, init=init_norm, bias=init_norm, activation=Rectlin()),
              Affine(nout=10, init=init_norm, activation=Logistic(shortcut=True))]
    mlp = Model(layers=layers)
    out_list = []
    mlp.initialize(train_set)
    for x, t in train_set:
        x = mlp.fprop(x)
        out_list.append(x.get().T.copy())
    ref_output = np.vstack(out_list)

    train_set.reset()
    output = mlp.get_outputs(train_set)
    assert np.allclose(output, ref_output)

    # test model benchmark inference
    mlp.benchmark(train_set, inference=True, niterations=5)
def run(args, train, test):
    init_uni = Uniform(low=-0.1, high=0.1)
    opt_gdm = GradientDescentMomentum(learning_rate=0.01,
                                      momentum_coef=0.9,
                                      stochastic_round=args.rounding)
    layers = [Conv((5, 5, 16), init=init_uni, activation=Rectlin(), batch_norm=True),
              Pooling((2, 2)),
              Conv((5, 5, 32), init=init_uni, activation=Rectlin(), batch_norm=True),
              Pooling((2, 2)),
              Affine(nout=500, init=init_uni, activation=Rectlin(), batch_norm=True),
              Affine(nout=10, init=init_uni, activation=Softmax())]
    cost = GeneralizedCost(costfunc=CrossEntropyMulti())
    mlp = Model(layers=layers)
    callbacks = Callbacks(mlp, eval_set=test, **args.callback_args)
    mlp.fit(train, optimizer=opt_gdm, num_epochs=args.epochs, cost=cost,
            callbacks=callbacks)
    err = mlp.eval(test, metric=Misclassification()) * 100
    print('Misclassification error = %.2f%%' % err)
    return err
def test_model_get_outputs(backend_default, data):
    dataset = MNIST(path=data)
    train_set = dataset.train_iter

    init_norm = Gaussian(loc=0.0, scale=0.1)

    layers = [Affine(nout=20, init=init_norm, bias=init_norm, activation=Rectlin()),
              Affine(nout=10, init=init_norm, activation=Logistic(shortcut=True))]
    mlp = Model(layers=layers)
    out_list = []
    mlp.initialize(train_set)
    for x, t in train_set:
        x = mlp.fprop(x)
        out_list.append(x.get().T.copy())
    ref_output = np.vstack(out_list)

    train_set.reset()
    output = mlp.get_outputs(train_set)
    assert np.allclose(output, ref_output[:output.shape[0], :])

    # test model benchmark inference
    mlp.benchmark(train_set, inference=True, niterations=5)
def prepare_model(ninputs=9600, nclass=5):
    """
    Set up and compile the model architecture (logistic regression).
    """
    layers = [Affine(nout=nclass, init=Gaussian(loc=0.0, scale=0.01),
                     activation=Softmax())]
    cost = GeneralizedCost(costfunc=CrossEntropyMulti())
    opt = Adam()
    lrmodel = Model(layers=layers)

    return lrmodel, opt, cost
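# Usage sketch (an assumption, not from the original source): the returned
# (model, optimizer, cost) triple plugs into the standard neon training loop
# used elsewhere in this file. `train_set` and `valid_set` are placeholder
# ArrayIterator objects.
def train_logreg(train_set, valid_set, num_epochs=5):
    lrmodel, opt, cost = prepare_model(nclass=5)
    callbacks = Callbacks(lrmodel, eval_set=valid_set)
    lrmodel.fit(train_set, optimizer=opt, cost=cost, num_epochs=num_epochs,
                callbacks=callbacks)
    return lrmodel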
def gen_model(backend_type):
    # setup backend
    gen_backend(backend=backend_type,
                batch_size=batch_size,
                rng_seed=2,
                device_id=args.device_id,
                default_dtype=args.datatype)

    init_uni = Uniform(low=-0.1, high=0.1)

    # Set up the model layers
    layers = []
    layers.append(Conv((5, 5, 16), init=init_uni, bias=Constant(0), activation=Rectlin()))
    layers.append(Pooling(2))
    layers.append(Conv((5, 5, 32), init=init_uni, activation=Rectlin()))
    layers.append(Pooling(2))
    layers.append(Affine(nout=500, init=init_uni, activation=Rectlin()))
    layers.append(Affine(nout=10, init=init_uni, activation=Logistic(shortcut=True)))

    mlp = Model(layers=layers)
    return mlp
def test_multi_optimizer(backend_default_mkl):
    """
    A test for MultiOptimizer.
    """
    opt_gdm = GradientDescentMomentum(learning_rate=0.001, momentum_coef=0.9, wdecay=0.005)
    opt_ada = Adadelta()
    opt_adam = Adam()
    opt_rms = RMSProp()
    opt_rms_1 = RMSProp(gradient_clip_value=5)
    init_one = Gaussian(scale=0.01)

    l1 = Conv((11, 11, 64), strides=4, padding=3,
              init=init_one, bias=Constant(0), activation=Rectlin())
    l2 = Affine(nout=4096, init=init_one, bias=Constant(1), activation=Rectlin())
    l3 = LSTM(output_size=1000, init=init_one, activation=Logistic(), gate_activation=Tanh())
    l4 = GRU(output_size=100, init=init_one, activation=Logistic(), gate_activation=Tanh())
    layers = [l1, l2, l3, l4]
    layer_list = []
    for layer in layers:
        if isinstance(layer, list):
            layer_list.extend(layer)
        else:
            layer_list.append(layer)
    for l in layer_list:
        l.configure(in_obj=(16, 28, 28))
        l.allocate()
    # separate layer_list into two: the last two recurrent layers and the rest
    layer_list1, layer_list2 = layer_list[:-2], layer_list[-2:]
    opt = MultiOptimizer({'default': opt_gdm,
                          'Bias': opt_ada,
                          'Convolution': opt_adam,
                          'Convolution_bias': opt_adam,
                          'Linear': opt_rms,
                          'LSTM': opt_rms_1,
                          'GRU': opt_rms_1})
    layers_to_optimize1 = [l for l in layer_list1 if isinstance(l, ParameterLayer)]
    layers_to_optimize2 = [l for l in layer_list2 if isinstance(l, ParameterLayer)]
    opt.optimize(layers_to_optimize1, 0)
    # compare class names with ==, not identity
    assert opt.map_list[opt_adam][0].__class__.__name__ == 'Convolution_bias'
    assert opt.map_list[opt_rms][0].__class__.__name__ == 'Linear'
    opt.optimize(layers_to_optimize2, 0)
    assert opt.map_list[opt_rms_1][0].__class__.__name__ == 'LSTM'
    assert opt.map_list[opt_rms_1][1].__class__.__name__ == 'GRU'
def resnet(depth, num_classes, s):
    # Structure of the deep residual part of the network:
    # args.depth modules of 2 convolutional layers each at feature map depths of 16, 32, 64
    nfms = [2**(stage + 4) for stage in sorted(list(range(3)) * depth)]
    strides = [1] + [1 if cur == prev else 2 for cur, prev in zip(nfms[1:], nfms[:-1])]

    # Now construct the network
    layers = [Conv(**conv_params(3, 16))]
    for nfm, stride in zip(nfms, strides):
        layers.append(module_factory(nfm, stride))
    layers.append(Pooling(s, op='avg'))
    layers.append(Affine(nout=num_classes, init=Kaiming(local=False),
                         batch_norm=True, activation=Softmax()))
    return layers
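# One plausible definition of the `conv_params` helper referenced above (an
# assumption; the original helper and `module_factory` are not shown in this
# excerpt). It simply packs the keyword arguments that a neon Conv layer
# accepts for an fsize x fsize convolution with nfm feature maps.
def conv_params(fsize, nfm, stride=1, relu=True, batch_norm=True):
    return dict(fshape=(fsize, fsize, nfm),
                strides=stride,
                padding=(1 if fsize > 1 else 0),
                activation=(Rectlin() if relu else None),
                init=Kaiming(local=True),
                batch_norm=batch_norm)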
def test_model_get_outputs_rnn(backend_default, data):
    data_path = load_text('ptb-valid', path=data)
    data_set = Text(time_steps=50, path=data_path)

    # weight initialization
    init = Constant(0.08)

    # model initialization
    layers = [Recurrent(150, init, Logistic()),
              Affine(len(data_set.vocab), init, bias=init, activation=Rectlin())]

    model = Model(layers=layers)
    output = model.get_outputs(data_set)

    assert output.shape == (data_set.ndata, data_set.seq_length, data_set.nclass)
def test_concat_l1_l1(backend_default, allrand_args):
    # test two linear layers that are merged with concat
    dtypeu = np.float32
    w_rng, rngmax = allrand_args
    # Diff size inputs and outputs
    nins = [128, 1024]
    nouts = [64, 2048]
    batch_size = 16
    NervanaObject.be.bsz = NervanaObject.be.bs = batch_size
    be = NervanaObject.be

    init_unif = Uniform(low=w_rng[0], high=w_rng[1])
    layers = [Sequential(Affine(nout=nout, init=init_unif)) for nout in nouts]
    inputs = [be.array(dtypeu(np.random.random((nin, batch_size)))) for nin in nins]
    merge = MergeMultistream(layers, merge="stack")
    assert len(inputs) == len(layers)
    merge.configure(inputs)
    merge.allocate()
    merge.set_deltas(None)
    out = merge.fprop(inputs).asnumpyarray()

    sublayers = [s.layers[0] for s in layers]
    weights = [layer.W.asnumpyarray() for layer in sublayers]
    out_exp = np.concatenate([np.dot(w, inp.get()) for (w, inp) in zip(weights, inputs)])
    assert np.allclose(out, out_exp, atol=1e-3)

    err_lst = [dtypeu(np.random.random((nout, batch_size))) for nout in nouts]
    err_concat = np.concatenate(err_lst)
    merge.bprop(be.array(err_concat))
    dW_exp_lst = [np.dot(err, inp.asnumpyarray().T) for (err, inp) in zip(err_lst, inputs)]

    for layer, dW_exp in zip(sublayers, dW_exp_lst):
        assert np.allclose(layer.dW.asnumpyarray(), dW_exp)
    return
def main_branch(branch_nodes):
    return [Conv((7, 7, 64), padding=3, strides=2, **common),
            Pooling(**pool3s2p1),
            Conv((1, 1, 64), **common),
            Conv((3, 3, 192), **commonp1),
            Pooling(**pool3s2p1),
            inception([(64,), (96, 128), (16, 32), (32,)]),
            inception([(128,), (128, 192), (32, 96), (64,)]),
            Pooling(**pool3s2p1),
            inception([(192,), (96, 208), (16, 48), (64,)]),
            branch_nodes[0],
            inception([(160,), (112, 224), (24, 64), (64,)]),
            inception([(128,), (128, 256), (24, 64), (64,)]),
            inception([(112,), (144, 288), (32, 64), (64,)]),
            branch_nodes[1],
            inception([(256,), (160, 320), (32, 128), (128,)]),
            Pooling(**pool3s2p1),
            inception([(256,), (160, 320), (32, 128), (128,)]),
            inception([(384,), (192, 384), (48, 128), (128,)]),
            Pooling(fshape=7, strides=1, op="avg"),
            Affine(nout=1000, init=init1, activation=Softmax(), bias=Constant(0))]
def create_model(vocab_size, rlayer_type):
    """
    Create LSTM/GRU model for bAbI dataset.

    Args:
        vocab_size (int): Size of the bAbI vocabulary.
        rlayer_type (str): Type of recurrent layer to use (gru or lstm).

    Returns:
        Model: Model of the created network.
    """
    # recurrent layer parameters (default gru)
    rlayer_obj = GRU if rlayer_type == 'gru' else LSTM
    rlayer_params = dict(output_size=100, reset_cells=True,
                         init=GlorotUniform(), init_inner=Orthonormal(0.5),
                         activation=Tanh(), gate_activation=Logistic())

    # if using lstm, swap the activation functions
    if rlayer_type == 'lstm':
        rlayer_params.update(dict(activation=Logistic(), gate_activation=Tanh()))

    # lookup layer parameters
    lookup_params = dict(vocab_size=vocab_size, embedding_dim=50,
                         init=Uniform(-0.05, 0.05))

    # Model construction
    story_path = [LookupTable(**lookup_params), rlayer_obj(**rlayer_params)]
    query_path = [LookupTable(**lookup_params), rlayer_obj(**rlayer_params)]

    layers = [MergeMultistream(layers=[story_path, query_path], merge="stack"),
              Affine(vocab_size, init=GlorotUniform(), activation=Softmax())]

    return Model(layers=layers)
def main_branch(branch_nodes):
    return [Conv((7, 7, 64), padding=3, strides=2, name='conv1/7x7_s2', **common),
            Pooling(name='pool1/3x3_s2', **pool3s2p1),
            Conv((1, 1, 64), name='conv2/3x3_reduce', **common),
            Conv((3, 3, 192), name='conv2/3x3', **commonp1),
            Pooling(name='pool2/3x3_s2', **pool3s2p1),
            inception([(64,), (96, 128), (16, 32), (32,)], name='inception_3a/'),
            inception([(128,), (128, 192), (32, 96), (64,)], name='inception_3b/'),
            Pooling(name='pool3/3x3_s2', **pool3s2p1),
            inception([(192,), (96, 208), (16, 48), (64,)], name='inception_4a/'),
            branch_nodes[0],
            inception([(160,), (112, 224), (24, 64), (64,)], name='inception_4b/'),
            inception([(128,), (128, 256), (24, 64), (64,)], name='inception_4c/'),
            inception([(112,), (144, 288), (32, 64), (64,)], name='inception_4d/'),
            branch_nodes[1],
            inception([(256,), (160, 320), (32, 128), (128,)], name='inception_4e/'),
            Pooling(name='pool4/3x3_s2', **pool3s2p1),
            inception([(256,), (160, 320), (32, 128), (128,)], name='inception_5a/'),
            inception([(384,), (192, 384), (48, 128), (128,)], name='inception_5b/'),
            Pooling(fshape=7, strides=1, op="avg", name='pool5/7x7_s1'),
            Affine(nout=1000, init=init1, activation=Softmax(), bias=Constant(0),
                   name='loss3/classifier')]
def test_multi_optimizer(backend_default):
    opt_gdm = GradientDescentMomentum(learning_rate=0.001, momentum_coef=0.9, wdecay=0.005)
    opt_ada = Adadelta()
    opt_adam = Adam()
    opt_rms = RMSProp()
    opt_rms_1 = RMSProp(gradient_clip_value=5)
    init_one = Gaussian(scale=0.01)

    l1 = Conv((11, 11, 64), strides=4, padding=3,
              init=init_one, bias=Constant(0), activation=Rectlin())
    l2 = Affine(nout=4096, init=init_one, bias=Constant(1), activation=Rectlin())
    l3 = LSTM(output_size=1000, init=init_one, activation=Logistic(), gate_activation=Tanh())
    l4 = GRU(output_size=100, init=init_one, activation=Logistic(), gate_activation=Tanh())
    layers = [l1, l2, l3, l4]
    layer_list = []
    for layer in layers:
        if isinstance(layer, list):
            layer_list.extend(layer)
        else:
            layer_list.append(layer)

    opt = MultiOptimizer({'default': opt_gdm,
                          'Bias': opt_ada,
                          'Convolution': opt_adam,
                          'Linear': opt_rms,
                          'LSTM': opt_rms_1,
                          'GRU': opt_rms_1})

    map_list = opt.map_optimizers(layer_list)
    assert map_list[opt_adam][0].__class__.__name__ == 'Convolution'
    assert map_list[opt_ada][0].__class__.__name__ == 'Bias'
    assert map_list[opt_rms][0].__class__.__name__ == 'Linear'
    assert map_list[opt_gdm][0].__class__.__name__ == 'Activation'
    assert map_list[opt_rms_1][0].__class__.__name__ == 'LSTM'
    assert map_list[opt_rms_1][1].__class__.__name__ == 'GRU'
from neon.layers import GeneralizedCost
from neon.transforms import CrossEntropyMulti
from neon.optimizers import GradientDescentMomentum, RMSProp
from neon.callbacks.callbacks import Callbacks
from neon.data import ArrayIterator
from Readfile import DataSet, readfile

be = gen_backend(backend='cpu', batch_size=30)

init_uni = Uniform(low=-0.1, high=0.1)

layers = [Conv(fshape=(4, 4, 16), init=init_uni, activation=Rectlin()),
          Pooling(fshape=2, strides=2),
          Conv(fshape=(4, 4, 32), init=init_uni, activation=Rectlin()),
          Pooling(fshape=2, strides=2),
          Conv(fshape=(4, 4, 32), init=init_uni, activation=Rectlin()),
          Pooling(fshape=2, strides=2),
          Affine(nout=500, init=init_uni, activation=Rectlin()),
          Affine(nout=11, init=init_uni, activation=Softmax())]

model = Model(layers)
model.load_params('model.pkl')

data = readfile('PreImage', 'label.csv')
X_test = data.test_data
test_set = ArrayIterator(X_test, None, nclass=11, lshape=(1, 200, 200))
true = data.test_label
out = model.get_outputs(test_set)
row = len(X_test)
pred = np.zeros((row, 1))
i = 0
while i < row:
# download shakespeare text
data_path = load_shakespeare(path=args.data_dir)
train_path, valid_path = Text.create_valid_file(data_path)

# load data and parse on character-level
train_set = Text(time_steps, train_path)
valid_set = Text(time_steps, valid_path, vocab=train_set.vocab)

# weight initialization
init = Uniform(low=-0.08, high=0.08)

# model initialization
layers = [LSTM(hidden_size, init, activation=Logistic(), gate_activation=Tanh()),
          Affine(len(train_set.vocab), init, bias=init, activation=Softmax())]

model = Model(layers=layers)

cost = GeneralizedCost(costfunc=CrossEntropyMulti(usebits=True))

optimizer = RMSProp(gradient_clip_value=gradient_clip_value,
                    stochastic_round=args.rounding)

# configure callbacks
callbacks = Callbacks(model, eval_set=valid_set, **args.callback_args)

# fit and validate
model.fit(train_set, optimizer=optimizer, num_epochs=args.epochs,
tiny = dict(str_h=1, str_w=1)
small = dict(str_h=1, str_w=2)
big = dict(str_h=1, str_w=4)
common = dict(batch_norm=True, activation=Rectlin())
layers = {
    1: [Conv((3, 5, 64), init=gauss, strides=big, **common),
        Pooling(2, strides=2),
        Conv((3, 3, 128), init=gauss, strides=small, **common),
        Pooling(2, strides=2),
        Conv((3, 3, 256), init=gauss, strides=small, **common),
        Conv((2, 2, 512), init=gauss, strides=tiny, **common),
        Conv((2, 2, 128), init=gauss, strides=tiny, **common),
        DeepBiRNN(64, init=glorot, reset_cells=True, depth=3, **common),
        RecurrentMean(),
        Affine(nout=2, init=gauss, activation=Softmax())],
    2: [Conv((3, 5, 64), init=gauss, strides=big, **common),
        Pooling(2, strides=2),
        Dropout(0.8),
        Conv((3, 3, 128), init=gauss, strides=small, **common),
        Pooling(2, strides=2),
        Dropout(0.4),
        Conv((3, 3, 256), init=gauss, strides=small, **common),
        Dropout(0.2),
        Conv((2, 2, 512), init=gauss, strides=tiny, **common),
        Conv((2, 2, 128), init=gauss, strides=tiny, **common),
        DeepBiRNN(64, init=glorot, reset_cells=True, depth=5, **common),
        RecurrentMean(),
        Affine(nout=2, init=gauss, activation=Softmax())
from neon.util.argparser import NeonArgparser

# parse the command line arguments
parser = NeonArgparser(__doc__)
args = parser.parse_args()

dataset = MNIST(path=args.data_dir)
train_set = dataset.train_iter
valid_set = dataset.valid_iter

# weight initialization
init_norm = Gaussian(loc=0.0, scale=0.01)

# initialize model
layers = []
layers.append(Affine(nout=100, init=init_norm, bias=Constant(0), activation=Rectlin()))
layers.append(Affine(nout=10, init=init_norm, bias=Constant(0),
                     activation=Logistic(shortcut=True), name='special_linear'))

cost = GeneralizedCost(costfunc=CrossEntropyBinary())
mlp = Model(layers=layers)

# fit and validate
optimizer_one = GradientDescentMomentum(learning_rate=0.1, momentum_coef=0.9)
optimizer_two = RMSProp()

# all bias layers and the last linear layer will use
# optimizer_two. all other layers will use optimizer_one.
opt = MultiOptimizer({'default': optimizer_one,
                      'Bias': optimizer_two,
valid_set = DataIteratorSequence(time_series.test, seq_len, return_sequences=return_sequences)

# define weights initialization
init = GlorotUniform()  # Uniform(low=-0.08, high=0.08)

# define model: model is different for the 2 strategies (sequence target or not)
if return_sequences is True:
    layers = [LSTM(hidden, init, activation=Logistic(), gate_activation=Tanh(),
                   reset_cells=False),
              Affine(train_set.nfeatures, init, bias=init, activation=Identity())]
else:
    layers = [LSTM(hidden, init, activation=Logistic(), gate_activation=Tanh(),
                   reset_cells=True),
              RecurrentLast(),
              Affine(train_set.nfeatures, init, bias=init, activation=Identity())]

model = Model(layers=layers)
cost = GeneralizedCost(MeanSquared())
optimizer = RMSProp(stochastic_round=args.rounding)
                       batch_norm=False, bi_sum=False)
elif args.rlayer_type == 'bibnrnn':
    rlayer = DeepBiRNN(hidden_size, g_uni, activation=Tanh(),
                       depth=1, reset_cells=True, batch_norm=True)

layers = [LookupTable(vocab_size=vocab_size, embedding_dim=embedding_dim, init=uni),
          rlayer,
          RecurrentSum(),
          Dropout(keep=0.5),
          Affine(2, g_uni, bias=g_uni, activation=Softmax())]

model = Model(layers=layers)

cost = GeneralizedCost(costfunc=CrossEntropyMulti(usebits=True))
optimizer = Adagrad(learning_rate=0.01, gradient_clip_value=gradient_clip_value)

# configure callbacks
callbacks = Callbacks(model, eval_set=valid_set, **args.callback_args)

# train model
model.fit(train_set, optimizer=optimizer, num_epochs=args.epochs,
# setup backend
be = gen_backend(**extract_valid_args(args, gen_backend))

# download dataset
data_path = load_flickr8k(path=args.data_dir)  # Other setnames are flickr30k and coco

# load data
train_set = ImageCaption(path=data_path, max_images=-1)

# weight initialization
init = Uniform(low=-0.08, high=0.08)
init2 = Constant(val=train_set.be.array(train_set.bias_init))

# model initialization
image_path = Sequential([Affine(hidden_size, init, bias=Constant(val=0.0))])
sent_path = Sequential([Affine(hidden_size, init, linear_name='sent')])

layers = [MergeMultistream(layers=[image_path, sent_path], merge="recurrent"),
          Dropout(keep=0.5),
          LSTM(hidden_size, init, activation=Logistic(), gate_activation=Tanh(),
               reset_cells=True),
          Affine(train_set.vocab_size, init, bias=init2, activation=Softmax())]

cost = GeneralizedCostMask(costfunc=CrossEntropyMulti(usebits=True))

# configure callbacks
checkpoint_model_path = "~/image_caption2.pickle"
if args.callback_args['save_path'] is None:
    args.callback_args['save_path'] = checkpoint_model_path
def test_model_serialize(backend):
    (X_train, y_train), (X_test, y_test), nclass = load_mnist()
    train_set = DataIterator([X_train, X_train], y_train, nclass=nclass, lshape=(1, 28, 28))

    init_norm = Gaussian(loc=0.0, scale=0.01)

    # initialize model
    path1 = [Conv((5, 5, 16), init=init_norm, bias=Constant(0), activation=Rectlin()),
             Pooling(2),
             Affine(nout=20, init=init_norm, bias=init_norm, activation=Rectlin())]
    path2 = [Dropout(keep=0.5),
             Affine(nout=20, init=init_norm, bias=init_norm, activation=Rectlin())]
    layers = [MergeConcat([path1, path2]),
              Affine(nout=20, init=init_norm, bias=init_norm, activation=Rectlin()),
              BatchNorm(),
              Affine(nout=10, init=init_norm, activation=Logistic(shortcut=True))]

    tmp_save = 'test_model_serialize_tmp_save.pickle'
    mlp = Model(layers=layers)
    mlp.optimizer = GradientDescentMomentum(learning_rate=0.1, momentum_coef=0.9)
    mlp.cost = GeneralizedCost(costfunc=CrossEntropyBinary())

    n_test = 3
    num_epochs = 3
    # Train model for num_epochs and n_test batches
    for epoch in range(num_epochs):
        for i, (x, t) in enumerate(train_set):
            x = mlp.fprop(x)
            delta = mlp.cost.get_errors(x, t)
            mlp.bprop(delta)
            mlp.optimizer.optimize(mlp.layers_to_optimize, epoch=epoch)
            if i > n_test:
                break

    # Get expected outputs of n_test batches and states of all layers
    outputs_exp = []
    pdicts_exp = [l.get_params_serialize() for l in mlp.layers_to_optimize]
    for i, (x, t) in enumerate(train_set):
        outputs_exp.append(mlp.fprop(x, inference=True))
        if i > n_test:
            break

    # Serialize model
    save_obj(mlp.serialize(keep_states=True), tmp_save)

    # Load model
    mlp = Model(layers=layers)
    mlp.load_weights(tmp_save)

    outputs = []
    pdicts = [l.get_params_serialize() for l in mlp.layers_to_optimize]
    for i, (x, t) in enumerate(train_set):
        outputs.append(mlp.fprop(x, inference=True))
        if i > n_test:
            break

    # Check outputs, states, and params are the same
    for output, output_exp in zip(outputs, outputs_exp):
        assert np.allclose(output.get(), output_exp.get())

    for pd, pd_exp in zip(pdicts, pdicts_exp):
        for s, s_e in zip(pd['states'], pd_exp['states']):
            if isinstance(s, list):  # this is the batch norm case
                for _s, _s_e in zip(s, s_e):
                    assert np.allclose(_s, _s_e)
            else:
                assert np.allclose(s, s_e)
        for p, p_e in zip(pd['params'], pd_exp['params']):
            if isinstance(p, list):  # this is the batch norm case
                for _p, _p_e in zip(p, p_e):
                    assert np.allclose(_p, _p_e)
            else:
                assert np.allclose(p, p_e)

    os.remove(tmp_save)
(X_train, y_train), (X_test, y_test), nclass = mnist.load_data()
train_set = ArrayIterator(X_train, y_train, nclass=nclass, lshape=(1, 28, 28))
valid_set = ArrayIterator(X_test, y_test, nclass=nclass, lshape=(1, 28, 28))

# define model
nfilters = [20, 50, 500]
# nfilters = [24, 56, 500]
init_w = Gaussian(scale=0.01)
relu = Rectlin()
common_params = dict(init=init_w, activation=relu)
layers = [Conv((5, 5, nfilters[0]), bias=Constant(0.1), padding=0, **common_params),
          Pooling(2, strides=2, padding=0),
          Conv((5, 5, nfilters[1]), bias=Constant(0.1), padding=0, **common_params),
          Pooling(2, strides=2, padding=0),
          Affine(nout=nfilters[2], bias=Constant(0.1), **common_params),
          Affine(nout=10, bias=Constant(0.1), activation=Softmax(),
                 init=Gaussian(scale=0.01))]

model = Model(layers=layers)
cost = GeneralizedCost(costfunc=CrossEntropyMulti())

model.initialize(train_set, cost)
model.load_params('models/mnist/mnist_cnn.pkl', load_states=False)

# define optimizer
opt_w = GradientDescentMomentum(learning_rate=0.01, momentum_coef=0.9, wdecay=0.0005)
init_emb = Uniform(low=-0.1 / embedding_dim, high=0.1 / embedding_dim)
nclass = 2
layers = [
    LookupTable(vocab_size=vocab_size, embedding_dim=embedding_dim, init=init_emb,
                pad_idx=0, update=True),
    LSTM(hidden_size, init_glorot, activation=Tanh(),
         gate_activation=Logistic(), reset_cells=True),
    RecurrentSum(),
    Dropout(keep=0.5),
    Affine(nclass, init_glorot, bias=init_glorot, activation=Softmax())
]

# load the weights
print("Initialized the models - ")
model_new = Model(layers=layers)
print("Loading the weights from {0}".format(args.model_weights))

model_new.load_params(args.model_weights)
model_new.initialize(dataset=(sentence_length, batch_size))

# setup buffers before accepting reviews
xdev = be.zeros((sentence_length, 1), dtype=np.int32)  # bsz is 1, feature size
xbuf = np.zeros((1, sentence_length), dtype=np.int32)
oov = 2
start = 1
# parse the command line arguments
parser = NeonArgparser(__doc__)
args = parser.parse_args()

# hyperparameters
num_epochs = args.epochs

(X_train, y_train), (X_test, y_test), nclass = load_mnist(path=args.data_dir)
train_set = ArrayIterator([X_train, X_train], y_train, nclass=nclass, lshape=(1, 28, 28))
valid_set = ArrayIterator([X_test, X_test], y_test, nclass=nclass, lshape=(1, 28, 28))

# weight initialization
init_norm = Gaussian(loc=0.0, scale=0.01)

# initialize model
path1 = Sequential(layers=[Affine(nout=100, init=init_norm, activation=Rectlin()),
                           Affine(nout=100, init=init_norm, activation=Rectlin())])

path2 = Sequential(layers=[Affine(nout=100, init=init_norm, activation=Rectlin()),
                           Affine(nout=100, init=init_norm, activation=Rectlin())])

layers = [MergeMultistream(layers=[path1, path2], merge="stack"),
          Affine(nout=10, init=init_norm, activation=Logistic(shortcut=True))]

model = Model(layers=layers)
cost = GeneralizedCost(costfunc=CrossEntropyBinary())

# fit and validate
optimizer = GradientDescentMomentum(learning_rate=0.1, momentum_coef=0.9)

# configure callbacks
# parse the command line arguments
parser = NeonArgparser(__doc__)
args = parser.parse_args()

(X_train, y_train), (X_test, y_test), nclass = load_cifar10(path=args.data_dir)
train = ArrayIterator(X_train, y_train, nclass=nclass, lshape=(3, 32, 32))
test = ArrayIterator(X_test, y_test, nclass=nclass, lshape=(3, 32, 32))

init_uni = Uniform(low=-0.1, high=0.1)
opt_gdm = GradientDescentMomentum(learning_rate=0.01, momentum_coef=0.9)

# set up the model layers
layers = [Affine(nout=200, init=init_uni, activation=Rectlin()),
          Affine(nout=10, init=init_uni, activation=Logistic(shortcut=True))]

cost = GeneralizedCost(costfunc=CrossEntropyBinary())

mlp = Model(layers=layers)

# configure callbacks
callbacks = Callbacks(mlp, eval_set=test, **args.callback_args)

mlp.fit(train, optimizer=opt_gdm, num_epochs=args.epochs, cost=cost, callbacks=callbacks)
# setup a validation data set iterator
valid_set = DataIterator(X_test, y_test, nclass=nclass)

# setup weight initialization function
init_norm = Gaussian(loc=0.0, scale=0.01)

normrelu = dict(init=init_norm, activation=Rectlin())
normsigm = dict(init=init_norm, activation=Logistic(shortcut=True))
normsoft = dict(init=init_norm, activation=Softmax())

# setup model layers
b1 = BranchNode(name="b1")
b2 = BranchNode(name="b2")

p1 = [Affine(nout=100, linear_name="m_l1", **normrelu),
      b1,
      Affine(nout=32, linear_name="m_l2", **normrelu),
      Affine(nout=16, linear_name="m_l3", **normrelu),
      b2,
      Affine(nout=10, linear_name="m_l4", **normsoft)]

p2 = [b1,
      Affine(nout=16, linear_name="b1_l1", **normrelu),
      Affine(nout=10, linear_name="b1_l2", **normsigm)]

p3 = [b2,
      Affine(nout=16, linear_name="b2_l1", **normrelu),
      Affine(nout=10, linear_name="b2_l2", **normsigm)]
parser = NeonArgparser(__doc__)
args = parser.parse_args()

NervanaObject.be.enable_winograd = 4

# setup data provider
X_train = np.random.uniform(-1, 1, (128, 3 * 224 * 224))
y_train = np.random.uniform(-1, 1, (128, 1000))
train = ArrayIterator(X_train, y_train, nclass=1000, lshape=(3, 224, 224))

layers = [Conv((11, 11, 64), init=Gaussian(scale=0.01), activation=Rectlin(),
               padding=3, strides=4),
          Pooling(3, strides=2),
          Conv((5, 5, 192), init=Gaussian(scale=0.01), activation=Rectlin(), padding=2),
          Pooling(3, strides=2),
          Conv((3, 3, 384), init=Gaussian(scale=0.03), activation=Rectlin(), padding=1),
          Conv((3, 3, 256), init=Gaussian(scale=0.03), activation=Rectlin(), padding=1),
          Conv((3, 3, 256), init=Gaussian(scale=0.03), activation=Rectlin(), padding=1),
          Pooling(3, strides=2),
          Affine(nout=4096, init=Gaussian(scale=0.01), activation=Rectlin()),
          Affine(nout=4096, init=Gaussian(scale=0.01), activation=Rectlin()),
          Affine(nout=1000, init=Gaussian(scale=0.01), activation=Softmax())]
model = Model(layers=layers)

weight_sched = Schedule([22, 44, 65], (1 / 250.)**(1 / 3.))
opt_gdm = GradientDescentMomentum(0.01, 0.0, wdecay=0.0005, schedule=weight_sched)
opt = MultiOptimizer({'default': opt_gdm})
cost = GeneralizedCost(costfunc=CrossEntropyMulti())

model.benchmark(train, cost=cost, optimizer=opt, niterations=10, nskip=5)