# Load CIFAR-10 and wrap the train/test splits in array iterators.
(X_train, y_train), (X_test, y_test), nclass = load_cifar10(path=args.data_dir)
image_shape = (3, 32, 32)
train = ArrayIterator(X_train, y_train, nclass=nclass, lshape=image_shape)
test = ArrayIterator(X_test, y_test, nclass=nclass, lshape=image_shape)

# Initializer used by both layers, and the optimizer handed to fit() below.
init_uni = Uniform(low=-0.1, high=0.1)
opt_gdm = GradientDescentMomentum(learning_rate=0.01, momentum_coef=0.9)

# set up the model layers: a 200-unit hidden layer and a 10-unit output layer
hidden_layer = Affine(nout=200, init=init_uni, activation=Rectlin())
output_layer = Affine(nout=10, init=init_uni,
                      activation=Logistic(shortcut=True))
layers = [hidden_layer, output_layer]

cost = GeneralizedCost(costfunc=CrossEntropyBinary())
mlp = Model(layers=layers)

# configure callbacks
callbacks = Callbacks(mlp, eval_set=test, **args.callback_args)

mlp.fit(
    train,
    optimizer=opt_gdm,
    num_epochs=args.epochs,
    cost=cost,
    callbacks=callbacks,
)

# Report the misclassification rate on the test split as a percentage.
error_rate = mlp.eval(test, metric=Misclassification())
print('Misclassification error = %.1f%%' % (error_rate * 100))
def test_reshape_layer_model(backend_default, fargs):
    """
    Smoke-test fprop/bprop through models that contain a Reshape layer.

    test cases:
        - conv before RNNs
        - conv after RNNs
        - conv after LUT

    `fargs` is unpacked as (nin, nout, bsz); the backend batch size is
    overwritten with `bsz` before any layer is built.
    """
    np.random.seed(seed=0)

    nin, nout, bsz = fargs
    be = backend_default
    be.bsz = bsz
    input_size = (nin, be.bsz)

    init = Uniform(-0.1, 0.1)
    g_uni = GlorotUniform()

    # Random input and error tensors, moved onto the backend.
    inp = be.array(np.random.rand(nin, be.bsz))
    delta = be.array(np.random.rand(nout, be.bsz))

    layers_test = [
        # conv_lut_1: LUT -> reshape -> conv -> LSTM
        [
            LookupTable(vocab_size=2000, embedding_dim=400, init=init),
            Reshape(reshape=(4, 100, -1)),
            Conv((3, 3, 16), init=init),
            LSTM(64, g_uni, activation=Tanh(),
                 gate_activation=Logistic(), reset_cells=True),
            RecurrentSum(),
            Affine(nout, init, bias=init, activation=Softmax()),
        ],
        # conv_lut_2: LUT -> reshape -> conv -> pooling
        [
            LookupTable(vocab_size=1000, embedding_dim=400, init=init),
            Reshape(reshape=(4, 50, -1)),
            Conv((3, 3, 16), init=init),
            Pooling(2, strides=2),
            Affine(nout=nout, init=init, bias=init, activation=Softmax()),
        ],
        # conv_rnn_1: LUT -> LSTM -> reshape -> conv
        [
            LookupTable(vocab_size=2000, embedding_dim=400, init=init),
            LSTM(64, g_uni, activation=Tanh(),
                 gate_activation=Logistic(), reset_cells=True),
            Reshape(reshape=(4, 32, -1)),
            Conv((3, 3, 16), init=init),
            Affine(nout, init, bias=init, activation=Softmax()),
        ],
        # conv_rnn_2: LUT -> Recurrent -> reshape -> conv
        [
            LookupTable(vocab_size=2000, embedding_dim=400, init=init),
            Recurrent(64, g_uni, activation=Tanh(), reset_cells=True),
            Reshape(reshape=(4, -1, 32)),
            Conv((3, 3, 16), init=init),
            Affine(nout, init, bias=init, activation=Softmax()),
        ],
        # lut_sum_1: LUT -> RecurrentSum (no reshape layer)
        [
            LookupTable(vocab_size=1000, embedding_dim=128, init=init),
            RecurrentSum(),
            Affine(nout=nout, init=init, bias=init, activation=Softmax()),
        ],
        # lut_birnn_1: LUT -> bidirectional RNN -> reshape -> conv
        [
            LookupTable(vocab_size=1000, embedding_dim=200, init=init),
            DeepBiRNN(32, init=GlorotUniform(), batch_norm=True,
                      activation=Tanh(), reset_cells=True, depth=1),
            Reshape((4, 32, -1)),
            Conv((3, 3, 16), init=init),
            Affine(nout=nout, init=init, bias=init, activation=Softmax()),
        ],
    ]

    # Each configuration only has to initialize and survive one
    # forward/backward pass; no numeric result is checked.
    for layer_group in layers_test:
        model = Model(layers=layer_group)
        cost = GeneralizedCost(costfunc=CrossEntropyBinary())
        model.initialize(input_size, cost)
        model.fprop(inp)
        model.bprop(delta)
def test_model_serialize(backend):
    """
    Train a small merged model briefly, save it, reload it, and check that
    inference outputs, layer states and layer params match what was saved.

    The temporary pickle is removed in a ``finally`` clause so that a failing
    assertion does not leave the file behind in the working directory.
    """
    (X_train, y_train), (X_test, y_test), nclass = load_mnist()
    train_set = DataIterator([X_train, X_train], y_train,
                             nclass=nclass, lshape=(1, 28, 28))

    init_norm = Gaussian(loc=0.0, scale=0.01)

    # initialize model
    path1 = [
        Conv((5, 5, 16), init=init_norm, bias=Constant(0),
             activation=Rectlin()),
        Pooling(2),
        Affine(nout=20, init=init_norm, bias=init_norm, activation=Rectlin()),
    ]
    path2 = [
        Dropout(keep=0.5),
        Affine(nout=20, init=init_norm, bias=init_norm, activation=Rectlin()),
    ]
    layers = [
        MergeConcat([path1, path2]),
        Affine(nout=20, init=init_norm, bias=init_norm, activation=Rectlin()),
        BatchNorm(),
        Affine(nout=10, init=init_norm, activation=Logistic(shortcut=True)),
    ]

    tmp_save = 'test_model_serialize_tmp_save.pickle'
    mlp = Model(layers=layers)
    mlp.optimizer = GradientDescentMomentum(learning_rate=0.1,
                                            momentum_coef=0.9)
    mlp.cost = GeneralizedCost(costfunc=CrossEntropyBinary())

    n_test = 3
    num_epochs = 3

    def _inference_outputs(model):
        """Collect inference outputs for the leading batches of train_set."""
        collected = []
        for i, (x, t) in enumerate(train_set):
            collected.append(model.fprop(x, inference=True))
            # The check runs after the batch is processed, so batches
            # 0 .. n_test + 1 are all included.
            if i > n_test:
                break
        return collected

    def _assert_close(actual, expected):
        """Compare one entry; list entries (batch norm case) go item by item."""
        if isinstance(actual, list):
            for item, item_exp in zip(actual, expected):
                assert np.allclose(item, item_exp)
        else:
            assert np.allclose(actual, expected)

    # Train model for num_epochs, stopping each epoch once i exceeds n_test
    for epoch in range(num_epochs):
        for i, (x, t) in enumerate(train_set):
            x = mlp.fprop(x)
            delta = mlp.cost.get_errors(x, t)
            mlp.bprop(delta)
            mlp.optimizer.optimize(mlp.layers_to_optimize, epoch=epoch)
            if i > n_test:
                break

    # Get expected states of all layers and outputs of the leading batches
    pdicts_exp = [l.get_params_serialize() for l in mlp.layers_to_optimize]
    outputs_exp = _inference_outputs(mlp)

    try:
        # Serialize model
        save_obj(mlp.serialize(keep_states=True), tmp_save)

        # Load model
        mlp = Model(layers=layers)
        mlp.load_weights(tmp_save)

        pdicts = [l.get_params_serialize() for l in mlp.layers_to_optimize]
        outputs = _inference_outputs(mlp)

        # Check outputs, states, and params are the same
        for output, output_exp in zip(outputs, outputs_exp):
            assert np.allclose(output.get(), output_exp.get())

        for pd, pd_exp in zip(pdicts, pdicts_exp):
            for s, s_e in zip(pd['states'], pd_exp['states']):
                _assert_close(s, s_e)
            for p, p_e in zip(pd['params'], pd_exp['params']):
                _assert_close(p, p_e)
    finally:
        # Clean up even when an assertion above fails; the file may not exist
        # if saving itself raised.
        if os.path.exists(tmp_save):
            os.remove(tmp_save)
def main():
    """
    Check that serialization round-trips do not change training.

    Two runs are made from freshly generated models:

    1. "oneshot": a single fit() call for ``num_epochs`` epochs with a
       serialize callback writing to ``test_oneshot.pkl``.
    2. "manyshot": fit() is called once per epoch, and after each call the
       checkpoint written for that epoch is loaded back into the model.

    The final checkpoints of both runs are then compared with
    compare_model_pickles(); on failure the traceback is printed and the
    process exits with status 1.

    Relies on the module-level names ``args`` and ``num_epochs``.
    """
    import traceback

    # setup the model and run for num_epochs saving the last state only
    # this is at the top so that the be is generated
    mlp = gen_model(args.backend)

    # setup data iterators
    (X_train, y_train), (X_test, y_test), nclass = load_mnist(path=args.data_dir)
    if args.backend in ('nervanacpu', 'cpu'):
        # limit data since cpu backend runs slower
        X_train, y_train = X_train[:1000], y_train[:1000]
        X_test, y_test = X_test[:1000], y_test[:1000]
    train = DataIterator(X_train, y_train, nclass=nclass, lshape=(1, 28, 28))
    valid = DataIterator(X_test, y_test, nclass=nclass, lshape=(1, 28, 28))

    # serialization related
    cost = GeneralizedCost(costfunc=CrossEntropyBinary())
    opt_gdm = GradientDescentMomentum(learning_rate=0.1, momentum_coef=0.9)

    checkpoint_model_path = os.path.join('./', 'test_oneshot.pkl')
    checkpoint_schedule = 1  # save at every step

    callbacks = Callbacks(mlp, train)
    callbacks.add_serialize_callback(checkpoint_schedule,
                                     checkpoint_model_path,
                                     history=2)

    # run the fit all the way through, saving a checkpoint at every epoch
    mlp.fit(train,
            optimizer=opt_gdm,
            num_epochs=num_epochs,
            cost=cost,
            callbacks=callbacks)

    # setup model with same random seed run epoch by epoch
    # serializing and deserializing at each step
    mlp = gen_model(args.backend)
    cost = GeneralizedCost(costfunc=CrossEntropyBinary())
    opt_gdm = GradientDescentMomentum(learning_rate=0.1, momentum_coef=0.9)

    # reset data iterators
    train.reset()
    valid.reset()

    checkpoint_model_path = os.path.join('./', 'test_manyshot.pkl')
    checkpoint_schedule = 1  # save at every step

    callbacks = Callbacks(mlp, train)
    callbacks.add_serialize_callback(checkpoint_schedule,
                                     checkpoint_model_path,
                                     history=num_epochs)

    root, ext = os.path.splitext(checkpoint_model_path)
    for epoch in range(num_epochs):
        # _0 points to state at end of epoch 0
        mlp.fit(train,
                optimizer=opt_gdm,
                num_epochs=epoch + 1,
                cost=cost,
                callbacks=callbacks)

        # load the weights saved for this epoch back into the model
        fn = '%s_%d%s' % (root, epoch, ext)
        mlp.load_weights(fn)

    # compare test_oneshot_<num_epochs>.pkl to test_manyshot_<num_epochs>.pkl
    try:
        compare_model_pickles('test_oneshot_%d.pkl' % (num_epochs - 1),
                              'test_manyshot_%d.pkl' % (num_epochs - 1))
    except Exception:
        # Catch Exception rather than everything, so Ctrl-C and SystemExit
        # still propagate, and show why the comparison failed.
        traceback.print_exc()
        print('test failed....')
        sys.exit(1)
def test_conv_rnn(backend_default):
    """
    Smoke-test fprop/bprop through conv layers followed by a recurrent layer.

    The same conv/pooling front end is paired in turn with each recurrent
    layer variant below; no numeric result is checked.
    """
    train_shape = (1, 17, 142)
    be = backend_default

    # Random input sized to the flattened train_shape, and a 10-row error.
    n_features = np.prod(train_shape)
    inp = be.array(be.rng.randn(n_features, be.bsz))
    delta = be.array(be.rng.randn(10, be.bsz))

    init_norm = Gaussian(loc=0.0, scale=0.01)

    rlayers = [
        # bilstm
        DeepBiLSTM(128, init_norm, activation=Rectlin(),
                   gate_activation=Rectlin(), depth=1, reset_cells=True),
        # birnn_1
        DeepBiRNN(128, init_norm, activation=Rectlin(),
                  depth=1, reset_cells=True, batch_norm=False),
        # birnn_2
        DeepBiRNN(128, init_norm, activation=Rectlin(),
                  depth=2, reset_cells=True, batch_norm=False),
        # bibnrnn
        DeepBiRNN(128, init_norm, activation=Rectlin(),
                  depth=1, reset_cells=True, batch_norm=True),
        # birnnsum
        DeepBiRNN(128, init_norm, activation=Rectlin(),
                  depth=1, reset_cells=True, batch_norm=False, bi_sum=True),
        # rnn
        Recurrent(128, init=init_norm, activation=Rectlin(),
                  reset_cells=True),
        # lstm
        LSTM(128, init_norm, activation=Rectlin(),
             gate_activation=Rectlin(), reset_cells=True),
        # gru
        GRU(128, init_norm, activation=Rectlin(),
            gate_activation=Rectlin(), reset_cells=True),
    ]

    for recurrent_layer in rlayers:
        layers = [
            Conv((2, 2, 4), init=init_norm, activation=Rectlin(),
                 strides=dict(str_h=2, str_w=4)),
            Pooling(2, strides=2),
            Conv((3, 3, 4), init=init_norm, batch_norm=True,
                 activation=Rectlin(), strides=dict(str_h=1, str_w=2)),
            recurrent_layer,
            RecurrentMean(),
            Affine(nout=10, init=init_norm, activation=Rectlin()),
        ]
        model = Model(layers=layers)
        cost = GeneralizedCost(costfunc=CrossEntropyBinary())
        model.initialize(train_shape, cost)
        model.fprop(inp)
        model.bprop(delta)
def test_model_serialize(backend_default, data):
    """
    Train a small multistream model briefly, save it, rebuild it from the
    saved file, and check that inference outputs, layer states and layer
    params match what was saved.

    The temporary pickle is removed in a ``finally`` clause so that a failing
    assertion does not leave the file behind in the working directory.
    """
    dataset = MNIST(path=data)
    (X_train, y_train), (X_test, y_test), nclass = dataset.load_data()
    train_set = ArrayIterator([X_train, X_train], y_train,
                              nclass=nclass, lshape=(1, 28, 28))

    init_norm = Gaussian(loc=0.0, scale=0.01)

    # initialize model
    path1 = Sequential([
        Conv((5, 5, 16), init=init_norm, bias=Constant(0),
             activation=Rectlin()),
        Pooling(2),
        Affine(nout=20, init=init_norm, bias=init_norm, activation=Rectlin()),
    ])
    path2 = Sequential([
        Affine(nout=100, init=init_norm, bias=Constant(0),
               activation=Rectlin()),
        Dropout(keep=0.5),
        Affine(nout=20, init=init_norm, bias=init_norm, activation=Rectlin()),
    ])
    layers = [
        MergeMultistream(layers=[path1, path2], merge="stack"),
        Affine(nout=20, init=init_norm, batch_norm=True, activation=Rectlin()),
        Affine(nout=10, init=init_norm, activation=Logistic(shortcut=True)),
    ]

    tmp_save = 'test_model_serialize_tmp_save.pickle'
    mlp = Model(layers=layers)
    mlp.optimizer = GradientDescentMomentum(learning_rate=0.1,
                                            momentum_coef=0.9)
    mlp.cost = GeneralizedCost(costfunc=CrossEntropyBinary())
    mlp.initialize(train_set, cost=mlp.cost)

    n_test = 3
    num_epochs = 3

    def _inference_outputs(model):
        """Collect inference outputs for the leading batches of train_set."""
        collected = []
        for i, (x, t) in enumerate(train_set):
            collected.append(model.fprop(x, inference=True))
            # The check runs after the batch is processed, so batches
            # 0 .. n_test + 1 are all included.
            if i > n_test:
                break
        return collected

    # Train model for num_epochs, stopping each epoch once i exceeds n_test
    for epoch in range(num_epochs):
        for i, (x, t) in enumerate(train_set):
            x = mlp.fprop(x)
            delta = mlp.cost.get_errors(x, t)
            mlp.bprop(delta)
            mlp.optimizer.optimize(mlp.layers_to_optimize, epoch=epoch)
            if i > n_test:
                break

    # Get expected states of all layers and outputs of the leading batches
    pdicts_exp = [l.get_params_serialize() for l in mlp.layers_to_optimize]
    outputs_exp = _inference_outputs(mlp)

    try:
        # Serialize model
        mlp.save_params(tmp_save, keep_states=True)

        # Load model
        mlp = Model(tmp_save)
        mlp.initialize(train_set)

        pdicts = [l.get_params_serialize() for l in mlp.layers_to_optimize]
        outputs = _inference_outputs(mlp)

        # Check outputs, states, and params are the same
        for output, output_exp in zip(outputs, outputs_exp):
            assert allclose_with_out(output.get(), output_exp.get())

        for pd, pd_exp in zip(pdicts, pdicts_exp):
            for s, s_e in zip(pd['states'], pd_exp['states']):
                if isinstance(s, list):  # this is the batch norm case
                    for _s, _s_e in zip(s, s_e):
                        assert allclose_with_out(_s, _s_e)
                else:
                    assert allclose_with_out(s, s_e)

            for p, p_e in zip(pd['params'], pd_exp['params']):
                assert type(p) == type(p_e)
                if isinstance(p, list):  # this is the batch norm case
                    for _p, _p_e in zip(p, p_e):
                        assert allclose_with_out(_p, _p_e)
                elif isinstance(p, np.ndarray):
                    assert allclose_with_out(p, p_e)
                else:
                    # non-array params are compared by plain equality
                    assert p == p_e
    finally:
        # Clean up even when an assertion above fails; the file may not exist
        # if saving itself raised.
        if os.path.exists(tmp_save):
            os.remove(tmp_save)
# Input placeholders: the image batch and the target labels.
l_in = deepstacks.neon.InputLayer((None, ) + train_set.shape, 'image')
l_y = deepstacks.neon.InputLayer((None, ), 'y')

# Network description handed to build_network: four dense 'm_*' entries,
# then two `share` groups (b1 after 'm_l1', b2 after 'm_l3'), ending on
# 'm_l4'.
network_spec = (
    (0, 100, 0, 0, 'm_l1', 0, {'dense'}),
    (0, 32, 0, 0, 'm_l2', 0, {'dense'}),
    (0, 100, 0, 0, 'm_l3', 0, {'dense'}),
    (0, 10, 0, 0, 'm_l4', 0, {
        'dense': True,
        'nonlinearity': Softmax(),
    }),
    ('m_l1', 0, 0, 0, 0, 0, {}),
    (share, 'shared', (
        (0, 16, 0, 0, 'b1_l1', 0, {'dense'}),
        (0, 10, 0, 0, 'b1_l2', 0, {
            'dense': True,
            'nonlinearity': Logistic(shortcut=True),
            'equal': ['target', 'b1', CrossEntropyBinary()],
        }),
    )),
    ('m_l3', 0, 0, 0, 0, 0, {}),
    (share, 'shared', (
        (0, 16, 0, 0, 'b2_l1', 0, {'dense'}),
        (0, 10, 0, 0, 'b2_l2', 0, {
            'dense': True,
            'nonlinearity': Logistic(shortcut=True),
            'equal': ['target', 'b2', CrossEntropyBinary()],
        }),
    )),
    ('m_l4', ),
)
named_inputs = {'target': l_y}

network, stacks, paramlayers, errors, watchpoints = \
    deepstacks.neon.build_network(l_in, network_spec, named_inputs)

# Combine the base cost with the errors/watchpoints collected while building.
cost = GeneralizedCost(costfunc=CrossEntropyBinary())
cost, layers, tagslice = deepstacks.neon.get_loss(errors, watchpoints, cost)
print(layers)
# NOTE(review): the `return` below is the tail of a definition that starts
# before this excerpt; its original indentation is not recoverable from here.
return Sequential(layers=(network,b))
# NOTE: neon's original layers do not add a bias, so they are much faster
# than ours.
# Input placeholders: the image batch and the target labels.
l_in = deepstacks.neon.InputLayer((None,)+train_set.shape,'image')
l_y = deepstacks.neon.InputLayer((None,),'y')
# Network description: four dense 'm_*' entries, then a b1 branch after
# 'm_l1' and a b2 branch after 'm_l3', ending on 'm_l4'. The commented-out
# entries are disabled alternatives that inserted the branches as explicit
# layers.
network,stacks,paramlayers,errors,watchpoints=deepstacks.neon.build_network(l_in,(
    (0,100,0,0,'m_l1',0,{'dense'}),
    #(0,0,0,0,0,0,{'layer':(insert_branch_layer,curr_layer,b1)}),
    (0,32,0,0,'m_l2',0,{'dense'}),
    (0,16,0,0,'m_l3',0,{'dense'}),
    #(0,0,0,0,0,0,{'layer':(insert_branch_layer,curr_layer,b2)}),
    (0,10,0,0,'m_l4',0,{'dense':True,'nonlinearity':Softmax()}),
    ('m_l1',0,0,0,0,0,{}),
    #(0,0,0,0,0,0,{'layer':b1}),
    (0,16,0,0,'b1_l1',0,{'dense'}),
    (0,10,0,0,'b1_l2',0,{'dense':True,'nonlinearity':Logistic(shortcut=True),'equal':['target','b1',CrossEntropyBinary()]}),
    ('m_l3',0,0,0,0,0,{}),
    #(0,0,0,0,0,0,{'layer':b2}),
    (0,16,0,0,'b2_l1',0,{'dense'}),
    (0,10,0,0,'b2_l2',0,{'dense':True,'nonlinearity':Logistic(shortcut=True),'equal':['target','b2',CrossEntropyBinary()]}),
    ('m_l4',)
    ),{
    'target':l_y
    })
# Combine the base cost with the errors/watchpoints collected while building.
cost = GeneralizedCost(costfunc=CrossEntropyBinary())
cost,layers,tagslice = deepstacks.neon.get_loss(errors,watchpoints,cost)
print 'network:',network
print 'extra layers:',layers