import os
import sys

import numpy as np

from neon.callbacks.callbacks import Callbacks
from neon.data import DataIterator, load_mnist
from neon.initializers import Gaussian
from neon.layers import Affine, GeneralizedCost
from neon.models import Model
from neon.optimizers import GradientDescentMomentum
from neon.transforms import CrossEntropyBinary, Logistic, Rectlin


def main():
    # NOTE: `args` (parsed command-line options) and `num_epochs` are assumed
    # to be supplied by the surrounding script, e.g. via neon's NeonArgparser.

    # set up the model and run for num_epochs, saving the last state only;
    # this is done first so that the backend is generated
    mlp = gen_model(args.backend)

    # set up data iterators
    (X_train, y_train), (X_test, y_test), nclass = load_mnist(path=args.data_dir)
    if args.backend in ('nervanacpu', 'cpu'):
        # limit the data since the cpu backend runs slower
        train = DataIterator(X_train[:1000], y_train[:1000], nclass=nclass, lshape=(1, 28, 28))
        valid = DataIterator(X_test[:1000], y_test[:1000], nclass=nclass, lshape=(1, 28, 28))
    else:
        train = DataIterator(X_train, y_train, nclass=nclass, lshape=(1, 28, 28))
        valid = DataIterator(X_test, y_test, nclass=nclass, lshape=(1, 28, 28))

    cost = GeneralizedCost(costfunc=CrossEntropyBinary())
    opt_gdm = GradientDescentMomentum(learning_rate=0.1, momentum_coef=0.9)

    # serialization setup
    checkpoint_model_path = os.path.join('./', 'test_oneshot.pkl')
    checkpoint_schedule = 1  # save at every epoch
    callbacks = Callbacks(mlp, train)
    callbacks.add_serialize_callback(checkpoint_schedule, checkpoint_model_path, history=2)

    # run the fit all the way through, saving a checkpoint every epoch
    mlp.fit(train, optimizer=opt_gdm, num_epochs=num_epochs, cost=cost, callbacks=callbacks)

    # set up a model with the same random seed and run it epoch by epoch,
    # serializing and deserializing at each step
    mlp = gen_model(args.backend)
    cost = GeneralizedCost(costfunc=CrossEntropyBinary())
    opt_gdm = GradientDescentMomentum(learning_rate=0.1, momentum_coef=0.9)

    # reset data iterators
    train.reset()
    valid.reset()

    checkpoint_model_path = os.path.join('./', 'test_manyshot.pkl')
    checkpoint_schedule = 1  # save at every epoch
    callbacks = Callbacks(mlp, train)
    callbacks.add_serialize_callback(checkpoint_schedule, checkpoint_model_path,
                                     history=num_epochs)
    for epoch in range(num_epochs):
        # the _0 suffix points to the state at the end of epoch 0
        mlp.fit(train, optimizer=opt_gdm, num_epochs=epoch + 1, cost=cost, callbacks=callbacks)

        # load the saved checkpoint back into the model
        prts = os.path.splitext(checkpoint_model_path)
        fn = prts[0] + '_%d' % epoch + prts[1]
        mlp.load_weights(fn)  # load the saved weights

    # compare test_oneshot_<num_epochs>.pkl to test_manyshot_<num_epochs>.pkl
    try:
        compare_model_pickles('test_oneshot_%d.pkl' % (num_epochs - 1),
                              'test_manyshot_%d.pkl' % (num_epochs - 1))
    except Exception:
        print('test failed....')
        sys.exit(1)
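# `gen_model` and `compare_model_pickles` are helpers assumed to be defined
# elsewhere in this script: the first rebuilds the backend and model so both
# runs start from identical random state, the second checks that two
# serialized checkpoints hold the same weights. A minimal, hypothetical
# sketch of gen_model (the real helper may differ):
from neon.backends import gen_backend


def gen_model(backend_name):
    # regenerate the backend with a fixed rng_seed so repeated runs match
    gen_backend(backend=backend_name, rng_seed=0)
    init_norm = Gaussian(loc=0.0, scale=0.1)
    layers = [Affine(nout=20, init=init_norm, activation=Rectlin()),
              Affine(nout=10, init=init_norm, activation=Logistic(shortcut=True))]
    return Model(layers=layers)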
def test_model_get_outputs(backend):
    (X_train, y_train), (X_test, y_test), nclass = load_mnist()
    train_set = DataIterator(X_train[:backend.bsz * 3])

    init_norm = Gaussian(loc=0.0, scale=0.1)
    layers = [Affine(nout=20, init=init_norm, bias=init_norm, activation=Rectlin()),
              Affine(nout=10, init=init_norm, activation=Logistic(shortcut=True))]
    mlp = Model(layers=layers)

    # reference output: forward-propagate each minibatch by hand
    out_list = []
    for x, t in train_set:
        x = mlp.fprop(x)
        out_list.append(x.get().T.copy())
    ref_output = np.vstack(out_list)

    # get_outputs should reproduce the manual fprop results
    train_set.reset()
    output = mlp.get_outputs(train_set)
    assert np.allclose(output, ref_output)
def test_model_get_outputs(backend_default):
    (X_train, y_train), (X_test, y_test), nclass = load_mnist()
    train_set = DataIterator(X_train[:backend_default.bsz * 3])

    init_norm = Gaussian(loc=0.0, scale=0.1)
    layers = [Affine(nout=20, init=init_norm, bias=init_norm, activation=Rectlin()),
              Affine(nout=10, init=init_norm, activation=Logistic(shortcut=True))]
    mlp = Model(layers=layers)

    # reference output: forward-propagate each minibatch by hand
    out_list = []
    # allocate layer buffers before calling fprop directly
    mlp.initialize(train_set)
    for x, t in train_set:
        x = mlp.fprop(x)
        out_list.append(x.get().T.copy())
    ref_output = np.vstack(out_list)

    # get_outputs should reproduce the manual fprop results
    train_set.reset()
    output = mlp.get_outputs(train_set)
    assert np.allclose(output, ref_output)
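# Both variants of test_model_get_outputs receive their backend from a pytest
# fixture rather than constructing one inline; neon's test suite supplies
# these via conftest.py. A minimal, hypothetical sketch of such a fixture
# (names and arguments here are assumptions, not neon's exact conftest):
import pytest


@pytest.fixture
def backend_default():
    # a small batch size keeps the bsz * 3 slice in the test above cheap to run
    return gen_backend(backend='cpu', batch_size=128, rng_seed=0)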