def main():
    # Set up the model and run for num_epochs, saving the last state only.
    # This is at the top so that the backend (be) is generated first.
    mlp = gen_model(args.backend)

    # set up data iterators
    (X_train, y_train), (X_test, y_test), nclass = load_mnist(path=args.data_dir)
    if args.backend == 'nervanacpu' or args.backend == 'cpu':
        # limit data since the cpu backend runs slower
        train = DataIterator(X_train[:1000], y_train[:1000],
                             nclass=nclass, lshape=(1, 28, 28))
        valid = DataIterator(X_test[:1000], y_test[:1000],
                             nclass=nclass, lshape=(1, 28, 28))
    else:
        train = DataIterator(X_train, y_train, nclass=nclass, lshape=(1, 28, 28))
        valid = DataIterator(X_test, y_test, nclass=nclass, lshape=(1, 28, 28))

    # serialization related
    cost = GeneralizedCost(costfunc=CrossEntropyBinary())
    opt_gdm = GradientDescentMomentum(learning_rate=0.1, momentum_coef=0.9)

    checkpoint_model_path = os.path.join('./', 'test_oneshot.pkl')
    checkpoint_schedule = 1  # save at every step
    callbacks = Callbacks(mlp, train)
    callbacks.add_serialize_callback(checkpoint_schedule, checkpoint_model_path,
                                     history=2)

    # run the fit all the way through, saving a checkpoint at each epoch
    mlp.fit(train, optimizer=opt_gdm, num_epochs=num_epochs, cost=cost,
            callbacks=callbacks)

    # Set up a model with the same random seed and run it epoch by epoch,
    # serializing and deserializing at each step.
    mlp = gen_model(args.backend)
    cost = GeneralizedCost(costfunc=CrossEntropyBinary())
    opt_gdm = GradientDescentMomentum(learning_rate=0.1, momentum_coef=0.9)

    # reset data iterators
    train.reset()
    valid.reset()

    checkpoint_model_path = os.path.join('./', 'test_manyshot.pkl')
    checkpoint_schedule = 1  # save at every step
    callbacks = Callbacks(mlp, train)
    callbacks.add_serialize_callback(checkpoint_schedule, checkpoint_model_path,
                                     history=num_epochs)
    for epoch in range(num_epochs):
        # _0 points to the state at the end of epoch 0
        mlp.fit(train, optimizer=opt_gdm, num_epochs=epoch + 1, cost=cost,
                callbacks=callbacks)

        # load the saved weights from this epoch's checkpoint file
        prts = os.path.splitext(checkpoint_model_path)
        fn = prts[0] + '_%d' % epoch + prts[1]
        mlp.load_weights(fn)

    # compare test_oneshot_<num_epochs>.pkl to test_manyshot_<num_epochs>.pkl
    try:
        compare_model_pickles('test_oneshot_%d.pkl' % (num_epochs - 1),
                              'test_manyshot_%d.pkl' % (num_epochs - 1))
    except Exception:
        print('test failed....')
        sys.exit(1)
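# compare_model_pickles is defined elsewhere in the test suite; a minimal
# sketch of what it might do (load both checkpoint files and assert that
# every saved array matches) could look like this. This is a hypothetical
# helper, not the actual neon implementation:
import pickle

import numpy as np


def compare_model_pickles(fn_a, fn_b, rtol=1e-5, atol=1e-6):
    """Assert that two serialized model states are numerically equal."""
    with open(fn_a, 'rb') as fa, open(fn_b, 'rb') as fb:
        state_a, state_b = pickle.load(fa), pickle.load(fb)

    def compare(a, b):
        # Recurse through nested dicts/lists, comparing array leaves.
        if isinstance(a, dict):
            assert set(a) == set(b)
            for key in a:
                compare(a[key], b[key])
        elif isinstance(a, (list, tuple)):
            assert len(a) == len(b)
            for item_a, item_b in zip(a, b):
                compare(item_a, item_b)
        elif isinstance(a, np.ndarray):
            assert np.allclose(a, b, rtol=rtol, atol=atol)
        else:
            assert a == b

    compare(state_a, state_b)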
def train(self, dataset, model=None):
    """Trains the passed model on the given dataset.

    If no model is passed, `generate_default_model` is used.
    """
    print("Starting training...")
    start = time.time()

    # Training runs on the CPU; if a GPU is available it should be used
    # instead (see the commented-out line below).
    backend = gen_backend(backend='cpu',
                          batch_size=self.batch_size,
                          rng_seed=self.random_seed,
                          stochastic_round=False)
    # Uncomment the line below to run on a GPU using the cudanet backend
    # backend = gen_backend(rng_seed=0, gpu='cudanet')

    cost = GeneralizedCost(name='cost', costfunc=CrossEntropyMulti())
    optimizer = GradientDescentMomentum(learning_rate=self.lrate,
                                        momentum_coef=0.9)

    # set up the model and experiment
    if not model:
        model = self.generate_default_model(dataset.num_labels)

    callbacks = Callbacks(model, dataset.train(),
                          output_file=os.path.join(
                              self.root_path, self.Callback_Store_Filename),
                          progress_bar=True,
                          valid_set=dataset.test(),
                          valid_freq=1)
    # add a callback that saves the best model state
    callbacks.add_save_best_state_callback(self.model_path)
    # serialize an intermediate checkpoint every epoch, keeping up to 100
    callbacks.add_serialize_callback(
        serialize_schedule=1,
        save_path=os.path.join(self.root_path, self.Intermediate_Model_Filename),
        history=100)

    model.fit(dataset.train(),
              optimizer=optimizer,
              num_epochs=self.max_epochs,
              cost=cost,
              callbacks=callbacks)

    print('Misclassification error = %.1f%%'
          % (model.eval(dataset.test(), metric=Misclassification()) * 100))
    print("Finished training!")
    end = time.time()
    print("Duration: %.1f seconds" % (end - start))
    return model
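# The best state written by add_save_best_state_callback above can be
# restored before scoring. A minimal sketch, assuming the same model
# topology as in train() (`evaluate_best` is a hypothetical companion
# method, not part of the original class):
def evaluate_best(self, dataset, model):
    model.load_weights(self.model_path)  # restore the best checkpoint
    return model.eval(dataset.test(), metric=Misclassification())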
              gate_activation=Tanh(), reset_cells=True),
          Affine(train_set.vocab_size, init, bias=init2, activation=Softmax())]

cost = GeneralizedCostMask(costfunc=CrossEntropyMulti(usebits=True))

checkpoint_model_path = "~/image_caption2.pickle"
checkpoint_schedule = range(num_epochs)  # serialize at the end of every epoch

model = Model(layers=layers)

callbacks = Callbacks(model, train_set, output_file=args.output_file,
                      progress_bar=args.progress_bar)
callbacks.add_serialize_callback(checkpoint_schedule, checkpoint_model_path)

opt = RMSProp(decay_rate=0.997, learning_rate=0.0005, epsilon=1e-8,
              clip_gradients=True, gradient_limit=1.0)

# train model
model.fit(train_set, optimizer=opt, num_epochs=num_epochs, cost=cost,
          callbacks=callbacks)

# load the model (if a checkpoint exists) and evaluate BLEU score on the test set
if args.model_file:
    import os
    assert os.path.exists(args.model_file), '%s not found' % args.model_file
    mlp.load_weights(args.model_file)

# configure callbacks
callbacks = Callbacks(mlp, train, output_file=args.output_file)

if args.validation_freq:
    class TopKMetrics(Callback):
        def __init__(self, valid_set, epoch_freq=args.validation_freq):
            super(TopKMetrics, self).__init__(epoch_freq=epoch_freq)
            self.valid_set = valid_set

        def on_epoch_end(self, epoch):
            self.valid_set.reset()
            allmetrics = TopKMisclassification(k=5)
            stats = mlp.eval(self.valid_set, metric=allmetrics)
            print(", ".join(allmetrics.metric_names) + ": " +
                  ", ".join(map(str, stats.flatten())))

    callbacks.add_callback(TopKMetrics(test))

if args.save_path:
    checkpoint_schedule = range(1, args.epochs)
    callbacks.add_serialize_callback(checkpoint_schedule, args.save_path,
                                     history=2)

mlp.fit(train, optimizer=opt, num_epochs=args.epochs, cost=cost,
        callbacks=callbacks)
test.exit_batch_provider()
train.exit_batch_provider()
if args.validation_freq:
    class TopKMetrics(Callback):
        def __init__(self, valid_set, epoch_freq=args.validation_freq):
            super(TopKMetrics, self).__init__(epoch_freq=epoch_freq)
            self.valid_set = valid_set

        def on_epoch_end(self, epoch):
            self.valid_set.reset()
            allmetrics = TopKMisclassification(k=5)
            stats = mlp.eval(self.valid_set, metric=allmetrics)
            print(", ".join(allmetrics.metric_names) + ": " +
                  ", ".join(map(str, stats.flatten())))

    callbacks.add_callback(TopKMetrics(test))

if args.save_path:
    checkpoint_schedule = range(args.epochs)
    callbacks.add_serialize_callback(checkpoint_schedule, args.save_path,
                                     history=2)

mlp.fit(train, optimizer=opt, num_epochs=args.epochs, cost=cost,
        callbacks=callbacks)
test.exit_batch_provider()
train.exit_batch_provider()
             'step_config': 1,
             'learning_rate': 0.1,
             'weight_decay': 0}

# initialize model object
rbm = RBM(layers=layers)

if args.model_file:
    assert os.path.exists(args.model_file), '%s not found' % args.model_file
    logger.info('loading initial model state from %s' % args.model_file)
    rbm.load_weights(args.model_file)

# set up standard fit callbacks
callbacks = Callbacks(rbm, train_set, output_file=args.output_file,
                      progress_bar=args.progress_bar)

if args.serialize > 0:
    # add a callback for saving a checkpoint file every args.serialize epochs
    checkpoint_schedule = args.serialize
    checkpoint_model_path = args.save_path
    callbacks.add_serialize_callback(checkpoint_schedule, checkpoint_model_path)

rbm.fit(train_set, optimizer=optimizer, num_epochs=num_epochs,
        callbacks=callbacks)

# grab the hidden-unit activations for the first validation minibatch
for mb_idx, (x_val, y_val) in enumerate(valid_set):
    hidden = rbm.fprop(x_val)
    break
layers = [
    LSTM(hidden_size, init, Logistic(), Tanh()),
    Affine(len(train_set.vocab), init, bias=init, activation=Softmax())
]

model = Model(layers=layers)
cost = GeneralizedCost(costfunc=CrossEntropyMulti(usebits=True))
optimizer = RMSProp(clip_gradients=clip_gradients,
                    stochastic_round=args.rounding)

# configure callbacks
callbacks = Callbacks(model, train_set, output_file=args.output_file,
                      progress_bar=args.progress_bar,
                      valid_set=valid_set, valid_freq=1)
callbacks.add_serialize_callback(1, args.save_path)  # serialize every epoch

# fit and validate
model.fit(train_set, optimizer=optimizer, num_epochs=num_epochs, cost=cost,
          callbacks=callbacks)


def sample(prob):
    """Sample an index from a probability distribution."""
    prob = prob / (prob.sum() + 1e-6)
    return np.argmax(np.random.multinomial(1, prob, 1))


# Set batch size and time_steps to 1 for generation and reset buffers
be.bsz = 1
time_steps = 1
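# A quick sanity check of sample(), with illustrative values that are not
# part of the original example: indices come back in proportion to their
# probability mass.
probs = np.array([0.1, 0.7, 0.2])
draws = [sample(probs) for _ in range(1000)]
# index 1 should account for roughly 70% of the draws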
]

model = Model(layers=layers)
cost = GeneralizedCost(costfunc=CrossEntropyMulti(usebits=True))
optimizer = RMSProp(clip_gradients=clip_gradients,
                    stochastic_round=args.rounding)

# configure callbacks
callbacks = Callbacks(model, train_set, output_file=args.output_file,
                      valid_set=valid_set, valid_freq=1,
                      progress_bar=args.progress_bar)
callbacks.add_serialize_callback(1, args.save_path)

# fit and validate
model.fit(train_set, optimizer=optimizer, num_epochs=num_epochs, cost=cost,
          callbacks=callbacks)


def sample(prob):
    """Sample an index from a probability distribution."""
    prob = prob / (prob.sum() + 1e-6)
    return np.argmax(np.random.multinomial(1, prob, 1))