def test_persistent_tensor():
    input_axes = ng.make_axes([
        ng.make_axis(10),
        ng.make_axis(3)
    ])
    bgr = ng.persistent_tensor(
        axes=input_axes,
        initial_value=np.array([113.9, 123.0, 125.3]))
    bgr_comp = ng.computation(bgr, "all")

    results = dict()
    weight_saver = Saver()
    with closing(ngt.make_transformer()) as transformer:
        bgr_func = transformer.add_computation(bgr_comp)
        weight_saver.setup_save(transformer=transformer, computation=bgr_comp)
        results['saved'] = bgr_func().copy()
        weight_saver.save(filename="test_persistent_tensor")

    with closing(ngt.make_transformer()) as restore_transformer:
        bgr_refunc = restore_transformer.add_computation(bgr_comp)
        weight_saver.setup_restore(transformer=restore_transformer,
                                   computation=bgr_comp,
                                   filename="test_persistent_tensor")
        weight_saver.restore()
        results['restored'] = bgr_refunc().copy()

    os.remove("test_persistent_tensor.npz")
    assert np.allclose(results['saved'], results['restored'], atol=0)
def test_variable():
    input_axes = ng.make_axes([
        ng.make_axis(10),
        ng.make_axis(3)
    ])
    var = ng.variable(axes=input_axes)
    assign_val = np.random.rand(10, 3)
    var_assign = ng.AssignOp(tensor=var, val=assign_val)
    var_seq = ng.sequential([var_assign, var])
    var_comp = ng.computation(var_seq, "all")

    results = dict()
    weight_saver = Saver()
    with closing(ngt.make_transformer()) as transformer:
        var_func = transformer.add_computation(var_comp)
        weight_saver.setup_save(transformer=transformer, computation=var_comp)
        results['saved'] = var_func().copy()
        weight_saver.save(filename="test_variable")

    reassign_val = np.random.rand(10, 3)
    var_reassign = ng.AssignOp(tensor=var, val=reassign_val)
    var_recomp = ng.computation(var_reassign, "all")
    var_read = ng.computation(var, "all")
    with closing(ngt.make_transformer()) as restore_transformer:
        var_recompfunc = restore_transformer.add_computation(var_recomp)
        weight_saver.setup_restore(transformer=restore_transformer,
                                   computation=var_recomp,
                                   filename="test_variable")
        var_readfunc = restore_transformer.add_computation(var_read)
        var_recompfunc()
        results['reassigned'] = var_readfunc().copy()
        weight_saver.restore()
        results['restored'] = var_readfunc().copy()

    os.remove("test_variable.npz")
    assert np.allclose(results['saved'], assign_val, atol=0)
    assert np.allclose(results['reassigned'], reassign_val, atol=0)
    assert np.allclose(results['saved'], results['restored'], atol=0)
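# The two tests above exercise the full Saver round trip: build a computation,
# run it under one transformer while setup_save() tracks its weights, write an
# .npz checkpoint with save(), then pull the values back into a fresh
# transformer with setup_restore()/restore(). Below is that workflow distilled
# into a minimal sketch; the import paths (in particular the Saver location)
# are assumptions and may need adjusting to your ngraph checkout.
from contextlib import closing

import numpy as np
import ngraph as ng
import ngraph.transformers as ngt
from ngraph.frontends.neon.saver import Saver  # assumed import path


def saver_round_trip_sketch(filename="weights_demo"):
    axes = ng.make_axes([ng.make_axis(4), ng.make_axis(2)])
    var = ng.variable(axes=axes)
    # Assign a known value and read the variable back in a single computation
    assign = ng.AssignOp(tensor=var, val=np.random.rand(4, 2))
    comp = ng.computation(ng.sequential([assign, var]), "all")

    saver = Saver()
    with closing(ngt.make_transformer()) as transformer:
        func = transformer.add_computation(comp)
        saver.setup_save(transformer=transformer, computation=comp)
        saved = func().copy()            # run the graph so `var` holds a value
        saver.save(filename=filename)    # writes <filename>.npz

    read_comp = ng.computation(var, "all")
    with closing(ngt.make_transformer()) as restore_transformer:
        read_func = restore_transformer.add_computation(read_comp)
        saver.setup_restore(transformer=restore_transformer,
                            computation=read_comp,
                            filename=filename)
        saver.restore()                  # copy the saved values into this transformer
        assert np.allclose(read_func(), saved)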
def train(self, train_iterator, val_iterator, n_epochs=100, log_interval=100,
          save_plots=True, results_dir="./"):
    train_iterator.reset()
    val_iterator.reset()

    batch_size = train_iterator.batch_size
    num_iterations = np.floor(
        (train_iterator.ndata * n_epochs * 1.) / batch_size).astype('int')
    n_train = train_iterator.ndata
    assert val_iterator.batch_size == batch_size

    # weight saver for checkpointing the model
    weight_saver = Saver()

    # bind the computations to the transformer
    self.train_function = self.transformer.add_computation(
        self.train_computation)
    self.eval_function = self.transformer.add_computation(
        self.eval_computation)
    self.pred_function = self.transformer.add_computation(
        self.pred_computation)

    # set up weight saver
    weight_saver.setup_save(transformer=self.transformer,
                            computation=self.train_computation)

    # Progress bar
    tpbar = tqdm(unit="batches", ncols=100, total=num_iterations)
    tpbar_string = ("Train Epoch: {epoch} [{num_examples_seen}/{n_train} "
                    "({percent_complete}%)] Train Loss {cost}")

    train_losses = []
    eval_losses = []

    # Iterate over the training set
    num_examples_seen = 0
    n_epoch = 1
    for step in range(num_iterations):
        data = next(train_iterator)
        feed_dict = {
            self.input_placeholders["X"]: data["X"],
            self.input_placeholders["y"]: data["y"]
        }

        # Mean batch cost
        output = self.train_function(feed_dict=feed_dict)
        train_loss = output[()].item()
        train_losses.append(train_loss)
        if self.tb is not None:
            self.tb.add_scalar("train_loss", train_loss, step=step)

        # Update progress bar
        tpbar.update(1)
        tpbar.set_description("Training {}".format(str(output[()])))
        num_examples_seen += batch_size

        # Every log_interval steps, report metrics on the validation set
        if (step + 1) % log_interval == 0 and step > 0:
            # calculate average loss over the validation set
            avg_eval_loss = 0.0
            val_iterator.reset()
            for e, data_test in enumerate(val_iterator):
                feed_dict_test = {
                    self.input_placeholders["X"]: data_test["X"],
                    self.input_placeholders["y"]: data_test["y"]
                }
                eval_loss = self.eval_function(feed_dict=feed_dict_test)[0]
                avg_eval_loss += eval_loss
            avg_eval_loss /= (e + 1)

            # save loss
            eval_losses.append(avg_eval_loss.item())
            if self.tb is not None:
                self.tb.add_scalar("eval_loss", avg_eval_loss, step=step)

            # write to progress bar
            avg_train_cost = np.mean(train_losses[-1 * log_interval:])
            tqdm.write(tpbar_string.format(
                epoch=n_epoch,
                num_examples_seen=num_examples_seen,
                n_train=n_train,
                percent_complete=100.0 * num_examples_seen / n_train,
                cost=avg_train_cost))

            # checkpoint the weights
            weight_saver.save(filename=results_dir + "/" + "model")

            # write the loss curves to a JSON logfile
            logfile = os.path.join(results_dir, "logs")
            with open(logfile, 'w') as fp:
                json.dump({'train_loss': train_losses,
                           'eval_loss': eval_losses}, fp)

            if save_plots:
                # plot all entries in the logfile
                self.plot_scalars(logfile, results_dir)

        if num_examples_seen > n_train:
            num_examples_seen = num_examples_seen - n_train
            n_epoch += 1

    print("Test set: Average loss: {}".format(avg_eval_loss))
    print("\nTraining Completed")
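# The loop above checkpoints weights to "<results_dir>/model" (Saver.save
# appends ".npz") and rewrites a JSON file named "logs" in results_dir with the
# running 'train_loss' and 'eval_loss' lists every log_interval steps. A small
# sketch, assuming that layout, for inspecting the log after training;
# summarize_training() is a hypothetical helper, not part of the class above.
import json
import os


def summarize_training(results_dir="./"):
    logfile = os.path.join(results_dir, "logs")
    with open(logfile) as fp:
        logs = json.load(fp)
    print("train steps logged :", len(logs['train_loss']))
    print("final train loss   :", logs['train_loss'][-1])
    if logs['eval_loss']:
        print("best eval loss     :", min(logs['eval_loss']))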
validate_parent_exists(args.data_dir)
data_dir = args.data_dir
assert weights_save_path.endswith('.npz')
assert log_file.endswith('.txt')
gradient_clip_norm = args.grad_clip_norm

babi = BABI_Dialog(
    path=data_dir,
    task=args.task,
    oov=args.use_oov,
    use_match_type=args.use_match_type,
    cache_match_type=args.cache_match_type,
    cache_vectorized=args.cache_vectorized)

weight_saver = Saver()

# Set num iterations to 1 epoch since we loop over epochs & shuffle
ndata = babi.data_dict['train']['memory']['data'].shape[0]
num_iterations = ndata // args.batch_size

train_set = ArrayIterator(babi.data_dict['train'],
                          batch_size=args.batch_size,
                          total_iterations=num_iterations)
dev_set = ArrayIterator(babi.data_dict['dev'], batch_size=args.batch_size)
test_set = ArrayIterator(babi.data_dict['test'], batch_size=args.batch_size)
inputs = train_set.make_placeholders()

memn2n = MemN2N_Dialog(
    babi.cands,
    babi.num_cands,
    babi.max_cand_len,
with Layer.inference_mode_on():
    a_pred_inference, _ = memn2n(inputs)
    eval_loss = ng.cross_entropy_multi(a_pred_inference,
                                       ng.one_hot(inputs['answer'], axis=vocab_axis),
                                       usebits=True)

eval_outputs = dict(test_cross_ent_loss=eval_loss,
                    test_preds=a_pred_inference)

if args.interactive:
    interactive_outputs = dict(test_preds=a_pred_inference)

if model_file is not None:
    # Instantiate the Saver object to save weights
    weight_saver = Saver()

if args.inference is False:
    # Train Loop
    with closing(ngt.make_transformer()) as transformer:
        # bind the computations
        train_computation = make_bound_computation(transformer, train_outputs, inputs)
        eval_computation = make_bound_computation(transformer, eval_outputs, inputs)

        if model_file is not None and args.restore:
            weight_saver.setup_restore(transformer=transformer,
                                       computation=train_outputs,
                                       filename=model_file)
            # Restore weight
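# For the inference-only path (when args.inference is True), the same Saver can
# repopulate the trained weights before evaluation. The sketch below reuses the
# names defined in the snippet above (inputs, eval_outputs, model_file,
# weight_saver, make_bound_computation); the original script's inference branch
# may be structured differently, so treat this as an assumption-laden outline.
with closing(ngt.make_transformer()) as transformer:
    eval_computation = make_bound_computation(transformer, eval_outputs, inputs)
    weight_saver.setup_restore(transformer=transformer,
                               computation=eval_outputs,
                               filename=model_file)
    # Restore the weights saved during training, then run eval_computation
    # over the dev/test iterators.
    weight_saver.restore()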
                                        wdecay=wdecay,
                                        nesterov=False,
                                        iteration=input_ops_train['iteration'])
    # Make a prediction
    prediction = resnet(input_ops_train['image'])
    # Calculate loss
    train_loss = ng.cross_entropy_multi(
        prediction, ng.one_hot(input_ops_train['label'], axis=ax.Y))
    # Average loss over the batch
    batch_cost = ng.sequential(
        [optimizer(train_loss), ng.mean(train_loss, out_axes=())])
    train_computation = ng.computation(batch_cost, "all")

# Instantiate the Saver object to save weights
weight_saver = Saver()

with ng.metadata(device=device_hetr, device_id=device_id, parallel=ax.N):
    # Inference
    with Layer.inference_mode_on():
        # Doing inference
        inference_prob = resnet(input_ops_valid['image'])
        eval_loss = ng.cross_entropy_multi(
            inference_prob, ng.one_hot(input_ops_valid['label'], axis=ax.Y))
        # Computation for inference
        eval_computation = ng.computation(
            [inference_prob, eval_loss, input_ops_valid['label']], "all")

if args.benchmark:
    inputs = input_ops_train
    n_skip = 1  # don't count first iteration in timing
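# A sketch of how the computations and Saver built above are typically wired
# to a transformer before the training loop. It reuses train_computation,
# eval_computation, and weight_saver from the preceding snippet; the loop body
# and the "weights" filename are assumptions, while the transformer and Saver
# calls mirror the patterns shown earlier in this document.
from contextlib import closing

import ngraph.transformers as ngt

with closing(ngt.make_transformer()) as transformer:
    train_function = transformer.add_computation(train_computation)
    eval_function = transformer.add_computation(eval_computation)
    weight_saver.setup_save(transformer=transformer,
                            computation=train_computation)
    # ... run the training loop, calling train_function(...) once per batch ...
    weight_saver.save(filename="weights")  # writes weights.npz for a later restore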
# Sanitize inputs
validate_existing_filepath(args.model_file)
model_file = args.model_file
assert model_file.endswith('.npz')
validate_parent_exists(args.data_dir)
data_dir = args.data_dir

babi = BABI_Dialog(
    path=data_dir,
    task=args.task,
    oov=args.use_oov,
    use_match_type=args.use_match_type,
    cache_match_type=args.cache_match_type,
    cache_vectorized=args.cache_vectorized)

weight_saver = Saver()

# Set num iterations to 1 epoch since we loop over epochs & shuffle
ndata = babi.data_dict['train']['memory']['data'].shape[0]
num_iterations = ndata // args.batch_size

train_set = ArrayIterator(babi.data_dict['train'],
                          batch_size=args.batch_size,
                          total_iterations=num_iterations)
inputs = train_set.make_placeholders()

memn2n = MemN2N_Dialog(
    babi.cands,
    babi.num_cands,
    babi.max_cand_len,
    babi.memory_size,
    babi.max_utt_len,