import json

import torch
import transformers


def probe_vocab_from_model(model):
    # `utils` and `visualize_embs` are project-local helpers.
    # Load the vocabulary saved alongside the model checkpoint.
    vocab_path = utils.get_vocab_path(model)
    with open(vocab_path) as jin:
        vocab = json.load(jin)
    # Look up the pretrained DistilBERT input embedding for every token id
    # in the vocabulary, then move the result to the CPU for plotting.
    embs = transformers.DistilBertModel.from_pretrained(
        'distilbert-base-uncased').to('cuda').get_input_embeddings()
    ipt = list(vocab.values())
    embs = embs(torch.tensor(
        ipt, device=torch.device('cuda'))).cpu().detach().numpy()
    visualize_embs(embs, vocab)
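# `visualize_embs` is not defined in this snippet. Below is a minimal sketch
# of such a helper, assuming it projects the embeddings to 2-D with PCA and
# labels each point with its token; the PCA/matplotlib choices are
# assumptions, not the project's actual implementation.
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA


def visualize_embs(embs, vocab):
    # Project the (num_tokens, dim) embedding matrix down to two dimensions.
    coords = PCA(n_components=2).fit_transform(embs)
    fig, ax = plt.subplots(figsize=(8, 8))
    ax.scatter(coords[:, 0], coords[:, 1], s=8)
    # `vocab` maps token -> id, in the same order the ids were embedded.
    for (token, _), (x, y) in zip(vocab.items(), coords):
        ax.annotate(token, (x, y), fontsize=7)
    plt.show()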
with open(status_path, 'w') as dst:
    json.dump(status, dst)
utils.save_model(acmodel, args.model)

# Testing the model before saving
agent = ModelAgent(args.model, obss_preprocessor, argmax=True)
agent.model = acmodel
agent.model.eval()
logs = batch_evaluate(agent, test_env_name,
                      args.val_seed, args.val_episodes)
agent.model.train()

mean_return = np.mean(logs["return_per_episode"])
success_rate = np.mean(
    [1 if r > 0 else 0 for r in logs['return_per_episode']])
# Save a new best checkpoint when the success rate improves, or when it
# ties the best success rate with a higher mean return.
save_model = False
if success_rate > best_success_rate:
    best_success_rate = success_rate
    save_model = True
elif (success_rate == best_success_rate) and (mean_return > best_mean_return):
    best_mean_return = mean_return
    save_model = True
if save_model:
    utils.save_model(acmodel, args.model + '_best')
    obss_preprocessor.vocab.save(
        utils.get_vocab_path(args.model + '_best'))
    logger.info(
        "Return {: .2f}; best model is saved".format(mean_return))
else:
    logger.info("Return {: .2f}; not the best model; not saved".format(
        mean_return))
if args.tb:
    assert len(header) == len(data)
    for key, value in zip(header, data):
        writer.add_scalar(key, float(value), status['num_frames'])
csv_writer.writerow(data)

# Save obss preprocessor vocabulary and model
if args.save_interval > 0 and status['i'] % args.save_interval == 0:
    obss_preprocessor.vocab.save()
    with open(status_path, 'w') as dst:
        json.dump(status, dst)
    utils.save_model(acmodel, args.model)

    # Testing the model before saving
    agent = ModelAgent(args.model, obss_preprocessor, argmax=True)
    agent.model = acmodel
    agent.model.eval()
    logs = batch_evaluate(agent, test_env_name,
                          args.val_seed, args.val_episodes)
    agent.model.train()

    mean_return = np.mean(logs["return_per_episode"])
    success_rate = np.mean(
        [1 if r > 0 else 0 for r in logs['return_per_episode']])
    if success_rate > best_success_rate:
        best_success_rate = success_rate
        utils.save_model(acmodel, args.model + '_best')
        obss_preprocessor.vocab.save(
            utils.get_vocab_path(args.model + '_best'))
        logger.info(
            "Return {: .2f}; best model is saved".format(mean_return))
    else:
        logger.info("Return {: .2f}; not the best model; not saved".format(
            mean_return))
def train(self, train_demos, writer, csv_writer, status_path, header,
          reset_status=False):
    # Load the status
    def initial_status():
        return {'i': 0, 'num_frames': 0, 'patience': 0}

    status = initial_status()
    if os.path.exists(status_path) and not reset_status:
        with open(status_path, 'r') as src:
            status = json.load(src)
    elif not os.path.exists(os.path.dirname(status_path)):
        # Ensure that the status directory exists
        os.makedirs(os.path.dirname(status_path))

    # If the batch size is larger than the number of demos, lower it
    if self.args.batch_size > len(train_demos):
        self.args.batch_size = len(train_demos)
        logger.info(
            "Batch size too high. Setting it to the number of train demos ({})"
            .format(len(train_demos)))

    # Save the model initially to avoid a "Model not found" exception
    # during the first validation step
    utils.save_model(self.acmodel, self.args.model)

    # Best success rate seen so far on the validation set
    best_success_rate, patience, i = 0, 0, 0
    total_start_time = time.time()

    while status['i'] < getattr(self.args, 'epochs', int(1e9)):
        if 'patience' not in status:
            # e.g. when fine-tuning an RL-pretrained agent with IL
            status['patience'] = 0
        # Do not learn if using a pre-trained model that already lost patience
        if status['patience'] > self.args.patience:
            break
        if status['num_frames'] > self.args.frames:
            break

        status['i'] += 1
        i = status['i']
        update_start_time = time.time()

        # Learning rate scheduler
        self.scheduler.step()

        log = self.run_epoch_recurrence(train_demos, is_training=True)
        total_len = sum([len(item[3]) for item in train_demos])
        status['num_frames'] += total_len

        update_end_time = time.time()

        # Print logs
        if status['i'] % self.args.log_interval == 0:
            total_ellapsed_time = int(time.time() - total_start_time)
            fps = total_len / (update_end_time - update_start_time)
            duration = datetime.timedelta(seconds=total_ellapsed_time)

            for key in log:
                log[key] = np.mean(log[key])

            train_data = [
                status['i'], status['num_frames'], fps, total_ellapsed_time,
                log["entropy"], log["policy_loss"], log["accuracy"]
            ]

            logger.info(
                "U {} | F {:06} | FPS {:04.0f} | D {} | H {:.3f} | pL {: .3f} | A {: .3f}"
                .format(*train_data))

            # Log the gathered data only when we don't evaluate the
            # validation metrics; it will be logged anyway afterwards,
            # when status['i'] % self.args.val_interval == 0
            if status['i'] % self.args.val_interval != 0:
                # Instantiate validation_data with empty strings when no
                # validation is done
                validation_data = [''] * len(
                    [key for key in header if 'valid' in key])
                assert len(header) == len(train_data + validation_data)
                if self.args.tb:
                    for key, value in zip(header, train_data):
                        writer.add_scalar(key, float(value),
                                          status['num_frames'])
                csv_writer.writerow(train_data + validation_data)

        if status['i'] % self.args.val_interval == 0:
            valid_log = self.validate(self.args.val_episodes)
            mean_return = [
                np.mean(log['return_per_episode']) for log in valid_log
            ]
            success_rate = [
                np.mean([1 if r > 0 else 0
                         for r in log['return_per_episode']])
                for log in valid_log
            ]

            val_log = self.run_epoch_recurrence(self.val_demos)
            validation_accuracy = np.mean(val_log["accuracy"])

            if status['i'] % self.args.log_interval == 0:
                validation_data = ([validation_accuracy] + mean_return +
                                   success_rate)
                logger.info(
                    ("Validation: A {: .3f} " +
                     "| R {: .3f} " * len(mean_return) +
                     "| S {: .3f} " * len(success_rate)
                     ).format(*validation_data))

                assert len(header) == len(train_data + validation_data)
                if self.args.tb:
                    for key, value in zip(header,
                                          train_data + validation_data):
                        writer.add_scalar(key, float(value),
                                          status['num_frames'])
                csv_writer.writerow(train_data + validation_data)

            # In case of a multi-env, the update condition would be
            # "better mean success rate"!
            if np.mean(success_rate) > best_success_rate:
                best_success_rate = np.mean(success_rate)
                status['patience'] = 0
                with open(status_path, 'w') as dst:
                    json.dump(status, dst)
                # Saving the model
                logger.info("Saving best model")

                if torch.cuda.is_available():
                    self.acmodel.cpu()
                utils.save_model(self.acmodel, self.args.model + "_best")
                self.obss_preprocessor.vocab.save(
                    utils.get_vocab_path(self.args.model + "_best"))
                if torch.cuda.is_available():
                    self.acmodel.cuda()
            else:
                status['patience'] += 1
                logger.info("Losing patience, new value={}, limit={}".format(
                    status['patience'], self.args.patience))

            if torch.cuda.is_available():
                self.acmodel.cpu()
            utils.save_model(self.acmodel, self.args.model)
            if torch.cuda.is_available():
                self.acmodel.cuda()
            with open(status_path, 'w') as dst:
                json.dump(status, dst)
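# The training loop above implements patience-based early stopping: the
# patience counter lives in the `status` dict (so it survives restarts) and
# resets whenever the validation success rate improves. A minimal
# self-contained sketch of the same pattern, with hypothetical names
# (`train_step`, `evaluate`, `max_patience`):
def early_stopping_sketch(train_step, evaluate, max_patience):
    best_score, patience = float('-inf'), 0
    while patience <= max_patience:
        train_step()
        score = evaluate()
        if score > best_score:
            best_score, patience = score, 0  # improvement: reset patience
        else:
            patience += 1  # no improvement: lose patience
    return best_score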
def load_model(model):
    # Return both the saved vocabulary and the actor-critic model.
    acmodel = utils.load_model(model)
    vocab_path = utils.get_vocab_path(model)
    with open(vocab_path) as jin:
        vocab = json.load(jin)
    return vocab, acmodel
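# Example usage, assuming a checkpoint previously written by
# utils.save_model under the name below (the model name is hypothetical):
if __name__ == '__main__':
    vocab, acmodel = load_model('GoToLocal_best')
    print('{} tokens in vocabulary'.format(len(vocab)))
    probe_vocab_from_model('GoToLocal_best')  # plot DistilBERT embeddings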