                assert len(header) == len(data)
                for key, value in zip(header, data):
                    writer.add_scalar(key, float(value), status['num_frames'])

            csv_writer.writerow(data)

        # Save obss preprocessor vocabulary and model
        if args.save_interval > 0 and status['i'] % args.save_interval == 0:
            obss_preprocessor.vocab.save()
            with open(status_path, 'w') as dst:
                json.dump(status, dst)
            utils.save_model(acmodel, args.model)

            # Testing the model before saving
            agent = ModelAgent(args.model, obss_preprocessor, argmax=True)
            agent.model = acmodel
            agent.model.eval()
            logs = batch_evaluate(agent, test_env_name, args.val_seed, args.val_episodes)
            agent.model.train()
            mean_return = np.mean(logs["return_per_episode"])
            success_rate = np.mean([1 if r > 0 else 0 for r in logs['return_per_episode']])
            save_model = False

            if success_rate > best_success_rate:
                best_success_rate = success_rate
                save_model = True
            elif (success_rate == best_success_rate) and (mean_return > best_mean_return):
                best_mean_return = mean_return
                save_model = True

                assert len(header) == len(data)
                for key, value in zip(header, data):
                    writer.add_scalar(key, float(value), status['num_frames'])

            csv_writer.writerow(data)

        # Save obss preprocessor vocabulary and model
        if args.save_interval > 0 and status['i'] % args.save_interval == 0:
            obss_preprocessor.vocab.save()
            with open(status_path, 'w') as dst:
                json.dump(status, dst)
            utils.save_model(acmodel, args.model)

            # Testing the model before saving
            agent = ModelAgent(args.model, obss_preprocessor, argmax=True,
                               timepoint_bounds=(5, 15))
            agent.model = acmodel
            agent.model.eval()
            logs = batch_evaluate(agent, test_env_name, args.val_seed, args.val_episodes,
                                  pixel=use_pixel, return_obss_actions=True)
            # Print each evaluated episode's mission and the manager's actions,
            # sorted by mission string so episodes with the same instruction
            # appear together
            order = sorted(range(len(logs['return_per_episode'])),
                           key=lambda i: logs["observations_per_episode"][i][0]["mission"])
            for i in order:
                obs = logs["observations_per_episode"][i]
                acts = logs["manager_actions_per_episode"][i]
                print(obs[0]["mission"], acts)
            agent.model.train()
            mean_return = np.mean(logs["return_per_episode"])
            success_rate = np.mean([1 if r > 0 else 0 for r in logs['return_per_episode']])
            save_model = False

            logger.info("Success rate: {}, best: {}".format(success_rate, best_success_rate))
            if success_rate > best_success_rate:
                best_success_rate = success_rate
                save_model = True
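
# Note (assumption, for reading the printout above): `batch_evaluate` is taken
# to follow the BabyAI convention of returning a dict of per-episode lists,
# and with `return_obss_actions=True` to also include each episode's
# observation sequence; `manager_actions_per_episode` is specific to this
# modified agent and is not part of the upstream API.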

def main():
    # Generate environments
    envs = []
    for i in range(args.procs):
        env = gym.make(args.env)
        env.seed(100 * args.seed + i)
        envs.append(env)

    # Define model name
    suffix = datetime.datetime.now().strftime("%y-%m-%d-%H-%M-%S")
    instr = args.instr_arch if args.instr_arch else "noinstr"
    mem = "mem" if not args.no_mem else "nomem"
    model_name_parts = {
        'env': args.env,
        'algo': args.algo,
        'arch': args.arch,
        'instr': instr,
        'mem': mem,
        'seed': args.seed,
        'info': '',
        'coef': '',
        'suffix': suffix}
    default_model_name = "{env}_{algo}_{arch}_{instr}_{mem}_seed{seed}{info}{coef}_{suffix}".format(**model_name_parts)
    if args.pretrained_model:
        default_model_name = args.pretrained_model + '_pretrained_' + default_model_name
    args.model = args.model.format(**model_name_parts) if args.model else default_model_name

    utils.configure_logging(args.model)
    logger = logging.getLogger(__name__)

    # Define obss preprocessor
    if 'emb' in args.arch:
        obss_preprocessor = utils.IntObssPreprocessor(args.model, envs[0].observation_space,
                                                      args.pretrained_model)
    else:
        # obss_preprocessor = utils.ObssPreprocessor(args.model, envs[0].observation_space,
        #                                            args.pretrained_model)
        obss_preprocessor = utils.ImgInstrObssPreprocessor(args.model, envs[0].observation_space)

    # Define actor-critic model
    acmodel = utils.load_model(args.model, raise_not_found=False)
    if acmodel is None:
        if args.pretrained_model:
            acmodel = utils.load_model(args.pretrained_model, raise_not_found=True)
        else:
            # acmodel = ACModel(obss_preprocessor.obs_space, envs[0].action_space,
            #                   args.image_dim, args.memory_dim, args.instr_dim,
            #                   not args.no_instr, args.instr_arch, not args.no_mem, args.arch)
            acmodel = ACModelImgInstr(obss_preprocessor.obs_space, envs[0].action_space,
                                      args.image_dim, args.memory_dim, args.instr_dim,
                                      not args.no_instr, not args.no_mem, args.arch)

    # obss_preprocessor.vocab.save()
    utils.save_model(acmodel, args.model)

    if torch.cuda.is_available():
        acmodel.cuda()

    # Define actor-critic algo
    reshape_reward = lambda _0, _1, reward, _2: args.reward_scale * reward
    if args.algo == "ppo":
        algo = babyai.rl.PPOAlgo(envs, acmodel, args.frames_per_proc, args.discount, args.lr,
                                 args.beta1, args.beta2, args.gae_lambda,
                                 args.entropy_coef, args.value_loss_coef, args.max_grad_norm,
                                 args.recurrence, args.optim_eps, args.clip_eps,
                                 args.ppo_epochs, args.batch_size, obss_preprocessor,
                                 reshape_reward)
    else:
        raise ValueError("Incorrect algorithm name: {}".format(args.algo))

    # When using extra binary information, more tensors (model params) are initialized
    # compared to when we don't use that. Thus, there starts to be a difference in the
    # random state. If we want to avoid it, in order to make sure that the results of
    # supervised-loss-coef=0. and extra-binary-info=0 match, we need to reseed here.
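    # (In the upstream BabyAI helpers, utils.seed reseeds Python's `random`,
    # NumPy, and PyTorch in one call, so both configurations continue from an
    # identical RNG state.)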
    utils.seed(args.seed)

    # Restore training status
    status_path = os.path.join(utils.get_log_dir(args.model), 'status.json')
    if os.path.exists(status_path):
        with open(status_path, 'r') as src:
            status = json.load(src)
    else:
        status = {'i': 0,
                  'num_episodes': 0,
                  'num_frames': 0}

    # Define Tensorboard writer and CSV writer
    header = (["update", "episodes", "frames", "FPS", "duration"]
              + ["return_" + stat for stat in ['mean', 'std', 'min', 'max']]
              + ["success_rate"]
              + ["num_frames_" + stat for stat in ['mean', 'std', 'min', 'max']]
              + ["entropy", "value", "policy_loss", "value_loss", "loss", "grad_norm"])
    if args.tb:
        from tensorboardX import SummaryWriter
        writer = SummaryWriter(utils.get_log_dir(args.model))
    csv_path = os.path.join(utils.get_log_dir(args.model), 'log.csv')
    first_created = not os.path.exists(csv_path)
    # we don't buffer data going in the csv log, cause we assume
    # that one update will take much longer than one write to the log
    csv_writer = csv.writer(open(csv_path, 'a', 1))
    if first_created:
        csv_writer.writerow(header)

    # Log code state, command, availability of CUDA and model
    babyai_code = list(babyai.__path__)[0]
    try:
        last_commit = subprocess.check_output(
            'cd {}; git log -n1'.format(babyai_code), shell=True).decode('utf-8')
        logger.info('LAST COMMIT INFO:')
        logger.info(last_commit)
    except subprocess.CalledProcessError:
        logger.info('Could not figure out the last commit')
    try:
        diff = subprocess.check_output(
            'cd {}; git diff'.format(babyai_code), shell=True).decode('utf-8')
        if diff:
            logger.info('GIT DIFF:')
            logger.info(diff)
    except subprocess.CalledProcessError:
        logger.info('Could not figure out the git diff')
    logger.info('COMMAND LINE ARGS:')
    logger.info(args)
    logger.info("CUDA available: {}".format(torch.cuda.is_available()))
    logger.info(acmodel)

    # Train model
    total_start_time = time.time()
    best_success_rate = 0
    best_mean_return = 0
    test_env_name = args.env
    while status['num_frames'] < args.frames:
        # Update parameters
        update_start_time = time.time()
        logs = algo.update_parameters()
        update_end_time = time.time()

        status['num_frames'] += logs["num_frames"]
        status['num_episodes'] += logs['episodes_done']
        status['i'] += 1

        # Print logs
        if status['i'] % args.log_interval == 0:
            total_elapsed_time = int(time.time() - total_start_time)
            fps = logs["num_frames"] / (update_end_time - update_start_time)
            duration = datetime.timedelta(seconds=total_elapsed_time)
            return_per_episode = utils.synthesize(logs["return_per_episode"])
            success_per_episode = utils.synthesize(
                [1 if r > 0 else 0 for r in logs["return_per_episode"]])
            num_frames_per_episode = utils.synthesize(logs["num_frames_per_episode"])

            data = [status['i'], status['num_episodes'], status['num_frames'],
                    fps, total_elapsed_time,
                    *return_per_episode.values(),
                    success_per_episode['mean'],
                    *num_frames_per_episode.values(),
                    logs["entropy"], logs["value"], logs["policy_loss"],
                    logs["value_loss"], logs["loss"], logs["grad_norm"]]

            format_str = ("U {} | E {} | F {:06} | FPS {:04.0f} | D {} | "
                          "R:xsmM {: .2f} {: .2f} {: .2f} {: .2f} | "
                          "S {:.2f} | F:xsmM {:.1f} {:.1f} {} {} | H {:.3f} | V {:.3f} | "
                          "pL {: .3f} | vL {:.3f} | L {:.3f} | gN {:.3f} | ")

            logger.info(format_str.format(*data))
            if args.tb:
                assert len(header) == len(data)
                for key, value in zip(header, data):
                    writer.add_scalar(key, float(value), status['num_frames'])

            csv_writer.writerow(data)

        # Save obss preprocessor vocabulary and model
        if args.save_interval > 0 and status['i'] % args.save_interval == 0:
            # obss_preprocessor.vocab.save()
            with open(status_path, 'w') as dst:
                json.dump(status, dst)
            utils.save_model(acmodel, args.model)

            # Testing the model before saving
            agent = ModelAgent(args.model, obss_preprocessor, argmax=True)
            agent.model = acmodel
            agent.model.eval()
            logs = batch_evaluate(agent, test_env_name, args.val_seed, args.val_episodes)
            agent.model.train()
            mean_return = np.mean(logs["return_per_episode"])
            success_rate = np.mean([1 if r > 0 else 0 for r in logs['return_per_episode']])
            save_model = False

            if success_rate > best_success_rate:
                best_success_rate = success_rate
                save_model = True
            elif (success_rate == best_success_rate) and (mean_return > best_mean_return):
                best_mean_return = mean_return
                save_model = True
            if save_model:
                utils.save_model(acmodel, args.model + '_best')
                # obss_preprocessor.vocab.save(utils.get_vocab_path(args.model + '_best'))
                logger.info("Return {: .2f}; best model is saved".format(mean_return))
            else:
                logger.info("Return {: .2f}; not the best model; not saved".format(mean_return))
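
# A minimal entry-point sketch; it assumes `args` is produced by an
# ArgumentParser defined earlier in this file (not shown in this excerpt),
# as in the upstream BabyAI training scripts.
if __name__ == "__main__":
    main()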

                  help='Probability of performing the non-optimal action when the random/model agent is performing')
parser.add_option("--seed", type="int", default=1)
parser.add_option("--num_runs", type="int", default=500)
parser.add_option("--verbose", action='store_true')
(options, args) = parser.parse_args()

if options.level:
    level_list = [options.level]

bad_agent = None
if options.advise_mode:
    if options.model:
        bad_agent = ModelAgent(options.model, obss_preprocessor=None, argmax=True)
    else:
        bad_agent = RandomAgent(seed=options.random_agent_seed)

start_time = time.time()
all_good = True

for level_name in level_list:
    print("Starting level ", level_name)

    num_success = 0
    total_reward = 0
    total_steps = []
    total_bfs = 0