def validate(self, episodes, verbose=True):
    """Run a validation pass of the current policy.

    Loads an evaluation agent, points it at the live training model
    (``self.acmodel``), and batch-evaluates it on each validation
    environment for ``episodes`` episodes.

    Args:
        episodes: number of validation episodes per environment.
        verbose: if True, log the start of validation.

    Returns:
        A list with one log dict (from ``batch_evaluate``) per environment.
    """
    if verbose:
        logger.info("Validating the model")

    multi_env = getattr(self.args, 'multi_env', None)
    # With multiple training envs, any one of them works for agent loading.
    eval_env = self.env[0] if multi_env else self.env
    agent = utils.load_agent(eval_env, model_name=self.args.model, argmax=True)

    # Evaluate the live in-memory model, not a checkpoint from disk.
    agent.model = self.acmodel
    agent.model.eval()

    env_names = self.args.multi_env if multi_env else [self.args.env]
    logs = []
    for env_name in env_names:
        logs.append(
            batch_evaluate(agent, env_name, self.val_seed, episodes,
                           pixel=self.use_pixel))
        # Advance the seed so subsequent validations see fresh episodes.
        self.val_seed += episodes

    agent.model.train()
    return logs
def validate(self, episodes, verbose=True):
    """Run a validation pass of the current policy.

    Re-seeds all randomness sources from ``self.args.val_seed`` so every
    validation is evaluated on the same episodes, then batch-evaluates the
    live training model on each validation environment.

    Args:
        episodes: number of validation episodes per environment.
        verbose: if True, log the start of validation.

    Returns:
        A list with one log dict (from ``batch_evaluate``) per environment.
    """
    # Seed needs to be reset for each validation, to ensure consistency
    utils.seed(self.args.val_seed)
    if verbose:
        logger.info("Validating the model")

    multi_env = getattr(self.args, 'multi_env', None)
    # With multiple training envs, any one of them works for agent loading.
    agent = utils.load_agent(self.env[0] if multi_env else self.env,
                             model_name=self.args.model, argmax=True)

    # Evaluate the live in-memory model, not a checkpoint from disk.
    agent.model = self.acmodel
    agent.model.eval()

    env_names = self.args.multi_env if multi_env else [self.args.env]
    logs = [
        batch_evaluate(agent, env_name, self.args.val_seed, episodes)
        for env_name in env_names
    ]

    agent.model.train()
    return logs
def main(args, seed, episodes):
    """Evaluate a single agent on ``args.env`` and return the logs.

    Args:
        args: parsed CLI namespace (env, model, argmax, episodes,
            contiguous_episodes, ...).
        seed: seed applied to all randomness sources and the environment.
        episodes: number of evaluation episodes requested.

    Returns:
        The log dict produced by the chosen evaluation routine.
    """
    # Seed every randomness source up front for reproducibility.
    utils.seed(seed)

    # Build the environment and the agent under evaluation.
    env = gym.make(args.env)
    env.seed(seed)
    agent = utils.load_agent(env, args.model, None, None, args.argmax,
                             args.env)

    # Without a model we replay demos, so cap episodes at the demo count.
    if args.model is None and args.episodes > len(agent.demos):
        episodes = len(agent.demos)

    # Dispatch to the evaluation routine that matches the agent type.
    if isinstance(agent, utils.DemoAgent):
        return evaluate_demo_agent(agent, episodes)
    if isinstance(agent, utils.BotAgent) or args.contiguous_episodes:
        return evaluate(agent, env, episodes, False)
    return batch_evaluate(agent, args.env, seed, episodes,
                          return_obss_actions=True)
def main_test(args, seed, episodes):
    """Evaluate an agent on the held-out "_Test-v0" variant of ``args.env``.

    Args:
        args: parsed CLI namespace (env, model, argmax, episodes, ...).
        seed: seed applied to all randomness sources and the environment.
        episodes: number of evaluation episodes requested.

    Returns:
        The log dict produced by the chosen evaluation routine.
    """
    # Seed every randomness source up front for reproducibility.
    utils.seed(seed)

    # The test split is registered under a dedicated env id.
    env_name = args.env + "_Test-v0"
    env = gym.make(env_name)
    env.seed(seed)
    agent = utils.load_agent(env, args.model, argmax=args.argmax,
                             env_name=env_name)

    # Without a model we replay demos, so cap episodes at the demo count.
    if args.model is None and args.episodes > len(agent.demos):
        episodes = len(agent.demos)

    # Dispatch to the evaluation routine that matches the agent type.
    if isinstance(agent, utils.DemoAgent):
        return evaluate_demo_agent(agent, episodes)
    if isinstance(agent, utils.BotAgent):
        return evaluate(agent, env, episodes, False)
    return batch_evaluate(agent, env_name, seed, episodes)
def evaluate_agent(il_learn, eval_seed, num_eval_demos):
    """
    Evaluate the agent on some number of episodes and return the seeds for the
    episodes the agent performed the worst on.

    Args:
        il_learn: imitation-learning trainer holding ``env``, ``args.model``
            and ``args.env``.
        eval_seed: base seed passed to ``batch_evaluate``.
        num_eval_demos: number of evaluation episodes to run.

    Returns:
        (success_rate, fail_seeds): the fraction of episodes with positive
        return, and the seeds of every failed episode.
    """
    logger.info("Evaluating agent on {}".format(il_learn.args.env))

    agent = utils.load_agent(il_learn.env, il_learn.args.model)
    # Evaluate in inference mode, then restore training mode.
    agent.model.eval()
    logs = batch_evaluate(agent,
                          il_learn.args.env,
                          episodes=num_eval_demos,
                          seed=eval_seed,
                          seed_shift=0)
    agent.model.train()

    returns = logs['return_per_episode']
    success_rate = np.mean([1 if ret > 0 else 0 for ret in returns])
    logger.info("success rate: {:.2f}".format(success_rate))

    # Collect the seed of every failed episode (non-positive return).
    fail_seeds = [
        logs["seed_per_episode"][idx] for idx, ret in enumerate(returns)
        if ret <= 0
    ]
    return success_rate, fail_seeds
def evaluate_agent(il_learn, eval_seed, num_eval_demos,
                   return_obss_actions=False):
    """
    Evaluate the agent on some number of episodes and return the seeds for the
    episodes the agent performed the worst on.

    When ``return_obss_actions`` is True, the observations and actions of the
    failing episodes are returned as well (for re-labelling / DAgger-style
    use by the caller).
    """
    logger.info("Evaluating agent on {} using {} demos".format(
        il_learn.args.env, num_eval_demos))
    agent = utils.load_agent(il_learn.env, il_learn.args.model)
    # Evaluate in inference mode, then restore training mode afterwards.
    agent.model.eval()
    logs = batch_evaluate(agent,
                          il_learn.args.env,
                          episodes=num_eval_demos,
                          seed=eval_seed,
                          seed_shift=0,
                          return_obss_actions=return_obss_actions)
    agent.model.train()
    # An episode counts as a success iff its return is strictly positive.
    success_rate = np.mean(
        [1 if r > 0 else 0 for r in logs['return_per_episode']])
    logger.info("success rate: {:.2f}".format(success_rate))
    # Find the seeds for all the failing demos.
    # The three lists below are kept index-aligned with each other.
    fail_seeds = []
    fail_obss = []
    fail_actions = []
    for idx, ret in enumerate(logs["return_per_episode"]):
        if ret <= 0:
            fail_seeds.append(logs["seed_per_episode"][idx])
            if return_obss_actions:
                fail_obss.append(logs["observations_per_episode"][idx])
                fail_actions.append(logs["actions_per_episode"][idx])
    logger.info("{} fails".format(len(fail_seeds)))
    if not return_obss_actions:
        return success_rate, fail_seeds
    else:
        return success_rate, fail_seeds, fail_obss, fail_actions
def main(args, seed, episodes):
    """Evaluate an agent on fixed seeds/missions read from a Mechanical-Turk
    CSV file (``args.turk_file``), optionally substituting human-written
    missions and projection sentences.

    Returns the log dict produced by ``evaluate_fixed_seeds``.
    """
    # Set seed for all randomness sources
    utils.seed(seed)
    # Define agent
    env = gym.make(args.env)
    env.seed(seed)
    agent = utils.load_agent(env, args.model, args.demos, args.demos_origin,
                             args.argmax, args.env)
    if args.model is None and args.episodes > len(agent.demos):
        # Set the number of episodes to be the number of demos
        episodes = len(agent.demos)
    # A projection mode requires the sentence file it projects onto.
    if args.proj is not None:
        assert args.proj_file is not None
    if args.proj_file is not None:
        with open(args.proj_file, newline="") as reader:
            proj_sentences = reader.readlines()
    else:
        proj_sentences = None
    # Parse the Turk CSV: each row carries the env seed, the original
    # (generated) mission text, and the human-provided command.
    seeds = []
    orig_missions = []
    missions = []
    with open(args.turk_file, newline="") as reader:
        csv_reader = csv.reader(reader)
        header = next(csv_reader)
        # Column positions are looked up by header name, not assumed fixed.
        i_seed = header.index("Input.seed")
        i_orig_dir = header.index("Input.cmd")
        i_mission = header.index("Answer.command")
        for row in csv_reader:
            seeds.append(int(row[i_seed]))
            orig_missions.append(row[i_orig_dir])
            missions.append(row[i_mission])
    if not args.human:
        # Evaluate on the original generated missions.
        logs = evaluate_fixed_seeds(agent, env, episodes, seeds,
                                    orig_missions)
    else:
        # Evaluate on the human-written missions (with optional projection).
        logs = evaluate_fixed_seeds(agent, env, episodes, seeds,
                                    orig_missions, missions, args.proj,
                                    proj_sentences)
    return logs
def main(args, seed, episodes):
    """Evaluate an agent on ``args.env`` and return the evaluation logs.

    Args:
        args: parsed CLI namespace (env, model, demos, demos_origin, argmax,
            episodes, contiguous_episodes, ...).
        seed: seed applied to all randomness sources and the environment.
        episodes: number of evaluation episodes requested.

    Returns:
        The log dict produced by the chosen evaluation routine.
    """
    # Seed every randomness source up front for reproducibility.
    utils.seed(seed)

    # Build the environment and the agent under evaluation.
    env = gym.make(args.env)
    env.seed(seed)
    agent = utils.load_agent(env, args.model, args.demos, args.demos_origin,
                             args.argmax, args.env)

    # Without a model we replay demos, so cap episodes at the demo count.
    if args.model is None and args.episodes > len(agent.demos):
        episodes = len(agent.demos)

    # Model agents run batched unless contiguous episodes were requested;
    # everything else goes through the sequential evaluator.
    runs_batched = (isinstance(agent, utils.ModelAgent)
                    and not args.contiguous_episodes)
    if runs_batched:
        return batch_evaluate(agent, args.env, seed, episodes)
    return evaluate(agent, env, episodes, False)
# Set seed for all randomness sources utils.seed(args.seed) # Generate environment env = gym.make(args.env) env.seed(args.seed) global obs obs = env.reset() print("Mission: {}".format(obs["mission"])) # Define agent agent = utils.load_agent(env, args.model, args.demos, args.demos_origin, args.argmax, args.env) # Run the agent done = True action = None def keyDownCb(keyName): global obs # Avoiding processing of observation by agent for wrong key clicks if keyName not in action_map and keyName != "RETURN": return agent_action = agent.act(obs)['action']
def generate_demos(n_episodes, valid, seed, shift=0):
    """Roll out the expert agent and save successful episodes as demos.

    Runs episodes until ``n_episodes`` successful demos are collected,
    periodically logging throughput and saving intermediate checkpoints.

    Args:
        n_episodes: number of successful demos to collect.
        valid: whether these demos belong to the validation split
            (forwarded to ``utils.get_demos_path``).
        seed: seed for all randomness sources and the environment.
        shift: number of initial env resets to skip past before collecting.
    """
    utils.seed(seed)
    # Generate environment
    env = gym.make(args.env)
    env.seed(seed)
    # Burn through `shift` episodes so collection starts at a later point
    # in the env's episode stream.
    for i in range(shift):
        env.reset()
    agent = utils.load_agent(env, args.model, args.demos, 'agent', args.argmax,
                             args.env)
    demos_path = utils.get_demos_path(args.demos, args.env, 'agent', valid)
    demos = []
    checkpoint_time = time.time()
    while True:
        # Run the expert for one episode
        done = False
        obs = env.reset()
        agent.on_reset()
        actions = []
        mission = obs["mission"]
        images = []
        directions = []
        try:
            while not done:
                action = agent.act(obs)['action']
                if isinstance(action, torch.Tensor):
                    action = action.item()
                new_obs, reward, done, _ = env.step(action)
                agent.analyze_feedback(reward, done)
                # Record the transition taken FROM `obs` (pre-step state).
                actions.append(action)
                images.append(obs['image'])
                directions.append(obs['direction'])
                obs = new_obs
            # Keep only successful episodes, optionally filtered by length.
            if reward > 0 and (args.filter_steps == 0
                               or len(images) <= args.filter_steps):
                demos.append((mission, blosc.pack_array(np.array(images)),
                              directions, actions))
            if len(demos) >= n_episodes:
                break
            if reward == 0:
                if args.on_exception == 'crash':
                    raise Exception("mission failed")
                logger.info("mission failed")
        except Exception:
            if args.on_exception == 'crash':
                raise
            # Best-effort mode: log the failure and move on to a new episode.
            logger.exception("error while generating demo #{}".format(
                len(demos)))
            continue
        # Periodic throughput logging.
        if len(demos) and len(demos) % args.log_interval == 0:
            now = time.time()
            demos_per_second = args.log_interval / (now - checkpoint_time)
            to_go = (n_episodes - len(demos)) / demos_per_second
            logger.info(
                "demo #{}, {:.3f} demos per second, {:.3f} seconds to go".
                format(len(demos), demos_per_second, to_go))
            checkpoint_time = now
        # Save demonstrations (intermediate checkpoint)
        if args.save_interval > 0 and len(
                demos) < n_episodes and len(demos) % args.save_interval == 0:
            logger.info("Saving demos...")
            utils.save_demos(demos, demos_path)
            logger.info("Demos saved")
            # print statistics for the last 100 demonstrations
            print_demo_lengths(demos[-100:])
    # Save demonstrations (final)
    logger.info("Saving demos...")
    utils.save_demos(demos, demos_path)
    logger.info("Demos saved")
    print_demo_lengths(demos[-100:])
if "_n" in args.env: env = gym.make(args.env, pairs_dict=pairs_dict, test_instr_mode=test_mode, num_dists=args.num_dists) else: env = gym.make(args.env) demo_path = os.path.join(model_path, test_mode) env = Monitor(env, demo_path, _check_log_this, force=True) env.seed(args.seed) # Define agent agent = utils.load_agent(env=env, model_name=args.model, argmax=args.argmax, env_name=args.env, instr_arch=args.instr_arch) utils.seed(args.seed) print('\n') print(f'=== EVALUATING MODE: {test_mode} ===') # Run the agent done = False action = None obs = env.reset() step = 0 episode_num = 0
def main(args, seed, episodes):
    """Evaluate an agent on every environment listed in ``args.env``.

    For each env: build it, load the agent, run the matching evaluation
    routine, print a summary line plus the worst episodes, and collect the
    (synthesized) logs.

    Args:
        args: parsed CLI namespace; ``args.env`` is a list of env names.
        seed: seed applied to all randomness sources and each environment.
        episodes: number of evaluation episodes requested.

    Returns:
        dict mapping env name -> its (augmented) log dict.
    """
    # Set seed for all randomness sources
    utils.seed(seed)

    # Keep track of results per task.
    results = {}
    for env_name in args.env:
        start_time = time.time()
        env = gym.make(env_name)
        env.seed(seed)

        # Define agent
        agent = utils.load_agent(env, args.model, args.demos,
                                 args.demos_origin, args.argmax, env_name,
                                 model_path=args.model_path)

        # BUG FIX: this check previously ran before `agent` was assigned,
        # raising NameError whenever args.model is None. It must follow
        # agent creation since it reads agent.demos.
        if args.model is None and args.episodes > len(agent.demos):
            # Set the number of episodes to be the number of demos
            episodes = len(agent.demos)

        # Evaluate with the routine matching the agent type.
        if isinstance(agent, utils.DemoAgent):
            logs = evaluate_demo_agent(agent, episodes)
        elif isinstance(agent, utils.BotAgent) or args.contiguous_episodes:
            logs = evaluate(agent, env, episodes, False)
        else:
            logs = batch_evaluate(agent, env_name, seed, episodes)
        end_time = time.time()

        # Print logs
        num_frames = sum(logs["num_frames_per_episode"])
        fps = num_frames / (end_time - start_time)
        ellapsed_time = int(end_time - start_time)
        duration = datetime.timedelta(seconds=ellapsed_time)

        # Return/success statistics only exist when a model was evaluated.
        if args.model is not None:
            return_per_episode = utils.synthesize(logs["return_per_episode"])
            success_per_episode = utils.synthesize(
                [1 if r > 0 else 0 for r in logs["return_per_episode"]])
        num_frames_per_episode = utils.synthesize(
            logs["num_frames_per_episode"])

        if args.model is not None:
            print(
                "F {} | FPS {:.0f} | D {} | R:xsmM {:.3f} {:.3f} {:.3f} {:.3f} | S {:.3f} | F:xsmM {:.1f} {:.1f} {} {}"
                .format(num_frames, fps, duration,
                        *return_per_episode.values(),
                        success_per_episode['mean'],
                        *num_frames_per_episode.values()))
        else:
            print(
                "F {} | FPS {:.0f} | D {} | F:xsmM {:.1f} {:.1f} {} {}".format(
                    num_frames, fps, duration,
                    *num_frames_per_episode.values()))

        # Episodes sorted by descending frame count (slowest first).
        indexes = sorted(range(len(logs["num_frames_per_episode"])),
                         key=lambda k: -logs["num_frames_per_episode"][k])
        n = args.worst_episodes_to_show
        if n > 0:
            print("{} worst episodes:".format(n))
            for i in indexes[:n]:
                if 'seed_per_episode' in logs:
                    print(logs['seed_per_episode'][i])
                if args.model is not None:
                    print("- episode {}: R={}, F={}".format(
                        i, logs["return_per_episode"][i],
                        logs["num_frames_per_episode"][i]))
                else:
                    print("- episode {}: F={}".format(
                        i, logs["num_frames_per_episode"][i]))

        # Store results for this env.
        # BUG FIX: the return/success stores were unconditional, raising
        # NameError in the no-model path where those names are undefined.
        if args.model is not None:
            logs['return_per_episode'] = return_per_episode
            logs['success_per_episode'] = success_per_episode
        logs['num_frames_per_episode'] = num_frames_per_episode
        results[env_name] = logs

    return results
args.seed = 0 if args.model is not None else 1 # Set seed for all randomness sources utils.seed(args.seed) # Generate environment env = gym.make(args.env) env.seed(args.seed) for _ in range(args.shift): env.reset() # Define agent agent = utils.load_agent(args, env) # Run the agent done = True import cv2 import numpy as np episode = 0 step = 0 while True: time.sleep(args.pause) image = env.render("rgb_array") image = cv2.resize(image, dsize=(512, 512), interpolation=cv2.INTER_CUBIC) #image = np.transpose(image, (2, 0, 1)) file_name = 'rendered_image/episodes_' + str(episode) + '_step_' + str( step) + '.png'
def generate_demos(n_episodes, valid, seed, shift=0):
    """Roll out the expert agent and save successful episodes as demos.

    Pixel-capable variant: with ``args.pixels`` the env is wrapped in
    ``RGBImgPartialObsWrapper`` and per-step directions are not recorded.
    After a failure or crash the env is re-reset (un-seeded) to look for a
    solvable mission; otherwise each episode is seeded deterministically
    with ``seed + len(demos)``.

    Args:
        n_episodes: number of successful demos to collect.
        valid: whether these demos belong to the validation split
            (forwarded to ``utils.get_demos_path``).
        seed: base seed for all randomness sources.
        shift: accepted for interface parity; not used in this variant.
    """
    utils.seed(seed)
    # Generate environment
    env = gym.make(args.env)
    use_pixels = args.pixels
    if use_pixels:
        env = RGBImgPartialObsWrapper(env)
    agent = utils.load_agent(env, args.model, args.demos, 'agent', args.argmax,
                             args.env)
    demos_path = utils.get_demos_path(args.demos, args.env, 'agent', valid)
    demos = []
    checkpoint_time = time.time()
    just_crashed = False
    while True:
        if len(demos) == n_episodes:
            break
        done = False
        if just_crashed:
            # After a failure, reset without re-seeding to sample a
            # different mission the bot might be able to solve.
            logger.info(
                "reset the environment to find a mission that the bot can solve"
            )
            env.reset()
        else:
            env.seed(seed + len(demos))
        obs = env.reset()
        agent.on_reset()
        actions = []
        mission = obs["mission"]
        images = []
        directions = []
        try:
            while not done:
                action = agent.act(obs)['action']
                if isinstance(action, torch.Tensor):
                    action = action.item()
                new_obs, reward, done, _ = env.step(action)
                agent.analyze_feedback(reward, done)
                # Record the transition taken FROM `obs` (pre-step state).
                actions.append(action)
                images.append(obs['image'])
                if use_pixels:
                    # Pixel observations carry no 'direction' field.
                    directions.append(None)
                else:
                    directions.append(obs['direction'])
                obs = new_obs
            # Keep only successful episodes, optionally filtered by length.
            if reward > 0 and (args.filter_steps == 0
                               or len(images) <= args.filter_steps):
                demos.append((mission, blosc.pack_array(np.array(images)),
                              directions, actions))
                just_crashed = False
            if reward == 0:
                if args.on_exception == 'crash':
                    raise Exception(
                        "mission failed, the seed is {}".format(seed +
                                                                len(demos)))
                just_crashed = True
                logger.info("mission failed")
        except (Exception, AssertionError):
            if args.on_exception == 'crash':
                raise
            just_crashed = True
            logger.exception("error while generating demo #{}".format(
                len(demos)))
            continue
        # Periodic throughput logging.
        if len(demos) and len(demos) % args.log_interval == 0:
            now = time.time()
            demos_per_second = args.log_interval / (now - checkpoint_time)
            to_go = (n_episodes - len(demos)) / demos_per_second
            logger.info(
                # NOTE(review): logs len(demos) - 1 (the sibling variant logs
                # len(demos)) — presumably intentional zero-based numbering;
                # confirm before changing.
                "demo #{}, {:.3f} demos per second, {:.3f} seconds to go".
                format(len(demos) - 1, demos_per_second, to_go))
            checkpoint_time = now
        # Save demonstrations (intermediate checkpoint)
        if args.save_interval > 0 and len(
                demos) < n_episodes and len(demos) % args.save_interval == 0:
            logger.info("Saving demos...")
            utils.save_demos(demos, demos_path)
            logger.info("{} demos saved".format(len(demos)))
            # print statistics for the last 100 demonstrations
            print_demo_lengths(demos[-100:])
    # Save demonstrations (final)
    logger.info("Saving demos...")
    utils.save_demos(demos, demos_path)
    logger.info("{} demos saved".format(len(demos)))
    print_demo_lengths(demos[-100:])