def main(args, seed, episodes):
    """Evaluate one agent on the single environment named by ``args.env``.

    Args:
        args: Parsed options. This function reads ``env``, ``model``,
            ``argmax``, ``episodes`` and ``contiguous_episodes``.
        seed: Seed passed to ``utils.seed``, to the environment and, for the
            batched path, to ``batch_evaluate``.
        episodes: Number of episodes to evaluate. Replaced by the number of
            agent demos when no model is given and ``args.episodes`` exceeds
            that number.

    Returns:
        The logs object produced by whichever evaluation routine was chosen.
    """
    # Seed every randomness source before anything else is created.
    utils.seed(seed)

    # Build the environment and the agent that acts in it. The third and
    # fourth positional arguments of load_agent are deliberately None here.
    env = gym.make(args.env)
    env.seed(seed)
    agent = utils.load_agent(env, args.model, None, None, args.argmax, args.env)

    # Without a model, never request more episodes than there are demos.
    if args.model is None:
        if args.episodes > len(agent.demos):
            episodes = len(agent.demos)

    # Pick the evaluation routine from the agent type.
    if isinstance(agent, utils.DemoAgent):
        return evaluate_demo_agent(agent, episodes)

    run_sequentially = isinstance(agent, utils.BotAgent) or args.contiguous_episodes
    if run_sequentially:
        return evaluate(agent, env, episodes, False)

    return batch_evaluate(agent, args.env, seed, episodes,
                          return_obss_actions=True)
def main_test(args, seed, episodes):
    """Evaluate one agent on the test variant of ``args.env``.

    The environment name is ``args.env`` with ``"_Test-v0"`` appended.

    Args:
        args: Parsed options. This function reads ``env``, ``model``,
            ``argmax`` and ``episodes``.
        seed: Seed passed to ``utils.seed``, to the environment and, for the
            batched path, to ``batch_evaluate``.
        episodes: Number of episodes to evaluate. Replaced by the number of
            agent demos when no model is given and ``args.episodes`` exceeds
            that number.

    Returns:
        The logs object produced by whichever evaluation routine was chosen.
    """
    # Seed every randomness source before anything else is created.
    utils.seed(seed)

    # Build the test environment and the agent that acts in it.
    test_env_name = args.env + "_Test-v0"
    env = gym.make(test_env_name)
    env.seed(seed)
    agent = utils.load_agent(env, args.model, argmax=args.argmax,
                             env_name=test_env_name)

    # Without a model, never request more episodes than there are demos.
    if args.model is None:
        if args.episodes > len(agent.demos):
            episodes = len(agent.demos)

    # Pick the evaluation routine from the agent type.
    if isinstance(agent, utils.DemoAgent):
        return evaluate_demo_agent(agent, episodes)
    if isinstance(agent, utils.BotAgent):
        return evaluate(agent, env, episodes, False)
    return batch_evaluate(agent, test_env_name, seed, episodes)
def main(args, seed, episodes):
    """Evaluate an agent on every environment in ``args.env`` and collect logs.

    For each environment name this builds the environment, loads the agent,
    runs the matching evaluation routine, prints a one-line summary followed
    by the episodes with the most frames ("worst" episodes), and stores the
    synthesized statistics in the returned mapping.

    Args:
        args: Parsed options. This function reads ``env`` (an iterable of
            environment names), ``model``, ``model_path``, ``demos``,
            ``demos_origin``, ``argmax``, ``episodes``,
            ``contiguous_episodes`` and ``worst_episodes_to_show``.
        seed: Seed passed to ``utils.seed``, to each environment and, for the
            batched path, to ``batch_evaluate``.
        episodes: Number of episodes to evaluate per environment. When no
            model is given and ``args.episodes`` exceeds the number of agent
            demos, the demo count is used for that environment only.

    Returns:
        dict mapping each environment name to its logs. In those logs
        ``num_frames_per_episode`` is replaced by the ``utils.synthesize``
        summary; when a model is evaluated, ``return_per_episode`` and
        ``success_per_episode`` are replaced/added the same way.
    """
    # Seed every randomness source before anything else is created.
    utils.seed(seed)

    has_model = args.model is not None

    # Results are tracked per task (environment name).
    results = {}
    for env_name in args.env:
        start_time = time.time()

        env = gym.make(env_name)
        env.seed(seed)

        # The agent has to be loaded before its demos can be counted; doing
        # the check first raised UnboundLocalError when no model was given.
        agent = utils.load_agent(env, args.model, args.demos,
                                 args.demos_origin, args.argmax, env_name,
                                 model_path=args.model_path)

        # Cap the episode count per environment so that one environment's
        # demo count does not leak into the following ones.
        env_episodes = episodes
        if not has_model and args.episodes > len(agent.demos):
            env_episodes = len(agent.demos)

        # Pick the evaluation routine from the agent type.
        if isinstance(agent, utils.DemoAgent):
            logs = evaluate_demo_agent(agent, env_episodes)
        elif isinstance(agent, utils.BotAgent) or args.contiguous_episodes:
            logs = evaluate(agent, env, env_episodes, False)
        else:
            logs = batch_evaluate(agent, env_name, seed, env_episodes)
        elapsed = time.time() - start_time

        # Keep references to the raw per-episode lists: the entries in
        # ``logs`` are overwritten with their summaries further down.
        frames = logs["num_frames_per_episode"]
        num_frames = sum(frames)
        fps = num_frames / elapsed
        duration = datetime.timedelta(seconds=int(elapsed))
        num_frames_per_episode = utils.synthesize(frames)

        # Print logs. Return/success statistics only exist with a model.
        if has_model:
            returns = logs["return_per_episode"]
            return_per_episode = utils.synthesize(returns)
            success_per_episode = utils.synthesize(
                [1 if r > 0 else 0 for r in returns])
            print(
                "F {} | FPS {:.0f} | D {} | R:xsmM {:.3f} {:.3f} {:.3f} {:.3f} | S {:.3f} | F:xsmM {:.1f} {:.1f} {} {}"
                .format(num_frames, fps, duration,
                        *return_per_episode.values(),
                        success_per_episode['mean'],
                        *num_frames_per_episode.values()))
        else:
            print(
                "F {} | FPS {:.0f} | D {} | F:xsmM {:.1f} {:.1f} {} {}".format(
                    num_frames, fps, duration,
                    *num_frames_per_episode.values()))

        # Show the n episodes that took the most frames.
        n = args.worst_episodes_to_show
        if n > 0:
            indexes = sorted(range(len(frames)), key=lambda k: -frames[k])
            print("{} worst episodes:".format(n))
            for i in indexes[:n]:
                if 'seed_per_episode' in logs:
                    print(logs['seed_per_episode'][i])
                if has_model:
                    print("- episode {}: R={}, F={}".format(
                        i, returns[i], frames[i]))
                else:
                    print("- episode {}: F={}".format(i, frames[i]))

        # Store results for this env. Return/success summaries are only
        # available (and only stored) when a model was evaluated.
        if has_model:
            logs['return_per_episode'] = return_per_episode
            logs['success_per_episode'] = success_per_episode
        logs['num_frames_per_episode'] = num_frames_per_episode
        results[env_name] = logs

    return results