import argparse
import time

import numpy
import torch
from imageio import imsave  # assumed image writer; the original only calls imsave()

import utils
# Assumed import path; this project's ParallelEnv must also expose
# get_environment_state(), which is used in the evaluation loop below.
from torch_ac.utils.penv import ParallelEnv


def make_envs(procs, env_name, seed, extrap_min, extrap_max):
    # Build `procs` copies of the environment, each with a distinct seed,
    # and wrap them in a single vectorized ParallelEnv.
    envs = []
    for i in range(procs):
        env = utils.make_env(
            env_name, seed + 100000 * i,
            {"extrapolate_min": extrap_min, "extrapolate_max": extrap_max})
        envs.append(env)
    env = ParallelEnv(envs)
    print("Environments loaded\n")
    return env
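# A minimal usage sketch for make_envs (env name and bounds are hypothetical;
# utils.make_env must accept the extrapolation-bounds dict for this to work):
#
#   env = make_envs(procs=4, env_name="MiniGrid-GoToObj-v0", seed=0,
#                   extrap_min=5, extrap_max=10)
#   obss = env.reset()  # one observation per parallel process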
def main():
    # Parse arguments
    parser = argparse.ArgumentParser()
    parser.add_argument("--env", required=True,
                        help="name of the environment (REQUIRED)")
    parser.add_argument("--model", required=True,
                        help="name of the trained model (REQUIRED)")
    parser.add_argument("--episodes", type=int, default=100,
                        help="number of episodes of evaluation (default: 100)")
    parser.add_argument("--seed", type=int, default=0,
                        help="random seed (default: 0)")
    parser.add_argument("--procs", type=int, default=1,
                        help="number of processes (default: 1)")
    parser.add_argument("--argmax", action="store_true", default=False,
                        help="action with highest probability is selected")
    parser.add_argument("--worst-episodes-to-show", type=int, default=10,
                        help="how many worst episodes to show")
    parser.add_argument("--memory", action="store_true", default=False,
                        help="add an LSTM to the model")
    parser.add_argument("--text", action="store_true", default=False,
                        help="add a GRU to the model")
    parser.add_argument("--visualize", action="store_true", default=False,
                        help="save averaged per-episode renderings of the agent")
    parser.add_argument("--save_path", default="test_image",
                        help="save path for agent visualizations")
    args = parser.parse_args()

    # Set seed for all randomness sources
    utils.seed(args.seed)

    # Set device
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(f"Device: {device}\n")

    # Load environments
    envs = []
    for i in range(args.procs):
        env = utils.make_env(args.env, args.seed + 10000 * i)
        envs.append(env)
    env = ParallelEnv(envs)
    print("Environments loaded\n")

    # Load agent
    model_dir = utils.get_model_dir(args.model)
    agent = utils.Agent(env.observation_space, env.action_space, model_dir,
                        device=device, argmax=args.argmax, num_envs=args.procs,
                        use_memory=args.memory, use_text=args.text)
    print("Agent loaded\n")

    # Initialize logs
    logs = {"num_frames_per_episode": [], "return_per_episode": []}

    # Run agent
    start_time = time.time()

    obss = env.reset()

    log_done_counter = 0
    log_episode_return = torch.zeros(args.procs, device=device)
    log_episode_num_frames = torch.zeros(args.procs, device=device)

    img_sum = []
    img_count = 0

    while log_done_counter < args.episodes:
        actions = agent.get_actions(obss)
        obss, rewards, dones, _ = env.step(actions)
        agent.analyze_feedbacks(rewards, dones)
        log_episode_return += torch.tensor(rewards, device=device,
                                           dtype=torch.float)
        log_episode_num_frames += torch.ones(args.procs, device=device)

        # Render the current full-grid view for the visualization average.
        state = env.get_environment_state()
        img = state.grid.render(32, state.agent_pos, state.agent_dir,
                                highlight_mask=None)
        encoding = state.grid.encode()

        for i, done in enumerate(dones):
            if done:
                log_done_counter += 1
                logs["return_per_episode"].append(log_episode_return[i].item())
                logs["num_frames_per_episode"].append(
                    log_episode_num_frames[i].item())

                if args.visualize:
                    if len(img_sum) > 0:
                        # Average the accumulated frames and save one image
                        # per finished episode.
                        img_sum = (img_sum / img_count).astype(numpy.uint8)
                        filepath = args.save_path + '_image_' + str(
                            log_done_counter - 1) + '.jpg'
                        imsave(filepath, img_sum)
                        img_sum = []
                        img_count = 0
            else:
                img_count += 1
                if img_count == 1:
                    # Accumulate in float to avoid uint8 overflow while summing.
                    img_sum = img.astype(float)
                else:
                    img_sum += img

        mask = 1 - torch.tensor(dones, device=device, dtype=torch.float)
        log_episode_return *= mask
        log_episode_num_frames *= mask

    end_time = time.time()

    # Print logs
    num_frames = sum(logs["num_frames_per_episode"])
    fps = num_frames / (end_time - start_time)
    duration = int(end_time - start_time)
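    # utils.synthesize is assumed (as in rl-starter-files) to return an ordered
    # mapping of summary statistics, e.g. {"mean": ..., "std": ..., "min": ...,
    # "max": ...}; the format string below unpacks them positionally via
    # *dict.values(), which is why key order matters.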
    return_per_episode = utils.synthesize(logs["return_per_episode"])
    num_frames_per_episode = utils.synthesize(logs["num_frames_per_episode"])

    print("F {} | FPS {:.0f} | D {} | R:μσmM {:.2f} {:.2f} {:.2f} {:.2f} | F:μσmM {:.1f} {:.1f} {} {}"
          .format(num_frames, fps, duration,
                  *return_per_episode.values(),
                  *num_frames_per_episode.values()))

    # Print worst episodes
    n = args.worst_episodes_to_show
    if n > 0:
        print("\n{} worst episodes:".format(n))

        indexes = sorted(range(len(logs["return_per_episode"])),
                         key=lambda k: logs["return_per_episode"][k])
        for i in indexes[:n]:
            print("- episode {}: R={}, F={}".format(
                i, logs["return_per_episode"][i],
                logs["num_frames_per_episode"][i]))


if __name__ == "__main__":
    main()
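# Example invocation (assuming this file is saved as evaluate.py; the
# environment and model names are placeholders):
#
#   python evaluate.py --env MiniGrid-GoToObj-v0 --model my_trained_model \
#       --episodes 100 --procs 4 --argmax --visualize --save_path test_image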