def enjoy(environment, model, seed=0, argmax=False, pause=0.1):
    utils.seed(seed)

    # Generate environment
    environment.seed(seed)

    # Define agent
    model_dir = utils.get_model_dir(model)
    agent = utils.Agent(model_dir, environment.observation_space, argmax)

    # Run the agent
    done = True

    while True:
        if done:
            obs = environment.reset()
            print("Instr:", obs["mission"])

        time.sleep(pause)
        renderer = environment.render("human")
        renderer.window.update_imagination_display([[1, 2, 3], 2, 3], None, None)

        action = agent.get_action(obs)
        obs, reward, done, _ = environment.step(action)
        agent.analyze_feedback(reward, done)

        if renderer.window is None:
            break
                    default=False, help="action with highest probability is selected")
args = parser.parse_args()

# Set seed for all randomness sources
utils.seed(args.seed)

# Generate environment
env = gym.make(args.env)
env.seed(args.seed)

# Define agent
agent = utils.Agent(args.model, env.observation_space, args.deterministic)

# Initialize logs
logs = {"num_frames_per_episode": [], "return_per_episode": []}

# Run the agent
start_time = time.time()

for _ in range(args.episodes):
    obs = env.reset()
    done = False
    num_frames = 0
    returnn = 0
import nengo_ssp as ssp

X, Y, _ = ssp.HexagonalBasis(10, 10)
d = len(X.v)
env = SSPWrapper(env, d, X, Y)
for _ in range(args.shift):
    env.reset()
print("Environment loaded\n")

# Load agent
model_dir = utils.get_model_dir(args.model)
agent = utils.Agent(env.observation_space, env.action_space, model_dir,
                    model_name=args.algo, device=device, argmax=args.argmax,
                    use_memory=args.memory, use_text=args.text,
                    input_type=args.input, feature_learn=args.feature_learn)
print("Agent loaded\n")

# Run the agent
if args.gif:
    from array2gif import write_gif
    frames = []

plt.imshow(env.render('human'))

for episode in range(args.episodes):
utils.seed(args.seed)

# Generate environment
envs = []
for i in range(args.procs):
    env = gym.make(args.env)
    env.seed(args.seed + 10000 * i)
    envs.append(env)
env = ParallelEnv(envs)

# Define agent
save_dir = utils.get_save_dir(args.model)
agent = utils.Agent(save_dir, env.observation_space, args.argmax, args.procs)
print("CUDA available: {}\n".format(torch.cuda.is_available()))

# Initialize logs
logs = {"num_frames_per_episode": [], "return_per_episode": []}

# Run the agent
start_time = time.time()

obss = env.reset()

log_done_counter = 0
log_episode_return = torch.zeros(args.procs, device=agent.device)
log_episode_num_frames = torch.zeros(args.procs, device=agent.device)
for model_idx, model_name in enumerate(model_names):
    for env_idx, env_name in enumerate(envs):
        results = np.zeros((nr_levels, ))
        env = gym.make(env_name)
        # Make the episodes comparable between all agents.
        env.seed(0)
        if fully_observable_environment:
            env = gym_minigrid.wrappers.FullyObsWrapper(env)

        # Define agent
        model_dir = utils.get_model_dir(model_name)
        agent = utils.Agent(env_name, env.observation_space, model_dir, argmax=False)

        lvl_cnt = 0
        obs = env.reset()
        while True:
            action = agent.get_action(obs)
            obs, reward, done, _ = env.step(action)
            agent.analyze_feedback(reward, done)
            if done:
                results[lvl_cnt] = reward > 0
                lvl_cnt += 1
                pbar.update()
# Generate environment
env = gym.make(args.env)
env.seed(args.seed)
if args.fullObs:
    env = gym_minigrid.wrappers.FullyObsWrapper(env)
elif args.POfullObs:
    env = gym_minigrid.wrappers.PartialObsFullGridWrapper(env)
for _ in range(args.shift):
    env.reset()

# Define agent
model_dir = utils.get_model_dir(args.model)
agent = utils.Agent(args.env, env.observation_space, model_dir, args.argmax)

# Run the agent
done = True

while True:
    if done:
        obs = env.reset()

    time.sleep(args.pause)
    renderer = env.render()

    action = agent.get_action(obs)
    obs, reward, done, _ = env.step(action)
    agent.analyze_feedback(reward, done)
# Load environment
env = utils.make_env(args.env, args.seed)
for _ in range(args.shift):
    env.reset()
print("Environment loaded\n")

# Load agent
model_dir = utils.get_model_dir(args.env, args.n_columns, args.transfer)
agent = utils.Agent(env.observation_space, env.action_space, model_dir,
                    args.seed, args.n_columns, device=device, argmax=args.argmax,
                    use_memory=args.memory, use_text=args.text)
print("Agent loaded\n")

# Run the agent
if args.gif:
    from array2gif import write_gif
    frames = []

# Create a window to view the environment
env.render('human')
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Device: {device}\n")

# Load environments
envs = []
for i in range(args.procs):
    env = utils.make_env(args.env, args.seed + 10000 * i)
    envs.append(env)
env = ParallelEnv(envs)
print("Environments loaded\n")

# Load agent
model_dir = utils.get_model_dir(args.model)
agent = utils.Agent(env.observation_space, env.action_space, model_dir,
                    device, args.argmax, args.procs)
print("Agent loaded\n")

# Initialize logs
logs = {"num_frames_per_episode": [], "return_per_episode": []}

# Run agent
start_time = time.time()

obss = env.reset()

log_done_counter = 0
log_episode_return = torch.zeros(args.procs, device=device)
log_episode_num_frames = torch.zeros(args.procs, device=device)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Device: {device}\n")

# Load environment
env = utils.make_env(args.env, args.seed)
for _ in range(args.shift):
    env.reset()
print("Environment loaded\n")

# Load agent
model_dir = utils.get_model_dir(args.model)
agent = utils.Agent(env.observation_space, env.action_space, model_dir,
                    device=device, argmax=args.argmax, use_text=args.text)
print("Agent loaded\n")

# Run the agent
if args.gif:
    from array2gif import write_gif
    frames = []

# Create a window to view the environment
env.render('human')

for episode in range(args.episodes):
    obs = env.reset()
def visualiseAndSave(envStr, model_name, seed, numEpisodes, txt_logger, gifName="test",
                     save=False, dir=None, agentType=ppo, CNNCLASS=None):
    if agentType != ppo and agentType != dqn:
        raise Exception
    utils.seed(seed)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    env = utils.make_env(envStr, seed)
    model_dir = utils.get_model_dir(model_name, dir)

    if agentType == ppo:
        agent = utils.Agent(env.observation_space, env.action_space, model_dir,
                            device=device, argmax=True, use_memory=False, use_text=False)
    else:
        if hasattr(env, 'my_shape'):
            model = CNNCLASS(env.my_shape, env.action_space.n)
        else:
            model = CNNCLASS(env.observation_space['image'].shape, env.action_space.n)
        loaded_dict = torch.load(model_dir + "/status.pt")
        model.load_state_dict(loaded_dict["model_state"])
        print("For Test load state frames:", loaded_dict['num_frames'],
              "updates:", loaded_dict['update'])
        model.to(device)
        model.eval()
        if USE_CUDA:
            print("USE CUDA")
            model = model.cuda()

    if save:
        from array2gif import write_gif
        frames = []

    mycumulativereward = 0
    mycumulativeperf = 0
    mycumulativeperffull = 0
    mycumulativeButtons = 0
    mycumulativePhones = 0
    mycumulativeDirts = 0
    mycumulativeMesses = 0
    runsNum = 0

    for episode in range(numEpisodes):
        obs = env.reset()
        myreward = 0
        myperf = 0
        myperffull = 0
        myButtons = 0
        myPhones = 0
        myDirts = 0
        myMesses = 0

        while True:
            if save:
                frames.append(numpy.moveaxis(env.render("rgb_array"), 2, 0))

            if agentType == ppo:
                action = agent.get_action(obs)
            else:
                action = model.act(obs['image'], 0, True)  # epsilon == 0 so no exploration

            obs, reward, done, info = env.step(action)
            myreward += reward
            myperf += info['performance']
            myperffull += info['performance_full']
            myButtons += info['button_presses']
            myPhones += info['phones_cleaned']
            myDirts += info['dirt_cleaned']
            myMesses += info['messes_cleaned']

            if agentType == ppo:
                agent.analyze_feedback(reward, done)

            if done:
                runsNum += 1
                mycumulativereward += myreward
                mycumulativeperf += myperf
                mycumulativeperffull += myperffull
                mycumulativeButtons += myButtons
                mycumulativePhones += myPhones
                mycumulativeDirts += myDirts
                mycumulativeMesses += myMesses
                averageReward = mycumulativereward / runsNum
                averagePerformance = mycumulativeperf / runsNum
                averagePerformanceFull = mycumulativeperffull / runsNum
                averageButtons = mycumulativeButtons / runsNum
                averageDirts = mycumulativeDirts / runsNum
                averagePhones = mycumulativePhones / runsNum
                averageMesses = mycumulativeMesses / runsNum
                break

    if save:
        saveMeAs = model_dir + "/" + model_name + gifName + ".gif"
        txt_logger.info(("Saving gif to ", saveMeAs, "... "))
        write_gif(numpy.array(frames), saveMeAs, fps=1 / 0.3)
        txt_logger.info("Done.")

    return averageReward, averagePerformance, averagePerformanceFull, averageButtons, averageDirts, averagePhones, averageMesses
help="pause duration between two consequent actions of the agent") args = parser.parse_args() # Set seed for all randomness sources utils.seed(args.seed) # Generate environment env = gym.make(args.env) env.seed(args.seed) # Define agent run_dir = utils.get_run_dir(args.model) agent = utils.Agent(run_dir, env.observation_space, args.deterministic) # Run the agent done = True while True: if done: obs = env.reset() print("Instr:", obs["mission"]) time.sleep(args.pause) renderer = env.render("human") action = agent.get_action(obs) obs, reward, done, _ = env.step(action)
# Load environment
env = utils.make_env(args.env, args.seed)
for _ in range(args.shift):
    env.reset()
print("Environment loaded\n")

# Load agent
model_dir = utils.get_model_dir(args.model)
autoencoder = torch.load(args.autoencoder_path)
agent = utils.Agent(
    env.observation_space,
    env.action_space,
    model_dir,
    autoencoder=autoencoder,
    device=device,
    argmax=args.argmax,
    use_memory=args.memory,
    use_text=args.text,
)
print("Agent loaded\n")

# Run the agent
if args.gif:
    from array2gif import write_gif
    frames = []

obs_array = []
predicted_obs = []
predicted_uncertainty = []
def start(model, seed, episodes, size):
    env_name = "MiniGrid-DoorKey-" + str(size) + "x" + str(size) + "-v0"
    utils.seed(seed)
    procs = 10
    argmax = False
    all_data = np.zeros(shape=(size, 8))
    print("Evaluating storage/" + model)

    for _wall in range(2, size - 2):
        # Generate environment
        envs = []
        for i in range(procs):
            env = gym.make(env_name)
            env.setWallID(_wall)
            envs.append(env)
        env = ParallelEnv(envs)

        # Define agent
        save_dir = utils.get_save_dir(model)
        agent = utils.Agent(save_dir, env.observation_space, argmax, procs)
        # print("CUDA available: {}\n".format(torch.cuda.is_available()))

        # Initialize logs
        logs = {"num_frames_per_episode": [], "return_per_episode": []}

        # Run the agent
        start_time = time.time()

        obss = env.reset()

        log_done_counter = 0
        log_episode_return = torch.zeros(procs, device=agent.device)
        log_episode_num_frames = torch.zeros(procs, device=agent.device)

        while log_done_counter < episodes:
            actions = agent.get_actions(obss)
            obss, rewards, dones, _ = env.step(actions)
            agent.analyze_feedbacks(rewards, dones)

            log_episode_return += torch.tensor(rewards, device=agent.device, dtype=torch.float)
            log_episode_num_frames += torch.ones(procs, device=agent.device)

            for i, done in enumerate(dones):
                if done:
                    log_done_counter += 1
                    logs["return_per_episode"].append(log_episode_return[i].item())
                    logs["num_frames_per_episode"].append(log_episode_num_frames[i].item())

            mask = 1 - torch.tensor(dones, device=agent.device, dtype=torch.float)
            log_episode_return *= mask
            log_episode_num_frames *= mask

        end_time = time.time()

        # Print logs
        num_frames = sum(logs["num_frames_per_episode"])
        fps = num_frames / (end_time - start_time)
        duration = int(end_time - start_time)
        return_per_episode = utils.synthesize(logs["return_per_episode"])
        num_frames_per_episode = utils.synthesize(logs["num_frames_per_episode"])

        print("Wall {:3d} | F {:6.0f} | FPS {:4.0f} | D {:3d} | R:x̄σmM {:.2f} {:.2f} {:.2f} {:.2f} | F:x̄σmM {:6.1f} {:6.1f} {:6.1f} {:6.1f}"
              .format(_wall, num_frames, fps, duration,
                      *return_per_episode.values(),
                      *num_frames_per_episode.values()))

        all_data[_wall, 0] = return_per_episode["mean"]
        all_data[_wall, 1] = return_per_episode["std"]
        all_data[_wall, 2] = return_per_episode["min"]
        all_data[_wall, 3] = return_per_episode["max"]
        all_data[_wall, 4] = num_frames_per_episode["mean"]
        all_data[_wall, 5] = num_frames_per_episode["std"]
        all_data[_wall, 6] = num_frames_per_episode["min"]
        all_data[_wall, 7] = num_frames_per_episode["max"]

    return all_data
    return env

env = make_envs(args.procs, args.env, args.seed, args.extrap_min, args.extrap_min + 1)

# Load agent
model_dirs = utils.get_models_for_exp(args.exp_id)
agents = defaultdict(list)
for model_dir in model_dirs:
    root = utils.get_model_dir(model_dir, args.exp_id)
    use_nac, use_text, use_memory = utils.get_args_for_model(model_dir)
    for idx, seed in enumerate(os.listdir(root)):
        exp_path = os.path.join(root, seed)
        if args.eval_one_model_per_seed and idx != 0:
            continue
        agents[exp_path].append(utils.Agent(env.observation_space, env.action_space, exp_path,
                                            device=device, argmax=args.argmax, num_envs=args.procs,
                                            use_memory=use_memory, use_text=use_text, use_nac=use_nac))

obs_space, preprocess_obss = utils.get_obss_preprocessor(env.envs[0].observation_space)
print("Agents loaded\n")

all_logs = defaultdict(list)
start_time = time.time()

for exp_path, agent_list in agents.items():
    for agent in agent_list:
        for offset in range(args.extrap_min, args.extrap_max):
            logs = {"offset": offset, "num_frames_per_episode": [], "return_per_episode": []}
            env = make_envs(args.procs, args.env, args.seed, offset, offset + 1)
            obss = env.reset()
            log_done_counter = 0
    env = ltl_wrappers.LTLEnv(env, ltl_sampler="Default")
    agent = RandomAgent(env.action_space)
elif (args["command"] == "viz"):
    # If the config is available (from training), load it here instead of asking
    # the user of this script to provide all training-time configs
    config = vars(utils.load_config(args["model_path"]))
    args.update(config)

    env = gym.make(args["env_id"])
    env = safety_wrappers.Play(env)
    env = ltl_wrappers.LTLEnv(env, ltl_sampler=args["ltl_sampler"],
                              progression_mode=args["progression_mode"])
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    agent = utils.Agent(env, env.observation_space, env.action_space, args["model_path"],
                        args["ignoreLTL"], args["progression_mode"], args["gnn"],
                        device=device, dumb_ac=args["dumb_ac"])
else:
    print("Incorrect command: ", args["command"])
    exit(1)

run_policy(agent, env, max_ep_len=30000, num_episodes=1000)
# Set device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Device: {device}\n")

# Load environment
env = utils.make_env(args.env, args.seed)
for _ in range(args.shift):
    env.reset()
print("Environment loaded\n")

# Load agent
model_dir = utils.get_model_dir(args.model)
agent = utils.Agent(env.observation_space, env.action_space, model_dir,
                    device, args.argmax, use_rim=args.use_rim)
print("Agent loaded\n")

# Run the agent
if args.gif:
    from array2gif import write_gif
    frames = []

# Create a window to view the environment
env.render('human')

for episode in range(args.episodes):
    obs = env.reset()
    done2 = False

    while True:
def main():
    # Parse arguments
    parser = argparse.ArgumentParser()
    parser.add_argument("--env", required=True,
                        help="name of the environment to be run (REQUIRED)")
    parser.add_argument("--model", required=True,
                        help="name of the trained model (REQUIRED)")
    parser.add_argument("--seed", type=int, default=0,
                        help="random seed (default: 0)")
    parser.add_argument("--shift", type=int, default=0,
                        help="number of times the environment is reset at the beginning (default: 0)")
    parser.add_argument("--argmax", action="store_true", default=False,
                        help="select the action with highest probability (default: False)")
    parser.add_argument("--pause", type=float, default=0.1,
                        help="pause duration between two consecutive actions of the agent (default: 0.1)")
    parser.add_argument("--gif", type=str, default=None,
                        help="store output as gif with the given filename")
    parser.add_argument("--episodes", type=int, default=1000000,
                        help="number of episodes to visualize")
    parser.add_argument("--memory", action="store_true", default=False,
                        help="add a LSTM to the model")
    parser.add_argument("--text", action="store_true", default=False,
                        help="add a GRU to the model")

    if len(sys.argv) > 1:
        args = parser.parse_args()
    else:
        # No command-line arguments given: fall back to hard-coded defaults
        args = argparse.Namespace()
        args.env = 'MiniGrid-DoorKey-5x5-v0'
        args.env = 'MiniGrid-KeyCorridorGBLA-v0'
        args.model = 'KeyCorridor1'
        args.episodes = 10
        args.seed = 0
        args.shift = 0
        args.argmax = False
        args.memory = False
        args.text = False
        args.gif = 'storage/' + args.model + '/' + args.model
        args.pause = 0.1

    if args.env == 'MiniGrid-KeyCorridorGBLA-v0':
        env_descriptor = [[0, 0, 0], [0, 13, 0], [0, 0, 0]]
        task_descriptor = TaskDescriptor(envD=env_descriptor, rmDesc=None, rmOrder=None,
                                         rmSize=4, observ=True, seed=None, time_steps=None)
        env = gym.make('MiniGrid-KeyCorridorGBLA-v0', taskD=task_descriptor)
        goal = GetGoalDescriptor(env)
        goal = goal.refinement[0].refinement[0].refinement[0]
        env = gym_minigrid.wrappers.FullyObsWrapper(env)
        env = gym_minigrid.wrappers.ImgObsWrapper(env)
        env = GoalRL.GoalEnvWrapper(env, goal=goal, verbose=0)
        args.env = env
    else:
        pass

    # Set seed for all randomness sources
    utils.seed(args.seed)

    # Set device
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(f"Device: {device}\n")

    # Load environment
    if type(args.env) == str:
        env = utils.make_env(args.env, args.seed)
    else:
        env = args.env
    for _ in range(args.shift):
        env.reset()
    print("Environment loaded\n")

    # Load agent
    model_dir = utils.get_model_dir(args.model)
    agent = utils.Agent(env.observation_space, env.action_space, model_dir,
                        device=device, argmax=args.argmax,
                        use_memory=args.memory, use_text=args.text)
    print("Agent loaded\n")

    # Run the agent
    if args.gif:
        from array2gif import write_gif
        frames = []

    # Create a window to view the environment
    env.render('human')

    for episode in range(args.episodes):
        obs = env.reset()

        while True:
            env.render('human')
            if args.gif:
                frames.append(numpy.moveaxis(env.render("rgb_array"), 2, 0))

            action = agent.get_action(obs)
            obs, reward, done, _ = env.step(action)
            agent.analyze_feedback(reward, done)

            if done or env.window.closed:
                break

        if env.window.closed:
            break

    if args.gif:
        print("Saving gif... ", end="")
        write_gif(numpy.array(frames), args.gif + ".gif", fps=1 / args.pause)
        print("Done.")
def main():
    # Parse arguments
    parser = argparse.ArgumentParser()

    ## General parameters
    parser.add_argument("--algo", required=True,
                        help="algorithm to use: a2c | ppo (REQUIRED)")
    parser.add_argument("--env", required=True,
                        help="name of the environment to train on (REQUIRED)")
    parser.add_argument("--model", default=None,
                        help="name of the model (default: {ENV}_{ALGO}_{TIME})")
    parser.add_argument("--seed", type=int, default=1,
                        help="random seed (default: 1)")
    parser.add_argument("--log-interval", type=int, default=1,
                        help="number of updates between two logs (default: 1)")
    parser.add_argument("--save-interval", type=int, default=10,
                        help="number of updates between two saves (default: 10, 0 means no saving)")
    parser.add_argument("--procs", type=int, default=16,
                        help="number of processes (default: 16)")
    parser.add_argument("--frames", type=int, default=10**7,
                        help="number of frames of training (default: 1e7)")

    ## Parameters for main algorithm
    parser.add_argument("--epochs", type=int, default=4,
                        help="number of epochs for PPO (default: 4)")
    parser.add_argument("--batch-size", type=int, default=256,
                        help="batch size for PPO (default: 256)")
    parser.add_argument("--frames-per-proc", type=int, default=None,
                        help="number of frames per process before update (default: 5 for A2C and 128 for PPO)")
    parser.add_argument("--discount", type=float, default=0.99,
                        help="discount factor (default: 0.99)")
    parser.add_argument("--lr", type=float, default=0.001,
                        help="learning rate (default: 0.001)")
    parser.add_argument("--gae-lambda", type=float, default=0.95,
                        help="lambda coefficient in GAE formula (default: 0.95, 1 means no gae)")
    parser.add_argument("--entropy-coef", type=float, default=0.01,
                        help="entropy term coefficient (default: 0.01)")
    parser.add_argument("--value-loss-coef", type=float, default=0.5,
                        help="value loss term coefficient (default: 0.5)")
    parser.add_argument("--max-grad-norm", type=float, default=0.5,
                        help="maximum norm of gradient (default: 0.5)")
    parser.add_argument("--optim-eps", type=float, default=1e-8,
                        help="Adam and RMSprop optimizer epsilon (default: 1e-8)")
    parser.add_argument("--optim-alpha", type=float, default=0.99,
                        help="RMSprop optimizer alpha (default: 0.99)")
    parser.add_argument("--clip-eps", type=float, default=0.2,
                        help="clipping epsilon for PPO (default: 0.2)")
    parser.add_argument("--recurrence", type=int, default=1,
                        help="number of time-steps gradient is backpropagated (default: 1). If > 1, a LSTM is added to the model to have memory.")
    parser.add_argument("--text", action="store_true", default=False,
                        help="add a GRU to the model to handle text input")
    parser.add_argument("--argmax", action="store_true", default=False,
                        help="select the action with highest probability (default: False)")

    if len(sys.argv) > 1:
        args = parser.parse_args()
    else:
        # No command-line arguments given: fall back to hard-coded defaults
        args = argparse.Namespace()
        args.env = 'MiniGrid-DoorKey-5x5-v0'
        args.env = 'MiniGrid-KeyCorridorGBLA-v0'
        args.algo = 'ppo'
        args.seed = 1234
        args.model = 'KeyCorridor2'
        args.frames = 2e5
        args.procs = 16
        args.text = False
        args.frames_per_proc = None
        args.discount = 0.99
        args.lr = 0.001
        args.gae_lambda = 0.95
        args.entropy_coef = 0.01
        args.value_loss_coef = 0.5
        args.max_grad_norm = 0.5
        args.recurrence = 1
        args.optim_eps = 1e-8
        args.optim_alpha = 0.99
        args.clip_eps = 0.2
        args.epochs = 4
        args.batch_size = 256
        args.log_interval = 1
        args.save_interval = 10
        args.argmax = False

    if args.env == 'MiniGrid-KeyCorridorGBLA-v0':
        env_descriptor = [[0, 0, 0], [0, 13, 0], [0, 0, 0]]
        task_descriptor = TaskDescriptor(envD=env_descriptor, rmDesc=None, rmOrder=None,
                                         rmSize=4, observ=True, seed=None, time_steps=None)
        env = gym.make('MiniGrid-KeyCorridorGBLA-v0', taskD=task_descriptor)
        goal = GetGoalDescriptor(env)
        goal = goal.refinement[0].refinement[0].refinement[0]
        env = gym_minigrid.wrappers.FullyObsWrapper(env)
        env = gym_minigrid.wrappers.ImgObsWrapper(env)
        env = GoalRL.GoalEnvWrapper(env, goal=goal, verbose=0)
        # env = Monitor(env, 'storage/{}/{}.monitor.csv'.format(rank, goal.goalId))  # wrap the environment in the monitor object
        args.env = env
    else:
        pass

    args.mem = args.recurrence > 1

    # Set run dir
    date = datetime.datetime.now().strftime("%y-%m-%d-%H-%M-%S")
    default_model_name = f"{args.env}_{args.algo}_seed{args.seed}_{date}"
    model_name = args.model or default_model_name
    model_dir = utils.get_model_dir(model_name)

    # Load loggers and Tensorboard writer
    txt_logger = utils.get_txt_logger(model_dir)

    # Log command and all script arguments
    txt_logger.info("{}\n".format(" ".join(sys.argv)))
    txt_logger.info("{}\n".format(args))

    # Set seed for all randomness sources
    utils.seed(args.seed)

    # Set device

    # Load environments
    envs = []
    for i in range(args.procs):
        if type(args.env) == str:
            envs.append(utils.make_env(args.env, args.seed + 10000 * i))
        else:
            envs.append(deepcopy(args.env))
    txt_logger.info("Environments loaded\n")

    # Load training status

    # Load observations preprocessor
    # obs_space, preprocess_obss = utils.get_obss_preprocessor(envs[0].observation_space)

    # Load model
    agent = utils.Agent(env, model_dir, logger=txt_logger, argmax=args.argmax,
                        use_memory=args.mem, use_text=args.text)

    # Load algo
    if args.algo == 'a2c':
        agent.init_training_algo(algo_type=args.algo, num_cpu=args.procs,
                                 frames_per_proc=args.frames_per_proc, discount=args.discount,
                                 lr=args.lr, gae_lambda=args.gae_lambda,
                                 entropy_coef=args.entropy_coef, value_loss_coef=args.value_loss_coef,
                                 max_grad_norm=args.max_grad_norm, recurrence=args.recurrence,
                                 optim_eps=args.optim_eps, optim_alpha=args.optim_alpha)  # args for A2C
    elif args.algo == 'ppo':
        agent.init_training_algo(algo_type=args.algo, num_cpu=args.procs,
                                 frames_per_proc=args.frames_per_proc, discount=args.discount,
                                 lr=args.lr, gae_lambda=args.gae_lambda,
                                 entropy_coef=args.entropy_coef, value_loss_coef=args.value_loss_coef,
                                 max_grad_norm=args.max_grad_norm, recurrence=args.recurrence,
                                 optim_eps=args.optim_eps, clip_eps=args.clip_eps,  # args for PPO2
                                 epochs=args.epochs, batch_size=args.batch_size)
    else:
        raise ValueError("Incorrect algorithm name: {}".format(args.algo))
    agent.learn(total_timesteps=args.frames,
                log_interval=args.log_interval,
                save_interval=args.save_interval)
    print('training completed!')
def run_eval():
    envs = []
    for i in range(1):
        env = utils.make_env(args.env, args.seed + 10000 * i)
        env.is_teaching = False
        env.end_pos = args.eval_goal
        envs.append(env)
    env = ParallelEnv(envs)

    # Load agent
    model_dir = utils.get_model_dir(args.model)
    agent = utils.Agent(env.observation_space, env.action_space, model_dir,
                        device, args.argmax, args.procs)

    # Initialize logs
    logs = {"num_frames_per_episode": [], "return_per_episode": []}

    # Run agent
    start_time = time.time()

    obss = env.reset()

    log_done_counter = 0
    log_episode_return = torch.zeros(args.procs, device=device)
    log_episode_num_frames = torch.zeros(args.procs, device=device)
    positions = []

    while log_done_counter < args.episodes:
        actions = agent.get_actions(obss)
        obss, rewards, dones, infos = env.step(actions)
        positions.extend([info["agent_pos"] for info in infos])
        agent.analyze_feedbacks(rewards, dones)

        log_episode_return += torch.tensor(rewards, device=device, dtype=torch.float)
        log_episode_num_frames += torch.ones(args.procs, device=device)

        for i, done in enumerate(dones):
            if done:
                log_done_counter += 1
                logs["return_per_episode"].append(log_episode_return[i].item())
                logs["num_frames_per_episode"].append(log_episode_num_frames[i].item())

        mask = 1 - torch.tensor(dones, device=device, dtype=torch.float)
        log_episode_return *= mask
        log_episode_num_frames *= mask

    end_time = time.time()

    # Print logs
    num_frames = sum(logs["num_frames_per_episode"])
    fps = num_frames / (end_time - start_time)
    duration = int(end_time - start_time)
    return_per_episode = utils.synthesize(logs["return_per_episode"])
    num_frames_per_episode = utils.synthesize(logs["num_frames_per_episode"])

    print("Eval: F {} | FPS {:.0f} | D {} | R:μσmM {:.2f} {:.2f} {:.2f} {:.2f} | F:μσmM {:.1f} {:.1f} {} {}"
          .format(num_frames, fps, duration,
                  *return_per_episode.values(),
                  *num_frames_per_episode.values()))

    return return_per_episode
# Load environments
envs = []
for i in range(args.procs):
    env = utils.make_env(args.env, args.seed + 10000 * i)
    envs.append(env)
env = ParallelEnv(envs)
print("Environments loaded\n")

# Load agent
model_dir = utils.get_model_dir(args.model)
agent = utils.Agent(env.observation_space, env.action_space, model_dir,
                    argmax=args.argmax, num_envs=args.procs,
                    use_memory=args.memory, use_text=args.text)
print("Agent loaded\n")

# Initialize logs
logs = {"num_frames_per_episode": [], "return_per_episode": []}

# Run agent
start_time = time.time()

obss = env.reset()
# Set device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Device: {device}\n")

# Load environment
env = utils.make_env(args.env, args.seed)
for _ in range(args.shift):
    env.reset()
print("Environment loaded\n")

# Load agent
model_dir = utils.get_model_dir(args.model)
agent = utils.Agent(env.observation_space, env.action_space, model_dir,
                    args.ipo_model, device, args.argmax)
print("Agent loaded\n")

# Run the agent
if args.gif:
    from array2gif import write_gif
    frames = []

# Create a window to view the environment
env.render('human')

for episode in range(args.episodes):
    obs = env.reset()
def main():
    # Parse arguments
    parser = argparse.ArgumentParser()
    parser.add_argument("--env", required=True,
                        help="name of the environment (REQUIRED)")
    parser.add_argument("--model", required=True,
                        help="name of the trained model (REQUIRED)")
    parser.add_argument("--episodes", type=int, default=100,
                        help="number of episodes of evaluation (default: 100)")
    parser.add_argument("--seed", type=int, default=0,
                        help="random seed (default: 0)")
    parser.add_argument("--procs", type=int, default=1,
                        help="number of processes (default: 1)")
    parser.add_argument("--argmax", action="store_true", default=False,
                        help="action with highest probability is selected")
    parser.add_argument("--worst-episodes-to-show", type=int, default=10,
                        help="how many worst episodes to show")
    parser.add_argument("--memory", action="store_true", default=False,
                        help="add a LSTM to the model")
    parser.add_argument("--text", action="store_true", default=False,
                        help="add a GRU to the model")
    parser.add_argument("--visualize", default=False,
                        help="save averaged episode images")
    parser.add_argument("--save_path", default="test_image",
                        help="save path for agent visualizations")
    args = parser.parse_args()

    # Set seed for all randomness sources
    utils.seed(args.seed)

    # Set device
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(f"Device: {device}\n")

    # Load environments
    envs = []
    for i in range(args.procs):
        env = utils.make_env(args.env, args.seed + 10000 * i)
        envs.append(env)
    env = ParallelEnv(envs)
    print("Environments loaded\n")

    # Load agent
    model_dir = utils.get_model_dir(args.model)
    agent = utils.Agent(env.observation_space, env.action_space, model_dir,
                        device=device, argmax=args.argmax, num_envs=args.procs,
                        use_memory=args.memory, use_text=args.text)
    print("Agent loaded\n")

    # Initialize logs
    logs = {"num_frames_per_episode": [], "return_per_episode": []}

    # Run agent
    start_time = time.time()

    obss = env.reset()

    log_done_counter = 0
    log_episode_return = torch.zeros(args.procs, device=device)
    log_episode_num_frames = torch.zeros(args.procs, device=device)

    img_sum = []
    obss_sum = None
    encoding_sum = None
    img_count = 0

    while log_done_counter < args.episodes:
        actions = agent.get_actions(obss)
        obss, rewards, dones, _ = env.step(actions)
        agent.analyze_feedbacks(rewards, dones)

        log_episode_return += torch.tensor(rewards, device=device, dtype=torch.float)
        log_episode_num_frames += torch.ones(args.procs, device=device)

        state = env.get_environment_state()
        img = state.grid.render(32, state.agent_pos, state.agent_dir, highlight_mask=None)
        encoding = state.grid.encode()

        # img_count += 1
        # if img_count == 1:
        #     img_sum = img
        ##     obss_sum = obss[0]['image']
        ##     encoding_sum = encoding
        # else:
        #     img_sum += img
        ##     obss_sum += obss[0]['image']
        ##     encoding_sum += encoding

        for i, done in enumerate(dones):
            if done:
                log_done_counter += 1
                logs["return_per_episode"].append(log_episode_return[i].item())
                logs["num_frames_per_episode"].append(log_episode_num_frames[i].item())

                if args.visualize:
                    if len(img_sum) > 0:
                        img_sum = img_sum / img_count
                        # img_sum = img_sum.astype(numpy.uint8)
                        filepath = args.save_path + '_image_' + str(log_done_counter - 1) + '.jpg'
                        imsave(filepath, img_sum)
                        img_sum = []
                        img_count = 0
            else:
                img_count += 1
                if img_count == 1:
                    img_sum = img  # .astype(float)
                else:
                    img_sum += img

        mask = 1 - torch.tensor(dones, device=device, dtype=torch.float)
        log_episode_return *= mask
        log_episode_num_frames *= mask

    end_time = time.time()

    # Print logs
    num_frames = sum(logs["num_frames_per_episode"])
    fps = num_frames / (end_time - start_time)
    duration = int(end_time - start_time)
    return_per_episode = utils.synthesize(logs["return_per_episode"])
    num_frames_per_episode = utils.synthesize(logs["num_frames_per_episode"])

    print("F {} | FPS {:.0f} | D {} | R:μσmM {:.2f} {:.2f} {:.2f} {:.2f} | F:μσmM {:.1f} {:.1f} {} {}"
          .format(num_frames, fps, duration,
                  *return_per_episode.values(),
                  *num_frames_per_episode.values()))

    # Print worst episodes
    n = args.worst_episodes_to_show
    if n > 0:
        print("\n{} worst episodes:".format(n))
        indexes = sorted(range(len(logs["return_per_episode"])),
                         key=lambda k: logs["return_per_episode"][k])
        for i in indexes[:n]:
            print("- episode {}: R={}, F={}".format(
                i, logs["return_per_episode"][i], logs["num_frames_per_episode"][i]))
def get_agent_any_type(type_opps, name, policy_type, env):
    if type_opps == "zoo":
        return load_agent(name, policy_type, "zoo_ant_policy_2", env, 1)
    elif type_opps == "const":
        trained_agent = constant_agent_sampler()
        trained_agent.load(name)
        return trained_agent
    elif type_opps == "lstm":
        policy = LSTMPolicy(scope="agent_new", reuse=False,
                            ob_space=env.observation_space.spaces[0],
                            ac_space=env.action_space.spaces[0],
                            hiddens=[128, 128], normalize=True)

        def get_action(observation):
            return policy.act(stochastic=True, observation=observation)[0]

        trained_agent = Agent(get_action, policy.reset)

        with open(name, "rb") as file:
            values_from_save = pickle.load(file)

        for key, value in values_from_save.items():
            var = tf.get_default_graph().get_tensor_by_name(key)
            sess.run(tf.assign(var, value))

        return trained_agent
    elif type_opps == "our_mlp":
        # TODO DO ANYTHING BUT THIS. THIS IS VERY DIRTY AND SAD :(
        def make_env(id):
            # TODO: seed (not currently supported)
            # TODO: VecNormalize? (typically good for MuJoCo)
            # TODO: baselines logger?
            # TODO: we're loading identical policy weights into different
            #       variables, this is to work-around design choice of Agent's
            #       having state stored inside of them.
            sess = utils.make_session()
            with sess.as_default():
                multi_env = env
                attacked_agent = constant_agent_sampler(act_dim=8, magnitude=100)
                single_env = Gymify(MultiToSingle(CurryEnv(multi_env, attacked_agent)))
                single_env.spec = gym.envs.registration.EnvSpec('Dummy-v0')
                # TODO: upgrade Gym so we don't have to do this
                single_env.observation_space.dtype = np.dtype(np.float32)
            return single_env
            # TODO: close session?

        # TODO DO NOT EVEN READ THE ABOVE CODE :'(
        denv = SubprocVecEnv([functools.partial(make_env, 0)])
        model = ppo2.learn(network="mlp", env=denv, total_timesteps=1, seed=0,
                           nminibatches=4, log_interval=1, save_interval=1,
                           load_path=name)
        stateful_model = StatefulModel(denv, model)
        trained_agent = utils.Agent(action_selector=stateful_model.get_action,
                                    reseter=stateful_model.reset)
        return trained_agent

    raise Exception('Agent type unrecognized')