def render_rollout(self, horizon=50, path=None, render_type="pretty", fps=8): """ Render a rollout into a video. Args: horizon: The number of timesteps to roll out. path: Directory where the video will be saved. render_type: Can be 'pretty' or 'fast'. Impliciations obvious. fps: Integer frames per second. """ if path is None: path = os.path.abspath(os.path.dirname(__file__)) + "/videos" print(path) if not os.path.exists(path): os.makedirs(path) video_name = self.env_name + "_trajectory" if render_type == "pretty": image_path = os.path.join(path, "frames/") if not os.path.exists(image_path): os.makedirs(image_path) rewards, observations, full_obs = self.rollout(horizon=horizon, save_path=image_path) utility_funcs.make_video_from_image_dir(path, image_path, fps=fps, video_name=video_name) # Clean up images shutil.rmtree(image_path) else: rewards, observations, full_obs = self.rollout(horizon=horizon) utility_funcs.make_video_from_rgb_imgs(full_obs, path, fps=fps, video_name=video_name)
def render_rollout(self, horizon=500, path=None, render_type='pretty', fps=8):
    """Render a rollout into a video.

    Args:
        horizon: The number of timesteps to roll out.
        path: Directory where the video will be saved.
        render_type: Either 'pretty' (write per-step frames to disk and build
            the video from them) or 'fast' (build the video directly from the
            RGB observations).
        fps: Integer frames per second.
    """
    if path is None:
        path = os.path.abspath(os.path.dirname(__file__)) + '/videos'
        print(path)
    if not os.path.exists(path):
        os.makedirs(path)
    video_name = self.env_name + '_trajectory'

    if render_type == 'pretty':
        image_path = os.path.join(path, 'frames/')
        if not os.path.exists(image_path):
            os.makedirs(image_path)

        if self.env_name == 'explore':
            self.explore(horizon=horizon, save_path=image_path)
            utility_funcs.make_video_from_image_dir(path, image_path, fps=fps,
                                                    video_name=video_name)
        else:
            rewards, observations, full_obs, final_result = \
                self.rollout(horizon=horizon, save_path=image_path)
            utility_funcs.make_video_from_image_dir(path, image_path, fps=fps,
                                                    video_name=video_name)
            with open('4-agents-50-hor-imps-nonuniform-rew-prior-ex1.csv',
                      'w') as writeFile:
                writer = csv.writer(writeFile)
                writer.writerows(final_result)

        # Clean up images
        shutil.rmtree(image_path)
    else:
        if self.env_name == 'explore':
            self.explore(horizon=horizon)
            # NOTE: `image_path` is only defined in the 'pretty' branch, and
            # this call does not pass RGB frames as its first argument, so
            # this branch will fail as written.
            utility_funcs.make_video_from_rgb_imgs(path, image_path, fps=fps,
                                                   video_name=video_name)
        else:
            rewards, observations, full_obs, final_result = \
                self.rollout(horizon=horizon)
            utility_funcs.make_video_from_rgb_imgs(full_obs, path, fps=fps,
                                                   video_name=video_name)
def render_rollout(self, horizon=50, path=None, fps=8):
    """Render a rollout into a video.

    Args:
        horizon: The number of timesteps to roll out.
        path: Directory where the video will be saved.
        fps: Integer frames per second.
    """
    if path is None:
        path = os.path.abspath(os.path.dirname(__file__)) + '/videos'
        print(path)
    if not os.path.exists(path):
        os.makedirs(path)
    video_name = self.env_name + '_trajectory'

    # if render_type == 'pretty':
    #     image_path = os.path.join(path, 'frames/')
    #     if not os.path.exists(image_path):
    #         os.makedirs(image_path)
    #
    #     rewards, observations, full_obs = self.rollout(
    #         horizon=horizon, save_path=image_path, train_agents=False)
    #     utility_funcs.make_video_from_image_dir(path, image_path, fps=fps,
    #                                             video_name=video_name)
    #
    #     # Clean up images
    #     shutil.rmtree(image_path)
    # else:
    rewards, observations, full_obs = self.rollout(horizon=horizon,
                                                   train_agents=False,
                                                   print_act=False)
    utility_funcs.make_video_from_rgb_imgs(full_obs, path, fps=fps,
                                           video_name=video_name)
    return rewards
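For reference, a hypothetical call site for these render_rollout variants. It assumes the methods live on a small controller-style wrapper class (named Controller here purely for illustration, as is the env_name argument) that builds the environment and exposes rollout(); the actual class in the surrounding code may differ.

# Hypothetical usage sketch; class name and constructor arguments are assumptions.
controller = Controller(env_name='cleanup')
rewards = controller.render_rollout(horizon=50, fps=8)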
def visualizer_rllib(args):
    result_dir = args.result_dir if args.result_dir[-1] != '/' \
        else args.result_dir[:-1]

    config = get_rllib_config(result_dir)
    pkl = get_rllib_pkl(result_dir)
    result = pkl

    # Check if we have a multiagent scenario, but in a
    # backwards-compatible way.
    if config.get('multiagent', {}).get('policies', {}):
        multiagent = True
        config['multiagent'] = pkl['multiagent']
    else:
        multiagent = False

    # Create and register a gym+rllib env
    env_creator = pkl['env_config']['func_create']
    env_name = config['env_config']['env_name']
    register_env(env_name, env_creator)
    ModelCatalog.register_custom_model("conv_to_fc_net", ObedienceLSTM)

    # Determine agent and checkpoint
    config_run = config['env_config']['run'] if 'run' in config['env_config'] \
        else None
    if args.run and config_run:
        if args.run != config_run:
            print('visualizer_rllib.py: error: run argument ' +
                  '\'{}\' passed in '.format(args.run) +
                  'differs from the one stored in params.json ' +
                  '\'{}\''.format(config_run))
            sys.exit(1)
    if args.run:
        agent_cls = get_agent_class(args.run)
    elif config_run:
        agent_cls = get_agent_class(config_run)
    else:
        print('visualizer_rllib.py: error: could not find flow parameter '
              '\'run\' in params.json, '
              'add argument --run to provide the algorithm or model used '
              'to train the results\n e.g. '
              'python ./visualizer_rllib.py /tmp/ray/result_dir 1 --run PPO')
        sys.exit(1)

    # Run on only one cpu for rendering purposes if possible; A3C requires two
    if config_run == 'A3C':
        config['num_workers'] = 1
        config["sample_async"] = False
    else:
        config['num_workers'] = 0

    # Create the agent that will be used to compute the actions
    agent = agent_cls(env=env_name, config=result)
    checkpoint = result_dir + '/checkpoint_' + args.checkpoint_num
    checkpoint = checkpoint + '/checkpoint-' + args.checkpoint_num
    print('Loading checkpoint', checkpoint)
    agent.restore(checkpoint)

    if hasattr(agent, "local_evaluator"):
        env = agent.local_evaluator.env

    if args.save_video:
        shape = env.base_map.shape
        full_obs = [np.zeros((shape[0], shape[1], 3), dtype=np.uint8)
                    for i in range(config["horizon"])]

    if hasattr(agent, "local_evaluator"):
        multiagent = agent.local_evaluator.multiagent
        if multiagent:
            policy_agent_mapping = agent.config["multiagent"][
                "policy_mapping_fn"]
            mapping_cache = {}
            policy_map = agent.local_evaluator.policy_map
            state_init = {p: m.get_initial_state()
                          for p, m in policy_map.items()}
            use_lstm = {p: len(s) > 0 for p, s in state_init.items()}
    else:
        multiagent = False
        use_lstm = {DEFAULT_POLICY_ID: False}

    steps = 0
    while steps < (config['horizon'] or steps + 1):
        state = env.reset()
        done = False
        reward_total = 0.0
        while not done and steps < (config['horizon'] or steps + 1):
            if multiagent:
                action_dict = {}
                for agent_id in state.keys():
                    a_state = state[agent_id]
                    if a_state is not None:
                        policy_id = mapping_cache.setdefault(
                            agent_id, policy_agent_mapping(agent_id))
                        p_use_lstm = use_lstm[policy_id]
                        if p_use_lstm:
                            a_action, p_state_init, _ = agent.compute_action(
                                a_state,
                                state=state_init[policy_id],
                                policy_id=policy_id)
                            state_init[policy_id] = p_state_init
                        else:
                            a_action = agent.compute_action(
                                a_state, policy_id=policy_id)
                        action_dict[agent_id] = a_action
                action = action_dict
            else:
                if use_lstm[DEFAULT_POLICY_ID]:
                    action, state_init, _ = agent.compute_action(
                        state, state=state_init)
                else:
                    action = agent.compute_action(state)

            if agent.config["clip_actions"]:
                # clipped_action = clip_action(action, env.action_space)
                next_state, reward, done, _ = env.step(action)
            else:
                next_state, reward, done, _ = env.step(action)

            if multiagent:
                done = done["__all__"]
                reward_total += sum(reward.values())
            else:
                reward_total += reward

            if args.save_video:
                rgb_arr = env.map_to_colors()
                full_obs[steps] = rgb_arr.astype(np.uint8)

            steps += 1
            state = next_state

        print("Episode reward", reward_total)

    if args.save_video:
        path = os.path.abspath(os.path.dirname(__file__)) + '/videos'
        if not os.path.exists(path):
            os.makedirs(path)
        images_path = path + '/images/'
        if not os.path.exists(images_path):
            os.makedirs(images_path)
        utility_funcs.make_video_from_rgb_imgs(full_obs, path)

        # Clean up images
        shutil.rmtree(images_path)
def rollout(args, agent, config, num_episodes, considered_player=None,
            coalition=None):
    if hasattr(agent, "workers"):
        env = agent.workers.local_worker().env
        if args.save_video:
            shape = env.base_map.shape
            full_obs = [
                np.zeros((shape[0], shape[1], 3), dtype=np.uint8)
                for i in range(config["horizon"])
            ]
        multiagent = isinstance(env, MultiAgentEnv)
        policy_agent_mapping = (
            agent.config["multiagent"]["policy_mapping_fn"]
            if agent.workers.local_worker().multiagent else None)
        mapping_cache = {}
        policy_map = agent.workers.local_worker().policy_map
        state_init = {p: m.get_initial_state() for p, m in policy_map.items()}
        use_lstm = {p: len(s) > 0 for p, s in state_init.items()}
    else:
        multiagent = False
        use_lstm = {DEFAULT_POLICY_ID: False}

    if config["agents_fov"] is not None:
        env.set_agents_fov(config["agents_fov"])

    agents_active = [f"agent-{i}" for i in range(args.agents_active)]

    # Rollout
    episode = 0
    rewards_list = []
    while episode < num_episodes:
        steps = 0
        state = env.reset()
        done = False
        reward_total = 0.0
        while not done and steps < (config['horizon'] or steps + 1):
            if args.render:
                print("render")
                env.render()
            if multiagent:
                if args.shapley_M is not None:
                    action = take_actions_for_coalition(
                        env, agent, considered_player, state, mapping_cache,
                        use_lstm, policy_agent_mapping, state_init, coalition,
                        args.missing_agents_behaviour, agents_active, args.run)
                else:
                    action = take_action(env, agent, state, mapping_cache,
                                         use_lstm, policy_agent_mapping,
                                         state_init, agents_active, args.run)
            else:
                if use_lstm[DEFAULT_POLICY_ID]:
                    action, state_init, _ = agent.compute_action(
                        state, state=state_init)
                else:
                    action = agent.compute_action(state)

            if agent.config["clip_actions"]:
                # action = clip_action(action, env.action_space)
                next_state, reward, done, _ = env.step(action)
            else:
                next_state, reward, done, _ = env.step(action)

            if multiagent:
                done = done["__all__"]
                reward_total += sum(reward.values())
            else:
                reward_total += reward

            if args.social_metrics:
                with open(f'{args.save_dir}/{args.exp_name}.csv', 'a',
                          newline='') as csvfile:
                    writer = csv.writer(csvfile, delimiter=',')
                    row = [episode] + [steps] + list(reward.values())
                    writer.writerow(row)

            if args.save_video:
                rgb_arr = env.map_to_colors()
                full_obs[steps] = rgb_arr.astype(np.uint8)

            steps += 1
            state = next_state

        print("Episode reward", reward_total)
        episode += 1
        rewards_list.append(reward_total)

    if args.save_video:
        path = os.path.abspath(os.path.dirname(__file__)) + '/videos'
        if not os.path.exists(path):
            os.makedirs(path)
        images_path = path + '/images/'
        if not os.path.exists(images_path):
            os.makedirs(images_path)
        utility_funcs.make_video_from_rgb_imgs(full_obs, path)

        # Clean up images
        shutil.rmtree(images_path)

    return rewards_list
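The take_action helper called above is not shown in these listings. The sketch below is only an assumption about its shape, inferred from its call signature and from the per-agent action loop in visualizer_rllib above; the env and run arguments are accepted but unused here, and the real helper (like take_actions_for_coalition) may behave differently.

def take_action(env, agent, state, mapping_cache, use_lstm,
                policy_agent_mapping, state_init, agents_active, run):
    """Hypothetical sketch: compute one action per active agent."""
    action_dict = {}
    for agent_id in agents_active:
        a_state = state.get(agent_id)
        if a_state is None:
            continue
        # Cache the agent -> policy mapping in case it is stochastic.
        policy_id = mapping_cache.setdefault(agent_id,
                                             policy_agent_mapping(agent_id))
        if use_lstm[policy_id]:
            a_action, state_init[policy_id], _ = agent.compute_action(
                a_state, state=state_init[policy_id], policy_id=policy_id)
        else:
            a_action = agent.compute_action(a_state, policy_id=policy_id)
        action_dict[agent_id] = a_action
    return action_dict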
def visualizer_rllib(args):
    result_dir = args.result_dir if args.result_dir[-1] != '/' \
        else args.result_dir[:-1]

    config = get_rllib_config(result_dir)
    pkl = get_rllib_pkl(result_dir)

    # Check if we have a multiagent scenario, but in a
    # backwards-compatible way.
    if config.get('multiagent', {}).get('policy_graphs', {}):
        multiagent = True
        config['multiagent'] = pkl['multiagent']
    else:
        multiagent = False

    # Create and register a gym+rllib env
    env_creator = pkl['env_config']['func_create']
    env_name = config['env_config']['env_name']
    register_env(env_name, env_creator.func)
    ModelCatalog.register_custom_model("conv_to_fc_net", ConvToFCNet)

    # Determine agent and checkpoint
    config_run = config['env_config']['run'] if 'run' in config['env_config'] \
        else None
    if args.run and config_run:
        if args.run != config_run:
            print('visualizer_rllib.py: error: run argument ' +
                  '\'{}\' passed in '.format(args.run) +
                  'differs from the one stored in params.json ' +
                  '\'{}\''.format(config_run))
            sys.exit(1)
    if args.run:
        agent_cls = get_agent_class(args.run)
    elif config_run:
        agent_cls = get_agent_class(config_run)
    else:
        print('visualizer_rllib.py: error: could not find flow parameter '
              '\'run\' in params.json, '
              'add argument --run to provide the algorithm or model used '
              'to train the results\n e.g. '
              'python ./visualizer_rllib.py /tmp/ray/result_dir 1 --run PPO')
        sys.exit(1)

    # Run on only one cpu for rendering purposes if possible; A3C requires two
    if config_run == 'A3C':
        config['num_workers'] = 1
    else:
        config['num_workers'] = 0

    # Create the agent that will be used to compute the actions
    agent = agent_cls(env=env_name, config=config)
    checkpoint = result_dir + '/checkpoint_' + args.checkpoint_num
    checkpoint = checkpoint + '/checkpoint-' + args.checkpoint_num
    agent.restore(checkpoint)

    if hasattr(agent, "local_evaluator"):
        env = agent.local_evaluator.env

    if args.save_video:
        shape = env.map.shape
        full_obs = [
            np.zeros((shape[0], shape[1], 3), dtype=np.uint8)
            for i in range(config["horizon"])
        ]

    rets = {}
    # Map the agent id to its policy
    policy_map_fn = config['multiagent']['policy_mapping_fn'].func
    for key in config['multiagent']['policy_graphs'].keys():
        rets[key] = []

    if config['model']['use_lstm']:
        use_lstm = True
        state_init = [
            np.zeros(config['model']['lstm_cell_size'], np.float32),
            np.zeros(config['model']['lstm_cell_size'], np.float32)
        ]
    else:
        use_lstm = False

    for i in range(args.num_rollouts):
        state = env.reset()
        done = False
        if multiagent:
            ret = {key: [0] for key in rets.keys()}
        else:
            ret = 0
        for j in range(config["horizon"]):
            action = {}
            for agent_id in state.keys():
                if use_lstm:
                    action[agent_id], state_init, logits = \
                        agent.compute_action(
                            state[agent_id], state=state_init,
                            policy_id=policy_map_fn(agent_id))
                else:
                    action[agent_id] = agent.compute_action(
                        state[agent_id], policy_id=policy_map_fn(agent_id))
            observations, reward, done, _ = env.step(action)
            if args.render:
                env.render_map()
            if args.save_video:
                rgb_arr = env.map_to_colors()
                full_obs[j] = rgb_arr.astype(np.uint8)
            for actor, rew in reward.items():
                ret[policy_map_fn(actor)][0] += rew
            if multiagent and done['__all__']:
                break
            if not multiagent and done:
                break
        for key in rets.keys():
            rets[key].append(ret[key])
        for agent_id, rew in rets.items():
            print('Round {}, Return: {} for agent {}'.format(
                i, ret, agent_id))
    for agent_id, rew in rets.items():
        print('Average, std return: {}, {} for agent {}'.format(
            np.mean(rew), np.std(rew), agent_id))

    if args.save_video:
        path = os.path.abspath(os.path.dirname(__file__)) + '/videos'
        if not os.path.exists(path):
            os.makedirs(path)
        images_path = path + '/images/'
        if not os.path.exists(images_path):
            os.makedirs(images_path)
        utility_funcs.make_video_from_rgb_imgs(full_obs, path)

        # Clean up images
        shutil.rmtree(images_path)
def rollout(agent, env_name, num_steps, num_episodes=0, saver=None,
            no_render=True, monitor=False):
    policy_agent_mapping = default_policy_agent_mapping

    if saver is None:
        saver = RolloutSaver()

    if hasattr(agent, "workers"):
        env = agent.workers.local_worker().env
        multiagent = isinstance(env, MultiAgentEnv)
        if agent.workers.local_worker().multiagent:
            policy_agent_mapping = agent.config["multiagent"][
                "policy_mapping_fn"]

        policy_map = agent.workers.local_worker().policy_map
        state_init = {p: m.get_initial_state() for p, m in policy_map.items()}
        use_lstm = {p: len(s) > 0 for p, s in state_init.items()}
        action_init = {
            p: _flatten_action(m.action_space.sample())
            for p, m in policy_map.items()
        }
    else:
        env = gym.make(env_name)
        multiagent = False
        use_lstm = {DEFAULT_POLICY_ID: False}

    if monitor and not no_render and saver and saver.outfile is not None:
        # If monitoring has been requested, manually wrap our environment
        # with a gym monitor, which is set to record every episode.
        env = gym.wrappers.Monitor(
            env, os.path.join(os.path.dirname(saver.outfile), "monitor"),
            lambda x: True)

    steps = 0
    episodes = 0
    if not no_render:
        shape = env.base_map.shape
        full_obs = [
            np.zeros((shape[0], shape[1], 3), dtype=np.uint8)
            for i in range(num_steps * num_episodes)
        ]

    while episodes < num_episodes:
        mapping_cache = {}  # in case policy_agent_mapping is stochastic
        saver.begin_rollout()
        obs = env.reset()
        agent_states = DefaultMapping(
            lambda agent_id: state_init[mapping_cache[agent_id]])
        prev_actions = DefaultMapping(
            lambda agent_id: action_init[mapping_cache[agent_id]])
        prev_rewards = collections.defaultdict(lambda: 0.)
        done = False
        reward_total = 0.0
        intrinsic_total = 0.0
        env_total = 0.0
        # NOTE: opened in 'w' mode, so each episode overwrites the previous
        # communication log.
        out_file = open('videos/communication_log.txt', 'w')
        out_file.write(f'\n\n episode-{episodes} \n\n')
        while not done and steps < num_steps:
            multi_obs = obs if multiagent else {_DUMMY_AGENT_ID: obs}
            action_dict = {}
            for agent_id, a_obs in multi_obs.items():
                if a_obs is not None:
                    policy_id = mapping_cache.setdefault(
                        agent_id, policy_agent_mapping(agent_id))
                    p_use_lstm = use_lstm[policy_id]
                    if p_use_lstm:
                        a_action, p_state, _ = agent.compute_action(
                            a_obs,
                            state=agent_states[agent_id],
                            prev_action=prev_actions[agent_id],
                            prev_reward=prev_rewards[agent_id],
                            policy_id=policy_id)
                        agent_states[agent_id] = p_state
                    else:
                        a_action = agent.compute_action(
                            a_obs,
                            prev_action=prev_actions[agent_id],
                            prev_reward=prev_rewards[agent_id],
                            policy_id=policy_id)
                    a_action = _flatten_action(a_action)  # tuple actions
                    action_dict[agent_id] = a_action
                    prev_actions[agent_id] = a_action
            action = action_dict
            action = action if multiagent else action[_DUMMY_AGENT_ID]
            next_obs, reward, done, info = env.step(action)

            if multiagent:
                for agent_id, r in reward.items():
                    prev_rewards[agent_id] = r
            else:
                prev_rewards[_DUMMY_AGENT_ID] = reward

            if multiagent:
                done = done["__all__"]
                reward_total += sum(reward.values())
                intrinsic_total += sum(
                    [f['intrinsic'] for f in info.values()])
                env_total += sum(
                    [f['environmental'] for f in info.values()])
            else:
                reward_total += reward

            if not no_render:
                # env.render()
                rgb_arr = env.map_to_colors()
                full_obs[steps + (num_steps * episodes)] = rgb_arr.astype(
                    np.uint8)
                out_file.write(f'step-{steps}: {action}\n')

            saver.append_step(obs, action, next_obs, reward, done, info)
            steps += 1
            obs = next_obs

        out_file.close()
        saver.end_rollout()
        print("Episode #{}: reward: {}, intrinsic: {}, env: {}".format(
            episodes, reward_total, intrinsic_total, env_total))
        episodes += 1
        steps = 0

    if not no_render:
        path = os.path.abspath(os.path.dirname(__file__)) + '/videos'
        print('saving video to ', path)
        if not os.path.exists(path):
            os.makedirs(path)
        images_path = path + '/images/'
        if not os.path.exists(images_path):
            os.makedirs(images_path)
        utility_funcs.make_video_from_rgb_imgs(full_obs, path)

        # Clean up images
        shutil.rmtree(images_path)
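The DefaultMapping container used for agent_states and prev_actions above is not defined in these listings. In RLlib's rollout script it is a defaultdict whose factory receives the missing key, which is exactly what the lambdas above rely on; a minimal equivalent is:

class DefaultMapping(collections.defaultdict):
    """defaultdict whose default_factory is called with the missing key."""

    def __missing__(self, key):
        self[key] = value = self.default_factory(key)
        return value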
def rollout(
    agent,
    env_name,
    num_steps,
    num_episodes=0,
    saver=None,
    no_render=True,
    video_dir=None,
    video_name=None,
):
    policy_agent_mapping = default_policy_agent_mapping

    if saver is None:
        saver = RolloutSaver()

    if hasattr(agent, "workers") and isinstance(agent.workers, WorkerSet):
        env = agent.workers.local_worker().env
        multiagent = isinstance(env, MultiAgentEnv)
        if agent.workers.local_worker().multiagent:
            policy_agent_mapping = agent.config["multiagent"][
                "policy_mapping_fn"]

        policy_map = agent.workers.local_worker().policy_map
        state_init = {p: m.get_initial_state() for p, m in policy_map.items()}
        use_lstm = {p: len(s) > 0 for p, s in state_init.items()}
    else:
        env = gym.make(env_name)
        multiagent = False
        try:
            policy_map = {DEFAULT_POLICY_ID: agent.policy}
        except AttributeError:
            raise AttributeError(
                "Agent ({}) does not have a `policy` property! This is needed "
                "for performing (trained) agent rollouts.".format(agent))
        use_lstm = {DEFAULT_POLICY_ID: False}

    action_init = {
        p: flatten_to_single_ndarray(m.action_space.sample())
        for p, m in policy_map.items()
    }

    # If rendering, create an array to store observations
    if video_dir:
        shape = env.base_map.shape
        total_num_steps = max(num_steps,
                              num_episodes * agent.config["horizon"])
        all_obs = [
            np.zeros((shape[0], shape[1], 3), dtype=np.uint8)
            for _ in range(total_num_steps)
        ]

    steps = 0
    episodes = 0
    while keep_going(steps, num_steps, episodes, num_episodes):
        mapping_cache = {}  # in case policy_agent_mapping is stochastic
        saver.begin_rollout()
        obs = env.reset()
        agent_states = DefaultMapping(
            lambda agent_id: state_init[mapping_cache[agent_id]])
        prev_actions = DefaultMapping(
            lambda agent_id: action_init[mapping_cache[agent_id]])
        prev_rewards = collections.defaultdict(lambda: 0.0)
        done = False
        reward_total = 0.0
        while not done and keep_going(steps, num_steps, episodes,
                                      num_episodes):
            multi_obs = obs if multiagent else {_DUMMY_AGENT_ID: obs}
            action_dict = {}
            for agent_id, a_obs in multi_obs.items():
                if a_obs is not None:
                    policy_id = mapping_cache.setdefault(
                        agent_id, policy_agent_mapping(agent_id))
                    p_use_lstm = use_lstm[policy_id]
                    if p_use_lstm:
                        a_action, p_state, _ = agent.compute_action(
                            a_obs,
                            state=agent_states[agent_id],
                            prev_action=prev_actions[agent_id],
                            prev_reward=prev_rewards[agent_id],
                            policy_id=policy_id,
                        )
                        agent_states[agent_id] = p_state
                    else:
                        a_action = agent.compute_action(
                            a_obs,
                            prev_action=prev_actions[agent_id],
                            prev_reward=prev_rewards[agent_id],
                            policy_id=policy_id,
                        )
                    a_action = flatten_to_single_ndarray(a_action)
                    action_dict[agent_id] = a_action
                    prev_actions[agent_id] = a_action
            action = action_dict
            action = action if multiagent else action[_DUMMY_AGENT_ID]
            next_obs, reward, done, info = env.step(action)

            if multiagent:
                for agent_id, r in reward.items():
                    prev_rewards[agent_id] = r
            else:
                prev_rewards[_DUMMY_AGENT_ID] = reward

            if multiagent:
                done = done["__all__"]
                reward_total += sum(reward.values())
            else:
                reward_total += reward

            if not no_render:
                rgb_arr = env.full_map_to_colors()
                all_obs[steps] = rgb_arr.astype(np.uint8)

            saver.append_step(obs, action, next_obs, reward, done, info)
            steps += 1
            obs = next_obs

        saver.end_rollout()
        print("Episode #{}: reward: {}".format(episodes, reward_total))
        if done:
            episodes += 1

    # Render video from observations
    if video_dir:
        if not os.path.exists(video_dir):
            os.makedirs(video_dir)
        images_path = video_dir + "/images/"
        if not os.path.exists(images_path):
            os.makedirs(images_path)
        height, width, _ = all_obs[0].shape
        # Upscale to be more legible
        width *= 20
        height *= 20
        utility_funcs.make_video_from_rgb_imgs(
            all_obs, video_dir, video_name=video_name,
            resize=(width, height))

        # Clean up images
        shutil.rmtree(images_path)
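The keep_going helper referenced above is not included in these listings. A sketch consistent with how it is called here (a nonzero num_episodes takes precedence over num_steps, and both zero means run indefinitely), mirroring the helper in RLlib's rollout script:

def keep_going(steps, num_steps, episodes, num_episodes):
    """Decide whether the rollout loop should continue."""
    # If num_episodes is set, it overrides num_steps.
    if num_episodes:
        return episodes < num_episodes
    # Otherwise stop once num_steps have been collected.
    if num_steps:
        return steps < num_steps
    # With neither limit set, keep going forever.
    return True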