def run(self,
        agent,
        num_rollouts,
        env_wrapper,
        env_name,
        num_steps=None,
        require_frame=False,
        require_trajectory=False,
        require_extra_info=False,
        require_full_frame=False,
        require_env_state=False,
        render_mode="rgb_array"):
    assert isinstance(agent, SymbolicAgentBase)

    # Restore the real agent, reusing the cached one when available.
    if self.existing_agent is not None:
        real_agent = agent.get(self.existing_agent)['agent']
    else:
        real_agent = agent.get()['agent']
    self.existing_agent = real_agent
    logger.debug("SymbolicAgent <{}> is restored.".format(agent.name))

    env = get_env_maker(env_name)(seed=0)
    if env_wrapper is not None:
        env = env_wrapper(env)

    # Collect the requested number of rollouts.
    ret_list = []
    for i in range(num_rollouts):
        ret = rollout(real_agent, env, env_name, num_steps, require_frame,
                      require_trajectory, require_extra_info,
                      require_full_frame, require_env_state, render_mode)
        ret_list.append(ret)

    agent.clear()
    return copy.deepcopy(ret_list), copy.deepcopy(agent)
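# Usage sketch for `run` above (a minimal illustration, not part of the
# original module). `run` is a method of the surrounding worker class; the
# worker instance, symbolic agent, and env name below are assumptions made
# up for this example.
def _example_run(worker, symbolic_agent):
    # `worker` is an instance of the class defining `run`; `symbolic_agent`
    # is an instance of any SymbolicAgentBase subclass.
    results, symbolic_agent = worker.run(
        agent=symbolic_agent,
        num_rollouts=2,
        env_wrapper=None,
        env_name="BipedalWalker-v2",  # hypothetical env
        require_trajectory=True)
    # Each entry of `results` is one rollout's return value from `rollout`.
    return results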
def collect_frames(self,
                   num_steps,
                   run_name,
                   env_name,
                   config,
                   ckpt,
                   require_full_frame=False,
                   render_mode="rgb_array",
                   ideal_steps=None,
                   random_seed=False):
    agent = restore_agent(run_name, ckpt, env_name, config)

    tmp_frames = []
    tmp_extra_info = []
    # We allow 10 attempts to reach a rollout of at least `ideal_steps`.
    for i in range(10):
        if random_seed:
            seed = np.random.randint(0, 10000)
        else:
            seed = i
        env_maker = get_env_maker(env_name, require_render=True)
        env = env_maker(seed=seed)
        result = rollout(agent, env, env_name, num_steps,
                         require_frame=True,
                         require_full_frame=require_full_frame,
                         render_mode=render_mode)
        frames, extra_info = result['frames'], result['frame_extra_info']
        # Keep the longest rollout seen so far as a fallback.
        if len(frames) > len(tmp_frames):
            tmp_frames = copy.deepcopy(frames)
            tmp_extra_info = copy.deepcopy(extra_info)
        if (ideal_steps is None) or (len(frames) > ideal_steps):
            frames = tmp_frames
            extra_info = tmp_extra_info
            break
        else:
            print("In collect_frames, current frame length is {} and "
                  "we expect length {}. So we rerun the rollout "
                  "with a different seed {}."
                  " Current length of potential 'frames' is {}".format(
                      len(frames), ideal_steps, i + 1, len(tmp_frames)))
    else:
        # All 10 attempts fell short of `ideal_steps`; fall back to the
        # longest rollout collected.
        frames = tmp_frames
        extra_info = tmp_extra_info
    env.close()
    return frames, extra_info
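# Usage sketch for `collect_frames` above; illustrative only. The owning
# instance, checkpoint path, and config below are assumptions.
def _example_collect_frames(collector):
    frames, extra_info = collector.collect_frames(
        num_steps=None,
        run_name="PPO",
        env_name="BipedalWalker-v2",   # hypothetical env
        config={"num_workers": 0},     # hypothetical minimal config
        ckpt="~/ckpt/checkpoint-100",  # hypothetical path
        ideal_steps=500,
        random_seed=True)
    # `frames` is the longest rollout found within the 10 attempts.
    return frames, extra_info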
def restore_policy_with_mask(run_name, ckpt, env_name, extra_config=None):
    tf = try_import_tf()
    assert run_name == "PPO"
    register_fc_with_mask()
    env = get_env_maker(env_name)()
    # Build the policy in a fresh graph. This is a workaround to avoid
    # initializing the same variables multiple times.
    with tf.Graph().as_default():
        p = PPOTFPolicyWithMask(env.observation_space, env.action_space,
                                ppo_agent_default_config_with_mask)
        if ckpt is not None:
            path = os.path.abspath(os.path.expanduser(ckpt))
            with open(path, 'rb') as f:
                wkload = pickle.load(f)['worker']
            state = pickle.loads(wkload)['state']['default_policy']
            p.set_state(state)
    # Return the restored policy (not the checkpoint path, which is
    # undefined when ckpt is None).
    return p
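# Usage sketch for `restore_policy_with_mask` above; illustrative only.
# The checkpoint path and env name are hypothetical; `get_weights` is the
# standard RLlib policy API, assumed available on the returned policy.
def _example_restore_policy():
    policy = restore_policy_with_mask(
        run_name="PPO",  # only PPO is supported (asserted above)
        ckpt="~/ckpt/checkpoint-100",  # hypothetical path
        env_name="BipedalWalker-v2")   # hypothetical env
    weights = policy.get_weights()
    print("Restored policy with {} weight tensors.".format(len(weights)))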
def generate_frames_from_agent(self,
                               agent,
                               agent_name,
                               num_steps=None,
                               seed=0,
                               render_mode="rgb_array",
                               require_trajectory=False,
                               ideal_steps=None):
    config = agent.config
    env_name = config["env"]
    env = get_env_maker(env_name, require_render=True)()
    if seed is not None:
        assert isinstance(seed, int)
        env.seed(seed)

    # Retry up to 10 times until the rollout is long enough.
    for iteration in range(10):
        result = copy.deepcopy(
            rollout(agent, env, env_name, num_steps,
                    require_frame=True,
                    require_trajectory=require_trajectory,
                    require_full_frame=self.require_full_frame,
                    render_mode=render_mode))
        frames, extra_info = result['frames'], result['frame_extra_info']
        if require_trajectory:
            extra_info['trajectory'] = result['trajectory']
        if ideal_steps is None:
            break
        elif len(frames) > ideal_steps:
            break
    env.close()

    # Estimate the agent's motion period from the recorded signal.
    period_info = extra_info['period_info']
    if period_info:
        period_source = np.stack(period_info)
        period = get_period(period_source, self.fps)
        print("period for agent <{}> is {}, its length is {}".format(
            agent_name, period, len(frames)))
    else:
        period = 100

    frames_info = {
        "frames": frames,
        "column": None,
        "row": None,
        "loc": None,
        "period": period
    }
    return_dict = {agent_name: frames_info}

    # Use deepcopy here: a shallow .copy() would share the inner dicts of
    # PRESET_INFORMATION_DICT across calls and pollute the preset.
    extra_info_dict = copy.deepcopy(PRESET_INFORMATION_DICT)
    for key, val in extra_info.items():
        if key in extra_info_dict:
            extra_info_dict[key][agent_name] = val
        elif key == "vf_preds":
            extra_info_dict["value_function"][agent_name] = val
        elif key == "trajectory" and require_trajectory:
            if "trajectory" in extra_info_dict:
                extra_info_dict["trajectory"][agent_name] = val
            else:
                extra_info_dict["trajectory"] = {agent_name: val}
    extra_info_dict['title'][agent_name] = agent_name

    new_extra_info_dict = copy.deepcopy(PRESET_INFORMATION_DICT)
    for key in PRESET_INFORMATION_DICT.keys():
        new_extra_info_dict[key].update(extra_info_dict[key])
    if require_trajectory:
        new_extra_info_dict["trajectory"] = extra_info_dict["trajectory"]
    new_extra_info_dict['frame_info'] = {
        "width": frames[0].shape[1],
        "height": frames[0].shape[0]
    }
    return return_dict, new_extra_info_dict
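# Usage sketch for `generate_frames_from_agent` above; illustrative only.
# The owning visualizer object and the checkpoint path are assumptions;
# `restore_agent` is the helper already used in `collect_frames`.
def _example_generate_frames(visualizer):
    agent = restore_agent("PPO",
                          "~/ckpt/checkpoint-100",  # hypothetical path
                          "BipedalWalker-v2",       # hypothetical env
                          {"num_workers": 0})       # hypothetical config
    frames_dict, extra_info_dict = visualizer.generate_frames_from_agent(
        agent, agent_name="example_agent", require_trajectory=True)
    # `frames_dict` maps the agent name to its frames plus layout/period
    # metadata; `extra_info_dict` follows PRESET_INFORMATION_DICT's layout.
    return frames_dict, extra_info_dict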
def get_fft_representation(name_ckpt_mapping,
                           num_seeds,
                           num_rollouts,
                           padding="fix",
                           padding_length=500,
                           padding_value=0,
                           stack=False,
                           normalize="range",
                           num_workers=10):
    initialize_ray()

    data_frame_dict = {}
    representation_dict = {}

    num_agents = len(name_ckpt_mapping)
    num_iteration = int(ceil(num_agents / num_workers))
    agent_ckpt_dict_range = list(name_ckpt_mapping.items())
    agent_count = 1
    agent_count_get = 1

    workers = [FFTWorker.remote() for _ in range(num_workers)]
    now_t_get = now_t = start_t = time.time()

    for iteration in range(num_iteration):
        start = iteration * num_workers
        end = min((iteration + 1) * num_workers, num_agents)
        df_obj_ids = []
        for i, (name, ckpt_dict) in enumerate(
                agent_ckpt_dict_range[start:end]):
            ckpt = ckpt_dict["path"]
            env_name = ckpt_dict["env_name"]
            run_name = ckpt_dict["run_name"]
            env_maker = get_env_maker(env_name)
            workers[i].reset.remote(
                run_name=run_name,
                ckpt=ckpt,
                num_rollouts=num_rollouts,
                env_name=env_name,
                env_maker=env_maker,
                agent_name=name,
                padding=padding,
                padding_length=padding_length,
                padding_value=padding_value,
                worker_name="Worker{}".format(i))
            df_obj_id = workers[i].fft.remote(
                normalize=normalize,
                _extra_name="[{}/{}] ".format(agent_count, num_agents))
            print("[{}/{}] (+{:.1f}s/{:.1f}s) Start collecting data from "
                  "agent <{}>".format(agent_count, num_agents,
                                      time.time() - now_t,
                                      time.time() - start_t, name))
            agent_count += 1
            now_t = time.time()
            df_obj_ids.append(df_obj_id)
        for df_obj_id, (name, _) in zip(df_obj_ids,
                                        agent_ckpt_dict_range[start:end]):
            df, rep = copy.deepcopy(ray.get(df_obj_id))
            data_frame_dict[name] = df
            representation_dict[name] = rep
            print("[{}/{}] (+{:.1f}s/{:.1f}s) Got data from agent "
                  "<{}>".format(agent_count_get, num_agents,
                                time.time() - now_t_get,
                                time.time() - start_t, name))
            agent_count_get += 1
            now_t_get = time.time()
    return data_frame_dict, representation_dict
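# Usage sketch for `get_fft_representation` above; illustrative only. The
# checkpoint paths, env name, and run name below are hypothetical, but the
# mapping structure (name -> {"path", "env_name", "run_name"}) matches what
# the dispatch loop above reads.
def _example_fft_representation():
    name_ckpt_mapping = {
        "agent_a": {"path": "~/ckpt/a/checkpoint-100",  # hypothetical
                    "env_name": "BipedalWalker-v2",
                    "run_name": "PPO"},
        "agent_b": {"path": "~/ckpt/b/checkpoint-100",  # hypothetical
                    "env_name": "BipedalWalker-v2",
                    "run_name": "PPO"},
    }
    data_frames, representations = get_fft_representation(
        name_ckpt_mapping, num_seeds=1, num_rollouts=2, num_workers=2)
    # Both returns are keyed by agent name.
    return data_frames, representations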
def several_agent_rollout(yaml_path,
                          num_rollouts,
                          seed=0,
                          num_workers=10,
                          force_rewrite=False,
                          return_data=False,
                          require_activation=True,
                          _num_agents=None):
    name_ckpt_mapping = read_yaml(yaml_path, number=_num_agents)
    now_t_get = now_t = start_t = time.time()
    num_agents = len(name_ckpt_mapping)
    num_iteration = int(ceil(num_agents / num_workers))
    agent_ckpt_dict_range = list(name_ckpt_mapping.items())
    agent_count = 1
    agent_count_get = 1

    have_gpu = has_gpu()
    workers = [
        RolloutWorkerWrapper.as_remote(
            num_gpus=0.2 if have_gpu else 0).remote(force_rewrite)
        for _ in range(num_workers)
    ]

    return_dict = {}
    for iteration in range(num_iteration):
        start = iteration * num_workers
        end = min((iteration + 1) * num_workers, num_agents)
        obj_ids_dict = {}
        for i, (name, ckpt_dict) in \
                enumerate(agent_ckpt_dict_range[start:end]):
            ckpt = ckpt_dict["path"]
            env_name = ckpt_dict["env_name"]
            env_maker = get_env_maker(env_name)
            run_name = ckpt_dict["run_name"]
            assert run_name == "PPO"  # TODO Only supports PPO now.
            workers[i].reset.remote(
                ckpt=ckpt,
                num_rollouts=num_rollouts,
                seed=seed,
                env_creater=env_maker,
                run_name=run_name,
                env_name=env_name,
                require_activation=require_activation)
            obj_id = workers[i].wrap_sample.remote()
            obj_ids_dict[name] = obj_id
            print("[{}/{}] (+{:.1f}s/{:.1f}s) Start collecting {} rollouts "
                  "from agent <{}>".format(agent_count, num_agents,
                                           time.time() - now_t,
                                           time.time() - start_t,
                                           num_rollouts, name))
            agent_count += 1
            now_t = time.time()
        for name, obj_id in obj_ids_dict.items():
            trajectory_list = copy.deepcopy(ray.get(obj_id))
            return_dict[name] = trajectory_list
            print("[{}/{}] (+{:.1f}s/{:.1f}s) Collected {} rollouts from "
                  "agent <{}>".format(agent_count_get, num_agents,
                                      time.time() - now_t_get,
                                      time.time() - start_t,
                                      num_rollouts, name))
            agent_count_get += 1
            now_t_get = time.time()
    return return_dict if return_data else None
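# Usage sketch for `several_agent_rollout` above; illustrative only. The
# YAML path is hypothetical; the file is expected to map agent names to
# {"path", "env_name", "run_name"} entries, as read by `read_yaml`.
def _example_several_agent_rollout():
    rollouts = several_agent_rollout(
        yaml_path="~/agents/ppo_agents.yaml",  # hypothetical path
        num_rollouts=2,
        num_workers=4,
        return_data=True)
    # With return_data=True, `rollouts` maps each agent name to its list
    # of collected trajectories; otherwise the function returns None.
    return rollouts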