def make_po_fourrooms(fomdp=False, max_episode_steps=2000):
    """max_episode_steps is a possible kwarg."""
    TL = gym.wrappers.TimeLimit
    e = StochasticFourRooms(possible_starts=set(all_squares),
                            possible_goals=set(oc_initial_goal))
    if not fomdp:
        e = FourRoomsFourWallsWrapper(e)
    e = TL(e, max_episode_steps)
    return GymEnvWrapper(e)
def simulate_policy():
    task = generate_task(task_generator_id='picking')
    env = CausalWorld(task=task,
                      enable_visualization=True,
                      skip_frame=3,
                      seed=0,
                      max_episode_length=600)
    env = GymEnvWrapper(env)
    file = './itr_1097499.pkl'
    data = torch.load(file)
    agent_state_dict = data['agent_state_dict']
    agent = SacAgent(initial_model_state_dict=agent_state_dict)
    agent.initialize(env_spaces=env.spaces)
    agent.eval_mode(itr=data['itr'])

    def policy_func(obs):
        # new_obs = np.hstack((obs['observation'], obs['desired_goal']))
        agent_step = agent.step(torchify_buffer(obs), prev_action=None,
                                prev_reward=None)
        return agent_step.action.numpy()

    # env = HERGoalEnvWrapper(env)
    for _ in range(100):
        total_reward = 0
        o = env.reset()
        for _ in range(600):
            o, reward, done, info = env.step(policy_func(o))
            total_reward += reward
        print("total reward is:", total_reward)
    env.close()
def make(*args, frame_stack=3, from_pixels=True, height=84, width=84,
         frame_skip=4, **kwargs):
    env = dmc2gym.make(*args, frame_skip=frame_skip, visualize_reward=False,
                       from_pixels=from_pixels, height=height, width=width,
                       **kwargs)
    if isinstance(env, TimeLimit):
        # Strip the gym TimeLimit wrapper and replace with one which
        # outputs TimeLimit.truncated=True at max_episode_steps - 1,
        # because that's when the dmc2gym env seems to end the episode.
        print("WARNING: replacing Gym TimeLimit wrapper by TimeLimitMinusOne")
        env = TimeLimitMinusOne(env.env)
    if from_pixels:
        env = FrameStack(env, k=frame_stack)
    elif frame_stack != 1:
        print("WARNING: dmcontrol.make() requested with frame_stack>1, but not"
              " doing it on state.")
    env = GymEnvWrapper(env)
    env._frame_skip = frame_skip
    return env
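# Example usage (a sketch, not from the original code): the factory above can
# be passed directly to an rlpyt sampler as EnvCls.  The function name and the
# domain_name/task_name values below are illustrative; they are the standard
# dmc2gym keyword arguments, not something defined in this file.
def example_dmc_sampler():
    from rlpyt.samplers.serial.sampler import SerialSampler
    return SerialSampler(
        EnvCls=make,
        env_kwargs=dict(domain_name="cartpole", task_name="swingup", seed=0),
        batch_T=1,
        batch_B=4,
        max_decorrelation_steps=0,
    )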
def safety_gym_make(*args, sometimes_info_kwargs=None, obs_prev_cost=True,
                    obs_version="default", **kwargs):
    assert obs_version in [
        "default",
        "vision",
        "vision_only",
        "no_lidar",
        "no_constraints",
    ]
    if obs_version != "default":
        eid = kwargs["id"]  # Must provide as kwarg, not arg.
        names = dict(  # Map to my modification in safety-gym suite.
            vision="Vision",
            vision_only="Visonly",
            no_lidar="NoLidar",
            no_constraints="NoConstr",
        )
        name = names[obs_version]
        # e.g. Safexp-PointGoal1-v0 --> Safexp-PointGoal1Vision-v0
        kwargs["id"] = eid[:-3] + name + eid[-3:]
    return GymEnvWrapper(
        SafetyGymEnvWrapper(
            gym.make(*args, **kwargs),
            sometimes_info_kwargs=sometimes_info_kwargs or dict(),
            obs_prev_cost=obs_prev_cost,
        ),
    )
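# Example call (a sketch, not from the original code): with an obs_version
# other than "default", the id remapping above expects the modified
# safety-gym suite to have registered the variant, e.g.
# "Safexp-PointGoal1-v0" -> "Safexp-PointGoal1NoLidar-v0".  The function name
# below is hypothetical.
def example_safety_gym_env():
    return safety_gym_make(id="Safexp-PointGoal1-v0", obs_version="no_lidar")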
def make_qec_env(error_model, error_rate, volume_depth=5):
    env = OptimizedSurfaceCodeEnvironment(error_model=error_model,
                                          volume_depth=volume_depth,
                                          p_meas=error_rate,
                                          p_phys=error_rate)
    # env = GeneralSurfaceCodeEnv(error_model=error_model, p_meas=error_rate,
    #                             p_phys=error_rate, use_Y=False)
    # env = gym.make('CartPole-v0')
    return GymEnvWrapper(env)
def make_qec_env(error_model, error_rate, volume_depth=5, **kwargs):
    # Pop the optional fixed_episode_length kwarg before it reaches the env.
    fixed_episode_length = kwargs.pop('fixed_episode_length', None)
    env = Surface_Code_Environment_Multi_Decoding_Cycles(
        error_model=error_model,
        volume_depth=volume_depth,
        p_meas=error_rate,
        p_phys=error_rate,
        use_Y=False)
    env = FixedLengthEnvWrapper(env, fixed_episode_length=fixed_episode_length)
    return GymEnvWrapper(env)
def _make_env(rank):
    task = generate_task(task_generator_id='reaching')
    env = CausalWorld(task=task,
                      skip_frame=10,
                      enable_visualization=False,
                      seed=0 + rank,
                      max_episode_length=600)
    env = GymEnvWrapper(env)
    return env
def make_env():
    # Build the dm_control environment from the parsed command-line args:
    dm_env = maw.load(team_size=args.team_size,
                      time_limit=args.time_limit,
                      terrain=not args.no_hfield,
                      agent_type=args.agent_type,
                      deterministic_spawn=not args.random_spawn,
                      raise_exception_on_physics_error=False,
                      task_id=args.task_id)
    # dm_env = dm_soccer.load(team_size=2, time_limit=10.)
    env = GymEnvWrapper(dmc2gym.DmControlWrapper('', '', env=dm_env))
    return env
def get_env(*args, **kwargs):
    if not gym_id:
        env = gym_wrapper.GymFromDMEnv(
            bsuite.load_and_record_to_csv(
                bsuite_id=bsuite_id,
                results_dir=results_dir,
                overwrite=True,
            ))
    else:
        env = gym.make(gym_id)
    env = FrameStack(env, num_stack=4)
    # Flatten the stacked frames into a single 1-D observation.
    env = TransformObservation(
        env, f=lambda lazy_frames: np.reshape(np.stack(lazy_frames._frames), -1))
    return GymEnvWrapper(env)
def make_env(**kwargs):
    info_example = {'timeout': 0}
    # env = gym.make('CartPole-v0')
    env = Surface_Code_Environment_Multi_Decoding_Cycles(error_model='DP',
                                                         volume_depth=5,
                                                         p_meas=0.011,
                                                         p_phys=0.011,
                                                         use_Y=False)
    # env = OptimizedSurfaceCodeEnvironment(error_model='X', volume_depth=5,
    #                                       p_meas=0.011, p_phys=0.011)
    # env = GeneralSurfaceCodeEnv(error_model='DP', p_meas=0.011, p_phys=0.011,
    #                             use_Y=False)
    env = GymEnvWrapper(EnvInfoWrapper(env, info_example))
    return env
def _make_env(rank):
    task = generate_task(task_generator_id='picking',
                         dense_reward_weights=np.array(
                             [250, 0, 125, 0, 750, 0, 0, 0.005]),
                         fractional_reward_weight=1,
                         goal_height=0.15,
                         tool_block_mass=0.02)
    env = CausalWorld(task=task,
                      skip_frame=3,
                      enable_visualization=False,
                      seed=0,
                      max_episode_length=600)
    env = GymEnvWrapper(env)
    return env
def _make_env(rank):
    task = generate_task('pushing',
                         dense_reward_weights=np.array([2500, 2500, 0]),
                         variables_space='space_a',
                         fractional_reward_weight=100)
    env = CausalWorld(task=task,
                      skip_frame=3,
                      enable_visualization=False,
                      seed=0 + rank)
    env = CurriculumWrapper(
        env,
        intervention_actors=[GoalInterventionActorPolicy()],
        actives=(0, 1e9, 2, 0))
    env = GymEnvWrapper(env)
    return env
def build_and_test(model_path, config_key):
    import dmc_wrapper
    from dm_control import viewer
    from rlpyt.utils.buffer import buffer_from_example, torchify_buffer, numpify_buffer
    import torch
    config = configs[config_key]
    reloaded = torch.load(model_path) if len(model_path) > 0 else None
    # import pdb; pdb.set_trace()
    agent = MultiFfAgent(
        model_kwargs=config["model"],
        initial_model_state_dict=None if reloaded is None
        else reloaded['agent_state_dict'],
        **config["agent"])
    dm_env = maw.load(team_size=args.team_size,
                      time_limit=args.time_limit,
                      terrain=not args.no_hfield,
                      agent_type=args.agent_type,
                      deterministic_spawn=not args.random_spawn,
                      raise_exception_on_physics_error=False,
                      task_id=args.task_id)
    env = GymEnvWrapper(dmc2gym.DmControlWrapper('', '', env=dm_env))
    agent.initialize(env.spaces)
    agent.reset()
    # agent.eval_mode(0)
    prev_action = env.action_space.null_value()

    def get_prev_action():
        return prev_action

    def policy(time_step):
        obs = dmc_wrapper.convertObservation(time_step.observation)
        reward = time_step.reward
        reward = np.asarray(reward) if reward is not None else reward
        obs_pyt, act_pyt, rew_pyt = torchify_buffer(
            (obs, get_prev_action(), reward))
        # obs_pyt, rew_pyt = torchify_buffer((obs, reward))
        act_pyt, agent_info = agent.step(obs_pyt.float(), act_pyt, rew_pyt)
        # prev_action = act_pyt
        # The dm_control viewer expects a numpy action, not a torch tensor.
        return numpify_buffer(act_pyt)

    viewer.launch(dm_env, policy=policy)
def make_gym_env(**kwargs):
    info_example = {'timeout': 0}
    # print('making env: ' + str(kwargs))
    static_decoder_path = '/home/alex/DeepQ-Decoding/example_notebooks/referee_decoders/nn_d5_X_p5'
    # from keras.models import load_model
    # static_decoder = load_model(static_decoder_path)
    fixed_episode_length = kwargs.pop('fixed_episode_length', None)
    env = Surface_Code_Environment_Multi_Decoding_Cycles(error_model='DP',
                                                         volume_depth=5,
                                                         p_meas=0.001,
                                                         p_phys=0.001,
                                                         use_Y=False)
    # env = gym.make(**kwargs)
    env = FixedLengthEnvWrapper(env, fixed_episode_length=fixed_episode_length)
    # return GymEnvWrapper(EnvInfoWrapper(env, info_example))
    return GymEnvWrapper(env)
def make(self):
    return GymEnvWrapper(self.env)
def env_f(**kwargs):
    return GymEnvWrapper(DelayedActionEnv(**kwargs))
if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('--path', help='path to params.pkl',
                        default='/home/alex/parkour-learning/data/params.pkl')
    parser.add_argument(
        '--env', default='HumanoidPrimitivePretraining-v0',
        choices=['HumanoidPrimitivePretraining-v0', 'TrackEnv-v0'])
    parser.add_argument('--algo', default='ppo', choices=['sac', 'ppo'])
    args = parser.parse_args()
    snapshot = torch.load(args.path, map_location=torch.device('cpu'))
    agent_state_dict = snapshot['agent_state_dict']
    env = GymEnvWrapper(gym.make(args.env, render=True))
    if args.algo == 'ppo':
        if args.env == 'TrackEnv-v0':
            agent = MujocoFfAgent(ModelCls=PpoMcpVisionModel)
        else:
            agent = MujocoFfAgent(ModelCls=PPOMcpModel)
    else:
        if args.env == 'TrackEnv-v0':
            agent = SacAgent(ModelCls=PiVisionModel, QModelCls=QofMuVisionModel)
        else:
            agent = SacAgent(ModelCls=PiMCPModel, QModelCls=QofMCPModel)
    agent.initialize(env_spaces=env.spaces)
    agent.load_state_dict(agent_state_dict)
    agent.eval_mode(0)
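    # Rollout sketch (not part of the original script): run the loaded agent
    # for one episode through the rlpyt step interface.  This assumes a flat,
    # non-dict float observation; the imports below are the standard rlpyt and
    # numpy entry points.
    import numpy as np
    from rlpyt.utils.buffer import torchify_buffer
    obs = env.reset()
    prev_action = env.action_space.null_value()
    prev_reward = np.asarray(0., dtype=np.float32)
    done, episode_return = False, 0.
    while not done:
        obs_pyt, act_pyt, rew_pyt = torchify_buffer(
            (obs, prev_action, prev_reward))
        action, agent_info = agent.step(obs_pyt.float(), act_pyt, rew_pyt)
        action = action.numpy()
        obs, reward, done, info = env.step(action)
        episode_return += reward
        prev_action = action
        prev_reward = np.asarray(reward, dtype=np.float32)
    print("episode return:", episode_return)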
def make_env():
    # Load the 2-vs-2 soccer environment with episodes of 10 seconds:
    dm_env = dm_soccer.load(team_size=2, time_limit=10.)
    env = GymEnvWrapper(dmc2gym.DmControlWrapper('', '', env=dm_env))
    return env
def make_po_taxi(**kwargs):
    """max_episode_steps is a possible kwarg."""
    e = TaxiPartialObservabilityWrapper(gym.make('Taxi-v3', **kwargs))
    return GymEnvWrapper(e)
def football_self_play_env(rank=0, **kwargs):
    return GymEnvWrapper(FootballSelfPlayEnv(rank, **kwargs), act_null_value=0)
def football_env(rank=0, **kwargs):
    return GymEnvWrapper(FootballEnv(rank, **kwargs), act_null_value=0)
def make(*args, **kwargs):
    # Make the rlpyt environment after wrapping the gym environment.
    env = gym.make(*args, **kwargs)
    env = PytImgWrapper(env)
    env = GymEnvWrapper(env)
    return env
def make(*args, info_example=None, **kwargs):
    env = GridEnv((7, 7))
    return GymEnvWrapper(env)
def make_env():
    # Load the quadruped "escape" task from the dm_control suite:
    # dm_env = dm_soccer.load(team_size=2, time_limit=10.)
    dm_env = suite.load(domain_name="quadruped", task_name="escape")
    env = GymEnvWrapper(dmc2gym.DmControlWrapper('', '', env=dm_env))
    return env
def make(*args, info_example=None, **kwargs):
    import pybulletgym
    import parkour_learning
    info_example = {'timeout': 0}
    return GymEnvWrapper(
        EnvInfoWrapper(gym.make(*args, **kwargs), info_example))
def cartpole_env(env_id=1, **kwargs):
    return GymEnvWrapper(CartPoleEnv(**kwargs), act_null_value=0)