Example #1
def make_po_fourrooms(fomdp=False, max_episode_steps=2000):
    """ max_episode_steps is a possible kwargs"""
    TL = gym.wrappers.TimeLimit
    e = StochasticFourRooms(possible_starts=set(all_squares), possible_goals=set(oc_initial_goal))
    if not fomdp: e = FourRoomsFourWallsWrapper(e)
    e = TL(e, max_episode_steps)
    return GymEnvWrapper(e)
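Factories like this one are usually handed to an rlpyt sampler rather than called directly. A minimal usage sketch (assuming rlpyt is installed and the names above are importable; batch sizes are illustrative):

# Usage sketch (not part of the original example): the sampler calls the
# factory once per environment instance it needs.
from rlpyt.samplers.serial.sampler import SerialSampler

sampler = SerialSampler(
    EnvCls=make_po_fourrooms,
    env_kwargs=dict(fomdp=False, max_episode_steps=2000),
    batch_T=100,   # time steps per sampling iteration
    batch_B=8,     # parallel environment instances
)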
Example #2
def simulate_policy():
    task = generate_task(task_generator_id='picking')
    env = CausalWorld(task=task,
                      enable_visualization=True,
                      skip_frame=3,
                      seed=0,
                      max_episode_length=600)
    env = GymEnvWrapper(env)
    file = './itr_1097499.pkl'
    data = torch.load(file)
    agent_state_dict = data['agent_state_dict']
    agent = SacAgent(initial_model_state_dict=agent_state_dict)
    agent.initialize(env_spaces=env.spaces)
    agent.eval_mode(itr=data['itr'])

    def policy_func(obs):
        # new_obs = np.hstack((obs['observation'], obs['desired_goal']))
        agent_info = agent.step(torchify_buffer(obs),
                                prev_action=None,
                                prev_reward=None)
        return agent_info.action.numpy()

    # env = HERGoalEnvWrapper(env)
    for _ in range(100):
        total_reward = 0
        o = env.reset()
        for _ in range(600):
            o, reward, done, info = env.step(policy_func(o))
            total_reward += reward
        print("total reward is :", total_reward)
    env.close()
Example #3
def make(*args,
         frame_stack=3,
         from_pixels=True,
         height=84,
         width=84,
         frame_skip=4,
         **kwargs):
    env = dmc2gym.make(*args,
                       frame_skip=frame_skip,
                       visualize_reward=False,
                       from_pixels=from_pixels,
                       height=height,
                       width=width,
                       **kwargs)
    if isinstance(env, TimeLimit):
        # Strip the gym TimeLimit wrapper and replace with one which
        # outputs TimeLimit.truncated=True at max_episode_steps - 1,
        # because that's when the dmc2gym env seems to end the episode.
        print("WARNING: replacing Gym TimeLimit wrapper by TimeLimitMinusOne")
        env = TimeLimitMinusOne(env.env)
    if from_pixels:
        env = FrameStack(env, k=frame_stack)
    elif frame_stack != 1:
        print("WARNING: dmcontrol.make() requested with frame_stack>1, but not"
              " doing it on state.")
    env = GymEnvWrapper(env)
    env._frame_skip = frame_skip

    return env
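A typical call names a dm_control domain and task; dmc2gym.make accepts domain_name and task_name keywords, which pass through *args/**kwargs here (the values below are illustrative only):

# Illustrative call (not part of the original example).
env = make(domain_name="cheetah", task_name="run", seed=1, frame_stack=3)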
Example #4
def safety_gym_make(*args,
                    sometimes_info_kwargs=None,
                    obs_prev_cost=True,
                    obs_version="default",
                    **kwargs):
    assert obs_version in [
        "default",
        "vision",
        "vision_only",
        "no_lidar",
        "no_constraints",
    ]
    if obs_version != "default":
        eid = kwargs["id"]  # Must provide as kwarg, not arg.
        names = dict(  # Map to my modification in safety-gym suite.
            vision="Vision",
            vision_only="Visonly",
            no_lidar="NoLidar",
            no_constraints="NoConstr",
        )
        name = names[obs_version]
        # e.g. Safexp-PointGoal1-v0 --> Safexp-PointGoal1Vision-v0
        kwargs["id"] = eid[:-3] + name + eid[-3:]
    return GymEnvWrapper(
        SafetyGymEnvWrapper(
            gym.make(*args, **kwargs),
            sometimes_info_kwargs=sometimes_info_kwargs or dict(),
            obs_prev_cost=obs_prev_cost,
        ), )
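As an illustration (not from the original source), requesting a non-default observation version rewrites the environment id before gym.make runs:

# "Safexp-PointGoal1-v0" becomes "Safexp-PointGoal1NoLidar-v0" internally.
env = safety_gym_make(id="Safexp-PointGoal1-v0", obs_version="no_lidar")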
Example #5
def make_qec_env(error_model, error_rate, volume_depth=5):
    env = OptimizedSurfaceCodeEnvironment(error_model=error_model,
                                          volume_depth=volume_depth,
                                          p_meas=error_rate,
                                          p_phys=error_rate)
    # env = GeneralSurfaceCodeEnv(error_model=error_model, p_meas=error_rate, p_phys=error_rate, use_Y=False)
    # env = gym.make('CartPole-v0')
    return GymEnvWrapper(env)
Example #6
def make_qec_env(error_model, error_rate, volume_depth=5, **kwargs):
    fixed_episode_length = kwargs.pop('fixed_episode_length', None)
    # Note: error_model and volume_depth are hard-coded ('DP', 5) here rather
    # than taken from the function arguments.
    env = Surface_Code_Environment_Multi_Decoding_Cycles(error_model='DP',
                                                         volume_depth=5,
                                                         p_meas=error_rate,
                                                         p_phys=error_rate,
                                                         use_Y=False)
    env = FixedLengthEnvWrapper(env, fixed_episode_length=fixed_episode_length)
    return GymEnvWrapper(env)
Example #7
def _make_env(rank):
    task = generate_task(task_generator_id='reaching')
    env = CausalWorld(task=task,
                      skip_frame=10,
                      enable_visualization=False,
                      seed=0 + rank,
                      max_episode_length=600)
    env = GymEnvWrapper(env)
    return env
Example #8
def make_env():
    # Load the multi-agent environment configured from the command-line arguments:
    dm_env = maw.load(team_size=args.team_size,
                      time_limit=args.time_limit,
                      terrain=not args.no_hfield,
                      agent_type=args.agent_type,
                      deterministic_spawn=not args.random_spawn,
                      raise_exception_on_physics_error=False,
                      task_id=args.task_id)
    #dm_env = dm_soccer.load(team_size=2, time_limit=10.)
    env = GymEnvWrapper(dmc2gym.DmControlWrapper('', '', env=dm_env))
    return env
Example #9
def get_env(*args, **kwargs):
    # Build the bsuite env (recorded to CSV) unless a plain gym id is given.
    env = (gym_wrapper.GymFromDMEnv(
        bsuite.load_and_record_to_csv(
            bsuite_id=bsuite_id,
            results_dir=results_dir,
            overwrite=True,
        )) if not gym_id else gym.make(gym_id))
    env = FrameStack(env, num_stack=4)
    # Flatten the stacked LazyFrames into a single 1-D observation vector.
    env = TransformObservation(
        env, f=lambda lazy_frames: np.reshape(np.stack(lazy_frames._frames), -1))
    return GymEnvWrapper(env)
Example #10
def make_env(**kwargs):
    info_example = {'timeout': 0}
    # env = gym.make('CartPole-v0')
    env = Surface_Code_Environment_Multi_Decoding_Cycles(error_model='DP',
                                                         volume_depth=5,
                                                         p_meas=0.011,
                                                         p_phys=0.011,
                                                         use_Y=False)
    # env = OptimizedSurfaceCodeEnvironment(error_model='X', volume_depth=5, p_meas=0.011, p_phys=0.011)
    # env = GeneralSurfaceCodeEnv(error_model='DP', p_meas=0.011, p_phys=0.011, use_Y=False)
    env = GymEnvWrapper(EnvInfoWrapper(env, info_example))
    return env
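The info_example dict tells rlpyt's EnvInfoWrapper which keys (and value types) to expect in the env's info dict, so GymEnvWrapper can expose it as a fixed-structure namedtuple. A minimal sketch with a plain gym env (illustrative, not from the original source):

# Any gym env can be wrapped the same way so its info always carries 'timeout'.
import gym
from rlpyt.envs.gym import GymEnvWrapper, EnvInfoWrapper

env = GymEnvWrapper(EnvInfoWrapper(gym.make('CartPole-v0'), {'timeout': 0}))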
Example #11
def _make_env(rank):
    task = generate_task(task_generator_id='picking',
                         dense_reward_weights=np.array(
                             [250, 0, 125, 0, 750, 0, 0, 0.005]),
                         fractional_reward_weight=1,
                         goal_height=0.15,
                         tool_block_mass=0.02)
    env = CausalWorld(task=task,
                      skip_frame=3,
                      enable_visualization=False,
                      seed=0,
                      max_episode_length=600)
    env = GymEnvWrapper(env)
    return env
Example #12
def _make_env(rank):
    task = generate_task('pushing',
                         dense_reward_weights=np.array([2500, 2500, 0]),
                         variables_space='space_a',
                         fractional_reward_weight=100)
    env = CausalWorld(task=task,
                      skip_frame=3,
                      enable_visualization=False,
                      seed=0 + rank)
    env = CurriculumWrapper(
        env,
        intervention_actors=[GoalInterventionActorPolicy()],
        actives=(0, 1e9, 2, 0))
    env = GymEnvWrapper(env)
    return env
Example #13
def build_and_test(model_path, config_key):
    import dmc_wrapper
    from dm_control import viewer
    from rlpyt.utils.buffer import buffer_from_example, torchify_buffer, numpify_buffer
    import torch

    config = configs[config_key]

    reloaded = torch.load(model_path) if len(model_path) > 0 else None
    # import pdb; pdb.set_trace()
    agent = MultiFfAgent(model_kwargs=config["model"],
                         initial_model_state_dict=(reloaded['agent_state_dict']
                                                   if reloaded is not None else None),
                         **config["agent"])

    dm_env = maw.load(team_size=args.team_size,
                      time_limit=args.time_limit,
                      terrain=not args.no_hfield,
                      agent_type=args.agent_type,
                      deterministic_spawn=not args.random_spawn,
                      raise_exception_on_physics_error=False,
                      task_id=args.task_id)

    env = GymEnvWrapper(dmc2gym.DmControlWrapper('', '', env=dm_env))

    agent.initialize(env.spaces)
    agent.reset()
    # agent.eval_mode(0)

    prev_action = env.action_space.null_value()

    def get_prev_action():
        return prev_action

    def policy(time_step):
        obs = dmc_wrapper.convertObservation(time_step.observation)
        reward = time_step.reward
        reward = np.asarray(reward) if reward is not None else reward

        obs_pyt, act_pyt, rew_pyt = torchify_buffer(
            (obs, get_prev_action(), reward))
        # obs_pyt, rew_pyt = torchify_buffer((obs, reward))

        act_pyt, agent_info = agent.step(obs_pyt.float(), act_pyt, rew_pyt)
        # prev_action = act_pyt

        return act_pyt

    viewer.launch(dm_env, policy=policy)
Example #14
def make_gym_env(**kwargs):
    info_example = {'timeout': 0}
    # print('making env: ' + str(kwargs))
    static_decoder_path = '/home/alex/DeepQ-Decoding/example_notebooks/referee_decoders/nn_d5_X_p5'
    # from keras.models import load_model
    # static_decoder = load_model(static_decoder_path)
    fixed_episode_length = kwargs.pop('fixed_episode_length', None)

    env = Surface_Code_Environment_Multi_Decoding_Cycles(error_model='DP',
                                                         volume_depth=5,
                                                         p_meas=0.001,
                                                         p_phys=0.001,
                                                         use_Y=False)
    # env = gym.make(**kwargs)
    env = FixedLengthEnvWrapper(env, fixed_episode_length=fixed_episode_length)
    # return GymEnvWrapper(EnvInfoWrapper(env, info_example))
    return GymEnvWrapper(env)
Example #15
def make(self):
    return GymEnvWrapper(self.env)
Example #16
def env_f(**kwargs):
    return GymEnvWrapper(DelayedActionEnv(**kwargs))
Example #17
if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('--path',
                        help='path to params.pkl',
                        default='/home/alex/parkour-learning/data/params.pkl')
    parser.add_argument(
        '--env',
        default='HumanoidPrimitivePretraining-v0',
        choices=['HumanoidPrimitivePretraining-v0', 'TrackEnv-v0'])
    parser.add_argument('--algo', default='ppo', choices=['sac', 'ppo'])
    args = parser.parse_args()

    snapshot = torch.load(args.path, map_location=torch.device('cpu'))
    agent_state_dict = snapshot['agent_state_dict']
    env = GymEnvWrapper(gym.make(args.env, render=True))
    if args.algo == 'ppo':
        if args.env == 'TrackEnv-v0':
            agent = MujocoFfAgent(ModelCls=PpoMcpVisionModel)
        else:
            agent = MujocoFfAgent(ModelCls=PPOMcpModel)
    else:
        if args.env == 'TrackEnv-v0':
            agent = SacAgent(ModelCls=PiVisionModel,
                             QModelCls=QofMuVisionModel)
        else:
            agent = SacAgent(ModelCls=PiMCPModel, QModelCls=QofMCPModel)

    agent.initialize(env_spaces=env.spaces)
    agent.load_state_dict(agent_state_dict)
    agent.eval_mode(0)
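The script ends after switching the agent to eval mode and does not show a rollout. A rough evaluation loop (a sketch, assuming rlpyt's torchify_buffer and the null_value() helper seen in Example #13; not part of the original script) might look like:

# Hypothetical rollout loop (illustrative only).
import numpy as np
from rlpyt.utils.buffer import torchify_buffer

obs = env.reset()
prev_action = env.action_space.null_value()
prev_reward = np.asarray(0., dtype=np.float32)
done = False
while not done:
    obs_pyt, act_pyt, rew_pyt = torchify_buffer((obs, prev_action, prev_reward))
    # A dtype cast such as obs_pyt.float() may be needed, as in Example #13.
    act_pyt, agent_info = agent.step(obs_pyt, act_pyt, rew_pyt)
    action = act_pyt.numpy()
    obs, reward, done, info = env.step(action)
    prev_action, prev_reward = action, np.asarray(reward, dtype=np.float32)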
Example #18
def make_env():
    # Load the 2-vs-2 soccer environment with episodes of 10 seconds:
    dm_env = dm_soccer.load(team_size=2, time_limit=10.)
    env = GymEnvWrapper(dmc2gym.DmControlWrapper('', '', env=dm_env))
    return env
Example #19
def make_po_taxi(**kwargs):
    """ max_episode_steps is a possible kwargs"""
    e = TaxiPartialObservabilityWrapper(gym.make('Taxi-v3', **kwargs))
    return GymEnvWrapper(e)
Example #20
def football_self_play_env(rank=0, **kwargs):
    return GymEnvWrapper(FootballSelfPlayEnv(rank, **kwargs), act_null_value=0)
Example #21
def football_env(rank=0, **kwargs):
    return GymEnvWrapper(FootballEnv(rank, **kwargs), act_null_value=0)
Example #22
def make(*args, **kwargs):
    # Build the rlpyt environment by wrapping the gym environment
    env = gym.make(*args, **kwargs)
    env = PytImgWrapper(env)
    env = GymEnvWrapper(env)
    return env
Example #23
def make(*args, info_example=None, **kwargs):
    env = GridEnv((7, 7))
    return GymEnvWrapper(env)
Example #24
def make_env():
    # Load the dm_control quadruped "escape" task (the 2-vs-2 soccer alternative is commented out below):
    # dm_env = dm_soccer.load(team_size=2, time_limit=10.)
    dm_env = suite.load(domain_name="quadruped", task_name="escape")
    env = GymEnvWrapper(dmc2gym.DmControlWrapper('', '', env=dm_env))
    return env
Example #25
def make(*args, info_example=None, **kwargs):
    import pybulletgym
    import parkour_learning
    info_example = {'timeout': 0}
    return GymEnvWrapper(
        EnvInfoWrapper(gym.make(*args, **kwargs), info_example))
Example #26
def cartpole_env(env_id=1, **kwargs):
    return GymEnvWrapper(CartPoleEnv(**kwargs), act_null_value=0)