Example #1
def main(args):
    if args.cpu_only:
        cpu = tf.config.experimental.list_physical_devices(device_type='CPU')
        tf.config.experimental.set_visible_devices(devices=cpu,
                                                   device_type='CPU')

    # random seed setting
    if args.random_seed <= 0:
        random_seed = np.random.randint(1, 9999)
    else:
        random_seed = args.random_seed

    tf.random.set_seed(random_seed)
    np.random.seed(random_seed)
    random.seed(random_seed)

    domain_name = args.env_name.split('/')[0]
    task_name = args.env_name.split('/')[1]
    env = dmc2gym.make(domain_name=domain_name,
                       task_name=task_name,
                       seed=random_seed,
                       visualize_reward=False,
                       from_pixels=True,
                       height=args.image_size,
                       width=args.image_size,
                       frame_skip=args.frame_skip
                       )  #Pre image size for curl, image size for dbc
    env = FrameStack(env, k=args.frame_stack)

    test_env = dmc2gym.make(domain_name=domain_name,
                            task_name=task_name,
                            seed=random_seed,
                            visualize_reward=False,
                            from_pixels=True,
                            height=args.image_size,
                            width=args.image_size,
                            frame_skip=args.frame_skip
                            )  #Pre image size for curl, image size for dbc
    test_env = FrameStack(test_env, k=args.frame_stack)

    state_dim = (3 * args.frame_stack, args.image_size, args.image_size)
    action_dim = env.action_space.shape[0]
    max_action = env.action_space.high[0]
    min_action = env.action_space.low[0]

    if args.algorithm == 'SACv1':
        algorithm = SACv1_AE(state_dim, action_dim, args)  # assumed SACv1 counterpart to SACv2_AE
    elif args.algorithm == 'SACv2':
        algorithm = SACv2_AE(state_dim, action_dim, args)

    print("Training of", env.unwrapped.spec.id)
    print("Algorithm:", algorithm.name)
    print("State dim:", state_dim)
    print("Action dim:", action_dim)
    print("Max action:", max_action)
    print("Min action:", min_action)

    trainer = Basic_trainer(env, test_env, algorithm, max_action, min_action,
                            args)
    trainer.run()
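
Most of the pixel-based examples here pass the environment through a FrameStack wrapper without showing its definition. A minimal sketch of such a wrapper, modeled on the CURL/SAC+AE implementations and the classic gym API these snippets use (the exact class in each project may differ):

from collections import deque

import gym
import numpy as np


class FrameStack(gym.Wrapper):
    """Stack the last k channels-first frames into one observation."""

    def __init__(self, env, k):
        super().__init__(env)
        self._k = k
        self._frames = deque([], maxlen=k)
        shp = env.observation_space.shape
        # A (3, H, W) frame stacked k times becomes (3 * k, H, W),
        # which matches the state_dim computed in Example #1.
        self.observation_space = gym.spaces.Box(
            low=0, high=255, shape=((shp[0] * k,) + shp[1:]),
            dtype=env.observation_space.dtype)

    def reset(self):
        obs = self.env.reset()
        for _ in range(self._k):
            self._frames.append(obs)
        return self._get_obs()

    def step(self, action):
        obs, reward, done, info = self.env.step(action)
        self._frames.append(obs)
        return self._get_obs(), reward, done, info

    def _get_obs(self):
        assert len(self._frames) == self._k
        return np.concatenate(list(self._frames), axis=0)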
Example #2
def main(args):
    if args.cpu_only:
        cpu = tf.config.experimental.list_physical_devices(device_type='CPU')
        tf.config.experimental.set_visible_devices(devices=cpu,
                                                   device_type='CPU')

    # random seed setting
    if args.random_seed <= 0:
        random_seed = np.random.randint(1, 9999)
    else:
        random_seed = args.random_seed

    tf.random.set_seed(random_seed)
    np.random.seed(random_seed)
    random.seed(random_seed)

    #env setting
    if args.domain_type == 'gym':
        #openai gym
        env = gym.make(args.env_name)
        env.seed(random_seed)
        env.action_space.seed(random_seed)

        test_env = gym.make(args.env_name)
        test_env.seed(random_seed)
        test_env.action_space.seed(random_seed)
    else:
        #deepmind control suite
        env = dmc2gym.make(domain_name=args.env_name.split('/')[0],
                           task_name=args.env_name.split('/')[1],
                           seed=random_seed)
        test_env = dmc2gym.make(domain_name=args.env_name.split('/')[0],
                                task_name=args.env_name.split('/')[1],
                                seed=random_seed)

    if args.discrete:
        state_dim = env.observation_space.shape[0]
        action_dim = env.action_space.n
        max_action = 1
        min_action = 1
    else:
        state_dim = env.observation_space.shape[0]
        action_dim = env.action_space.shape[0]
        max_action = env.action_space.high[0]
        min_action = env.action_space.low[0]

    algorithm = TRPO(state_dim, action_dim, args)

    print("Training of", env.unwrapped.spec.id)
    print("Algorithm:", algorithm.name)
    print("State dim:", state_dim)
    print("Action dim:", action_dim)
    print("Max action:", max_action)
    print("Min action:", min_action)
    print("Discrete: ", args.discrete)

    trainer = Basic_trainer(env, test_env, algorithm, max_action, min_action,
                            args)
    trainer.run()
Example #3
def test_freeze():
    _freeze_mujoco_gym_env(gym.make("HalfCheetah-v2"))
    _freeze_mujoco_gym_env(dmc2gym.make(domain_name="cheetah",
                                        task_name="run"))
    _freeze_mujoco_gym_env(gym.make("Hopper-v2"))
    _freeze_mujoco_gym_env(
        dmc2gym.make(domain_name="hopper", task_name="stand"))
    _freeze_mujoco_gym_env(gym.make("Humanoid-v2"))
    _freeze_mujoco_gym_env(
        dmc2gym.make(domain_name="humanoid", task_name="run"))
Example #4
def make_env(cfg):
    """Helper function to create dm_control environment"""
    if cfg.env == 'ball_in_cup_catch':
        domain_name = 'ball_in_cup'
        task_name = 'catch'
    else:
        domain_name = cfg.env.split('_')[0]
        task_name = '_'.join(cfg.env.split('_')[1:])

    env = dmc2gym.make(domain_name=domain_name,
                       task_name=task_name,
                       seed=cfg.seed,
                       visualize_reward=False,
                       from_pixels=cfg.from_pixels,
                       height=cfg.height,
                       width=cfg.width,
                       camera_id=cfg.camera_id,
                       frame_skip=cfg.frame_skip,
                       channels_first=False,
                       )

    env.seed(cfg.seed)
    assert env.action_space.low.min() >= -1
    assert env.action_space.high.max() <= 1
    if cfg.from_pixels:
        model_path = None
        if cfg.encoder_type == "VanillaVAE":
            model_path = home + "/pytorch_sac/ckpts/" + cfg.env + "/_ckpt.ckpt"
            print("VanillaVAE encoder path: ", model_path)
        env = ObservationWrapper(env, cfg.encoder_type, model_path, latent_dim=512)

    return env
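
For reference, the name-splitting convention above maps a flat config name onto dm_control's (domain, task) pair; 'ball_in_cup_catch' is special-cased because the domain name itself contains underscores:

env_name = 'walker_stand'
domain_name = env_name.split('_')[0]             # 'walker'
task_name = '_'.join(env_name.split('_')[1:])    # 'stand'
# Without the special case, 'ball_in_cup_catch' would wrongly parse
# as domain 'ball' and task 'in_cup_catch'.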
Example #5
    def __init__(self, opt):
        self.opt = opt
        self.env = dmc2gym.make(domain_name=opt.domain_name,
                                task_name=opt.task_name,
                                seed=0,
                                visualize_reward=False,
                                from_pixels=True,
                                height=256,
                                width=256,
                                frame_skip=opt.frame_skip)

        self.env.seed(0)
        random.seed(0)
        # self.state_dim = self.env.observation_space.shape[0]
        if self.opt.domain_name == 'finger':
            self.state_dim = 9
        elif self.opt.domain_name == 'reacher':
            self.state_dim = 6
        # self.state_dim = self.env.observation_space.shape[0] if opt.state_dim==0 else opt.state_dim

        self.action_dim = self.env.action_space.shape[0]
        self.max_action = float(self.env.action_space.high[0])
        self.log_root = opt.log_root
        self.episode_n = opt.episode_n
        log_path = os.path.join(
            opt.log_root, '{}_{}_base'.format(opt.domain_name, opt.task_name))
        self.policy_path = os.path.join(
            log_path, 'models/TD3_{}_0_actor'.format(opt.domain_name))
        if self.opt.domain_name == 'finger':
            self.policy_path = os.path.join(
                log_path, 'models_bak/TD3_{}_0_actor'.format(opt.domain_name))
        self.policy = TD3(self.policy_path, self.state_dim, self.action_dim,
                          self.max_action, opt)
Example #6
def make_pad_env(domain_name,
                 task_name,
                 seed=0,
                 episode_length=1000,
                 frame_stack=3,
                 action_repeat=4,
                 mode='train'):
    """Make environment for PAD experiments"""
    env = dmc2gym.make(domain_name=domain_name,
                       task_name=task_name,
                       seed=seed,
                       visualize_reward=False,
                       from_pixels=True,
                       height=100,
                       width=100,
                       episode_length=episode_length,
                       frame_skip=action_repeat)
    env.seed(seed)
    env = GreenScreen(env, mode)
    env = FrameStack(env, frame_stack)
    env = ColorWrapper(env, mode)

    assert env.action_space.low.min() >= -1
    assert env.action_space.high.max() <= 1

    return env
Example #7
def make_env(cfg):
    """Helper function to create dm_control environment"""
    if cfg.env == 'ball_in_cup_catch':
        domain_name = 'ball_in_cup'
        task_name = 'catch'
    else:
        domain_name = cfg.env.split('_')[0]
        task_name = '_'.join(cfg.env.split('_')[1:])

    env = dmc2gym.make(
        domain_name=domain_name,
        task_name=task_name,
        seed=cfg.seed,
        visualize_reward=False,
        from_pixels=cfg.from_pixels,
        height=cfg.height,
        width=cfg.width,
        camera_id=cfg.camera_id,
        frame_skip=cfg.frame_skip,
        channels_first=False,
    )

    env.seed(cfg.seed)
    assert env.action_space.low.min() >= -1
    assert env.action_space.high.max() <= 1

    return env
Example #8
def make(*args,
         frame_stack=3,
         from_pixels=True,
         height=84,
         width=84,
         frame_skip=4,
         **kwargs):
    env = dmc2gym.make(*args,
                       frame_skip=frame_skip,
                       visualize_reward=False,
                       from_pixels=from_pixels,
                       height=height,
                       width=width,
                       **kwargs)
    if isinstance(env, TimeLimit):
        # Strip the gym TimeLimit wrapper and replace with one which
        # outputs TimeLimit.truncated=True at max_episode_steps - 1,
        # because that's when the dmc2gym env seems to end the episode.
        print("WARNING: replacing Gym TimeLimit wrapper by TimeLimitMinusOne")
        env = TimeLimitMinusOne(env.env)
    if from_pixels:
        env = FrameStack(env, k=frame_stack)
    elif frame_stack != 1:
        print("WARNING: dmcontrol.make() requested with frame_stack>1, but not"
              " doing it on state.")
    env = GymEnvWrapper(env)
    env._frame_skip = frame_skip

    return env
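
TimeLimitMinusOne is not defined in this snippet; a rough sketch of what such a wrapper might look like, assuming the classic gym.Wrapper API used throughout these examples (the project's actual implementation may differ):

import gym


class TimeLimitMinusOne(gym.Wrapper):
    """Hypothetical sketch: end the episode one step before the usual
    limit, to line up with when the dmc2gym env ends it on its own."""

    def __init__(self, env, max_episode_steps=1000):
        super().__init__(env)
        self._max_episode_steps = max_episode_steps
        self._elapsed_steps = 0

    def reset(self, **kwargs):
        self._elapsed_steps = 0
        return self.env.reset(**kwargs)

    def step(self, action):
        obs, reward, done, info = self.env.step(action)
        self._elapsed_steps += 1
        if self._elapsed_steps >= self._max_episode_steps - 1:
            # Flag the truncation the same way gym's TimeLimit does.
            info['TimeLimit.truncated'] = not done
            done = True
        return obs, reward, done, info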
Example #9
def make_env(domain_name,
             task_name,
             seed=0,
             episode_length=1000,
             frame_stack=3,
             action_repeat=4,
             image_size=100,
             mode='train'):
    """Make environment for experiments"""
    assert mode in {'train', 'color_easy', 'color_hard', 'video_easy', 'video_hard'}, \
        f'specified mode "{mode}" is not supported'

    env = dmc2gym.make(domain_name=domain_name,
                       task_name=task_name,
                       seed=seed,
                       visualize_reward=False,
                       from_pixels=True,
                       height=image_size,
                       width=image_size,
                       episode_length=episode_length,
                       frame_skip=action_repeat)
    env = VideoWrapper(env, mode, seed)
    env = FrameStack(env, frame_stack)
    env = ColorWrapper(env, mode, seed)

    return env
Example #10
File: train.py | Project: HosseinSheikhi/drq
def make_env(cfg):
    """Helper function to create dm_control environment"""
    if cfg.env == 'ball_in_cup_catch':
        domain_name = 'ball_in_cup'
        task_name = 'catch'
    elif cfg.env == 'point_mass_easy':
        domain_name = 'point_mass'
        task_name = 'easy'
    else:
        domain_name = cfg.env.split('_')[0]
        task_name = '_'.join(cfg.env.split('_')[1:])

    # per dreamer: https://github.com/danijar/dreamer/blob/02f0210f5991c7710826ca7881f19c64a012290c/wrappers.py#L26
    camera_id = 2 if domain_name == 'quadruped' else 0

    env = dmc2gym.make(domain_name=domain_name,
                       task_name=task_name,
                       seed=cfg.seed,
                       visualize_reward=False,
                       from_pixels=True,
                       height=cfg.image_size,
                       width=cfg.image_size,
                       frame_skip=cfg.action_repeat,
                       camera_id=camera_id)

    env = utils.FrameStack(env, k=cfg.frame_stack)

    env.seed(cfg.seed)
    assert env.action_space.low.min() >= -1
    assert env.action_space.high.max() <= 1

    return env
Example #11
def initialize_environment(domain_name, task_name, seed, frame_skip):
    """
    Initialize the environment
    :param domain_name:
    :param task_name:
    :param seed:
    :param frame_skip:
    :return:
    """
    LogHelper.print_step_log(f"Initialize Environment: {domain_name}/{task_name} ...")

    env = dmc2gym.make(domain_name=domain_name,
                       task_name=task_name,
                       seed=seed,
                       frame_skip=frame_skip)

    # Debug logging to check environment specs
    s = env.reset()
    a = env.action_space.sample()
    action_dim = env.action_space.shape[0]
    state_dim = env.observation_space.shape[0]

    logging.debug(f'Sample state: {s}')
    logging.debug(f'Sample action: {a}')
    logging.debug(f'State DIM: {state_dim}')
    logging.debug(f'Action DIM: {action_dim}')

    return env, action_dim, state_dim
Example #12
    def __init__(self, opt):
        self.opt = opt
        self.env = dmc2gym.make(
            domain_name=opt.domain_name,
            task_name=opt.task_name,
            seed=0,
            visualize_reward=False,
            from_pixels=True,
            height=256,
            width=256,
            frame_skip=opt.frame_skip
        )

        self.env.seed(0)
        random.seed(0)
        self.state_dim = self.env.observation_space.shape[0]
        self.action_dim = self.env.action_space.shape[0]
        self.max_action = float(self.env.action_space.high[0])
        self.log_root = opt.log_root
        self.episode_n = opt.episode_n
        # self.policy_path = os.path.join(opt.log_root,
        #                 '{}_base/models/TD3_{}_0_actor'.format(opt.env,opt.env))
        # self.policy = TD3(self.policy_path,self.state_dim,self.action_dim,self.max_action)
        self.setup(opt)
        print('start collect')
        self.create_data()
        self.count = Count(opt)
        print('----------- Dataset initialized ---------------')
        print('-----------------------------------------------\n')
Example #13
def make(cfg):
    """Helper function to create dm_control environment"""
    action_repeat = cfg.get('action_repeat', 1)

    if cfg.env_name == 'dmc_ball_in_cup_catch':
        domain_name = 'ball_in_cup'
        task_name = 'catch'
    else:
        toks = cfg.env_name.split('_')[1:]
        domain_name = toks[0]
        task_name = '_'.join(toks[1:])

    env = dmc2gym.make(
        domain_name=domain_name,
        task_name=task_name,
        seed=cfg.seed,
        visualize_reward=False,
        frame_skip=action_repeat,
        from_pixels=cfg.pixels,
        height=64,
        width=64,
    )

    env.seed(cfg.seed)
    assert env.action_space.low.min() >= -1
    assert env.action_space.high.max() <= 1

    return env
Example #14
File: envs.py | Project: makdoudN/lea
def make_env(name: str, loader: str = "gym", **kwargs):
    rescale_action = kwargs.pop('rescale_action', True)  # pop so the flag isn't forwarded to the env constructors below
    env = None
    if loader == "gym":  # Base Wrapper
        try:
            from gym_extensions.continuous import mujoco
        except ImportError:
            print('gym_extensions import failed!')
        env = gym.make(name)
    elif loader == "metaworld":
        env = import_class_from_string(name)(**kwargs)
    elif loader == "dm_control":
        import dmc2gym
        # Useful options: frame_skip, from_pixels
        # Note: dmc2gym already normalizes the action space, but
        # RescaleAction can still be applied on top.
        # NOTE: in the future, maybe simply return the env.
        env = dmc2gym.make(domain_name=name, **kwargs)
    elif loader == 'atari':
        raise NotImplementedError()

    assert env is not None
    if isinstance(env.action_space, Box) and rescale_action:
        # Environment has continuous space and by default is normalized.
        env = RescaleAction(env, -1, 1)
    return env
Example #15
    def _thunk():
        if env_id.startswith("dm"):
            _, domain, task = env_id.split('.')
            env = dmc2gym.make(domain_name=domain, task_name=task)
            env = ClipAction(env)
        elif env_id.startswith("rrc"):
            _, ac_type, ac_wrapper = env_id.split('.')
            ts_relative, sa_relative = False, False
            scaled_ac, task_space = False, False
            if ac_wrapper.split('-')[0] == 'task':
                task_space = True
                ts_relative = ac_wrapper.split('-')[-1] == 'rel'
            elif ac_wrapper.split('-')[0] == 'scaled':
                scaled_ac = True
                sa_relative = ac_wrapper.split('-')[-1] == 'rel'
            env = rrc_utils.build_env_fn(
                    action_type=ac_type, initializer=None, scaled_ac=scaled_ac,
                    task_space=task_space, sa_relative=sa_relative,
                    ts_relative=ts_relative, goal_relative=True,
                    rew_fn='step')()
        else:
            env = gym.make(env_id)

        is_atari = hasattr(gym.envs, 'atari') and isinstance(
            env.unwrapped, gym.envs.atari.atari_env.AtariEnv)
        if is_atari:
            env = NoopResetEnv(env, noop_max=30)
            env = MaxAndSkipEnv(env, skip=4)

        env.seed(seed + rank)

        if str(env.__class__.__name__).find('TimeLimit') >= 0:
            env = TimeLimitMask(env)

        if log_dir is not None:
            env = Monitor(env,
                          os.path.join(log_dir, str(rank)),
                          allow_early_resets=allow_early_resets)

        if is_atari:
            if len(env.observation_space.shape) == 3:
                env = EpisodicLifeEnv(env)
                if "FIRE" in env.unwrapped.get_action_meanings():
                    env = FireResetEnv(env)
                env = WarpFrame(env, width=84, height=84)
                env = ClipRewardEnv(env)
        elif len(env.observation_space.shape) == 3:
            raise NotImplementedError(
                "CNN models work only for atari,\n"
                "please use a custom wrapper for a custom pixel input env.\n"
                "See wrap_deepmind for an example.")

        # If the input has shape (W,H,3), wrap for PyTorch convolutions
        obs_shape = env.observation_space.shape
        if len(obs_shape) == 3 and obs_shape[2] in [1, 3]:
            env = TransposeImage(env, op=[2, 0, 1])

        return env
Example #16
def make_env(cfg):
    """Helper function to create dm_control environment"""
    if cfg.env == 'ball_in_cup_catch':
        domain_name = 'ball_in_cup'
        task_name = 'catch'
    else:
        domain_name = cfg.env.split('_')[0]
        task_name = '_'.join(cfg.env.split('_')[1:])

    env = suite.load(domain_name, task_name)
    obs_space = int(
        sum([np.prod(s.shape) for s in env.observation_spec().values()]))
    train_factors = [
        np.eye(cfg.noise_dims) + i for i in range(cfg.num_train_envs)
    ]
    test_factors = [np.eye(cfg.noise_dims) * 3]
    # train_factors = [1, 2, 3]
    # test_factors = [4]
    train_envs = [
        dmc2gym.make(
            domain_name=domain_name,
            task_name=task_name,
            noise=cfg.noise,
            mult_factor=train_factors[idx],
            idx=idx,
            seed=cfg.seed,
            visualize_reward=True,
        ) for idx in range(cfg.num_train_envs)
    ]
    test_envs = [
        dmc2gym.make(
            domain_name=domain_name,
            task_name=task_name,
            noise=cfg.noise,
            mult_factor=test_factors[idx],
            idx=idx + cfg.num_train_envs,
            seed=cfg.seed,
            visualize_reward=True,
        ) for idx in range(len(test_factors))
    ]
    for env in train_envs:
        env.seed(cfg.seed)
    assert train_envs[0].action_space.low.min() >= -1
    assert train_envs[0].action_space.high.max() <= 1

    return train_envs, test_envs
Example #17
    def __init__(self, env_name):
        self.env_name = env_name
        open_ai_env = any(pref in env_name for pref in OPENAI_MUJOCO_PREFIX)
        if open_ai_env:
            self.env = gym.make(self.env_name)
        else:
            domain, task = env_name.split("-")
            self.env = dmc2gym.make(domain_name=domain, task_name=task)
Example #18
def load_policy_and_env(fpath, itr='last', deterministic=False, env_name=None):
    """
    Load a policy from save, whether it's TF or PyTorch, along with RL env.

    Not exceptionally future-proof, but it will suffice for basic uses of the 
    Spinning Up implementations.

    Checks to see if there's a tf1_save folder. If yes, assumes the model
    is tensorflow and loads it that way. Otherwise, loads as if there's a 
    PyTorch save.
    """

    # determine if tf save or pytorch save
    if any(['tf1_save' in x for x in os.listdir(fpath)]):
        backend = 'tf1'
    else:
        backend = 'pytorch'

    # handle which epoch to load from
    if itr == 'last':
        # check filenames for epoch (AKA iteration) numbers, find maximum value

        if backend == 'tf1':
            saves = [int(x[8:]) for x in os.listdir(fpath) if 'tf1_save' in x and len(x)>8]

        elif backend == 'pytorch':
            pytsave_path = osp.join(fpath, 'pyt_save')
            # Each file in this folder has naming convention 'modelXX.pt', where
            # 'XX' is either an integer or empty string. Empty string case
            # corresponds to len(x)==8, hence that case is excluded.
            saves = [int(x.split('.')[0][5:]) for x in os.listdir(pytsave_path) if len(x)>8 and 'model' in x]

        itr = '%d'%max(saves) if len(saves) > 0 else ''

    else:
        assert isinstance(itr, int), \
            "Bad value provided for itr (needs to be int or 'last')."
        itr = '%d'%itr

    # load the get_action function
    if backend == 'tf1':
        get_action = load_tf_policy(fpath, itr, deterministic)
    else:
        get_action = load_pytorch_policy(fpath, itr, deterministic)

    # try to load environment from save
    # (sometimes this will fail because the environment could not be pickled)
    try:
        state = joblib.load(osp.join(fpath, 'vars'+itr+'.pkl'))
        env = state['env']
    except Exception:
        if env_name == 'reacher':
            env = dmc2gym.make(domain_name='reacher', task_name='easy', episode_length=200)
        else:
            env = None

    return env, get_action
Example #19
def make_env():
    env = dmc2gym.make(domain_name=args.domain_name,
                       task_name=args.task_name,
                       seed=args.seed,
                       visualize_reward=False,
                       from_pixels=(args.encoder_type == 'pixel'),
                       height=args.image_size,
                       width=args.image_size,
                       frame_skip=args.action_repeat)
    return env  # assumed; the excerpt ends without a return
Example #20
def make_dmc(domain_name, task_name, action_repeat, image_size=64):
    env = dmc2gym.make(
        domain_name=domain_name,
        task_name=task_name,
        visualize_reward=False,
        from_pixels=True,
        height=image_size,
        width=image_size,
        frame_skip=action_repeat,
    )
    setattr(env, 'action_repeat', action_repeat)
    return env
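
A minimal usage sketch for the helper above, assuming dmc2gym is installed (dmc2gym defaults to channels-first pixel observations):

env = make_dmc('cheetah', 'run', action_repeat=2)
obs = env.reset()                                  # shape (3, 64, 64)
obs, reward, done, info = env.step(env.action_space.sample())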
Example #21
def make_env(cfg):
    """Helper function to create dm_control environment"""
    if cfg.env == 'ball_in_cup_catch':
        domain_name = 'ball_in_cup'
        task_name = 'catch'
    elif cfg.env == 'point_mass_easy':
        domain_name = 'point_mass'
        task_name = 'easy'
    elif cfg.env == 'cartpole_two_poles':
        domain_name = 'cartpole'
        task_name = 'two_poles'
    elif cfg.env == 'cartpole_three_poles':
        domain_name = 'cartpole'
        task_name = 'three_poles'
    else:
        domain_name = cfg.env.split('_')[0]
        task_name = '_'.join(cfg.env.split('_')[1:])

    # per dreamer: https://github.com/danijar/dreamer/blob/02f0210f5991c7710826ca7881f19c64a012290c/wrappers.py#L26
    camera_id = 2 if domain_name == 'quadruped' else 0

    env = dmc2gym.make(domain_name=domain_name,
                       task_name=task_name,
                       seed=cfg.seed,
                       visualize_reward=False,
                       from_pixels=True,
                       height=cfg.image_size,
                       width=cfg.image_size,
                       frame_skip=cfg.action_repeat,
                       camera_id=camera_id)

    # env = dmc2gym_noisy.make(
    #     domain_name=domain_name,
    #     task_name=task_name,
    #     resource_files='../../../../../experiments/distractors/images/*.mp4',
    #     img_source='video',
    #     total_frames=10000,
    #     seed=cfg.seed,
    #     visualize_reward=False,
    #     from_pixels=True,
    #     height=84,
    #     width=84,
    #     frame_skip=cfg.action_repeat,
    #     camera_id=camera_id
    # )

    env = utils.FrameStack(env, k=cfg.frame_stack)

    env.seed(cfg.seed)
    assert env.action_space.low.min() >= -1
    assert env.action_space.high.max() <= 1

    return env
Example #22
def eval(args):
    file_name = f"{args.policy}_{args.domain_name}_{args.seed}"
    print("---------------------------------------")
    print(f"Policy: {args.policy}, Env: {args.domain_name}, Seed: {args.seed}")
    print("---------------------------------------")

    log_path = safe_path(
        os.path.join(args.log_root,
                     '{}_{}_base'.format(args.domain_name, args.task_name)))
    result_path = safe_path(os.path.join(log_path, 'results'))
    model_path = safe_path(os.path.join(log_path, 'models_bak'))

    env = dmc2gym.make(domain_name=args.domain_name,
                       task_name=args.task_name,
                       seed=0,
                       visualize_reward=False,
                       from_pixels=False,
                       height=256,
                       width=256,
                       frame_skip=args.frame_skip)

    # Set seeds
    env.seed(args.seed)
    torch.manual_seed(args.seed)
    np.random.seed(args.seed)

    state_dim = env.observation_space.shape[0]
    action_dim = env.action_space.shape[0]
    max_action = float(env.action_space.high[0])

    kwargs = {
        "state_dim": state_dim,
        "action_dim": action_dim,
        "max_action": max_action,
        "discount": args.discount,
        "tau": args.tau,
    }

    # Initialize policy
    if args.policy == "TD3":
        # Target policy smoothing is scaled wrt the action scale
        kwargs["policy_noise"] = args.policy_noise * max_action
        kwargs["noise_clip"] = args.noise_clip * max_action
        kwargs["policy_freq"] = args.policy_freq
        policy = TD3.TD3(**kwargs)

    if args.load_model != "":
        policy_file = file_name if args.load_model == "default" else args.load_model
        policy.load(os.path.join(model_path, '{}'.format(policy_file)))

    # Evaluate untrained policy
    eval_policy(policy, env, args.seed)
Example #23
    def make_env(self, args=None, kwargs=None, dm_task_name=None):
        """Create dm_control/metaworld environment"""

        if self.metaworld_env:
            env = mtw_envs_rand[self.env_name](*args, **kwargs)

            if debug_mode:
                env._max_episode_steps = 10000

                env.reset()
                env.render()
                global action_to_take
                glfw.set_key_callback(env.unwrapped.viewer.window, on_press)

                while True:
                    env.render()

                    if not np.array_equal(action_to_take, np.zeros(6)):
                        _, _, d, _ = env.step(action_to_take)
                        if d:
                            env.seed(args.seed)
                            env.reset()
                            env.render()

                        # Commenting this out makes the mocap faster but
                        # introduces some instabilities.
                        # action_to_take = np.zeros(6)
        else:

            camera_id = 2 if self.domain_name == 'quadruped' else 0
            if dm_task_name is not None:
                task_name = dm_task_name
            else:
                task_name = self.task_name
            env = dmc2gym.make(domain_name=self.domain_name,
                               task_name=task_name,
                               seed=self.cfg.seed,
                               visualize_reward=False,
                               from_pixels=False,
                               height=self.cfg.image_size,
                               width=self.cfg.image_size,
                               frame_skip=self.cfg.action_repeat,
                               camera_id=camera_id)

            if debug_mode:
                from dm_control import viewer
                viewer.launch(env)

        env = FrameStack(env, k=self.cfg.frame_stack)
        env.seed(self.cfg.seed)

        return env
Example #24
    def _thunk():
        print(f"Using {env_id} environment")
        if env_id == "Warehouse":
            env = Warehouse(parameters)
        elif env_id == 'Sumo':
            # todo currently just using loop_network scene
            params = {'scene': "loop_network", 'libsumo': True}
            env = LoopNetwork(seed, params)
        else:
            if env_id.startswith("dm"):
                _, domain, task = env_id.split('.')
                env = dmc2gym.make(domain_name=domain, task_name=task)
                env = ClipAction(env)
            else:
                env = gym.make(env_id)

        is_atari = hasattr(gym.envs, 'atari') and isinstance(
            env.unwrapped, gym.envs.atari.atari_env.AtariEnv)
        if is_atari:
            env = NoopResetEnv(env, noop_max=30)
            env = MaxAndSkipEnv(env, skip=4)

        env.seed(seed + rank)

        if str(env.__class__.__name__).find('TimeLimit') >= 0:
            env = TimeLimitMask(env)

        if env_id not in ["Warehouse", "Sumo"]:
            if log_dir is not None:
                env = Monitor(env,
                              os.path.join(log_dir, str(rank)),
                              allow_early_resets=allow_early_resets)

            if is_atari:
                if len(env.observation_space.shape) == 3:
                    env = EpisodicLifeEnv(env)
                    if "FIRE" in env.unwrapped.get_action_meanings():
                        env = FireResetEnv(env)
                    env = WarpFrame(env, width=84, height=84)
                    env = ClipRewardEnv(env)
            elif len(env.observation_space.shape) == 3:
                raise NotImplementedError(
                    "CNN models work only for atari,\n"
                    "please use a custom wrapper for a custom pixel input env.\n"
                    "See wrap_deepmind for an example.")

        # If the input has shape (W,H,3), wrap for PyTorch convolutions
        obs_shape = env.observation_space.shape
        if len(obs_shape) == 3 and obs_shape[2] in [1, 3]:
            env = TransposeImage(env, op=[2, 0, 1])

        return env
Example #25
def make_dmc(domain_name, task_name, action_repeat, frame_stack=3,
             image_size=84):
    env = dmc2gym.make(
        domain_name=domain_name,
        task_name=task_name,
        visualize_reward=False,
        from_pixels=True,
        height=image_size,
        width=image_size,
        frame_skip=action_repeat
    )
    env = FrameStack(env, k=frame_stack)
    return env
Example #26
File: run_sacae.py | Project: ymd-h/tf2rl
def make_env():
    return DMCWrapper(dmc2gym.make(domain_name=domain_name,
                                   task_name=task_name,
                                   seed=args.seed,
                                   visualize_reward=False,
                                   from_pixels=True,
                                   height=input_obs_shape[0],
                                   width=input_obs_shape[1],
                                   frame_skip=action_repeat,
                                   channels_first=False),
                      obs_shape=input_obs_shape,
                      k=3,
                      channel_first=False)
Example #27
def make_env(bg, args):
    env = dmc2gym.make(
        domain_name=args.domain_name,
        task_name=args.task_name,
        seed=args.seed,
        visualize_reward=False,
        from_pixels=True,
        height=args.image_size,
        width=args.image_size,
        frame_skip=args.action_repeat,
        bg_color=bg,
    )
    # env.seed(args.seed)
    env = FrameStack(env, k=args.frame_stack)
    return env
Example #28
    def _thunk():
        if isinstance(env_id, Callable):
            env = env_id(**kwargs)

        elif env_id.startswith("dm"):
            _, domain, task = env_id.split('.')
            env = dmc2gym.make(domain_name=domain, task_name=task)
            env = ClipAction(env)

        else:
            env = gym.make(env_id)

        is_atari = hasattr(gym.envs, 'atari') and isinstance(
            env.unwrapped, gym.envs.atari.atari_env.AtariEnv)
        if is_atari:
            env = NoopResetEnv(env, noop_max=30)
            env = MaxAndSkipEnv(env, skip=4)

        env.seed(seed + rank)

        if str(env.__class__.__name__).find('TimeLimit') >= 0:
            env = TimeLimitMask(env)

        if log_dir is not None:
            env = Monitor(env,
                          os.path.join(log_dir, str(rank)),
                          allow_early_resets=allow_early_resets)

        if is_atari:
            if len(env.observation_space.shape) == 3:
                env = EpisodicLifeEnv(env)
                if "FIRE" in env.unwrapped.get_action_meanings():
                    env = FireResetEnv(env)
                env = WarpFrame(env, width=84, height=84)
                env = ClipRewardEnv(env)
        elif env.observation_space.shape and len(env.observation_space.shape) == 3:
            raise NotImplementedError(
                "CNN models work only for atari,\n"
                "please use a custom wrapper for a custom pixel input env.\n"
                "See wrap_deepmind for an example.")

        # If the input has shape (W,H,3), wrap for PyTorch convolutions
        if env.observation_space.shape:
            obs_shape = env.observation_space.shape
            if len(obs_shape) == 3 and obs_shape[2] in [1, 3]:
                env = TransposeImage(env, op=[2, 0, 1])

        return env
Example #29
def create_env(n_envs):
    """
    Create the environment and wrap it if necessary
    :param n_envs: (int)
    :return: (gym.Env)
    """
    global hyperparams

    if not is_dm_env:
        if is_atari:
            if args.verbose > 0:
                print("Using Atari wrapper")
            env = make_atari_env(env_id, num_env=n_envs, seed=args.seed)
            # Frame-stacking with 4 frames
            env = VecFrameStack(env, n_stack=4)
        elif algo_ in ['dqn', 'ddpg']:
            if hyperparams.get('normalize', False):
                print("WARNING: normalization not supported yet for DDPG/DQN")
            env = gym.make(env_id)
            env.seed(args.seed)
            if env_wrapper is not None:
                env = env_wrapper(env)
        else:
            if n_envs == 1:
                env = DummyVecEnv([make_env(env_id, 0, args.seed, wrapper_class=env_wrapper)])
            else:
                # env = SubprocVecEnv([make_env(env_id, i, args.seed) for i in range(n_envs)])
                # On most envs, SubprocVecEnv does not help and is quite memory hungry
                env = DummyVecEnv([make_env(env_id, i, args.seed, wrapper_class=env_wrapper) for i in range(n_envs)])
            if normalize:
                if args.verbose > 0:
                    if len(normalize_kwargs) > 0:
                        print("Normalization activated: {}".format(normalize_kwargs))
                    else:
                        print("Normalizing input and reward")
                env = VecNormalize(env, **normalize_kwargs)
        # Optional frame-stacking
        if hyperparams.get('frame_stack', False):
            n_stack = hyperparams['frame_stack']
            env = VecFrameStack(env, n_stack)
            print("Stacking {} frames".format(n_stack))
            del hyperparams['frame_stack']
    else:
        env = dmc2gym.make(domain_name=args.domain, task_name=args.task, seed=args.seed)
    return env
Example #30
def make_env(cfg):
    """Helper function to create dm_control environment"""
    if cfg.env == 'ball_in_cup_catch':
        domain_name = 'ball_in_cup'
        task_name = 'catch'
    else:
        domain_name = cfg.env.split('_')[0]
        task_name = '_'.join(cfg.env.split('_')[1:])

    env = dmc2gym.make(domain_name=domain_name,
                       task_name=task_name,
                       seed=cfg.seed,
                       visualize_reward=True)
    env.seed(cfg.seed)
    assert env.action_space.low.min() >= -1
    assert env.action_space.high.max() <= 1

    return env