Example #1
 def _get_viewer(self, camera_id):
     if self.viewer is None:
         from mujoco_py import GlfwContext
         GlfwContext(offscreen=True)
         self.viewer = mujoco_py.MjRenderContextOffscreen(self._env.sim, -1)
     self.viewer_setup(camera_id)
     return self.viewer
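The viewer returned above is typically used to render into the offscreen buffer and then read pixels back. Below is a minimal, self-contained sketch of that pattern, assuming mujoco_py's offscreen render context API; the model path and frame size are placeholders, not part of the example above.

import mujoco_py
from mujoco_py import GlfwContext

GlfwContext(offscreen=True)  # a GLFW context must exist before offscreen rendering

model = mujoco_py.load_model_from_path("model.xml")   # placeholder model file
sim = mujoco_py.MjSim(model)
viewer = mujoco_py.MjRenderContextOffscreen(sim, -1)  # same constructor call as in the example above

viewer.render(480, 480)                                # draw the scene into the offscreen buffer
frame = viewer.read_pixels(480, 480, depth=False)      # RGB array, rendered upside down
frame = frame[::-1, :, :]                              # flip vertically to get a conventional image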
Example #2
    def __init__(self,
                 distance_threshold=0.02,
                 n_substeps=N_SUBSTEPS,
                 relative_control=True,
                 initial_qpos=DEFAULT_INITIAL_QPOS,
                 success_multiplier=0.1,
                 force_finger=None):
        # init rendering [IMPORTANT]
        from mujoco_py import GlfwContext
        GlfwContext(offscreen=True)  # in newer versions of gym, use quiet=True to silence this

        self.total_steps = 0

        super().__init__(distance_threshold=distance_threshold,
                         n_substeps=n_substeps,
                         relative_control=relative_control,
                         initial_qpos=initial_qpos,
                         success_multiplier=success_multiplier,
                         force_finger=force_finger)

        # set hand and background colors
        self.sim.model.mat_rgba[2] = np.array([16, 18, 35, 255]) / 255
        self.sim.model.mat_rgba[4] = np.array([104, 143, 71, 255]) / 255
        self.sim.model.geom_rgba[48] = np.array([0.5, 0.5, 0.5, 0])

        # get touch sensor site names and their ids
        self._touch_sensor_id_site_id = []
        self._touch_sensor_id = []
        for k, v in self.sim.model._sensor_name2id.items():
            if 'robot0:TS_' in k:
                self._touch_sensor_id_site_id.append(
                    (v, self.sim.model._site_name2id[k.replace(
                        'robot0:TS_', 'robot0:T_')]))
                self._touch_sensor_id.append(v)
Example #3
def mujocopy_render_hack():
    render_hack = False  # set to True to apply the fix for a bad OpenGL context
    if render_hack:
        print("Setting an offscreen GlfwContext. See mujoco-py issue #390")
        from mujoco_py import GlfwContext

        GlfwContext(offscreen=True)  # Create a window to init GLFW.
    return
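For gym-wrapped MuJoCo environments, this workaround is usually applied once before the first call to render. A short sketch under that assumption; the environment name is an arbitrary choice and requires the matching MuJoCo assets.

import gym
from mujoco_py import GlfwContext

GlfwContext(offscreen=True)           # work around the bad OpenGL context (mujoco-py issue #390)

env = gym.make("HalfCheetah-v2")      # arbitrary MuJoCo task; any mujoco_py-based env works
env.reset()
frame = env.render(mode="rgb_array")  # returns an RGB array instead of opening a window
print(frame.shape)                    # (height, width, 3)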
Example #4
 def start(
     self,
     lock,
     train=True,
 ):
     if not self.cluster:
         GlfwContext(offscreen=True)  # Create a window to init GLFW.
     self.collectData(train, lock)
Example #5
 def __init__(self, past_frames=4):
     GlfwContext(offscreen=True)
     _FrameBufferEnv.__init__(self, past_frames)
     self._initialized = False
     path_to_xml = os.path.join(os.path.dirname(__file__),
                                'assets/hopper_flexible.xml')
     mujoco_env.MujocoEnv.__init__(self, path_to_xml, 4)
     utils.EzPickle.__init__(self)
Example #6
 def __init__(self, mode='hard', past_frames=4, l2_penalty=False):
     GlfwContext(offscreen=True)
     _FrameBufferEnv.__init__(self, past_frames)
     self._mode = mode
     self._l2_penalty = l2_penalty
     utils.EzPickle.__init__(self)
     path_to_xml = os.path.join(os.path.dirname(__file__),
                                'assets/custom_reacher_3_link.xml')
     mujoco_env.MujocoEnv.__init__(self, path_to_xml, 2)
Example #7
 def __init__(self, past_frames=4):
     GlfwContext(offscreen=True)
     _FrameBufferEnv.__init__(self, past_frames)
     self._initialized = False
     path_to_xml = os.path.join(os.path.dirname(__file__),
                                'assets/pusher_human_sim.xml')
     mujoco_env.MujocoEnv.__init__(self, path_to_xml, 2)
     utils.EzPickle.__init__(self)
     self.reset_model()
Example #8
 def __init__(self, past_frames=4, action_penalties=True):
     GlfwContext(offscreen=True)
     _FrameBufferEnv.__init__(self, past_frames)
     self._initialized = False
     self._action_penalties = action_penalties
     path_to_xml = os.path.join(os.path.dirname(__file__),
                                'assets/custom_half_cheetah.xml')
     mujoco_env.MujocoEnv.__init__(self, path_to_xml, 2)
     utils.EzPickle.__init__(self)
Example #9
    def __init__(self, past_frames=4):
        GlfwContext(offscreen=True)
        _FrameBufferEnv.__init__(self, past_frames)
        self._initialized = False

        utils.EzPickle.__init__(self)
        self._striked = False
        self._min_strike_dist = np.inf
        self.strike_threshold = 0.1
        path_to_xml = os.path.join(os.path.dirname(__file__),
                                   'assets/striker_human_sim.xml')
        mujoco_env.MujocoEnv.__init__(self, path_to_xml, 2)  # frame_skip=2 (previously 5)
Example #10
 def __init__(self, env, headless=True):
     if headless:
         # pylint: disable=import-outside-toplevel
         # this import fails without a valid mujoco license
         # so keep this here to avoid unnecessarily requiring
         # a mujoco license every time the wrappers package is
         # accessed.
         from mujoco_py import GlfwContext
         GlfwContext(offscreen=True)
     env.reset()
     env = gymWrapper(env)
     super().__init__(env)
     self._observation_space = env.observation_space['pixels']
Example #11
 def __init__(self,
              size=(32, 32),
              color_permutation=[0, 1, 2],
              smoothing_factor=0.0,
              past_frames=4,
              not_done=True):
     GlfwContext(offscreen=True)
     self._size = size
     self._not_done = not_done
     self._color_permutation = color_permutation
     self._smooth = 1.0 - smoothing_factor
     _FrameBufferEnv.__init__(self, past_frames)
     utils.EzPickle.__init__(self)
     mujoco_env.MujocoEnv.__init__(self, 'inverted_pendulum.xml', 2)
Example #12
 def __init__(self,
              size=(32, 32),
              color_permutation=[0, 1, 2],
              smoothing_factor=0.0,
              past_frames=4,
              not_done=True):
     GlfwContext(offscreen=True)
     self._size = size
     self._not_done = not_done
     self._failure = False
     self._color_permutation = color_permutation
     self._smooth = 1.0 - smoothing_factor
     _FrameBufferEnv.__init__(self, past_frames)
     utils.EzPickle.__init__(self)
     path_to_xml = os.path.join(
         os.path.dirname(__file__),
         'assets/custom_inverted_double_pendulum.xml')
     mujoco_env.MujocoEnv.__init__(self, path_to_xml, 2)
Example #13
import torch

import argparse
import sys

from lifelong_rl.envs.env_processor import make_env
import lifelong_rl.torch.pytorch_util as ptu
from lifelong_rl.util.visualize_mujoco import record_mujoco_video_from_states

from mujoco_py import GlfwContext
GlfwContext(offscreen=True)  # Create a window to init GLFW.
"""
Visualize replay buffer of agent and store as .mp4
"""


def get_env_states(snapshot_name):
    with open(snapshot_name + '.pt', 'rb') as f:
        snapshot = torch.load(f, map_location='cpu')
        env_states = snapshot['replay_buffer/env_states']
    return env_states


parser = argparse.ArgumentParser()
parser.add_argument(
    '--snapshot',
    '-name',
    type=str,
    help='Name of snapshot to visualize (ex. 12-07-hopper/run_1/itr_999)')
parser.add_argument('--env',
                    type=str,
Example #14
    def __init__(self,
                 model_path,
                 target_position,
                 target_rotation,
                 target_position_range,
                 reward_type,
                 initial_qpos={},
                 randomize_initial_position=True,
                 randomize_initial_rotation=True,
                 distance_threshold=0.01,
                 rotation_threshold=0.1,
                 n_substeps=N_SUBSTEPS,
                 relative_control=True,
                 ignore_z_target_rotation=False,
                 touch_visualisation="off",
                 touch_get_obs="sensordata",
                 visual_input: bool = False,
                 max_steps=100):
        """Initializes a new Hand manipulation environment with touch sensors.

        Args:
            touch_visualisation (string): how touch sensor sites are visualised
                - "on_touch": shows touch sensor sites only when touch values > 0
                - "always": always shows touch sensor sites
                - "off" or else: does not show touch sensor sites
            touch_get_obs (string): touch sensor readings
                - "boolean": returns 1 if touch sensor reading != 0.0 else 0
                - "sensordata": returns original touch sensor readings from self.sim.data.sensordata[id]
                - "log": returns log(x+1) touch sensor readings from self.sim.data.sensordata[id]
                - "off" or else: does not add touch sensor readings to the observation
            visual_input (bool): indicator whether the environment should return frames (True) or the exact object
                position (False)
            max_steps (int): maximum number of steps before episode is ended
        """

        if visual_input:
            # init rendering [IMPORTANT]
            from mujoco_py import GlfwContext
            GlfwContext(offscreen=True)  # in newer versions of gym, use quiet=True to silence this

        self.touch_visualisation = touch_visualisation
        self.touch_get_obs = touch_get_obs
        self.visual_input = visual_input
        self.touch_color = [1, 0, 0, 0.5]
        self.notouch_color = [0, 0.5, 0, 0.2]
        self.total_steps = 0
        self.max_steps = max_steps

        manipulate.ManipulateEnv.__init__(
            self,
            model_path,
            target_position,
            target_rotation,
            target_position_range,
            reward_type,
            initial_qpos=initial_qpos,
            randomize_initial_position=randomize_initial_position,
            randomize_initial_rotation=randomize_initial_rotation,
            distance_threshold=distance_threshold,
            rotation_threshold=rotation_threshold,
            n_substeps=n_substeps,
            relative_control=relative_control,
            ignore_z_target_rotation=ignore_z_target_rotation,
        )

        self._touch_sensor_id_site_id = []
        self._touch_sensor_id = []
        # get touch sensor site names and their ids
        for k, v in self.sim.model._sensor_name2id.items():
            if 'robot0:TS_' in k:
                self._touch_sensor_id_site_id.append(
                    (v, self.sim.model._site_name2id[k.replace(
                        'robot0:TS_', 'robot0:T_')]))
                self._touch_sensor_id.append(v)

        # set touch sensors rgba values
        if self.touch_visualisation == 'off':
            for _, site_id in self._touch_sensor_id_site_id:
                self.sim.model.site_rgba[site_id][3] = 0.0
        elif self.touch_visualisation == 'always':
            pass

        # set hand and background colors
        self.sim.model.mat_rgba[2] = np.array([16, 18, 35, 255]) / 255
        self.sim.model.mat_rgba[4] = np.array([104, 143, 71, 255]) / 255
        self.sim.model.geom_rgba[48] = np.array([0.5, 0.5, 0.5, 0])

        # set observation space
        self.observation_space = self._determine_observation_space()
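The touch_get_obs modes documented in the docstring above map raw sensor readings into the observation. Here is a hedged sketch of that mapping; the helper name is hypothetical, and sim and touch_sensor_id stand in for the attributes built in the constructor.

import numpy as np

def touch_observation(sim, touch_sensor_id, touch_get_obs):
    readings = sim.data.sensordata[touch_sensor_id]
    if touch_get_obs == "sensordata":
        return readings                              # raw touch sensor values
    if touch_get_obs == "boolean":
        return (readings != 0.0).astype(np.float32)  # 1 where touched, 0 otherwise
    if touch_get_obs == "log":
        return np.log(readings + 1.0)                # compress large contact forces
    return np.zeros(0)                               # "off" or anything else adds nothing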
Example #15
def main():
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--expert_policy_file', '-epf', type=str,
        required=True)  # relative to where you're running this script from
    parser.add_argument(
        '--expert_data', '-ed', type=str,
        required=True)  # relative to where you're running this script from
    parser.add_argument(
        '--env_name',
        '-env',
        type=str,
        help=
        'choices: Ant-v2, Humanoid-v2, Walker-v2, HalfCheetah-v2, Hopper-v2',
        required=True)
    parser.add_argument('--exp_name',
                        '-exp',
                        type=str,
                        default='pick an experiment name',
                        required=True)
    parser.add_argument('--do_dagger', action='store_true')
    parser.add_argument('--ep_len', type=int)

    parser.add_argument(
        '--num_agent_train_steps_per_iter', type=int, default=1000
    )  # number of gradient steps for training policy (per iter in n_iter)
    parser.add_argument('--n_iter', '-n', type=int, default=1)

    parser.add_argument(
        '--batch_size', type=int, default=1000
    )  # training data collected (in the env) during each iteration
    parser.add_argument(
        '--eval_batch_size', type=int,
        default=1000)  # eval data collected (in the env) for logging metrics
    parser.add_argument(
        '--train_batch_size', type=int, default=100
    )  # number of sampled data points to be used per gradient/train step

    parser.add_argument('--n_layers', type=int,
                        default=2)  # depth of the policy to be learned
    parser.add_argument(
        '--size', type=int,
        default=64)  # width of each layer of the policy to be learned
    parser.add_argument('--learning_rate', '-lr', type=float,
                        default=5e-3)  # LR for supervised learning

    parser.add_argument('--video_log_freq', type=int, default=5)
    parser.add_argument('--scalar_log_freq', type=int, default=1)
    parser.add_argument('--no_gpu', '-ngpu', action='store_true')
    parser.add_argument('--which_gpu', type=int, default=0)
    parser.add_argument('--max_replay_buffer_size', type=int, default=1000000)
    parser.add_argument('--save_params', action='store_true')
    parser.add_argument('--seed', type=int, default=1)
    args = parser.parse_args()

    # convert args to dictionary
    params = vars(args)

    ##################################
    ### CREATE DIRECTORY FOR LOGGING
    ##################################

    if args.do_dagger:
        # Use this prefix when submitting. The auto-grader uses this prefix.
        logdir_prefix = 'q2_'
        assert args.n_iter > 1, (
            'DAgger needs more than 1 iteration (n_iter>1) of training to iteratively query the expert and train (after first warm-starting from behavior cloning).'
        )
    else:
        # Use this prefix when submitting. The auto-grader uses this prefix.
        logdir_prefix = 'q1_'
        assert args.n_iter == 1, (
            'Vanilla behavior cloning collects expert data just once (n_iter=1)'
        )

    ## directory for logging
    data_path = os.path.join(os.path.dirname(os.path.realpath(__file__)),
                             '../../data')
    if not (os.path.exists(data_path)):
        os.makedirs(data_path)
    logdir = logdir_prefix + args.exp_name + '_' + args.env_name + '_' + time.strftime(
        "%d-%m-%Y_%H-%M-%S")
    logdir = os.path.join(data_path, logdir)
    params['logdir'] = logdir
    if not (os.path.exists(logdir)):
        os.makedirs(logdir)

    ###################
    ### RUN TRAINING
    ###################

    GlfwContext(offscreen=True)  # Create a window to init GLFW.

    trainer = BC_Trainer(params)
    trainer.run_training_loop()
Example #16
def main():
    args = parse_rl_args()

    logging.basicConfig(level=args.log_level)

    # Set a random seed used in PFRL.
    utils.set_random_seed(args.seed)

    # Set different random seeds for different subprocesses.
    # If seed=0 and processes=4, subprocess seeds are [0, 1, 2, 3].
    # If seed=1 and processes=4, subprocess seeds are [4, 5, 6, 7].
    process_seeds = np.arange(args.num_envs) + args.seed * args.num_envs
    assert process_seeds.max() < 2**32

    args.outdir = experiments.prepare_output_dir(args, args.outdir)
    print("Output files are saved in {}".format(args.outdir))

    def make_ant_env(idx, test):

        # use different seeds for train vs test envs
        process_seed = int(process_seeds[idx])

        env_seed = 2**32 - 1 - process_seed if test else process_seed
        # env_seed = np.random.randint(0, 2**32 - 1) if not test else process_seed
        print(env_seed)

        utils.set_random_seed(env_seed)
        # create the ant environment with a goal
        env = AntEnvWithGoal(create_maze_env(args.env),
                             args.env,
                             env_subgoal_dim=15)
        env.seed(int(env_seed))

        if args.render:
            env = pfrl.wrappers.GymLikeEnvRender(env, mode='human')

        return env

    def make_batch_ant_env(test):
        return pfrl.envs.MultiprocessVectorEnv([
            functools.partial(make_ant_env, idx, test)
            for idx in range(args.num_envs)
        ])

    eval_env = make_ant_env(0, test=True)

    env_state_dim = eval_env.state_dim
    env_action_dim = eval_env.action_dim
    env_subgoal_dim = eval_env.subgoal_dim

    # determined from the ant env
    if args.env == 'AntMaze' or args.env == 'AntPush':
        env_goal_dim = 2
    else:
        env_goal_dim = 3

    action_space = eval_env.action_space
    subgoal_space = eval_env.subgoal_space
    scale_low = action_space.high * np.ones(env_action_dim)
    scale_high = subgoal_space.high * np.ones(env_subgoal_dim)

    def low_level_burnin_action_func():
        """Select random actions until model is updated one or more times."""
        return np.random.uniform(action_space.low,
                                 action_space.high).astype(np.float32)

    def high_level_burnin_action_func():
        """Select random actions until model is updated one or more times."""
        return np.random.uniform(subgoal_space.low,
                                 subgoal_space.high).astype(np.float32)

    gpu = 0 if torch.cuda.is_available() else None
    agent = HIROAgent(
        state_dim=env_state_dim,
        action_dim=env_action_dim,
        goal_dim=env_goal_dim,
        subgoal_dim=env_subgoal_dim,
        high_level_burnin_action_func=high_level_burnin_action_func,
        low_level_burnin_action_func=low_level_burnin_action_func,
        scale_low=scale_low,
        scale_high=scale_high,
        buffer_size=200000,
        subgoal_freq=10,
        train_freq=10,
        reward_scaling=0.1,
        goal_threshold=5,
        gpu=gpu,
        add_entropy_layer=args.add_entropy_layer,
        soft_subgoal_update=args.soft_subgoal_update,
        temperature_high=args.temperature_high,
        temperature_low=args.temperature_low,
        optimize_high_temp=args.optimize_high_temp,
        optimize_low_temp=args.optimize_low_temp)

    if args.load:
        # load weights from a file if arg supplied
        agent.load(args.load)

    if args.record:
        from mujoco_py import GlfwContext
        GlfwContext(offscreen=True)

    if args.demo:
        eval_stats = experiments.eval_performance(
            env=eval_env,
            agent=agent,
            n_steps=None,
            n_episodes=args.eval_n_runs,
            video_outdir=args.outdir,
            step_number=-1 if args.record else None  # any non-None value enables recording
        )
        print("n_runs: {} mean: {} median: {} stdev {}".format(
            args.eval_n_runs,
            eval_stats["mean"],
            eval_stats["median"],
            eval_stats["stdev"],
        ))
    else:
        # train the hierarchical agent

        experiments.train_hrl_agent_with_evaluation(agent=agent,
                                                    env=make_ant_env(
                                                        0, test=False),
                                                    steps=args.steps,
                                                    outdir=args.outdir,
                                                    eval_n_steps=None,
                                                    eval_interval=5000,
                                                    eval_n_episodes=10,
                                                    use_tensorboard=True,
                                                    record=args.record)
Example #17
from torch import device
from mujoco_py.generated import const
from mujoco_py import GlfwContext
import numpy as np
import cv2
GlfwContext(offscreen=True)


class Play:
    def __init__(self, env, agent, env_name, max_episode=1):
        self.env = env
        self.max_episode = max_episode
        self.agent = agent
        _, self.state_rms_mean, self.state_rms_var = self.agent.load_weights()
        self.agent.set_to_eval_mode()
        self.device = device("cpu")
        self.fourcc = cv2.VideoWriter_fourcc(*'XVID')
        self.VideoWriter = cv2.VideoWriter(env_name + ".avi", self.fourcc, 50.0, (250, 250))

    def evaluate(self):

        for _ in range(self.max_episode):
            s = self.env.reset()
            episode_reward = 0
            for _ in range(self.env._max_episode_steps):
                s = np.clip((s - self.state_rms_mean) / (self.state_rms_var ** 0.5 + 1e-8), -5.0, 5.0)
                dist = self.agent.choose_dist(s)
                action = dist.sample().cpu().numpy()[0]
                s_, r, done, _ = self.env.step(action)
                episode_reward += r
                if done:
Example #18
def main():
    args = parse_rl_args()

    cfg = YAML().load(
        open(os.environ["FLIGHTMARE_PATH"] + "/flightlib/configs/vec_env.yaml",
             'r'))

    if not args.train:
        cfg["env"]["num_envs"] = 1
        cfg["env"]["num_threads"] = 1

    if args.render:
        cfg["env"]["render"] = "yes"
    else:
        cfg["env"]["render"] = "no"

    logging.basicConfig(level=args.log_level)

    # Set a random seed used in PFRL.
    utils.set_random_seed(args.seed)

    # set random seed

    # Set different random seeds for different subprocesses.
    # If seed=0 and processes=4, subprocess seeds are [0, 1, 2, 3].
    # If seed=1 and processes=4, subprocess seeds are [4, 5, 6, 7].
    process_seeds = np.arange(args.num_envs) + args.seed * args.num_envs
    assert process_seeds.max() < 2**32

    args.outdir = experiments.prepare_output_dir(args, args.outdir)
    print("Output files are saved in {}".format(args.outdir))

    def make_drone_env(cfg, idx, test):

        # use different seeds for train vs test envs
        process_seed = int(process_seeds[idx])
        env_seed = 2**32 - 1 - process_seed if test else process_seed
        # env_seed = np.random.randint(0, 2**32 - 1) if not test else process_seed
        utils.set_random_seed(env_seed)
        # create the goal-conditioned drone environment
        env = wrapper.FlightEnvVec(
            QuadrotorGoalConditionedEnv_v1(dump(cfg, Dumper=RoundTripDumper),
                                           False))
        env.seed(env_seed)

        if args.render:
            env = pfrl.wrappers.GymLikeEnvRender(env)

        return env

    eval_env = make_drone_env(cfg, 0, test=True)

    env_state_dim = eval_env.state_dim
    env_action_dim = eval_env.action_dim

    env_subgoal_dim = 12
    # env_subgoal_dim = eval_env.subgoal_dim

    env_goal_dim = eval_env.obs_dim

    action_space = eval_env.action_space
    subgoal_space = eval_env.subgoal_space
    scale_low = action_space.high * np.ones(env_action_dim)
    # create subgoal space in env!
    scale_high = subgoal_space.high * np.ones(env_subgoal_dim)
    print(action_space.high, action_space.low)

    def low_level_burnin_action_func():
        """Select random actions until model is updated one or more times."""
        return np.random.uniform(action_space.low,
                                 action_space.high).astype(np.float32)

    def high_level_burnin_action_func():
        """Select random actions until model is updated one or more times."""
        return np.random.uniform(subgoal_space.low,
                                 subgoal_space.high).astype(np.float32)

    gpu = 0 if torch.cuda.is_available() else None
    agent = HIROAgent(
        state_dim=env_state_dim,
        action_dim=env_action_dim,
        goal_dim=env_goal_dim,
        subgoal_dim=env_subgoal_dim,
        high_level_burnin_action_func=high_level_burnin_action_func,
        low_level_burnin_action_func=low_level_burnin_action_func,
        scale_low=scale_low,
        scale_high=scale_high,
        buffer_size=200000,
        subgoal_freq=10,
        train_freq=10,
        reward_scaling=0.1,
        goal_threshold=5,
        gpu=gpu,
        add_entropy=args.add_entropy)
    print(args.add_entropy)
    if args.load:
        # load weights from a file if arg supplied
        agent.load(args.load)

    if args.record:
        from mujoco_py import GlfwContext
        GlfwContext(offscreen=True)

    if args.demo:
        eval_stats = experiments.eval_performance(
            env=eval_env,
            agent=agent,
            n_steps=None,
            n_episodes=args.eval_n_runs,
            video_outdir=args.outdir,
            step_number=-1 if args.record else None  # any non-None value enables recording
        )
        print("n_runs: {} mean: {} median: {} stdev {}".format(
            args.eval_n_runs,
            eval_stats["mean"],
            eval_stats["median"],
            eval_stats["stdev"],
        ))
    else:
        # train the hierarchical agent

        experiments.train_hrl_agent_with_evaluation(agent=agent,
                                                    env=make_drone_env(
                                                        cfg, 0, test=False),
                                                    steps=args.steps,
                                                    outdir=args.outdir,
                                                    eval_n_steps=None,
                                                    eval_interval=5000,
                                                    eval_n_episodes=10,
                                                    use_tensorboard=True,
                                                    record=args.record)