def main(args):
    if args.cpu_only == True:
        cpu = tf.config.experimental.list_physical_devices(device_type='CPU')
        tf.config.experimental.set_visible_devices(devices=cpu, device_type='CPU')

    # random seed setting
    if args.random_seed <= 0:
        random_seed = np.random.randint(1, 9999)
    else:
        random_seed = args.random_seed

    tf.random.set_seed(random_seed)
    np.random.seed(random_seed)
    random.seed(random_seed)

    domain_name = args.env_name.split('/')[0]
    task_name = args.env_name.split('/')[1]

    env = dmc2gym.make(domain_name=domain_name, task_name=task_name, seed=random_seed,
                       visualize_reward=False, from_pixels=True, height=args.image_size,
                       width=args.image_size, frame_skip=args.frame_skip)  # pre-transform image size for CURL, image size for DBC
    env = FrameStack(env, k=args.frame_stack)

    test_env = dmc2gym.make(domain_name=domain_name, task_name=task_name, seed=random_seed,
                            visualize_reward=False, from_pixels=True, height=args.image_size,
                            width=args.image_size, frame_skip=args.frame_skip)  # pre-transform image size for CURL, image size for DBC
    test_env = FrameStack(test_env, k=args.frame_stack)

    state_dim = (3 * args.frame_stack, args.image_size, args.image_size)
    action_dim = env.action_space.shape[0]
    max_action = env.action_space.high[0]
    min_action = env.action_space.low[0]

    if args.algorithm == 'SACv1':
        algorithm = 0
    elif args.algorithm == 'SACv2':
        algorithm = SACv2_AE(state_dim, action_dim, args)

    print("Training of", env.unwrapped.spec.id)
    print("Algorithm:", algorithm.name)
    print("State dim:", state_dim)
    print("Action dim:", action_dim)
    print("Max action:", max_action)
    print("Min action:", min_action)

    trainer = Basic_trainer(env, test_env, algorithm, max_action, min_action, args)
    trainer.run()
def main(args):
    if args.cpu_only == True:
        cpu = tf.config.experimental.list_physical_devices(device_type='CPU')
        tf.config.experimental.set_visible_devices(devices=cpu, device_type='CPU')

    # random seed setting
    if args.random_seed <= 0:
        random_seed = np.random.randint(1, 9999)
    else:
        random_seed = args.random_seed

    tf.random.set_seed(random_seed)
    np.random.seed(random_seed)
    random.seed(random_seed)

    # env setting
    if args.domain_type == 'gym':  # openai gym
        env = gym.make(args.env_name)
        env.seed(random_seed)
        env.action_space.seed(random_seed)

        test_env = gym.make(args.env_name)
        test_env.seed(random_seed)
        test_env.action_space.seed(random_seed)
    else:  # deepmind control suite
        env = dmc2gym.make(domain_name=args.env_name.split('/')[0],
                           task_name=args.env_name.split('/')[1],
                           seed=random_seed)
        test_env = dmc2gym.make(domain_name=args.env_name.split('/')[0],
                                task_name=args.env_name.split('/')[1],
                                seed=random_seed)

    if args.discrete == True:
        state_dim = env.observation_space.shape[0]
        action_dim = env.action_space.n
        max_action = 1
        min_action = 1
    else:
        state_dim = env.observation_space.shape[0]
        action_dim = env.action_space.shape[0]
        max_action = env.action_space.high[0]
        min_action = env.action_space.low[0]

    algorithm = TRPO(state_dim, action_dim, args)

    print("Training of", env.unwrapped.spec.id)
    print("Algorithm:", algorithm.name)
    print("State dim:", state_dim)
    print("Action dim:", action_dim)
    print("Max action:", max_action)
    print("Min action:", min_action)
    print("Discrete: ", args.discrete)

    trainer = Basic_trainer(env, test_env, algorithm, max_action, min_action, args)
    trainer.run()
def test_freeze():
    _freeze_mujoco_gym_env(gym.make("HalfCheetah-v2"))
    _freeze_mujoco_gym_env(dmc2gym.make(domain_name="cheetah", task_name="run"))
    _freeze_mujoco_gym_env(gym.make("Hopper-v2"))
    _freeze_mujoco_gym_env(dmc2gym.make(domain_name="hopper", task_name="stand"))
    _freeze_mujoco_gym_env(gym.make("Humanoid-v2"))
    _freeze_mujoco_gym_env(dmc2gym.make(domain_name="humanoid", task_name="run"))
def make_env(cfg):
    """Helper function to create dm_control environment"""
    if cfg.env == 'ball_in_cup_catch':
        domain_name = 'ball_in_cup'
        task_name = 'catch'
    else:
        domain_name = cfg.env.split('_')[0]
        task_name = '_'.join(cfg.env.split('_')[1:])

    env = dmc2gym.make(domain_name=domain_name,
                       task_name=task_name,
                       seed=cfg.seed,
                       visualize_reward=False,
                       from_pixels=cfg.from_pixels,
                       height=cfg.height,
                       width=cfg.width,
                       camera_id=cfg.camera_id,
                       frame_skip=cfg.frame_skip,
                       channels_first=False)
    env.seed(cfg.seed)
    assert env.action_space.low.min() >= -1
    assert env.action_space.high.max() <= 1

    if cfg.from_pixels == True:
        model_path = None
        if cfg.encoder_type == "VanillaVAE":
            model_path = home + "/pytorch_sac/ckpts/" + cfg.env + "/_ckpt.ckpt"
            print("VanillaVAE encoder path: ", model_path)
        env = ObservationWrapper(env, cfg.encoder_type, model_path, latent_dim=512)

    return env
def __init__(self, opt):
    self.opt = opt
    self.env = dmc2gym.make(domain_name=opt.domain_name,
                            task_name=opt.task_name,
                            seed=0,
                            visualize_reward=False,
                            from_pixels=True,
                            height=256,
                            width=256,
                            frame_skip=opt.frame_skip)
    self.env.seed(0)
    random.seed(0)

    # self.state_dim = self.env.observation_space.shape[0]
    if self.opt.domain_name == 'finger':
        self.state_dim = 9
    elif self.opt.domain_name == 'reacher':
        self.state_dim = 6
    # self.state_dim = self.env.observation_space.shape[0] if opt.state_dim == 0 else opt.state_dim
    self.action_dim = self.env.action_space.shape[0]
    self.max_action = float(self.env.action_space.high[0])

    self.log_root = opt.log_root
    self.episode_n = opt.episode_n
    log_path = os.path.join(opt.log_root,
                            '{}_{}_base'.format(opt.domain_name, opt.task_name))
    self.policy_path = os.path.join(log_path,
                                    'models/TD3_{}_0_actor'.format(opt.domain_name))
    if self.opt.domain_name == 'finger':
        self.policy_path = os.path.join(log_path,
                                        'models_bak/TD3_{}_0_actor'.format(opt.domain_name))
    self.policy = TD3(self.policy_path, self.state_dim, self.action_dim, self.max_action, opt)
def make_pad_env(domain_name, task_name, seed=0, episode_length=1000,
                 frame_stack=3, action_repeat=4, mode='train'):
    """Make environment for PAD experiments"""
    env = dmc2gym.make(domain_name=domain_name,
                       task_name=task_name,
                       seed=seed,
                       visualize_reward=False,
                       from_pixels=True,
                       height=100,
                       width=100,
                       episode_length=episode_length,
                       frame_skip=action_repeat)
    env.seed(seed)
    env = GreenScreen(env, mode)
    env = FrameStack(env, frame_stack)
    env = ColorWrapper(env, mode)
    assert env.action_space.low.min() >= -1
    assert env.action_space.high.max() <= 1
    return env
def make_env(cfg):
    """Helper function to create dm_control environment"""
    if cfg.env == 'ball_in_cup_catch':
        domain_name = 'ball_in_cup'
        task_name = 'catch'
    else:
        domain_name = cfg.env.split('_')[0]
        task_name = '_'.join(cfg.env.split('_')[1:])

    env = dmc2gym.make(
        domain_name=domain_name,
        task_name=task_name,
        seed=cfg.seed,
        visualize_reward=False,
        from_pixels=cfg.from_pixels,
        height=cfg.height,
        width=cfg.width,
        camera_id=cfg.camera_id,
        frame_skip=cfg.frame_skip,
        channels_first=False,
    )
    env.seed(cfg.seed)
    assert env.action_space.low.min() >= -1
    assert env.action_space.high.max() <= 1
    return env
def make(*args, frame_stack=3, from_pixels=True, height=84, width=84, frame_skip=4, **kwargs):
    env = dmc2gym.make(*args, frame_skip=frame_skip, visualize_reward=False,
                       from_pixels=from_pixels, height=height, width=width, **kwargs)
    if isinstance(env, TimeLimit):
        # Strip the gym TimeLimit wrapper and replace with one which
        # outputs TimeLimit.truncated=True at max_episode_steps - 1,
        # because that's when the dmc2gym env seems to end the episode.
        print("WARNING: replacing Gym TimeLimit wrapper by TimeLimitMinusOne")
        env = TimeLimitMinusOne(env.env)
    if from_pixels:
        env = FrameStack(env, k=frame_stack)
    elif frame_stack != 1:
        print("WARNING: dmcontrol.make() requested with frame_stack>1, but not"
              " doing it on state.")
    env = GymEnvWrapper(env)
    env._frame_skip = frame_skip
    return env
def make_env(domain_name, task_name, seed=0, episode_length=1000, frame_stack=3,
             action_repeat=4, image_size=100, mode='train'):
    """Make environment for experiments"""
    assert mode in {'train', 'color_easy', 'color_hard', 'video_easy', 'video_hard'}, \
        f'specified mode "{mode}" is not supported'
    env = dmc2gym.make(domain_name=domain_name,
                       task_name=task_name,
                       seed=seed,
                       visualize_reward=False,
                       from_pixels=True,
                       height=image_size,
                       width=image_size,
                       episode_length=episode_length,
                       frame_skip=action_repeat)
    env = VideoWrapper(env, mode, seed)
    env = FrameStack(env, frame_stack)
    env = ColorWrapper(env, mode, seed)
    return env
def make_env(cfg):
    """Helper function to create dm_control environment"""
    if cfg.env == 'ball_in_cup_catch':
        domain_name = 'ball_in_cup'
        task_name = 'catch'
    elif cfg.env == 'point_mass_easy':
        domain_name = 'point_mass'
        task_name = 'easy'
    else:
        domain_name = cfg.env.split('_')[0]
        task_name = '_'.join(cfg.env.split('_')[1:])

    # per dreamer: https://github.com/danijar/dreamer/blob/02f0210f5991c7710826ca7881f19c64a012290c/wrappers.py#L26
    camera_id = 2 if domain_name == 'quadruped' else 0

    env = dmc2gym.make(domain_name=domain_name,
                       task_name=task_name,
                       seed=cfg.seed,
                       visualize_reward=False,
                       from_pixels=True,
                       height=cfg.image_size,
                       width=cfg.image_size,
                       frame_skip=cfg.action_repeat,
                       camera_id=camera_id)
    env = utils.FrameStack(env, k=cfg.frame_stack)
    env.seed(cfg.seed)
    assert env.action_space.low.min() >= -1
    assert env.action_space.high.max() <= 1
    return env
def initialize_environment(domain_name, task_name, seed, frame_skip):
    """
    Initialize the Environment

    :param domain_name:
    :param task_name:
    :param seed:
    :param frame_skip:
    :return:
    """
    LogHelper.print_step_log(f"Initialize Environment: {domain_name}/{task_name} ...")
    env = dmc2gym.make(domain_name=domain_name, task_name=task_name, seed=seed, frame_skip=frame_skip)

    # Debug logging to check environment specs
    s = env.reset()
    a = env.action_space.sample()
    action_dim = env.action_space.shape[0]
    state_dim = env.observation_space.shape[0]
    logging.debug(f'Sample state: {s}')
    logging.debug(f'Sample action: {a}')
    logging.debug(f'State DIM: {state_dim}')
    logging.debug(f'Action DIM: {action_dim}')

    return env, action_dim, state_dim
def __init__(self, opt):
    self.opt = opt
    self.env = dmc2gym.make(
        domain_name=opt.domain_name,
        task_name=opt.task_name,
        seed=0,
        visualize_reward=False,
        from_pixels=True,
        height=256,
        width=256,
        frame_skip=opt.frame_skip
    )
    self.env.seed(0)
    random.seed(0)

    self.state_dim = self.env.observation_space.shape[0]
    self.action_dim = self.env.action_space.shape[0]
    self.max_action = float(self.env.action_space.high[0])
    self.log_root = opt.log_root
    self.episode_n = opt.episode_n

    # self.policy_path = os.path.join(opt.log_root,
    #                                 '{}_base/models/TD3_{}_0_actor'.format(opt.env, opt.env))
    # self.policy = TD3(self.policy_path, self.state_dim, self.action_dim, self.max_action)
    self.setup(opt)
    print('start collect')
    self.create_data()
    self.count = Count(opt)
    print('----------- Dataset initialized ---------------')
    print('-----------------------------------------------\n')
def make(cfg):
    """Helper function to create dm_control environment"""
    action_repeat = cfg.get('action_repeat', 1)
    if cfg.env_name == 'dmc_ball_in_cup_catch':
        domain_name = 'ball_in_cup'
        task_name = 'catch'
    else:
        toks = cfg.env_name.split('_')[1:]
        domain_name = toks[0]
        task_name = '_'.join(toks[1:])

    env = dmc2gym.make(
        domain_name=domain_name,
        task_name=task_name,
        seed=cfg.seed,
        visualize_reward=False,
        frame_skip=action_repeat,
        from_pixels=cfg.pixels,
        height=64,
        width=64,
    )
    env.seed(cfg.seed)
    assert env.action_space.low.min() >= -1
    assert env.action_space.high.max() <= 1
    return env
def make_env(name: str, loader: str = "gym", **kwargs):
    rescale_action = kwargs.get('rescale_action', True)
    env = None
    if loader == "gym":
        # Base Wrapper
        try:
            from gym_extensions.continuous import mujoco
        except:
            print('gym_extensions import failure !')
            pass
        env = gym.make(name)
    elif loader == "metaworld":
        env = import_class_from_string(name)(**kwargs)
    elif loader == "dm_control":
        import dmc2gym
        # Useful options: frame_skip, from_pixels.
        # Note: dmc2gym already normalizes the action space, but we can
        # still apply RescaleAction.
        # NOTE: in the future, maybe simply return the env.
        env = dmc2gym.make(domain_name=name, **kwargs)
    elif loader == 'atari':
        raise NotImplementedError()

    assert env is not None
    if isinstance(env.action_space, Box) and rescale_action:
        # Environment has a continuous action space and is normalized by default.
        env = RescaleAction(env, -1, 1)
    return env
def _thunk():
    if env_id.startswith("dm"):
        _, domain, task = env_id.split('.')
        env = dmc2gym.make(domain_name=domain, task_name=task)
        env = ClipAction(env)
    elif env_id.startswith("rrc"):
        _, ac_type, ac_wrapper = env_id.split('.')
        ts_relative, sa_relative = False, False
        scaled_ac, task_space = False, False
        if ac_wrapper.split('-')[0] == 'task':
            task_space = True
            ts_relative = ac_wrapper.split('-')[-1] == 'rel'
        elif ac_wrapper.split('-')[0] == 'scaled':
            scaled_ac = True
            sa_relative = ac_wrapper.split('-')[-1] == 'rel'
        env = rrc_utils.build_env_fn(
            action_type=ac_type, initializer=None, scaled_ac=scaled_ac,
            task_space=task_space, sa_relative=sa_relative,
            ts_relative=ts_relative, goal_relative=True, rew_fn='step')()
    else:
        env = gym.make(env_id)

    is_atari = hasattr(gym.envs, 'atari') and isinstance(
        env.unwrapped, gym.envs.atari.atari_env.AtariEnv)
    if is_atari:
        env = NoopResetEnv(env, noop_max=30)
        env = MaxAndSkipEnv(env, skip=4)

    env.seed(seed + rank)

    if str(env.__class__.__name__).find('TimeLimit') >= 0:
        env = TimeLimitMask(env)

    if log_dir is not None:
        env = Monitor(env, os.path.join(log_dir, str(rank)),
                      allow_early_resets=allow_early_resets)

    if is_atari:
        if len(env.observation_space.shape) == 3:
            env = EpisodicLifeEnv(env)
            if "FIRE" in env.unwrapped.get_action_meanings():
                env = FireResetEnv(env)
            env = WarpFrame(env, width=84, height=84)
            env = ClipRewardEnv(env)
    elif len(env.observation_space.shape) == 3:
        raise NotImplementedError(
            "CNN models work only for atari,\n"
            "please use a custom wrapper for a custom pixel input env.\n"
            "See wrap_deepmind for an example.")

    # If the input has shape (W,H,3), wrap for PyTorch convolutions
    obs_shape = env.observation_space.shape
    if len(obs_shape) == 3 and obs_shape[2] in [1, 3]:
        env = TransposeImage(env, op=[2, 0, 1])

    return env
def make_env(cfg):
    """Helper function to create dm_control environment"""
    if cfg.env == 'ball_in_cup_catch':
        domain_name = 'ball_in_cup'
        task_name = 'catch'
    else:
        domain_name = cfg.env.split('_')[0]
        task_name = '_'.join(cfg.env.split('_')[1:])

    env = suite.load(domain_name, task_name)
    obs_space = int(sum([np.prod(s.shape) for s in env.observation_spec().values()]))

    train_factors = [np.eye(cfg.noise_dims) + i for i in range(cfg.num_train_envs)]
    test_factors = [np.eye(cfg.noise_dims) * 3]
    # train_factors = [1, 2, 3]
    # test_factors = [4]

    train_envs = [
        dmc2gym.make(
            domain_name=domain_name,
            task_name=task_name,
            noise=cfg.noise,
            mult_factor=train_factors[idx],
            idx=idx,
            seed=cfg.seed,
            visualize_reward=True,
        ) for idx in range(cfg.num_train_envs)
    ]
    test_envs = [
        dmc2gym.make(
            domain_name=domain_name,
            task_name=task_name,
            noise=cfg.noise,
            mult_factor=test_factors[idx],
            idx=idx + cfg.num_train_envs,
            seed=cfg.seed,
            visualize_reward=True,
        ) for idx in range(len(test_factors))
    ]
    [env.seed(cfg.seed) for env in train_envs]

    assert train_envs[0].action_space.low.min() >= -1
    assert train_envs[0].action_space.high.max() <= 1
    return train_envs, test_envs
def __init__(self, env_name):
    self.env_name = env_name
    open_ai_env = len([pref for pref in OPENAI_MUJOCO_PREFIX if pref in env_name]) > 0
    if open_ai_env:
        self.env = gym.make(self.env_name)
    else:
        domain, task = env_name.split("-")
        self.env = dmc2gym.make(domain_name=domain, task_name=task)
def load_policy_and_env(fpath, itr='last', deterministic=False, env_name=None):
    """
    Load a policy from save, whether it's TF or PyTorch, along with RL env.

    Not exceptionally future-proof, but it will suffice for basic uses of the
    Spinning Up implementations.

    Checks to see if there's a tf1_save folder. If yes, assumes the model
    is tensorflow and loads it that way. Otherwise, loads as if there's a
    PyTorch save.
    """

    # determine if tf save or pytorch save
    if any(['tf1_save' in x for x in os.listdir(fpath)]):
        backend = 'tf1'
    else:
        backend = 'pytorch'

    # handle which epoch to load from
    if itr == 'last':
        # check filenames for epoch (AKA iteration) numbers, find maximum value
        if backend == 'tf1':
            saves = [int(x[8:]) for x in os.listdir(fpath) if 'tf1_save' in x and len(x) > 8]
        elif backend == 'pytorch':
            pytsave_path = osp.join(fpath, 'pyt_save')
            # Each file in this folder has naming convention 'modelXX.pt', where
            # 'XX' is either an integer or empty string. Empty string case
            # corresponds to len(x)==8, hence that case is excluded.
            saves = [int(x.split('.')[0][5:]) for x in os.listdir(pytsave_path)
                     if len(x) > 8 and 'model' in x]
        itr = '%d' % max(saves) if len(saves) > 0 else ''
    else:
        assert isinstance(itr, int), \
            "Bad value provided for itr (needs to be int or 'last')."
        itr = '%d' % itr

    # load the get_action function
    if backend == 'tf1':
        get_action = load_tf_policy(fpath, itr, deterministic)
    else:
        get_action = load_pytorch_policy(fpath, itr, deterministic)

    # try to load environment from save
    # (sometimes this will fail because the environment could not be pickled)
    try:
        state = joblib.load(osp.join(fpath, 'vars' + itr + '.pkl'))
        env = state['env']
    except:
        if env_name == 'reacher':
            env = dmc2gym.make(domain_name='reacher', task_name='easy', episode_length=200)
        else:
            env = None

    return env, get_action
def make_env():
    env = dmc2gym.make(domain_name=args.domain_name,
                       task_name=args.task_name,
                       seed=args.seed,
                       visualize_reward=False,
                       from_pixels=(args.encoder_type == 'pixel'),
                       height=args.image_size,
                       width=args.image_size,
                       frame_skip=args.action_repeat)
    return env
def make_dmc(domain_name, task_name, action_repeat, image_size=64):
    env = dmc2gym.make(
        domain_name=domain_name,
        task_name=task_name,
        visualize_reward=False,
        from_pixels=True,
        height=image_size,
        width=image_size,
        frame_skip=action_repeat,
    )
    setattr(env, 'action_repeat', action_repeat)
    return env
def make_env(cfg):
    """Helper function to create dm_control environment"""
    if cfg.env == 'ball_in_cup_catch':
        domain_name = 'ball_in_cup'
        task_name = 'catch'
    elif cfg.env == 'point_mass_easy':
        domain_name = 'point_mass'
        task_name = 'easy'
    elif cfg.env == 'cartpole_two_poles':
        domain_name = 'cartpole'
        task_name = 'two_poles'
    elif cfg.env == 'cartpole_three_poles':
        domain_name = 'cartpole'
        task_name = 'three_poles'
    else:
        domain_name = cfg.env.split('_')[0]
        task_name = '_'.join(cfg.env.split('_')[1:])

    # per dreamer: https://github.com/danijar/dreamer/blob/02f0210f5991c7710826ca7881f19c64a012290c/wrappers.py#L26
    camera_id = 2 if domain_name == 'quadruped' else 0

    env = dmc2gym.make(domain_name=domain_name,
                       task_name=task_name,
                       seed=cfg.seed,
                       visualize_reward=False,
                       from_pixels=True,
                       height=cfg.image_size,
                       width=cfg.image_size,
                       frame_skip=cfg.action_repeat,
                       camera_id=camera_id)
    # env = dmc2gym_noisy.make(
    #     domain_name=domain_name,
    #     task_name=task_name,
    #     resource_files='../../../../../experiments/distractors/images/*.mp4',
    #     img_source='video',
    #     total_frames=10000,
    #     seed=cfg.seed,
    #     visualize_reward=False,
    #     from_pixels=True,
    #     height=84,
    #     width=84,
    #     frame_skip=cfg.action_repeat,
    #     camera_id=camera_id
    # )
    env = utils.FrameStack(env, k=cfg.frame_stack)
    env.seed(cfg.seed)
    assert env.action_space.low.min() >= -1
    assert env.action_space.high.max() <= 1
    return env
def eval(args):
    file_name = f"{args.policy}_{args.domain_name}_{args.seed}"
    print("---------------------------------------")
    print(f"Policy: {args.policy}, Env: {args.domain_name}, Seed: {args.seed}")
    print("---------------------------------------")

    log_path = safe_path(os.path.join(args.log_root,
                                      '{}_{}_base'.format(args.domain_name, args.task_name)))
    result_path = safe_path(os.path.join(log_path, 'results'))
    model_path = safe_path(os.path.join(log_path, 'models_bak'))

    env = dmc2gym.make(domain_name=args.domain_name,
                       task_name=args.task_name,
                       seed=0,
                       visualize_reward=False,
                       from_pixels=False,
                       height=256,
                       width=256,
                       frame_skip=args.frame_skip)

    # Set seeds
    env.seed(args.seed)
    torch.manual_seed(args.seed)
    np.random.seed(args.seed)

    state_dim = env.observation_space.shape[0]
    action_dim = env.action_space.shape[0]
    max_action = float(env.action_space.high[0])

    kwargs = {
        "state_dim": state_dim,
        "action_dim": action_dim,
        "max_action": max_action,
        "discount": args.discount,
        "tau": args.tau,
    }

    # Initialize policy
    if args.policy == "TD3":
        # Target policy smoothing is scaled wrt the action scale
        kwargs["policy_noise"] = args.policy_noise * max_action
        kwargs["noise_clip"] = args.noise_clip * max_action
        kwargs["policy_freq"] = args.policy_freq
        policy = TD3.TD3(**kwargs)

    if args.load_model != "":
        policy_file = file_name if args.load_model == "default" else args.load_model
        policy.load(os.path.join(model_path, '{}'.format(policy_file)))

    # Evaluate untrained policy
    eval_policy(policy, env, args.seed)
def make_env(self, args=None, kwargs=None, dm_task_name=None):
    """Create dm_control/metaworld environment"""
    if self.metaworld_env:
        env = mtw_envs_rand[self.env_name](*args, **kwargs)
        if debug_mode:
            env._max_episode_steps = 10000
            env.reset()
            env.render()
            global action_to_take
            glfw.set_key_callback(env.unwrapped.viewer.window, on_press)
            while True:
                env.render()
                if not np.array_equal(action_to_take, np.zeros(6)):
                    _, _, d, _ = env.step(action_to_take)
                    if d:
                        env.seed(args.seed)
                        env.reset()
                        env.render()
                    # Commenting this out makes the mocap faster but
                    # introduces some instabilities.
                    # action_to_take = np.zeros(6)
    else:
        camera_id = 2 if self.domain_name == 'quadruped' else 0
        if dm_task_name is not None:
            task_name = dm_task_name
        else:
            task_name = self.task_name
        env = dmc2gym.make(domain_name=self.domain_name,
                           task_name=task_name,
                           seed=self.cfg.seed,
                           visualize_reward=False,
                           from_pixels=False,
                           height=self.cfg.image_size,
                           width=self.cfg.image_size,
                           frame_skip=self.cfg.action_repeat,
                           camera_id=camera_id)
        if debug_mode:
            from dm_control import viewer
            viewer.launch(env)

    env = FrameStack(env, k=self.cfg.frame_stack)
    env.seed(self.cfg.seed)
    return env
def _thunk():
    print(f"Using {env_id} environment")
    if env_id == "Warehouse":
        env = Warehouse(parameters)
    elif env_id == 'Sumo':
        # todo currently just using loop_network scene
        params = {'scene': "loop_network", 'libsumo': True}
        env = LoopNetwork(seed, params)
    else:
        if env_id.startswith("dm"):
            _, domain, task = env_id.split('.')
            env = dmc2gym.make(domain_name=domain, task_name=task)
            env = ClipAction(env)
        else:
            env = gym.make(env_id)

        is_atari = hasattr(gym.envs, 'atari') and isinstance(
            env.unwrapped, gym.envs.atari.atari_env.AtariEnv)
        if is_atari:
            env = NoopResetEnv(env, noop_max=30)
            env = MaxAndSkipEnv(env, skip=4)

        env.seed(seed + rank)

        if str(env.__class__.__name__).find('TimeLimit') >= 0:
            env = TimeLimitMask(env)

    if env_id not in ["Warehouse", "Sumo"]:
        if log_dir is not None:
            env = Monitor(env, os.path.join(log_dir, str(rank)),
                          allow_early_resets=allow_early_resets)

        if is_atari:
            if len(env.observation_space.shape) == 3:
                env = EpisodicLifeEnv(env)
                if "FIRE" in env.unwrapped.get_action_meanings():
                    env = FireResetEnv(env)
                env = WarpFrame(env, width=84, height=84)
                env = ClipRewardEnv(env)
        elif len(env.observation_space.shape) == 3:
            raise NotImplementedError(
                "CNN models work only for atari,\n"
                "please use a custom wrapper for a custom pixel input env.\n"
                "See wrap_deepmind for an example.")

        # If the input has shape (W,H,3), wrap for PyTorch convolutions
        obs_shape = env.observation_space.shape
        if len(obs_shape) == 3 and obs_shape[2] in [1, 3]:
            env = TransposeImage(env, op=[2, 0, 1])

    return env
def make_dmc(domain_name, task_name, action_repeat, frame_stack=3, image_size=84):
    env = dmc2gym.make(
        domain_name=domain_name,
        task_name=task_name,
        visualize_reward=False,
        from_pixels=True,
        height=image_size,
        width=image_size,
        frame_skip=action_repeat
    )
    env = FrameStack(env, k=frame_stack)
    return env
def make_env():
    return DMCWrapper(dmc2gym.make(domain_name=domain_name,
                                   task_name=task_name,
                                   seed=args.seed,
                                   visualize_reward=False,
                                   from_pixels=True,
                                   height=input_obs_shape[0],
                                   width=input_obs_shape[1],
                                   frame_skip=action_repeat,
                                   channels_first=False),
                      obs_shape=input_obs_shape,
                      k=3,
                      channel_first=False)
def make_env(bg, args):
    env = dmc2gym.make(
        domain_name=args.domain_name,
        task_name=args.task_name,
        seed=args.seed,
        visualize_reward=False,
        from_pixels=True,
        height=args.image_size,
        width=args.image_size,
        frame_skip=args.action_repeat,
        bg_color=bg,
    )
    # env.seed(args.seed)
    env = FrameStack(env, k=args.frame_stack)
    return env
def _thunk():
    if isinstance(env_id, Callable):
        env = env_id(**kwargs)
    elif env_id.startswith("dm"):
        _, domain, task = env_id.split('.')
        env = dmc2gym.make(domain_name=domain, task_name=task)
        env = ClipAction(env)
    else:
        env = gym.make(env_id)

    is_atari = hasattr(gym.envs, 'atari') and isinstance(
        env.unwrapped, gym.envs.atari.atari_env.AtariEnv)
    if is_atari:
        env = NoopResetEnv(env, noop_max=30)
        env = MaxAndSkipEnv(env, skip=4)

    env.seed(seed + rank)

    if str(env.__class__.__name__).find('TimeLimit') >= 0:
        env = TimeLimitMask(env)

    if log_dir is not None:
        env = Monitor(env, os.path.join(log_dir, str(rank)),
                      allow_early_resets=allow_early_resets)

    if is_atari:
        if len(env.observation_space.shape) == 3:
            env = EpisodicLifeEnv(env)
            if "FIRE" in env.unwrapped.get_action_meanings():
                env = FireResetEnv(env)
            env = WarpFrame(env, width=84, height=84)
            env = ClipRewardEnv(env)
    elif env.observation_space.shape and len(env.observation_space.shape) == 3:
        raise NotImplementedError(
            "CNN models work only for atari,\n"
            "please use a custom wrapper for a custom pixel input env.\n"
            "See wrap_deepmind for an example.")

    # If the input has shape (W,H,3), wrap for PyTorch convolutions
    if env.observation_space.shape:
        obs_shape = env.observation_space.shape
        if len(obs_shape) == 3 and obs_shape[2] in [1, 3]:
            env = TransposeImage(env, op=[2, 0, 1])

    return env
def create_env(n_envs):
    """
    Create the environment and wrap it if necessary

    :param n_envs: (int)
    :return: (gym.Env)
    """
    global hyperparams

    if not is_dm_env:
        if is_atari:
            if args.verbose > 0:
                print("Using Atari wrapper")
            env = make_atari_env(env_id, num_env=n_envs, seed=args.seed)
            # Frame-stacking with 4 frames
            env = VecFrameStack(env, n_stack=4)
        elif algo_ in ['dqn', 'ddpg']:
            if hyperparams.get('normalize', False):
                print("WARNING: normalization not supported yet for DDPG/DQN")
            env = gym.make(env_id)
            env.seed(args.seed)
            if env_wrapper is not None:
                env = env_wrapper(env)
        else:
            if n_envs == 1:
                env = DummyVecEnv([make_env(env_id, 0, args.seed, wrapper_class=env_wrapper)])
            else:
                # env = SubprocVecEnv([make_env(env_id, i, args.seed) for i in range(n_envs)])
                # On most env, SubprocVecEnv does not help and is quite memory hungry
                env = DummyVecEnv([make_env(env_id, i, args.seed, wrapper_class=env_wrapper)
                                   for i in range(n_envs)])
            if normalize:
                if args.verbose > 0:
                    if len(normalize_kwargs) > 0:
                        print("Normalization activated: {}".format(normalize_kwargs))
                    else:
                        print("Normalizing input and reward")
                env = VecNormalize(env, **normalize_kwargs)
            # Optional Frame-stacking
            if hyperparams.get('frame_stack', False):
                n_stack = hyperparams['frame_stack']
                env = VecFrameStack(env, n_stack)
                print("Stacking {} frames".format(n_stack))
                del hyperparams['frame_stack']
    else:
        env = dmc2gym.make(domain_name=args.domain, task_name=args.task, seed=args.seed)

    return env
def make_env(cfg):
    """Helper function to create dm_control environment"""
    if cfg.env == 'ball_in_cup_catch':
        domain_name = 'ball_in_cup'
        task_name = 'catch'
    else:
        domain_name = cfg.env.split('_')[0]
        task_name = '_'.join(cfg.env.split('_')[1:])

    env = dmc2gym.make(domain_name=domain_name,
                       task_name=task_name,
                       seed=cfg.seed,
                       visualize_reward=True)
    env.seed(cfg.seed)
    assert env.action_space.low.min() >= -1
    assert env.action_space.high.max() <= 1
    return env