def get_goal_example_environment_from_variant(variant):
    import gym

    # Register the multiworld goal-example environments on first use, then
    # wrap the requested task in a GymAdapter (imported at module level).
    if variant['task'] not in [env.id for env in gym.envs.registry.all()]:
        from multiworld.envs.mujoco import register_goal_example_envs
        register_goal_example_envs()

    return GymAdapter(env=gym.make(variant['task']))
def get_goal_example_environment_from_variant(environment_name,
                                              gym_adapter=True,
                                              eval=False):
    import gym

    if environment_name not in [env.id for env in gym.envs.registry.all()]:
        from multiworld.envs.mujoco import register_goal_example_envs
        register_goal_example_envs()

    if gym_adapter:
        return GymAdapter(env=gym.make(environment_name))
    else:
        return gym.make(environment_name)
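# Usage sketch for the name-based signature above (an assumption, not part of
# the original sources): 'StateDoorPullEnv-v0' mirrors DEFAULT_TASK from the
# runner snippet below, and multiworld plus the module-level GymAdapter import
# are assumed to be available. With gym_adapter=False the raw gym environment
# is returned instead of the GymAdapter wrapper.
raw_env = get_goal_example_environment_from_variant(
    'StateDoorPullEnv-v0', gym_adapter=False)
adapted_env = get_goal_example_environment_from_variant('StateDoorPullEnv-v0')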
def get_goal_example_environment_from_variant(variant):
    import gym

    if variant['task'] not in [env.id for env in gym.envs.registry.all()]:
        if 'Manip' in variant['task']:
            # Importing manip_envs registers the manipulation environments
            # as a side effect.
            import manip_envs
        else:
            from multiworld.envs.mujoco import register_goal_example_envs
            register_goal_example_envs()
            from metaworld.envs.mujoco import register_rl_with_videos_custom_envs
            register_rl_with_videos_custom_envs()
            # import mj_envs.hand_manipulation_suite
            # from metaworld.envs.mujoco.sawyer_xyz import register_environments; register_environments()

    return GymAdapter(env=gym.make(variant['task']))
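# Usage sketch for the variant-dict signature above (an assumption, not part
# of the original sources; same registration assumptions as before, with the
# task id again taken from DEFAULT_TASK in the runner snippet below):
env = get_goal_example_environment_from_variant({'task': 'StateDoorPullEnv-v0'})
observation = env.reset()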
from distutils.util import strtobool
import json
import os

import softlearning.algorithms.utils as alg_utils
import softlearning.environments.utils as env_utils
from softlearning.misc.utils import datetimestamp

DEFAULT_TASK = 'StateDoorPullEnv-v0'
DEFAULT_ALGORITHM = 'VICE'
AVAILABLE_ALGORITHMS = set(alg_utils.ALGORITHM_CLASSES.keys())

import gym
from multiworld.envs.mujoco import register_goal_example_envs

# Record which environment ids the multiworld registration adds, so the
# goal-example environments can be listed separately from the stock gym ones.
envs_before = set(env_spec.id for env_spec in gym.envs.registry.all())
register_goal_example_envs()
envs_after = set(env_spec.id for env_spec in gym.envs.registry.all())
goal_example_envs = tuple(sorted(envs_after - envs_before))


def add_ray_init_args(parser):

    def init_help_string(help_string):
        return help_string + " Passed to `ray.init`."

    parser.add_argument(
        '--cpus',
        type=int,
        default=None,
        help=init_help_string("Cpus to allocate to ray process."))
    parser.add_argument(
        '--gpus',
def experiment(variant):
    # Assumes rlkit-style module-level imports: gym, numpy as np, copy,
    # torch.nn as nn, rlkit.torch.pytorch_util as ptu, plus CNN, Flatten,
    # MlpQfWithObsProcessor, TanhGaussianPolicyAdapter, MakeDeterministic,
    # SACTrainer, MdpPathCollector, MdpStepCollector, EnvReplayBuffer,
    # TorchBatchRLAlgorithm, and TorchOnlineRLAlgorithm.
    from multiworld.envs.mujoco import register_goal_example_envs
    register_goal_example_envs()

    eval_env = gym.make('Image48SawyerPushForwardEnv-v0')
    expl_env = gym.make('Image48SawyerPushForwardEnv-v0')

    # Hack for now
    eval_env.wrapped_env.transpose = True
    expl_env.wrapped_env.transpose = True

    img_width, img_height = eval_env.image_shape
    num_channels = 3

    action_dim = int(np.prod(eval_env.action_space.shape))

    # Shared CNN configuration for the Q-functions and the policy.
    cnn_params = variant['cnn_params']
    cnn_params.update(
        input_width=img_width,
        input_height=img_height,
        input_channels=num_channels,
        added_fc_input_size=4,
        output_conv_channels=True,
        output_size=None,
    )

    # Q-functions: a CNN feature extractor followed by an MLP over the
    # concatenated (image features, action) input.
    qf_cnn = CNN(**cnn_params)
    qf_obs_processor = nn.Sequential(
        qf_cnn,
        Flatten(),
    )
    qf_kwargs = copy.deepcopy(variant['qf_kwargs'])
    qf_kwargs['obs_processor'] = qf_obs_processor
    qf_kwargs['output_size'] = 1
    qf_kwargs['input_size'] = (
        action_dim + qf_cnn.conv_output_flat_size)
    qf1 = MlpQfWithObsProcessor(**qf_kwargs)
    qf2 = MlpQfWithObsProcessor(**qf_kwargs)

    target_qf_cnn = CNN(**cnn_params)
    target_qf_obs_processor = nn.Sequential(
        target_qf_cnn,
        Flatten(),
    )
    target_qf_kwargs = copy.deepcopy(variant['qf_kwargs'])
    target_qf_kwargs['obs_processor'] = target_qf_obs_processor
    target_qf_kwargs['output_size'] = 1
    target_qf_kwargs['input_size'] = (
        action_dim + target_qf_cnn.conv_output_flat_size)
    target_qf1 = MlpQfWithObsProcessor(**target_qf_kwargs)
    target_qf2 = MlpQfWithObsProcessor(**target_qf_kwargs)

    # Tanh-Gaussian policy with its own CNN feature extractor.
    policy_cnn = CNN(**cnn_params)
    policy_obs_processor = nn.Sequential(
        policy_cnn,
        Flatten(),
    )
    policy = TanhGaussianPolicyAdapter(
        policy_obs_processor,
        policy_cnn.conv_output_flat_size,
        action_dim,
        **variant['policy_kwargs'])

    eval_policy = MakeDeterministic(policy)
    eval_path_collector = MdpPathCollector(
        eval_env,
        eval_policy,
        **variant['eval_path_collector_kwargs'])
    replay_buffer = EnvReplayBuffer(
        variant['replay_buffer_size'],
        expl_env,
    )
    trainer = SACTrainer(
        env=eval_env,
        policy=policy,
        qf1=qf1,
        qf2=qf2,
        target_qf1=target_qf1,
        target_qf2=target_qf2,
        **variant['trainer_kwargs'])

    # Batch mode collects whole paths between training phases; online mode
    # interleaves single environment steps with training updates.
    if variant['collection_mode'] == 'batch':
        expl_path_collector = MdpPathCollector(
            expl_env,
            policy,
            **variant['expl_path_collector_kwargs'])
        algorithm = TorchBatchRLAlgorithm(
            trainer=trainer,
            exploration_env=expl_env,
            evaluation_env=eval_env,
            exploration_data_collector=expl_path_collector,
            evaluation_data_collector=eval_path_collector,
            replay_buffer=replay_buffer,
            **variant['algo_kwargs'])
    elif variant['collection_mode'] == 'online':
        expl_path_collector = MdpStepCollector(
            expl_env,
            policy,
            **variant['expl_path_collector_kwargs'])
        algorithm = TorchOnlineRLAlgorithm(
            trainer=trainer,
            exploration_env=expl_env,
            evaluation_env=eval_env,
            exploration_data_collector=expl_path_collector,
            evaluation_data_collector=eval_path_collector,
            replay_buffer=replay_buffer,
            **variant['algo_kwargs'])

    algorithm.to(ptu.device)
    algorithm.train()
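# Minimal sketch of a variant dict carrying the keys experiment() reads. The
# specific values and CNN/algorithm keyword names are assumptions based on a
# typical rlkit setup, not the original script's hyperparameters; exact keys
# depend on the rlkit version. Real runs would normally build this dict in the
# launcher that wraps experiment().
variant = dict(
    cnn_params=dict(
        kernel_sizes=[3, 3],
        n_channels=[16, 16],
        strides=[1, 1],
        paddings=[1, 1],
        hidden_sizes=[256],
    ),
    qf_kwargs=dict(hidden_sizes=[256, 256]),
    policy_kwargs=dict(hidden_sizes=[256, 256]),
    eval_path_collector_kwargs=dict(),
    expl_path_collector_kwargs=dict(),
    trainer_kwargs=dict(discount=0.99),
    algo_kwargs=dict(
        batch_size=256,
        num_epochs=100,
        num_eval_steps_per_epoch=1000,
        num_trains_per_train_loop=1000,
        num_expl_steps_per_train_loop=1000,
        min_num_steps_before_training=1000,
        max_path_length=50,
    ),
    replay_buffer_size=int(1e5),
    collection_mode='batch',
)
# experiment(variant)  # requires MuJoCo, multiworld, and rlkit to be installed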