예제 #1
0
    def _build_env(self):
        """Create the gym environment that backs ``self._env_name``.

        Records the installed gym version in ``self._current_version``,
        stores the result of ``env_register.get_env_info`` in
        ``self._env_info``, trims ``self._env_name`` to the text before its
        first '-', and stores the gym environment in ``self._env``.

        Raises:
            KeyError: if the trimmed name is not one of the supported keys.
        """
        import gym
        self._current_version = gym.__version__

        # gym 0.7.4 / 0.9.4 use the '-v1' ids, every other version '-v2'
        if self._current_version in ('0.7.4', '0.9.4'):
            suffix = '-v1'
        else:
            suffix = '-v2'

        gym_ids = {
            'gym_fwalker2d': 'Walker2d' + suffix,
            'gym_fhopper': 'Hopper' + suffix,
        }
        # all ant variants are built from the same gym id
        ant_keys = ('gym_fant', 'gym_fant2', 'gym_fant5', 'gym_fant10',
                    'gym_fant20', 'gym_fant30')
        for ant_key in ant_keys:
            gym_ids[ant_key] = 'Ant' + suffix

        # the info lookup uses the full name, so it happens before trimming
        full_name = self._env_name
        self._env_info = env_register.get_env_info(full_name)
        self._env_name = full_name.split('-')[0]
        self._env = gym.make(gym_ids[self._env_name])
예제 #2
0
 def _build_env(self):
     """Create the mountain-car gym environment and load its env info.

     Sets ``self._current_version``, ``self._env`` and ``self._env_info``.
     Raises KeyError when ``self._env_name`` is not 'gym_mountain'.
     """
     import gym
     self._current_version = gym.__version__

     # resolve the gym id first, then build the environment
     gym_ids = {'gym_mountain': 'MountainCarContinuous-v0'}
     gym_id = gym_ids[self._env_name]
     self._env = gym.make(gym_id)
     self._env_info = env_register.get_env_info(self._env_name)
예제 #3
0
    def _build_env(self):
        """Create the 'CartPole-v1' gym environment and load its env info.

        The gym id is fixed; ``self._env_name`` is only used for the
        ``env_register.get_env_info`` lookup.
        """
        import gym

        gym_id = 'CartPole-v1'
        self._current_version = gym.__version__

        self._env = gym.make(gym_id)
        self._env_info = env_register.get_env_info(self._env_name)
예제 #4
0
    def _build_env(self):
        """Create the gym environment that backs ``self._env_name``.

        Records the installed gym version in ``self._current_version``,
        stores the result of ``env_register.get_env_info`` in
        ``self._env_info``, trims ``self._env_name`` to the text before its
        first '-', and stores the gym environment in ``self._env``.

        Raises:
            KeyError: if the trimmed name is not one of the supported keys.
        """
        import gym
        self._current_version = gym.__version__

        # The '-v2' branch used to be guarded by comparing the version string
        # with the NotImplementedError class. That comparison is never true,
        # so the branch was dead and every gym version other than 0.7.4 /
        # 0.9.4 raised ValueError. Any other version now selects the '-v2'
        # ids.
        # NOTE(review): assumes the installed gym registers these '-v2' ids;
        # confirm against the gym versions this project supports.
        if self._current_version in ('0.7.4', '0.9.4'):
            suffix = '-v1'
        else:
            suffix = '-v2'

        task_names = {
            'gym_cheetah': 'HalfCheetah',
            'gym_walker2d': 'Walker2d',
            'gym_hopper': 'Hopper',
            'gym_swimmer': 'Swimmer',
            'gym_ant': 'Ant',
        }
        _env_name = {key: task + suffix for key, task in task_names.items()}

        # make the environments; the info lookup uses the full name, so it
        # has to happen before the name is trimmed
        self._env_info = env_register.get_env_info(self._env_name)
        self._env_name = self._env_name.split('-')[0]
        self._env = gym.make(_env_name[self._env_name])
예제 #5
0
    def _build_env(self):
        """Create the point gym environment and load its env info.

        Sets ``self._env`` and ``self._env_info``. Raises KeyError when
        ``self._env_name`` is not 'gym_point'.
        """
        gym_ids = {'gym_point': 'Point-v0'}

        # build the environment, then fetch its registered info
        self._env = gym.make(gym_ids[self._env_name])
        self._env_info = env_register.get_env_info(self._env_name)
예제 #6
0
    def _build_env(self):
        """Create the pendulum gym environment and load its env info.

        Sets ``self._current_version``, ``self._env`` and ``self._env_info``.
        Raises KeyError when ``self._env_name`` is not 'gym_pendulum'.
        """
        import gym
        self._current_version = gym.__version__

        # resolve the gym id first, then build the environment
        gym_ids = {'gym_pendulum': 'Pendulum-v0'}
        gym_id = gym_ids[self._env_name]
        self._env = gym.make(gym_id)
        self._env_info = env_register.get_env_info(self._env_name)
예제 #7
0
    def _build_env(self):
        """Create the 'HalfCheetah-v1' gym environment and load its env info.

        The env info is looked up with the full ``self._env_name``; the name
        is then trimmed to the text before its first '-'. The gym id itself
        is fixed.
        """
        import gym
        self._current_version = gym.__version__

        # look up the info with the untrimmed name, then trim it
        registered_name = self._env_name
        self._env_info = env_register.get_env_info(registered_name)
        self._env_name = registered_name.split('-')[0]

        self._env = gym.make('HalfCheetah-v1')
예제 #8
0
    def _build_env(self):
        """Create the lunar-lander environment and load its env info.

        The environment is built through ``box2d_make``. Raises KeyError when
        ``self._env_name`` is not one of the two supported keys.
        """
        box2d_ids = {
            'gym_lunar_lander': 'LunarLander-v2',
            'gym_lunar_lander_continuous': 'LunarLanderContinuous-v2',
        }
        box2d_id = box2d_ids[self._env_name]

        # build the environment, then fetch its registered info
        self._env = box2d_make(box2d_id)
        self._env_info = env_register.get_env_info(self._env_name)
예제 #9
0
    def _build_env(self):
        """Create the bipedal-walker environment and load its env info.

        The environment is built through ``box2d_make``. Raises KeyError when
        ``self._env_name`` is not one of the two supported keys.
        """
        box2d_ids = {
            'gym_bipedal_walker': 'BipedalWalker-v2',
            'gym_bipedal_walker_hardcore': 'BipedalWalkerHardcore-v2',
        }
        box2d_id = box2d_ids[self._env_name]

        # build the environment, then fetch its registered info
        self._env = box2d_make(box2d_id)
        self._env_info = env_register.get_env_info(self._env_name)
예제 #10
0
    def _build_env(self):
        """Create the roboschool gym environment named by ``self._env_name``.

        Loads the env info, copies its image size into ``self._VIDEO_H`` /
        ``self._VIDEO_W``, and builds the gym environment from the first two
        '-'-separated parts of ``self._env_name``.

        Raises:
            IndexError: if ``self._env_name`` contains no '-'.
        """
        import gym
        import roboschool  # presumably imported for its side effects; verify

        info = env_register.get_env_info(self._env_name)
        self._env_info = info

        # read both sizes before assigning either attribute
        height, width = info['image_height'], info['image_width']
        self._VIDEO_H, self._VIDEO_W = height, width

        # keep only the first two '-'-separated parts as the gym id
        name_parts = self._env_name.split('-')
        gym_id = '{}-{}'.format(name_parts[0], name_parts[1])
        self._env = gym.make(gym_id)
예제 #11
0
    def _build_env(self):
        """Create the cartpole gym environment and load its env info.

        Both supported names map to the same gym id, 'CartPole-v1'. Raises
        KeyError for any other ``self._env_name``.
        """
        import gym
        self._current_version = gym.__version__

        gym_ids = dict.fromkeys(
            ('gym_cartpole', 'gym_cartpole_continuous'), 'CartPole-v1'
        )
        gym_id = gym_ids[self._env_name]

        # build the environment, then fetch its registered info
        self._env = gym.make(gym_id)
        self._env_info = env_register.get_env_info(self._env_name)
예제 #12
0
    def _build_env(self):
        """Create the reacher gym environment and load its env info.

        gym 0.7.4 / 0.9.4 use 'Reacher-v1'; every other version uses
        'Reacher-v2'. Raises KeyError when ``self._env_name`` is not
        'gym_reacher'.
        """
        import gym
        self._current_version = gym.__version__

        legacy_gym = self._current_version in ('0.7.4', '0.9.4')
        gym_ids = {'gym_reacher': 'Reacher-v1' if legacy_gym else 'Reacher-v2'}
        gym_id = gym_ids[self._env_name]

        # build the environment, then fetch its registered info
        self._env = gym.make(gym_id)
        self._env_info = env_register.get_env_info(self._env_name)
예제 #13
0
    def _build_env(self):
        """Create the acrobot gym environment and load its env info.

        Both supported names map to the same gym id, 'Acrobot-v1'. Raises
        KeyError for any other ``self._env_name``.
        """
        import gym
        self._current_version = gym.__version__

        gym_ids = dict.fromkeys(
            ('gym_acrobot', 'gym_acrobot_sparse'), 'Acrobot-v1'
        )
        gym_id = gym_ids[self._env_name]

        # build the environment, then fetch its registered info
        self._env = gym.make(gym_id)
        self._env_info = env_register.get_env_info(self._env_name)
예제 #14
0
    def _build_env(self):
        """Create one of the 'MBRL*' gym environments and load its env info.

        Prints ``self._env_name`` before building the environment. Raises
        KeyError when ``self._env_name`` is not one of the supported keys.
        """
        import gym
        # presumably imported so the MBRL* ids get registered; verify
        import mbbl.env.gym_env.pets_env
        self._current_version = gym.__version__

        gym_ids = dict([
            ('gym_petsReacher', 'MBRLReacher3D-v0'),
            ('gym_petsCheetah', 'MBRLHalfCheetah-v0'),
            ('gym_petsPusher', 'MBRLPusher-v0'),
        ])
        print(self._env_name)

        # build the environment, then fetch its registered info
        gym_id = gym_ids[self._env_name]
        self._env = gym.make(gym_id)
        self._env_info = env_register.get_env_info(self._env_name)
예제 #15
0
    def _build_env(self):
        """Create the reacher gym environment and load its env info.

        Sets ``self._current_version``, ``self._env`` and ``self._env_info``.

        Raises:
            KeyError: if ``self._env_name`` is not 'gym_reacher'.
        """
        import gym
        self._current_version = gym.__version__

        # The 'Reacher-v2' branch used to be guarded by comparing the version
        # string with the NotImplementedError class. That comparison is never
        # true, so the branch was dead and every gym version other than
        # 0.7.4 / 0.9.4 raised ValueError. Any other version now selects
        # 'Reacher-v2'.
        # NOTE(review): assumes the installed gym registers 'Reacher-v2';
        # confirm against the gym versions this project supports.
        if self._current_version in ('0.7.4', '0.9.4'):
            _env_name = {'gym_reacher': 'Reacher-v1'}
        else:
            _env_name = {'gym_reacher': 'Reacher-v2'}

        # make the environments
        self._env = gym.make(_env_name[self._env_name])
        self._env_info = env_register.get_env_info(self._env_name)
예제 #16
0
    def _build_env(self):
        """Create the gym environment that backs ``self._env_name``.

        Only gym 0.7.4 / 0.9.4 are handled. The env info is looked up with
        the full name, which is then trimmed to the text before its first
        '-'.

        Raises:
            NotImplementedError: for any other gym version.
            KeyError: if the trimmed name is not one of the supported keys.
        """
        import gym
        self._current_version = gym.__version__

        # bail out early on gym versions that have no mapping here
        if self._current_version not in ('0.7.4', '0.9.4'):
            raise NotImplementedError

        gym_ids = dict(
            gym_dfwalker2d='Walker2d-v1',
            gym_dfhopper='Hopper-v1',
            gym_dfant='Ant-v1',
        )

        # look up the info with the untrimmed name, then trim it
        full_name = self._env_name
        self._env_info = env_register.get_env_info(full_name)
        self._env_name = full_name.split('-')[0]
        self._env = gym.make(gym_ids[self._env_name])
예제 #17
0
    def _build_env(self):
        """Create the humanoid gym environment and load its env info.

        All three supported names map to the same gym id: 'Humanoid-v1' on
        gym 0.7.4 / 0.9.4 and 'Humanoid-v2' on every other version. The env
        info is looked up with the full name, which is then trimmed to the
        text before its first '-'.

        Raises:
            KeyError: if the trimmed name is not one of the supported keys.
        """
        import gym
        self._current_version = gym.__version__

        legacy_gym = self._current_version in ('0.7.4', '0.9.4')
        humanoid_id = 'Humanoid-v1' if legacy_gym else 'Humanoid-v2'
        gym_ids = dict.fromkeys(
            ('gym_humanoid', 'gym_slimhumanoid', 'gym_nostopslimhumanoid'),
            humanoid_id,
        )

        # look up the info with the untrimmed name, then trim it
        full_name = self._env_name
        self._env_info = env_register.get_env_info(full_name)
        self._env_name = full_name.split('-')[0]
        self._env = gym.make(gym_ids[self._env_name])
예제 #18
0
    def _build_env(self):
        """Create the humanoid gym environment and load its env info.

        Records the installed gym version in ``self._current_version``,
        stores the result of ``env_register.get_env_info`` in
        ``self._env_info``, trims ``self._env_name`` to the text before its
        first '-', and stores the gym environment in ``self._env``.

        Raises:
            KeyError: if the trimmed name is not one of the supported keys.
        """
        import gym
        self._current_version = gym.__version__

        # The 'Humanoid-v2' branch used to be guarded by comparing the
        # version string with the NotImplementedError class. That comparison
        # is never true, so the branch was dead and every gym version other
        # than 0.7.4 / 0.9.4 raised ValueError. Any other version now selects
        # 'Humanoid-v2'.
        # NOTE(review): assumes the installed gym registers 'Humanoid-v2';
        # confirm against the gym versions this project supports.
        if self._current_version in ('0.7.4', '0.9.4'):
            humanoid_id = 'Humanoid-v1'
        else:
            humanoid_id = 'Humanoid-v2'

        # The dead v2 table spelled the last key 'gym_nostophumanoid'; both
        # versions now share the key set of the v1 table.
        _env_name = {
            'gym_humanoid': humanoid_id,
            'gym_slimhumanoid': humanoid_id,
            'gym_nostopslimhumanoid': humanoid_id,
        }

        # make the environments; the info lookup uses the full name, so it
        # has to happen before the name is trimmed
        self._env_info = env_register.get_env_info(self._env_name)
        self._env_name = self._env_name.split('-')[0]
        self._env = gym.make(_env_name[self._env_name])
예제 #19
0
    def _build_env(self):
        """Create the gym environment that backs ``self._env_name``.

        gym 0.7.4 / 0.9.4 use the '-v1' ids; every other version uses the
        '-v2' ids. The env info is looked up with the full name, which is
        then trimmed to the text before its first '-'.

        Raises:
            KeyError: if the trimmed name is not one of the supported keys.
        """
        import gym
        self._current_version = gym.__version__

        legacy_gym = self._current_version in ('0.7.4', '0.9.4')
        suffix = '-v1' if legacy_gym else '-v2'

        task_names = (
            ('gym_cheetah', 'HalfCheetah'),
            ('gym_walker2d', 'Walker2d'),
            ('gym_hopper', 'Hopper'),
            ('gym_swimmer', 'Swimmer'),
            ('gym_ant', 'Ant'),
        )
        gym_ids = {key: task + suffix for key, task in task_names}

        # look up the info with the untrimmed name, then trim it
        full_name = self._env_name
        self._env_info = env_register.get_env_info(full_name)
        self._env_name = full_name.split('-')[0]
        self._env = gym.make(gym_ids[self._env_name])
예제 #20
0
from gps.algorithm.dynamics.dynamics_lr_prior import DynamicsLRPrior
from gps.algorithm.dynamics.dynamics_prior_gmm import DynamicsPriorGMM
from gps.algorithm.traj_opt.traj_opt_lqr_python import TrajOptLQRPython
from gps.algorithm.policy_opt.tf_model_example import tf_network
from gps.algorithm.policy_opt.policy_opt_tf import PolicyOptTf
from gps.algorithm.policy.lin_gauss_init import init_lqr
from gps.algorithm.policy.policy_prior_gmm import PolicyPriorGMM
from gps.gui.config import generate_experiment_info
from gps.agent.gym_env_util import get_x0
from mbbl.env.env_register import get_env_info

# Experiment-wide settings.
env_name = 'gym_reacher'
num_samples = 5
iterations = 2000
rand_seed = 1234

# Sizes are read from the registered env info for `env_name`.
env_info = get_env_info(env_name)
SENSOR_DIMS = dict(
    observation=env_info['ob_size'],
    action=env_info['action_size'],
)

# One unit gain per action dimension. An earlier hard-coded variant was:
# PR2_GAINS = np.array([3.09, 1.08, 0.393, 0.674, 0.111, 0.152, 0.098])
PR2_GAINS = np.ones(SENSOR_DIMS['action'])

# BASE_DIR is gps_filepath with its last two '/'-separated parts removed.
BASE_DIR = '/'.join(gps_filepath.split('/')[:-2])
EXP_DIR = '{}/../experiments/{}_mdgps_example/'.format(BASE_DIR, env_name)

common = {
    'experiment_name':
    env_name + '_experiments' + '_' +
예제 #21
0
# NOTE(review): TEST is read by the `__main__` guard below; with this
# assignment commented out it has to be defined elsewhere -- confirm.
# TEST = 'REWARD_DERIVATIVE'

# Environment names the check below iterates over. Exactly one assignment is
# active; the commented-out lines are alternative candidate lists.
# candidate_names = invertedPendulum.env.PENDULUM
# candidate_names = pendulum.env.PENDULUM
# candidate_names = pets.env.ENV
candidate_names = humanoid.env.ENV
if __name__ == '__main__' and TEST == 'REWARD_DERIVATIVE':
    DERIVATIVE_EPS = 1e-6

    # test the walker
    # candidate_names = walker.env.WALKER
    # candidate_names = reacher.env.ARM_2D
    max_error = 0.0
    for env_name in candidate_names:
        env, _ = env_register.make_env(env_name, 123, {})
        env_info = env_register.get_env_info(env_name)
        derivative_env, _ = env_register.make_env(env_name, 234, {})

        data_dict = \
            {'action': np.random.uniform(-1, 1, [1, env_info['action_size']])}
        data_dict['start_state'], _, _, _ = env.reset()
        data_dict['start_state'] = data_dict['start_state'].reshape(1, -1)
        data_dict['start_state'] = data_dict['start_state'].reshape(1, -1)

        r_u = derivative_env.reward_derivative(data_dict, 'action')
        r_uu = derivative_env.reward_derivative(data_dict, 'action-action')
        r_x = derivative_env.reward_derivative(data_dict, 'state')
        r_xx = derivative_env.reward_derivative(data_dict, 'state-state')

        # test the derivative of the reward wrt action
        for i_elem in range(env_info['action_size']):