Code Example #1
File: env_test.py  Project: Neo-X/TerrainRLSim
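    # NOTE: this snippet relies on module-level imports not shown here
    # (assumed: numpy as np and matplotlib.pyplot as plt).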
    def test_multichar_velocityfield_x(self):

        # terrainRL_PATH = os.environ['TERRAINRL_PATH']
        # sys.path.append(terrainRL_PATH+'/lib')
        from simAdapter import terrainRLSim
        envs_list = terrainRLSim.getEnvsList()
        # print ("# of envs: ", len(envs_list))
        # print ("Envs:\n", json.dumps(envs_list, sort_keys=True, indent=4))
        env = terrainRLSim.getEnv(
            env_name="PD_Biped3D_MutliChar_WithVel_LargeBlocks-v0",
            render=False)

        env.reset()
        actionSpace = env.getActionSpace()
        env.setRandomSeed(1234)
        actions = []
        for i in range(11):
            action = ((actionSpace.getMaximum() - actionSpace.getMinimum()) *
                      np.random.uniform(size=actionSpace.getMinimum().shape[0])
                      ) + actionSpace.getMinimum()
            actions.append(action)

        observation, reward, done, info = env.step(actions)

        states = np.array(observation)
        img_data_size = 1024
        agent_num = 1
        data_ = []
        for i in range(10):
            data_.append(states[i + 1][0:img_data_size])

        ### There is some non-zero data
        assert np.std(data_) > 0.01
        plt.show()
        env.finish()
Code Example #2
    def __init__(self, env, meta_batch_size, envs_per_task, max_path_length):
        # Relies on module-level imports not shown in this snippet
        # (assumed: numpy as np, copy, terrainRLSim, and the normalize wrapper).
        self.envs = []
        # print ("env:", env)
        if (env is None):
            for _ in range(meta_batch_size * envs_per_task):
                env = terrainRLSim.getEnv(env_name="PD_Humanoid_3D_GRF_Mixed_1Sub_Imitate_30FPS_DenseState_v0", render=True)
                # env = globals()[config['env']]() # instantiate env
                env = normalize(env) # apply normalize wrapper to env
                self.envs.append(env)
        else:
            self.envs = np.asarray([copy.deepcopy(env) for _ in range(meta_batch_size * envs_per_task)])
        self.ts = np.zeros(len(self.envs), dtype='int')  # time steps
        self.max_path_length = max_path_length
Code Example #3
File: env_test.py  Project: Neo-X/TerrainRLSim
    def test_load_env(self):

        # terrainRL_PATH = os.environ['TERRAINRL_PATH']
        # sys.path.append(terrainRL_PATH+'/lib')
        from simAdapter import terrainRLSim
        envs_list = terrainRLSim.getEnvsList()
        # print ("# of envs: ", len(envs_list))
        # print ("Envs:\n", json.dumps(envs_list, sort_keys=True, indent=4))
        env = terrainRLSim.getEnv(env_name="PD_Biped3D_FULL_Imitate-Steps-v0",
                                  render=False)

        env.reset()
        actionSpace = env.getActionSpace()
        env.setRandomSeed(1234)
        env.finish()
Code Example #4
File: SimulationUtil.py  Project: skylbc/SMBAE
def createEnvironment(config_file, env_type, settings, render=False, index=None):
    
    ### For multitasking, can specify a list of config files
    # if ( isinstance(config_file, list ) ):
    if type(config_file) is list:
        config_file = config_file[index]
        print ("Using config file: ", config_file)
    else:
        print("Not a list hoser, it is a ", type(config_file), " for ", config_file)
        print (config_file[0])
    
    print("Creating sim Type: ", env_type)
    if env_type == 'ballgame_2d':
        from env.BallGame2D import BallGame2D
        from sim.BallGame2DEnv import BallGame2DEnv
        file = open(config_file)
        conf = json.load(file)
        # print ("Settings: " + str(json.dumps(conf)))
        file.close()
        conf['render'] = render
        exp = BallGame2D(conf)
        exp = BallGame2DEnv(exp, settings)
        return exp
    elif env_type == 'ballgame_1d':
        from env.BallGame1D import BallGame1D
        from sim.BallGame1DEnv import BallGame1DEnv
        file = open(config_file)
        conf = json.load(file)
        # print ("Settings: " + str(json.dumps(conf)))
        file.close()
        conf['render'] = render
        exp = BallGame1D(conf)
        exp = BallGame1DEnv(exp, settings)
        return exp
    elif env_type == 'gapgame_1d':
        from env.GapGame1D import GapGame1D
        from sim.GapGame1DEnv import GapGame1DEnv
        file = open(config_file)
        conf = json.load(file)
        # print ("Settings: " + str(json.dumps(conf)))
        file.close()
        conf['render'] = render
        exp = GapGame1D(conf)
        exp = GapGame1DEnv(exp, settings)
        return exp
    elif env_type == 'gapgame_2d':
        from env.GapGame2D import GapGame2D
        from sim.GapGame2DEnv import GapGame2DEnv
        file = open(config_file)
        conf = json.load(file)
        # print ("Settings: " + str(json.dumps(conf)))
        file.close()
        conf['render'] = render
        exp = GapGame2D(conf)
        exp = GapGame2DEnv(exp, settings)
        return exp
    elif env_type == 'nav_Game':
        from env.NavGame import NavGame
        from sim.NavGameEnv import NavGameEnv
        # file = open(config_file)
        # conf = json.load(file)
        conf = copy.deepcopy(settings)
        # print ("Settings: " + str(json.dumps(conf)))
        # file.close()
        conf['render'] = render
        exp = NavGame(conf)
        exp = NavGameEnv(exp, settings)
        return exp
    elif env_type == 'Particle_Sim':
        from env.ParticleGame import ParticleGame
        from sim.ParticleSimEnv import ParticleSimEnv
        # file = open(config_file)
        # conf = json.load(file)
        conf = copy.deepcopy(settings)
        # print ("Settings: " + str(json.dumps(conf)))
        # file.close()
        conf['render'] = render
        exp = ParticleGame(conf)
        exp = ParticleSimEnv(exp, settings)
        return exp
    
    elif env_type == 'open_AI_Gym':
        import gym
        from gym import wrappers
        from gym import envs
        from sim.OpenAIGymEnv import OpenAIGymEnv
        
        try:
            import roboschool
        except ImportError:
            print ("roboschool not installed")
        try:
            import gymdrl
        except ImportError:
            print ("Membrane/gymdrl not installed")
        from OpenGL import GL
        # load_roboschool
        # print(envs.registry.all())
        
        # env = gym.make('CartPole-v0')
        env_name = config_file
        env = gym.make(env_name)
        # file = open(config_file)
        # conf = json.load(file)
        
        conf = copy.deepcopy(settings)
        conf['render'] = render
        exp = OpenAIGymEnv(env, conf)
        return exp
    
    elif ((env_type == 'simbiconBiped2D') or (env_type == 'simbiconBiped3D') or (env_type == 'Imitate3D') or 
          (env_type == 'simbiconBiped2DTerrain') or (env_type == 'hopper_2D')):
        import simbiconAdapter
        from sim.SimbiconEnv import SimbiconEnv
        c = simbiconAdapter.Configuration(config_file)
        print ("Num state: ", c._NUMBER_OF_STATES)
        c._RENDER = render
        sim = simbiconAdapter.SimbiconWrapper(c)
        print ("Using Environment Type: " + str(env_type))
        exp = SimbiconEnv(sim, settings)
        exp._conf = c # OMFG HACK so that python does not garbage collect the configuration and F everything up!
        return exp
    elif ((env_type == 'mocapImitation2D') or (env_type == 'mocapImitation3D')):
        import simbiconAdapter
        from sim.MocapImitationEnv import MocapImitationEnv
        c = simbiconAdapter.Configuration(config_file)
        print ("Num state: ", c._NUMBER_OF_STATES)
        c._RENDER = render
        sim = simbiconAdapter.SimbiconWrapper(c)
        print ("Using Environment Type: " + str(env_type))
        exp = MocapImitationEnv(sim, settings)
        exp._conf = c # OMFG HACK so that python does not garbage collect the configuration and F everything up!
        return exp
    elif env_type == 'terrainRLSimOld':
        # terrainRL_PATH = os.environ['TERRAINRL_PATH']
        # sys.path.append(terrainRL_PATH+'/lib')
        # from simAdapter import terrainRLAdapter
        # from sim.TerrainRLEnv import TerrainRLEnv
        from simAdapter import terrainRLSim
        from sim.OpenAIGymEnv import OpenAIGymEnv
        
        env = terrainRLSim.getEnv(env_name=config_file, render=render)
        print ("Using Environment Type: " + str(env_type) + ", " + str(config_file))
        # sim.setRender(render)
        # sim.init()
        conf = copy.deepcopy(settings)
        conf['render'] = render
        exp = OpenAIGymEnv(env, conf)
        # env.getEnv().setRender(render)
        # exp = TerrainRLEnv(env.getEnv(), settings)
        return exp
    
    elif ( (env_type == 'GymMultiChar') 
        or (env_type == 'terrainRLSim')
        ):
        # terrainRL_PATH = os.environ['TERRAINRL_PATH']
        # sys.path.append(terrainRL_PATH+'/lib')
        # from simAdapter import terrainRLAdapter
        # from sim.TerrainRLEnv import TerrainRLEnv
        from simAdapter import terrainRLSim
        from sim.GymMultiCharEnv import GymMultiCharEnv
        
        env = terrainRLSim.getEnv(env_name=config_file, render=render)
        print ("Using Environment Type: " + str(env_type) + ", " + str(config_file))
        # sim.setRender(render)
        # sim.init()
        conf = copy.deepcopy(settings)
        conf['render'] = render
        exp = GymMultiCharEnv(env, conf)
        # env.getEnv().setRender(render)
        # exp = TerrainRLEnv(env.getEnv(), settings)
        return exp
        
    elif env_type == 'terrainRLBiped2D':
        terrainRL_PATH = os.environ['TERRAINRL_PATH']
        sys.path.append(terrainRL_PATH+'/lib')
        from simAdapter import terrainRLAdapter
        from sim.TerrainRLEnv import TerrainRLEnv
        sim = terrainRLAdapter.cSimAdapter(['train', '-arg_file=', terrainRL_PATH+'/'+config_file, '-relative_file_path=', terrainRL_PATH+'/'])
        sim.setRender(render)
        # sim.init(['train', '-arg_file=', config_file])
        # print ("Num state: ", c._NUMBER_OF_STATES)
        # sim = simbiconAdapter.SimbiconWrapper(c)
        print ("Using Environment Type: " + str(env_type))
        exp = TerrainRLEnv(sim, settings)
        # exp._conf = c # OMFG HACK so that python does not garbage collect the configuration and F everything up!
        return exp
    elif env_type == 'terrainRLFlatBiped2D':
        terrainRL_PATH = os.environ['TERRAINRL_PATH']
        sys.path.append(terrainRL_PATH+'/lib')
        from simAdapter import terrainRLAdapter
        from sim.TerrainRLFlatEnv import TerrainRLFlatEnv
        sim = terrainRLAdapter.cSimAdapter(['train', '-arg_file=', terrainRL_PATH+'/'+config_file, '-relative_file_path=', terrainRL_PATH+'/'])
        sim.setRender(render)
        # sim.init(['train', '-arg_file=', config_file])
        # print ("Num state: ", c._NUMBER_OF_STATES)
        # sim = simbiconAdapter.SimbiconWrapper(c)
        print ("Using Environment Type: " + str(env_type))
        exp = TerrainRLFlatEnv(sim, settings)
        # exp._conf = c # OMFG HACK so that python does not garbage collect the configuration and F everything up!
        return exp
    elif (env_type == 'terrainRLImitateBiped2D' or (env_type == 'terrainRLImitateBiped3D')):
        terrainRL_PATH = os.environ['TERRAINRL_PATH']
        sys.path.append(terrainRL_PATH+'/lib')
        from simAdapter import terrainRLAdapter
        from sim.TerrainRLImitateEnv import TerrainRLImitateEnv
        sim = terrainRLAdapter.cSimAdapter(['train', '-arg_file=', terrainRL_PATH+'/'+config_file, '-relative_file_path=', terrainRL_PATH+'/'])
        sim.setRender(render)
        # sim.init(['train', '-arg_file=', config_file])
        # print ("Num state: ", c._NUMBER_OF_STATES)
        # sim = simbiconAdapter.SimbiconWrapper(c)
        print ("Using Environment Type: " + str(env_type))
        exp = TerrainRLImitateEnv(sim, settings)
        # exp._conf = c # OMFG HACK so that python does not garbage collect the configuration and F everything up!
        return exp
    elif ((env_type == 'terrainRLHLCBiped3D')):
        terrainRL_PATH = os.environ['TERRAINRL_PATH']
        sys.path.append(terrainRL_PATH+'/lib')
        from simAdapter import terrainRLAdapter
        from sim.TerrainRLHLCEnv import TerrainRLHLCEnv
        sim = terrainRLAdapter.cSimAdapter(['train', '-arg_file=', terrainRL_PATH+'/'+config_file, '-relative_file_path=', terrainRL_PATH+'/'])
        sim.setRender(render)
        # sim.init(['train', '-arg_file=', config_file])
        # print ("Num state: ", c._NUMBER_OF_STATES)
        # sim = simbiconAdapter.SimbiconWrapper(c)
        print ("Using Environment Type: " + str(env_type))
        exp = TerrainRLHLCEnv(sim, settings)
        # exp._conf = c # OMFG HACK so that python does not garbage collect the configuration and F everything up!
        return exp
    
    import characterSim
    c = characterSim.Configuration(config_file)
    # print ("Num state: ", c._NUMBER_OF_STATES)
    c._RENDER = render
    exp = characterSim.Experiment(c)
    # print ("Num state: ", exp._config._NUMBER_OF_STATES)
    if env_type == 'pendulum_env_state':
        from sim.PendulumEnvState import PendulumEnvState
        print ("Using Environment Type: " + str(env_type))
        exp = PendulumEnvState(exp, settings)
    elif env_type == 'pendulum_env':
        from sim.PendulumEnv import PendulumEnv
        print ("Using Environment Type: " + str(env_type))
        exp = PendulumEnv(exp, settings)
    elif env_type == 'pendulum3D_env':
        from sim.PendulumEnv import PendulumEnv
        print ("Using Environment Type: " + str(env_type))
        exp = PendulumEnv(exp, settings)
    elif env_type == 'pendulum_3D_env':
        from sim.PendulumEnv import PendulumEnv
        print ("Using Environment Type: " + str(env_type))
        exp = PendulumEnv(exp, settings)
    elif env_type == 'paperGibbon_env':
        from sim.PaperGibbonEnv import PaperGibbonEnv
        print ("Using Environment Type: " + str(env_type))
        exp = PaperGibbonEnv(exp, settings)
    else:
        print ("Invalid environment type: " + str(env_type))
        raise ValueError("Invalid environment type: " + str(env_type))
        # sys.exit()
    
    exp._conf = c # OMFG HACK so that python does not garbage collect the configuration and F everything up!    
    return exp
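
A minimal usage sketch for the factory above, using the 'open_AI_Gym' branch (for that branch config_file is simply the Gym environment id, as the commented 'CartPole-v0' inside the function suggests). The settings dict below is a placeholder, not a set of project defaults:

# Hedged example call; the settings contents are assumptions.
settings = {}
exp = createEnvironment('CartPole-v0', 'open_AI_Gym', settings, render=False)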
Code Example #5
def main(config):
    set_seed(config['seed'])

    baseline = globals()[config['baseline']]()  #instantiate baseline

    env = terrainRLSim.getEnv(env_name=None, render=True)
    # env = normalize(env) # apply normalize wrapper to env

    policy = MetaGaussianMLPPolicy(
        name="meta-policy",
        obs_dim=np.prod((196, )),
        action_dim=np.prod((38, )),
        meta_batch_size=config['meta_batch_size'],
        hidden_sizes=config['hidden_sizes'],
    )

    sampler = MetaSampler(
        env=('terrianrlSim', config['env']),
        policy=policy,
        rollouts_per_meta_task=config[
            'rollouts_per_meta_task'],  # This batch_size is confusing
        meta_batch_size=config['meta_batch_size'],
        max_path_length=config['max_path_length'],
        parallel=config['parallel'],
    )
    env = terrainRLSim.getEnv(env_name=config['env'], render=True)
    # env = globals()[config['env']]() # instantiate env
    env = normalize(env)  # apply normalize wrapper to env
    print("env.observation_space.shape: ", env.observation_space.shape)
    print("env.action_space.shape: ", env.action_space.shape)
    sampler.set_env(env)

    sample_processor = MetaSampleProcessor(
        baseline=baseline,
        discount=config['discount'],
        gae_lambda=config['gae_lambda'],
        normalize_adv=config['normalize_adv'],
    )

    algo = ProMP(
        policy=policy,
        inner_lr=config['inner_lr'],
        meta_batch_size=config['meta_batch_size'],
        num_inner_grad_steps=config['num_inner_grad_steps'],
        learning_rate=config['learning_rate'],
        num_ppo_steps=config['num_promp_steps'],
        clip_eps=config['clip_eps'],
        target_inner_step=config['target_inner_step'],
        init_inner_kl_penalty=config['init_inner_kl_penalty'],
        adaptive_inner_kl_penalty=config['adaptive_inner_kl_penalty'],
    )

    trainer = Trainer(
        algo=algo,
        policy=policy,
        env=env,
        sampler=sampler,
        sample_processor=sample_processor,
        n_itr=config['n_itr'],
        num_inner_grad_steps=config['num_inner_grad_steps'],
    )

    trainer.train()
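
For reference, a sketch of the config dict that main() expects: the keys mirror the config[...] lookups above, while every value shown is a placeholder rather than a project default.

# Placeholder values only; the keys are the ones read by main().
config = {
    'seed': 1234,
    'baseline': 'LinearFeatureBaseline',   # class name resolved via globals()
    'env': 'PD_Biped2D_Gaps_Terrain-v0',
    'meta_batch_size': 20,
    'hidden_sizes': (64, 64),
    'rollouts_per_meta_task': 2,
    'max_path_length': 128,
    'parallel': False,
    'discount': 0.99,
    'gae_lambda': 1.0,
    'normalize_adv': True,
    'inner_lr': 0.1,
    'num_inner_grad_steps': 1,
    'learning_rate': 1e-3,
    'num_promp_steps': 5,
    'clip_eps': 0.3,
    'target_inner_step': 0.01,
    'init_inner_kl_penalty': 5e-4,
    'adaptive_inner_kl_penalty': False,
    'n_itr': 1000,
}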
Code Example #6
import gym

from simAdapter import terrainRLSim
from OpenGL import GL
import numpy as np
# print(envs.registry.all())
# env = gym.make('CartPole-v0')
# env = gym.make('BipedalWalker-v2')
# import roboschool, gym; print("\n".join(['- ' + spec.id for spec in gym.envs.registry.all() if spec.id.startswith('Roboschool')]))
env = terrainRLSim.getEnv(env_name="PD_Biped2D_Gaps_Terrain-v0", render=True)
# env.getEnv().setRender(True)
# env.init()
# env = gym.make('Hopper-v1')
# env = wrappers.Monitor(env, '/tmp/cartpole-experiment-1')
"""
print( "Action Space: ", env.action_space)
if (not isinstance(env.action_space, gym.spaces.Discrete)):
    print( "Action Space high: ", repr(env.action_space.high))
    print( "Action Space low: ", repr(env.action_space.low))
print( "State Space: ", env.observation_space)
if (not isinstance(env.observation_space, gym.spaces.Discrete)):
    print( "State Space high: ", repr(env.observation_space.high))
    print( "State Space low: ", repr(env.observation_space.low))
"""
rewards = []
states = []
time_limit = 128
for i_episode in range(20):
    observation = env.reset()
    for t in range(time_limit):
        env.render()
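        # --- assumed continuation: the original snippet is cut off here ---
        # Sample a random action within the bounds referenced in the commented
        # block above and step the simulation (return values are assumptions).
        action = np.random.uniform(env.action_space.low, env.action_space.high)
        observation, reward, done, info = env.step(action)
        states.append(observation)
        rewards.append(reward)
        if done:
            break
env.finish()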
Code Example #7
def worker(remote, parent_remote, env_pickle, n_envs, max_path_length, seed):
    """
    Instantiation of a parallel worker for collecting samples. It loops continually checking the task that the remote
    sends to it.

    Args:
        remote (multiprocessing.Connection):
        parent_remote (multiprocessing.Connection):
        env_pickle (pkl): pickled environment
        n_envs (int): number of environments per worker
        max_path_length (int): maximum path length of the task
        seed (int): random seed for the worker
    """
    parent_remote.close()
    # print ("env_pickle: ", env_pickle)
    # sys.exit()
    envs = []
    if type(env_pickle) is tuple:
        for _ in range(n_envs):
            if (env_pickle[0] == 'terrianrlSim'):
                env = terrainRLSim.getEnv(env_name=env_pickle[1], render=False)
                # env = globals()[config['env']]() # instantiate env
                env = normalize(env)  # apply normalize wrapper to env
                envs.append(env)
    else:
        envs = [pickle.loads(env_pickle) for _ in range(n_envs)]

    np.random.seed(seed)

    ts = np.zeros(n_envs, dtype='int')

    while True:
        # receive command and data from the remote
        cmd, data = remote.recv()

        # do a step in each of the environment of the worker
        if cmd == 'step':
            all_results = [env.step(a) for (a, env) in zip(data, envs)]
            obs, rewards, dones, infos = map(list, zip(*all_results))
            ts += 1
            for i in range(n_envs):
                if dones[i] or (ts[i] >= max_path_length):
                    dones[i] = True
                    obs[i] = envs[i].reset()
                    ts[i] = 0
            remote.send((obs, rewards, dones, infos))

        # reset all the environments of the worker
        elif cmd == 'reset':
            obs = [env.reset() for env in envs]
            ts[:] = 0
            remote.send(obs)

        # set the specified task for each of the environments of the worker
        elif cmd == 'set_task':
            for env in envs:
                env.set_task(data)
            remote.send(None)

        # close the remote and stop the worker
        elif cmd == 'close':
            remote.close()
            break

        else:
            raise NotImplementedError
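
For context, a minimal sketch of how such a worker is typically driven over a multiprocessing pipe. The wiring below is an assumption for illustration, not code from this project; the env tag and environment name reuse strings that appear elsewhere on this page.

from multiprocessing import Pipe, Process

# Assumed wiring: the parent keeps one end of the pipe; the worker receives the
# other end plus the parent's end so it can close that duplicate in its process.
parent_conn, child_conn = Pipe()
env_spec = ('terrianrlSim', 'PD_Biped2D_Gaps_Terrain-v0')  # exercises the tuple branch of worker()
p = Process(target=worker,
            args=(child_conn, parent_conn, env_spec, 2, 128, 1234))
p.daemon = True
p.start()

parent_conn.send(('reset', None))   # worker resets its envs and replies with observations
obs = parent_conn.recv()
parent_conn.send(('close', None))   # worker closes its remote and leaves its loop
p.join()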