Example #1
    def setup_env(self):
        """
            :params ratio: should be a float between 0 and 1
        """
        env_id = self.args.env
        if self.env_kwargs:
            env_id = auto_tune_env(env_id, self.env_kwargs)
        env = GymEnv(
            env_id,
            log_dir=os.path.join(self.args.load_path, "movie")
            if self.args.render else None,
            record_video=self.args.record,
        )
        env.env.seed(self.args.seed)
        if self.args.c2d:
            env = C2DEnv(env)

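        # on the first call, wrap with NormalizedEnv so normalization statistics persist;
        # later calls only replace the wrapped inner env (see the else branch below)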
        if self.env is None:
            self.env = NormalizedEnv(env)
            if self.args.mirror is True:
                if hasattr(env.unwrapped, "mirror_sizes"):
                    self.env.stats = SymmetricStats(
                        *env.unwrapped.mirror_sizes[:3], max_obs=4000)
                else:
                    self.args.mirror = False
            elif self.args.mirror == "new":
                self.env = SymEnv(self.env)
        else:
            # don't want to override the normalization
            self.env.replace_wrapped_env(env)
Example #2
def test_continuous2discrete():
    continuous_env = GymEnv('Pendulum-v0', record_video=False)
    discrete_env = C2DEnv(continuous_env, n_bins=10)
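    # C2DEnv discretizes each continuous action dimension into n_bins choices,
    # so Pendulum's single Box dimension becomes a MultiDiscrete([10]) action space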

    assert np.all(discrete_env.action_space.nvec == np.array([10]))

    discrete_env.reset()
    out = discrete_env.step([3])  # pick bin 3 of 10 for the single action dimension
Example #3
    def setUpClass(cls):
        cls.env = GymEnv('Pendulum-v0')
        pol = RandomPol(cls.env.observation_space, cls.env.action_space)
        sampler = EpiSampler(cls.env, pol, num_parallel=1)
        epis = sampler.sample(pol, max_steps=32)

        cls.traj = Traj()
        cls.traj.add_epis(epis)
        cls.traj.register_epis()
Example #4
def test_flatten2dict():
    dict_env = gym.make('PendulumDictEnv-v0')
    dict_env = GymEnv(dict_env)
    dict_ob = dict_env.observation_space.sample()
    dict_observation_space = dict_env.observation_space
    dict_keys = dict_env.observation_space.spaces.keys()
    env = _make_flat(dict_env, dict_keys)
    flatten_ob = env.observation(dict_ob)
    recovered_dict_ob = flatten_to_dict(flatten_ob, dict_observation_space,
                                        dict_keys)
    tf = []
    for (a_key, a_val), (b_key, b_val) in zip(dict_ob.items(),
                                              recovered_dict_ob.items()):
        tf.append(a_key == b_key)
        tf.append(all(a_val == b_val))
    assert all(tf)
Example #5
    def setUpClass(cls):
        env = GymEnv('Pendulum-v0')
        random_pol = RandomPol(env.observation_space, env.action_space)
        sampler = EpiSampler(env, random_pol, num_parallel=1)
        epis = sampler.sample(random_pol, max_steps=32)
        traj = Traj()
        traj.add_epis(epis)
        traj.register_epis()

        cls.num_step = traj.num_step

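        # store the shared objects in a local redis instance (pickled with cloudpickle below)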
        make_redis('localhost', '6379')
        cls.r = get_redis()

        cls.r.set('env', cloudpickle.dumps(env))
        cls.r.set('traj', cloudpickle.dumps(traj))

        pol_net = PolNet(env.observation_space, env.action_space)
        gpol = GaussianPol(env.observation_space, env.action_space, pol_net)
        pol_net = PolNet(env.observation_space,
                         env.action_space, deterministic=True)
        dpol = DeterministicActionNoisePol(
            env.observation_space, env.action_space, pol_net)
        model_net = ModelNet(env.observation_space, env.action_space)
        mpcpol = MPCPol(env.observation_space,
                        env.action_space, model_net, rew_func)
        q_net = QNet(env.observation_space, env.action_space)
        qfunc = DeterministicSAVfunc(
            env.observation_space, env.action_space, q_net)
        aqpol = ArgmaxQfPol(env.observation_space, env.action_space, qfunc)
        v_net = VNet(env.observation_space)
        vfunc = DeterministicSVfunc(env.observation_space, v_net)

        cls.r.set('gpol', cloudpickle.dumps(gpol))
        cls.r.set('dpol', cloudpickle.dumps(dpol))
        cls.r.set('mpcpol', cloudpickle.dumps(mpcpol))
        cls.r.set('qfunc', cloudpickle.dumps(qfunc))
        cls.r.set('aqpol', cloudpickle.dumps(aqpol))
        cls.r.set('vfunc', cloudpickle.dumps(vfunc))

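        # discretized copy of the environment for the multi-categorical policy below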
        c2d = C2DEnv(env)
        pol_net = PolNet(c2d.observation_space, c2d.action_space)
        mcpol = MultiCategoricalPol(
            c2d.observation_space, c2d.action_space, pol_net)

        cls.r.set('mcpol', cloudpickle.dumps(mcpol))
Example #6
def main(args):
    init_ray(args.num_cpus, args.num_gpus, args.ray_redis_address)

    if not os.path.exists(args.log):
        os.makedirs(args.log)
    if not os.path.exists(os.path.join(args.log, 'models')):
        os.mkdir(os.path.join(args.log, 'models'))
    score_file = os.path.join(args.log, 'progress.csv')
    logger.add_tabular_output(score_file)
    logger.add_tensorboard_output(args.log)
    with open(os.path.join(args.log, 'args.json'), 'w') as f:
        json.dump(vars(args), f)
    pprint(vars(args))

    # disable video recording when doing distributed training
    env = GymEnv(args.env_name)
    env.env.seed(args.seed)
    if args.c2d:
        env = C2DEnv(env)

    observation_space = env.observation_space
    action_space = env.action_space
    pol_net = PolNet(observation_space, action_space)
    rnn = False
    # pol_net = PolNetLSTM(observation_space, action_space)
    # rnn = True
    if isinstance(action_space, gym.spaces.Box):
        pol = GaussianPol(observation_space, action_space, pol_net, rnn=rnn)
    elif isinstance(action_space, gym.spaces.Discrete):
        pol = CategoricalPol(observation_space, action_space, pol_net)
    elif isinstance(action_space, gym.spaces.MultiDiscrete):
        pol = MultiCategoricalPol(observation_space, action_space, pol_net)
    else:
        raise ValueError('Only Box, Discrete, and MultiDiscrete are supported')

    vf_net = VNet(observation_space)
    vf = DeterministicSVfunc(observation_space, vf_net)

    trainer = TrainManager(Trainer,
                           args.num_trainer,
                           args.master_address,
                           args=args,
                           vf=vf,
                           pol=pol)
    sampler = EpiSampler(env, pol, args.num_parallel, seed=args.seed)

    total_epi = 0
    total_step = 0
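    # best mean episode reward seen so far; used to decide when to save the *_max.pkl snapshots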
    max_rew = -1e6
    start_time = time.time()

    while args.max_epis > total_epi:

        with measure('sample'):
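            # sync the sampler's copy of the policy with the trainer's latest weights before sampling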
            sampler.set_pol_state(trainer.get_state("pol"))
            epis = sampler.sample(max_steps=args.max_steps_per_iter)

        with measure('train'):
            result_dict = trainer.train(epis=epis)

        step = result_dict["traj_num_step"]
        total_step += step
        total_epi += result_dict["traj_num_epi"]

        rewards = [np.sum(epi['rews']) for epi in epis]
        mean_rew = np.mean(rewards)
        elapsed_time = time.time() - start_time
        logger.record_tabular('ElapsedTime', elapsed_time)
        logger.record_results(args.log,
                              result_dict,
                              score_file,
                              total_epi,
                              step,
                              total_step,
                              rewards,
                              plot_title=args.env_name)

        with measure('save'):
            pol_state = trainer.get_state("pol")
            vf_state = trainer.get_state("vf")
            optim_pol_state = trainer.get_state("optim_pol")
            optim_vf_state = trainer.get_state("optim_vf")

            torch.save(pol_state,
                       os.path.join(args.log, 'models', 'pol_last.pkl'))
            torch.save(vf_state, os.path.join(args.log, 'models',
                                              'vf_last.pkl'))
            torch.save(optim_pol_state,
                       os.path.join(args.log, 'models', 'optim_pol_last.pkl'))
            torch.save(optim_vf_state,
                       os.path.join(args.log, 'models', 'optim_vf_last.pkl'))

            if mean_rew > max_rew:
                torch.save(pol_state,
                           os.path.join(args.log, 'models', 'pol_max.pkl'))
                torch.save(vf_state,
                           os.path.join(args.log, 'models', 'vf_max.pkl'))
                torch.save(
                    optim_pol_state,
                    os.path.join(args.log, 'models', 'optim_pol_max.pkl'))
                torch.save(
                    optim_vf_state,
                    os.path.join(args.log, 'models', 'optim_vf_max.pkl'))
                max_rew = mean_rew
    del sampler
    del trainer
Example #7
if not os.path.exists(args.pol_dir):
    os.mkdir(args.pol_dir)

with open(os.path.join(args.pol_dir, 'args.json'), 'w') as f:
    json.dump(vars(args), f)
pprint(vars(args))

np.random.seed(args.seed)
torch.manual_seed(args.seed)

device_name = 'cpu' if args.cuda < 0 else "cuda:{}".format(args.cuda)
device = torch.device(device_name)
set_device(device)

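# video_schedule=lambda x: True records a video of every episode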
env = GymEnv(args.env_name,
             log_dir=os.path.join(args.pol_dir, 'optimal_movie'),
             record_video=True,
             video_schedule=lambda x: True)
env.env.seed(args.seed)
if args.c2d:
    env = C2DEnv(env)

observation_space = env.observation_space
action_space = env.action_space

if args.ddpg:
    pol_net = PolNet(observation_space,
                     action_space,
                     args.pol_h1,
                     args.pol_h2,
                     deterministic=True)
    noise = OUActionNoise(action_space)
Example #8
    def setUp(self):
        self.env = GymEnv('Pendulum-v0')
        self.env = SkillEnv(self.env, num_skill=4)
Example #9
    def setUp(self):
        self.env = GymEnv('Pendulum-v0')
Example #10
    def setUp(self):
        self.env = GymEnv('CartPole-v0')
Example #11
with open(os.path.join(args.pol_dir, 'args.json'), 'w') as f:
    json.dump(vars(args), f)
pprint(vars(args))

if not os.path.exists(os.path.join(args.epis_dir)):
    os.mkdir(args.epis_dir)

np.random.seed(args.seed)
torch.manual_seed(args.seed)

device_name = 'cpu' if args.cuda < 0 else "cuda:{}".format(args.cuda)
device = torch.device(device_name)
set_device(device)

env = GymEnv(args.env_name,
             log_dir=os.path.join(args.pol_dir, 'movie'),
             record_video=args.record)
env.env.seed(args.seed)
if args.c2d:
    env = C2DEnv(env)

ob_space = env.observation_space
ac_space = env.action_space

if args.ddpg:
    pol_net = PolNet(ob_space,
                     ac_space,
                     args.pol_h1,
                     args.pol_h2,
                     deterministic=True)
    noise = OUActionNoise(ac_space.shape)
Example #12
pprint(vars(args))

if not os.path.exists(os.path.join(args.log, 'models')):
    os.mkdir(os.path.join(args.log, 'models'))

np.random.seed(args.seed)
torch.manual_seed(args.seed)

device_name = 'cpu' if args.cuda < 0 else "cuda:{}".format(args.cuda)
device = torch.device(device_name)
set_device(device)

score_file = os.path.join(args.log, 'progress.csv')
logger.add_tabular_output(score_file)

env1 = GymEnv('HumanoidBulletEnv-v0')
env1.original_env.seed(args.seed)
env1 = AcInObEnv(env1)
env1 = RewInObEnv(env1)
env1 = C2DEnv(env1)

env2 = GymEnv('HumanoidFlagrunBulletEnv-v0')
env2.original_env.seed(args.seed)
env2 = AcInObEnv(env2)
env2 = RewInObEnv(env2)
env2 = C2DEnv(env2)

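# both environments must share the same observation/action layout so one policy can drive either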
assert env1.observation_space == env2.observation_space
assert env1.action_space.shape == env2.action_space.shape

ob_space = env1.observation_space
Example #13
    os.mkdir(os.path.join(args.log, 'models'))

np.random.seed(args.seed)
torch.manual_seed(args.seed)

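# bind this process to the CUDA device given by its distributed local rank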
args.cuda = args.local_rank

device_name = "cuda:{}".format(args.cuda)
device = torch.device(device_name)
set_device(device)

score_file = os.path.join(args.log, 'progress.csv')
logger.add_tabular_output(score_file)
logger.add_tensorboard_output(args.log)

env = GymEnv(args.env_name)
env.env.seed(args.seed)
if args.c2d:
    env = C2DEnv(env)

observation_space = env.observation_space
action_space = env.action_space

if args.rnn:
    pol_net = PolNetLSTM(observation_space,
                         action_space,
                         h_size=256,
                         cell_size=256)
else:
    pol_net = PolNet(observation_space, action_space)
if isinstance(action_space, gym.spaces.Box):
Example #14
    def setUp(self):
        dict_env = gym.make('PendulumDictEnv-v0')
        self.dict_observation_space = dict_env.observation_space
        env = _make_flat(dict_env, dict_env.observation_space.spaces.keys())
        self.env = GymEnv(env)
Example #15
    def setUpClass(cls):
        cls.env = GymEnv('Pendulum-v0')
        cls.pol = RandomPol(cls.env.observation_space, cls.env.action_space)
Example #16
device = torch.device(device_name)
set_device(device)

# set up the csv output file for logging
score_file = os.path.join(args.log, 'progress.csv')
logger.add_tabular_output(score_file)

# create the Gym environment
from pybullet_envs.bullet.kukaCamGymEnv import KukaCamGymEnv
# with renders=True, machina's training does not progress; possibly a compatibility issue
env = KukaCamGymEnv(renders=False,
                    isDiscrete=False)
env = FlattenedObservationWrapper(env)
flattend_observation_space = env.flattend_observation_space

env = GymEnv(env,
             log_dir=os.path.join(args.log, 'movie'),
             record_video=args.record)
env.env.seed(args.seed)

# dimensions of the observation and action spaces
observation_space = env.observation_space
action_space = env.action_space
print('obs: {0}, act: {1}'.format(observation_space, action_space))

# Q-Network
print('Qnet')
qf_net = QTOptNet(observation_space, action_space)
qf = DeterministicSAVfunc(
    flattend_observation_space,
    action_space,
    qf_net)
Example #17
from machina import logger
from machina.utils import measure
from machina.traj import Traj
from machina.algos import ppo_clip
import premaidai_gym

from util.simple_net import PolNet, VNet

seed = 42
np.random.seed(seed)
torch.manual_seed(seed)

log_dir_name = 'garbage'
env_name = 'RoboschoolPremaidAIWalker-v0'
env = GymEnv(env_name,
             log_dir=os.path.join(log_dir_name, 'movie'),
             record_video=True)
env.env.seed(seed)

# observation space and action space
observation_space = env.observation_space
action_space = env.action_space

# policy
pol_net = PolNet(observation_space, action_space)
pol = GaussianPol(observation_space, action_space, pol_net)
# value function
vf_net = VNet(observation_space)
vf = DeterministicSVfunc(observation_space, vf_net)

# optimizer for both models