Example #1
    def __init__(self, rsc, cfg, visualize):
        # wrap the compiled RaisimGym environment (assumes numpy as np, gym.spaces,
        # ruamel.yaml's dump/RoundTripDumper, and raisimGymTorch.env.bin.rsg_anymal
        # are imported at module level)
        self.env = rsg_anymal.RaisimGymEnv(
            rsc, dump(cfg['environment'], Dumper=RoundTripDumper), visualize)
        self.num_acts = self.env.getActionDim()
        self.num_obs = self.env.getObDim()

        # symmetric action space in [-1, 1] and loose observation bounds
        self.action_space = spaces.Box(np.ones(self.num_acts) * -1.,
                                       np.ones(self.num_acts) * 1.,
                                       dtype=np.float32)
        self.observation_space = spaces.Box(np.ones(self.num_obs) * -100.,
                                            np.ones(self.num_obs) * 100.,
                                            dtype=np.float32)

        self.reward_range = (-float('inf'), float('inf'))
        self.metadata = {'render.modes': []}
Example #2
home_path = task_path + "/../../../.."

# command-line argument: test-mode flag ('True' enables a single-environment run; requires sys)
test_mode = sys.argv[1] == 'True'

# config
cfg = YAML().load(open(task_path + "/cfg.yaml", 'r'))
curriculum_start = cfg['environment']['curriculum']['curriculum_start']

# create environment from the configuration file
if test_mode:
    # run a single environment for testing; note that cfg_tmp aliases cfg here,
    # so the num_envs override below also applies to cfg itself
    cfg_tmp = cfg
    cfg_tmp['environment']['num_envs'] = 1
    env = VecEnv(
        rsg_anymal.RaisimGymEnv(
            task_path + "/anymal",
            dump(cfg_tmp['environment'], Dumper=RoundTripDumper)),
        cfg['environment'])
else:
    env = VecEnv(
        rsg_anymal.RaisimGymEnv(
            task_path + "/anymal",
            dump(cfg['environment'], Dumper=RoundTripDumper)),
        cfg['environment'])

# shortcuts
ob_dim = env.num_obs
act_dim = env.num_acts

# save the configuration and other files
saver = ConfigurationSaver(log_dir=home_path + "/data",
                           # save_items is an assumption; the original snippet is
                           # truncated after the log_dir argument
                           save_items=[task_path + "/cfg.yaml"])
Example #3
# command-line arguments ('mode' and 'weight' are parsed earlier, e.g. with argparse)
mode = args.mode
weight_path = args.weight

# check if gpu is available
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# directories
task_path = os.path.dirname(os.path.realpath(__file__))
home_path = task_path + "/../../../../.."

# config
cfg = YAML().load(open(task_path + "/cfg.yaml", 'r'))

# create environment from the configuration file
env = VecEnv(
    rsg_anymal.RaisimGymEnv(home_path + "/rsc",
                            dump(cfg['environment'], Dumper=RoundTripDumper)),
    cfg['environment'])

# shortcuts
ob_dim = env.num_obs
act_dim = env.num_acts

# Training
# steps per environment per policy update = max_time / control_dt
n_steps = math.floor(cfg['environment']['max_time'] /
                     cfg['environment']['control_dt'])
total_steps = n_steps * env.num_envs

avg_rewards = []

actor = ppo_module.Actor(
    ppo_module.MLP(cfg['architecture']['policy_net'], nn.LeakyReLU, ob_dim,
                   act_dim),
    # the remaining constructor arguments (action-noise distribution and device)
    # are truncated in the original snippet; the ones below are an assumption
    ppo_module.MultivariateGaussianDiagonalCovariance(act_dim, 1.0),
    device)
Example #4
import gym
import os

from ruamel.yaml import YAML, dump, RoundTripDumper
from stable_baselines3 import PPO
from stable_baselines3.ppo import MlpPolicy
from raisimGymTorch.env.bin import rsg_anymal
from raisimGymTorch.stable_baselines3.RaisimSbGymVecEnv import RaisimSbGymVecEnv as VecEnv


# parallel environments (wrapped below as a RaisimSbGymVecEnv)
# directories
stb_path = os.path.dirname(os.path.realpath(__file__))
rsc_path = stb_path + "/../../../rsc"
task_path = stb_path + "/../env/envs/rsg_anymal"

# config
cfg = YAML().load(open(task_path + "/cfg.yaml", 'r'))

# create environment from the configuration file
env = VecEnv(rsg_anymal.RaisimGymEnv(rsc_path, dump(cfg['environment'], Dumper=RoundTripDumper)), cfg['environment'])
obs = env.reset()

# one rollout is n_steps per environment; train on quarter-rollout minibatches
n_steps = int(cfg['environment']['max_time'] / cfg['environment']['control_dt'])
model = PPO(MlpPolicy, env,
            n_steps=n_steps,
            verbose=1,
            batch_size=int(n_steps * env.num_envs / 4),
            n_epochs=4)

model.learn(total_timesteps=250000000)
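
# (not in the original example) the script above trains but never persists the
# policy; a minimal follow-up using stable-baselines3's model.save(), where the
# output path is an assumption
model.save(stb_path + "/ppo_rsg_anymal")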