def __init__(self, rsc, cfg, visualize):
    """Wrap one RaisimGymEnv instance behind the standard gym.Env surface.

    Args:
        rsc: path to the resource directory handed to the C++ backend.
        cfg: parsed YAML config; its 'environment' section is re-serialized
            and forwarded to the backend constructor.
        visualize: whether the backend should open its visualizer.
    """
    self.env = rsg_anymal.RaisimGymEnv(
        rsc, dump(cfg['environment'], Dumper=RoundTripDumper), visualize)

    # dimensions are reported by the C++ side
    self.num_acts = self.env.getActionDim()
    self.num_obs = self.env.getObDim()

    # actions live in [-1, 1]^num_acts, observations in [-100, 100]^num_obs
    act_bound = np.ones(self.num_acts)
    obs_bound = np.ones(self.num_obs) * 100.
    self.action_space = spaces.Box(-act_bound, act_bound, dtype=np.float32)
    self.observation_space = spaces.Box(-obs_bound, obs_bound, dtype=np.float32)

    self.reward_range = (-float('inf'), float('inf'))
    self.metadata = {'render.modes': []}
home_path = task_path + "/../../../.." #argument test_mode = sys.argv[1] == 'True' # config cfg = YAML().load(open(task_path + "/cfg.yaml", 'r')) curriculum_start = cfg['environment']['curriculum']['curriculum_start'] # create environment from the configuration file if test_mode: cfg_tmp = cfg cfg_tmp['environment']['num_envs'] = 1 env = VecEnv( rsg_anymal.RaisimGymEnv( task_path + "/anymal", dump(cfg_tmp['environment'], Dumper=RoundTripDumper)), cfg['environment']) else: env = VecEnv( rsg_anymal.RaisimGymEnv( task_path + "/anymal", dump(cfg['environment'], Dumper=RoundTripDumper)), cfg['environment']) # shortcuts ob_dim = env.num_obs act_dim = env.num_acts # save the configuration and other files saver = ConfigurationSaver(log_dir=home_path + "/data",
# NOTE(review): chunk is truncated mid-call at the end; code left byte-identical.
mode = args.mode
weight_path = args.weight

# check if gpu is available; fall back to CPU otherwise
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# directories
task_path = os.path.dirname(os.path.realpath(__file__))
home_path = task_path + "/../../../../.."

# config
cfg = YAML().load(open(task_path + "/cfg.yaml", 'r'))

# create environment from the configuration file
env = VecEnv(
    rsg_anymal.RaisimGymEnv(home_path + "/rsc",
                            dump(cfg['environment'], Dumper=RoundTripDumper)),
    cfg['environment'])

# shortcuts: observation/action dimensions reported by the vectorized env
ob_dim = env.num_obs
act_dim = env.num_acts

# Training: one rollout covers a full episode (max_time / control_dt steps)
n_steps = math.floor(cfg['environment']['max_time'] / cfg['environment']['control_dt'])
total_steps = n_steps * env.num_envs
avg_rewards = []

# policy network built from the architecture section of the config
actor = ppo_module.Actor(
    ppo_module.MLP(cfg['architecture']['policy_net'], nn.LeakyReLU, ob_dim,
"""Train a PPO policy on the rsg_anymal environment with stable-baselines3."""
import gym
import os

from ruamel.yaml import YAML, dump, RoundTripDumper
from stable_baselines3 import PPO
from stable_baselines3.ppo import MlpPolicy

from raisimGymTorch.env.bin import rsg_anymal
from raisimGymTorch.stable_baselines3.RaisimSbGymVecEnv import RaisimSbGymVecEnv as VecEnv

# Parallel environments
# directories
stb_path = os.path.dirname(os.path.realpath(__file__))
rsc_path = stb_path + "/../../../rsc"
task_path = stb_path + "/../env/envs/rsg_anymal"

# config — use a context manager so the YAML file handle is actually closed
# (the original passed a bare open() to YAML().load and leaked the handle)
with open(task_path + "/cfg.yaml", 'r') as cfg_file:
    cfg = YAML().load(cfg_file)

# create environment from the configuration file
env = VecEnv(
    rsg_anymal.RaisimGymEnv(rsc_path,
                            dump(cfg['environment'], Dumper=RoundTripDumper)),
    cfg['environment'])
obs = env.reset()

# one PPO rollout spans a full episode: max_time / control_dt control steps
n_steps = int(cfg['environment']['max_time'] / cfg['environment']['control_dt'])

# 4 minibatches per rollout across all parallel envs
model = PPO(MlpPolicy, env,
            n_steps=n_steps,
            verbose=1,
            batch_size=int(n_steps * env.num_envs / 4),
            n_epochs=4)
model.learn(total_timesteps=250000000)