import gym from rlkit.envs.point_robot import PointEnv import numpy as np from rlkit.envs import ENVS from rlkit.envs.wrappers import NormalizedBoxEnv print(ENVS) env = NormalizedBoxEnv(ENVS['point-robot']()) traj = [] obs = env.reset() traj.append(obs) while True: action = env.action_space.sample() # action = np.array([2.0, 2.0]) print(action) obs, rew, d, _ = env.step(action) print(obs, rew) traj.append(obs) env.render() if d: break print(traj) states = np.array(traj) import numpy as np import matplotlib
import numpy as np import time import argparse from rlkit.envs.wrappers import NormalizedBoxEnv parser = argparse.ArgumentParser() parser.add_argument('--exp_name', type=str, default='Ant') parser.add_argument('--ml', type=int, default=1000) args = parser.parse_args() import gym env = NormalizedBoxEnv(gym.make(args.exp_name+'-v2')) o = env.reset() # print(env.observation_space.high) max_path_length = args.ml path_length = 0 done = False c_r = 0.0 while (path_length < max_path_length) and (not done): path_length += 1 a = env.action_space.sample() o, r, done, _ = env.step(a) c_r += r env.render() print("step: ",path_length) print("o_max: ",np.max(o),np.argmax(o)) print("o_mean: ",np.mean(o)) print("a: ",a) print('r: ',r) print(done)