from runner import Runner
from common.arguments import get_args
from common.utils import make_env
import numpy as np
import random
import torch

if __name__ == '__main__':
    # get the params
    args = get_args()
    env, args = make_env(args)
    runner = Runner(args, env)
    if args.evaluate:
        returns = runner.evaluate()
        print('Average return is', returns)
    else:
        runner.run()
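# --- Hedged example (not part of the original entry point) ---
# numpy, random and torch are imported above but never used; the usual reason
# they appear in an RL entry point is RNG seeding. A minimal, self-contained
# sketch of that, assuming a user-chosen integer seed:
import random
import numpy as np
import torch

def seed_everything(seed):
    # seed python's, numpy's and torch's RNGs for reproducible runs
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)

# e.g. seed_everything(args.seed)  # assumes the argument parser exposes a seed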
import numpy as np
from keras.utils import to_categorical
import copy
from common.utils import eligibility_traces, default_config, make_env, RunningMeanStd, str2bool, discount_rewards
from common.ppo_independant import PPOPolicyNetwork, ValueNetwork

render = False
normalize_inputs = True

config = default_config()
LAMBDA = float(config['agent']['lambda'])
lr_actor = float(config['agent']['lr_actor'])
meta_skip_etrace = str2bool(config['agent']['meta_skip_etrace'])
communication_round = int(config['agent']['fen_communication_round'])

env = make_env(config, normalize_inputs)
env.toggle_compute_neighbors()
n_agent = env.n_agent
T = env.T
GAMMA = env.GAMMA
n_episode = env.n_episode
max_steps = env.max_steps
n_actions = env.n_actions
n_signal = env.n_signal
max_u = env.max_u
i_episode = 0

# one meta-policy (signal selector) per agent
meta_Pi = []
meta_V = []
for i in range(n_agent):
    meta_Pi.append(PPOPolicyNetwork(num_features=env.input_size + 2,
                                    num_actions=n_signal,
                                    layer_size=128,
                                    epsilon=0.1,
                                    learning_rate=lr_actor))
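# --- Hedged example (not part of the original script) ---
# A minimal, self-contained illustration of what `to_categorical` (imported
# above) does with a sampled meta-signal index: it one-hot encodes it so the
# signal can be appended to an agent's observation. Values are illustrative.
from keras.utils import to_categorical

example_signal = 2                                     # index sampled from a meta-policy
one_hot = to_categorical(example_signal, num_classes=4)
print(one_hot)                                         # -> [0. 0. 1. 0.]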
model_path = Path(models_dir).joinpath(
    "{}_{}".format("wave" if wave else "gym", label)
)
# e.g.: ./logs
log_path = Path(log_dir)

# create folders
model_path.mkdir(parents=True, exist_ok=True)
log_path.mkdir(exist_ok=True)

# create the wave or gym environment, with or without multiprocessing
env_id = "LunarLander" if wave else "LunarLander-v2"
if num_cpu > 1:
    env = make_multi_env(num_cpu, env_id, wave, render_mode=False, reset_mode="random")
else:
    env = make_env(env_id, wave, render_mode=False, reset_mode="random")

# create A2C with Mlp policy, and the callback to save snapshots
model = A2C(MlpPolicy, env, ent_coef=0.1, verbose=0, tensorboard_log=log_dir)
callback = save_callback(
    model_path,
    "snapshot-",
    save_interval,
    call_interval=model.n_steps * num_cpu,
    order=order,
    order_str=order_str,
)

# train the model, timing the run
print("Training...")
_t = time.time()
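# --- Hedged continuation (assumed; the original fragment stops right here) ---
# A typical stable-baselines run would continue roughly like this.
# `total_timesteps` is an assumed budget, not taken from the original.
total_timesteps = 1_000_000  # assumption: adjust to the real training budget
model.learn(total_timesteps, callback=callback)
print("Training took {:.1f}s".format(time.time() - _t))
# save final model
model.save(str(model_path.joinpath("final")))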
from stable_baselines import A2C  # noqa: E402

wave = True
render_episodes = 20

if len(sys.argv) < 2:
    print("USAGE: {} PATH-TO-MODEL-FILE".format(sys.argv[0]))
    exit(-1)
model_path = sys.argv[1]
if not os.path.isfile(model_path):
    print("Path '{}' does not exist.".format(model_path))
    exit(-1)

env_id = "LunarLander" if wave else "LunarLander-v2"
env = make_env(env_id, wave, port=4000, reset_mode="random")
model = A2C.load(model_path)

episode = render_episodes
reward_sum = 0
obs = env.reset()
while episode > 0:
    action, _states = model.predict(obs)
    obs, reward, done, _ = env.step(action)
    reward_sum += reward
    env.render()
    if done:
        print("Points: {}".format(reward_sum))
        episode -= 1
        reward_sum = 0
        obs = env.reset()  # start the next episode from a fresh state
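# Example invocation (hypothetical file and model names; the script takes
# exactly one argument, the path to a saved A2C model):
#   python render_model.py ./models/wave_a2c/snapshot-1000000.zip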
def test(seed, model_filename, vec_filename, train, test,
         test_as_class=0, render=False, save_file="default.yml"):
    global g_step, g_obs_data
    print("Testing:")
    total_rewards = []
    distance_xs = []
    os.makedirs(f"{folder}/obs_data_videos", exist_ok=True)
    g_step = 0
    print(f" Seed {seed}, model {model_filename} vec {vec_filename}")
    print(f" Train on {train}, test on {test}, w/ bodyinfo {test_as_class}")
    if test_as_class >= 0:
        bodyinfo = test_as_class
    else:
        if args.with_bodyinfo:
            bodyinfo = test // 100
        else:
            bodyinfo = 0
    # default_wrapper = wrapper.BodyinfoWrapper
    # if args.disable_wrapper:
    #     default_wrapper = None
    default_wrapper = wrapper.WalkerWrapper
    eval_env = utils.make_env(template=utils.template(test), render=render,
                              robot_body=test, wrapper=default_wrapper,
                              body_info=bodyinfo)
    eval_env = DummyVecEnv([eval_env])
    if args.vec_normalize:
        eval_env = VecNormalize.load(vec_filename, eval_env)
        eval_env.norm_reward = False
    eval_env.seed(seed)
    model = PPO.load(model_filename)
    obs = eval_env.reset()
    g_obs_data = np.zeros(shape=[args.test_steps, obs.shape[1]], dtype=np.float32)
    if render:
        # eval_env.env_method("set_view")
        import common.linux
        common.linux.fullscreen()
        # pause so there is time to press Ctrl+F11 and enter fullscreen mode
        print("\n\nWait for a while, so I have the time to press Ctrl+F11 to enter FullScreen Mode.\n\n")
        time.sleep(2)
    distance_x = 0
    # print(obs)
    total_reward = 0
    for step in tqdm(range(args.test_steps)):
        g_obs_data[step, :] = obs[0]
        action, _states = model.predict(obs, deterministic=True)
        obs, reward, done, info = eval_env.step(action)
        if render:
            eval_env.envs[0].camera_adjust()
            if args.disable_saving_image:
                time.sleep(0.01)
            else:
                (width, height, rgbPixels, _, _) = eval_env.envs[0].pybullet.getCameraImage(
                    1920, 1080, renderer=pybullet.ER_BULLET_HARDWARE_OPENGL)
                image = rgbPixels[:, :, :3]
                image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
                cv2.imwrite(f"{folder}/obs_data_videos/getCameraImage_b{test}_s{seed}_{step:05}.png", image)
        if done:
            # it should not matter if the env reset. I guess...
            # break
            pass
        else:
            # the last observation will be after reset, so skip the last
            distance_x = eval_env.envs[0].robot.body_xyz[0]
        total_reward += reward[0]
        # if render:
        #     time.sleep(0.01)
    eval_env.close()
    print(f"train {train}, test {test}, test_as_class {test_as_class}, "
          f"step {step}, total_reward {total_reward}, distance_x {distance_x}")
    if args.save_obs_data:
        base_obs_data = None
        sorted_data = g_obs_data.copy()
        if test != 0 or seed != 0:
            # if sorted_arg_obs_data exists, use the existing one,
            # because we want to compare the patterns of two experiments
            sorted_arg_obs_data = np.load(f"{folder}/sorted_arg_obs_data.npy")
            base_obs_data = np.load(f"{folder}/base_obs_data.npy")
        else:
            sorted_arg_obs_data = np.argsort(np.mean(sorted_data, axis=0))
            np.save(f"{folder}/sorted_arg_obs_data.npy", sorted_arg_obs_data)
            base_obs_data = g_obs_data.copy()
            np.save(f"{folder}/base_obs_data.npy", base_obs_data)
        # sorted_data = sorted_data[:, sorted_arg_obs_data]
        # base_obs_data = base_obs_data[:, sorted_arg_obs_data]
        for step in tqdm(range(args.test_steps)):
            plt.close()
            plt.figure(figsize=[10, 4])
            if test != 0 or seed != 0:
                x = sorted_data[step]
                plt.bar(np.arange(len(x)), x, color=[0.1, 0.3, 0.7, 0.5])
            x = base_obs_data[step]
            plt.bar(np.arange(len(x)), x, color=[0.6, 0.6, 0.6, 0.5])
            plt.ylim(-2, 2)
            plt.savefig(f"{folder}/obs_data_videos/barchart_b{test}_s{seed}_{step:05}.png")
            plt.close()
    total_rewards.append(total_reward)
    distance_xs.append(distance_x)
    # cast to plain floats so yaml does not serialize numpy float64 objects
    total_rewards = [float(x) for x in total_rewards]
    distance_xs = [float(x) for x in distance_xs]
    data = {
        "title": "test",
        "train": train,
        "test": test,
        "total_reward": total_rewards,
        "distance_x": distance_xs,
    }
    with open(save_file, "w") as f:
        yaml.dump(data, f)
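# --- Hedged usage example (not part of the original file) ---
# Calling the function above for one seed/body pair; the file names are
# illustrative, not the repo's actual artifact names.
# test(seed=0,
#      model_filename="model-ant-400.zip",
#      vec_filename="vecnormalize-400.pkl",
#      train=400, test=400, test_as_class=-1,
#      render=False, save_file="train400_test400_seed0.yml")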
import gym
import pybullet_envs
from stable_baselines3.common.vec_env.vec_frame_stack import VecFrameStack
from stable_baselines3.common.vec_env import DummyVecEnv
import common.utils as utils

utils.folder = "exp5"

venv = DummyVecEnv([utils.make_env(template=utils.template(400), robot_body=400, wrapper=None)])
venv = VecFrameStack(venv, 4)
obs = venv.reset()
print(obs.shape)
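# Expected output, assuming the unstacked observation has D features:
# (1, 4 * D) -- VecFrameStack concatenates the last four observations
# along the feature axis for the single DummyVecEnv worker.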
str_ids = "-".join(str(x) for x in training_bodies) if args.test_bodies == "": test_bodies = [] else: test_bodies = [int(x) for x in args.test_bodies.split(",")] default_wrapper = wrapper.BodyinfoWrapper if args.disable_wrapper: default_wrapper = None if with_bodyinfo: env = DummyVecEnv([ utils.make_env( rank=i, seed=utils.seed, wrapper=default_wrapper, render=args.render, robot_body=training_bodies[i % len(training_bodies)], body_info=training_bodies[i % len(training_bodies)] // 100) for i in range(train_num_envs) ]) save_filename = f"model-ant-{str_ids}-with-bodyinfo" else: env = DummyVecEnv([ utils.make_env(rank=i, seed=utils.seed, wrapper=default_wrapper, render=args.render, robot_body=training_bodies[i % len(training_bodies)], body_info=0) for i in range(train_num_envs)
training_bodies = [int(x) for x in args.train_bodies.split(",")]
str_ids = "-".join(str(x) for x in training_bodies)
if args.test_bodies == "":
    test_bodies = []
else:
    test_bodies = [int(x) for x in args.test_bodies.split(",")]

# default_wrapper = wrapper.BodyinfoWrapper
# if args.disable_wrapper:
#     default_wrapper = None
default_wrapper = wrapper.WalkerWrapper
# default_wrapper = None

if with_bodyinfo:
    env = DummyVecEnv([
        utils.make_env(
            template=utils.template(training_bodies[i % len(training_bodies)]),
            rank=i,
            seed=utils.seed,
            wrapper=default_wrapper,
            render=args.render,
            robot_body=training_bodies[i % len(training_bodies)],
            body_info=training_bodies[i % len(training_bodies)] // 100)
        for i in range(train_num_envs)
    ])
    save_filename = f"model-ant-{str_ids}-with-bodyinfo"
else:
    env = DummyVecEnv([
        utils.make_env(
            template=utils.template(training_bodies[i % len(training_bodies)]),
            rank=i,
            seed=utils.seed,
            wrapper=default_wrapper,
            render=args.render,
            robot_body=training_bodies[i % len(training_bodies)],
            body_info=0)
        for i in range(train_num_envs)
    ])
    save_filename = f"model-ant-{str_ids}"

if args.vec_normalize:
    env = VecNormalize(env, **normalize_kwargs)
if args.stack_frames > 1:
    env = VecFrameStack(env, args.stack_frames)

# drop loader-only keys so the rest can be forwarded to the algorithm
keys_remove = ["normalize", "n_envs", "n_timesteps", "policy"]
for key in keys_remove:
    del hyperparams[key]
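# --- Hedged continuation (assumed; the original fragment stops here) ---
# After stripping the loader-only keys, the remaining entries of
# `hyperparams` are typically passed straight to the algorithm constructor.
# "MlpPolicy" and the step budget below are assumptions, not from the original.
# model = PPO("MlpPolicy", env, **hyperparams)
# model.learn(total_timesteps=int(1e7))
# model.save(save_filename)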