from mae_envs.envs import hide_and_seek

hiders = 1
seekers = 1
boxes = 1
ramps = 1
food = 0
rooms = 2
display = False
load_weights = True
save_weights = False
debug = False

env = hide_and_seek.make_env(n_hiders=hiders, n_seekers=seekers,
                             n_boxes=boxes, n_ramps=ramps,
                             n_food=food, n_rooms=rooms,
                             n_lidar_per_agent=30, visualize_lidar=True)

# probably shouldn't use these two wrappers, but was testing:
# rewardWrapper = hide_and_seek.HideAndSeekRewardWrapper(env, n_hiders=hiders, n_seekers=seekers)
# trackStatW = hide_and_seek.TrackStatWrapper(env, boxes, ramps, food)

# run one episode
env.seed(42)
env.reset()

agents = []

# agent interface follows keras-rl:
# https://github.com/keras-rl/keras-rl/blob/master/rl/core.py
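# The episode loop itself isn't shown above. A minimal sketch of what it
# might look like, stepping the env with sampled actions; a real run would
# query each agent in `agents` for its action instead of sampling:
obs = env.reset()
done = False
while not done:
    action = env.action_space.sample()  # placeholder: random actions
    obs, reward, done, info = env.step(action)
    if display:
        env.render()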
import numpy as np
import tensorflow as tf

from mae_envs.envs import hide_and_seek

tf.compat.v1.disable_eager_execution()

# environment parameters (there are more; check make_env)
hiders = 1
seekers = 1
boxes = 1
ramps = 1
food = 0
rooms = 2
display = True
load_weights = False

env = hide_and_seek.make_env(n_hiders=hiders, n_seekers=seekers,
                             n_boxes=boxes, n_ramps=ramps,
                             n_food=food, n_rooms=rooms)

# probably shouldn't use these two wrappers, but was testing:
# rewardWrapper = hide_and_seek.HideAndSeekRewardWrapper(env, n_hiders=hiders, n_seekers=seekers)
# trackStatW = hide_and_seek.TrackStatWrapper(env, boxes, ramps, food)

# run one episode
env.seed(42)
env.reset()

# build one agent per hider/seeker, optionally restoring saved weights
agents = []
for i in range(hiders + seekers):
    agents.append(get_agent(env, i))
    if load_weights:
        agents[-1].load_weights("agent_%i_weights.h5f" % i)
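# NOTE: get_agent is not defined in these snippets; it presumably builds one
# keras-rl agent per environment agent. A minimal hypothetical placeholder,
# assuming the keras-rl Agent interface linked above -- a real implementation
# would build networks over the per-agent observation/action spaces:
from rl.core import Agent


class RandomAgent(Agent):
    """Placeholder agent that samples uniformly from the action space."""

    def __init__(self, action_space, **kwargs):
        super().__init__(**kwargs)
        self.action_space = action_space
        self.compiled = True

    def forward(self, observation):
        # ignore the observation and act randomly
        return self.action_space.sample()

    def backward(self, reward, terminal):
        # no learning in the placeholder
        return []

    def compile(self, optimizer, metrics=None):
        pass

    def load_weights(self, filepath):
        pass

    def save_weights(self, filepath, overwrite=False):
        pass

    @property
    def layers(self):
        return []


def get_agent(env, i):
    # hypothetical helper: index i would select agent i's slice of the
    # joint observation/action spaces in a real implementation
    return RandomAgent(env.action_space)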
from mae_envs.envs import hide_and_seek

# load_policy_and_env and run_policy are the policy-loading/rollout helpers
# (in the style of Spinning Up's test_policy utilities) and must be
# imported or defined in this module


def test_ppo(exp_dir, itr='last'):
    _, get_action, lstm = load_policy_and_env(exp_dir, itr=itr)
    env = hide_and_seek.make_env()
    run_policy(env, get_action, lstm=lstm)
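# Usage sketch: point test_ppo at the logger output directory written during
# training (the timestamped path below is made up for illustration):
# test_ppo('../Testing/exp/Wed Jun  1 12:00:00 2022', itr='last')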
import time

import gym
import tensorflow as tf

from ppo_pkg.ppo import ppo
from mae_envs.envs import hide_and_seek
from ma_policy.ma_policy import MAPolicy
from testing.test_policy import test_ppo

# choose desired environment
env_fn = lambda: hide_and_seek.make_env()

# set output directory and a time-stamped logger dict
# for saving information during training
dir_str = '../Testing/exp/'
now_str = time.asctime(time.localtime())
logger_kwargs = dict(output_dir=dir_str + now_str, exp_name='hide_and_seek')

# run the PPO training loop (check ppo.py for documentation)
ppo(env_fn=env_fn,
    pi_lr=3e-4,
    vf_lr=3e-4,
    steps_per_epoch=10000,
    epochs=1000,
    train_pi_iters=50,
    train_v_iters=50,
    logger_kwargs=logger_kwargs)

# test the learned policy
test_ppo(dir_str + now_str)