# ---- Example No. 1 ----
# Environment size parameters (make_env accepts more; check its signature).
hiders = 1
seekers = 1
boxes = 1
ramps = 1
food = 0
rooms = 2

# Run-time toggles for this example script.
display = False
load_weights = True
save_weights = False
debug = False

# Build the multi-agent hide-and-seek environment, with per-agent lidar
# enabled so the observations can be visualised.
env = hide_and_seek.make_env(
    n_hiders=hiders,
    n_seekers=seekers,
    n_boxes=boxes,
    n_ramps=ramps,
    n_food=food,
    n_rooms=rooms,
    n_lidar_per_agent=30,
    visualize_lidar=True,
)

# NOTE(review): experimental wrappers left over from testing — probably unused.
# rewardWrapper = hide_and_seek.HideAndSeekRewardWrapper(env, n_hiders=hiders, n_seekers=seekers)
# trackStatW = hide_and_seek.TrackStatWrapper(env, boxes, ramps, food)

# Fixed seed + reset gives a reproducible episode.
env.seed(42)
env.reset()

agents = []

# keras-rl agent/core reference:
# https://github.com/keras-rl/keras-rl/blob/master/rl/core.py
# ---- Example No. 2 ----
import numpy as np
import tensorflow as tf

# The agents built below rely on TF1-style graph execution.
tf.compat.v1.disable_eager_execution()

# Environment size parameters (make_env accepts more; check its signature).
hiders = 1
seekers = 1
boxes = 1
ramps = 1
food = 0
rooms = 2

# Run-time toggles for this example script.
display = True
load_weights = False

# Build the multi-agent hide-and-seek environment.
env = hide_and_seek.make_env(
    n_hiders=hiders,
    n_seekers=seekers,
    n_boxes=boxes,
    n_ramps=ramps,
    n_food=food,
    n_rooms=rooms,
)

# NOTE(review): experimental wrappers left over from testing — probably unused.
# rewardWrapper = hide_and_seek.HideAndSeekRewardWrapper(env, n_hiders=hiders, n_seekers=seekers)
# trackStatW = hide_and_seek.TrackStatWrapper(env, boxes, ramps, food)

# Fixed seed + reset gives a reproducible episode.
env.seed(42)
env.reset()

# One agent per participant; optionally restore previously saved weights.
agents = []
for idx in range(hiders + seekers):
    agents.append(get_agent(env, idx))
    if load_weights:
        agents[-1].load_weights("agent_%i_weights.h5f" % (idx))
def test_ppo(exp_dir, itr='last'):
    """Load a saved policy from *exp_dir* and roll it out in a fresh env.

    exp_dir -- experiment output directory produced by a training run.
    itr     -- which saved snapshot to load (default 'last'; presumably the
               most recent iteration — semantics defined by load_policy_and_env).
    """
    # The first element of the returned tuple (a restored env) is discarded;
    # a brand-new environment is created for the rollout instead.
    _restored_env, get_action, lstm = load_policy_and_env(exp_dir, itr=itr)
    run_policy(hide_and_seek.make_env(), get_action, lstm=lstm)
import tensorflow as tf
import gym
import time

from ppo_pkg.ppo import ppo
from mae_envs.envs import hide_and_seek
from ma_policy.ma_policy import MAPolicy
from testing.test_policy import test_ppo

# Choose the desired environment (constructor handed to the PPO loop).
def env_fn():
    return hide_and_seek.make_env()

# Root output directory; each run gets a time-stamped sub-directory so that
# successive runs do not overwrite one another.
dir_str = '../Testing/exp/'
now_str = time.asctime(time.localtime())

# Logger configuration: where to save information during training.
logger_kwargs = {
    'output_dir': dir_str + now_str,
    'exp_name': 'hide_and_seek',
}

# Run the PPO training loop (check ppo.py for parameter documentation).
ppo(
    env_fn=env_fn,
    pi_lr=3e-4,
    vf_lr=3e-4,
    steps_per_epoch=10000,
    epochs=1000,
    train_pi_iters=50,
    train_v_iters=50,
    logger_kwargs=logger_kwargs,
)

# Finally, evaluate the policy saved by the run above.
test_ppo(dir_str + now_str)