default=False, help='Agent Playing.') args = parse.parse_args() TEAM = 'HELIOS' PORT = 6000 ACTOR_MODEL_NAME = "ppo_actor_go_to_ball" CRITIC_MODEL_NAME = "ppo_critic_go_to_ball" use_cuda = torch.cuda.is_available() device = torch.device("cuda" if use_cuda else "cpu") hfo_env = HFOEnv(is_offensive=True, strict=True, continuous=True, team=TEAM, port=PORT, selected_action=DASH_ACTION, selected_reward=GO_TO_BALL_REWARD, selected_state=BALL_AXIS_POSITION_SPACE) unum = hfo_env.getUnum() params = PARAMS['ppo'] ppo = PPO(hfo_env.observation_space.shape[0], hfo_env.action_space.shape[0], params) def train(): writer = SummaryWriter('logs/{}_PPO_GO_TO_BALL'.format( datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S"))) Transition = namedtuple('Transition', ['s', 'a', 'a_log_p', 'r', 's_']) try:
# Command-line interface: a single boolean switch, --play, which is False
# unless given on the command line.
parse = argparse.ArgumentParser(
    description='Agent Args',
    formatter_class=argparse.RawTextHelpFormatter)
parse.add_argument('--play', dest='play', action='store_true',
                   default=False, help='Agent Playing.')
args = parse.parse_args()

# Team name and port handed to HFOEnv below.
TEAM = 'HELIOS'
PORT = 6000
# NOTE(review): presumably the names under which the actor / critic networks
# are saved and loaded -- they are not used in the visible code; confirm.
ACTOR_MODEL_NAME = "ddpg_actor_ball_to_goal"
CRITIC_MODEL_NAME = "ddpg_critic_ball_to_goal"
# NOTE(review): presumably gates writing loss values to the summary writer --
# not used in the visible code; confirm.
ENABLE_LOSS_WRITE = False

# Environment configured with the conditional dash-or-kick action selector,
# the agent-and-ball potential reward and the agent-orientation / ball-position
# state selector (constants defined outside this view).
hfo_env = HFOEnv(is_offensive=True, strict=True,
                 continuous=True, team=TEAM, port=PORT,
                 selected_action=CONDITIONAL_DASH_OR_KICK_ACTION,
                 selected_reward=AGENT_AND_BALL_POTENCIAL_REWARD,
                 selected_state=AGENT_ORIENTATION_AND_BALL_POSITION_SPACE)
# NOTE(review): presumably this agent's uniform number -- confirm in HFOEnv.
unum = hfo_env.getUnum()

# DDPG hyper-parameters; the agent is sized from the first dimension of the
# environment's observation and action spaces.
params = PARAMS['ddpg']
ddpg = DDPG(
    hfo_env.observation_space.shape[0],
    hfo_env.action_space.shape[0],
    params)
# NOTE(review): OUNoise is presumably Ornstein-Uhlenbeck exploration noise
# over the action space -- confirm against its definition.
ou_noise = OUNoise(hfo_env.action_space)
# Replay buffer whose size comes from params['replay_buffer_size'].
replay_buffer = ReplayBuffer(params['replay_buffer_size'])


def train():
    """Start a DDPG ball-to-goal training run.

    The visible part creates a SummaryWriter whose log directory is
    ``logs/<timestamp>_DDPG_BALL_TO_GOAL`` (timestamp taken from the current
    local time) and initialises the frame counter to zero.

    NOTE(review): only these first statements are visible in this view; the
    rest of the training loop is not shown.
    """
    writer = SummaryWriter(
        'logs/{}_DDPG_BALL_TO_GOAL'.format(datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")))
    frame_idx = 0
# Fixed-action driver for HFOEnv: plays episodes back to back, sending the
# same action ([-0.6]) on every step, until the HFO server goes down.
import datetime
import logging
import os
import pickle
import itertools

import hfo
import numpy as np

from src.lib.hfo_env import HFOEnv
from src.lib.utils.action_selector import TEST_ACTION

team = 'HELIOS'
port = 6000
hfo_env = HFOEnv(is_offensive=True, strict=True,
                 continuous=True, team=team, port=port,
                 selected_action=TEST_ACTION)

for episode in itertools.count():
    status = hfo.IN_GAME
    done = True
    state = hfo_env.reset()
    # Step with the constant test action until the environment reports the
    # episode finished or the game status leaves IN_GAME.
    while status == hfo.IN_GAME:
        next_state, reward, done, status = hfo_env.step([-0.6])
        if done:
            break
    if status == hfo.SERVER_DOWN:
        hfo_env.act(hfo.QUIT)
        # Leave the episode loop once the server is gone; without this the
        # next iteration would call reset() against a server that is down.
        break