Beispiel #1
0
                   default=False,
                   help='Agent Playing.')
args = parse.parse_args()

# Team name and port handed to HFOEnv below.
TEAM = 'HELIOS'
PORT = 6000
# Presumably the names under which the PPO actor/critic networks are
# stored -- their use lies outside this excerpt.
ACTOR_MODEL_NAME = "ppo_actor_go_to_ball"
CRITIC_MODEL_NAME = "ppo_critic_go_to_ball"

# Select the CUDA device whenever torch reports it as available, else the CPU.
use_cuda = torch.cuda.is_available()
device = torch.device("cuda") if use_cuda else torch.device("cpu")

# Environment configured with the dash action, the go-to-ball reward and the
# ball-axis-position state selectors.
hfo_env = HFOEnv(
    is_offensive=True,
    strict=True,
    continuous=True,
    team=TEAM,
    port=PORT,
    selected_action=DASH_ACTION,
    selected_reward=GO_TO_BALL_REWARD,
    selected_state=BALL_AXIS_POSITION_SPACE,
)
unum = hfo_env.getUnum()

# The agent receives the first dimension of the observation and action space
# shapes together with the 'ppo' entry of PARAMS.
params = PARAMS['ppo']
ppo = PPO(
    hfo_env.observation_space.shape[0],
    hfo_env.action_space.shape[0],
    params,
)


def train():
    writer = SummaryWriter('logs/{}_PPO_GO_TO_BALL'.format(
        datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")))
    Transition = namedtuple('Transition', ['s', 'a', 'a_log_p', 'r', 's_'])

    try:
Beispiel #2
0
# Command-line interface: a single boolean switch, `--play`, stored as
# `args.play` (False unless the flag is given).
parse = argparse.ArgumentParser(
    formatter_class=argparse.RawTextHelpFormatter,
    description='Agent Args',
)
parse.add_argument(
    '--play',
    action='store_true',
    dest='play',
    default=False,
    help='Agent Playing.',
)
args = parse.parse_args()

# Team name and port handed to HFOEnv below.
TEAM = 'HELIOS'
PORT = 6000
# Presumably the names under which the DDPG actor/critic networks are
# stored -- their use lies outside this excerpt.
ACTOR_MODEL_NAME = "ddpg_actor_ball_to_goal"
CRITIC_MODEL_NAME = "ddpg_critic_ball_to_goal"
# NOTE(review): looks like a switch for writing loss values to the log;
# the code that reads it is not visible here -- confirm.
ENABLE_LOSS_WRITE = False

# Environment configured with the conditional dash-or-kick action, the
# agent-and-ball potential reward and the agent-orientation/ball-position
# state selectors.
hfo_env = HFOEnv(
    is_offensive=True,
    strict=True,
    continuous=True,
    team=TEAM,
    port=PORT,
    selected_action=CONDITIONAL_DASH_OR_KICK_ACTION,
    selected_reward=AGENT_AND_BALL_POTENCIAL_REWARD,
    selected_state=AGENT_ORIENTATION_AND_BALL_POSITION_SPACE,
)
unum = hfo_env.getUnum()

# The agent receives the first dimension of the observation and action space
# shapes together with the 'ddpg' entry of PARAMS.
params = PARAMS['ddpg']
ddpg = DDPG(
    hfo_env.observation_space.shape[0],
    hfo_env.action_space.shape[0],
    params,
)
# OUNoise is built from the action space (presumably Ornstein-Uhlenbeck
# exploration noise -- confirm); the replay buffer takes its size from params.
ou_noise = OUNoise(hfo_env.action_space)
replay_buffer = ReplayBuffer(params['replay_buffer_size'])


def train():
    writer = SummaryWriter(
        'logs/{}_DDPG_BALL_TO_GOAL'.format(datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")))
    frame_idx = 0
    
Beispiel #3
0
import datetime
import logging
import os
import pickle

import itertools
import hfo
import numpy as np
from src.lib.hfo_env import HFOEnv
from src.lib.utils.action_selector import TEST_ACTION

# Team name and server port forwarded to the environment wrapper.
team = 'HELIOS'
port = 6000

# Only an action selector is passed here; reward/state selection presumably
# falls back to HFOEnv defaults -- confirm against HFOEnv's signature.
hfo_env = HFOEnv(
    is_offensive=True,
    strict=True,
    continuous=True,
    team=team,
    port=port,
    selected_action=TEST_ACTION,
)

# Run episodes without an upper bound; `episode` is the running counter.
for episode in itertools.count():
    status = hfo.IN_GAME
    done = True  # placeholder only; overwritten by the first step() result
    state = hfo_env.reset()  # return value is not read in the visible code

    # Send the same fixed action ([-0.6]) on every step until the environment
    # reports the episode as done or the status leaves IN_GAME.
    while status == hfo.IN_GAME:
        next_state, reward, done, status = hfo_env.step([-0.6])
        if done:
            break

    # When the server reports that it is down, send QUIT.
    # NOTE(review): nothing visible here leaves the outer loop afterwards --
    # confirm that an exit/break follows beyond this excerpt.
    if status == hfo.SERVER_DOWN:
        hfo_env.act(hfo.QUIT)