def main():
    """Connect a defensive agent (base_right, goalie) to an HFO server and
    print a ball-angle feature for every step of every episode.

    Command-line flags: --port (server port), --feature_set (HFO feature
    set id), --trials (number of episodes).
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--port', default=6000, type=int)
    parser.add_argument('--feature_set',
                        default=hfo.LOW_LEVEL_FEATURE_SET,
                        type=int)
    parser.add_argument('--trials', default=10000, type=int)
    args = parser.parse_args()

    env = hfo.HFOEnvironment()
    env.connectToServer(
        args.feature_set,
        '/home/student/Desktop/HFO-master_ruben/bin/teams/base/config/formations-dt',
        args.port, 'localhost', 'base_right', True)

    for episode in range(args.trials):
        status = hfo.IN_GAME
        # NOTE(review): the inner loop only exits on SERVER_DOWN, so one
        # "episode" actually spans all games until the server dies — confirm
        # whether `status != hfo.IN_GAME` was intended instead.
        while status != hfo.SERVER_DOWN:
            features = env.getState()
            # Fixed: Python-2 print statement -> print() call, consistent
            # with the print() used below in this same function.
            # features[51] looks like the sine of some landmark angle in the
            # low-level feature set — TODO confirm the feature index.
            print(math.degrees(math.asin(features[51])))
            status = env.step()

        print(('Episode %d ended with %s' %
               (episode, env.statusToString(status))))
        # Quit if the server goes down
        if status == hfo.SERVER_DOWN:
            env.act(hfo.QUIT)
            exit()
Ejemplo n.º 2
0
 def __init__(self,
              agent_id=0,
              port=6000,
              server_addr='localhost',
              team_name=None,
              num_opponents=0,
              num_teammates=0):
     """Wire up the HFO client interface plus all per-agent bookkeeping."""
     # Interface object for talking to the HFO game server.
     self.hfo = hfo.HFOEnvironment()
     # Server connection settings (feature set and config dir are fixed).
     self.feature_set = hfo.HIGH_LEVEL_FEATURE_SET
     self.config_dir = settings.CONFIG_DIR
     self.port = port
     self.server_addr = server_addr
     # Any explicit team name other than "base" is upper-cased; otherwise
     # fall back to the stock left-side base team.
     named_team = bool(team_name) and team_name != "base"
     self.team_name = f'{team_name.upper()}_left' if named_team else "base_left"
     self.play_goalie = False
     # Roster sizes and this agent's index.
     self.num_teammates = num_teammates
     self.num_opponents = num_opponents
     self.agent_id = agent_id
     # Episode/step counters and current game status.
     self._check_flag = 0
     self.episode = 0
     self.num_steps = 0
     self.status = hfo.IN_GAME
     # Index of the last player to touch the ball.
     self.last_player_to_touch_ball = 0
Ejemplo n.º 3
0
def test_basic():
    """Every action and game-status code must map to a non-empty string."""
    hfo_env = hfo.HFOEnvironment()

    assert all(
        len(hfo_env.actionToString(a)) for a in range(hfo.NUM_HFO_ACTIONS))

    assert all(
        len(hfo_env.statusToString(s))
        for s in range(hfo.NUM_GAME_STATUS_STATES))
Ejemplo n.º 4
0
def main():
    """Train a DDPG agent against an HFO server (high-level feature set).

    NOTE(review): with range(130), `episode % 100 == 0 and episode > 100`
    can never be true, so the evaluation/checkpoint branch below never
    executes — the episode count or the condition is likely off; confirm.
    """
    hfo_env = hfo.HFOEnvironment()
    hfo_env.connectToServer(hfo.HIGH_LEVEL_FEATURE_SET)
    agent = DDPG()
    # model_file=tf.train.latest_checkpoint('ckpt/')
    # agent.saver.restore(agent.sess,model_file)
    for episode in range(130):
        status = hfo.IN_GAME
        stop_perceive = False
        # Liveness marker appended once per episode (hard-coded user path).
        with open('/home/ruizhao/Desktop/a.txt', 'a') as f:
            print('Hello World!', file=f)
        while True:

            state = hfo_env.getState()
            # print(state)
            # Exploratory (noisy) action while training.
            action = agent.noise_action(state)
            print(action)
            next_state, reward, done, status = env_step(agent, hfo_env, action)
            # print(reward)
            # After the first violated state, stop feeding transitions into
            # the replay buffer for the remainder of the episode.
            if state_violated(next_state):
                # print("hhhhhhhhhhhhhh")
                stop_perceive = True
            if not stop_perceive:
                # print(state, next_state,done)
                # print(stop_perceive)
                agent.perceive(state, action, reward, next_state, done)
            if status != hfo.IN_GAME:
                break
        if status == hfo.SERVER_DOWN:
            hfo_env.act(hfo.QUIT)
            exit()
        # print(episode)
        # print(episode % 100 == 0 and episode > 100)
        if episode % 100 == 0 and episode > 100:
            # if True:
            total_reward = 0
            for i in range(TEST):
                # state = env.reset()
                while True:
                    state = hfo_env.getState()
                    # Greedy (noise-free) action for evaluation.
                    action = agent.action(state)
                    next_state, reward, done, status = env_step(
                        agent, hfo_env, action)
                    total_reward += reward
                    if done:
                        break
            ave_reward = total_reward / TEST
            agent.saver.save(agent.sess,
                             'ckpt/mnist.ckpt',
                             global_step=episode)
            print('             episode: ', episode,
                  'Evaluation Average Reward:', ave_reward)
Ejemplo n.º 5
0
 def __init__(self,
              envir=None,
              action_set="low_level",
              seed=123):
     """Random low-level agent playing as the base_right goalie.

     Args:
         envir: HFO environment to use; a fresh ``hfo.HFOEnvironment()`` is
             created when omitted.  (Fixed: the default used to be
             ``envir=hfo.HFOEnvironment()``, which is evaluated once at
             definition time and therefore shared by every instance — the
             classic mutable-default-argument bug.)
         action_set: kept for interface compatibility (not used here).
         seed: RNG seed stored on the instance.
     """
     if envir is None:
         envir = hfo.HFOEnvironment()
     Agent.__init__(self,
                    env=envir,
                    agent_type="low_level_random_agent",
                    action_space=LowLevelActionSpace(),
                    state_space=NeuralStateSpace(),
                    feature_set=hfo.LOW_LEVEL_FEATURE_SET,
                    port=6000,
                    base="base_right",
                    goalie=True)
     self.seed = seed
Ejemplo n.º 6
0
def run(num_episodes):
    """Run a fixed-policy defender (base_right goalie) that dashes at -90
    degrees every step, for ``num_episodes`` episodes.
    """
    env = hfo.HFOEnvironment()
    env.connectToServer(
        hfo.LOW_LEVEL_FEATURE_SET,
        '/home/student/Desktop/HFO-master_ruben/bin/teams/base/config/formations-dt',
        6000, 'localhost', 'base_right', True)

    # Fixed: Python-2-only `xrange` and print statement -> range()/print().
    for episode in range(num_episodes):
        status = hfo.IN_GAME
        while status == hfo.IN_GAME:
            features = env.getState()
            env.act(hfo.DASH, 100.0, -90.0)
            status = env.step()
        print('Episode', episode, 'ended')
Ejemplo n.º 7
0
 def connect_server(self, agentIndex):
     """Connect one client to the running HFO server.

     All of a client's learning happens inside this call, because the HFO
     server misbehaves when more than one client executes at the same time.
     """
     # Formations file shipped with the base team.
     formations_path = self.serverPath + 'teams/base/config/formations-dt'
     self.hfoObj[agentIndex] = hfo.HFOEnvironment()
     # Establish the connection and report the server's reply.
     server_response = self.hfoObj[agentIndex].connectToServer(
         feature_set=hfo.HIGH_LEVEL_FEATURE_SET,
         config_dir=formations_path,
         server_port=self.serverPort,
         server_addr='localhost',
         team_name='base_left',
         play_goalie=False)
     print("%%%% Server connection FeedBack:    " + str(server_response))
Ejemplo n.º 8
0
    def __init__(self, taskParam, limitFrames=200, agentsControl=1):
        """Initiates the HFO environment"""
        #Returns a port that is not being used
        self.serverPort = portmanager.get_free_port()
        #self.serverPort = 2000
        # taskParam packs (number of friends, number of opponents).
        self.numberFriends = taskParam[0]
        self.numberOpponents = taskParam[1]

        self.applyAction = None  #[None]*agentsControl
        self.actionParameter = None  #[None]*agentsControl

        #self.agentsControl = agentsControl
        self.lastAction = [None]  #*agentsControl
        self.hfoObj = []
        #for i in range(agentsControl):
        #self.hfoObj.append(hfo.HFOEnvironment())
        # NOTE(review): hfoObj is first a list, then rebound to a single
        # environment — a remnant of multi-agent support.  connect_server
        # elsewhere indexes hfoObj[agentIndex]; confirm which shape is live.
        self.hfoObj = hfo.HFOEnvironment()

        self.stepRequest = False  #[False]*agentsControl
        self.clearServer = False  #[False]*agentsControl
        #self.init_server(taskParam,limitFrames)

        #Initiates a new thread only to avoid an error when loading the strategy.cpp file
        self.terminateThread = False
        t = Thread(target=init_server,
                   args=(self, taskParam, limitFrames, agentsControl))
        t.start()
        t.join()

        # Give the freshly started server time to initialise.
        time.sleep(2)

        #Initiates one thread for each agent controlled by learning algorithms
        #for i in range(self.agentsControl):
        #t = Thread(target=connect_server, args=(self, i))
        #t.start()
        #time.sleep(2)
        t = Thread(target=connect_server, args=(self, ))
        t.start()
        #The connection with the server is OK after here.
        time.sleep(3)
        self.totalEpisodes = 0
        self.goals = 0

        self.stateSpaceManager = HFOStateManager(self.numberFriends,
                                                 self.numberOpponents)
def run(num_episodes):
    """Run a fixed-policy attacker (base_left) that kicks at full power at a
    random angle (fixed per episode), for ``num_episodes`` episodes.
    """
    env = hfo.HFOEnvironment()

    env.connectToServer(
        hfo.LOW_LEVEL_FEATURE_SET,
        '/home/student/Desktop/HFO-master_ruben/bin/teams/base/config/formations-dt',
        6000, 'localhost', 'base_left', False)

    # Fixed: Python-2-only `xrange` and print statement -> range()/print().
    # Also dropped the unused local `r`.
    for episode in range(num_episodes):
        # One random kick direction (degrees) held for the whole episode.
        kick_angle = random.randint(-8, 8)
        status = hfo.IN_GAME
        while status == hfo.IN_GAME:
            features = env.getState()
            env.act(hfo.KICK, 100.0, kick_angle)
            status = env.step()
        print('Episode', episode, 'ended')
def main():
    """Repeatedly launch an HFO server, connect one client to it on a
    worker thread, then kill the server and advance the client port by 5.

    NOTE(review): every command in serverCommands hard-codes `--port 12345`,
    while args.port (default 12345, bumped by 5 each iteration) is what the
    client connects to — confirm these are meant to stay in sync.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--port', type=int, default=12345, help="Server port")
    parser.add_argument(
        '--seed',
        type=int,
        default=None,
        help="Python randomization seed; uses python default if 0 or not given"
    )
    parser.add_argument('--record',
                        action='store_true',
                        help="Doing HFO --record")
    parser.add_argument('--rdir',
                        type=str,
                        default='log/',
                        help="Set directory to use if doing HFO --record")
    args = parser.parse_args()

    serverCommands = [
        "/home/leno/gitProjects/Curriculum_HFO/HFO/bin/HFO --offense-team helios --fullstate --offense-on-ball 12 --no-logging --headless --port 12345 --frames-per-trial 200 --offense-agents 1 --offense-npcs 2 --defense-npcs 3 --offense-team base --defense-team helios --ball-x-min 0.6 --ball-x-max 0.7999999999999999 --seed 123 --verbose >> tt.log",
        "/home/leno/gitProjects/Curriculum_HFO/HFO/bin/HFO --offense-team helios --fullstate --offense-on-ball 12 --no-logging --headless --port 12350 --frames-per-trial 200 --offense-agents 1 --offense-npcs 2 --defense-npcs 3 --offense-team base --defense-team helios --ball-x-min 0.6 --ball-x-max 0.7999999999999999 --seed 123 --verbose >> tt.log",
        "/home/leno/gitProjects/Curriculum_HFO/HFO/bin/HFO --offense-team helios --fullstate --offense-on-ball 12 --no-logging --headless --port 12355 --frames-per-trial 200 --offense-agents 1 --offense-npcs 2 --defense-npcs 3 --offense-team base --defense-team helios --ball-x-min 0.6 --ball-x-max 0.7999999999999999 --seed 123 --verbose >> tt.log",
        "/home/leno/gitProjects/Curriculum_HFO/HFO/bin/HFO --offense-team helios --fullstate --offense-on-ball 12 --no-logging --headless --port 12360 --frames-per-trial 200 --offense-agents 1 --offense-npcs 2 --defense-npcs 3 --offense-team base --defense-team helios --ball-x-min 0.6 --ball-x-max 0.7999999999999999 --seed 123 --verbose >> tt.log"
    ]

    for serverCommand in serverCommands:
        # shell=True so the ">> tt.log" redirection in the command works.
        serverProcess = subprocess.Popen(serverCommand, shell=True)

        print("NEW Server")

        time.sleep(1)
        if args.seed:
            random.seed(args.seed)
        # Create the HFO Environment
        hfo_env = hfo.HFOEnvironment()

        # Connect to the server with the specified
        # feature set. See feature sets in hfo.py/hfo.hpp.
        t = Thread(target=init_connect, args=(hfo_env, args))
        t.start()
        t.join()

        # "kill -9 -<pid>" signals the whole process group started by the
        # shell, taking the HFO server down with it.
        subprocess.call("kill -9 -" + str(serverProcess.pid), shell=True)
        args.port = args.port + 5
Ejemplo n.º 11
0
def main():
    """Drive a GO_TO_BALL agent and print each episode's outcome."""
    env = hfo.HFOEnvironment()
    env.connectToServer(LOW_LEVEL_FEATURE_SET,
                        config_dir='./',
                        server_port=1234)

    for episode in itertools.count():
        status = env.step()
        s1 = env.getState()
        already_close_to_ball = False
        while status == IN_GAME:
            time.sleep(3)
            env.act(GO_TO_BALL)
            status = env.step()
            s2 = env.getState()
            # Reward computed from consecutive states (value is discarded
            # here; presumably kept for side effects/logging — confirm).
            low_level_reward_function(s2, s1, already_close_to_ball, status)
            s1 = s2
        # Fixed: the format string had no arguments, so the raw "%d"
        # placeholder was printed instead of the episode number and status.
        print("Episode %d ended with %s" %
              (episode, env.statusToString(status)))
        if status == SERVER_DOWN:
            env.act(QUIT)
            break
Ejemplo n.º 12
0
    def __init__(self, port):
        """Launch a 1v1 HFO game in the background and connect as base_left."""
        # Start the server headless with one offense agent and one defense NPC.
        launch_cmd = "{}/bin/HFO --offense-agents=1" \
                     " --defense-npcs=1 --port={} --trials 200 --headless &".format(hfo_root, port)
        os.system(launch_cmd)
        # Give the server a moment to come up before connecting.
        time.sleep(2)
        self.env = hfo.HFOEnvironment()
        self.env.connectToServer(
            HIGH_LEVEL_FEATURE_SET,
            '{}/bin/teams/base/config/formations-dt'.format(hfo_root), port,
            'localhost', 'base_left', False)

        # Discrete action catalogue: index -> HFO high-level action.
        self.avaliable_actions = dict(
            enumerate((MOVE, SHOOT, DRIBBLE, GO_TO_BALL, NOOP)))

        self.action_space = len(self.avaliable_actions)
        self.state_space = self.env.getStateSize()
Ejemplo n.º 13
0
 def __init__(self,
              agent_id=0,
              port=6000,
              server_addr='localhost',
              num_opponents=0,
              num_teammates=0):
     """Initialise the HFO client wrapper and per-agent bookkeeping."""
     # Hfo game interface:
     self.hfo = hfo.HFOEnvironment()
     # Server configuration:
     self.feature_set = hfo.HIGH_LEVEL_FEATURE_SET
     self.config_dir = settings.CONFIG_DIR
     self.port = port
     self.server_addr = server_addr
     # Always joins the stock left-side base team, as a field player.
     self.team_name = 'base_left'
     self.play_goalie = False
     # Attributes:
     self.num_teammates = num_teammates
     self.num_opponents = num_opponents
     self.agent_id = agent_id
     # Metrics:
     self.episode = 0
     self.num_steps = 0
     self.status = hfo.IN_GAME
Ejemplo n.º 14
0
def playGame(train_indicator=0):  # 1 means Train, 0 means simply Run
    """Train (or just run) a DDPG agent with parameterised actions in HFO.

    The actor emits 10 values: a softmax score for each of the four action
    types (dash, turn, tackle, kick) followed by their 6 continuous
    parameters; the highest-scoring action type is executed each step.
    Episode rewards and accumulated target-Q values are appended to
    rewards.csv / q_values.csv; model weights are checkpointed every 3
    episodes when training.
    """
    BUFFER_SIZE = 100000.
    BATCH_SIZE = 32
    GAMMA = 0.99
    TAU = 0.001  # Target Network HyperParameters
    LRA = 0.0005  # Learning rate for Actor
    LRC = 0.001  # Lerning rate for Critic

    action_dim = 10  # 4 actions and their 6 continuous parameters
    state_dim = 58  # of sensors input

    np.random.seed(1337)

    EXPLORE = 100000
    episode_count = 20000
    max_steps = 1000
    reward = 0
    step = 0
    epsilon = 1
    indicator = 0

    # Tensorflow GPU optimization
    config = tf.ConfigProto()
    # config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    from keras import backend as K
    K.set_session(sess)

    actor = ActorNetwork(sess, state_dim, action_dim, BATCH_SIZE, TAU, LRA)
    critic = CriticNetwork(sess, state_dim, action_dim, BATCH_SIZE, TAU, LRC)
    buff = ReplayBuffer(BUFFER_SIZE)  #Create replay buffer

    # Generate a HFO environment
    env = hfo.HFOEnvironment()
    env.connectToServer(hfo.LOW_LEVEL_FEATURE_SET,
                        config_dir='./conf',
                        server_port=1111)

    #Now load the weight
    print("Now we load the weight")
    try:
        actor.model.load_weights("actormodel.h5")
        critic.model.load_weights("criticmodel.h5")
        actor.target_model.load_weights("actormodel.h5")
        critic.target_model.load_weights("criticmodel.h5")
        print("Weight load successfully")
    except Exception:  # Fixed: bare except also swallowed KeyboardInterrupt.
        print("Cannot find the weight")

    # Action-tuple layouts, hoisted out of the step loop — they are
    # loop-invariant type definitions.
    dash_tuple = namedtuple('Dash', ['SOFTMAX', 'PWR', 'ANGLE'])
    turn_tuple = namedtuple('Turn', ['SOFTMAX', 'ANGLE'])
    tackle_tuple = namedtuple('Tackle', ['SOFTMAX', 'ANGLE'])
    kick_tuple = namedtuple('Kick', ['SOFTMAX', 'PWR', 'ANGLE'])

    # Per-head OU-noise means; theta=0.15 and sigma=0.20 are shared.
    ou_mu = (0.60, 0.25, 0.20, 0.40, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0)

    print("Soccer Experiment Start.")
    for episode in range(episode_count):
        print("Episode : " + str(episode) + " Replay Buffer " +
              str(buff.count()))
        isBall = Locker()
        s_t = np.hstack(env.getState())
        status = env.step()
        total_reward = 0.
        total_target_q_values = 0
        for j in range(max_steps):

            # time.sleep(.1)
            loss = 0
            epsilon -= 1.0 / EXPLORE
            a_t = np.zeros([1, action_dim])
            noise_t = np.zeros([1, action_dim])

            a_t_original = actor.model.predict(s_t.reshape(1, s_t.shape[0]))
            # Exploration noise only while training; anneals with epsilon.
            # (Replaces ten copy-pasted OU.function lines.)
            noise_scale = train_indicator * max(epsilon, 0)
            for idx, mu in enumerate(ou_mu):
                noise_t[0][idx] = noise_scale * OU.function(
                    a_t_original[0][idx], mu, 0.15, 0.20)
            a_t[0] = a_t_original[0] + noise_t[0]

            dash = dash_tuple(a_t[0][0], 100 * a_t[0][4], 180 * a_t[0][5])
            turn = turn_tuple(a_t[0][1], 180 * a_t[0][6])
            tackle = tackle_tuple(a_t[0][2], 180 * a_t[0][7])
            kick = kick_tuple(a_t[0][3], 100 * a_t[0][8], 180 * a_t[0][9])

            print("Actions:\n--{}\n--{}\n--{}\n--{}".format(
                dash, turn, tackle, kick))

            # Execute whichever action type the actor scored highest.
            actions = sorted([dash, turn, tackle, kick],
                             key=lambda x: x.SOFTMAX,
                             reverse=True)
            action = actions[0]
            print(action)
            if type(action) == type(dash):
                env.act(hfo.DASH, dash.PWR, dash.ANGLE)
            elif type(action) == type(turn):
                env.act(hfo.TURN, turn.ANGLE)
            elif type(action) == type(tackle):
                env.act(hfo.TACKLE, tackle.ANGLE)
            elif type(action) == type(kick):
                env.act(hfo.KICK, kick.PWR, kick.ANGLE)
            else:
                print('I am not acting')
            player = env.playerOnBall()
            status = env.step()

            s_t1 = np.array(env.getState())
            r_t = low_level_reward_function(s_t1, s_t, isBall, status)

            buff.add(s_t, a_t[0], r_t, s_t1, status)

            #Do the batch update
            batch = buff.getBatch(BATCH_SIZE)
            states = np.asarray([e[0] for e in batch])
            actions = np.asarray([e[1] for e in batch])
            rewards = np.asarray([e[2] for e in batch])
            new_states = np.asarray([e[3] for e in batch])
            dones = np.asarray([e[4] for e in batch])
            # NOTE(review): y_t starts as a copy of the action batch and is
            # overwritten row-by-row below — it is only used as a
            # correctly-shaped buffer; confirm the critic expects this shape.
            y_t = np.asarray([e[1] for e in batch])

            predicted_actions = actor.target_model.predict(new_states)
            target_q_values = critic.target_model.predict(
                [new_states, predicted_actions])

            # Bellman targets: terminal transitions use the raw reward.
            for k in range(len(batch)):
                if dones[k]:
                    y_t[k] = rewards[k]
                else:
                    y_t[k] = rewards[k] + GAMMA * target_q_values[k]

            for k in range(len(batch)):
                total_target_q_values = total_target_q_values + target_q_values[
                    k]

            if (train_indicator):
                loss += critic.model.train_on_batch([states, actions], y_t)
                a_for_grad = actor.model.predict(states)
                grads = critic.gradients(states, a_for_grad)
                inverted_grads = invert_grads(
                    grads, a_for_grad
                )  # Invert the gradients if they exceed the parameter max and min values
                actor.train(states, inverted_grads)
                actor.target_train()
                critic.target_train()

            total_reward += r_t
            s_t = s_t1
            step += 1
            # Fixed: the SERVER_DOWN check used to come *after* an
            # unconditional break on any non-IN_GAME status, so QUIT was
            # never sent.  Check for a dead server first.
            if status == hfo.SERVER_DOWN:
                env.act(hfo.QUIT)
                break
            if status != hfo.IN_GAME:
                break
        with open('rewards.csv', 'a') as f:
            f.writelines("{},{}\n".format(episode, total_reward))
        with open('q_values.csv', 'a') as g:
            g.write("{},{}\n".format(episode, sum(total_target_q_values)))
        if np.mod(episode, 3) == 0:
            if (train_indicator):
                print("Now we save model")
                actor.model.save_weights("actormodel.h5", overwrite=True)
                with open("actormodel.json", "w") as outfile:
                    json.dump(actor.model.to_json(), outfile)

                critic.model.save_weights("criticmodel.h5", overwrite=True)
                with open("criticmodel.json", "w") as outfile:
                    json.dump(critic.model.to_json(), outfile)

        print("TOTAL REWARD @ " + str(episode) + "-th Episode  : Reward " +
              str(total_reward))
        print("Total Step: " + str(step))
        print("")

    print("Finish.")
Ejemplo n.º 15
0
"""A few tests using a server"""
from __future__ import print_function

import os
import subprocess
import sys
import time

import hfo

# Module-level environment shared by every test helper below.
hfo_env = hfo.HFOEnvironment()


def try_step():  # if a game ends within ~20 frames, something is wrong...
    """Advance the shared environment one frame and return the new state.

    Asserts the game is still IN_GAME, since an episode ending this early
    indicates a misbehaving server.
    """
    status = hfo_env.step()

    assert (status == hfo.IN_GAME), (
        "Status is {!s} ({!r}), not IN_GAME".format(
            hfo_env.statusToString(status), status))

    return hfo_env.getState()


def test_with_server():
    test_dir = os.path.dirname(os.path.abspath(os.path.realpath(__file__)))
    binary_dir = os.path.normpath(test_dir + "/../bin")
    conf_dir = os.path.join(binary_dir, 'teams/base/config/formations-dt')
    bin_HFO = os.path.join(binary_dir, "HFO")

    popen_list = [
        sys.executable, "-x", bin_HFO, "--offense-agents=1",
Ejemplo n.º 16
0
def player(mark):
    """Run one multi-agent PDQN player (index ``mark``) against an HFO server.

    Connects as base_right, loops forever over episodes, stores transitions
    into the shared ma_pdqn learner and trains it every step.  Agent 0
    prints rolling 100-episode statistics every ``print_interval`` episodes.
    """
    print('--I am player', mark, ctime())
    # Create the HFO Environment
    hfo_env = hfo.HFOEnvironment()
    hfo_env.connectToServer(
        hfo.LOW_LEVEL_FEATURE_SET,
        'C:/Users/Administrator/HFO/bin/teams/base/config/formations-dt',
        args.port, 'localhost', 'base_right', False)

    total_step = 0
    ep_rewards = []
    ep_steps = []
    ep_goals = []
    for episode in itertools.count():
        status = hfo.IN_GAME
        episode_step = 0
        # Fixed (FIXME 0061): `isBall = Locker` bound the class itself, not
        # an instance — every other call site of the reward helper passes a
        # `Locker()` instance.
        isBall = Locker()
        total_reward = 0.
        while status == hfo.IN_GAME:
            total_step += 1
            episode_step += 1

            # Get the vector of state features for the current state
            st = np.hstack(hfo_env.getState())
            # Discrete action type plus its continuous parameter(s).
            action, c_action = ma_pdqn.act(state=st, index=mark)
            if action == 0:
                hfo_env.act(hfo.DASH, c_action[0], c_action[1])
            elif action == 1:
                hfo_env.act(hfo.TURN, c_action)
            elif action == 2:
                hfo_env.act(hfo.TACKLE, c_action)
            elif action == 3:
                hfo_env.act(hfo.KICK, c_action[0], c_action[1])
            else:
                print('I am not acting', mark)

            # Advance the environment and get the game status
            #player = env.playerOnBall()
            status = hfo_env.step()
            st_ = np.array(hfo_env.getState())
            r_t = low_level_reward_function(st_, st, isBall, status)
            # Store the transition in the buffer of this agent's index.
            #FIXME st[0:58]
            if mark == 0:
                ma_pdqn.storeTransition1(st, st_, action, c_action, r_t,
                                         st[0:58], st_[0:58])
            else:
                ma_pdqn.storeTransition2(st, st_, action, c_action, r_t,
                                         st[0:58], st_[0:58])
            total_reward += r_t
            ma_pdqn.train()
            #FIXME 0065
            ma_pdqn.episode_done(index=mark)

        # Rolling per-episode metrics, trimmed to the last 100 episodes.
        ep_steps.append(episode_step)
        ep_rewards.append(total_reward)
        if status == hfo.GOAL:
            ep_goals.append(1)
        else:
            ep_goals.append(0)
        ep_steps = ep_steps[-100:]
        ep_rewards = ep_rewards[-100:]
        ep_goals = ep_goals[-100:]

        if (episode + 1) % print_interval == 0 and mark == 0:
            print("================================================")
            print("--Agent:", mark)
            print("--Episode: ", episode)
            # NOTE(review): these divide by 100 even before 100 episodes
            # have elapsed, under-reporting early averages — confirm intent.
            print("----Avg_steps: ", sum(ep_steps[-100:]) / 100.0)
            print("----Avg_reward: ", sum(ep_rewards[-100:]) / 100.0)
            print("----Goal_rate: ", sum(ep_goals[-100:]) / 100.0)
            print("------------------------------------------------")

        # Quit if the server goes down
        if status == hfo.SERVER_DOWN:
            hfo_env.act(hfo.QUIT)
            exit()
Ejemplo n.º 17
0
def main():
    """Hand-coded high-level offense agent (base_left).

    Without the ball it issues MOVE; with the ball it defers to
    get_action, except with probability --epsilon it takes a random
    SHOOT/DRIBBLE action (used to tune opponent difficulty).
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--port', type=int, default=6000, help="Server port")
    parser.add_argument(
        '--seed',
        type=int,
        default=None,
        help="Python randomization seed; uses python default if 0 or not given"
    )
    parser.add_argument(
        '--rand-pass',
        action="store_true",
        help="Randomize order of checking teammates for a possible pass")
    parser.add_argument(
        '--epsilon',
        type=float,
        default=0,
        help=
        "Probability of a random action if has the ball, to adjust difficulty")
    parser.add_argument('--record',
                        action='store_true',
                        help="If doing HFO --record")
    parser.add_argument('--rdir',
                        type=str,
                        default='log/',
                        help="Set directory to use if doing --record")
    args = parser.parse_args()
    if args.seed:
        random.seed(args.seed)
    hfo_env = hfo.HFOEnvironment()
    # Recording requires passing record_dir through to the server connection.
    if args.record:
        hfo_env.connectToServer(hfo.HIGH_LEVEL_FEATURE_SET,
                                'bin/teams/base/config/formations-dt',
                                args.port,
                                'localhost',
                                'base_left',
                                False,
                                record_dir=args.rdir)
    else:
        hfo_env.connectToServer(hfo.HIGH_LEVEL_FEATURE_SET,
                                'bin/teams/base/config/formations-dt',
                                args.port, 'localhost', 'base_left', False)
    num_teammates = hfo_env.getNumTeammates()
    #num_opponents = hfo_env.getNumOpponents()
    # Warn when a seed was supplied but nothing in this run is randomized.
    if args.seed:
        if (args.rand_pass and (num_teammates > 1)) or (args.epsilon > 0):
            print("Python randomization seed: {0:d}".format(args.seed))
        else:
            print(
                "Python randomization seed useless without --rand-pass w/2+ teammates or --epsilon >0"
            )
    if args.rand_pass and (num_teammates > 1):
        print("Randomizing order of checking for a pass")
    if args.epsilon > 0:
        print("Using epsilon {0:n}".format(args.epsilon))
    for episode in itertools.count():
        # Per-episode counters: random actions, steps holding the ball,
        # MOVE steps.
        num_eps = 0
        num_had_ball = 0
        num_move = 0
        status = hfo.IN_GAME
        while status == hfo.IN_GAME:
            state = hfo_env.getState()
            #print(state)
            if int(state[5]) == 1:  # state[5] is 1 when player has the ball
                if (args.epsilon > 0) and (random.random() < args.epsilon):
                    # Epsilon branch: random 50/50 shoot-or-dribble.
                    if random.random() < 0.5:
                        hfo_env.act(hfo.SHOOT)
                    else:
                        hfo_env.act(hfo.DRIBBLE)
                    num_eps += 1
                else:
                    get_action(state, hfo_env, num_teammates, args.rand_pass)
                num_had_ball += 1
            else:
                hfo_env.act(hfo.MOVE)
                num_move += 1
            status = hfo_env.step()
            #print(status)

        # Quit if the server goes down
        if status == hfo.SERVER_DOWN:
            hfo_env.act(hfo.QUIT)
            exit()

        # Check the outcome of the episode
        print("Episode {0:d} ended with {1:s}".format(
            episode, hfo_env.statusToString(status)))
        if args.epsilon > 0:
            print("\tNum move: {0:d}; Random action: {1:d}; Nonrandom: {2:d}".
                  format(num_move, num_eps, (num_had_ball - num_eps)))
Ejemplo n.º 18
0
def main():
  """Q-learning defender (base_right goalie) with a one-hot binned-ball-
  position state fed through a single linear TensorFlow layer.

  The ball position is binned into one of `feature_space_n` cells; the
  network maps that one-hot state to Q-values over `action_space_n` actions.
  """
  parser = argparse.ArgumentParser()
  parser.add_argument('--port', default=6000, type=int)
  parser.add_argument('--feature_set', default=hfo.HIGH_LEVEL_FEATURE_SET, type=int)
  parser.add_argument('--trials', default=10000, type=int)
  args = parser.parse_args()

  # Starting the TensorFlow network
  tf.reset_default_graph()

  # Create the HFO Environment
  env = hfo.HFOEnvironment()
  feature_space_n = 100
  action_space_n = 8
  # Connect to the server with the specified
  # feature set. See feature sets in hfo.py/hfo.hpp.
  env.connectToServer(args.feature_set,
                      '/home/student/Desktop/HFO-master_ruben/bin/teams/base/config/formations-dt', args.port,
                      'localhost', 'base_right', True)

  # Feed-forward part of the network: one-hot state in, Q-values out.
  inputs1 = tf.placeholder(shape=[1,feature_space_n],dtype=tf.float32)
  W = tf.Variable(tf.random_uniform([feature_space_n,action_space_n],0,0.01))
  Qout = tf.matmul(inputs1,W)
  predict = tf.argmax(Qout,1)

  # Loss is the squared TD error against the target Q-values.
  nextQ = tf.placeholder(shape=[1,action_space_n],dtype=tf.float32)
  loss = tf.reduce_sum(tf.square(nextQ - Qout))
  trainer = tf.train.GradientDescentOptimizer(learning_rate=0.1)
  updateModel = trainer.minimize(loss)

  init = tf.initialize_all_variables()
  action_set = LowLevelActionSet()

  gamma = 0.99
  e = 0.1  # epsilon for epsilon-greedy exploration

  x_bounds = [-1, 1]
  y_bounds = [-0.3, 0.3]

  with tf.Session() as sess:
    sess.run(init)

    for episode in range(args.trials):
        s = helper.bin_ball_position(env.getState()[3:5], x_bounds, y_bounds)
        status = hfo.IN_GAME
        while status == hfo.IN_GAME:
            a,allQ = sess.run([predict,Qout],feed_dict={inputs1:np.identity(feature_space_n)[s:s+1]})
            if np.random.rand(1) < e:
                # Fixed: random.randint(0, 8) is inclusive of 8, which is
                # out of range for the 8-column Q table and crashed at
                # targetQ[0, a[0]].  Sample 0..action_space_n-1 instead.
                a[0] = random.randint(0, action_space_n - 1)
            #Get new state and reward from environment
            env.act(*action_set[a[0]])
            status = env.step()
            s1 = helper.bin_ball_position(env.getState()[3:5], x_bounds, y_bounds)
            # Reward shaping: smaller agent-to-ball distance -> higher reward.
            reward = (1-np.linalg.norm(env.getState()[3:5]-env.getState()[0:2]))*100
            if status == hfo.GOAL :
                # Defender conceded: large negative reward; decay epsilon.
                reward = -500
                e = 1./((episode/50) + 10)
                break
            elif status == hfo.CAPTURED_BY_DEFENSE:
                e = 1./((episode/50) + 10)
                reward = 500
                break
            #Obtain the Q' values by feeding the new state through our network
            Q1 = sess.run(Qout,feed_dict={inputs1:np.identity(feature_space_n)[s1:s1+1]})
            #Obtain maxQ' and set our target value for chosen action.
            maxQ1 = np.max(Q1)
            targetQ = allQ
            targetQ[0,a[0]] = reward + gamma*maxQ1
            #Train our network using target and predicted Q values
            _,W1 = sess.run([updateModel,W],feed_dict={inputs1:np.identity(feature_space_n)[s:s+1],nextQ:targetQ})
            s = s1

        #advanced_stats.append(stats[status])
        print(('Episode %d ended with %s'%(episode, env.statusToString(status))))
        # Quit if the server goes down
        if status == hfo.SERVER_DOWN:
          env.act(hfo.QUIT)
          exit()
Ejemplo n.º 19
0
from pathlib import Path

import hfo
from hfo.hfo import *
base_dir = Path('/home/goncalo/HFO')
config_dir = base_dir / 'bin/teams/base/config/formations-dt'

# BUG FIX: the original did `hfo = hfo.HFOEnvironment()`, rebinding the name of
# the imported `hfo` module to an instance — any later access to the module
# (e.g. hfo.QUIT, hfo.SERVER_DOWN) would fail. Use a distinct name instead.
env = hfo.HFOEnvironment()
env.connectToServer(HIGH_LEVEL_FEATURE_SET, config_dir=str(config_dir))
for episode in range(5): # replace with xrange(5) for Python 2.X
    status = IN_GAME
    while status == IN_GAME:
        features = env.getState()
        # First two high-level features: agent (x, y) position.
        print(features[:2])
        # dist = features[33]
        # Dash forward at power 20 every step until the episode ends.
        env.act(DASH, 20.0, 0.0)
        status = env.step()
    print('episode', episode)
Ejemplo n.º 20
0
def main():
    """Tabular Q-learning defensive agent for HFO.

    Connects to a running HFO server as 'base_right', bins the ball position
    into a discrete state, and learns a Q-table over the high-level action set.
    Runs for --trials episodes or until the server goes down.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--port', default=6000, type=int)
    parser.add_argument('--feature_set',
                        default=hfo.HIGH_LEVEL_FEATURE_SET,
                        type=int)
    parser.add_argument('--trials', default=10000, type=int)
    args = parser.parse_args()
    state_space = 100  # amount of bins for ball to go in

    # Learning rate and discount factor for the Q-update.
    alpha = 0.8
    gamma = 0.95
    #stats_path = '/home/student/Desktop/HFO-master_ruben/example/test_keepers/stats.bin'
    # Count how each episode terminated.
    stats = {
        hfo.GOAL: 0,
        hfo.CAPTURED_BY_DEFENSE: 0,
        hfo.OUT_OF_BOUNDS: 0,
        hfo.OUT_OF_TIME: 0,
        hfo.SERVER_DOWN: 0
    }
    #advanced_stats = array('b')
    # Create the HFO Environment
    env = hfo.HFOEnvironment()
    actions = ActionSet("high_level")
    action_space = len(actions)
    Q = np.zeros((state_space, action_space))
    # Connect to the server with the specified
    # feature set. See feature sets in hfo.py/hfo.hpp.

    # saving first state

    env.connectToServer(
        args.feature_set,
        '/home/student/Desktop/HFO-master_ruben/bin/teams/base/config/formations-dt',
        args.port, 'localhost', 'base_right', True)

    #num_features = hfo.getStateSize()

    # Bounds used to bin the continuous ball (x, y) into a discrete state index.
    x_bounds = [-1, 1]
    y_bounds = [-0.3, 0.3]

    for episode in range(args.trials):
        s = helper.bin_ball_position(env.getState()[3:5], x_bounds, y_bounds)
        status = hfo.IN_GAME
        while status == hfo.IN_GAME:
            # Greedy action with decaying exploration noise on the Q-values.
            a = np.argmax(Q[s, :] + np.random.randn(1, action_space) *
                          (1. / (episode + 1)))
            env.act(*actions[a])

            status = env.step()
            features = env.getState()
            # Dense reward: smaller distance-to-ball feature -> higher reward.
            # NOTE(review): assumes features[9] is a normalized distance — confirm.
            reward = (1 - features[9]) * 100
            if status == hfo.GOAL:
                # Defensive agent: conceding a goal is punished.
                reward = -500
                # BUG FIX: the original broke out before any Q-update, so the
                # terminal reward never reached the table. Terminal states have
                # no successor, so the TD target is just the reward.
                Q[s, a] = Q[s, a] + alpha * (reward - Q[s, a])
                break
            elif status == hfo.CAPTURED_BY_DEFENSE:
                reward = 500
                # BUG FIX: same terminal-state update as above.
                Q[s, a] = Q[s, a] + alpha * (reward - Q[s, a])
                break
            s1 = helper.bin_ball_position(features[3:5], x_bounds, y_bounds)
            # Standard Q-learning update toward reward + discounted best next value.
            Q[s,
              a] = Q[s,
                     a] + alpha * (reward + gamma * np.max(Q[s1, :]) - Q[s, a])

            s = s1
        stats[status] += 1

        # Grab the state features from the environment

        #advanced_stats.append(stats[status])
        print(('Episode %d ended with %s' %
               (episode, env.statusToString(status))))
        # Quit if the server goes down
        if status == hfo.SERVER_DOWN:
            env.act(hfo.QUIT)
            exit()
Ejemplo n.º 21
0
 def __init__(self):
     """Build the discrete state/action spaces, create the HFO environment
     and immediately connect to the server."""
     # 5x5 discretization of the pitch — presumably (x bins, y bins); confirm
     # against StateSpace's definition.
     self.state_space = StateSpace(5, 5)
     self.action_space = ActionSpace()
     # Low-level client interface to the HFO server.
     self.env = hfo.HFOEnvironment()
     # NOTE(review): connecting in the constructor is a side effect — callers
     # need a running server before instantiating this class.
     self._connect_to_server()
Ejemplo n.º 22
0
 def __init__(self, envir=None, action_set="high_level", seed=123):
     """High-level goalie agent playing on the 'base_right' side.

     envir: an hfo.HFOEnvironment to use; a fresh one is created when omitted.
     action_set: kept for interface compatibility (note: the parent is always
                 given HighLevelActionSpace(), same as the original code).
     seed: randomization seed stored on the agent.
     """
     # BUG FIX: the original default `envir=hfo.HFOEnvironment()` was evaluated
     # once at class-definition time, so every instance constructed without an
     # explicit environment silently shared the same HFOEnvironment object.
     # Create it lazily per instance instead.
     if envir is None:
         envir = hfo.HFOEnvironment()
     Agent.__init__(self, env=envir, agent_type="high_level_agent",
                    action_set=HighLevelActionSpace(),
                    state_space=StateSpace(500),
                    feature_set=hfo.HIGH_LEVEL_FEATURE_SET, port=6000,
                    base="base_right", goalie=True)
     self.seed = seed
Ejemplo n.º 23
0
def main():
    """Run a hand-coded defensive HFO agent on the 'base_right' side.

    Connects to a running HFO server, then plays episodes forever, choosing
    either a scripted defense action or (with probability --epsilon) a random
    one. Per-action usage counters are printed when the server goes down.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--port', type=int, default=6000, help="Server port")
    parser.add_argument(
        '--seed',
        type=int,
        default=None,
        help="Python randomization seed; uses python default if 0 or not given"
    )
    parser.add_argument(
        '--epsilon',
        type=float,
        default=0,
        help="Probability of a random action, to adjust difficulty")
    parser.add_argument('--record',
                        action='store_true',
                        help="If doing HFO --record")
    parser.add_argument('--rdir',
                        type=str,
                        default='log/',
                        help="Set directory to use if doing --record")
    args = parser.parse_args()

    if args.seed:
        random.seed(args.seed)

    hfo_env = hfo.HFOEnvironment()
    # Build the optional arguments once: record_dir is only passed when
    # recording, which folds the two connectToServer branches into one call.
    connect_kwargs = {'play_goalie': False}
    if args.record:
        connect_kwargs['record_dir'] = args.rdir
    hfo_env.connectToServer(hfo.HIGH_LEVEL_FEATURE_SET,
                            settings.CONFIG_DIR,
                            args.port,
                            'localhost',
                            'base_right',
                            **connect_kwargs)

    numTeammates = hfo_env.getNumTeammates()
    numOpponents = hfo_env.getNumOpponents()

    # Seeding is only meaningful when random actions can actually be taken.
    if args.seed:
        if args.epsilon > 0:
            print("Python randomization seed: {0:d}".format(args.seed))
        else:
            print("Python randomization seed useless without --epsilon >0")
    if args.epsilon > 0:
        print("Using epsilon {0:n}".format(args.epsilon))

    my_unum = hfo_env.getUnum()
    assert ((my_unum > 1) and (my_unum <= 11)), "Bad unum {!r}".format(my_unum)
    print("My unum is {0:d}".format(my_unum))

    # Per-action usage counters, overall and while the ball was kickable.
    num_times_overall = {action: 0 for action in range(hfo.NUM_HFO_ACTIONS)}
    num_times_kickable = {action: 0 for action in range(hfo.NUM_HFO_ACTIONS)}
    misc_tracked = {'max_kickable_dist': 0}

    for episode in itertools.count():
        old_ball_pos_x, old_ball_pos_y = -1, 0
        episode_start = True
        status = hfo.IN_GAME
        while status == hfo.IN_GAME:
            state = hfo_env.getState()
            if episode_start:
                # Seed the "previous" ball position from the first valid frame.
                if -1 <= state[3] <= 1:
                    old_ball_pos_x = state[3]
                if -1 <= state[4] <= 1:
                    old_ball_pos_y = state[4]
                episode_start = False
            if (args.epsilon > 0) and (random.random() < args.epsilon):
                do_random_defense_action(state, hfo_env)
            else:
                do_defense_action(state_vec=state,
                                  hfo_env=hfo_env,
                                  num_opponents=numOpponents,
                                  num_teammates=numTeammates,
                                  old_ball_pos_x=old_ball_pos_x,
                                  old_ball_pos_y=old_ball_pos_y,
                                  num_times_overall=num_times_overall,
                                  num_times_kickable=num_times_kickable,
                                  misc_tracked=misc_tracked)
            old_ball_pos_x = state[3]
            old_ball_pos_y = state[4]
            status = hfo_env.step()
            #print(status)

        # Quit if the server goes down, after dumping the action statistics.
        if status == hfo.SERVER_DOWN:
            for action, count in num_times_overall.items():
                if count:
                    print("Overall times {0!s}: {1:d}".format(
                        hfo_env.actionToString(action), count))
            for action, count in num_times_kickable.items():
                if count:
                    print("Kickable times {0!s}: {1:d}".format(
                        hfo_env.actionToString(action), count))
            print("Max kickable dist: {0:n}".format(
                misc_tracked['max_kickable_dist']))
            hfo_env.act(hfo.QUIT)
            exit()
Ejemplo n.º 24
0
# Connection settings for the HFO server (low-level features, left-side base team).
feature_set = hfo.LOW_LEVEL_FEATURE_SET
config = '/Users/codeMan/Documents/hfo/HFO/bin/teams/base/config/formations-dt'
port = 6000
host = 'localhost'
side = 'base_left'

# ACTION_LOOKUP = {
#     0: hfo.DASH,
#     1: hfo.TURN,
#     2: hfo.KICK,
#     3: hfo.TACKLE, # Used on defense to slide tackle the ball
#     4: hfo.CATCH,  # Used only by goalie to catch the ball
# }

# Connect a low-level client; final False presumably disables goalie play —
# confirm against HFOEnvironment.connectToServer's signature.
server = hfo.HFOEnvironment()
server.connectToServer(feature_set, config, port, host, side, False)

# Wrap the raw connection in the project's gym-style environment.
env = SoccerEnv(env=server)

# Sample random parameterized actions for 1000 steps (loop body continues
# beyond this chunk).
for i in range(1000):
    # Random choice among the first three actions (DASH / TURN / KICK).
    rand_action_index = np.random.randint(0, 3)

    # DASH parameters: power in [0, 100], direction in [-180, 180] degrees.
    rand_dash_pow = np.random.uniform(0, 100)
    rand_dash_dic = np.random.uniform(-180, 180)

    # TURN parameter: direction in degrees.
    rand_turn_dic = np.random.uniform(-180, 180)

    # KICK parameters: power and direction.
    rand_kick_pow = np.random.uniform(0, 100)
    rand_kick_dic = np.random.uniform(-180, 180)