def __init__(self, discountFactor, epsilon, initVals=0.0):
    """Monte Carlo control agent for the discrete HFO attacking task.

    Args:
        discountFactor: reward discount factor (gamma) used when computing returns.
        epsilon: exploration rate for the epsilon-greedy policy.
        initVals: initial value for every Q(s, a) entry (default 0.0).
    """
    super(MonteCarloAgent, self).__init__()
    self.attack = HFOAttackingPlayer()
    # All possible states: the 5x6 grid positions plus the two terminal states.
    self.State = [(x, y) for x in range(5) for y in range(6)]
    self.State.append("GOAL")
    self.State.append("OUT_OF_BOUNDS")
    # Discount factor (gamma).
    self.discountFactor = discountFactor
    # Epsilon-greedy exploration rate.
    self.epsilon = epsilon
    # Current state of the agent.
    self.cur = 0
    # Per-episode logs of actions, rewards and states.
    self.logA = []
    self.logR = []
    self.logS = []
    # Cumulative (discounted) return accumulator.
    self.G = 0
    # Q table: state -> {action -> value}.
    # Returns table: (state, action) -> list of observed returns, averaged
    # later to estimate Q(s, a).
    self.Q = {}
    self.returns = {}
    for s in self.State:
        self.Q[s] = {}
        for a in self.possibleActions:
            # BUG FIX: honour the initVals parameter instead of
            # hard-coding 0 (initVals was previously accepted but ignored).
            self.Q[s][a] = initVals
            self.returns[(s, a)] = []
return self.learningRate, self.epsilon if __name__ == '__main__': parser = argparse.ArgumentParser() parser.add_argument('--id', type=int, default=0) parser.add_argument('--numOpponents', type=int, default=0) parser.add_argument('--numTeammates', type=int, default=0) parser.add_argument('--numEpisodes', type=int, default=500) args = parser.parse_args() # Initialize connection with the HFO server hfoEnv = HFOAttackingPlayer(numOpponents=args.numOpponents, numTeammates=args.numTeammates, agentId=args.id) hfoEnv.connectToServer() # Initialize a Q-Learning Agent agent = QLearningAgent(learningRate=0.1, discountFactor=0.99, epsilon=1.0) numEpisodes = args.numEpisodes # Run training using Q-Learning numTakenActions = 0 for episode in range(numEpisodes): status = 0 observation = hfoEnv.reset() while status == 0: learningRate, epsilon = agent.computeHyperparameters(
from DiscreteHFO.HFOAttackingPlayer import HFOAttackingPlayer
import random
import argparse

if __name__ == '__main__':
    # Command-line configuration for a randomly-acting baseline agent.
    parser = argparse.ArgumentParser()
    parser.add_argument('--id', type=int, default=0)
    parser.add_argument('--numOpponents', type=int, default=0)
    parser.add_argument('--numTeammates', type=int, default=0)
    parser.add_argument('--numEpisodes', type=int, default=500)
    args = parser.parse_args()

    # Initialize the connection with the HFO server.
    hfoEnv = HFOAttackingPlayer(numOpponents=args.numOpponents,
                                numTeammates=args.numTeammates,
                                agentId=args.id)
    # NOTE(review): other scripts in this project call connectToServer();
    # confirm which spelling HFOAttackingPlayer actually exposes.
    hfoEnv.connect_to_server()

    # BUG FIX: use the parsed --numEpisodes value (was hard-coded to 500,
    # silently ignoring the flag) and run exactly numEpisodes episodes
    # (was range(numEpisodes + 1), which ran one extra episode).
    for episode in range(args.numEpisodes):
        status = 0
        observation = hfoEnv.reset()
        # Act randomly until the episode terminates (status != 0).
        while status == 0:
            # Choose uniformly among the environment's actions; avoids
            # hard-coding the action count (was random.randint(0, 4)).
            action = random.choice(hfoEnv.possibleActions)
            nextObservation, reward, done, status = hfoEnv.step(action)
            observation = nextObservation
        # Status 5: the server signalled shutdown — disconnect cleanly.
        if status == 5:
            hfoEnv.quitGame()