hfoEnv.connectToServer()

# Initialize a Q-Learning Agent
agent = QLearningAgent(learningRate=0.1, discountFactor=0.99, epsilon=1.0)
numEpisodes = args.numEpisodes

# Run training using Q-Learning
# numTakenActions counts every action taken across all episodes; it is fed
# to computeHyperparameters together with the episode index.
numTakenActions = 0
for episode in range(numEpisodes):
    # The inner loop runs for as long as the environment reports status 0.
    status = 0
    observation = hfoEnv.reset()

    while status == 0:
        # Refresh the learning rate and epsilon before every action.
        learningRate, epsilon = agent.computeHyperparameters(
            numTakenActions, episode)
        agent.setEpsilon(epsilon)
        agent.setLearningRate(learningRate)

        # Work on a copy so the observation held by the loop is not the
        # object handed to the agent.
        obsCopy = observation.copy()
        agent.setState(agent.toStateRepresentation(obsCopy))
        action = agent.act()
        numTakenActions += 1

        nextObservation, reward, done, status = hfoEnv.step(action)

        # Store the transition (state, action, reward, status, next state)
        # and let the agent perform its learning step on it.
        previousState = agent.toStateRepresentation(obsCopy)
        followingState = agent.toStateRepresentation(nextObservation)
        agent.setExperience(previousState, action, reward, status,
                            followingState)
        update = agent.learn()

        observation = nextObservation
# Command-line options for a player that acts uniformly at random.
parser = argparse.ArgumentParser()
parser.add_argument('--id', type=int, default=0)
parser.add_argument('--numOpponents', type=int, default=0)
parser.add_argument('--numTeammates', type=int, default=0)
parser.add_argument('--numEpisodes', type=int, default=500)
args = parser.parse_args()

hfoEnv = HFOAttackingPlayer(numOpponents=args.numOpponents,
                            numTeammates=args.numTeammates,
                            agentId=args.id)
# NOTE(review): confirm this method name against HFOAttackingPlayer; other
# scripts may spell it connectToServer().
hfoEnv.connect_to_server()

# Honour --numEpisodes instead of a hard-coded 500 (the flag's default is
# still 500, so running without the flag behaves as before).
numEpisodes = args.numEpisodes

# One extra iteration beyond numEpisodes is kept from the original script.
# NOTE(review): presumably this lets the player observe the final status
# from the server and quit cleanly -- confirm.
for episode in range(numEpisodes + 1):
    status = 0
    observation = hfoEnv.reset()

    # Keep stepping while the environment reports status 0.
    while status == 0:
        # Pick one of the first five entries of possibleActions at random.
        act = random.randint(0, 4)
        nextObservation, reward, done, status = hfoEnv.step(
            hfoEnv.possibleActions[act])
        observation = nextObservation

    # NOTE(review): status 5 is presumably the "server down" code -- confirm
    # against the HFO status enum. On it, quit and stop running episodes.
    if status == 5:
        hfoEnv.quitGame()
        break