# Initialize connection with the HFO server hfoEnv = HFOAttackingPlayer(numOpponents=args.numOpponents, numTeammates=args.numTeammates, agentId=args.id) hfoEnv.connectToServer() # Initialize a Q-Learning Agent agent = QLearningAgent(learningRate=0.1, discountFactor=0.99, epsilon=1.0) numEpisodes = args.numEpisodes # Run training using Q-Learning numTakenActions = 0 for episode in range(numEpisodes): status = 0 observation = hfoEnv.reset() while status == 0: learningRate, epsilon = agent.computeHyperparameters( numTakenActions, episode) agent.setEpsilon(epsilon) agent.setLearningRate(learningRate) obsCopy = observation.copy() agent.setState(agent.toStateRepresentation(obsCopy)) action = agent.act() numTakenActions += 1 nextObservation, reward, done, status = hfoEnv.step(action) agent.setExperience(agent.toStateRepresentation(obsCopy), action, reward, status,
if __name__ == '__main__':
    # Entry point for a random-action baseline: connect to the HFO server
    # and, for each episode, keep sending randomly chosen actions until the
    # environment reports a non-zero status.
    parser = argparse.ArgumentParser()
    parser.add_argument('--id', type=int, default=0)
    parser.add_argument('--numOpponents', type=int, default=0)
    parser.add_argument('--numTeammates', type=int, default=0)
    parser.add_argument('--numEpisodes', type=int, default=500)
    args = parser.parse_args()

    hfoEnv = HFOAttackingPlayer(numOpponents=args.numOpponents,
                                numTeammates=args.numTeammates,
                                agentId=args.id)
    hfoEnv.connectToServer()

    # Honour the --numEpisodes flag. It was previously parsed but then
    # overridden by a hard-coded 500; the flag's default is still 500, so
    # invocations without the flag behave exactly as before.
    numEpisodes = args.numEpisodes

    # NOTE(review): the loop runs numEpisodes + 1 iterations. Presumably the
    # extra iteration lets the agent observe status 5 once the server has
    # finished its trials -- confirm before changing the bound.
    for episode in range(numEpisodes + 1):
        status = 0
        hfoEnv.reset()  # restore the environment to its initial state

        # A status of 0 keeps the episode running.
        while status == 0:
            # NOTE(review): assumes possibleActions has at least 5 entries;
            # only indices 0..4 are ever sampled.
            act = random.randint(0, 4)
            # Pass the selected action to the agent; only the returned status
            # is needed here, the observation/reward/done values are unused.
            _, _, _, status = hfoEnv.step(hfoEnv.possibleActions[act])

        # Status 5 ends the whole run: quit the game and stop iterating.
        # NOTE(review): presumably 5 is the "server down" status -- verify
        # against the HFO status codes.
        if status == 5:
            hfoEnv.quitGame()
            break