class Agent:
    """Agent that holds a Q-table for one maze and can query its transitions.

    Attributes:
        maze: ``Maze`` wrapper built from the array passed to ``__init__``.
        stateNum: Number of states, computed as ``maze.m * maze.n``
            (presumably the grid's row and column counts -- confirm
            against ``Maze``).
        qTable: Zero-initialised float array of shape ``(4, stateNum)``;
            the first axis is the action code, the second the state index.
    """

    # Position in this tuple is the action code; the value is the name of
    # the Maze attribute that maps a state to its successor for that move.
    _ACTION_TABLES = ("up", "down", "left", "right")

    def __init__(self, maze):
        """Wrap ``maze`` in a ``Maze`` and allocate an all-zero Q-table.

        Args:
            maze: Raw maze data forwarded unchanged to ``Maze(...)``.
        """
        self.maze = Maze(maze)
        # Must run before GetNextState is used: presumably this is what
        # fills the up/down/left/right tables -- confirm against Maze.
        self.maze.SetRewardAndNextStateDict()
        self.stateNum = self.maze.m * self.maze.n
        self.qTable = np.zeros((4, self.stateNum))

    def GetNextState(self, state, action):
        """Return the successor of ``state`` after taking ``action``.

        Args:
            state: Key into the maze's per-direction transition tables.
            action: Action code -- 0 = up, 1 = down, 2 = left, 3 = right.

        Returns:
            Whatever the matching transition table stores for ``state``.

        Raises:
            ValueError: If ``action`` is not one of the four known codes.
                Previously this case fell through and returned ``None``,
                which then surfaced as a confusing failure further on.
        """
        # Compare with == (rather than indexing by ``action``) so NumPy
        # integer scalars keep working and negative values never wrap
        # around to a valid direction.
        for code, table_name in enumerate(self._ACTION_TABLES):
            if action == code:
                return getattr(self.maze, table_name)[state]
        raise ValueError(
            "unknown action {!r}; expected 0 (up), 1 (down), "
            "2 (left) or 3 (right)".format(action)
        )
import numpy as np import random from Maze import Maze from Agent import Agent mazeFileName = input("请输入迷宫文件:") maze = np.load(mazeFileName) mazeTraining = Maze(maze) mazeTraining.SetRewardAndNextStateDict() agentTraining = Agent(maze) preTrain = input("是否使用预训练模型?(y/n):") if preTrain == 'y': agentFileName = input("请输入智能体的预训练模型文件名:") agentTraining.qTable = np.load(agentFileName) trainingTime = int(input("请输入训练次数:")) epsilon = 1 learningRate = 0.5 gamma = 1 for i in range(trainingTime): epsilon -= 1 / (trainingTime // 2) # epsilon递减 # 随机初始化训练起点 state = np.random.randint(agentTraining.stateNum) while mazeTraining.GetState(state) != 1: state = np.random.randint(agentTraining.stateNum) time = 0 while mazeTraining.GetState(state) != 0.9: # epsilon贪心策略 if False not in (agentTraining.qTable[:, state] == [0, 0, 0, 0]): action = np.random.randint(4) else: if random.random() < epsilon: