class Agent(object):
    """Maze agent that owns a Maze instance and a table of Q-values.

    Attributes:
        maze: ``Maze`` built from the constructor argument, with
            ``SetRewardAndNextStateDict()`` already called on it.
        stateNum: Number of states, computed as ``maze.m * maze.n``.
        qTable: Zero-initialised array of shape ``(4, stateNum)``; the first
            axis is indexed by the action codes 0-3 used in ``GetNextState``.
    """

    def __init__(self, maze):
        """Build the internal Maze from *maze* and allocate an all-zero Q-table."""
        self.maze = Maze(maze)
        self.maze.SetRewardAndNextStateDict()
        dim_m, dim_n = self.maze.m, self.maze.n
        self.stateNum = dim_m * dim_n
        self.qTable = np.zeros((4, self.stateNum))

    def GetNextState(self, state, action):
        """Return the maze's successor of *state* for the given *action*.

        Action codes map to the maze lookup tables as 0 -> ``up``,
        1 -> ``down``, 2 -> ``left``, 3 -> ``right``.  Any other action
        yields ``None``.
        """
        # The position in this tuple is the action code; only the table for
        # the matching action is ever read from the maze.
        table_names = ("up", "down", "left", "right")
        for code, table_name in enumerate(table_names):
            if action == code:
                return getattr(self.maze, table_name)[state]
        return None
import numpy as np
import random
from Maze import Maze
from Agent import Agent

# Interactive setup for training: ask for the maze file and load it with NumPy.
mazeFileName = input("请输入迷宫文件:")
maze = np.load(mazeFileName)
# Standalone Maze used by the training loop for state queries; note that
# Agent.__init__ builds its own separate Maze from the same array.
mazeTraining = Maze(maze)
mazeTraining.SetRewardAndNextStateDict()
agentTraining = Agent(maze)
# Optionally replace the agent's zero-initialised Q-table with one loaded from
# disk. Only the exact answer 'y' (lowercase) enables this.
preTrain = input("是否使用预训练模型?(y/n):")
if preTrain == 'y':
    agentFileName = input("请输入智能体的预训练模型文件名:")
    # NOTE(review): the loaded array is not shape-checked here; presumably it
    # must be (4, stateNum) to match the table Agent allocates -- confirm.
    agentTraining.qTable = np.load(agentFileName)
# Number of training iterations; int() raises ValueError on non-numeric input.
# NOTE(review): the loop below divides by `trainingTime // 2`, so an input of
# 1 makes that divisor zero and raises ZeroDivisionError.
trainingTime = int(input("请输入训练次数:"))
epsilon = 1  # starting exploration rate; decremented every iteration of the loop below
learningRate = 0.5  # presumably the Q-value update step size -- usage is not visible in this excerpt
gamma = 1  # presumably the discount factor -- usage is not visible in this excerpt
for i in range(trainingTime):
    epsilon -= 1 / (trainingTime // 2)  # epsilon递减
    # 随机初始化训练起点
    state = np.random.randint(agentTraining.stateNum)
    while mazeTraining.GetState(state) != 1:
        state = np.random.randint(agentTraining.stateNum)
    time = 0
    while mazeTraining.GetState(state) != 0.9:
        # epsilon贪心策略
        if False not in (agentTraining.qTable[:, state] == [0, 0, 0, 0]):
            action = np.random.randint(4)
        else:
            if random.random() < epsilon: