# --- Training setup ---------------------------------------------------------
# Environment wrapper; the training loop queries it through GetState().
mazeTraining = Maze(maze)
# Presumably builds the reward / next-state lookup tables -- confirm in Maze.
mazeTraining.SetRewardAndNextStateDict()
# NOTE(review): the agent is constructed from the raw `maze`, not from
# `mazeTraining`. The Q update reads `agentTraining.maze.reward`, so Agent
# presumably wraps its own Maze instance -- confirm against the Agent class.
agentTraining = Agent(maze)

# Optionally warm-start from a previously saved Q-table.
# The answer is normalised so that "Y" or " y " is accepted as well as "y".
preTrain = input("是否使用预训练模型?(y/n):").strip().lower()
if preTrain == 'y':
    agentFileName = input("请输入智能体的预训练模型文件名:")
    loadedQTable = np.load(agentFileName)
    # A table saved for a different maze would otherwise surface much later
    # as an IndexError (or train silently on mismatched entries), so fail
    # here with a clear message instead.
    if loadedQTable.shape != agentTraining.qTable.shape:
        raise ValueError(
            f"pre-trained Q-table has shape {loadedQTable.shape}, "
            f"expected {agentTraining.qTable.shape}"
        )
    agentTraining.qTable = loadedQTable

# Number of training episodes.
trainingTime = int(input("请输入训练次数:"))

# Hyperparameters.
epsilon = 1  # initial exploration rate; decayed once per episode
learningRate = 0.5  # weight of the new estimate in the Q update
gamma = 1  # discount factor (1 = future reward is not discounted)
# Per-episode epsilon decrement: epsilon drops from its initial value by 1 over
# the first half of the episodes. Once it is <= 0 the `random.random() < epsilon`
# test below can no longer succeed, so action selection becomes purely greedy
# (except for states whose Q column is still all zeros).
# max(..., 1) guards trainingTime == 1, where trainingTime // 2 == 0 and the
# unguarded division raised ZeroDivisionError.
epsilonStep = 1 / max(trainingTime // 2, 1)
for i in range(trainingTime):
    epsilon -= epsilonStep  # decay epsilon
    # Randomly choose the episode's start state, resampling until the maze
    # reports cell value 1 (presumably a free cell -- confirm in Maze).
    state = np.random.randint(agentTraining.stateNum)
    while mazeTraining.GetState(state) != 1:
        state = np.random.randint(agentTraining.stateNum)
    # NOTE(review): `time` would shadow the stdlib module of the same name if
    # this file imports it.
    time = 0
    # Continue until the cell with value 0.9 is reached (presumably the goal
    # cell -- confirm in Maze).
    while mazeTraining.GetState(state) != 0.9:
        # Epsilon-greedy policy. A state whose Q column is still all zeros has
        # no preference yet, so it always gets a uniformly random action; the
        # short-circuit `or` keeps random.random() from being consumed then.
        unexplored = not np.any(agentTraining.qTable[:, state])
        if unexplored or random.random() < epsilon:
            action = np.random.randint(4)
        else:
            action = np.argmax(agentTraining.qTable[:, state])
        nextState = agentTraining.GetNextState(state, action)
        # Update the Q-table:
        #   Q(a, s) <- (1 - lr) * Q(a, s) + lr * (r(a, s) + gamma * max_a' Q(a', s'))
        tdTarget = agentTraining.maze.reward[action, state] + \
            gamma * np.amax(agentTraining.qTable[:, nextState])
        agentTraining.qTable[action, state] = \
            (1 - learningRate) * agentTraining.qTable[action, state] + learningRate * tdTarget