Example 1
import math
import os

import gym
import numpy as np

# Project-local classes assumed importable from the surrounding repository:
# Maze, MazeGenerator, Simulator, Worker, Queen, Spider, Action, Cord, DQNSolver.


class GameEnv(gym.Env):
    metadata = {'render.modes': ['human']}

    def __init__(self):
        # Maze layout encoded as a comma-separated string; alternatives below.
        m = "Test,6,6,4,5,1,0,131111100051105001100051150001111121"
        # "Test,10,10,4,0,4,9,1111211151100000000111100110011000000001100001511111011000111000000001100501050110000000011111311111"
        # "Test,4,4,2,0,2,3,1125100110011531", or a random maze via MazeGenerator()
        self.maze= Maze(m)
        self.s=Simulator(self.maze)
        self.span=6
        self.number=1
        self.pList=[]
        self.stateList=[]
        self.history=m+"|"+"0"
        self.finished=0
        for j in range(self.number):
            p=Worker(self.maze)
            self.s.add(p)
            self.pList.append(p)
            state= np.asarray(p.getView(p.getPos(),self.span))
            self.stateList.append(state)
            self.history+="#"+p.getName()+"-"+p.getPos().CordToString()
        self.history+="|"
        # One discrete action per Action enum member.
        action_space = []
        for i in range(0, len(Action)):
            action_space.append(Action(i))
        self.action_space = np.asarray(action_space)
        # Each worker observes a (2*span+1) x (2*span+1) window around itself.
        self.observation_space = math.pow(2 * self.span + 1, 2) * self.number
        self.shortestRoute = len(self.maze.GetOptimalRoute()[0])
        self.maze.printMaze()

    def step(self, action):
        state_nextList = []
        reward = 0
        terminal = False
        info = {}

        wallMove = False
        for p in self.pList:
            oldPosition = p.getPos()

            if action in self.maze.WhichWayIsClear(oldPosition):
                # The chosen direction is open: move the worker.
                p.Do(action, self.maze)
                print(p.getName(), oldPosition.CordToString(), " to ",
                      p.getPos().CordToString(), action, "Moving",
                      self.maze.returnMoving())
                state_next = np.asarray(p.getView(p.getPos(), self.span))
                reward += p.getReward(p.getPos(), True, oldPosition,
                                      p.getView(p.getPos(), self.span))
                wallMove = True
            else:
                # Blocked: the worker stays put and collects the penalty reward.
                state_next = np.asarray(p.getView(p.getPos(), self.span))
                reward += p.getReward(p.getPos(), False, oldPosition,
                                      p.getView(p.getPos(), self.span))

            state_nextList.append(state_next)
            if self.maze.CheckExit(p.getPos()):
                print("EXIT: ", p.getPos(), len(self.pList))
                self.finished += 1
        state_nextList = np.asarray(state_nextList)

        if wallMove:
            # Log the time step and wall state, then move the walls around
            # the occupied (blocked) positions.
            self.history += str(self.pList[0].getTime()) + "#" + self.maze.returnAllClearString()
            blocked = []
            for p in self.pList:
                blocked.append(p.getPos())
                self.history += "#" + p.getName() + "-" + p.getPos().CordToString()
            self.maze.WallMove(blocked)
            self.shortestRoute = len(self.maze.GetOptimalRoute()[0])
            self.history += "|"

        if self.finished == len(self.pList):
            # Every worker has reached the exit: persist the game history.
            with open("Games.txt", "a+") as file:
                file.write(self.history + "\n")
            terminal = True

        return state_nextList, reward, terminal, info
    
    def reset(self):        
        #print("Resetting")
        self.stateList=[]
        self.history=self.maze.mazeString+"|"+"0"
        self.finished=0
        for p in self.pList:
            p.setInitPos(Cord(self.maze.getInitialX(),self.maze.getInitialY()))
            state=np.asarray(p.getView(p.getPos(),self.span))
            self.stateList.append(state)
            self.history+="#"+p.getName()+"-"+p.getPos().CordToString()
        self.history+="|"
        self.shortestRoute=len(self.maze.GetOptimalRoute()[0])
        return self.stateList
    
    def resetNewMaze(self):        
        m= MazeGenerator()  
        self.maze= Maze(m)
        self.s=Simulator(self.maze)
        self.pList=[]
        self.stateList=[]
        self.history=m+"|"+"0"
        self.finished=0
        for j in range(self.number):
            p=Worker(self.maze)
            self.pList.append(p)
            self.s.add(p)
            state= np.asarray(p.getView(p.getPos(),self.span))
            self.stateList.append(state)
            self.history+="#"+p.getName()+"-"+p.getPos().CordToString()
        self.history+="|"
        self.shortestRoute = len(self.maze.GetOptimalRoute()[0])
        self.maze.printMaze()
        return self.stateList
        
    def render(self, mode='human', close=False):
        self.s.display()
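
A minimal driver for this environment could look like the sketch below; the random policy and the 500-step cap are illustrative and not part of the original code:

# Hypothetical usage sketch: one episode under a random policy.
import random

env = GameEnv()
states = env.reset()
for t in range(500):  # arbitrary step cap
    action = random.choice(list(env.action_space))
    states, reward, terminal, info = env.step(action)
    env.render()
    if terminal:
        break
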
Example 2
class GameEnv(gym.Env):
    metadata = {'render.modes': ['human']}

    def __init__(self):
        # Maze layout encoded as a comma-separated string; alternatives below.
        m = "Test,10,10,4,0,4,9,1111211111100000000111100110011000000001100001011111011000111000000001100101010110000000011111311111"
        # "Test,6,6,4,5,1,0,131111100051105001100051150001111121"
        # "Test,10,10,4,0,4,9,1111211151100000000111100110011000000001100001511111011000111000000001100501050110000000011111311111"
        # "Test,4,4,2,0,2,3,1125100110011531", or a random maze via MazeGenerator()
        self.maze= Maze(m)
        self.s=Simulator(self.maze)
        self.span=6
        self.number=2
        self.pList=[]
        self.stateList=[]
        self.finishedP=[]
        self.history=m+"|"+"0"
        self.finished=0
        self.count=0
        for j in range(self.number):
            p=Worker(self.maze)
            self.s.add(p)
            self.pList.append(p)
            state= np.asarray(p.getView(p.getPos(),self.span))
            self.stateList.append(state)
            self.history+="#"+p.getName()+"-"+p.getPos().CordToString()
        self.history+="|"
        # One discrete action index per Action enum member.
        action_space = []
        for i in range(0, len(Action)):
            action_space.append(i)
        self.action_space_worker = np.asarray(action_space)
        # Each worker observes a (2*span+1) x (2*span+1) window around itself.
        self.observation_space_worker = math.pow(2 * self.span + 1, 2)
        self.shortestRoute = len(self.maze.GetOptimalRoute()[0])
        self.maze.printMaze()

        # A single DQN is shared by all workers.
        self.dqn_solver_worker = DQNSolver(int(self.observation_space_worker), len(self.action_space_worker))
        self.reward = 0
        
    def stepAll(self):
        terminal = False

        # Walls move first.
        self.history += str(self.pList[0].getTime()) + "#" + self.maze.returnAllClearString()
        blocked = []
        for q in self.pList:
            blocked.append(q.getPos())
        self.maze.WallMove(blocked)
        self.shortestRoute = len(self.maze.GetOptimalRoute()[0])

        # Workers (prey) move, each driven by the shared DQN.
        index = 0
        for p in self.pList:
            state = np.reshape(self.stateList[index], [1, int(self.observation_space_worker)])
            action = self.dqn_solver_worker.act(state)
            state_next, reward, terminal, info = self.step(p, action, index)
            state_next = np.reshape(state_next, [1, int(self.observation_space_worker)])
            self.dqn_solver_worker.remember(state, action, reward, state_next, terminal)
            self.stateList[index] = state_next
            self.dqn_solver_worker.experience_replay()
            self.reward += reward
            self.history += "#" + p.getName() + "-" + p.getPos().CordToString()
            index += 1
        self.history += "|"
        # Predators would move here; not implemented in this version.

        if self.finished == len(self.pList):
            # Every worker has reached the exit: persist the game history.
            with open("GamesData.txt", "a+") as file:
                file.write(self.history + "\n")
            terminal = True

        return self.reward, terminal

    def step(self, agent, action, index):
        reward = 0
        terminal = False
        info = {}

        oldPosition = agent.getPos()

        if self.action_space_worker[action] in self.maze.WhichWayIsClear(oldPosition, True):
            # The chosen direction is open: move the agent.
            agent.Do(self.action_space_worker[action], self.maze)
            state_next = np.asarray(agent.getView(agent.getPos(), self.span))
            reward += agent.getReward(agent.getPos(), True, oldPosition,
                                      agent.getView(agent.getPos(), self.span))
            self.count += 1
        else:
            # Blocked: the agent stays put and collects the penalty reward.
            state_next = np.asarray(agent.getView(agent.getPos(), self.span))
            reward += agent.getReward(agent.getPos(), False, oldPosition,
                                      agent.getView(agent.getPos(), self.span))

        if self.maze.CheckExit(agent.getPos()) and agent not in self.finishedP:
            self.finished += 1
            self.finishedP.append(agent)

        return state_next, reward, terminal, info
    
    def reset(self):        
        #print("Resetting")
        self.maze= Maze(self.maze.mazeString)
        self.stateList=[]
        self.history=self.maze.mazeString+"|"+"0"
        self.finished=0
        self.finishedP=[]
        self.count=0
        for p in self.pList:
            p.setInitPos(Cord(self.maze.getInitialX(),self.maze.getInitialY()))
            state=np.asarray(p.getView(p.getPos(),self.span))
            self.stateList.append(state)
            self.history+="#"+p.getName()+"-"+p.getPos().CordToString()
        self.history+="|"
        self.shortestRoute=len(self.maze.GetOptimalRoute()[0])
        #print(self.maze.mazeString)
        #self.maze.printMaze()
        return self.stateList
    
    def resetNewMaze(self):        
        m= MazeGenerator()  
        self.maze= Maze(m)
        self.s=Simulator(self.maze)
        self.pList=[]
        self.stateList=[]
        self.history=m+"|"+"0"
        self.finished=0
        self.finishedP=[]
        self.count=0
        for j in range(self.number):
            p=Worker(self.maze)
            self.pList.append(p)
            self.s.add(p)
            state= np.asarray(p.getView(p.getPos(),self.span))
            self.stateList.append(state)
            self.history+="#"+p.getName()+"-"+p.getPos().CordToString()
        self.history+="|"
        self.shortestRoute = len(self.maze.GetOptimalRoute()[0])
        self.maze.printMaze()
        return self.stateList
        
    def render(self, mode='human', close=False):
        self.s.display()
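
Because this variant drives the shared DQN inside stepAll, an outer loop only resets the environment and calls stepAll until the episode ends. A minimal sketch; the episode count is arbitrary, and DQNSolver is an external class whose interface (act, remember, experience_replay) is only assumed from the call sites:

# Hypothetical training loop for the self-stepping variant.
env = GameEnv()
for episode in range(100):  # arbitrary episode count
    env.reset()
    env.reward = 0  # cumulative reward is not cleared by reset()
    terminal = False
    while not terminal:
        total_reward, terminal = env.stepAll()
    print("episode", episode, "reward", total_reward)
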
Example 3
class GameEnv(gym.Env):
    metadata = {'render.modes': ['human']}

    def __init__(self):
        # Generate a random 10x10 maze; fixed alternatives below.
        m = MazeGenerator(10, 10)
        # "Test,10,10,4,0,4,9,1111211111100000000111100110011000000001100001011111011000111000000001100101010110000000011111311111"
        # "Test,6,6,4,5,1,0,131111100051105001100051150001111121"
        # "Test,10,10,4,0,4,9,1111211151100000000111100110011000000001100001511111011000111000000001100501050110000000011111311111"
        # "Test,4,4,2,0,2,3,1125100110011531"
        self.maze = Maze(m)
        self.s = Simulator(self.maze)
        self.spanP = 5
        self.spanM = 5
        self.wNumber = 5
        self.qNumber = 0
        self.sNumber = 0
        self.pList = []
        self.qList = []
        self.mList = []
        self.pPos = []
        self.qPos = []
        self.mPos = []
        self.pStateList = []
        self.qStateList = []
        self.mStateList = []
        self.finishedP = []
        self.finishedQ = []
        self.history = m + "|" + "0"
        self.finished = 0
        self.count = 0
        self.eaten = 0
        self.queenEaten = False
        self.queenLeft = False

        # Rewards: negative values are per-step penalties; the large positive
        # values (scaled by maze area) reward reaching the exit or the queen.
        self.wReward_not_possible = -50
        self.wReward_wall = -50
        self.wReward_entrance = -20
        self.wReward_finished_before = 0
        self.wReward_exit = self.maze.height * self.maze.width * 20
        self.wReward_towards_exit = -1
        self.wReward_toQueen = -1
        self.wReward_atQueen = self.maze.height * self.maze.width * 20
        self.wReward_repeat_pos = -20
        self.wReward_else = -3
        self.wReward_queenEaten = -5000

        self.qReward_not_possible = -50
        self.qReward_wall = -50
        self.qReward_entrance = -20
        self.qReward_finished_before = 0
        self.qReward_exit = self.maze.height * self.maze.width * 20
        self.qReward_towards_exit = -1
        self.qReward_repeat_pos = -20
        self.qReward_else = -3

        self.sReward_not_possible = -50
        self.sReward_wall = -50
        self.sReward_eat = 1000
        self.sReward_eatQueen = 5000
        self.sReward_towards_prey = -1
        self.sReward_repeat_pos = -20
        self.sReward_else = -3

        # Book-keeping: output folder name (still needs updating for spiders).
        self.folderName = "Span_" + str(self.spanP) + "Dim_" + str(
            self.maze.height) + "_" + str(self.maze.width)

        self.maxIter = 10 * self.maze.height * self.maze.width
        self.completeStop = False

        for j in range(self.wNumber):
            p = Worker(self.maze, self.spanP)
            self.s.add(p)
            self.pList.append(p)
            self.pPos.append(p.getPos())

        for j in range(self.qNumber):
            q = Queen(self.maze)
            self.s.add(q)
            self.qList.append(q)
            self.qPos.append(q.getPos())


        for j in range(self.sNumber):
            s = Spider(self.maze)
            self.s.add(s)
            self.mList.append(s)
            self.mPos.append(s.getPos())

        for p in self.pList:
            state = np.asarray(
                p.getAugView(p.getPos(), self.spanP, self.pPos, self.qPos,
                             self.mPos))
            self.pStateList.append(state)
            self.history += "#" + p.getName() + "-" + p.getPos().CordToString()

        for q in self.qList:
            # The queen's view is built incrementally, so no initial state is stored.
            self.history += "#" + q.getName() + "-" + q.getPos().CordToString()

        for s in self.mList:
            state = np.asarray(
                s.getAugView(s.getPos(), self.spanM, self.pPos, self.qPos,
                             self.mPos))
            self.mStateList.append(state)
            self.history += "#" + s.getName() + "-" + s.getPos().CordToString()
        self.history += "|"
        action_space = []

        for i in range(0, len(Action)):
            action_space.append(i)
        self.action_space_worker = np.asarray(action_space)
        self.observation_space_worker = math.pow(2 * self.spanP + 1, 2)

        self.action_space_queen = np.asarray(action_space)
        self.observation_space_queen = (self.maze.height + 2 * self.spanP) * (
            self.maze.width + 2 * self.spanP)  #math.pow(2*self.spanP+1,2)

        self.action_space_spider = np.asarray(action_space)
        self.observation_space_spider = math.pow(2 * self.spanM + 1, 2)
        self.shortestRoute = len(self.maze.GetOptimalRoute()[0])
        self.maze.printMaze()

        self.dqn_solver_worker = DQNSolver(int(self.observation_space_worker),
                                           len(self.action_space_worker))
        self.dqn_solver_queen = DQNSolver(int(self.observation_space_queen),
                                          len(self.action_space_queen))
        self.dqn_solver_spider = DQNSolver(int(self.observation_space_spider),
                                           len(self.action_space_spider))
        self.pReward = 0
        self.qReward = 0
        self.mReward = 0

    def stepAll(self):
        terminal = False
        #walls move
        time = ''
        if (len(self.pList) > 0):
            time = str(self.pList[0].getTime())
        elif (len(self.qList) > 0):
            time = str(self.qList[0].getTime())
        elif (len(self.mList) > 0):
            time = str(self.mList[0].getTime())
        #print("TIME",time)
        self.history += time + "#" + self.maze.returnAllClearString()
        blocked = []
        for p in self.pList:
            blocked.append(p.getPos())
        for q in self.qList:
            blocked.append(q.getPos())
        for m in self.mList:
            blocked.append(m.getPos())

        self.maze.WallMove(blocked)
        self.shortestRoute = len(self.maze.GetOptimalRoute()[0])

        #prey move_workers
        index = 0
        for p in self.pList:
            if (p.exploring):
                state = np.reshape(self.pStateList[index],
                                   [1, int(self.observation_space_worker)])
                #print(self.pStateList[index],[1,int(self.observation_space_worker)])
                #print(state)
                action = self.dqn_solver_worker.act(state)
                #print("1")
                state_next, reward, terminal, info = self.step(
                    p, action, index)
                state_next = np.reshape(
                    state_next, [1, int(self.observation_space_worker)])
                self.dqn_solver_worker.remember(state, action, reward,
                                                state_next, terminal)
                self.pStateList[index] = state_next
                self.dqn_solver_worker.experience_replay()
                self.pReward += reward
                self.history += "#" + p.getName() + "-" + p.getPos(
                ).CordToString()
                p.routeToQueen.append(p.getPos())
            else:
                # need reward, state_next
                state_next, reward = p.goToQueen(
                    self.qList[0].getPos(), self.spanP, self.pPos, self.qPos,
                    self.mPos, self.wReward_not_possible, self.wReward_wall,
                    self.wReward_toQueen, self.wReward_atQueen,
                    self.wReward_repeat_pos, self.wReward_else)

                state_next = np.reshape(
                    state_next, [1, int(self.observation_space_worker)])
                self.pStateList[index] = state_next
                self.pReward += reward
                self.history += "#" + p.getName() + "-" + p.getPos(
                ).CordToString()

            #print(p.getName(),"moved to",p.getPos().CordToString(),"at",time, p.exploring)
            self.pPos[index] = p.getPos()
            if p.exploring:
                if p.isQueenOnEdge(self.spanP, self.pPos, self.qPos,
                                   self.mPos):
                    # Snapshot the view when the queen appears at the edge of
                    # the worker's span, to be merged into hers on return.
                    p.save_state(self.spanP, self.pPos, self.qPos, self.mPos)
                    print("Ant", "Taking Snapshot", p.savedTime)
            if (not p.exploring) and p.getPos().equals(self.qList[0].getPos()):
                # Back at the queen: merge the snapshot and resume exploring.
                p.exploring = True
                self.qList[0].combine(p.savedState, p.savedTime)
            index += 1

            #print("ANT",p.getName(), p.getTime(), p.getPos().CordToString(), "Snapshot", p.savedTime)
        #self.history+="|"

        #queen moves
        index = 0
        for q in self.qList:
            oldPos = q.getPos()
            if (len(q.view) > 0):
                view = q.norm_view((self.maze.height + 2 * self.spanP),
                                   (self.maze.width + 2 * self.spanP))
                state = np.reshape(view,
                                   [1, int(self.observation_space_queen)])
                action = self.dqn_solver_queen.act(state)
                state_next, reward, terminal, info = self.step(
                    q, action, index)
                viewNew = q.norm_view((self.maze.height + 2 * self.spanP),
                                      (self.maze.width + 2 * self.spanP))
                state_next = np.reshape(
                    viewNew, [1, int(self.observation_space_queen)])
                self.dqn_solver_queen.remember(state, action, reward,
                                               state_next, terminal)
                #self.mStateList[index]=state_next
                self.dqn_solver_queen.experience_replay()
                self.qReward += reward
            else:
                q.TimeStep += 1
            self.history += "#" + q.getName() + "-" + q.getPos().CordToString()
            self.qPos[0] = q.getPos()
            #print("(Global)Queen going",oldPos.CordToString(),"->",q.getPos().CordToString(), "at", q.getTime(),time)
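            # Infer the queen's actual move (0-3, with 4 meaning "no move")
            # from the position delta, so her persistent view can be shifted.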
            action = 4
            if (q.getPos().Y == oldPos.Y + 1):
                action = 0
            elif (q.getPos().Y == oldPos.Y - 1):
                action = 1
            elif (q.getPos().X == oldPos.X + 1):
                action = 2
            elif (q.getPos().X == oldPos.X - 1):
                action = 3

            q.updateView(action)
            #print("Updated view:")
            #q.show_span(q.getView())
            q.history[q.getTime()] = q.getPos()
            #print("----------------------------")
            index += 1

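        # Refresh each agent's vulnerability flags (whether a spider may eat
        # it) from the current worker and queen positions.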
        for p1 in self.pList:
            q = p1.getPos()
            if (len(self.qList) > 0):
                q = self.qList[0].getPos()
            p1.updateVulnerability(self.pPos, q)
        for q1 in self.qList:
            q1.updateVulnerability(self.pPos)

        #predators move
        index = 0
        for s in self.mList:
            state = np.reshape(self.mStateList[index],
                               [1, int(self.observation_space_spider)])
            action = self.dqn_solver_spider.act(state)
            state_next, reward, terminal, info = self.step(s, action, index)
            state_next = np.reshape(state_next,
                                    [1, int(self.observation_space_spider)])
            self.dqn_solver_spider.remember(state, action, reward, state_next,
                                            terminal)
            self.mStateList[index] = state_next
            self.dqn_solver_spider.experience_replay()
            self.mReward += reward
            self.history += "#" + s.getName() + "-" + s.getPos().CordToString()
            self.mPos[index] = s.getPos()
            index += 1
        self.history += "|"

        trueTermination = False
        self.maxIter -= 1
        if (((self.finished == len(self.pList)) and (len(self.qList) == 0))
                or self.queenEaten or self.queenLeft or (self.maxIter == 0)):
            # Episode over: every worker is out (and there is no queen), the
            # queen was eaten or escaped, or the step budget ran out.
            path = "DATA/Testing/exp2/" + self.folderName
            if not os.path.exists(path):
                os.makedirs(path)
            with open(path + "/GamesData.txt", "a+") as file:
                file.write(self.history + "\n")
            terminal = True
            trueTermination = True

        return self.pReward, self.mReward, terminal, trueTermination, self.eaten

    def step(self, agent, action, index):
        reward = 0
        terminal = False
        info = {}

        oldPosition = agent.getPos()
        state_Next = np.empty(1)

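        # Dispatch on agent type: each branch moves the agent when the chosen
        # action is legal, otherwise performs action index 4 (treated as "no
        # move" by the direction-inference code in stepAll) and applies the
        # penalty branch of the reward function.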
        if (str(type(agent).__name__) == "Worker"):
            if (self.action_space_worker[action]
                    in self.maze.WhichWayIsClear(oldPosition, True)):
                agent.Do(self.action_space_worker[action], self.maze)
                state_Next = np.asarray(
                    agent.getAugView(agent.getPos(), self.spanP, self.pPos,
                                     self.qPos, self.mPos))

                reward += agent.getReward(
                    agent.getPos(), True, oldPosition,
                    agent.getAugView(agent.getPos(), self.spanP, self.pPos,
                                     self.qPos, self.mPos),
                    self.wReward_not_possible, self.wReward_wall,
                    self.wReward_entrance, self.wReward_finished_before,
                    self.wReward_exit, self.wReward_towards_exit,
                    self.wReward_repeat_pos, self.wReward_else)
                self.count += 1

            else:
                agent.Do(self.action_space_worker[4], self.maze)
                state_Next = np.asarray(
                    agent.getAugView(agent.getPos(), self.spanP, self.pPos,
                                     self.qPos, self.mPos))

                reward += agent.getReward(
                    agent.getPos(), False, oldPosition,
                    agent.getAugView(agent.getPos(), self.spanP, self.pPos,
                                     self.qPos, self.mPos),
                    self.wReward_not_possible, self.wReward_wall,
                    self.wReward_entrance, self.wReward_finished_before,
                    self.wReward_exit, self.wReward_towards_exit,
                    self.wReward_repeat_pos, self.wReward_else)

            if (self.maze.CheckExit(agent.getPos())
                    and agent not in self.finishedP):
                self.finished += 1
                self.finishedP.append(agent)

        elif (str(type(agent).__name__) == "Spider"):
            if (self.action_space_spider[action]
                    in self.maze.WhichWayIsClear(oldPosition, True)):
                agent.Do(self.action_space_spider[action], self.maze)
                state_Next = np.asarray(
                    agent.getAugView(agent.getPos(), self.spanM, self.pPos,
                                     self.qPos, self.mPos))

                reward += agent.getReward(
                    agent.getPos(), True, oldPosition,
                    agent.getAugView(agent.getPos(), self.spanM, self.pPos,
                                     self.qPos,
                                     self.mPos), self.sReward_not_possible,
                    self.sReward_wall, [p.getPos() for p in self.pList],
                    self.sReward_eat, self.sReward_towards_prey,
                    self.sReward_repeat_pos, self.sReward_else)
                if ((agent.getPos().equals(self.qList[0].getPos()))
                        and (self.qList[0].vulnerable)):
                    reward += self.sReward_eatQueen
                    self.pReward += self.wReward_queenEaten
                    self.qList = []
                    self.queenEaten = True
                    terminal = True
                remove = []
                for prey in self.pList:
                    if ((agent.getPos().equals(prey.getPos()))
                            and (prey.vulnerable)):
                        remove.append(prey)
                        self.eaten += 1
                        self.pReward -= self.wReward_exit
                for corpse in remove:
                    self.pList.remove(corpse)

            else:
                agent.Do(self.action_space_spider[4], self.maze)
                state_Next = np.asarray(
                    agent.getAugView(agent.getPos(), self.spanM, self.pPos,
                                     self.qPos, self.mPos))

                reward += agent.getReward(
                    agent.getPos(), False, oldPosition,
                    agent.getAugView(agent.getPos(), self.spanM, self.pPos,
                                     self.qPos,
                                     self.mPos), self.sReward_not_possible,
                    self.sReward_wall, [p.getPos() for p in self.pList],
                    self.sReward_eat, self.sReward_towards_prey,
                    self.sReward_repeat_pos, self.sReward_else)

        elif (str(type(agent).__name__) == "Queen"):
            if (self.action_space_queen[action]
                    in self.maze.WhichWayIsClear(oldPosition, True)):
                agent.Do(self.action_space_queen[action], self.maze)
                state_Next = np.asarray(agent.getView())

                #self, pos, possible, oldPos, view, rNotPos, rWall, rEnt, rFinBef, rEx, rToEx, rRep, rElse
                reward += agent.getReward(
                    agent.getPos(), True, oldPosition, agent.getView(),
                    self.qReward_not_possible, self.qReward_wall,
                    self.qReward_entrance, self.qReward_finished_before,
                    self.qReward_exit, self.qReward_towards_exit,
                    self.qReward_repeat_pos, self.qReward_else)

            else:
                agent.Do(self.action_space_queen[4], self.maze)
                state_Next = np.asarray(agent.getView())

                reward += agent.getReward(
                    agent.getPos(), False, oldPosition, agent.getView(),
                    self.qReward_not_possible, self.qReward_wall,
                    self.qReward_entrance, self.qReward_finished_before,
                    self.qReward_exit, self.qReward_towards_exit,
                    self.qReward_repeat_pos, self.qReward_else)

            if (self.maze.CheckExit(agent.getPos())
                    and agent not in self.finishedQ):
                self.finishedQ.append(agent)
                self.queenLeft = True

        #print(agent.getTime(), agent.getName(), oldPosition.CordToString(), " -> ",agent.getPos().CordToString())
        return state_Next, reward, terminal, info

    def reset(self):
        #print("Resetting")
        self.maze = Maze(self.maze.mazeString)
        self.pList = []
        self.qList = []
        self.mList = []
        self.pPos = []
        self.qPos = []
        self.mPos = []
        self.pStateList = []
        self.mStateList = []
        self.qStateList = []
        self.finishedP = []
        self.finishedQ = []
        self.history = self.maze.mazeString + "|" + "0"
        self.finished = 0
        self.count = 0
        self.queenLeft = False
        self.maxIter = 10 * self.maze.height * self.maze.width

        for j in range(self.wNumber):
            p = Worker(self.maze, self.spanP)
            self.pList.append(p)

        for k in range(self.qNumber):
            q = Queen(self.maze)
            self.qList.append(q)

        for h in range(self.sNumber):
            s = Spider(self.maze)
            self.mList.append(s)

        #print(len(self.pList),len(self.qList),len(self.mList))

        self.eaten = 0
        self.queenEaten = False

        for p in self.pList:
            p.setInitPos(Cord(self.maze.getInitialX(),
                              self.maze.getInitialY()))
            self.pPos.append(p.getPos())

        for q in self.qList:
            q.setInitPos(Cord(self.maze.getInitialX(),
                              self.maze.getInitialY()))
            self.qPos.append(q.getPos())

        for q in self.mList:
            q.setInitPos(Cord(q.start.X, q.start.Y))
            self.mPos.append(q.getPos())

        for p in self.pList:
            state = np.asarray(
                p.getAugView(p.getPos(), self.spanP, self.pPos, self.qPos,
                             self.mPos))
            self.pStateList.append(state)
            self.history += "#" + p.getName() + "-" + p.getPos().CordToString()

        for q in self.qList:
            #state=np.asarray(p.getAugView(p.getPos(),self.spanP,self.pPos, self.qPos, self.mPos))
            #self.pStateList.append(state)
            self.history += "#" + q.getName() + "-" + q.getPos().CordToString()

        for q in self.mList:
            state = np.asarray(
                q.getAugView(q.getPos(), self.spanM, self.pPos, self.qPos,
                             self.mPos))
            self.mStateList.append(state)
            self.history += "#" + q.getName() + "-" + q.getPos().CordToString()

        self.history += "|"
        self.shortestRoute = len(self.maze.GetOptimalRoute()[0])
        self.pReward = 0
        self.qReward = 0
        self.mReward = 0
        #print(self.maze.mazeString)
        #self.maze.printMaze()
        return self.pStateList

    def resetNewMaze(self):
        mazesizeh = self.maze.height + 10
        mazesizew = self.maze.width + 10
        if (self.maze.height == 5):
            mazesizeh = 10
            mazesizew = 10
        m = MazeGenerator(mazesizeh, mazesizew)
        #"Test,10,10,4,0,4,9,1111211111100000000111100110011000000001100001011111011000111000000001100101010110000000011111311111" #MazeGenerator()
        self.maze = Maze(m)
        self.s = Simulator(self.maze)
        self.pList = []
        self.qList = []
        self.mList = []
        self.pPos = []
        self.qPos = []
        self.mPos = []
        self.pStateList = []
        self.qStateList = []
        self.mStateList = []
        self.history = m + "|" + "0"
        self.finished = 0
        self.finishedP = []
        self.finishedQ = []
        self.count = 0
        self.eaten = 0
        self.queenEaten = False
        self.queenLeft = False

        #rewards
        self.wReward_not_possible = -50
        self.wReward_wall = -50
        self.wReward_entrance = -20
        self.wReward_finished_before = 0
        self.wReward_exit = self.maze.height * self.maze.width * 20
        self.wReward_towards_exit = -1
        self.wReward_toQueen = -1
        self.wReward_atQueen = self.maze.height * self.maze.width * 20
        self.wReward_repeat_pos = -20
        self.wReward_else = -3

        self.qReward_not_possible = -50
        self.qReward_wall = -50
        self.qReward_entrance = -20
        self.qReward_finished_before = 0
        self.qReward_exit = self.maze.height * self.maze.width * 20
        self.qReward_towards_exit = -1
        self.qReward_repeat_pos = -20
        self.qReward_else = -3

        self.sReward_not_possible = -50
        self.sReward_wall = -50
        self.sReward_eat = 1000
        self.sReward_eatQueen = 5000
        self.sReward_towards_prey = -1
        self.sReward_repeat_pos = -20
        self.sReward_else = -3

        # Book-keeping: output folder name.
        self.folderName = "Span_" + str(self.spanP) + "Dim_" + str(
            self.maze.height) + "_" + str(self.maze.width)

        self.maxIter = 10 * self.maze.height * self.maze.width
        self.completeStop = False
        if (self.wReward_exit == 0):
            self.completeStop = True

        for j in range(self.wNumber):
            p = Worker(self.maze, self.spanP)
            self.s.add(p)
            self.pList.append(p)
            self.pPos.append(p.getPos())

        for j in range(self.qNumber):
            q = Queen(self.maze)
            self.s.add(q)
            self.qList.append(q)
            self.qPos.append(q.getPos())


        for j in range(self.sNumber):
            s = Spider(self.maze)
            self.s.add(s)
            self.mList.append(s)
            self.mPos.append(s.getPos())

        for p in self.pList:
            state = np.asarray(
                p.getAugView(p.getPos(), self.spanP, self.pPos, self.qPos,
                             self.mPos))
            self.pStateList.append(state)
            self.history += "#" + p.getName() + "-" + p.getPos().CordToString()

        for q in self.qList:
            # The queen's view is built incrementally, so no initial state is stored.
            self.history += "#" + q.getName() + "-" + q.getPos().CordToString()

        for s in self.mList:
            state = np.asarray(
                s.getAugView(s.getPos(), self.spanM, self.pPos, self.qPos,
                             self.mPos))
            self.mStateList.append(state)
            self.history += "#" + s.getName() + "-" + s.getPos().CordToString()

        self.history += "|"

        self.shortestRoute = len(self.maze.GetOptimalRoute()[0])
        self.maze.printMaze()

        self.dqn_solver_worker = DQNSolver(int(self.observation_space_worker),
                                           len(self.action_space_worker))
        self.dqn_solver_queen = DQNSolver(int(self.observation_space_queen),
                                          len(self.action_space_queen))
        self.dqn_solver_spider = DQNSolver(int(self.observation_space_spider),
                                           len(self.action_space_spider))
        self.pReward = 0
        self.qReward = 0
        self.mReward = 0

        return self.pStateList

    def render(self, mode='human', close=False):
        self.s.display()
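
A driver for this third variant mirrors the second one, reading the richer return value of stepAll. A minimal sketch with an arbitrary episode count:

# Hypothetical outer loop for the worker/queen/spider variant.
env = GameEnv()
for episode in range(100):  # arbitrary episode count
    env.reset()
    terminal = False
    while not terminal:
        pReward, mReward, terminal, trueTerm, eaten = env.stepAll()
    print("episode", episode, "worker reward", pReward,
          "spider reward", mReward, "eaten", eaten)

None of the examples define DQNSolver; the call sites only require act(state), remember(state, action, reward, state_next, terminal) and experience_replay(). The following is one plausible minimal implementation of that interface; the Keras network shape, replay parameters, and epsilon schedule are assumptions, not the original code:

# Illustrative DQNSolver sketch matching the interface used above.
import random
from collections import deque

import numpy as np
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam


class DQNSolver:
    def __init__(self, observation_space, action_space):
        self.action_space = action_space          # number of discrete actions
        self.memory = deque(maxlen=100000)        # replay buffer (assumed size)
        self.exploration_rate = 1.0               # epsilon for epsilon-greedy
        self.model = Sequential([
            Dense(24, input_shape=(observation_space,), activation="relu"),
            Dense(24, activation="relu"),
            Dense(action_space, activation="linear"),
        ])
        self.model.compile(loss="mse", optimizer=Adam(learning_rate=0.001))

    def act(self, state):
        # Epsilon-greedy: random action while exploring, else argmax Q.
        if np.random.rand() < self.exploration_rate:
            return random.randrange(self.action_space)
        return int(np.argmax(self.model.predict(state, verbose=0)[0]))

    def remember(self, state, action, reward, state_next, terminal):
        self.memory.append((state, action, reward, state_next, terminal))

    def experience_replay(self, batch_size=20, gamma=0.95):
        # Sample past transitions and fit the Q-network toward the targets.
        if len(self.memory) < batch_size:
            return
        for state, action, reward, state_next, terminal in random.sample(
                self.memory, batch_size):
            q_update = reward
            if not terminal:
                q_update += gamma * np.amax(
                    self.model.predict(state_next, verbose=0)[0])
            q_values = self.model.predict(state, verbose=0)
            q_values[0][action] = q_update
            self.model.fit(state, q_values, verbose=0)
        self.exploration_rate = max(0.01, self.exploration_rate * 0.995)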