예제 #1
0
class Interface():
    """Tkinter menu plus pygame display driving a grid-world simulation for up to two agents.

    The values below are class-level defaults; the methods of the class read
    them and rebind them through ``self``.
    """
    # -----------------------------------------------------------------------------------------------------
    # INITIALISATION OF VARIABLES
    # -----------------------------------------------------------------------------------------------------

    # To do with agent and score initialisation
    main = None
    agent1 = agent2 = None
    active_agent_1 = active_agent_2 = False
    # Two distinct lists: a chained ``a = b = []`` would make both names
    # refer to the same list object.
    agent1Progress = []
    agent2Progress = []
    count = ag1TotalWins = ag2TotalWins = ag1Wins = ag2Wins = 0
    initState1 = initState2 = 0

    # To do with simulation control
    speed_of_simulation = 10
    startOver = True
    display = True
    pause = False
    iterations = 0
    number_of_iterations = 10000

    # To do with grid control
    worldGrid = []
    width = height = 0
    worldGrid_Entry = None
    world = World.GridWorld()

    # To do with learning parameters
    # NOTE(review): setAlgorithmValues falls back to alpha 0.1 / gamma 0.9,
    # the reverse of these defaults - confirm which pair is intended.
    alphaAgent1 = alphaAgent2 = 0.9
    gammaAgent1 = gammaAgent2 = 0.1
    epsilonAgent1 = epsilonAgent2 = 20
    graphSettings = 50
    # Kept as separate lists so that appending agent 1's step counts in place
    # does not also fill agent 2's list.
    agent1StepCount = []
    agent2StepCount = []

    # Widgets created later by the menu-building methods
    alpha_agent_1_Entry = alpha_agent_2_Entry = gamma_agent_1_Entry = gamma_agent_2_Entry \
        = graphBtn = iterations_Entry = speed_of_simulation_Entry = epsilon_1_Entry = \
        epsilon_2_Entry = algorithmBtn = policyBtn = graphAg1Btn = None

    # To do with grid from text file setup
    mFromFile = False
    file = None

    # -----------------------------------------------------------------------------------------------------
    # LIST OF ALGORITHMS [EDITABLE]
    # -----------------------------------------------------------------------------------------------------
    currentAlgorithms = [
        "Qlearning", "BFS", "Random"
    ]  # Add the name of an algorithm to insert it into the list

    algorithmAgent1 = algorithmAgent2 = "Qlearning"  # Default algorithm for both agents

    # -----------------------------------------------------------------------------------------------------
    # INITIALISATION OF THE AGENTS [EDITABLE]
    # -----------------------------------------------------------------------------------------------------
    # Add the initialisation for the algorithm
    # -----------------------------------------------------------------------------------------------------
    def initialiseAgents(self):

        if self.active_agent_1 == True:
            if self.algorithmAgent1 == "Qlearning":
                self.agent1 = Qlearn.Qlearning(self.width, self.height,
                                               self.initState1, self.world, 1,
                                               self.alphaAgent1,
                                               self.gammaAgent1,
                                               self.epsilonAgent1)
            if self.algorithmAgent1 == "BFS":
                self.agent1 = BFS.shortestPath(self.width, self.height,
                                               self.initState1, self.world, 1)

            if self.algorithmAgent1 == "Random":
                self.agent1 = Random.Random(self.width, self.height,
                                            self.initState1, self.world, 1)

        if self.active_agent_2 == True:
            if self.algorithmAgent2 == "Qlearning":
                self.agent2 = Qlearn.Qlearning(self.width, self.height,
                                               self.initState2, self.world, 2,
                                               self.alphaAgent2,
                                               self.gammaAgent2,
                                               self.epsilonAgent2)
            if self.algorithmAgent2 == "BFS":
                self.agent2 = BFS.shortestPath(self.width, self.height,
                                               self.initState2, self.world, 2)

            if self.algorithmAgent2 == "Random":
                self.agent2 = Random.Random(self.width, self.height,
                                            self.initState2, self.world, 2)

    # ------------------------------------------------------------------------------------------------------
    # SET DEFAULT MATRIX [EDITABLE]
    # ------------------------------------------------------------------------------------------------------
    def setDefaultMatrix(self):
        defaultMatrix = [
            '1', '0', '0', '0', '\n', '2', '0', '0', '0', '\n', '0', '4', '4',
            '0', '\n', '0', '0', '0', '5', '\n'
        ]

        return defaultMatrix

    # ------------------------------------------------------------------------------------------------------
    # ------------------------------------------------------------------------------------------------------

    # Do not edit past this point --------------------------------------------------------------------------

    # ------------------------------------------------------------------------------------------------------
    # INITIALISATION OF THE ALGORITHM
    # ------------------------------------------------------------------------------------------------------
    def initialiseAlgorithm(self):
        """Prepare the world, locate the agents and zero the scores for a fresh run.

        The world is initialised from the current grid, the grid is scanned for
        the cells holding 1 and 2 (the start cells of agent 1 and agent 2), the
        agents are created, all win counters and progress lists are cleared,
        the window caption is reset and ``startOver`` is switched off.
        """
        self.world.initialise(self.width, self.height, self.worldGrid)

        # States are numbered row by row, starting at 0 in the top-left cell.
        for i in range(self.height):
            for j in range(self.width):
                state = i * self.width + j
                cell = self.worldGrid[i][j]
                if cell == 1:
                    self.initState1 = state
                    self.active_agent_1 = True
                if cell == 2:
                    self.initState2 = state
                    self.active_agent_2 = True

        self.initialiseAgents()

        self.agent1Progress = []
        self.agent2Progress = []
        self.ag1Wins = self.ag2Wins = self.ag1TotalWins = self.ag2TotalWins = self.count = 0

        # Set the display of the wins for each agent
        winsDisplay = "Score: agent 1:({}) agent 2:({}) Total:({})".format(
            self.ag1TotalWins, self.ag2TotalWins,
            self.ag1TotalWins + self.ag2TotalWins)
        pygame.display.set_caption(winsDisplay)

        self.startOver = False

    # ------------------------------------------------------------------------------------------------------
    # GUI FOR THE MAIN MENU
    # ------------------------------------------------------------------------------------------------------
    def startup(self):
        """Build the main menu window and run the Tk main loop.

        Widgets are packed top to bottom in the order they are created here.
        The graph and policy buttons start disabled.
        """
        root = tk.Tk()
        self.main = root
        root.title('Simulation')
        root.geometry('300x650')
        root.resizable(False, False)

        def add_button(caption, callback):
            # Full-width button placed under everything packed so far
            button = tk.Button(root, text=caption, command=callback)
            button.pack(expand=tk.FALSE, fill=tk.X, side=tk.TOP)
            return button

        def add_label(caption, **options):
            Label(root, text=caption, **options).pack()

        def add_input(widget_class, **options):
            # Input widget that takes the keyboard focus as it is added
            widget = widget_class(root, **options)
            widget.focus_set()
            widget.pack()
            return widget

        divider = ' ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━'

        add_button('Instructions', self.displayInstructions)

        # World matrix input
        add_label('Insert world Matrix:')
        self.worldGrid_Entry = add_input(Text, height=10, width=10)
        add_button('Open matrix from txt file', self.matrixFromFile)

        self.algorithmBtn = add_button('Choose algorithm',
                                       self.chooseLearningAlgorithm)

        add_label('Set number of iterations to perform:')
        self.iterations_Entry = add_input(Entry, width=15)

        # Start button framed by two divider lines
        add_label(divider, height=1)
        add_button('Start Simulation', self.setup)
        add_label(divider, height=1)

        add_label('Speed of simulation (1 = fastest):')
        self.speed_of_simulation_Entry = add_input(Entry, width=4)

        self.graphBtn = add_button('View Progress Graph (both agents)',
                                   self.viewGraph)
        self.graphBtn.configure(state='disabled')

        self.graphAg1Btn = add_button('View Progress agent 1',
                                      self.viewGraphAgent1)
        self.graphAg1Btn.configure(state='disabled')

        add_button('Reset', self.reset)

        self.policyBtn = add_button('View Policies', self.viewPolicies)
        self.policyBtn.configure(state='disabled')

        add_button('Settings', self.openSettings)
        add_button('Quit', self.main.quit)

        self.main.mainloop()

    # ------------------------------------------------------------------------------------------------------
    # GUI FOR THE SETTINGS MENU
    # ------------------------------------------------------------------------------------------------------

    def openSettings(self):
        """Open the settings window.

        It offers two buttons that switch the drawing of the simulation off
        and on, and an entry (kept in ``graphSettings_Entry``) for the number
        of iterations per graph unit, applied by the 'Set this value' button.
        """
        window = tk.Toplevel(self.main)
        window.title("Settings")
        window.geometry("300x200")
        window.resizable(False, False)

        toggles = (
            ('Simulation OFF', self.changeDisplayFalse),
            ('Simulation ON', self.changeDisplayTrue),
        )
        for caption, handler in toggles:
            toggle = tk.Button(window, text=caption, command=handler)
            toggle.pack(expand=tk.FALSE, fill=tk.X, side=tk.TOP)

        Label(window, text='Iterations per unit (for the graph):').pack()
        self.graphSettings_Entry = Entry(window, width=4)
        self.graphSettings_Entry.focus_set()
        self.graphSettings_Entry.pack()

        apply_button = tk.Button(window,
                                 text='Set this value',
                                 command=self.setSettingsValue)
        apply_button.pack(expand=tk.FALSE, fill=tk.X, side=tk.TOP)

    def setSettingsValue(self):
        tmp = self.graphSettings_Entry.get()
        try:
            self.graphSettings = float(tmp)
        except ValueError:
            self.graphSettings = 50

    # Method to read input from a txt file
    def matrixFromFile(self):
        """Ask for a matrix text file and remember it for the next start.

        When the dialog is cancelled it returns an empty value; in that case
        nothing is changed, so a previous selection (or the text box) stays
        in effect instead of an empty path being recorded.
        """
        chosen = askopenfilename()
        if chosen:
            self.file = chosen
            self.mFromFile = True

    # Activate the simulation
    def changeDisplayTrue(self):
        self.display = True

    # Deactivate the simulation
    def changeDisplayFalse(self):
        self.display = False

    # ------------------------------------------------------------------------------------------------------
    # GUI FOR THE 'CHOOSE ALGORITHM' OPTION
    # ------------------------------------------------------------------------------------------------------
    def chooseLearningAlgorithm(self):
        """Open the window for picking each agent's algorithm and its parameters.

        The window lists ``currentAlgorithms``, shows the current choice for
        both agents, and holds the alpha, gamma and epsilon entries. The
        entries are stored on ``self`` and are read when 'Set these values'
        is pressed.
        """
        algorithm_window = tk.Toplevel(self.main)
        algorithm_window.title("Choose algorithm")
        algorithm_window.geometry("400x790")
        algorithm_window.resizable(False, False)

        def caption(text, **options):
            # Plain text line packed under the previous widget
            line = Label(algorithm_window, text=text, **options)
            line.pack()
            return line

        def labelled_entry(text):
            # Caption followed by a narrow entry that grabs the focus
            caption(text)
            field = Entry(algorithm_window, width=4)
            field.focus_set()
            field.pack()
            return field

        rule = '━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━'

        # Blank spacer above the list of algorithms
        caption('            ', height=1)

        listbox = Listbox(algorithm_window)
        listbox.pack()
        for name in self.currentAlgorithms:
            listbox.insert(END, name)

        algorithmLabel = caption('Current algorithm agent 1: ' +
                                 self.algorithmAgent1)
        algorithmLabel2 = caption('Current algorithm agent 2: ' +
                                  self.algorithmAgent2)

        Button(algorithm_window,
               text="Choose for Agent 1",
               command=lambda: self.setLearningAlgorithm(
                   listbox.get(ANCHOR), algorithmLabel, 1)).pack()

        Button(algorithm_window,
               text="Choose for Agent 2",
               command=lambda: self.setLearningAlgorithm(
                   listbox.get(ANCHOR), algorithmLabel2, 2)).pack()

        # Learning rate and discount section
        caption(rule, height=1)
        caption('Insert the alpha and gamma values for agents 1 and 2')
        caption('alpha--learning rate(0-1), gamma--discount reward rate(0-1)')
        caption(rule)

        self.alpha_agent_1_Entry = labelled_entry('Insert alpha for agent 1:')
        self.alpha_agent_2_Entry = labelled_entry('Insert alpha for agent 2:')
        self.gamma_agent_1_Entry = labelled_entry('Insert gamma for agent 1:')
        self.gamma_agent_2_Entry = labelled_entry('Insert gamma for agent 2:')

        # Exploration section
        caption(rule)
        caption('Insert epsilon (Probability 0 - 100 of exploring new states)')
        caption(rule)

        self.epsilon_1_Entry = labelled_entry('Epsilon for agent 1:')
        self.epsilon_2_Entry = labelled_entry('Epsilon for agent 2:')

        Button(algorithm_window,
               text="Set these values",
               command=lambda: self.setAlgorithmValues(algorithm_window)).pack()

    # ------------------------------------------------------------------------------------------------------
    # METHOD TO SET THE ALGORITHM VALUES FROM THE ENTRIES
    # ------------------------------------------------------------------------------------------------------
    def setAlgorithmValues(self, algorithm_window):
        """Copy the alpha, gamma and epsilon entries into the agent parameters.

        Each entry is converted with ``float``; text that is not a number
        selects the fallback listed for that parameter. A 'Values set' line is
        then added to ``algorithm_window``.
        """
        # (entry attribute, parameter attribute, fallback for non-numeric text)
        fields = (
            ('alpha_agent_1_Entry', 'alphaAgent1', 0.1),
            ('alpha_agent_2_Entry', 'alphaAgent2', 0.1),
            ('gamma_agent_1_Entry', 'gammaAgent1', 0.9),
            ('gamma_agent_2_Entry', 'gammaAgent2', 0.9),
            ('epsilon_1_Entry', 'epsilonAgent1', 20),
            ('epsilon_2_Entry', 'epsilonAgent2', 20),
        )

        for entry_name, parameter, fallback in fields:
            raw = getattr(self, entry_name).get()
            try:
                value = float(raw)
            except ValueError:
                value = fallback
            setattr(self, parameter, value)

        confirmation = Label(algorithm_window, text='Values set', height=1)
        confirmation.pack()

    # ------------------------------------------------------------------------------------------------------
    # Method to set the learning algorithm for each agent
    # ------------------------------------------------------------------------------------------------------
    def setLearningAlgorithm(self, algorithm, algorithmLabel, agent):

        if algorithm != "":
            if agent == 1:
                self.algorithmAgent1 = algorithm
            elif agent == 2:
                self.algorithmAgent2 = algorithm

        algorithmText = 'Current algorithm agent ' + str(
            agent) + ': ' + algorithm
        algorithmLabel.config(text=algorithmText)
        algorithmLabel.pack()

    # ------------------------------------------------------------------------------------------------------
    # STARTING UP THE WORLD MATRIX AND THE VALUES BASED ON THE ENTRIES
    # AND INITIALISING THE SIMULATION
    # ------------------------------------------------------------------------------------------------------
    def setup(self):
        matrix = []

        #To make the speed of simulation editable at pause
        if self.pause == True or self.startOver == True:
            tmp = self.speed_of_simulation_Entry.get()
            try:
                self.speed_of_simulation = int(tmp)
            except ValueError:
                self.speed_of_simulation = 10
            self.pause = False

        # Obtaining the matrix and set values
        if self.pause == False or self.startOver == True:
            self.iterations = 0

            tmp = self.iterations_Entry.get()
            try:
                self.number_of_iterations = int(tmp)
            except ValueError:
                self.number_of_iterations = 100000000

            # Check if a file has been chosen
            if self.mFromFile == False:
                tmp = self.worldGrid_Entry.get(1.0, END)
                matrix = list(tmp)
            else:
                with open(self.file) as fileobj:
                    for line in fileobj:
                        for ch in line:
                            matrix.append(ch)
                matrix.append('\n')

            if matrix == ['\n']:
                matrix = self.setDefaultMatrix()

            # Setting up the matrix
            self.width = 0
            self.height = 0
            i = 0
            numberCount = 0
            length = len(matrix)

            while i < length:
                if matrix[i] == '\n':
                    i = length + 1
                else:
                    self.width += 1
                i += 1

            for i in range(length):
                if matrix[i] == '\n':
                    self.height += 1

            self.worldGrid = [[0 for x in range(self.width)]
                              for y in range(self.height)]

            for i in range(self.height):
                for j in range(self.width):
                    if matrix[numberCount] != '\n':
                        self.worldGrid[i][j] = int(matrix[numberCount])
                    numberCount += 1
                numberCount += 1

            self.worldGrid_Entry.configure(state='disabled')
            self.iterations_Entry.configure(state='disabled')

        self.policyBtn.configure(state='normal')
        self.algorithmBtn.configure(state='disabled')

        #Initialise the simulation
        self.initSimulation()

    # ------------------------------------------------------------------------------------------------------
    # Resetting the simulation
    # ------------------------------------------------------------------------------------------------------
    def reset(self):
        """Return the menu to its pre-simulation state and close pygame.

        The matrix and iteration inputs and the algorithm button are enabled
        again, the graph and policy buttons are disabled, the run flags and
        agent 1's step counts are cleared, and ``pygame.quit()`` is called.
        """
        widget_states = (
            ('worldGrid_Entry', 'normal'),
            ('iterations_Entry', 'normal'),
            ('graphBtn', 'disabled'),
            ('policyBtn', 'disabled'),
            ('algorithmBtn', 'normal'),
            ('graphAg1Btn', 'disabled'),
        )
        for attribute, state in widget_states:
            getattr(self, attribute).configure(state=state)

        self.startOver = True
        self.active_agent_1 = self.active_agent_2 = False
        self.mFromFile = False
        self.agent1StepCount = []

        pygame.quit()

    # ------------------------------------------------------------------------------------------------------
    # DISPLAY FOR THE SIMULATION
    # ------------------------------------------------------------------------------------------------------
    def initSimulation(self):
        """Open the pygame window and run the simulation loop.

        The window shows the grid plus a 'Pause' and a 'Pause Now' button.
        Episodes are run until ``number_of_iterations`` is reached or the run
        is paused. In each episode the active agents are reset and moved step
        by step until ``finishIterating()`` stops returning False; afterwards
        the win counters, the progress lists and the window caption are
        updated and the world grid is reset.

        'Pause' sets ``self.pause`` and lets the current episode finish.
        'Pause Now' and closing the window also set ``finished``, which ends
        the episode loop immediately.
        """
        tileWidth = 60 - self.width
        # NOTE(review): the tile height is derived from self.width as well,
        # which makes the tiles square - confirm that this is intended.
        tileHeight = 60 - self.width
        separation = 5

        pygame.init()

        # RGB colours used for the grid and the buttons
        white = (255, 255, 255)
        black = (0, 0, 0)
        blue = (0, 0, 255)
        red = (255, 0, 0)
        green = (0, 255, 0)
        grey = (100, 100, 100)
        yellow = (255, 255, 0)

        screenSize = [100 * self.width, 85 * self.height]
        screen = pygame.display.set_mode(screenSize)

        font = pygame.font.Font(pygame.font.get_default_font(), 12)

        # Creating the Pause button
        screen.fill(black)
        stopButton = pygame.Rect(70 * self.width, 55 * self.height, 50, 20)
        pygame.draw.rect(screen, white, stopButton)  # draw button

        # 'Pause Now' button, placed above the 'Pause' button
        stopNowButton = pygame.Rect(70 * self.width, 40 * self.height, 70, 20)
        pygame.draw.rect(screen, white, stopNowButton)  # draw button

        pauseText = font.render("   Pause", True, black)
        screen.blit(pauseText, [70 * self.width, 55 * self.height])
        pygame.display.flip()

        pauseNowText = font.render("Pause Now", True, black)
        screen.blit(pauseNowText, [70 * self.width, 40 * self.height])
        pygame.display.flip()

        finished = self.pause = False

        # ---------------------------------------------------------------------------------------
        # GRID DISPLAY
        # ---------------------------------------------------------------------------------------
        while not finished:

            if self.startOver == True:
                # Initialise the agents
                self.initialiseAlgorithm()
                #self.agent1.printR() #debug

            # ------------------------------------------------------------------------------------
            # One pass of this loop is one episode
            while self.iterations < self.number_of_iterations and not finished:
                steps = 0

                if self.active_agent_1:
                    self.agent1.resetState(
                    )  # Common method for all learning classes, reset to the original position
                if self.active_agent_2:
                    self.agent2.resetState()

                if self.active_agent_1:
                    if self.active_agent_1:
                        self.agent1.resetLearning()
                # NOTE(review): agent 2's resetLearning() is only reached when
                # agent 1 is active too - this looks like a copy of the agent 1
                # guard; confirm whether the outer test should be agent 2.
                if self.active_agent_1:
                    if self.active_agent_2:
                        self.agent2.resetLearning()

                # While no agent has reached its final state or agent 1 hasn't gotten caught
                while self.finishIterating() is False and not finished:

                    grid = self.world.getGrid()

                    if self.display == True:
                        # Redraw the buttons and the whole grid for this step
                        screen.fill(black)
                        pygame.draw.rect(screen, white, stopButton)
                        screen.blit(pauseText,
                                    [70 * self.width, 55 * self.height])

                        pygame.draw.rect(screen, white, stopNowButton)
                        screen.blit(pauseNowText,
                                    [70 * self.width, 40 * self.height])

                        # Cell value -> colour: 5 green, 1 blue, 6 grey,
                        # 4 black, 2 red, 7 yellow, anything else white
                        for i in range(self.height):
                            for j in range(self.width):
                                colour = white
                                if grid[i][j] == 5:
                                    colour = green
                                elif grid[i][j] == 1:
                                    colour = blue
                                elif grid[i][j] == 6:
                                    colour = grey
                                elif grid[i][j] == 4:
                                    colour = black
                                elif grid[i][j] == 2:
                                    colour = red
                                elif grid[i][j] == 7:
                                    colour = yellow
                                pygame.draw.rect(screen, colour, [
                                    (separation + tileWidth) * j + separation,
                                    (separation + tileHeight) * i + separation,
                                    tileWidth, tileHeight
                                ])

                    for event in pygame.event.get():
                        # Closing the window stops the run immediately
                        if event.type == pygame.QUIT:
                            self.pause = True
                            finished = True
                        if event.type == pygame.MOUSEBUTTONDOWN:
                            mouse_pos = event.pos  # gets mouse position

                            # 'Pause': stop once the current episode has ended
                            if stopButton.collidepoint(mouse_pos):
                                self.pause = True
                            # 'Pause Now': stop in the middle of the episode
                            if stopNowButton.collidepoint(mouse_pos):
                                self.pause = True
                                finished = True

                    pygame.display.flip()
                    # Delay between steps, in milliseconds
                    pygame.time.wait(self.speed_of_simulation)

                    # Activate the algorithm's learning method
                    # ----------------------------------------------
                    if self.active_agent_1:
                        self.agent1.move()  # Movement
                    # Agent 2 only moves while gotCaught() compares equal to False
                    if self.active_agent_2 and self.gotCaught() == False:
                        self.agent2.move()
                        # Movement
                    if self.gotCaught() == True:
                        self.world.updateGrid(self.agent2.getState(), 2)

                    steps += 1

                # Step counts are only recorded when agent 1 runs alone
                if self.active_agent_1 and not self.active_agent_2 and self.pause == False:
                    self.agent1StepCount.append(steps)

                # Every learning algorithm class must have a method (isFinalState()) which returns True if
                # the agent has reached its desired location
                # NOTE(review): this scoring also runs for an episode that was
                # cut short by 'Pause Now' or by closing the window - confirm.
                if self.gotCaught() == True and self.active_agent_2:
                    self.ag2TotalWins += 1
                    self.ag2Wins += 1
                elif self.active_agent_1:
                    self.ag1TotalWins += 1
                    self.ag1Wins += 1

                if self.gotCaught() == True:
                    self.agent1.caughtHandling()

                self.count += 1

                #ITERATIONS PER UNIT = 50 (default) FOR GRAPH
                # Once graphSettings episodes have been counted, store the wins
                # of that batch as one graph point and start a new batch
                if self.count == self.graphSettings and self.pause == False:
                    self.agent1Progress.append(self.ag1Wins)
                    self.agent2Progress.append(self.ag2Wins)
                    self.ag2Wins = 0
                    self.ag1Wins = 0
                    self.count = 0

                # A pause discards the partially filled batch
                if self.pause == True:
                    self.ag1Wins = 0
                    self.ag2Wins = 0
                    self.count = 0

                winsDisplay = ''
                winsDisplay = winsDisplay + "Score: agent 1:(" + str(
                    self.ag1TotalWins) + ") agent 2:(" + str(
                        self.ag2TotalWins) + ") Total:(" + str(
                            self.ag1TotalWins + self.ag2TotalWins) + ")"
                pygame.display.set_caption(winsDisplay)

                self.world.resetGrid()
                self.iterations += 1

                # On pause, enable the graph button that matches the active agents and stop
                if self.pause == True:
                    if self.active_agent_1 and self.active_agent_2:
                        self.graphBtn.configure(state='normal')
                    if self.active_agent_1 and not self.active_agent_2:
                        self.graphAg1Btn.configure(state='normal')
                    finished = True

            # The episode loop has ended: leave the simulation in the paused state
            self.pause = True
            if self.active_agent_1 and self.active_agent_2:
                self.graphBtn.configure(state='normal')
            finished = True

    # ------------------------------------------------------------------------------------------------------
    # Method to check whether either agent has reached its objective
    # ------------------------------------------------------------------------------------------------------
    def finishIterating(self):

        if self.active_agent_1 and self.active_agent_2:
            if self.world.isFinalState(self.agent1.getState(), 1) is False and self.world.isFinalState(
                    self.agent2.getState(), 2) is False \
                    and self.gotCaught() is False:
                return False
            else:
                return True

        if self.active_agent_1 and not self.active_agent_2:
            if self.world.isFinalState(self.agent1.getState(), 1) is False:
                return False
            else:
                return True

        if self.active_agent_2 and not self.active_agent_1:
            if self.world.isFinalState(self.agent2.getState(), 2) is False:
                return False
            else:
                return True

    # ------------------------------------------------------------------------------------------------------
    #  Method to check if agent 1 has gotten caught by agent 2
    # ------------------------------------------------------------------------------------------------------
    def gotCaught(self):
        """Tell whether both agents currently report the same state.

        Returns:
            ``True`` if both agents are active and ``agent1.getState()``
            equals ``agent2.getState()``, ``False`` if both are active and
            the states differ, and ``None`` when at least one agent is
            inactive.
        """
        if not (self.active_agent_1 and self.active_agent_2):
            # A capture is only defined when both agents are in play.
            return None

        same_state = self.agent1.getState() == self.agent2.getState()
        return True if same_state else False

    # ------------------------------------------------------------------------------------------------------
    #  Method that prints the policies for each agent
    # ------------------------------------------------------------------------------------------------------
    def viewPolicies(self):
        """Open a "Current Policies" window listing each active agent's policy.

        For every active agent, ``getPolicy`` is called with that agent's
        initial state, its elements are converted with ``str`` and joined
        without a separator, and the result is appended to a text box.
        """
        window = tk.Toplevel(self.main)
        window.title("Current Policies")

        output = Text(window, height=10, width=80)
        output.pack()

        if self.active_agent_1:
            rendered = ''.join(map(str, self.agent1.getPolicy(self.initState1)))
            output.insert(END, "Policy agent 1: " + rendered)

        if self.active_agent_2:
            rendered = ''.join(map(str, self.agent2.getPolicy(self.initState2)))
            # The leading blank line separates this entry from agent 1's.
            output.insert(END, "\n\nPolicy agent 2: " + rendered)

    # ------------------------------------------------------------------------------------------------------
    # Instructions panel
    # ------------------------------------------------------------------------------------------------------
    def displayInstructions(self):
        """Open a fixed-size, read-only window with the usage instructions.

        The window holds a 120x30 character text box with a vertical
        scrollbar beside it. Both widgets are laid out with ``grid`` in the
        same frame: Tk does not support mixing ``pack`` and ``grid`` inside
        one master, and the previous pack/grid mix put the text box and the
        scrollbar under two different managers in the same frame.
        """
        instructions_window = tk.Toplevel(self.main)
        instructions_window.title("Instructions")
        instructions_window.resizable(False, False)

        # The frame takes its size from its gridded children, so no explicit
        # pixel width/height is requested for it.
        instructions_frame = tk.Frame(instructions_window)
        instructions_frame.pack(fill="both", expand=True)

        # Any extra space goes to the text cell, not to the scrollbar column.
        instructions_frame.grid_rowconfigure(0, weight=1)
        instructions_frame.grid_columnconfigure(0, weight=1)

        instrText = Text(instructions_frame, height=30, width=120)
        instrText.grid(row=0, column=0, sticky='nsew')

        # NOTE(review): the defaults quoted in items 3 and 4 (100000000
        # iterations, speed 20) differ from the class-level defaults declared
        # at the top of the file (10000 and 10) -- confirm which is intended.
        instrText.insert(
            END,
            "To Start a customised simulation: \n(If all is left blank and 'start simulation' is\n"
            "pressed, the default world and values will be used.)\n\n"
            "1. Insert a matrix on the 'Insert world matrix' box or choose it from a text file\n"
            "   for the type of world to simulate.\n"
            "   0 -- Empty space[WHITE]\n"
            "   4 -- Impassable space[BLACK]\n"
            "   1 -- Agent 1 [BLUE], agent 2's objective\n"
            "   5 -- Objective for agent 1[GREEN]\n"
            "   2 -- Agent 2 [RED]\n"
            "   6 -- pit [GREY](negative rewards for both agents 1 and 2)\n"
            "   7 -- secondary objective for agent 1 [YELLOW](does not end round)\n\n"
            "  Example matrix:\n"
            "   100044\n"
            "   006600\n"
            "   020005\n"
            "   040400\n\n"
            "2. Set the algorithm to be used by each agent in the 'choose algorithm' tab. The default\n"
            "   is temporal difference q-learning.\n"
            "   -- To choose one, select it from the list of available algorithms and set it to agent 1 and/or 2\n"
            "   -- To change the default values for variables, insert the value/s to change in the corresponding\n"
            "      text boxes and choose 'set these values'\n"
            "   -- Default values for alphas: 0.9, gammas: 0.1, epsilons: 20\n\n"
            "3. Insert the number of iterations to perform (default = 100000000)\n\n"
            "4. Set the speed of simulation by inserting a number 1 or greater the 'speed of simulation' entry\n"
            "   (default speed = 20)\n\n"
            "5. Toggle the display on and off by selecting 'settings' and the 'simulation on' and \n"
            "   'simulation off' buttons from the ‘settings’ menu before starting the simulation or while on pause.\n\n"
            "6. Set the value for the graph display in the 'settings' menu where the number of wins per x\n"
            "   iterations can be changed.\n\n"
            "7. Press the 'start simulation' button to begin. It can be paused instantly by pressing the\n"
            "   'pause now' or 'close' buttons. To wait until a round finishes to stop it, press 'pause'.\n\n"
            # The two continuation lines of item 8 previously lacked their
            # line breaks, so the item ran together as one line.
            "8. One can view the progress of both agents by clicking on the 'view progress graph' button.\n"
            "   The progress graph for agent 1 will only be shown if it is the only agent present in the\n"
            "   simulation. Used for testing different variable values and learning algorithms.\n\n"
            "9. To print the current policy of the agents, choose 'view policies'.\n\n"
            "10.To re-start the simulation and edit the world Matrix, choose 'restart'. If you cancel the\n"
            "   choosing of a matrix from a text file and get an error when starting the simulation, make sure\n"
            "   to press 'restart' and try again.\n\n"
            "11.To learn how to insert new algorithms, refer to the 'Readme' file."
        )
        # Make the text read-only now that it has been filled in.
        instrText.config(state=DISABLED)

        # Vertical scrollbar in the column next to the text box, wired both
        # ways so dragging the bar scrolls the text and vice versa.
        scrollb = tk.Scrollbar(instructions_frame, command=instrText.yview)
        scrollb.grid(row=0, column=1, sticky='ns')
        instrText['yscrollcommand'] = scrollb.set

    # ------------------------------------------------------------------------------------------------------
    # Graph showing the number of wins for each agent per batch of `graphSettings` iterations
    # ------------------------------------------------------------------------------------------------------
    def viewGraph(self):
        """Open a new figure plotting the recorded win counts of both agents.

        Plots ``self.agent1Progress`` and ``self.agent2Progress`` as two
        labelled series. The axis labels state that one unit on the x axis
        corresponds to ``self.graphSettings`` iterations.
        """
        graph = plt.figure()

        # FigureCanvasBase.set_window_title was removed in Matplotlib 3.6;
        # the figure manager's method works on both old and new releases.
        manager = graph.canvas.manager
        if manager is not None:
            manager.set_window_title('Progress Graph')

        # Draw on this figure's own axes. Going through plt.figure(1) would
        # send the plot to pyplot's figure number 1, which is a different
        # figure whenever another graph window is still open.
        axes = graph.add_subplot(211)
        axes.plot(self.agent1Progress, label='Ag1')
        axes.plot(self.agent2Progress, label='Ag2')
        axes.set_title("Progress graph for both agents")
        axes.set_ylabel('Number of wins every ' + str(self.graphSettings) +
                        ' iterations')
        axes.set_xlabel('Number of iterations (1 unit = ' +
                        str(self.graphSettings) + ' iterations)')
        # The series labels are only visible once a legend is drawn.
        axes.legend()

        graph.show()

    # ------------------------------------------------------------------------------------------------------
    # Graph to show the number of steps taken for agent 1 to reach its objective
    # ------------------------------------------------------------------------------------------------------
    def viewGraphAgent1(self):
        """Open a new figure plotting agent 1's recorded step counts.

        Plots ``self.agent1StepCount``; the axes are labelled as steps before
        reaching the goal (y) against round number (x).
        """
        graph = plt.figure()

        # FigureCanvasBase.set_window_title was removed in Matplotlib 3.6;
        # the figure manager's method works on both old and new releases.
        manager = graph.canvas.manager
        if manager is not None:
            manager.set_window_title('Progress Graph Agent 1')

        # Draw on this figure's own axes. Going through plt.figure(1) would
        # send the plot to pyplot's figure number 1, which is a different
        # figure whenever another graph window is still open.
        axes = graph.add_subplot(211)
        axes.plot(self.agent1StepCount, label='Ag1')
        axes.set_title("Progress graph for agent 1")
        axes.set_ylabel('Number of steps before reaching goal')
        axes.set_xlabel('Round number')

        graph.show()