from UI import UI
from Service import Service
from Repository import Repository
from Test import Test

# Console entry point: build the repository -> service -> UI chain and run the
# menu loop until the user picks the exit option.
if __name__ == "__main__":
    repo = Repository("repoPuncte.txt", "repoTriunghiuri.txt")
    serv = Service(repo)
    ui = UI(serv)
    teste = Test()

    running = True
    while running:
        ui.display()
        option = ui.getOption()

        # "0" leaves the loop.
        if option == "0":
            running = False
            continue

        # "-1" runs the test suite instead of a regular menu action.
        if option == "-1":
            teste.runTests()
            continue

        # Every other option is handed to the UI.
        ui.doOption(option)
class MDP(object):
    """Utility and policy sweep over a fixed 3x4 grid of ``Cell`` objects.

    Each non-terminal cell's utility is recomputed as ``r`` plus the best
    weighted utility reachable through one of the four actions, and the
    winning action is stored in ``policy``.

    ``Cell`` and ``UI`` are defined outside this block.
    """

    def __init__(self, r, ui_width, ui_height, number=1):
        """Build the grid, the policy table and the UI.

        Args:
            r: Constant added to the best neighbour utility on every update.
            ui_width: Width forwarded to ``UI``.
            ui_height: Height forwarded to ``UI``.
            number: Multiplier for the ``plus`` argument given to ``UI``
                (``plus = ui_width * number``).
        """
        self.r = r
        # Grid and policy are created before the UI-related attributes.
        self.start()
        self.ui_width, self.ui_height, self.number = ui_width, ui_height, number
        self.ui = UI(ui_width, ui_height, plus=ui_width * number)

    def start(self):
        """(Re)create the grid and then the policy table."""
        self.gridInit()
        self.policyInit()

    def policyInit(self):
        """Reset ``policy`` to a 3x4 table filled with the placeholder "*"."""
        self.policy = [["*" for _ in range(4)] for _ in range(3)]

    def gridInit(self):
        """Reset ``grid`` and seed the fixed utilities and terminal cells."""
        self.grid = []
        self.generateMatriz()

        # Two terminal cells in the last column, with utilities +1 and -1.
        high = self.grid[0][3]
        high.u = 1
        high.terminal = True

        low = self.grid[1][3]
        low.u = -1
        low.terminal = True

        # Initial utilities for two further cells; (1, 1) is not marked
        # terminal.
        self.grid[2][3].u = 0.2
        self.grid[1][1].u = -0.5

    def generateMatriz(self):
        """Append 3 rows of 4 ``Cell(row, col)`` objects to ``grid``."""
        for row_idx in range(3):
            self.grid.append([])
            row = self.grid[row_idx]
            for col_idx in range(4):
                row.append(Cell(row_idx, col_idx))

    def update(self):
        """Run one sweep: columns left to right, rows bottom to top."""
        for col_idx in range(4):
            for row_idx in range(2, -1, -1):
                self.maxUtility(row_idx, col_idx)

    def maxUtility(self, i, j):
        """Refresh utility and policy of cell ``(i, j)``; terminals are skipped."""
        cell = self.grid[i][j]
        if cell.terminal:
            return
        best_value, best_action = self.max(i, j)
        cell.u = self.r + best_value
        self.policy[i][j] = best_action

    def max(self, i, j):
        """Return ``(value, action)`` for the best action at cell ``(i, j)``.

        Actions are evaluated in the order UP, RIGHT, DOWN, LEFT; on a tie
        the earliest action in that order wins.
        """
        labels = ["UP", "RIGHT", "DOWN", "LEFT"]
        cell = self.grid[i][j]
        # Fetch all neighbour lists first, then score them.
        neighbor_sets = (
            cell.getUpNeighbor(),
            cell.getRightNeighbor(),
            cell.getDownNeighbor(),
            cell.getLeftNeighbor(),
        )
        scores = [self.utility(candidates) for candidates in neighbor_sets]

        best = 0
        for k in range(1, 4):
            # Strict comparison keeps the first maximum.
            if scores[best] < scores[k]:
                best = k
        return scores[best], labels[best]

    def utility(self, neighbors):
        """Return the weighted sum of the utilities of ``neighbors``.

        Each entry is indexed as ``(row, col, weight)``; the weight is
        presumably a transition probability -- confirm against ``Cell``.
        """
        total = 0
        for entry in neighbors:
            target = self.grid[entry[0]][entry[1]]
            total = (target.u * entry[2]) + total
        return total

    def display(self):
        """Ask the UI to draw the grid, the policy and ``r``."""
        self.ui.display(self.grid, self.policy, self.r)
class QLearning(object):
    """Tabular Q-learning agent moving on a ``rows`` x ``cols`` grid of cells.

    Every move applies the update

        Q(s, a) <- Q(s, a) + alpha * (reward + gamma * max Q(s', a') - Q(s, a))

    where the per-action Q-values live on the cell as ``vU``, ``vR``, ``vD``
    and ``vL``. ``Cell`` and ``UI`` are defined outside this block.
    """

    # Action label -> (Cell attribute holding its Q-value, row step, col step).
    _MOVES = {
        "UP": ("vU", -1, 0),
        "RIGHT": ("vR", 0, 1),
        "DOWN": ("vD", 1, 0),
        "LEFT": ("vL", 0, -1),
    }

    def __init__(self, r, d, ui_width, ui_height, dI, dJ, rows=3, cols=2,
                 a=0.5, g=0.8):
        """Create the grid, the policy table and the UI.

        Args:
            r: Reward used when the move stays inside the grid.
            d: Reward used when the move would leave the grid (the agent
                then stays where it is).
            ui_width: Width forwarded to ``UI``.
            ui_height: Height forwarded to ``UI``.
            dI: Row the agent starts at and returns to after a terminal cell.
            dJ: Column the agent starts at and returns to after a terminal
                cell.
            rows: Number of grid rows.
            cols: Number of grid columns.
            a: Learning rate (alpha) of the update rule.
            g: Discount factor (gamma) of the update rule.
        """
        self.r = r
        self.d = d
        self.a = a
        self.g = g
        self.rows = rows
        self.cols = cols
        # Grid and policy need rows/cols, so they are built at this point.
        self.start()
        self.dI = dI
        self.dJ = dJ
        self.i = self.dI
        self.j = self.dJ
        self.ui_width = ui_width
        self.ui_height = ui_height
        self.ui = UI(ui_width, ui_height)

    def start(self):
        """(Re)create the grid and then the policy table."""
        self.gridInit()
        self.policyInit()

    def policyInit(self):
        """Reset ``policy`` to a rows x cols table of the placeholder "*"."""
        self.policy = [["*" for _ in range(self.cols)]
                       for _ in range(self.rows)]

    def gridInit(self):
        """Reset ``grid`` and mark cell (0, 1) terminal via ``setTerminal(10)``.

        NOTE(review): 10 is presumably the terminal cell's value -- confirm
        against ``Cell.setTerminal``.
        """
        self.grid = []
        self.generateMatriz()
        self.grid[0][1].setTerminal(10)

    def generateMatriz(self):
        """Append ``rows`` rows of ``cols`` ``Cell(row, col)`` objects to ``grid``."""
        for row_idx in range(self.rows):
            self.grid.append([])
            row = self.grid[row_idx]
            for col_idx in range(self.cols):
                row.append(Cell(row_idx, col_idx))

    def _step(self, action):
        """Apply one Q-learning update for ``action`` and move the agent.

        On a terminal cell the agent is only sent back to ``(dI, dJ)``.
        Otherwise the Q-value of ``action`` on the current cell is updated;
        if the move would cross the grid edge, the reward is ``d``, the
        successor state is the current cell and the agent does not move,
        else the reward is ``r`` and the agent moves to the target cell.
        """
        q_attr, d_row, d_col = self._MOVES[action]
        cell = self.grid[self.i][self.j]

        if cell.terminal:
            self.i = self.dI
            self.j = self.dJ
            return

        target_i = self.i + d_row
        target_j = self.j + d_col
        # Only the edge the agent is moving towards is checked.
        blocked = (
            (d_row < 0 and target_i < 0)
            or (d_row > 0 and target_i > self.rows - 1)
            or (d_col < 0 and target_j < 0)
            or (d_col > 0 and target_j > self.cols - 1)
        )

        if blocked:
            reward, next_i, next_j = self.d, self.i, self.j
        else:
            reward, next_i, next_j = self.r, target_i, target_j

        best_next, _ = self.max(next_i, next_j)
        sample = reward + (self.g * best_next)
        old_q = getattr(cell, q_attr)
        setattr(cell, q_attr, old_q + self.a * (sample - old_q))
        cell.setU(getattr(cell, q_attr))
        # The policy entry is refreshed for the cell the agent acted from.
        self.policy[self.i][self.j] = cell.policy()

        if not blocked:
            self.i = target_i
            self.j = target_j

    def up(self):
        """Try to move one row up, updating ``vU`` of the current cell."""
        self._step("UP")

    def right(self):
        """Try to move one column right, updating ``vR`` of the current cell."""
        self._step("RIGHT")

    def down(self):
        """Try to move one row down, updating ``vD`` of the current cell."""
        self._step("DOWN")

    def left(self):
        """Try to move one column left, updating ``vL`` of the current cell."""
        self._step("LEFT")

    def auto(self):
        """Follow each cell's own policy until a terminal cell is reached.

        If the agent starts on a terminal cell it is first sent back to
        ``(dI, dJ)``. The walk also stops when a cell reports an action
        label other than UP/RIGHT/DOWN/LEFT, because no move can be made
        for it and looping on it would never end.
        """
        if self.grid[self.i][self.j].terminal:
            self.i = self.dI
            self.j = self.dJ

        handlers = {
            "UP": self.up,
            "RIGHT": self.right,
            "DOWN": self.down,
            "LEFT": self.left,
        }
        while not self.grid[self.i][self.j].terminal:
            # Ask the cell once per step so one decision drives one move.
            handler = handlers.get(self.grid[self.i][self.j].policy())
            if handler is None:
                break
            handler()

    def maxUtility(self, i, j):
        """Set ``u`` and the policy entry of non-terminal cell ``(i, j)``.

        ``u`` becomes ``r`` plus the cell's largest Q-value; terminal cells
        are left untouched.
        """
        cell = self.grid[i][j]
        if cell.terminal:
            return
        best_value, best_action = self.max(i, j)
        cell.u = self.r + best_value
        self.policy[i][j] = best_action

    def max(self, i, j):
        """Return ``(value, action)`` of the largest Q-value of cell ``(i, j)``.

        Candidates are compared in the order UP, RIGHT, DOWN, LEFT; on a tie
        the earliest action in that order wins.
        """
        cell = self.grid[i][j]
        labels = ["UP", "RIGHT", "DOWN", "LEFT"]
        q_values = [cell.vU, cell.vR, cell.vD, cell.vL]

        best = 0
        for k in range(1, 4):
            # Strict comparison keeps the first maximum.
            if q_values[best] < q_values[k]:
                best = k
        return q_values[best], labels[best]

    def display(self):
        """Ask the UI to draw the grid, policy, parameters and agent position."""
        self.ui.display(self.grid, self.policy, self.r, self.a, self.g,
                        self.i, self.j)