def __init__(self, numOfDisc):
    """Build the environment around a new ``Hanoi`` puzzle.

    Args:
        numOfDisc: value handed to ``Hanoi(...)`` and stored as ``numDisc``.
    """
    puzzle = Hanoi(numOfDisc)
    self.hanoi = puzzle
    self.numDisc = numOfDisc

    # Lookup table from action number to a (source, target) pair.
    self.moveDict = self._getMoveDict()

    # Three reward levels, lowest to highest.
    self.rewards = [-10, 0, 100000]
    self.prime = 509

    # No moves have been attempted yet.
    self.totalMoves = 0
class TestMovimentacaoDosDiscos(unittest.TestCase):
    """Disc-movement tests for ``Hanoi``, written with the ``should`` DSL."""

    def setUp(self):
        """Give every test its own default-constructed ``Hanoi``."""
        self.hanoi = Hanoi()

    def _check_pinos(self, pino1, pino2, pino3):
        """Check the contents of the three pegs, in peg order."""
        self.hanoi.pino1 | should | equal_to(pino1)
        self.hanoi.pino2 | should | equal_to(pino2)
        self.hanoi.pino3 | should | equal_to(pino3)

    def test_movimenta_A_para_pino2(self):
        """After movimentar(1, 2), "A" is the only disc on peg 2."""
        self.hanoi.movimentar(1, 2)
        self._check_pinos(["B", "C", "D", "E", "F", "G"], ["A"], [])

    def test_movimenta_A_para_pino3(self):
        """After movimentar(1, 3), "A" is the only disc on peg 3."""
        self.hanoi.movimentar(1, 3)
        self._check_pinos(["B", "C", "D", "E", "F", "G"], [], ["A"])

    def test_movimentacao_ilegal_B_sobre_A(self):
        """A second movimentar(1, 2) must raise (bigger piece on smaller)."""
        self.hanoi.movimentar(1, 2)
        second_move = (self.hanoi.movimentar, 1, 2)
        second_move | should | throw(
            Exception, message="Peca maior sobre a menor")

    def test_movimentacao_ilegal_retirada_de_peca_de_pino_vazio(self):
        """movimentar(2, 1) on a fresh puzzle must raise (peg 2 is empty)."""
        take_from_empty = (self.hanoi.movimentar, 2, 1)
        take_from_empty | should | throw(
            Exception, message='Retirada de peca de pino vazio')
def test_getState_happy_path():
    """``getState`` on a freshly built puzzle returns a truthy value."""
    puzzle = Hanoi(4, 0, 2)
    state = puzzle.getState()
    assert state
def test_init_happy_path():
    """A new ``Hanoi`` is truthy and its ``_lock`` is not held."""
    puzzle = Hanoi(4, 0, 2)
    assert puzzle
    lock_is_held = puzzle._lock.locked()
    assert not lock_is_held
def test_fls_zero():
    """``Hanoi.fls(0)`` is rejected with a ``ValueError``."""
    expected_message = r'fls should not be called with x == 0'
    with pytest.raises(ValueError, match=expected_message):
        Hanoi.fls(0)
def test_fls_happy_path():
    """``Hanoi.fls(0b100)`` evaluates to 2."""
    result = Hanoi.fls(0b100)
    assert 2 == result
class HanoiEnv(object):
    """Agent-facing environment wrapped around a three-tower ``Hanoi`` puzzle.

    State
    -----
    The state captures where every disc sits. With 3 discs and 3 towers
    the initial layout is::

        discs   0  -  -
                1  -  -
                2  -  -
        tower:  a  b  c      (a higher number means a bigger disc)

    The first disc can be in any of the 9 grid cells, the next in any of
    the remaining 8 and the last in any of the remaining 7, so there are
    9 * 8 * 7 = 504 placements. 504 is not prime, so 509 is used as the
    modulus for the state arithmetic. (That figure is derived for three
    discs.)

    A disc position is encoded as if the towers were drawn on a 3x3 grid::

        y-axis
          3 |
          2 |
          1 |
            +--a--b--c--> x-axis
               1  2  3

    Every disc contributes the two digits ``x`` and ``y``. Initially disc 0
    is at (1,1), disc 1 at (1,2) and disc 2 at (1,3); put together this is
    111213, and 111213 % 509 = 251. The goal has disc 0 at (3,1), disc 1 at
    (3,2) and disc 2 at (3,3), which gives 313233 % 509 = 198.

    NOTE(review): the modulo acts as a hash of the concatenated digits, so
    distinct layouts may map to the same state number -- confirm this is
    acceptable for the agent.

    Actions
    -------
    One action is taken at a time: choose a tower and move its top disc
    left, right, left-jump or right-jump, where a jump skips over the
    neighbouring tower. 3 towers * 4 directions = 12 actions, numbered
    0 to 11::

        0:  from tower a move left    (invalid; kept to keep the agent simple)
        1:  from tower a move right   (to tower b)
        2:  from tower a left jump    (invalid; moves do not wrap around)
        3:  from tower a right jump   (to tower c)
        ...
        10: from tower c left jump    (to tower a)
        11: from tower c right jump   (invalid)

    Rewards
    -------
    * -10: the action was invalid, or ``Hanoi.move_from_to`` reported
      failure (e.g. leaving the board or a rule violation).
    * 0: the move succeeded but the puzzle is not solved yet.
    * 100000: the puzzle is solved after the action.
    """

    def __init__(self, numOfDisc):
        """Create the environment with a new ``Hanoi(numOfDisc)`` puzzle."""
        self.hanoi = Hanoi(numOfDisc)
        self.numDisc = numOfDisc
        self.moveDict = self._getMoveDict()
        # Indexed as [invalid/failed move, successful move, puzzle solved].
        self.rewards = [-10, 0, 100000]
        self.prime = 509
        self.totalMoves = 0

    def _getMoveDict(self):
        """Return the action table: action number -> (source, target) tower.

        A target of -1 marks an action that can never be carried out.
        """
        return {
            0: (0, -1),   # tower 0, move left (invalid)
            1: (0, 1),    # tower 0, move right
            2: (0, -1),   # tower 0, jump left over one rod (invalid)
            3: (0, 2),    # tower 0, jump right over one rod to tower 2
            4: (1, 0),
            5: (1, 2),
            6: (1, -1),
            7: (1, -1),
            8: (2, 1),
            9: (2, -1),
            10: (2, 0),
            11: (2, -1),
        }

    def getActions(self):
        """Return the list of all action numbers."""
        return list(self.moveDict)

    def getState(self):
        """Encode the current disc layout as an integer in ``[0, prime)``."""
        posDict = {}
        for y in range(self.numDisc):
            for x in range(3):
                disc = self.hanoi.atIndex(x, y)
                if disc:
                    # Two digits per disc: tower (1-based), then height (1-based).
                    posDict[disc.num] = 10 * (x + 1) + (y + 1)

        # Concatenate the per-disc digit pairs in disc-number order.
        state = 0
        for n in range(self.numDisc):
            state = state * 100 + posDict.get(n)
        return state % self.prime

    def isSolved(self):
        """Return whatever the wrapped puzzle reports from ``isSolved()``."""
        return self.hanoi.isSolved()

    def takeAction(self, actionNumber):
        """Apply one action and report the outcome.

        Args:
            actionNumber: key into the action table; unknown numbers are
                treated as invalid moves.

        Returns:
            A tuple ``(state, reward, solved, totalMoves)``.
        """
        self.totalMoves += 1
        source, target = self.moveDict.get(actionNumber, (-1, -1))
        if source < 0 or target < 0:
            # The -1 sentinel is never forwarded to the puzzle: a negative
            # tower index could otherwise be read as "last tower" by
            # index-based storage and turn an invalid action into a real move.
            moved = False
        else:
            moved = self.hanoi.move_from_to(source, target)

        solved = self.isSolved()
        if solved:
            reward = self.rewards[2]
        elif moved:
            reward = self.rewards[1]
        else:
            reward = self.rewards[0]
        return self.getState(), reward, solved, self.totalMoves

    def printState(self):
        """Delegate to the wrapped puzzle's ``printState()``."""
        self.hanoi.printState()

    def nStates(self):
        """Return the size of the state space (the modulus used by getState)."""
        return self.prime

    def reset(self):
        """Replace the puzzle with a fresh one, zero the move counter and
        return the new state."""
        self.hanoi = Hanoi(self.numDisc)
        self.totalMoves = 0
        return self.getState()
def test_move_locked():
    """``move`` raises ``TimeoutError`` while ``_lock`` is already held."""
    puzzle = Hanoi(4, 0, 2)
    # Take the lock ourselves so that move() cannot obtain it.
    puzzle._lock.acquire()
    with pytest.raises(TimeoutError):
        # NOTE(review): the third argument is presumably a timeout -- confirm.
        puzzle.move(0, 2, 0)
def test_move_bigger_disc():
    """Repeating move(0, 2) is rejected: disc 2 cannot go on top of disc 1."""
    puzzle = Hanoi(4, 0, 2)
    puzzle.move(0, 2)
    expected_message = r'cannot put disc 2 on top of disc 1'
    with pytest.raises(ValueError, match=expected_message):
        puzzle.move(0, 2)
def test_move_empty_source():
    """Moving from tower 1 on a fresh puzzle fails because it is empty."""
    puzzle = Hanoi(4, 0, 2)
    expected_message = r'source 1 is empty'
    with pytest.raises(ValueError, match=expected_message):
        puzzle.move(1, 0)
def test_move_source_eq_target():
    """``move`` rejects a move whose source and target are the same tower."""
    puzzle = Hanoi(4, 0, 2)
    expected_message = r'source may not equal target'
    with pytest.raises(ValueError, match=expected_message):
        puzzle.move(0, 0)
def test_move_target_n1():
    """``move`` rejects target index 3 as invalid."""
    puzzle = Hanoi(4, 0, 2)
    expected_message = r'target 3 is invalid'
    with pytest.raises(ValueError, match=expected_message):
        puzzle.move(0, 3)
def main():
    """Ask for a disc count, draw the stacks, run the game procedure and
    print "WON" once it returns."""
    game = Hanoi(AskForNumberOfDisks())
    DrawStacks(game)
    game_procedur(game)
    print("WON")
def test_getState_locked():
    """``getState`` raises ``TimeoutError`` while ``_lock`` is already held."""
    puzzle = Hanoi(4, 0, 2)
    # Take the lock ourselves so that getState() cannot obtain it.
    puzzle._lock.acquire()
    with pytest.raises(TimeoutError):
        # NOTE(review): the argument is presumably a timeout -- confirm.
        puzzle.getState(0)
def test_isComplete_happy_path():
    """A freshly built puzzle does not report itself as complete."""
    puzzle = Hanoi(4, 0, 2)
    complete = puzzle.isComplete()
    assert not complete
def test_move_happy_path():
    """Play a full 15-move sequence on 4 discs and check the tower bitmasks."""
    puzzle = Hanoi(4, 0, 2)
    assert puzzle._state.tower[0] == 0b1111

    puzzle.move(0, 1)
    # smallest disc / least-significant bit is moved
    assert puzzle._state.tower[0] == 0b1110
    assert puzzle._state.tower[1] == 0b0001

    # The remaining fourteen moves, as (source, target) pairs in play order.
    remaining_moves = [
        (0, 2), (1, 2), (0, 1), (2, 0), (2, 1), (0, 1), (0, 2),
        (1, 2), (1, 0), (2, 0), (1, 2), (0, 1), (0, 2), (1, 2),
    ]
    for source, target in remaining_moves:
        puzzle.move(source, target)

    assert puzzle._state.tower[2] == 0b1111
def test_isComplete_locked():
    """``isComplete`` raises ``TimeoutError`` while ``_lock`` is already held."""
    puzzle = Hanoi(4, 0, 2)
    # Take the lock ourselves so that isComplete() cannot obtain it.
    puzzle._lock.acquire()
    with pytest.raises(TimeoutError):
        # NOTE(review): the argument is presumably a timeout -- confirm.
        puzzle.isComplete(0)
def test_move_source_n1():
    """``move`` rejects source index 3 as invalid."""
    puzzle = Hanoi(4, 0, 2)
    expected_message = r'source 3 is invalid'
    with pytest.raises(ValueError, match=expected_message):
        puzzle.move(3, 2)
def test_popcount():
    """``Hanoi.popcount(0b101)`` evaluates to 2."""
    result = Hanoi.popcount(0b101)
    assert 2 == result
def setUp(self):
    """Store a default-constructed ``Hanoi`` on ``self.hanoi``."""
    fresh_puzzle = Hanoi()
    self.hanoi = fresh_puzzle
def reset(self):
    """Swap in a new ``Hanoi(self.numDisc)``, zero ``totalMoves`` and return
    the result of ``getState()``."""
    fresh_puzzle = Hanoi(self.numDisc)
    self.hanoi = fresh_puzzle
    self.totalMoves = 0
    initial_state = self.getState()
    return initial_state