Exemple #1
0
 def __init__(self, numOfDisc):
     """Set up a fresh environment wrapping a Hanoi game with numOfDisc discs.

     Args:
         numOfDisc: number of discs, forwarded unchanged to ``Hanoi``.
     """
     # Game object first, then the bookkeeping that describes it.
     self.hanoi, self.numDisc = Hanoi(numOfDisc), numOfDisc
     # Action-number -> move lookup, built by the instance helper.
     self.moveDict = self._getMoveDict()
     # Three-entry reward table, the modulus stored as ``prime``, and the
     # move counter starting at zero.
     self.rewards, self.prime, self.totalMoves = [-10, 0, 100000], 509, 0
Exemple #2
0
class TestMovimentacaoDosDiscos(unittest.TestCase):
    """Tests for moving discs between the pegs of a default ``Hanoi`` game.

    The assertions use a ``| should |`` matcher syntax (presumably the
    should-dsl package -- confirm against the file's imports).
    """

    def setUp(self):
        """Create a fresh default ``Hanoi`` game before every test."""
        self.hanoi = Hanoi()

    def test_movimenta_A_para_pino2(self):
        """Moving from peg 1 to peg 2 leaves "A" alone on peg 2."""
        self.hanoi.movimentar(1, 2)
        self.hanoi.pino1 | should | equal_to(["B", "C", "D", "E", "F", "G"])
        self.hanoi.pino2 | should | equal_to(["A"])
        self.hanoi.pino3 | should | equal_to([])

    def test_movimenta_A_para_pino3(self):
        """Moving from peg 1 to peg 3 leaves "A" alone on peg 3."""
        self.hanoi.movimentar(1, 3)
        self.hanoi.pino1 | should | equal_to(["B", "C", "D", "E", "F", "G"])
        self.hanoi.pino2 | should | equal_to([])
        self.hanoi.pino3 | should | equal_to(["A"])

    def test_movimentacao_ilegal_B_sobre_A(self):
        """Repeating the peg 1 -> peg 2 move must raise (bigger piece on smaller)."""
        self.hanoi.movimentar(1, 2)
        # The callable and its arguments are handed to the matcher as a tuple
        # so the matcher performs the call itself.
        (self.hanoi.movimentar, 1, 2) | should | throw(
            Exception, message="Peca maior sobre a menor")

    def test_movimentacao_ilegal_retirada_de_peca_de_pino_vazio(self):
        """Taking a piece from peg 2 on a fresh game must raise (empty peg)."""
        (self.hanoi.movimentar, 2, 1) | should | throw(
            Exception, message='Retirada de peca de pino vazio')
Exemple #3
0
def test_getState_happy_path():
    """getState() on a freshly built Hanoi(4, 0, 2) yields a truthy value."""
    hanoi = Hanoi(4, 0, 2)
    state = hanoi.getState()
    assert state
Exemple #4
0
def test_init_happy_path():
    """A new Hanoi(4, 0, 2) is truthy and its internal lock is not held."""
    hanoi = Hanoi(4, 0, 2)
    assert hanoi
    lock_is_held = hanoi._lock.locked()
    assert not lock_is_held
Exemple #5
0
def test_fls_zero():
    """Hanoi.fls(0) must raise ValueError with the documented message."""
    expected_message = r'fls should not be called with x == 0'
    with pytest.raises(ValueError, match=expected_message):
        Hanoi.fls(0)
Exemple #6
0
def test_fls_happy_path():
    """Hanoi.fls(0b100) is expected to equal 2."""
    result = Hanoi.fls(0b100)
    assert 2 == result
Exemple #7
0
class HanoiEnv(object):

    '''
    Reinforcement-learning style environment around a ``Hanoi`` game.

    State
    -----
    The state describes where every disc sits on the towers.  With 3 discs
    and 3 towers the initial layout is

            discs
             0  -  -
             1  -  -
             2  -  -
    tower:   a  b  c

    (a higher number means a bigger disc).

    Thinking of the board as a 3x3 grid, the first disc can be in any of 9
    cells, the next in any of the remaining 8 and the last in the remaining
    7, i.e. 9*8*7 = 504 placements.  504 is not prime, so the prime 509 is
    used as the modulus for the state number.

    Each disc position is encoded as two digits ``<tower><height>``, both
    1-based:

      height
         |
       3 |
       2 |
       1 |
           a b c
          ------------->
    tower  1 2 3

    In the initial layout disc 0 is at (1,1), disc 1 at (1,2) and disc 2 at
    (1,3).  Concatenating the codes in disc order gives 111213, and
    111213 % 509 = 251 is the initial state.

    The goal layout has disc 0 at (3,1), disc 1 at (3,2) and disc 2 at
    (3,3), which gives 313233 % 509 = 198.

    NOTE(review): the modulo is not guaranteed to keep distinct layouts
    apart, so two layouts may share a state number -- confirm this is
    acceptable for the agent.

    Actions
    -------
    Only one disc moves per step.  The agent picks a tower and one of four
    commands (left, right, left jump, right jump); a jump skips over the
    neighbouring tower.

    tower        ACTIONS
        a      left    right   left_jump   right_jump
        b      left    right   left_jump   right_jump
        c      left    right   left_jump   right_jump

    That is 3 towers * 4 commands = 12 actions, numbered 0 to 11:

     0: from tower a move left (invalid, kept so the agent stays simple)
     1: from tower a move right (to tower b)
     2: from tower a left jump (invalid, moves do not wrap around)
     3: from tower a right jump (to tower c)
     ...
     10: from tower c left jump (to tower a)
     11: from tower c right jump (invalid)

    Rewards
    -------
    ``self.rewards`` holds, in order:
      -10     the action was rejected (off the board, or the game refused it)
      0       the move was accepted and the puzzle is not solved yet
      100000  the puzzle is solved after the action
    '''

    def __init__(self, numOfDisc):
        '''Create the environment around a new ``Hanoi(numOfDisc)`` game.'''
        self.hanoi = Hanoi(numOfDisc)
        self.numDisc = numOfDisc
        self.moveDict = self._getMoveDict()
        # Indexed as [rejected, accepted, solved]; see the class docstring.
        self.rewards = [-10, 0, 100000]
        # Modulus used to fold the encoded layout into a state number.
        self.prime = 509
        self.totalMoves = 0

    def _getMoveDict(self):
        '''Return the action-number -> (source rod, target rod) table.

        A target of -1 marks an action that has no legal destination.
        '''
        return {
            0: (0, -1),   # tower 0, move left (invalid)
            1: (0, 1),    # tower 0, move right
            2: (0, -1),   # tower 0, jump left over one rod (invalid)
            3: (0, 2),    # tower 0, jump right over one rod to tower 2
            4: (1, 0),
            5: (1, 2),
            6: (1, -1),
            7: (1, -1),
            8: (2, 1),
            9: (2, -1),
            10: (2, 0),
            11: (2, -1),
        }

    def getActions(self):
        '''Return the list of valid action numbers (keys of the move table).'''
        return list(self.moveDict)

    def getState(self):
        '''Encode the current disc layout as an integer in ``[0, self.prime)``.

        Every disc found through ``hanoi.atIndex(x, y)`` contributes the
        two-digit code ``10 * (x + 1) + (y + 1)``; the codes are concatenated
        in disc-number order and reduced modulo ``self.prime``.

        Raises:
            TypeError: if some disc number in ``range(self.numDisc)`` was not
                found on the board (its code is then ``None``).
        '''
        posDict = {}
        for y in range(self.numDisc):
            for x in range(3):
                disc = self.hanoi.atIndex(x, y)
                # Presumably atIndex returns a falsy value for an empty
                # cell -- confirm against the Hanoi implementation.
                if disc:
                    posDict[disc.num] = 10 * (x + 1) + (y + 1)

        state = 0
        for n in range(self.numDisc):
            state = state * 100 + posDict.get(n)
        return state % self.prime

    def isSolved(self):
        '''Return whatever the wrapped game reports from ``isSolved()``.'''
        return self.hanoi.isSolved()

    def takeAction(self, actionNumber):
        '''Apply one action and report the outcome.

        Args:
            actionNumber: key into the move table; unknown numbers are
                treated as invalid actions.

        Returns:
            Tuple ``(state, reward, solved, totalMoves)``.
        '''
        self.totalMoves += 1
        source, target = self.moveDict.get(actionNumber, (-1, -1))

        # -1 is only a marker for "no legal move".  It must never reach the
        # game object: a list-backed board would read it as "last rod"
        # through negative indexing and silently perform a real move.
        if source < 0 or target < 0:
            moved = False
        else:
            moved = self.hanoi.move_from_to(source, target)

        solved = self.isSolved()
        if solved:
            reward = self.rewards[2]
        elif moved:
            reward = self.rewards[1]
        else:
            reward = self.rewards[0]

        return self.getState(), reward, solved, self.totalMoves

    def printState(self):
        '''Delegate to the wrapped game's ``printState()``.'''
        self.hanoi.printState()

    def nStates(self):
        '''Return the size of the state space (the modulus used by getState).'''
        return self.prime

    def reset(self):
        '''Start a new game with the same disc count and return its state.'''
        self.hanoi = Hanoi(self.numDisc)
        self.totalMoves = 0
        return self.getState()
Exemple #8
0
def test_move_locked():
    """move() with a third argument of 0 raises TimeoutError while the lock is held."""
    hanoi = Hanoi(4, 0, 2)
    # Hold the internal lock so the move cannot obtain it.
    hanoi._lock.acquire()
    with pytest.raises(TimeoutError):
        hanoi.move(0, 2, 0)
Exemple #9
0
def test_move_bigger_disc():
    """A second 0 -> 2 move must be rejected: disc 2 cannot sit on disc 1."""
    hanoi = Hanoi(4, 0, 2)
    hanoi.move(0, 2)
    expected_message = r'cannot put disc 2 on top of disc 1'
    with pytest.raises(ValueError, match=expected_message):
        hanoi.move(0, 2)
Exemple #10
0
def test_move_empty_source():
    """Moving from tower 1 on a fresh game raises ValueError (source is empty)."""
    hanoi = Hanoi(4, 0, 2)
    expected_message = r'source 1 is empty'
    with pytest.raises(ValueError, match=expected_message):
        hanoi.move(1, 0)
Exemple #11
0
def test_move_source_eq_target():
    """Using the same tower as source and target raises ValueError."""
    hanoi = Hanoi(4, 0, 2)
    expected_message = r'source may not equal target'
    with pytest.raises(ValueError, match=expected_message):
        hanoi.move(0, 0)
Exemple #12
0
def test_move_target_n1():
    """A target index of 3 is rejected with ValueError."""
    hanoi = Hanoi(4, 0, 2)
    expected_message = r'target 3 is invalid'
    with pytest.raises(ValueError, match=expected_message):
        hanoi.move(0, 3)
def main():
    """Run one interactive game: ask for the size, draw, play, announce the win."""
    disk_count = AskForNumberOfDisks()
    hanoi_game = Hanoi(disk_count)
    # Show the starting position before handing control to the game loop.
    DrawStacks(hanoi_game)
    game_procedur(hanoi_game)
    print("WON")
Exemple #14
0
def test_getState_locked():
    """getState(0) raises TimeoutError while the internal lock is held."""
    hanoi = Hanoi(4, 0, 2)
    # Hold the internal lock so getState cannot obtain it.
    hanoi._lock.acquire()
    with pytest.raises(TimeoutError):
        hanoi.getState(0)
Exemple #15
0
def test_isComplete_happy_path():
    """A freshly built Hanoi(4, 0, 2) does not report completion."""
    hanoi = Hanoi(4, 0, 2)
    completed = hanoi.isComplete()
    assert not completed
Exemple #16
0
def test_move_happy_path():
    """Play a full 15-move solution and check the tower bitmasks on the way."""
    hanoi = Hanoi(4, 0, 2)
    assert hanoi._state.tower[0] == 0b1111

    hanoi.move(0, 1)
    # The smallest disc (least-significant bit) is the one that moved.
    assert hanoi._state.tower[0] == 0b1110
    assert hanoi._state.tower[1] == 0b0001

    # The remaining fourteen (source, target) moves, in order.
    remaining_moves = (
        (0, 2), (1, 2), (0, 1), (2, 0), (2, 1), (0, 1), (0, 2),
        (1, 2), (1, 0), (2, 0), (1, 2), (0, 1), (0, 2), (1, 2),
    )
    for source, target in remaining_moves:
        hanoi.move(source, target)

    assert hanoi._state.tower[2] == 0b1111
Exemple #17
0
def test_isComplete_locked():
    """isComplete(0) raises TimeoutError while the internal lock is held."""
    hanoi = Hanoi(4, 0, 2)
    # Hold the internal lock so isComplete cannot obtain it.
    hanoi._lock.acquire()
    with pytest.raises(TimeoutError):
        hanoi.isComplete(0)
Exemple #18
0
def test_move_source_n1():
    """A source index of 3 is rejected with ValueError."""
    hanoi = Hanoi(4, 0, 2)
    expected_message = r'source 3 is invalid'
    with pytest.raises(ValueError, match=expected_message):
        hanoi.move(3, 2)
Exemple #19
0
def test_popcount():
    """Hanoi.popcount(0b101) is expected to equal 2."""
    bit_count = Hanoi.popcount(0b101)
    assert 2 == bit_count
Exemple #20
0
 def setUp(self):
     """Create a fresh default ``Hanoi`` game and store it on the test instance."""
     self.hanoi = Hanoi()
Exemple #21
0
 def reset(self):
     """Replace the game with a new one of the same size and return its state.

     The move counter is set back to zero as part of the reset.
     """
     # The right-hand side is evaluated first, so the new game is built
     # before either attribute is rebound.
     self.hanoi, self.totalMoves = Hanoi(self.numDisc), 0
     initial_state = self.getState()
     return initial_state