Esempi in Python per possible_moves, esempi in Python per lib.game.possible_moves

Esempio n. 1

0

Mostra file

File: test_game.py Progetto: dhaopku/Deep-Reinforcement-Learning-Hands-On

 def test_possible_moves(self):
     r = game.possible_moves(0)
     self.assertEqual(r, [])
     r = game.possible_moves(0b111111111111111111111111111111111111111111000000000000000000000)
     self.assertEqual(r, [])
     r = game.possible_moves(0b000000000000000000000000000000000000000000110110110110110110110)
     self.assertEqual(r, [0, 1, 2, 3, 4, 5, 6])

Esempio n. 2

0

Mostra file

File: test_game.py Progetto: mecha2k/rl_handson

 def test_possible_moves(self):
     r = game.possible_moves(0)
     self.assertEqual(r, [])
     r = game.possible_moves(
         0b111111111111111111111111111111111111111111000000000000000000000)
     self.assertEqual(r, [])
     r = game.possible_moves(
         0b000000000000000000000000000000000000000000110110110110110110110)
     self.assertEqual(r, [0, 1, 2, 3, 4, 5, 6])

Esempio n. 3

0

Mostra file

    def find_leaf(self, state_int, player):
        """
        Traverse the tree until the end of game or leaf node
        :param state_int: root node state
        :param player: player to move
        :return: tuple of (value, leaf_state, player, states, actions)
        1. value: None if leaf node, otherwise equals to the game outcome for the player at leaf
        2. leaf_state: state_int of the last state
        3. player: player at the leaf node
        4. states: list of states traversed
        5. list of actions taken
        """
        states = []
        actions = []
        cur_state = state_int
        cur_player = player
        value = None

        while not self.is_leaf(cur_state):
            states.append(cur_state)

            counts = self.visit_count[cur_state]
            total_sqrt = m.sqrt(sum(counts))
            probs = self.probs[cur_state]
            values_avg = self.value_avg[cur_state]

            # choose action to take, in the root node add the Dirichlet noise to the probs
            if cur_state == state_int:
                noises = np.random.dirichlet(
                    [0.03] * game.GAME_COLS)
                probs = [
                    0.75 * prob + 0.25 * noise
                    for prob, noise in zip(probs, noises)
                ]
            score = [
                value + self.c_puct*prob*total_sqrt/(1+count)
                for value, prob, count in
                    zip(values_avg, probs, counts)
            ]
            invalid_actions = set(range(game.GAME_COLS)) - \
                              set(game.possible_moves(cur_state))
            for invalid in invalid_actions:
                score[invalid] = -np.inf
            action = int(np.argmax(score))
            actions.append(action)
            cur_state, won = game.move(
                cur_state, action, cur_player)
            if won:
                # if somebody won the game, the value of the final state is -1 (as it is on opponent's turn)
                value = -1.0
            cur_player = 1-cur_player
            # check for the draw
            moves_count = len(game.possible_moves(cur_state))
            if value is None and moves_count == 0:
                value = 0.0

        return value, cur_state, cur_player, states, actions

Esempio n. 4

0

Mostra file

File: mcts.py Progetto: dhaopku/Deep-Reinforcement-Learning-Hands-On

    def find_leaf(self, state_int, player):
        """
        Traverse the tree until the end of game or leaf node
        :param state_int: root node state
        :param player: player to move
        :return: tuple of (value, leaf_state, player, states, actions)
        1. value: None if leaf node, otherwise equals to the game outcome for the player at leaf
        2. leaf_state: state_int of the last state
        3. player: player at the leaf node
        4. states: list of states traversed
        5. list of actions taken
        """
        states = []
        actions = []
        cur_state = state_int
        cur_player = player
        value = None

        while not self.is_leaf(cur_state):
            states.append(cur_state)

            counts = self.visit_count[cur_state]
            total_sqrt = m.sqrt(sum(counts))
            probs = self.probs[cur_state]
            values_avg = self.value_avg[cur_state]

            # choose action to take, in the root node add the Dirichlet noise to the probs
            if cur_state == state_int:
                noises = np.random.dirichlet([0.03] * game.GAME_COLS)
                probs = [0.75 * prob + 0.25 * noise for prob, noise in zip(probs, noises)]
            score = [value + self.c_puct * prob * total_sqrt / (1 + count)
                     for value, prob, count in zip(values_avg, probs, counts)]
            invalid_actions = set(range(game.GAME_COLS)) - set(game.possible_moves(cur_state))
            for invalid in invalid_actions:
                score[invalid] = -np.inf
            action = int(np.argmax(score))
            actions.append(action)
            cur_state, won = game.move(cur_state, action, cur_player)
            if won:
                # if somebody won the game, the value of the final state is -1 (as it is on opponent's turn)
                value = -1.0
            cur_player = 1-cur_player
            # check for the draw
            if value is None and len(game.possible_moves(cur_state)) == 0:
                value = 0.0

        return value, cur_state, cur_player, states, actions

Esempio n. 5

0

Mostra file

    def find_leaf(self, state_int, player, step):
        states = []
        actions = []
        cur_state = state_int
        cur_player = player
        value = None

        while value is None and not self.is_leaf(cur_state):
            states.append(cur_state)

            counts = self.visit_count[cur_state]
            total_sqrt = math.sqrt(sum(counts))
            probs = self.probs[cur_state]
            values_avg = self.value_avg[cur_state]

            movel = game.possible_moves(cur_state, cur_player, step)
            alen = len(actions)
            if alen < 1:
                noises = np.random.dirichlet([0.17] * len(movel))
            max_score = -np.inf
            chList = actionTable.choList if cur_player < 1 else actionTable.hanList
            chDict = actionTable.choDict if cur_player < 1 else actionTable.hanDict
            for i, m in enumerate(movel):
                idx = chDict[m]
                score = values_avg[idx] + self.c_puct * (
                    probs[idx] if alen else 0.75 * probs[idx] +
                    0.25 * noises[i]) * total_sqrt / (1 + counts[idx])
                if score > max_score:
                    max_score = score
                    aidx = idx
            action = chList[aidx]
            actions.append(aidx)
            cur_state, won = game.move(cur_state, action, step)
            if won > 0:
                # if somebody won the game, the value of the final state is -1 (as it is on opponent's turn)
                value = -1.0 if won - 1 == cur_player else 1.0
            cur_player = 1 - cur_player
            step += 1

        if value != None: value *= 1 - (game.MAX_TURN - step) / 1000

        return value, cur_state, step, states, actions

Esempio n. 6

0

Mostra file

    net.train()
    replay_buffer = collections.deque(maxlen=REPLAY_BUFFER)
    f = open("./train.dat", "r")
    ptime = time.time()

    while True:
        for lidx in range(PLAY_EPISODES):
            pan = game.encode_lists([list(i) for i in game.INITIAL_STATE], 0)

            s = f.readline()
            if len(s)<5: lidx -= 1; break
            js = json.loads(s)
            result = -js["result"]
            for idx, (action, probs) in enumerate(js["action"]):
                movelist = game.possible_moves(pan, idx%2, idx)
                #if action not in movelist:
                #    print("Impossible action selected %d %d"%(step_idx, lidx))
                probs1 = [0.0] * actionTable.AllMoveLength
                for n in probs:
                    probs1[n[0]] = n[1]
                replay_buffer.append((pan, idx, probs1, result))
                pan, _ = game.move(pan, action, idx)
                if idx!=1: result = -result
        if lidx < 0: break

        print(step_idx, end=' ')
        step_idx += 1
        if len(replay_buffer) < MIN_REPLAY_TO_TRAIN:
            continue

Esempio n. 7

0

Mostra file

File: model.py Progetto: umair-gujjar/Practical-Deep-Reinforcement-Learning

def play_game(mcts_stores, replay_buffer, net1, net2, steps_before_tau_0, mcts_searches, mcts_batch_size,
              net1_plays_first=None, cuda=False):
    """
    Play one single game, memorizing transitions into the replay buffer
    :param mcts_stores: could be None or single MCTS or two MCTSes for individual net
    :param replay_buffer: queue with (state, probs, values), if None, nothing is stored
    :param net1: player1
    :param net2: player2
    :return: value for the game in respect to player1 (+1 if p1 won, -1 if lost, 0 if draw)
    """
    assert isinstance(replay_buffer, (collections.deque, type(None)))
    assert isinstance(mcts_stores, (mcts.MCTS, type(None), list))
    assert isinstance(net1, Net)
    assert isinstance(net2, Net)
    assert isinstance(steps_before_tau_0, int) and steps_before_tau_0 >= 0
    assert isinstance(mcts_searches, int) and mcts_searches > 0
    assert isinstance(mcts_batch_size, int) and mcts_batch_size > 0

    if mcts_stores is None:
        mcts_stores = [mcts.MCTS(), mcts.MCTS()]
    elif isinstance(mcts_stores, mcts.MCTS):
        mcts_stores = [mcts_stores, mcts_stores]

    state = game.INITIAL_STATE
    nets = [net1, net2]
    if net1_plays_first is None:
        cur_player = np.random.choice(2)
    else:
        cur_player = 0 if net1_plays_first else 1
    step = 0
    tau = 1 if steps_before_tau_0 > 0 else 0
    game_history = []

    result = None
    net1_result = None

    while result is None:
        mcts_stores[cur_player].search_batch(mcts_searches, mcts_batch_size, state, cur_player, nets[cur_player], cuda=cuda)
        probs, _ = mcts_stores[cur_player].get_policy_value(state, tau=tau)
        game_history.append((state, cur_player, probs))
        action = np.random.choice(game.GAME_COLS, p=probs)
        if action not in game.possible_moves(state):
            print("Impossible action selected")
        state, won = game.move(state, action, cur_player)
        if won:
            result = 1
            net1_result = 1 if cur_player == 0 else -1
            break
        cur_player = 1-cur_player
        # check the draw case
        if len(game.possible_moves(state)) == 0:
            result = 0
            net1_result = 0
            break
        step += 1
        if step >= steps_before_tau_0:
            tau = 0

    if replay_buffer is not None:
        for state, cur_player, probs in reversed(game_history):
            replay_buffer.append((state, cur_player, probs, result))
            result = -result

    return net1_result, step

Esempio n. 8

0

Mostra file

File: human_vs_ai.py Progetto: cjssh1002/AlphaJanggi

def play_game(net1, steps_before_tau_0, mcts_batch_size, device="cpu"):
    assert isinstance(net1, model.Net)
    assert isinstance(steps_before_tau_0, int) and steps_before_tau_0 >= 0
    assert isinstance(mcts_batch_size, int) and mcts_batch_size > 0
    global mcts_searches

    pan = game.encode_lists([list(i) for i in game.INITIAL_STATE], 0)
    historystr = []
    cur_player = 0
    step = 0
    mctsi = mcts.MCTS()

    result = None; exitf = False
    a0 = ord('1')
    while True:
        s=input('플레이하려는 진영을 선택하세요 0) 초, 1)한 ?')
        if s.find('level') >= 0:
            mcts_searches = LEVELC * int(s[6:])
            print('OK', flush=True)
        else:
            player_human = 0 if int(s)<1 else 1
            break

    while result is None:
        movelist = game.possible_moves(pan, cur_player, step)
        if step>9 and historystr[-4][:90]==historystr[-8][:90]:
            p = game.decode_binary(pan)
            for idx, m in enumerate(movelist):
                spos = m // 100; tpos = m % 100; y0 = spos // 9; x0 = spos % 9; y1 = tpos // 9; x1 = tpos % 9
                captured = p[y1][x1]; p[y1][x1] = p[y0][x0]; p[y0][x0] = 0
                ps = game.encode_lists(p, step+1)
                if ps[:90]==historystr[-4][:90]:
                    del movelist[idx]; break
                p[y0][x0] = p[y1][x1]; p[y1][x1] = captured

        if (step<2 and cur_player != player_human) or (step>1 and cur_player == player_human):
            if step < 2:
                print("마상 차림을 선택하세요 0) "+masang[0]+", 1) "+masang[1]+", 2) "+masang[2]+", 3) "+masang[3])
            else:
                render(pan, player_human)
                if step==2 or step==3:
                    print("")
                    print("옮기고자 하는 기물의 세로 번호, 가로 번호, 목적지의 세로 번호, 가로 번호 ex) 0010  한수 쉬기: 0")
            action = -1
            while action<0:
                s=input((str(step-1) if step>1 else '')+' ? ')
                if s=="new": exitf=True; break
                elif s.find('level')>=0:
                    mcts_searches=LEVELC*int(s[6:]); print('OK', flush=True)
                elif step<2:
                    if len(s)==1 and s[0]>='0' and s[0]<'4': action = int(s) + 10000
                elif len(s)==1: action = 0
                elif s=='undo' and step>3:
                    step-=2; historystr.pop(); historystr.pop(); pan=historystr[-1]
                    movelist = game.possible_moves(pan, cur_player, step)
                    render(pan, player_human)
                elif len(s)==4 and s[0]>='0' and s[0]<='9' and s[1]>'0' and s[1]<='9' and s[2]>='0' and s[2]<='9' and s[3]>'0' and s[3]<='9':
                    b1=9-ord(s[0])+a0 if s[0]>'0' else 0
                    if player_human<1: b1=9-b1
                    b2 = ord(s[1]) - a0
                    if player_human < 1: b2 = 8 - b2
                    b3 = 9-ord(s[2]) + a0 if s[2]>'0' else 0
                    if player_human < 1: b3 = 9 - b3
                    b4 = ord(s[3]) - a0
                    if player_human < 1: b4 = 8 - b4
                    action = (b1*9 + b2)*100 + b3*9+b4
                if action not in movelist: action = -1
                else: print('OK', flush=True)
        else:
            mctsi.search_batch(mcts_searches, mcts_batch_size, pan,
                            cur_player, net1, step, device=device)
            probs, values = mctsi.get_policy_value(pan, movelist, cur_player)
            chList = actionTable.choList if cur_player < 1 else actionTable.hanList
            n = np.random.choice(actionTable.AllMoveLength, p=probs) if step<steps_before_tau_0 else np.argmax(probs)
            action = chList[n]
            """for m in movelist:
                print('%04d %.2f' % (m, probs[chList.index(m)]), end=',  ')
            print()"""
            if step<2:
                print(('한: ' if step<1 else '초: ')+masang[action-10000]+' '+str(values[n]), flush=True)
                if step==1: render(pan, player_human)
            else:
                if action<1: print('한수쉼'+' '+str(values[n]))
                else:
                    b1=action//100//9
                    if player_human<1: b1=9-b1
                    b2 = action//100%9
                    if player_human < 1: b2 = 8 - b2
                    b3 = action%100//9
                    if player_human < 1: b3 = 9 - b3
                    b4 = action%100%9
                    if player_human < 1: b4 = 8 - b4
                    print((chr(9-b1+a0) if b1>0 else '0')+chr(b2+a0)+(chr(9-b3+a0) if b3>0 else '0')+chr(b4+a0)+' '+str(values[n]))
        if exitf: break
        pan, won = game.move(pan, action, step)
        historystr.append(pan)
        if won>0:
            render(pan, player_human)
            print(('초' if won==1 else '한')+' 승')
            break
        cur_player = 1-cur_player
        step += 1

Esempio n. 9

0

Mostra file

 def is_draw(self):
     return len(game.possible_moves(self.state)) == 0

Esempio n. 10

0

Mostra file

 def is_valid_move(self, move_col):
     return move_col in game.possible_moves(self.state)

Esempio n. 11

0

Mostra file

def play_game(value, mcts_stores, queue, net1, net2, steps_before_tau_0, mcts_searches, mcts_batch_size,
              best_idx, url=None, username=None, device="cpu"):
    assert isinstance(mcts_stores, (mcts.MCTS, type(None), list))
    assert isinstance(net1, Net)
    assert isinstance(net2, Net)
    assert isinstance(steps_before_tau_0, int) and steps_before_tau_0 >= 0
    assert isinstance(mcts_searches, int) and mcts_searches > 0
    assert isinstance(mcts_batch_size, int) and mcts_batch_size > 0

    if mcts_stores is None:
        mcts_stores = [mcts.MCTS(), mcts.MCTS()]
    elif isinstance(mcts_stores, mcts.MCTS):
        mcts_stores.clear()
        mcts_stores = [mcts_stores, mcts_stores]
    else: mcts_stores[0].clear(); mcts_stores[1].clear()

    state = game.encode_lists([list(i) for i in game.INITIAL_STATE], 0)
    nets = [net1, net2]
    cur_player = 0
    step = 0
    tau = 1 if steps_before_tau_0 > 0 else 0
    game_history = []

    net1_result = None
    result = None

    while net1_result is None and (value==None or value[0]>0):
        mcts_stores[cur_player].search_batch(mcts_searches, mcts_batch_size, state,
                                             cur_player, nets[cur_player], step, device=device)
        movel = game.possible_moves(state, cur_player, step)
        probs, _ = mcts_stores[cur_player].get_policy_value(state, movel, cur_player, tau=tau)
        chList = actionTable.choList if cur_player < 1 else actionTable.hanList
        action = chList[np.random.choice(actionTable.AllMoveLength, p=probs)]
        game_history.append((action, probs) if queue is None else (state, step, probs))
        if action not in movel:
            print("Impossible action selected")
        state, won = game.move(state, action, step)
        if step%3<1: print('.', end='', flush=True)
        if won>0:
            net1_result = 1 if won == 1 else -1
            result = -net1_result
            break
        step += 1
        cur_player = 1-cur_player
        if step >= steps_before_tau_0:
            tau = 0

    if net1_result !=None:
        print()
        if queue is not None:
            dequeuef = isinstance(queue, collections.deque)
            for state, hstep, probs in game_history:
                queue.append((state, hstep, probs, result)) if dequeuef else\
                    queue.put((state, hstep, probs, result))
                if hstep!=1: result = -result
        elif best_idx>=0:
            gh = []
            for (action, probs) in game_history:
                prar = []
                for idx, prob in enumerate(probs):
                    if prob>0: prar.append([idx, prob])
                gh.append((action, prar))
            js = {"netIdx":best_idx, "result":net1_result, "username":username, "action":gh}
            hr = webFunction.http_request(url, True, json.dumps(js))
            if hr == None: sys.exit()
            elif hr['status'] == 'error': print('error occured')
            else: print("game is uploaded")

    return net1_result, step if net1_result!=None else 0

Esempio n. 12

0

Mostra file

File: model.py Progetto: dhaopku/Deep-Reinforcement-Learning-Hands-On

def play_game(mcts_stores, replay_buffer, net1, net2, steps_before_tau_0, mcts_searches, mcts_batch_size,
              net1_plays_first=None, device="cpu"):
    """
    Play one single game, memorizing transitions into the replay buffer
    :param mcts_stores: could be None or single MCTS or two MCTSes for individual net
    :param replay_buffer: queue with (state, probs, values), if None, nothing is stored
    :param net1: player1
    :param net2: player2
    :return: value for the game in respect to player1 (+1 if p1 won, -1 if lost, 0 if draw)
    """
    assert isinstance(replay_buffer, (collections.deque, type(None)))
    assert isinstance(mcts_stores, (mcts.MCTS, type(None), list))
    assert isinstance(net1, Net)
    assert isinstance(net2, Net)
    assert isinstance(steps_before_tau_0, int) and steps_before_tau_0 >= 0
    assert isinstance(mcts_searches, int) and mcts_searches > 0
    assert isinstance(mcts_batch_size, int) and mcts_batch_size > 0

    if mcts_stores is None:
        mcts_stores = [mcts.MCTS(), mcts.MCTS()]
    elif isinstance(mcts_stores, mcts.MCTS):
        mcts_stores = [mcts_stores, mcts_stores]

    state = game.INITIAL_STATE
    nets = [net1, net2]
    if net1_plays_first is None:
        cur_player = np.random.choice(2)
    else:
        cur_player = 0 if net1_plays_first else 1
    step = 0
    tau = 1 if steps_before_tau_0 > 0 else 0
    game_history = []

    result = None
    net1_result = None

    while result is None:
        mcts_stores[cur_player].search_batch(mcts_searches, mcts_batch_size, state,
                                             cur_player, nets[cur_player], device=device)
        probs, _ = mcts_stores[cur_player].get_policy_value(state, tau=tau)
        game_history.append((state, cur_player, probs))
        action = np.random.choice(game.GAME_COLS, p=probs)
        if action not in game.possible_moves(state):
            print("Impossible action selected")
        state, won = game.move(state, action, cur_player)
        if won:
            result = 1
            net1_result = 1 if cur_player == 0 else -1
            break
        cur_player = 1-cur_player
        # check the draw case
        if len(game.possible_moves(state)) == 0:
            result = 0
            net1_result = 0
            break
        step += 1
        if step >= steps_before_tau_0:
            tau = 0

    if replay_buffer is not None:
        for state, cur_player, probs in reversed(game_history):
            replay_buffer.append((state, cur_player, probs, result))
            result = -result

    return net1_result, step

Esempio n. 13

0

Mostra file

File: telegram-bot.py Progetto: dhaopku/Deep-Reinforcement-Learning-Hands-On

 def is_draw(self):
     return len(game.possible_moves(self.state)) == 0

Esempio n. 14

0

Mostra file

File: telegram-bot.py Progetto: dhaopku/Deep-Reinforcement-Learning-Hands-On

 def is_valid_move(self, move_col):
     return move_col in game.possible_moves(self.state)