    def test_simple_decode(self):
        g = game.decode_binary(0b000000000000000000000000000000000000000000110110110110110110110)
        self.assertEqual(g, [[]]*7)
        g = game.decode_binary(0b111111111111111111111111111111111111111111000000000000000000000)
        self.assertEqual(g, [[1]*6]*7)
        g = game.decode_binary(0)
        self.assertEqual(g, [[0]*6]*7)
Example #2
    def test_simple_decode(self):
        g = game.decode_binary(
            0b000000000000000000000000000000000000000000110110110110110110110)
        self.assertEqual(g, [[]] * 7)
        g = game.decode_binary(
            0b111111111111111111111111111111111111111111000000000000000000000)
        self.assertEqual(g, [[1] * 6] * 7)
        g = game.decode_binary(0)
        self.assertEqual(g, [[0] * 6] * 7)
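
The constants in these tests imply a fixed 63-bit board encoding: 7 columns x 6 piece bits (42 bits), followed by seven 3-bit counters holding the number of free cells in each column (an empty column stores 6, i.e. 0b110; a full column stores 0). Below is a minimal sketch that rebuilds the empty-board constant from that layout; the constant names are illustrative, not taken from the repository.

# Sketch only: reconstruct the empty-board integer from the layout
# implied by the test constants above (42 piece bits + 7 x 3 length bits).
GAME_COLS, GAME_ROWS, BITS_IN_LEN = 7, 6, 3

def empty_board_int():
    piece_bits = "0" * (GAME_COLS * GAME_ROWS)                      # no pieces in any column
    len_bits = format(GAME_ROWS, "0%db" % BITS_IN_LEN) * GAME_COLS  # 6 free cells per column
    return int(piece_bits + len_bits, 2)

assert empty_board_int() == 0b000000000000000000000000000000000000000000110110110110110110110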
Example #3
    def search_minibatch(self, count, state_int, player,
                         net, device="cpu"):
        """
        Perform several MCTS searches.
        """
        backup_queue = []
        expand_states = []
        expand_players = []
        expand_queue = []
        planned = set()
        for _ in range(count):
            value, leaf_state, leaf_player, states, actions = \
                self.find_leaf(state_int, player)
            if value is not None:
                backup_queue.append((value, states, actions))
            else:
                if leaf_state not in planned:
                    planned.add(leaf_state)
                    leaf_state_lists = game.decode_binary(
                        leaf_state)
                    expand_states.append(leaf_state_lists)
                    expand_players.append(leaf_player)
                    expand_queue.append((leaf_state, states,
                                         actions))

        # do expansion of nodes
        if expand_queue:
            batch_v = model.state_lists_to_batch(
                expand_states, expand_players, device)
            logits_v, values_v = net(batch_v)
            probs_v = F.softmax(logits_v, dim=1)
            values = values_v.data.cpu().numpy()[:, 0]
            probs = probs_v.data.cpu().numpy()

            # create the nodes
            for (leaf_state, states, actions), value, prob in \
                    zip(expand_queue, values, probs):
                self.visit_count[leaf_state] = [0]*game.GAME_COLS
                self.value[leaf_state] = [0.0]*game.GAME_COLS
                self.value_avg[leaf_state] = [0.0]*game.GAME_COLS
                self.probs[leaf_state] = prob
                backup_queue.append((value, states, actions))

        # perform backup of the searches
        for value, states, actions in backup_queue:
            # The leaf state itself is not stored in states/actions, so its
            # value belongs to the opponent; negate it and flip the sign at
            # every step while walking back up the path.
            cur_value = -value
            for state_int, action in zip(states[::-1],
                                         actions[::-1]):
                self.visit_count[state_int][action] += 1
                self.value[state_int][action] += cur_value
                self.value_avg[state_int][action] = \
                    self.value[state_int][action] / \
                    self.visit_count[state_int][action]
                cur_value = -cur_value
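
The sign handling in the backup loop above is the subtle part: the leaf state is absent from states/actions, so the leaf value is first negated and then alternates at every edge back towards the root. A small standalone sketch of that alternating-sign backup (the helper name is illustrative, not from the repository):

# Illustrative sketch of the alternating-sign backup used above: a leaf value
# of +1 is credited as -1 to the deepest recorded edge, +1 to the edge above it,
# and so on back to the root.
def backup_values(leaf_value, path_len):
    cur_value = -leaf_value
    signed = []
    for _ in range(path_len):
        signed.append(cur_value)   # value credited to the edge at this depth
        cur_value = -cur_value
    return signed                  # deepest edge first, root edge last

print(backup_values(1.0, 4))  # [-1.0, 1.0, -1.0, 1.0]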
Example #4
    def test_move_horizontal_win(self):
        f = game.encode_lists([[]] * 7)

        f, won = game.move(f, 0, 1)
        self.assertFalse(won)
        l = game.decode_binary(f)
        self.assertEqual(l, [[1]] + [[]] * 6)

        f, won = game.move(f, 1, 1)
        self.assertFalse(won)
        l = game.decode_binary(f)
        self.assertEqual(l, [[1], [1]] + [[]] * 5)

        f, won = game.move(f, 3, 1)
        self.assertFalse(won)
        l = game.decode_binary(f)
        self.assertEqual(l, [[1], [1], [], [1], [], [], []])

        f, won = game.move(f, 2, 1)
        self.assertTrue(won)
        l = game.decode_binary(f)
        self.assertEqual(l, [[1], [1], [1], [1], [], [], []])
Example #5
    def test_move_vertical_win(self):
        f = game.encode_lists([[]] * 7)

        f, won = game.move(f, 0, 1)
        self.assertFalse(won)
        l = game.decode_binary(f)
        self.assertEqual(l, [[1]] + [[]] * 6)

        f, won = game.move(f, 0, 1)
        self.assertFalse(won)
        l = game.decode_binary(f)
        self.assertEqual(l, [[1, 1]] + [[]] * 6)

        f, won = game.move(f, 0, 1)
        self.assertFalse(won)
        l = game.decode_binary(f)
        self.assertEqual(l, [[1, 1, 1]] + [[]] * 6)

        f, won = game.move(f, 0, 1)
        self.assertTrue(won)
        l = game.decode_binary(f)
        self.assertEqual(l, [[1, 1, 1, 1]] + [[]] * 6)
Example #6
    def test_move_horizontal_win(self):
        f = game.encode_lists([[]]*7)

        f, won = game.move(f, 0, 1)
        self.assertFalse(won)
        l = game.decode_binary(f)
        self.assertEqual(l, [[1]] + [[]]*6)

        f, won = game.move(f, 1, 1)
        self.assertFalse(won)
        l = game.decode_binary(f)
        self.assertEqual(l, [[1], [1]] + [[]]*5)

        f, won = game.move(f, 3, 1)
        self.assertFalse(won)
        l = game.decode_binary(f)
        self.assertEqual(l, [[1], [1], [], [1], [], [], []])

        f, won = game.move(f, 2, 1)
        self.assertTrue(won)
        l = game.decode_binary(f)
        self.assertEqual(l, [[1], [1], [1], [1], [], [], []])
Example #7
    def test_move_vertical_win(self):
        f = game.encode_lists([[]]*7)

        f, won = game.move(f, 0, 1)
        self.assertFalse(won)
        l = game.decode_binary(f)
        self.assertEqual(l, [[1]] + [[]]*6)

        f, won = game.move(f, 0, 1)
        self.assertFalse(won)
        l = game.decode_binary(f)
        self.assertEqual(l, [[1, 1]] + [[]]*6)

        f, won = game.move(f, 0, 1)
        self.assertFalse(won)
        l = game.decode_binary(f)
        self.assertEqual(l, [[1, 1, 1]] + [[]]*6)

        f, won = game.move(f, 0, 1)
        self.assertTrue(won)
        l = game.decode_binary(f)
        self.assertEqual(l, [[1, 1, 1, 1]] + [[]]*6)
Example #8
    def search_minibatch(self,
                         count,
                         state_int,
                         player,
                         net,
                         step,
                         device="cpu"):
        backup_queue = []
        expand_states = []
        expand_steps = []
        expand_queue = []
        planned = set()
        for _ in range(count):
            value, leaf_state, leaf_step, states, actions = self.find_leaf(
                state_int, player, step)
            if value is not None:
                backup_queue.append((value, states, actions))
            else:
                if leaf_state not in planned:
                    planned.add(leaf_state)
                    leaf_state_lists = game.decode_binary(leaf_state)
                    expand_states.append(leaf_state_lists)
                    expand_steps.append(leaf_step)
                    expand_queue.append((leaf_state, states, actions))

        if expand_queue:
            batch_v = model.state_lists_to_batch(expand_states, expand_steps,
                                                 device)
            logits_v, values_v = net(batch_v)
            probs_v = F.softmax(logits_v, dim=1)
            values = values_v.data.cpu().numpy()[:, 0]
            probs = probs_v.data.cpu().numpy()

            for (leaf_state, states,
                 actions), value, prob in zip(expand_queue, values, probs):
                self.visit_count[leaf_state] = [0] * actionTable.AllMoveLength
                self.value[leaf_state] = [0.0] * actionTable.AllMoveLength
                self.value_avg[leaf_state] = [0.0] * actionTable.AllMoveLength
                self.probs[leaf_state] = prob
                backup_queue.append((value, states, actions))

        for value, states, actions in backup_queue:
            cur_value = -value
            for state_int, action in zip(states[::-1], actions[::-1]):
                self.visit_count[state_int][action] += 1
                self.value[state_int][action] += cur_value
                self.value_avg[state_int][action] =\
                    self.value[state_int][action] / self.visit_count[state_int][action]
                cur_value = -cur_value
Example #9
def render(pan_str, player_human):
    """Print the Janggi board decoded from pan_str, flipping rows and columns
    so the board is shown from the human player's point of view."""
    pan = game.decode_binary(pan_str)
    print("   1  2  3  4  5  6  7  8  9")
    for y in range(10):
        s = chr((10-y if y>0 else 0)+ord('0'))+" "
        for x in range(9):
            a = pan[y if player_human>0 else 9-y][x if player_human>0 else 8-x]
            s += piece_str[a // 10 * 7 + a % 10 - 1] + ('-' if x < 8 else ' ') if a > 0 else \
                (('┌' if x < 1 else '┬' if x < 8 else '┐') if y < 1 else \
                ('├' if x < 1 else '╋' if x < 8 else '┤') if y < 9 else \
                ('└' if x < 1 else '┴' if x < 8 else '┘')) + ('─-' if x < 8 else ' ')
        print(s)
        if y<9:
            print("  │  │  │  │\│/│  │  │  │ " if y<1 or y==7 else "  │  │  │  │/│\│  │  │  │ "\
                if y==1 or y>7 else "  │  │  │  │  │  │  │  │  │ ")
Example #10
    def search_minibatch(self, count, state_int, player, net, device="cpu"):
        """
        Perform several MCTS searches.
        """
        backup_queue = []
        expand_states = []
        expand_players = []
        expand_queue = []
        planned = set()
        for _ in range(count):
            value, leaf_state, leaf_player, states, actions = self.find_leaf(state_int, player)
            if value is not None:
                backup_queue.append((value, states, actions))
            else:
                if leaf_state not in planned:
                    planned.add(leaf_state)
                    leaf_state_lists = game.decode_binary(leaf_state)
                    expand_states.append(leaf_state_lists)
                    expand_players.append(leaf_player)
                    expand_queue.append((leaf_state, states, actions))

        # do expansion of nodes
        if expand_queue:
            batch_v = model.state_lists_to_batch(expand_states, expand_players, device)
            logits_v, values_v = net(batch_v)
            probs_v = F.softmax(logits_v, dim=1)
            values = values_v.data.cpu().numpy()[:, 0]
            probs = probs_v.data.cpu().numpy()

            # create the nodes
            for (leaf_state, states, actions), value, prob in zip(expand_queue, values, probs):
                self.visit_count[leaf_state] = [0] * game.GAME_COLS
                self.value[leaf_state] = [0.0] * game.GAME_COLS
                self.value_avg[leaf_state] = [0.0] * game.GAME_COLS
                self.probs[leaf_state] = prob
                backup_queue.append((value, states, actions))

        # perform backup of the searches
        for value, states, actions in backup_queue:
            # The leaf state itself is not stored in states/actions, so its
            # value belongs to the opponent; negate it and flip the sign at
            # every step while walking back up the path.
            cur_value = -value
            for state_int, action in zip(states[::-1], actions[::-1]):
                self.visit_count[state_int][action] += 1
                self.value[state_int][action] += cur_value
                self.value_avg[state_int][action] = self.value[state_int][action] / self.visit_count[state_int][action]
                cur_value = -cur_value
Example #11
            print("Step %d, steps %3d, leaves %4d, steps/s %5.2f, leaves/s %6.2f, best_idx %d, replay %d" % (
                step_idx, game_steps, game_nodes, speed_steps, speed_nodes, best_idx, len(replay_buffer)))
            step_idx += 1

            if len(replay_buffer) < MIN_REPLAY_TO_TRAIN:
                continue

            # train
            sum_loss = 0.0
            sum_value_loss = 0.0
            sum_policy_loss = 0.0

            for _ in range(TRAIN_ROUNDS):
                batch = random.sample(replay_buffer, BATCH_SIZE)
                batch_states, batch_who_moves, batch_probs, batch_values = zip(*batch)
                batch_states_lists = [game.decode_binary(state) for state in batch_states]
                states_v = model.state_lists_to_batch(batch_states_lists, batch_who_moves, device)

                optimizer.zero_grad()
                probs_v = torch.FloatTensor(batch_probs).to(device)
                values_v = torch.FloatTensor(batch_values).to(device)
                out_logits_v, out_values_v = net(states_v)

                loss_value_v = F.mse_loss(out_values_v.squeeze(-1), values_v)
                loss_policy_v = -F.log_softmax(out_logits_v, dim=1) * probs_v
                loss_policy_v = loss_policy_v.sum(dim=1).mean()

                loss_v = loss_policy_v + loss_value_v
                loss_v.backward()
                optimizer.step()
                sum_loss += loss_v.item()
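
The loss computed in this training loop is the usual AlphaZero combination: MSE on the value head against the stored game result, plus cross-entropy between the policy head and the MCTS visit probabilities. A minimal self-contained sketch of the same loss on random tensors (batch size and action count below are made up, not taken from the code above):

import torch
import torch.nn.functional as F

# Sketch only: AlphaZero-style loss on dummy data (batch of 4, 7 actions).
out_logits_v = torch.randn(4, 7)                   # policy head output
out_values_v = torch.randn(4, 1)                   # value head output
probs_v = F.softmax(torch.randn(4, 7), dim=1)      # MCTS target probabilities
values_v = torch.rand(4) * 2 - 1                   # game outcomes in [-1, 1]

loss_value_v = F.mse_loss(out_values_v.squeeze(-1), values_v)
loss_policy_v = (-F.log_softmax(out_logits_v, dim=1) * probs_v).sum(dim=1).mean()
loss_v = loss_policy_v + loss_value_v
print(loss_v.item())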
Example #12
def play_game(net1, steps_before_tau_0, mcts_batch_size, device="cpu"):
    """Play one interactive Janggi game between the human and the MCTS-guided net."""
    assert isinstance(net1, model.Net)
    assert isinstance(steps_before_tau_0, int) and steps_before_tau_0 >= 0
    assert isinstance(mcts_batch_size, int) and mcts_batch_size > 0
    global mcts_searches

    pan = game.encode_lists([list(i) for i in game.INITIAL_STATE], 0)
    historystr = []
    cur_player = 0
    step = 0
    mctsi = mcts.MCTS()

    result = None; exitf = False
    a0 = ord('1')
    while True:
        # Prompt: "Choose the side you want to play: 0) Cho, 1) Han?"
        s=input('플레이하려는 진영을 선택하세요 0) 초, 1)한 ?')
        if s.find('level') >= 0:
            mcts_searches = LEVELC * int(s[6:])
            print('OK', flush=True)
        else:
            player_human = 0 if int(s)<1 else 1
            break

    while result is None:
        movelist = game.possible_moves(pan, cur_player, step)
        if step>9 and historystr[-4][:90]==historystr[-8][:90]:
            p = game.decode_binary(pan)
            for idx, m in enumerate(movelist):
                spos = m // 100; tpos = m % 100; y0 = spos // 9; x0 = spos % 9; y1 = tpos // 9; x1 = tpos % 9
                captured = p[y1][x1]; p[y1][x1] = p[y0][x0]; p[y0][x0] = 0
                ps = game.encode_lists(p, step+1)
                if ps[:90]==historystr[-4][:90]:
                    del movelist[idx]; break
                p[y0][x0] = p[y1][x1]; p[y1][x1] = captured

        if (step<2 and cur_player != player_human) or (step>1 and cur_player == player_human):
            if step < 2:
                print("마상 차림을 선택하세요 0) "+masang[0]+", 1) "+masang[1]+", 2) "+masang[2]+", 3) "+masang[3])
            else:
                render(pan, player_human)
                if step==2 or step==3:
                    print("")
                    print("옮기고자 하는 기물의 세로 번호, 가로 번호, 목적지의 세로 번호, 가로 번호 ex) 0010  한수 쉬기: 0")
            action = -1
            while action<0:
                s=input((str(step-1) if step>1 else '')+' ? ')
                if s=="new": exitf=True; break
                elif s.find('level')>=0:
                    mcts_searches=LEVELC*int(s[6:]); print('OK', flush=True)
                elif step<2:
                    if len(s)==1 and s[0]>='0' and s[0]<'4': action = int(s) + 10000
                elif len(s)==1: action = 0
                elif s=='undo' and step>3:
                    step-=2; historystr.pop(); historystr.pop(); pan=historystr[-1]
                    movelist = game.possible_moves(pan, cur_player, step)
                    render(pan, player_human)
                elif len(s)==4 and s[0]>='0' and s[0]<='9' and s[1]>'0' and s[1]<='9' and s[2]>='0' and s[2]<='9' and s[3]>'0' and s[3]<='9':
                    b1=9-ord(s[0])+a0 if s[0]>'0' else 0
                    if player_human<1: b1=9-b1
                    b2 = ord(s[1]) - a0
                    if player_human < 1: b2 = 8 - b2
                    b3 = 9-ord(s[2]) + a0 if s[2]>'0' else 0
                    if player_human < 1: b3 = 9 - b3
                    b4 = ord(s[3]) - a0
                    if player_human < 1: b4 = 8 - b4
                    action = (b1*9 + b2)*100 + b3*9+b4
                if action not in movelist: action = -1
                else: print('OK', flush=True)
        else:
            mctsi.search_batch(mcts_searches, mcts_batch_size, pan,
                            cur_player, net1, step, device=device)
            probs, values = mctsi.get_policy_value(pan, movelist, cur_player)
            chList = actionTable.choList if cur_player < 1 else actionTable.hanList
            n = np.random.choice(actionTable.AllMoveLength, p=probs) if step<steps_before_tau_0 else np.argmax(probs)
            action = chList[n]
            """for m in movelist:
                print('%04d %.2f' % (m, probs[chList.index(m)]), end=',  ')
            print()"""
            if step<2:
                print(('한: ' if step<1 else '초: ')+masang[action-10000]+' '+str(values[n]), flush=True)
                if step==1: render(pan, player_human)
            else:
                if action<1: print('한수쉼'+' '+str(values[n]))  # "pass"
                else:
                    b1=action//100//9
                    if player_human<1: b1=9-b1
                    b2 = action//100%9
                    if player_human < 1: b2 = 8 - b2
                    b3 = action%100//9
                    if player_human < 1: b3 = 9 - b3
                    b4 = action%100%9
                    if player_human < 1: b4 = 8 - b4
                    print((chr(9-b1+a0) if b1>0 else '0')+chr(b2+a0)+(chr(9-b3+a0) if b3>0 else '0')+chr(b4+a0)+' '+str(values[n]))
        if exitf: break
        pan, won = game.move(pan, action, step)
        historystr.append(pan)
        if won>0:
            render(pan, player_human)
            print(('초' if won==1 else '한')+' 승')  # "Cho wins" / "Han wins"
            break
        cur_player = 1-cur_player
        step += 1
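
Moves in this loop are packed integers of the form (from_row*9 + from_col)*100 + to_row*9 + to_col on the 10x9 Janggi board, with values of 10000 and above reserved for the initial arrangement choice. A small sketch of the inverse mapping, using a hypothetical helper name rather than anything from the repository:

# Hypothetical helper: unpack an action of the form (y0*9 + x0)*100 + (y1*9 + x1)
# into (source, target) board coordinates.
def unpack_action(action):
    spos, tpos = divmod(action, 100)
    y0, x0 = divmod(spos, 9)
    y1, x1 = divmod(tpos, 9)
    return (y0, x0), (y1, x1)

print(unpack_action(1234))  # ((1, 3), (3, 7))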
Example #13
            step_idx += 1

            if len(replay_buffer) < MIN_REPLAY_TO_TRAIN:
                continue

            # train
            sum_loss = 0.0
            sum_value_loss = 0.0
            sum_policy_loss = 0.0

            for _ in range(TRAIN_ROUNDS):
                batch = random.sample(replay_buffer, BATCH_SIZE)
                batch_states, batch_who_moves, batch_probs, batch_values = zip(
                    *batch)
                batch_states_lists = [
                    game.decode_binary(state) for state in batch_states
                ]
                states_v = model.state_lists_to_batch(batch_states_lists,
                                                      batch_who_moves,
                                                      args.cuda)

                optimizer.zero_grad()
                probs_v = Variable(torch.FloatTensor(batch_probs))
                values_v = Variable(torch.FloatTensor(batch_values))
                if args.cuda:
                    probs_v = probs_v.cuda()
                    values_v = values_v.cuda()
                out_logits_v, out_values_v = net(states_v)

                loss_value_v = F.mse_loss(out_values_v, values_v)
                loss_policy_v = -F.log_softmax(out_logits_v, dim=1) * probs_v