Example #1
File: player.py Project: xmgfx/doudizhu
def main():
    connection = connect()
    config.set_device(args.gpu)

    # Block until the remote end sends the model name, then restore that checkpoint.
    name = None
    while name is None:
        name = connection.recv()
    model = Model('model_' + name)
    model.forced_restore()
    agent = Agent(model)

    # Block until the number of MCTS simulations per move arrives.
    simulations = None
    while simulations is None:
        simulations = connection.recv()
    simulations = int(simulations)
    config.simulations = simulations

    # Receive the initial deal, load it into the environment, and start the game loop.
    env = GameEnv()
    init = deal_init(connection)
    env.load(init)

    game = Game(env, args.player)
    game_start(agent, simulations, game, connection)
Example #2
    def __call__(self):
        # Return the next stored sample as (game, p, v), or None once exhausted.
        if self.empty():
            return None

        file, init, player, p, v = self.data[self.iter]
        env = GameEnv(False)
        env.load(init)
        game = Game(env, player)

        self.iter += 1
        return game, p, v
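
For reference, a minimal self-contained sketch of how a callable loader with this contract can be drained; ToyLoader and its sample tuples are hypothetical stand-ins, not part of the project.

class ToyLoader:
    # Hypothetical stand-in that mimics the __call__ contract above: return
    # the next stored sample, or None once every sample has been served.
    def __init__(self, samples):
        self.data = samples
        self.iter = 0

    def empty(self):
        return self.iter >= len(self.data)

    def __call__(self):
        if self.empty():
            return None
        sample = self.data[self.iter]
        self.iter += 1
        return sample


loader = ToyLoader([('game-a', [0.7, 0.3], 1.0), ('game-b', [0.2, 0.8], -1.0)])
while True:
    sample = loader()
    if sample is None:      # loader exhausted
        break
    game, p, v = sample
    print(game, p, v)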
Example #3
File: unpack.py Project: xmgfx/doudizhu
import numpy as np

from game import Game, GameEnv


def unpack(init, ps, player):
    # Replay a recorded game from the initial deal `init` and action list `ps`,
    # collecting the growing model-input history seen before each move.
    env = GameEnv(False)
    env.load(init)
    game = Game(env, player)
    histories = []
    for action in ps:
        state = game.to_model_input()
        # Each entry extends the previous history by the current state.
        histories.append((histories[-1] if len(histories) else []) + [state])
        game.move(action)
    # Convert each per-step history to a compact int8 array.
    for i in range(len(histories)):
        histories[i] = np.array(histories[i], dtype=np.int8)
    return histories
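
A small self-contained sketch of the growing-history accumulation used in unpack; the dummy states below stand in for game.to_model_input() outputs and are not real model inputs.

import numpy as np


def growing_histories(states):
    # Same accumulation idiom as unpack above: histories[i] holds the first
    # i + 1 states, so each entry is one step longer than the previous one.
    histories = []
    for state in states:
        histories.append((histories[-1] if len(histories) else []) + [state])
    return [np.array(h, dtype=np.int8) for h in histories]


dummy_states = [np.full((2, 3), step, dtype=np.int8) for step in range(4)]
for h in growing_histories(dummy_states):
    print(h.shape)   # (1, 2, 3), (2, 2, 3), (3, 2, 3), (4, 2, 3)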
Example #4
File: agent.py Project: xmgfx/doudizhu
def main():
    model: Model

    config.set_device(1)
    env = GameEnv()
    # model = Model('model')
    model = Model('model_201912080009')
    # model = Model('model_tencent0824')
    model.forced_restore()
    # agent = Agent(model)
    agent = Agent(model)

    # Hard-coded initial deal (one row of rank counts per player) and the
    # recorded action sequence to replay before searching.
    init = [[2, 1, 2, 1, 0, 1, 3, 1, 2, 1, 0, 1, 2, 0, 0],
            [2, 1, 1, 2, 1, 1, 0, 3, 1, 2, 0, 2, 0, 0, 1],
            [0, 1, 1, 1, 3, 2, 0, 0, 1, 1, 3, 1, 2, 1, 0]]
    actions = [
        352, 352, 353, 338, 343, 347, 123, 0, 0, 20, 22, 23, 24, 26, 0, 28, 0,
        29, 0, 0, 39, 0, 0, 116, 0, 0, 76, 324, 0, 0, 41, 42, 0, 0, 92, 317,
        320, 0, 0, 31
    ]  #, 42, 0, 0, 15, 18]
    init = np.array(init, dtype=np.int32)

    env.load(init)
    print(env)

    root, player = new_Root(), 2

    game = Game(env, player)
    # Replay the recorded actions, advancing both the search tree root and the game.
    for action in actions:
        mct = MCT(game, root)
        agent.simulates([mct], store_rnn_states='Always')
        root.root(action)
        game.move(action)
        print(game)
        print('GAUSS', game.gauss())
        print(game.curr_player(), game.my_player())

    print('====')
    print(game.curr_player())
    print(game.my_player())
    print(game.lord_player())
    print(game.hand_cards_num())
    print(game.bottom())
    print('====')
    mct = MCT(game, root)
    # Run 2000 simulations from the current position, printing per-edge
    # statistics (prior P, visit count N, mean value Q) every 10 simulations.
    for cnt in range(2000):
        agent.simulates([mct], store_rnn_states='Always')

        # if cnt == 0:
        #     history, rnn_state = root.get_model_input()
        #     print(history)
        if (cnt + 1) % 10 == 0:
            print(cnt + 1)
            for action, P, son in mct.root.edges():
                print('%d: %.8f %d %.3f' % (action, P, son.N(), son.Q()))
    print('-------------------------')
    # Turn visit counts into a play distribution: shift counts down by their
    # mean (minus a 0.001 offset), clip negatives to zero, then apply the
    # temperature exponent t and normalize.
    t = 1.0
    s = np.array([son.N() for action, P, son in mct.root.edges()])
    p = np.array([P for action, P, son in mct.root.edges()])
    print(s)
    print(np.mean(s))
    w = s + 0.001 - np.mean(s)
    w[w < 0] = 0
    w = (w**t) / (w**t).sum()
    print(w)
    print(s / s.sum())
    print(p)
    mct.json()
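
The last few lines above turn root visit counts into a play distribution; the following isolated sketch restates that transform with made-up visit counts (the 0.001 offset and t = 1.0 are copied from the example; visit_counts_to_policy is a hypothetical helper name, not project code).

import numpy as np


def visit_counts_to_policy(counts, t=1.0):
    # Shift counts down by their mean (minus a 0.001 offset), clip negatives
    # to zero, raise to the temperature exponent t, and normalize.
    w = counts + 0.001 - np.mean(counts)
    w[w < 0] = 0
    return (w ** t) / (w ** t).sum()


counts = np.array([120.0, 40.0, 30.0, 10.0])   # made-up visit counts
print(visit_counts_to_policy(counts))           # only above-average edges keep mass
print(counts / counts.sum())                    # plain normalization, for comparison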
Example #5
File: test.py Project: xmgfx/doudizhu
def main():
    np.set_printoptions(precision=2, linewidth=128, suppress=True)
    path = 'test'
    config.device = torch.device('cuda:1')
    config.set_device(-1)
    model = Model('model_201912080009')
    model.restore()

    files = os.listdir('gen/%s/' % path)
    files.sort(key=lambda k: k.split('_')[0], reverse=True)
    # print(files[-1])
    file = np.random.choice(files[:100])
    # file = '201912101326_85d94af6fe1a588b.pkl'

    print(file)
    data = pickle_load('gen/%s/%s' % (path, file))
    # data = pickle_load('gen/test/' + files[-2])
    # np.random.shuffle(data)

    player = 2

    init, actions, _v = [None, [], -1.0]

    init = [[2, 1, 2, 1, 0, 1, 3, 1, 2, 1, 0, 1, 2, 0, 0],
            [2, 1, 1, 2, 1, 1, 0, 3, 1, 2, 0, 2, 0, 0, 1],
            [0, 1, 1, 1, 3, 2, 0, 0, 1, 1, 3, 1, 2, 1, 0]]
    actions = [
        352, 352, 353, 338, 343, 347, 123, 0, 0, 20, 22, 23, 24, 26, 0, 28, 0,
        29, 0, 0, 39, 0, 0, 116, 0, 0, 76, 324, 0, 0, 41, 42, 0, 0, 92, 317,
        320, 0, 0, 31, 42, 0, 0, 15, 18
    ]
    init = np.array(init, dtype=np.int32)

    # actions = [353, 352, 352, 339, 349, 349, 15]

    # init = [[2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 0, 0, 0, 0, 0],
    # 		[2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 0, 0, 0, 0, 0],
    # 		[0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 4, 4, 4, 1, 1]]
    # init = np.array(init, dtype=np.int32)
    # actions = [353, 352, 352, 344, 345, 346, 151]

    # init, actions, _v = data[player]
    print(_v, player)
    print(init)
    print(actions)
    print('=============================================')
    print('player:', player)

    # Rebuild the per-step input histories, regularize them into batched form
    # with their lengths, move them to the GPU, and score every step with the model.
    histories = unpack(init, actions, player)
    histories, lengths = history_regularize(histories)
    histories, lengths = to_cuda(histories, lengths)

    vs, ps, _ = model(histories, lengths, None)

    env = GameEnv(False)
    env.load(init)
    game = Game(env, player)
    for v, p, action in zip(vs, ps, actions):
        print('----------------------------------')
        print('my_player: %d, curr_player: %d' % (player, game.curr_player()))
        # Show the model's eight highest-probability actions, whether each is
        # currently legal, and the probability assigned to the action actually played.
        idx = np.argsort(p)[::-1]
        for i in range(8):
            print(game.could_move(idx[i]), end=' ')
            print('(%d, %.2f%%)' % (idx[i], p[idx[i]] * 100))
        print('action: %d, %.2f%%' % (action, p[action] * 100))

        if idx[0] == 60 and p[idx[0]] > 0.3:
            print(game)
            print(game.policy())
            print(game.hand_cards_num())
            print(game.bottom())
            print(game.curr_player(), game.lord_player())
            return 0

        # model_input = game.to_model_input()
        # for channel in range(26, 28):
        # 	print(channel)
        # 	print(model_input[channel])
        print('%.1f, %.3f' % (_v, v[0]))

        game.move(action)
        print(game)
        print('Gauss:', game.gauss())
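
The ranking in the loop above is a plain descending argsort over the policy vector; here is a tiny self-contained illustration with a made-up probability vector (the action ids are arbitrary).

import numpy as np

p = np.array([0.05, 0.50, 0.10, 0.30, 0.05])   # made-up policy over 5 actions
idx = np.argsort(p)[::-1]                       # action ids, most probable first

for i in idx[:3]:
    print('(%d, %.2f%%)' % (i, p[i] * 100))
# (1, 50.00%)
# (3, 30.00%)
# (2, 10.00%)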
Example #6
import os
import time

import numpy as np

from game import Game, GameEnv
# Note: pickle_load is a project helper; its import is not shown in this snippet.

path = 'data/'

# Pick a random generated game record and load it.
files = os.listdir(path)
file = np.random.choice(files)

print(file)
data = pickle_load(path + file)

init, policy, v = data[0]
env = GameEnv()
print(init)
env.load(init)
print(policy)
# Replay the recorded policy; when env.period() is 3, a placeholder move of -1
# is passed instead of the recorded action.
for action in policy:
    print(env.curr_player(), env.lord_player())
    print(action)
    if env.period() != 3:
        env.move(action)
    else:
        env.move(-1)
    print(env)
    print('---------------------')
    time.sleep(0.5)
print(v)

# TODO: MCTS simulation should be the same as the action choice