Example #1
    print(">> Start a new game")

    trajectories, payoffs = env.run(is_training=False)
    # If the human did not take the final action, we need to
    # print the other players' actions
    final_state = trajectories[0][-1][-2]
    action_record = final_state['action_record']
    state = final_state['raw_obs']
    _action_list = []
    for i in range(1, len(action_record) + 1):
        if action_record[-i][0] == state['current_player']:
            break
        _action_list.insert(0, action_record[-i])
    for pair in _action_list:
        print('>> Player', pair[0], 'chooses', pair[1])

    # Let's take a look at every player's cards
    print('===============     Cards of All Players    ===============')
    for hand in env.get_perfect_information()['hand_cards']:
        print_card(hand)

    print('===============     Result     ===============')
    if payoffs[0] > 0:
        print('You win {} chips!'.format(payoffs[0]))
    elif payoffs[0] == 0:
        print('It is a tie.')
    else:
        print('You lose {} chips!'.format(-payoffs[0]))
    print('')

    input("Press any key to continue...")
    print(">> Start a new game")

    trajectories, payoffs = env.run(is_training=False)
    # If the human did not take the final action, we need to
    # print the other players' actions
    # (Here the last trajectory element is the final state itself; in older
    # rlcard versions trajectories hold transitions, hence the
    # trajectories[0][-1][-2] indexing used above.)
    final_state = trajectories[0][-1]
    action_record = final_state['action_record']
    state = final_state['raw_obs']
    _action_list = []
    for i in range(1, len(action_record) + 1):
        if action_record[-i][0] == state['current_player']:
            break
        _action_list.insert(0, action_record[-i])
    for pair in _action_list:
        print('>> Player', pair[0], 'chooses', pair[1])

    # Let's take a look at the agent's cards
    print('===============     CFR Agent    ===============')
    print_card(env.get_perfect_information()['hand_cards'][1])

    print('===============     Result     ===============')
    if payoffs[0] > 0:
        print('You win {} chips!'.format(payoffs[0]))
    elif payoffs[0] == 0:
        print('It is a tie.')
    else:
        print('You lose {} chips!'.format(-payoffs[0]))
    print('')

    input("Press any key to continue...")
Example #3
def run():
    torch.multiprocessing.freeze_support()
    env = rlcard.make('no-limit-holdem',
                      config={
                          'record_action': True,
                          'game_player_num': 2,
                          'env_num': 8,
                          'use_raw': True
                      })
    # eval_env = rlcard.make('no-limit-holdem', config={'seed': 12, 'game_player_num': 2})
    # eval_env2 = rlcard.make('no-limit-holdem', config={'seed': 43, 'game_player_num': 2})
    # eval_env3 = rlcard.make('no-limit-holdem', config={'seed': 43, 'game_player_num': 2})
    # Set the number of iterations and how often to evaluate performance

    evaluate_every = 1024
    evaluate_num = 32
    episode_num = 20480

    # The initial memory size
    memory_init_size = 256

    # Train the agent every X steps
    train_every = 256
    agents = []

    agents.append(
        NFSPAgent(scope='nfsp' + str(0),
                  action_num=env.action_num,
                  state_shape=env.state_shape,
                  hidden_layers_sizes=[512, 512],
                  anticipatory_param=0.1,
                  rl_learning_rate=0.015,
                  sl_learning_rate=0.0075,
                  q_epsilon_start=.3,
                  min_buffer_size_to_learn=memory_init_size,
                  q_replay_memory_size=20480,
                  q_replay_memory_init_size=memory_init_size,
                  train_every=train_every + 44,
                  q_train_every=train_every,
                  q_mlp_layers=[512, 512],
                  evaluate_with='best_response'))

    agents.append(
        NFSPAgent(scope='nfsp' + str(1),
                  action_num=env.action_num,
                  state_shape=env.state_shape,
                  hidden_layers_sizes=[512, 512],
                  anticipatory_param=0.1,
                  rl_learning_rate=0.015,
                  sl_learning_rate=0.0075,
                  q_epsilon_start=.3,
                  q_replay_memory_size=20480,
                  min_buffer_size_to_learn=memory_init_size,
                  q_replay_memory_init_size=memory_init_size,
                  train_every=train_every + 44,
                  q_train_every=train_every,
                  q_mlp_layers=[512, 512],
                  evaluate_with='best_response'))

    # Checkpoints 7 and 5 turned out to be all-in junkies; checkpoint 8 is loaded below.
    checkpoint = torch.load(os.path.join('models', 'ivvan', 'cp', '8', 'model-nfsp1.pth'))
    checkpoint2 = torch.load(os.path.join('models', 'ivvan', 'cp', '8', 'model-nfsp0.pth'))
    # for agent in agents:
    #     agent.load(checkpoint)
    agents[1].load(checkpoint)
    agents[0].load(checkpoint2)
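    # Note: the HumanAgent constructed below is never seated;
    # env.set_agents pits the two NFSP agents against each other,
    # so this loop replays bot-vs-bot games.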
    human = nolimit_holdem_human_agent.HumanAgent(env.action_num)
    env.set_agents([agents[0], agents[1]])

    while True:
        print(">> Start a new game")

        trajectories, payoffs = env.run(is_training=False)
        if not trajectories[0]:
            # the bot folded immediately
            continue

        # If the human did not take the final action, we need to
        # print the other players' actions
        final_state = trajectories[0][-1][-2]
        action_record = final_state['action_record']
        state = final_state['raw_obs']
        _action_list = []
        for i in range(1, len(action_record) + 1):
            if action_record[-i][0] == state['current_player']:
                break
            _action_list.insert(0, action_record[-i])

        for pair in _action_list:
            print('>> Player', pair[0], 'chooses', pair[1])

        # Let's take a look at the agent's cards
        print('===============     NFSP Agent    ===============')
        print_card(env.get_perfect_information()['hand_cards'][1])

        print('===============     Result     ===============')
        if payoffs[0] > 0:
            print('You win {} chips!'.format(payoffs[0]))
        elif payoffs[0] == 0:
            print('It is a tie.')
        else:
            print('You lose {} chips!'.format(-payoffs[0]))
        print('')

        input("Press any key to continue...")
Example #4
def play():
    import tensorflow as tf
    # We load a pretrained model here. Change the path to point at your own model.

    if tf.test.gpu_device_name():
        print('GPU found')
    else:
        print("No GPU found")

    # os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

    # Make environment
    env = rlcard.make('no-limit-holdem',
                      config={
                          'record_action': True,
                          'game_player_num': 2
                      })

    # Set a global seed
    set_global_seed(0)

    evaluate_every = 2048
    evaluate_num = 32
    episode_num = 262144

    # The initial memory size
    memory_init_size = 256

    # Train the agent every X steps
    train_every = 256
    graph = tf.Graph()
    sess = tf.Session(graph=graph)

    with graph.as_default():
        agents = []

        agents.append(
            NFSPAgent(sess,
                      scope='nfsp' + str(0),
                      action_num=env.action_num,
                      state_shape=env.state_shape,
                      hidden_layers_sizes=[512, 512],
                      anticipatory_param=0.1,
                      rl_learning_rate=0.01,
                      sl_learning_rate=0.005,
                      q_epsilon_start=.6,
                      min_buffer_size_to_learn=memory_init_size,
                      q_replay_memory_size=80000,
                      q_replay_memory_init_size=memory_init_size,
                      train_every=train_every + 44,
                      q_train_every=train_every,
                      q_mlp_layers=[512, 512],
                      evaluate_with='best_response'))

        agents.append(
            NFSPAgent(sess,
                      scope='nfsp' + str(1),
                      action_num=env.action_num,
                      state_shape=env.state_shape,
                      hidden_layers_sizes=[512, 512],
                      anticipatory_param=0.1,
                      rl_learning_rate=0.01,
                      sl_learning_rate=0.005,
                      q_epsilon_start=.6,
                      q_replay_memory_size=80000,
                      min_buffer_size_to_learn=memory_init_size,
                      q_replay_memory_init_size=memory_init_size,
                      train_every=train_every + 44,
                      q_train_every=train_every,
                      q_mlp_layers=[512, 512],
                      evaluate_with='best_response'))

    check_point_path = os.path.join('models', 'nolimit_holdem_nfsp', 'no_all_in',
                                    'cp', '9')
    print(
        '-------------------------------------------------------------------------------------'
    )
    print(check_point_path)
    with sess.as_default():
        with graph.as_default():
            saver = tf.train.Saver()
            saver.restore(sess, tf.train.latest_checkpoint(check_point_path))

    human = nolimit_holdem_human_agent.HumanAgent(env.action_num)
    env.set_agents([human, agents[1]])

    while True:
        print(">> Start a new game")

        trajectories, payoffs = env.run(is_training=False)
        if not trajectories[0]:
            # the bot folded immediately
            continue

        # If the human did not take the final action, we need to
        # print the other players' actions
        final_state = trajectories[0][-1][-2]
        action_record = final_state['action_record']
        state = final_state['raw_obs']
        _action_list = []
        for i in range(1, len(action_record) + 1):
            if action_record[-i][0] == state['current_player']:
                break
            _action_list.insert(0, action_record[-i])
        for pair in _action_list:
            print('>> Player', pair[0], 'chooses', pair[1])

        # Let's take a look at the agent's cards
        print('===============     NFSP Agent    ===============')
        print_card(env.get_perfect_information()['hand_cards'][1])

        print('===============     Result     ===============')
        if payoffs[0] > 0:
            print('You win {} chips!'.format(payoffs[0]))
        elif payoffs[0] == 0:
            print('It is a tie.')
        else:
            print('You lose {} chips!'.format(-payoffs[0]))
        print('')

        input("Press any key to continue...")