Example 1
    def dqn_train_keras_rl(self, num_par_agents, model_name):
        """Implementation of kreras-rl deep q learing."""
        from agents.agent_consider_equity import Player as EquityPlayer
        from agents.agent_keras_rl_dqn import Player as DQNPlayer
        from agents.agent_random import Player as RandomPlayer
        env_name = 'neuron_poker-v0'
        env = gym.make(env_name, initial_stacks=self.stack, funds_plot=self.funds_plot, render=self.render,
                       use_cpp_montecarlo=self.use_cpp_montecarlo)

        np.random.seed(123)
        env.seed(123)
        env.add_player(EquityPlayer(name='equity/50/70',
                                    min_call_equity=.5, min_bet_equity=.7))
        env.add_player(EquityPlayer(name='equity/20/30',
                                    min_call_equity=.2, min_bet_equity=.3))
        env.add_player(RandomPlayer())
        env.add_player(RandomPlayer())
        env.add_player(RandomPlayer())
        # shell is used for callback to keras rl
        env.add_player(PlayerShell(name='keras-rl', stack_size=self.stack))

        env.reset()

        env_names = np.full((1, num_par_agents), model_name)  # one entry per parallel agent (currently unused)

        dqn = DQNPlayer()

        with multiprocessing.Pool(num_par_agents) as pool:
            # pass the callable and its arguments separately so the work runs in the pool workers
            result = pool.apply_async(parallel_dqn_train, args=(dqn, env, env_name))
            result.get()  # wait for the worker before the pool is torn down
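
The helper parallel_dqn_train is not defined in any of these examples. A minimal sketch of what such a worker might do, assuming the DQNPlayer interface used in the other snippets (initiate_agent followed by train), is:

def parallel_dqn_train(dqn, env, model_name):
    # Hypothetical worker for the pool above; the real helper is not shown in these examples.
    # Assumes the DQNPlayer interface used elsewhere: initiate_agent, then train.
    dqn.initiate_agent(env)
    dqn.train(env_name=model_name)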
Example 2
    def dqn_play_keras_rl(self):
        """Create 6 players, one of them a trained DQN"""
        env_name = 'neuron_poker-v0'
        stack = 500
        self.env = gym.make(env_name, initial_stacks=stack, render=self.render)
        self.env.add_player(
            EquityPlayer(name='equity/50/50',
                         min_call_equity=.5,
                         min_bet_equity=.5))
        self.env.add_player(
            EquityPlayer(name='equity/50/80',
                         min_call_equity=.8,
                         min_bet_equity=.8))
        self.env.add_player(
            EquityPlayer(name='equity/70/70',
                         min_call_equity=.7,
                         min_bet_equity=.7))
        self.env.add_player(
            EquityPlayer(name='equity/20/30',
                         min_call_equity=.2,
                         min_bet_equity=.3))
        self.env.add_player(RandomPlayer())
        self.env.add_player(PlayerShell(name='keras-rl', stack_size=stack))

        self.env.reset()

        dqn = DQNPlayer(load_model='dqn1', env=self.env)
        dqn.play(nb_episodes=self.num_episodes, render=self.render)
Example 3
    def dqn_play(self):
        """Create 6 players, one of them a trained DQN"""
        env_name = 'neuron_poker-v0'
        stack = 500
        num_of_plrs = 6
        self.env = gym.make(env_name,
                            num_of_players=num_of_plrs,
                            initial_stacks=stack,
                            render=self.render)
        self.env.add_player(
            EquityPlayer(name='equity/50/50',
                         min_call_equity=.5,
                         min_bet_equity=-.5))
        self.env.add_player(
            EquityPlayer(name='equity/50/80',
                         min_call_equity=.8,
                         min_bet_equity=-.8))
        self.env.add_player(
            EquityPlayer(name='equity/70/70',
                         min_call_equity=.7,
                         min_bet_equity=-.7))
        self.env.add_player(
            EquityPlayer(name='equity/20/30',
                         min_call_equity=.2,
                         min_bet_equity=-.3))
        self.env.add_player(RandomPlayer())
        self.env.add_player(DQNPlayer(load_model='neuron_poker-v0'))

        for _ in range(self.num_episodes):
            self.env.reset()
Example 4
    def dqn_train_keras_rl(self, model_name):
        """Implementation of kreras-rl deep q learing."""
        env_name = 'neuron_poker-v0'
        stack = 100
        env = gym.make(env_name,
                       initial_stacks=stack,
                       funds_plot=self.funds_plot,
                       render=self.render,
                       use_cpp_montecarlo=self.use_cpp_montecarlo)

        np.random.seed(123)
        env.seed(123)
        env.add_player(
            EquityPlayer(name='equity/50/70',
                         min_call_equity=.5,
                         min_bet_equity=.7))
        env.add_player(
            EquityPlayer(name='equity/20/30',
                         min_call_equity=.2,
                         min_bet_equity=.3))
        env.add_player(RandomPlayer())
        env.add_player(RandomPlayer())
        env.add_player(RandomPlayer())
        env.add_player(PlayerShell(
            name='keras-rl',
            stack_size=stack))  # shell is used for callback to keras rl

        env.reset()

        dqn = DQNPlayer()
        dqn.initiate_agent(env)
        dqn.train(env_name=model_name)
Example 5
    def dqn_play_keras_rl(self, model_name):
        """Create 6 players, one of them a trained DQN"""
        from agents.agent_consider_equity import Player as EquityPlayer
        from agents.agent_keras_rl_dqn import Player as DQNPlayer
        from agents.agent_random import Player as RandomPlayer
        env_name = 'neuron_poker-v0'
        self.env = gym.make(env_name,
                            initial_stacks=self.stack,
                            render=self.render)
        self.env.add_player(
            EquityPlayer(name='equity/50/50',
                         min_call_equity=.5,
                         min_bet_equity=.5))
        self.env.add_player(
            EquityPlayer(name='equity/50/80',
                         min_call_equity=.8,
                         min_bet_equity=.8))
        self.env.add_player(
            EquityPlayer(name='equity/70/70',
                         min_call_equity=.7,
                         min_bet_equity=.7))
        self.env.add_player(
            EquityPlayer(name='equity/20/30',
                         min_call_equity=.2,
                         min_bet_equity=.3))
        self.env.add_player(RandomPlayer())
        self.env.add_player(PlayerShell(name='keras-rl',
                                        stack_size=self.stack))

        self.env.reset()

        dqn = DQNPlayer(load_model=model_name, env=self.env)
        dqn.play(nb_episodes=self.num_episodes, render=self.render)
Example 6
    def equity_vs_random(self):
        """Create 6 players, 4 of them equity based, 2 of them random"""
        from agents.agent_consider_equity import Player as EquityPlayer
        from agents.agent_random import Player as RandomPlayer
        env_name = 'neuron_poker-v0'
        self.env = gym.make(env_name, initial_stacks=self.stack, render=self.render,
                            use_nn_equity=self.use_nn_equity, use_cpp_montecarlo=self.use_cpp_montecarlo)
        self.env.add_player(EquityPlayer(name='equity/50/50', min_call_equity=.5, min_bet_equity=-.5))
        self.env.add_player(EquityPlayer(name='equity/50/80', min_call_equity=.8, min_bet_equity=-.8))
        # self.env.add_player(EquityPlayer(name='equity/70/70', min_call_equity=.7, min_bet_equity=-.7))
        # self.env.add_player(EquityPlayer(name='equity/20/30', min_call_equity=.2, min_bet_equity=-.3))
        # self.env.add_player(RandomPlayer())
        # self.env.add_player(RandomPlayer())

        for _ in range(self.num_episodes):
            self.env.reset()
            self.winner_in_episodes.append(self.env.winner_ix)

        league_table = pd.Series(self.winner_in_episodes).value_counts()
        best_player = league_table.index[0]

        print("League Table")
        print("============")
        print(league_table)
        print(f"Best Player: {best_player}")
Example 7
    def dqn_train_custom_q1(self):
        """Create 6 players, 4 of them equity based, 2 of them random"""
        env_name = 'neuron_poker-v0'
        stack = 500
        self.env = gym.make(env_name, initial_stacks=stack, render=self.render)
        # self.env.add_player(EquityPlayer(name='equity/50/50', min_call_equity=.5, min_bet_equity=-.5))
        # self.env.add_player(EquityPlayer(name='equity/50/80', min_call_equity=.8, min_bet_equity=-.8))
        # self.env.add_player(EquityPlayer(name='equity/70/70', min_call_equity=.7, min_bet_equity=-.7))
        self.env.add_player(
            EquityPlayer(name='equity/20/30',
                         min_call_equity=.2,
                         min_bet_equity=-.3))
        # self.env.add_player(RandomPlayer())
        self.env.add_player(RandomPlayer())
        self.env.add_player(RandomPlayer())
        self.env.add_player(Custom_Q1(name='Deep_Q1'))

        for _ in range(self.num_episodes):
            self.env.reset()
            self.winner_in_episodes.append(self.env.winner_ix)

        league_table = pd.Series(self.winner_in_episodes).value_counts()
        best_player = league_table.index[0]

        print("League Table")
        print("============")
        print(league_table)
        print(f"Best Player: {best_player}")
Example 8
    def equity_self_improvement(self, improvement_rounds):
        """Create 6 players, 4 of them equity based, 2 of them random"""
        from agents.agent_consider_equity import Player as EquityPlayer
        calling = [.1, .2, .3, .4, .5, .6]
        betting = [.2, .3, .4, .5, .6, .7]

        for improvement_round in range(improvement_rounds):
            env_name = 'neuron_poker-v0'
            self.env = gym.make(env_name, initial_stacks=self.stack, render=self.render,
                                use_nn_equity=self.use_nn_equity, use_cpp_montecarlo=self.use_cpp_montecarlo)
            for i in range(6):
                self.env.add_player(EquityPlayer(name=f'Equity/{calling[i]}/{betting[i]}',
                                                 min_call_equity=calling[i],
                                                 min_bet_equity=betting[i]))

            for _ in range(self.num_episodes):
                self.env.reset()
                self.winner_in_episodes.append(self.env.winner_ix)

            league_table = pd.Series(self.winner_in_episodes).value_counts()
            best_player = int(league_table.index[0])
            print(league_table)
            print(f"Best Player: {best_player}")

            # self improve:
            self.log.info(f"Self improvment round {improvement_round}")
            for i in range(6):
                calling[i] = np.mean([calling[i], calling[best_player]])
                self.log.info(f"New calling for player {i} is {calling[i]}")
                betting[i] = np.mean([betting[i], betting[best_player]])
                self.log.info(f"New betting for player {i} is {betting[i]}")
Example 9
    def dqn_train_heads_up_keras_rl(self, model_name):
        """Implementation of kreras-rl deep q learing."""
        from agents.agent_consider_equity import Player as EquityPlayer
        from agents.agent_keras_rl_dqn import Player as DQNPlayer
        from agents.agent_random import Player as RandomPlayer
        env_name = 'neuron_poker-v0'
        env = gym.make(env_name, initial_stacks=self.stack, funds_plot=self.funds_plot, render=self.render,
                       use_nn_equity=self.use_nn_equity, use_cpp_montecarlo=self.use_cpp_montecarlo)

        # np.random.seed(123)
        # env.seed(123)
        env.add_player(EquityPlayer(name='equity/50/70', min_call_equity=.5, min_bet_equity=.7))
        env.add_player(PlayerShell(name='keras-rl', stack_size=self.stack))  # shell is used for callback to keras rl

        env.reset()

        dqn = DQNPlayer()
        # dqn.initiate_agent(env, load_memory=model_name, load_model=model_name, load_optimizer=model_name)
        # # dqn.initiate_agent(env, load_memory=None, load_model=None, load_optimizer=None)
        # dqn.initiate_agent(env, load_memory=None, load_model=None, load_optimizer=None, batch_size=128)
        # dqn.train(env_name=model_name, policy_epsilon=0.9)

        batch_sizes = [128, 128, 128, 128, 128, 128, 128, 128]
        policy_epsilon = [0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1]
        learn_rate = np.geomspace(1e-2, 1e-4, 5)

        for x in range(len(learn_rate)):  # range(10) would overrun the 5-element learn_rate schedule
            dqn = DQNPlayer()
            # dqn.initiate_agent(env, load_memory=model_name, load_model=model_name, load_optimizer=model_name, batch_size=128)
            dqn.initiate_agent(env, model_name=None, load_memory=None, load_model=None, load_optimizer=None,
                               batch_size=batch_sizes[x], learn_rate=learn_rate[x])

            dqn.train(env_name=model_name, policy_epsilon=policy_epsilon[x])
Example 10
    def equity_self_improvement(self, improvement_rounds):
        """Create 6 players, 4 of them equity based, 2 of them random"""
        calling = [.1, .2, .3, .4, .5, .6]
        betting = [.2, .3, .4, .5, .6, .7]

        for improvement_round in range(improvement_rounds):
            self.env = HoldemTable(num_of_players=5, initial_stacks=100)
            for i in range(6):
                self.env.add_player(EquityPlayer(name=f'Equity/{calling[i]}/{betting[i]}',
                                                 min_call_equity=calling[i],
                                                 min_bet_equity=betting[i]))

            for _ in range(self.num_episodes):
                self.run_episode()
                self.winner_in_episodes.append(self.env.winner_ix)

            league_table = pd.Series(self.winner_in_episodes).value_counts()
            best_player = int(league_table.index[0])
            print(league_table)
            print(f"Best Player: {best_player}")

            # self improve:
            self.log.info(f"Self improvment round {improvement_round}")
            for i in range(6):
                calling[i] = np.mean([calling[i], calling[best_player]])
                self.log.info(f"New calling for player {i} is {calling[i]}")
                betting[i] = np.mean([betting[i], betting[best_player]])
                self.log.info(f"New betting for player {i} is {betting[i]}")
Example 11
    def dqn_train():
        """Implementation of kreras-rl deep q learing."""
        env_name = 'neuron_poker-v0'
        stack = 100
        env = gym.make(env_name, num_of_players=2, initial_stacks=stack)

        np.random.seed(123)
        env.seed(123)
        env.add_player(
            EquityPlayer(name='equity/50/70',
                         min_call_equity=.5,
                         min_bet_equity=.7))
        # env.add_player(EquityPlayer(name='equity/20/30', min_call_equity=.2, min_bet_equity=-.3))
        # env.add_player(RandomPlayer())
        # env.add_player(RandomPlayer())
        # env.add_player(RandomPlayer())
        env.add_player(PlayerShell(
            name='keras-rl',
            stack_size=stack))  # shell is used for callback to keras rl

        env.reset()

        dqn = DQNPlayer()
        dqn.initiate_agent(env)
        dqn.train(env_name='dqn1')
Example 12
    def equity_vs_random(self):
        """Create 6 players, 4 of them equity based, 2 of them random"""
        self.env = HoldemTable(num_of_players=5, initial_stacks=500)
        self.env.add_player(EquityPlayer(name='equity/50/50', min_call_equity=.5, min_bet_equity=-.5))
        self.env.add_player(EquityPlayer(name='equity/50/80', min_call_equity=.8, min_bet_equity=-.8))
        self.env.add_player(EquityPlayer(name='equity/70/70', min_call_equity=.7, min_bet_equity=-.7))
        self.env.add_player(EquityPlayer(name='equity/20/30', min_call_equity=.2, min_bet_equity=-.3))
        self.env.add_player(RandomPlayer())
        self.env.add_player(RandomPlayer())

        for _ in range(self.num_episodes):
            self.run_episode()
            self.winner_in_episodes.append(self.env.winner_ix)

        league_table = pd.Series(self.winner_in_episodes).value_counts()
        best_player = league_table.index[0]

        print(league_table)
        print(f"Best Player: {best_player}")
Example 13
    def dqn_train_keras_rl(self, model_name):
        """Implementation of kreras-rl deep q learing."""
        from agents.agent_consider_equity import Player as EquityPlayer
        from agents.dqn_agent import Player as DQNPlayer
        from agents.agent_random import Player as RandomPlayer
        env_name = 'neuron_poker-v0'
        env = gym.make(env_name, initial_stacks=self.stack, funds_plot=self.funds_plot, render=self.render,
                       use_cpp_montecarlo=self.use_cpp_montecarlo)

        np.random.seed(123)
        env.seed(123)
        env.add_player(EquityPlayer(name='equity/40/50_1', min_call_equity=.4, min_bet_equity=.5))
        env.add_player(EquityPlayer(name='equity/40/50_2', min_call_equity=.4, min_bet_equity=.5))
        env.add_player(EquityPlayer(name='equity/40/50_3', min_call_equity=.4, min_bet_equity=.5))

        env.add_player(PlayerShell(name='keras-rl', stack_size=self.stack))

        env.reset()

        dqn = DQNPlayer()
        dqn.initiate_agent(env)
        dqn.train(env_name=model_name)
Example 14
    def uto_plays(self):
        """Create an environment with 6 random players"""
        env_name = 'neuron_poker-v0'
        stack = 500
        num_of_plrs = 6
        self.env = gym.make(env_name,
                            num_of_players=num_of_plrs,
                            initial_stacks=stack,
                            render=self.render)
        self.env.add_player(RandomPlayer())
        self.env.add_player(
            EquityPlayer(name='equity/50/80',
                         min_call_equity=.8,
                         min_bet_equity=.4))
        self.env.add_player(
            EquityPlayer(name='equity/70/70',
                         min_call_equity=.7,
                         min_bet_equity=.5))
        self.env.add_player(
            EquityPlayer(name='equity/20/30',
                         min_call_equity=.2,
                         min_bet_equity=.6))
        self.env.add_player(UtoPlayer(name='Uto1 1'))
        self.env.add_player(
            UtoPlayer(name='Uto1 2',
                      min_call_equity=0.46,
                      min_bet_equity=0.56,
                      min_call_equity_allin=0.7))

        for _ in range(self.num_episodes):
            self.env.reset()
            self.winner_in_episodes.append(self.env.winner_ix)

        league_table = pd.Series(self.winner_in_episodes).value_counts()
        best_player = league_table.index[0]

        print(league_table)
        print(f"Best Player: {best_player}")
Example 15
    def create_env_sac(self):
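        """Create a heads-up environment with one equity player and a shell seat for an SAC agent."""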
        from agents.agent_consider_equity import Player as EquityPlayer
        env_name = 'neuron_poker-v0'

        env = gym.make(env_name, initial_stacks=self.stack, render=self.render)

        env.add_player(
            EquityPlayer(name='equity/40/50_1',
                         min_call_equity=.4,
                         min_bet_equity=.5))
        env.add_player(PlayerShell(name='sac', stack_size=self.stack))

        env.reset()

        return env
Example 16
    def dqn_agent(self, mode):
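        """Build an environment from self.players and train or play a DQN agent, depending on mode ('train' or 'play')."""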
        my_import = __import__('agents.'+self.agent, fromlist=['Player'])
        player = getattr(my_import, 'Player')

        env_path = 'env'
        if self.env_name != 'v0':
            env_path += '_' + self.env_name

        shell_import = __import__(
            'gym_env.' + env_path, fromlist=['PlayerShell'])
        PlayerShell_import = getattr(shell_import, 'PlayerShell')

        env_name = 'neuron_poker-' + self.env_name
        self.env = gym.make(env_name, initial_stacks=self.stack, funds_plot=self.funds_plot, render=self.render,
                            use_cpp_montecarlo=self.use_cpp_montecarlo)
        np.random.seed(42)
        self.env.seed(42)

        count = 1

        for player_type in self.players:
            if player_type == 0:
                self.env.add_player(RandomPlayer(env_path))
            elif isinstance(player_type, tuple) and len(player_type) == 2:
                self.env.add_player(EquityPlayer(name='equity_' + str(count), env=env_path,
                                                 min_call_equity=player_type[0], min_bet_equity=player_type[1]))
                count += 1

        self.env.add_player(PlayerShell_import(
            name='keras-rl', stack_size=self.stack))

        self.env.reset()

        if mode == 'train':
            dqn = player()
            dqn.initiate_agent(self.env)
            dqn.train(env_name=self.model_name)
        elif mode == 'play':
            dqn = player(load_model=self.model_name, env=self.env)
            dqn.play(nb_episodes=self.num_episodes, render=self.render)
Example 17
parser.add_argument('--episodes',
                    type=int,
                    default=500,
                    help='# of episodes to train agent')
parser.add_argument('--env_version',
                    type=int,
                    default=0,
                    help='Specifies the version of environment to train on')
parser.add_argument('--eval',
                    action='store_true',
                    help='Evaluate the agent instead of training it')
args = parser.parse_args()

if __name__ == '__main__':

    poker_env = gym.make(f'neuron_poker-v{args.env_version}',
                         initial_stacks=500,
                         render=False,
                         funds_plot=False)
    poker_env.add_player(
        EquityPlayer(name='equity/60/80',
                     min_call_equity=.6,
                     min_bet_equity=.8))
    poker_env.add_player(PlayerShell(name='ppo_agent', stack_size=500))
    poker_env.reset()

    ppo_agent = PPOPlayer(env=poker_env)
    if not args.eval:
        ppo_agent.train(args.model_name, num_ep=args.episodes)
    else:
        ppo_agent.play(args.model_name)
Example 18
    def deep_q_learning():
        """Implementation of kreras-rl deep q learing."""
        env_name = 'neuron_poker-v0'
        stack = 100
        env = gym.make(env_name, num_of_players=5, initial_stacks=stack)

        np.random.seed(123)
        env.seed(123)

        env.add_player(
            EquityPlayer(name='equity/50/50',
                         min_call_equity=.5,
                         min_bet_equity=-.5))
        env.add_player(
            EquityPlayer(name='equity/50/80',
                         min_call_equity=.8,
                         min_bet_equity=-.8))
        env.add_player(
            EquityPlayer(name='equity/70/70',
                         min_call_equity=.7,
                         min_bet_equity=-.7))
        env.add_player(
            EquityPlayer(name='equity/20/30',
                         min_call_equity=.2,
                         min_bet_equity=-.3))
        env.add_player(RandomPlayer())
        env.add_player(PlayerShell(
            name='keras-rl',
            stack_size=stack))  # shell is used for callback to keras rl

        env.reset()

        nb_actions = len(env.action_space)

        # Next, we build a very simple model.
        from keras import Sequential
        from keras.optimizers import Adam
        from keras.layers import Dense, Dropout
        from rl.memory import SequentialMemory
        from rl.agents import DQNAgent
        from rl.policy import BoltzmannQPolicy

        model = Sequential()
        model.add(
            Dense(64, activation='relu', input_shape=env.observation_space))
        model.add(Dropout(0.2))
        model.add(Dense(64, activation='relu'))
        model.add(Dropout(0.2))
        model.add(Dense(nb_actions, activation='linear'))
        print(model.summary())

        # Finally, we configure and compile our agent. You can use every built-in Keras optimizer and
        # even the metrics!
        memory = SequentialMemory(limit=50000, window_length=1)
        policy = BoltzmannQPolicy()
        dqn = DQNAgent(model=model,
                       nb_actions=nb_actions,
                       memory=memory,
                       nb_steps_warmup=10,
                       target_model_update=1e-2,
                       policy=policy)
        dqn.compile(Adam(lr=1e-3), metrics=['mae'])

        # Okay, now it's time to learn something! We visualize the training here for show, but this
        # slows down training quite a lot. You can always safely abort the training prematurely using
        # Ctrl + C.
        dqn.fit(env, nb_steps=50000, visualize=True, verbose=2)

        # After training is done, we save the final weights.
        dqn.save_weights('dqn_{}_weights.h5f'.format(env_name), overwrite=True)

        # Finally, evaluate our algorithm for 5 episodes.
        dqn.test(env, nb_episodes=5, visualize=True)
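
To evaluate previously saved weights in a fresh session, a minimal sketch (assuming the same env, model, nb_actions and env_name as above) is to rebuild and compile the agent, then restore the weights before testing:

# Evaluation-only sketch: rebuild the same agent, restore the saved weights, then test.
eval_agent = DQNAgent(model=model,
                      nb_actions=nb_actions,
                      memory=SequentialMemory(limit=50000, window_length=1),
                      nb_steps_warmup=10,
                      target_model_update=1e-2,
                      policy=BoltzmannQPolicy())
eval_agent.compile(Adam(lr=1e-3), metrics=['mae'])
eval_agent.load_weights('dqn_{}_weights.h5f'.format(env_name))
eval_agent.test(env, nb_episodes=5, visualize=False)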