def dqn_train_keras_rl(self, num_par_agents, model_name):
    """Train keras-rl deep Q-learning agents in a pool of worker processes.

    Builds a six-seat ``neuron_poker-v0`` table (two equity players, three random
    players and the keras-rl ``PlayerShell`` seat), resets it, and submits one
    ``parallel_dqn_train`` job per requested agent to a ``multiprocessing.Pool``.

    Args:
        num_par_agents: Size of the worker pool and number of training jobs submitted.
        model_name: Name of the model to train.
            NOTE(review): the workers receive the gym environment id, exactly as the
            previous call did; ``model_name`` is not forwarded. Confirm against the
            signature of ``parallel_dqn_train``.

    Raises:
        Whatever a worker raises: ``AsyncResult.get()`` re-raises it in the parent.
    """
    from agents.agent_consider_equity import Player as EquityPlayer
    from agents.agent_keras_rl_dqn import Player as DQNPlayer
    from agents.agent_random import Player as RandomPlayer

    env_name = 'neuron_poker-v0'
    env = gym.make(env_name, initial_stacks=self.stack, funds_plot=self.funds_plot, render=self.render,
                   use_cpp_montecarlo=self.use_cpp_montecarlo)

    np.random.seed(123)
    env.seed(123)

    env.add_player(EquityPlayer(name='equity/50/70', min_call_equity=.5, min_bet_equity=.7))
    env.add_player(EquityPlayer(name='equity/20/30', min_call_equity=.2, min_bet_equity=.3))
    env.add_player(RandomPlayer())
    env.add_player(RandomPlayer())
    env.add_player(RandomPlayer())
    # shell is used for callback to keras rl
    env.add_player(PlayerShell(name='keras-rl', stack_size=self.stack))

    env.reset()

    dqn = DQNPlayer()

    with multiprocessing.Pool(num_par_agents) as pool:
        # apply_async needs the callable and its arguments separately; calling
        # parallel_dqn_train(...) here would run the training in this process and
        # pass its return value to the pool instead.
        # NOTE(review): the arguments are pickled for the workers, so `dqn` and
        # `env` must be picklable.
        pending = [pool.apply_async(parallel_dqn_train, (dqn, env, env_name))
                   for _ in range(num_par_agents)]
        # Block until every job is done: leaving the `with` block terminates the
        # pool, and get() also surfaces worker exceptions.
        for job in pending:
            job.get()
def dqn_play_keras_rl(self):
    """Let the stored keras-rl DQN model 'dqn1' play at a six-seat table.

    Seats four equity-based opponents, one random opponent and the ``PlayerShell``
    seat used by keras-rl, resets the table, then loads the model and plays
    ``self.num_episodes`` episodes.
    """
    env_name = 'neuron_poker-v0'
    stack = 500

    table = self.env = gym.make(env_name, initial_stacks=stack, render=self.render)

    # (name, min_call_equity, min_bet_equity)
    # NOTE(review): 'equity/50/80' is configured with call/bet thresholds of .8/.8,
    # which does not match its name. Left unchanged.
    equity_opponents = (
        ('equity/50/50', .5, .5),
        ('equity/50/80', .8, .8),
        ('equity/70/70', .7, .7),
        ('equity/20/30', .2, .3),
    )
    for label, call_eq, bet_eq in equity_opponents:
        table.add_player(EquityPlayer(name=label, min_call_equity=call_eq, min_bet_equity=bet_eq))
    table.add_player(RandomPlayer())
    table.add_player(PlayerShell(name='keras-rl', stack_size=stack))

    table.reset()

    agent = DQNPlayer(load_model='dqn1', env=table)
    agent.play(nb_episodes=self.num_episodes, render=self.render)
def dqn_play(self):
    """Seat a loaded DQN player against five opponents and reset the table per episode.

    The table holds four equity players, one random player and a ``DQNPlayer``
    created with ``load_model='neuron_poker-v0'``. ``self.env.reset()`` is then
    called ``self.num_episodes`` times.
    """
    env_name = 'neuron_poker-v0'
    stack = 500
    num_of_plrs = 6

    table = self.env = gym.make(env_name, num_of_players=num_of_plrs, initial_stacks=stack,
                                render=self.render)

    # (name, min_call_equity, min_bet_equity); the bet thresholds are negative.
    equity_opponents = (
        ('equity/50/50', .5, -.5),
        ('equity/50/80', .8, -.8),
        ('equity/70/70', .7, -.7),
        ('equity/20/30', .2, -.3),
    )
    for label, call_eq, bet_eq in equity_opponents:
        table.add_player(EquityPlayer(name=label, min_call_equity=call_eq, min_bet_equity=bet_eq))
    table.add_player(RandomPlayer())
    table.add_player(DQNPlayer(load_model='neuron_poker-v0'))

    episodes_left = self.num_episodes
    while episodes_left > 0:
        table.reset()
        episodes_left -= 1
def dqn_train_keras_rl(self, model_name):
    """Train a keras-rl deep Q-learning agent at a six-seat table.

    Opponents are two equity players and three random players; the sixth seat is
    the ``PlayerShell`` that keras-rl drives. Both numpy and the environment are
    seeded with 123 before the players are added.

    Args:
        model_name: Passed to ``DQNPlayer.train`` as its ``env_name`` argument.
    """
    env_name = 'neuron_poker-v0'
    stack = 100

    table = gym.make(env_name, initial_stacks=stack, funds_plot=self.funds_plot, render=self.render,
                     use_cpp_montecarlo=self.use_cpp_montecarlo)

    np.random.seed(123)
    table.seed(123)

    for label, call_eq, bet_eq in (('equity/50/70', .5, .7), ('equity/20/30', .2, .3)):
        table.add_player(EquityPlayer(name=label, min_call_equity=call_eq, min_bet_equity=bet_eq))
    for _ in range(3):
        table.add_player(RandomPlayer())
    # The shell seat is the callback hook for keras-rl.
    table.add_player(PlayerShell(name='keras-rl', stack_size=stack))

    table.reset()

    agent = DQNPlayer()
    agent.initiate_agent(table)
    agent.train(env_name=model_name)
def dqn_play_keras_rl(self, model_name):
    """Let a stored keras-rl DQN model play at a six-seat table.

    Seats four equity-based opponents, one random opponent and the ``PlayerShell``
    seat used by keras-rl, resets the table, then loads the model and plays
    ``self.num_episodes`` episodes.

    Args:
        model_name: Passed to ``DQNPlayer`` as ``load_model``.
    """
    from agents.agent_consider_equity import Player as EquityPlayer
    from agents.agent_keras_rl_dqn import Player as DQNPlayer
    from agents.agent_random import Player as RandomPlayer

    env_name = 'neuron_poker-v0'
    table = self.env = gym.make(env_name, initial_stacks=self.stack, render=self.render)

    # (name, min_call_equity, min_bet_equity)
    # NOTE(review): 'equity/50/80' is configured with call/bet thresholds of .8/.8,
    # which does not match its name. Left unchanged.
    equity_opponents = (
        ('equity/50/50', .5, .5),
        ('equity/50/80', .8, .8),
        ('equity/70/70', .7, .7),
        ('equity/20/30', .2, .3),
    )
    for label, call_eq, bet_eq in equity_opponents:
        table.add_player(EquityPlayer(name=label, min_call_equity=call_eq, min_bet_equity=bet_eq))
    table.add_player(RandomPlayer())
    table.add_player(PlayerShell(name='keras-rl', stack_size=self.stack))

    table.reset()

    agent = DQNPlayer(load_model=model_name, env=table)
    agent.play(nb_episodes=self.num_episodes, render=self.render)
def equity_vs_random(self):
    """Run episodes between two equity players and print the league table.

    Only 'equity/50/50' and 'equity/50/80' are seated; the further equity and
    random opponents this experiment used to have are disabled. After every
    ``reset()`` the table's ``winner_ix`` is appended to
    ``self.winner_in_episodes``, and the win counts are printed at the end.
    """
    from agents.agent_consider_equity import Player as EquityPlayer
    from agents.agent_random import Player as RandomPlayer  # imported as before; no random seat is active

    env_name = 'neuron_poker-v0'
    table = self.env = gym.make(env_name, initial_stacks=self.stack, render=self.render,
                                use_nn_equity=self.use_nn_equity,
                                use_cpp_montecarlo=self.use_cpp_montecarlo)

    # (name, min_call_equity, min_bet_equity); the bet thresholds are negative.
    for label, call_eq, bet_eq in (('equity/50/50', .5, -.5), ('equity/50/80', .8, -.8)):
        table.add_player(EquityPlayer(name=label, min_call_equity=call_eq, min_bet_equity=bet_eq))

    for _ in range(self.num_episodes):
        table.reset()
        self.winner_in_episodes.append(table.winner_ix)

    # value_counts() sorts by frequency, so the first index is the most frequent winner.
    wins = pd.Series(self.winner_in_episodes).value_counts()
    top_seat = wins.index[0]

    for line in ("League Table", "============", wins, f"Best Player: {top_seat}"):
        print(line)
def dqn_train_custom_q1(self):
    """Run episodes with a ``Custom_Q1`` agent against one equity and two random players.

    After every ``reset()`` the table's ``winner_ix`` is appended to
    ``self.winner_in_episodes``; the resulting win counts are printed as a league
    table together with the most frequent winner.
    """
    env_name = 'neuron_poker-v0'
    stack = 500

    table = self.env = gym.make(env_name, initial_stacks=stack, render=self.render)

    # Seating order: one equity player, two random players, then the Q-learning agent.
    table.add_player(EquityPlayer(name='equity/20/30', min_call_equity=.2, min_bet_equity=-.3))
    for _ in range(2):
        table.add_player(RandomPlayer())
    table.add_player(Custom_Q1(name='Deep_Q1'))

    for _ in range(self.num_episodes):
        table.reset()
        self.winner_in_episodes.append(table.winner_ix)

    # value_counts() sorts by frequency, so the first index is the most frequent winner.
    wins = pd.Series(self.winner_in_episodes).value_counts()
    top_seat = wins.index[0]

    for line in ("League Table", "============", wins, f"Best Player: {top_seat}"):
        print(line)
def equity_self_improvement(self, improvement_rounds):
    """Iteratively pull six equity players' thresholds toward the current best player.

    Each round builds a fresh table with six equity players, runs
    ``self.num_episodes`` resets while recording ``winner_ix``, prints the league
    table, and then replaces every player's call/bet threshold with the mean of its
    own value and the best player's value.

    Args:
        improvement_rounds: Number of play-then-adjust rounds to run.
    """
    from agents.agent_consider_equity import Player as EquityPlayer

    calling = [.1, .2, .3, .4, .5, .6]
    betting = [.2, .3, .4, .5, .6, .7]
    seats = range(6)

    for round_no in range(improvement_rounds):
        env_name = 'neuron_poker-v0'
        table = self.env = gym.make(env_name, initial_stacks=self.stack, render=self.render,
                                    use_nn_equity=self.use_nn_equity,
                                    use_cpp_montecarlo=self.use_cpp_montecarlo)
        for call_eq, bet_eq in zip(calling, betting):
            table.add_player(EquityPlayer(name=f'Equity/{call_eq}/{bet_eq}',
                                          min_call_equity=call_eq,
                                          min_bet_equity=bet_eq))

        for _ in range(self.num_episodes):
            table.reset()
            self.winner_in_episodes.append(table.winner_ix)

        # NOTE(review): self.winner_in_episodes is not cleared between rounds, so
        # the counts below include earlier rounds. Confirm this is intended.
        wins = pd.Series(self.winner_in_episodes).value_counts()
        best_player = int(wins.index[0])
        print(wins)
        print(f"Best Player: {best_player}")

        # self improve: move each seat halfway toward the best seat's thresholds
        self.log.info(f"Self improvment round {round_no}")
        for seat in seats:
            calling[seat] = np.mean([calling[seat], calling[best_player]])
            self.log.info(f"New calling for player {seat} is {calling[seat]}")
            betting[seat] = np.mean([betting[seat], betting[best_player]])
            self.log.info(f"New betting for player {seat} is {betting[seat]}")
def dqn_train_heads_up_keras_rl(self, model_name):
    """Train keras-rl DQN agents heads-up against one equity player over a learning-rate sweep.

    A two-seat ``neuron_poker-v0`` table is built (one equity opponent plus the
    keras-rl ``PlayerShell`` seat). A fresh ``DQNPlayer`` is then initiated and
    trained once per entry of the learning-rate schedule, without loading any
    stored memory, model or optimizer state.

    Args:
        model_name: Passed to ``DQNPlayer.train`` as its ``env_name`` argument for
            every run of the sweep.
    """
    from agents.agent_consider_equity import Player as EquityPlayer
    from agents.agent_keras_rl_dqn import Player as DQNPlayer
    from agents.agent_random import Player as RandomPlayer  # imported as before; no random seat is used

    env_name = 'neuron_poker-v0'
    env = gym.make(env_name, initial_stacks=self.stack, funds_plot=self.funds_plot, render=self.render,
                   use_nn_equity=self.use_nn_equity, use_cpp_montecarlo=self.use_cpp_montecarlo)

    # Seeding is deliberately left disabled here (it was commented out).
    env.add_player(EquityPlayer(name='equity/50/70', min_call_equity=.5, min_bet_equity=.7))
    # shell is used for callback to keras rl
    env.add_player(PlayerShell(name='keras-rl', stack_size=self.stack))

    env.reset()

    # Five learning rates, geometrically spaced from 1e-2 down to 1e-4.
    learn_rates = np.geomspace(1e-2, 1e-4, 5)
    policy_epsilons = [0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1]

    # Pair the schedules with zip so the sweep stops at the shorter one. The old
    # `range(10)` index loop ran past the five learning rates and raised
    # IndexError on the sixth run.
    for learn_rate, epsilon in zip(learn_rates, policy_epsilons):
        dqn = DQNPlayer()
        dqn.initiate_agent(env, model_name=None, load_memory=None, load_model=None,
                           load_optimizer=None, batch_size=128, learn_rate=learn_rate)
        dqn.train(env_name=model_name, policy_epsilon=epsilon)
def equity_self_improvement(self, improvement_rounds):
    """Iteratively pull six equity players' thresholds toward the current best player.

    Each round builds a fresh ``HoldemTable`` with six equity players, runs
    ``self.num_episodes`` episodes via ``self.run_episode()`` while recording
    ``winner_ix``, prints the league table, and then replaces every player's
    call/bet threshold with the mean of its own value and the best player's value.

    Args:
        improvement_rounds: Number of play-then-adjust rounds to run.
    """
    calling = [.1, .2, .3, .4, .5, .6]
    betting = [.2, .3, .4, .5, .6, .7]
    seats = range(6)

    for round_no in range(improvement_rounds):
        # NOTE(review): the table is created with num_of_players=5 but six players
        # are added below. Confirm against HoldemTable.
        table = self.env = HoldemTable(num_of_players=5, initial_stacks=100)
        for call_eq, bet_eq in zip(calling, betting):
            table.add_player(EquityPlayer(name=f'Equity/{call_eq}/{bet_eq}',
                                          min_call_equity=call_eq,
                                          min_bet_equity=bet_eq))

        for _ in range(self.num_episodes):
            self.run_episode()
            self.winner_in_episodes.append(self.env.winner_ix)

        # NOTE(review): self.winner_in_episodes is not cleared between rounds, so
        # the counts below include earlier rounds. Confirm this is intended.
        wins = pd.Series(self.winner_in_episodes).value_counts()
        best_player = int(wins.index[0])
        print(wins)
        print(f"Best Player: {best_player}")

        # self improve: move each seat halfway toward the best seat's thresholds
        self.log.info(f"Self improvment round {round_no}")
        for seat in seats:
            calling[seat] = np.mean([calling[seat], calling[best_player]])
            self.log.info(f"New calling for player {seat} is {calling[seat]}")
            betting[seat] = np.mean([betting[seat], betting[best_player]])
            self.log.info(f"New betting for player {seat} is {betting[seat]}")
def dqn_train():
    """Train a keras-rl deep Q-learning agent heads-up and store it under 'dqn1'.

    A two-player ``neuron_poker-v0`` table is created and seeded with 123 (numpy
    is seeded as well). One equity player is the opponent; the second seat is the
    ``PlayerShell`` that keras-rl drives.
    """
    env_name = 'neuron_poker-v0'
    stack = 100

    table = gym.make(env_name, num_of_players=2, initial_stacks=stack)

    np.random.seed(123)
    table.seed(123)

    opponent = EquityPlayer(name='equity/50/70', min_call_equity=.5, min_bet_equity=.7)
    table.add_player(opponent)
    # The shell seat is the callback hook for keras-rl.
    shell = PlayerShell(name='keras-rl', stack_size=stack)
    table.add_player(shell)

    table.reset()

    agent = DQNPlayer()
    agent.initiate_agent(table)
    agent.train(env_name='dqn1')
def equity_vs_random(self):
    """Run episodes between four equity players and two random players.

    Every episode is played through ``self.run_episode()``; the table's
    ``winner_ix`` is recorded in ``self.winner_in_episodes`` afterwards. The win
    counts and the most frequent winner are printed at the end.
    """
    # NOTE(review): the table is created with num_of_players=5 but six players are
    # added below. Confirm against HoldemTable.
    table = self.env = HoldemTable(num_of_players=5, initial_stacks=500)

    # (name, min_call_equity, min_bet_equity); the bet thresholds are negative.
    equity_opponents = (
        ('equity/50/50', .5, -.5),
        ('equity/50/80', .8, -.8),
        ('equity/70/70', .7, -.7),
        ('equity/20/30', .2, -.3),
    )
    for label, call_eq, bet_eq in equity_opponents:
        table.add_player(EquityPlayer(name=label, min_call_equity=call_eq, min_bet_equity=bet_eq))
    for _ in range(2):
        table.add_player(RandomPlayer())

    for _ in range(self.num_episodes):
        self.run_episode()
        self.winner_in_episodes.append(self.env.winner_ix)

    # value_counts() sorts by frequency, so the first index is the most frequent winner.
    wins = pd.Series(self.winner_in_episodes).value_counts()
    top_seat = wins.index[0]
    print(wins)
    print(f"Best Player: {top_seat}")
def dqn_train_keras_rl(self, model_name):
    """Train a keras-rl deep Q-learning agent against three identical equity players.

    The table has four seats: three equity players with call/bet thresholds of
    .4/.5 and the ``PlayerShell`` that keras-rl drives. Both numpy and the
    environment are seeded with 123 before the players are added.

    Args:
        model_name: Passed to ``DQNPlayer.train`` as its ``env_name`` argument.
    """
    from agents.agent_consider_equity import Player as EquityPlayer
    from agents.dqn_agent import Player as DQNPlayer
    from agents.agent_random import Player as RandomPlayer  # imported as before; no random seat is used

    env_name = 'neuron_poker-v0'
    table = gym.make(env_name, initial_stacks=self.stack, funds_plot=self.funds_plot,
                     render=self.render, use_cpp_montecarlo=self.use_cpp_montecarlo)

    np.random.seed(123)
    table.seed(123)

    for label in ('equity/40/50_1', 'equity/40/50_2', 'equity/40/50_3'):
        table.add_player(EquityPlayer(name=label, min_call_equity=.4, min_bet_equity=.5))
    table.add_player(PlayerShell(name='keras-rl', stack_size=self.stack))

    table.reset()

    agent = DQNPlayer()
    agent.initiate_agent(table)
    agent.train(env_name=model_name)
def uto_plays(self):
    """Run episodes with two ``UtoPlayer`` agents against one random and three equity players.

    After every ``reset()`` the table's ``winner_ix`` is appended to
    ``self.winner_in_episodes``; the win counts and the most frequent winner are
    printed at the end.
    """
    env_name = 'neuron_poker-v0'
    stack = 500
    num_of_plrs = 6

    table = self.env = gym.make(env_name, num_of_players=num_of_plrs, initial_stacks=stack,
                                render=self.render)

    table.add_player(RandomPlayer())

    # (name, min_call_equity, min_bet_equity)
    # NOTE(review): the thresholds do not match the numbers in the names. Left unchanged.
    equity_opponents = (
        ('equity/50/80', .8, .4),
        ('equity/70/70', .7, .5),
        ('equity/20/30', .2, .6),
    )
    for label, call_eq, bet_eq in equity_opponents:
        table.add_player(EquityPlayer(name=label, min_call_equity=call_eq, min_bet_equity=bet_eq))

    # First Uto player uses its own defaults; the second overrides the thresholds.
    table.add_player(UtoPlayer(name='Uto1 1'))
    table.add_player(UtoPlayer(name='Uto1 2', min_call_equity=0.46, min_bet_equity=0.56,
                               min_call_equity_allin=0.7))

    for _ in range(self.num_episodes):
        table.reset()
        self.winner_in_episodes.append(table.winner_ix)

    # value_counts() sorts by frequency, so the first index is the most frequent winner.
    wins = pd.Series(self.winner_in_episodes).value_counts()
    top_seat = wins.index[0]
    print(wins)
    print(f"Best Player: {top_seat}")
def create_env_sac(self):
    """Build, reset and return a two-seat ``neuron_poker-v0`` table for the 'sac' agent.

    The first seat is an equity player with call/bet thresholds of .4/.5; the
    second is a ``PlayerShell`` named 'sac'.

    Returns:
        The environment created by ``gym.make`` after ``reset()`` has been called on it.
    """
    from agents.agent_consider_equity import Player as EquityPlayer

    table = gym.make('neuron_poker-v0', initial_stacks=self.stack, render=self.render)

    opponent = EquityPlayer(name='equity/40/50_1', min_call_equity=.4, min_bet_equity=.5)
    table.add_player(opponent)

    sac_seat = PlayerShell(name='sac', stack_size=self.stack)
    table.add_player(sac_seat)

    table.reset()
    return table
def dqn_agent(self, mode):
    """Train or play a dynamically selected agent on a configurable table.

    The agent class is ``Player`` from module ``agents.<self.agent>`` and the shell
    class is ``PlayerShell`` from ``gym_env.env`` (or ``gym_env.env_<self.env_name>``
    when ``self.env_name`` is not ``'v0'``). Opponents are taken from
    ``self.players``: an entry equal to ``0`` seats a random player, a 2-tuple
    ``(min_call_equity, min_bet_equity)`` seats an equity player, and any other
    entry is skipped. The keras-rl shell is always seated last.

    Args:
        mode: ``'train'`` initiates the agent on the table and trains it under
            ``self.model_name``; ``'play'`` loads ``self.model_name`` and plays
            ``self.num_episodes`` episodes. Any other value only builds and resets
            the table.
    """
    import importlib

    player = getattr(importlib.import_module('agents.' + self.agent), 'Player')

    env_path = 'env'
    if self.env_name != 'v0':
        env_path += '_' + self.env_name
    shell_cls = getattr(importlib.import_module('gym_env.' + env_path), 'PlayerShell')

    env_name = 'neuron_poker-' + self.env_name
    self.env = gym.make(env_name, initial_stacks=self.stack, funds_plot=self.funds_plot,
                        render=self.render, use_cpp_montecarlo=self.use_cpp_montecarlo)

    np.random.seed(42)
    self.env.seed(42)

    # The counter advances for every configured entry (not only equity players),
    # so equity player names reflect their position in self.players.
    for count, player_type in enumerate(self.players, start=1):
        if player_type == 0:
            self.env.add_player(RandomPlayer(env_path))
        elif isinstance(player_type, tuple) and len(player_type) == 2:
            min_call_equity, min_bet_equity = player_type
            self.env.add_player(EquityPlayer(name='equity_' + str(count), env=env_path,
                                             min_call_equity=min_call_equity,
                                             min_bet_equity=min_bet_equity))

    # shell is used for callback to keras rl
    self.env.add_player(shell_cls(name='keras-rl', stack_size=self.stack))

    self.env.reset()

    if mode == 'train':
        dqn = player()
        dqn.initiate_agent(self.env)
        dqn.train(env_name=self.model_name)
    elif mode == 'play':
        dqn = player(load_model=self.model_name, env=self.env)
        dqn.play(nb_episodes=self.num_episodes, render=self.render)
type=int, default=500, help='# of episodes to train agent') parser.add_argument('--env_version', type=int, default=0, help='Specifies the version of environment to train on') parser.add_argument('--eval', type=bool, default=False, help='Determines if we want to evaluate the agent or not') args = parser.parse_args() if __name__ == '__main__': poker_env = gym.make(f'neuron_poker-v{args.env_version}', initial_stacks=500, render=False, funds_plot=False) poker_env.add_player( EquityPlayer(name='equity/60/80', min_call_equity=.6, min_bet_equity=.8)) poker_env.add_player(PlayerShell(name='ppo_agent', stack_size=500)) poker_env.reset() ppo_agent = PPOPlayer(env=poker_env) if not args.eval: ppo_agent.train(args.model_name, num_ep=args.episodes) else: ppo_agent.play(args.model_name)
def deep_q_learning():
    """Train, save and evaluate a keras-rl DQN agent on ``neuron_poker-v0``.

    Seats four equity players, one random player and the keras-rl ``PlayerShell``,
    builds a small dense Q-network, fits a ``DQNAgent`` for 50000 steps, writes the
    weights to ``dqn_neuron_poker-v0_weights.h5f`` and finally runs 5 test episodes.
    """
    env_name = 'neuron_poker-v0'
    stack = 100
    # NOTE(review): the table is created with num_of_players=5 but six players are
    # added below. Confirm against the environment.
    env = gym.make(env_name, num_of_players=5, initial_stacks=stack)

    np.random.seed(123)
    env.seed(123)

    env.add_player(EquityPlayer(name='equity/50/50', min_call_equity=.5, min_bet_equity=-.5))
    env.add_player(EquityPlayer(name='equity/50/80', min_call_equity=.8, min_bet_equity=-.8))
    env.add_player(EquityPlayer(name='equity/70/70', min_call_equity=.7, min_bet_equity=-.7))
    env.add_player(EquityPlayer(name='equity/20/30', min_call_equity=.2, min_bet_equity=-.3))
    env.add_player(RandomPlayer())
    # shell is used for callback to keras rl
    env.add_player(PlayerShell(name='keras-rl', stack_size=stack))

    env.reset()

    nb_actions = len(env.action_space)

    # Next, we build a very simple model.
    from keras import Sequential
    from keras.optimizers import Adam
    from keras.layers import Dense, Dropout
    from rl.memory import SequentialMemory
    from rl.agents import DQNAgent
    from rl.policy import BoltzmannQPolicy

    model = Sequential()
    model.add(Dense(64, activation='relu', input_shape=env.observation_space))
    model.add(Dropout(0.2))
    model.add(Dense(64, activation='relu'))
    model.add(Dropout(0.2))
    model.add(Dense(nb_actions, activation='linear'))
    # summary() prints the table itself and returns None; wrapping it in print()
    # only added a stray "None" line to the output.
    model.summary()

    # Finally, we configure and compile our agent. You can use every built-in Keras
    # optimizer and even the metrics!
    memory = SequentialMemory(limit=50000, window_length=1)
    policy = BoltzmannQPolicy()
    dqn = DQNAgent(model=model, nb_actions=nb_actions, memory=memory, nb_steps_warmup=10,
                   target_model_update=1e-2, policy=policy)
    dqn.compile(Adam(lr=1e-3), metrics=['mae'])

    # Okay, now it's time to learn something! We visualize the training here for
    # show, but this slows down training quite a lot. You can always safely abort
    # the training prematurely using Ctrl + C.
    dqn.fit(env, nb_steps=50000, visualize=True, verbose=2)

    # After training is done, we save the final weights.
    dqn.save_weights(f'dqn_{env_name}_weights.h5f', overwrite=True)

    # Finally, evaluate our algorithm for 5 episodes.
    dqn.test(env, nb_episodes=5, visualize=True)