def dqn_play_keras_rl(self, model_name='dqn1'):
    """Create 6 players, one of them a trained DQN, and let it play.

    Args:
        model_name: name of the trained keras-rl model to load.
            Defaults to 'dqn1', preserving the previously hard-coded value.
    """
    env_name = 'neuron_poker-v0'
    stack = 500
    self.env = gym.make(env_name, initial_stacks=stack, render=self.render)
    # Opponent lineup: four equity-threshold players with different
    # call/bet aggressiveness plus one random player.
    self.env.add_player(
        EquityPlayer(name='equity/50/50', min_call_equity=.5, min_bet_equity=.5))
    # NOTE(review): label says '50/80' but both equities are .8 -- confirm intended.
    self.env.add_player(
        EquityPlayer(name='equity/50/80', min_call_equity=.8, min_bet_equity=.8))
    self.env.add_player(
        EquityPlayer(name='equity/70/70', min_call_equity=.7, min_bet_equity=.7))
    self.env.add_player(
        EquityPlayer(name='equity/20/30', min_call_equity=.2, min_bet_equity=.3))
    self.env.add_player(RandomPlayer())
    # Shell seat: actions for this player are supplied by the keras-rl agent below.
    self.env.add_player(PlayerShell(name='keras-rl', stack_size=stack))
    self.env.reset()
    dqn = DQNPlayer(load_model=model_name, env=self.env)
    dqn.play(nb_episodes=self.num_episodes, render=self.render)
def dqn_train_keras_rl(self, model_name='dqn1'):
    """Train a keras-rl deep Q-learning agent against two scripted opponents.

    Args:
        model_name: name under which the trained model is saved.
            Defaults to 'dqn1', preserving the previously hard-coded value.
    """
    env_name = 'neuron_poker-v0'
    stack = 100
    env = gym.make(env_name, initial_stacks=stack, funds_plot=self.funds_plot,
                   render=self.render, use_cpp_montecarlo=self.use_cpp_montecarlo)
    # Fixed seeds for reproducible training runs.
    np.random.seed(123)
    env.seed(123)
    env.add_player(
        EquityPlayer(name='equity/20/30', min_call_equity=.2, min_bet_equity=.3))
    env.add_player(RandomPlayer())
    # Shell seat: used as the callback hook for keras-rl.
    env.add_player(PlayerShell(name='keras-rl', stack_size=stack))
    env.reset()
    dqn = DQNPlayer()
    dqn.initiate_agent(env)
    dqn.train(env_name=model_name)
def dqn_play_keras_rl(self, model_name):
    """Create 6 players, one of them a trained DQN"""
    from agents.agent_consider_equity import Player as EquityPlayer
    from agents.agent_keras_rl_dqn import Player as DQNPlayer
    from agents.agent_random import Player as RandomPlayer

    env_name = 'neuron_poker-v0'
    self.env = gym.make(env_name, initial_stacks=self.stack, render=self.render)

    # Four equity-threshold opponents, each (label, min_call_equity, min_bet_equity).
    equity_lineup = [
        ('equity/50/50', .5, .5),
        ('equity/50/80', .8, .8),
        ('equity/70/70', .7, .7),
        ('equity/20/30', .2, .3),
    ]
    for label, call_eq, bet_eq in equity_lineup:
        self.env.add_player(
            EquityPlayer(name=label, min_call_equity=call_eq, min_bet_equity=bet_eq))

    self.env.add_player(RandomPlayer())
    # Shell seat: the keras-rl agent below supplies this player's actions.
    self.env.add_player(PlayerShell(name='keras-rl', stack_size=self.stack))
    self.env.reset()

    trained_agent = DQNPlayer(load_model=model_name, env=self.env)
    trained_agent.play(nb_episodes=self.num_episodes, render=self.render)
def dqn_train_keras_rl(self, model_name):
    """Implementation of kreras-rl deep q learing."""
    from agents.agent_consider_equity import Player as EquityPlayer
    from agents.agent_keras_rl_dqn import Player as DQNPlayer
    from agents.agent_random import Player as RandomPlayer

    env = gym.make('neuron_poker-v0',
                   initial_stacks=self.stack,
                   funds_plot=self.funds_plot,
                   render=self.render,
                   use_cpp_montecarlo=self.use_cpp_montecarlo)

    # Fixed seeds so training runs are reproducible.
    np.random.seed(123)
    env.seed(123)

    # One equity opponent plus the shell seat (callback hook for keras-rl).
    env.add_player(
        EquityPlayer(name='equity_default', min_call_equity=.4, min_bet_equity=.5))
    env.add_player(PlayerShell(name='keras-rl', stack_size=self.stack))
    env.reset()

    agent = DQNPlayer()
    agent.initiate_agent(env)
    agent.train(env_name=model_name)
def dqn_train_keras_rl(self):
    """Train a keras-rl deep Q-learning agent heads-up against a random player.

    Seats a PlayerShell (whose actions are supplied by the keras-rl agent)
    against one RandomPlayer, then trains the agent and checkpoints it
    under the name 'LJY'.  Large blocks of commented-out experimental
    lineups (extra DQN and random seats) were removed as dead code.
    """
    env_name = 'neuron_poker-v0'
    stack = 2000
    env = gym.make(env_name, initial_stacks=stack, funds_plot=self.funds_plot,
                   render=self.render, use_cpp_montecarlo=self.use_cpp_montecarlo)
    # Fixed seeds for reproducible training runs.
    np.random.seed(123)
    env.seed(123)
    # Shell seat: callback hook for keras-rl.  range=0.33 presumably
    # restricts the playable starting-hand range -- TODO confirm against
    # the PlayerShell implementation.
    env.add_player(PlayerShell(name='LJY', stack_size=stack, range=0.33))
    env.add_player(RandomPlayer(name='Random-1', range=1))
    env.reset()
    dqn = DQNPlayer()
    dqn.initiate_agent(env)
    dqn.train(ckpt_name='LJY')
def dqn_train_heads_up_keras_rl(self, model_name):
    """Train a keras-rl deep Q-learning agent heads-up with a learn-rate sweep.

    Plays the agent heads-up against a single equity player and trains it
    repeatedly, sweeping the learning rate geometrically from 1e-2 down to
    1e-4 while keeping the exploration epsilon fixed at 0.1.

    Args:
        model_name: base name used when saving the trained model.
    """
    from agents.agent_consider_equity import Player as EquityPlayer
    from agents.agent_keras_rl_dqn import Player as DQNPlayer
    from agents.agent_random import Player as RandomPlayer

    env = gym.make('neuron_poker-v0',
                   initial_stacks=self.stack,
                   funds_plot=self.funds_plot,
                   render=self.render,
                   use_nn_equity=self.use_nn_equity,
                   use_cpp_montecarlo=self.use_cpp_montecarlo)
    env.add_player(
        EquityPlayer(name='equity/50/70', min_call_equity=.5, min_bet_equity=.7))
    # Shell seat: actions are supplied by the keras-rl agent.
    env.add_player(PlayerShell(name='keras-rl', stack_size=self.stack))
    env.reset()

    # Hyper-parameter schedules.  BUG FIX: the original looped range(10)
    # while learn_rate held only 5 entries (and policy_epsilon only 8),
    # so learn_rate[x] raised IndexError on the 6th iteration.  All
    # schedules now share one length so every run is fully defined.
    num_runs = 10
    learn_rates = np.geomspace(1e-2, 1e-4, num_runs)
    policy_epsilons = [0.1] * num_runs

    for run in range(num_runs):
        dqn = DQNPlayer()
        dqn.initiate_agent(env, model_name=None, load_memory=None,
                           load_model=None, load_optimizer=None,
                           batch_size=128, learn_rate=learn_rates[run])
        dqn.train(env_name=model_name, policy_epsilon=policy_epsilons[run])