def __init__(
        self,
        # --- general
        name,
        log_export_freq=1000,
        checkpoint_freq=99999999,
        eval_agent_export_freq=99999999,
        # --- Computing
        path_data=None,
        local_crayon_server_docker_address="localhost",
        device_inference="cpu",
        device_parameter_server="cpu",
        n_learner_actor_workers=8,
        max_n_las_sync_simultaneously=100,
        DISTRIBUTED=False,
        CLUSTER=False,
        DEBUGGING=False,
        VERBOSE=True,
        # --- env
        game_cls=StandardLeduc,
        n_seats=2,
        use_simplified_headsup_obs=True,
        start_chips=None,
        agent_bet_set=bet_sets.B_2,
        stack_randomization_range=(0, 0),
        uniform_action_interpolation=False,
        # --- Evaluation
        eval_modes_of_algo=(EvalAgentNFSP.EVAL_MODE_AVG, ),
        eval_stack_sizes=None,
        # --- NFSP
        nn_type="feedforward",  # only "feedforward" is implemented; "recurrent" raises below
        linear=False,
        sampling=None,  # "adam" or "clean" for enhanced variants
        # Original NFSP also adds epsilon-exploration actions to the averaging buffer.
        add_random_actions_to_avg_buffer=True,
        n_br_updates_per_iter=2,
        n_avg_updates_per_iter=2,
        target_net_update_freq=300,  # every N neural net updates. Not every N global iters, episodes, or steps
        cir_buf_size_each_la=2e5,
        res_buf_size_each_la=2e6,  # the more the better to infinity
        min_prob_add_res_buf=0.0,  # 0.0 = vanilla reservoir; >0 exponential averaging.
        # Epsilon-greedy exploration schedule (forwarded to the DDQN module args).
        eps_start=0.06,
        eps_const=0.01,
        eps_exponent=0.5,
        eps_min=0.0,
        # Anticipatory-parameter schedule (kept on self; consumed by the trainer).
        antic_start=0.1,
        antic_const=0.01,
        antic_exponent=0.5,
        antic_min=0.1,  # No decay
        # For clean sampling
        constant_eps_expl=0.1,
        # --- Training.
        # Exactly one of the two step-count regimes must be used (asserted below):
        # "adam"/"clean" sampling needs the br/avg pair; any other sampling needs
        # the single n_steps_per_iter_per_la value.
        n_steps_avg_per_iter_per_la=None,
        n_steps_br_per_iter_per_la=None,
        n_steps_per_iter_per_la=None,
        training_multiplier_iter_0=1,  # In iter 0 the BR net is clueless, but adds to res_buf. -> "pretrain"
        # --- Q-Learning Hyperparameters
        mini_batch_size_br_per_la=128,
        dim_br=64,
        normalize_last_layer_flat=True,
        lr_br=0.1,
        deep_br=True,  # True -> Use deep multi-branch network; False -> Use shallow net
        grad_norm_clipping_br=1.0,
        optimizer_br="sgd",
        loss_br="mse",
        # --- Avg Network Hyperparameters
        mini_batch_size_avg_per_la=128,
        dim_avg=64,
        lr_avg=0.005,
        deep_avg=True,  # True -> Use deep multi-branch network; False -> Use shallow net
        grad_norm_clipping_avg=1.0,
        optimizer_avg="sgd",
        loss_avg="weighted_ce",
        # Option
        lbr_args=None,
        rlbr_args=None,
):
    """Build the full hyperparameter profile for an NFSP training run.

    Bundles general bookkeeping (logging/checkpoint frequencies, compute
    devices, distributed-worker counts), environment construction args,
    and per-network (best-response "br" and average "avg") hyperparameters
    into the module_args dict passed to the base args class, then stores
    the NFSP-specific settings on ``self``.

    Raises:
        NotImplementedError: if ``nn_type == "recurrent"``.
        ValueError: for any other unknown ``nn_type``.
        AssertionError: if the step-count arguments do not match the
            chosen ``sampling`` regime, or ``device_parameter_server``
            is not a string.
    """
    # Only the feedforward (flat observation) backbone is supported here.
    if nn_type == "recurrent":
        raise NotImplementedError
    elif nn_type == "feedforward":
        env_bldr_cls = FlatLimitPokerEnvBuilder
        mpm_args_br = MPMArgsFLAT(dim=dim_br, normalize=normalize_last_layer_flat, deep=deep_br)
        mpm_args_avg = MPMArgsFLAT(dim=dim_avg, normalize=normalize_last_layer_flat, deep=deep_avg)
    else:
        raise ValueError(nn_type)

    super().__init__(
        name=name,
        log_verbose=VERBOSE,
        log_export_freq=log_export_freq,
        checkpoint_freq=checkpoint_freq,
        eval_agent_export_freq=eval_agent_export_freq,
        path_data=path_data,
        game_cls=game_cls,
        env_bldr_cls=env_bldr_cls,
        start_chips=start_chips,
        eval_modes_of_algo=eval_modes_of_algo,
        eval_stack_sizes=eval_stack_sizes,
        DEBUGGING=DEBUGGING,
        DISTRIBUTED=DISTRIBUTED,
        CLUSTER=CLUSTER,
        device_inference=device_inference,
        local_crayon_server_docker_address=local_crayon_server_docker_address,
        module_args={
            # Best-response network: a dueling Q-net trained DQN-style.
            "ddqn": DDQNArgs(
                q_args=DuelingQArgs(
                    mpm_args=mpm_args_br,
                    n_units_final=dim_br,
                ),
                cir_buf_size=int(cir_buf_size_each_la),
                batch_size=mini_batch_size_br_per_la,
                target_net_update_freq=target_net_update_freq,
                optim_str=optimizer_br,
                loss_str=loss_br,
                lr=lr_br,
                eps_start=eps_start,
                eps_const=eps_const,
                eps_exponent=eps_exponent,
                eps_min=eps_min,
                grad_norm_clipping=grad_norm_clipping_br,
            ),
            # Average-policy network trained from the reservoir buffer.
            "avg": AvgWrapperArgs(
                avg_net_args=AvrgNetArgs(
                    mpm_args=mpm_args_avg,
                    n_units_final=dim_avg,
                ),
                batch_size=mini_batch_size_avg_per_la,
                res_buf_size=int(res_buf_size_each_la),
                min_prob_add_res_buf=min_prob_add_res_buf,
                loss_str=loss_avg,
                optim_str=optimizer_avg,
                lr=lr_avg,
                grad_norm_clipping=grad_norm_clipping_avg,
            ),
            "env": game_cls.ARGS_CLS(
                n_seats=n_seats,
                starting_stack_sizes_list=[start_chips for _ in range(n_seats)],
                stack_randomization_range=stack_randomization_range,
                use_simplified_headsup_obs=use_simplified_headsup_obs,
                uniform_action_interpolation=uniform_action_interpolation,
                # Set up in a way that just ignores this if not DiscretizedLimit
                bet_sizes_list_as_n_chips=copy.deepcopy(agent_bet_set),
                # Set up in a way that just ignores this if not Discretized
                bet_sizes_list_as_frac_of_pot=copy.deepcopy(agent_bet_set),
            ),
            "lbr": lbr_args,
            "rlbr": rlbr_args,
        },
        log_memory=False,
    )

    # ____________________________________________________ NFSP ____________________________________________________
    self.nn_type = nn_type
    self.n_br_updates_per_iter = int(n_br_updates_per_iter)
    self.n_avg_updates_per_iter = int(n_avg_updates_per_iter)
    self.antic_start = antic_start
    self.antic_const = antic_const
    self.antic_exponent = antic_exponent
    self.antic_min = antic_min
    self.linear = linear
    self.sampling = sampling
    self.add_random_actions_to_buffer = add_random_actions_to_avg_buffer
    self.training_multiplier_iter_0 = int(training_multiplier_iter_0)
    self.constant_eps_expl = constant_eps_expl

    # The two sampling regimes are mutually exclusive: "adam"/"clean" uses
    # separate br/avg step counts; everything else uses one combined count.
    # NOTE(review): depending on this branch, only one of the two attribute
    # sets exists on the instance afterwards — downstream code presumably
    # checks self.sampling before reading them; verify against the trainer.
    if sampling == "adam" or sampling == "clean":
        assert n_steps_per_iter_per_la is None
        assert n_steps_br_per_iter_per_la is not None
        assert n_steps_avg_per_iter_per_la is not None
        self.n_steps_br_per_iter_per_la = int(n_steps_br_per_iter_per_la)
        self.n_steps_avg_per_iter_per_la = int(n_steps_avg_per_iter_per_la)
    else:
        assert n_steps_per_iter_per_la is not None
        assert n_steps_br_per_iter_per_la is None
        assert n_steps_avg_per_iter_per_la is None
        self.n_steps_per_iter_per_la = int(n_steps_per_iter_per_la)

    # A single local worker is forced unless running distributed/cluster.
    if DISTRIBUTED or CLUSTER:
        self.n_learner_actors = int(n_learner_actor_workers)
    else:
        self.n_learner_actors = 1
    self.max_n_las_sync_simultaneously = int(max_n_las_sync_simultaneously)

    assert isinstance(device_parameter_server, str), "Please pass a string (either 'cpu' or 'cuda')!"
    self.device_parameter_server = torch.device(device_parameter_server)
def __init__(
        self,
        # ------ General
        name="",
        log_verbose=False,
        log_memory=False,
        log_export_freq=1,
        checkpoint_freq=99999999,
        eval_agent_export_freq=999999999,
        n_learner_actor_workers=8,
        max_n_las_sync_simultaneously=10,
        nn_type="feedforward",  # "recurrent" or "feedforward"
        # ------ Computing
        path_data=None,
        local_crayon_server_docker_address="localhost",
        device_inference="cpu",
        device_training="cpu",
        device_parameter_server="cpu",
        DISTRIBUTED=False,
        CLUSTER=False,
        DEBUGGING=False,
        # ------ Env
        game_cls=DiscretizedNLLeduc,
        n_seats=2,
        agent_bet_set=bet_sets.B_2,
        start_chips=None,
        chip_randomness=(0, 0),
        uniform_action_interpolation=False,
        use_simplified_headsup_obs=True,
        # ------ Evaluation
        eval_modes_of_algo=(EvalAgentDeepCFR.EVAL_MODE_SINGLE, ),
        eval_stack_sizes=None,
        # ------ General Deep CFR params
        n_traversals_per_iter=30000,
        iter_weighting_exponent=1.0,
        n_actions_traverser_samples=3,
        sampler="mo",
        turn_off_baseline=False,  # Only for VR-OS
        os_eps=1,
        periodic_restart=1,
        # --- Baseline Hyperparameters
        max_buffer_size_baseline=2e5,
        batch_size_baseline=512,
        n_batches_per_iter_baseline=300,
        dim_baseline=64,
        deep_baseline=True,
        normalize_last_layer_FLAT_baseline=True,
        # --- Adv Hyperparameters
        n_batches_adv_training=5000,
        init_adv_model="random",
        mini_batch_size_adv=2048,
        dim_adv=64,
        deep_adv=True,
        optimizer_adv="adam",
        loss_adv="weighted_mse",
        lr_adv=0.001,
        grad_norm_clipping_adv=1.0,
        lr_patience_adv=999999999,
        normalize_last_layer_FLAT_adv=True,
        max_buffer_size_adv=2e6,
        # ------ SPECIFIC TO AVRG NET
        n_batches_avrg_training=15000,
        init_avrg_model="random",
        dim_avrg=64,
        deep_avrg=True,
        mini_batch_size_avrg=2048,
        loss_avrg="weighted_mse",
        optimizer_avrg="adam",
        lr_avrg=0.001,
        grad_norm_clipping_avrg=1.0,
        lr_patience_avrg=999999999,
        normalize_last_layer_FLAT_avrg=True,
        max_buffer_size_avrg=2e6,
        # ------ SPECIFIC TO SINGLE
        export_each_net=False,
        eval_agent_max_strat_buf_size=None,
        # ------ Optional
        lbr_args=None,
        rlbr_args=None,
        h2h_args=None,
):
    """Build the hyperparameter profile for a Deep CFR training run
    that includes a learned MCCFR baseline module.

    Forwards general/compute/env/eval settings to the base args class and
    assembles per-module args for advantage ("adv") training, average-net
    ("avrg") training, the environment, and the baseline Q-network, then
    stores the Deep-CFR-specific fields on ``self``.

    Raises:
        ValueError: if ``nn_type`` is not "feedforward" (despite the
            parameter comment, no "recurrent" branch exists here).
        AssertionError: if ``device_parameter_server`` is not a string.
    """
    # Only the flat/feedforward backbone is handled in this variant.
    if nn_type == "feedforward":
        env_bldr_cls = FlatLimitPokerEnvBuilder
        from PokerRL.rl.neural.MainPokerModuleFLAT import MPMArgsFLAT
        mpm_args_adv = MPMArgsFLAT(deep=deep_adv, dim=dim_adv, normalize=normalize_last_layer_FLAT_adv)
        mpm_args_baseline = MPMArgsFLAT_Baseline(
            deep=deep_baseline, dim=dim_baseline, normalize=normalize_last_layer_FLAT_baseline)
        mpm_args_avrg = MPMArgsFLAT(
            deep=deep_avrg, dim=dim_avrg, normalize=normalize_last_layer_FLAT_avrg)
    else:
        raise ValueError(nn_type)

    super().__init__(
        name=name,
        log_verbose=log_verbose,
        log_export_freq=log_export_freq,
        checkpoint_freq=checkpoint_freq,
        eval_agent_export_freq=eval_agent_export_freq,
        path_data=path_data,
        game_cls=game_cls,
        env_bldr_cls=env_bldr_cls,
        start_chips=start_chips,
        eval_modes_of_algo=eval_modes_of_algo,
        eval_stack_sizes=eval_stack_sizes,
        DEBUGGING=DEBUGGING,
        DISTRIBUTED=DISTRIBUTED,
        CLUSTER=CLUSTER,
        device_inference=device_inference,
        local_crayon_server_docker_address=local_crayon_server_docker_address,
        module_args={
            # Advantage network (regret estimator) training settings.
            "adv_training": AdvTrainingArgs(
                adv_net_args=DuelingQArgs(mpm_args=mpm_args_adv, n_units_final=dim_adv),
                n_batches_adv_training=n_batches_adv_training,
                init_adv_model=init_adv_model,
                batch_size=mini_batch_size_adv,
                optim_str=optimizer_adv,
                loss_str=loss_adv,
                lr=lr_adv,
                grad_norm_clipping=grad_norm_clipping_adv,
                device_training=device_training,
                max_buffer_size=max_buffer_size_adv,
                lr_patience=lr_patience_adv,
            ),
            # Average-strategy network training settings.
            "avrg_training": AvrgTrainingArgs(
                avrg_net_args=AvrgNetArgs(
                    mpm_args=mpm_args_avrg,
                    n_units_final=dim_avrg,
                ),
                n_batches_avrg_training=n_batches_avrg_training,
                init_avrg_model=init_avrg_model,
                batch_size=mini_batch_size_avrg,
                loss_str=loss_avrg,
                optim_str=optimizer_avrg,
                lr=lr_avrg,
                grad_norm_clipping=grad_norm_clipping_avrg,
                device_training=device_training,
                max_buffer_size=max_buffer_size_avrg,
                lr_patience=lr_patience_avrg,
            ),
            "env": game_cls.ARGS_CLS(
                n_seats=n_seats,
                starting_stack_sizes_list=[start_chips for _ in range(n_seats)],
                bet_sizes_list_as_frac_of_pot=copy.deepcopy(agent_bet_set),
                stack_randomization_range=chip_randomness,
                use_simplified_headsup_obs=use_simplified_headsup_obs,
                uniform_action_interpolation=uniform_action_interpolation),
            # Learned baseline Q-network used for variance reduction in MCCFR.
            "mccfr_baseline": BaselineArgs(
                q_net_args=DuelingQArgs(
                    mpm_args=mpm_args_baseline,
                    n_units_final=dim_baseline,
                ),
                max_buffer_size=max_buffer_size_baseline,
                batch_size=batch_size_baseline,
                n_batches_per_iter_baseline=n_batches_per_iter_baseline,
            ),
            "lbr": lbr_args,
            "rlbr": rlbr_args,
            "h2h": h2h_args,
        },
        log_memory=log_memory,
    )

    self.nn_type = nn_type
    self.n_traversals_per_iter = int(n_traversals_per_iter)
    self.iter_weighting_exponent = iter_weighting_exponent
    self.sampler = sampler
    self.os_eps = os_eps
    self.periodic_restart = periodic_restart
    self.turn_off_baseline = turn_off_baseline
    self.n_actions_traverser_samples = n_actions_traverser_samples

    # SINGLE
    self.export_each_net = export_each_net
    self.eval_agent_max_strat_buf_size = eval_agent_max_strat_buf_size

    # Different for dist and local
    if DISTRIBUTED or CLUSTER:
        print("Running with ", n_learner_actor_workers, "LearnerActor Workers.")
        self.n_learner_actors = n_learner_actor_workers
    else:
        self.n_learner_actors = 1
    self.max_n_las_sync_simultaneously = max_n_las_sync_simultaneously

    assert isinstance(device_parameter_server, str), "Please pass a string (either 'cpu' or 'cuda')!"
    self.device_parameter_server = torch.device(device_parameter_server)
def __init__(
        self,
        # ------ General
        name="",
        log_verbose=True,
        log_export_freq=1,
        checkpoint_freq=99999999,
        eval_agent_export_freq=999999999,
        n_learner_actor_workers=8,
        max_n_las_sync_simultaneously=10,
        nn_type="feedforward",  # "recurrent" or "feedforward"
        # ------ Computing
        path_data=None,
        local_crayon_server_docker_address="localhost",
        device_inference="cpu",
        device_training="cpu",
        device_parameter_server="cpu",
        DISTRIBUTED=False,
        CLUSTER=False,
        DEBUGGING=False,
        # ------ Env
        game_cls=DiscretizedNLLeduc,
        env_bldr_cls=FlatLimitPokerEnvBuilder,
        n_seats=2,
        agent_bet_set=bet_sets.B_2,
        start_chips=None,
        chip_randomness=(0, 0),
        uniform_action_interpolation=False,
        use_simplified_headsup_obs=True,
        # ------ Evaluation
        eval_modes_of_algo=(EvalAgentDeepCFR.EVAL_MODE_SINGLE, ),
        eval_stack_sizes=None,
        # ------ General Deep CFR params
        n_traversals_per_iter=30000,
        online=False,
        iter_weighting_exponent=1.0,
        n_actions_traverser_samples=3,
        sampler="mo",
        # --- Adv Hyperparameters
        n_batches_adv_training=5000,
        init_adv_model="random",
        rnn_cls_str_adv="lstm",
        rnn_units_adv=128,
        rnn_stack_adv=1,
        dropout_adv=0.0,
        use_pre_layers_adv=False,
        n_cards_state_units_adv=96,
        n_merge_and_table_layer_units_adv=32,
        n_units_final_adv=64,
        mini_batch_size_adv=4096,
        n_mini_batches_per_la_per_update_adv=1,
        optimizer_adv="adam",
        loss_adv="weighted_mse",
        lr_adv=0.001,
        grad_norm_clipping_adv=10.0,
        lr_patience_adv=999999999,
        normalize_last_layer_FLAT_adv=True,
        max_buffer_size_adv=3e6,
        # ------ SPECIFIC TO AVRG NET
        n_batches_avrg_training=15000,
        init_avrg_model="random",
        rnn_cls_str_avrg="lstm",
        rnn_units_avrg=128,
        rnn_stack_avrg=1,
        dropout_avrg=0.0,
        use_pre_layers_avrg=False,
        n_cards_state_units_avrg=96,
        n_merge_and_table_layer_units_avrg=32,
        n_units_final_avrg=64,
        mini_batch_size_avrg=4096,
        n_mini_batches_per_la_per_update_avrg=1,
        loss_avrg="weighted_mse",
        optimizer_avrg="adam",
        lr_avrg=0.001,
        grad_norm_clipping_avrg=10.0,
        lr_patience_avrg=999999999,
        normalize_last_layer_FLAT_avrg=True,
        max_buffer_size_avrg=3e6,
        # ------ SPECIFIC TO SINGLE
        export_each_net=False,
        eval_agent_max_strat_buf_size=None,
        # ------ Optional
        lbr_args=None,
        rl_br_args=None,
        h2h_args=None,
):
    """Build the hyperparameter profile for a Deep CFR training run with a
    selectable network backbone.

    Supports four ``nn_type`` backbones ("recurrent", "feedforward",
    "convolutional", "dense_residual"), each lazily importing and
    constructing its own MainPokerModule args for the advantage ("adv")
    and average ("avrg") networks, then forwards everything to the base
    args class and stores Deep-CFR-specific fields on ``self``.

    Raises:
        ValueError: for an unknown ``nn_type``.
        AssertionError: if ``device_parameter_server`` is not a string.
    """
    print(" ************************** Initing args for: ", name, " **************************")

    # Pick the network backbone. Each branch imports its module lazily so
    # only the selected backbone's dependencies are loaded.
    if nn_type == "recurrent":
        from PokerRL.rl.neural.MainPokerModuleRNN import MPMArgsRNN
        # env_bldr_cls = HistoryEnvBuilder
        mpm_args_adv = MPMArgsRNN(
            rnn_cls_str=rnn_cls_str_adv,
            rnn_units=rnn_units_adv,
            rnn_stack=rnn_stack_adv,
            rnn_dropout=dropout_adv,
            use_pre_layers=use_pre_layers_adv,
            n_cards_state_units=n_cards_state_units_adv,
            n_merge_and_table_layer_units=n_merge_and_table_layer_units_adv)
        mpm_args_avrg = MPMArgsRNN(
            rnn_cls_str=rnn_cls_str_avrg,
            rnn_units=rnn_units_avrg,
            rnn_stack=rnn_stack_avrg,
            rnn_dropout=dropout_avrg,
            use_pre_layers=use_pre_layers_avrg,
            n_cards_state_units=n_cards_state_units_avrg,
            n_merge_and_table_layer_units=n_merge_and_table_layer_units_avrg)
    elif nn_type == "feedforward":
        from PokerRL.rl.neural.MainPokerModuleFLAT import MPMArgsFLAT
        mpm_args_adv = MPMArgsFLAT(
            use_pre_layers=use_pre_layers_adv,
            card_block_units=n_cards_state_units_adv,
            other_units=n_merge_and_table_layer_units_adv,
            normalize=normalize_last_layer_FLAT_adv)
        mpm_args_avrg = MPMArgsFLAT(
            use_pre_layers=use_pre_layers_avrg,
            card_block_units=n_cards_state_units_avrg,
            other_units=n_merge_and_table_layer_units_avrg,
            normalize=normalize_last_layer_FLAT_avrg)
    elif nn_type == "convolutional":
        from PokerRL.rl.neural.MainPokerModuleCNN import MPMArgsCNN
        mpm_args_adv = MPMArgsCNN(
            use_pre_layers=use_pre_layers_adv,
            card_block_units=n_cards_state_units_adv,
            other_units=n_merge_and_table_layer_units_adv,
            normalize=normalize_last_layer_FLAT_adv,
            dropout=dropout_adv)
        mpm_args_avrg = MPMArgsCNN(
            use_pre_layers=use_pre_layers_avrg,
            card_block_units=n_cards_state_units_avrg,
            other_units=n_merge_and_table_layer_units_avrg,
            normalize=normalize_last_layer_FLAT_avrg,
            dropout=dropout_avrg)
    elif nn_type == "dense_residual":
        from PokerRL.rl.neural.MainPokerModuleFLAT2 import MPMArgsFLAT2
        mpm_args_adv = MPMArgsFLAT2(
            use_pre_layers=use_pre_layers_adv,
            card_block_units=n_cards_state_units_adv,
            other_units=n_merge_and_table_layer_units_adv,
            normalize=normalize_last_layer_FLAT_adv,
            dropout=dropout_adv)
        mpm_args_avrg = MPMArgsFLAT2(
            use_pre_layers=use_pre_layers_avrg,
            card_block_units=n_cards_state_units_avrg,
            other_units=n_merge_and_table_layer_units_avrg,
            normalize=normalize_last_layer_FLAT_avrg,
            dropout=dropout_avrg)
    else:
        raise ValueError(nn_type)

    super().__init__(
        name=name,
        log_verbose=log_verbose,
        log_export_freq=log_export_freq,
        checkpoint_freq=checkpoint_freq,
        eval_agent_export_freq=eval_agent_export_freq,
        path_data=path_data,
        game_cls=game_cls,
        env_bldr_cls=env_bldr_cls,
        start_chips=start_chips,
        eval_modes_of_algo=eval_modes_of_algo,
        eval_stack_sizes=eval_stack_sizes,
        DEBUGGING=DEBUGGING,
        DISTRIBUTED=DISTRIBUTED,
        CLUSTER=CLUSTER,
        device_inference=device_inference,
        local_crayon_server_docker_address=local_crayon_server_docker_address,
        module_args={
            # Advantage network (regret estimator) training settings.
            "adv_training": AdvTrainingArgs(
                adv_net_args=DuelingQArgs(
                    mpm_args=mpm_args_adv,
                    n_units_final=n_units_final_adv,
                ),
                n_batches_adv_training=n_batches_adv_training,
                init_adv_model=init_adv_model,
                batch_size=mini_batch_size_adv,
                n_mini_batches_per_update=n_mini_batches_per_la_per_update_adv,
                optim_str=optimizer_adv,
                loss_str=loss_adv,
                lr=lr_adv,
                grad_norm_clipping=grad_norm_clipping_adv,
                device_training=device_training,
                max_buffer_size=max_buffer_size_adv,
                lr_patience=lr_patience_adv,
            ),
            # Average-strategy network training settings.
            "avrg_training": AvrgTrainingArgs(
                avrg_net_args=AvrgNetArgs(
                    mpm_args=mpm_args_avrg,
                    n_units_final=n_units_final_avrg,
                ),
                n_batches_avrg_training=n_batches_avrg_training,
                init_avrg_model=init_avrg_model,
                batch_size=mini_batch_size_avrg,
                n_mini_batches_per_update=n_mini_batches_per_la_per_update_avrg,
                loss_str=loss_avrg,
                optim_str=optimizer_avrg,
                lr=lr_avrg,
                grad_norm_clipping=grad_norm_clipping_avrg,
                device_training=device_training,
                max_buffer_size=max_buffer_size_avrg,
                lr_patience=lr_patience_avrg,
            ),
            "env": game_cls.ARGS_CLS(
                n_seats=n_seats,
                starting_stack_sizes_list=[start_chips for _ in range(n_seats)],
                bet_sizes_list_as_frac_of_pot=copy.deepcopy(agent_bet_set),
                stack_randomization_range=chip_randomness,
                use_simplified_headsup_obs=use_simplified_headsup_obs,
                uniform_action_interpolation=uniform_action_interpolation),
            "lbr": lbr_args,
            "rlbr": rl_br_args,
            "h2h": h2h_args,
        })

    self.nn_type = nn_type
    self.online = online
    # NOTE(review): unlike the sibling args class, n_traversals_per_iter is
    # not coerced with int() here — confirm whether float inputs are intended.
    self.n_traversals_per_iter = n_traversals_per_iter
    self.iter_weighting_exponent = iter_weighting_exponent
    self.sampler = sampler
    self.n_actions_traverser_samples = n_actions_traverser_samples

    # SINGLE
    self.export_each_net = export_each_net
    self.eval_agent_max_strat_buf_size = eval_agent_max_strat_buf_size

    # Different for dist and local
    if DISTRIBUTED or CLUSTER:
        print("Running with ", n_learner_actor_workers, "LearnerActor Workers.")
        self.n_learner_actors = n_learner_actor_workers
    else:
        self.n_learner_actors = 1
    self.max_n_las_sync_simultaneously = max_n_las_sync_simultaneously

    assert isinstance(device_parameter_server, str), "Please pass a string (either 'cpu' or 'cuda')!"
    self.device_parameter_server = torch.device(device_parameter_server)
def __init__(
        self,
        # --- general
        name,
        log_export_freq=200,
        checkpoint_freq=99999999,
        lite_checkpoint=False,
        lite_checkpoint_steps=128000,
        export_hands_freq=10000,
        eval_agent_export_freq=99999999,
        # --- Computing
        path_data=None,
        local_crayon_server_docker_address="localhost",
        device_inference="cpu",
        device_parameter_server="cpu",
        n_learner_actor_workers=8,
        max_n_las_sync_simultaneously=100,
        DISTRIBUTED=False,
        CLUSTER=False,
        DEBUGGING=False,
        VERBOSE=True,
        TESTING=False,
        # --- env
        game_cls=StandardLeduc,
        n_seats=2,
        use_simplified_headsup_obs=True,
        start_chips=None,
        use_canonical=False,
        agent_bet_set=bet_sets.B_2,
        stack_randomization_range=(0, 0),
        uniform_action_interpolation=False,
        # --- Evaluation
        eval_modes_of_algo=(EvalAgentNFSP.EVAL_MODE_AVG, ),
        eval_stack_sizes=None,
        # --- NFSP
        nn_type="feedforward",  # "recurrent" or "feedforward"
        nn_structure="paper",  # "paper" -> 4-layer net from the NFSP paper; otherwise the flat MPM net
        feedforward_env_builder=FlatLimitPokerEnvBuilder,
        anticipatory_parameter=0.1,
        first_and_third_units=1024,
        second_and_fourth_units=512,
        # Original NFSP also adds epsilon-exploration actions to the averaging buffer.
        add_random_actions_to_avg_buffer=True,
        n_br_updates_per_iter=2,
        n_avg_updates_per_iter=2,
        target_net_update_freq=300,  # every N neural net updates. Not every N global iters, episodes, or steps
        cir_buf_size_each_la=2e5,
        res_buf_size_each_la=2e6,  # the more the better to infinity
        min_prob_add_res_buf=0.0,  # 0.0 = vanilla reservoir; >0 exponential averaging
        action_and_hand_buffer_size=20000,
        # Epsilon-greedy exploration schedule (forwarded to the DDQN module args).
        eps_start=0.06,
        eps_const=0.01,
        eps_exponent=0.5,
        eps_min=0.0,
        # --- Training.
        n_steps_per_iter_per_la=128,
        n_steps_pretrain_per_la=0,
        n_envs=128,
        mini_batch_size_br_per_la=128,
        n_mini_batches_per_la_per_update_br=1,  # total num of samples per iter is that * batch_size above.
        mini_batch_size_avg_per_la=128,
        n_mini_batches_per_la_per_update_avg=1,  # total num of samples per iter is that * batch_size above.
        training_multiplier_iter_0=1,  # In iter 0 the BR net is clueless, but adds to res_buf. -> "pretrain"
        # --- Q-Learning Hyperparameters
        n_cards_state_units_br=192,
        n_merge_and_table_layer_units_br=64,
        n_units_final_br=64,
        normalize_last_layer_flat=False,
        rnn_cls_str_br="lstm",
        rnn_units_br=128,
        rnn_stack_br=1,
        lr_br=0.1,
        dropout_br=0.0,
        use_pre_layers_br=True,  # True -> Use deep multi-branch network; False -> Use shallow net
        grad_norm_clipping_br=10.0,
        optimizer_br="sgd",
        loss_br="mse",
        # --- Avg Network Hyperparameters
        n_cards_state_units_avg=192,
        n_merge_and_table_layer_units_avg=64,
        n_units_final_avg=64,
        rnn_cls_str_avg="lstm",
        rnn_units_avg=128,
        rnn_stack_avg=1,
        lr_avg=0.005,
        dropout_avg=0.0,
        use_pre_layers_avg=True,  # True -> Use deep multi-branch network; False -> Use shallow net
        grad_norm_clipping_avg=10.0,
        optimizer_avg="sgd",
        loss_avg="ce",
        # Option
        lbr_args=None,
        rlbr_args=None,
        history_args=None,
        offline_args=None,
):
    """Build the full hyperparameter profile for an NFSP training run with
    selectable network backbone and structure.

    Supports a recurrent backbone (RNN over action histories) or a
    feedforward one; the feedforward case further selects between the
    original NFSP paper's 4-layer architecture (``nn_structure="paper"``)
    and the flat MainPokerModule net. Everything is forwarded to the base
    args class via ``module_args`` and the NFSP-specific settings are kept
    on ``self``.

    Raises:
        ValueError: for an unknown ``nn_type``.
        AssertionError: if ``device_parameter_server`` is not a string.
    """
    print(" ************************** Initing args for: ", name, " **************************")

    # Pick backbone + env builder for the BR ("br") and average ("avg") nets.
    if nn_type == "recurrent":
        env_bldr_cls = HistoryEnvBuilder
        mpm_args_br = MPMArgsRNN(
            rnn_cls_str=rnn_cls_str_br,
            rnn_units=rnn_units_br,
            rnn_stack=rnn_stack_br,
            rnn_dropout=dropout_br,
            use_pre_layers=use_pre_layers_br,
            n_cards_state_units=n_cards_state_units_br,
            n_merge_and_table_layer_units=n_merge_and_table_layer_units_br)
        mpm_args_avg = MPMArgsRNN(
            rnn_cls_str=rnn_cls_str_avg,
            rnn_units=rnn_units_avg,
            rnn_stack=rnn_stack_avg,
            rnn_dropout=dropout_avg,
            use_pre_layers=use_pre_layers_avg,
            n_cards_state_units=n_cards_state_units_avg,
            n_merge_and_table_layer_units=n_merge_and_table_layer_units_avg)
    elif nn_type == "feedforward":
        env_bldr_cls = feedforward_env_builder
        if nn_structure == "paper":
            mpm_args_br = PaperMPMArgsFLAT(
                first_units=first_and_third_units,
                second_units=second_and_fourth_units,
                third_units=first_and_third_units,
                fourth_units=second_and_fourth_units,
                normalize=normalize_last_layer_flat)
            # NOTE(review): unlike the BR args above, no normalize= is passed
            # here (nor for the avg MPMArgsFLAT below) — the avg net falls back
            # to that class's default. Confirm this asymmetry is intentional.
            mpm_args_avg = PaperMPMArgsFLAT(
                first_units=first_and_third_units,
                second_units=second_and_fourth_units,
                third_units=first_and_third_units,
                fourth_units=second_and_fourth_units)
        else:
            mpm_args_br = MPMArgsFLAT(
                use_pre_layers=use_pre_layers_br,
                card_block_units=n_cards_state_units_br,
                other_units=n_merge_and_table_layer_units_br,
                normalize=normalize_last_layer_flat,
            )
            mpm_args_avg = MPMArgsFLAT(
                use_pre_layers=use_pre_layers_avg,
                card_block_units=n_cards_state_units_avg,
                other_units=n_merge_and_table_layer_units_avg)
    else:
        raise ValueError(nn_type)

    super().__init__(
        name=name,
        log_verbose=VERBOSE,
        log_export_freq=log_export_freq,
        checkpoint_freq=checkpoint_freq,
        export_hands_freq=export_hands_freq,
        eval_agent_export_freq=eval_agent_export_freq,
        path_data=path_data,
        game_cls=game_cls,
        env_bldr_cls=env_bldr_cls,
        start_chips=start_chips,
        eval_modes_of_algo=eval_modes_of_algo,
        eval_stack_sizes=eval_stack_sizes,
        DEBUGGING=DEBUGGING,
        TESTING=TESTING,
        DISTRIBUTED=DISTRIBUTED,
        CLUSTER=CLUSTER,
        device_inference=device_inference,
        local_crayon_server_docker_address=local_crayon_server_docker_address,
        module_args={
            # Best-response network: a dueling Q-net trained DQN-style.
            "ddqn": DDQNArgs(
                q_args=DuelingQArgs(
                    mpm_args=mpm_args_br,
                    n_units_final=n_units_final_br,
                ),
                cir_buf_size=int(cir_buf_size_each_la),
                batch_size=mini_batch_size_br_per_la,
                n_mini_batches_per_update=n_mini_batches_per_la_per_update_br,
                target_net_update_freq=target_net_update_freq,
                optim_str=optimizer_br,
                loss_str=loss_br,
                lr=lr_br,
                eps_start=eps_start,
                eps_const=eps_const,
                eps_exponent=eps_exponent,
                eps_min=eps_min,
                grad_norm_clipping=grad_norm_clipping_br,
            ),
            # Average-policy network trained from the reservoir buffer.
            "avg": AvgWrapperArgs(
                avg_net_args=AvrgNetArgs(
                    mpm_args=mpm_args_avg,
                    n_units_final=n_units_final_avg,
                ),
                batch_size=mini_batch_size_avg_per_la,
                n_mini_batches_per_update=n_mini_batches_per_la_per_update_avg,
                res_buf_size=int(res_buf_size_each_la),
                min_prob_add_res_buf=min_prob_add_res_buf,
                loss_str=loss_avg,
                optim_str=optimizer_avg,
                lr=lr_avg,
                grad_norm_clipping=grad_norm_clipping_avg,
            ),
            "env": game_cls.ARGS_CLS(
                n_seats=n_seats,
                starting_stack_sizes_list=[start_chips for _ in range(n_seats)],
                stack_randomization_range=stack_randomization_range,
                use_simplified_headsup_obs=use_simplified_headsup_obs,
                uniform_action_interpolation=uniform_action_interpolation,
                # Set up in a way that just ignores this if not Discretized
                bet_sizes_list_as_frac_of_pot=copy.deepcopy(agent_bet_set),
            ),
            "lbr": lbr_args,
            "rlbr": rlbr_args,
            "history": history_args,
            "offline": offline_args,
        })

    # ____________________________________________________ NFSP ____________________________________________________
    self.lite_checkpoint = lite_checkpoint
    self.lite_checkpoint_steps = lite_checkpoint_steps
    self.use_canonical = use_canonical
    self.nn_type = nn_type
    self.n_br_updates_per_iter = int(n_br_updates_per_iter)
    self.n_avg_updates_per_iter = int(n_avg_updates_per_iter)
    self.anticipatory_parameter = anticipatory_parameter
    self.add_random_actions_to_buffer = add_random_actions_to_avg_buffer
    self.training_multiplier_iter_0 = int(training_multiplier_iter_0)
    self.n_envs = int(n_envs)
    self.n_steps_pretrain_per_la = int(n_steps_pretrain_per_la)
    self.n_steps_per_iter_per_la = int(n_steps_per_iter_per_la)
    self.action_and_hand_buffer_size = action_and_hand_buffer_size

    # A single local worker is forced unless running distributed/cluster.
    if DISTRIBUTED or CLUSTER:
        self.n_learner_actors = int(n_learner_actor_workers)
    else:
        self.n_learner_actors = 1
    self.max_n_las_sync_simultaneously = int(max_n_las_sync_simultaneously)

    assert isinstance(device_parameter_server, str), "Please pass a string (either 'cpu' or 'cuda')!"
    self.device_parameter_server = torch.device(device_parameter_server)