def new_tp(i):
    TP = TrainingProfile(name=exp_name,
                         nn_type="feedforward",

                         eval_agent_export_freq=3,
                         checkpoint_freq=3,

                         max_buffer_size_adv=1e6,
                         n_traversals_per_iter=500,
                         n_batches_adv_training=250,
                         mini_batch_size_adv=2048,

                         game_cls=StandardLeduc,

                         n_units_final_adv=64,
                         n_merge_and_table_layer_units_adv=64,
                         init_adv_model="random",  # re-init adv nets randomly each iter ("last" would warm start from the last iter's weights)
                         use_pre_layers_adv=False,  # shallower nets
                         use_pre_layers_avrg=False,  # shallower nets

                         # You can specify one or both modes. Choosing both is useful to compare them.
                         eval_modes_of_algo=(
                             EvalAgentDeepCFR.EVAL_MODE_SINGLE,  # SD-CFR
                         ),

                         DISTRIBUTED=True,
                         log_verbose=True,
                         rl_br_args=RLBRArgs(rlbr_bet_set=None,
                                             # n_hands_each_seat=5 * i,
                                             n_workers=5,

                                             # Training
                                             DISTRIBUTED=True,
                                             n_iterations=1000 * (i + 1),
                                             play_n_games_per_iter=50,

                                             # The DDQN
                                             batch_size=512,
                                             ),
                         # lbr_args=LBRArgs(),
                         )
    return TP
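For reference, `new_tp(i)` is meant to be handed to the repo's `Driver`, just as the snippets below do with inline profiles. A minimal usage sketch follows, assuming the Deep-CFR repo's `Driver` import path; the sweep range, experiment name, and `eval_methods` schedule are illustrative placeholders, not values from the source:

# Minimal usage sketch (assumed: Driver import path; illustrative sweep/eval settings).
from DeepCFR.workers.driver.Driver import Driver

for i in range(3):  # hypothetical sweep; new_tp(i) scales RL-BR training to 1000 * (i + 1) iters
    exp_name = "SD-CFR_LEDUC_RLBR_" + str(i)  # placeholder name; new_tp() reads this global
    ctrl = Driver(t_prof=new_tp(i),
                  eval_methods={'rlbr': 3},  # run the RL-BR evaluator every 3 iterations
                  n_iterations=12)
    ctrl.run()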
init_adv_model="random", # warm start neural weights with init from last iter use_pre_layers_adv=False, # shallower nets use_pre_layers_avrg=False, # shallower nets # You can specify one or both modes. Choosing both is useful to compare them. eval_modes_of_algo=( EvalAgentDeepCFR.EVAL_MODE_SINGLE, # SD-CFR ), DISTRIBUTED=True, log_verbose=True, rl_br_args=RLBRArgs(rlbr_bet_set=None, n_hands_each_seat=200, n_workers=1, # Training DISTRIBUTED=False, n_iterations=100, play_n_games_per_iter=50, # The DDQN batch_size=512, ), lbr_args=LBRArgs(n_lbr_hands_per_seat=30000, n_parallel_lbr_workers=10, DISTRIBUTED=True, ), ), eval_methods={'br': 1, #'rlbr': 1, 'lbr': 1, }, n_iterations=12) ctrl.run()
                                     use_pre_layers_avrg=True,
                                     n_cards_state_units_avrg=192,
                                     n_merge_and_table_layer_units_avrg=64,
                                     n_units_final_avrg=64,

                                     max_buffer_size_avrg=2e6,
                                     lr_avrg=0.001,
                                     lr_patience_avrg=99999999,  # No lr decay

                                     # With the H2H evaluator, the modes listed here are evaluated against each other.
                                     eval_modes_of_algo=(
                                         EvalAgentDeepCFR.EVAL_MODE_AVRG_NET,
                                     ),

                                     log_verbose=True,
                                     game_cls=Flop5Holdem,

                                     # enables simplified obs. Default works also for 3+ players
                                     use_simplified_headsup_obs=True,

                                     rl_br_args=RLBRArgs(rlbr_bet_set=POT_ONLY),
                                     ),

              # e.g. {"h2h": 15} would evaluate Head-to-Head every 15 iterations of both players
              # (= every 30 alternating iterations); an empty dict disables periodic evaluation.
              eval_methods={},

              # This repo implements an alternating pair of iterations as a single iter, so the
              # paper's 300 alternating iterations correspond to 150 in this counting.
              n_iterations=10,
              )
ctrl.run()
                                     use_pre_layers_adv=True,
                                     n_cards_state_units_adv=192,
                                     n_merge_and_table_layer_units_adv=64,
                                     n_units_final_adv=64,

                                     max_buffer_size_adv=1e6,  # * 20 LAs = 20M
                                     lr_adv=0.001,
                                     lr_patience_adv=99999999,  # No lr decay

                                     # With the H2H evaluator, the modes listed here are evaluated against each other.
                                     eval_modes_of_algo=(
                                         EvalAgentDeepCFR.EVAL_MODE_SINGLE,
                                     ),

                                     log_verbose=True,
                                     game_cls=Flop5Holdem,

                                     # enables simplified obs. Default works also for 3+ players
                                     use_simplified_headsup_obs=True,

                                     rl_br_args=RLBRArgs(rlbr_bet_set=POT_ONLY,
                                                         n_iterations=2000),
                                     ),

              # Run the RL-BR evaluator every 5 iterations of both players (= every 10 alternating iterations).
              eval_methods={'rlbr': 5},

              # This repo implements an alternating pair of iterations as a single iter, so the
              # paper's 300 alternating iterations correspond to 150 in this counting.
              n_iterations=50,
              )
ctrl.run()
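These profiles periodically export an `EvalAgentDeepCFR` (see `eval_agent_export_freq=3` in the first profile), which can later be loaded and played against. Below is a hedged sketch modeled on the repo's interactive-play example; `load_from_disk`, `InteractiveGame`, and the checkpoint path are assumptions about the installed PokerRL/Deep-CFR versions, not verified here:

# Hedged sketch: load an exported EvalAgent and play against it interactively.
# Assumed API: EvalAgentDeepCFR.load_from_disk() and PokerRL's InteractiveGame,
# as used in the repo's examples; the path below is a placeholder.
from PokerRL.game.InteractiveGame import InteractiveGame
from DeepCFR.EvalAgentDeepCFR import EvalAgentDeepCFR

if __name__ == '__main__':
    eval_agent = EvalAgentDeepCFR.load_from_disk(
        path_to_eval_agent="/path/to/exported/eval_agent")  # placeholder path

    game = InteractiveGame(env_cls=eval_agent.env_bldr.env_cls,
                           env_args=eval_agent.env_bldr.env_args,
                           seats_human_plays_list=[0],  # human takes seat 0
                           eval_agent=eval_agent)
    game.start_to_play()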