Example #1
def new_tp(i):
    TP = TrainingProfile(name=exp_name,
                         nn_type="feedforward",

                         eval_agent_export_freq=3,
                         checkpoint_freq=3,

                         max_buffer_size_adv=1e6,
                         n_traversals_per_iter=500,
                         n_batches_adv_training=250,
                         mini_batch_size_adv=2048,

                         game_cls=StandardLeduc,


                         n_units_final_adv=64,
                         n_merge_and_table_layer_units_adv=64,
                         init_adv_model="random",  # warm start neural weights with init from last iter
                         use_pre_layers_adv=False,  # shallower nets
                         use_pre_layers_avrg=False,  # shallower nets

                         # You can specify one or both modes. Choosing both is useful to compare them.
                         eval_modes_of_algo=(
                             EvalAgentDeepCFR.EVAL_MODE_SINGLE,  # SD-CFR
                         ),

                         DISTRIBUTED=True,
                         log_verbose=True,
                         rl_br_args=RLBRArgs(rlbr_bet_set=None,

                                             #n_hands_each_seat=5*i,
                                             n_workers=5,
                                             # Training
                                             DISTRIBUTED=True,
                                             n_iterations=1000*(i+1),
                                             play_n_games_per_iter=50,
                                             # The DDQN
                                             batch_size=512,
                                             )
                         #lbr_args = LBRArgs()
                         )
    return TP
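
Example #1 only defines a factory for `TrainingProfile` objects; something still has to consume it to run training. Below is a minimal usage sketch, not part of the original snippet: the import paths are assumed from the public Deep-CFR / PokerRL repository layout, and `exp_name` plus the sweep length are illustrative placeholders.

# Usage sketch only. Import paths are assumed from the Deep-CFR / PokerRL repo layout;
# verify them against the installed version.
from DeepCFR.EvalAgentDeepCFR import EvalAgentDeepCFR
from DeepCFR.TrainingProfile import TrainingProfile
from DeepCFR.workers.driver.Driver import Driver
from PokerRL.eval.rl_br.RLBRArgs import RLBRArgs
from PokerRL.game.games import StandardLeduc

exp_name = "sdcfr_leduc_sweep"  # illustrative placeholder, not from the original script

if __name__ == '__main__':
    # Launch one run per configuration; new_tp(i) scales the RL-BR evaluation budget with i.
    for i in range(3):
        ctrl = Driver(t_prof=new_tp(i),
                      eval_methods={"rlbr": 1},  # evaluator name -> run frequency in iterations
                      n_iterations=12)
        ctrl.run()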
Example #2
                                         init_adv_model="random",  # warm start neural weights with init from last iter
                                         use_pre_layers_adv=False,  # shallower nets
                                         use_pre_layers_avrg=False,  # shallower nets

                                         # You can specify one or both modes. Choosing both is useful to compare them.
                                         eval_modes_of_algo=(
                                             EvalAgentDeepCFR.EVAL_MODE_SINGLE,  # SD-CFR
                                         ),

                                         DISTRIBUTED=True,
                                         log_verbose=True,
                                         rl_br_args=RLBRArgs(rlbr_bet_set=None,
                                                             n_hands_each_seat=200,
                                                             n_workers=1,
                                                             # Training
                                                             DISTRIBUTED=False,
                                                             n_iterations=100,
                                                             play_n_games_per_iter=50,
                                                             # The DDQN
                                                             batch_size=512,
                                                             ),
                                         lbr_args=LBRArgs(n_lbr_hands_per_seat=30000,
                                                          n_parallel_lbr_workers=10,
                                                          DISTRIBUTED=True,
                                                          ),
                                         ),
                  eval_methods={'br': 1,
                                #'rlbr': 1,
                                'lbr': 1,
                  },
                  n_iterations=12)
    ctrl.run()
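
For orientation, the `eval_methods` argument in these examples maps an evaluator name to how often it runs, in iterations. A hedged summary of the keys used here, assuming the evaluators shipped with PokerRL (the exact set depends on the installed version):

# Assumption: evaluator names as used by PokerRL's driver; values mean "run every N iterations".
eval_methods = {
    "br": 1,      # exact best-response exploitability (only tractable in small games such as Leduc)
    "lbr": 1,     # Local Best Response approximation, configured via lbr_args
    # "rlbr": 5,  # RL-based best response (DDQN), configured via rl_br_args
    # "h2h": 15,  # head-to-head play between the modes in eval_modes_of_algo (key name assumed)
}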
Example #3
                                         use_pre_layers_avrg=True,
                                         n_cards_state_units_avrg=192,
                                         n_merge_and_table_layer_units_avrg=64,
                                         n_units_final_avrg=64,

                                         max_buffer_size_avrg=2e6,
                                         lr_avrg=0.001,
                                         lr_patience_avrg=99999999,  # No lr decay

                                         # With the H2H evaluator, the modes listed here are evaluated against each other.
                                         eval_modes_of_algo=(
                                             EvalAgentDeepCFR.EVAL_MODE_AVRG_NET,
                                         ),

                                         log_verbose=True,
                                         game_cls=Flop5Holdem,

                                         # Use the simplified heads-up observation; the default observation also works for 3+ players.
                                         use_simplified_headsup_obs=True,

                                         rl_br_args=RLBRArgs(rlbr_bet_set=POT_ONLY),
                                         ),
                  # No evaluators are scheduled for this run (eval_methods left empty).
                  eval_methods={},

                  # This repo counts one alternating update of both players as a single iteration,
                  # so n_iterations=10 here corresponds to 20 alternating iterations in the usual counting.
                  n_iterations=10,
                  )
    ctrl.run()
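
Examples #2 through #4 are fragments cut out of larger scripts, so several referenced names (`LBRArgs`, `POT_ONLY`, `Flop5Holdem`) are imported elsewhere. A sketch of the imports such a script would need, with paths assumed from the public PokerRL repository layout:

# Assumed import paths; verify against the installed PokerRL version.
from PokerRL.eval.lbr.LBRArgs import LBRArgs    # LBR evaluator settings (Example #2)
from PokerRL.game.bet_sets import POT_ONLY      # pot-sized-bet-only action set (Examples #3 and #4)
from PokerRL.game.games import Flop5Holdem      # flop hold'em game class (Examples #3 and #4)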
Example #4
                                         use_pre_layers_adv=True,
                                         n_cards_state_units_adv=192,
                                         n_merge_and_table_layer_units_adv=64,
                                         n_units_final_adv=64,

                                         max_buffer_size_adv=1e6,  # *20 LAs = 40M
                                         lr_adv=0.001,
                                         lr_patience_adv=99999999,  # No lr decay

                                         # With the H2H evaluator, the modes listed here are evaluated against each other.
                                         eval_modes_of_algo=(
                                             EvalAgentDeepCFR.EVAL_MODE_SINGLE,
                                         ),

                                         log_verbose=True,
                                         game_cls=Flop5Holdem,

                                         # Use the simplified heads-up observation; the default observation also works for 3+ players.
                                         use_simplified_headsup_obs=True,

                                         rl_br_args=RLBRArgs(rlbr_bet_set=POT_ONLY, n_iterations=2000),
                                         ),
                  # Run the RL-BR evaluator every 5 iterations.
                  eval_methods={'rlbr': 5},

                  # This repo counts one alternating update of both players as a single iteration,
                  # so n_iterations=50 here corresponds to 100 alternating iterations in the usual counting.
                  n_iterations=50,
                  )
    ctrl.run()