ctrl = Driver(
    t_prof=TrainingProfile(
        name="DISNLHT_DISTRIBUTED_LH_RNN",
        nn_type="recurrent",
        DISTRIBUTED=False,
        n_learner_actor_workers=N_WORKERS,

        eval_agent_export_freq=9999999,  # don't export
        max_buffer_size_adv=3.636e5,  # per learner-actor worker
        max_buffer_size_avrg=3.636e5,  # per learner-actor worker

        # Tiny values for quick smoke tests; scale these up for a real run.
        n_traversals_per_iter=5,
        n_batches_adv_training=12,
        n_batches_avrg_training=100,
        mini_batch_size_adv=8,
        mini_batch_size_avrg=16,

        n_merge_and_table_layer_units_adv=64,
        n_merge_and_table_layer_units_avrg=64,
        n_units_final_adv=64,
        n_units_final_avrg=64,
        n_cards_state_units_adv=64,
        n_cards_state_units_avrg=64,

        init_adv_model="last",  # warm-start advantage-net weights from the last iteration
        init_avrg_model="random",
        use_pre_layers_adv=True,
        use_pre_layers_avrg=True,

        game_cls=DiscretizedNLHoldem,
        agent_bet_set=bet_sets.B_5,

        lbr_args=LBRArgs(
            lbr_bet_set=bet_sets.B_5,
            n_lbr_hands_per_seat=8,
            lbr_check_to_round=Poker.TURN,
            # lbr_check_to_round=None,
            n_parallel_lbr_workers=N_LBR_WORKERS,
            use_gpu_for_batch_eval=False,
            DISTRIBUTED=True,
        ),
    ),
    eval_methods={"lbr": 10},
    n_iterations=50)
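# The snippets in this section assume a shared preamble. A minimal sketch, assuming the
# standard Deep-CFR / PokerRL package layout (the Driver and LBRArgs import paths are
# confirmed by the imports further down this section; N_WORKERS and N_LBR_WORKERS are
# placeholder constants you must define yourself, and PLO / VanillaEnvBuilder, used by
# two snippets below, exist only in some PokerRL forks):
from DeepCFR.EvalAgentDeepCFR import EvalAgentDeepCFR
from DeepCFR.TrainingProfile import TrainingProfile
from DeepCFR.workers.driver.Driver import Driver
from PokerRL.eval.head_to_head.H2HArgs import H2HArgs
from PokerRL.eval.lbr.LBRArgs import LBRArgs
from PokerRL.eval.rl_br.RLBRArgs import RLBRArgs
from PokerRL.game import bet_sets
from PokerRL.game.Poker import Poker
from PokerRL.game.games import (BigLeduc, DiscretizedNLHoldem, Flop5Holdem,
                                LimitHoldem, StandardLeduc)

N_WORKERS = 4      # learner-actor workers; size to your machine
N_LBR_WORKERS = 4  # parallel LBR evaluation workers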
ctrl = Driver(
    t_prof=TrainingProfile(
        name="MO_LEDUC_BigLeduc_LBR",
        nn_type="feedforward",
        eval_agent_export_freq=3,
        checkpoint_freq=3,
        n_learner_actor_workers=5,
        max_buffer_size_adv=1e6,
        n_traversals_per_iter=500,
        n_batches_adv_training=250,
        mini_batch_size_adv=2048,
        game_cls=BigLeduc,
        n_units_final_adv=64,
        n_merge_and_table_layer_units_adv=64,
        init_adv_model="random",  # re-initialize advantage-net weights randomly each iteration
        use_pre_layers_adv=False,  # shallower nets
        use_pre_layers_avrg=False,  # shallower nets

        # You can specify one or both modes. Choosing both is useful to compare them.
        eval_modes_of_algo=(
            EvalAgentDeepCFR.EVAL_MODE_SINGLE,  # SD-CFR
        ),

        DISTRIBUTED=True,
        log_verbose=True,
        rl_br_args=RLBRArgs(
            rlbr_bet_set=None,
            n_hands_each_seat=200,
            n_workers=1,

            # Training
            DISTRIBUTED=False,
            n_iterations=100,
            play_n_games_per_iter=50,

            # The DDQN
            batch_size=512,
        ),
        lbr_args=LBRArgs(
            n_lbr_hands_per_seat=30000,
            n_parallel_lbr_workers=10,
            DISTRIBUTED=True,
        ),
    ),
    eval_methods={
        'br': 1,
        # 'rlbr': 1,
        'lbr': 1,
    },
    n_iterations=12)
ctrl = Driver(
    iteration_to_import=30,
    name_to_import='NLH_1.5m_10mX2-b2048-last-patience200-Leaky-lr0.004_',
    t_prof=TrainingProfile(
        name="NLH_1.5m_10mX2-b2048-last-patience200-Leaky-lr0.004",
        nn_type="feedforward",
        DISTRIBUTED=False,
        CLUSTER=False,
        n_learner_actor_workers=1,
        max_buffer_size_adv=1500000,  # 1.5e6
        export_each_net=False,
        # path_strategy_nets="",
        checkpoint_freq=5,  # produces A SHITLOAD of GBs!
        eval_agent_export_freq=1,  # produces GBs!

        # How many of the legal actions at the current step to branch into randomly
        # (an action-breadth limit).
        n_actions_traverser_samples=4,  # 3 is the default, 4 is the current max for b_2
        # The number of traversals determines how many outcomes the network is trained on;
        # the buffer is appended with new data at every step.
        n_traversals_per_iter=3500,
        # Number of mini-batch fetches and model updates per iteration.
        n_batches_adv_training=6000,  # 5000

        use_pre_layers_adv=True,
        n_cards_state_units_adv=192,
        n_merge_and_table_layer_units_adv=64,
        n_units_final_adv=64,
        dropout_adv=0.0,
        lr_patience_adv=750,  # lr is halved (in PSWorker) if no improvement after this many batches
        lr_adv=0.004,

        # Size of each batch fed to the NN, fetched randomly from the buffer.
        mini_batch_size_adv=10000,
        init_adv_model="last",  # "last" or "random"

        game_cls=DiscretizedNLHoldem,  # PLO or DiscretizedNLHoldem
        env_bldr_cls=VanillaEnvBuilder,
        agent_bet_set=bet_sets.PL_2,
        n_seats=2,
        start_chips=10000,

        # You can specify one or both modes. Choosing both is useful to compare them.
        eval_modes_of_algo=(
            EvalAgentDeepCFR.EVAL_MODE_SINGLE,  # SD-CFR
            # EvalAgentDeepCFR.EVAL_MODE_AVRG_NET
        ),

        # Enables simplified obs. The default also works for 3+ players.
        use_simplified_headsup_obs=True,
        log_verbose=True,
        lbr_args=LBRArgs(
            lbr_bet_set=bet_sets.PL_2,
            n_lbr_hands_per_seat=1,
            lbr_check_to_round=Poker.TURN,  # recommended for 4-round games
            n_parallel_lbr_workers=1,
            use_gpu_for_batch_eval=False,
            DISTRIBUTED=False,
        ),
    ),
    eval_methods={
        "lbr": 99,  # lbr, br, h2h
    },
    n_iterations=64)
ctrl = Driver( t_prof=TrainingProfile( name="SD-CFR_LEDUC_BUF_500", nn_type="feedforward", max_buffer_size_adv=1e6, max_buffer_size_avrg=1e6, eval_agent_export_freq=999999, n_traversals_per_iter=1500, n_batches_adv_training=750, n_batches_avrg_training=5000, n_merge_and_table_layer_units_adv=64, n_merge_and_table_layer_units_avrg=64, n_units_final_adv=64, n_units_final_avrg=64, mini_batch_size_adv=2048, mini_batch_size_avrg=2048, init_adv_model="last", init_avrg_model="random", use_pre_layers_adv=False, use_pre_layers_avrg=False, eval_agent_max_strat_buf_size=500, game_cls=StandardLeduc, eval_modes_of_algo=( EvalAgentDeepCFR.EVAL_MODE_SINGLE, # SD-CFR ), DISTRIBUTED=False, log_verbose=False, ), eval_methods={ "br": 15, }, n_iterations=None)
ctrl = Driver( t_prof=TrainingProfile( name="SD-CFR_LEDUC_EXAMPLE", nn_type="feedforward", max_buffer_size_adv=3e6, eval_agent_export_freq=20, # export API to play against the agent n_traversals_per_iter=1500, n_batches_adv_training=750, n_batches_avrg_training=2000, n_merge_and_table_layer_units_adv=64, n_merge_and_table_layer_units_avrg=64, n_units_final_adv=64, n_units_final_avrg=64, mini_batch_size_adv=2048, mini_batch_size_avrg=2048, init_adv_model="last", init_avrg_model="last", use_pre_layers_adv=False, use_pre_layers_avrg=False, game_cls=StandardLeduc, # You can specify one or both modes. Choosing both is useful to compare them. eval_modes_of_algo=( EvalAgentDeepCFR.EVAL_MODE_SINGLE, # SD-CFR EvalAgentDeepCFR.EVAL_MODE_AVRG_NET, # Deep CFR ), DISTRIBUTED=False, ), eval_methods={ "br": 3, }, n_iterations=None)
ctrl = Driver( t_prof=TrainingProfile( name="EXPERIMENT_SD-CFR_vs_Deep-CFR_FHP", nn_type="feedforward", # We also support RNNs, but the paper uses FF DISTRIBUTED=True, CLUSTER=False, n_learner_actor_workers=20, # 20 workers # regulate exports export_each_net=False, checkpoint_freq=99999999, eval_agent_export_freq= 1, # produces around 15GB over 150 iterations! n_actions_traverser_samples=3, # = external sampling in FHP n_traversals_per_iter=15000, n_batches_adv_training=4000, mini_batch_size_adv=512, # *20=10240 init_adv_model="random", use_pre_layers_adv=True, n_cards_state_units_adv=192, n_merge_and_table_layer_units_adv=64, n_units_final_adv=64, max_buffer_size_adv=2e6, # *20 LAs = 40M lr_adv=0.001, lr_patience_adv=99999999, # No lr decay n_batches_avrg_training=20000, mini_batch_size_avrg=1024, # *20=20480 init_avrg_model="random", use_pre_layers_avrg=True, n_cards_state_units_avrg=192, n_merge_and_table_layer_units_avrg=64, n_units_final_avrg=64, max_buffer_size_avrg=2e6, lr_avrg=0.001, lr_patience_avrg=99999999, # No lr decay # With the H2H evaluator, these two are evaluated against eachother. eval_modes_of_algo=(EvalAgentDeepCFR.EVAL_MODE_AVRG_NET, EvalAgentDeepCFR.EVAL_MODE_SINGLE), log_verbose=True, game_cls=Flop5Holdem, # enables simplified obs. Default works also for 3+ players use_simplified_headsup_obs=True, h2h_args=H2HArgs( n_hands= 1500000, # this is per seat; so in total 3M hands per eval ), ), # Evaluate Head-to-Head every 15 iterations of both players (= every 30 alternating iterations) eval_methods={"h2h": 15}, # 150 = 300 when 2 viewing alternating iterations as 2 (as usually done). # This repo implements alternating iters as a single iter, which is why this says 150. n_iterations=150, )
ctrl = Driver( t_prof=TrainingProfile( name="BIGLEDUC_EXPLOITABILITY", DISTRIBUTED=True, n_learner_actor_workers=11, eval_agent_export_freq=9999999, # Don't export nn_type="feedforward", max_buffer_size_adv=3.636e5, # 364k * 11 = ~4M max_buffer_size_avrg=3.636e5, # 364k * 11 = ~4M # longer action sequences than FHP -> more samples/iter because external sampling. n_traversals_per_iter=800, # 800 * 11 = 8,800 n_batches_adv_training=1200, n_batches_avrg_training=10000, # trained far more than necessary n_merge_and_table_layer_units_adv=64, n_merge_and_table_layer_units_avrg=64, n_units_final_adv=64, n_units_final_avrg=64, n_cards_state_units_adv=64, n_cards_state_units_avrg=64, mini_batch_size_adv=256, # 256 * 11 = 2,816 mini_batch_size_avrg=512, # 512 * 11 = 5,632 init_adv_model= "last", # warm start neural weights with init from last iter init_avrg_model="random", use_pre_layers_adv=True, use_pre_layers_avrg=True, game_cls=BigLeduc, # You can specify one or both modes. Choosing both is useful to compare them. eval_modes_of_algo=( EvalAgentDeepCFR.EVAL_MODE_SINGLE, # SD-CFR EvalAgentDeepCFR.EVAL_MODE_AVRG_NET, # Deep CFR ), h2h_args=H2HArgs(n_hands=500000, ), log_verbose=False, ), eval_methods={ "br": 15, }, n_iterations=None)
ctrl = Driver( t_prof=TrainingProfile( name="Hanul_EXAMPLE", nn_type="recurrent", max_buffer_size_adv=3e6, eval_agent_export_freq=20, # export API to play against the agent n_traversals_per_iter=200, n_batches_adv_training=8, n_batches_avrg_training=2000, n_merge_and_table_layer_units_adv=64, n_merge_and_table_layer_units_avrg=64, n_units_final_adv=64, n_units_final_avrg=64, mini_batch_size_adv=16, mini_batch_size_avrg=16, init_adv_model="last", init_avrg_model="last", use_pre_layers_adv=False, use_pre_layers_avrg=False, game_cls=DiscretizedNLHoldem, lbr_args=LBRArgs( lbr_bet_set=bet_sets.B_5, n_lbr_hands_per_seat=80, lbr_check_to_round=Poker.TURN, n_parallel_lbr_workers=N_LBR_WORKERS, use_gpu_for_batch_eval=False, DISTRIBUTED=True, ), # You can specify one or both modes. Choosing both is useful to compare them. eval_modes_of_algo=( EvalAgentDeepCFR.EVAL_MODE_SINGLE, # SD-CFR EvalAgentDeepCFR.EVAL_MODE_AVRG_NET, # Deep CFR ), DISTRIBUTED=False, ), eval_methods={ "lbr": 5, }, n_iterations=300)
ctrl = Driver( t_prof=TrainingProfile( name="MO_HULH_1", nn_type="feedforward", # We also support RNNs, but the paper uses FF DISTRIBUTED=True, CLUSTER=False, n_learner_actor_workers=40, # 20 workers # regulate exports export_each_net=False, checkpoint_freq=99999999, eval_agent_export_freq=5, n_actions_traverser_samples=3, # = external sampling in FHP n_traversals_per_iter=500, n_batches_adv_training=2000, mini_batch_size_adv=512, # *20=10240 init_adv_model="random", use_pre_layers_adv=True, n_cards_state_units_adv=192, n_merge_and_table_layer_units_adv=64, n_units_final_adv=64, max_buffer_size_adv=2e6, # *20 LAs = 40M lr_adv=0.001, lr_patience_adv=99999999, # No lr decay # With the H2H evaluator, these two are evaluated against eachother. eval_modes_of_algo=(EvalAgentDeepCFR.EVAL_MODE_SINGLE, ), log_verbose=True, game_cls=LimitHoldem, # enables simplified obs. Default works also for 3+ players use_simplified_headsup_obs=True, ), eval_methods={}, n_iterations=50, )
ctrl = Driver( t_prof=TrainingProfile( name="LEDUC_EXPLOITABILITY", nn_type="feedforward", max_buffer_size_adv=1e6, max_buffer_size_avrg=1e6, eval_agent_export_freq=999999, # Don't export n_traversals_per_iter=1500, n_batches_adv_training=750, n_batches_avrg_training=5000, n_merge_and_table_layer_units_adv=64, n_merge_and_table_layer_units_avrg=64, n_units_final_adv=64, n_units_final_avrg=64, mini_batch_size_adv=2048, mini_batch_size_avrg=2048, init_adv_model= "last", # warm start neural weights with init from last iter init_avrg_model="random", use_pre_layers_adv=False, # shallower nets use_pre_layers_avrg=False, # shallower nets game_cls=StandardLeduc, # You can specify one or both modes. Choosing both is useful to compare them. eval_modes_of_algo=( EvalAgentDeepCFR.EVAL_MODE_SINGLE, # SD-CFR EvalAgentDeepCFR.EVAL_MODE_AVRG_NET, # Deep CFR ), DISTRIBUTED=False, log_verbose=False, ), eval_methods={ "br": 15, }, n_iterations=None)
        log_verbose=True,
        rl_br_args=RLBRArgs(
            rlbr_bet_set=None,
            # n_hands_each_seat=5 * i,
            n_workers=5,

            # Training
            DISTRIBUTED=True,
            n_iterations=1000 * (i + 1),
            play_n_games_per_iter=50,

            # The DDQN
            batch_size=512,
        ),
        # lbr_args=LBRArgs(),
    )
    return TP


for i in range(n_iter):
    TP = new_tp(i)
    ctrl = Driver(t_prof=TP,
                  eval_methods={'br': 1, 'rlbr': 1},
                  n_iterations=driver_iterations)
    ctrl.run()

    dfs = logs_util.logs_to_dfs(exp_name=exp_name, iter_number=driver_iterations)

    # Extract a measure for this hyperparameter value: RL-BR minus exact BR.
    RLBR_df = dfs['MO_LEDUC_EVAL SINGLE_stack_13: RL-BR Total']
    BR_df = dfs['MO_LEDUC_EVAL SINGLE_stack_13: BR Total']
    diff = RLBR_df - BR_df

    # Append to a dict: hyperparameter value -> (mean, std) of the measure.
    hp_measure[5 * i] = (diff.mean()['Evaluation/MA_per_G'],
                         diff.std()['Evaluation/MA_per_G'])

pdb.set_trace()
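# A hypothetical sketch of the scaffolding the fragment above implies. new_tp(i),
# n_iter, exp_name, driver_iterations, hp_measure and logs_util are names taken from
# the fragment itself, not from the Deep-CFR / PokerRL APIs; logs_util is assumed to
# be a local helper exposing logs_to_dfs(exp_name, iter_number):
import pdb

import logs_util  # assumed local helper, not part of PokerRL

exp_name = "MO_LEDUC_EVAL"  # assumed run name matching the log keys above
n_iter = 5                  # number of hyperparameter sweep points
driver_iterations = 1       # Deep-CFR iterations to run per sweep point
hp_measure = {}             # hyperparameter value -> (mean, std) of RL-BR minus BR


def new_tp(i):
    # Returns a TrainingProfile whose RL-BR settings depend on the sweep index i;
    # the fragment above is the tail of this function.
    ...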
# -------- Manual approach: wire the LBR evaluator up by hand
# agent_prof.module_args['lbr'] = LBRArgs()
# lbr_chief = Chief(t_prof=agent_prof)
# eval_master = LBRMaster(t_prof=agent_to_eval.t_prof,
#                         chief_handle=lbr_chief)
# num_workers = 3
# LBR_workers = [LBRWorker(t_prof=agent_prof,
#                          chief_handle=lbr_chief,
#                          eval_agent_cls=EvalAgentDeepCFR)
#                for _ in range(num_workers)]
# eval_master.set_worker_handles(*LBR_workers)
# eval_master.evaluate(0)

# -------- Driver approach
from DeepCFR.workers.driver.Driver import Driver
from PokerRL.eval.lbr.LBRArgs import LBRArgs

agent_prof.module_args['lbr'] = LBRArgs()
ctrl = Driver(agent_prof, eval_methods={'lbr': 1})

ctrl.chief_handle._strategy_buffers = agent_to_eval._strategy_buffers
w, info = ctrl.chief_handle._pull_current_eval_strategy()

ctrl.eval_masters['lbr'][0]._eval_agent = agent_to_eval
ctrl.eval_masters['lbr'][0].weights_for_eval_agent = w
ctrl.eval_masters['lbr'][0].evaluate(0)

# ctrl.eval_masters['br'][0]._eval_agent = agent_to_eval
# ctrl.eval_masters['br'][0].evaluate(0)
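# Where agent_to_eval / agent_prof might come from -- a sketch, assuming the agent was
# previously exported to disk (via eval_agent_export_freq); load_from_disk is PokerRL's
# standard way to restore an exported EvalAgent, and the path here is a placeholder:
from DeepCFR.EvalAgentDeepCFR import EvalAgentDeepCFR

agent_to_eval = EvalAgentDeepCFR.load_from_disk(
    path_to_eval_agent="/path/to/exported/eval_agent.pkl")  # placeholder path
agent_prof = agent_to_eval.t_prof  # the TrainingProfile embedded in the agent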
ctrl = Driver( t_prof=TrainingProfile( name= "PLO_3m_7.5mX14-b5000-last-patience350-Leaky-lr0.004-dense_residual", nn_type="dense_residual", DISTRIBUTED=True, CLUSTER=False, n_learner_actor_workers=14, # 14 workers max_buffer_size_adv=3000000, # 3e6 export_each_net=False, # path_strategy_nets="", checkpoint_freq=9999, # produces A SHITLOAD of Gbs! eval_agent_export_freq=1, # How many actions out of all legal on current step to branch randomly # = action breadth limit # 3 is the default, 4 is the current max for b_2 n_actions_traverser_samples=4, # number of traversals equal to the number of entries that will be added # to adv buffer n_traversals_per_iter=150000, # number of mini_batch fetches and model updates on each iteration n_batches_adv_training=1500, max_n_las_sync_simultaneously=20, use_pre_layers_adv=True, n_cards_state_units_adv=192, n_merge_and_table_layer_units_adv=64, # 64 n_units_final_adv=64, # 64 lr_patience_adv=350, # decrease by a factor 0.5(in PSWorker) lr_adv=0.004, # size of batch to feed to NN at once, fetched from buffer randomly. mini_batch_size_adv=5000, init_adv_model="last", # last, random game_cls=PLO, # PLO or DiscretizedNLHoldem env_bldr_cls=VanillaEnvBuilder, agent_bet_set=bet_sets.PL_2, n_seats=2, start_chips=10000, # You can specify one or both modes. Choosing both is useful to compare them. eval_modes_of_algo=( EvalAgentDeepCFR.EVAL_MODE_SINGLE, # SD-CFR ), # enables simplified obs. Default works also for 3+ players use_simplified_headsup_obs=True, log_verbose=True, lbr_args=LBRArgs( lbr_bet_set=bet_sets.PL_2, n_lbr_hands_per_seat=1, lbr_check_to_round=Poker.TURN, # recommended to set to Poker.TURN for 4-round games. n_parallel_lbr_workers=1, use_gpu_for_batch_eval=False, DISTRIBUTED=False, ), ), eval_methods={ "": 99, # lbr, br, h2h }, n_iterations=64)
if __name__ == '__main__':
    ctrl = Driver(
        t_prof=TrainingProfile(
            name="MO_LEDUC_EXPLOITABILITY",
            nn_type="feedforward",
            n_learner_actor_workers=5,
            eval_agent_export_freq=3,
            checkpoint_freq=1,
            max_buffer_size_adv=1e6,
            n_traversals_per_iter=500,
            n_batches_adv_training=250,
            mini_batch_size_adv=2048,
            game_cls=StandardLeduc,
            n_units_final_adv=64,
            n_merge_and_table_layer_units_adv=64,
            init_adv_model="random",  # re-initialize advantage-net weights randomly each iteration
            use_pre_layers_adv=False,  # shallower nets
            use_pre_layers_avrg=False,  # shallower nets

            # You can specify one or both modes. Choosing both is useful to compare them.
            eval_modes_of_algo=(
                EvalAgentDeepCFR.EVAL_MODE_SINGLE,  # SD-CFR
            ),
            DISTRIBUTED=False,
            log_verbose=True,
        ),
        eval_methods={},
        n_iterations=4,
        # Resume the run of the same name from the checkpoint written at iteration 4
        # (checkpoint_freq=1 above keeps a checkpoint for every iteration).
        name_to_import='MO_LEDUC_EXPLOITABILITY',
        iteration_to_import=4)
    ctrl.run()