import numpy as np

from DREAM_and_DeepCFR.TrainingProfile import TrainingProfile
from DREAM_and_DeepCFR.workers.driver.Driver import Driver
from HYPERS import *
from PokerRL.game.games import StandardLeduc  # or any other game

# Experiment launcher: builds a TrainingProfile for StandardLeduc with the
# "learned_baseline" sampler and a fixed 100 baseline batches per iteration,
# then hands it to the Driver and starts the run.
if __name__ == '__main__':
    # A random integer below 1,000,000 is appended to the run name.
    run_name = "Leduc_DREAM_BaselineT100_v001_SEED" + str(np.random.randint(1000000))

    profile = TrainingProfile(
        name=run_name,
        nn_type="feedforward",
        n_batches_adv_training=SDCFR_LEDUC_BATCHES,
        periodic_restart=SDCFR_LEDUC_PERIOD,
        n_traversals_per_iter=SDCFR_LEDUC_TRAVERSALS_OS,
        sampler="learned_baseline",
        n_batches_per_iter_baseline=100,
        os_eps=OS_EPS,
        game_cls=StandardLeduc,
        DISTRIBUTED=False,
    )

    # Evaluation methods are passed to the Driver as a name -> value mapping.
    evaluation = {"br": 3}

    ctrl = Driver(t_prof=profile, eval_methods=evaluation)
    ctrl.run()
import numpy as np

from DREAM_and_DeepCFR.TrainingProfile import TrainingProfile
from DREAM_and_DeepCFR.workers.driver.Driver import Driver
from HYPERS import *
from PokerRL.game.games import Flop3Holdem  # or any other game

# Experiment launcher: distributed run on Flop3Holdem with the "eq_baseline"
# sampler. Traversals, mini-batch size and buffer size are each divided by
# N_LA_FHP_CFR, the number of learner-actor workers configured below.
if __name__ == '__main__':
    # A random integer below 1,000,000 is appended to the run name.
    run_name = "FHP_OSSDCFR_v001_SEED" + str(np.random.randint(1000000))

    profile = TrainingProfile(
        name=run_name,
        n_traversals_per_iter=(SDCFR_FHP_TRAVERSALS_OS / N_LA_FHP_CFR),
        sampler="eq_baseline",
        # NOTE(review): unusual keyword name for a TrainingProfile option --
        # confirm that TrainingProfile actually accepts it.
        DREAM_and_DeepCFR=True,  # turns baseline off. Hacky but allows code sharing
        os_eps=OS_EPS,
        game_cls=Flop3Holdem,
        n_batches_adv_training=SDCFR_FHP_BATCHES,
        n_learner_actor_workers=N_LA_FHP_CFR,
        mini_batch_size_adv=int(SDCFR_FHP_BATCH_SIZE / N_LA_FHP_CFR),
        # Divide by the same worker count used for this run (N_LA_FHP_CFR),
        # not the NFSP worker count.
        max_buffer_size_adv=int(4e7 / N_LA_FHP_CFR),
        DISTRIBUTED=True,
        rlbr_args=DIST_RLBR_ARGS_games,
    )

    # Evaluation methods are passed to the Driver as a name -> value mapping.
    evaluation = {"rlbr": RL_BR_FREQ_CFR}

    ctrl = Driver(t_prof=profile, eval_methods=evaluation)
    ctrl.run()
import numpy as np

from DREAM_and_DeepCFR.EvalAgentDeepCFR import EvalAgentDeepCFR
from DREAM_and_DeepCFR.TrainingProfile import TrainingProfile
from DREAM_and_DeepCFR.workers.driver.Driver import Driver
from HYPERS import *
from PokerRL.game.games import StandardLeduc  # or any other game

# Experiment launcher: StandardLeduc with the "learned_baseline" sampler,
# evaluated in both the SINGLE and AVRG_NET eval modes of EvalAgentDeepCFR.
if __name__ == '__main__':
    # A random integer below 1,000,000 is appended to the run name.
    run_name = "Leduc_DREAM_v001_SEED" + str(np.random.randint(1000000))

    eval_modes = (
        EvalAgentDeepCFR.EVAL_MODE_SINGLE,
        EvalAgentDeepCFR.EVAL_MODE_AVRG_NET,
    )

    profile = TrainingProfile(
        name=run_name,
        nn_type="feedforward",
        n_batches_adv_training=SDCFR_LEDUC_BATCHES,
        n_traversals_per_iter=SDCFR_LEDUC_TRAVERSALS_OS,
        sampler="learned_baseline",
        n_batches_per_iter_baseline=SDCFR_LEDUC_BASELINE_BATCHES,
        os_eps=OS_EPS,
        game_cls=StandardLeduc,
        eval_modes_of_algo=eval_modes,
        n_batches_avrg_training=4000,
        DISTRIBUTED=False,
    )

    # Evaluation methods are passed to the Driver as a name -> value mapping.
    evaluation = {"br": 20}

    ctrl = Driver(t_prof=profile, eval_methods=evaluation)
    ctrl.run()
import numpy as np

from DREAM_and_DeepCFR.TrainingProfile import TrainingProfile
from DREAM_and_DeepCFR.workers.driver.Driver import Driver
from PokerRL.game.games import StandardLeduc  # or any other game

# Experiment launcher: StandardLeduc with the "learned_baseline" sampler and
# hard-coded hyperparameters (2700 traversals per iteration, 3000 advantage
# training batches, os_eps of 0.5).
if __name__ == '__main__':
    # A random integer below 1,000,000 is appended to the run name.
    run_name = "SD-CFR_LEDUC_LB_2700trav_095_SEED" + str(np.random.randint(1000000))

    profile = TrainingProfile(
        name=run_name,
        n_traversals_per_iter=2700,
        n_batches_adv_training=3000,
        sampler="learned_baseline",
        os_eps=0.5,
        game_cls=StandardLeduc,
    )

    # Evaluation methods are passed to the Driver as a name -> value mapping.
    evaluation = {"br": 3}

    ctrl = Driver(t_prof=profile, eval_methods=evaluation)
    ctrl.run()
import numpy as np

from DREAM_and_DeepCFR.TrainingProfile import TrainingProfile
from DREAM_and_DeepCFR.workers.driver.Driver import Driver
from HYPERS import *
from PokerRL.game.games import Flop3Holdem  # or any other game

# Experiment launcher: distributed run on Flop3Holdem with the "es" sampler.
# Traversals, mini-batch size and buffer size are each divided by
# N_LA_FHP_CFR, the number of learner-actor workers configured below.
if __name__ == '__main__':
    # A random integer below 1,000,000 is appended to the run name.
    run_name = "FHP_ESSDCFR_v001_SEED" + str(np.random.randint(1000000))

    profile = TrainingProfile(
        name=run_name,
        n_traversals_per_iter=(SDCFR_FHP_TRAVERSALS_ES / N_LA_FHP_CFR),
        sampler="es",
        game_cls=Flop3Holdem,
        n_batches_adv_training=SDCFR_FHP_BATCHES,
        n_learner_actor_workers=N_LA_FHP_CFR,
        mini_batch_size_adv=int(SDCFR_FHP_BATCH_SIZE / N_LA_FHP_CFR),
        max_buffer_size_adv=int(4e7 / N_LA_FHP_CFR),
        DISTRIBUTED=True,
        rlbr_args=DIST_RLBR_ARGS_games,
    )

    # Evaluation methods are passed to the Driver as a name -> value mapping.
    evaluation = {"rlbr": RL_BR_FREQ_CFR}

    ctrl = Driver(t_prof=profile, eval_methods=evaluation)
    ctrl.run()
import numpy as np

from DREAM_and_DeepCFR.TrainingProfile import TrainingProfile
from DREAM_and_DeepCFR.workers.driver.Driver import Driver
from HYPERS import *
from PokerRL.game.games import StandardLeduc  # or any other game

# Experiment launcher: non-distributed StandardLeduc run with the "es"
# sampler and a fixed 1500 traversals per iteration.
if __name__ == '__main__':
    # A random integer below 1,000,000 is appended to the run name.
    run_name = "Leduc_ESSDCFR1500_v001_SEED" + str(np.random.randint(1000000))

    profile = TrainingProfile(
        name=run_name,
        n_batches_adv_training=SDCFR_LEDUC_BATCHES,
        n_traversals_per_iter=1500,
        sampler="es",
        game_cls=StandardLeduc,
        DISTRIBUTED=False,
    )

    # Evaluation methods are passed to the Driver as a name -> value mapping.
    evaluation = {"br": 20}

    ctrl = Driver(t_prof=profile, eval_methods=evaluation)
    ctrl.run()
import numpy as np

from DREAM_and_DeepCFR.TrainingProfile import TrainingProfile
from DREAM_and_DeepCFR.workers.driver.Driver import Driver
from HYPERS import *
from PokerRL.game.games import StandardLeduc  # or any other game

# Experiment launcher: non-distributed StandardLeduc run with the "mo"
# sampler, configured with n_actions_traverser_samples=1.
if __name__ == '__main__':
    # A random integer below 1,000,000 is appended to the run name.
    run_name = "Leduc_OSSDCFR_v001_SEED" + str(np.random.randint(1000000))

    profile = TrainingProfile(
        name=run_name,
        n_batches_adv_training=SDCFR_LEDUC_BATCHES,
        n_traversals_per_iter=SDCFR_LEDUC_TRAVERSALS_OS,
        sampler="mo",
        n_actions_traverser_samples=1,
        os_eps=OS_EPS,
        game_cls=StandardLeduc,
        DISTRIBUTED=False,
    )

    # Evaluation methods are passed to the Driver as a name -> value mapping.
    evaluation = {"br": 20}

    ctrl = Driver(t_prof=profile, eval_methods=evaluation)
    ctrl.run()
import numpy as np

from DREAM_and_DeepCFR.TrainingProfile import TrainingProfile
from DREAM_and_DeepCFR.workers.driver.Driver import Driver
from PokerRL.game.games import StandardLeduc  # or any other game

# Experiment launcher: StandardLeduc with the "learned_baseline" sampler and
# periodic_restart set to a very large value (100000000); the run name
# labels this configuration "ResetNever".
if __name__ == '__main__':
    # A random integer below 1,000,000 is appended to the run name.
    run_name = "SD-Leduc_DREAM_ResetNever_v001_SEED" + str(np.random.randint(1000000))

    profile = TrainingProfile(
        name=run_name,
        n_traversals_per_iter=900,
        n_batches_adv_training=3000,
        periodic_restart=100000000,
        sampler="learned_baseline",
        os_eps=0.5,
        game_cls=StandardLeduc,
    )

    # Evaluation methods are passed to the Driver as a name -> value mapping.
    evaluation = {"br": 3}

    ctrl = Driver(t_prof=profile, eval_methods=evaluation)
    ctrl.run()
import numpy as np

from DREAM_and_DeepCFR.EvalAgentDeepCFR import EvalAgentDeepCFR
from DREAM_and_DeepCFR.TrainingProfile import TrainingProfile
from DREAM_and_DeepCFR.workers.driver.Driver import Driver
from HYPERS import *
from PokerRL.game.games import StandardLeduc  # or any other game

# Experiment launcher: non-distributed StandardLeduc run with the "es"
# sampler, evaluated only in the SINGLE eval mode of EvalAgentDeepCFR.
if __name__ == '__main__':
    # A random integer below 1,000,000 is appended to the run name.
    run_name = "Leduc_ESSDCFR_v001_SEED" + str(np.random.randint(1000000))

    # One-element tuple: the trailing comma is required.
    eval_modes = (EvalAgentDeepCFR.EVAL_MODE_SINGLE,)

    profile = TrainingProfile(
        name=run_name,
        n_batches_adv_training=SDCFR_LEDUC_BATCHES,
        n_traversals_per_iter=SDCFR_LEDUC_TRAVERSALS_ES,
        sampler="es",
        eval_modes_of_algo=eval_modes,
        n_batches_avrg_training=4000,
        game_cls=StandardLeduc,
        DISTRIBUTED=False,
    )

    # Evaluation methods are passed to the Driver as a name -> value mapping.
    evaluation = {"br": 20}

    ctrl = Driver(t_prof=profile, eval_methods=evaluation)
    ctrl.run()