Beispiel #1
0
import numpy as np

from DREAM_and_DeepCFR.TrainingProfile import TrainingProfile
from DREAM_and_DeepCFR.workers.driver.Driver import Driver
from HYPERS import *
from PokerRL.game.games import StandardLeduc  # or any other game

if __name__ == "__main__":
    # Script entry point: build a TrainingProfile for StandardLeduc that uses
    # the "learned_baseline" sampler with DISTRIBUTED=False, wrap it in a
    # Driver and start it.

    # The run name ends in a random integer below 1000000. Only the name is
    # affected here; whether that number is used as an RNG seed is not
    # visible from this script.
    run_name = "Leduc_DREAM_BaselineT100_v001_SEED" + str(
        np.random.randint(1000000))

    # The upper-case names are presumably supplied by the star import from
    # HYPERS -- confirm there.
    profile = TrainingProfile(
        name=run_name,
        nn_type="feedforward",
        n_batches_adv_training=SDCFR_LEDUC_BATCHES,
        periodic_restart=SDCFR_LEDUC_PERIOD,
        n_traversals_per_iter=SDCFR_LEDUC_TRAVERSALS_OS,
        sampler="learned_baseline",
        n_batches_per_iter_baseline=100,
        os_eps=OS_EPS,
        game_cls=StandardLeduc,
        DISTRIBUTED=False,
    )

    # NOTE(review): the number stored under "br" is presumably an evaluation
    # interval in iterations -- confirm against Driver.
    evaluation = {"br": 3}

    ctrl = Driver(t_prof=profile, eval_methods=evaluation)
    ctrl.run()
Beispiel #2
0
import numpy as np

from DREAM_and_DeepCFR.TrainingProfile import TrainingProfile
from DREAM_and_DeepCFR.workers.driver.Driver import Driver
from HYPERS import *
from PokerRL.game.games import Flop3Holdem  # or any other game

if __name__ == '__main__':
    # Script entry point: build a TrainingProfile for Flop3Holdem with
    # DISTRIBUTED=True, wrap it in a Driver and start it.
    #
    # The per-iteration traversal count, the advantage mini-batch size and
    # the advantage buffer size are each divided by N_LA_FHP_CFR, the same
    # value that is passed as n_learner_actor_workers.
    # The upper-case names are presumably supplied by the star import from
    # HYPERS -- confirm there.
    ctrl = Driver(
        t_prof=TrainingProfile(
            # The run name ends in a random integer below 1000000.
            name="FHP_OSSDCFR_v001_SEED" + str(np.random.randint(1000000)),
            # NOTE(review): true division yields a float here; this relies
            # on TrainingProfile accepting/casting it -- confirm.
            n_traversals_per_iter=(SDCFR_FHP_TRAVERSALS_OS / N_LA_FHP_CFR),
            sampler="eq_baseline",
            DREAM_and_DeepCFR=
            True,  # turns baseline off. Hacky but allows code sharing
            os_eps=OS_EPS,
            game_cls=Flop3Holdem,
            n_batches_adv_training=SDCFR_FHP_BATCHES,
            n_learner_actor_workers=N_LA_FHP_CFR,
            mini_batch_size_adv=int(SDCFR_FHP_BATCH_SIZE / N_LA_FHP_CFR),
            # Divide by the worker count this run actually uses
            # (N_LA_FHP_CFR), consistent with the other per-worker values
            # above, rather than by the NFSP worker count.
            max_buffer_size_adv=int(4e7 / N_LA_FHP_CFR),
            DISTRIBUTED=True,
            rlbr_args=DIST_RLBR_ARGS_games,
        ),
        # NOTE(review): RL_BR_FREQ_CFR is presumably the "rlbr" evaluation
        # interval -- confirm against Driver.
        eval_methods={"rlbr": RL_BR_FREQ_CFR})
    ctrl.run()
Beispiel #3
0
import numpy as np

from DREAM_and_DeepCFR.EvalAgentDeepCFR import EvalAgentDeepCFR
from DREAM_and_DeepCFR.TrainingProfile import TrainingProfile
from DREAM_and_DeepCFR.workers.driver.Driver import Driver
from HYPERS import *
from PokerRL.game.games import StandardLeduc  # or any other game

if __name__ == "__main__":
    # Script entry point: build a TrainingProfile for StandardLeduc that uses
    # the "learned_baseline" sampler and requests two evaluation modes
    # (EVAL_MODE_SINGLE and EVAL_MODE_AVRG_NET), then start the Driver.

    # The run name ends in a random integer below 1000000.
    seed_suffix = str(np.random.randint(1000000))

    modes_to_evaluate = (
        EvalAgentDeepCFR.EVAL_MODE_SINGLE,
        EvalAgentDeepCFR.EVAL_MODE_AVRG_NET,
    )

    # The upper-case names are presumably supplied by the star import from
    # HYPERS -- confirm there.
    profile = TrainingProfile(
        name="Leduc_DREAM_v001_SEED" + seed_suffix,
        nn_type="feedforward",
        n_batches_adv_training=SDCFR_LEDUC_BATCHES,
        n_traversals_per_iter=SDCFR_LEDUC_TRAVERSALS_OS,
        sampler="learned_baseline",
        n_batches_per_iter_baseline=SDCFR_LEDUC_BASELINE_BATCHES,
        os_eps=OS_EPS,
        game_cls=StandardLeduc,
        eval_modes_of_algo=modes_to_evaluate,
        n_batches_avrg_training=4000,
        DISTRIBUTED=False,
    )

    # NOTE(review): the number stored under "br" is presumably an evaluation
    # interval in iterations -- confirm against Driver.
    ctrl = Driver(t_prof=profile, eval_methods={"br": 20})
    ctrl.run()
Beispiel #4
0
import numpy as np

from DREAM_and_DeepCFR.TrainingProfile import TrainingProfile
from DREAM_and_DeepCFR.workers.driver.Driver import Driver
from PokerRL.game.games import StandardLeduc  # or any other game

if __name__ == "__main__":
    # Script entry point: build a TrainingProfile for StandardLeduc with
    # literal hyper-parameters (2700 traversals per iteration, 3000 advantage
    # training batches, os_eps of 0.5) and the "learned_baseline" sampler,
    # then start the Driver.

    # The run name ends in a random integer below 1000000.
    run_name = "SD-CFR_LEDUC_LB_2700trav_095_SEED" + str(
        np.random.randint(1000000))

    profile = TrainingProfile(
        name=run_name,
        n_traversals_per_iter=2700,
        n_batches_adv_training=3000,
        sampler="learned_baseline",
        os_eps=0.5,
        game_cls=StandardLeduc,
    )

    # NOTE(review): the number stored under "br" is presumably an evaluation
    # interval in iterations -- confirm against Driver.
    evaluation = {"br": 3}

    ctrl = Driver(t_prof=profile, eval_methods=evaluation)
    ctrl.run()
Beispiel #5
0
import numpy as np

from DREAM_and_DeepCFR.TrainingProfile import TrainingProfile
from DREAM_and_DeepCFR.workers.driver.Driver import Driver
from HYPERS import *
from PokerRL.game.games import Flop3Holdem  # or any other game

if __name__ == "__main__":
    # Script entry point: build a TrainingProfile for Flop3Holdem with the
    # "es" sampler and DISTRIBUTED=True, wrap it in a Driver and start it.
    #
    # The per-iteration traversal count, the advantage mini-batch size and
    # the advantage buffer size are each divided by N_LA_FHP_CFR, the same
    # value that is passed as n_learner_actor_workers.

    # The run name ends in a random integer below 1000000.
    run_name = "FHP_ESSDCFR_v001_SEED" + str(np.random.randint(1000000))

    # The upper-case names are presumably supplied by the star import from
    # HYPERS -- confirm there.
    traversals_per_worker = SDCFR_FHP_TRAVERSALS_ES / N_LA_FHP_CFR
    batch_size_per_worker = int(SDCFR_FHP_BATCH_SIZE / N_LA_FHP_CFR)
    buffer_size_per_worker = int(4e7 / N_LA_FHP_CFR)

    profile = TrainingProfile(
        name=run_name,
        n_traversals_per_iter=traversals_per_worker,
        sampler="es",
        game_cls=Flop3Holdem,
        n_batches_adv_training=SDCFR_FHP_BATCHES,
        n_learner_actor_workers=N_LA_FHP_CFR,
        mini_batch_size_adv=batch_size_per_worker,
        max_buffer_size_adv=buffer_size_per_worker,
        DISTRIBUTED=True,
        rlbr_args=DIST_RLBR_ARGS_games,
    )

    # NOTE(review): RL_BR_FREQ_CFR is presumably the "rlbr" evaluation
    # interval -- confirm against Driver.
    ctrl = Driver(t_prof=profile, eval_methods={"rlbr": RL_BR_FREQ_CFR})
    ctrl.run()
Beispiel #6
0
import numpy as np

from DREAM_and_DeepCFR.TrainingProfile import TrainingProfile
from DREAM_and_DeepCFR.workers.driver.Driver import Driver
from HYPERS import *
from PokerRL.game.games import StandardLeduc  # or any other game

if __name__ == "__main__":
    # Script entry point: build a TrainingProfile for StandardLeduc with the
    # "es" sampler, 1500 traversals per iteration and DISTRIBUTED=False,
    # then start the Driver.

    # The run name ends in a random integer below 1000000.
    seed_suffix = str(np.random.randint(1000000))

    # SDCFR_LEDUC_BATCHES is presumably supplied by the star import from
    # HYPERS -- confirm there.
    profile = TrainingProfile(
        name="Leduc_ESSDCFR1500_v001_SEED" + seed_suffix,
        n_batches_adv_training=SDCFR_LEDUC_BATCHES,
        n_traversals_per_iter=1500,
        sampler="es",
        game_cls=StandardLeduc,
        DISTRIBUTED=False,
    )

    # NOTE(review): the number stored under "br" is presumably an evaluation
    # interval in iterations -- confirm against Driver.
    evaluation = {"br": 20}

    ctrl = Driver(t_prof=profile, eval_methods=evaluation)
    ctrl.run()
Beispiel #7
0
import numpy as np

from DREAM_and_DeepCFR.TrainingProfile import TrainingProfile
from DREAM_and_DeepCFR.workers.driver.Driver import Driver
from HYPERS import *
from PokerRL.game.games import StandardLeduc  # or any other game

if __name__ == "__main__":
    # Script entry point: build a TrainingProfile for StandardLeduc with the
    # "mo" sampler limited to one traverser action sample
    # (n_actions_traverser_samples=1) and DISTRIBUTED=False, then start the
    # Driver.

    # The run name ends in a random integer below 1000000.
    run_name = "Leduc_OSSDCFR_v001_SEED" + str(np.random.randint(1000000))

    # The upper-case names are presumably supplied by the star import from
    # HYPERS -- confirm there.
    profile = TrainingProfile(
        name=run_name,
        n_batches_adv_training=SDCFR_LEDUC_BATCHES,
        n_traversals_per_iter=SDCFR_LEDUC_TRAVERSALS_OS,
        sampler="mo",
        n_actions_traverser_samples=1,
        os_eps=OS_EPS,
        game_cls=StandardLeduc,
        DISTRIBUTED=False,
    )

    # NOTE(review): the number stored under "br" is presumably an evaluation
    # interval in iterations -- confirm against Driver.
    ctrl = Driver(t_prof=profile, eval_methods={"br": 20})
    ctrl.run()
import numpy as np

from DREAM_and_DeepCFR.TrainingProfile import TrainingProfile
from DREAM_and_DeepCFR.workers.driver.Driver import Driver
from PokerRL.game.games import StandardLeduc  # or any other game

if __name__ == "__main__":
    # Script entry point: build a TrainingProfile for StandardLeduc with
    # literal hyper-parameters and the "learned_baseline" sampler, then start
    # the Driver.

    # The run name ends in a random integer below 1000000.
    seed_suffix = str(np.random.randint(1000000))

    profile = TrainingProfile(
        name="SD-Leduc_DREAM_ResetNever_v001_SEED" + seed_suffix,
        n_traversals_per_iter=900,
        n_batches_adv_training=3000,
        # NOTE(review): judging by the "ResetNever" run name, this very large
        # value is presumably meant to keep periodic restarts from ever
        # triggering -- confirm against TrainingProfile.
        periodic_restart=100000000,
        sampler="learned_baseline",
        os_eps=0.5,
        game_cls=StandardLeduc,
    )

    # NOTE(review): the number stored under "br" is presumably an evaluation
    # interval in iterations -- confirm against Driver.
    evaluation = {"br": 3}

    ctrl = Driver(t_prof=profile, eval_methods=evaluation)
    ctrl.run()
Beispiel #9
0
import numpy as np

from DREAM_and_DeepCFR.EvalAgentDeepCFR import EvalAgentDeepCFR
from DREAM_and_DeepCFR.TrainingProfile import TrainingProfile
from DREAM_and_DeepCFR.workers.driver.Driver import Driver
from HYPERS import *
from PokerRL.game.games import StandardLeduc  # or any other game

if __name__ == "__main__":
    # Script entry point: build a TrainingProfile for StandardLeduc with the
    # "es" sampler, a single evaluation mode (EVAL_MODE_SINGLE) and
    # DISTRIBUTED=False, then start the Driver.

    # The run name ends in a random integer below 1000000.
    run_name = "Leduc_ESSDCFR_v001_SEED" + str(np.random.randint(1000000))

    # One-element tuple: only EVAL_MODE_SINGLE is requested.
    modes_to_evaluate = (EvalAgentDeepCFR.EVAL_MODE_SINGLE,)

    # The upper-case names are presumably supplied by the star import from
    # HYPERS -- confirm there.
    profile = TrainingProfile(
        name=run_name,
        n_batches_adv_training=SDCFR_LEDUC_BATCHES,
        n_traversals_per_iter=SDCFR_LEDUC_TRAVERSALS_ES,
        sampler="es",
        eval_modes_of_algo=modes_to_evaluate,
        n_batches_avrg_training=4000,
        game_cls=StandardLeduc,
        DISTRIBUTED=False,
    )

    # NOTE(review): the number stored under "br" is presumably an evaluation
    # interval in iterations -- confirm against Driver.
    ctrl = Driver(t_prof=profile, eval_methods={"br": 20})
    ctrl.run()