Example #1
0
def train():
    """Set up logging, GPU tower assignment, simulator processes and the
    batched dataflow, then launch Batch-A3C training for ENV_NAME.
    """
    log_dir = os.path.join('train_log', 'train-atari-{}'.format(ENV_NAME))
    logger.set_logger_dir(log_dir)

    # Split the visible GPUs: the last ceil(n/2) serve inference,
    # the remainder compute gradients.
    gpu_count = get_nr_gpu()
    global PREDICTOR_THREAD
    if gpu_count > 0:
        num_infer_gpus = (gpu_count + 1) // 2
        if gpu_count > 1:
            predict_tower = list(range(gpu_count))[-num_infer_gpus:]
        else:
            predict_tower = [0]
        PREDICTOR_THREAD = len(predict_tower) * PREDICTOR_THREAD_PER_GPU
        train_tower = list(range(gpu_count))[:-num_infer_gpus] or [0]
        logger.info("[Batch-A3C] Train on gpu {} and infer on gpu {}".format(
            ','.join(str(t) for t in train_tower),
            ','.join(str(t) for t in predict_tower)))
    else:
        logger.warn("Without GPU this model will never learn! CPU is only useful for debug.")
        PREDICTOR_THREAD = 1
        predict_tower, train_tower = [0], [0]

    # Spawn simulator worker processes talking to the master over ZMQ IPC.
    run_id = str(uuid.uuid1())[:6]
    # Linux supports abstract sockets (leading '@'); elsewhere use plain names.
    sock_prefix = '@' if sys.platform.startswith('linux') else ''
    c2s_addr = 'ipc://{}sim-c2s-{}'.format(sock_prefix, run_id)
    s2c_addr = 'ipc://{}sim-s2c-{}'.format(sock_prefix, run_id)
    workers = [MySimulatorWorker(idx, c2s_addr, s2c_addr)
               for idx in range(SIMULATOR_PROC)]
    ensure_proc_terminate(workers)
    start_proc_mask_signal(workers)

    sim_master = MySimulatorMaster(c2s_addr, s2c_addr, predict_tower)
    batched_flow = BatchData(DataFromQueue(sim_master.queue), BATCH_SIZE)
    train_cfg = TrainConfig(
        model=Model(),
        dataflow=batched_flow,
        callbacks=[
            ModelSaver(),
            ScheduledHyperParamSetter('learning_rate',
                                      [(20, 0.0003), (120, 0.0001)]),
            ScheduledHyperParamSetter('entropy_beta', [(80, 0.005)]),
            HumanHyperParamSetter('learning_rate'),
            HumanHyperParamSetter('entropy_beta'),
            sim_master,
            StartProcOrThread(sim_master),
            PeriodicTrigger(
                Evaluator(EVAL_EPISODE, ['state'], ['policy'], get_player),
                every_k_epochs=3),
        ],
        session_creator=sesscreate.NewSessionCreator(
            config=get_default_sess_config(0.5)),
        steps_per_epoch=STEPS_PER_EPOCH,
        session_init=get_model_loader(args.load) if args.load else None,
        max_epoch=1000,
    )
    if train_cfg.nr_tower == 1:
        trainer = SimpleTrainer()
    else:
        trainer = AsyncMultiGPUTrainer(train_tower)
    launch_train_with_config(train_cfg, trainer)
Example #2
0
 def _before_train(self):
     """Callback hook run once, just before the training loop starts.

     Starts the asynchronous inference predictor, then starts this master
     object itself via ``start_proc_mask_signal`` (``self`` is passed as
     the process/thread to start — presumably this class is also a
     process or thread; TODO confirm against the class definition).
     """
     self.async_predictor.start()
     logger.info("Starting MySimulatorMaster ...")
     # Start `self` with signals masked in the child
     # (exact masking semantics depend on start_proc_mask_signal).
     start_proc_mask_signal(self)
Example #3
0
def train():
    """Assign GPU towers, start simulator processes, and launch
    Batch-A3C training for ENV_NAME.
    """
    log_dir = os.path.join('train_log', 'train-atari-{}'.format(ENV_NAME))
    logger.set_logger_dir(log_dir)

    # Partition the visible GPUs: the last ceil(n/2) handle inference,
    # the rest handle gradient computation.
    n_gpu = get_num_gpu()
    global PREDICTOR_THREAD
    if n_gpu > 0:
        gpus = list(range(n_gpu))
        infer_count = (n_gpu + 1) // 2
        if n_gpu > 1:
            predict_tower = gpus[-infer_count:]
        else:
            predict_tower = [0]
        PREDICTOR_THREAD = len(predict_tower) * PREDICTOR_THREAD_PER_GPU
        train_tower = gpus[:-infer_count] or [0]
        logger.info("[Batch-A3C] Train on gpu {} and infer on gpu {}".format(
            ','.join(str(g) for g in train_tower),
            ','.join(str(g) for g in predict_tower)))
    else:
        logger.warn(
            "Without GPU this model will never learn! CPU is only useful for debug."
        )
        PREDICTOR_THREAD = 1
        predict_tower, train_tower = [0], [0]

    # Per-run ZMQ IPC endpoints for worker <-> master communication.
    run_id = str(uuid.uuid1())[:6]
    ipc_prefix = '@' if sys.platform.startswith('linux') else ''
    c2s_endpoint = 'ipc://{}sim-c2s-{}'.format(ipc_prefix, run_id)
    s2c_endpoint = 'ipc://{}sim-s2c-{}'.format(ipc_prefix, run_id)
    sim_workers = []
    for worker_idx in range(SIMULATOR_PROC):
        sim_workers.append(
            MySimulatorWorker(worker_idx, c2s_endpoint, s2c_endpoint))
    ensure_proc_terminate(sim_workers)
    start_proc_mask_signal(sim_workers)

    master = MySimulatorMaster(c2s_endpoint, s2c_endpoint, predict_tower)
    batched = BatchData(DataFromQueue(master.queue), BATCH_SIZE)
    cfg = TrainConfig(
        model=Model(),
        dataflow=batched,
        callbacks=[
            ModelSaver(),
            ScheduledHyperParamSetter(
                'learning_rate', [(20, 0.0003), (120, 0.0001)]),
            ScheduledHyperParamSetter('entropy_beta', [(80, 0.005)]),
            HumanHyperParamSetter('learning_rate'),
            HumanHyperParamSetter('entropy_beta'),
            master,
            StartProcOrThread(master),
            PeriodicTrigger(
                Evaluator(EVAL_EPISODE, ['state'], ['policy'], get_player),
                every_k_epochs=3),
        ],
        session_creator=sesscreate.NewSessionCreator(
            config=get_default_sess_config(0.5)),
        steps_per_epoch=STEPS_PER_EPOCH,
        session_init=get_model_loader(args.load) if args.load else None,
        max_epoch=1000,
    )
    trainer = AsyncMultiGPUTrainer(train_tower) \
        if cfg.nr_tower != 1 else SimpleTrainer()
    launch_train_with_config(cfg, trainer)
Example #4
0
def train():
    """Launch Batch-A3C training on ENV_NAME with an additional ExpReplay
    side dataflow whose exploration rate is annealed on a schedule.

    Sets up logging, GPU tower assignment, simulator worker processes,
    the simulator master, and the TrainConfig, then starts the trainer.
    (A large block of dead, commented-out config was removed here.)
    """
    dirname = os.path.join('train_log', 'A3C-{}'.format(ENV_NAME))
    logger.set_logger_dir(dirname)

    # assign GPUs for training & inference
    num_gpu = get_num_gpu()
    global PREDICTOR_THREAD
    if num_gpu > 0:
        if num_gpu > 1:
            # use half gpus for inference
            predict_tower = list(range(num_gpu))[-num_gpu // 2:]
        else:
            predict_tower = [0]
        PREDICTOR_THREAD = len(predict_tower) * PREDICTOR_THREAD_PER_GPU
        train_tower = list(range(num_gpu))[:-num_gpu // 2] or [0]
        logger.info("[Batch-A3C] Train on gpu {} and infer on gpu {}".format(
            ','.join(map(str, train_tower)), ','.join(map(str, predict_tower))))
    else:
        logger.warn("Without GPU this model will never learn! CPU is only useful for debug.")
        PREDICTOR_THREAD = 1
        predict_tower, train_tower = [0], [0]

    # setup simulator processes (ZMQ IPC; '@' = Linux abstract socket)
    name_base = str(uuid.uuid1())[:6]
    prefix = '@' if sys.platform.startswith('linux') else ''
    namec2s = 'ipc://{}sim-c2s-{}'.format(prefix, name_base)
    names2c = 'ipc://{}sim-s2c-{}'.format(prefix, name_base)
    procs = [MySimulatorWorker(k, namec2s, names2c) for k in range(SIMULATOR_PROC)]
    ensure_proc_terminate(procs)
    start_proc_mask_signal(procs)

    master = MySimulatorMaster(namec2s, names2c, predict_tower)
    dataflow = BatchData(DataFromQueue(master.queue), BATCH_SIZE)
    # Replay buffer driven by its own training player; its `exploration`
    # attribute is annealed by the ScheduledHyperParamSetter callback below.
    expreplay = ExpReplay(
        predictor_io_names=(['state'], ['policy']),
        player=get_player(train=True),
        state_shape=IMAGE_SHAPE3,
        batch_size=BATCH_SIZE,
        memory_size=MEMORY_SIZE,
        init_memory_size=INIT_MEMORY_SIZE,
        init_exploration=1.0,
        update_frequency=UPDATE_FREQ,
        history_len=FRAME_HISTORY
    )
    config = TrainConfig(
        model=Model(),
        dataflow=dataflow,
        callbacks=[
            ModelSaver(),
            ScheduledHyperParamSetter('learning_rate', [(20, 0.0003), (120, 0.0001)]),
            ScheduledHyperParamSetter('entropy_beta', [(80, 0.005)]),
            HumanHyperParamSetter('learning_rate'),
            HumanHyperParamSetter('entropy_beta'),
            master,
            StartProcOrThread(master),
            PeriodicTrigger(Evaluator(
                EVAL_EPISODE, ['state'], ['policy'], get_player),
                every_k_epochs=3),
            expreplay,
            ScheduledHyperParamSetter(
                ObjAttrParam(expreplay, 'exploration'),
                [(0, 1), (10, 0.9), (50, 0.1), (320, 0.01)],   # 1->0.1 in the first million steps
                interp='linear'),
            PeriodicTrigger(LogVisualizeEpisode(
                ['state'], ['policy'], get_player),
                every_k_epochs=1),
        ],
        session_creator=sesscreate.NewSessionCreator(
            config=get_default_sess_config(0.5)),
        steps_per_epoch=STEPS_PER_EPOCH,
        session_init=get_model_loader(args.load) if args.load else None,
        max_epoch=1000,
    )
    trainer = SimpleTrainer() if config.nr_tower == 1 else AsyncMultiGPUTrainer(train_tower)
    launch_train_with_config(config, trainer)
def train():
    """Sanity-check run of reward-shaping training.

    NOTE(review): output/checkpoint paths and the GPU count are hard-coded
    for one specific machine and experiment; parameterize before reuse.
    """
    # Using `assert` for runtime validation is stripped under `python -O`;
    # kept as-is because this mirrors the sibling train() variants.
    assert tf.test.is_gpu_available(), "Training requires GPUs!"
    dirname = os.path.join(
        '/mnt/research/judy/reward_shaping/sanity_reward_shaping/',
        'train-atari-{}'.format(ENV_NAME))
    logger.set_logger_dir(dirname)

    #####################
    # assign GPUs for training & inference
    # NOTE(review): GPU count is deliberately pinned to 1 for this run;
    # the auto-detect line is kept disabled below.
    #num_gpu = get_num_gpu() - 1
    num_gpu = 1
    #####################
    global PREDICTOR_THREAD
    if num_gpu > 0:
        if num_gpu > 1:
            # use half gpus for inference
            predict_tower = list(range(num_gpu))[-num_gpu // 2:]
        else:
            predict_tower = [0]
        PREDICTOR_THREAD = len(predict_tower) * PREDICTOR_THREAD_PER_GPU
        train_tower = list(range(num_gpu))[:-num_gpu // 2] or [0]
        logger.info("[Batch-A3C] Train on gpu {} and infer on gpu {}".format(
            ','.join(map(str, train_tower)), ','.join(map(str,
                                                          predict_tower))))
    else:
        logger.warn(
            "Without GPU this model will never learn! CPU is only useful for debug."
        )
        PREDICTOR_THREAD = 1
        predict_tower, train_tower = [0], [0]

    #####################
    # setup actor process
    #####################
    name_base = str(uuid.uuid1())[:6]
    prefix = '@' if sys.platform.startswith('linux') else ''
    namec2s = 'ipc://{}sim-c2s-{}'.format(prefix, name_base)
    names2c = 'ipc://{}sim-s2c-{}'.format(prefix, name_base)
    procs = [
        MySimulatorWorker(k, namec2s, names2c) for k in range(SIMULATOR_PROC)
    ]

    ensure_proc_terminate(procs)
    start_proc_mask_signal(procs)
    # Master collects experience with reward shaping enabled.
    master = MySimulatorMaster(namec2s,
                               names2c,
                               predict_tower,
                               reward_shaping=True)
    model = Model()
    config = TrainConfig(
        model=model,
        dataflow=master.get_training_dataflow(),
        callbacks=[
            ModelSaver(),
            ScheduledHyperParamSetter('learning_rate', [(20, 0.0003),
                                                        (120, 0.0001)]),
            ScheduledHyperParamSetter('entropy_beta', [(80, 0.005)]),
            master,
            PeriodicTrigger(Evaluator(EVAL_EPISODE, ['state'], ['policy'],
                                      get_player),
                            every_k_steps=2000),
        ],
        session_creator=sesscreate.NewSessionCreator(
            config=get_default_sess_config(0.5)),
        steps_per_epoch=STEPS_PER_EPOCH,
        # Warm-start from a fixed sanity checkpoint (hard-coded path).
        session_init=SmartInit(
            "/mnt/research/judy/reward_shaping/sanity/model_checkpoint/checkpoint"
        ),
        max_epoch=1000,
    )
    # Multi-GPU path is disabled (num_gpu is pinned to 1 above).
    trainer = SimpleTrainer(
    )  #if num_gpu == 1 else AsyncMultiGPUTrainer(train_tower)
    launch_train_with_config(config, trainer)
Example #6
0
def train():
    """Start simulator processes and launch auto-resuming A3C-LSTM training.

    Removed dead, commented-out callbacks/session_init entries; live
    behavior is unchanged.
    """
    dirname = os.path.join('train_log', 'A3C-LSTM')
    logger.set_logger_dir(dirname)

    # assign GPUs for training & inference
    nr_gpu = get_nr_gpu()
    global PREDICTOR_THREAD
    if nr_gpu > 0:
        if nr_gpu > 1:
            # use all gpus for inference
            predict_tower = list(range(nr_gpu))
        else:
            predict_tower = [0]
        PREDICTOR_THREAD = len(predict_tower) * PREDICTOR_THREAD_PER_GPU
        train_tower = list(range(nr_gpu))[:-nr_gpu // 2] or [0]
        logger.info("[Batch-A3C] Train on gpu {} and infer on gpu {}".format(
            ','.join(map(str, train_tower)), ','.join(map(str,
                                                          predict_tower))))
    else:
        logger.warn(
            "Without GPU this model will never learn! CPU is only useful for debug."
        )
        PREDICTOR_THREAD = 1
        predict_tower, train_tower = [0], [0]

    # setup simulator processes; Windows has no IPC sockets, use TCP there
    name_base = str(uuid.uuid1())[:6]
    if os.name == 'nt':
        namec2s = 'tcp://127.0.0.1:8000'
        names2c = 'tcp://127.0.0.1:9000'
    else:
        prefix = '@' if sys.platform.startswith('linux') else ''
        namec2s = 'ipc://{}sim-c2s-{}'.format(prefix, name_base)
        names2c = 'ipc://{}sim-s2c-{}'.format(prefix, name_base)

    procs = [
        MySimulatorWorker(k, namec2s, names2c) for k in range(SIMULATOR_PROC)
    ]
    ensure_proc_terminate(procs)
    start_proc_mask_signal(procs)

    master = MySimulatorMaster(namec2s, names2c, predict_tower)
    dataflow = BatchData(DataFromQueue(master.queue), BATCH_SIZE)
    config = AutoResumeTrainConfig(
        always_resume=True,
        model=Model(),
        dataflow=dataflow,
        callbacks=[
            ModelSaver(),
            # Keep the checkpoint with the best 'true_reward_2' statistic.
            MaxSaver('true_reward_2'),
            HumanHyperParamSetter('learning_rate'),
            master,
            StartProcOrThread(master),
            Evaluator(100, [
                'role_id', 'policy_state_in', 'last_cards_in', 'lstm_state_in'
            ], ['active_prob', 'passive_prob', 'new_lstm_state'], get_player),
        ],
        steps_per_epoch=STEPS_PER_EPOCH,
        max_epoch=1000,
    )
    if config.nr_tower == 1:
        trainer = SimpleTrainer()
    else:
        trainer = AsyncMultiGPUTrainer(train_tower)
    launch_train_with_config(config, trainer)
Example #7
0
def get_config():
    """Build a TrainConfig for actor-critic training driven by simulator
    processes, with periodic weight-sync ops run through a callback.

    Returns:
        TrainConfig: training configuration wired to the simulator master.
    """
    import functools

    M = Model()

    # Per-run IPC pipe names under TENSORPACK_PIPEDIR (default /tmp/.ipcpipe);
    # stale pipes from earlier runs are removed first.
    name_base = str(uuid.uuid1())[:6]
    pipe_dir = os.environ.get('TENSORPACK_PIPEDIR', '/tmp/.ipcpipe').rstrip('/')
    if not os.path.exists(pipe_dir):
        os.makedirs(pipe_dir)
    else:
        os.system('rm -f {}/sim-*'.format(pipe_dir))
    namec2s = 'ipc://{}/sim-c2s-{}'.format(pipe_dir, name_base)
    names2c = 'ipc://{}/sim-s2c-{}'.format(pipe_dir, name_base)
    # AgentTorcs * SIMULATOR_PROC, AgentReplay * SIMULATOR_PROC
    procs = [MySimulatorWorker(k, namec2s, names2c)
             for k in range(SIMULATOR_PROC * 2)]
    ensure_proc_terminate(procs)
    start_proc_mask_signal(procs)

    master = MySimulatorMaster(namec2s, names2c, M)
    dataflow = BatchData(DataFromQueue(master.queue), BATCH_SIZE)

    class CBSyncWeight(Callback):
        """Run the model's weight-sync ops on a fixed step schedule."""

        def _before_run(self, ctx):
            # Every 10 local steps, sync both the main and the TD networks.
            # (An additional always-false `... and 0` branch was removed.)
            if self.local_step % 10 == 0:
                return [M._sync_op, M._td_sync_op]

        def _after_run(self, ctx, _):
            # Once per round of simulator processes, sync the TD network.
            # NOTE(review): confirm tensorpack actually executes ops returned
            # from _after_run; it may ignore the return value.
            if self.local_step > 1 and self.local_step % SIMULATOR_PROC == 0:
                return [M._td_sync_op]

    return TrainConfig(
        model=M,
        dataflow=dataflow,
        callbacks=[
            ModelSaver(),
            # Actor and critic learning rates follow model-defined schedules.
            HyperParamSetterWithFunc(
                'learning_rate/actor',
                functools.partial(M._calc_learning_rate, 'actor')),
            HyperParamSetterWithFunc(
                'learning_rate/critic',
                functools.partial(M._calc_learning_rate, 'critic')),
            ScheduledHyperParamSetter('entropy_beta', [(80, 0.005)]),
            master,
            StartProcOrThread(master),
            CBSyncWeight(),
        ],
        session_creator=sesscreate.NewSessionCreator(
            config=get_default_sess_config(0.5)),
        steps_per_epoch=STEPS_PER_EPOCH,
        max_epoch=1000,
    )
Example #8
0
def get_config():
    """Build a TrainConfig for actor-critic training driven by simulator
    processes, with periodic weight-sync ops run through a callback.

    Returns:
        TrainConfig: training configuration wired to the simulator master.
    """
    import functools

    M = Model()

    # Per-run IPC pipe names under TENSORPACK_PIPEDIR (default /tmp/.ipcpipe);
    # stale pipes from earlier runs are removed first.
    name_base = str(uuid.uuid1())[:6]
    PIPE_DIR = os.environ.get('TENSORPACK_PIPEDIR',
                              '/tmp/.ipcpipe').rstrip('/')
    if not os.path.exists(PIPE_DIR):
        os.makedirs(PIPE_DIR)
    else:
        os.system('rm -f {}/sim-*'.format(PIPE_DIR))
    namec2s = 'ipc://{}/sim-c2s-{}'.format(PIPE_DIR, name_base)
    names2c = 'ipc://{}/sim-s2c-{}'.format(PIPE_DIR, name_base)
    # AgentTorcs * SIMULATOR_PROC, AgentReplay * SIMULATOR_PROC
    procs = [
        MySimulatorWorker(k, namec2s, names2c)
        for k in range(SIMULATOR_PROC * 2)
    ]
    ensure_proc_terminate(procs)
    start_proc_mask_signal(procs)

    master = MySimulatorMaster(namec2s, names2c, M)
    dataflow = BatchData(DataFromQueue(master.queue), BATCH_SIZE)

    class CBSyncWeight(Callback):
        """Run the model's weight-sync ops on a fixed step schedule."""

        def _before_run(self, ctx):
            # Every 10 local steps, sync both the main and the TD networks.
            # (An unreachable `... == 0 and 0:` branch was removed.)
            if self.local_step % 10 == 0:
                return [M._sync_op, M._td_sync_op]

        def _after_run(self, ctx, _):
            # Once per round of simulator processes, sync the TD network.
            # NOTE(review): confirm tensorpack actually executes ops returned
            # from _after_run; it may ignore the return value.
            if self.local_step > 1 and self.local_step % SIMULATOR_PROC == 0:
                return [M._td_sync_op]

    return TrainConfig(
        model=M,
        dataflow=dataflow,
        callbacks=[
            ModelSaver(),
            # Actor and critic learning rates follow model-defined schedules.
            HyperParamSetterWithFunc(
                'learning_rate/actor',
                functools.partial(M._calc_learning_rate, 'actor')),
            HyperParamSetterWithFunc(
                'learning_rate/critic',
                functools.partial(M._calc_learning_rate, 'critic')),
            ScheduledHyperParamSetter('entropy_beta', [(80, 0.005)]),
            master,
            StartProcOrThread(master),
            CBSyncWeight(),
        ],
        session_creator=sesscreate.NewSessionCreator(
            config=get_default_sess_config(0.5)),
        steps_per_epoch=STEPS_PER_EPOCH,
        max_epoch=1000,
    )
Example #9
0
def train(args):
    """Launch Batch-A3C training whose rewards are shaped by a separate
    logit-rendering model.

    Args:
        args: parsed CLI options; reads ``env``, ``shaping``, ``num_gpu``
            and ``logit_render_model_checkpoint`` (the latter is mutated
            in place to the resolved checkpoint path).
    """
    assert tf.test.is_gpu_available(), "Training requires GPUs!"
    # Resolve which checkpoint renders the reward logits.
    if args.logit_render_model_checkpoint == "pretrained":
        # NOTE(review): 'pretraind_model_path' looks like a typo, but it must
        # match the attribute actually defined on `settings` — verify there.
        args.logit_render_model_checkpoint = settings.pretraind_model_path[
            args.env]
        render = "pretrained"
    else:
        args.logit_render_model_checkpoint = os.path.join(
            settings.supervised_model_checkpoint[args.env], 'checkpoint')
        # NOTE(review): "surpervised" is a typo, but it is baked into existing
        # log-directory names; fixing it would break resumption of old runs.
        render = "surpervised"
    dirname = os.path.join(
        settings.path_prefix,
        "reward_shaping_model/env-{}-shaping-{}-logit-render-{}")
    dirname = dirname.format(args.env, args.shaping, render)
    logger.set_logger_dir(dirname)

    # assign GPUs for training & inference
    num_gpu = args.num_gpu
    global PREDICTOR_THREAD
    if num_gpu > 1:
        # use half gpus for inference
        predict_tower = list(range(num_gpu))[-num_gpu // 2:]
    else:
        predict_tower = [0]
    PREDICTOR_THREAD = len(predict_tower) * PREDICTOR_THREAD_PER_GPU
    train_tower = list(range(num_gpu))[:-num_gpu // 2] or [0]
    logger.info("[Batch-A3C] Train on gpu {} and infer on gpu {}".format(
        ','.join(map(str, train_tower)), ','.join(map(str, predict_tower))))

    # setup simulator processes
    name_base = str(uuid.uuid1())[:6]
    prefix = '@' if sys.platform.startswith('linux') else ''
    namec2s = 'ipc://{}sim-c2s-{}'.format(prefix, name_base)
    names2c = 'ipc://{}sim-s2c-{}'.format(prefix, name_base)
    procs = [
        MySimulatorWorker(k, namec2s, names2c) for k in range(SIMULATOR_PROC)
    ]

    ensure_proc_terminate(procs)
    start_proc_mask_signal(procs)

    master = MySimulatorMaster(namec2s, names2c, predict_tower, args)
    config = TrainConfig(
        model=Model(),
        dataflow=master.get_training_dataflow(),
        callbacks=[
            ModelSaver(),
            ScheduledHyperParamSetter('learning_rate', [(20, 0.0003),
                                                        (120, 0.0001)]),
            ScheduledHyperParamSetter('entropy_beta', [(80, 0.005)]),
            master,
            PeriodicTrigger(
                Evaluator(EVAL_EPISODE, ['state'], ['policy'], get_player),
                every_k_steps=2000),
        ],
        session_creator=sesscreate.NewSessionCreator(
            config=get_default_sess_config(0.5)),
        steps_per_epoch=STEPS_PER_EPOCH,
        session_init=SmartInit(args.logit_render_model_checkpoint),
        max_epoch=1000,
    )
    # Multi-GPU training is currently disabled; always use SimpleTrainer.
    trainer = SimpleTrainer()
    launch_train_with_config(config, trainer)
Example #10
0
        predict_tower = [0]
    PREDICTOR_THREAD = len(predict_tower) * PREDICTOR_THREAD_PER_GPU
    train_tower = list(range(num_gpu))[:-num_gpu // 2] or [0]
    logger.info("[Batch-A3C] Train on gpu {} and infer on gpu {}".format(
        ','.join(map(str, train_tower)), ','.join(map(str, predict_tower))))

    # setup simulator processes
    name_base = str(uuid.uuid1())[:6]
    prefix = '@' if sys.platform.startswith('linux') else ''
    namec2s = 'ipc://{}sim-c2s-{}'.format(prefix, name_base)
    names2c = 'ipc://{}sim-s2c-{}'.format(prefix, name_base)
    procs = [
        MySimulatorWorker(k, namec2s, names2c, goals)
        for k in range(SIMULATOR_PROC)
    ]
    ensure_proc_terminate(procs)
    start_proc_mask_signal(procs)

    master = MySimulatorMaster(namec2s, names2c, predict_tower)
    a3c_input = QueueInput(BatchData(DataFromQueue(master.queue), BATCH_SIZE))
    a3c_model = A3CModel()
    AGTrainer(gan_input=gan_input,
              gan_model=gan_model,
              a3c_input=a3c_input,
              a3c_model=a3c_model).train_with_defaults(
                  callbacks=[ModelSaver(), master,
                             StartProcOrThread(master)],
                  steps_per_epoch=300,
                  max_epoch=200,
                  session_init=SaverRestore(args.load) if args.load else None)
Example #11
0
def train():
    """Start simulator processes and launch auto-resuming A3C-LSTM training
    for the card-game model.

    Removed dead, commented-out config entries; live behavior is unchanged.
    """
    dirname = os.path.join('train_log', 'A3C-LSTM')
    logger.set_logger_dir(dirname)

    # assign GPUs for training & inference
    nr_gpu = get_nr_gpu()
    global PREDICTOR_THREAD
    if nr_gpu > 0:
        if nr_gpu > 1:
            # use all gpus for inference
            predict_tower = list(range(nr_gpu))
        else:
            predict_tower = [0]
        PREDICTOR_THREAD = len(predict_tower) * PREDICTOR_THREAD_PER_GPU
        train_tower = list(range(nr_gpu))[:-nr_gpu // 2] or [0]
        logger.info("[Batch-A3C] Train on gpu {} and infer on gpu {}".format(
            ','.join(map(str, train_tower)), ','.join(map(str,
                                                          predict_tower))))
    else:
        logger.warn(
            "Without GPU this model will never learn! CPU is only useful for debug."
        )
        PREDICTOR_THREAD = 1
        predict_tower, train_tower = [0], [0]

    # setup simulator processes; Windows has no IPC sockets, use TCP there
    name_base = str(uuid.uuid1())[:6]
    if os.name == 'nt':
        namec2s = 'tcp://127.0.0.1:8000'
        names2c = 'tcp://127.0.0.1:9000'
    else:
        prefix = '@' if sys.platform.startswith('linux') else ''
        namec2s = 'ipc://{}sim-c2s-{}'.format(prefix, name_base)
        names2c = 'ipc://{}sim-s2c-{}'.format(prefix, name_base)

    procs = [
        MySimulatorWorker(k, namec2s, names2c) for k in range(SIMULATOR_PROC)
    ]
    ensure_proc_terminate(procs)
    start_proc_mask_signal(procs)

    master = MySimulatorMaster(namec2s, names2c, predict_tower)
    dataflow = BatchData(DataFromQueue(master.queue), BATCH_SIZE)
    config = AutoResumeTrainConfig(
        always_resume=True,
        model=Model(),
        dataflow=dataflow,
        callbacks=[
            ModelSaver(),
            # Keep the checkpoint with the best 'true_reward_2' statistic.
            MaxSaver('true_reward_2'),
            HumanHyperParamSetter('learning_rate'),
            master,
            StartProcOrThread(master),
            Evaluator(100, [
                'role_id', 'policy_state_in', 'last_cards_in', 'lstm_state_in'
            ], ['active_prob', 'passive_prob', 'new_lstm_state'], get_player),
        ],
        steps_per_epoch=STEPS_PER_EPOCH,
        max_epoch=1000,
    )

    if nr_gpu <= 1:
        trainer = SimpleTrainer()
    else:
        trainer = AsyncMultiGPUTrainer(train_tower)
    launch_train_with_config(config, trainer)