def train():
    """Set up and launch Batch-A3C training for one Atari environment.

    Side effects: sets the global logger directory, mutates the global
    PREDICTOR_THREAD, spawns SIMULATOR_PROC worker processes, and blocks in
    launch_train_with_config until training ends.
    """
    dirname = os.path.join('train_log', 'train-atari-{}'.format(ENV_NAME))
    logger.set_logger_dir(dirname)

    # assign GPUs for training & inference
    nr_gpu = get_nr_gpu()
    global PREDICTOR_THREAD
    if nr_gpu > 0:
        if nr_gpu > 1:
            # use half gpus for inference
            predict_tower = list(range(nr_gpu))[-nr_gpu // 2:]
        else:
            predict_tower = [0]
        PREDICTOR_THREAD = len(predict_tower) * PREDICTOR_THREAD_PER_GPU
        # the remaining (first) half trains; `or [0]` guards the 1-GPU case
        # where the slice is empty
        train_tower = list(range(nr_gpu))[:-nr_gpu // 2] or [0]
        logger.info("[Batch-A3C] Train on gpu {} and infer on gpu {}".format(
            ','.join(map(str, train_tower)), ','.join(map(str, predict_tower))))
    else:
        logger.warn("Without GPU this model will never learn! CPU is only useful for debug.")
        PREDICTOR_THREAD = 1
        predict_tower, train_tower = [0], [0]

    # setup simulator processes
    # random suffix so concurrent runs don't collide on the IPC endpoints;
    # '@' prefix selects Linux abstract sockets (no filesystem entry)
    name_base = str(uuid.uuid1())[:6]
    prefix = '@' if sys.platform.startswith('linux') else ''
    namec2s = 'ipc://{}sim-c2s-{}'.format(prefix, name_base)
    names2c = 'ipc://{}sim-s2c-{}'.format(prefix, name_base)
    procs = [MySimulatorWorker(k, namec2s, names2c) for k in range(SIMULATOR_PROC)]
    ensure_proc_terminate(procs)   # register cleanup before starting them
    start_proc_mask_signal(procs)

    master = MySimulatorMaster(namec2s, names2c, predict_tower)
    dataflow = BatchData(DataFromQueue(master.queue), BATCH_SIZE)
    config = TrainConfig(
        model=Model(),
        dataflow=dataflow,
        callbacks=[
            ModelSaver(),
            ScheduledHyperParamSetter('learning_rate', [(20, 0.0003), (120, 0.0001)]),
            ScheduledHyperParamSetter('entropy_beta', [(80, 0.005)]),
            # allow interactive tuning of both hyperparameters during training
            HumanHyperParamSetter('learning_rate'),
            HumanHyperParamSetter('entropy_beta'),
            master,
            StartProcOrThread(master),
            PeriodicTrigger(Evaluator(
                EVAL_EPISODE, ['state'], ['policy'], get_player),
                every_k_epochs=3),
        ],
        # 0.5: fraction of GPU memory per process — TODO confirm against
        # get_default_sess_config's signature
        session_creator=sesscreate.NewSessionCreator(
            config=get_default_sess_config(0.5)),
        steps_per_epoch=STEPS_PER_EPOCH,
        session_init=get_model_loader(args.load) if args.load else None,
        max_epoch=1000,
    )
    trainer = SimpleTrainer() if config.nr_tower == 1 else AsyncMultiGPUTrainer(train_tower)
    launch_train_with_config(config, trainer)
    def _before_train(self):
        """Start background components right before the training loop begins.

        Starts the async inference predictor threads, then registers/starts
        this master itself via start_proc_mask_signal (signals are masked so
        the child ignores SIGINT and the parent handles shutdown).
        """
        self.async_predictor.start()
        logger.info("Starting MySimulatorMaster ...")
        start_proc_mask_signal(self)
def train():
    """Set up and launch Batch-A3C training (num_gpu-style variant).

    Side effects: sets the global logger directory, mutates the global
    PREDICTOR_THREAD, spawns SIMULATOR_PROC worker processes, and blocks in
    launch_train_with_config until training ends.
    """
    dirname = os.path.join('train_log', 'train-atari-{}'.format(ENV_NAME))
    logger.set_logger_dir(dirname)

    # assign GPUs for training & inference
    num_gpu = get_num_gpu()
    global PREDICTOR_THREAD
    if num_gpu > 0:
        if num_gpu > 1:
            # use half gpus for inference
            predict_tower = list(range(num_gpu))[-num_gpu // 2:]
        else:
            predict_tower = [0]
        PREDICTOR_THREAD = len(predict_tower) * PREDICTOR_THREAD_PER_GPU
        # remaining GPUs train; `or [0]` covers the 1-GPU case (empty slice)
        train_tower = list(range(num_gpu))[:-num_gpu // 2] or [0]
        logger.info("[Batch-A3C] Train on gpu {} and infer on gpu {}".format(
            ','.join(map(str, train_tower)), ','.join(map(str, predict_tower))))
    else:
        logger.warn(
            "Without GPU this model will never learn! CPU is only useful for debug."
        )
        PREDICTOR_THREAD = 1
        predict_tower, train_tower = [0], [0]

    # setup simulator processes
    # random suffix avoids endpoint collisions between concurrent runs;
    # '@' selects Linux abstract-namespace sockets
    name_base = str(uuid.uuid1())[:6]
    prefix = '@' if sys.platform.startswith('linux') else ''
    namec2s = 'ipc://{}sim-c2s-{}'.format(prefix, name_base)
    names2c = 'ipc://{}sim-s2c-{}'.format(prefix, name_base)
    procs = [
        MySimulatorWorker(k, namec2s, names2c) for k in range(SIMULATOR_PROC)
    ]
    ensure_proc_terminate(procs)   # register cleanup before starting
    start_proc_mask_signal(procs)

    master = MySimulatorMaster(namec2s, names2c, predict_tower)
    dataflow = BatchData(DataFromQueue(master.queue), BATCH_SIZE)
    config = TrainConfig(
        model=Model(),
        dataflow=dataflow,
        callbacks=[
            ModelSaver(),
            ScheduledHyperParamSetter('learning_rate', [(20, 0.0003),
                                                        (120, 0.0001)]),
            ScheduledHyperParamSetter('entropy_beta', [(80, 0.005)]),
            # interactive tuning hooks
            HumanHyperParamSetter('learning_rate'),
            HumanHyperParamSetter('entropy_beta'),
            master,
            StartProcOrThread(master),
            PeriodicTrigger(Evaluator(EVAL_EPISODE, ['state'], ['policy'],
                                      get_player),
                            every_k_epochs=3),
        ],
        session_creator=sesscreate.NewSessionCreator(
            config=get_default_sess_config(0.5)),
        steps_per_epoch=STEPS_PER_EPOCH,
        session_init=get_model_loader(args.load) if args.load else None,
        max_epoch=1000,
    )
    trainer = SimpleTrainer() if config.nr_tower == 1 else AsyncMultiGPUTrainer(train_tower)
    launch_train_with_config(config, trainer)
def train():
    """Set up and launch A3C training with an additional ExpReplay callback.

    Side effects: sets the global logger directory, mutates the global
    PREDICTOR_THREAD, spawns SIMULATOR_PROC worker processes, and blocks in
    launch_train_with_config until training ends.

    Fix: removed a full commented-out duplicate of the TrainConfig that was
    kept inside the body (dead code, superseded by the live config below).
    """
    dirname = os.path.join('train_log', 'A3C-{}'.format(ENV_NAME))
    logger.set_logger_dir(dirname)

    # assign GPUs for training & inference
    num_gpu = get_num_gpu()
    global PREDICTOR_THREAD
    if num_gpu > 0:
        if num_gpu > 1:
            # use half gpus for inference
            predict_tower = list(range(num_gpu))[-num_gpu // 2:]
        else:
            predict_tower = [0]
        PREDICTOR_THREAD = len(predict_tower) * PREDICTOR_THREAD_PER_GPU
        # remaining GPUs train; `or [0]` covers the 1-GPU case (empty slice)
        train_tower = list(range(num_gpu))[:-num_gpu // 2] or [0]
        logger.info("[Batch-A3C] Train on gpu {} and infer on gpu {}".format(
            ','.join(map(str, train_tower)), ','.join(map(str, predict_tower))))
    else:
        logger.warn("Without GPU this model will never learn! CPU is only useful for debug.")
        PREDICTOR_THREAD = 1
        predict_tower, train_tower = [0], [0]

    # setup simulator processes
    # random suffix avoids IPC endpoint collisions; '@' selects Linux
    # abstract-namespace sockets (no filesystem entry)
    name_base = str(uuid.uuid1())[:6]
    prefix = '@' if sys.platform.startswith('linux') else ''
    namec2s = 'ipc://{}sim-c2s-{}'.format(prefix, name_base)
    names2c = 'ipc://{}sim-s2c-{}'.format(prefix, name_base)
    procs = [MySimulatorWorker(k, namec2s, names2c) for k in range(SIMULATOR_PROC)]
    ensure_proc_terminate(procs)   # register cleanup before starting
    start_proc_mask_signal(procs)

    master = MySimulatorMaster(namec2s, names2c, predict_tower)
    dataflow = BatchData(DataFromQueue(master.queue), BATCH_SIZE)

    # NOTE(review): mixing the A3C dataflow with a DQN-style ExpReplay
    # callback is unusual — presumably ExpReplay is used only as a data
    # collector / exploration scheduler here; confirm intent.
    expreplay = ExpReplay(
        predictor_io_names=(['state'], ['policy']),
        player=get_player(train=True),
        state_shape=IMAGE_SHAPE3,
        batch_size=BATCH_SIZE,
        memory_size=MEMORY_SIZE,
        init_memory_size=INIT_MEMORY_SIZE,
        init_exploration=1.0,
        update_frequency=UPDATE_FREQ,
        history_len=FRAME_HISTORY
    )
    config = TrainConfig(
        model=Model(),
        dataflow=dataflow,
        callbacks=[
            ModelSaver(),
            ScheduledHyperParamSetter('learning_rate', [(20, 0.0003), (120, 0.0001)]),
            ScheduledHyperParamSetter('entropy_beta', [(80, 0.005)]),
            # interactive tuning hooks
            HumanHyperParamSetter('learning_rate'),
            HumanHyperParamSetter('entropy_beta'),
            master,
            StartProcOrThread(master),
            PeriodicTrigger(Evaluator(
                EVAL_EPISODE, ['state'], ['policy'], get_player),
                every_k_epochs=3),
            expreplay,
            ScheduledHyperParamSetter(
                ObjAttrParam(expreplay, 'exploration'),
                [(0, 1), (10, 0.9), (50, 0.1), (320, 0.01)],   # 1->0.1 in the first million steps
                interp='linear'),
            PeriodicTrigger(LogVisualizeEpisode(
                ['state'], ['policy'], get_player),
                every_k_epochs=1),
        ],
        session_creator=sesscreate.NewSessionCreator(
            config=get_default_sess_config(0.5)),
        steps_per_epoch=STEPS_PER_EPOCH,
        session_init=get_model_loader(args.load) if args.load else None,
        max_epoch=1000,
    )
    trainer = SimpleTrainer() if config.nr_tower == 1 else AsyncMultiGPUTrainer(train_tower)
    launch_train_with_config(config, trainer)
def train():
    """Launch reward-shaping A3C training on a single GPU.

    Experiment-specific variant: hard-coded log/checkpoint paths and
    num_gpu pinned to 1.  Side effects: sets the logger dir, mutates the
    global PREDICTOR_THREAD, spawns SIMULATOR_PROC worker processes, and
    blocks in launch_train_with_config until training ends.
    """
    # NOTE(review): `assert` is stripped under `python -O`; a raise would be
    # a more robust guard for this requirement.
    assert tf.test.is_gpu_available(), "Training requires GPUs!"
    dirname = os.path.join(
        '/mnt/research/judy/reward_shaping/sanity_reward_shaping/',
        'train-atari-{}'.format(ENV_NAME))
    logger.set_logger_dir(dirname)

    #####################
    # assign GPUs for training & inference
    #num_gpu = get_num_gpu() - 1
    # num_gpu deliberately pinned to 1 for this experiment (auto-detection
    # above is disabled)
    num_gpu = 1
    #####################
    global PREDICTOR_THREAD
    if num_gpu > 0:
        if num_gpu > 1:
            # use half gpus for inference
            predict_tower = list(range(num_gpu))[-num_gpu // 2:]
        else:
            predict_tower = [0]
        PREDICTOR_THREAD = len(predict_tower) * PREDICTOR_THREAD_PER_GPU
        # remaining GPUs train; `or [0]` covers the 1-GPU case (empty slice)
        train_tower = list(range(num_gpu))[:-num_gpu // 2] or [0]
        logger.info("[Batch-A3C] Train on gpu {} and infer on gpu {}".format(
            ','.join(map(str, train_tower)), ','.join(map(str, predict_tower))))
    else:
        logger.warn(
            "Without GPU this model will never learn! CPU is only useful for debug."
        )
        PREDICTOR_THREAD = 1
        predict_tower, train_tower = [0], [0]

    #####################
    # setup actor process
    #####################
    # random suffix avoids IPC endpoint collisions; '@' selects Linux
    # abstract-namespace sockets
    name_base = str(uuid.uuid1())[:6]
    prefix = '@' if sys.platform.startswith('linux') else ''
    namec2s = 'ipc://{}sim-c2s-{}'.format(prefix, name_base)
    names2c = 'ipc://{}sim-s2c-{}'.format(prefix, name_base)
    procs = [
        MySimulatorWorker(k, namec2s, names2c) for k in range(SIMULATOR_PROC)
    ]
    ensure_proc_terminate(procs)   # register cleanup before starting
    start_proc_mask_signal(procs)

    # reward_shaping=True: this variant trains with shaped rewards
    master = MySimulatorMaster(namec2s, names2c, predict_tower,
                               reward_shaping=True)
    model = Model()
    config = TrainConfig(
        model=model,
        dataflow=master.get_training_dataflow(),
        callbacks=[
            ModelSaver(),
            ScheduledHyperParamSetter('learning_rate', [(20, 0.0003),
                                                        (120, 0.0001)]),
            ScheduledHyperParamSetter('entropy_beta', [(80, 0.005)]),
            master,
            PeriodicTrigger(Evaluator(EVAL_EPISODE, ['state'], ['policy'],
                                      get_player),
                            every_k_steps=2000),
        ],
        session_creator=sesscreate.NewSessionCreator(
            config=get_default_sess_config(0.5)),
        steps_per_epoch=STEPS_PER_EPOCH,
        # warm-start from a fixed sanity checkpoint (hard-coded path)
        session_init=SmartInit(
            "/mnt/research/judy/reward_shaping/sanity/model_checkpoint/checkpoint"
        ),
        max_epoch=1000,
    )
    # multi-GPU trainer disabled: num_gpu is pinned to 1 above
    trainer = SimpleTrainer()  #if num_gpu == 1 else AsyncMultiGPUTrainer(train_tower)
    launch_train_with_config(config, trainer)
def train():
    """Set up and launch A3C-LSTM training (auto-resuming).

    Side effects: sets the global logger directory, mutates the global
    PREDICTOR_THREAD, spawns SIMULATOR_PROC worker processes, and blocks in
    launch_train_with_config until training ends.

    Fix: removed a commented-out SendStat notification block that embedded a
    live Pushbullet access token (a credential leaked in source) together
    with proxy-setup shell commands.  The token must also be revoked — a
    secret committed to history stays compromised even after deletion.
    """
    dirname = os.path.join('train_log', 'A3C-LSTM')
    logger.set_logger_dir(dirname)

    # assign GPUs for training & inference
    nr_gpu = get_nr_gpu()
    global PREDICTOR_THREAD
    if nr_gpu > 0:
        if nr_gpu > 1:
            # use all gpus for inference
            predict_tower = list(range(nr_gpu))
        else:
            predict_tower = [0]
        PREDICTOR_THREAD = len(predict_tower) * PREDICTOR_THREAD_PER_GPU
        # first half of the GPUs train; `or [0]` covers the 1-GPU case
        train_tower = list(range(nr_gpu))[:-nr_gpu // 2] or [0]
        logger.info("[Batch-A3C] Train on gpu {} and infer on gpu {}".format(
            ','.join(map(str, train_tower)), ','.join(map(str, predict_tower))))
    else:
        logger.warn(
            "Without GPU this model will never learn! CPU is only useful for debug."
        )
        PREDICTOR_THREAD = 1
        predict_tower, train_tower = [0], [0]

    # setup simulator processes
    name_base = str(uuid.uuid1())[:6]
    if os.name == 'nt':
        # Windows has no ipc:// transport — fall back to fixed TCP ports
        namec2s = 'tcp://127.0.0.1:8000'
        names2c = 'tcp://127.0.0.1:9000'
    else:
        # '@' selects Linux abstract-namespace sockets
        prefix = '@' if sys.platform.startswith('linux') else ''
        namec2s = 'ipc://{}sim-c2s-{}'.format(prefix, name_base)
        names2c = 'ipc://{}sim-s2c-{}'.format(prefix, name_base)
    procs = [
        MySimulatorWorker(k, namec2s, names2c) for k in range(SIMULATOR_PROC)
    ]
    ensure_proc_terminate(procs)   # register cleanup before starting
    start_proc_mask_signal(procs)

    master = MySimulatorMaster(namec2s, names2c, predict_tower)
    dataflow = BatchData(DataFromQueue(master.queue), BATCH_SIZE)
    config = AutoResumeTrainConfig(
        always_resume=True,
        # starting_epoch=0,
        model=Model(),
        dataflow=dataflow,
        callbacks=[
            ModelSaver(),
            MaxSaver('true_reward_2'),
            HumanHyperParamSetter('learning_rate'),
            # ScheduledHyperParamSetter('learning_rate', [(20, 0.0003), (120, 0.0001)]),
            # ScheduledHyperParamSetter('entropy_beta', [(80, 0.005)]),
            master,
            StartProcOrThread(master),
            Evaluator(100, [
                'role_id', 'policy_state_in', 'last_cards_in', 'lstm_state_in'
            ], ['active_prob', 'passive_prob', 'new_lstm_state'], get_player),
        ],
        # session_init=SaverRestore('./train_log/a3c_action_1d/max-true_reward_2'),
        # session_init=ModelLoader('policy_network_2', 'SL_policy_network',
        #                          'value_network', 'SL_value_network'),
        steps_per_epoch=STEPS_PER_EPOCH,
        max_epoch=1000,
    )
    trainer = SimpleTrainer() if config.nr_tower == 1 else AsyncMultiGPUTrainer(train_tower)
    launch_train_with_config(config, trainer)
def get_config():
    """Build the TrainConfig for the TORCS actor-critic setup.

    Side effects: creates/cleans the IPC pipe directory, spawns
    2 * SIMULATOR_PROC simulator worker processes, and defines an inline
    callback that periodically syncs weights between networks.
    """
    M = Model()
    name_base = str(uuid.uuid1())[:6]
    PIPE_DIR = os.environ.get('TENSORPACK_PIPEDIR', '/tmp/.ipcpipe').rstrip('/')
    if not os.path.exists(PIPE_DIR):
        os.makedirs(PIPE_DIR)
    else:
        # clean up endpoints left behind by previous (crashed) runs
        os.system('rm -f {}/sim-*'.format(PIPE_DIR))
    namec2s = 'ipc://{}/sim-c2s-{}'.format(PIPE_DIR, name_base)
    names2c = 'ipc://{}/sim-s2c-{}'.format(PIPE_DIR, name_base)
    # AgentTorcs * SIMULATOR_PROC, AgentReplay * SIMULATOR_PROC
    procs = [MySimulatorWorker(k, namec2s, names2c)
             for k in range(SIMULATOR_PROC*2)]
    ensure_proc_terminate(procs)   # register cleanup before starting
    start_proc_mask_signal(procs)

    master = MySimulatorMaster(namec2s, names2c, M)
    dataflow = BatchData(DataFromQueue(master.queue), BATCH_SIZE)

    class CBSyncWeight(Callback):
        # Periodically returns sync ops for the trainer to run alongside
        # the training step.

        def _after_run(self, ctx, _):
            # sync the TD network every SIMULATOR_PROC steps
            if self.local_step > 1 and self.local_step % SIMULATOR_PROC == 0:
                # print("before step ",self.local_step)
                return [M._td_sync_op]

        def _before_run(self, ctx):
            # full sync (both ops) every 10 steps
            if self.local_step % 10 == 0:
                return [M._sync_op, M._td_sync_op]
            # NOTE(review): `and 0` makes this branch dead code — it looks
            # like a deliberately disabled toggle; confirm before removing.
            if self.local_step % SIMULATOR_PROC == 0 and 0:
                return [M._td_sync_op]

    import functools
    return TrainConfig(
        model=M,
        dataflow=dataflow,
        callbacks=[
            ModelSaver(),
            # separate learning-rate schedules for actor and critic,
            # both computed by the model itself
            HyperParamSetterWithFunc(
                'learning_rate/actor',
                functools.partial(M._calc_learning_rate, 'actor')),
            HyperParamSetterWithFunc(
                'learning_rate/critic',
                functools.partial(M._calc_learning_rate, 'critic')),
            # ScheduledHyperParamSetter('learning_rate', [(20, 0.0003), (120, 0.0001)]),
            ScheduledHyperParamSetter('entropy_beta', [(80, 0.005)]),
            # HumanHyperParamSetter('learning_rate'),
            # HumanHyperParamSetter('entropy_beta'),
            # ScheduledHyperParamSetter('actor/sigma_beta_accel', [(1, 0.2), (2, 0.01), (3, 1e-3), (4, 1e-4)]),
            # ScheduledHyperParamSetter('actor/sigma_beta_steering', [(1, 0.1), (2, 0.01), (3, 1e-3), (4, 1e-4)]),
            master,
            StartProcOrThread(master),
            CBSyncWeight(),
            # CBTDSyncWeight()
            # PeriodicTrigger(Evaluator(
            #     EVAL_EPISODE, ['state'], ['policy'], get_player),
            #     every_k_epochs=3),
        ],
        session_creator=sesscreate.NewSessionCreator(
            config=get_default_sess_config(0.5)),
        steps_per_epoch=STEPS_PER_EPOCH,
        max_epoch=1000,
    )
def get_config():
    """Build the TrainConfig for the TORCS actor-critic setup (duplicate
    of the earlier get_config — presumably kept from a merge; confirm
    which copy is live before deleting either).

    Side effects: creates/cleans the IPC pipe directory, spawns
    2 * SIMULATOR_PROC simulator worker processes, and defines an inline
    weight-sync callback.
    """
    M = Model()
    name_base = str(uuid.uuid1())[:6]
    PIPE_DIR = os.environ.get('TENSORPACK_PIPEDIR', '/tmp/.ipcpipe').rstrip('/')
    if not os.path.exists(PIPE_DIR):
        os.makedirs(PIPE_DIR)
    else:
        # clean up endpoints left behind by previous (crashed) runs
        os.system('rm -f {}/sim-*'.format(PIPE_DIR))
    namec2s = 'ipc://{}/sim-c2s-{}'.format(PIPE_DIR, name_base)
    names2c = 'ipc://{}/sim-s2c-{}'.format(PIPE_DIR, name_base)
    # AgentTorcs * SIMULATOR_PROC, AgentReplay * SIMULATOR_PROC
    procs = [
        MySimulatorWorker(k, namec2s, names2c)
        for k in range(SIMULATOR_PROC * 2)
    ]
    ensure_proc_terminate(procs)   # register cleanup before starting
    start_proc_mask_signal(procs)

    master = MySimulatorMaster(namec2s, names2c, M)
    dataflow = BatchData(DataFromQueue(master.queue), BATCH_SIZE)

    class CBSyncWeight(Callback):
        # Periodically returns sync ops for the trainer to run alongside
        # the training step.

        def _after_run(self, ctx, _):
            # sync the TD network every SIMULATOR_PROC steps
            if self.local_step > 1 and self.local_step % SIMULATOR_PROC == 0:
                # print("before step ",self.local_step)
                return [M._td_sync_op]

        def _before_run(self, ctx):
            # full sync (both ops) every 10 steps
            if self.local_step % 10 == 0:
                return [M._sync_op, M._td_sync_op]
            # NOTE(review): `and 0` makes this branch dead code — looks like
            # a deliberately disabled toggle; confirm before removing.
            if self.local_step % SIMULATOR_PROC == 0 and 0:
                return [M._td_sync_op]

    import functools
    return TrainConfig(
        model=M,
        dataflow=dataflow,
        callbacks=[
            ModelSaver(),
            # separate learning-rate schedules for actor and critic
            HyperParamSetterWithFunc(
                'learning_rate/actor',
                functools.partial(M._calc_learning_rate, 'actor')),
            HyperParamSetterWithFunc(
                'learning_rate/critic',
                functools.partial(M._calc_learning_rate, 'critic')),
            # ScheduledHyperParamSetter('learning_rate', [(20, 0.0003), (120, 0.0001)]),
            ScheduledHyperParamSetter('entropy_beta', [(80, 0.005)]),
            # HumanHyperParamSetter('learning_rate'),
            # HumanHyperParamSetter('entropy_beta'),
            # ScheduledHyperParamSetter('actor/sigma_beta_accel', [(1, 0.2), (2, 0.01), (3, 1e-3), (4, 1e-4)]),
            # ScheduledHyperParamSetter('actor/sigma_beta_steering', [(1, 0.1), (2, 0.01), (3, 1e-3), (4, 1e-4)]),
            master,
            StartProcOrThread(master),
            CBSyncWeight(),
            # CBTDSyncWeight()
            # PeriodicTrigger(Evaluator(
            #     EVAL_EPISODE, ['state'], ['policy'], get_player),
            #     every_k_epochs=3),
        ],
        session_creator=sesscreate.NewSessionCreator(
            config=get_default_sess_config(0.5)),
        steps_per_epoch=STEPS_PER_EPOCH,
        max_epoch=1000,
    )
def train(args):
    """Launch reward-shaping A3C training configured from CLI args.

    Args:
        args: parsed arguments; reads/writes args.logit_render_model_checkpoint
            and reads args.env, args.shaping, args.num_gpu.

    Side effects: sets the logger dir, mutates the global PREDICTOR_THREAD,
    spawns SIMULATOR_PROC worker processes, and blocks in
    launch_train_with_config until training ends.
    """
    # NOTE(review): `assert` is stripped under `python -O`; a raise would be
    # a more robust guard for this requirement.
    assert tf.test.is_gpu_available(), "Training requires GPUs!"
    # resolve which checkpoint provides the reward-logit renderer
    if args.logit_render_model_checkpoint == "pretrained":
        args.logit_render_model_checkpoint = settings.pretraind_model_path[
            args.env]
        render = "pretrained"
    else:
        args.logit_render_model_checkpoint = os.path.join(
            settings.supervised_model_checkpoint[args.env], 'checkpoint')
        # NOTE(review): "surpervised" is misspelled, but it is baked into the
        # log-directory name — fixing it would orphan existing run dirs, so
        # it is left as-is; coordinate before correcting.
        render = "surpervised"
    dirname = os.path.join(
        settings.path_prefix,
        "reward_shaping_model/env-{}-shaping-{}-logit-render-{}")
    dirname = dirname.format(args.env, args.shaping, render)
    logger.set_logger_dir(dirname)

    # assign GPUs for training & inference (no CPU fallback in this variant)
    num_gpu = args.num_gpu
    global PREDICTOR_THREAD
    if num_gpu > 1:
        # use half gpus for inference
        predict_tower = list(range(num_gpu))[-num_gpu // 2:]
    else:
        predict_tower = [0]
    PREDICTOR_THREAD = len(predict_tower) * PREDICTOR_THREAD_PER_GPU
    # remaining GPUs train; `or [0]` covers the 1-GPU case (empty slice)
    train_tower = list(range(num_gpu))[:-num_gpu // 2] or [0]
    logger.info("[Batch-A3C] Train on gpu {} and infer on gpu {}".format(
        ','.join(map(str, train_tower)), ','.join(map(str, predict_tower))))

    # setup simulator processes
    # random suffix avoids IPC endpoint collisions; '@' selects Linux
    # abstract-namespace sockets
    name_base = str(uuid.uuid1())[:6]
    prefix = '@' if sys.platform.startswith('linux') else ''
    namec2s = 'ipc://{}sim-c2s-{}'.format(prefix, name_base)
    names2c = 'ipc://{}sim-s2c-{}'.format(prefix, name_base)
    procs = [
        MySimulatorWorker(k, namec2s, names2c) for k in range(SIMULATOR_PROC)
    ]
    ensure_proc_terminate(procs)   # register cleanup before starting
    start_proc_mask_signal(procs)

    master = MySimulatorMaster(namec2s, names2c, predict_tower, args)
    config = TrainConfig(
        model=Model(),
        dataflow=master.get_training_dataflow(),
        callbacks=[
            ModelSaver(),
            ScheduledHyperParamSetter('learning_rate', [(20, 0.0003),
                                                        (120, 0.0001)]),
            ScheduledHyperParamSetter('entropy_beta', [(80, 0.005)]),
            master,
            PeriodicTrigger(
                Evaluator(EVAL_EPISODE, ['state'], ['policy'], get_player),
                #EVAL_EPISODE, ['state'], ['reward_logits'], get_player),
                every_k_steps=2000),
        ],
        session_creator=sesscreate.NewSessionCreator(
            config=get_default_sess_config(0.5)),
        steps_per_epoch=STEPS_PER_EPOCH,
        # warm-start from the checkpoint resolved above
        session_init=SmartInit(args.logit_render_model_checkpoint),
        max_epoch=1000,
    )
    # multi-GPU trainer deliberately disabled in this variant
    trainer = SimpleTrainer()  #if num_gpu == 1 else AsyncMultiGPUTrainer(train_tower)
    launch_train_with_config(config, trainer)
predict_tower = [0] PREDICTOR_THREAD = len(predict_tower) * PREDICTOR_THREAD_PER_GPU train_tower = list(range(num_gpu))[:-num_gpu // 2] or [0] logger.info("[Batch-A3C] Train on gpu {} and infer on gpu {}".format( ','.join(map(str, train_tower)), ','.join(map(str, predict_tower)))) # setup simulator processes name_base = str(uuid.uuid1())[:6] prefix = '@' if sys.platform.startswith('linux') else '' namec2s = 'ipc://{}sim-c2s-{}'.format(prefix, name_base) names2c = 'ipc://{}sim-s2c-{}'.format(prefix, name_base) procs = [ MySimulatorWorker(k, namec2s, names2c, goals) for k in range(SIMULATOR_PROC) ] ensure_proc_terminate(procs) start_proc_mask_signal(procs) master = MySimulatorMaster(namec2s, names2c, predict_tower) a3c_input = QueueInput(BatchData(DataFromQueue(master.queue), BATCH_SIZE)) a3c_model = A3CModel() AGTrainer(gan_input=gan_input, gan_model=gan_model, a3c_input=a3c_input, a3c_model=a3c_model).train_with_defaults( callbacks=[ModelSaver(), master, StartProcOrThread(master)], steps_per_epoch=300, max_epoch=200, session_init=SaverRestore(args.load) if args.load else None)
def train():
    """Set up and launch A3C-LSTM training (auto-resuming variant).

    Side effects: sets the global logger directory, mutates the global
    PREDICTOR_THREAD, spawns SIMULATOR_PROC worker processes, and blocks in
    launch_train_with_config until training ends.
    """
    dirname = os.path.join('train_log', 'A3C-LSTM')
    logger.set_logger_dir(dirname)

    # assign GPUs for training & inference
    nr_gpu = get_nr_gpu()
    global PREDICTOR_THREAD
    if nr_gpu > 0:
        if nr_gpu > 1:
            # use all gpus for inference
            predict_tower = list(range(nr_gpu))
        else:
            predict_tower = [0]
        PREDICTOR_THREAD = len(predict_tower) * PREDICTOR_THREAD_PER_GPU
        # first half of the GPUs train; `or [0]` covers the 1-GPU case
        train_tower = list(range(nr_gpu))[:-nr_gpu // 2] or [0]
        logger.info("[Batch-A3C] Train on gpu {} and infer on gpu {}".format(
            ','.join(map(str, train_tower)), ','.join(map(str, predict_tower))))
    else:
        logger.warn(
            "Without GPU this model will never learn! CPU is only useful for debug."
        )
        PREDICTOR_THREAD = 1
        predict_tower, train_tower = [0], [0]

    # setup simulator processes
    name_base = str(uuid.uuid1())[:6]
    if os.name == 'nt':
        # Windows has no ipc:// transport — fall back to fixed TCP ports
        namec2s = 'tcp://127.0.0.1:8000'
        names2c = 'tcp://127.0.0.1:9000'
    else:
        # '@' selects Linux abstract-namespace sockets
        prefix = '@' if sys.platform.startswith('linux') else ''
        namec2s = 'ipc://{}sim-c2s-{}'.format(prefix, name_base)
        names2c = 'ipc://{}sim-s2c-{}'.format(prefix, name_base)
    procs = [
        MySimulatorWorker(k, namec2s, names2c) for k in range(SIMULATOR_PROC)
    ]
    ensure_proc_terminate(procs)   # register cleanup before starting
    start_proc_mask_signal(procs)

    master = MySimulatorMaster(namec2s, names2c, predict_tower)
    dataflow = BatchData(DataFromQueue(master.queue), BATCH_SIZE)
    config = AutoResumeTrainConfig(
        always_resume=True,
        # starting_epoch=0,
        model=Model(),
        dataflow=dataflow,
        callbacks=[
            ModelSaver(),
            MaxSaver('true_reward_2'),
            HumanHyperParamSetter('learning_rate'),
            # ScheduledHyperParamSetter('learning_rate', [(20, 0.0003), (120, 0.0001)]),
            # ScheduledHyperParamSetter('entropy_beta', [(80, 0.005)]),
            master,
            StartProcOrThread(master),
            Evaluator(100, [
                'role_id', 'policy_state_in', 'last_cards_in', 'lstm_state_in'
            ], ['active_prob', 'passive_prob', 'new_lstm_state'], get_player),
        ],
        # session_init=SaverRestore('./train_log/a3c_action_1d/max-true_reward_2'),
        # session_init=ModelLoader('policy_network_2', 'SL_policy_network',
        #                          'value_network', 'SL_value_network'),
        steps_per_epoch=STEPS_PER_EPOCH,
        max_epoch=1000,
    )
    # NOTE(review): this variant gates on nr_gpu directly (the sibling
    # variants use config.nr_tower) — same effect here, but inconsistent.
    trainer = SimpleTrainer() if nr_gpu <= 1 else AsyncMultiGPUTrainer(
        train_tower)
    launch_train_with_config(config, trainer)