def train(args):
    data_folder = args.get("data_folder")
    save_folder = args.get("save_folder")
    image_size = args.get("image_size")
    max_epoch = args.get("max_epoch")
    save_epoch = args.get("save_epoch") or max_epoch // 10
    # Scale lr and steps_per_epoch accordingly.
    # Make sure the total number of gradient evaluations is consistent.
    n_gpu = args.get("n_gpu") or 1
    batch_size = args.get("batch_size") or BATCH
    equi_batch_size = max(n_gpu, 1) * batch_size
    lr = args.get("lr") or LR
    lr *= equi_batch_size
    steps_per_epoch = args.get("steps_per_epoch") or 1000
    # Integer division keeps steps_per_epoch a valid (integer) step count.
    steps_per_epoch = max(int(steps_per_epoch) // equi_batch_size, 1)
    image_steps = args.get("image_steps") or steps_per_epoch // 10
    scalar_steps = args.get("scalar_steps") or 0  # guard against a missing key
    if scalar_steps > 0:
        scalar_steps = max(scalar_steps // equi_batch_size, 1)
    else:
        scalar_steps = 0  # merge scalar summary every epoch
    # lr starts decreasing at half of max epoch
    start_dec_epoch = max_epoch // 2
    # stops when lr is 0.01 of its initial value
    end_epoch = max_epoch - int((max_epoch - start_dec_epoch) * 0.01)
    # adjust noise input range according to the input act
    zmin, zmax = (0, 1) if args.get("act_input") == "identity" else (-1, 1)
    if save_folder is None:
        logger.auto_set_dir()
    else:
        logger.set_logger_dir(save_folder)

    df = get_data(data_folder, image_size, zmin=zmin, zmax=zmax, batch=batch_size)
    df = PrintData(df)
    data = QueueInput(df)

    SynTexTrainer(data, Style2PO(args), n_gpu).train_with_defaults(
        callbacks=[
            PeriodicTrigger(ModelSaver(), every_k_epochs=save_epoch),
            PeriodicTrigger(ModelSaver(), every_k_epochs=end_epoch),  # save model at last
            ScheduledHyperParamSetter('learning_rate',
                                      [(start_dec_epoch, lr), (max_epoch, 0)],
                                      interp="linear"),
            PeriodicTrigger(VisualizeTestSet(data_folder, image_size),
                            every_k_epochs=max(1, max_epoch // 100)),
            # MergeAllSummaries(period=scalar_steps),  # scalar only, slowdown in training, use TCMalloc
            MergeAllSummaries(period=image_steps, key="image_summaries"),
            MergeAllSummaries(key="acti_summaries"),
        ],
        max_epoch=end_epoch,
        steps_per_epoch=steps_per_epoch,
        session_init=None
    )
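# A minimal invocation sketch for train(): the keys mirror the args.get() calls
# above, but every concrete value here is an illustrative assumption, not a
# project default.
example_args = {
    "data_folder": "./data/textures",  # hypothetical path
    "save_folder": None,               # None -> logger.auto_set_dir()
    "image_size": 256,
    "max_epoch": 400,
    "save_epoch": None,                # falls back to max_epoch // 10
    "n_gpu": 1,
    "batch_size": None,                # falls back to BATCH
    "lr": None,                        # falls back to LR
    "steps_per_epoch": None,           # falls back to 1000
    "image_steps": None,               # falls back to steps_per_epoch // 10
    "scalar_steps": 0,                 # 0 -> merge scalar summaries every epoch
    "act_input": "identity",           # selects the (0, 1) noise range
}
# train(example_args)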
def get_config(
    files_list,
    input_names=["state_1", "state_2"],
    output_names=["Qvalue_1", "Qvalue_2"],
    agents=2,
):
    """This is only used during training."""
    expreplay = ExpReplay(
        predictor_io_names=(input_names, output_names),
        player=get_player(task="train", files_list=files_list, agents=agents),
        state_shape=IMAGE_SIZE,
        batch_size=BATCH_SIZE,
        memory_size=MEMORY_SIZE,
        init_memory_size=INIT_MEMORY_SIZE,
        init_exploration=1.0,
        update_frequency=UPDATE_FREQ,
        history_len=FRAME_HISTORY,
        agents=agents,
    )

    return TrainConfig(
        # dataflow=expreplay,
        data=QueueInput(expreplay),
        model=Model(agents=agents),
        callbacks=[
            ModelSaver(),
            PeriodicTrigger(
                RunOp(DQNModel.update_target_param, verbose=True),
                # update target network every 10k steps
                every_k_steps=10000 // UPDATE_FREQ,
            ),
            expreplay,
            ScheduledHyperParamSetter("learning_rate", [(60, 4e-4), (100, 2e-4)]),
            ScheduledHyperParamSetter(
                ObjAttrParam(expreplay, "exploration"),
                # 1 -> 0.1 in the first million steps
                [(0, 1), (10, 0.1), (320, 0.01)],
                interp="linear",
            ),
            PeriodicTrigger(
                Evaluator(
                    nr_eval=EVAL_EPISODE,
                    input_names=input_names,
                    output_names=output_names,
                    files_list=files_list,
                    get_player_fn=get_player,
                    agents=agents,
                ),
                every_k_epochs=EPOCHS_PER_EVAL,
            ),
            HumanHyperParamSetter("learning_rate"),
        ],
        steps_per_epoch=STEPS_PER_EPOCH,
        max_epoch=1000,
    )
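# A TrainConfig like the one returned above is normally handed to tensorpack's
# trainer entry point. A minimal launch sketch, assuming tensorpack's standard
# launch_train_with_config API; `train_files` is a hypothetical list of
# training file paths, not defined in this snippet.
from tensorpack.train import SimpleTrainer, launch_train_with_config

config = get_config(files_list=train_files, agents=2)
launch_train_with_config(config, SimpleTrainer())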
def get_config():
    """This is only used during training."""
    expreplay = ExpReplay(predictor_io_names=(['state'], ['Qvalue']),
                          player=get_player(directory=data_dir,
                                            task='train',
                                            files_list=train_data_fpaths),
                          state_shape=OBSERVATION_DIMS,
                          batch_size=BATCH_SIZE,
                          memory_size=MEMORY_SIZE,
                          init_memory_size=INIT_MEMORY_SIZE,
                          init_exploration=1.0,
                          update_frequency=UPDATE_FREQ,
                          frame_history_len=FRAME_HISTORY)

    return TrainConfig(
        # dataflow=expreplay,
        data=QueueInput(expreplay),
        model=Model(),
        callbacks=[
            # TODO: periodically save videos
            ModelSaver(checkpoint_dir="model_checkpoints",
                       keep_checkpoint_every_n_hours=0.25,
                       max_to_keep=1000),  # TODO: originally just ModelSaver()
            PeriodicTrigger(
                RunOp(DQNModel.update_target_param, verbose=True),
                # update target network every 10k/freq steps
                every_k_steps=10000 // UPDATE_FREQ),
            # expreplay,
            ScheduledHyperParamSetter('learning_rate', [(60, 4e-4), (100, 2e-4)]),
            ScheduledHyperParamSetter(
                ObjAttrParam(expreplay, 'exploration'),
                # 1 -> 0.1 in the first 10M steps
                [(0, 1), (100, 0.1), (120, 0.01)],
                interp='linear'),
            PeriodicTrigger(  # runs expreplay._trigger()
                expreplay,
                every_k_steps=5000),
            PeriodicTrigger(  # eval_model_multithread(pred, EVAL_EPISODE, get_player)
                Evaluator(nr_eval=EVAL_EPISODE,
                          input_names=['state'],
                          output_names=['Qvalue'],
                          directory=data_dir,
                          files_list=test_data_fpaths,
                          get_player_fn=get_player),
                every_k_steps=10000 // UPDATE_FREQ),
            HumanHyperParamSetter('learning_rate'),
        ],
        steps_per_epoch=STEPS_PER_EPOCH,
        max_epoch=NUM_EPOCHS,
    )
def get_config():
    """This is only used during training."""
    expreplay = ExpReplay(predictor_io_names=(['state'], ['Qvalue']),
                          player=get_player(directory=data_dir,
                                            task='train',
                                            files_list=train_list),
                          state_shape=IMAGE_SIZE,
                          batch_size=BATCH_SIZE,
                          memory_size=MEMORY_SIZE,
                          init_memory_size=INIT_MEMORY_SIZE,
                          init_exploration=1.0,
                          update_frequency=UPDATE_FREQ,
                          history_len=FRAME_HISTORY)

    return TrainConfig(
        # dataflow=expreplay,
        data=QueueInput(expreplay),
        model=Model(),
        callbacks=[
            ModelSaver(),
            PeriodicTrigger(
                RunOp(DQNModel.update_target_param, verbose=True),
                # update target network every 10k steps
                every_k_steps=10000 // UPDATE_FREQ),
            expreplay,
            ScheduledHyperParamSetter('learning_rate', [(60, 4e-4), (100, 2e-4)]),
            ScheduledHyperParamSetter(
                ObjAttrParam(expreplay, 'exploration'),
                # 1 -> 0.1 in the first million steps
                [(0, 1), (10, 0.1), (320, 0.01)],
                interp='linear'),
            PeriodicTrigger(Evaluator(nr_eval=EVAL_EPISODE,
                                      input_names=['state'],
                                      output_names=['Qvalue'],
                                      directory=data_dir,
                                      files_list=test_list,
                                      get_player_fn=get_player),
                            every_k_epochs=EPOCHS_PER_EVAL),
            HumanHyperParamSetter('learning_rate'),
        ],
        steps_per_epoch=STEPS_PER_EPOCH,
        max_epoch=1000,
    )
def get_config(files_list, data_type, trainable_variables):
    """This is only used during training."""
    expreplay = ExpReplay(
        predictor_io_names=(['state'], ['Qvalue']),
        player=get_player(task='train', files_list=files_list, data_type=data_type),
        state_shape=IMAGE_SIZE,
        batch_size=BATCH_SIZE,
        memory_size=MEMORY_SIZE,
        init_memory_size=INIT_MEMORY_SIZE,
        init_exploration=0.8,  # 0.0
        #######################################################################
        # HITL UPDATE
        update_frequency=INIT_UPDATE_FREQ,
        #######################################################################
        history_len=FRAME_HISTORY,
        arg_type=data_type)

    return TrainConfig(
        # dataflow=expreplay,
        data=QueueInput(expreplay),
        model=Model(IMAGE_SIZE, FRAME_HISTORY, METHOD, NUM_ACTIONS, GAMMA,
                    trainable_variables),
        callbacks=[
            ModelSaver(),
            PeriodicTrigger(
                RunOp(DQNModel.update_target_param, verbose=True),
                # update target network every 10k steps
                every_k_steps=10000 // UPDATE_FREQ),
            expreplay,
            ScheduledHyperParamSetter('learning_rate', [(60, 4e-4), (100, 2e-4)]),
            ScheduledHyperParamSetter(
                ObjAttrParam(expreplay, 'exploration'),
                # 0.8 -> 0.1 in the first million steps
                [(0, 0.8), (1000000, 0.1), (32000000, 0.01)],
                interp='linear', step_based=True),
            ###################################################################
            # HITL UPDATE
            # Raise the number of environment steps taken between TD updates
            # from 0 (during the pretraining phase) to UPDATE_FREQ (4): after
            # NUM_PRETRAIN steps the agent takes 4 steps in the env between
            # consecutive TD updates.
            ScheduledHyperParamSetter(
                ObjAttrParam(expreplay, 'update_frequency'),
                [(0, INIT_UPDATE_FREQ), (NUM_PRETRAIN, UPDATE_FREQ)],
                interp=None, step_based=True),
            ###################################################################
            PeriodicTrigger(Evaluator(nr_eval=EVAL_EPISODE,
                                      input_names=['state'],
                                      output_names=['Qvalue'],
                                      files_list=files_list,
                                      data_type=data_type,
                                      get_player_fn=get_player),
                            every_k_steps=STEPS_PER_EVAL),
            HumanHyperParamSetter('learning_rate'),
        ],
        steps_per_epoch=STEPS_PER_EPOCH,
        max_epoch=MAX_EPOCHS,
    )
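# To make the step-based exploration schedule above concrete, here is an
# illustrative re-implementation of linear interpolation between schedule
# points (a sketch, not tensorpack's actual code).
def interp_schedule(step, points):
    """points: [(step, value), ...] sorted by step; returns the interpolated value."""
    for (s0, v0), (s1, v1) in zip(points, points[1:]):
        if s0 <= step <= s1:
            return v0 + (v1 - v0) * (step - s0) / (s1 - s0)
    return points[-1][1]  # past the last point, the final value is held

# Halfway through the first million steps, exploration has decayed 0.8 -> 0.45:
print(interp_schedule(500000, [(0, 0.8), (1000000, 0.1), (32000000, 0.01)]))  # 0.45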
# Tail of a train() variant that uses AdaptiveSynTex; `args`, `max_epoch`, `lr`,
# `save_epoch`, `scalar_steps`, `image_steps`, `steps_per_epoch`, `n_gpu`,
# `data_folder`, `save_folder`, and `image_size` are assumed to be defined as in
# train() above.
# lr starts decreasing at half of max epoch
start_dec_epoch = max_epoch // 2
# stops when lr is 0.01 of its initial value
end_epoch = max_epoch - int((max_epoch - start_dec_epoch) * 0.01)
# adjust noise input range according to the input act
zmin, zmax = (0, 1) if args.get("act") == "identity" else (-1, 1)
if save_folder is None:
    logger.auto_set_dir()
else:
    logger.set_logger_dir(save_folder)

df = get_data(data_folder, image_size, zmin=zmin, zmax=zmax)
df = PrintData(df)
data = QueueInput(df)

SynTexTrainer(data, AdaptiveSynTex(args), n_gpu).train_with_defaults(
    callbacks=[
        PeriodicTrigger(ModelSaver(), every_k_epochs=save_epoch),
        PeriodicTrigger(ModelSaver(), every_k_epochs=end_epoch),  # save model at last
        ScheduledHyperParamSetter('learning_rate',
                                  [(start_dec_epoch, lr), (max_epoch, 0)],
                                  interp="linear"),
        # PeriodicTrigger(VisualizeTestSet(data_folder, image_size), every_k_epochs=10),
        MergeAllSummaries(period=scalar_steps),  # scalar only
        MergeAllSummaries(period=image_steps, key="image_summaries"),
    ],
    max_epoch=end_epoch,
    steps_per_epoch=steps_per_epoch,
    session_init=None
)
vlen, nviews = Ppy.shape[-1], Ppy.shape[0]
os.environ['CUDA_VISIBLE_DEVICES'] = get_visible_device_list(3)
global_step = get_global_step_var()
# set logger directory for checkpoints, etc.
logger.set_logger_dir(args.logdir, action='k')
steps_per_epoch = cfg.EPOCH_STEPS
model = Model(vlen, nviews)
# config.gpu_options.allow_growth = True

traincfg = TrainConfig(
    model=model,
    data=QueueInput(ProjDataFlow(Ppy)),
    callbacks=[
        PeriodicTrigger(ModelSaver(), every_k_epochs=5),
        PeriodicTrigger(VolumeSaver(model), every_k_epochs=5),
        # prevent learning in the first epoch
        # MemInitHyperParamSetter('learning_rate_mask', (0, 1)),
        # controls learning rate as a function of epoch
        HyperParamSetterWithFunc('learning_rate', learning_rate_fun),
        # GraphProfiler()
        # PeakMemoryTracker()
        # GPUUtilizationTracker(),
    ],
    steps_per_epoch=steps_per_epoch,
    max_epoch=200000,
    # first time load model from checkpoint and reset GRU state
    session_init=ChainInit([TryResumeTraining()]),  # , ResetInit(model)])
    # session_config=tf.ConfigProto(log_device_placement=True)  # config_gpus(1)
)
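# `learning_rate_fun` is referenced above but not shown in this snippet.
# tensorpack's HyperParamSetterWithFunc calls it as func(epoch_num, current_value)
# and sets the parameter to the returned value. A hypothetical stand-in, assuming
# a simple step decay (the actual schedule is not part of this snippet):
def learning_rate_fun(epoch_num, current_lr):
    # Illustrative step decay: halve the learning rate every 50 epochs.
    return current_lr * 0.5 if epoch_num % 50 == 0 else current_lr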