def get_config(
    files_list,
    input_names=["state_1", "state_2"],
    output_names=["Qvalue_1", "Qvalue_2"],
    agents=2,
):
    """This is only used during training."""
    expreplay = ExpReplay(
        predictor_io_names=(input_names, output_names),
        player=get_player(task="train", files_list=files_list, agents=agents),
        state_shape=IMAGE_SIZE,
        batch_size=BATCH_SIZE,
        memory_size=MEMORY_SIZE,
        init_memory_size=INIT_MEMORY_SIZE,
        init_exploration=1.0,
        update_frequency=UPDATE_FREQ,
        history_len=FRAME_HISTORY,
        agents=agents,
    )

    return TrainConfig(
        # dataflow=expreplay,
        data=QueueInput(expreplay),
        model=Model(agents=agents),
        callbacks=[
            ModelSaver(),
            PeriodicTrigger(
                RunOp(DQNModel.update_target_param, verbose=True),
                # sync the target network every ~10k environment steps
                every_k_steps=10000 // UPDATE_FREQ,
            ),
            expreplay,
            ScheduledHyperParamSetter("learning_rate", [(60, 4e-4), (100, 2e-4)]),
            ScheduledHyperParamSetter(
                ObjAttrParam(expreplay, "exploration"),
                # decay exploration from 1 to 0.1 by epoch 10, then to 0.01 by epoch 320
                [(0, 1), (10, 0.1), (320, 0.01)],
                interp="linear",
            ),
            PeriodicTrigger(
                Evaluator(
                    nr_eval=EVAL_EPISODE,
                    input_names=input_names,
                    output_names=output_names,
                    files_list=files_list,
                    get_player_fn=get_player,
                    agents=agents,
                ),
                every_k_epochs=EPOCHS_PER_EVAL,
            ),
            HumanHyperParamSetter("learning_rate"),
        ],
        steps_per_epoch=STEPS_PER_EPOCH,
        max_epoch=1000,
    )

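# Sketch (an assumption, not part of the original file): the per-agent tensor
# names in the defaults above follow a "<name>_<i>" pattern, so for a different
# number of agents the name lists could be built with a small helper like this.
def make_agent_io_names(agents):
    """Build per-agent input/output tensor names for the multi-agent DQN."""
    input_names = ["state_{}".format(i + 1) for i in range(agents)]
    output_names = ["Qvalue_{}".format(i + 1) for i in range(agents)]
    return input_names, output_names
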
def get_config():
    """This is only used during training."""
    expreplay = ExpReplay(
        predictor_io_names=(['state'], ['Qvalue']),
        player=get_player(directory=data_dir, task='train',
                          files_list=train_data_fpaths),
        state_shape=OBSERVATION_DIMS,
        batch_size=BATCH_SIZE,
        memory_size=MEMORY_SIZE,
        init_memory_size=INIT_MEMORY_SIZE,
        init_exploration=1.0,
        update_frequency=UPDATE_FREQ,
        frame_history_len=FRAME_HISTORY)

    return TrainConfig(
        # dataflow=expreplay,
        data=QueueInput(expreplay),
        model=Model(),
        callbacks=[
            # TODO: periodically save videos
            # TODO: originally this was just ModelSaver()
            ModelSaver(checkpoint_dir="model_checkpoints",
                       keep_checkpoint_every_n_hours=0.25,
                       max_to_keep=1000),
            PeriodicTrigger(
                RunOp(DQNModel.update_target_param, verbose=True),
                # sync the target network every ~10k environment steps
                every_k_steps=10000 // UPDATE_FREQ),
            # expreplay,
            ScheduledHyperParamSetter('learning_rate',
                                      [(60, 4e-4), (100, 2e-4)]),
            ScheduledHyperParamSetter(
                ObjAttrParam(expreplay, 'exploration'),
                # decay exploration from 1 to 0.1 by epoch 100, then to 0.01 by epoch 120
                [(0, 1), (100, 0.1), (120, 0.01)],
                interp='linear'),
            # run expreplay._trigger() every 5k steps
            PeriodicTrigger(expreplay, every_k_steps=5000),
            # eval_model_multithread(pred, EVAL_EPISODE, get_player)
            PeriodicTrigger(
                Evaluator(nr_eval=EVAL_EPISODE,
                          input_names=['state'],
                          output_names=['Qvalue'],
                          directory=data_dir,
                          files_list=test_data_fpaths,
                          get_player_fn=get_player),
                every_k_steps=10000 // UPDATE_FREQ),
            HumanHyperParamSetter('learning_rate'),
        ],
        steps_per_epoch=STEPS_PER_EPOCH,
        max_epoch=NUM_EPOCHS,
    )

def get_config():
    """This is only used during training."""
    expreplay = ExpReplay(
        predictor_io_names=(['state'], ['Qvalue']),
        player=get_player(directory=data_dir, task='train',
                          files_list=train_list),
        state_shape=IMAGE_SIZE,
        batch_size=BATCH_SIZE,
        memory_size=MEMORY_SIZE,
        init_memory_size=INIT_MEMORY_SIZE,
        init_exploration=1.0,
        update_frequency=UPDATE_FREQ,
        history_len=FRAME_HISTORY)

    return TrainConfig(
        # dataflow=expreplay,
        data=QueueInput(expreplay),
        model=Model(),
        callbacks=[
            ModelSaver(),
            PeriodicTrigger(
                RunOp(DQNModel.update_target_param, verbose=True),
                # sync the target network every ~10k environment steps
                every_k_steps=10000 // UPDATE_FREQ),
            expreplay,
            ScheduledHyperParamSetter('learning_rate',
                                      [(60, 4e-4), (100, 2e-4)]),
            ScheduledHyperParamSetter(
                ObjAttrParam(expreplay, 'exploration'),
                # decay exploration from 1 to 0.1 by epoch 10, then to 0.01 by epoch 320
                [(0, 1), (10, 0.1), (320, 0.01)],
                interp='linear'),
            PeriodicTrigger(
                Evaluator(nr_eval=EVAL_EPISODE,
                          input_names=['state'],
                          output_names=['Qvalue'],
                          directory=data_dir,
                          files_list=test_list,
                          get_player_fn=get_player),
                every_k_epochs=EPOCHS_PER_EVAL),
            HumanHyperParamSetter('learning_rate'),
        ],
        steps_per_epoch=STEPS_PER_EPOCH,
        max_epoch=1000,
    )

def get_config(files_list, data_type, trainable_variables):
    """This is only used during training."""
    expreplay = ExpReplay(
        predictor_io_names=(['state'], ['Qvalue']),
        player=get_player(task='train', files_list=files_list,
                          data_type=data_type),
        state_shape=IMAGE_SIZE,
        batch_size=BATCH_SIZE,
        memory_size=MEMORY_SIZE,
        init_memory_size=INIT_MEMORY_SIZE,
        init_exploration=0.8,  # 0.0
        ###########################################################################
        # HITL UPDATE
        update_frequency=INIT_UPDATE_FREQ,
        ###########################################################################
        history_len=FRAME_HISTORY,
        arg_type=data_type)

    return TrainConfig(
        # dataflow=expreplay,
        data=QueueInput(expreplay),
        model=Model(IMAGE_SIZE, FRAME_HISTORY, METHOD, NUM_ACTIONS, GAMMA,
                    trainable_variables),
        callbacks=[
            ModelSaver(),
            PeriodicTrigger(
                RunOp(DQNModel.update_target_param, verbose=True),
                # update the target network every 10k steps
                every_k_steps=10000 // UPDATE_FREQ),
            expreplay,
            ScheduledHyperParamSetter('learning_rate',
                                      [(60, 4e-4), (100, 2e-4)]),
            ScheduledHyperParamSetter(
                ObjAttrParam(expreplay, 'exploration'),
                # decay exploration from 0.8 to 0.1 over the first 1M steps,
                # then to 0.01 by 32M steps
                [(0, 0.8), (1000000, 0.1), (32000000, 0.01)],
                interp='linear',
                step_based=True),
            ###########################################################################
            # HITL UPDATE
            # Raise the number of environment steps taken between TD updates from 0
            # (during the pretraining phase) to UPDATE_FREQ (4), so that after
            # NUM_PRETRAIN steps the agent acts 4 times in the env per update.
            ScheduledHyperParamSetter(
                ObjAttrParam(expreplay, 'update_frequency'),
                [(0, INIT_UPDATE_FREQ), (NUM_PRETRAIN, UPDATE_FREQ)],
                interp=None,
                step_based=True),
            ###########################################################################
            PeriodicTrigger(
                Evaluator(nr_eval=EVAL_EPISODE,
                          input_names=['state'],
                          output_names=['Qvalue'],
                          files_list=files_list,
                          data_type=data_type,
                          get_player_fn=get_player),
                every_k_steps=STEPS_PER_EVAL),
            HumanHyperParamSetter('learning_rate'),
        ],
        steps_per_epoch=STEPS_PER_EPOCH,
        max_epoch=MAX_EPOCHS,
    )
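
# Usage sketch (an assumption, not part of the original file): tensorpack-based
# DQN scripts typically hand the returned TrainConfig to launch_train_with_config().
# The call-site values below (train_files, 'landmark', None) are hypothetical
# placeholders, not values taken from the original training script.
from tensorpack import SimpleTrainer, launch_train_with_config

if __name__ == '__main__':
    train_files = ['image_paths.txt', 'landmark_paths.txt']  # hypothetical paths
    config = get_config(files_list=train_files,
                        data_type='landmark',
                        trainable_variables=None)
    launch_train_with_config(config, SimpleTrainer())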