def get_config(
    files_list,
    input_names=["state_1", "state_2"],
    output_names=["Qvalue_1", "Qvalue_2"],
    agents=2,
):
    """This is only used during training."""
    expreplay = ExpReplay(
        predictor_io_names=(input_names, output_names),
        player=get_player(task="train", files_list=files_list, agents=agents),
        state_shape=IMAGE_SIZE,
        batch_size=BATCH_SIZE,
        memory_size=MEMORY_SIZE,
        init_memory_size=INIT_MEMORY_SIZE,
        init_exploration=1.0,
        update_frequency=UPDATE_FREQ,
        history_len=FRAME_HISTORY,
        agents=agents,
    )

    return TrainConfig(
        # dataflow=expreplay,
        data=QueueInput(expreplay),
        model=Model(agents=agents),
        callbacks=[
            ModelSaver(),
            PeriodicTrigger(
                RunOp(DQNModel.update_target_param, verbose=True),
                # update target network every 10k steps
                every_k_steps=10000 // UPDATE_FREQ,
            ),
            expreplay,
            ScheduledHyperParamSetter("learning_rate", [(60, 4e-4), (100, 2e-4)]),
            ScheduledHyperParamSetter(
                ObjAttrParam(expreplay, "exploration"),
                # 1->0.1 in the first million steps
                [(0, 1), (10, 0.1), (320, 0.01)],
                interp="linear",
            ),
            PeriodicTrigger(
                Evaluator(
                    nr_eval=EVAL_EPISODE,
                    input_names=input_names,
                    output_names=output_names,
                    files_list=files_list,
                    get_player_fn=get_player,
                    agents=agents,
                ),
                every_k_epochs=EPOCHS_PER_EVAL,
            ),
            HumanHyperParamSetter("learning_rate"),
        ],
        steps_per_epoch=STEPS_PER_EPOCH,
        max_epoch=1000,
    )
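# Hedged usage sketch (not in the original source): wiring the multi-agent
# get_config() above into an actual run. The file-list paths and the choice of
# SimpleTrainer are assumptions; the original may use a different trainer.
config = get_config(files_list=["image_paths.txt", "landmark_paths.txt"],
                    agents=2)
launch_train_with_config(config, SimpleTrainer())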
def get_config(): """This is only used during training.""" expreplay = ExpReplay(predictor_io_names=(['state'], ['Qvalue']), player=get_player(directory=data_dir, task='train', files_list=train_data_fpaths), state_shape=OBSERVATION_DIMS, batch_size=BATCH_SIZE, memory_size=MEMORY_SIZE, init_memory_size=INIT_MEMORY_SIZE, init_exploration=1.0, update_frequency=UPDATE_FREQ, frame_history_len=FRAME_HISTORY) return TrainConfig( # dataflow=expreplay, data=QueueInput(expreplay), model=Model(), callbacks=[ # TODO: periodically save videos ModelSaver(checkpoint_dir="model_checkpoints", keep_checkpoint_every_n_hours=0.25, max_to_keep=1000), # TODO: og was just ModelSaver() PeriodicTrigger( RunOp(DQNModel.update_target_param, verbose=True), # update target network every 10k/freq steps every_k_steps=10000 // UPDATE_FREQ), # expreplay, ScheduledHyperParamSetter('learning_rate', [(60, 4e-4), (100, 2e-4)]), ScheduledHyperParamSetter( ObjAttrParam(expreplay, 'exploration'), # 1->0.1 in the first 10M steps [(0, 1), (100, 0.1), (120, 0.01)], interp='linear'), PeriodicTrigger( # runs exprelay._trigger() expreplay, every_k_steps=5000), PeriodicTrigger( # eval_model_multithread(pred, EVAL_EPISODE, get_player) Evaluator(nr_eval=EVAL_EPISODE, input_names=['state'], output_names=['Qvalue'], directory=data_dir, files_list=test_data_fpaths, get_player_fn=get_player), every_k_steps=10000 // UPDATE_FREQ), HumanHyperParamSetter('learning_rate'), ], steps_per_epoch=STEPS_PER_EPOCH, max_epoch=NUM_EPOCHS, )
def train_net(net,
              session_init,
              batch_size,
              num_epochs,
              train_dataflow,
              val_dataflow):
    num_towers = max(get_num_gpu(), 1)
    batch_per_tower = batch_size // num_towers
    logger.info("Running on {} towers. Batch size per tower: {}".format(
        num_towers, batch_per_tower))

    num_training_samples = 1281167  # size of the ImageNet-1k training set
    step_size = num_training_samples // batch_size
    max_iter = (num_epochs - 1) * step_size
    callbacks = [
        ModelSaver(),
        ScheduledHyperParamSetter(
            'learning_rate',
            [(0, 0.5), (max_iter, 0)],
            interp='linear',
            step_based=True),
        EstimatedTimeLeft()]

    infs = [ClassificationError('wrong-top1', 'val-error-top1'),
            ClassificationError('wrong-top5', 'val-error-top5')]
    if num_towers == 1:
        # single-GPU inference with queue prefetch
        callbacks.append(InferenceRunner(
            input=QueueInput(val_dataflow),
            infs=infs))
    else:
        # multi-GPU inference (with mandatory queue prefetch)
        callbacks.append(DataParallelInferenceRunner(
            input=val_dataflow,
            infs=infs,
            gpus=list(range(num_towers))))

    config = TrainConfig(
        dataflow=train_dataflow,
        model=net,
        callbacks=callbacks,
        session_init=session_init,
        steps_per_epoch=step_size,
        max_epoch=num_epochs)

    launch_train_with_config(
        config=config,
        trainer=SyncMultiGPUTrainerParameterServer(num_towers))
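# Hedged usage sketch: `ResNetModel`, `checkpoint_path`, and
# `get_imagenet_dataflow` are hypothetical names, and the batch size / epoch
# count are typical ImageNet values, not taken from the source.
train_net(net=ResNetModel(depth=50),
          session_init=SmartInit(checkpoint_path),  # or None to start from scratch
          batch_size=512,
          num_epochs=120,
          train_dataflow=get_imagenet_dataflow('train', batch_size=512),
          val_dataflow=get_imagenet_dataflow('val', batch_size=512))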
def train(train_cfg, model_cfg, common_cfg, dataflow):
    epochs = train_cfg.epochs
    assert epochs, epochs
    epoch_size = train_cfg.epoch_size
    assert epoch_size, epoch_size
    config = TrainConfig(
        model=ModelWrapSingle(train_cfg, model_cfg, common_cfg),
        dataflow=dataflow,
        # data=my_inputsource,  # alternatively, use a customized InputSource
        # callbacks=[...],  # some default callbacks are automatically applied
        # some default monitors are automatically applied
        steps_per_epoch=epoch_size,  # defaults to the size of your InputSource/DataFlow
        max_epoch=epochs)
    print("Create trainer")
    trainer = SimpleTrainer()
    print("Run train")
    launch_train_with_config(config, trainer)
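# Minimal smoke-test sketch, assuming train_cfg / model_cfg / common_cfg are
# already built. FakeData is tensorpack's synthetic dataflow; the [image,
# label] shapes below are illustrative placeholders only.
from tensorpack.dataflow import FakeData

smoke_df = FakeData([[8, 224, 224, 3], [8]], size=train_cfg.epoch_size)
train(train_cfg, model_cfg, common_cfg, smoke_df)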
def get_config(): """This is only used during training.""" expreplay = ExpReplay(predictor_io_names=(['state'], ['Qvalue']), player=get_player(directory=data_dir, task='train', files_list=train_list), state_shape=IMAGE_SIZE, batch_size=BATCH_SIZE, memory_size=MEMORY_SIZE, init_memory_size=INIT_MEMORY_SIZE, init_exploration=1.0, update_frequency=UPDATE_FREQ, history_len=FRAME_HISTORY) return TrainConfig( # dataflow=expreplay, data=QueueInput(expreplay), model=Model(), callbacks=[ ModelSaver(), PeriodicTrigger( RunOp(DQNModel.update_target_param, verbose=True), # update target network every 10k steps every_k_steps=10000 // UPDATE_FREQ), expreplay, ScheduledHyperParamSetter('learning_rate', [(60, 4e-4), (100, 2e-4)]), ScheduledHyperParamSetter( ObjAttrParam(expreplay, 'exploration'), # 1->0.1 in the first million steps [(0, 1), (10, 0.1), (320, 0.01)], interp='linear'), PeriodicTrigger(Evaluator(nr_eval=EVAL_EPISODE, input_names=['state'], output_names=['Qvalue'], directory=data_dir, files_list=test_list, get_player_fn=get_player), every_k_epochs=EPOCHS_PER_EVAL), HumanHyperParamSetter('learning_rate'), ], steps_per_epoch=STEPS_PER_EPOCH, max_epoch=1000, )
def get_config(files_list, data_type, trainable_variables):
    """This is only used during training."""
    expreplay = ExpReplay(
        predictor_io_names=(['state'], ['Qvalue']),
        player=get_player(task='train', files_list=files_list,
                          data_type=data_type),
        state_shape=IMAGE_SIZE,
        batch_size=BATCH_SIZE,
        memory_size=MEMORY_SIZE,
        init_memory_size=INIT_MEMORY_SIZE,
        init_exploration=0.8,  # 0.0
        #######################################################################
        # HITL UPDATE
        update_frequency=INIT_UPDATE_FREQ,
        #######################################################################
        history_len=FRAME_HISTORY,
        arg_type=data_type)

    return TrainConfig(
        # dataflow=expreplay,
        data=QueueInput(expreplay),
        model=Model(IMAGE_SIZE, FRAME_HISTORY, METHOD, NUM_ACTIONS, GAMMA,
                    trainable_variables),
        callbacks=[
            ModelSaver(),
            PeriodicTrigger(
                RunOp(DQNModel.update_target_param, verbose=True),
                # update target network every 10k steps
                every_k_steps=10000 // UPDATE_FREQ),
            expreplay,
            ScheduledHyperParamSetter('learning_rate',
                                      [(60, 4e-4), (100, 2e-4)]),
            ScheduledHyperParamSetter(
                ObjAttrParam(expreplay, 'exploration'),
                # 0.8->0.1 in the first million steps
                [(0, 0.8), (1000000, 0.1), (32000000, 0.01)],
                interp='linear',
                step_based=True),
            #######################################################################
            # HITL UPDATE
            # During pretraining the agent takes 0 environment steps between
            # TD updates; after NUM_PRETRAIN steps this is raised to
            # UPDATE_FREQ (4), so the agent then takes 4 steps in the env
            # between each TD update.
            ScheduledHyperParamSetter(
                ObjAttrParam(expreplay, 'update_frequency'),
                [(0, INIT_UPDATE_FREQ), (NUM_PRETRAIN, UPDATE_FREQ)],
                interp=None,
                step_based=True),
            #######################################################################
            PeriodicTrigger(
                Evaluator(nr_eval=EVAL_EPISODE,
                          input_names=['state'],
                          output_names=['Qvalue'],
                          files_list=files_list,
                          data_type=data_type,
                          get_player_fn=get_player),
                every_k_steps=STEPS_PER_EVAL),
            HumanHyperParamSetter('learning_rate'),
        ],
        steps_per_epoch=STEPS_PER_EPOCH,
        max_epoch=MAX_EPOCHS,
    )
# set logger directory for checkpoints, etc.
logger.set_logger_dir(args.logdir, action='k')

steps_per_epoch = cfg.EPOCH_STEPS
model = Model(vlen, nviews)

# config.gpu_options.allow_growth = True
traincfg = TrainConfig(
    model=model,
    data=QueueInput(ProjDataFlow(Ppy)),
    callbacks=[
        PeriodicTrigger(ModelSaver(), every_k_epochs=5),
        PeriodicTrigger(VolumeSaver(model), every_k_epochs=5),
        # prevent learning in the first epoch
        # MemInitHyperParamSetter('learning_rate_mask', (0, 1)),
        # controls learning rate as a function of epoch
        HyperParamSetterWithFunc('learning_rate', learning_rate_fun),
        # GraphProfiler(),
        # PeakMemoryTracker(),
        # GPUUtilizationTracker(),
    ],
    steps_per_epoch=steps_per_epoch,
    max_epoch=200000,
    # on first run, load model from checkpoint and reset GRU state
    session_init=ChainInit([TryResumeTraining()]),  # , ResetInit(model)])
    # session_config=tf.ConfigProto(log_device_placement=True)  # config_gpus(1)
)

trainer = SimpleTrainer()
# with tf.contrib.tfprof.ProfileContext(logger.get_logger_dir()) as pctx:
launch_train_with_config(traincfg, trainer)
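# A sketch of what `learning_rate_fun` above might look like; its body is not
# in the source and the decay numbers are assumptions. tensorpack's
# HyperParamSetterWithFunc calls func(epoch_num, current_value) and assigns
# the returned value to the parameter.
def learning_rate_fun(epoch_num, current_lr):
    # halve the learning rate every 100 epochs, independent of current value
    base_lr = 1e-3  # assumed initial rate
    return base_lr * (0.5 ** (epoch_num // 100))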
if save_dir is None:
    logger.auto_set_dir()
else:
    logger.set_logger_dir(save_dir)

dataset_train = get_data('train')
dataset_test = get_data('test')

config = TrainConfig(
    model=CifarResNet(n=NUM_UNITS, mult_decay=mult_decay, lr_init=lr_base * 0.1),
    dataflow=dataset_train,
    callbacks=[
        ModelSaver(),
        InferenceRunner(
            dataset_test,
            [ScalarStats('cost'), ClassificationError('wrong_vector')]),
        ScheduledHyperParamSetter('learning_rate',
                                  [(1, lr_base), (82, lr_base * 0.1),
                                   (123, lr_base * 0.01), (164, lr_base * 0.002)])
    ],
    max_epoch=200,
    session_init=SmartInit(args.load),
)
num_gpu = max(get_num_gpu(), 1)
launch_train_with_config(config, SyncMultiGPUTrainerParameterServer(num_gpu))
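# A possible implementation of get_data() used above, following tensorpack's
# standard CIFAR-10 pipeline; the augmentation choices and BATCH_SIZE are
# assumptions, not taken from the source.
from tensorpack.dataflow import dataset, imgaug, AugmentImageComponent, BatchData

def get_data(train_or_test):
    isTrain = train_or_test == 'train'
    ds = dataset.Cifar10(train_or_test)
    if isTrain:
        augmentors = [
            imgaug.CenterPaste((40, 40)),   # pad to 40x40 ...
            imgaug.RandomCrop((32, 32)),    # ... then random-crop back to 32x32
            imgaug.Flip(horiz=True),
        ]
        ds = AugmentImageComponent(ds, augmentors)
    # keep the last partial batch only at test time
    ds = BatchData(ds, BATCH_SIZE, remainder=not isTrain)
    return ds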