Ejemplo n.º 1
0
def train(args):
    """Configure and launch a ``SynTexTrainer`` run from a dict-like ``args``.

    Keys read through ``args.get``:

    * ``data_folder``, ``image_size``: forwarded to ``get_data`` and
      ``VisualizeTestSet``.
    * ``save_folder``: logger directory; when absent the logger chooses
      one itself via ``logger.auto_set_dir()``.
    * ``max_epoch``: required integer; the learning-rate schedule and the
      stopping epoch are derived from it.
    * ``save_epoch``: checkpoint period in epochs (defaults to
      ``max_epoch // 10``, but never less than 1).
    * ``n_gpu`` (default 1), ``batch_size`` (default ``BATCH``),
      ``lr`` (default ``LR``), ``steps_per_epoch`` (default 1000).
    * ``image_steps``, ``scalar_steps``: summary periods in steps.
    * ``act_input``: ``"identity"`` selects a (0, 1) noise range, any
      other value selects (-1, 1).

    The whole ``args`` mapping is additionally handed to ``Style2PO``.
    """
    data_folder = args.get("data_folder")
    save_folder = args.get("save_folder")
    image_size = args.get("image_size")
    max_epoch = args.get("max_epoch")
    # Clamp the default to 1 so a short run (max_epoch < 10) does not end
    # up with a checkpoint period of 0 epochs.
    save_epoch = args.get("save_epoch") or max(max_epoch // 10, 1)

    # Scale lr and steps_per_epoch by the effective batch size so that the
    # total number of gradient evaluations stays consistent.
    n_gpu = args.get("n_gpu") or 1
    batch_size = args.get("batch_size") or BATCH
    equi_batch_size = max(n_gpu, 1) * batch_size
    lr = args.get("lr") or LR
    lr *= equi_batch_size
    steps_per_epoch = args.get("steps_per_epoch") or 1000
    # Keep the step count a positive integer (true division would yield a
    # float, possibly below 1); mirrors the handling of scalar_steps.
    steps_per_epoch = max(int(steps_per_epoch // equi_batch_size), 1)
    image_steps = args.get("image_steps") or steps_per_epoch // 10

    # A missing "scalar_steps" is treated like 0 instead of comparing None
    # with an int, which raises TypeError on Python 3.
    scalar_steps = args.get("scalar_steps") or 0
    if scalar_steps > 0:
        scalar_steps = max(scalar_steps // equi_batch_size, 1)
    else:
        scalar_steps = 0  # merge scalar summary every epoch

    # lr starts decreasing at half of max epoch
    start_dec_epoch = max_epoch // 2
    # training stops once lr has decayed to 0.01 of its initial value
    end_epoch = max_epoch - int((max_epoch - start_dec_epoch) * 0.01)
    # adjust noise input range according to the input activation
    zmin, zmax = (0, 1) if args.get("act_input") == "identity" else (-1, 1)

    if save_folder is None:
        logger.auto_set_dir()
    else:
        logger.set_logger_dir(save_folder)

    df = get_data(data_folder,
                  image_size,
                  zmin=zmin,
                  zmax=zmax,
                  batch=batch_size)
    df = PrintData(df)
    data = QueueInput(df)

    SynTexTrainer(data, Style2PO(args), n_gpu).train_with_defaults(
        callbacks=[
            PeriodicTrigger(ModelSaver(), every_k_epochs=save_epoch),
            PeriodicTrigger(ModelSaver(),
                            every_k_epochs=end_epoch),  # save model at last
            # linear decay from lr at start_dec_epoch down to 0 at max_epoch
            ScheduledHyperParamSetter('learning_rate', [(start_dec_epoch, lr),
                                                        (max_epoch, 0)],
                                      interp="linear"),
            PeriodicTrigger(VisualizeTestSet(data_folder, image_size),
                            every_k_epochs=max(1, max_epoch // 100)),
            # Scalar summaries are currently disabled; the original author
            # noted a training slowdown and suggested TCMalloc:
            # MergeAllSummaries(period=scalar_steps),
            MergeAllSummaries(period=image_steps, key="image_summaries"),
            MergeAllSummaries(key="acti_summaries"),
        ],
        max_epoch=end_epoch,
        steps_per_epoch=steps_per_epoch,
        session_init=None)
Ejemplo n.º 2
0
def get_config(
    files_list,
    input_names=None,
    output_names=None,
    agents=2,
):
    """Build the ``TrainConfig`` for multi-agent DQN training.

    This is only used during training.

    Args:
        files_list: forwarded to ``get_player`` and to the ``Evaluator``.
        input_names: predictor input tensor names; defaults to
            ``["state_1", "state_2"]``.
        output_names: predictor output tensor names; defaults to
            ``["Qvalue_1", "Qvalue_2"]``.
        agents: number of agents, forwarded to the player, the replay
            buffer, the model and the evaluator.

    Returns:
        The ``TrainConfig`` wiring the replay buffer, the model and the
        training callbacks together.

    NOTE(review): the default names cover two agents only; callers using a
    different ``agents`` value presumably need to pass matching names.
    """
    # Fresh lists on every call: list defaults in the signature would be
    # shared between calls and with the objects that receive them.
    if input_names is None:
        input_names = ["state_1", "state_2"]
    if output_names is None:
        output_names = ["Qvalue_1", "Qvalue_2"]

    expreplay = ExpReplay(
        predictor_io_names=(input_names, output_names),
        player=get_player(task="train", files_list=files_list, agents=agents),
        state_shape=IMAGE_SIZE,
        batch_size=BATCH_SIZE,
        memory_size=MEMORY_SIZE,
        init_memory_size=INIT_MEMORY_SIZE,
        init_exploration=1.0,
        update_frequency=UPDATE_FREQ,
        history_len=FRAME_HISTORY,
        agents=agents,
    )

    return TrainConfig(
        # dataflow=expreplay,
        data=QueueInput(expreplay),
        model=Model(agents=agents),
        callbacks=[
            ModelSaver(),
            PeriodicTrigger(
                RunOp(DQNModel.update_target_param, verbose=True),
                # update target network every 10k steps
                every_k_steps=10000 // UPDATE_FREQ,
            ),
            # the replay buffer is registered as a callback as well
            expreplay,
            ScheduledHyperParamSetter("learning_rate", [(60, 4e-4),
                                                        (100, 2e-4)]),
            ScheduledHyperParamSetter(
                ObjAttrParam(expreplay, "exploration"),
                # linear decay: 1 -> 0.1 by index 10, then -> 0.01 by 320
                [(0, 1), (10, 0.1), (320, 0.01)],
                interp="linear",
            ),
            PeriodicTrigger(
                Evaluator(
                    nr_eval=EVAL_EPISODE,
                    input_names=input_names,
                    output_names=output_names,
                    files_list=files_list,
                    get_player_fn=get_player,
                    agents=agents,
                ),
                every_k_epochs=EPOCHS_PER_EVAL,
            ),
            HumanHyperParamSetter("learning_rate"),
        ],
        steps_per_epoch=STEPS_PER_EPOCH,
        max_epoch=1000,
    )
Ejemplo n.º 3
0
def get_config():
    """Assemble the ``TrainConfig`` for DQN training (training-time only).

    An ``ExpReplay`` built around the training player feeds the model
    through a ``QueueInput``; the callbacks handle checkpointing, target
    network updates, hyper-parameter schedules, the periodic replay
    trigger and evaluation on the test files.
    """
    train_player = get_player(directory=data_dir,
                              task='train',
                              files_list=train_data_fpaths)
    expreplay = ExpReplay(
        predictor_io_names=(['state'], ['Qvalue']),
        player=train_player,
        state_shape=OBSERVATION_DIMS,
        batch_size=BATCH_SIZE,
        memory_size=MEMORY_SIZE,
        init_memory_size=INIT_MEMORY_SIZE,
        init_exploration=1.0,
        update_frequency=UPDATE_FREQ,
        frame_history_len=FRAME_HISTORY,
    )

    queue_input = QueueInput(expreplay)
    network = Model()

    # TODO: periodically save videos
    # TODO: the original configuration used a plain ModelSaver()
    checkpointer = ModelSaver(checkpoint_dir="model_checkpoints",
                              keep_checkpoint_every_n_hours=0.25,
                              max_to_keep=1000)

    # update target network every 10k/freq steps
    target_sync = PeriodicTrigger(
        RunOp(DQNModel.update_target_param, verbose=True),
        every_k_steps=10000 // UPDATE_FREQ,
    )

    lr_schedule = ScheduledHyperParamSetter(
        'learning_rate',
        [(60, 4e-4), (100, 2e-4)],
    )

    # linear exploration decay: 1 -> 0.1 by index 100, then -> 0.01 by 120
    exploration_schedule = ScheduledHyperParamSetter(
        ObjAttrParam(expreplay, 'exploration'),
        [(0, 1), (100, 0.1), (120, 0.01)],
        interp='linear',
    )

    # runs expreplay._trigger() every 5000 steps; the replay buffer is not
    # listed as a bare callback in this configuration
    replay_trigger = PeriodicTrigger(expreplay, every_k_steps=5000)

    evaluator = Evaluator(nr_eval=EVAL_EPISODE,
                          input_names=['state'],
                          output_names=['Qvalue'],
                          directory=data_dir,
                          files_list=test_data_fpaths,
                          get_player_fn=get_player)
    evaluation = PeriodicTrigger(evaluator,
                                 every_k_steps=10000 // UPDATE_FREQ)

    manual_lr = HumanHyperParamSetter('learning_rate')

    return TrainConfig(
        data=queue_input,
        model=network,
        callbacks=[
            checkpointer,
            target_sync,
            lr_schedule,
            exploration_schedule,
            replay_trigger,
            evaluation,
            manual_lr,
        ],
        steps_per_epoch=STEPS_PER_EPOCH,
        max_epoch=NUM_EPOCHS,
    )
Ejemplo n.º 4
0
def get_config():
    """Return the training-only ``TrainConfig`` for the DQN agent.

    The experience replay buffer is both the data source (wrapped in a
    ``QueueInput``) and one of the callbacks; the remaining callbacks save
    the model, refresh the target network, schedule the learning rate and
    exploration, and evaluate on ``test_list`` every ``EPOCHS_PER_EVAL``
    epochs.
    """
    player = get_player(directory=data_dir,
                        task='train',
                        files_list=train_list)
    io_names = (['state'], ['Qvalue'])
    expreplay = ExpReplay(
        predictor_io_names=io_names,
        player=player,
        state_shape=IMAGE_SIZE,
        batch_size=BATCH_SIZE,
        memory_size=MEMORY_SIZE,
        init_memory_size=INIT_MEMORY_SIZE,
        init_exploration=1.0,
        update_frequency=UPDATE_FREQ,
        history_len=FRAME_HISTORY,
    )

    data = QueueInput(expreplay)
    model = Model()

    callbacks = [ModelSaver()]

    # update target network every 10k steps
    update_target = RunOp(DQNModel.update_target_param, verbose=True)
    callbacks.append(
        PeriodicTrigger(update_target, every_k_steps=10000 // UPDATE_FREQ))

    callbacks.append(expreplay)

    callbacks.append(
        ScheduledHyperParamSetter('learning_rate',
                                  [(60, 4e-4), (100, 2e-4)]))

    # linear exploration decay: 1 -> 0.1 by index 10, then -> 0.01 by 320
    exploration = ObjAttrParam(expreplay, 'exploration')
    callbacks.append(
        ScheduledHyperParamSetter(exploration,
                                  [(0, 1), (10, 0.1), (320, 0.01)],
                                  interp='linear'))

    evaluator = Evaluator(nr_eval=EVAL_EPISODE,
                          input_names=['state'],
                          output_names=['Qvalue'],
                          directory=data_dir,
                          files_list=test_list,
                          get_player_fn=get_player)
    callbacks.append(
        PeriodicTrigger(evaluator, every_k_epochs=EPOCHS_PER_EVAL))

    callbacks.append(HumanHyperParamSetter('learning_rate'))

    return TrainConfig(
        data=data,
        model=model,
        callbacks=callbacks,
        steps_per_epoch=STEPS_PER_EPOCH,
        max_epoch=1000,
    )
Ejemplo n.º 5
0
def get_config(files_list, data_type, trainable_variables):
    """Create the training-only ``TrainConfig`` for the HITL DQN variant.

    Args:
        files_list: forwarded to ``get_player`` and to the ``Evaluator``.
        data_type: forwarded to ``get_player``, to ``ExpReplay`` (as
            ``arg_type``) and to the ``Evaluator``.
        trainable_variables: forwarded as the last positional argument
            of ``Model``.

    Returns:
        A ``TrainConfig`` whose data source is the replay buffer wrapped in
        a ``QueueInput``.
    """
    train_player = get_player(task='train',
                              files_list=files_list,
                              data_type=data_type)
    expreplay = ExpReplay(
        predictor_io_names=(['state'], ['Qvalue']),
        player=train_player,
        state_shape=IMAGE_SIZE,
        batch_size=BATCH_SIZE,
        memory_size=MEMORY_SIZE,
        init_memory_size=INIT_MEMORY_SIZE,
        init_exploration=0.8,  # the author's note lists 0.0 as alternative
        # HITL update: the replay starts with INIT_UPDATE_FREQ and is
        # switched to UPDATE_FREQ by the schedule further down.
        update_frequency=INIT_UPDATE_FREQ,
        history_len=FRAME_HISTORY,
        arg_type=data_type,
    )

    queue_input = QueueInput(expreplay)
    network = Model(IMAGE_SIZE, FRAME_HISTORY, METHOD, NUM_ACTIONS, GAMMA,
                    trainable_variables)

    saver = ModelSaver()

    # update target network every 10k steps
    target_sync = PeriodicTrigger(
        RunOp(DQNModel.update_target_param, verbose=True),
        every_k_steps=10000 // UPDATE_FREQ,
    )

    lr_schedule = ScheduledHyperParamSetter(
        'learning_rate',
        [(60, 4e-4), (100, 2e-4)],
    )

    # step-based linear decay: 0.8 -> 0.1 over the first million steps,
    # then -> 0.01 by step 32M
    exploration_schedule = ScheduledHyperParamSetter(
        ObjAttrParam(expreplay, 'exploration'),
        [(0, 0.8), (1000000, 0.1), (32000000, 0.01)],
        interp='linear',
        step_based=True,
    )

    # HITL update: per the original author's note, the number of
    # environment steps between TD updates goes from 0 during pretraining
    # to 4 afterwards; the switch happens at step NUM_PRETRAIN with no
    # interpolation.
    update_freq_schedule = ScheduledHyperParamSetter(
        ObjAttrParam(expreplay, 'update_frequency'),
        [(0, INIT_UPDATE_FREQ), (NUM_PRETRAIN, UPDATE_FREQ)],
        interp=None,
        step_based=True,
    )

    evaluator = Evaluator(nr_eval=EVAL_EPISODE,
                          input_names=['state'],
                          output_names=['Qvalue'],
                          files_list=files_list,
                          data_type=data_type,
                          get_player_fn=get_player)
    evaluation = PeriodicTrigger(evaluator, every_k_steps=STEPS_PER_EVAL)

    manual_lr = HumanHyperParamSetter('learning_rate')

    return TrainConfig(
        data=queue_input,
        model=network,
        callbacks=[
            saver,
            target_sync,
            expreplay,
            lr_schedule,
            exploration_schedule,
            update_freq_schedule,
            evaluation,
            manual_lr,
        ],
        steps_per_epoch=STEPS_PER_EPOCH,
        max_epoch=MAX_EPOCHS,
    )
Ejemplo n.º 6
0
    start_dec_epoch = max_epoch // 2
    # stops when lr is 0.01 of its initial value
    end_epoch = max_epoch - int((max_epoch - start_dec_epoch) * 0.01)
    # adjust noise input range according to the input act
    zmin, zmax = (0, 1) if args.get("act") == "identity" else (-1, 1)

    if save_folder == None:
        logger.auto_set_dir()
    else:
        logger.set_logger_dir(save_folder)

    df = get_data(data_folder, image_size, zmin=zmin, zmax=zmax)
    df = PrintData(df)
    data = QueueInput(df)

    SynTexTrainer(data, AdaptiveSynTex(args), n_gpu).train_with_defaults(
        callbacks=[
            PeriodicTrigger(ModelSaver(), every_k_epochs=save_epoch),
            PeriodicTrigger(ModelSaver(),
                            every_k_epochs=end_epoch),  # save model at last
            ScheduledHyperParamSetter('learning_rate', [(start_dec_epoch, lr),
                                                        (max_epoch, 0)],
                                      interp="linear"),
            #PeriodicTrigger(VisualizeTestSet(data_folder, image_size), every_k_epochs=10),
            MergeAllSummaries(period=scalar_steps),  # scalar only
            MergeAllSummaries(period=image_steps, key="image_summaries"),
        ],
        max_epoch=end_epoch,
        steps_per_epoch=steps_per_epoch,
        session_init=None)
Ejemplo n.º 7
0
    vlen, nviews = Ppy.shape[-1], Ppy.shape[0]

    os.environ['CUDA_VISIBLE_DEVICES'] = get_visible_device_list(3)
    global_step = get_global_step_var()

    # set logger directory for checkpoints, etc
    logger.set_logger_dir(args.logdir, action='k')

    steps_per_epoch = cfg.EPOCH_STEPS
    model = Model(vlen, nviews)
    # config.gpu_options.allow_growth = True
    traincfg = TrainConfig(
        model=model,
        data=QueueInput(ProjDataFlow(Ppy)),
        callbacks=[
            PeriodicTrigger(ModelSaver(), every_k_epochs=5),
            PeriodicTrigger(VolumeSaver(model), every_k_epochs=5),
            # prevent learning in the first epoch
            # MemInitHyperParamSetter('learning_rate_mask',(0,1)),
            # controls learning rate as a function of epoch
            HyperParamSetterWithFunc('learning_rate', learning_rate_fun),
            # GraphProfiler()
            # PeakMemoryTracker()
            # GPUUtilizationTracker(),
        ],
        steps_per_epoch=steps_per_epoch,
        max_epoch=200000,
        # first time load model from checkpoint and reset GRU state
        session_init=ChainInit([TryResumeTraining()]),  #,ResetInit(model)])
        # session_config=tf.ConfigProto(log_device_placement=True) #config_gpus(1)
    )