Example #1
def get_config(
    files_list,
    input_names=["state_1", "state_2"],
    output_names=["Qvalue_1", "Qvalue_2"],
    agents=2,
):
    """This is only used during training."""
    expreplay = ExpReplay(
        predictor_io_names=(input_names, output_names),
        player=get_player(task="train", files_list=files_list, agents=agents),
        state_shape=IMAGE_SIZE,
        batch_size=BATCH_SIZE,
        memory_size=MEMORY_SIZE,
        init_memory_size=INIT_MEMORY_SIZE,
        init_exploration=1.0,
        update_frequency=UPDATE_FREQ,
        history_len=FRAME_HISTORY,
        agents=agents,
    )

    return TrainConfig(
        # dataflow=expreplay,
        data=QueueInput(expreplay),
        model=Model(agents=agents),
        callbacks=[
            ModelSaver(),
            PeriodicTrigger(
                RunOp(DQNModel.update_target_param, verbose=True),
                # update target network every 10k steps
                every_k_steps=10000 // UPDATE_FREQ,
            ),
            expreplay,
            ScheduledHyperParamSetter("learning_rate", [(60, 4e-4),
                                                        (100, 2e-4)]),
            ScheduledHyperParamSetter(
                ObjAttrParam(expreplay, "exploration"),
                # 1->0.1 in the first million steps
                [(0, 1), (10, 0.1), (320, 0.01)],
                interp="linear",
            ),
            PeriodicTrigger(
                Evaluator(
                    nr_eval=EVAL_EPISODE,
                    input_names=input_names,
                    output_names=output_names,
                    files_list=files_list,
                    get_player_fn=get_player,
                    agents=agents,
                ),
                every_k_epochs=EPOCHS_PER_EVAL,
            ),
            HumanHyperParamSetter("learning_rate"),
        ],
        steps_per_epoch=STEPS_PER_EPOCH,
        max_epoch=1000,
    )
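A minimal sketch of how a config like this is typically consumed in a tensorpack-based DQN script, assuming the surrounding training code; the file list and the trainer choice below are illustrative assumptions, not part of the example above:

from tensorpack.train import SimpleTrainer, launch_train_with_config

if __name__ == "__main__":
    # hypothetical list of training files; replace with the real dataset paths
    train_files = ["./data/filenames/image_files.txt",
                   "./data/filenames/landmark_files.txt"]
    config = get_config(train_files, agents=2)
    # launch training: the trainer runs the QueueInput dataflow and every
    # callback registered in the TrainConfig (ModelSaver, Evaluator, ...)
    launch_train_with_config(config, SimpleTrainer())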
Example #2
def get_config():
    """This is only used during training."""
    expreplay = ExpReplay(predictor_io_names=(['state'], ['Qvalue']),
                          player=get_player(directory=data_dir,
                                            task='train',
                                            files_list=train_data_fpaths),
                          state_shape=OBSERVATION_DIMS,
                          batch_size=BATCH_SIZE,
                          memory_size=MEMORY_SIZE,
                          init_memory_size=INIT_MEMORY_SIZE,
                          init_exploration=1.0,
                          update_frequency=UPDATE_FREQ,
                          frame_history_len=FRAME_HISTORY)

    return TrainConfig(
        # dataflow=expreplay,
        data=QueueInput(expreplay),
        model=Model(),
        callbacks=[  # TODO: periodically save videos
            ModelSaver(checkpoint_dir="model_checkpoints",
                       keep_checkpoint_every_n_hours=0.25,
                       max_to_keep=1000),  # TODO: original was just ModelSaver()
            PeriodicTrigger(
                RunOp(DQNModel.update_target_param, verbose=True),
                # update target network every 10k/freq steps
                every_k_steps=10000 // UPDATE_FREQ),
            # expreplay,
            ScheduledHyperParamSetter('learning_rate', [(60, 4e-4),
                                                        (100, 2e-4)]),
            ScheduledHyperParamSetter(
                ObjAttrParam(expreplay, 'exploration'),
                # 1->0.1 in the first 10M steps
                [(0, 1), (100, 0.1), (120, 0.01)],
                interp='linear'),
            PeriodicTrigger(  # runs expreplay._trigger()
                expreplay, every_k_steps=5000),
            PeriodicTrigger(
                # eval_model_multithread(pred, EVAL_EPISODE, get_player)
                Evaluator(nr_eval=EVAL_EPISODE,
                          input_names=['state'],
                          output_names=['Qvalue'],
                          directory=data_dir,
                          files_list=test_data_fpaths,
                          get_player_fn=get_player),
                every_k_steps=10000 // UPDATE_FREQ),
            HumanHyperParamSetter('learning_rate'),
        ],
        steps_per_epoch=STEPS_PER_EPOCH,
        max_epoch=NUM_EPOCHS,
    )
Example #3
def get_config():
    """This is only used during training."""
    expreplay = ExpReplay(predictor_io_names=(['state'], ['Qvalue']),
                          player=get_player(directory=data_dir,
                                            task='train',
                                            files_list=train_list),
                          state_shape=IMAGE_SIZE,
                          batch_size=BATCH_SIZE,
                          memory_size=MEMORY_SIZE,
                          init_memory_size=INIT_MEMORY_SIZE,
                          init_exploration=1.0,
                          update_frequency=UPDATE_FREQ,
                          history_len=FRAME_HISTORY)

    return TrainConfig(
        # dataflow=expreplay,
        data=QueueInput(expreplay),
        model=Model(),
        callbacks=[
            ModelSaver(),
            PeriodicTrigger(
                RunOp(DQNModel.update_target_param, verbose=True),
                # update target network every 10k steps
                every_k_steps=10000 // UPDATE_FREQ),
            expreplay,
            ScheduledHyperParamSetter('learning_rate', [(60, 4e-4),
                                                        (100, 2e-4)]),
            ScheduledHyperParamSetter(
                ObjAttrParam(expreplay, 'exploration'),
                # 1->0.1 in the first million steps
                [(0, 1), (10, 0.1), (320, 0.01)],
                interp='linear'),
            PeriodicTrigger(Evaluator(nr_eval=EVAL_EPISODE,
                                      input_names=['state'],
                                      output_names=['Qvalue'],
                                      directory=data_dir,
                                      files_list=test_list,
                                      get_player_fn=get_player),
                            every_k_epochs=EPOCHS_PER_EVAL),
            HumanHyperParamSetter('learning_rate'),
        ],
        steps_per_epoch=STEPS_PER_EPOCH,
        max_epoch=1000,
    )
Example #4
def get_config(files_list, data_type, trainable_variables):
    """This is only used during training."""
    expreplay = ExpReplay(
        predictor_io_names=(['state'], ['Qvalue']),
        player=get_player(task='train',
                          files_list=files_list,
                          data_type=data_type),
        state_shape=IMAGE_SIZE,
        batch_size=BATCH_SIZE,
        memory_size=MEMORY_SIZE,
        init_memory_size=INIT_MEMORY_SIZE,
        init_exploration=0.8,  #0.0
        ###############################################################################
        # HITL UPDATE
        update_frequency=INIT_UPDATE_FREQ,
        ###############################################################################
        history_len=FRAME_HISTORY,
        arg_type=data_type)

    return TrainConfig(
        # dataflow=expreplay,
        data=QueueInput(expreplay),
        model=Model(IMAGE_SIZE, FRAME_HISTORY, METHOD, NUM_ACTIONS, GAMMA,
                    trainable_variables),
        callbacks=[
            ModelSaver(),
            PeriodicTrigger(
                RunOp(DQNModel.update_target_param, verbose=True),
                # update target network every 10k steps
                every_k_steps=10000 // UPDATE_FREQ),
            expreplay,
            ScheduledHyperParamSetter('learning_rate', [(60, 4e-4),
                                                        (100, 2e-4)]),
            ScheduledHyperParamSetter(
                ObjAttrParam(expreplay, 'exploration'),
                # 0.8 -> 0.1 over the first million steps (step-based schedule)
                [(0, 0.8), (1000000, 0.1), (32000000, 0.01)],
                interp='linear',
                step_based=True),
            ###############################################################################
            # HITL UPDATE
            # The number of environment steps taken between TD updates is raised
            # from 0 (during the pretraining phase) to 4, so that after
            # NUM_PRETRAIN steps the agent takes 4 steps in the env per update.
            ScheduledHyperParamSetter(ObjAttrParam(expreplay,
                                                   'update_frequency'),
                                      [(0, INIT_UPDATE_FREQ),
                                       (NUM_PRETRAIN, UPDATE_FREQ)],
                                      interp=None,
                                      step_based=True),

            ###############################################################################
            PeriodicTrigger(Evaluator(nr_eval=EVAL_EPISODE,
                                      input_names=['state'],
                                      output_names=['Qvalue'],
                                      files_list=files_list,
                                      data_type=data_type,
                                      get_player_fn=get_player),
                            every_k_steps=STEPS_PER_EVAL),
            HumanHyperParamSetter('learning_rate'),
        ],
        steps_per_epoch=STEPS_PER_EPOCH,
        max_epoch=MAX_EPOCHS,
    )
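To make the HITL schedule in Example #4 concrete, here is a small standalone sketch (not tensorpack code) of the value the update_frequency parameter effectively takes at a given global step under a step-based, non-interpolated schedule; the constants are illustrative assumptions:

# Illustrative constants; the real values come from the training script's config.
INIT_UPDATE_FREQ = 0      # assumed: no env steps between TD updates while pretraining
UPDATE_FREQ = 4           # assumed: 4 env steps between TD updates afterwards
NUM_PRETRAIN = 30000      # hypothetical length of the pretraining phase, in steps

def effective_update_frequency(step):
    """Value that the schedule [(0, INIT_UPDATE_FREQ), (NUM_PRETRAIN, UPDATE_FREQ)]
    with interp=None and step_based=True effectively yields at a global step:
    the latest schedule point whose boundary has been reached."""
    schedule = [(0, INIT_UPDATE_FREQ), (NUM_PRETRAIN, UPDATE_FREQ)]
    value = schedule[0][1]
    for boundary, v in schedule:
        if step >= boundary:
            value = v
    return value

assert effective_update_frequency(0) == INIT_UPDATE_FREQ
assert effective_update_frequency(NUM_PRETRAIN - 1) == INIT_UPDATE_FREQ
assert effective_update_frequency(NUM_PRETRAIN) == UPDATE_FREQ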