Exemple #1
0
def get_config(
    files_list,
    input_names=None,
    output_names=None,
    agents=2,
):
    """Build the ``TrainConfig`` for training; this is only used during training.

    Args:
        files_list: forwarded to ``get_player`` (training player) and to
            the ``Evaluator`` callback.
        input_names: predictor input names handed to ``ExpReplay`` and
            ``Evaluator``. ``None`` (default) means
            ``["state_1", "state_2"]``.
        output_names: predictor output names handed to ``ExpReplay`` and
            ``Evaluator``. ``None`` (default) means
            ``["Qvalue_1", "Qvalue_2"]``.
        agents: agent count forwarded to the player, the replay buffer,
            the model and the evaluator.

    Returns:
        The ``TrainConfig`` wiring the replay buffer, model and callbacks.
    """
    # Build the default lists per call. List literals in the signature are
    # created once and would be shared by every call and by every object
    # (ExpReplay, Evaluator) that keeps a reference to them.
    if input_names is None:
        input_names = ["state_1", "state_2"]
    if output_names is None:
        output_names = ["Qvalue_1", "Qvalue_2"]

    expreplay = ExpReplay(
        predictor_io_names=(input_names, output_names),
        player=get_player(task="train", files_list=files_list, agents=agents),
        state_shape=IMAGE_SIZE,
        batch_size=BATCH_SIZE,
        memory_size=MEMORY_SIZE,
        init_memory_size=INIT_MEMORY_SIZE,
        init_exploration=1.0,
        update_frequency=UPDATE_FREQ,
        history_len=FRAME_HISTORY,
        agents=agents,
    )

    return TrainConfig(
        # The replay buffer is the training data source, fed through a queue.
        data=QueueInput(expreplay),
        model=Model(agents=agents),
        callbacks=[
            ModelSaver(),
            PeriodicTrigger(
                RunOp(DQNModel.update_target_param, verbose=True),
                # update target network every 10k steps
                every_k_steps=10000 // UPDATE_FREQ,
            ),
            # The replay buffer is also registered as a callback.
            expreplay,
            ScheduledHyperParamSetter("learning_rate", [(60, 4e-4),
                                                        (100, 2e-4)]),
            ScheduledHyperParamSetter(
                ObjAttrParam(expreplay, "exploration"),
                # Linear decay 1 -> 0.1 by schedule point 10, then -> 0.01 by
                # point 320. NOTE(review): points are presumably epoch
                # indices (no step_based flag is passed) - confirm.
                [(0, 1), (10, 0.1), (320, 0.01)],
                interp="linear",
            ),
            PeriodicTrigger(
                Evaluator(
                    nr_eval=EVAL_EPISODE,
                    input_names=input_names,
                    output_names=output_names,
                    files_list=files_list,
                    get_player_fn=get_player,
                    agents=agents,
                ),
                every_k_epochs=EPOCHS_PER_EVAL,
            ),
            HumanHyperParamSetter("learning_rate"),
        ],
        steps_per_epoch=STEPS_PER_EPOCH,
        max_epoch=1000,
    )
Exemple #2
0
def get_config():
    """Assemble the ``TrainConfig`` for training (training-only helper).

    Reads its settings (data paths, sizes, frequencies, epoch counts) from
    names defined outside this function.

    Returns:
        The ``TrainConfig`` wiring the replay buffer, model and callbacks.
    """
    train_player = get_player(directory=data_dir,
                              task='train',
                              files_list=train_data_fpaths)
    expreplay = ExpReplay(
        predictor_io_names=(['state'], ['Qvalue']),
        player=train_player,
        state_shape=OBSERVATION_DIMS,
        batch_size=BATCH_SIZE,
        memory_size=MEMORY_SIZE,
        init_memory_size=INIT_MEMORY_SIZE,
        init_exploration=1.0,
        update_frequency=UPDATE_FREQ,
        frame_history_len=FRAME_HISTORY,
    )

    # The replay buffer is the training data source, fed through a queue.
    queued_replay = QueueInput(expreplay)
    network = Model()

    # TODO: periodically save videos
    # TODO: this was originally a bare ModelSaver()
    checkpointer = ModelSaver(checkpoint_dir="model_checkpoints",
                              keep_checkpoint_every_n_hours=0.25,
                              max_to_keep=1000)

    # Update the target network every 10k/UPDATE_FREQ steps.
    sync_op = RunOp(DQNModel.update_target_param, verbose=True)
    target_sync = PeriodicTrigger(sync_op, every_k_steps=10000 // UPDATE_FREQ)

    lr_schedule = ScheduledHyperParamSetter(
        'learning_rate',
        [(60, 4e-4), (100, 2e-4)],
    )

    # Linear exploration decay: 1 -> 0.1 by schedule point 100, then -> 0.01
    # by point 120.
    exploration_param = ObjAttrParam(expreplay, 'exploration')
    exploration_schedule = ScheduledHyperParamSetter(
        exploration_param,
        [(0, 1), (100, 0.1), (120, 0.01)],
        interp='linear',
    )

    # The replay buffer is not registered directly as a callback here; it is
    # wrapped so that it is triggered every 5000 steps instead.
    replay_trigger = PeriodicTrigger(expreplay, every_k_steps=5000)

    evaluator = Evaluator(nr_eval=EVAL_EPISODE,
                          input_names=['state'],
                          output_names=['Qvalue'],
                          directory=data_dir,
                          files_list=test_data_fpaths,
                          get_player_fn=get_player)
    periodic_eval = PeriodicTrigger(evaluator,
                                    every_k_steps=10000 // UPDATE_FREQ)

    manual_lr = HumanHyperParamSetter('learning_rate')

    return TrainConfig(
        data=queued_replay,
        model=network,
        callbacks=[
            checkpointer,
            target_sync,
            lr_schedule,
            exploration_schedule,
            replay_trigger,
            periodic_eval,
            manual_lr,
        ],
        steps_per_epoch=STEPS_PER_EPOCH,
        max_epoch=NUM_EPOCHS,
    )
Exemple #3
0
def train_net(net,
              session_init,
              batch_size,
              num_epochs,
              train_dataflow,
              val_dataflow,
              num_training_samples=1281167):
    """Train ``net`` with a linearly decaying learning rate and validation.

    Args:
        net: model passed to ``TrainConfig``.
        session_init: session initializer passed to ``TrainConfig``.
        batch_size: total batch size; used to derive the steps per epoch
            and the (logged) per-tower batch size.
        num_epochs: value passed as ``max_epoch``; the learning rate
            reaches 0 after ``num_epochs - 1`` epochs' worth of steps.
        train_dataflow: training dataflow.
        val_dataflow: validation dataflow used by the inference callback.
        num_training_samples: number of samples in one pass over the
            training set. Defaults to 1281167, the value that was
            previously hard-coded (presumably the ImageNet-1k training-set
            size - confirm).

    Returns:
        None. Training is launched via ``launch_train_with_config``.
    """
    # Use at least one tower even when no GPU is reported.
    num_towers = max(get_num_gpu(), 1)
    batch_per_tower = batch_size // num_towers
    logger.info("Running on {} towers. Batch size per tower: {}".format(num_towers, batch_per_tower))

    step_size = num_training_samples // batch_size
    max_iter = (num_epochs - 1) * step_size
    callbacks = [
        ModelSaver(),
        # Step-based linear decay of the learning rate from 0.5 to 0 at
        # step max_iter.
        ScheduledHyperParamSetter(
            'learning_rate',
            [(0, 0.5), (max_iter, 0)],
            interp='linear',
            step_based=True),
        EstimatedTimeLeft()]

    infs = [ClassificationError('wrong-top1', 'val-error-top1'),
            ClassificationError('wrong-top5', 'val-error-top5')]
    if num_towers == 1:
        # single-GPU inference with queue prefetch
        callbacks.append(InferenceRunner(
            input=QueueInput(val_dataflow),
            infs=infs))
    else:
        # multi-GPU inference (with mandatory queue prefetch)
        callbacks.append(DataParallelInferenceRunner(
            input=val_dataflow,
            infs=infs,
            gpus=list(range(num_towers))))

    config = TrainConfig(
        dataflow=train_dataflow,
        model=net,
        callbacks=callbacks,
        session_init=session_init,
        steps_per_epoch=step_size,
        max_epoch=num_epochs)

    launch_train_with_config(
        config=config,
        trainer=SyncMultiGPUTrainerParameterServer(num_towers))
Exemple #4
0
def train(train_cfg, model_cfg, common_cfg, dataflow):
    """Run single-trainer training over ``dataflow``.

    Args:
        train_cfg: must expose truthy ``epochs`` and ``epoch_size``
            attributes; also forwarded to ``ModelWrapSingle``.
        model_cfg: forwarded to ``ModelWrapSingle``.
        common_cfg: forwarded to ``ModelWrapSingle``.
        dataflow: training dataflow handed to ``TrainConfig``.

    Raises:
        AssertionError: if ``train_cfg.epochs`` or ``train_cfg.epoch_size``
            is falsy (the offending value is the assertion message).
    """
    max_epoch = train_cfg.epochs
    assert max_epoch, max_epoch
    steps = train_cfg.epoch_size
    assert steps, steps

    wrapped_model = ModelWrapSingle(train_cfg, model_cfg, common_cfg)
    # No callbacks or monitors are passed, so TrainConfig's own defaults
    # apply; steps_per_epoch is set explicitly from train_cfg.epoch_size.
    config = TrainConfig(model=wrapped_model,
                         dataflow=dataflow,
                         steps_per_epoch=steps,
                         max_epoch=max_epoch)

    print("Create trainer")
    simple_trainer = SimpleTrainer()
    print("Run train")
    launch_train_with_config(config, simple_trainer)
Exemple #5
0
def get_config():
    """Return the ``TrainConfig`` for training (training-only helper).

    All settings (data directory, file lists, sizes, frequencies) are read
    from names defined outside this function.

    Returns:
        The ``TrainConfig`` wiring the replay buffer, model and callbacks.
    """
    player = get_player(directory=data_dir,
                        task='train',
                        files_list=train_list)
    expreplay = ExpReplay(
        predictor_io_names=(['state'], ['Qvalue']),
        player=player,
        state_shape=IMAGE_SIZE,
        batch_size=BATCH_SIZE,
        memory_size=MEMORY_SIZE,
        init_memory_size=INIT_MEMORY_SIZE,
        init_exploration=1.0,
        update_frequency=UPDATE_FREQ,
        history_len=FRAME_HISTORY,
    )

    # The replay buffer is the training data source, fed through a queue.
    source = QueueInput(expreplay)
    network = Model()

    callbacks = [ModelSaver()]

    # Update the target network every 10k/UPDATE_FREQ steps.
    callbacks.append(
        PeriodicTrigger(
            RunOp(DQNModel.update_target_param, verbose=True),
            every_k_steps=10000 // UPDATE_FREQ,
        )
    )

    # The replay buffer is also registered as a callback.
    callbacks.append(expreplay)

    callbacks.append(
        ScheduledHyperParamSetter('learning_rate',
                                  [(60, 4e-4), (100, 2e-4)])
    )

    # Linear exploration decay: 1 -> 0.1 by schedule point 10, then -> 0.01
    # by point 320.
    callbacks.append(
        ScheduledHyperParamSetter(
            ObjAttrParam(expreplay, 'exploration'),
            [(0, 1), (10, 0.1), (320, 0.01)],
            interp='linear',
        )
    )

    # Evaluate on the test list every EPOCHS_PER_EVAL epochs.
    evaluator = Evaluator(nr_eval=EVAL_EPISODE,
                          input_names=['state'],
                          output_names=['Qvalue'],
                          directory=data_dir,
                          files_list=test_list,
                          get_player_fn=get_player)
    callbacks.append(PeriodicTrigger(evaluator,
                                     every_k_epochs=EPOCHS_PER_EVAL))

    callbacks.append(HumanHyperParamSetter('learning_rate'))

    return TrainConfig(
        data=source,
        model=network,
        callbacks=callbacks,
        steps_per_epoch=STEPS_PER_EPOCH,
        max_epoch=1000,
    )
Exemple #6
0
def get_config(files_list, data_type, trainable_variables):
    """Build the ``TrainConfig`` for training (training-only helper).

    Args:
        files_list: forwarded to ``get_player`` and to the ``Evaluator``.
        data_type: forwarded to ``get_player``, to ``ExpReplay`` (as
            ``arg_type``) and to the ``Evaluator``.
        trainable_variables: forwarded as the last argument of ``Model``.

    Returns:
        The ``TrainConfig`` wiring the replay buffer, model and callbacks.
    """
    train_player = get_player(task='train',
                              files_list=files_list,
                              data_type=data_type)
    expreplay = ExpReplay(
        predictor_io_names=(['state'], ['Qvalue']),
        player=train_player,
        state_shape=IMAGE_SIZE,
        batch_size=BATCH_SIZE,
        memory_size=MEMORY_SIZE,
        init_memory_size=INIT_MEMORY_SIZE,
        init_exploration=0.8,
        # HITL update: the buffer starts at INIT_UPDATE_FREQ; a scheduled
        # callback below switches it to UPDATE_FREQ.
        update_frequency=INIT_UPDATE_FREQ,
        history_len=FRAME_HISTORY,
        arg_type=data_type,
    )

    # The replay buffer is the training data source, fed through a queue.
    queued_replay = QueueInput(expreplay)
    network = Model(IMAGE_SIZE, FRAME_HISTORY, METHOD, NUM_ACTIONS, GAMMA,
                    trainable_variables)

    checkpointer = ModelSaver()

    # Update the target network every 10k/UPDATE_FREQ steps.
    sync_op = RunOp(DQNModel.update_target_param, verbose=True)
    target_sync = PeriodicTrigger(sync_op, every_k_steps=10000 // UPDATE_FREQ)

    lr_schedule = ScheduledHyperParamSetter(
        'learning_rate',
        [(60, 4e-4), (100, 2e-4)],
    )

    # Step-based linear exploration decay: 0.8 -> 0.1 over the first million
    # steps, then -> 0.01 by step 32 million.
    exploration_schedule = ScheduledHyperParamSetter(
        ObjAttrParam(expreplay, 'exploration'),
        [(0, 0.8), (1000000, 0.1), (32000000, 0.01)],
        interp='linear',
        step_based=True,
    )

    # HITL update: without interpolation, switch the replay buffer's
    # update_frequency from INIT_UPDATE_FREQ to UPDATE_FREQ at step
    # NUM_PRETRAIN. (Original note: 0 environment steps during pretraining,
    # then 4 steps between TD updates; the actual values are defined outside
    # this function.)
    update_freq_schedule = ScheduledHyperParamSetter(
        ObjAttrParam(expreplay, 'update_frequency'),
        [(0, INIT_UPDATE_FREQ), (NUM_PRETRAIN, UPDATE_FREQ)],
        interp=None,
        step_based=True,
    )

    # Evaluation runs every STEPS_PER_EVAL steps on the same files_list that
    # is used for training.
    evaluator = Evaluator(nr_eval=EVAL_EPISODE,
                          input_names=['state'],
                          output_names=['Qvalue'],
                          files_list=files_list,
                          data_type=data_type,
                          get_player_fn=get_player)
    periodic_eval = PeriodicTrigger(evaluator, every_k_steps=STEPS_PER_EVAL)

    manual_lr = HumanHyperParamSetter('learning_rate')

    return TrainConfig(
        data=queued_replay,
        model=network,
        callbacks=[
            checkpointer,
            target_sync,
            # The replay buffer is also registered as a callback.
            expreplay,
            lr_schedule,
            exploration_schedule,
            update_freq_schedule,
            periodic_eval,
            manual_lr,
        ],
        steps_per_epoch=STEPS_PER_EPOCH,
        max_epoch=MAX_EPOCHS,
    )
Exemple #7
0
    # Fragment of a larger function whose header is not visible here: it
    # configures logging, builds a TrainConfig and launches training.

    # Set the logger directory (checkpoints, etc.) from the command line.
    # NOTE(review): action='k' presumably keeps an already-existing
    # directory instead of prompting - confirm against tensorpack's logger
    # documentation.
    logger.set_logger_dir(args.logdir, action='k')

    steps_per_epoch = cfg.EPOCH_STEPS
    model = Model(vlen, nviews)
    # config.gpu_options.allow_growth = True
    traincfg = TrainConfig(
        model=model,
        # Training data: ProjDataFlow(Ppy) fed through a QueueInput.
        data=QueueInput(ProjDataFlow(Ppy)),
        callbacks=[
            # Run ModelSaver and VolumeSaver every 5 epochs.
            PeriodicTrigger(ModelSaver(), every_k_epochs=5),
            PeriodicTrigger(VolumeSaver(model), every_k_epochs=5),
            # Disabled: prevent learning in the first epoch.
            # MemInitHyperParamSetter('learning_rate_mask',(0,1)),
            # Sets 'learning_rate' through learning_rate_fun (defined outside
            # this fragment); per the original note, as a function of epoch.
            HyperParamSetterWithFunc('learning_rate', learning_rate_fun),
            # Disabled profiling/tracking callbacks:
            # GraphProfiler()
            # PeakMemoryTracker()
            # GPUUtilizationTracker(),
        ],
        steps_per_epoch=steps_per_epoch,
        max_epoch=200000,
        # Try to resume from an existing checkpoint. The GRU-state reset
        # (ResetInit(model)) is commented out, so TryResumeTraining is the
        # only initializer in the chain.
        session_init=ChainInit([TryResumeTraining()]),  # disabled: ,ResetInit(model)])
        # session_config=tf.ConfigProto(log_device_placement=True) #config_gpus(1)
    )

    trainer = SimpleTrainer()
    # with tf.contrib.tfprof.ProfileContext(logger.get_logger_dir()) as pctx:
    launch_train_with_config(traincfg, trainer)

################# JUNK ###############
Exemple #8
0
    # Fragment of a larger function whose header is not visible here: it
    # picks a log directory, builds a TrainConfig and launches training.

    # Use an automatically chosen log directory unless save_dir was given.
    if save_dir is None:
        logger.auto_set_dir()
    else:
        logger.set_logger_dir(save_dir)

    dataset_train = get_data('train')
    dataset_test = get_data('test')

    config = TrainConfig(
        # The model's initial learning rate is a tenth of lr_base; the
        # schedule below sets it to lr_base at schedule point 1.
        model=CifarResNet(n=NUM_UNITS,
                          mult_decay=mult_decay,
                          lr_init=lr_base * 0.1),
        dataflow=dataset_train,
        callbacks=[
            ModelSaver(),
            # Run the test set through the model, collecting the 'cost'
            # scalar and the classification error from 'wrong_vector'.
            InferenceRunner(
                dataset_test,
                [ScalarStats('cost'),
                 ClassificationError('wrong_vector')]),
            # Learning-rate schedule: lr_base at point 1, then 0.1x, 0.01x
            # and 0.002x of lr_base at points 82, 123 and 164.
            # NOTE(review): points are presumably epoch indices (no
            # step_based flag is passed) - confirm.
            ScheduledHyperParamSetter('learning_rate',
                                      [(1, lr_base), (82, lr_base * 0.1),
                                       (123, lr_base * 0.01),
                                       (164, lr_base * 0.002)])
        ],
        # steps_per_epoch is not passed here.
        # NOTE(review): presumably TrainConfig then derives it from the
        # dataflow size - confirm.
        max_epoch=200,
        session_init=SmartInit(args.load),
    )
    # Use at least one tower even when no GPU is reported.
    num_gpu = max(get_num_gpu(), 1)
    launch_train_with_config(config,
                             SyncMultiGPUTrainerParameterServer(num_gpu))