Python JSONDataset Examples, ml.rl.workflow.training_data_reader.JSONDataset Python Examples

Example #1

0

Show file

File: create_normalization_metadata.py Project: yishuihanhan/Horizon-1

def create_norm_table(params):
    training_data_path = params["training_data_path"]
    logger.info("Generating norm table based on {}".format(training_data_path))

    norm_params = get_norm_params(params["norm_params"])
    dataset = JSONDataset(
        params["training_data_path"], batch_size=NORMALIZATION_BATCH_READ_SIZE
    )

    for col in norm_params["cols_to_norm"]:
        logger.info("Creating normalization metadata for `{}` column".format(col))
        norm_metadata = get_norm_metadata(dataset, norm_params, col)
        path = norm_params["output_dir"] + "{}_norm.json".format(col)
        with open(os.path.expanduser(path), "w") as outfile:
            json.dump(norm_metadata, outfile)
            logger.info("`{}` normalization metadata written to {}".format(col, path))

Example #2

0

Show file

def train_network(params):
    logger.info("Running DQN workflow with params:")
    logger.info(params)

    action_names = np.array(params["actions"])
    rl_parameters = RLParameters(**params["rl"])
    training_parameters = TrainingParameters(**params["training"])
    rainbow_parameters = RainbowDQNParameters(**params["rainbow"])

    trainer_params = DiscreteActionModelParameters(
        actions=params["actions"],
        rl=rl_parameters,
        training=training_parameters,
        rainbow=rainbow_parameters,
    )

    dataset = JSONDataset(params["training_data_path"],
                          batch_size=training_parameters.minibatch_size)
    norm_data = JSONDataset(params["state_norm_data_path"])
    state_normalization = read_norm_params(norm_data.read_all())

    num_batches = int(len(dataset) / training_parameters.minibatch_size)

    logger.info("Read in batch data set {} of size {} examples. Data split "
                "into {} batches of size {}.".format(
                    params["training_data_path"],
                    len(dataset),
                    num_batches,
                    training_parameters.minibatch_size,
                ))

    trainer = DQNTrainer(trainer_params, state_normalization,
                         params["use_gpu"])

    for epoch in range(params["epochs"]):
        for batch_idx in range(num_batches):
            helpers.report_training_status(batch_idx, num_batches, epoch,
                                           params["epochs"])
            batch = dataset.read_batch(batch_idx)
            tdp = preprocess_batch_for_training(action_names, batch,
                                                state_normalization)
            trainer.train(tdp)

    logger.info("Training finished. Saving PyTorch model to {}".format(
        params["pytorch_output_path"]))
    helpers.save_model_to_file(trainer, params["pytorch_output_path"])

Example #3

0

Show file

File: dqn_workflow.py Project: ozzie00/Horizon-1

def train_network(params):
    writer = None
    if params["model_output_path"] is not None:
        writer = SummaryWriter(log_dir=params["model_output_path"])

    logger.info("Running DQN workflow with params:")
    logger.info(params)

    # Set minibatch size based on # of devices being used to train
    params["training"]["minibatch_size"] *= minibatch_size_multiplier(
        params["use_gpu"], params["use_all_avail_gpus"])

    action_names = np.array(params["actions"])
    rl_parameters = RLParameters(**params["rl"])
    training_parameters = TrainingParameters(**params["training"])
    rainbow_parameters = RainbowDQNParameters(**params["rainbow"])

    trainer_params = DiscreteActionModelParameters(
        actions=params["actions"],
        rl=rl_parameters,
        training=training_parameters,
        rainbow=rainbow_parameters,
    )

    dataset = JSONDataset(params["training_data_path"],
                          batch_size=training_parameters.minibatch_size)
    eval_dataset = JSONDataset(params["eval_data_path"], batch_size=16)
    state_normalization = read_norm_file(params["state_norm_data_path"])

    num_batches = int(len(dataset) / training_parameters.minibatch_size)
    logger.info("Read in batch data set {} of size {} examples. Data split "
                "into {} batches of size {}.".format(
                    params["training_data_path"],
                    len(dataset),
                    num_batches,
                    training_parameters.minibatch_size,
                ))

    trainer = DQNTrainer(
        trainer_params,
        state_normalization,
        use_gpu=params["use_gpu"],
        use_all_avail_gpus=params["use_all_avail_gpus"],
    )
    trainer = update_model_for_warm_start(trainer)
    preprocessor = Preprocessor(state_normalization, False)

    evaluator = Evaluator(
        trainer_params.actions,
        trainer_params.rl.gamma,
        trainer,
        metrics_to_score=trainer.metrics_to_score,
    )

    start_time = time.time()
    for epoch in range(int(params["epochs"])):
        dataset.reset_iterator()
        for batch_idx in range(num_batches):
            report_training_status(batch_idx, num_batches, epoch,
                                   int(params["epochs"]))
            batch = dataset.read_batch(batch_idx)
            tdp = preprocess_batch_for_training(preprocessor, batch,
                                                action_names)

            tdp.set_type(trainer.dtype)
            trainer.train(tdp)

        eval_dataset.reset_iterator()
        accumulated_edp = None
        while True:
            batch = eval_dataset.read_batch(batch_idx)
            if batch is None:
                break
            tdp = preprocess_batch_for_training(preprocessor, batch,
                                                action_names)
            edp = EvaluationDataPage.create_from_tdp(tdp, trainer)
            if accumulated_edp is None:
                accumulated_edp = edp
            else:
                accumulated_edp = accumulated_edp.append(edp)
        accumulated_edp = accumulated_edp.compute_values(trainer.gamma)

        cpe_start_time = time.time()
        details = evaluator.evaluate_post_training(accumulated_edp)
        details.log()
        logger.info("CPE evaluation took {} seconds.".format(time.time() -
                                                             cpe_start_time))

    through_put = (len(dataset) * int(params["epochs"])) / (time.time() -
                                                            start_time)
    logger.info("Training finished. Processed ~{} examples / s.".format(
        round(through_put)))

    if writer is not None:
        writer.close()

    return export_trainer_and_predictor(trainer, params["model_output_path"])

Example #4

0

Show file

def train_network(params):
    logger.info("Running Parametric DQN workflow with params:")
    logger.info(params)

    # Set minibatch size based on # of devices being used to train
    params["shared_training"]["minibatch_size"] *= minibatch_size_multiplier(
        params["use_gpu"], params["use_all_avail_gpus"])

    rl_parameters = RLParameters(**params["rl"])
    training_parameters = DDPGTrainingParameters(**params["shared_training"])
    actor_parameters = DDPGNetworkParameters(**params["actor_training"])
    critic_parameters = DDPGNetworkParameters(**params["critic_training"])

    trainer_params = DDPGModelParameters(
        rl=rl_parameters,
        shared_training=training_parameters,
        actor_training=actor_parameters,
        critic_training=critic_parameters,
    )

    dataset = JSONDataset(params["training_data_path"],
                          batch_size=training_parameters.minibatch_size)
    state_normalization = read_norm_file(params["state_norm_data_path"])
    action_normalization = read_norm_file(params["action_norm_data_path"])

    num_batches = int(len(dataset) / training_parameters.minibatch_size)
    logger.info("Read in batch data set {} of size {} examples. Data split "
                "into {} batches of size {}.".format(
                    params["training_data_path"],
                    len(dataset),
                    num_batches,
                    training_parameters.minibatch_size,
                ))

    min_action_range_tensor_serving, max_action_range_tensor_serving = construct_action_scale_tensor(
        action_normalization, trainer_params.action_rescale_map)

    trainer = DDPGTrainer(
        trainer_params,
        state_normalization,
        action_normalization,
        min_action_range_tensor_serving,
        max_action_range_tensor_serving,
        use_gpu=params["use_gpu"],
        use_all_avail_gpus=params["use_all_avail_gpus"],
    )
    trainer = update_model_for_warm_start(trainer)
    state_preprocessor = Preprocessor(state_normalization, params["use_gpu"])
    action_preprocessor = Preprocessor(action_normalization, params["use_gpu"])

    start_time = time.time()
    for epoch in range(params["epochs"]):
        dataset.reset_iterator()
        for batch_idx in range(num_batches):
            report_training_status(batch_idx, num_batches, epoch,
                                   params["epochs"])
            batch = dataset.read_batch(batch_idx)
            tdp = preprocess_batch_for_training(
                state_preprocessor,
                batch,
                action_preprocessor=action_preprocessor)
            tdp.set_type(trainer.dtype)
            trainer.train(tdp)

    through_put = (len(dataset) * params["epochs"]) / (time.time() -
                                                       start_time)
    logger.info("Training finished. Processed ~{} examples / s.".format(
        round(through_put)))

    return export_trainer_and_predictor(trainer, params["model_output_path"])

Example #5

0

Show file

def train_network(params):
    logger.info("Running Parametric DQN workflow with params:")
    logger.info(params)

    # Set minibatch size based on # of devices being used to train
    params["training"]["minibatch_size"] *= minibatch_size_multiplier(
        params["use_gpu"], params["use_all_avail_gpus"])

    rl_parameters = RLParameters(**params["rl"])
    training_parameters = TrainingParameters(**params["training"])
    rainbow_parameters = RainbowDQNParameters(**params["rainbow"])
    if params["in_training_cpe"] is not None:
        in_training_cpe_parameters = InTrainingCPEParameters(
            **params["in_training_cpe"])
    else:
        in_training_cpe_parameters = None

    trainer_params = ContinuousActionModelParameters(
        rl=rl_parameters,
        training=training_parameters,
        rainbow=rainbow_parameters,
        in_training_cpe=in_training_cpe_parameters,
    )

    dataset = JSONDataset(params["training_data_path"],
                          batch_size=training_parameters.minibatch_size)
    state_normalization = read_norm_file(params["state_norm_data_path"])
    action_normalization = read_norm_file(params["action_norm_data_path"])

    num_batches = int(len(dataset) / training_parameters.minibatch_size)
    logger.info("Read in batch data set {} of size {} examples. Data split "
                "into {} batches of size {}.".format(
                    params["training_data_path"],
                    len(dataset),
                    num_batches,
                    training_parameters.minibatch_size,
                ))

    trainer = ParametricDQNTrainer(
        trainer_params,
        state_normalization,
        action_normalization,
        use_gpu=params["use_gpu"],
        use_all_avail_gpus=params["use_all_avail_gpus"],
    )
    trainer = update_model_for_warm_start(trainer)
    state_preprocessor = Preprocessor(state_normalization, params["use_gpu"])
    action_preprocessor = Preprocessor(action_normalization, params["use_gpu"])

    if trainer_params.in_training_cpe is not None:
        evaluator = Evaluator(
            None,
            100,
            trainer_params.rl.gamma,
            trainer,
            trainer_params.in_training_cpe.mdp_sampled_rate,
        )
    else:
        evaluator = Evaluator(
            None,
            100,
            trainer_params.rl.gamma,
            trainer,
            float(DEFAULT_NUM_SAMPLES_FOR_CPE) / len(dataset),
        )

    start_time = time.time()
    for epoch in range(params["epochs"]):
        dataset.reset_iterator()
        for batch_idx in range(num_batches):
            report_training_status(batch_idx, num_batches, epoch,
                                   params["epochs"])
            batch = dataset.read_batch(batch_idx)
            tdp = preprocess_batch_for_training(
                state_preprocessor,
                batch,
                action_preprocessor=action_preprocessor)

            tdp.set_type(trainer.dtype)
            trainer.train(tdp, evaluator)

            evaluator.collect_parametric_action_samples(
                mdp_ids=tdp.mdp_ids,
                sequence_numbers=tdp.sequence_numbers.cpu().numpy(),
                logged_state_actions=np.concatenate(
                    (tdp.states.cpu().numpy(), tdp.actions.cpu().numpy()),
                    axis=1),
                logged_rewards=tdp.rewards.cpu().numpy(),
                logged_propensities=tdp.propensities.cpu().numpy(),
                logged_terminals=(1.0 - tdp.not_terminals),
                possible_state_actions=tdp.state_pas_concat.cpu().numpy(),
                pas_lens=tdp.possible_actions_lengths.cpu().numpy(),
            )

        cpe_start_time = time.time()
        evaluator.recover_samples_to_be_unshuffled()
        evaluator.score_cpe(trainer_params.rl.gamma)
        evaluator.clear_collected_samples()
        logger.info("CPE evaluation took {} seconds.".format(time.time() -
                                                             cpe_start_time))

    through_put = (len(dataset) * params["epochs"]) / (time.time() -
                                                       start_time)
    logger.info("Training finished. Processed ~{} examples / s.".format(
        round(through_put)))

    return export_trainer_and_predictor(trainer, params["model_output_path"])

Example #6

0

Show file

File: parametric_dqn_workflow.py Project: gamma-lab/Horizon

def train_network(params):
    logger.info("Running Parametric DQN workflow with params:")
    logger.info(params)

    # Set minibatch size based on # of devices being used to train
    params["training"]["minibatch_size"] *= minibatch_size_multiplier(
        params["use_gpu"], params["use_all_avail_gpus"])

    rl_parameters = RLParameters(**params["rl"])
    training_parameters = TrainingParameters(**params["training"])
    rainbow_parameters = RainbowDQNParameters(**params["rainbow"])
    if params["in_training_cpe"] is not None:
        in_training_cpe_parameters = InTrainingCPEParameters(
            **params["in_training_cpe"])
    else:
        in_training_cpe_parameters = None

    trainer_params = ContinuousActionModelParameters(
        rl=rl_parameters,
        training=training_parameters,
        rainbow=rainbow_parameters,
        in_training_cpe=in_training_cpe_parameters,
    )

    dataset = JSONDataset(params["training_data_path"],
                          batch_size=training_parameters.minibatch_size)
    eval_dataset = JSONDataset(params["eval_data_path"],
                               batch_size=training_parameters.minibatch_size)
    state_normalization = read_norm_file(params["state_norm_data_path"])
    action_normalization = read_norm_file(params["action_norm_data_path"])

    num_batches = int(len(dataset) / training_parameters.minibatch_size)
    logger.info("Read in batch data set {} of size {} examples. Data split "
                "into {} batches of size {}.".format(
                    params["training_data_path"],
                    len(dataset),
                    num_batches,
                    training_parameters.minibatch_size,
                ))

    trainer = ParametricDQNTrainer(
        trainer_params,
        state_normalization,
        action_normalization,
        use_gpu=params["use_gpu"],
        use_all_avail_gpus=params["use_all_avail_gpus"],
    )
    trainer = update_model_for_warm_start(trainer)
    state_preprocessor = Preprocessor(state_normalization, False)
    action_preprocessor = Preprocessor(action_normalization, False)

    if trainer_params.in_training_cpe is not None:
        evaluator = Evaluator(
            None,
            trainer_params.rl.gamma,
            trainer,
            trainer_params.in_training_cpe.mdp_sampled_rate,
            metrics_to_score=trainer.metrics_to_score,
        )
    else:
        evaluator = Evaluator(
            None,
            trainer_params.rl.gamma,
            trainer,
            float(DEFAULT_NUM_SAMPLES_FOR_CPE) / len(dataset),
            metrics_to_score=trainer.metrics_to_score,
        )

    start_time = time.time()
    for epoch in range(params["epochs"]):
        dataset.reset_iterator()
        for batch_idx in range(num_batches):
            report_training_status(batch_idx, num_batches, epoch,
                                   params["epochs"])
            batch = dataset.read_batch(batch_idx)
            tdp = preprocess_batch_for_training(
                state_preprocessor,
                batch,
                action_preprocessor=action_preprocessor)

            tdp.set_type(trainer.dtype)
            trainer.train(tdp)

        eval_dataset.reset_iterator()
        accumulated_edp = None
        for batch_idx in range(num_batches):
            batch = eval_dataset.read_batch(batch_idx)
            tdp = preprocess_batch_for_training(
                state_preprocessor,
                batch,
                action_preprocessor=action_preprocessor)
            edp = EvaluationDataPage.create_from_tdp(tdp, trainer)
            if accumulated_edp is None:
                accumulated_edp = edp
            else:
                accumulated_edp = accumulated_edp.append(edp)
        accumulated_edp = accumulated_edp.compute_values(trainer.gamma)

        cpe_start_time = time.time()
        details = evaluator.evaluate_post_training(accumulated_edp)
        details.log()
        logger.info("CPE evaluation took {} seconds.".format(time.time() -
                                                             cpe_start_time))

    through_put = (len(dataset) * params["epochs"]) / (time.time() -
                                                       start_time)
    logger.info("Training finished. Processed ~{} examples / s.".format(
        round(through_put)))

    return export_trainer_and_predictor(trainer, params["model_output_path"])

Example #7

0

Show file

File: dqn_workflow.py Project: sra4077/Horizon

def train_network(params):
    writer = None
    if params["model_output_path"] is not None:
        writer = SummaryWriter(log_dir=params["model_output_path"])

    logger.info("Running DQN workflow with params:")
    logger.info(params)

    # Set minibatch size based on # of devices being used to train
    params["training"]["minibatch_size"] *= minibatch_size_multiplier(
        params["use_gpu"], params["use_all_avail_gpus"]
    )

    action_names = np.array(params["actions"])
    rl_parameters = RLParameters(**params["rl"])
    training_parameters = TrainingParameters(**params["training"])
    rainbow_parameters = RainbowDQNParameters(**params["rainbow"])

    trainer_params = DiscreteActionModelParameters(
        actions=params["actions"],
        rl=rl_parameters,
        training=training_parameters,
        rainbow=rainbow_parameters,
    )

    dataset = JSONDataset(
        params["training_data_path"], batch_size=training_parameters.minibatch_size
    )
    eval_dataset = JSONDataset(params["eval_data_path"], batch_size=16)
    state_normalization = read_norm_file(params["state_norm_data_path"])

    num_batches = int(len(dataset) / training_parameters.minibatch_size)
    logger.info(
        "Read in batch data set {} of size {} examples. Data split "
        "into {} batches of size {}.".format(
            params["training_data_path"],
            len(dataset),
            num_batches,
            training_parameters.minibatch_size,
        )
    )

    trainer = DQNTrainer(
        trainer_params,
        state_normalization,
        use_gpu=params["use_gpu"],
        use_all_avail_gpus=params["use_all_avail_gpus"],
    )
    trainer = update_model_for_warm_start(trainer)
    preprocessor = Preprocessor(state_normalization, False)

    evaluator = Evaluator(
        trainer_params.actions,
        trainer_params.rl.gamma,
        trainer,
        metrics_to_score=trainer.metrics_to_score,
    )

    start_time = time.time()
    for epoch in range(int(params["epochs"])):
        dataset.reset_iterator()
        batch_idx = -1
        while True:
            batch_idx += 1
            report_training_status(batch_idx, num_batches, epoch, int(params["epochs"]))
            batch = dataset.read_batch()
            if batch is None:
                break
            tdp = preprocess_batch_for_training(preprocessor, batch, action_names)

            tdp.set_type(trainer.dtype)
            trainer.train(tdp)

        eval_dataset.reset_iterator()
        accumulated_edp = None
        while True:
            batch = eval_dataset.read_batch()
            if batch is None:
                break
            tdp = preprocess_batch_for_training(preprocessor, batch, action_names)
            tdp.set_type(trainer.dtype)
            edp = EvaluationDataPage.create_from_tdp(tdp, trainer)
            if accumulated_edp is None:
                accumulated_edp = edp
            else:
                accumulated_edp = accumulated_edp.append(edp)
        accumulated_edp = accumulated_edp.compute_values(trainer.gamma)

        cpe_start_time = time.time()
        details = evaluator.evaluate_post_training(accumulated_edp)
        details.log()
        logger.info(
            "CPE evaluation took {} seconds.".format(time.time() - cpe_start_time)
        )

    through_put = (len(dataset) * int(params["epochs"])) / (time.time() - start_time)
    logger.info(
        "Training finished. Processed ~{} examples / s.".format(round(through_put))
    )

    if writer is not None:
        writer.close()

    return export_trainer_and_predictor(trainer, params["model_output_path"])

Example #8

0

Show file

def train_network(params):
    writer = None
    if params["model_output_path"] is not None:
        writer = SummaryWriter(
            log_dir=os.path.join(
                os.path.expanduser(params["model_output_path"]), "training_data"
            )
        )

    logger.info("Running DQN workflow with params:")
    logger.info(params)

    action_names = np.array(params["actions"])
    rl_parameters = RLParameters(**params["rl"])
    training_parameters = TrainingParameters(**params["training"])
    rainbow_parameters = RainbowDQNParameters(**params["rainbow"])
    if params["in_training_cpe"] is not None:
        in_training_cpe_parameters = InTrainingCPEParameters(
            **params["in_training_cpe"]
        )
    else:
        in_training_cpe_parameters = None

    trainer_params = DiscreteActionModelParameters(
        actions=params["actions"],
        rl=rl_parameters,
        training=training_parameters,
        rainbow=rainbow_parameters,
        in_training_cpe=in_training_cpe_parameters,
    )

    dataset = JSONDataset(
        params["training_data_path"], batch_size=training_parameters.minibatch_size
    )
    state_normalization = read_norm_file(params["state_norm_data_path"])

    num_batches = int(len(dataset) / training_parameters.minibatch_size)
    logger.info(
        "Read in batch data set {} of size {} examples. Data split "
        "into {} batches of size {}.".format(
            params["training_data_path"],
            len(dataset),
            num_batches,
            training_parameters.minibatch_size,
        )
    )

    trainer = DQNTrainer(trainer_params, state_normalization, params["use_gpu"])
    trainer = update_model_for_warm_start(trainer)
    preprocessor = Preprocessor(state_normalization, params["use_gpu"])

    if trainer_params.in_training_cpe is not None:
        evaluator = Evaluator(
            trainer_params.actions,
            10,
            trainer_params.rl.gamma,
            trainer,
            trainer_params.in_training_cpe.mdp_sampled_rate,
        )
    else:
        evaluator = Evaluator(
            trainer_params.actions,
            10,
            trainer_params.rl.gamma,
            trainer,
            float(DEFAULT_NUM_SAMPLES_FOR_CPE) / len(dataset),
        )

    start_time = time.time()
    for epoch in range(int(params["epochs"])):
        for batch_idx in range(num_batches):
            report_training_status(batch_idx, num_batches, epoch, int(params["epochs"]))
            batch = dataset.read_batch(batch_idx)
            tdp = preprocess_batch_for_training(preprocessor, batch, action_names)

            trainer.train(tdp)

            trainer.evaluate(
                evaluator, tdp.actions, None, tdp.rewards, tdp.episode_values
            )

            evaluator.collect_discrete_action_samples(
                mdp_ids=tdp.mdp_ids,
                sequence_numbers=tdp.sequence_numbers.cpu().numpy(),
                states=tdp.states.cpu().numpy(),
                logged_actions=tdp.actions.cpu().numpy(),
                logged_rewards=tdp.rewards.cpu().numpy(),
                logged_propensities=tdp.propensities.cpu().numpy(),
                logged_terminals=np.invert(
                    tdp.not_terminals.cpu().numpy().astype(np.bool)
                ),
            )

        cpe_start_time = time.time()
        evaluator.recover_samples_to_be_unshuffled()
        evaluator.score_cpe()
        if writer is not None:
            evaluator.log_to_tensorboard(writer, epoch)
        evaluator.clear_collected_samples()
        logger.info(
            "CPE evaluation took {} seconds.".format(time.time() - cpe_start_time)
        )

    through_put = (len(dataset) * int(params["epochs"])) / (time.time() - start_time)
    logger.info(
        "Training finished. Processed ~{} examples / s.".format(round(through_put))
    )

    if writer is not None:
        writer.close()

    return export_trainer_and_predictor(trainer, params["model_output_path"])

Example #9

0

Show file

File: ddpg_workflow.py Project: sra4077/Horizon

def train_network(params):
    logger.info("Running Parametric DQN workflow with params:")
    logger.info(params)

    # Set minibatch size based on # of devices being used to train
    params["shared_training"]["minibatch_size"] *= minibatch_size_multiplier(
        params["use_gpu"], params["use_all_avail_gpus"]
    )

    rl_parameters = RLParameters(**params["rl"])
    training_parameters = DDPGTrainingParameters(**params["shared_training"])
    actor_parameters = DDPGNetworkParameters(**params["actor_training"])
    critic_parameters = DDPGNetworkParameters(**params["critic_training"])

    trainer_params = DDPGModelParameters(
        rl=rl_parameters,
        shared_training=training_parameters,
        actor_training=actor_parameters,
        critic_training=critic_parameters,
    )

    dataset = JSONDataset(
        params["training_data_path"], batch_size=training_parameters.minibatch_size
    )
    state_normalization = read_norm_file(params["state_norm_data_path"])
    action_normalization = read_norm_file(params["action_norm_data_path"])

    num_batches = int(len(dataset) / training_parameters.minibatch_size)
    logger.info(
        "Read in batch data set {} of size {} examples. Data split "
        "into {} batches of size {}.".format(
            params["training_data_path"],
            len(dataset),
            num_batches,
            training_parameters.minibatch_size,
        )
    )

    min_action_range_tensor_serving, max_action_range_tensor_serving = construct_action_scale_tensor(
        action_normalization, trainer_params.action_rescale_map
    )

    trainer = DDPGTrainer(
        trainer_params,
        state_normalization,
        action_normalization,
        min_action_range_tensor_serving,
        max_action_range_tensor_serving,
        use_gpu=params["use_gpu"],
        use_all_avail_gpus=params["use_all_avail_gpus"],
    )
    trainer = update_model_for_warm_start(trainer)
    state_preprocessor = Preprocessor(state_normalization, False)
    action_preprocessor = Preprocessor(action_normalization, False)

    start_time = time.time()
    for epoch in range(params["epochs"]):
        dataset.reset_iterator()
        batch_idx = -1
        while True:
            batch_idx += 1
            report_training_status(batch_idx, num_batches, epoch, params["epochs"])
            batch = dataset.read_batch()
            if batch is None:
                break
            tdp = preprocess_batch_for_training(
                state_preprocessor, batch, action_preprocessor=action_preprocessor
            )
            tdp.set_type(trainer.dtype)
            trainer.train(tdp)

    through_put = (len(dataset) * params["epochs"]) / (time.time() - start_time)
    logger.info(
        "Training finished. Processed ~{} examples / s.".format(round(through_put))
    )

    return export_trainer_and_predictor(trainer, params["model_output_path"])