def test_ddpg_trainer(self):
    """Train DDPG on the continuous gridworld and report the critic's MAE.

    Builds a trainer with explicit action-range tensors, runs one pass over
    the preprocessed minibatches, then exercises both the actor and critic
    predictors.
    """
    env = GridworldContinuous()
    samples = env.generate_samples(500000, 0.25)
    trainer = DDPGTrainer(
        self.get_ddpg_parameters(),
        env.normalization,
        env.normalization_action,
        env.min_action_range,
        env.max_action_range,
    )
    evaluator = GridworldDDPGEvaluator(env, True, DISCOUNT, False, samples)
    tdps = env.preprocess_samples(samples, self.minibatch_size)

    # Sanity-check the critic predictor before any training happens.
    evaluator.evaluate_critic(trainer.predictor(actor=False))

    # Single pass over the minibatches; rewards / not_terminals are
    # flattened in place to the shape the trainer expects.
    for tdp in tdps:
        tdp.rewards = tdp.rewards.flatten()
        tdp.not_terminals = tdp.not_terminals.flatten()
        trainer.train(tdp)

    # Make sure the actor predictor works end to end.
    evaluator.evaluate_actor(trainer.predictor(actor=True))

    # Evaluate the trained critic predictor for correctness.
    error = evaluator.evaluate_critic(trainer.predictor(actor=False))
    print("gridworld MAE: {0:.3f}".format(error))
def test_ddpg_trainer(self):
    """Train DDPG on the continuous gridworld for several epochs.

    Uses the list-form training-sample interface of ``DDPGTrainer.train``
    and evaluates the critic predictor before each epoch and once after
    all training, printing the final MAE.
    """
    environment = GridworldContinuous()
    samples = environment.generate_samples(200000, 1.0)
    epochs = 3
    trainer = DDPGTrainer(
        self.get_ddpg_parameters(),
        environment.normalization,
        environment.normalization_action,
    )
    evaluator = GridworldDDPGEvaluator(environment, True)
    tdps = environment.preprocess_samples(samples, self.minibatch_size)
    for epoch in range(epochs):
        print("On epoch {} of {}".format(epoch + 1, epochs))
        # Evaluate the critic at the start of each epoch to track progress.
        critic_predictor = trainer.predictor()
        evaluator.evaluate_critic(critic_predictor)
        for tdp in tdps:
            # Positional sample layout expected by trainer.train:
            # states, actions, rewards, next_states, next_actions,
            # done flags, (unused), (unused), time diffs.
            training_samples = [
                tdp.states,
                tdp.actions,
                tdp.rewards.flatten(),
                tdp.next_states,
                None,
                1 - tdp.not_terminals.flatten(),  # done
                None,
                None,
                [1] * len(tdp.states),  # time diff (idiomatic constant list)
            ]
            trainer.train(training_samples)
    # Final correctness check on the fully trained critic.
    critic_predictor = trainer.predictor()
    error = evaluator.evaluate_critic(critic_predictor)
    print("gridworld MAE: {0:.3f}".format(error))
def train_network(params):
    """Run the offline DDPG training workflow described by ``params``.

    Reads a JSON training dataset, builds a ``DDPGTrainer`` from the
    parameter dict, trains for ``params["epochs"]`` epochs over fixed-size
    minibatches, and exports the trainer and predictor.

    :param params: dict of workflow settings (rl / shared_training /
        actor_training / critic_training sections, data and norm-file
        paths, GPU flags, epochs, model_output_path).
    :return: result of ``export_trainer_and_predictor``.
    """
    # NOTE: this is the DDPG workflow; the previous message incorrectly
    # said "Parametric DQN".
    logger.info("Running DDPG workflow with params:")
    logger.info(params)

    # Scale minibatch size by the number of devices used to train.
    params["shared_training"]["minibatch_size"] *= minibatch_size_multiplier(
        params["use_gpu"], params["use_all_avail_gpus"]
    )

    rl_parameters = RLParameters(**params["rl"])
    training_parameters = DDPGTrainingParameters(**params["shared_training"])
    actor_parameters = DDPGNetworkParameters(**params["actor_training"])
    critic_parameters = DDPGNetworkParameters(**params["critic_training"])

    trainer_params = DDPGModelParameters(
        rl=rl_parameters,
        shared_training=training_parameters,
        actor_training=actor_parameters,
        critic_training=critic_parameters,
    )

    dataset = JSONDataset(
        params["training_data_path"], batch_size=training_parameters.minibatch_size
    )
    state_normalization = read_norm_file(params["state_norm_data_path"])
    action_normalization = read_norm_file(params["action_norm_data_path"])

    # Floor division: any trailing partial batch is dropped.
    num_batches = len(dataset) // training_parameters.minibatch_size

    logger.info(
        "Read in batch data set {} of size {} examples. Data split "
        "into {} batches of size {}.".format(
            params["training_data_path"],
            len(dataset),
            num_batches,
            training_parameters.minibatch_size,
        )
    )

    min_action_range_tensor_serving, max_action_range_tensor_serving = construct_action_scale_tensor(
        action_normalization, trainer_params.action_rescale_map
    )

    trainer = DDPGTrainer(
        trainer_params,
        state_normalization,
        action_normalization,
        min_action_range_tensor_serving,
        max_action_range_tensor_serving,
        use_gpu=params["use_gpu"],
        use_all_avail_gpus=params["use_all_avail_gpus"],
    )
    trainer = update_model_for_warm_start(trainer)

    state_preprocessor = Preprocessor(state_normalization, params["use_gpu"])
    action_preprocessor = Preprocessor(action_normalization, params["use_gpu"])

    start_time = time.time()
    for epoch in range(params["epochs"]):
        dataset.reset_iterator()
        for batch_idx in range(num_batches):
            report_training_status(batch_idx, num_batches, epoch, params["epochs"])
            batch = dataset.read_batch(batch_idx)
            tdp = preprocess_batch_for_training(
                state_preprocessor, batch, action_preprocessor=action_preprocessor
            )
            tdp.set_type(trainer.dtype)
            trainer.train(tdp)

    through_put = (len(dataset) * params["epochs"]) / (time.time() - start_time)
    logger.info(
        "Training finished. Processed ~{} examples / s.".format(round(through_put))
    )

    return export_trainer_and_predictor(trainer, params["model_output_path"])
def train_network(params):
    """Run the offline DDPG training workflow described by ``params``.

    Reads a JSON training dataset, builds a ``DDPGTrainer`` from the
    parameter dict, trains for ``params["epochs"]`` epochs (streaming
    batches until the dataset is exhausted), and exports the trainer and
    predictor.

    :param params: dict of workflow settings (rl / shared_training /
        actor_training / critic_training sections, data and norm-file
        paths, GPU flags, epochs, model_output_path).
    :return: result of ``export_trainer_and_predictor``.
    """
    # NOTE: this is the DDPG workflow; the previous message incorrectly
    # said "Parametric DQN".
    logger.info("Running DDPG workflow with params:")
    logger.info(params)

    # Scale minibatch size by the number of devices used to train.
    params["shared_training"]["minibatch_size"] *= minibatch_size_multiplier(
        params["use_gpu"], params["use_all_avail_gpus"]
    )

    rl_parameters = RLParameters(**params["rl"])
    training_parameters = DDPGTrainingParameters(**params["shared_training"])
    actor_parameters = DDPGNetworkParameters(**params["actor_training"])
    critic_parameters = DDPGNetworkParameters(**params["critic_training"])

    trainer_params = DDPGModelParameters(
        rl=rl_parameters,
        shared_training=training_parameters,
        actor_training=actor_parameters,
        critic_training=critic_parameters,
    )

    dataset = JSONDataset(
        params["training_data_path"], batch_size=training_parameters.minibatch_size
    )
    state_normalization = read_norm_file(params["state_norm_data_path"])
    action_normalization = read_norm_file(params["action_norm_data_path"])

    # Floor division: any trailing partial batch is not counted here
    # (the read loop below streams until the dataset returns None).
    num_batches = len(dataset) // training_parameters.minibatch_size

    logger.info(
        "Read in batch data set {} of size {} examples. Data split "
        "into {} batches of size {}.".format(
            params["training_data_path"],
            len(dataset),
            num_batches,
            training_parameters.minibatch_size,
        )
    )

    min_action_range_tensor_serving, max_action_range_tensor_serving = construct_action_scale_tensor(
        action_normalization, trainer_params.action_rescale_map
    )

    trainer = DDPGTrainer(
        trainer_params,
        state_normalization,
        action_normalization,
        min_action_range_tensor_serving,
        max_action_range_tensor_serving,
        use_gpu=params["use_gpu"],
        use_all_avail_gpus=params["use_all_avail_gpus"],
    )
    trainer = update_model_for_warm_start(trainer)

    # Preprocessors run on CPU (use_gpu=False) regardless of the training
    # device — presumably intentional so data prep stays off the GPU;
    # TODO(review): confirm.
    state_preprocessor = Preprocessor(state_normalization, False)
    action_preprocessor = Preprocessor(action_normalization, False)

    start_time = time.time()
    for epoch in range(params["epochs"]):
        dataset.reset_iterator()
        batch_idx = -1
        while True:
            batch_idx += 1
            # NOTE(review): status is reported before the end-of-data check,
            # so the final (empty) read is also reported — kept as-is to
            # preserve existing log output.
            report_training_status(batch_idx, num_batches, epoch, params["epochs"])
            batch = dataset.read_batch()
            if batch is None:
                break  # dataset exhausted for this epoch
            tdp = preprocess_batch_for_training(
                state_preprocessor, batch, action_preprocessor=action_preprocessor
            )
            tdp.set_type(trainer.dtype)
            trainer.train(tdp)

    through_put = (len(dataset) * params["epochs"]) / (time.time() - start_time)
    logger.info(
        "Training finished. Processed ~{} examples / s.".format(round(through_put))
    )

    return export_trainer_and_predictor(trainer, params["model_output_path"])