def test_ddpg_trainer(self):
    """Train DDPG on continuous Gridworld and report the critic's MAE."""
    env = GridworldContinuous()
    samples = env.generate_samples(500000, 0.25)
    trainer = DDPGTrainer(
        self.get_ddpg_parameters(),
        env.normalization,
        env.normalization_action,
        env.min_action_range,
        env.max_action_range,
    )
    evaluator = GridworldDDPGEvaluator(env, True, DISCOUNT, False, samples)
    minibatches = env.preprocess_samples(samples, self.minibatch_size)
    # Baseline critic evaluation before any training.
    evaluator.evaluate_critic(trainer.predictor(actor=False))
    for batch in minibatches:
        batch.rewards = batch.rewards.flatten()
        batch.not_terminals = batch.not_terminals.flatten()
        trainer.train(batch)
    # Make sure actor predictor works
    evaluator.evaluate_actor(trainer.predictor(actor=True))
    # Evaluate critic predictor for correctness
    error = evaluator.evaluate_critic(trainer.predictor(actor=False))
    print("gridworld MAE: {0:.3f}".format(error))
def test_ddpg_trainer(self):
    """Run several epochs of DDPG on Gridworld, reporting critic MAE."""
    env = GridworldContinuous()
    samples = env.generate_samples(200000, 1.0)
    num_epochs = 3
    trainer = DDPGTrainer(
        self.get_ddpg_parameters(),
        env.normalization,
        env.normalization_action,
    )
    evaluator = GridworldDDPGEvaluator(env, True)
    minibatches = env.preprocess_samples(samples, self.minibatch_size)
    for epoch in range(num_epochs):
        print("On epoch {} of {}".format(epoch + 1, num_epochs))
        # Check critic quality at the start of every epoch.
        evaluator.evaluate_critic(trainer.predictor())
        for batch in minibatches:
            trainer.train(
                [
                    batch.states,
                    batch.actions,
                    batch.rewards.flatten(),
                    batch.next_states,
                    None,
                    1 - batch.not_terminals.flatten(),  # done
                    None,
                    None,
                    [1] * len(batch.states),  # time diff
                ]
            )
    error = evaluator.evaluate_critic(trainer.predictor())
    print("gridworld MAE: {0:.3f}".format(error))
def _test_ddpg_trainer(self, use_gpu=False, use_all_avail_gpus=False):
    """Shared DDPG Gridworld test body, parameterized on GPU usage."""
    # Tolerance checking is disabled for this trainer's evaluation.
    self.check_tolerance = False
    self.tolerance_threshold = 1.0
    env = GridworldContinuous()
    trainer = DDPGTrainer(
        self.get_ddpg_parameters(),
        env.normalization,
        env.normalization_action,
        env.min_action_range,
        env.max_action_range,
        use_gpu=use_gpu,
        use_all_avail_gpus=use_all_avail_gpus,
    )
    evaluator = GridworldDDPGEvaluator(env, DISCOUNT)
    # The trainer also serves as its own exporter here.
    self.evaluate_gridworld(env, evaluator, trainer, trainer, use_gpu)
def __init__(
    self,
    model_params: ContinuousActionModelParameters,
    preprocess_handler: PreprocessHandler,
    state_normalization: Dict[int, NormalizationParameters],
    action_normalization: Dict[int, NormalizationParameters],
    use_gpu: bool,
    use_all_avail_gpus: bool,
):
    """Set up a continuous-action (DDPG) training workflow.

    Builds a DDPG trainer from the given hyper-parameters and
    normalization specs, applies warm start, and delegates the rest of
    the setup to the base workflow class.

    :param model_params: model/RL/training hyper-parameters.
    :param preprocess_handler: handler that prepares raw input rows.
    :param state_normalization: per-feature state normalization parameters.
    :param action_normalization: per-feature action normalization parameters.
    :param use_gpu: run training on GPU when True.
    :param use_all_avail_gpus: spread training across all visible GPUs.
    """
    logger.info("Running continuous workflow with params:")
    logger.info(model_params)
    # NOTE: removed dead no-op `model_params = model_params` self-assignment.
    # Serving-side action scale tensors derived from the action
    # normalization and the configured rescale map.
    min_action_range_tensor_serving, max_action_range_tensor_serving = construct_action_scale_tensor(
        action_normalization, model_params.action_rescale_map
    )
    trainer = DDPGTrainer(
        model_params,
        state_normalization,
        action_normalization,
        min_action_range_tensor_serving,
        max_action_range_tensor_serving,
        use_gpu=use_gpu,
        use_all_avail_gpus=use_all_avail_gpus,
    )
    # Warm start must hand back the same trainer type.
    trainer = update_model_for_warm_start(trainer)
    assert type(trainer) == DDPGTrainer, "Warm started wrong model type: " + str(
        type(trainer)
    )
    evaluator = Evaluator(
        None,
        model_params.rl.gamma,
        trainer,
        metrics_to_score=trainer.metrics_to_score,
    )
    super(ContinuousWorkflow, self).__init__(
        preprocess_handler,
        trainer,
        evaluator,
        model_params.shared_training.minibatch_size,
    )
def create_trainer(model_type, params, rl_parameters, use_gpu, env):
    """Build and return a trainer instance for the requested model type.

    Supported types: PyTorch discrete DQN, PyTorch parametric DQN,
    continuous-action DDPG, and soft actor-critic. Raises
    NotImplementedError for anything else.
    """
    if model_type == ModelType.PYTORCH_DISCRETE_DQN.value:
        training_parameters = params["training"]
        if isinstance(training_parameters, dict):
            training_parameters = TrainingParameters(**training_parameters)
        rainbow_parameters = params["rainbow"]
        if isinstance(rainbow_parameters, dict):
            rainbow_parameters = RainbowDQNParameters(**rainbow_parameters)
        if env.img:
            # Image input: CNN parameters are mandatory and are patched
            # with the environment's actual input dimensions.
            assert (
                training_parameters.cnn_parameters is not None
            ), "Missing CNN parameters for image input"
            if isinstance(training_parameters.cnn_parameters, dict):
                training_parameters.cnn_parameters = CNNParameters(
                    **training_parameters.cnn_parameters
                )
            training_parameters.cnn_parameters.conv_dims[0] = env.num_input_channels
            training_parameters.cnn_parameters.input_height = env.height
            training_parameters.cnn_parameters.input_width = env.width
            training_parameters.cnn_parameters.num_input_channels = (
                env.num_input_channels
            )
        else:
            assert (
                training_parameters.cnn_parameters is None
            ), "Extra CNN parameters for non-image input"
        trainer_params = DiscreteActionModelParameters(
            actions=env.actions,
            rl=rl_parameters,
            training=training_parameters,
            rainbow=rainbow_parameters,
        )
        trainer = DQNTrainer(trainer_params, env.normalization, use_gpu)
    elif model_type == ModelType.PYTORCH_PARAMETRIC_DQN.value:
        training_parameters = params["training"]
        if isinstance(training_parameters, dict):
            training_parameters = TrainingParameters(**training_parameters)
        rainbow_parameters = params["rainbow"]
        if isinstance(rainbow_parameters, dict):
            rainbow_parameters = RainbowDQNParameters(**rainbow_parameters)
        if env.img:
            assert (
                training_parameters.cnn_parameters is not None
            ), "Missing CNN parameters for image input"
            training_parameters.cnn_parameters.conv_dims[0] = env.num_input_channels
        else:
            assert (
                training_parameters.cnn_parameters is None
            ), "Extra CNN parameters for non-image input"
        trainer_params = ContinuousActionModelParameters(
            rl=rl_parameters,
            training=training_parameters,
            rainbow=rainbow_parameters,
        )
        trainer = ParametricDQNTrainer(
            trainer_params, env.normalization, env.normalization_action, use_gpu
        )
    elif model_type == ModelType.CONTINUOUS_ACTION.value:
        training_parameters = params["shared_training"]
        if isinstance(training_parameters, dict):
            training_parameters = DDPGTrainingParameters(**training_parameters)
        actor_parameters = params["actor_training"]
        if isinstance(actor_parameters, dict):
            actor_parameters = DDPGNetworkParameters(**actor_parameters)
        critic_parameters = params["critic_training"]
        if isinstance(critic_parameters, dict):
            critic_parameters = DDPGNetworkParameters(**critic_parameters)
        trainer_params = DDPGModelParameters(
            rl=rl_parameters,
            shared_training=training_parameters,
            actor_training=actor_parameters,
            critic_training=critic_parameters,
        )
        # Action bounds come from the gym action space; unsqueeze adds a
        # leading batch dimension expected by the trainer.
        action_range_low = env.action_space.low.astype(np.float32)
        action_range_high = env.action_space.high.astype(np.float32)
        trainer = DDPGTrainer(
            trainer_params,
            env.normalization,
            env.normalization_action,
            torch.from_numpy(action_range_low).unsqueeze(dim=0),
            torch.from_numpy(action_range_high).unsqueeze(dim=0),
            use_gpu,
        )
    elif model_type == ModelType.SOFT_ACTOR_CRITIC.value:
        trainer_params = SACModelParameters(
            rl=rl_parameters,
            training=SACTrainingParameters(
                minibatch_size=params["sac_training"]["minibatch_size"],
                use_2_q_functions=params["sac_training"]["use_2_q_functions"],
                q_network_optimizer=OptimizerParameters(
                    **params["sac_training"]["q_network_optimizer"]
                ),
                value_network_optimizer=OptimizerParameters(
                    **params["sac_training"]["value_network_optimizer"]
                ),
                actor_network_optimizer=OptimizerParameters(
                    **params["sac_training"]["actor_network_optimizer"]
                ),
                entropy_temperature=params["sac_training"]["entropy_temperature"],
            ),
            q_network=FeedForwardParameters(**params["sac_q_training"]),
            value_network=FeedForwardParameters(**params["sac_value_training"]),
            actor_network=FeedForwardParameters(**params["sac_actor_training"]),
        )
        trainer = get_sac_trainer(env, trainer_params, use_gpu)
    else:
        raise NotImplementedError("Model of type {} not supported".format(model_type))
    return trainer
def run_gym(params, score_bar, gpu_id, save_timesteps_to_dataset=None):
    """Build a trainer for the configured model type and run it on an
    OpenAI Gym environment.

    :param params: config dict (env, rl, model_type, training sections, ...).
    :param score_bar: score threshold forwarded to ``run``.
    :param gpu_id: GPU ordinal, or USE_CPU for CPU-only (Caffe2 device).
    :param save_timesteps_to_dataset: optional dataset sink for timesteps.
    """
    logger.info("Running gym with params")
    logger.info(params)
    rl_parameters = RLParameters(**params["rl"])
    env_type = params["env"]
    env = OpenAIGymEnvironment(
        env_type,
        rl_parameters.epsilon,
        rl_parameters.softmax_policy,
        params["max_replay_memory_size"],
    )
    model_type = params["model_type"]
    c2_device = core.DeviceOption(
        caffe2_pb2.CPU if gpu_id == USE_CPU else caffe2_pb2.CUDA, gpu_id
    )
    if model_type == ModelType.DISCRETE_ACTION.value:
        with core.DeviceScope(c2_device):
            training_settings = params["training"]
            training_parameters = TrainingParameters(**training_settings)
            if env.img:
                # Image input: CNN params are required and patched with
                # the environment's actual input dimensions.
                assert (
                    training_parameters.cnn_parameters is not None
                ), "Missing CNN parameters for image input"
                training_parameters.cnn_parameters = CNNParameters(
                    **training_settings["cnn_parameters"]
                )
                training_parameters.cnn_parameters.conv_dims[0] = env.num_input_channels
                training_parameters.cnn_parameters.input_height = env.height
                training_parameters.cnn_parameters.input_width = env.width
                training_parameters.cnn_parameters.num_input_channels = (
                    env.num_input_channels
                )
            else:
                assert (
                    training_parameters.cnn_parameters is None
                ), "Extra CNN parameters for non-image input"
            trainer_params = DiscreteActionModelParameters(
                actions=env.actions, rl=rl_parameters, training=training_parameters
            )
            trainer = DiscreteActionTrainer(trainer_params, env.normalization)
    elif model_type == ModelType.PARAMETRIC_ACTION.value:
        with core.DeviceScope(c2_device):
            training_settings = params["training"]
            training_parameters = TrainingParameters(**training_settings)
            if env.img:
                assert (
                    training_parameters.cnn_parameters is not None
                ), "Missing CNN parameters for image input"
                training_parameters.cnn_parameters = CNNParameters(
                    **training_settings["cnn_parameters"]
                )
                training_parameters.cnn_parameters.conv_dims[0] = env.num_input_channels
            else:
                assert (
                    training_parameters.cnn_parameters is None
                ), "Extra CNN parameters for non-image input"
            trainer_params = ContinuousActionModelParameters(
                rl=rl_parameters,
                training=training_parameters,
                knn=KnnParameters(model_type="DQN"),
            )
            trainer = ContinuousActionDQNTrainer(
                trainer_params, env.normalization, env.normalization_action
            )
    elif model_type == ModelType.CONTINUOUS_ACTION.value:
        training_settings = params["shared_training"]
        actor_settings = params["actor_training"]
        critic_settings = params["critic_training"]
        trainer_params = DDPGModelParameters(
            rl=rl_parameters,
            shared_training=DDPGTrainingParameters(**training_settings),
            actor_training=DDPGNetworkParameters(**actor_settings),
            critic_training=DDPGNetworkParameters(**critic_settings),
        )
        # DDPG can handle continuous and discrete action spaces
        if env.action_type == EnvType.CONTINUOUS_ACTION:
            action_range = env.action_space.high
        else:
            action_range = None
        trainer = DDPGTrainer(
            trainer_params,
            env.normalization,
            env.normalization_action,
            use_gpu=False,
            action_range=action_range,
        )
    else:
        raise NotImplementedError("Model of type {} not supported".format(model_type))
    return run(
        c2_device,
        env,
        model_type,
        trainer,
        "{} test run".format(env_type),
        score_bar,
        **params["run_details"],
        save_timesteps_to_dataset=save_timesteps_to_dataset,
    )
def train_network(params):
    """Train a DDPG model from a JSON batch dataset and export it.

    Reads state/action normalization from files, warm-starts the trainer
    if a prior model exists, iterates the dataset by batch index for the
    configured number of epochs, then exports trainer and predictor.

    :param params: workflow config dict (paths, training sections,
        use_gpu / use_all_avail_gpus flags, epochs).
    :return: result of ``export_trainer_and_predictor``.
    """
    logger.info("Running Parametric DQN workflow with params:")
    logger.info(params)
    # Set minibatch size based on # of devices being used to train
    params["shared_training"]["minibatch_size"] *= minibatch_size_multiplier(
        params["use_gpu"], params["use_all_avail_gpus"]
    )
    rl_parameters = RLParameters(**params["rl"])
    training_parameters = DDPGTrainingParameters(**params["shared_training"])
    actor_parameters = DDPGNetworkParameters(**params["actor_training"])
    critic_parameters = DDPGNetworkParameters(**params["critic_training"])
    trainer_params = DDPGModelParameters(
        rl=rl_parameters,
        shared_training=training_parameters,
        actor_training=actor_parameters,
        critic_training=critic_parameters,
    )
    dataset = JSONDataset(
        params["training_data_path"], batch_size=training_parameters.minibatch_size
    )
    state_normalization = read_norm_file(params["state_norm_data_path"])
    action_normalization = read_norm_file(params["action_norm_data_path"])
    # Trailing partial batch is dropped by the integer division.
    num_batches = int(len(dataset) / training_parameters.minibatch_size)
    logger.info(
        "Read in batch data set {} of size {} examples. Data split "
        "into {} batches of size {}.".format(
            params["training_data_path"],
            len(dataset),
            num_batches,
            training_parameters.minibatch_size,
        )
    )
    min_action_range_tensor_serving, max_action_range_tensor_serving = construct_action_scale_tensor(
        action_normalization, trainer_params.action_rescale_map
    )
    trainer = DDPGTrainer(
        trainer_params,
        state_normalization,
        action_normalization,
        min_action_range_tensor_serving,
        max_action_range_tensor_serving,
        use_gpu=params["use_gpu"],
        use_all_avail_gpus=params["use_all_avail_gpus"],
    )
    trainer = update_model_for_warm_start(trainer)
    state_preprocessor = Preprocessor(state_normalization, params["use_gpu"])
    action_preprocessor = Preprocessor(action_normalization, params["use_gpu"])
    start_time = time.time()
    for epoch in range(params["epochs"]):
        dataset.reset_iterator()
        for batch_idx in range(num_batches):
            report_training_status(batch_idx, num_batches, epoch, params["epochs"])
            batch = dataset.read_batch(batch_idx)
            tdp = preprocess_batch_for_training(
                state_preprocessor, batch, action_preprocessor=action_preprocessor
            )
            # Cast the training page to the trainer's dtype (CPU/GPU).
            tdp.set_type(trainer.dtype)
            trainer.train(tdp)
    through_put = (len(dataset) * params["epochs"]) / (time.time() - start_time)
    logger.info(
        "Training finished. Processed ~{} examples / s.".format(round(through_put))
    )
    return export_trainer_and_predictor(trainer, params["model_output_path"])
def _test_ddpg_trainer(self, use_gpu=False, use_all_avail_gpus=False):
    """DDPG Gridworld test that builds actor/critic networks explicitly,
    evaluates via a parametric-DQN exporter, and checks the exported
    actor picks near-optimal actions."""
    # FIXME: the test is not really working
    self.run_pre_training_eval = False
    self.check_tolerance = False
    environment = GridworldContinuous()
    parameters = self.get_ddpg_parameters()
    state_dim = get_num_output_features(environment.normalization)
    action_dim = get_num_output_features(environment.normalization_action)
    # Build Actor Network
    actor_network = ActorNetModel(
        layers=[state_dim] + parameters.actor_training.layers[1:-1] + [action_dim],
        activations=parameters.actor_training.activations,
        fl_init=parameters.shared_training.final_layer_init,
        state_dim=state_dim,
        action_dim=action_dim,
        use_gpu=use_gpu,
        use_all_avail_gpus=use_all_avail_gpus,
    )
    # Build Critic Network
    critic_network = CriticNetModel(
        # Ensure dims match input state and scalar output
        layers=[state_dim] + parameters.critic_training.layers[1:-1] + [1],
        activations=parameters.critic_training.activations,
        fl_init=parameters.shared_training.final_layer_init,
        state_dim=state_dim,
        action_dim=action_dim,
        use_gpu=use_gpu,
        use_all_avail_gpus=use_all_avail_gpus,
    )
    trainer = DDPGTrainer(
        actor_network,
        critic_network,
        parameters,
        environment.normalization,
        environment.normalization_action,
        environment.min_action_range,
        environment.max_action_range,
        use_gpu=use_gpu,
        use_all_avail_gpus=use_all_avail_gpus,
    )
    # Critic is evaluated through a parametric-DQN style exporter.
    exporter = ParametricDQNExporter.from_state_action_normalization(
        trainer.critic,
        state_normalization=environment.normalization,
        action_normalization=environment.normalization_action,
    )
    evaluator = GridworldDDPGEvaluator(environment, DISCOUNT)
    self.evaluate_gridworld(environment, evaluator, trainer, exporter, use_gpu)
    # Make sure actor predictor works
    actor = ActorExporter.from_state_action_normalization(
        trainer.actor,
        state_normalization=environment.normalization,
        action_normalization=environment.normalization_action,
    ).export()
    # Make sure all actions are optimal
    error = evaluator.evaluate_actor(actor, thres=0.2)
    print("gridworld optimal action match MAE: {0:.3f}".format(error))
def create_trainer(model_type, params, rl_parameters, use_gpu, env):
    """Build and return a trainer for the requested model type.

    Supports both PyTorch (PYTORCH_DISCRETE_DQN, PYTORCH_PARAMETRIC_DQN,
    CONTINUOUS_ACTION) and Caffe2 (DISCRETE_ACTION, PARAMETRIC_ACTION)
    model types; Caffe2 trainers are built under a device scope.
    """
    c2_device = core.DeviceOption(caffe2_pb2.CUDA if use_gpu else caffe2_pb2.CPU)
    if model_type == ModelType.PYTORCH_DISCRETE_DQN.value:
        training_parameters = params["training"]
        if isinstance(training_parameters, dict):
            training_parameters = TrainingParameters(**training_parameters)
        rainbow_parameters = params["rainbow"]
        if isinstance(rainbow_parameters, dict):
            rainbow_parameters = RainbowDQNParameters(**rainbow_parameters)
        if env.img:
            # Image input: patch CNN params with actual env dimensions.
            assert (
                training_parameters.cnn_parameters is not None
            ), "Missing CNN parameters for image input"
            training_parameters.cnn_parameters.conv_dims[0] = env.num_input_channels
            training_parameters.cnn_parameters.input_height = env.height
            training_parameters.cnn_parameters.input_width = env.width
            training_parameters.cnn_parameters.num_input_channels = (
                env.num_input_channels
            )
        else:
            assert (
                training_parameters.cnn_parameters is None
            ), "Extra CNN parameters for non-image input"
        trainer_params = DiscreteActionModelParameters(
            actions=env.actions,
            rl=rl_parameters,
            training=training_parameters,
            rainbow=rainbow_parameters,
        )
        trainer = DQNTrainer(trainer_params, env.normalization, use_gpu)
    elif model_type == ModelType.DISCRETE_ACTION.value:
        with core.DeviceScope(c2_device):
            training_parameters = params["training"]
            if isinstance(training_parameters, dict):
                training_parameters = TrainingParameters(**training_parameters)
            if env.img:
                assert (
                    training_parameters.cnn_parameters is not None
                ), "Missing CNN parameters for image input"
                training_parameters.cnn_parameters.conv_dims[0] = env.num_input_channels
                training_parameters.cnn_parameters.input_height = env.height
                training_parameters.cnn_parameters.input_width = env.width
                training_parameters.cnn_parameters.num_input_channels = (
                    env.num_input_channels
                )
            else:
                assert (
                    training_parameters.cnn_parameters is None
                ), "Extra CNN parameters for non-image input"
            trainer_params = DiscreteActionModelParameters(
                actions=env.actions, rl=rl_parameters, training=training_parameters
            )
            trainer = DiscreteActionTrainer(trainer_params, env.normalization)
    elif model_type == ModelType.PYTORCH_PARAMETRIC_DQN.value:
        training_parameters = params["training"]
        if isinstance(training_parameters, dict):
            training_parameters = TrainingParameters(**training_parameters)
        rainbow_parameters = params["rainbow"]
        if isinstance(rainbow_parameters, dict):
            rainbow_parameters = RainbowDQNParameters(**rainbow_parameters)
        if env.img:
            assert (
                training_parameters.cnn_parameters is not None
            ), "Missing CNN parameters for image input"
            training_parameters.cnn_parameters.conv_dims[0] = env.num_input_channels
        else:
            assert (
                training_parameters.cnn_parameters is None
            ), "Extra CNN parameters for non-image input"
        trainer_params = ContinuousActionModelParameters(
            rl=rl_parameters,
            training=training_parameters,
            knn=KnnParameters(model_type="DQN"),
            rainbow=rainbow_parameters,
        )
        trainer = ParametricDQNTrainer(
            trainer_params, env.normalization, env.normalization_action, use_gpu
        )
    elif model_type == ModelType.PARAMETRIC_ACTION.value:
        with core.DeviceScope(c2_device):
            training_parameters = params["training"]
            if isinstance(training_parameters, dict):
                training_parameters = TrainingParameters(**training_parameters)
            if env.img:
                assert (
                    training_parameters.cnn_parameters is not None
                ), "Missing CNN parameters for image input"
                training_parameters.cnn_parameters.conv_dims[0] = env.num_input_channels
            else:
                assert (
                    training_parameters.cnn_parameters is None
                ), "Extra CNN parameters for non-image input"
            trainer_params = ContinuousActionModelParameters(
                rl=rl_parameters,
                training=training_parameters,
                knn=KnnParameters(model_type="DQN"),
            )
            trainer = ContinuousActionDQNTrainer(
                trainer_params, env.normalization, env.normalization_action
            )
    elif model_type == ModelType.CONTINUOUS_ACTION.value:
        training_parameters = params["shared_training"]
        if isinstance(training_parameters, dict):
            training_parameters = DDPGTrainingParameters(**training_parameters)
        actor_parameters = params["actor_training"]
        if isinstance(actor_parameters, dict):
            actor_parameters = DDPGNetworkParameters(**actor_parameters)
        critic_parameters = params["critic_training"]
        if isinstance(critic_parameters, dict):
            critic_parameters = DDPGNetworkParameters(**critic_parameters)
        trainer_params = DDPGModelParameters(
            rl=rl_parameters,
            shared_training=training_parameters,
            actor_training=actor_parameters,
            critic_training=critic_parameters,
        )
        # Action bounds from the gym space; unsqueeze adds a batch dim.
        action_range_low = env.action_space.low.astype(np.float32)
        action_range_high = env.action_space.high.astype(np.float32)
        trainer = DDPGTrainer(
            trainer_params,
            env.normalization,
            env.normalization_action,
            torch.from_numpy(action_range_low).unsqueeze(dim=0),
            torch.from_numpy(action_range_high).unsqueeze(dim=0),
            use_gpu,
        )
    else:
        raise NotImplementedError("Model of type {} not supported".format(model_type))
    return trainer
def create_park_trainer(model_type, params, rl_parameters, use_gpu, env):
    """Build and return a trainer for a Park environment.

    Mirrors ``create_trainer`` but routes DQN variants through the
    park-specific factory functions and constructs DDPG actor/critic
    networks explicitly.
    """
    if model_type == ModelType.PYTORCH_DISCRETE_DQN.value:
        training_parameters = params["training"]
        if isinstance(training_parameters, dict):
            training_parameters = TrainingParameters(**training_parameters)
        rainbow_parameters = params["rainbow"]
        if isinstance(rainbow_parameters, dict):
            rainbow_parameters = RainbowDQNParameters(**rainbow_parameters)
        if env.img:
            # Image input: CNN params are required and patched with the
            # environment's actual input dimensions.
            assert (
                training_parameters.cnn_parameters is not None
            ), "Missing CNN parameters for image input"
            if isinstance(training_parameters.cnn_parameters, dict):
                training_parameters.cnn_parameters = CNNParameters(
                    **training_parameters.cnn_parameters
                )
            training_parameters.cnn_parameters.conv_dims[0] = env.num_input_channels
            training_parameters.cnn_parameters.input_height = env.height
            training_parameters.cnn_parameters.input_width = env.width
            training_parameters.cnn_parameters.num_input_channels = (
                env.num_input_channels
            )
        else:
            assert (
                training_parameters.cnn_parameters is None
            ), "Extra CNN parameters for non-image input"
        trainer_params = DiscreteActionModelParameters(
            actions=env.actions,
            rl=rl_parameters,
            training=training_parameters,
            rainbow=rainbow_parameters,
        )
        trainer = create_park_dqn_trainer_from_params(
            model=trainer_params,
            normalization_parameters=env.normalization,
            use_gpu=use_gpu,
            env=env.env,
        )
    elif model_type == ModelType.PYTORCH_PARAMETRIC_DQN.value:
        training_parameters = params["training"]
        if isinstance(training_parameters, dict):
            training_parameters = TrainingParameters(**training_parameters)
        rainbow_parameters = params["rainbow"]
        if isinstance(rainbow_parameters, dict):
            rainbow_parameters = RainbowDQNParameters(**rainbow_parameters)
        if env.img:
            assert (
                training_parameters.cnn_parameters is not None
            ), "Missing CNN parameters for image input"
            training_parameters.cnn_parameters.conv_dims[0] = env.num_input_channels
        else:
            assert (
                training_parameters.cnn_parameters is None
            ), "Extra CNN parameters for non-image input"
        trainer_params = ContinuousActionModelParameters(
            rl=rl_parameters,
            training=training_parameters,
            rainbow=rainbow_parameters,
        )
        trainer = create_parametric_dqn_trainer_from_params(
            trainer_params,
            env.normalization,
            env.normalization_action,
            use_gpu,
            env=env.env,
        )
    elif model_type == ModelType.CONTINUOUS_ACTION.value:
        training_parameters = params["shared_training"]
        if isinstance(training_parameters, dict):
            training_parameters = DDPGTrainingParameters(**training_parameters)
        actor_parameters = params["actor_training"]
        if isinstance(actor_parameters, dict):
            actor_parameters = DDPGNetworkParameters(**actor_parameters)
        critic_parameters = params["critic_training"]
        if isinstance(critic_parameters, dict):
            critic_parameters = DDPGNetworkParameters(**critic_parameters)
        trainer_params = DDPGModelParameters(
            rl=rl_parameters,
            shared_training=training_parameters,
            actor_training=actor_parameters,
            critic_training=critic_parameters,
        )
        action_range_low = env.action_space.low.astype(np.float32)
        action_range_high = env.action_space.high.astype(np.float32)
        state_dim = get_num_output_features(env.normalization)
        action_dim = get_num_output_features(env.normalization_action)
        # Build Actor Network
        actor_network = ActorNetModel(
            layers=(
                [state_dim] + trainer_params.actor_training.layers[1:-1] + [action_dim]
            ),
            activations=trainer_params.actor_training.activations,
            fl_init=trainer_params.shared_training.final_layer_init,
            state_dim=state_dim,
            action_dim=action_dim,
            use_gpu=use_gpu,
            use_all_avail_gpus=False,
        )
        # Build Critic Network
        critic_network = CriticNetModel(
            # Ensure dims match input state and scalar output
            layers=[state_dim] + trainer_params.critic_training.layers[1:-1] + [1],
            activations=trainer_params.critic_training.activations,
            fl_init=trainer_params.shared_training.final_layer_init,
            state_dim=state_dim,
            action_dim=action_dim,
            use_gpu=use_gpu,
            use_all_avail_gpus=False,
        )
        trainer = DDPGTrainer(
            actor_network,
            critic_network,
            trainer_params,
            env.normalization,
            env.normalization_action,
            torch.from_numpy(action_range_low).unsqueeze(dim=0),
            torch.from_numpy(action_range_high).unsqueeze(dim=0),
            use_gpu,
        )
    elif model_type == ModelType.SOFT_ACTOR_CRITIC.value:
        # Value network (and its optimizer) are optional for SAC.
        value_network = None
        value_network_optimizer = None
        if params["sac_training"]["use_value_network"]:
            value_network = FeedForwardParameters(**params["sac_value_training"])
            value_network_optimizer = OptimizerParameters(
                **params["sac_training"]["value_network_optimizer"]
            )
        trainer_params = SACModelParameters(
            rl=rl_parameters,
            training=SACTrainingParameters(
                minibatch_size=params["sac_training"]["minibatch_size"],
                use_2_q_functions=params["sac_training"]["use_2_q_functions"],
                use_value_network=params["sac_training"]["use_value_network"],
                q_network_optimizer=OptimizerParameters(
                    **params["sac_training"]["q_network_optimizer"]
                ),
                value_network_optimizer=value_network_optimizer,
                actor_network_optimizer=OptimizerParameters(
                    **params["sac_training"]["actor_network_optimizer"]
                ),
                entropy_temperature=params["sac_training"]["entropy_temperature"],
            ),
            q_network=FeedForwardParameters(**params["sac_q_training"]),
            value_network=value_network,
            actor_network=FeedForwardParameters(**params["sac_actor_training"]),
        )
        trainer = horizon_runner.get_sac_trainer(env, trainer_params, use_gpu)
    else:
        raise NotImplementedError("Model of type {} not supported".format(model_type))
    return trainer
def run_gym(
    params,
    score_bar,
    gpu_id,
    save_timesteps_to_dataset=None,
    start_saving_from_episode=0,
    batch_rl_file_path=None,
):
    """Build a trainer for the configured model type and run it on an
    OpenAI Gym environment.

    :param params: config dict (env, rl, model_type, training sections, ...).
    :param score_bar: score threshold forwarded to ``run``.
    :param gpu_id: GPU ordinal, or USE_CPU for CPU-only.
    :param save_timesteps_to_dataset: optional dataset sink for timesteps.
    :param start_saving_from_episode: first episode to start saving from.
    :param batch_rl_file_path: optional batch-RL input file path.
    """
    # Caffe2 core uses the min of caffe2_log_level and minloglevel
    # to determine loglevel. See caffe2/caffe2/core/logging.cc for more info.
    core.GlobalInit(["caffe2", "--caffe2_log_level=2", "--minloglevel=2"])
    logger.info("Running gym with params")
    logger.info(params)
    rl_parameters = RLParameters(**params["rl"])
    env_type = params["env"]
    env = OpenAIGymEnvironment(
        env_type,
        rl_parameters.epsilon,
        rl_parameters.softmax_policy,
        params["max_replay_memory_size"],
        rl_parameters.gamma,
    )
    model_type = params["model_type"]
    c2_device = core.DeviceOption(
        caffe2_pb2.CPU if gpu_id == USE_CPU else caffe2_pb2.CUDA, gpu_id
    )
    use_gpu = gpu_id != USE_CPU
    if model_type == ModelType.PYTORCH_DISCRETE_DQN.value:
        training_settings = params["training"]
        training_parameters = TrainingParameters(**training_settings)
        if env.img:
            # Image input: CNN params are required and patched with the
            # environment's actual input dimensions.
            assert (
                training_parameters.cnn_parameters is not None
            ), "Missing CNN parameters for image input"
            training_parameters.cnn_parameters = CNNParameters(
                **training_settings["cnn_parameters"]
            )
            training_parameters.cnn_parameters.conv_dims[0] = env.num_input_channels
            training_parameters.cnn_parameters.input_height = env.height
            training_parameters.cnn_parameters.input_width = env.width
            training_parameters.cnn_parameters.num_input_channels = (
                env.num_input_channels
            )
        else:
            assert (
                training_parameters.cnn_parameters is None
            ), "Extra CNN parameters for non-image input"
        trainer_params = DiscreteActionModelParameters(
            actions=env.actions, rl=rl_parameters, training=training_parameters
        )
        trainer = DQNTrainer(trainer_params, env.normalization, use_gpu)
    elif model_type == ModelType.DISCRETE_ACTION.value:
        with core.DeviceScope(c2_device):
            training_settings = params["training"]
            training_parameters = TrainingParameters(**training_settings)
            if env.img:
                assert (
                    training_parameters.cnn_parameters is not None
                ), "Missing CNN parameters for image input"
                training_parameters.cnn_parameters = CNNParameters(
                    **training_settings["cnn_parameters"]
                )
                training_parameters.cnn_parameters.conv_dims[0] = env.num_input_channels
                training_parameters.cnn_parameters.input_height = env.height
                training_parameters.cnn_parameters.input_width = env.width
                training_parameters.cnn_parameters.num_input_channels = (
                    env.num_input_channels
                )
            else:
                assert (
                    training_parameters.cnn_parameters is None
                ), "Extra CNN parameters for non-image input"
            trainer_params = DiscreteActionModelParameters(
                actions=env.actions, rl=rl_parameters, training=training_parameters
            )
            trainer = DiscreteActionTrainer(trainer_params, env.normalization)
    elif model_type == ModelType.PYTORCH_PARAMETRIC_DQN.value:
        training_settings = params["training"]
        training_parameters = TrainingParameters(**training_settings)
        if env.img:
            assert (
                training_parameters.cnn_parameters is not None
            ), "Missing CNN parameters for image input"
            training_parameters.cnn_parameters = CNNParameters(
                **training_settings["cnn_parameters"]
            )
            training_parameters.cnn_parameters.conv_dims[0] = env.num_input_channels
        else:
            assert (
                training_parameters.cnn_parameters is None
            ), "Extra CNN parameters for non-image input"
        trainer_params = ContinuousActionModelParameters(
            rl=rl_parameters,
            training=training_parameters,
            knn=KnnParameters(model_type="DQN"),
        )
        trainer = ParametricDQNTrainer(
            trainer_params, env.normalization, env.normalization_action, use_gpu
        )
    elif model_type == ModelType.PARAMETRIC_ACTION.value:
        with core.DeviceScope(c2_device):
            training_settings = params["training"]
            training_parameters = TrainingParameters(**training_settings)
            if env.img:
                assert (
                    training_parameters.cnn_parameters is not None
                ), "Missing CNN parameters for image input"
                training_parameters.cnn_parameters = CNNParameters(
                    **training_settings["cnn_parameters"]
                )
                training_parameters.cnn_parameters.conv_dims[0] = env.num_input_channels
            else:
                assert (
                    training_parameters.cnn_parameters is None
                ), "Extra CNN parameters for non-image input"
            trainer_params = ContinuousActionModelParameters(
                rl=rl_parameters,
                training=training_parameters,
                knn=KnnParameters(model_type="DQN"),
            )
            trainer = ContinuousActionDQNTrainer(
                trainer_params, env.normalization, env.normalization_action
            )
    elif model_type == ModelType.CONTINUOUS_ACTION.value:
        training_settings = params["shared_training"]
        actor_settings = params["actor_training"]
        critic_settings = params["critic_training"]
        trainer_params = DDPGModelParameters(
            rl=rl_parameters,
            shared_training=DDPGTrainingParameters(**training_settings),
            actor_training=DDPGNetworkParameters(**actor_settings),
            critic_training=DDPGNetworkParameters(**critic_settings),
        )
        # Action bounds from the gym space; unsqueeze adds a batch dim.
        action_range_low = env.action_space.low.astype(np.float32)
        action_range_high = env.action_space.high.astype(np.float32)
        trainer = DDPGTrainer(
            trainer_params,
            env.normalization,
            env.normalization_action,
            torch.from_numpy(action_range_low).unsqueeze(dim=0),
            torch.from_numpy(action_range_high).unsqueeze(dim=0),
            use_gpu,
        )
    else:
        raise NotImplementedError("Model of type {} not supported".format(model_type))
    return run(
        c2_device,
        env,
        model_type,
        trainer,
        "{} test run".format(env_type),
        score_bar,
        **params["run_details"],
        save_timesteps_to_dataset=save_timesteps_to_dataset,
        start_saving_from_episode=start_saving_from_episode,
        batch_rl_file_path=batch_rl_file_path,
    )
def __init__(
    self,
    model_params: ContinuousActionModelParameters,
    preprocess_handler: PreprocessHandler,
    state_normalization: Dict[int, NormalizationParameters],
    action_normalization: Dict[int, NormalizationParameters],
    use_gpu: bool,
    use_all_avail_gpus: bool,
):
    """Set up a continuous-action (DDPG) training workflow.

    Builds actor/critic networks sized from the normalization specs,
    wraps them in a DDPG trainer, applies warm start, and delegates the
    rest of the setup to the base workflow class.

    :param model_params: model/RL/training hyper-parameters.
    :param preprocess_handler: handler that prepares raw input rows.
    :param state_normalization: per-feature state normalization parameters.
    :param action_normalization: per-feature action normalization parameters.
    :param use_gpu: run training on GPU when True.
    :param use_all_avail_gpus: spread training across all visible GPUs.
    """
    logger.info("Running continuous workflow with params:")
    logger.info(model_params)
    # NOTE: removed dead no-op `model_params = model_params` self-assignment.
    min_action_range_tensor_serving, max_action_range_tensor_serving = construct_action_scale_tensor(
        action_normalization, model_params.action_rescale_map
    )
    state_dim = get_num_output_features(state_normalization)
    action_dim = get_num_output_features(action_normalization)
    # Build Actor Network
    actor_network = ActorNetModel(
        layers=(
            [state_dim] + model_params.actor_training.layers[1:-1] + [action_dim]
        ),
        activations=model_params.actor_training.activations,
        fl_init=model_params.shared_training.final_layer_init,
        state_dim=state_dim,
        action_dim=action_dim,
    )
    # Build Critic Network
    critic_network = CriticNetModel(
        # Ensure dims match input state and scalar output
        layers=[state_dim] + model_params.critic_training.layers[1:-1] + [1],
        activations=model_params.critic_training.activations,
        fl_init=model_params.shared_training.final_layer_init,
        state_dim=state_dim,
        action_dim=action_dim,
    )
    trainer = DDPGTrainer(
        actor_network,
        critic_network,
        model_params,
        state_normalization,
        action_normalization,
        min_action_range_tensor_serving,
        max_action_range_tensor_serving,
        use_gpu=use_gpu,
        use_all_avail_gpus=use_all_avail_gpus,
    )
    # Warm start must hand back the same trainer type.
    trainer = update_model_for_warm_start(trainer)
    assert type(trainer) == DDPGTrainer, "Warm started wrong model type: " + str(
        type(trainer)
    )
    evaluator = Evaluator(
        None,
        model_params.rl.gamma,
        trainer,
        metrics_to_score=trainer.metrics_to_score,
    )
    super().__init__(
        preprocess_handler,
        trainer,
        evaluator,
        model_params.shared_training.minibatch_size,
    )
def train_network(params):
    """Run the batch-RL DDPG training workflow end to end.

    Reads the training dataset and normalization files named in ``params``,
    builds a (possibly warm-started) DDPGTrainer, trains for
    ``params["epochs"]`` passes over the data, then exports the trainer and
    predictor to ``params["model_output_path"]``.

    Args:
        params: configuration dict; keys used below include "rl",
            "shared_training", "actor_training", "critic_training",
            "training_data_path", "state_norm_data_path",
            "action_norm_data_path", "use_gpu", "use_all_avail_gpus",
            "epochs", and "model_output_path".

    Returns:
        Whatever ``export_trainer_and_predictor`` returns (the exported
        artifacts).
    """
    logger.info("Running Parametric DQN workflow with params:")
    logger.info(params)

    # Set minibatch size based on # of devices being used to train
    params["shared_training"]["minibatch_size"] *= minibatch_size_multiplier(
        params["use_gpu"], params["use_all_avail_gpus"]
    )

    rl_parameters = RLParameters(**params["rl"])
    training_parameters = DDPGTrainingParameters(**params["shared_training"])
    actor_parameters = DDPGNetworkParameters(**params["actor_training"])
    critic_parameters = DDPGNetworkParameters(**params["critic_training"])
    trainer_params = DDPGModelParameters(
        rl=rl_parameters,
        shared_training=training_parameters,
        actor_training=actor_parameters,
        critic_training=critic_parameters,
    )

    dataset = JSONDataset(
        params["training_data_path"], batch_size=training_parameters.minibatch_size
    )
    state_normalization = read_norm_file(params["state_norm_data_path"])
    action_normalization = read_norm_file(params["action_norm_data_path"])

    # Note: integer division truncates, so a trailing partial batch is not
    # counted here (it is still trained on by the read loop below).
    num_batches = int(len(dataset) / training_parameters.minibatch_size)
    logger.info(
        "Read in batch data set {} of size {} examples. Data split "
        "into {} batches of size {}.".format(
            params["training_data_path"],
            len(dataset),
            num_batches,
            training_parameters.minibatch_size,
        )
    )

    # Serving-side action scaling bounds derived from the normalization spec.
    min_action_range_tensor_serving, max_action_range_tensor_serving = construct_action_scale_tensor(
        action_normalization, trainer_params.action_rescale_map
    )

    trainer = DDPGTrainer(
        trainer_params,
        state_normalization,
        action_normalization,
        min_action_range_tensor_serving,
        max_action_range_tensor_serving,
        use_gpu=params["use_gpu"],
        use_all_avail_gpus=params["use_all_avail_gpus"],
    )
    # Warm start may replace the trainer object with a restored one.
    trainer = update_model_for_warm_start(trainer)
    state_preprocessor = Preprocessor(state_normalization, False)
    action_preprocessor = Preprocessor(action_normalization, False)

    start_time = time.time()
    for epoch in range(params["epochs"]):
        dataset.reset_iterator()
        batch_idx = -1
        # Drain the dataset; read_batch() presumably returns None at end of
        # data — TODO confirm against JSONDataset.
        while True:
            batch_idx += 1
            report_training_status(batch_idx, num_batches, epoch, params["epochs"])
            batch = dataset.read_batch()
            if batch is None:
                break
            tdp = preprocess_batch_for_training(
                state_preprocessor, batch, action_preprocessor=action_preprocessor
            )
            # Cast the training batch to the trainer's dtype (CPU/GPU).
            tdp.set_type(trainer.dtype)
            trainer.train(tdp)

    through_put = (len(dataset) * params["epochs"]) / (time.time() - start_time)
    logger.info(
        "Training finished. Processed ~{} examples / s.".format(round(through_put))
    )

    return export_trainer_and_predictor(trainer, params["model_output_path"])
def run_gym(params, score_bar, gpu_id):
    """Build a trainer for the configured model type and run it on an OpenAI
    Gym environment.

    Args:
        params: configuration dict; reads 'rl', 'env', 'model_type',
            'training' / 'shared_training' / 'actor_training' /
            'critic_training' (depending on model type), 'cnn' for image
            environments, and 'run_details'.
        score_bar: score threshold forwarded to ``run``.
        gpu_id: Caffe2 device id; ``USE_CPU`` selects the CPU device.

    Returns:
        The result of ``run(...)``.

    Raises:
        NotImplementedError: if ``params['model_type']`` is not one of the
            supported ModelType values.
    """
    rl_settings = params['rl']
    # Rename config keys to the names the parameter dataclasses expect
    # (the config files use human-readable names).
    rl_settings['gamma'] = rl_settings['reward_discount_factor']
    del rl_settings['reward_discount_factor']

    env_type = params['env']
    env = OpenAIGymEnvironment(env_type, rl_settings['epsilon'])
    model_type = params['model_type']
    c2_device = core.DeviceOption(
        caffe2_pb2.CPU if gpu_id == USE_CPU else caffe2_pb2.CUDA,
        gpu_id,
    )

    if model_type == ModelType.DISCRETE_ACTION.value:
        # Caffe2-based discrete-action trainers are built under the device
        # scope so their nets land on the requested device.
        with core.DeviceScope(c2_device):
            training_settings = params['training']
            # NOTE(review): the config key 'learning_rate_decay' is mapped
            # onto the 'gamma' field of TrainingParameters (optimizer decay),
            # distinct from the RL discount gamma above.
            training_settings['gamma'] = training_settings[
                'learning_rate_decay']
            del training_settings['learning_rate_decay']
            trainer_params = DiscreteActionModelParameters(
                actions=env.actions,
                rl=RLParameters(**rl_settings),
                training=TrainingParameters(**training_settings))
            if env.img:
                # Image observations: convolutional front-end.
                trainer = DiscreteActionConvTrainer(
                    DiscreteActionConvModelParameters(
                        fc_parameters=trainer_params,
                        cnn_parameters=CNNModelParameters(**params['cnn']),
                        num_input_channels=env.num_input_channels,
                        img_height=env.height,
                        img_width=env.width),
                    env.normalization,
                )
            else:
                trainer = DiscreteActionTrainer(
                    trainer_params,
                    env.normalization,
                )
    elif model_type == ModelType.PARAMETRIC_ACTION.value:
        with core.DeviceScope(c2_device):
            training_settings = params['training']
            # Same key rename as the discrete branch (optimizer decay).
            training_settings['gamma'] = training_settings[
                'learning_rate_decay']
            del training_settings['learning_rate_decay']
            trainer_params = ContinuousActionModelParameters(
                rl=RLParameters(**rl_settings),
                training=TrainingParameters(**training_settings),
                knn=KnnParameters(model_type='DQN', ),
            )
            trainer = ContinuousActionDQNTrainer(trainer_params,
                                                 env.normalization,
                                                 env.normalization_action)
    elif model_type == ModelType.CONTINUOUS_ACTION.value:
        # DDPG path (PyTorch-based): no Caffe2 device scope needed here.
        training_settings = params['shared_training']
        training_settings['gamma'] = training_settings['learning_rate_decay']
        del training_settings['learning_rate_decay']
        actor_settings = params['actor_training']
        critic_settings = params['critic_training']
        trainer_params = DDPGModelParameters(
            rl=DDPGRLParameters(**rl_settings),
            shared_training=DDPGTrainingParameters(**training_settings),
            actor_training=DDPGNetworkParameters(**actor_settings),
            critic_training=DDPGNetworkParameters(**critic_settings),
        )
        trainer = DDPGTrainer(
            trainer_params,
            EnvDetails(
                state_dim=env.state_dim,
                action_dim=env.action_dim,
                action_range=(env.action_space.low, env.action_space.high),
            ))
    else:
        raise NotImplementedError(
            "Model of type {} not supported".format(model_type))

    return run(env, model_type, trainer, "{} test run".format(env_type),
               score_bar, **params["run_details"])