def get_sarsa_parameters_factorized(self):
    """Build SARSA model parameters using a factorized Q-network.

    The reward network uses the top-level layers/activations; the state and
    action sides of the factorized Q-network each get their own identical
    feed-forward tower.
    """
    # Each call produces a fresh tower so state and action do not share one
    # parameter object.
    def make_tower():
        return FeedForwardParameters(
            layers=[-1, 128, 64, 32],
            activations=["relu", "relu", "linear"],
        )

    rl_params = RLParameters(
        gamma=DISCOUNT,
        target_update_rate=1.0,
        reward_burnin=100,
        maxq_learning=False,
    )
    training_params = TrainingParameters(
        # These are used by reward network
        layers=[-1, 256, 128, -1],
        activations=["relu", "relu", "linear"],
        factorization_parameters=FactorizationParameters(
            state=make_tower(),
            action=make_tower(),
        ),
        minibatch_size=self.minibatch_size,
        learning_rate=0.05,
        optimizer="ADAM",
    )
    return ContinuousActionModelParameters(
        rl=rl_params,
        training=training_params,
        knn=KnnParameters(model_type="DQN"),
        rainbow=RainbowDQNParameters(
            double_q_learning=True, dueling_architecture=False
        ),
        in_training_cpe=InTrainingCPEParameters(mdp_sampled_rate=0.1),
    )
def get_sarsa_parameters_factorized(self):
    """Build SARSA model parameters using a factorized Q-network.

    The top-level layers/activations are left empty; the state and action
    sides of the factorized Q-network each get their own identical
    feed-forward tower.
    """
    # Each call produces a fresh tower so state and action do not share one
    # parameter object.
    def make_tower():
        return FeedForwardParameters(
            layers=[-1, 128, 64, 32],
            activations=["relu", "relu", "linear"],
        )

    rl_params = RLParameters(
        gamma=DISCOUNT,
        target_update_rate=1.0,
        reward_burnin=100,
        maxq_learning=False,
    )
    training_params = TrainingParameters(
        layers=[],
        activations=[],
        factorization_parameters=FactorizationParameters(
            state=make_tower(),
            action=make_tower(),
        ),
        minibatch_size=self.minibatch_size,
        learning_rate=0.05,
        optimizer="ADAM",
    )
    return ContinuousActionModelParameters(
        rl=rl_params,
        training=training_params,
        knn=KnnParameters(model_type="DQN"),
        in_training_cpe_evaluation=InTrainingCPEParameters(
            mdp_sampled_rate=0.1
        ),
    )
def get_sarsa_parameters(self):
    """Build non-factorized SARSA model parameters with a small linear net."""
    rl_params = RLParameters(
        gamma=DISCOUNT,
        target_update_rate=0.5,
        reward_burnin=10,
        maxq_learning=False,
    )
    training_params = TrainingParameters(
        layers=[-1, 200, 1],
        activations=['linear', 'linear'],
        minibatch_size=1024,
        learning_rate=0.01,
        optimizer='ADAM',
    )
    return ContinuousActionModelParameters(
        rl=rl_params,
        training=training_params,
        knn=KnnParameters(model_type='DQN', ),
    )
def get_sarsa_parameters(self):
    """Build non-factorized SARSA model parameters with a ReLU MLP."""
    rl_params = RLParameters(
        gamma=DISCOUNT,
        target_update_rate=1.0,
        reward_burnin=100,
        maxq_learning=False,
    )
    training_params = TrainingParameters(
        layers=[-1, 256, 128, -1],
        activations=["relu", "relu", "linear"],
        minibatch_size=self.minibatch_size,
        learning_rate=0.1,
        optimizer="ADAM",
    )
    return ContinuousActionModelParameters(
        rl=rl_params,
        training=training_params,
        knn=KnnParameters(model_type="DQN"),
    )
def setUp(self):
    """Seed RNGs and build the mock env, model parameters, and trainer.

    Sets: self.state_dim, self.action_dim, self._env,
    self._rl_parameters_maxq (RLParameters with maxq_learning=True),
    self._rl_parameters (full ContinuousActionModelParameters), and
    self._trainer.
    """
    # Zero-argument super() instead of super(self.__class__, self):
    # the latter recurses infinitely if this class is ever subclassed.
    super().setUp()
    # Fixed seeds keep the test deterministic.
    np.random.seed(0)
    random.seed(0)
    self.state_dim, self.action_dim = 2, 3
    self._env = MockEnv(self.state_dim, self.action_dim)
    # The base RLParameters is only an ingredient of the full model
    # parameters; keep it in a local rather than temporarily storing it in
    # self._rl_parameters (the original shadowed that attribute).
    base_rl_parameters = RLParameters(
        gamma=0.9,
        target_update_rate=0.5,
        reward_burnin=10,
        maxq_learning=False,
    )
    self._rl_parameters_maxq = RLParameters(
        gamma=0.9,
        target_update_rate=0.5,
        reward_burnin=10,
        maxq_learning=True,
    )
    self._rl_parameters = ContinuousActionModelParameters(
        rl=base_rl_parameters,
        training=TrainingParameters(
            # Hidden layer sized to cover every (state, action) pair twice.
            layers=[
                -1, self._env.num_states * self._env.num_actions * 2, 1
            ],
            activations=['linear', 'linear'],
            minibatch_size=1024,
            learning_rate=0.01,
            optimizer='ADAM',
        ),
        knn=KnnParameters(model_type='DQN', ),
    )
    self._trainer = ContinuousActionDQNTrainer(
        self._env.normalization,
        self._env.normalization_action,
        self._rl_parameters,
    )
def run_gym(params, score_bar, gpu_id, save_timesteps_to_dataset=None):
    """Construct an environment and a trainer from ``params``, then run.

    Args:
        params: dict with keys "rl", "env", "model_type",
            "max_replay_memory_size", "training" (or the DDPG trio
            "shared_training"/"actor_training"/"critic_training"), and
            "run_details" (forwarded as kwargs to ``run``).
        score_bar: forwarded to ``run``.
        gpu_id: GPU ordinal, or USE_CPU to select the CPU device.
        save_timesteps_to_dataset: forwarded to ``run``.

    Returns:
        Whatever ``run`` returns.

    Raises:
        NotImplementedError: for an unrecognized model_type.
    """
    logger.info("Running gym with params")
    logger.info(params)
    rl_parameters = RLParameters(**params["rl"])
    env_type = params["env"]
    env = OpenAIGymEnvironment(
        env_type,
        rl_parameters.epsilon,
        rl_parameters.softmax_policy,
        params["max_replay_memory_size"],
    )
    model_type = params["model_type"]
    c2_device = core.DeviceOption(
        caffe2_pb2.CPU if gpu_id == USE_CPU else caffe2_pb2.CUDA, gpu_id
    )
    if model_type == ModelType.DISCRETE_ACTION.value:
        with core.DeviceScope(c2_device):
            training_settings = params["training"]
            training_parameters = TrainingParameters(**training_settings)
            if env.img:
                # Image input: rebuild CNN parameters from the raw dict, then
                # overwrite the input geometry with the env's actual values.
                assert (
                    training_parameters.cnn_parameters is not None
                ), "Missing CNN parameters for image input"
                training_parameters.cnn_parameters = CNNParameters(
                    **training_settings["cnn_parameters"]
                )
                training_parameters.cnn_parameters.conv_dims[0] = env.num_input_channels
                training_parameters.cnn_parameters.input_height = env.height
                training_parameters.cnn_parameters.input_width = env.width
                training_parameters.cnn_parameters.num_input_channels = (
                    env.num_input_channels
                )
            else:
                assert (
                    training_parameters.cnn_parameters is None
                ), "Extra CNN parameters for non-image input"
            trainer_params = DiscreteActionModelParameters(
                actions=env.actions, rl=rl_parameters, training=training_parameters
            )
            trainer = DiscreteActionTrainer(trainer_params, env.normalization)
    elif model_type == ModelType.PARAMETRIC_ACTION.value:
        with core.DeviceScope(c2_device):
            training_settings = params["training"]
            training_parameters = TrainingParameters(**training_settings)
            if env.img:
                assert (
                    training_parameters.cnn_parameters is not None
                ), "Missing CNN parameters for image input"
                training_parameters.cnn_parameters = CNNParameters(
                    **training_settings["cnn_parameters"]
                )
                # Only the channel count is synced here (no height/width),
                # unlike the discrete branch above.
                training_parameters.cnn_parameters.conv_dims[0] = env.num_input_channels
            else:
                assert (
                    training_parameters.cnn_parameters is None
                ), "Extra CNN parameters for non-image input"
            trainer_params = ContinuousActionModelParameters(
                rl=rl_parameters,
                training=training_parameters,
                knn=KnnParameters(model_type="DQN"),
            )
            trainer = ContinuousActionDQNTrainer(
                trainer_params, env.normalization, env.normalization_action
            )
    elif model_type == ModelType.CONTINUOUS_ACTION.value:
        training_settings = params["shared_training"]
        actor_settings = params["actor_training"]
        critic_settings = params["critic_training"]
        trainer_params = DDPGModelParameters(
            rl=rl_parameters,
            shared_training=DDPGTrainingParameters(**training_settings),
            actor_training=DDPGNetworkParameters(**actor_settings),
            critic_training=DDPGNetworkParameters(**critic_settings),
        )
        # DDPG can handle continuous and discrete action spaces
        if env.action_type == EnvType.CONTINUOUS_ACTION:
            action_range = env.action_space.high
        else:
            action_range = None
        # NOTE(review): use_gpu is hard-coded to False here even when gpu_id
        # selects a CUDA device above — confirm this is intended for DDPG.
        trainer = DDPGTrainer(
            trainer_params,
            env.normalization,
            env.normalization_action,
            use_gpu=False,
            action_range=action_range,
        )
    else:
        raise NotImplementedError("Model of type {} not supported".format(model_type))
    return run(
        c2_device,
        env,
        model_type,
        trainer,
        "{} test run".format(env_type),
        score_bar,
        **params["run_details"],
        save_timesteps_to_dataset=save_timesteps_to_dataset,
    )
def create_trainer(model_type, params, rl_parameters, use_gpu, env):
    """Build and return the trainer matching ``model_type``.

    Args:
        model_type: a ModelType ``.value`` string selecting the branch.
        params: dict of settings; each branch reads its own keys
            ("training", "rainbow", or the DDPG trio). Values may already be
            parameter objects or still raw dicts (hence the isinstance
            checks before converting).
        rl_parameters: shared RLParameters for every trainer.
        use_gpu: bool; selects the caffe2 device and is forwarded to the
            PyTorch trainers.
        env: OpenAI gym wrapper providing normalization info, action data,
            and image geometry.

    Returns:
        The constructed trainer instance.

    Raises:
        NotImplementedError: for an unrecognized model_type.
    """
    c2_device = core.DeviceOption(caffe2_pb2.CUDA if use_gpu else caffe2_pb2.CPU)
    if model_type == ModelType.PYTORCH_DISCRETE_DQN.value:
        training_parameters = params["training"]
        if isinstance(training_parameters, dict):
            training_parameters = TrainingParameters(**training_parameters)
        rainbow_parameters = params["rainbow"]
        if isinstance(rainbow_parameters, dict):
            rainbow_parameters = RainbowDQNParameters(**rainbow_parameters)
        if env.img:
            # Image input: overwrite CNN input geometry with the env's
            # actual values (mutates the existing cnn_parameters in place).
            assert (
                training_parameters.cnn_parameters is not None
            ), "Missing CNN parameters for image input"
            training_parameters.cnn_parameters.conv_dims[0] = env.num_input_channels
            training_parameters.cnn_parameters.input_height = env.height
            training_parameters.cnn_parameters.input_width = env.width
            training_parameters.cnn_parameters.num_input_channels = (
                env.num_input_channels
            )
        else:
            assert (
                training_parameters.cnn_parameters is None
            ), "Extra CNN parameters for non-image input"
        trainer_params = DiscreteActionModelParameters(
            actions=env.actions,
            rl=rl_parameters,
            training=training_parameters,
            rainbow=rainbow_parameters,
        )
        trainer = DQNTrainer(trainer_params, env.normalization, use_gpu)
    elif model_type == ModelType.DISCRETE_ACTION.value:
        # caffe2 path: trainer construction happens under the device scope.
        with core.DeviceScope(c2_device):
            training_parameters = params["training"]
            if isinstance(training_parameters, dict):
                training_parameters = TrainingParameters(**training_parameters)
            if env.img:
                assert (
                    training_parameters.cnn_parameters is not None
                ), "Missing CNN parameters for image input"
                training_parameters.cnn_parameters.conv_dims[0] = env.num_input_channels
                training_parameters.cnn_parameters.input_height = env.height
                training_parameters.cnn_parameters.input_width = env.width
                training_parameters.cnn_parameters.num_input_channels = (
                    env.num_input_channels
                )
            else:
                assert (
                    training_parameters.cnn_parameters is None
                ), "Extra CNN parameters for non-image input"
            trainer_params = DiscreteActionModelParameters(
                actions=env.actions, rl=rl_parameters, training=training_parameters
            )
            trainer = DiscreteActionTrainer(trainer_params, env.normalization)
    elif model_type == ModelType.PYTORCH_PARAMETRIC_DQN.value:
        training_parameters = params["training"]
        if isinstance(training_parameters, dict):
            training_parameters = TrainingParameters(**training_parameters)
        rainbow_parameters = params["rainbow"]
        if isinstance(rainbow_parameters, dict):
            rainbow_parameters = RainbowDQNParameters(**rainbow_parameters)
        if env.img:
            assert (
                training_parameters.cnn_parameters is not None
            ), "Missing CNN parameters for image input"
            # Only the channel count is synced for parametric models.
            training_parameters.cnn_parameters.conv_dims[0] = env.num_input_channels
        else:
            assert (
                training_parameters.cnn_parameters is None
            ), "Extra CNN parameters for non-image input"
        trainer_params = ContinuousActionModelParameters(
            rl=rl_parameters,
            training=training_parameters,
            knn=KnnParameters(model_type="DQN"),
            rainbow=rainbow_parameters,
        )
        trainer = ParametricDQNTrainer(
            trainer_params, env.normalization, env.normalization_action, use_gpu
        )
    elif model_type == ModelType.PARAMETRIC_ACTION.value:
        with core.DeviceScope(c2_device):
            training_parameters = params["training"]
            if isinstance(training_parameters, dict):
                training_parameters = TrainingParameters(**training_parameters)
            if env.img:
                assert (
                    training_parameters.cnn_parameters is not None
                ), "Missing CNN parameters for image input"
                training_parameters.cnn_parameters.conv_dims[0] = env.num_input_channels
            else:
                assert (
                    training_parameters.cnn_parameters is None
                ), "Extra CNN parameters for non-image input"
            trainer_params = ContinuousActionModelParameters(
                rl=rl_parameters,
                training=training_parameters,
                knn=KnnParameters(model_type="DQN"),
            )
            trainer = ContinuousActionDQNTrainer(
                trainer_params, env.normalization, env.normalization_action
            )
    elif model_type == ModelType.CONTINUOUS_ACTION.value:
        training_parameters = params["shared_training"]
        if isinstance(training_parameters, dict):
            training_parameters = DDPGTrainingParameters(**training_parameters)
        actor_parameters = params["actor_training"]
        if isinstance(actor_parameters, dict):
            actor_parameters = DDPGNetworkParameters(**actor_parameters)
        critic_parameters = params["critic_training"]
        if isinstance(critic_parameters, dict):
            critic_parameters = DDPGNetworkParameters(**critic_parameters)
        trainer_params = DDPGModelParameters(
            rl=rl_parameters,
            shared_training=training_parameters,
            actor_training=actor_parameters,
            critic_training=critic_parameters,
        )
        # Action bounds are passed to DDPG as float32 tensors of shape
        # (1, action_dim) via unsqueeze(dim=0).
        action_range_low = env.action_space.low.astype(np.float32)
        action_range_high = env.action_space.high.astype(np.float32)
        trainer = DDPGTrainer(
            trainer_params,
            env.normalization,
            env.normalization_action,
            torch.from_numpy(action_range_low).unsqueeze(dim=0),
            torch.from_numpy(action_range_high).unsqueeze(dim=0),
            use_gpu,
        )
    else:
        raise NotImplementedError("Model of type {} not supported".format(model_type))
    return trainer
def run_gym(
    params,
    score_bar,
    gpu_id,
    save_timesteps_to_dataset=None,
    start_saving_from_episode=0,
    batch_rl_file_path=None,
):
    """Construct an environment and trainer from ``params`` and run gym.

    Args:
        params: dict with keys "rl", "env", "model_type",
            "max_replay_memory_size", "training" (or the DDPG trio
            "shared_training"/"actor_training"/"critic_training"), and
            "run_details" (forwarded as kwargs to ``run``).
        score_bar: forwarded to ``run``.
        gpu_id: GPU ordinal, or USE_CPU to select the CPU device.
        save_timesteps_to_dataset, start_saving_from_episode,
        batch_rl_file_path: forwarded to ``run``.

    Returns:
        Whatever ``run`` returns.

    Raises:
        NotImplementedError: for an unrecognized model_type.
    """
    # Caffe2 core uses the min of caffe2_log_level and minloglevel
    # to determine loglevel. See caffe2/caffe2/core/logging.cc for more info.
    core.GlobalInit(["caffe2", "--caffe2_log_level=2", "--minloglevel=2"])
    logger.info("Running gym with params")
    logger.info(params)
    rl_parameters = RLParameters(**params["rl"])
    env_type = params["env"]
    env = OpenAIGymEnvironment(
        env_type,
        rl_parameters.epsilon,
        rl_parameters.softmax_policy,
        params["max_replay_memory_size"],
        rl_parameters.gamma,
    )
    model_type = params["model_type"]
    c2_device = core.DeviceOption(
        caffe2_pb2.CPU if gpu_id == USE_CPU else caffe2_pb2.CUDA, gpu_id)
    use_gpu = gpu_id != USE_CPU
    if model_type == ModelType.PYTORCH_DISCRETE_DQN.value:
        training_settings = params["training"]
        training_parameters = TrainingParameters(**training_settings)
        if env.img:
            # Image input: rebuild CNN parameters from the raw dict, then
            # overwrite the input geometry with the env's actual values.
            assert (training_parameters.cnn_parameters is
                    not None), "Missing CNN parameters for image input"
            training_parameters.cnn_parameters = CNNParameters(
                **training_settings["cnn_parameters"])
            training_parameters.cnn_parameters.conv_dims[
                0] = env.num_input_channels
            training_parameters.cnn_parameters.input_height = env.height
            training_parameters.cnn_parameters.input_width = env.width
            training_parameters.cnn_parameters.num_input_channels = (
                env.num_input_channels)
        else:
            assert (training_parameters.cnn_parameters is
                    None), "Extra CNN parameters for non-image input"
        trainer_params = DiscreteActionModelParameters(
            actions=env.actions,
            rl=rl_parameters,
            training=training_parameters)
        trainer = DQNTrainer(trainer_params, env.normalization, use_gpu)
    elif model_type == ModelType.DISCRETE_ACTION.value:
        # caffe2 path: trainer construction happens under the device scope.
        with core.DeviceScope(c2_device):
            training_settings = params["training"]
            training_parameters = TrainingParameters(**training_settings)
            if env.img:
                assert (training_parameters.cnn_parameters is
                        not None), "Missing CNN parameters for image input"
                training_parameters.cnn_parameters = CNNParameters(
                    **training_settings["cnn_parameters"])
                training_parameters.cnn_parameters.conv_dims[
                    0] = env.num_input_channels
                training_parameters.cnn_parameters.input_height = env.height
                training_parameters.cnn_parameters.input_width = env.width
                training_parameters.cnn_parameters.num_input_channels = (
                    env.num_input_channels)
            else:
                assert (training_parameters.cnn_parameters is
                        None), "Extra CNN parameters for non-image input"
            trainer_params = DiscreteActionModelParameters(
                actions=env.actions,
                rl=rl_parameters,
                training=training_parameters)
            trainer = DiscreteActionTrainer(trainer_params, env.normalization)
    elif model_type == ModelType.PYTORCH_PARAMETRIC_DQN.value:
        training_settings = params["training"]
        training_parameters = TrainingParameters(**training_settings)
        if env.img:
            assert (training_parameters.cnn_parameters is
                    not None), "Missing CNN parameters for image input"
            training_parameters.cnn_parameters = CNNParameters(
                **training_settings["cnn_parameters"])
            # Only the channel count is synced for parametric models.
            training_parameters.cnn_parameters.conv_dims[
                0] = env.num_input_channels
        else:
            assert (training_parameters.cnn_parameters is
                    None), "Extra CNN parameters for non-image input"
        trainer_params = ContinuousActionModelParameters(
            rl=rl_parameters,
            training=training_parameters,
            knn=KnnParameters(model_type="DQN"),
        )
        trainer = ParametricDQNTrainer(trainer_params, env.normalization,
                                       env.normalization_action, use_gpu)
    elif model_type == ModelType.PARAMETRIC_ACTION.value:
        with core.DeviceScope(c2_device):
            training_settings = params["training"]
            training_parameters = TrainingParameters(**training_settings)
            if env.img:
                assert (training_parameters.cnn_parameters is
                        not None), "Missing CNN parameters for image input"
                training_parameters.cnn_parameters = CNNParameters(
                    **training_settings["cnn_parameters"])
                training_parameters.cnn_parameters.conv_dims[
                    0] = env.num_input_channels
            else:
                assert (training_parameters.cnn_parameters is
                        None), "Extra CNN parameters for non-image input"
            trainer_params = ContinuousActionModelParameters(
                rl=rl_parameters,
                training=training_parameters,
                knn=KnnParameters(model_type="DQN"),
            )
            trainer = ContinuousActionDQNTrainer(trainer_params,
                                                 env.normalization,
                                                 env.normalization_action)
    elif model_type == ModelType.CONTINUOUS_ACTION.value:
        training_settings = params["shared_training"]
        actor_settings = params["actor_training"]
        critic_settings = params["critic_training"]
        trainer_params = DDPGModelParameters(
            rl=rl_parameters,
            shared_training=DDPGTrainingParameters(**training_settings),
            actor_training=DDPGNetworkParameters(**actor_settings),
            critic_training=DDPGNetworkParameters(**critic_settings),
        )
        # Action bounds are passed to DDPG as float32 tensors of shape
        # (1, action_dim) via unsqueeze(dim=0).
        action_range_low = env.action_space.low.astype(np.float32)
        action_range_high = env.action_space.high.astype(np.float32)
        trainer = DDPGTrainer(
            trainer_params,
            env.normalization,
            env.normalization_action,
            torch.from_numpy(action_range_low).unsqueeze(dim=0),
            torch.from_numpy(action_range_high).unsqueeze(dim=0),
            use_gpu,
        )
    else:
        raise NotImplementedError(
            "Model of type {} not supported".format(model_type))
    return run(
        c2_device,
        env,
        model_type,
        trainer,
        "{} test run".format(env_type),
        score_bar,
        **params["run_details"],
        save_timesteps_to_dataset=save_timesteps_to_dataset,
        start_saving_from_episode=start_saving_from_episode,
        batch_rl_file_path=batch_rl_file_path,
    )
def run_gym(params, score_bar, gpu_id):
    """Construct an environment and trainer from ``params`` and run gym.

    NOTE: this function mutates the caller's ``params`` sub-dicts in place —
    it renames "reward_discount_factor" to "gamma" in params['rl'] and
    "learning_rate_decay" to "gamma" in each training settings dict before
    unpacking them into parameter objects.

    Args:
        params: dict with keys 'rl', 'env', 'model_type', 'training' (or
            the DDPG trio), optionally 'cnn', and "run_details"
            (forwarded as kwargs to ``run``).
        score_bar: forwarded to ``run``.
        gpu_id: GPU ordinal, or USE_CPU to select the CPU device.

    Returns:
        Whatever ``run`` returns.

    Raises:
        NotImplementedError: for an unrecognized model_type.
    """
    rl_settings = params['rl']
    # Rename the config key to the name RLParameters expects.
    rl_settings['gamma'] = rl_settings['reward_discount_factor']
    del rl_settings['reward_discount_factor']
    env_type = params['env']
    env = OpenAIGymEnvironment(env_type, rl_settings['epsilon'])
    model_type = params['model_type']
    c2_device = core.DeviceOption(
        caffe2_pb2.CPU if gpu_id == USE_CPU else caffe2_pb2.CUDA,
        gpu_id,
    )
    if model_type == ModelType.DISCRETE_ACTION.value:
        with core.DeviceScope(c2_device):
            training_settings = params['training']
            # NOTE(review): 'learning_rate_decay' is mapped onto a key named
            # 'gamma' in TrainingParameters — presumably that field holds the
            # LR decay factor there; confirm against TrainingParameters.
            training_settings['gamma'] = training_settings[
                'learning_rate_decay']
            del training_settings['learning_rate_decay']
            trainer_params = DiscreteActionModelParameters(
                actions=env.actions,
                rl=RLParameters(**rl_settings),
                training=TrainingParameters(**training_settings))
            if env.img:
                # Image observations get a convolutional trainer wrapping
                # the fully-connected parameters above.
                trainer = DiscreteActionConvTrainer(
                    DiscreteActionConvModelParameters(
                        fc_parameters=trainer_params,
                        cnn_parameters=CNNModelParameters(**params['cnn']),
                        num_input_channels=env.num_input_channels,
                        img_height=env.height,
                        img_width=env.width),
                    env.normalization,
                )
            else:
                trainer = DiscreteActionTrainer(
                    trainer_params,
                    env.normalization,
                )
    elif model_type == ModelType.PARAMETRIC_ACTION.value:
        with core.DeviceScope(c2_device):
            training_settings = params['training']
            training_settings['gamma'] = training_settings[
                'learning_rate_decay']
            del training_settings['learning_rate_decay']
            trainer_params = ContinuousActionModelParameters(
                rl=RLParameters(**rl_settings),
                training=TrainingParameters(**training_settings),
                knn=KnnParameters(model_type='DQN', ),
            )
            trainer = ContinuousActionDQNTrainer(trainer_params,
                                                 env.normalization,
                                                 env.normalization_action)
    elif model_type == ModelType.CONTINUOUS_ACTION.value:
        training_settings = params['shared_training']
        training_settings['gamma'] = training_settings['learning_rate_decay']
        del training_settings['learning_rate_decay']
        actor_settings = params['actor_training']
        critic_settings = params['critic_training']
        trainer_params = DDPGModelParameters(
            rl=DDPGRLParameters(**rl_settings),
            shared_training=DDPGTrainingParameters(**training_settings),
            actor_training=DDPGNetworkParameters(**actor_settings),
            critic_training=DDPGNetworkParameters(**critic_settings),
        )
        trainer = DDPGTrainer(
            trainer_params,
            EnvDetails(
                state_dim=env.state_dim,
                action_dim=env.action_dim,
                action_range=(env.action_space.low, env.action_space.high),
            ))
    else:
        raise NotImplementedError(
            "Model of type {} not supported".format(model_type))
    return run(env, model_type, trainer, "{} test run".format(env_type),
               score_bar, **params["run_details"])