def __init__(self):
    super().__init__()
    self.input_embedders_parameters = {
        'observation': InputEmbedderParameters(activation_function='leaky_relu'),
        'measurements': InputEmbedderParameters(activation_function='leaky_relu'),
        'goal': InputEmbedderParameters(activation_function='leaky_relu')
    }
    self.input_embedders_parameters['observation'].scheme = [
        Conv2d(32, 8, 4),
        Conv2d(64, 4, 2),
        Conv2d(64, 3, 1),
        Dense(512),
    ]
    self.input_embedders_parameters['measurements'].scheme = [
        Dense(128),
        Dense(128),
        Dense(128),
    ]
    self.input_embedders_parameters['goal'].scheme = [
        Dense(128),
        Dense(128),
        Dense(128),
    ]
    self.middleware_parameters = FCMiddlewareParameters(activation_function='leaky_relu',
                                                        scheme=MiddlewareScheme.Empty)
    self.heads_parameters = [MeasurementsPredictionHeadParameters(activation_function='leaky_relu')]
    self.async_training = False
    self.batch_size = 64
    self.adam_optimizer_beta1 = 0.95
def __init__(self):
    super().__init__()
    self.input_embedders_parameters = {
        'observation': InputEmbedderParameters(batchnorm=True),
        'action': InputEmbedderParameters(scheme=EmbedderScheme.Shallow)
    }
    self.middleware_parameters = FCMiddlewareParameters()
    self.heads_parameters = [VHeadParameters()]
    self.optimizer_type = 'Adam'
    self.batch_size = 64
    self.async_training = False
    self.learning_rate = 0.001
    self.create_target_network = True
    self.shared_optimizer = True
    self.scale_down_gradients_by_number_of_workers_for_sync_training = False
def __init__(self, num_q_networks):
    super().__init__()
    self.input_embedders_parameters = {
        'observation': InputEmbedderParameters(),
        'action': InputEmbedderParameters(scheme=EmbedderScheme.Shallow)
    }
    self.middleware_parameters = FCMiddlewareParameters(num_streams=num_q_networks)
    self.heads_parameters = [TD3VHeadParameters()]
    self.optimizer_type = 'Adam'
    self.adam_optimizer_beta2 = 0.999
    self.optimizer_epsilon = 1e-8
    self.batch_size = 100
    self.async_training = False
    self.learning_rate = 0.001
    self.create_target_network = True
    self.shared_optimizer = True
    self.scale_down_gradients_by_number_of_workers_for_sync_training = False
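# Example instantiation, as a sketch (the enclosing class name below is an
# assumption, not taken from this file): TD3 maintains two Q networks
# ("twin critics"), so num_streams gives the middleware one stream per Q head.
#
#   critic_params = TD3CriticNetworkParameters(num_q_networks=2)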
def create_input_embedder(scheme_dict, embedder_type, activation_function):
    """Creates an rl coach input embedder.

    scheme_dict - Dictionary where the key is the observation and the value is a
        dictionary containing all the information required by the scheme creation
        methods defined above.
    embedder_type - String indicating the desired embedder type; available types
        are defined in SCHEME_TYPE.
    activation_function - Desired activation function for the embedder.
    """
    try:
        if not ActivationFunctions.has_activation_function(activation_function):
            raise Exception("Invalid activation function for input embedder")
        embedder_types_parameters = dict()
        for observation, info in scheme_dict.items():
            scheme = SCHEME_TYPE[embedder_type](info)
            embedder_types_parameters[observation] = InputEmbedderParameters(
                scheme=scheme,
                activation_function=activation_function
            )
        return embedder_types_parameters
    except KeyError as err:
        raise Exception("Input embedder, key {} not found".format(err.args[0]))
    except Exception as err:
        raise Exception("Error while creating input embedder: {}".format(err))
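# For reference, a hand-rolled equivalent of what create_input_embedder() returns:
# a dict mapping each observation name to an InputEmbedderParameters with an
# explicit scheme. A minimal sketch assuming the standard rl_coach package
# layout; the observation names and layer sizes are illustrative only.
from rl_coach.architectures.embedder_parameters import InputEmbedderParameters
from rl_coach.architectures.layers import Conv2d, Dense

manual_embedders = {
    'observation': InputEmbedderParameters(
        scheme=[Conv2d(32, 8, 4), Conv2d(64, 4, 2), Dense(256)],  # conv scheme for images
        activation_function='relu'),
    'measurements': InputEmbedderParameters(
        scheme=[Dense(128), Dense(128)],  # small fully-connected scheme for vectors
        activation_function='relu'),
}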
def __init__(self):
    super().__init__()
    self.input_embedders_parameters = {'observation': InputEmbedderParameters()}
    self.middleware_parameters = FCMiddlewareParameters()
    self.heads_parameters = [DNDQHeadParameters()]
    self.optimizer_type = 'Adam'
    self.should_get_softmax_probabilities = False
def get_input_embedder(self, input_name: str, embedder_params: InputEmbedderParameters):
    """
    Given an input embedder parameters class, creates the input embedder and returns it
    :param input_name: the name of the input to the embedder (used for retrieving the shape).
        The input should be a value within the state or the action.
    :param embedder_params: the parameters of the class of the embedder
    :return: the embedder instance
    """
    allowed_inputs = copy.copy(self.spaces.state.sub_spaces)
    allowed_inputs["action"] = copy.copy(self.spaces.action)
    allowed_inputs["goal"] = copy.copy(self.spaces.goal)

    if input_name not in allowed_inputs.keys():
        raise ValueError("The key for the input embedder ({}) must match one of the following keys: {}"
                         .format(input_name, allowed_inputs.keys()))

    emb_type = "vector"
    if isinstance(allowed_inputs[input_name], TensorObservationSpace):
        emb_type = "tensor"
    elif isinstance(allowed_inputs[input_name], PlanarMapsObservationSpace):
        emb_type = "image"

    embedder_path = embedder_params.path(emb_type)
    embedder_params_copy = copy.copy(embedder_params)
    embedder_params_copy.activation_function = utils.get_activation_function(
        embedder_params.activation_function)
    embedder_params_copy.input_rescaling = embedder_params_copy.input_rescaling[emb_type]
    embedder_params_copy.input_offset = embedder_params_copy.input_offset[emb_type]
    embedder_params_copy.name = input_name
    module = dynamic_import_and_instantiate_module_from_params(
        embedder_params_copy,
        path=embedder_path,
        positional_args=[allowed_inputs[input_name].shape])
    return module
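# Usage sketch (hypothetical context): this is called on an architecture whose
# self.spaces has already been populated from the environment, e.g.
#
#   params = InputEmbedderParameters(activation_function='relu')
#   observation_embedder = self.get_input_embedder('observation', params)
#
# Passing a name that is not a state sub-space, 'action', or 'goal' raises the
# ValueError above.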
def __init__(self):
    super().__init__()
    self.input_embedders_parameters = {
        'observation': InputEmbedderParameters()
    }
    self.middleware_parameters = FCMiddlewareParameters()
    self.heads_parameters = [PolicyHeadParameters()]
    self.async_training = True
def __init__(self):
    super().__init__()
    self.input_embedders_parameters = {'observation': InputEmbedderParameters()}
    self.middleware_parameters = FCMiddlewareParameters()
    self.heads_parameters = [VHeadParameters(loss_weight=0.5), PolicyHeadParameters(loss_weight=1.0)]
    self.optimizer_type = 'Adam'
    self.clip_gradients = 40.0
    self.async_training = True
def test_image_embedder():
    params = InputEmbedderParameters(scheme=EmbedderScheme.Medium)
    emb = ImageEmbedder(params=params)
    emb.initialize()
    input_data = mx.nd.random.uniform(low=0, high=1, shape=(10, 3, 244, 244))
    output = emb(input_data)
    assert len(output.shape) == 2  # since last block was flatten
    assert output.shape[0] == 10  # since batch_size is 10
def __init__(self):
    super().__init__()
    self.input_embedders_parameters = {
        'observation': InputEmbedderParameters()
    }
    self.middleware_parameters = FCMiddlewareParameters()
    self.heads_parameters = [DNDQHeadParameters()]
    self.optimizer_type = 'Adam'
def __init__(self):
    super().__init__()
    self.input_embedders_parameters = {'observation': InputEmbedderParameters()}
    self.middleware_parameters = FCMiddlewareParameters()
    self.heads_parameters = [QHeadParameters()]
    self.optimizer_type = 'Adam'
    self.async_training = True
    self.shared_optimizer = True
    self.create_target_network = True
def test_vector_embedder():
    params = InputEmbedderParameters(scheme=EmbedderScheme.Medium)
    emb = VectorEmbedder(params=params)
    emb.initialize()
    input_data = mx.nd.random.uniform(low=0, high=255, shape=(10, 100))
    output = emb(input_data)
    assert len(output.shape) == 2  # since last block was flatten
    assert output.shape[0] == 10  # since batch_size is 10
    assert output.shape[1] == 256  # since last dense layer has 256 units
def __init__(self):
    super().__init__()
    self.input_embedders_parameters = {'observation': InputEmbedderParameters()}
    self.middleware_parameters = FCMiddlewareParameters(scheme=MiddlewareScheme.Medium)
    self.heads_parameters = [RegressionHeadParameters()]
    self.optimizer_type = 'Adam'
    self.batch_size = 32
    self.replace_mse_with_huber_loss = False
    self.create_target_network = False
def __init__(self):
    super().__init__()
    self.input_embedders_parameters = {
        'observation': InputEmbedderParameters()
    }
    self.middleware_parameters = FCMiddlewareParameters(activation_function='none')
    self.heads_parameters = [RNDHeadParameters()]
    self.optimizer_type = 'Adam'
    self.clip_gradients = None
    self.create_target_network = False
def __init__(self):
    super().__init__()
    self.input_embedders_parameters = {'observation': InputEmbedderParameters(scheme=EmbedderScheme.Empty)}
    self.middleware_parameters = FCMiddlewareParameters(scheme=MiddlewareScheme.Empty)
    self.heads_parameters = [SACQHeadParameters()]  # SACQHeadParameters includes the topology of the head
    self.rescale_gradient_from_head_by_factor = [1]
    self.optimizer_type = 'Adam'
    self.batch_size = 256
    self.async_training = False
    self.learning_rate = 0.0003
    self.create_target_network = False
def __init__(self):
    super().__init__()
    self.input_embedders_parameters = {
        'observation': InputEmbedderParameters(activation_function='leaky_relu',
                                               input_rescaling={'image': 1.0})
    }
    self.middleware_parameters = FCMiddlewareParameters(scheme=MiddlewareScheme.Empty)
    self.heads_parameters = [RNDHeadParameters()]
    self.create_target_network = False
    self.optimizer_type = 'Adam'
    self.batch_size = 100
    self.learning_rate = 0.0001
    self.should_get_softmax_probabilities = False
def __init__(self):
    super().__init__()
    self.input_embedders_parameters = {
        'observation': InputEmbedderParameters(activation_function='tanh')
    }
    self.middleware_parameters = FCMiddlewareParameters(activation_function='tanh')
    self.heads_parameters = [VHeadParameters()]
    self.async_training = True
    self.l2_regularization = 0
    self.create_target_network = True
    self.batch_size = 128
def __init__(self):
    super().__init__()
    self.input_embedders_parameters = {'observation': InputEmbedderParameters(activation_function='relu')}
    self.middleware_parameters = FCMiddlewareParameters(activation_function='relu')
    self.heads_parameters = [SACPolicyHeadParameters()]
    self.rescale_gradient_from_head_by_factor = [1]
    self.optimizer_type = 'Adam'
    self.batch_size = 256
    self.async_training = False
    self.learning_rate = 0.0003
    self.create_target_network = False
    self.l2_regularization = 0  # weight decay regularization; not used in the original paper
def __init__(self):
    super().__init__()
    self.input_embedders_parameters = {'observation': InputEmbedderParameters(activation_function='relu')}
    self.middleware_parameters = FCMiddlewareParameters(activation_function='relu')
    self.heads_parameters = [VHeadParameters(initializer='xavier')]
    self.rescale_gradient_from_head_by_factor = [1]
    self.optimizer_type = 'Adam'
    self.batch_size = 256
    self.async_training = False
    self.learning_rate = 0.0003  # 3e-4, see appendix D in the paper
    # tau is set in SoftActorCriticAlgorithmParameters.rate_for_copying_weights_to_target
    self.create_target_network = True
def __init__(self):
    super().__init__()
    self.input_embedders_parameters = {'observation': InputEmbedderParameters()}
    self.middleware_parameters = FCMiddlewareParameters()
    self.heads_parameters = [DDPGActorHeadParameters(batchnorm=False)]
    self.optimizer_type = 'Adam'
    self.adam_optimizer_beta2 = 0.999
    self.optimizer_epsilon = 1e-8
    self.batch_size = 100
    self.async_training = False
    self.learning_rate = 0.001
    self.create_target_network = True
    self.shared_optimizer = True
    self.scale_down_gradients_by_number_of_workers_for_sync_training = False
def __init__(self):
    super().__init__()
    self.input_embedders_parameters = {
        'observation': InputEmbedderParameters(scheme=EmbedderScheme.Empty)
    }
    self.middleware_parameters = VGG16MiddlewareParameters(
        scheme=MiddlewareScheme.Medium
    )  # alternative: FCMiddlewareParameters(scheme=MiddlewareScheme.Medium)
    self.heads_parameters = [QHeadParameters()]
    self.optimizer_type = 'Adam'
    self.batch_size = 32
    self.replace_mse_with_huber_loss = True
    self.create_target_network = True
    self.should_get_softmax_probabilities = False
def __init__(self):
    super().__init__()
    self.input_embedders_parameters = {'observation': InputEmbedderParameters(activation_function='tanh')}
    self.middleware_parameters = FCMiddlewareParameters(activation_function='tanh')
    self.heads_parameters = [VHeadParameters(), PPOHeadParameters()]
    self.batch_size = 64
    self.optimizer_type = 'Adam'
    self.clip_gradients = None
    self.use_separate_networks_per_head = True
    self.async_training = False
    self.l2_regularization = 0
    # The target network is used in order to freeze the old policy, while making
    # updates to the new one in train_network()
    self.create_target_network = True
    self.shared_optimizer = True
    self.scale_down_gradients_by_number_of_workers_for_sync_training = True
schedule_params.heatup_steps = EnvironmentSteps(0)

################
# Agent Params #
################
agent_params = DDPGAgentParameters()

# actor
actor_network = agent_params.network_wrappers['actor']
actor_network.learning_rate = 0.001
actor_network.batch_size = 256
actor_network.optimizer_epsilon = 1e-08
actor_network.adam_optimizer_beta1 = 0.9
actor_network.adam_optimizer_beta2 = 0.999
actor_network.input_embedders_parameters = {
    'observation': InputEmbedderParameters(scheme=EmbedderScheme.Empty),
    'desired_goal': InputEmbedderParameters(scheme=EmbedderScheme.Empty)
}
actor_network.middleware_parameters = FCMiddlewareParameters(scheme=[Dense(256), Dense(256), Dense(256)])
actor_network.heads_parameters[0].batchnorm = False

# critic
critic_network = agent_params.network_wrappers['critic']
critic_network.learning_rate = 0.001
critic_network.batch_size = 256
critic_network.optimizer_epsilon = 1e-08
critic_network.adam_optimizer_beta1 = 0.9
critic_network.adam_optimizer_beta2 = 0.999
critic_network.input_embedders_parameters = {
    'action': InputEmbedderParameters(scheme=EmbedderScheme.Empty),
    'desired_goal': InputEmbedderParameters(scheme=EmbedderScheme.Empty),
schedule_params = ScheduleParameters()
schedule_params.improve_steps = TrainingSteps(10000000000)
schedule_params.steps_between_evaluation_periods = EnvironmentEpisodes(20)
schedule_params.evaluation_steps = EnvironmentEpisodes(1)
schedule_params.heatup_steps = EnvironmentSteps(0)

#########
# Agent #
#########
agent_params = ActorCriticAgentParameters()
agent_params.algorithm.apply_gradients_every_x_episodes = 1
agent_params.algorithm.num_steps_between_gradient_updates = 20
agent_params.algorithm.beta_entropy = 0.005
agent_params.network_wrappers['main'].learning_rate = 0.00002
agent_params.network_wrappers['main'].input_embedders_parameters['observation'] = \
    InputEmbedderParameters(scheme=[Dense(200)])
agent_params.network_wrappers['main'].middleware_parameters = \
    LSTMMiddlewareParameters(scheme=MiddlewareScheme.Empty, number_of_lstm_cells=128)

agent_params.input_filter = InputFilter()
agent_params.input_filter.add_reward_filter('rescale', RewardRescaleFilter(1/20.))
agent_params.input_filter.add_observation_filter('observation', 'normalize', ObservationNormalizationFilter())

###############
# Environment #
###############
env_params = GymVectorEnvironment(level=SingleLevelSelection(mujoco_v2))

########
# Test #
########
schedule_params = ScheduleParameters()
schedule_params.improve_steps = TrainingSteps(10000000)
schedule_params.steps_between_evaluation_periods = EnvironmentEpisodes(40)
schedule_params.evaluation_steps = EnvironmentEpisodes(5)
schedule_params.heatup_steps = EnvironmentSteps(0)

#########
# Agent #
#########
agent_params = ClippedPPOAgentParameters()
agent_params.network_wrappers['main'].input_embedders_parameters = {
    'STEREO_CAMERAS': InputEmbedderParameters(
        scheme=[Conv2d(32, 8, 4), Conv2d(32, 4, 2), Conv2d(64, 4, 2)],
        activation_function='relu',
        dropout_rate=0.3),
    'LIDAR': InputEmbedderParameters(
        scheme=[Dense(64), Dense(32)],
        activation_function='relu',
        dropout_rate=0.3)
}
agent_params.network_wrappers['main'].middleware_parameters = \
    FCMiddlewareParameters(
        scheme=[Dense(256)],
        activation_function='relu',
        dropout_rate=0.3
    )
#########
# Agent #
#########
agent_params = ActorCriticAgentParameters()
agent_params.algorithm.policy_gradient_rescaler = PolicyGradientRescaler.GAE
agent_params.algorithm.apply_gradients_every_x_episodes = 1
agent_params.algorithm.num_steps_between_gradient_updates = 20
agent_params.algorithm.gae_lambda = 0.96
agent_params.algorithm.beta_entropy = 0
agent_params.network_wrappers['main'].clip_gradients = 10.0
agent_params.network_wrappers['main'].learning_rate = 0.00001
# agent_params.network_wrappers['main'].batch_size = 20
agent_params.network_wrappers['main'].input_embedders_parameters = {
    "screen": InputEmbedderParameters(input_rescaling={'image': 3.0})
}

agent_params.exploration = AdditiveNoiseParameters()
agent_params.exploration.noise_percentage_schedule = ConstantSchedule(0.05)
# agent_params.exploration.noise_percentage_schedule = LinearSchedule(0.4, 0.05, 100000)
agent_params.exploration.evaluation_noise_percentage = 0.05

agent_params.network_wrappers['main'].batch_size = 64
agent_params.network_wrappers['main'].optimizer_epsilon = 1e-5
agent_params.network_wrappers['main'].adam_optimizer_beta2 = 0.999

###############
# Environment #
###############
schedule_params = ScheduleParameters()
schedule_params.improve_steps = TrainingSteps(10000000)
schedule_params.steps_between_evaluation_periods = EnvironmentEpisodes(40)
schedule_params.evaluation_steps = EnvironmentEpisodes(5)
schedule_params.heatup_steps = EnvironmentSteps(0)

#########
# Agent #
#########
agent_params = ClippedPPOAgentParameters()
# agent_params.network_wrappers['main'].input_embedders_parameters = {
#     'left_camera': InputEmbedderParameters(activation_function='relu', dropout_rate=0.3),
#     'stereo': InputEmbedderParameters(activation_function='relu', dropout_rate=0.3)
# }
agent_params.network_wrappers['main'].input_embedders_parameters = {
    'left_camera': InputEmbedderParameters(activation_function='relu'),
    'stereo': InputEmbedderParameters(activation_function='relu')
}
agent_params.network_wrappers['main'].learning_rate = 0.0003
agent_params.network_wrappers['main'].middleware_parameters.activation_function = 'relu'
agent_params.network_wrappers['main'].batch_size = 64
agent_params.network_wrappers['main'].optimizer_epsilon = 1e-5
agent_params.network_wrappers['main'].adam_optimizer_beta2 = 0.999
# agent_params.network_wrappers['main'].learning_rate_decay_steps = 60000
# agent_params.network_wrappers['main'].learning_rate_decay_rate = 0.95
# agent_params.network_wrappers['main'].input_embedders_parameters['observation'].batchnorm = True
# agent_params.network_wrappers['main'].input_embedders_parameters['observation'].dropout_rate = 0.3
# agent_params.network_wrappers['main'].l2_regularization = 2e-5
agent_params.algorithm.beta_entropy = 0.001
    Conv2d(32, 8, 4),
    BatchnormActivationDropout(activation_function='relu'),
    Conv2d(64, 4, 2),
    BatchnormActivationDropout(activation_function='relu'),
    Conv2d(64, 3, 1),
    BatchnormActivationDropout(activation_function='relu'),
    Flatten(),
    Dense(256),
    BatchnormActivationDropout(activation_function='relu')
]

# Actor
actor_network = agent_params.network_wrappers['actor']
actor_network.input_embedders_parameters = {
    'measurements': InputEmbedderParameters(scheme=EmbedderScheme.Empty),
    agent_params.algorithm.agent_obs_key: InputEmbedderParameters(scheme=camera_obs_scheme,
                                                                  activation_function='none')
}
actor_network.middleware_parameters.scheme = [Dense(300), Dense(200)]
actor_network.learning_rate = 1e-4

# Critic
critic_network = agent_params.network_wrappers['critic']
critic_network.input_embedders_parameters = {
    'action': InputEmbedderParameters(scheme=EmbedderScheme.Empty),
    'measurements': InputEmbedderParameters(scheme=EmbedderScheme.Empty),
####################
schedule_params = ScheduleParameters()
schedule_params.improve_steps = EnvironmentEpisodes(16 * 50 * 200)  # 200 epochs
schedule_params.steps_between_evaluation_periods = EnvironmentEpisodes(16 * 50)  # 50 cycles
schedule_params.evaluation_steps = EnvironmentEpisodes(10)
schedule_params.heatup_steps = EnvironmentSteps(0)

#########
# Agent #
#########
agent_params = DQNAgentParameters()
agent_params.network_wrappers['main'].learning_rate = 0.001
agent_params.network_wrappers['main'].batch_size = 128
agent_params.network_wrappers['main'].middleware_parameters.scheme = [Dense(256)]
agent_params.network_wrappers['main'].input_embedders_parameters = {
    'state': InputEmbedderParameters(scheme=EmbedderScheme.Empty),
    'desired_goal': InputEmbedderParameters(scheme=EmbedderScheme.Empty)
}
agent_params.algorithm.discount = 0.98
agent_params.algorithm.num_consecutive_playing_steps = EnvironmentEpisodes(16)
agent_params.algorithm.num_consecutive_training_steps = 40
agent_params.algorithm.num_steps_between_copying_online_weights_to_target = TrainingSteps(40)
agent_params.algorithm.rate_for_copying_weights_to_target = 0.05
agent_params.memory.max_size = (MemoryGranularity.Transitions, 10**6)
agent_params.exploration.epsilon_schedule = ConstantSchedule(0.2)
agent_params.exploration.evaluation_epsilon = 0
agent_params.memory = EpisodicHindsightExperienceReplayParameters()
agent_params.memory.hindsight_goal_selection_method = HindsightGoalSelectionMethod.Final
agent_params.memory.hindsight_transitions_per_regular_transition = 1
agent_params.memory.goals_space = GoalsSpace(goal_name='state',
                                             reward_type=ReachingGoal(distance_from_goal_threshold=0,
#########
# Agent #
#########
agent_params = ClippedPPOAgentParameters()

# Agent params
agent_params.algorithm.num_steps_between_copying_online_weights_to_target = EnvironmentSteps(100)
agent_params.algorithm.discount = 0.99
agent_params.algorithm.num_consecutive_playing_steps = EnvironmentSteps(4096)
agent_params.algorithm.act_for_full_episodes = False

# NN configuration
agent_params.network_wrappers['main'].input_embedders_parameters = {
    'observation': InputEmbedderParameters(scheme=[])
}
agent_params.network_wrappers['main'].learning_rate = 0.001

################
# Environment  #
################
env_params = GymVectorEnvironment(level='gym_jiminy.envs.acrobot:JiminyAcrobotEnv')
env_params.additional_simulator_parameters = {
    'continuous': True,
    'enableGoalEnv': False
}

################
# Learning     #