Example #1
class FCMiddleware(Middleware):
    schemes = {
        MiddlewareScheme.Empty:
            [],

        # ppo
        MiddlewareScheme.Shallow:
            [
                Dense([64])
            ],

        # dqn
        MiddlewareScheme.Medium:
            [
                Dense([512])
            ],

        MiddlewareScheme.Deep:
            [
                Dense([128]),
                Dense([128]),
                Dense([128])
            ]
    }

    def __init__(self,
                 activation_function=tf.nn.relu,
                 scheme: MiddlewareScheme = MiddlewareScheme.Medium,
                 batchnorm: bool = False,
                 dropout: bool = False,
                 name="middleware_fc_embedder"):
        super().__init__(activation_function=activation_function,
                         batchnorm=batchnorm,
                         dropout=dropout,
                         scheme=scheme,
                         name=name)
        self.return_type = Middleware_FC_Embedding
        self.layers = []

    def _build_module(self):
        self.layers.append(self.input)

        if isinstance(self.scheme, MiddlewareScheme):
            layers_params = FCMiddleware.schemes[self.scheme]
        else:
            layers_params = self.scheme
        for idx, layer_params in enumerate(layers_params):
            self.layers.append(
                layer_params(self.layers[-1],
                             name='{}_{}'.format(
                                 layer_params.__class__.__name__, idx)))

            self.layers.extend(
                batchnorm_activation_dropout(self.layers[-1], self.batchnorm,
                                             self.activation_function,
                                             self.dropout, self.dropout_rate,
                                             idx))

        self.output = self.layers[-1]
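
A minimal usage sketch based on the constructor and _build_module above: the scheme argument may be one of the predefined MiddlewareScheme values or an explicit layer list (the layer sizes below are illustrative, not taken from the source).

# hedged sketch: pick a predefined scheme...
middleware = FCMiddleware(scheme=MiddlewareScheme.Deep)
# ...or pass an explicit layer list, which _build_module uses as-is
middleware = FCMiddleware(scheme=[Dense([256]), Dense([256])], batchnorm=True)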
Example #2
class VectorEmbedder(InputEmbedder):
    """
    An input embedder that is intended for inputs that can be represented as vectors.
    The embedder flattens the input, applies several dense layers to it and returns the output.
    """
    schemes = {
        EmbedderScheme.Empty:
            [],

        EmbedderScheme.Shallow:
            [
                Dense([128])
            ],

        # dqn
        EmbedderScheme.Medium:
            [
                Dense([256])
            ],

        # carla
        EmbedderScheme.Deep:
            [
                Dense([128]),
                Dense([128]),
                Dense([128])
            ]
    }

    def __init__(self,
                 input_size: List[int],
                 activation_function=tf.nn.relu,
                 scheme: EmbedderScheme = EmbedderScheme.Medium,
                 batchnorm: bool = False,
                 dropout: bool = False,
                 name: str = "embedder",
                 input_rescaling: float = 1.0,
                 input_offset: float = 0.0,
                 input_clipping=None):
        super().__init__(input_size, activation_function, scheme, batchnorm,
                         dropout, name, input_rescaling, input_offset,
                         input_clipping)

        self.return_type = InputVectorEmbedding
        if len(self.input_size) != 1 and scheme != EmbedderScheme.Empty:
            raise ValueError(
                "The input size of a vector embedder must contain only a single dimension"
            )
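
A usage sketch based on the constructor above: the embedder expects a single-dimension input size, and the scheme can again be a predefined EmbedderScheme value or an explicit layer list (sizes are illustrative).

# hedged sketch based on the signature shown above
embedder = VectorEmbedder(input_size=[17], scheme=EmbedderScheme.Medium)
# a multi-dimensional input_size with a non-empty scheme raises the ValueError above
# VectorEmbedder(input_size=[84, 84], scheme=EmbedderScheme.Medium)  # ValueError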
Example #3
    def __init__(self):
        super().__init__()
        self.input_embedders_parameters = {
            'observation':
            InputEmbedderParameters(activation_function='leaky_relu'),
            'measurements':
            InputEmbedderParameters(activation_function='leaky_relu'),
            'goal':
            InputEmbedderParameters(activation_function='leaky_relu')
        }

        self.input_embedders_parameters['observation'].scheme = [
            Conv2d([32, 8, 4]),
            Conv2d([64, 4, 2]),
            Conv2d([64, 3, 1]),
            Dense([512]),
        ]

        self.input_embedders_parameters['measurements'].scheme = [
            Dense([128]),
            Dense([128]),
            Dense([128]),
        ]

        self.input_embedders_parameters['goal'].scheme = [
            Dense([128]),
            Dense([128]),
            Dense([128]),
        ]

        self.middleware_parameters = FCMiddlewareParameters(
            activation_function='leaky_relu', scheme=MiddlewareScheme.Empty)
        self.heads_parameters = [
            MeasurementsPredictionHeadParameters(
                activation_function='leaky_relu')
        ]
        self.loss_weights = [1.0]
        self.async_training = False
        self.batch_size = 64
        self.adam_optimizer_beta1 = 0.95
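
This __init__ looks like the network-parameters class of a DFP-style agent (assumption: rl_coach's DFPNetworkParameters); in that case the per-input schemes remain plain lists and can be overridden after construction, as the later presets in this listing do.

# hedged sketch; DFPNetworkParameters is an assumption about which class this __init__ belongs to
params = DFPNetworkParameters()
params.input_embedders_parameters['measurements'].scheme = [Dense([64])]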
Example #4
schedule_params = ScheduleParameters()
schedule_params.improve_steps = TrainingSteps(10000000000)
schedule_params.steps_between_evaluation_periods = EnvironmentEpisodes(20)
schedule_params.evaluation_steps = EnvironmentEpisodes(1)
schedule_params.heatup_steps = EnvironmentSteps(1000)

#########
# Agent #
#########
agent_params = DDPGAgentParameters()
agent_params.network_wrappers['actor'].input_embedders_parameters['measurements'] = \
    agent_params.network_wrappers['actor'].input_embedders_parameters.pop('observation')
agent_params.network_wrappers['critic'].input_embedders_parameters['measurements'] = \
    agent_params.network_wrappers['critic'].input_embedders_parameters.pop('observation')
agent_params.network_wrappers['actor'].input_embedders_parameters[
    'measurements'].scheme = [Dense([300])]
agent_params.network_wrappers['actor'].middleware_parameters.scheme = [
    Dense([200])
]
agent_params.network_wrappers['critic'].input_embedders_parameters[
    'measurements'].scheme = [Dense([400])]
agent_params.network_wrappers['critic'].middleware_parameters.scheme = [
    Dense([300])
]
agent_params.network_wrappers['critic'].input_embedders_parameters[
    'action'].scheme = EmbedderScheme.Empty
agent_params.input_filter = MujocoInputFilter()
agent_params.input_filter.add_reward_filter("rescale",
                                            RewardRescaleFilter(1 / 10.))

###############
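
A note on the two pop('observation') lines above: the default DDPG networks register their input embedder under the key 'observation'; popping that entry and re-inserting it under 'measurements' reuses the same embedder configuration for the environment's 'measurements' input. In plain Python the rename amounts to:

# illustration only, with a placeholder value
embedders = {'observation': 'default embedder parameters'}
embedders['measurements'] = embedders.pop('observation')
assert list(embedders) == ['measurements']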
Example #5
################
# Agent Params #
################
agent_params = CILAgentParameters()

# forward camera and measurements input
agent_params.network_wrappers['main'].input_embedders_parameters = {
    'CameraRGB': InputEmbedderParameters(scheme=[Conv2d([32, 5, 2]),
                                                 Conv2d([32, 3, 1]),
                                                 Conv2d([64, 3, 2]),
                                                 Conv2d([64, 3, 1]),
                                                 Conv2d([128, 3, 2]),
                                                 Conv2d([128, 3, 1]),
                                                 Conv2d([256, 3, 1]),
                                                 Conv2d([256, 3, 1]),
                                                 Dense([512]),
                                                 Dense([512])],
                                         dropout=True,
                                         batchnorm=True),
    'measurements': InputEmbedderParameters(scheme=[Dense([128]),
                                                    Dense([128])])
}

# TODO: batch norm is currently applied to the fc layers, which is not desired
# TODO: dropout should be configured differently per layer [1.0] * 8 + [0.7] * 2 + [0.5] * 2 + [0.5] * 1 + [0.5, 1.] * 5

# simple fc middleware
agent_params.network_wrappers['main'].middleware_parameters = FCMiddlewareParameters(scheme=[Dense([512])])

# output branches
agent_params.network_wrappers['main'].heads_parameters = [
Example #6
from rl_coach.core_types import TrainingSteps, EnvironmentEpisodes, EnvironmentSteps, RunPhase, GradientClippingMethod

####################
# Graph Scheduling #
####################
schedule_params = ScheduleParameters()
schedule_params.improve_steps = TrainingSteps(10000000000)
schedule_params.steps_between_evaluation_periods = EnvironmentEpisodes(20)
schedule_params.evaluation_steps = EnvironmentEpisodes(1)
schedule_params.heatup_steps = EnvironmentSteps(1000)

#########
# Agent #
#########
agent_params = NAFAgentParameters()
agent_params.network_wrappers['main'].input_embedders_parameters['observation'].scheme = [Dense([200])]
agent_params.network_wrappers['main'].middleware_parameters.scheme = [Dense([200])]
agent_params.network_wrappers['main'].clip_gradients = 1000
agent_params.network_wrappers['main'].gradients_clipping_method = GradientClippingMethod.ClipByValue

###############
# Environment #
###############
env_params = Mujoco()
env_params.level = SingleLevelSelection(mujoco_v2)

vis_params = VisualizationParameters()
vis_params.video_dump_methods = [SelectedPhaseOnlyDumpMethod(RunPhase.TEST), MaxDumpMethod()]
vis_params.dump_mp4 = False
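
Presets like this one are normally completed by wiring the parameter objects into a graph manager; a sketch of the usual closing lines, assuming rl_coach's BasicRLGraphManager (not shown in the snippet):

from rl_coach.graph_managers.basic_rl_graph_manager import BasicRLGraphManager

# assumed completion: combine agent, environment, schedule and visualization parameters
graph_manager = BasicRLGraphManager(agent_params=agent_params,
                                    env_params=env_params,
                                    schedule_params=schedule_params,
                                    vis_params=vis_params)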

Example #7
####################
# Graph Scheduling #
####################
schedule_params = ScheduleParameters()
schedule_params.improve_steps = TrainingSteps(10000000000)
schedule_params.steps_between_evaluation_periods = EnvironmentEpisodes(20)
schedule_params.evaluation_steps = EnvironmentEpisodes(1)
schedule_params.heatup_steps = EnvironmentSteps(1000)

#########
# Agent #
#########
agent_params = NAFAgentParameters()
agent_params.network_wrappers['main'].input_embedders_parameters[
    'observation'].scheme = [Dense([200])]
agent_params.network_wrappers['main'].middleware_parameters.scheme = [
    Dense([200])
]
agent_params.network_wrappers['main'].clip_gradients = 1000
agent_params.network_wrappers[
    'main'].gradients_clipping_method = GradientClippingMethod.ClipByValue

###############
# Environment #
###############
env_params = Mujoco()
env_params.level = SingleLevelSelection(mujoco_v2)

vis_params = VisualizationParameters()
vis_params.video_dump_methods = [
Example #8
top_agent_params.memory.hindsight_transitions_per_regular_transition = 3
top_agent_params.memory.hindsight_goal_selection_method = HindsightGoalSelectionMethod.Future
top_agent_params.memory.goals_space = goals_space
top_agent_params.algorithm.num_consecutive_playing_steps = EnvironmentEpisodes(32)
top_agent_params.algorithm.num_consecutive_training_steps = 40
top_agent_params.algorithm.num_steps_between_copying_online_weights_to_target = TrainingSteps(40)

# exploration - OU process
top_agent_params.exploration = OUProcessParameters()
top_agent_params.exploration.theta = 0.1

# actor
top_actor = top_agent_params.network_wrappers['actor']
top_actor.input_embedders_parameters = {'observation': InputEmbedderParameters(scheme=EmbedderScheme.Empty),
                                        'desired_goal': InputEmbedderParameters(scheme=EmbedderScheme.Empty)}
top_actor.middleware_parameters.scheme = [Dense([64])] * 3
top_actor.learning_rate = 0.001
top_actor.batch_size = 4096

# critic
top_critic = top_agent_params.network_wrappers['critic']
top_critic.input_embedders_parameters = {'observation': InputEmbedderParameters(scheme=EmbedderScheme.Empty),
                                         'action': InputEmbedderParameters(scheme=EmbedderScheme.Empty),
                                         'desired_goal': InputEmbedderParameters(scheme=EmbedderScheme.Empty)}
top_critic.embedding_merger_type = EmbeddingMergerType.Concat
top_critic.middleware_parameters.scheme = [Dense([64])] * 3
top_critic.learning_rate = 0.001
top_critic.batch_size = 4096

# ----------
Example #9
# Graph Scheduling #
####################
schedule_params = ScheduleParameters()
schedule_params.improve_steps = TrainingSteps(10000000000)
schedule_params.steps_between_evaluation_periods = EnvironmentSteps(2000)
schedule_params.evaluation_steps = EnvironmentEpisodes(1)
schedule_params.heatup_steps = EnvironmentSteps(0)

#########
# Agent #
#########
agent_params = PPOAgentParameters()
agent_params.network_wrappers['actor'].learning_rate = 0.001
agent_params.network_wrappers['critic'].learning_rate = 0.001

agent_params.network_wrappers['actor'].input_embedders_parameters['observation'].scheme = [Dense([64])]
agent_params.network_wrappers['actor'].middleware_parameters.scheme = [Dense([64])]
agent_params.network_wrappers['critic'].input_embedders_parameters['observation'].scheme = [Dense([64])]
agent_params.network_wrappers['critic'].middleware_parameters.scheme = [Dense([64])]

agent_params.input_filter = MujocoInputFilter()
agent_params.input_filter.add_observation_filter('observation', 'normalize', ObservationNormalizationFilter())

agent_params.exploration = ContinuousEntropyParameters()

###############
# Environment #
###############
env_params = Mujoco()
env_params.level = SingleLevelSelection(mujoco_v2)
Example #10
####################
schedule_params = ScheduleParameters()
schedule_params.improve_steps = TrainingSteps(10000000000)
schedule_params.steps_between_evaluation_periods = EnvironmentSteps(2000)
schedule_params.evaluation_steps = EnvironmentEpisodes(1)
schedule_params.heatup_steps = EnvironmentSteps(0)

#########
# Agent #
#########
agent_params = PPOAgentParameters()
agent_params.network_wrappers['actor'].learning_rate = 0.001
agent_params.network_wrappers['critic'].learning_rate = 0.001

agent_params.network_wrappers['actor'].input_embedders_parameters[
    'observation'].scheme = [Dense([64])]
agent_params.network_wrappers['actor'].middleware_parameters.scheme = [
    Dense([64])
]
agent_params.network_wrappers['critic'].input_embedders_parameters[
    'observation'].scheme = [Dense([64])]
agent_params.network_wrappers['critic'].middleware_parameters.scheme = [
    Dense([64])
]

agent_params.input_filter = MujocoInputFilter()
agent_params.input_filter.add_observation_filter(
    'observation', 'normalize', ObservationNormalizationFilter())

agent_params.exploration = ContinuousEntropyParameters()
Example #11
schedule_params = ScheduleParameters()
schedule_params.improve_steps = TrainingSteps(10000000000)
schedule_params.steps_between_evaluation_periods = EnvironmentEpisodes(20)
schedule_params.evaluation_steps = EnvironmentEpisodes(1)
schedule_params.heatup_steps = EnvironmentSteps(0)

#########
# Agent #
#########
agent_params = ActorCriticAgentParameters()
agent_params.algorithm.apply_gradients_every_x_episodes = 1
agent_params.algorithm.num_steps_between_gradient_updates = 20
agent_params.algorithm.beta_entropy = 0.005
agent_params.network_wrappers['main'].learning_rate = 0.00002
agent_params.network_wrappers['main'].input_embedders_parameters['observation'] = \
    InputEmbedderParameters(scheme=[Dense([200])])
agent_params.network_wrappers['main'].middleware_parameters = LSTMMiddlewareParameters(scheme=MiddlewareScheme.Empty,
                                                                                       number_of_lstm_cells=128)

agent_params.input_filter = MujocoInputFilter()
agent_params.input_filter.add_reward_filter('rescale', RewardRescaleFilter(1/20.))
agent_params.input_filter.add_observation_filter('observation', 'normalize', ObservationNormalizationFilter())

agent_params.exploration = ContinuousEntropyParameters()

###############
# Environment #
###############
env_params = Mujoco()
env_params.level = SingleLevelSelection(mujoco_v2)
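
Note that scheme=MiddlewareScheme.Empty here means the LSTM middleware gets no dense layers in front of it (the Empty scheme maps to an empty layer list, as in the FCMiddleware scheme table in Example #1), so the Dense([200]) embedding feeds the 128-cell LSTM directly.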
Example #12
####################
# Graph Scheduling #
####################

schedule_params = ScheduleParameters()
schedule_params.improve_steps = EnvironmentSteps(2000000)
schedule_params.steps_between_evaluation_periods = EnvironmentEpisodes(20)
schedule_params.evaluation_steps = EnvironmentEpisodes(1)
schedule_params.heatup_steps = EnvironmentSteps(1000)

#########
# Agent #
#########
agent_params = DDPGAgentParameters()
agent_params.network_wrappers['actor'].input_embedders_parameters[
    'observation'].scheme = [Dense([400])]
agent_params.network_wrappers['actor'].middleware_parameters.scheme = [
    Dense([300])
]
agent_params.network_wrappers['critic'].input_embedders_parameters[
    'observation'].scheme = [Dense([400])]
agent_params.network_wrappers['critic'].middleware_parameters.scheme = [
    Dense([300])
]
agent_params.network_wrappers['critic'].input_embedders_parameters[
    'action'].scheme = EmbedderScheme.Empty

###############
# Environment #
###############
env_params = Mujoco()
Example #13
################
agent_params = CILAgentParameters()

# forward camera and measurements input
agent_params.network_wrappers['main'].input_embedders_parameters = {
    'forward_camera':
    InputEmbedderParameters(scheme=[
        Conv2d([32, 5, 2]),
        Conv2d([32, 3, 1]),
        Conv2d([64, 3, 2]),
        Conv2d([64, 3, 1]),
        Conv2d([128, 3, 2]),
        Conv2d([128, 3, 1]),
        Conv2d([256, 3, 1]),
        Conv2d([256, 3, 1]),
        Dense([512]),
        Dense([512])
    ],
                            dropout=True,
                            batchnorm=True),
    'measurements':
    InputEmbedderParameters(scheme=[Dense([128]), Dense([128])])
}

# TODO: batch norm will be applied to the fc layers, which is not desirable.
# TODO: only a single dropout rate can be configured currently
# TODO: dropout should be configured differently per layer [1.0] * 8 + [0.7] * 2 + [0.5] * 2 + [0.5] * 1 + [0.5, 1.] * 5

# simple fc middleware
agent_params.network_wrappers[
    'main'].middleware_parameters = FCMiddlewareParameters(
Example #14
schedule_params = ScheduleParameters()
schedule_params.improve_steps = EnvironmentEpisodes(16 * 50 * 200)  # 200 epochs
schedule_params.steps_between_evaluation_periods = EnvironmentEpisodes(16 * 50)  # 50 cycles
schedule_params.evaluation_steps = EnvironmentEpisodes(10)
schedule_params.heatup_steps = EnvironmentSteps(0)

#########
# Agent #
#########
agent_params = DQNAgentParameters()
agent_params.network_wrappers['main'].learning_rate = 0.001
agent_params.network_wrappers['main'].batch_size = 128
agent_params.network_wrappers['main'].middleware_parameters.scheme = [
    Dense([256])
]
agent_params.network_wrappers['main'].input_embedders_parameters = {
    'state': InputEmbedderParameters(scheme=EmbedderScheme.Empty),
    'desired_goal': InputEmbedderParameters(scheme=EmbedderScheme.Empty)
}
agent_params.algorithm.discount = 0.98
agent_params.algorithm.num_consecutive_playing_steps = EnvironmentEpisodes(16)
agent_params.algorithm.num_consecutive_training_steps = 40
agent_params.algorithm.num_steps_between_copying_online_weights_to_target = TrainingSteps(40)
agent_params.algorithm.rate_for_copying_weights_to_target = 0.05
agent_params.memory.max_size = (MemoryGranularity.Transitions, 10**6)
agent_params.exploration.epsilon_schedule = ConstantSchedule(0.2)
agent_params.exploration.evaluation_epsilon = 0
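
On the exploration settings: ConstantSchedule(0.2) keeps epsilon fixed at 0.2 for the whole training run (20% random actions), while evaluation_epsilon = 0 makes evaluation episodes fully greedy.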
Example #15
####################
schedule_params = ScheduleParameters()
schedule_params.improve_steps = TrainingSteps(10000000000)
schedule_params.steps_between_evaluation_periods = EnvironmentEpisodes(100)
schedule_params.evaluation_steps = EnvironmentEpisodes(3)
schedule_params.heatup_steps = EnvironmentSteps(0)

#########
# Agent #
#########
agent_params = NStepQAgentParameters()

agent_params.network_wrappers['main'].learning_rate = 0.0001
agent_params.network_wrappers['main'].input_embedders_parameters['observation'].scheme = [Conv2d([16, 8, 4]),
                                                                                          Conv2d([32, 4, 2])]
agent_params.network_wrappers['main'].middleware_parameters.scheme = [Dense([256])]

###############
# Environment #
###############
env_params = Atari()
env_params.level = SingleLevelSelection(atari_deterministic_v4)

vis_params = VisualizationParameters()
vis_params.video_dump_methods = [SelectedPhaseOnlyDumpMethod(RunPhase.TEST), MaxDumpMethod()]
vis_params.dump_mp4 = False

########
# Test #
########
preset_validation_params = PresetValidationParameters()
Example #16
schedule_params = ScheduleParameters()
schedule_params.improve_steps = TrainingSteps(10000000000)
schedule_params.steps_between_evaluation_periods = EnvironmentEpisodes(20)
schedule_params.evaluation_steps = EnvironmentEpisodes(1)
schedule_params.heatup_steps = EnvironmentSteps(1000)

#########
# Agent #
#########
agent_params = DDPGAgentParameters()
agent_params.network_wrappers['actor'].input_embedders_parameters['measurements'] = \
    agent_params.network_wrappers['actor'].input_embedders_parameters.pop('observation')
agent_params.network_wrappers['critic'].input_embedders_parameters['measurements'] = \
    agent_params.network_wrappers['critic'].input_embedders_parameters.pop('observation')
agent_params.network_wrappers['actor'].input_embedders_parameters['measurements'].scheme = [Dense([300])]
agent_params.network_wrappers['actor'].middleware_parameters.scheme = [Dense([200])]
agent_params.network_wrappers['critic'].input_embedders_parameters['measurements'].scheme = [Dense([400])]
agent_params.network_wrappers['critic'].middleware_parameters.scheme = [Dense([300])]
agent_params.network_wrappers['critic'].input_embedders_parameters['action'].scheme = EmbedderScheme.Empty
agent_params.input_filter = MujocoInputFilter()
agent_params.input_filter.add_reward_filter("rescale", RewardRescaleFilter(1/10.))

###############
# Environment #
###############
env_params = ControlSuiteEnvironmentParameters()
env_params.level = SingleLevelSelection(control_suite_envs)

vis_params = VisualizationParameters()
vis_params.video_dump_methods = [SelectedPhaseOnlyDumpMethod(RunPhase.TEST), MaxDumpMethod()]
Example #17
# Agent Params #
################
agent_params = DDPGAgentParameters()

# actor
actor_network = agent_params.network_wrappers['actor']
actor_network.learning_rate = 0.001
actor_network.batch_size = 256
actor_network.optimizer_epsilon = 1e-08
actor_network.adam_optimizer_beta1 = 0.9
actor_network.adam_optimizer_beta2 = 0.999
actor_network.input_embedders_parameters = {
    'observation': InputEmbedderParameters(scheme=EmbedderScheme.Empty),
    'desired_goal': InputEmbedderParameters(scheme=EmbedderScheme.Empty)
}
actor_network.middleware_parameters = FCMiddlewareParameters(scheme=[Dense([256]), Dense([256]), Dense([256])])
actor_network.heads_parameters[0].batchnorm = False

# critic
critic_network = agent_params.network_wrappers['critic']
critic_network.learning_rate = 0.001
critic_network.batch_size = 256
critic_network.optimizer_epsilon = 1e-08
critic_network.adam_optimizer_beta1 = 0.9
critic_network.adam_optimizer_beta2 = 0.999
critic_network.input_embedders_parameters = {
    'action': InputEmbedderParameters(scheme=EmbedderScheme.Empty),
    'desired_goal': InputEmbedderParameters(scheme=EmbedderScheme.Empty),
    'observation': InputEmbedderParameters(scheme=EmbedderScheme.Empty)
}
critic_network.middleware_parameters = FCMiddlewareParameters(scheme=[Dense([256]), Dense([256]), Dense([256])])
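
A note on the EmbedderScheme.Empty embedders used for both networks: as the scheme tables in Examples #1 and #2 show, the Empty scheme maps to an empty layer list, so each embedder only flattens its input; the 'observation', 'desired_goal' and 'action' vectors are then merged (compare EmbeddingMergerType.Concat in Example #8) and fed straight into the three Dense([256]) middleware layers.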