Example #1
def create_scheme(info_dict):
    """Creates a custom scheme whose first layers are convolutional layers and
    last layers are dense layers.
    info_dict - dictionary containing the following entries:
    conv_info_list - List of lists where each embedded list contains the
                     number of filters, kernel size, and stride. An embedded
                     list of size less than 3 will raise an exception. The size
                     of the outer list is interpreted as the desired number of
                     convolutional layers.
    dense_layer_hidden_unit_list - List where the size represents the number of
                                   dense layers to be used after the convolutional
                                   layers and each value represents the number of
                                   hidden units.
    A usage sketch with hypothetical values follows the function definition.
    """
    try:
        scheme = list()
        # Add the convolutional layers first
        for conv_info in info_dict[SchemeInfo.CONV_INFO_LIST.value]:
            num_filters, kernel_size, strides = tuple(conv_info)
            scheme.append(Conv2d(num_filters, kernel_size, strides))

        for hidden_units in info_dict[SchemeInfo.DENSE_LAYER_INFO_LIST.value]:
            scheme.append(Dense(hidden_units))

        return scheme
    except KeyError as err:
        raise Exception("Info, key {} not found".format(err.args[0]))
    except ValueError as err:
        raise Exception("Error while unpacking info: {}".format(err))
    except Exception as err:
        raise Exception("Error while creating scheme: {}".format(err))
agent_params.algorithm.use_non_zero_discount_for_terminal_states = True
agent_params.exploration.noise_schedule = LinearSchedule(1.5, 0.5, 300000)

agent_params.input_filter = NoInputFilter()
agent_params.output_filter = NoOutputFilter()

# Camera observation pre-processing network scheme
camera_obs_scheme = [
    Conv2d(32, 8, 4),
    BatchnormActivationDropout(activation_function='relu'),
    Conv2d(64, 4, 2),
    BatchnormActivationDropout(activation_function='relu'),
    Conv2d(64, 3, 1),
    BatchnormActivationDropout(activation_function='relu'),
    Flatten(),
    Dense(256),
    BatchnormActivationDropout(activation_function='relu')
]

# Actor
actor_network = agent_params.network_wrappers['actor']
actor_network.input_embedders_parameters = {
    'measurements':
    InputEmbedderParameters(scheme=EmbedderScheme.Empty),
    agent_params.algorithm.agent_obs_key:
    InputEmbedderParameters(scheme=camera_obs_scheme,
                            activation_function='none')
}

actor_network.middleware_parameters.scheme = [Dense(300), Dense(200)]
actor_network.learning_rate = 1e-4
####################
# Graph Scheduling #
####################

schedule_params = ScheduleParameters()
schedule_params.improve_steps = EnvironmentSteps(1000000)
schedule_params.steps_between_evaluation_periods = EnvironmentSteps(5000)
schedule_params.evaluation_steps = EnvironmentEpisodes(10)
schedule_params.heatup_steps = EnvironmentSteps(10000)

#########
# Agent #
#########
agent_params = TD3AgentParameters()
agent_params.network_wrappers['actor'].input_embedders_parameters[
    'observation'].scheme = [Dense(400)]
agent_params.network_wrappers['actor'].middleware_parameters.scheme = [
    Dense(300)
]

agent_params.network_wrappers['critic'].input_embedders_parameters[
    'observation'].scheme = EmbedderScheme.Empty
agent_params.network_wrappers['critic'].input_embedders_parameters[
    'action'].scheme = EmbedderScheme.Empty
agent_params.network_wrappers['critic'].middleware_parameters.scheme = [
    Dense(400), Dense(300)
]

###############
# Environment #
###############
Example #4
agent_params = ClippedPPOAgentParameters()
agent_params.network_wrappers['main'].input_embedders_parameters = {
    # 'left_camera': InputEmbedderParameters(activation_function='relu', dropout_rate=0.3),
    'STEREO_CAMERAS': InputEmbedderParameters(activation_function='relu', dropout_rate=0.3),
    # 'stereo': InputEmbedderParameters(
    #     scheme=[
    #         # Conv2d(32, 8, 4),
    #         Conv2d(32, 4, 2),
    #         Conv2d(64, 4, 2),
    #         # Conv2d(64, 3, 1),
    #         Conv2d(64, 2, 1)
    #     ],
    #     activation_function='relu', dropout_rate=0.3),
    'LIDAR': InputEmbedderParameters(
        scheme=[
            Dense(64)
        ],
        activation_function='relu', dropout_rate=0.3)
}

agent_params.network_wrappers['main'].middleware_parameters = \
     FCMiddlewareParameters(
         scheme=[
             Dense(512)
         ],
         activation_function='relu', dropout_rate=0.3
     )

agent_params.network_wrappers['main'].learning_rate = 0.0003
#agent_params.network_wrappers['main'].middleware_parameters.activation_function = 'relu'
agent_params.network_wrappers['main'].batch_size = 64
Example #5
schedule_params = ScheduleParameters()
schedule_params.improve_steps = TrainingSteps(10000000000)
schedule_params.steps_between_evaluation_periods = EnvironmentEpisodes(20)
schedule_params.evaluation_steps = EnvironmentEpisodes(1)
schedule_params.heatup_steps = EnvironmentSteps(0)

#########
# Agent #
#########
agent_params = ActorCriticAgentParameters()
agent_params.algorithm.apply_gradients_every_x_episodes = 1
agent_params.algorithm.num_steps_between_gradient_updates = 20
agent_params.algorithm.beta_entropy = 0.005
agent_params.network_wrappers['main'].learning_rate = 0.00002
agent_params.network_wrappers['main'].input_embedders_parameters['observation'] = \
    InputEmbedderParameters(scheme=[Dense(200)])
agent_params.network_wrappers['main'].middleware_parameters = LSTMMiddlewareParameters(scheme=MiddlewareScheme.Empty,
                                                                                       number_of_lstm_cells=128)

agent_params.input_filter = InputFilter()
agent_params.input_filter.add_reward_filter('rescale', RewardRescaleFilter(1/20.))
agent_params.input_filter.add_observation_filter('observation', 'normalize', ObservationNormalizationFilter())

###############
# Environment #
###############
env_params = GymVectorEnvironment(level=SingleLevelSelection(mujoco_v2))

########
# Test #
########
Example #6
agent_params.algorithm.apply_gradients_every_x_episodes = 1
agent_params.algorithm.gae_lambda = 0.95
agent_params.algorithm.discount = 0.99
agent_params.algorithm.beta_entropy = 0.01

agent_params.algorithm.apply_gradients_every_x_episodes = 1
agent_params.algorithm.num_steps_between_gradient_updates = 20
agent_params.algorithm.beta_entropy = 0.05
agent_params.algorithm.estimate_state_value_using_gae = True
agent_params.algorithm.num_steps_between_copying_online_weights_to_target = EnvironmentSteps(2048)

agent_params.network_wrappers["main"].learning_rate = 0.0003
agent_params.network_wrappers["main"].input_embedders_parameters[
    "observation"
].activation_function = "tanh"
agent_params.network_wrappers["main"].input_embedders_parameters["observation"].scheme = [Dense(64)]
agent_params.network_wrappers["main"].middleware_parameters.scheme = [Dense(64)]
agent_params.network_wrappers["main"].middleware_parameters.activation_function = "tanh"
agent_params.network_wrappers["main"].batch_size = 64
agent_params.network_wrappers["main"].optimizer_epsilon = 1e-5
agent_params.network_wrappers["main"].clip_gradients = 40.0

agent_params.exploration = EGreedyParameters()
agent_params.exploration.epsilon_schedule = LinearSchedule(1.0, 0.01, 10000)

###############
# Environment #
###############
env_params = GymVectorEnvironment(level="autoscalesim:SimpleScalableWebserviceSim")

########
Example #7
schedule_params.heatup_steps = EnvironmentSteps(0)

#########
# Agent #
#########
agent_params = ClippedPPOAgentParameters()
agent_params.network_wrappers['main'].input_embedders_parameters = {
    'STEREO_CAMERAS':
    InputEmbedderParameters(
        scheme=[Conv2d(32, 8, 4),
                Conv2d(32, 4, 2),
                Conv2d(64, 4, 2)],
        activation_function='relu',
        dropout_rate=0.3),
    'LIDAR':
    InputEmbedderParameters(scheme=[Dense(64), Dense(32)],
                            activation_function='relu',
                            dropout_rate=0.3)
}

agent_params.network_wrappers['main'].middleware_parameters = \
     FCMiddlewareParameters(
         scheme=[
             Dense(256)
         ],
         activation_function='relu', dropout_rate=0.3
     )

agent_params.network_wrappers['main'].learning_rate = 0.0003
#agent_params.network_wrappers['main'].middleware_parameters.activation_function = 'relu'
agent_params.network_wrappers['main'].batch_size = 64
Example #8
schedule_params = ScheduleParameters()
schedule_params.improve_steps = TrainingSteps(10000000000)
schedule_params.steps_between_evaluation_periods = EnvironmentEpisodes(20)
schedule_params.evaluation_steps = EnvironmentEpisodes(1)
schedule_params.heatup_steps = EnvironmentSteps(1000)

#########
# Agent #
#########
agent_params = DDPGAgentParameters()
agent_params.network_wrappers['actor'].input_embedders_parameters['measurements'] = \
    agent_params.network_wrappers['actor'].input_embedders_parameters.pop('observation')
agent_params.network_wrappers['critic'].input_embedders_parameters['measurements'] = \
    agent_params.network_wrappers['critic'].input_embedders_parameters.pop('observation')
agent_params.network_wrappers['actor'].input_embedders_parameters['measurements'].scheme = [Dense(300)]
agent_params.network_wrappers['actor'].middleware_parameters.scheme = [Dense(200)]
agent_params.network_wrappers['critic'].input_embedders_parameters['measurements'].scheme = [Dense(400)]
agent_params.network_wrappers['critic'].middleware_parameters.scheme = [Dense(300)]
agent_params.network_wrappers['critic'].input_embedders_parameters['action'].scheme = EmbedderScheme.Empty
agent_params.input_filter = InputFilter()
agent_params.input_filter.add_reward_filter("rescale", RewardRescaleFilter(1/10.))

###############
# Environment #
###############
env_params = ControlSuiteEnvironmentParameters(level=SingleLevelSelection(control_suite_envs))

########
# Test #
########
Example #9
schedule_params = ScheduleParameters()
schedule_params.improve_steps = TrainingSteps(10000000000)
schedule_params.steps_between_evaluation_periods = EnvironmentEpisodes(20)
schedule_params.evaluation_steps = EnvironmentEpisodes(1)
schedule_params.heatup_steps = EnvironmentSteps(1000)

#########
# Agent #
#########

agent_params = SoftActorCriticAgentParameters()
# override default parameters:
# value (v) networks parameters
agent_params.network_wrappers['v'].batch_size = 32
agent_params.network_wrappers['v'].learning_rate = 0.0003
agent_params.network_wrappers['v'].middleware_parameters.scheme = [Dense(32)]
agent_params.network_wrappers['v'].optimizer_epsilon = 1e-5
agent_params.network_wrappers['v'].adam_optimizer_beta2 = 0.999
agent_params.network_wrappers['v'].input_embedders_parameters['forward_camera'] = \
    agent_params.network_wrappers['v'].input_embedders_parameters.pop('observation')

# critic (q) network parameters
agent_params.network_wrappers['q'].heads_parameters[0].network_layers_sizes = (
    32, 32)
agent_params.network_wrappers['q'].batch_size = 32
agent_params.network_wrappers['q'].learning_rate = 0.0003
agent_params.network_wrappers['q'].optimizer_epsilon = 1e-5
agent_params.network_wrappers['q'].adam_optimizer_beta2 = 0.999
agent_params.network_wrappers['q'].input_embedders_parameters['forward_camera'] = \
    agent_params.network_wrappers['q'].input_embedders_parameters.pop('observation')
Example #10
schedule_params.heatup_steps = EnvironmentSteps(0)

#########
# Agent #
#########
agent_params = DQNAgentParameters()

# DQN params
agent_params.algorithm.num_steps_between_copying_online_weights_to_target = EnvironmentSteps(100)
agent_params.algorithm.discount = 0.99
agent_params.algorithm.num_consecutive_playing_steps = EnvironmentSteps(10)  # was 1

# NN configuration
agent_params.network_wrappers['main'].learning_rate = 0.001     # was 0.00025
agent_params.network_wrappers['main'].replace_mse_with_huber_loss = False

agent_params.network_wrappers['main'].input_embedders_parameters['observation'].scheme = [Conv2d(32, 2, 1), Conv2d(32, 2, 2), Dense(64)]
agent_params.network_wrappers['main'].input_embedders_parameters['observation'].activation_function = 'relu'
agent_params.network_wrappers['main'].input_embedders_parameters['observation'].input_rescaling = {'image': 1.0, 'vector': 1.0, 'tensor': 1.0}
agent_params.network_wrappers['main'].middleware_parameters.scheme = MiddlewareScheme.Empty

# ER size
agent_params.memory.max_size = (MemoryGranularity.Transitions, 40000)

# E-Greedy schedule
agent_params.exploration.epsilon_schedule = LinearSchedule(1.0, 0.01, schedule_params.improve_steps.num_steps)     # was 1.0, 0.01, 10000

graph_manager = BasicRLGraphManager(agent_params=agent_params, env_params=env_params,
                                    schedule_params=schedule_params, vis_params=VisualizationParameters(),
                                    preset_validation_params=preset_validation_params)
schedule_params.improve_steps = TrainingSteps(10000000000)
schedule_params.steps_between_evaluation_periods = EnvironmentEpisodes(100)
schedule_params.evaluation_steps = EnvironmentEpisodes(3)
schedule_params.heatup_steps = EnvironmentSteps(0)

#########
# Agent #
#########
agent_params = NStepQAgentParameters()

agent_params.network_wrappers['main'].learning_rate = 0.0001
agent_params.network_wrappers['main'].input_embedders_parameters[
    'observation'].scheme = [Conv2d(16, 8, 4),
                             Conv2d(32, 4, 2)]
agent_params.network_wrappers['main'].middleware_parameters.scheme = [
    Dense(256)
]

###############
# Environment #
###############
env_params = Atari(level=SingleLevelSelection(atari_deterministic_v4))

########
# Test #
########
preset_validation_params = PresetValidationParameters()
preset_validation_params.trace_test_levels = [
    'breakout', 'pong', 'space_invaders'
]
Example #12
agent_params = ActorCriticAgentParameters()

agent_params.algorithm.policy_gradient_rescaler = PolicyGradientRescaler.GAE
agent_params.algorithm.discount = 0.99
agent_params.algorithm.apply_gradients_every_x_episodes = 10
agent_params.algorithm.num_steps_between_gradient_updates = 10
agent_params.algorithm.gae_lambda = 1
agent_params.algorithm.beta_entropy = 0.01

agent_params.network_wrappers['main'].optimizer_type = 'Adam'
agent_params.network_wrappers['main'].learning_rate = 0.0001

agent_params.network_wrappers['main'].input_embedders_parameters[
    'observation'].scheme = [Conv2d(32, 2, 1),
                             Conv2d(32, 2, 2),
                             Dense(64)]
agent_params.network_wrappers['main'].input_embedders_parameters[
    'observation'].activation_function = 'relu'
agent_params.network_wrappers['main'].input_embedders_parameters[
    'observation'].input_rescaling = {
        'image': 1.0,
        'vector': 1.0,
        'tensor': 1.0
    }
agent_params.network_wrappers[
    'main'].middleware_parameters.scheme = MiddlewareScheme.Empty

########
# Test #
########
preset_validation_params.num_workers = 8
Example #13
    #                 Conv2d(64, 3, 1),
    #                 BatchnormActivationDropout(batchnorm=True, activation_function='relu'),
    #                 Dense(512),
    #                 BatchnormActivationDropout(activation_function='relu', dropout_rate=0.5),
    #                 Dense(512),
    #                 BatchnormActivationDropout(activation_function='relu', dropout_rate=0.5)
    #             ],
    #             activation_function='none'),
    'lidar':
    InputEmbedderParameters(
        scheme=[
            #                 Dense(256),
            #                 BatchnormActivationDropout(activation_function='relu', dropout_rate=0.5),
            #                 Dense(256),
            #                 BatchnormActivationDropout(activation_function='relu', dropout_rate=0.5)
            Dense(32),
            BatchnormActivationDropout(activation_function='relu',
                                       dropout_rate=0.5),
            Dense(32),
            BatchnormActivationDropout(activation_function='relu',
                                       dropout_rate=0.5)
        ],
        activation_function='none'
    )  # we define the activation function for each layer explicitly
}
# agent_params.network_wrappers['main'].middleware_parameters = \
#     FCMiddlewareParameters(
#         scheme=[
#             Dense(256),
#             BatchnormActivationDropout(activation_function='relu', dropout_rate=0.5),
#             Dense(128),
Example #14
schedule_params = ScheduleParameters()
schedule_params.improve_steps = EnvironmentEpisodes(100)
schedule_params.steps_between_evaluation_periods = EnvironmentEpisodes(10)
schedule_params.evaluation_steps = EnvironmentEpisodes(1)
schedule_params.heatup_steps = EnvironmentEpisodes(10)

#########
# Agent #
#########
agent_params = ClippedPPOAgentParameters()

agent_params.network_wrappers["main"].learning_rate = 0.001
agent_params.network_wrappers["main"].input_embedders_parameters[
    "observation"].activation_function = "tanh"
agent_params.network_wrappers["main"].input_embedders_parameters[
    "observation"].scheme = [Dense(32)]
agent_params.network_wrappers["main"].middleware_parameters.scheme = [
    Dense(32)
]
agent_params.network_wrappers[
    "main"].middleware_parameters.activation_function = "tanh"
agent_params.network_wrappers["main"].batch_size = 256
agent_params.network_wrappers["main"].optimizer_epsilon = 1e-5
agent_params.network_wrappers["main"].adam_optimizer_beta2 = 0.999

agent_params.algorithm.clip_likelihood_ratio_using_epsilon = 0.3
agent_params.algorithm.clipping_decay_schedule = LinearSchedule(
    0.5, 0.1, 10000 * 50)
agent_params.algorithm.beta_entropy = 0
agent_params.algorithm.gae_lambda = 0.95
agent_params.algorithm.discount = 0.999
schedule_params.evaluation_steps = EnvironmentEpisodes(5)
schedule_params.heatup_steps = EnvironmentSteps(0)

#########
# Agent #
#########
agent_params = ClippedPPOAgentParameters()
agent_params.network_wrappers['main'].input_embedders_parameters = {
    'STEREO_CAMERAS': InputEmbedderParameters(
        scheme=[
            Conv2d(32, 8, 4),
            Conv2d(32, 4, 2),
            Conv2d(64, 4, 2),
            Conv2d(64, 3, 1),
            Conv2d(64, 2, 1),
            Dense(256)
        ],
        activation_function='relu', dropout_rate=0.3),
    'LIDAR': InputEmbedderParameters(
        scheme=[
            Dense(64),
            Dense(32)
        ],
        activation_function='relu', dropout_rate=0.3)
}

agent_params.network_wrappers['main'].middleware_parameters = \
     FCMiddlewareParameters(
         scheme=[
             Dense(256)
         ],
####################
# Graph Scheduling #
####################

schedule_params = ScheduleParameters()
schedule_params.improve_steps = EnvironmentSteps(2000000)
schedule_params.steps_between_evaluation_periods = EnvironmentEpisodes(20)
schedule_params.evaluation_steps = EnvironmentEpisodes(1)
schedule_params.heatup_steps = EnvironmentSteps(10000)

#########
# Agent #
#########
agent_params = DDPGAgentParameters()
agent_params.network_wrappers['actor'].input_embedders_parameters['observation'].scheme = [Dense(400)]
agent_params.network_wrappers['actor'].middleware_parameters.scheme = [Dense(300)]
agent_params.network_wrappers['critic'].input_embedders_parameters['observation'].scheme = [Dense(400)]
agent_params.network_wrappers['critic'].middleware_parameters.scheme = [Dense(300)]
agent_params.network_wrappers['critic'].input_embedders_parameters['action'].scheme = EmbedderScheme.Empty

###############
# Environment #
###############
env_params = GymVectorEnvironment(level=SingleLevelSelection(mujoco_v2))

########
# Test #
########
preset_validation_params = PresetValidationParameters()
preset_validation_params.test = True
Example #17
############
# DQN Agent
############

agent_params = DDQNAgentParameters()

# DQN params
agent_params.algorithm.discount = 0.99
agent_params.algorithm.num_consecutive_playing_steps = EnvironmentSteps(1)
agent_params.algorithm.num_steps_between_copying_online_weights_to_target = EnvironmentSteps(1000)

# NN configuration
agent_params.network_wrappers['main'].batch_size = 32
agent_params.network_wrappers['main'].learning_rate = 0.0001
agent_params.network_wrappers['main'].input_embedders_parameters['observation'].scheme = [Dense(512)]
agent_params.network_wrappers['main'].replace_mse_with_huber_loss = False
agent_params.network_wrappers['main'].heads_parameters = [DuelingQHeadParameters()]
agent_params.network_wrappers['main'].middleware_parameters.scheme = [Dense(512)]

# ER size
agent_params.memory.max_size = (MemoryGranularity.Transitions, 10000)

# E-Greedy schedule
agent_params.exploration.epsilon_schedule = LinearSchedule(1.0, 0.01, 40000)

#############
# Environment
#############

env_params = GymVectorEnvironment(level='trading_env:TradingEnv')
schedule_params = ScheduleParameters()
schedule_params.improve_steps = EnvironmentEpisodes(100)
schedule_params.steps_between_evaluation_periods = EnvironmentEpisodes(10)
schedule_params.evaluation_steps = EnvironmentEpisodes(1)
schedule_params.heatup_steps = EnvironmentEpisodes(10)

#########
# Agent #
#########
agent_params = ClippedPPOAgentParameters()

agent_params.network_wrappers['main'].learning_rate = 0.001
agent_params.network_wrappers['main'].input_embedders_parameters[
    'observation'].activation_function = 'tanh'
agent_params.network_wrappers['main'].input_embedders_parameters[
    'observation'].scheme = [Dense(32)]
agent_params.network_wrappers['main'].middleware_parameters.scheme = [
    Dense(32)
]
agent_params.network_wrappers[
    'main'].middleware_parameters.activation_function = 'tanh'
agent_params.network_wrappers['main'].batch_size = 256
agent_params.network_wrappers['main'].optimizer_epsilon = 1e-5
agent_params.network_wrappers['main'].adam_optimizer_beta2 = 0.999

agent_params.algorithm.clip_likelihood_ratio_using_epsilon = 0.3
agent_params.algorithm.clipping_decay_schedule = LinearSchedule(
    0.5, 0.1, 10000 * 50)
agent_params.algorithm.beta_entropy = 0
agent_params.algorithm.gae_lambda = 0.95
agent_params.algorithm.discount = 0.999
Example #19
schedule_params.improve_steps = TrainingSteps(10000)
schedule_params.steps_between_evaluation_periods = EnvironmentSteps(204)
schedule_params.evaluation_steps = EnvironmentEpisodes(5)
schedule_params.heatup_steps = EnvironmentSteps(0)

#########
# Agent #
#########
agent_params = ClippedPPOAgentParameters()

agent_params.network_wrappers["main"].learning_rate = 0.0003
agent_params.network_wrappers["main"].input_embedders_parameters[
    "observation"].activation_function = "tanh"
agent_params.network_wrappers["main"].input_embedders_parameters[
    "observation"].scheme = [Dense(64)]
agent_params.network_wrappers["main"].middleware_parameters.scheme = [
    Dense(64)
]
agent_params.network_wrappers[
    "main"].middleware_parameters.activation_function = "tanh"
agent_params.network_wrappers["main"].batch_size = 64
agent_params.network_wrappers["main"].optimizer_epsilon = 1e-5
agent_params.network_wrappers["main"].adam_optimizer_beta2 = 0.999

agent_params.algorithm.clip_likelihood_ratio_using_epsilon = 0.2
agent_params.algorithm.clipping_decay_schedule = LinearSchedule(
    1.0, 0, 1000000)
agent_params.algorithm.beta_entropy = 0
agent_params.algorithm.gae_lambda = 0.95
agent_params.algorithm.discount = 0.99
Example #20
         Conv2d(64, 3, 1),
         BatchnormActivationDropout(batchnorm=True,
                                    activation_function='tanh'),
         Conv2d(128, 3, 2),
         BatchnormActivationDropout(batchnorm=True,
                                    activation_function='tanh'),
         Conv2d(128, 3, 1),
         BatchnormActivationDropout(batchnorm=True,
                                    activation_function='tanh'),
         Conv2d(256, 3, 1),
         BatchnormActivationDropout(batchnorm=True,
                                    activation_function='tanh'),
         Conv2d(256, 3, 1),
         BatchnormActivationDropout(batchnorm=True,
                                    activation_function='tanh'),
         Dense(512),
         BatchnormActivationDropout(activation_function='tanh',
                                    dropout_rate=0.3),
         Dense(512),
         BatchnormActivationDropout(activation_function='tanh',
                                    dropout_rate=0.3)
     ],
     activation_function=
     'none'  # we define the activation function for each layer explicitly
 ),
 'measurements':
 InputEmbedderParameters(
     scheme=[
         Dense(128),
         BatchnormActivationDropout(activation_function='tanh',
                                    dropout_rate=0.5),
Example #21
# Agent #
#########
agent_params = ClippedPPOAgentParameters()

# added
agent_params.network_wrappers['main'].input_embedders_parameters[
    'observation'].scheme = [
        Conv2d(32, 8, 4),
        BatchnormActivationDropout(batchnorm=True, activation_function='relu'),
        Conv2d(32, 4, 2),
        BatchnormActivationDropout(batchnorm=True, activation_function='relu'),
        Conv2d(64, 4, 2),
        BatchnormActivationDropout(batchnorm=True, activation_function='relu'),
        Conv2d(64, 3, 1),
        BatchnormActivationDropout(batchnorm=True, activation_function='relu'),
        Dense(512),
        BatchnormActivationDropout(activation_function='relu',
                                   dropout_rate=0.5),
        Dense(512),
        BatchnormActivationDropout(activation_function='relu',
                                   dropout_rate=0.5)
    ]
# agent_params.network_wrappers['main'].middleware_parameters.scheme = MiddlewareScheme.Deep
agent_params.network_wrappers['main'].middleware_parameters.scheme = [
    Dense(128),
    BatchnormActivationDropout(activation_function='relu', dropout_rate=0.4),
    Dense(128),
    BatchnormActivationDropout(activation_function='relu', dropout_rate=0.4),
    Dense(128),
    BatchnormActivationDropout(activation_function='relu', dropout_rate=0.4),
]
Example #22
####################
# Graph Scheduling #
####################
schedule_params = ScheduleParameters()
schedule_params.improve_steps = EnvironmentEpisodes(16 * 50 * 200)  # 200 epochs
schedule_params.steps_between_evaluation_periods = EnvironmentEpisodes(16 * 50)  # 50 cycles
schedule_params.evaluation_steps = EnvironmentEpisodes(10)
schedule_params.heatup_steps = EnvironmentSteps(0)

#########
# Agent #
#########
agent_params = DQNAgentParameters()
agent_params.network_wrappers['main'].learning_rate = 0.001
agent_params.network_wrappers['main'].batch_size = 128
agent_params.network_wrappers['main'].middleware_parameters.scheme = [Dense(256)]
agent_params.network_wrappers['main'].input_embedders_parameters = {
    'state': InputEmbedderParameters(scheme=EmbedderScheme.Empty),
    'desired_goal': InputEmbedderParameters(scheme=EmbedderScheme.Empty)}
agent_params.algorithm.discount = 0.98
agent_params.algorithm.num_consecutive_playing_steps = EnvironmentEpisodes(16)
agent_params.algorithm.num_consecutive_training_steps = 40
agent_params.algorithm.num_steps_between_copying_online_weights_to_target = TrainingSteps(40)
agent_params.algorithm.rate_for_copying_weights_to_target = 0.05
agent_params.memory.max_size = (MemoryGranularity.Transitions, 10**6)
agent_params.exploration.epsilon_schedule = ConstantSchedule(0.2)
agent_params.exploration.evaluation_epsilon = 0

agent_params.memory = EpisodicHindsightExperienceReplayParameters()
agent_params.memory.hindsight_goal_selection_method = HindsightGoalSelectionMethod.Final
agent_params.memory.hindsight_transitions_per_regular_transition = 1
Example #23
schedule_params.improve_steps = TrainingSteps(100000)
schedule_params.steps_between_evaluation_periods = EnvironmentSteps(2000)
schedule_params.evaluation_steps = EnvironmentEpisodes(5)
schedule_params.heatup_steps = EnvironmentSteps(0)

#########
# Agent #
#########
agent_params = ClippedPPOAgentParameters()

agent_params.network_wrappers['main'].learning_rate = 0.0003
agent_params.network_wrappers['main'].input_embedders_parameters[
    'observation'].activation_function = 'tanh'
agent_params.network_wrappers['main'].input_embedders_parameters[
    'observation'].scheme = [Dense(64)]
agent_params.network_wrappers['main'].middleware_parameters.scheme = [
    Dense(64)
]
agent_params.network_wrappers[
    'main'].middleware_parameters.activation_function = 'tanh'
agent_params.network_wrappers['main'].batch_size = 64
agent_params.network_wrappers['main'].optimizer_epsilon = 1e-5
agent_params.network_wrappers['main'].adam_optimizer_beta2 = 0.999

agent_params.algorithm.clip_likelihood_ratio_using_epsilon = 0.2
agent_params.algorithm.clipping_decay_schedule = LinearSchedule(
    1.0, 0, 1000000)
agent_params.algorithm.beta_entropy = 0
agent_params.algorithm.gae_lambda = 0.95
agent_params.algorithm.discount = 0.99
Example #24
def create_batchnorm_scheme(info_dict):
    """Creates a scheme where every other layer is a batchnorm layer, convolutional layers
    are first then dense layers.
     info_dict- dictionary containing the following entries:
     conv_info_list - List of list where the embedded list represent the
                      num of filter, kernel size, and stride. Embedded list
                      of size less than 3 will produce an exception. The size
                      of the non-imbedded list is interpreted and the desired number
                      convolutional layers.
     dense_layer__hidden_unit_list = List where the size represents the number of desired
                                     dense layers to be used after the convolution layer, the
                                     value of the list represents the number of hidden units
     bn_info_conv - List containing bool whether or not to use batchnorm for the convolutional
                    part of the archetecture, string for desired activation function, and dropout
                    rate, list with less than 3 items will cause an excpetion.
     bn_info_dense - List containing bool whether or not to use batchnorm for the dense
                     part of the archetecture, string for desired activation function,
                     and dropout rate, list with less than 3 items will cause an excpetion.
     is_first_layer_bn - True if the first layer of the scheme should be a batchnorm
                         layer.
    """
    try:
        batchnorm, activation_function, dropout_rate = tuple(
            info_dict[SchemeInfo.BN_INFO_CONV.value]
        )

        if not ActivationFunctions.has_activation_function(activation_function):
            raise Exception("Invalid activation function for batchnorm scheme")

        scheme = list()

        if info_dict[SchemeInfo.IS_FIRST_LAYER_BN.value]:
            scheme.append(
                BatchnormActivationDropout(
                    batchnorm=batchnorm,
                    activation_function=activation_function,
                    dropout_rate=dropout_rate,
                )
            )
        # Add the convolutional layers first
        for conv_info in info_dict[SchemeInfo.CONV_INFO_LIST.value]:
            # Add the convolutional filters followed by batchnorms
            num_filters, kernel_size, strides = tuple(conv_info)
            scheme.append(Conv2d(num_filters, kernel_size, strides))
            scheme.append(
                BatchnormActivationDropout(
                    batchnorm=batchnorm,
                    activation_function=activation_function,
                    dropout_rate=dropout_rate,
                )
            )

        batchnorm, activation_function, dropout_rate = tuple(
            info_dict[SchemeInfo.BN_INFO_DENSE.value]
        )

        if not ActivationFunctions.has_activation_function(activation_function):
            raise Exception("Invalid activation function for batchnorm scheme")

        for hidden_units in info_dict[SchemeInfo.DENSE_LAYER_INFO_LIST.value]:
            scheme.append(Dense(hidden_units))
            scheme.append(
                BatchnormActivationDropout(
                    batchnorm=batchnorm,
                    activation_function=activation_function,
                    dropout_rate=dropout_rate,
                )
            )
        return scheme
    except KeyError as err:
        raise Exception("Info, key {} not found".format(err.args[0]))
    except ValueError as err:
        raise Exception("Error while unpacking info: {}".format(err))
    except Exception as err:
        raise Exception("Error while creating scheme: {}".format(err))
Example #25
# Agent Params #
################
agent_params = DDPGAgentParameters()

# actor
actor_network = agent_params.network_wrappers['actor']
actor_network.learning_rate = 0.001
actor_network.batch_size = 256
actor_network.optimizer_epsilon = 1e-08
actor_network.adam_optimizer_beta1 = 0.9
actor_network.adam_optimizer_beta2 = 0.999
actor_network.input_embedders_parameters = {
    'observation': InputEmbedderParameters(scheme=EmbedderScheme.Empty),
    'desired_goal': InputEmbedderParameters(scheme=EmbedderScheme.Empty)
}
actor_network.middleware_parameters = FCMiddlewareParameters(scheme=[Dense(256), Dense(256), Dense(256)])
actor_network.heads_parameters[0].batchnorm = False

# critic
critic_network = agent_params.network_wrappers['critic']
critic_network.learning_rate = 0.001
critic_network.batch_size = 256
critic_network.optimizer_epsilon = 1e-08
critic_network.adam_optimizer_beta1 = 0.9
critic_network.adam_optimizer_beta2 = 0.999
critic_network.input_embedders_parameters = {
    'action': InputEmbedderParameters(scheme=EmbedderScheme.Empty),
    'desired_goal': InputEmbedderParameters(scheme=EmbedderScheme.Empty),
    'observation': InputEmbedderParameters(scheme=EmbedderScheme.Empty)
}
critic_network.middleware_parameters = FCMiddlewareParameters(scheme=[Dense(256), Dense(256), Dense(256)])
Example #26
#########
agent_params = ClippedPPOAgentParameters()
agent_params.network_wrappers['main'].input_embedders_parameters = {
    #         'left_camera': InputEmbedderParameters(activation_function='relu', dropout_rate=0.3),
    'STEREO_CAMERAS':
    InputEmbedderParameters(scheme=[
        Conv2d(32, 8, 4),
        Conv2d(32, 4, 2),
        Conv2d(64, 4, 2),
        Conv2d(64, 3, 1),
        Conv2d(64, 2, 1)
    ],
                            activation_function='relu',
                            dropout_rate=0.3),
    'LIDAR':
    InputEmbedderParameters(scheme=[Dense(64)],
                            activation_function='relu',
                            dropout_rate=0.3)
}

agent_params.network_wrappers['main'].middleware_parameters = \
     FCMiddlewareParameters(
         scheme=[
             Dense(512)
         ],
         activation_function='relu', dropout_rate=0.3
     )

agent_params.network_wrappers['main'].learning_rate = 0.0003
#agent_params.network_wrappers['main'].middleware_parameters.activation_function = 'relu'
agent_params.network_wrappers['main'].batch_size = 64