Example no. 1
 def __init__(self):
     super().__init__()
     self.input_embedders_parameters = {'observation': InputEmbedderParameters(batchnorm=True),
                                         'action': InputEmbedderParameters(scheme=EmbedderScheme.Shallow)}
     self.middleware_parameters = FCMiddlewareParameters()
     self.heads_parameters = [VHeadParameters()]
     self.optimizer_type = 'Adam'
     self.batch_size = 64
     self.async_training = False
     self.learning_rate = 0.001
     self.create_target_network = True
     self.shared_optimizer = True
     self.scale_down_gradients_by_number_of_workers_for_sync_training = False
Example no. 2
 def __init__(self):
     super().__init__()
     self.input_embedders_parameters = {
         'observation': InputEmbedderParameters()
     }
     self.middleware_parameters = FCMiddlewareParameters()
     self.heads_parameters = [DNDQHeadParameters()]
     self.optimizer_type = 'Adam'
Example no. 3
 def __init__(self):
     super().__init__()
     self.input_embedders_parameters = {
         'observation': InputEmbedderParameters()
     }
     self.middleware_parameters = FCMiddlewareParameters()
     self.heads_parameters = [PolicyHeadParameters()]
     self.async_training = True
Example no. 4
 def __init__(self):
     super().__init__()
     self.input_embedders_parameters = {'observation': InputEmbedderParameters()}
     self.middleware_parameters = FCMiddlewareParameters()
     self.heads_parameters = [DNDQHeadParameters()]
     self.loss_weights = [1.0]
     self.rescale_gradient_from_head_by_factor = [1]
     self.optimizer_type = 'Adam'
Example no. 5
 def __init__(self, scheme=MiddlewareScheme.Medium, dense_layer=Dense):
     super().__init__()
     self.input_embedders_parameters = {
         'observation': InputEmbedderParameters()
     }
     self.middleware_parameters = FCMiddlewareParameters(
         scheme=scheme, dense_layer=dense_layer)
     self.heads_parameters = [DNDQHeadParameters()]
     self.optimizer_type = 'Adam'
Example no. 6
 def __init__(self):
     super().__init__()
     self.input_embedders_parameters = {'observation': InputEmbedderParameters()}
     self.input_embedders_parameters['observation'].scheme = EmbedderScheme.Medium
     self.middleware_parameters = FCMiddlewareParameters()
     self.optimizer_type = 'Adam'
     self.batch_size = 32
     self.replace_mse_with_huber_loss = False
     self.create_target_network = False
Example no. 7
 def __init__(self):
     super().__init__()
     self.input_embedders_parameters = {'observation': InputEmbedderParameters()}
     self.middleware_parameters = FCMiddlewareParameters(scheme=MiddlewareScheme.Medium)
     self.heads_parameters = [QHeadParameters()]
     self.loss_weights = [1.0]
     self.optimizer_type = 'Adam'
     self.batch_size = 32
     self.replace_mse_with_huber_loss = True
     self.create_target_network = True
Example no. 8
 def __init__(self):
     super().__init__()
     self.input_embedders_parameters = {
         'observation': InputEmbedderParameters()
     }
     self.middleware_parameters = FCMiddlewareParameters()
     self.heads_parameters = [NAFHeadParameters()]
     self.optimizer_type = 'Adam'
     self.learning_rate = 0.001
     self.async_training = True
     self.create_target_network = True
Example no. 9
 def __init__(self):
     super().__init__()
     self.input_embedders_parameters = {
         'observation': InputEmbedderParameters(activation_function='tanh')
     }
     self.middleware_parameters = FCMiddlewareParameters(
         activation_function='tanh')
     self.heads_parameters = [VHeadParameters()]
     self.async_training = True
     self.l2_regularization = 0
     self.create_target_network = True
     self.batch_size = 128
Example no. 10
 def __init__(self):
     super().__init__()
     self.input_embedders_parameters = {
         'observation': InputEmbedderParameters()
     }
     self.middleware_parameters = FCMiddlewareParameters()
     self.heads_parameters = [QHeadParameters()]
     self.loss_weights = [1.0]
     self.optimizer_type = 'Adam'
     self.async_training = True
     self.shared_optimizer = True
     self.create_target_network = True
Example no. 11
 def __init__(self):
     super().__init__()
     self.input_embedders_parameters = {
         'observation': InputEmbedderParameters()
     }
     self.middleware_parameters = FCMiddlewareParameters()
     self.heads_parameters = [VHeadParameters(), PolicyHeadParameters()]
     self.loss_weights = [0.5, 1.0]
     self.rescale_gradient_from_head_by_factor = [1, 1]
     self.optimizer_type = 'Adam'
     self.clip_gradients = 40.0
     self.async_training = True
Example no. 12
    def __init__(self):
        super().__init__()
        self.input_embedders_parameters = {
            'observation':
            InputEmbedderParameters(activation_function='leaky_relu'),
            'measurements':
            InputEmbedderParameters(activation_function='leaky_relu'),
            'goal':
            InputEmbedderParameters(activation_function='leaky_relu')
        }

        self.input_embedders_parameters['observation'].scheme = [
            Conv2d([32, 8, 4]),
            Conv2d([64, 4, 2]),
            Conv2d([64, 3, 1]),
            Dense([512]),
        ]

        self.input_embedders_parameters['measurements'].scheme = [
            Dense([128]),
            Dense([128]),
            Dense([128]),
        ]

        self.input_embedders_parameters['goal'].scheme = [
            Dense([128]),
            Dense([128]),
            Dense([128]),
        ]

        self.middleware_parameters = FCMiddlewareParameters(
            activation_function='leaky_relu', scheme=MiddlewareScheme.Empty)
        self.heads_parameters = [
            MeasurementsPredictionHeadParameters(
                activation_function='leaky_relu')
        ]
        self.loss_weights = [1.0]
        self.async_training = False
        self.batch_size = 64
        self.adam_optimizer_beta1 = 0.95
Example no. 13
 def __init__(self):
     super().__init__()
     self.input_embedders_parameters = {
         'observation': InputEmbedderParameters()
     }
     self.middleware_parameters = FCMiddlewareParameters()
     self.heads_parameters = [
         VHeadParameters(loss_weight=0.5),
         PolicyHeadParameters(loss_weight=1.0)
     ]
     self.optimizer_type = 'Adam'
     self.clip_gradients = 40.0
     self.async_training = True
Example no. 14
 def __init__(self):
     super().__init__()
     self.input_embedders_parameters = {
         'observation': InputEmbedderParameters(batchnorm=True)
     }
     self.middleware_parameters = FCMiddlewareParameters(batchnorm=True)
     self.heads_parameters = [DDPGActorHeadParameters()]
     self.loss_weights = [1.0]
     self.rescale_gradient_from_head_by_factor = [1]
     self.optimizer_type = 'Adam'
     self.batch_size = 64
     self.async_training = False
     self.learning_rate = 0.0001
     self.create_target_network = True
     self.shared_optimizer = True
     self.scale_down_gradients_by_number_of_workers_for_sync_training = False
Example no. 15
 def __init__(self):
     super().__init__()
     self.input_embedders_parameters = {
         'observation': InputEmbedderParameters()
     }
     self.middleware_parameters = FCMiddlewareParameters()
     self.heads_parameters = [VHeadParameters(), PolicyHeadParameters()]
     self.loss_weights = [0.5, 1.0]
     self.sil_loss_weights = [0.5 * 0.01,
                              1.0]  # called beta^SIL in the paper
     self.rescale_gradient_from_head_by_factor = [1, 1]
     self.optimizer_type = 'Adam'
     self.clip_gradients = 40.0
     self.batch_size = 32  # = 512 / 16 workers (since training is synchronous)
     self.async_training = False  # A2C
     self.shared_optimizer = True
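Note: two comments in Example no. 15 encode simple arithmetic. Reading the expressions literally, the SIL weight of the value head is its A2C weight (0.5) scaled by the 0.01 factor the comment calls beta^SIL, while the policy head keeps 1.0; and the per-worker batch size of 32 comes from a total synchronous batch of 512 split across 16 workers (both totals are taken from the comment, not from the snippet itself). A quick, illustrative check:

# Value-head SIL weight = A2C weight * beta^SIL; policy head keeps its weight of 1.0
beta_sil = 0.01
a2c_loss_weights = [0.5, 1.0]          # [value head, policy head]
sil_loss_weights = [a2c_loss_weights[0] * beta_sil, 1.0]
assert sil_loss_weights == [0.005, 1.0]

# Synchronous (A2C-style) training: the global batch is split evenly across workers
total_batch, num_workers = 512, 16
assert total_batch // num_workers == 32    # matches self.batch_size above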
Example no. 16
 def __init__(self):
     super().__init__()
     self.input_embedders_parameters = {'observation': InputEmbedderParameters(activation_function='tanh')}
     self.middleware_parameters = FCMiddlewareParameters(activation_function='tanh')
     self.heads_parameters = [VHeadParameters(), PPOHeadParameters()]
     self.loss_weights = [1.0, 1.0]
     self.rescale_gradient_from_head_by_factor = [1, 1]
     self.batch_size = 64
     self.optimizer_type = 'Adam'
     self.clip_gradients = None
     self.use_separate_networks_per_head = True
     self.async_training = False
     self.l2_regularization = 0
     self.create_target_network = True
     self.shared_optimizer = True
     self.scale_down_gradients_by_number_of_workers_for_sync_training = True
Example no. 17
    def __init__(self):
        super().__init__()
        self.input_embedders_parameters = {
            'observation': InputEmbedderParameters(activation_function='tanh')
        }
        self.middleware_parameters = FCMiddlewareParameters(
            activation_function='tanh')
        self.heads_parameters = [VHeadParameters(), PPOHeadParameters()]
        self.batch_size = 64
        self.optimizer_type = 'Adam'
        self.clip_gradients = None
        self.use_separate_networks_per_head = True
        self.async_training = False
        self.l2_regularization = 0

        # The target network is used in order to freeze the old policy, while making updates to the new one
        # in train_network()
        self.create_target_network = True
        self.shared_optimizer = True
        self.scale_down_gradients_by_number_of_workers_for_sync_training = True
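Note: the target-network comment in Example no. 17 refers to the clipped-surrogate update used by PPO-style agents, where the old policy's outputs are held fixed while the online network is optimized. Below is a minimal, framework-agnostic sketch of that idea; the names new_log_probs, old_log_probs, advantages and epsilon are illustrative and are not Coach APIs.

import numpy as np

def clipped_surrogate_loss(new_log_probs, old_log_probs, advantages, epsilon=0.2):
    # Probability ratio between the policy being updated and the frozen (old) policy
    ratio = np.exp(new_log_probs - old_log_probs)
    # Clipping keeps a single update from moving the policy too far from the old one
    clipped_ratio = np.clip(ratio, 1.0 - epsilon, 1.0 + epsilon)
    # PPO maximizes the minimum of the clipped and unclipped objectives;
    # negating turns it into a loss to minimize
    return -np.mean(np.minimum(ratio * advantages, clipped_ratio * advantages))

# The old policy (e.g. the frozen target network) only supplies old_log_probs;
# gradients flow through new_log_probs alone.
old_log_probs = np.log(np.array([0.20, 0.50, 0.30]))
new_log_probs = np.log(np.array([0.25, 0.45, 0.30]))
advantages = np.array([1.0, -0.5, 0.2])
print(clipped_surrogate_loss(new_log_probs, old_log_probs, advantages))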
Example no. 18
schedule_params.evaluation_steps = EnvironmentEpisodes(5)
schedule_params.heatup_steps = EnvironmentSteps(0)

################
# Agent Params #
################
agent_params = CILAgentParameters()

# forward camera and measurements input
agent_params.network_wrappers['main'].input_embedders_parameters = {
    'CameraRGB': InputEmbedderParameters(
        scheme=[Conv2d([32, 5, 2]),
                Conv2d([32, 3, 1]),
                Conv2d([64, 3, 2]),
                Conv2d([64, 3, 1]),
                Conv2d([128, 3, 2]),
                Conv2d([128, 3, 1]),
                Conv2d([256, 3, 1]),
                Conv2d([256, 3, 1]),
                Dense([512]),
                Dense([512])],
        dropout=True,
        batchnorm=True),
    'measurements': InputEmbedderParameters(
        scheme=[Dense([128]), Dense([128])])
}

# TODO: batch norm is currently applied to the fc layers, which is not desired
# TODO: dropout should be configured differently per layer: [1.0] * 8 + [0.7] * 2 + [0.5] * 2 + [0.5] * 1 + [0.5, 1.] * 5
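# Illustrative sketch (not part of the original preset): instead of a single embedder-wide
# dropout/batchnorm flag, per-layer behaviour can be expressed by interleaving
# BatchnormActivationDropout entries with the layers in the scheme, as Example no. 20 below does.
# The layer choices and rates here are placeholders:
#
# agent_params.network_wrappers['main'].input_embedders_parameters['CameraRGB'].scheme = [
#     Conv2d([32, 5, 2]), BatchnormActivationDropout(batchnorm=True),
#     Conv2d([32, 3, 1]), BatchnormActivationDropout(batchnorm=True),
#     Dense([512]),       BatchnormActivationDropout(dropout_rate=0.5),
#     Dense([512]),       BatchnormActivationDropout(dropout_rate=0.5),
# ]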

# simple fc middleware
agent_params.network_wrappers['main'].middleware_parameters = FCMiddlewareParameters(scheme=[Dense([512])])
Example no. 19
#########
# Agent #
#########
agent_params = ActorCriticAgentParameters()

agent_params.algorithm.policy_gradient_rescaler = PolicyGradientRescaler.GAE
agent_params.algorithm.apply_gradients_every_x_episodes = 1
agent_params.algorithm.num_steps_between_gradient_updates = 20
agent_params.algorithm.gae_lambda = 0.96
agent_params.algorithm.beta_entropy = 0

agent_params.network_wrappers['main'].clip_gradients = 10.0
agent_params.network_wrappers['main'].learning_rate = 0.00001
# agent_params.network_wrappers['main'].batch_size = 20
agent_params.network_wrappers['main'].input_embedders_parameters = {
    "screen": InputEmbedderParameters(input_rescaling={'image': 3.0})
}

agent_params.exploration = AdditiveNoiseParameters()
agent_params.exploration.noise_percentage_schedule = ConstantSchedule(0.05)
# agent_params.exploration.noise_percentage_schedule = LinearSchedule(0.4, 0.05, 100000)
agent_params.exploration.evaluation_noise_percentage = 0.05

agent_params.network_wrappers['main'].batch_size = 64
agent_params.network_wrappers['main'].optimizer_epsilon = 1e-5
agent_params.network_wrappers['main'].adam_optimizer_beta2 = 0.999

###############
# Environment #
###############
Example no. 20
 InputEmbedderParameters(
     scheme=[
         Conv2d(32, 5, 2),
         BatchnormActivationDropout(batchnorm=True,
                                    activation_function=tf.tanh),
         Conv2d(32, 3, 1),
         BatchnormActivationDropout(batchnorm=True,
                                    activation_function=tf.tanh),
         Conv2d(64, 3, 2),
         BatchnormActivationDropout(batchnorm=True,
                                    activation_function=tf.tanh),
         Conv2d(64, 3, 1),
         BatchnormActivationDropout(batchnorm=True,
                                    activation_function=tf.tanh),
         Conv2d(128, 3, 2),
         BatchnormActivationDropout(batchnorm=True,
                                    activation_function=tf.tanh),
         Conv2d(128, 3, 1),
         BatchnormActivationDropout(batchnorm=True,
                                    activation_function=tf.tanh),
         Conv2d(256, 3, 1),
         BatchnormActivationDropout(batchnorm=True,
                                    activation_function=tf.tanh),
         Conv2d(256, 3, 1),
         BatchnormActivationDropout(batchnorm=True,
                                    activation_function=tf.tanh),
         Dense(512),
         BatchnormActivationDropout(activation_function=tf.tanh,
                                    dropout_rate=0.3),
         Dense(512),
         BatchnormActivationDropout(activation_function=tf.tanh,
                                    dropout_rate=0.3)
     ],
     activation_function='none'  # we define the activation function for each layer explicitly
 ),
Example no. 21
schedule_params.steps_between_evaluation_periods = EnvironmentEpisodes(
    16 * 50)  # 50 cycles
schedule_params.evaluation_steps = EnvironmentEpisodes(10)
schedule_params.heatup_steps = EnvironmentSteps(0)

#########
# Agent #
#########
agent_params = DQNAgentParameters()
agent_params.network_wrappers['main'].learning_rate = 0.001
agent_params.network_wrappers['main'].batch_size = 128
agent_params.network_wrappers['main'].middleware_parameters.scheme = [
    Dense(256)
]
agent_params.network_wrappers['main'].input_embedders_parameters = {
    'state': InputEmbedderParameters(scheme=EmbedderScheme.Empty),
    'desired_goal': InputEmbedderParameters(scheme=EmbedderScheme.Empty)
}
agent_params.algorithm.discount = 0.98
agent_params.algorithm.num_consecutive_playing_steps = EnvironmentEpisodes(16)
agent_params.algorithm.num_consecutive_training_steps = 40
agent_params.algorithm.num_steps_between_copying_online_weights_to_target = TrainingSteps(
    40)
agent_params.algorithm.rate_for_copying_weights_to_target = 0.05
agent_params.memory.max_size = (MemoryGranularity.Transitions, 10**6)
agent_params.exploration.epsilon_schedule = ConstantSchedule(0.2)
agent_params.exploration.evaluation_epsilon = 0

###############
# Environment #
###############
Example no. 22
schedule_params = ScheduleParameters()
schedule_params.improve_steps = TrainingSteps(10000000000)
schedule_params.steps_between_evaluation_periods = EnvironmentEpisodes(20)
schedule_params.evaluation_steps = EnvironmentEpisodes(1)
schedule_params.heatup_steps = EnvironmentSteps(0)

#########
# Agent #
#########
agent_params = ActorCriticAgentParameters()
agent_params.algorithm.apply_gradients_every_x_episodes = 1
agent_params.algorithm.num_steps_between_gradient_updates = 20
agent_params.algorithm.beta_entropy = 0.005
agent_params.network_wrappers['main'].learning_rate = 0.00002
agent_params.network_wrappers['main'].input_embedders_parameters['observation'] = \
    InputEmbedderParameters(scheme=[Dense(200)])
agent_params.network_wrappers['main'].middleware_parameters = LSTMMiddlewareParameters(
    scheme=MiddlewareScheme.Empty, number_of_lstm_cells=128)

agent_params.input_filter = InputFilter()
agent_params.input_filter.add_reward_filter('rescale',
                                            RewardRescaleFilter(1 / 20.))
agent_params.input_filter.add_observation_filter(
    'observation', 'normalize', ObservationNormalizationFilter())

###############
# Environment #
###############
env_params = GymVectorEnvironment(level=SingleLevelSelection(mujoco_v2))
Example no. 23
schedule_params.heatup_steps = EnvironmentSteps(0)

################
# Agent Params #
################
agent_params = DDPGAgentParameters()

# actor
actor_network = agent_params.network_wrappers['actor']
actor_network.learning_rate = 0.001
actor_network.batch_size = 256
actor_network.optimizer_epsilon = 1e-08
actor_network.adam_optimizer_beta1 = 0.9
actor_network.adam_optimizer_beta2 = 0.999
actor_network.input_embedders_parameters = {
    'observation': InputEmbedderParameters(scheme=EmbedderScheme.Empty),
    'desired_goal': InputEmbedderParameters(scheme=EmbedderScheme.Empty)
}
actor_network.middleware_parameters = FCMiddlewareParameters(
    scheme=[Dense(256), Dense(256), Dense(256)])
actor_network.heads_parameters[0].batchnorm = False

# critic
critic_network = agent_params.network_wrappers['critic']
critic_network.learning_rate = 0.001
critic_network.batch_size = 256
critic_network.optimizer_epsilon = 1e-08
critic_network.adam_optimizer_beta1 = 0.9
critic_network.adam_optimizer_beta2 = 0.999
critic_network.input_embedders_parameters = {
    'action': InputEmbedderParameters(scheme=EmbedderScheme.Empty),
Example no. 24
schedule_params = ScheduleParameters()
schedule_params.improve_steps = TrainingSteps(10000000000)
schedule_params.steps_between_evaluation_periods = EnvironmentEpisodes(20)
schedule_params.evaluation_steps = EnvironmentEpisodes(1)
schedule_params.heatup_steps = EnvironmentSteps(0)

#########
# Agent #
#########
agent_params = ActorCriticAgentParameters()
agent_params.algorithm.apply_gradients_every_x_episodes = 1
agent_params.algorithm.num_steps_between_gradient_updates = 20
agent_params.algorithm.beta_entropy = 0.005
agent_params.network_wrappers['main'].learning_rate = 0.00002
agent_params.network_wrappers['main'].input_embedders_parameters['observation'] = \
    InputEmbedderParameters(scheme=[Dense([200])])
agent_params.network_wrappers['main'].middleware_parameters = LSTMMiddlewareParameters(
    scheme=MiddlewareScheme.Empty, number_of_lstm_cells=128)

agent_params.input_filter = MujocoInputFilter()
agent_params.input_filter.add_reward_filter('rescale',
                                            RewardRescaleFilter(1 / 20.))
agent_params.input_filter.add_observation_filter(
    'observation', 'normalize', ObservationNormalizationFilter())

agent_params.exploration = ContinuousEntropyParameters()

###############
# Environment #
###############