Example no. 1
0
 def __init__(self):
     super().__init__()
     self.input_embedders_parameters = {'observation': InputEmbedderParameters()}
     self.middleware_parameters = FCMiddlewareParameters()
     self.heads_parameters = [PolicyHeadParameters()]
     self.loss_weights = [1.0]
     self.async_training = True
Example no. 2
0
 def __init__(self):
     super().__init__()
     self.input_embedders_parameters = {'observation': InputEmbedderParameters()}
     self.middleware_parameters = FCMiddlewareParameters(scheme=MiddlewareScheme.Medium)
     self.heads_parameters = [PolicyHeadParameters()]
     self.optimizer_type = 'Adam'
     self.batch_size = 32
     self.replace_mse_with_huber_loss = False
     self.create_target_network = False
Example no. 3
0
 def __init__(self):
     super().__init__()
     self.input_embedders_parameters = {
         'observation': InputEmbedderParameters()
     }
     self.middleware_parameters = FCMiddlewareParameters()
     self.heads_parameters = [VHeadParameters(), PolicyHeadParameters()]
     self.loss_weights = [0.5, 1.0]
     self.rescale_gradient_from_head_by_factor = [1, 1]
     self.optimizer_type = 'Adam'
     self.clip_gradients = 40.0
     self.async_training = True
Example no. 4
0
 def __init__(self):
     super().__init__()
     self.input_embedders_parameters = {
         'observation': InputEmbedderParameters()
     }
     self.middleware_parameters = FCMiddlewareParameters()
     self.heads_parameters = [
         VHeadParameters(loss_weight=0.5),
         PolicyHeadParameters(loss_weight=1.0)
     ]
     self.optimizer_type = 'Adam'
     self.clip_gradients = 40.0
     self.async_training = True
Example no. 5
0
 def __init__(self):
     super().__init__()
     self.input_embedders_parameters = {
         'observation': InputEmbedderParameters()
     }
     self.middleware_parameters = FCMiddlewareParameters()
     self.heads_parameters = [VHeadParameters(), PolicyHeadParameters()]
     self.loss_weights = [0.5, 1.0]
     self.sil_loss_weights = [0.5 * 0.01,
                              1.0]  # called beta^SIL in the paper
     self.rescale_gradient_from_head_by_factor = [1, 1]
     self.optimizer_type = 'Adam'
     self.clip_gradients = 40.0
     self.batch_size = 32  # = 512 / 16 workers (since training is synchronous)
     self.async_training = False  # A2C
     self.shared_optimizer = True