Example 1
    def __init__(self,
                 action_size,
                 observation_size,
                 lr=1e-5,
                 learning_rate_decay_steps=1000,
                 learning_rate_decay_rate=0.95,
                 training_param=None):
        """Initialize the network container and build the models.

        Parameters
        ----------
        action_size: size of the action space.
        observation_size: size of the observation vector.
        lr: initial learning rate.
        learning_rate_decay_steps: steps between learning-rate decays.
        learning_rate_decay_rate: multiplicative decay factor.
        training_param: training hyper-parameters; a fresh ``TrainingParam()``
            is created when ``None`` is given.
        """
        # None sentinel instead of ``training_param=TrainingParam()``: a
        # default instance would be created once at function-definition time
        # and shared (and possibly mutated) across every instance built with
        # the default.  This also matches the pattern used by the sibling
        # implementations in this codebase.
        if training_param is None:
            training_param = TrainingParam()
        BaseDeepQ.__init__(self, action_size, observation_size, lr,
                           learning_rate_decay_steps, learning_rate_decay_rate,
                           training_param)
        # TODO add as meta param the number of "Q" you want to use (here 2)
        # TODO add as meta param size and types of the networks
        self.average_reward = 0
        self.life_spent = 1
        # empty 1-D array; q-values are appended to it during training
        self.qvalue_evolution = np.zeros((0, ))
        self.Is_nan = False

        # model handles, filled in by construct_q_network()
        self.model_value_target = None
        self.model_value = None
        self.model_Q = None
        self.model_Q2 = None
        self.model_policy = None

        self.construct_q_network()
        # caches reused between calls (presumably to avoid re-allocating
        # identity/arange tensors -- confirm against the training methods)
        self.previous_size = 0
        self.previous_eyes = None
        self.previous_arange = None
        self.previous_size_train = 0
        self.previous_eyes_train = None
Example 2
    def __init__(self, nn_params, training_param=None, verbose=False):
        """Set up all bookkeeping attributes and build the networks.

        ``training_param`` defaults to a freshly created ``TrainingParam()``
        when not supplied.
        """
        training_param = TrainingParam() if training_param is None else training_param
        BaseDeepQ.__init__(self, nn_params, training_param, verbose=verbose)

        # TODO add as meta param the number of "Q" you want to use (here 2)
        # TODO add as meta param size and types of the networks
        self.average_reward = 0
        self.life_spent = 1
        self.qvalue_evolution = np.zeros((0, ))
        self.Is_nan = False

        # model handles -- populated by construct_q_network()
        for model_attr in ("model_value_target", "model_value",
                           "model_Q", "model_Q2", "model_policy"):
            setattr(self, model_attr, None)

        self.construct_q_network()

        # caches reused between successive calls
        self.previous_size = 0
        self.previous_eyes = None
        self.previous_arange = None
        self.previous_size_train = 0
        self.previous_eyes_train = None

        # optimizers and learning rate schedules (set later)
        for opt_attr in ("schedule_lr_policy", "optimizer_policy",
                         "schedule_lr_Q", "optimizer_Q",
                         "schedule_lr_Q2", "optimizer_Q2",
                         "schedule_lr_value", "optimizer_value"):
            setattr(self, opt_attr, None)
 def __init__(self, nn_params, training_param=None):
     """Build the Q-network and record the gradient-clipping limits.

     A fresh ``TrainingParam()`` is used when ``training_param`` is ``None``.
     """
     training_param = TrainingParam() if training_param is None else training_param
     BaseDeepQ.__init__(self, nn_params, training_param)
     # custom keras objects needed when the model is (de)serialized
     self._custom_objects = {"LtauBis": LtauBis}
     self.construct_q_network()
     # gradient / loss clipping thresholds taken from the training parameters
     self._max_global_norm_grad = training_param.max_global_norm_grad
     self._max_value_grad = training_param.max_value_grad
     self._max_loss = training_param.max_loss
Example 4
 def __init__(self, nn_params, training_param=None):
     """Initialize the learning-rate schedule slot and build the network.

     ``training_param`` falls back to a new ``TrainingParam()`` when omitted.
     """
     training_param = TrainingParam() if training_param is None else training_param
     BaseDeepQ.__init__(self, nn_params, training_param)
     # learning-rate schedule handle, filled in later
     self.schedule_lr_model = None
     self.construct_q_network()
Example 5
 def __init__(self,
              action_size,
              observation_size,
              lr=1e-5,
              learning_rate_decay_steps=1000,
              learning_rate_decay_rate=0.95,
              training_param=None):
     """Forward the hyper-parameters to ``BaseDeepQ`` and build the network.

     Parameters
     ----------
     action_size: size of the action space.
     observation_size: size of the observation vector.
     lr: initial learning rate.
     learning_rate_decay_steps: steps between learning-rate decays.
     learning_rate_decay_rate: multiplicative decay factor.
     training_param: training hyper-parameters; a fresh ``TrainingParam()``
         is created when ``None`` is given.
     """
     # None sentinel instead of ``training_param=TrainingParam()``: a default
     # instance would be created once at definition time and shared across
     # every instance, so mutations would leak between objects.
     if training_param is None:
         training_param = TrainingParam()
     BaseDeepQ.__init__(self, action_size, observation_size, lr,
                        learning_rate_decay_steps, learning_rate_decay_rate,
                        training_param)
     self.construct_q_network()
Example 6
    def __init__(self, nn_params, training_param=None):
        """Record clipping limits and grid-model slots, then build the network.

        A new ``TrainingParam()`` is used when ``training_param`` is ``None``.
        """
        training_param = TrainingParam() if training_param is None else training_param
        BaseDeepQ.__init__(self, nn_params, training_param)
        # custom keras objects needed when the model is (de)serialized
        self._custom_objects = {"LtauBis": LtauBis}
        # gradient / loss clipping thresholds from the training parameters
        self._max_global_norm_grad = training_param.max_global_norm_grad
        self._max_value_grad = training_param.max_value_grad
        self._max_loss = training_param.max_loss

        self.train_lr = 1.0

        # added: grid-model related state, populated later
        self.encoded_state = None
        for grid_attr in ("grid_model", "_schedule_grid_model",
                          "_optimizer_grid_model", "grid_model_losses_npy"):
            setattr(self, grid_attr, None)
        self._qnet_variables = []

        self.construct_q_network()
Example 7
 def __init__(self,
              action_size,
              observation_size,
              tau_dim_start,
              tau_dim_end,
              add_tau,
              lr=0.00001,
              learning_rate_decay_steps=1000,
              learning_rate_decay_rate=0.95,
              training_param=None):
     """Store the tau-slice configuration and build the network.

     Parameters
     ----------
     action_size: size of the action space.
     observation_size: size of the observation vector.
     tau_dim_start: first index of the tau slice in the observation.
     tau_dim_end: one-past-last index of the tau slice.
     add_tau: value combined with the tau slice (semantics defined by the
         training code -- confirm against the caller).
     lr: initial learning rate.
     learning_rate_decay_steps: steps between learning-rate decays.
     learning_rate_decay_rate: multiplicative decay factor.
     training_param: training hyper-parameters; a fresh ``TrainingParam()``
         is created when ``None`` is given.
     """
     # None sentinel instead of ``training_param=TrainingParam()``: a default
     # instance would be created once at definition time and shared across
     # every instance, so mutations would leak between objects.
     if training_param is None:
         training_param = TrainingParam()
     BaseDeepQ.__init__(self,
                        action_size,
                        observation_size,
                        lr,
                        learning_rate_decay_steps=learning_rate_decay_steps,
                        learning_rate_decay_rate=learning_rate_decay_rate,
                        training_param=training_param)
     self.tau_dim_start = tau_dim_start
     self.tau_dim_end = tau_dim_end
     self.add_tau = add_tau
     # custom keras objects needed when the model is (de)serialized
     self.custom_objects = {"Ltau": Ltau}
     self.construct_q_network()