def __init__(self,
             action_size,
             observation_size,
             lr=1e-5,
             learning_rate_decay_steps=1000,
             learning_rate_decay_rate=0.95,
             training_param=None):
    """Initialize the multi-network container (value, two Q heads and a policy).

    Parameters
    ----------
    action_size:
        Size of the action space, forwarded to ``BaseDeepQ``.
    observation_size:
        Size of the observation space, forwarded to ``BaseDeepQ``.
    lr:
        Initial learning rate.
    learning_rate_decay_steps, learning_rate_decay_rate:
        Exponential learning-rate decay schedule parameters.
    training_param:
        Optional ``TrainingParam``; a fresh instance is created when omitted.
    """
    # Bug fix: the original signature used ``training_param=TrainingParam()``,
    # a mutable default evaluated once at function-definition time and then
    # shared by every instance. Use the None-sentinel idiom instead (this also
    # matches the convention of the other __init__ variants in this file).
    if training_param is None:
        training_param = TrainingParam()
    BaseDeepQ.__init__(self,
                       action_size,
                       observation_size,
                       lr,
                       learning_rate_decay_steps,
                       learning_rate_decay_rate,
                       training_param)
    # TODO add as meta param the number of "Q" you want to use (here 2)
    # TODO add as meta param size and types of the networks
    # training bookkeeping
    self.average_reward = 0
    self.life_spent = 1
    self.qvalue_evolution = np.zeros((0,))  # history of predicted Q values
    self.Is_nan = False
    # model placeholders, filled in by construct_q_network()
    self.model_value_target = None
    self.model_value = None
    self.model_Q = None
    self.model_Q2 = None
    self.model_policy = None
    self.construct_q_network()
    # caches reused between predict calls (identity matrices / aranges)
    self.previous_size = 0
    self.previous_eyes = None
    self.previous_arange = None
    self.previous_size_train = 0
    self.previous_eyes_train = None
def __init__(self, nn_params, training_param=None, verbose=False):
    """Set up the multi-head network holder and build the models.

    Parameters
    ----------
    nn_params:
        Architecture description forwarded to ``BaseDeepQ``.
    training_param:
        Optional ``TrainingParam``; defaults to a fresh instance.
    verbose:
        Forwarded to ``BaseDeepQ``.
    """
    training_param = TrainingParam() if training_param is None else training_param
    BaseDeepQ.__init__(self, nn_params, training_param, verbose=verbose)
    # TODO add as meta param the number of "Q" you want to use (here 2)
    # TODO add as meta param size and types of the networks
    # running statistics of the training process
    self.average_reward = 0
    self.life_spent = 1
    self.qvalue_evolution = np.zeros((0,))
    self.Is_nan = False
    # model placeholders; populated by construct_q_network()
    self.model_value_target = self.model_value = None
    self.model_Q = self.model_Q2 = None
    self.model_policy = None
    self.construct_q_network()
    # cached helper arrays reused across calls
    self.previous_size = 0
    self.previous_eyes = None
    self.previous_arange = None
    self.previous_size_train = 0
    self.previous_eyes_train = None
    # optimizers and learning-rate schedules (one per model)
    self.schedule_lr_policy = None
    self.optimizer_policy = None
    self.schedule_lr_Q = None
    self.optimizer_Q = None
    self.schedule_lr_Q2 = None
    self.optimizer_Q2 = None
    self.schedule_lr_value = None
    self.optimizer_value = None
def __init__(self, nn_params, training_param=None):
    """Build the network and record the gradient/loss clipping thresholds.

    Parameters
    ----------
    nn_params:
        Architecture description forwarded to ``BaseDeepQ``.
    training_param:
        Optional ``TrainingParam``; defaults to a fresh instance.
    """
    training_param = TrainingParam() if training_param is None else training_param
    BaseDeepQ.__init__(self, nn_params, training_param)
    # custom keras objects needed when (de)serializing the model
    self._custom_objects = {"LtauBis": LtauBis}
    self.construct_q_network()
    # clipping thresholds taken from the training parameters
    self._max_global_norm_grad = training_param.max_global_norm_grad
    self._max_value_grad = training_param.max_value_grad
    self._max_loss = training_param.max_loss
def __init__(self, nn_params, training_param=None):
    """Initialize the wrapper and build the underlying model.

    Parameters
    ----------
    nn_params:
        Architecture description forwarded to ``BaseDeepQ``.
    training_param:
        Optional ``TrainingParam``; defaults to a fresh instance.
    """
    training_param = TrainingParam() if training_param is None else training_param
    BaseDeepQ.__init__(self, nn_params, training_param)
    # learning-rate schedule placeholder, set up elsewhere
    self.schedule_lr_model = None
    self.construct_q_network()
def __init__(self,
             action_size,
             observation_size,
             lr=1e-5,
             learning_rate_decay_steps=1000,
             learning_rate_decay_rate=0.95,
             training_param=None):
    """Initialize the base network holder and build the model.

    Parameters
    ----------
    action_size:
        Size of the action space, forwarded to ``BaseDeepQ``.
    observation_size:
        Size of the observation space, forwarded to ``BaseDeepQ``.
    lr:
        Initial learning rate.
    learning_rate_decay_steps, learning_rate_decay_rate:
        Exponential learning-rate decay schedule parameters.
    training_param:
        Optional ``TrainingParam``; a fresh instance is created when omitted.
    """
    # Bug fix: ``training_param=TrainingParam()`` in the signature is a mutable
    # default shared across all calls; replaced with the None-sentinel idiom
    # used by the other __init__ variants in this file.
    if training_param is None:
        training_param = TrainingParam()
    BaseDeepQ.__init__(self,
                       action_size,
                       observation_size,
                       lr,
                       learning_rate_decay_steps,
                       learning_rate_decay_rate,
                       training_param)
    self.construct_q_network()
def __init__(self, nn_params, training_param=None):
    """Set up the Q network together with its auxiliary grid model.

    Parameters
    ----------
    nn_params:
        Architecture description forwarded to ``BaseDeepQ``.
    training_param:
        Optional ``TrainingParam``; defaults to a fresh instance.
    """
    training_param = TrainingParam() if training_param is None else training_param
    BaseDeepQ.__init__(self, nn_params, training_param)
    # custom keras objects needed when (de)serializing the model
    self._custom_objects = {"LtauBis": LtauBis}
    # clipping thresholds taken from the training parameters
    self._max_global_norm_grad = training_param.max_global_norm_grad
    self._max_value_grad = training_param.max_value_grad
    self._max_loss = training_param.max_loss
    self.train_lr = 1.0  # added
    # grid-model state; populated later / by construct_q_network()
    self.encoded_state = self.grid_model = None
    self._schedule_grid_model = self._optimizer_grid_model = None
    self._qnet_variables = []
    self.grid_model_losses_npy = None
    self.construct_q_network()
def __init__(self,
             action_size,
             observation_size,
             tau_dim_start,
             tau_dim_end,
             add_tau,
             lr=0.00001,
             learning_rate_decay_steps=1000,
             learning_rate_decay_rate=0.95,
             training_param=None):
    """Initialize the tau-aware network holder and build the model.

    Parameters
    ----------
    action_size:
        Size of the action space, forwarded to ``BaseDeepQ``.
    observation_size:
        Size of the observation space, forwarded to ``BaseDeepQ``.
    tau_dim_start, tau_dim_end:
        Slice boundaries of the "tau" part of the observation
        (presumably indices into the observation vector — TODO confirm).
    add_tau:
        Value combined with tau by the network (semantics defined by ``Ltau``).
    lr:
        Initial learning rate.
    learning_rate_decay_steps, learning_rate_decay_rate:
        Exponential learning-rate decay schedule parameters.
    training_param:
        Optional ``TrainingParam``; a fresh instance is created when omitted.
    """
    # Bug fix: ``training_param=TrainingParam()`` in the signature is a mutable
    # default evaluated once and shared by every call; replaced with the
    # None-sentinel idiom used by the other __init__ variants in this file.
    if training_param is None:
        training_param = TrainingParam()
    BaseDeepQ.__init__(self,
                       action_size,
                       observation_size,
                       lr,
                       learning_rate_decay_steps=learning_rate_decay_steps,
                       learning_rate_decay_rate=learning_rate_decay_rate,
                       training_param=training_param)
    self.tau_dim_start = tau_dim_start
    self.tau_dim_end = tau_dim_end
    self.add_tau = add_tau
    # custom keras objects needed when (de)serializing the model
    self.custom_objects = {"Ltau": Ltau}
    self.construct_q_network()