def __init__(self, *args):
    paramLen = len(DRQNPrioritized.newParameters)
    super().__init__(*args[:-paramLen])
    self.alpha = float(args[-paramLen])
    empty_state = self.get_empty_state()
    self.memory = ExperienceReplay.PrioritizedReplayBuffer(
        self, self.memory_size,
        TransitionFrame(empty_state, -1, 0, empty_state, False),
        history_length=self.historylength, alpha=self.alpha)
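# A minimal sketch (not the ExperienceReplay.PrioritizedReplayBuffer implementation) of what the
# alpha argument above controls: transitions are sampled with probability proportional to
# priority ** alpha, so alpha = 0 recovers uniform sampling and alpha = 1 is fully prioritized.
# `sampling_probabilities` is a hypothetical helper name.
import numpy as np

def sampling_probabilities(priorities, alpha):
    # Exponentiate the raw priorities and normalize them into a probability distribution
    scaled = np.asarray(priorities, dtype=np.float64) ** alpha
    return scaled / scaled.sum()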
def __init__(self, *args):
    # Initializing model parameters
    paramLen = len(DDPG.newParameters)
    super().__init__(*args[:-paramLen])
    self.batch_size, self.memory_size, self.target_update_interval = [
        int(arg) for arg in args[-paramLen:-1]
    ]
    # tau is the soft target-update rate, typically a small fraction, so keep it as a float
    self.tau = float(args[-1])
    empty_state = self.get_empty_state()
    self.memory = ExperienceReplay.ReplayBuffer(
        self, self.memory_size,
        TransitionFrame(empty_state, -1, 0, empty_state, False))
    # Learning rates for the actor-critic models
    critic_lr = 0.002
    actor_lr = 0.001
    self.critic_optimizer = tf.keras.optimizers.Adam(critic_lr)
    self.actor_optimizer = tf.keras.optimizers.Adam(actor_lr)
    # self.ou_noise = OUNoise(self.action_size)
    self.actor_model = self.get_actor()
    self.critic_model = self.get_critic()
    self.target_actor = self.get_actor()
    self.target_critic = self.get_critic()
    # Start the target networks with the same weights as the online networks
    self.target_actor.set_weights(self.actor_model.get_weights())
    self.target_critic.set_weights(self.critic_model.get_weights())
    self.total_steps = 0
    self.allMask = np.full((1, self.action_size), 1)
    self.allBatchMask = np.full((self.batch_size, self.action_size), 1)
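# A minimal sketch (not part of the original DDPG class) of how the tau parameter above could be
# applied on each training step: Polyak-average the online weights into the target weights.
# `soft_update_target` is a hypothetical helper name; it assumes tau is a float in (0, 1].
def soft_update_target(target_model, online_model, tau):
    # theta_target <- tau * theta_online + (1 - tau) * theta_target
    new_weights = [tau * w + (1.0 - tau) * w_t
                   for w, w_t in zip(online_model.get_weights(), target_model.get_weights())]
    target_model.set_weights(new_weights)

# Example usage (hypothetical): soft_update_target(self.target_actor, self.actor_model, self.tau)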
def __init__(self, *args):
    paramLen = len(DeepQHindsight.newParameters)
    super().__init__(*args)
    empty_state = self.get_empty_state()
    self.memory = ExperienceReplay.HindsightReplayBuffer(
        self, self.memory_size,
        TransitionFrame(empty_state, -1, 0, empty_state, False))
def __init__(self, *args):
    paramLen = len(Rainbow.newParameters)
    super().__init__(*args[:-paramLen])
    # Defaults for the three base DeepQ parameters (batch size, memory size, target update interval)
    Qparams = []
    for i in range(3):
        Qparams.append(DeepQ.newParameters[i].default)
    '''self.batch_size, self.memory_size, self.target_update_interval = [int(param) for param in Qparams]
    #self.batch_size, self.memory_size, self.target_update_interval, _ = [int(arg) for arg in args[-paramLen:]]
    _, _, _, self.learning_rate = [arg for arg in args[-paramLen:]]
    self.memory = ExperienceReplay.ReplayBuffer(self, self.memory_size, TransitionFrame(empty_state, -1, 0, empty_state, False))
    self.total_steps = 0
    self.allMask = np.full((1, self.action_size), 1)
    self.allBatchMask = np.full((self.batch_size, self.action_size), 1)'''
    empty_state = self.get_empty_state()
    self.total_steps = 0
    self.model = self.buildQNetwork()
    self.target = self.buildQNetwork()
    self.lr = 0.001
    self.memory = ExperienceReplay.ReplayBuffer(
        self, self.memory_size,
        TransitionFrame(empty_state, -1, 0, empty_state, False))
    # Parameters for the distributional Bellman update (fixed support of num_atoms atoms)
    self.num_atoms = 51
    self.v_min = -10
    self.v_max = 10
    self.delta_z = (self.v_max - self.v_min) / float(self.num_atoms - 1)
    self.z = [self.v_min + i * self.delta_z for i in range(self.num_atoms)]
    self.sample_size = min(self.batch_size, self.memory_size)
    # Initialize prioritization exponent
    self.p = 0.5
    self.allBatchMask = np.full((self.sample_size, self.num_atoms), 1)
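# A minimal sketch (an illustration, not the project's Rainbow code) of how the fixed support
# defined above (num_atoms, v_min, v_max, delta_z, z) is typically used: the distributional
# Bellman target r + gamma * z is projected back onto the atoms. `project_distribution`,
# `next_probs`, `rewards`, `dones`, and `gamma` are hypothetical names.
import numpy as np

def project_distribution(next_probs, rewards, dones, gamma, z, v_min, v_max, delta_z):
    batch_size, num_atoms = next_probs.shape
    projected = np.zeros_like(next_probs)
    for b in range(batch_size):
        for j in range(num_atoms):
            # Shift and clip each atom; terminal transitions collapse onto the reward
            tz = np.clip(rewards[b] + (1.0 - dones[b]) * gamma * z[j], v_min, v_max)
            pos = (tz - v_min) / delta_z  # fractional index on the support
            lower, upper = int(np.floor(pos)), int(np.ceil(pos))
            if lower == upper:
                projected[b, lower] += next_probs[b, j]
            else:
                # Split the probability mass between the two neighbouring atoms
                projected[b, lower] += next_probs[b, j] * (upper - pos)
                projected[b, upper] += next_probs[b, j] * (pos - lower)
    return projected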
def __init__(self, *args):
    super().__init__(*args)
    empty_state = self.get_empty_state()
    self.memory = ExperienceReplay.ReplayBuffer(
        self, self.memory_size,
        ActionTransitionFrame(-1, empty_state, -1, 0, empty_state, False),
        history_length=self.historylength)
def __init__(self, *args):
    paramLen = len(ADRQNHindsight.newParameters)
    super().__init__(*args)
    empty_state = self.get_empty_state()
    self.memory = ExperienceReplay.HindsightReplayBuffer(
        self, self.memory_size,
        ActionTransitionFrame(-1, empty_state, -1, 0, empty_state, False),
        history_length=self.historylength)
def __init__(self, *args):
    # Initializing model parameters
    paramLen = len(SAC.newParameters)
    super().__init__(*args[:-paramLen])
    self.batch_size, self.memory_size, self.target_update_interval = [
        int(arg) for arg in args[-paramLen:-2]
    ]
    # tau (soft-update rate) and temperature (entropy coefficient) are fractional values,
    # so keep them as floats rather than coercing to int
    self.tau, self.temperature = [float(arg) for arg in args[-2:]]
    self.polyak = 0.01
    self.total_steps = 0
    empty_state = self.get_empty_state()
    self.memory = ExperienceReplay.ReplayBuffer(
        self, self.memory_size,
        TransitionFrame(empty_state, -1, 0, empty_state, False))
    # Learning rates for the actor-critic models
    critic_lr = 0.002
    actor_lr = 0.001
    self.critic_optimizer = tf.keras.optimizers.Adam(critic_lr)
    self.actor_optimizer = tf.keras.optimizers.Adam(actor_lr)
    self.actor_network = actorNetwork(self.action_size)
    self.soft_Q_network = self.q_network()
    self.soft_Q_targetnetwork = self.q_network()
    self.soft_Q_network1 = self.q_network()
    self.soft_Q_targetnetwork1 = self.q_network()
    # Build the two soft Q-functions and their targets, then copy weights into the targets
    in1 = tf.keras.Input(shape=self.state_size, dtype=tf.float64)
    in2 = tf.keras.Input(shape=self.action_size, dtype=tf.float64)
    self.soft_Q_network([in1, in2])
    self.soft_Q_targetnetwork([in1, in2])
    force_update(self.soft_Q_network.variables, self.soft_Q_targetnetwork.variables)
    self.soft_Q_network1([in1, in2])
    self.soft_Q_targetnetwork1([in1, in2])
    force_update(self.soft_Q_network1.variables, self.soft_Q_targetnetwork1.variables)
    # Optimizers for the networks
    self.softq_optimizer = tf.keras.optimizers.Adam(learning_rate=critic_lr)
    self.softq_optimizer2 = tf.keras.optimizers.Adam(learning_rate=critic_lr)
    self.actor_optimizer = tf.keras.optimizers.Adam(learning_rate=actor_lr)
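# A minimal sketch (an assumption, not the project's SAC update) of why two soft Q-networks and a
# temperature are kept above: the soft Bellman target takes the minimum of the two target critics
# and subtracts the entropy term scaled by the temperature. All names here are hypothetical.
import tensorflow as tf

def compute_soft_target(rewards, dones, gamma, temperature,
                        target_q1, target_q2, next_log_probs):
    # Clipped double-Q: take the element-wise minimum of the two target critics
    min_q = tf.minimum(target_q1, target_q2)
    # Soft state value: V(s') = min_Q(s', a') - temperature * log pi(a' | s')
    soft_value = min_q - temperature * next_log_probs
    return rewards + gamma * (1.0 - dones) * soft_value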
def __init__(self, *args):
    paramLen = len(DeepQ.newParameters)
    super().__init__(*args[:-paramLen])
    self.batch_size, self.memory_size, self.target_update_interval = [
        int(arg) for arg in args[-paramLen:]
    ]
    # Online and target Q-networks
    self.model = self.buildQNetwork()
    self.target = self.buildQNetwork()
    empty_state = self.get_empty_state()
    self.memory = ExperienceReplay.ReplayBuffer(
        self, self.memory_size,
        TransitionFrame(empty_state, -1, 0, empty_state, False))
    self.total_steps = 0
    # All-ones action masks for a single prediction and for a full training batch
    self.allMask = np.full((1, self.action_size), 1)
    self.allBatchMask = np.full((self.batch_size, self.action_size), 1)
def __init__(self, *args):
    paramLen = len(PPO.newParameters)
    super().__init__(*args[:-paramLen])
    empty_state = self.get_empty_state()
    # Initialize parameters
    self.memory = ExperienceReplay.ReplayBuffer(
        self, self.memory_size,
        TransitionFrame(empty_state, -1, 0, empty_state, False))
    self.total_steps = 0
    self.actorIts = 2
    self.allMask = np.full((1, self.action_size), 1)
    self.allBatchMask = np.full((self.batch_size, self.action_size), 1)
    self.policy_lr = 0.001
    self.value_lr = 0.001
    self.policy_model = Actor(self.state_size, self.action_size,
                              self.policy_lr).policy_network()
    self.value_model = Critic(self.state_size, self.action_size,
                              self.value_lr).value_network()
def __init__(self, *args): print("Stuff PPO:") print(str(args)) paramLen = len(PPO.newParameters) super().__init__(*args[:-paramLen]) empty_state = self.get_empty_state() # Initialize parameters self.memory = ExperienceReplay.ReplayBuffer( self, self.memory_size, TransitionFrame(empty_state, -1, 0, empty_state, False)) self.total_steps = 0 self.allMask = np.full((1, self.action_size), 1) self.allBatchMask = np.full((self.batch_size, self.action_size), 1) #self.batch_size, _, _, self.horizon, self.epochSize, _, _ = [int(arg) for arg in args[-paramLen:]] #_, self.policy_lr, self.value_lr, _, _, self.epsilon, self.lam = [arg for arg in args[-paramLen:]] self.policy_lr = 0.001 self.value_lr = 0.001
def resetBuffer(self):
    self.memory = ExperienceReplay.ReplayBuffer(self, self.memory_size,
                                                self.historylength)