Code Example #1
File: drqn.py  Project: varikmp/easyRL-v0
 def __init__(self, *args):
     paramLen = len(DRQNPrioritized.newParameters)
     super().__init__(*args[:-paramLen])
     self.alpha = float(args[-paramLen])
     empty_state = self.get_empty_state()
     self.memory = ExperienceReplay.PrioritizedReplayBuffer(
         self, self.memory_size,
         TransitionFrame(empty_state, -1, 0, empty_state, False),
         history_length=self.historylength, alpha=self.alpha)
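
The argument-splitting idiom above recurs in almost every example below: each agent class declares its own newParameters list, forwards args[:-paramLen] to the parent constructor, and keeps the trailing args[-paramLen:] slice for itself. A minimal, self-contained sketch of that convention using hypothetical BaseAgent and PrioritizedAgent classes (not the actual easyRL-v0 classes):

    class BaseAgent:
        def __init__(self, state_size, action_size, memory_size):
            self.state_size = state_size
            self.action_size = action_size
            self.memory_size = memory_size

    class PrioritizedAgent(BaseAgent):
        # Hypothetical stand-in for a class-level parameter list such as
        # DRQNPrioritized.newParameters; only its length matters for the slicing.
        newParameters = ["alpha"]

        def __init__(self, *args):
            paramLen = len(PrioritizedAgent.newParameters)
            # Everything except the trailing new parameters goes to the parent.
            super().__init__(*args[:-paramLen])
            # The trailing slice holds this subclass's own parameters.
            self.alpha = float(args[-paramLen])

    agent = PrioritizedAgent(4, 2, 1000, 0.6)  # state_size, action_size, memory_size, alpha
    print(agent.memory_size, agent.alpha)      # -> 1000 0.6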
Code Example #2
File: ddpg.py  Project: varikmp/easyRL-v0
    def __init__(self, *args):

        # Initializing model parameters
        paramLen = len(DDPG.newParameters)
        super().__init__(*args[:-paramLen])

        # Note that this comprehension applies int() to every trailing argument,
        # including tau.
        self.batch_size, self.memory_size, self.target_update_interval, self.tau = [
            int(arg) for arg in args[-paramLen:]
        ]
        empty_state = self.get_empty_state()
        self.memory = ExperienceReplay.ReplayBuffer(
            self, self.memory_size,
            TransitionFrame(empty_state, -1, 0, empty_state, False))
        # Learning rate for actor-critic models
        critic_lr = 0.002
        actor_lr = 0.001

        self.critic_optimizer = tf.keras.optimizers.Adam(critic_lr)
        self.actor_optimizer = tf.keras.optimizers.Adam(actor_lr)

        # self.ou_noise = OUNoise(self.action_size)

        self.actor_model = self.get_actor()
        self.critic_model = self.get_critic()

        self.target_actor = self.get_actor()
        self.target_critic = self.get_critic()

        # Making the weights equal initially
        self.target_actor.set_weights(self.actor_model.get_weights())
        self.target_critic.set_weights(self.critic_model.get_weights())

        self.total_steps = 0
        self.allMask = np.full((1, self.action_size), 1)
        self.allBatchMask = np.full((self.batch_size, self.action_size), 1)
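
The two set_weights calls above hard-copy the online actor and critic weights into their targets once, at construction time. The tau parameter read a few lines earlier is conventionally used for soft (Polyak) target updates during training; that update step is not part of this snippet, so the helper below is only a sketch of the usual pattern, assuming tau is a small blending factor, and is not the easyRL-v0 implementation:

    def soft_update(target_model, source_model, tau):
        # Blend each weight tensor: new_target = tau * source + (1 - tau) * old_target.
        blended = [
            tau * src + (1.0 - tau) * tgt
            for src, tgt in zip(source_model.get_weights(), target_model.get_weights())
        ]
        target_model.set_weights(blended)

    # Typical usage inside a training step (hypothetical):
    # soft_update(self.target_actor, self.actor_model, self.tau)
    # soft_update(self.target_critic, self.critic_model, self.tau)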
Code Example #3
 def __init__(self, *args):
     paramLen = len(DeepQHindsight.newParameters)
     super().__init__(*args)
     empty_state = self.get_empty_state()
     self.memory = ExperienceReplay.HindsightReplayBuffer(
         self, self.memory_size,
         TransitionFrame(empty_state, -1, 0, empty_state, False))
Code Example #4
File: rainbow.py  Project: varikmp/easyRL-v0
    def __init__(self, *args):
        paramLen = len(Rainbow.newParameters)
        super().__init__(*args[:-paramLen])

        Qparams = []
        for i in range(3):
            Qparams.append(DeepQ.newParameters[i].default)
        '''self.batch_size, self.memory_size, self.target_update_interval = [int(param) for param in Qparams]
        #self.batch_size, self.memory_size, self.target_update_interval, _ = [int(arg) for arg in args[-paramLen:]]
        _, _, _, self.learning_rate = [arg for arg in args[-paramLen:]]
        self.memory = ExperienceReplay.ReplayBuffer(self, self.memory_size, TransitionFrame(empty_state, -1, 0, empty_state, False))
        self.total_steps = 0
        self.allMask = np.full((1, self.action_size), 1)
        self.allBatchMask = np.full((self.batch_size, self.action_size), 1)'''
        empty_state = self.get_empty_state()
        self.total_steps = 0
        self.model = self.buildQNetwork()
        self.target = self.buildQNetwork()
        self.lr = 0.001
        self.memory = ExperienceReplay.ReplayBuffer(
            self, self.memory_size,
            TransitionFrame(empty_state, -1, 0, empty_state, False))

        # Parameters used for Bellman Distribution
        self.num_atoms = 51
        self.v_min = -10
        self.v_max = 10
        self.delta_z = (self.v_max - self.v_min) / float(self.num_atoms - 1)
        self.z = [self.v_min + i * self.delta_z for i in range(self.num_atoms)]
        self.sample_size = min(self.batch_size, self.memory_size)
        # Initialize prioritization exponent
        self.p = 0.5
        self.allBatchMask = np.full((self.sample_size, self.num_atoms), 1)
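
The z list built above is the fixed support of a categorical value distribution (the C51 component of Rainbow): num_atoms evenly spaced atom values between v_min and v_max. The network that consumes it is not shown here; the sketch below only illustrates, under the assumption that the model outputs per-action probabilities over these atoms, how expected Q-values are recovered from such a support:

    import numpy as np

    num_atoms, v_min, v_max = 51, -10.0, 10.0
    delta_z = (v_max - v_min) / (num_atoms - 1)
    z = np.array([v_min + i * delta_z for i in range(num_atoms)])  # support, shape (51,)

    # Hypothetical per-action atom probabilities for one state, shape (action_size, num_atoms).
    action_size = 4
    probs = np.full((action_size, num_atoms), 1.0 / num_atoms)

    # The expected Q-value per action is the probability-weighted sum over the support.
    q_values = probs @ z
    greedy_action = int(np.argmax(q_values))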
Code Example #5
File: adrqn.py  Project: varikmp/easyRL-v0
 def __init__(self, *args):
     super().__init__(*args)
     empty_state = self.get_empty_state()
     self.memory = ExperienceReplay.ReplayBuffer(
         self,
         self.memory_size,
         ActionTransitionFrame(-1, empty_state, -1, 0, empty_state, False),
         history_length=self.historylength)
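
ActionTransitionFrame differs from the TransitionFrame used in the other examples by carrying the previous action as an extra leading field, which action-conditioned recurrent agents such as ADRQN need when replaying histories. The actual easyRL-v0 definitions are not shown in these snippets; below is a minimal sketch that assumes they are simple namedtuple-style records, with field names guessed from the positional arguments used above:

    from collections import namedtuple

    # Hypothetical stand-ins for the easyRL-v0 frame types.
    TransitionFrame = namedtuple(
        "TransitionFrame", ["state", "action", "reward", "next_state", "is_done"])
    ActionTransitionFrame = namedtuple(
        "ActionTransitionFrame",
        ["prev_action", "state", "action", "reward", "next_state", "is_done"])

    empty_state = None  # placeholder; the agents build this with get_empty_state()
    blank_frame = ActionTransitionFrame(-1, empty_state, -1, 0, empty_state, False)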
Code Example #6
File: adrqn.py  Project: varikmp/easyRL-v0
 def __init__(self, *args):
     paramLen = len(ADRQNHindsight.newParameters)
     super().__init__(*args)
     empty_state = self.get_empty_state()
     self.memory = ExperienceReplay.HindsightReplayBuffer(
         self,
         self.memory_size,
         ActionTransitionFrame(-1, empty_state, -1, 0, empty_state, False),
         history_length=self.historylength)
Code Example #7
    def __init__(self, *args):

        # Initializing model parameters
        paramLen = len(SAC.newParameters)
        super().__init__(*args[:-paramLen])

        # Note that this comprehension applies int() to every trailing argument,
        # including tau and temperature.
        self.batch_size, self.memory_size, self.target_update_interval, self.tau, self.temperature = [
            int(arg) for arg in args[-paramLen:]
        ]
        self.polyak = 0.01
        self.total_steps = 0

        empty_state = self.get_empty_state()
        self.memory = ExperienceReplay.ReplayBuffer(
            self, self.memory_size,
            TransitionFrame(empty_state, -1, 0, empty_state, False))
        # Learning rate for actor-critic models
        critic_lr = 0.002
        actor_lr = 0.001

        self.critic_optimizer = tf.keras.optimizers.Adam(critic_lr)
        self.actor_optimizer = tf.keras.optimizers.Adam(actor_lr)

        self.actor_network = actorNetwork(self.action_size)

        self.soft_Q_network = self.q_network()
        self.soft_Q_targetnetwork = self.q_network()

        self.soft_Q_network1 = self.q_network()
        self.soft_Q_targetnetwork1 = self.q_network()

        # Building up 2 soft q-function with their relative targets
        in1 = tf.keras.Input(shape=self.state_size, dtype=tf.float64)
        in2 = tf.keras.Input(shape=self.action_size, dtype=tf.float64)

        self.soft_Q_network([in1, in2])
        self.soft_Q_targetnetwork([in1, in2])
        force_update(self.soft_Q_network.variables,
                     self.soft_Q_targetnetwork.variables)

        self.soft_Q_network1([in1, in2])
        self.soft_Q_targetnetwork1([in1, in2])
        force_update(self.soft_Q_network1.variables,
                     self.soft_Q_targetnetwork1.variables)

        # Optimizers for the networks
        self.softq_optimizer = tf.keras.optimizers.Adam(
            learning_rate=critic_lr)
        self.softq_optimizer2 = tf.keras.optimizers.Adam(
            learning_rate=critic_lr)
        self.actor_optimizer = tf.keras.optimizers.Adam(learning_rate=actor_lr)
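
force_update is called after each soft Q-network and its target have been built (calling the models on the two tf.keras.Input placeholders creates their variables) so that every target starts from the same weights as its online counterpart. Its definition is not part of this snippet; the helper below is a plausible sketch of such a hard-copy routine, stated as an assumption rather than the actual easyRL-v0 code:

    import tensorflow as tf

    def force_update(source_variables, target_variables):
        # Hard copy: overwrite every target variable with the matching source variable.
        for src, tgt in zip(source_variables, target_variables):
            tgt.assign(src)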
Code Example #8
 def __init__(self, *args):
     paramLen = len(DeepQ.newParameters)
     super().__init__(*args[:-paramLen])
     self.batch_size, self.memory_size, self.target_update_interval = [
         int(arg) for arg in args[-paramLen:]
     ]
     self.model = self.buildQNetwork()
     self.target = self.buildQNetwork()
     empty_state = self.get_empty_state()
     self.memory = ExperienceReplay.ReplayBuffer(
         self, self.memory_size,
         TransitionFrame(empty_state, -1, 0, empty_state, False))
     self.total_steps = 0
     self.allMask = np.full((1, self.action_size), 1)
     self.allBatchMask = np.full((self.batch_size, self.action_size), 1)
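
allMask and allBatchMask are all-ones arrays shaped like a single action vector and a batch of action vectors. A common reason for keeping such masks, assumed here because buildQNetwork is not shown, is a two-input Q-network whose outputs are multiplied element-wise by an action mask: an all-ones mask returns every action's Q-value at prediction time, while a one-hot mask isolates the taken action when fitting the TD target. A minimal sketch of that pattern with a hypothetical build_masked_q_network helper:

    import numpy as np
    import tensorflow as tf

    def build_masked_q_network(state_size, action_size):
        # Two-input network: the Q-value outputs are multiplied by an action mask.
        state_in = tf.keras.Input(shape=(state_size,))
        mask_in = tf.keras.Input(shape=(action_size,))
        hidden = tf.keras.layers.Dense(64, activation="relu")(state_in)
        q_out = tf.keras.layers.Dense(action_size)(hidden)
        masked = tf.keras.layers.Multiply()([q_out, mask_in])
        model = tf.keras.Model(inputs=[state_in, mask_in], outputs=masked)
        model.compile(optimizer="adam", loss="mse")
        return model

    model = build_masked_q_network(state_size=4, action_size=2)
    all_mask = np.full((1, 2), 1)  # same role as self.allMask
    q_values = model.predict([np.zeros((1, 4)), all_mask], verbose=0)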
Code Example #9
 def __init__(self, *args):
     paramLen = len(PPO.newParameters)
     super().__init__(*args[:-paramLen])
     empty_state = self.get_empty_state()
     # Initialize parameters
     self.memory = ExperienceReplay.ReplayBuffer(
         self, self.memory_size,
         TransitionFrame(empty_state, -1, 0, empty_state, False))
     self.total_steps = 0
     self.actorIts = 2
     self.allMask = np.full((1, self.action_size), 1)
     self.allBatchMask = np.full((self.batch_size, self.action_size), 1)
     self.policy_lr = 0.001
     self.value_lr = 0.001
     self.policy_model = Actor(self.state_size, self.action_size,
                               self.policy_lr).policy_network()
     self.value_model = Critic(self.state_size, self.action_size,
                               self.value_lr).value_network()
Code Example #10
File: ppo_test.py  Project: varikmp/easyRL-v0
    def __init__(self, *args):
        print("Stuff PPO:")
        print(str(args))
        paramLen = len(PPO.newParameters)
        super().__init__(*args[:-paramLen])
        empty_state = self.get_empty_state()
        # Initialize parameters
        self.memory = ExperienceReplay.ReplayBuffer(
            self, self.memory_size,
            TransitionFrame(empty_state, -1, 0, empty_state, False))
        self.total_steps = 0
        self.allMask = np.full((1, self.action_size), 1)
        self.allBatchMask = np.full((self.batch_size, self.action_size), 1)
        #self.batch_size, _, _, self.horizon, self.epochSize, _, _ = [int(arg) for arg in args[-paramLen:]]
        #_, self.policy_lr, self.value_lr, _, _, self.epsilon, self.lam = [arg for arg in args[-paramLen:]]

        self.policy_lr = 0.001
        self.value_lr = 0.001
Code Example #11
File: drqn.py  Project: varikmp/easyRL-v0
 def resetBuffer(self):
     self.memory = ExperienceReplay.ReplayBuffer(
         self, self.memory_size, self.historylength)