def play_one_game(self):
    replay = Replay()
    s = self.env.reset()
    count = 0
    while True:
        # Batch of one: stack of the last 4 grayscale 84x84 frames
        conv_s = np.reshape(s, [1, 84, 84, 4])
        p_g = self.nns["good"].predict(conv_s)
        p_n = self.nns["normal"].predict(conv_s)
        p_b = self.nns["bad"].predict(conv_s)
        # Ensemble policy: weight the good net up, subtract the bad one
        p = 2 * p_g["pi"][0] + p_n["pi"][0] - p_b["pi"][0]
        # Add 1 to every entry so no probability is negative, then renormalize
        p += np.ones_like(self.a)
        p /= np.sum(p)
        a = np.random.choice(self.a, p=p)
        s_, r, t, _ = self.env.step(a)
        replay.add(s, a)
        replay.score += r
        s = s_
        count += 1
        if count % 10 == 0:
            print(".", end="", flush=True)  # progress marker every 10 steps
        if t:
            print()
            break
    return replay
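play_one_game records each visited state and chosen action in a Replay object that is not defined in this section. A minimal sketch of a container satisfying only the interface used above (a no-argument constructor, add(state, action), and a score attribute) might look like the following; the project's real class may well store more:

# Hypothetical minimal Replay container matching the calls in play_one_game.
class Replay:
    def __init__(self):
        self.states = []   # visited states
        self.actions = []  # action taken at each state
        self.score = 0.0   # cumulative episode reward, updated by the caller

    def add(self, state, action):
        self.states.append(state)
        self.actions.append(action)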
import numpy as np
import tensorflow as tf  # TF 1.x API (tf.Session)

# ActorNet, CriticNet, OUNoise, and Replay are defined elsewhere in the project.


class DDPG:
    def __init__(self, task):
        # Hyperparameters
        self.learning_rate_actor = 1e-4
        self.learning_rate_critic = 1e-3
        self.gamma = 0.99  # discount factor
        self.tau = 0.001   # soft-update rate for the target networks

        # Define nets
        self.sess = tf.Session()
        self.task = task
        self.actor = ActorNet(self.sess, self.task.state_size, self.task.action_size,
                              self.learning_rate_actor,
                              self.task.action_low, self.task.action_high, self.tau)
        self.critic = CriticNet(self.sess, self.task.state_size, self.task.action_size,
                                self.learning_rate_critic, self.tau)

        # Define Ornstein-Uhlenbeck exploration noise
        self.mu = 0
        self.theta = 0.15
        self.sigma = 0.20
        self.noise = OUNoise(self.task.action_size, self.mu, self.theta, self.sigma)

        # Define replay memory
        self.buffer_size = 1000000
        self.batch_size = 64
        self.memory = Replay(self.buffer_size, self.batch_size)

        # Score tracking
        self.best_score = -np.inf
        self.best_reward = -np.inf

    def reset(self):
        self.noise.reset()
        state = self.task.reset()
        self.last_state = state
        self.total_reward = 0.0
        self.count = 0
        return state

    def learn(self, experiences):
        # Unpack the batch into separate np arrays
        state_batch = np.vstack([e[0] for e in experiences])
        action_batch = np.vstack([e[1] for e in experiences])
        reward_batch = np.vstack([e[2] for e in experiences])
        next_state_batch = np.vstack([e[3] for e in experiences])
        done_batch = np.vstack([e[4] for e in experiences])

        # Bootstrap next-state Q values from the target networks
        next_action_batch = self.actor.target_actions(next_state_batch)
        next_q_targets = self.critic.targetQ(next_state_batch, next_action_batch)

        # Train critic net toward the TD target; terminal states keep only the reward
        q_targets = reward_batch + self.gamma * next_q_targets * (1 - done_batch)
        self.critic.train(state_batch, action_batch, q_targets)

        # Train actor net along the critic's action gradients
        action_gradients = self.critic.gradients(state_batch, action_batch)
        self.actor.train(action_gradients, state_batch)

        # Soft-update target networks
        self.actor.update_target(False)
        self.critic.update_target(False)

    def step(self, action, reward, next_state, done):
        self.memory.add([self.last_state, action, reward, next_state, done])
        self.total_reward += reward
        self.count += 1
        if done:
            self.score = self.total_reward / float(self.count) if self.count else 0.0
            self.best_score = max(self.best_score, self.score)
            self.best_reward = max(self.total_reward, self.best_reward)
        if len(self.memory.buffer) > self.batch_size:
            experiences = self.memory.sample()
            self.learn(experiences)
        self.last_state = next_state

    def act(self, states):
        states = np.reshape(states, [-1, self.task.state_size])
        action = self.actor.actions(states)[0]
        return list(action + self.noise.sample())
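DDPG perturbs the deterministic actor output with Ornstein-Uhlenbeck noise, whose samples are temporally correlated and therefore suit continuous control better than independent Gaussian noise. OUNoise is not defined in this section; a standard implementation matching the constructor arguments and the reset()/sample() calls above would be the following sketch (not necessarily the project's exact code):

import numpy as np

class OUNoise:
    """Ornstein-Uhlenbeck process: dx = theta * (mu - x) + sigma * N(0, 1)."""

    def __init__(self, size, mu, theta, sigma):
        self.mu = mu * np.ones(size)  # long-run mean of the process
        self.theta = theta            # speed of mean reversion
        self.sigma = sigma            # scale of the random kicks
        self.reset()

    def reset(self):
        # Start each episode at the mean so noise does not carry over
        self.state = np.copy(self.mu)

    def sample(self):
        dx = self.theta * (self.mu - self.state) \
             + self.sigma * np.random.randn(len(self.state))
        self.state = self.state + dx
        return self.state

With theta = 0.15 and sigma = 0.20 as set in __init__, consecutive samples drift around zero while staying correlated, giving smoother exploration trajectories.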
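Similarly, the replay memory only needs a bounded buffer with add() and uniform sample(); note that DDPG.step reads len(self.memory.buffer) directly, so the attribute name matters. A minimal sketch under those assumptions:

import random
from collections import deque

class Replay:
    """Fixed-size experience buffer with uniform random sampling."""

    def __init__(self, buffer_size, batch_size):
        # deque drops the oldest experience once buffer_size is reached;
        # DDPG.step inspects len(self.memory.buffer), so keep this name.
        self.buffer = deque(maxlen=buffer_size)
        self.batch_size = batch_size

    def add(self, experience):
        self.buffer.append(experience)

    def sample(self):
        return random.sample(self.buffer, self.batch_size)

Uniform sampling breaks the temporal correlation between consecutive transitions, which stabilizes the critic's TD updates in learn().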