losses = []
all_rewards = []
episode_reward = 0

state = env.reset()
for frame_idx in range(1, num_frames + 1):
    # num_frames can exceed the replay buffer capacity, but the buffer does not
    # grow without bound: assuming the usual deque-based implementation, each
    # push past capacity evicts the oldest transition. (np.expand_dims in push
    # only adds a batch dimension to the state; it does not resize the buffer.
    # See the sketch after this cell.)
    #print("Frame: " + str(frame_idx))
    epsilon = epsilon_by_frame(frame_idx)  # get the epsilon value for this frame
    action = model.act(state, epsilon)  # this is where the act function is used
    next_state, reward, done, _ = env.step(action)  # step the env to see if the action yields a reward
    replay_buffer.push(state, action, reward, next_state, done)

    state = next_state
    episode_reward += reward

    if done:  # the game is over
        state = env.reset()
        all_rewards.append((frame_idx, episode_reward))  # record the reward for that game
        episode_reward = 0  # reset

    # Once the replay buffer has filled up enough
    if len(replay_buffer) > replay_initial:
        loss = compute_td_loss(model, target_model, batch_size, gamma, replay_buffer)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        losses.append((frame_idx, loss.data.cpu().numpy()))
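# To make the capacity question above concrete, here is a minimal sketch of the
# deque-based ReplayBuffer these cells appear to assume (the actual class is
# defined elsewhere; the names ReplayBuffer/push/sample match the calls above,
# but the body is illustrative, not the canonical implementation).
import random
from collections import deque

import numpy as np


class ReplayBuffer:
    def __init__(self, capacity):
        # Fixed capacity: once full, appending evicts the oldest transition
        self.buffer = deque(maxlen=capacity)

    def push(self, state, action, reward, next_state, done):
        # expand_dims adds a leading batch dimension so states concatenate
        # cleanly later; it has nothing to do with growing the buffer itself
        state = np.expand_dims(state, 0)
        next_state = np.expand_dims(next_state, 0)
        self.buffer.append((state, action, reward, next_state, done))

    def sample(self, batch_size):
        # Uniform random mini-batch, unzipped into per-field tuples
        state, action, reward, next_state, done = zip(*random.sample(self.buffer, batch_size))
        return np.concatenate(state), action, reward, np.concatenate(next_state), done

    def __len__(self):
        return len(self.buffer)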
losses = []
all_rewards = []
episode_reward = 0

state = env.reset()  # initial state
for frame_idx in range(1, num_frames + 1):  # play until the player or the model reaches a score of 21
    #print("Frame: " + str(frame_idx))  # uncomment to look at frames
    epsilon = epsilon_by_frame(frame_idx)
    action = model.act(state, epsilon)  # act() is written in a later cell
    next_state, reward, done, _ = env.step(action)  # get the next state
    replay_buffer.push(state, action, reward, next_state, done)  # push the action's results to the buffer

    state = next_state
    episode_reward += reward

    if done:  # reset the game and record the episode reward
        state = env.reset()
        all_rewards.append((frame_idx, episode_reward))
        episode_reward = 0

    if len(replay_buffer) > replay_initial:  # once enough plays are buffered, compute the loss and update the model
        loss = compute_td_loss(model, target_model, batch_size, gamma, replay_buffer)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        losses.append((frame_idx, loss.data.cpu().numpy()))
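# The loop above calls compute_td_loss but its definition is not in this cell.
# Below is a sketch of the standard one-step DQN loss with a frozen target
# network, which is what the (model, target_model, batch_size, gamma,
# replay_buffer) signature suggests; treat it as an assumption, not the
# project's exact implementation.
import numpy as np
import torch
import torch.nn.functional as F


def compute_td_loss(model, target_model, batch_size, gamma, replay_buffer):
    state, action, reward, next_state, done = replay_buffer.sample(batch_size)

    state = torch.FloatTensor(np.float32(state))
    next_state = torch.FloatTensor(np.float32(next_state))
    action = torch.LongTensor(action)
    reward = torch.FloatTensor(reward)
    done = torch.FloatTensor(done)

    # Q(s, a) for the actions actually taken in the sampled transitions
    q_value = model(state).gather(1, action.unsqueeze(1)).squeeze(1)

    # Bootstrapped target r + gamma * max_a' Q_target(s', a'); zero the
    # bootstrap term at terminal states so the target is just the reward
    next_q_value = target_model(next_state).max(1)[0]
    expected_q_value = reward + gamma * next_q_value * (1 - done)

    # Detach the target so gradients only flow through the online network
    return F.mse_loss(q_value, expected_q_value.detach())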
# Negative-exponential schedule: start out exploring, then shift to exploiting as frame_idx grows
epsilon_by_frame = lambda frame_idx: epsilon_final + (epsilon_start - epsilon_final) * math.exp(-1. * frame_idx / epsilon_decay)

losses = []
all_rewards = []
episode_reward = 0
state = env.reset()  # initial state
best_mean_reward = float('-inf')

for frame_idx in range(starting_frame, num_frames + 1):  # one iteration per frame played
    epsilon = epsilon_by_frame(frame_idx)  # epsilon decreases as more frames are played
    action = model.act(state, epsilon)  # if rand < epsilon, explore; else take the action with max Q-value. action: int
    next_state, reward, done, _ = env.step(action)  # env info after the action. next_state: 2D int array, reward: float, done: bool
    replay_buffer.push(state, action, reward, next_state, done)  # save the transition to the buffer (note: every frame)
    state = next_state  # move to the next state
    episode_reward += reward  # keep accumulating rewards until the episode ends

    if done:  # episode over
        state = env.reset()  # restart the game
        all_rewards.append((frame_idx, episode_reward))  # store episode_reward with the frame it ended on
        episode_reward = 0

    if len(replay_buffer) > replay_initial:  # if there are enough frames in the replay buffer (10000)
        loss = compute_td_loss(model, target_model, batch_size, gamma, replay_buffer)
        optimizer.zero_grad()  # reset the gradients for every mini-batch
        loss.backward()
        optimizer.step()
        losses.append((frame_idx, loss.data.cpu().numpy()))
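# model.act is called with (state, epsilon) in every variant of the loop but
# defined elsewhere. A minimal epsilon-greedy sketch consistent with that call
# is below; self.num_actions is a hypothetical attribute standing in for
# however the real model exposes its action count.
import random

import numpy as np
import torch


def act(self, state, epsilon):
    # With probability epsilon explore (uniform random action);
    # otherwise exploit the current Q estimates
    if random.random() > epsilon:
        with torch.no_grad():
            state_t = torch.FloatTensor(np.float32(state)).unsqueeze(0)  # add batch dim
            q_values = self.forward(state_t)
            action = q_values.max(1)[1].item()  # argmax over actions
    else:
        action = random.randrange(self.num_actions)
    return action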