Example #1
import math
from itertools import count

import torch
import torch.optim as optim
from gym.wrappers import FrameStack, GrayScaleObservation, ResizeObservation

# Project-local pieces assumed to be importable from the codebase:
# PacmanEnv, SkipFrame, DQN, ReplayBuffer, select_action, optimize_model, save_model.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


def train_agent(layout: str, episodes: int = 10000, frames_to_skip: int = 4):
    GAMMA = 0.99          # discount factor
    EPSILON = 1.0         # initial exploration rate
    EPS_END = 0.1         # final exploration rate
    EPS_DECAY = 1e7       # decay constant for the exponential epsilon schedule
    TARGET_UPDATE = 10    # episodes between target-network syncs
    BATCH_SIZE = 64       # minibatch size for optimization

    epsilon_by_frame = lambda frame_idx: EPS_END + (
        EPSILON - EPS_END) * math.exp(-1. * frame_idx / EPS_DECAY)

    # Build the environment with preprocessing wrappers: skip frames, convert to
    # grayscale, resize to 84x84, and stack 4 consecutive frames. The resulting
    # observation shape is used below to size the DQN's input layers.
    env = PacmanEnv(layout=layout)
    env = SkipFrame(env, skip=frames_to_skip)
    env = GrayScaleObservation(env)
    env = ResizeObservation(env, shape=84)
    env = FrameStack(env, num_stack=4)
    screen = env.reset(mode='rgb_array')

    # Get number of actions from gym action space
    n_actions = env.action_space.n

    policy_net = DQN(screen.shape, n_actions).to(device)
    target_net = DQN(screen.shape, n_actions).to(device)
    target_net.load_state_dict(policy_net.state_dict())
    target_net.eval()

    optimizer = optim.RMSprop(policy_net.parameters())
    memory = ReplayBuffer(BATCH_SIZE)

    for i_episode in range(episodes):
        # Initialize the environment and state
        state = env.reset(mode='rgb_array')
        ep_reward = 0.
        EPSILON = epsilon_by_frame(i_episode)  # anneal epsilon once per episode

        for t in count():
            # Select and perform an action
            env.render(mode='human')  # optional visualization; slows training
            action = select_action(state, EPSILON, policy_net, n_actions)
            next_state, reward, done, info = env.step(action)
            reward = max(-1.0, min(reward, 1.0))  # clip reward to [-1, 1]
            ep_reward += reward

            memory.cache(state, next_state, action, reward, done)

            # Move to the next state (cleared when the episode ends)
            if done:
                next_state = None
            state = next_state

            # Perform one step of the optimization on the policy network
            optimize_model(memory, policy_net, optimizer, target_net, GAMMA)
            if done:
                print("Episode #{}, lasts for {} timestep, total reward: {}".
                      format(i_episode, t + 1, ep_reward))
                break
        # Update the target network, copying all weights and biases in DQN
        if i_episode % TARGET_UPDATE == 0:
            target_net.load_state_dict(policy_net.state_dict())

        # Save an intermediate checkpoint every 1000 episodes
        if i_episode % 1000 == 0:
            save_model(target_net, 'pacman.pth')

    print('Complete')
    env.render()
    env.close()

    save_model(target_net, 'pacman.pth')
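
The listing relies on project-specific helpers (select_action, optimize_model, ReplayBuffer, DQN, PacmanEnv, SkipFrame, save_model) that are defined elsewhere and not shown here. As a rough, non-authoritative sketch of what the two helpers called inside the loop typically do in a DQN setup, the snippet below implements epsilon-greedy action selection and a single Bellman-target optimization step. The helper signatures and the replay batch layout are assumptions for illustration, not the project's actual implementation.

# Hypothetical sketch of the helpers used above; signatures and batch layout are assumed.
import random

import numpy as np
import torch
import torch.nn.functional as F

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")  # redefined so the sketch stands alone
BATCH_SIZE = 64  # mirrors the constant in train_agent


def select_action(state, epsilon, policy_net, n_actions):
    """Epsilon-greedy choice over the policy network's Q-values."""
    if random.random() < epsilon:
        return random.randrange(n_actions)  # explore
    with torch.no_grad():
        state_t = torch.as_tensor(np.asarray(state), dtype=torch.float32,
                                  device=device).unsqueeze(0)
        return int(policy_net(state_t).argmax(dim=1).item())  # exploit


def optimize_model(memory, policy_net, optimizer, target_net, gamma):
    """One DQN step: regress Q(s, a) toward r + gamma * max_a' Q_target(s', a')."""
    if len(memory) < BATCH_SIZE:  # assumes the buffer exposes its current size
        return
    # Assumed sample layout: float state tensors, long action tensor, float rewards/dones.
    state, next_state, action, reward, done = memory.sample(BATCH_SIZE)

    q_values = policy_net(state).gather(1, action.unsqueeze(1)).squeeze(1)
    with torch.no_grad():
        next_q = target_net(next_state).max(dim=1).values
        target = reward + gamma * next_q * (1.0 - done)

    loss = F.smooth_l1_loss(q_values, target)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()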