Example #1
import gym
from agent import DDPG

env = gym.make('Pendulum-v0')

# Load the pretrained DDPG agent for evaluation.
agent = DDPG(env)
agent.load_model()

state = env.reset()

cumulative_reward = 0
for _ in range(200):  # Pendulum-v0 episodes last at most 200 steps
    action = agent.get_action(state)
    env.render()
    # Pendulum-v0 expects torques in [-2, 2], so the agent's action is scaled by 2.
    state, reward, _, _ = env.step(action * 2)
    cumulative_reward += reward
print('Cumulative Reward: {}'.format(cumulative_reward))
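The action * 2 scaling in Example #1 matches Pendulum-v0's torque range. A quick check of the environment's action bounds (plain Gym, nothing specific to the accompanying code) confirms it:

import gym

env = gym.make('Pendulum-v0')
print(env.action_space.low, env.action_space.high)  # prints: [-2.] [2.]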
Example #2
from collections import deque
import gym
import numpy as np
from agent import DDPG
from utils import get_screen

env = gym.make('Pendulum-v0')

# memory=False: no replay buffer is needed when only evaluating (assumed flag meaning).
agent = DDPG(env, memory=False)
agent.load_model()

env.reset()
pixel = env.render(mode='rgb_array')
# The observation is a stack of the 3 most recent preprocessed frames.
state = deque([get_screen(pixel) for _ in range(3)], maxlen=3)
cumulative_reward = 0
for timestep in range(200):
    # Add a batch dimension before handing the frame stack to the agent.
    action = agent.get_action(np.array(state)[np.newaxis])
    _, reward, _, _ = env.step(action * 2)
    pixel = env.render(mode='rgb_array')
    # Slide the frame stack: append the newest frame, the deque drops the oldest.
    state_ = state.copy()
    state_.append(get_screen(pixel))
    state = state_
    cumulative_reward += reward
print('Cumulative Reward: {}'.format(cumulative_reward))
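Example #2 relies on utils.get_screen to turn the rendered RGB frame into a small preprocessed observation. Its real implementation ships with the accompanying code; the sketch below only illustrates that kind of preprocessing, and the grayscale conversion, naive downsampling, and 84x84 output size are all assumptions:

import numpy as np

def get_screen_sketch(pixel, size=84):
    """Hypothetical preprocessing: RGB frame (H, W, 3) -> normalized (size, size) array."""
    gray = pixel.mean(axis=2)                        # collapse the RGB channels to grayscale
    step_h = max(1, gray.shape[0] // size)
    step_w = max(1, gray.shape[1] // size)
    small = gray[::step_h, ::step_w][:size, :size]   # naive strided downsampling
    return small.astype(np.float32) / 255.0          # scale pixel values to [0, 1]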
Example #3
import gym
import numpy as np
import torch
from torch.utils.tensorboard import SummaryWriter

from agent import DDPG, OUActionNoise  # OUActionNoise's module is assumed; adjust to the accompanying code

env = gym.make('Pendulum-v0')
epoch = 200  # number of training episodes (assumed value)

# Fix the random seeds so that training runs are reproducible.
np.random.seed(42)
env.seed(42)
torch.manual_seed(42)
torch.cuda.manual_seed(42)

writer = SummaryWriter(log_dir='logs/')
agent = DDPG(env, writer)

all_timesteps = 0

for e in range(epoch):
    noise = OUActionNoise(env.action_space.shape[0])
    state = env.reset()
    cumulative_reward = 0
    for timestep in range(200):
        action = agent.get_action(state, noise, timestep)
        state_, reward, done, _ = env.step(action * env.action_space.high[0])
        # env.render()
        agent.store_transition(state, action, state_, reward, done)

        state = state_
        cumulative_reward += reward

        agent.update(all_timesteps)
        all_timesteps += 1
    print('Epoch : {} / {}, Cumulative Reward : {}'.format(
        e, epoch, cumulative_reward))
    writer.add_scalar("reward", cumulative_reward, e)

agent.save_model()
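Both training examples draw exploration noise from an OUActionNoise object created at the start of every episode. The real class comes with the accompanying code; the sketch below is only a standard Ornstein-Uhlenbeck process, and the constructor signature, parameter names, and default values are assumptions:

import numpy as np

class OUActionNoiseSketch:
    """Hypothetical Ornstein-Uhlenbeck noise: temporally correlated exploration noise."""
    def __init__(self, action_dim, mu=0.0, theta=0.15, sigma=0.2, dt=1e-2):
        self.mu = mu * np.ones(action_dim)
        self.theta = theta   # pull of the process back towards mu
        self.sigma = sigma   # scale of the random perturbation
        self.dt = dt
        self.x = np.copy(self.mu)

    def __call__(self):
        # x_{t+1} = x_t + theta * (mu - x_t) * dt + sigma * sqrt(dt) * N(0, 1)
        self.x = (self.x
                  + self.theta * (self.mu - self.x) * self.dt
                  + self.sigma * np.sqrt(self.dt) * np.random.randn(*self.x.shape))
        return self.x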
Example #4
from collections import deque

import gym
import numpy as np
import torch
from torch.utils.tensorboard import SummaryWriter

from agent import DDPG, OUActionNoise  # OUActionNoise's module is assumed; adjust to the accompanying code
from utils import get_screen

env = gym.make('Pendulum-v0')
epoch = 200  # number of training episodes (assumed value)

torch.manual_seed(42)
torch.cuda.manual_seed(42)

writer = SummaryWriter(log_dir='logs/')
agent = DDPG(env, writer)

all_timesteps = 0

for e in range(epoch):
    noise = OUActionNoise(env.action_space.shape[0])
    env.reset()
    pixel = env.render(mode='rgb_array')
    state = deque([get_screen(pixel) for _ in range(3)], maxlen=3)
    cumulative_reward = 0
    for timestep in range(200):
        action = agent.get_action(np.array(state)[np.newaxis], noise, timestep)
        _, reward, done, _ = env.step(action * env.action_space.high[0])
        pixel = env.render(mode='rgb_array')
        # Build the next frame stack: newest frame in, oldest frame out.
        state_ = state.copy()
        state_.append(get_screen(pixel))
        agent.store_transition(np.array(state), action, np.array(state_),
                               reward, done)

        state = state_
        cumulative_reward += reward

        agent.update(all_timesteps, batch_size=16)
        all_timesteps += 1
    print('Epoch : {} / {}, Cumulative Reward : {}'.format(
        e, epoch, cumulative_reward))
    writer.add_scalar("reward", cumulative_reward, e)
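Each agent.update() call above performs one DDPG learning step; the full implementation lives in the accompanying agent module. One characteristic ingredient of DDPG worth spelling out is the soft (Polyak) update of the target networks. The sketch below shows that standard technique in isolation; the function and parameter names are assumptions, not the accompanying code's API:

import torch

def soft_update(target_net, source_net, tau=0.001):
    """Blend source parameters into the target network: target <- tau * source + (1 - tau) * target."""
    with torch.no_grad():
        for target_param, param in zip(target_net.parameters(), source_net.parameters()):
            target_param.mul_(1.0 - tau).add_(tau * param)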