def main():
    """
    UnboundLocalError: local variable 'RENDER' referenced before assignment

    If a global variable is assigned inside a function without being declared
    with the "global" keyword, the name is treated as a local variable
    throughout the function. For example, if "RENDER" were not declared global
    here, reading "RENDER" below would raise UnboundLocalError, because the
    read happens before the local assignment "RENDER = True" near the end of
    the loop.
    """
    global RENDER

    env = gym.make(ENV_NAME)
    env = env.unwrapped
    env.seed(1)

    s_dim = env.observation_space.shape[0]
    a_dim = env.action_space.shape[0]
    a_bound = env.action_space.high[0]
    # print(f"s_dim: {s_dim}, a_dim: {a_dim}, a_bound: {a_bound}")
    # s_dim: 3, a_dim: 1, a_bound: 2.0

    ddpg = DDPG(s_dim, a_dim, a_bound)

    # var: scale of the Gaussian noise added to the action for exploration
    var = 3
    for i in range(MAX_EPISODES):
        s = env.reset()
        # s : np.ndarray, s.shape = (3,)
        ep_reward = 0
        for j in range(MAX_EP_STEPS):
            if RENDER:
                env.render()

            a = ddpg.choose_action(s)
            a = np.clip(np.random.normal(a, var), -a_bound, a_bound)
            s_, r, done, info = env.step(a)
            # s  : np.ndarray
            # a  : noisy action, clipped to [-a_bound, a_bound]
            # r  : float
            # s_ : np.ndarray

            ddpg.store_transition(s, a, r / 10, s_)

            if ddpg.m_pointer > ddpg.capacity:
                var *= 0.9995  # decay exploration noise once the buffer is full
                ddpg.learn()

            s = s_
            ep_reward += r
            if done or (j + 1) == MAX_EP_STEPS:
                print(f"Episode: {i:03d}")
                print(f"\tReward: {ep_reward:.3f}, Explore: {var:.2f}")
                if ep_reward > -150:
                    RENDER = True
                break
    env.close()
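# The UnboundLocalError described in the docstring above can be reproduced in
# isolation. The sketch below is illustrative only (the VERBOSE flag and the
# two functions are hypothetical names, not part of the training script): an
# assignment anywhere in a function makes the name local to the whole function
# body, so a read before that assignment fails unless "global" is declared.
VERBOSE = False

def print_progress_buggy():
    if VERBOSE:          # raises UnboundLocalError: read before local assignment
        print("progress")
    VERBOSE = True       # this assignment makes VERBOSE local to the function

def print_progress_fixed():
    global VERBOSE       # refer to the module-level VERBOSE instead
    if VERBOSE:
        print("progress")
    VERBOSE = True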
np.random.seed(42)
env.seed(42)
torch.manual_seed(42)
torch.cuda.manual_seed(42)

writer = SummaryWriter(log_dir='logs/')
agent = DDPG(env, writer)

all_timesteps = 0
for e in range(epoch):
    noise = OUActionNoise(env.action_space.shape[0])
    state = env.reset()
    cumulative_reward = 0

    for timestep in range(200):
        action = agent.get_action(state, noise, timestep)
        state_, reward, done, _ = env.step(action * env.action_space.high[0])
        # env.render()

        agent.store_transition(state, action, state_, reward, done)

        state = state_
        cumulative_reward += reward

        agent.update(all_timesteps)
        all_timesteps += 1

    print('Epoch : {} / {}, Cumulative Reward : {}'.format(
        e, epoch, cumulative_reward))
    writer.add_scalar("reward", cumulative_reward, e)

agent.save_model()
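# OUActionNoise is constructed above from the action dimension but its
# definition is not shown in this excerpt. Below is a minimal sketch of an
# Ornstein-Uhlenbeck noise process matching that constructor signature; the
# theta/sigma/dt defaults and the sample()/reset() method names are
# assumptions, not necessarily the actual API used by agent.get_action().
import numpy as np

class OUActionNoise:
    """Ornstein-Uhlenbeck process: temporally correlated exploration noise."""

    def __init__(self, action_dim, mu=0.0, theta=0.15, sigma=0.2, dt=1e-2):
        self.mu = mu * np.ones(action_dim)
        self.theta = theta
        self.sigma = sigma
        self.dt = dt
        self.x = np.copy(self.mu)

    def sample(self):
        # dx = theta * (mu - x) * dt + sigma * sqrt(dt) * N(0, 1)
        dx = (self.theta * (self.mu - self.x) * self.dt
              + self.sigma * np.sqrt(self.dt) * np.random.randn(*self.x.shape))
        self.x = self.x + dx
        return self.x

    def reset(self):
        self.x = np.copy(self.mu)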
agent = DDPG(env, writer)

all_timesteps = 0
for e in range(epoch):
    noise = OUActionNoise(env.action_space.shape[0])
    env.reset()
    pixel = env.render(mode='rgb_array')
    # stack the last three preprocessed frames as the state
    state = deque([get_screen(pixel) for _ in range(3)], maxlen=3)
    cumulative_reward = 0

    for timestep in range(200):
        action = agent.get_action(np.array(state)[np.newaxis], noise, timestep)
        _, reward, done, _ = env.step(action * env.action_space.high[0])

        pixel = env.render(mode='rgb_array')
        state_ = state.copy()
        state_.append(get_screen(pixel))

        agent.store_transition(np.array(state), action, np.array(state_),
                               reward, done)

        state = state_
        cumulative_reward += reward

        agent.update(all_timesteps, batch_size=16)
        all_timesteps += 1

    print('Epoch : {} / {}, Cumulative Reward : {}'.format(
        e, epoch, cumulative_reward))
    writer.add_scalar("reward", cumulative_reward, e)

    if e % 500 == 0:
        agent.save_model('models/' + str(e) + '_')

agent.save_model()
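# get_screen() is used above to turn each rendered RGB frame into one channel
# of the stacked pixel state, but it is not defined in this excerpt. Below is
# a minimal sketch assuming grayscale conversion, downscaling to 84x84 with
# OpenCV, and normalization to [0, 1]; the output size, the normalization, and
# the cv2 dependency are assumptions, not the repo's exact helper.
import cv2
import numpy as np

def get_screen(pixel):
    """Preprocess a rendered RGB frame into a (84, 84) float32 image so that
    three consecutive frames can be stacked into a (3, 84, 84) state."""
    gray = cv2.cvtColor(pixel, cv2.COLOR_RGB2GRAY)
    resized = cv2.resize(gray, (84, 84), interpolation=cv2.INTER_AREA)
    return resized.astype(np.float32) / 255.0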