# Create the base save directory and pick the next free numeric run directory.
Path(save_path).mkdir(exist_ok=True)
run_number = 0
for p in Path(save_path).iterdir():
    if p.is_dir() and p.name.isnumeric():
        if int(p.name) > run_number:
            run_number = int(p.name)
run_number += 1
save_path += '/' + str(run_number)
Path(save_path).mkdir(exist_ok=True)

# Record a human-readable description of this run alongside its checkpoints.
with open(save_path + '/description.txt', mode='w') as f:
    f.write(description)

agent = TD3Agent(env, save_path=save_path, critic_lr=1e-3, actor_lr=1e-3).to(device)

# Reference call with the full hyperparameter set:
# agent.train(env, 100000, train_freq=1, batch_size=100, policy_noise=0.1, policy_noise_clip=.25,
#             gamma=.99, policy_freq=2, tau=0.005, warmup_steps=10000, buffer_size=50000,
#             exp_noise=.1, eval_freq=1, render=True, eval_episodes=1)

# The original call was truncated after policy_noise_clip; the remaining arguments
# below are assumed to mirror the reference call above.
agent.train(env, 500, train_freq=1, batch_size=800, policy_noise=0.1, policy_noise_clip=.25,
            gamma=.99, policy_freq=2, tau=0.005, warmup_steps=10000, buffer_size=50000,
            exp_noise=.1, eval_freq=1, render=True, eval_episodes=1)
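# Hedged sketch (not part of the original script): the run-numbering logic above could
# be factored into a reusable helper; `next_run_dir` is a hypothetical name.
def next_run_dir(base):
    """Create `base` if needed and return a newly created numbered sub-directory inside it."""
    base_path = Path(base)
    base_path.mkdir(exist_ok=True)
    last = max(
        (int(p.name) for p in base_path.iterdir() if p.is_dir() and p.name.isnumeric()),
        default=0,
    )
    run_dir = base_path / str(last + 1)
    run_dir.mkdir(exist_ok=True)
    return run_dir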
# Dump the agent and environment hyperparameters to a plain-text file for reference.
with open(file_path_txt, 'w') as f:
    f.write('agent_params\n')
    for k in agent_params.keys():
        f.write('\t' + k + ' = ' + str(agent_params[k]) + '\n')
    f.write('env_params\n')
    for k in sandbox_env.env_params.keys():
        f.write('\t' + k + ' = ' + str(sandbox_env.env_params[k]) + '\n')

# device = "cuda:0" if torch.cuda.is_available() else "cpu"
# torch.cuda.set_device(0)
device = "cpu"

env = wrap_openai_gym(sandbox_env.App(always_render=False))

agent = TD3Agent(
    env,
    save_path=agent_params['save_path'],
    critic_lr=agent_params['critic_lr'],
    actor_lr=agent_params['actor_lr']
).to(device)

# Optionally resume from a previously pickled state dict.
if LOAD_AGENT_FROM is not None:
    with open(LOAD_AGENT_FROM, 'rb') as f:
        state_dict = pickle.load(f)
    agent.load_state_dict(state_dict)
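# Hedged sketch (not in the original script): a save counterpart to the pickle-based
# load above. `save_agent_state` is a hypothetical helper; TD3Agent is assumed to be a
# torch.nn.Module, so state_dict() is available alongside load_state_dict().
def save_agent_state(agent, path):
    """Pickle the agent's state dict so a later run can restore it via LOAD_AGENT_FROM."""
    with open(path, 'wb') as f:
        pickle.dump(agent.state_dict(), f)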
# from pyforce.nn import default_network_components
from pyforce.env import wrap_openai_gym  # assumed import; wrap_openai_gym is used below
from pyforce.agents import TD3Agent
# import gym
# from pyforce import agents
import torch

import sandbox_env

# device = "cuda:0" if torch.cuda.is_available() else "cpu"
# torch.cuda.set_device(0)
device = "cpu"
# print(device)

# Wrap the sandbox application as a gym-style environment.
env = wrap_openai_gym(sandbox_env.App(always_render=True, verbose=False))

agent = TD3Agent(env, save_path="./evals/td3_new", critic_lr=1e-3, actor_lr=1e-3).to(device)

# Reference call with the full hyperparameter set:
# agent.train(env, 100000, train_freq=1, batch_size=100, policy_noise=0.1, policy_noise_clip=.25,
#             gamma=.99, policy_freq=2, tau=0.005, warmup_steps=10000, buffer_size=50000,
#             exp_noise=.1, eval_freq=1, render=True, eval_episodes=1)

# The original call was truncated after policy_noise; the remaining arguments below are
# assumed to mirror the reference call above.
agent.train(env, 100000, train_freq=1, batch_size=800, policy_noise=0.1, policy_noise_clip=.25,
            gamma=.99, policy_freq=2, tau=0.005, warmup_steps=10000, buffer_size=50000,
            exp_noise=.1, eval_freq=1, render=True, eval_episodes=1)
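# Hedged sketch (not from the original script): the commented-out device selection above,
# made explicit. Uses only standard torch calls and falls back to the CPU when no GPU is found.
def pick_device():
    if torch.cuda.is_available():
        torch.cuda.set_device(0)
        return "cuda:0"
    return "cpu"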