Example 1
from rllite import SAC


def run(lock, shared_eps_num, shared_eps_reward, hyper_param):
    """Worker: train SAC for one (batch_size, seed) pair and publish
    progress through the shared values."""
    model = SAC(
        env_name='Pendulum-v0',
        load_dir='./ckpt/ckpt_' + str(hyper_param[0]) + '_' +
        str(hyper_param[1]),
        log_dir='./log/log_' + str(hyper_param[0]) + '_' + str(hyper_param[1]),
        buffer_size=1e6,
        seed=hyper_param[1],
        max_episode_steps=500,  # set manually
        batch_size=hyper_param[0],
        discount=0.99,
        learning_starts=500,
        tau=0.005,
        save_eps_num=100)

    timesteps = 0
    total_timesteps = 1e5
    max_eps_steps = 100

    # train
    while timesteps < total_timesteps:
        episode_reward = 0
        done = False
        eps_steps = 0
        obs = model.env.reset()
        while not done and eps_steps < max_eps_steps:
            action = model.predict(obs)
            new_obs, reward, done, info = model.env.step(action)
            model.replay_buffer.push(obs, action, reward, new_obs, done)
            obs = new_obs
            episode_reward += reward
            eps_steps += 1
            timesteps += 1
            if timesteps > model.learning_starts:
                model.train_step()
        model.episode_num += 1
        model.writer.add_scalar('episode_reward', episode_reward,
                                model.episode_num)

        # publish progress to the parent process
        with lock:
            shared_eps_num.value = model.episode_num
            shared_eps_reward.value = episode_reward
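
run is written as a multiprocessing worker, so it needs a launcher. Below is a minimal sketch, assuming Python's standard multiprocessing primitives; the specific (batch_size, seed) grid is illustrative, not part of rllite:

import multiprocessing as mp

if __name__ == '__main__':
    lock = mp.Lock()
    shared_eps_num = mp.Value('i', 0)       # 'i': signed int episode counter
    shared_eps_reward = mp.Value('d', 0.0)  # 'd': double for the reward

    # one worker per (batch_size, seed) pair; the values here are made up
    workers = [mp.Process(target=run,
                          args=(lock, shared_eps_num, shared_eps_reward,
                                (batch_size, seed)))
               for batch_size in (64, 128)
               for seed in (0, 1)]
    for w in workers:
        w.start()
    for w in workers:
        w.join()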
Example 2
from rllite import SAC
from env import Env
import numpy as np

# from rllite.common import choose_gpu, GymDelay

# choose your GPU if you have more than one
# choose_gpu(0)
env = Env(10666)
# set up the model
model = SAC(
    external_env=env,  # import your env
    env_name="ssl_env",  # your env name
    load_dir='./ckpt',
    log_dir="./log",
    buffer_size=1e6,
    seed=2,
    max_episode_steps=500,  # set manually
    batch_size=64,
    discount=0.99,
    learning_starts=1000,
    tau=0.005,
    save_eps_num=100)

# or simply call: model.learn(1e6)

timesteps = 0
total_timesteps = 1e6
max_eps_steps = 500

# tricky limit
xy_acc = 4.0 / 75.0 / 2
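
Example 2 passes a custom environment via external_env, so Env must expose the classic Gym interface that the training loop in Example 1 relies on: reset returning an observation, step returning an (obs, reward, done, info) 4-tuple, plus observation and action spaces. A minimal sketch of such an adapter; the class body, space shapes, and the port argument are hypothetical:

import gym
import numpy as np


class Env(gym.Env):
    """Hypothetical stand-in for the socket-backed env; port is illustrative."""

    def __init__(self, port):
        self.port = port
        self.observation_space = gym.spaces.Box(
            -np.inf, np.inf, shape=(8,), dtype=np.float32)
        self.action_space = gym.spaces.Box(
            -1.0, 1.0, shape=(2,), dtype=np.float32)

    def reset(self):
        # return the initial observation
        return np.zeros(self.observation_space.shape, dtype=np.float32)

    def step(self, action):
        # return (obs, reward, done, info), matching the loop in Example 1
        obs = np.zeros(self.observation_space.shape, dtype=np.float32)
        return obs, 0.0, False, {}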
Example 3
from rllite import SAC
model = SAC('Pendulum-v0').learn(1e6)
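
Note the chained call: learn evidently returns the model instance, which is what lets construction and training collapse into one line while still keeping a handle on the trained model.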
Example 4
from rllite import SAC

# set up the model
model = SAC(env_name='Pendulum-v0',
            load_dir='./ckpt',
            log_dir="./log",
            buffer_size=1e6,
            seed=1,
            max_episode_steps=None,
            batch_size=64,
            discount=0.99,
            learning_starts=500,
            tau=0.005,
            save_eps_num=100)

# train
model.learn(1e6)

# eval
for _ in range(10):
    done = False
    obs = model.env.reset()
    while not done:
        action = model.predict(obs)
        obs, reward, done, info = model.env.step(action)
        model.env.render()
model.env.close()
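
If you want a numeric summary rather than just rendering, the same evaluation loop can accumulate returns; this sketch uses only the calls already shown above:

returns = []
for _ in range(10):
    done = False
    episode_return = 0.0
    obs = model.env.reset()
    while not done:
        action = model.predict(obs)
        obs, reward, done, info = model.env.step(action)
        episode_return += reward
    returns.append(episode_return)

print('mean return over 10 episodes:', sum(returns) / len(returns))
model.env.close()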