예제 #1
0
            break
    env_t.close()


# Run configuration for the training script below.
env_name = 'CartPole-v1'  # id handed to make_env() for each environment
num_envs = 8              # number of environments given to SubprocVecEnv
n_step = 4                # inner-loop iterations per outer training iteration
max_frame = 50000         # training continues while frame_count * n_step is below this
gamma = 0.99              # presumably the reward discount factor -- confirm at its use site
PENALTY = -1.0            # NOTE(review): not referenced in the visible code; purpose unconfirmed
lr = 0.001                # step size passed to optim.Adam

# Use the GPU when torch reports CUDA support; otherwise stay on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Script entry point: builds the environments, networks and optimizer, then
# starts the training loop.
if __name__ == '__main__':

    # One make_env(env_name) entry per environment, all handed to SubprocVecEnv.
    envs = SubprocVecEnv([make_env(env_name) for i in range(num_envs)])
    # NOTE(review): `net` is not referenced anywhere in the visible part of
    # this script (only actor/critic parameters reach the optimizer) --
    # confirm whether it is still needed.
    net = nn.Sequential(nn.Linear(4, 128), nn.ReLU(), nn.Linear(128, 2))
    # Both networks are constructed and then moved to `device`.
    actor = Actor(4, 128, 2).to(device)
    critic = Critic(4, 128).to(device)
    # A single Adam optimizer covers the parameters of both networks and uses
    # the shared rate `lr`.
    solver = optim.Adam(
        list(actor.parameters()) + list(critic.parameters()), lr)

    # Bookkeeping state; how these are updated lies outside the visible excerpt.
    duration = []
    frame_count = 0
    # One single-element list [0] per environment.
    lifespan = [[0] for _ in range(num_envs)]
    s_gotten = None

    # Keep iterating while frame_count * n_step is below max_frame.
    while frame_count * n_step < max_frame:
        # Fresh, empty buffers for this iteration.
        obs_l, acts_l, rews_l, dones_l, probs_l = [], [], [], [], []
        # One flag per environment, all True at the start of the iteration.
        accept_sample = [True for _ in range(num_envs)]
        # NOTE(review): the body of this loop is cut off in this excerpt.
        for _ in range(n_step):
예제 #2
0
from common.multiprocessing_env import SubprocVecEnv, make_env
from ddpg import *
from utils import *
from memory import Memory
from ddpg_pendulum_net import Actor, Critic
import warnings

# Ignore every warning whose category is UserWarning (applies process-wide
# from this point on).
warnings.filterwarnings("ignore", category=UserWarning)

# Script entry point: sets up the environments, the DDPG model, the memory
# buffer and the run settings used by the rest of the script.
if __name__ == "__main__":
    # --- Environments --------------------------------------------------------
    env_name = 'Pendulum-v0'
    num_envs = 8
    # Build num_envs make_env(env_name) entries and pass them to SubprocVecEnv.
    envs = SubprocVecEnv([make_env(env_name) for _ in range(num_envs)])

    # --- Model ---------------------------------------------------------------
    # All four scalars below are forwarded positionally to DDPG.
    actor_lr, critic_lr = 2e-4, 3e-4
    gamma, soft_tau = 0.99, 0.001
    model = DDPG(Actor(), Critic(), gamma, soft_tau, actor_lr, critic_lr)

    # --- Memory --------------------------------------------------------------
    memory_size = 500000
    memory = Memory(memory_size)

    # --- Run settings --------------------------------------------------------
    # NOTE(review): these are consumed outside the visible excerpt; their
    # exact roles should be confirmed at the use sites.
    eval_freq, batch_size = 1000, 256
    n_steps, num_ddpg_iter = 1, 5
    max_iteration = 50000

    frame_count = 0