Code example #1
File: sac_main.py  Project: jinPrelude/rl_algorithms
import os
import time

import gym
import tensorboardX
import torch

from sac import SAC  # import path assumed; SAC is defined in the project


def main(args):
    env = gym.make(args['env_name'])

    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

    action_dim = env.action_space.shape[0]
    max_action = env.action_space.high[0]  # assumes a symmetric, uniform action bound
    state_dim = env.observation_space.shape[0]

    sac = SAC(args, action_dim, max_action, state_dim, device)
    summary = tensorboardX.SummaryWriter(f"./log/{args['env_name']}_sac_{args['noise_type']}")

    timestep = 0
    start_time = time.time()
    for episode in range(args['max_episode']):
        episode_reward = 0
        state = env.reset()

        while True:
            action = sac.get_action(state)
            next_state, reward, done, info = env.step(action)
            sac.save(state, action, reward, next_state, int(done))  # store the transition in the replay buffer
            episode_reward += reward
            state = next_state
            timestep += 1

            if sac.memory_counter > args['batch_size']:  # start training once the buffer holds more than batch_size (64) transitions
                sac.train()

            if done:
                print(f'episode: {episode}  reward: {episode_reward:.3f}  timestep: {timestep}')

                summary.add_scalar('reward/episode', episode_reward, episode)

                break

        if episode % args['save_freq'] == 0:
            os.makedirs('./SaveModel', exist_ok=True)  # create the checkpoint directory if needed
            torch.save(sac.actor.state_dict(),
                       f"./SaveModel/{args['env_name']}_sac_{args['noise_type']}_{episode}")
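
For context, here is a minimal sketch of how this main could be invoked. Only the dictionary keys are taken from the code above; the values are illustrative assumptions, and SAC(args, ...) may expect additional keys not shown here.

if __name__ == '__main__':
    args = {
        'env_name': 'Pendulum-v1',  # illustrative; any continuous-control gym env
        'noise_type': 'ou',         # illustrative; used in the log/checkpoint names above
        'max_episode': 1000,
        'batch_size': 64,
        'save_freq': 100,
    }
    main(args)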
Code example #2
from sac import SAC
import itertools
import torch

ACTIONS = list(itertools.product([-1, 0, 1], [-1, 0, 1]))
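# ACTIONS holds the 9 pairs from {-1, 0, 1} x {-1, 0, 1}; get_action returns an index into it.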

n_episodes = 1000
n_steps = 10000
rews = []
device = torch.device('cuda')  # single device handle reused for every tensor below
env = CarEnv()  # project-specific environment; its import is not shown in the source
model = SAC(9, 19, 0.7, 0.45, 0.001)  # 9 = len(ACTIONS); the remaining arguments are project-specific hyperparameters
for e in range(n_episodes):
    obs, _ = env.reset()
    episode_reward = 0
    for s in range(n_steps):
        act_idx, _, _ = model.get_action(torch.tensor(obs, device=device))
        act = ACTIONS[act_idx]
        obs_, rew = env.step(act)
        episode_reward += rew
        transition = (torch.as_tensor(obs, dtype=torch.double, device=device),
                      torch.as_tensor(act_idx, dtype=torch.long, device=device),
                      torch.as_tensor(rew, dtype=torch.double, device=device),
                      torch.as_tensor(obs_, dtype=torch.double, device=device))
        # The source is truncated here; presumably the transition is pushed to
        # the replay buffer and a gradient step is taken at this point.
        obs = obs_  # advance the observation (assumed; mirrors example #1)
    rews.append(episode_reward)  # record the episode return (assumed continuation)