예제 #1
0
from env.flow_lib import flow_env
import torch
from torch.distributions.bernoulli import Bernoulli
import numpy as np
from utils import device
from utils.normalizer import Normalizer
from models.agent import StochasticPolicy, Policy

# Build the simulation environment with rendering switched off.
env, env_name = flow_env(render=False, use_inflows=True)
print("simulated task: {}".format(env_name))

# Sizes are taken from the first axis of the env's action/observation spaces.
act_dim = env.action_space.shape[0]
obs_dim = env.observation_space.shape[0]
print(obs_dim)
normalizer = Normalizer(obs_dim)

# Checkpoint to evaluate; the 'ppo' / 'td3' substring of the name selects
# which network class is instantiated below.
filename = 'ppo_340000'
#filename = 'td3_shortgreenpenalty_1332000'
### load RL policy ###
if 'ppo' in filename:
    actor = StochasticPolicy(obs_dim, act_dim, 300,
                             normalizer=normalizer).to(device)
elif 'td3' in filename:
    actor = Policy(obs_dim, act_dim, hidden_dim=400,
                   normalizer=normalizer).to(device)
else:
    raise NotImplementedError(
        "cannot infer policy type from checkpoint name: {}".format(filename))

# map_location remaps the stored tensors onto `device`, so a checkpoint that
# was written on a GPU machine still loads when `device` is the CPU.
# NOTE(review): assumes `device` is a torch.device or device string -- confirm
# against utils.
checkpoint = torch.load('./model_log/' + filename, map_location=device)
actor.load_state_dict(checkpoint['model_state_dict'])
reward_sum = 0.
예제 #2
0
from utils import Transition, device

import numpy as np
import gym
import gym.spaces

import torch
import torch.optim as optim
import torch.nn as nn

torch.utils.backcompat.broadcast_warning.enabled = True
# Use float64 as the default dtype for newly created floating-point tensors.
# torch.set_default_tensor_type is deprecated as of PyTorch 2.1;
# set_default_dtype is the supported replacement for a 'torch.DoubleTensor'
# default.
torch.set_default_dtype(torch.float64)

args = parser.parser()
print('agent type: {}'.format(args.pg_type))
env, env_name = flow_env(render=args.render, use_inflows=True)

### seeding ###
# The environment, PyTorch and NumPy are all seeded from the same args.seed.
env.seed(args.seed)
torch.manual_seed(args.seed)
np.random.seed(args.seed)
###############


def save_policy(steps, actor):
    """Save a checkpoint of ``actor`` under ``./model_log/``.

    The file is named ``<args.pg_type>_<steps>`` (``args`` is the module-level
    argument namespace) and holds a dict with the keys ``'steps'`` and
    ``'model_state_dict'``.

    Args:
        steps: Step count stored in the checkpoint and used in the file name.
        actor: Object exposing ``state_dict()``; its result is what gets saved.
    """
    import os  # local import so the file-level import block stays untouched

    filename = '{}_{}'.format(args.pg_type, steps)
    # torch.save does not create missing parent directories, so make sure the
    # log directory exists before the first checkpoint is written.
    os.makedirs('./model_log/', exist_ok=True)
    torch.save({
        'steps': steps,
        'model_state_dict': actor.state_dict()
    }, './model_log/' + filename)
예제 #3
0
import numpy as np
import gym
import gym.spaces

import torch
import torch.optim as optim
import torch.nn as nn

torch.utils.backcompat.broadcast_warning.enabled = True
# Use float64 as the default dtype for newly created floating-point tensors.
# torch.set_default_tensor_type is deprecated as of PyTorch 2.1;
# set_default_dtype is the supported replacement for a 'torch.DoubleTensor'
# default.
torch.set_default_dtype(torch.float64)

# NOTE(review): presumably the number of intersections in the network --
# confirm against where NUM_INTER is consumed.
NUM_INTER = 9
args = parser.parser()
print('agent type: {}'.format(args.pg_type))
env, env_name = flow_env(render=args.render, use_inflows=True, horizon=4000)

### seeding ###
# The environment, PyTorch and NumPy are all seeded from the same args.seed.
env.seed(args.seed)
torch.manual_seed(args.seed)
np.random.seed(args.seed)
###############

# Sizes are taken from the first axis of the env's observation/action spaces.
obs_dim = env.observation_space.shape[0]
act_dim = env.action_space.shape[0]
tb_writer, label = log.log_writer(args)
total_steps = 0
normalizer = Normalizer(obs_dim)
print("simulated task: {}".format(env_name))

policies = MultiAgent(obs_dim, act_dim, normalizer, args.gamma, args.tau,
예제 #4
0
from env.flow_lib import flow_env
import torch
from torch.distributions.bernoulli import Bernoulli
import numpy as np
from utils import device
from utils.rollout import ma_evaluate
from utils.normalizer import Normalizer
from models.agent import StochasticPolicy, Policy
from agents.multi_agent import MultiAgent

# Build the simulation environment with rendering enabled for visual
# inspection of the loaded policies.
env, env_name = flow_env(render=True,
                         use_inflows=True,
                         sim_step=1,
                         horizon=5000)
print("simulated task: {}".format(env_name))

# Sizes are taken from the first axis of the env's action/observation spaces.
act_dim = env.action_space.shape[0]
obs_dim = env.observation_space.shape[0]
print(obs_dim)
normalizer = Normalizer(obs_dim)

# Checkpoint to evaluate; the 'ppo' / 'td3' substring of the name selects
# the policy-gradient type passed to MultiAgent.
filename = 'td3_lr3e-4_908000'
#filename = 'ppo_0'
#filename = 'td3_shortgreenpenalty_1332000'
### load RL policy ###
if 'ppo' in filename:
    pg_type = 'ppo'
elif 'td3' in filename:
    pg_type = 'td3'
else:
    # Without this branch pg_type would be unbound and the MultiAgent call
    # below would die with an unrelated-looking NameError.
    raise NotImplementedError(
        "cannot infer policy type from checkpoint name: {}".format(filename))
# NOTE(review): the action dimension is passed as the literal 1 rather than
# act_dim -- confirm that this is intended.
policies = MultiAgent(obs_dim, 1, normalizer, 0.995, 0.9, pg_type=pg_type)
policies.load_policies(filename)