from env.flow_lib import flow_env
import torch
from torch.distributions.bernoulli import Bernoulli
import numpy as np

from utils import device
from utils.normalizer import Normalizer
from models.agent import StochasticPolicy, Policy

env, env_name = flow_env(render=False, use_inflows=True)
print("simulated task: {}".format(env_name))
act_dim = env.action_space.shape[0]
obs_dim = env.observation_space.shape[0]
print(obs_dim)
normalizer = Normalizer(obs_dim)

filename = 'ppo_340000'
#filename = 'td3_shortgreenpenalty_1332000'

### load RL policy ###
if 'ppo' in filename:
    actor = StochasticPolicy(obs_dim, act_dim, 300, normalizer=normalizer).to(device)
elif 'td3' in filename:
    actor = Policy(obs_dim, act_dim, hidden_dim=400, normalizer=normalizer).to(device)
else:
    raise NotImplementedError
checkpoint = torch.load('./model_log/' + filename)
actor.load_state_dict(checkpoint['model_state_dict'])
reward_sum = 0.
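# --- minimal evaluation-rollout sketch (not in the original file) ---
# Assumes the classic gym step API (obs, reward, done, info) and that calling
# the actor on an observation tensor returns an action tensor; the exact
# forward signature of StochasticPolicy/Policy in models.agent may differ.
obs = env.reset()
done = False
while not done:
    with torch.no_grad():
        action = actor(torch.as_tensor(obs).to(device))
    obs, reward, done, _ = env.step(action.cpu().numpy())
    reward_sum += reward
print('episode return: {}'.format(reward_sum))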
from utils import Transition, device
import numpy as np
import gym
import gym.spaces
import torch
import torch.optim as optim
import torch.nn as nn

import parser  # repo-local argument parser; provides parser.parser()
from env.flow_lib import flow_env

torch.utils.backcompat.broadcast_warning.enabled = True
torch.set_default_tensor_type('torch.DoubleTensor')

args = parser.parser()
print('agent type: {}'.format(args.pg_type))
env, env_name = flow_env(render=args.render, use_inflows=True)

### seeding ###
env.seed(args.seed)
torch.manual_seed(args.seed)
np.random.seed(args.seed)
###############


def save_policy(steps, actor):
    # Checkpoint the actor under ./model_log/, e.g. 'ppo_340000'.
    filename = '{}_{}'.format(args.pg_type, steps)
    torch.save({
        'steps': steps,
        'model_state_dict': actor.state_dict()
    }, './model_log/' + filename)
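# --- matching restore path (sketch; the function name load_policy is new,
# but the dict layout is exactly what save_policy() above writes, and the
# evaluation script restores it the same way) ---
def load_policy(steps, actor):
    filename = '{}_{}'.format(args.pg_type, steps)
    checkpoint = torch.load('./model_log/' + filename)
    actor.load_state_dict(checkpoint['model_state_dict'])
    return checkpoint['steps']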
import numpy as np
import gym
import gym.spaces
import torch
import torch.optim as optim
import torch.nn as nn

import parser  # repo-local argument parser; provides parser.parser()
import log     # repo-local logging helpers; provides log.log_writer()
from env.flow_lib import flow_env
from utils.normalizer import Normalizer
from agents.multi_agent import MultiAgent

torch.utils.backcompat.broadcast_warning.enabled = True
torch.set_default_tensor_type('torch.DoubleTensor')

NUM_INTER = 9  # number of signalized intersections in the network

args = parser.parser()
print('agent type: {}'.format(args.pg_type))
env, env_name = flow_env(render=args.render, use_inflows=True, horizon=4000)

### seeding ###
env.seed(args.seed)
torch.manual_seed(args.seed)
np.random.seed(args.seed)
###############

obs_dim = env.observation_space.shape[0]
act_dim = env.action_space.shape[0]
tb_writer, label = log.log_writer(args)
total_steps = 0
normalizer = Normalizer(obs_dim)
print("simulated task: {}".format(env_name))
policies = MultiAgent(obs_dim, act_dim, normalizer, args.gamma, args.tau,
                      pg_type=args.pg_type)  # trailing kwarg follows the MultiAgent call in the evaluation script
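# --- logging sketch (assumption: log.log_writer() returns a
# torch.utils.tensorboard SummaryWriter and a run label) ---
# During training, per-episode returns would then be logged as:
#   tb_writer.add_scalar('{}/episode_reward'.format(label), episode_reward, total_steps)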
from env.flow_lib import flow_env
import torch
from torch.distributions.bernoulli import Bernoulli
import numpy as np

from utils import device
from utils.rollout import ma_evaluate
from utils.normalizer import Normalizer
from models.agent import StochasticPolicy, Policy
from agents.multi_agent import MultiAgent

env, env_name = flow_env(render=True, use_inflows=True, sim_step=1, horizon=5000)
print("simulated task: {}".format(env_name))
act_dim = env.action_space.shape[0]
obs_dim = env.observation_space.shape[0]
print(obs_dim)
normalizer = Normalizer(obs_dim)

filename = 'td3_lr3e-4_908000'
#filename = 'ppo_0'
#filename = 'td3_shortgreenpenalty_1332000'

### load RL policy ###
if 'ppo' in filename:
    pg_type = 'ppo'
elif 'td3' in filename:
    pg_type = 'td3'
else:
    raise NotImplementedError  # guard added: pg_type would otherwise be undefined below
policies = MultiAgent(obs_dim, 1, normalizer, 0.995, 0.9, pg_type=pg_type)
policies.load_policies(filename)
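# --- evaluation call sketch (ma_evaluate is imported above but its interface
# is not shown in this file; the signature below is an assumption) ---
# mean_reward = ma_evaluate(env, policies)
# print('mean episode reward: {}'.format(mean_reward))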