def experiment(variant):
    """Assemble and run DQN training with a GNN Q-function on a traffic env.

    Args:
        variant: Experiment configuration mapping. Keys read here:
            ``'qf_kwargs'``, ``'epsilon'``, ``'replay_buffer_size'``,
            ``'trainer_kwargs'`` and ``'algorithm_kwargs'``.

    The environment name is taken from the module-level ``args.exp_name``.
    Nothing is returned; the function ends after ``algorithm.train()``.
    """
    from traffic.make_env import make_env

    # Exploration and evaluation each get their own environment instance.
    expl_env = make_env(args.exp_name)
    eval_env = make_env(args.exp_name)
    action_dim = eval_env.action_space.n

    # NOTE(review): edge_index looks like PyG-style (2, E) source/target rows
    # linking node 0 with nodes 1 and 2 in both directions -- confirm against
    # TrafficGraphBuilder.
    gb = TrafficGraphBuilder(
        input_dim=4,
        ego_init=torch.tensor([0., 1.]),
        other_init=torch.tensor([1., 0.]),
        edge_index=torch.tensor([[0, 0, 1, 2],
                                 [1, 2, 0, 0]]),
    )
    qf = GNNNet(
        pre_graph_builder=gb,
        node_dim=16,
        output_dim=action_dim,
        post_mlp_kwargs=variant['qf_kwargs'],
        num_conv_layers=3,
    )
    # The target network starts as an independent deep copy of the Q-network.
    target_qf = copy.deepcopy(qf)

    eval_policy = ArgmaxDiscretePolicy(qf)
    expl_policy = PolicyWrappedWithExplorationStrategy(
        EpsilonGreedy(expl_env.action_space, variant['epsilon']),
        eval_policy,
    )
    eval_path_collector = MdpPathCollector(
        eval_env,
        eval_policy,
    )
    expl_path_collector = MdpPathCollector(
        expl_env,
        expl_policy,
    )
    replay_buffer = PrioritizedReplayBuffer(
        variant['replay_buffer_size'],
        expl_env,
    )

    qf_criterion = nn.MSELoss()
    trainer = DQNTrainer(
        qf=qf,
        target_qf=target_qf,
        qf_criterion=qf_criterion,
        replay_buffer=replay_buffer,
        **variant['trainer_kwargs']
    )
    algorithm = TorchBatchRLAlgorithm(
        trainer=trainer,
        exploration_env=expl_env,
        evaluation_env=eval_env,
        exploration_data_collector=expl_path_collector,
        evaluation_data_collector=eval_path_collector,
        replay_buffer=replay_buffer,
        **variant['algorithm_kwargs']
    )
    algorithm.to(ptu.device)
    algorithm.train()
def experiment(variant):
    """Assemble and run PPO training with a GNN softmax policy on a traffic env.

    Args:
        variant: Experiment configuration mapping. Keys read here:
            ``'policy_kwargs'``, ``'trainer_kwargs'`` and
            ``'algorithm_kwargs'``.

    The environment name is taken from the module-level ``args.exp_name``.
    Nothing is returned; the function ends after ``algorithm.train()``.
    """
    from traffic.make_env import make_env

    # Exploration and evaluation each get their own environment instance.
    expl_env = make_env(args.exp_name)
    eval_env = make_env(args.exp_name)
    obs_dim = eval_env.observation_space.low.size
    action_dim = eval_env.action_space.n

    # NOTE(review): edge_index looks like PyG-style (2, E) source/target rows
    # linking node 0 with nodes 1 and 2 in both directions -- confirm against
    # TrafficGraphBuilder.
    gb = TrafficGraphBuilder(
        input_dim=4,
        ego_init=torch.tensor([0., 1.]),
        other_init=torch.tensor([1., 0.]),
        edge_index=torch.tensor([[0, 0, 1, 2],
                                 [1, 2, 0, 0]]),
    )
    module = GNNNet(
        pre_graph_builder=gb,
        node_dim=16,
        output_dim=action_dim,
        post_mlp_kwargs=dict(hidden_sizes=[32]),
        num_conv_layers=3,
    )
    policy = SoftmaxPolicy(module, **variant['policy_kwargs'])

    # The value function is a plain MLP over the flat observation vector.
    vf = Mlp(
        hidden_sizes=[32, 32],
        input_size=obs_dim,
        output_size=1,
    )
    vf_criterion = nn.MSELoss()

    # Evaluation uses an argmax wrapper around the policy; exploration uses
    # the policy itself.
    eval_policy = ArgmaxDiscretePolicy(policy, use_preactivation=True)
    expl_policy = policy
    eval_path_collector = MdpPathCollector(
        eval_env,
        eval_policy,
    )
    expl_path_collector = MdpPathCollector(
        expl_env,
        expl_policy,
    )

    trainer = PPOTrainer(
        policy=policy,
        value_function=vf,
        vf_criterion=vf_criterion,
        **variant['trainer_kwargs']
    )
    algorithm = TorchOnlineRLAlgorithm(
        trainer=trainer,
        exploration_env=expl_env,
        evaluation_env=eval_env,
        exploration_data_collector=expl_path_collector,
        evaluation_data_collector=eval_path_collector,
        **variant['algorithm_kwargs']
    )
    algorithm.to(ptu.device)
    algorithm.train()
def experiment(variant):
    """Assemble and run supervised-PPO training with a GNN+LSTM policy.

    Args:
        variant: Experiment configuration mapping. Keys read here:
            ``'env_kwargs'``, ``'trainer_kwargs'`` (including
            ``'max_path_length'``), ``'load_kwargs'`` (``'load'``,
            ``'load_dir'``), ``'lstm_kwargs'`` (``'hidden_dim'``,
            ``'num_layers'``), ``'gnn_kwargs'`` (``'node_dim'``,
            ``'conv_type'``, ``'num_layers'``, ``'activation'``) and
            ``'algorithm_kwargs'``.

    The environment name is taken from the module-level ``args.exp_name``.
    When ``variant['load_kwargs']['load']`` is truthy the policy and value
    function are restored from ``<load_dir>/params.pkl``; otherwise they are
    built from scratch. Nothing is returned; the function ends after
    ``algorithm.train()``.
    """
    from traffic.make_env import make_env

    expl_env = make_env(args.exp_name, **variant['env_kwargs'])
    eval_env = make_env(args.exp_name, **variant['env_kwargs'])
    obs_dim = eval_env.observation_space.low.size
    action_dim = eval_env.action_space.n
    label_num = expl_env.label_num
    label_dim = expl_env.label_dim
    max_path_length = variant['trainer_kwargs']['max_path_length']

    if variant['load_kwargs']['load']:
        load_dir = variant['load_kwargs']['load_dir']
        # NOTE(review): torch.load unpickles arbitrary objects; only point
        # load_dir at checkpoints from a trusted source.
        load_data = torch.load(load_dir + '/params.pkl', map_location='cpu')
        policy = load_data['trainer/policy']
        vf = load_data['trainer/value_function']
    else:
        hidden_dim = variant['lstm_kwargs']['hidden_dim']
        num_lstm_layers = variant['lstm_kwargs']['num_layers']
        node_dim = variant['gnn_kwargs']['node_dim']

        node_num = expl_env.max_veh_num + 1
        # The flat observation is divided evenly across the nodes.
        input_node_dim = int(obs_dim / node_num)

        # Initial previous-action and recurrent state: four zero arrays,
        # ordered (h1, c1, h2, c2), each a distinct object.
        a_0 = np.zeros(action_dim)
        latent_shape = (node_num, hidden_dim * num_lstm_layers)
        latent_0 = tuple(np.zeros(latent_shape) for _ in range(4))

        from lstm_net import LSTMNet
        # Only the first ego LSTM is given action_dim; the others pass 0.
        lstm1_ego = LSTMNet(input_node_dim, action_dim,
                            hidden_dim, num_lstm_layers)
        lstm1_other = LSTMNet(input_node_dim, 0, hidden_dim, num_lstm_layers)
        lstm2_ego = LSTMNet(node_dim, 0, hidden_dim, num_lstm_layers)
        lstm2_other = LSTMNet(node_dim, 0, hidden_dim, num_lstm_layers)

        from graph_builder import TrafficGraphBuilder
        gb = TrafficGraphBuilder(
            input_dim=hidden_dim,
            node_num=node_num,
            ego_init=torch.tensor([0., 1.]),
            other_init=torch.tensor([1., 0.]),
        )

        from gnn_net import GNNNet
        gnn = GNNNet(
            pre_graph_builder=gb,
            node_dim=node_dim,
            conv_type=variant['gnn_kwargs']['conv_type'],
            num_conv_layers=variant['gnn_kwargs']['num_layers'],
            hidden_activation=variant['gnn_kwargs']['activation'],
        )

        from gnn_lstm2_net import GNNLSTM2Net
        policy_net = GNNLSTM2Net(node_num, gnn, lstm1_ego, lstm1_other,
                                 lstm2_ego, lstm2_other)

        from layers import FlattenLayer, SelectLayer
        # Action head: selects index 0 on dim -2 before the linear layer.
        decoder = nn.Sequential(
            SelectLayer(-2, 0),
            FlattenLayer(2),
            nn.ReLU(),
            nn.Linear(hidden_dim, action_dim),
        )
        # Supervised head: selects indices 1..node_num-1 on dim -2.
        sup_learner = nn.Sequential(
            SelectLayer(-2, np.arange(1, node_num)),
            nn.ReLU(),
            nn.Linear(hidden_dim, label_dim),
        )

        from sup_softmax_lstm_policy import SupSoftmaxLSTMPolicy
        policy = SupSoftmaxLSTMPolicy(
            a_0=a_0,
            latent_0=latent_0,
            obs_dim=obs_dim,
            action_dim=action_dim,
            lstm_net=policy_net,
            decoder=decoder,
            sup_learner=sup_learner,
        )
        print('parameters: ',
              np.sum([p.view(-1).shape[0] for p in policy.parameters()]))

        vf = Mlp(
            hidden_sizes=[32, 32],
            input_size=obs_dim,
            output_size=1,
        )

    vf_criterion = nn.MSELoss()

    from rlkit.torch.policies.make_deterministic import MakeDeterministic
    eval_policy = MakeDeterministic(policy)
    expl_policy = policy
    eval_path_collector = MdpPathCollector(
        eval_env,
        eval_policy,
    )
    expl_path_collector = MdpPathCollector(
        expl_env,
        expl_policy,
    )

    from sup_replay_buffer import SupReplayBuffer
    # NOTE(review): the buffer's label_dim is fed label_num (not label_dim);
    # presumably it stores one label per labelled node -- confirm against
    # SupReplayBuffer.
    replay_buffer = SupReplayBuffer(
        observation_dim=obs_dim,
        action_dim=action_dim,
        label_dim=label_num,
        max_replay_buffer_size=int(1e6),
        max_path_length=max_path_length,
        recurrent=True,
    )

    from rlkit.torch.vpg.ppo_sup_vanilla import PPOSupVanillaTrainer
    trainer = PPOSupVanillaTrainer(
        policy=policy,
        value_function=vf,
        vf_criterion=vf_criterion,
        replay_buffer=replay_buffer,
        recurrent=True,
        **variant['trainer_kwargs']
    )
    algorithm = TorchOnlineRLAlgorithm(
        trainer=trainer,
        exploration_env=expl_env,
        evaluation_env=eval_env,
        exploration_data_collector=expl_path_collector,
        evaluation_data_collector=eval_path_collector,
        log_path_function=get_traffic_path_information,
        **variant['algorithm_kwargs']
    )
    algorithm.to(ptu.device)
    algorithm.train()
def experiment(variant):
    """Assemble and run recurrent PPO with a GNN+LSTM policy on SimpleSupLSTMEnv.

    Args:
        variant: Experiment configuration mapping. Keys read here:
            ``'env_kwargs'``, ``'load_kwargs'`` (``'load'``, ``'load_dir'``),
            ``'lstm_kwargs'`` (``'hidden_dim'``, ``'num_layers'``),
            ``'gnn_kwargs'`` (``'node_dim'``, ``'conv_type'``,
            ``'num_layers'``, ``'activation'``), ``'trainer_kwargs'`` and
            ``'algorithm_kwargs'``.

    When ``variant['load_kwargs']['load']`` is truthy the policy and value
    function are restored from ``<load_dir>/params.pkl``; otherwise they are
    built from scratch. Nothing is returned; the function ends after
    ``algorithm.train()``.
    """
    from simple_sup_lstm import SimpleSupLSTMEnv

    expl_env = SimpleSupLSTMEnv(**variant['env_kwargs'])
    eval_env = SimpleSupLSTMEnv(**variant['env_kwargs'])
    obs_dim = eval_env.observation_space.low.size
    action_dim = eval_env.action_space.n

    if variant['load_kwargs']['load']:
        load_dir = variant['load_kwargs']['load_dir']
        # NOTE(review): torch.load unpickles arbitrary objects; only point
        # load_dir at checkpoints from a trusted source.
        load_data = torch.load(load_dir + '/params.pkl', map_location='cpu')
        policy = load_data['trainer/policy']
        vf = load_data['trainer/value_function']
    else:
        hidden_dim = variant['lstm_kwargs']['hidden_dim']
        num_lstm_layers = variant['lstm_kwargs']['num_layers']
        node_dim = variant['gnn_kwargs']['node_dim']

        node_num = expl_env.node_num
        # The flat observation is divided evenly across the nodes.
        input_node_dim = int(obs_dim / node_num)

        # Initial previous-action and recurrent state: four zero arrays,
        # ordered (h1, c1, h2, c2), each a distinct object.
        a_0 = np.zeros(action_dim)
        latent_shape = (node_num, hidden_dim * num_lstm_layers)
        latent_0 = tuple(np.zeros(latent_shape) for _ in range(4))

        from lstm_net import LSTMNet
        # Only the first ego LSTM is given action_dim; the others pass 0.
        lstm1_ego = LSTMNet(input_node_dim, action_dim,
                            hidden_dim, num_lstm_layers)
        lstm1_other = LSTMNet(input_node_dim, 0, hidden_dim, num_lstm_layers)
        lstm2_ego = LSTMNet(node_dim, 0, hidden_dim, num_lstm_layers)
        lstm2_other = LSTMNet(node_dim, 0, hidden_dim, num_lstm_layers)

        from graph_builder import TrafficGraphBuilder
        gb = TrafficGraphBuilder(
            input_dim=hidden_dim,
            node_num=node_num,
            ego_init=torch.tensor([0., 1.]),
            other_init=torch.tensor([1., 0.]),
        )

        from gnn_net import GNNNet
        gnn = GNNNet(
            pre_graph_builder=gb,
            node_dim=node_dim,
            conv_type=variant['gnn_kwargs']['conv_type'],
            num_conv_layers=variant['gnn_kwargs']['num_layers'],
            hidden_activation=variant['gnn_kwargs']['activation'],
        )

        from gnn_lstm2_net import GNNLSTM2Net
        policy_net = GNNLSTM2Net(node_num, gnn, lstm1_ego, lstm1_other,
                                 lstm2_ego, lstm2_other)

        from layers import FlattenLayer, SelectLayer
        # Action head: selects index 0 on dim -2 before the linear layer.
        post_net = nn.Sequential(
            SelectLayer(-2, 0),
            FlattenLayer(2),
            nn.ReLU(),
            nn.Linear(hidden_dim, action_dim),
        )

        from softmax_lstm_policy import SoftmaxLSTMPolicy
        policy = SoftmaxLSTMPolicy(
            a_0=a_0,
            latent_0=latent_0,
            obs_dim=obs_dim,
            action_dim=action_dim,
            lstm_net=policy_net,
            post_net=post_net,
        )
        print('parameters: ',
              np.sum([p.view(-1).shape[0] for p in policy.parameters()]))

        # TODO: id is also an input
        vf = Mlp(
            hidden_sizes=[32, 32],
            input_size=obs_dim,
            output_size=1,
        )

    vf_criterion = nn.MSELoss()

    from rlkit.torch.policies.make_deterministic import MakeDeterministic
    eval_policy = MakeDeterministic(policy)
    expl_policy = policy
    eval_path_collector = MdpPathCollector(
        eval_env,
        eval_policy,
    )
    expl_path_collector = MdpPathCollector(
        expl_env,
        expl_policy,
    )

    trainer = PPOTrainer(
        policy=policy,
        value_function=vf,
        vf_criterion=vf_criterion,
        recurrent=True,
        **variant['trainer_kwargs']
    )
    algorithm = TorchOnlineRLAlgorithm(
        trainer=trainer,
        exploration_env=expl_env,
        evaluation_env=eval_env,
        exploration_data_collector=expl_path_collector,
        evaluation_data_collector=eval_path_collector,
        **variant['algorithm_kwargs']
    )
    algorithm.to(ptu.device)
    algorithm.train()
ng = pyg_utils.to_networkx(data) pos = {} labels = {} color_map = [] for node in ng.nodes: pos[node] = obs_batch[0, node, :2].numpy() labels[node] = str(node) color_map.append('C' + str(node)) plt.figure() networkx.draw(ng, pos, node_color=color_map, labels=labels) plt.show() from graph_builder import TrafficGraphBuilder node_num = env.max_veh_num + 1 gb = TrafficGraphBuilder( input_dim=4, node_num=node_num, ego_init=torch.tensor([0., 1.]), other_init=torch.tensor([1., 0.]), ) obs = env.reset() env.render() check_graph(obs) while True: obs, r, done, info = env.step(1) env.render() check_graph(obs)
import json with open('{}/{}/seed{}/variant.json'.format(pre_dir, args.log_dir, args.seed)) as f: variant = json.load(f) env = make_env(args.exp_name, **variant['env_kwargs']) o = env.reset() env.render() pdb.set_trace() policy.reset() if args.plot_graph: node_num = env.max_veh_num + 1 from graph_builder import TrafficGraphBuilder gb = TrafficGraphBuilder( input_dim=4, node_num=node_num, ego_init=torch.tensor([0., 1.]), other_init=torch.tensor([1., 0.]), ) max_path_length = 200 path_length = 0 done = False c_r = 0. while True: path_length += 1 a, agent_info = eval_policy.get_action(o) o, r, done, _ = env.step(a) if 'intentions' in agent_info.keys(): intentions = agent_info['intentions'] else:
import torch
import numpy as np
from graph_builder import TrafficGraphBuilder
from gnn_net import GNNNet

# Manual smoke check: build a graph from a small hand-written observation
# batch, print what the builder returns, then run the batch through a GNNNet.

# NOTE(review): edge_index looks like PyG-style (2, E) source/target rows
# linking node 0 with nodes 1 and 2 in both directions -- confirm against
# TrafficGraphBuilder.
gb = TrafficGraphBuilder(
    input_dim=4,
    ego_init=torch.tensor([0., 1.]),
    other_init=torch.tensor([1., 0.]),
    edge_index=torch.tensor([
        [0, 0, 1, 2],
        [1, 2, 0, 0],
    ]),
)

# Two rows of twelve values each; every run of four consecutive entries
# repeats a single integer (0..2 in the first row, 3..5 in the second).
obs_batch = torch.tensor([
    [0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2],
    [3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5],
])

# The builder is callable and returns a (features, edge index) pair.
x, edge_index = gb(obs_batch)
print(x)
print(edge_index)

# With num_conv_layers=0 the network is configured without conv layers.
gnn = GNNNet(
    pre_graph_builder=gb,
    node_dim=16,
    output_dim=2,
    post_mlp_kwargs={'hidden_sizes': [64]},
    num_conv_layers=0,
)
q = gnn(obs_batch)
print(q)