# Sweep script: for every value in a2s (then a1s), reset the env, append one
# extra column to the observations, query a context actor, and collect part of
# the resulting action in c1s.
# NOTE(review): this excerpt was whitespace-collapsed; the indentation below is
# reconstructed from syntax — confirm against the original file.
cgca = data['trainer/cgca']
cactor_n = data['trainer/cactor_n']
from rlkit.torch.policies.make_deterministic import MakeDeterministic
# Wrap every entry of cactor_n. In Python 3 the comprehension variable
# `cactor` does not leak, so the `cactor` tested further down is whatever was
# bound before this excerpt.
cactor_n = [MakeDeterministic(cactor) for cactor in cactor_n]
for a2 in a2s:
    o_n = env.reset()
    # Extra column: a uniform sample from [-1, 1) in row 0 and the swept value
    # a2 in row 1; [None, :] then adds a leading dimension.
    # assumes o_n has exactly two rows so the concatenation lines up — TODO confirm
    cg_inputs = torch.cat([
        torch.tensor(o_n),
        torch.tensor([[np.random.rand() * 2. - 1.], [a2]])
    ], dim=-1).float()[None, :]
    # Disabled alternative that pins row 0 to 0.5 instead of sampling it:
    # cg_inputs = torch.cat([torch.tensor(o_n), torch.tensor([[0.5],[a2]])],dim=-1).float()[None,:]
    if context_graph:
        # context_graph output, index 0 on the second axis, goes to `cactor`.
        contexts = context_graph(cg_inputs)
        c1, _ = cactor.get_action(contexts[:, 0, :])
        c1s.append(c1[0])
    else:
        if cactor:
            # `cactor` is fed the raw inputs (leading dimension dropped) and
            # row 0 of its output is stored.
            cactions, _ = cactor.get_action(cg_inputs[0])
            c1s.append(cactions[0, :])
        else:
            # Otherwise cgca builds the contexts and the first wrapped entry of
            # cactor_n acts on index 0.
            contexts = cgca(cg_inputs)
            c1, _ = cactor_n[0].get_action(contexts[:, 0, :])
            c1s.append(c1[0])
# Second sweep: here the swept value a1 sits in row 0 and the random sample in
# row 1.
for a1 in a1s:
    o_n = env.reset()
    cg_inputs = torch.cat([
        torch.tensor(o_n),
        torch.tensor([[a1], [np.random.rand() * 2. - 1.]])
# Run args.epoch evaluation episodes with a MakeDeterministic-wrapped policy
# and tally how often the argmax of the policy's reported 'intentions' matches
# the labels returned by env.get_sup_labels().
# NOTE(review): indentation reconstructed from a whitespace-collapsed excerpt;
# the while-loop body continues beyond what is shown here.
eval_policy = MakeDeterministic(policy)
returns = []
success_num = collision_num = 0
inference_correct = inference_total = 0
for _ in range(args.epoch):
    o = env.reset()
    # reset() is called on the unwrapped policy, not on eval_policy.
    policy.reset()
    path_length, done, c_r = 0, False, 0.
    while True:
        path_length += 1
        a, agent_info = eval_policy.get_action(o)
        o, r, done, env_info = env.step(a)
        if 'intentions' not in agent_info:
            # No intention output for this step: only the total is bumped.
            inference_total += 1
        else:
            intention_probs = agent_info['intentions']
            inffered_intentions = np.argmax(intention_probs, axis=-1)
            true_intentions = env.get_sup_labels()
            # Entries whose label is NaN are excluded from both tallies
            # (presumably they carry no supervision — verify against the env).
            valid_mask = np.logical_not(np.isnan(true_intentions))
            true_intentions = true_intentions[valid_mask]
            inffered_intentions = inffered_intentions[valid_mask]
            inference_correct += (inffered_intentions == true_intentions).sum()
            inference_total += valid_mask.sum()
# Roll out two policies for five steps per episode and plot the path traced by
# the first two entries of o_n[0] (labelled x and y below).
# NOTE(review): the original indentation of this excerpt was lost. The layout
# below assumes only the policy wrapping is conditional and the plotting always
# runs — confirm against the original file.
if args.eval:
    # Log directories whose name starts with 'MADDPG' skip the
    # MakeDeterministic wrapper.
    if not args.log_dir.startswith('MADDPG'):
        from rlkit.torch.policies.make_deterministic import MakeDeterministic
        p1 = MakeDeterministic(data['trainer/trained_policy_n'][0])
        p2 = MakeDeterministic(data['trainer/trained_policy_n'][1])
plt.figure()
with torch.no_grad():  # inference only, no autograd bookkeeping
    for _ in range(args.num):  # one plotted trajectory per episode
        o_n = env.reset()
        # Seed the trajectory with the point observed right after reset.
        xs = [o_n[0][0]]
        ys = [o_n[0][1]]
        obs1 = torch.tensor(o_n[0]).float()
        obs2 = torch.tensor(o_n[1]).float()
        for i in range(5):  # fixed horizon; `done` is not consulted
            a1, _ = p1.get_action(obs1)
            a2, _ = p2.get_action(obs2)
            o_n, r_n, done, info = env.step([a1, a2])
            xs.append(o_n[0][0])
            ys.append(o_n[0][1])
            obs1 = torch.tensor(o_n[0]).float()
            obs2 = torch.tensor(o_n[1]).float()
        plt.plot(xs, ys, 'o-')
        # Mark the first point in green and the last point in black.
        plt.plot(xs[0], ys[0], 'o', color='green')
        plt.plot(xs[-1], ys[-1], 'o', color='black')
# Equal axis scaling on a fixed [-1, 1] x [-1, 1] window.
plt.gca().set_aspect('equal', 'box')
plt.xlim(-1, 1)
plt.ylim(-1, 1)
plt.xlabel('x')
plt.ylabel('y')
# For every agent index, sweep the values in opponent_as through the action
# slots of the opposite half of the agents and record the context action
# obtained for this agent in cs[agent].
# NOTE(review): indentation reconstructed from a whitespace-collapsed excerpt;
# the second sweep loop continues beyond what is shown here.
with torch.no_grad():
    # Index separating the first half of the agents from the second half.
    half = int(args.num_ag / 2)
    for agent in range(args.num_ag):
        cs[agent] = []
        if agent < half:
            for a2 in opponent_as:
                o_n = env.reset()
                # First-half slots stay at 0, second-half slots take a2.
                actions = torch.zeros(args.num_ag)
                actions[:half] = 0.
                actions[half:] = a2
                actions = actions.unsqueeze(1)
                # Append the action column to the observations and add a
                # leading dimension.
                cg_inputs = torch.cat(
                    [torch.tensor(o_n), actions], dim=-1
                ).float().unsqueeze(0)
                if cactor:
                    # Shared actor: it sees either the cgca contexts or the
                    # raw inputs, and this agent's row is picked afterwards.
                    if cgca:
                        contexts = cgca(cg_inputs)
                        actor_input = contexts[0]
                    else:
                        actor_input = cg_inputs[0]
                    cactions, _ = cactor.get_action(actor_input)
                    cs[agent].append(cactions[agent, 0])
                else:
                    # Per-agent actor: contexts come from the shared cgca when
                    # available, otherwise from this agent's own entry in
                    # cgca_n.
                    contexts = cgca(cg_inputs) if cgca else cgca_n[agent](cg_inputs)
                    ca, _ = cactor_n[agent].get_action(contexts[:, agent, :][0])
                    cs[agent].append(ca[0])
        else:
            for a1 in opponent_as:
                o_n = env.reset()
                # Mirror case: the swept value goes into the first-half slots.
                actions = torch.zeros(args.num_ag)
                actions[:half] = a1
# Load the snapshot for the selected seed, wrap the first two entries of
# 'trainer/cactor_n' with MakeDeterministic, and sweep a2s / a1s through them,
# collecting the first component of each returned action in c1s / c2s.
# NOTE(review): indentation reconstructed from a whitespace-collapsed excerpt.
c2s = []
d_path = '/'.join((pre_path, 'seed' + str(args.seed), 'params.pkl'))
# NOTE(review): the snapshot holds whole pickled objects; recent PyTorch
# releases default torch.load to weights_only=True, which may refuse to load
# it — confirm against the installed version.
data = torch.load(d_path, map_location='cpu')
from rlkit.torch.policies.make_deterministic import MakeDeterministic
c1net = MakeDeterministic(data['trainer/cactor_n'][0])
c2net = MakeDeterministic(data['trainer/cactor_n'][1])

# First actor: input is [first entry of o_n[0], swept value a2].
for a2 in a2s:
    o_n = env.reset()
    c1_input = torch.tensor([o_n[0][0], a2]).float()
    c1, _ = c1net.get_action(c1_input)
    c1s.append(c1[0])

# Second actor: input is [first entry of o_n[1], swept value a1].
for a1 in a1s:
    o_n = env.reset()
    c2_input = torch.tensor([o_n[1][0], a1]).float()
    c2, _ = c2net.get_action(c2_input)
    c2s.append(c2[0])

# Disabled plotting code, kept as found:
# plt.figure()
# plt.subplot(1,2,1)
# plt.plot(a2s,c1s)
# plt.gca().set_aspect('equal', 'box')
# plt.gca().axes.get_xaxis().set_ticks([-1.,0.,1.])
# plt.gca().axes.get_yaxis().set_ticks([-1.,0.,1.])
# plt.xlim(-1,1)