def vis(args):
    imgs = np.load(args.ds)
    vae = joblib.load(args.file)

    # Score every image by its VAE reconstruction error.
    losses = []
    for i, image_obs in enumerate(imgs):
        img = normalize_image(image_obs)
        recon, *_ = eval_np(vae, img)
        error = ((recon - img) ** 2).sum()
        losses.append((i, error))
    losses.sort(key=lambda x: -x[1])  # descending: worst reconstructions first

    # Show the NUM_SHOWN worst reconstructions.
    for rank, (i, error) in enumerate(losses[:NUM_SHOWN]):
        image_obs = imgs[i]
        recon, *_ = eval_np(vae, normalize_image(image_obs))
        img = image_obs.reshape(3, 48, 48).transpose()
        rimg = recon.reshape(3, 48, 48).transpose()
        cv2.imshow("image, rank {}, loss {}".format(rank, error), img)
        cv2.imshow("recon, rank {}, loss {}".format(rank, error), rimg)
        print("rank {}\terror {}".format(rank, error))

    # ...and the NUM_SHOWN best ones.
    for j, (i, error) in enumerate(losses[-NUM_SHOWN:]):
        rank = len(losses) - j - 1
        image_obs = imgs[i]
        recon, *_ = eval_np(vae, normalize_image(image_obs))
        img = image_obs.reshape(3, 48, 48).transpose()
        rimg = recon.reshape(3, 48, 48).transpose()
        cv2.imshow("image, rank {}, loss {}".format(rank, error), img)
        cv2.imshow("recon, rank {}, loss {}".format(rank, error), rimg)
        print("rank {}\terror {}".format(rank, error))
    cv2.waitKey(0)
    cv2.destroyAllWindows()
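# Every snippet in this file calls an eval_np helper that wraps a torch
# module with a numpy-in / numpy-out interface. Its definition is not part
# of this excerpt; the sketch below is a minimal assumed implementation in
# the style of rlkit's rlkit.torch.core, not the verbatim source.
import numpy as np
import torch

def torch_ify(np_array_or_other):
    # Convert numpy arrays to float tensors; pass everything else through.
    if isinstance(np_array_or_other, np.ndarray):
        return torch.from_numpy(np_array_or_other).float()
    return np_array_or_other

def np_ify(tensor_or_other):
    # Convert tensors back to numpy; pass everything else through.
    if isinstance(tensor_or_other, torch.Tensor):
        return tensor_or_other.detach().cpu().numpy()
    return tensor_or_other

def eval_np(module, *args, **kwargs):
    # Evaluate a torch module on numpy inputs and return numpy outputs.
    torch_args = tuple(torch_ify(x) for x in args)
    torch_kwargs = {k: torch_ify(v) for k, v in kwargs.items()}
    outputs = module(*torch_args, **torch_kwargs)
    if isinstance(outputs, tuple):
        return tuple(np_ify(x) for x in outputs)
    return np_ify(outputs)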
def get_action(self, obs, labels=None):
    if labels is None:
        pis = eval_np(self, obs[None])[0, :]
    else:
        pis = eval_np(self, obs[None], labels=labels[None])[0, :]
    action = np.random.choice(np.arange(pis.shape[0]), p=pis)
    return action, {}
def get_actions(self, obs_np, deterministic=False):
    if self.return_raw_action:
        actions, info = eval_np(
            self, obs_np, deterministic=deterministic, return_info=True)
        raw_actions = info['preactivation']
        return actions, raw_actions
    else:
        return eval_np(self, obs_np, deterministic=deterministic)
def get_attention_weight(self, obs):
    if hasattr(self.policy[0], 'attentioner'):
        with torch.no_grad():
            policy_inputs = eval_np(self.obs_to_policy_inputs, obs[None])
            x, attention_weight = eval_np(
                self.policy[0], policy_inputs,
                return_attention_weights=True)
        return attention_weight
    else:
        return None
def get_linear_loss(ob_space, encoder):
    # Fit a least-squares linear decoder from latents back to observations...
    x = get_batch(ob_space, batch_size=2 ** 15)
    z_np = eval_np(encoder, x)
    results = np.linalg.lstsq(z_np, x, rcond=None)
    matrix = results[0]
    # ...and measure its reconstruction MSE on a fresh batch.
    eval_states = get_batch(ob_space, batch_size=2 ** 15)
    z_np = eval_np(encoder, eval_states)
    x_hat = z_np.dot(matrix)
    return ((eval_states - x_hat) ** 2).mean()
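# get_batch is not defined in this excerpt. A plausible stand-in, assuming
# ob_space is a bounded gym.spaces.Box (hypothetical helper, not the
# original):
def get_batch(ob_space, batch_size):
    # Draw uniform samples between the space's per-dimension bounds.
    return np.random.uniform(
        low=ob_space.low,
        high=ob_space.high,
        size=(batch_size,) + ob_space.shape,
    ).astype(np.float32)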
def _plot_level_curves(self):
    # Create mesh grid.
    xs = np.linspace(-1, 1, 50)
    ys = np.linspace(-1, 1, 50)
    xgrid, ygrid = np.meshgrid(xs, ys)
    N = len(xs) * len(ys)

    # Copy default values along the first axis and replace nans with
    # the mesh grid points.
    actions = np.tile(self._default_action, (N, 1))
    actions[:, self._var_inds[0]] = xgrid.ravel()
    actions[:, self._var_inds[1]] = ygrid.ravel()

    for ax, obs in zip(self._ax_lst, self._obs_lst):
        repeated_obs = np.repeat(
            obs[None],
            actions.shape[0],
            axis=0,
        )
        qs = eval_np(self._qf, repeated_obs, actions)
        qs = qs.reshape(xgrid.shape)

        cs = ax.contour(xgrid, ygrid, qs, 20)
        self._line_objects += cs.collections
        self._line_objects += ax.clabel(
            cs, inline=1, fontsize=10, fmt='%.2f')
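# _plot_level_curves assumes plotter state initialized elsewhere (it follows
# the QFPolicyPlotter pattern from rlkit's soft Q-learning code). Below is a
# hypothetical minimal setup, with names inferred from the method body
# rather than taken from the source:
import matplotlib.pyplot as plt

class QfLevelCurvePlotter:
    def __init__(self, qf, obs_lst, default_action):
        self._qf = qf
        self._obs_lst = obs_lst
        # NaN entries in default_action mark the two action dimensions
        # swept by the mesh grid; all other dimensions stay fixed.
        self._default_action = default_action
        self._var_inds = np.where(np.isnan(default_action))[0]
        assert len(self._var_inds) == 2
        fig, axes = plt.subplots(
            1, len(obs_lst), figsize=(4 * len(obs_lst), 4))
        self._ax_lst = np.atleast_1d(axes)
        self._line_objects = []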
def get_actions(self, obs_np, deterministic=False, return_log_prob=True):
    outputs = eval_np(
        self, obs_np, deterministic=deterministic,
        return_log_prob=return_log_prob)
    if return_log_prob and not deterministic:
        return outputs[0], outputs[1]
    else:
        return outputs[0]
def get_attention_weight(self, obs):
    if hasattr(self.encoder, 'attentioner'):
        with torch.no_grad():
            x, attention_weight = eval_np(
                self.encoder, obs[None], return_attention_weights=True)
        return attention_weight
    else:
        return None
def get_actions(self, obs_np, deterministic=False):
    if self.return_raw_action:
        with torch.no_grad():
            actions, info = self.forward(
                torch.tensor(obs_np).float(),
                deterministic=deterministic,
                return_info=True,
            )
        raw_actions = info['preactivation']
        # .detach().cpu().numpy() instead of np.array(...) so this also
        # works when the tensors live on the GPU.
        return (actions.detach().cpu().numpy(),
                raw_actions.detach().cpu().numpy())
    else:
        return eval_np(self, obs_np, deterministic=deterministic)
def get_action(self, current_ob):
    if (self.replan_every_time_step
            or self.t_in_plan == self.planning_horizon
            or self.last_solution is None):
        if self.dynamic_lm and self.best_obs_seq is not None:
            error = np.linalg.norm(
                current_ob - self.best_obs_seq[self.t_in_plan + 1])
            self.update_lagrange_multiplier(error)

        goal = self.env.multitask_goal[self.multitask_goal_slice]
        full_solution = self.replan(current_ob, goal)

        x_torch = ptu.np_to_var(full_solution, requires_grad=True)
        current_ob_torch = ptu.np_to_var(current_ob)
        _, actions, next_obs = self.batchify(x_torch, current_ob_torch)
        self.best_action_seq = np.array(
            [ptu.get_numpy(a) for a in actions])
        self.best_obs_seq = np.array(
            [current_ob] + [ptu.get_numpy(o) for o in next_obs])

        self.last_solution = full_solution
        self.t_in_plan = 0

    tdm_actions = eval_np(
        self.tdm_policy,
        self.best_obs_seq[:-1],
        self.best_obs_seq[1:],
        np.zeros((self.planning_horizon, 1)))
    agent_info = dict(
        best_action_seq=self.best_action_seq[self.t_in_plan:],
        # best_action_seq=tdm_actions,
        best_obs_seq=self.best_obs_seq[self.t_in_plan:],
    )
    action = self.best_action_seq[self.t_in_plan]
    # action = tdm_actions[self.t_in_plan]
    self.t_in_plan += 1
    # print("action", action)
    # print("tdm_action", tdm_actions[0])
    return action, agent_info
def get_actions(self, obs):
    # print("torch/networks.py, get_actions: ", type(obs), ", ", obs.shape)
    return eval_np(self, obs)
def get_actions(self, obs_np, deterministic=False):
    return eval_np(self, obs_np, deterministic=deterministic)[0]
        world_args=world_args)
num_agent = env.num_agents
obs_dim = env.observation_space.low.size
action_dim = env.action_space.low.size

max_path_length = args.mpl
path_length = 0
done = np.array([False] * num_agent)
c_r = np.zeros(num_agent)
o_n = env.reset()
while True:
    path_length += 1
    a_n = []
    if args.sb:
        if shared_gnn:
            # Embed all observations jointly, then let each agent's policy
            # act on its own embedding.
            o_emb_n = eval_np(shared_gnn, o_n[None, :])[0]
            for i, policy in enumerate(policy_n):
                o = o_emb_n[i]
                a, info = policy.get_action(o)
                a_n.append(a)
        else:
            for policy in policy_n:
                a, info = policy.get_action(o_n)
                a_n.append(a)
    else:
        for o, policy in zip(o_n, policy_n):
            a, info = policy.get_action(o)
            a_n.append(a)
    o_n, r_n, done, _ = env.step(a_n)
    c_r += r_n
def get_actions(self, obs_np, deterministic=False):
    return eval_np(
        self, obs_np, deterministic=deterministic,
        execute_actions=True)[0]
def get_actions(self, obs):
    # numpy observation in, numpy action out.
    return eval_np(self, obs)
def get_action(self, obs):
    pis = eval_np(self, obs[None])[0, :]
    action = np.random.choice(np.arange(pis.shape[0]), p=pis)
    return action, {}
def get_action(self, obs_np):
    return eval_np(self, obs_np), {}
def get_action(self, obs_np, deterministic=False, print_action=False):
    dist_vec = eval_np(self.prob_network, obs_np)
    if deterministic:
        action = dist_vec.argmax()
    else:
        action = Categorical(torch.from_numpy(dist_vec)).sample().item()
    return action, {}
def get_action(self, obs_np):
    dist_vec = eval_np(self, obs_np)
    return Categorical(torch_ify(dist_vec)).sample().item(), {}
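# Illustrative only: Categorical draws an integer class index with the
# probabilities given by dist_vec. For example (toy values, not from the
# source):
#     Categorical(torch_ify(np.array([0.1, 0.7, 0.2]))).sample().item()
# returns 1 about 70% of the time.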
def get_actions(self, obs):
    return eval_np(self, obs)
        args.seed)) as f:
    variant = json.load(f)
env = make_env(args.exp_name, **variant['env_kwargs'])

o = env.reset()
max_path_length = 200
path_length = 0
done = False
c_r = 0.
while True:
    path_length += 1
    a, _ = eval_policy.get_action(o)
    o, r, done, _ = env.step(a)
    if sup_learner:
        intentions = eval_np(sup_learner, o[None, :])
    elif hasattr(policy, 'sup_prob'):
        intentions = eval_np(policy.sup_prob, o[None, :])[0]
    else:
        intentions = None
    if hasattr(policy, 'get_attention_weight'):
        attention_weight = policy.get_attention_weight(o)
    else:
        attention_weight = None
    c_r += r
    env.render(extra_input={
        'attention_weight': attention_weight,
        'intention': intentions,
    })