Example 1
def vis(args):
    imgs = np.load(args.ds)
    vae = joblib.load(args.file)
    losses = []
    for i, image_obs in enumerate(imgs):
        img = normalize_image(image_obs)
        recon, *_ = eval_np(vae, img)
        error = ((recon - img)**2).sum()
        losses.append((i, error))

    losses.sort(key=lambda x: -x[1])

    for rank, (i, error) in enumerate(losses[:NUM_SHOWN]):
        image_obs = imgs[i]
        recon, *_ = eval_np(vae, normalize_image(image_obs))

        img = image_obs.reshape(3, 48, 48).transpose()
        rimg = recon.reshape(3, 48, 48).transpose()

        cv2.imshow("image, rank {}, loss {}".format(rank, error), img)
        cv2.imshow("recon, rank {}, loss {}".format(rank, error), rimg)
        print("rank {}\terror {}".format(rank, error))
    for j, (i, error) in enumerate(losses[-NUM_SHOWN:]):
        rank = len(losses) - j - 1
        image_obs = imgs[i]
        recon, *_ = eval_np(vae, normalize_image(image_obs))

        img = image_obs.reshape(3, 48, 48).transpose()
        rimg = recon.reshape(3, 48, 48).transpose()

        cv2.imshow("image, rank {}, loss {}".format(rank, error), img)
        cv2.imshow("recon, rank {}, loss {}".format(rank, error), rimg)
        print("rank {}\terror {}".format(rank, error))
    cv2.waitKey(0)
    cv2.destroyAllWindows()
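Every snippet in this listing goes through eval_np, a numpy-in/numpy-out wrapper around a PyTorch module: it converts the numpy arguments to tensors, calls the module, and converts the outputs back to numpy arrays. The helper below is only a minimal sketch of that convention, not the exact implementation behind these examples; torch_ify and np_ify are defined here purely for illustration.

import numpy as np
import torch

def torch_ify(x):
    # Convert numpy arrays to float tensors; pass everything else through.
    return torch.from_numpy(x).float() if isinstance(x, np.ndarray) else x

def np_ify(x):
    # Convert tensors back to numpy; leave other values (ints, dicts, ...) alone.
    return x.detach().cpu().numpy() if isinstance(x, torch.Tensor) else x

def eval_np(module, *args, **kwargs):
    # Call a torch module with numpy inputs and return numpy outputs.
    torch_args = tuple(torch_ify(a) for a in args)
    torch_kwargs = {k: torch_ify(v) for k, v in kwargs.items()}
    with torch.no_grad():
        outputs = module(*torch_args, **torch_kwargs)
    if isinstance(outputs, tuple):
        return tuple(np_ify(o) for o in outputs)
    return np_ify(outputs)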
Example 2
 def get_action(self, obs, labels=None):
     if labels is None:
         pis = eval_np(self, obs[None])[0, :]
     else:
         pis = eval_np(self, obs[None], labels=labels[None])[0, :]
     action = np.random.choice(np.arange(pis.shape[0]), p=pis)
     return action, {}
Example 3
 def get_actions(self, obs_np, deterministic=False):
     if self.return_raw_action:
         actions, info = eval_np(self,
                                 obs_np,
                                 deterministic=deterministic,
                                 return_info=True)
         raw_actions = info['preactivation']
         return actions, raw_actions
     else:
         return eval_np(self, obs_np, deterministic=deterministic)
Example 4
 def get_attention_weight(self, obs):
     if hasattr(self.policy[0], 'attentioner'):
         with torch.no_grad():
             policy_inputs = eval_np(self.obs_to_policy_inputs, obs[None])
             x, attention_weight = eval_np(self.policy[0],
                                           policy_inputs,
                                           return_attention_weights=True)
         return attention_weight
     else:
         return None
Example 5
def get_linear_loss(ob_space, encoder):
    x = get_batch(ob_space, batch_size=2**15)
    z_np = eval_np(encoder, x)
    results = np.linalg.lstsq(z_np, x, rcond=None)
    matrix = results[0]

    eval_states = get_batch(ob_space, batch_size=2**15)
    z_np = eval_np(encoder, eval_states)
    x_hat = z_np.dot(matrix)
    return ((eval_states - x_hat)**2).mean()
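The probe above measures how well observations can be linearly decoded from the encoder's latents: it fits a least-squares map from z back to x on one batch and reports the mean squared reconstruction error on a fresh batch. The toy sketch below reproduces that probe with random data and a made-up encoder standing in for get_batch and eval_np(encoder, x).

import numpy as np

rng = np.random.default_rng(0)
A = rng.normal(size=(8, 4))
encode = lambda x: np.tanh(x @ A)          # made-up nonlinear encoder

x_fit = rng.normal(size=(1024, 8))         # stands in for get_batch(...)
W = np.linalg.lstsq(encode(x_fit), x_fit, rcond=None)[0]   # linear decoder z -> x

x_eval = rng.normal(size=(1024, 8))        # fresh evaluation batch
x_hat = encode(x_eval) @ W
print(((x_eval - x_hat) ** 2).mean())      # linear decodability of the latent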
Example 6
    def _plot_level_curves(self):
        # Create mesh grid.
        xs = np.linspace(-1, 1, 50)
        ys = np.linspace(-1, 1, 50)
        xgrid, ygrid = np.meshgrid(xs, ys)
        N = len(xs) * len(ys)

        # Copy default values along the first axis and replace nans with
        # the mesh grid points.
        actions = np.tile(self._default_action, (N, 1))
        actions[:, self._var_inds[0]] = xgrid.ravel()
        actions[:, self._var_inds[1]] = ygrid.ravel()

        for ax, obs in zip(self._ax_lst, self._obs_lst):
            repeated_obs = np.repeat(
                obs[None],
                actions.shape[0],
                axis=0,
            )
            qs = eval_np(self._qf, repeated_obs, actions)
            qs = qs.reshape(xgrid.shape)

            cs = ax.contour(xgrid, ygrid, qs, 20)
            self._line_objects += cs.collections
            self._line_objects += ax.clabel(cs,
                                            inline=1,
                                            fontsize=10,
                                            fmt='%.2f')
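The level-curve plot above evaluates the Q-function on a 2-D grid of candidate actions for each observation and draws its contours. The stripped-down version below does the same with a hypothetical quadratic Q-function in place of eval_np(self._qf, repeated_obs, actions).

import numpy as np
import matplotlib.pyplot as plt

def toy_qf(obs, actions):
    # Hypothetical Q-function: a quadratic bowl peaked at action (0.3, -0.2).
    return -np.sum((actions - np.array([0.3, -0.2])) ** 2, axis=1)

xs = ys = np.linspace(-1, 1, 50)
xgrid, ygrid = np.meshgrid(xs, ys)
actions = np.stack([xgrid.ravel(), ygrid.ravel()], axis=1)
obs = np.zeros((actions.shape[0], 4))      # one dummy observation, repeated

qs = toy_qf(obs, actions).reshape(xgrid.shape)
fig, ax = plt.subplots()
cs = ax.contour(xgrid, ygrid, qs, 20)
ax.clabel(cs, inline=1, fontsize=10, fmt='%.2f')
plt.show()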
Example 7
 def get_actions(self, obs_np, deterministic=False, return_log_prob=True):
     outputs = eval_np(self,
                       obs_np,
                       deterministic=deterministic,
                       return_log_prob=return_log_prob)
     if return_log_prob and not deterministic:
         return outputs[0], outputs[1]
     else:
         return outputs[0]
Example 8
 def get_attention_weight(self, obs):
     if hasattr(self.encoder, 'attentioner'):
         with torch.no_grad():
             x, attention_weight = eval_np(self.encoder,
                                           obs[None],
                                           return_attention_weights=True)
         return attention_weight
     else:
         return None
Example 9
 def get_actions(self, obs_np, deterministic=False):
     if self.return_raw_action:
         with torch.no_grad():
             actions, info = self.forward(torch.tensor(obs_np).float(),
                                          deterministic=deterministic,
                                          return_info=True)
         raw_actions = info['preactivation']
         return np.array(actions), np.array(raw_actions)
     else:
         return eval_np(self, obs_np, deterministic=deterministic)
Example 10
    def get_action(self, current_ob):
        if (self.replan_every_time_step
                or self.t_in_plan == self.planning_horizon
                or self.last_solution is None):
            if self.dynamic_lm and self.best_obs_seq is not None:
                error = np.linalg.norm(current_ob -
                                       self.best_obs_seq[self.t_in_plan + 1])
                self.update_lagrange_multiplier(error)
            goal = self.env.multitask_goal[self.multitask_goal_slice]
            full_solution = self.replan(current_ob, goal)

            x_torch = ptu.np_to_var(full_solution, requires_grad=True)
            current_ob_torch = ptu.np_to_var(current_ob)

            _, actions, next_obs = self.batchify(x_torch, current_ob_torch)
            self.best_action_seq = np.array(
                [ptu.get_numpy(a) for a in actions])
            self.best_obs_seq = np.array([current_ob] +
                                         [ptu.get_numpy(o) for o in next_obs])

            self.last_solution = full_solution
            self.t_in_plan = 0

        tdm_actions = eval_np(self.tdm_policy, self.best_obs_seq[:-1],
                              self.best_obs_seq[1:],
                              np.zeros((self.planning_horizon, 1)))
        agent_info = dict(
            best_action_seq=self.best_action_seq[self.t_in_plan:],
            # best_action_seq=tdm_actions,
            best_obs_seq=self.best_obs_seq[self.t_in_plan:],
        )
        action = self.best_action_seq[self.t_in_plan]
        # action = tdm_actions[self.t_in_plan]
        self.t_in_plan += 1
        # print("action", action)
        # print("tdm_action", tdm_actions[0])

        return action, agent_info
Example 11
 def get_actions(self, obs):
     #print ("torch/networks.py, get_actions, actions: ", type(obs), ", ", obs.shape)
     return eval_np(self, obs)
Example 12
 def get_actions(self, obs_np, deterministic=False):
     return eval_np(self, obs_np, deterministic=deterministic)[0]
Example 13
               world_args=world_args)
num_agent = env.num_agents
obs_dim = env.observation_space.low.size
action_dim = env.action_space.low.size

max_path_length = args.mpl
path_length = 0
done = np.array([False] * num_agent)
c_r = np.zeros(num_agent)
o_n = env.reset()
while True:
    path_length += 1
    a_n = []
    if args.sb:
        if shared_gnn:
            o_emb_n = eval_np(shared_gnn, o_n[None, :])[0]
            for i, policy in enumerate(policy_n):
                o = o_emb_n[i]
                a, info = policy.get_action(o)
                a_n.append(a)
        else:
            for policy in policy_n:
                a, info = policy.get_action(o_n)
                a_n.append(a)
    else:
        for o, policy in zip(o_n, policy_n):
            a, info = policy.get_action(o)
            a_n.append(a)

    o_n, r_n, done, _ = env.step(a_n)
    c_r += r_n
Example 14
 def get_actions(self, obs_np, deterministic=False):
     return eval_np(self,
                    obs_np,
                    deterministic=deterministic,
                    execute_actions=True)[0]
Example 15
 def get_actions(self, obs):
     # numpy (observation) in, numpy (action) out
     return eval_np(self, obs)
Example 16
 def get_action(self, obs):
     pis = eval_np(self, obs[None])[0, :]
     action = np.random.choice(np.arange(pis.shape[0]), p=pis)
     return action, {}
Example 17
 def get_action(self, obs_np):
     return eval_np(self, obs_np), {}
Example 18
 def get_action(self, obs_np, deterministic=False, print_action=False):
     dist_vec = eval_np(self.prob_network, obs_np)
     if deterministic:
         return dist_vec.argmax(), {}
     return Categorical(torch.from_numpy(dist_vec)).sample().item(), {}
Example 19
 def get_action(self, obs_np):
     dist_vec = eval_np(self, obs_np)
     return Categorical(torch_ify(dist_vec)).sample().item(), {}
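Examples 16, 18 and 19 all turn a probability vector produced by the network into a sampled discrete action, either with np.random.choice or by wrapping the vector in a torch Categorical. The two patterns are interchangeable, as the small sketch below shows; pis here is a hypothetical probability vector standing in for eval_np(self, obs[None])[0, :].

import numpy as np
import torch
from torch.distributions import Categorical

pis = np.array([0.1, 0.6, 0.3])

a_np = np.random.choice(np.arange(pis.shape[0]), p=pis)         # Example 16's pattern
a_torch = Categorical(torch.from_numpy(pis)).sample().item()    # Examples 18/19's pattern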
Example 20
 def get_actions(self, obs):
     return eval_np(self, obs)
Example 21
                                             args.seed)) as f:
    variant = json.load(f)
env = make_env(args.exp_name, **variant['env_kwargs'])
o = env.reset()

max_path_length = 200
path_length = 0
done = False
c_r = 0.
while True:
    path_length += 1
    a, _ = eval_policy.get_action(o)
    o, r, done, _ = env.step(a)

    if sup_learner:
        intentions = eval_np(sup_learner, o[None, :])
    elif hasattr(policy, 'sup_prob'):
        intentions = eval_np(policy.sup_prob, o[None, :])[0]
    else:
        intentions = None

    if hasattr(policy, 'get_attention_weight'):
        attention_weight = policy.get_attention_weight(o)
    else:
        attention_weight = None

    c_r += r
    env.render(extra_input={
        'attention_weight': attention_weight,
        'intention': intentions
    })