def conditional_generator_function(self, c_, c_next_, obs):
     """
     Generate a next observation for a discretized code transition.

     Both codes are converted with ``undiscretize`` (using
     ``self.discretization_bins`` and ``self.P.unif_range``), one standard
     normal noise row of width ``self.rand_z_dim`` is drawn per row of the
     converted ``c_``, and ``self.G`` is called on (noise, code, next code).

     :param c_: discretized current code(s)
     :param c_next_: discretized next code(s)
     :param obs: not used by this implementation
     :return: the second output of ``self.G`` as a numpy array
     """
     bins = self.discretization_bins
     bounds = self.P.unif_range
     code = undiscretize(c_, bins, bounds)
     code_next = undiscretize(c_next_, bins, bounds)
     noise = from_numpy_to_var(
         np.random.randn(code.shape[0], self.rand_z_dim))
     code_var = from_numpy_to_var(code)
     code_next_var = from_numpy_to_var(code_next)
     _unused, generated_next = self.G(noise, code_var, code_next_var)
     return generated_next.data.cpu().numpy()
Exemple #2
0
 def continuous_transition_function(self, c_):
     """
     Apply the transition model ``self.T`` to a discretized code.

     The code is converted with ``undiscretize``, pushed through ``self.T``,
     clipped to lie 1e-6 inside ``self.P.unif_range`` and converted back
     with ``discretize``.

     :param c_: discretized current code(s)
     :return: discretized next code(s)
     """
     bins = self.discretization_bins
     bounds = self.P.unif_range
     continuous = undiscretize(c_, bins, bounds)
     predicted = self.T(from_numpy_to_var(continuous)).data.cpu().numpy()
     # Keep the prediction strictly inside the range before re-binning.
     margin = 1e-6
     predicted = np.clip(predicted, bounds[0] + margin, bounds[1] - margin)
     return discretize(predicted, bins, bounds)
def get_torch_images_from_numpy(npy_list,
                                conditional,
                                normalize=True,
                                one_image=False):
    """
    Convert channel-last numpy images into a channel-first torch input.

    :param npy_list: when ``one_image`` is False, an array whose column 0
        holds the images (a list of (image, attrs) pairs); when ``one_image``
        is True, the image batch itself
    :param conditional: if True, split the result into its first three
        channels and the remaining channels
    :param normalize: if True, divide the converted images by 255 in place
    :param one_image: if True, return the converted batch alone
    :return: the converted batch when ``one_image`` is True; otherwise a
        pair that is ``(first_three_channels, remaining_channels)`` when
        ``conditional`` is True and ``(images, None)`` when it is not
    """
    raw = npy_list if one_image else np.stack(npy_list[:, 0])
    # axes (0, 3, 1, 2): move the last axis to position 1.
    images = from_numpy_to_var(np.transpose(raw, (0, 3, 1, 2)))
    if normalize:
        images /= 255
    if one_image:
        return images
    if not conditional:
        return images, None
    return images[:, :3].contiguous(), images[:, 3:].contiguous()
Exemple #4
0
    def astar_plan(self, c_start, c_goal, verbose=True, **kwargs):
        """
        Generate a plan in observation space given start and goal states via A* search.

        ``plan_traj_astar`` is run on the first row of ``c_start`` and
        ``c_goal``; every consecutive pair of states on the returned
        trajectory is then rendered with the generator ``self.G``.

        :param c_start: bs x c_dim
        :param c_goal: bs x c_dim
        :param verbose: if True, print the first code row of every step
        :param kwargs: must contain ``start_obs`` and ``goal_obs``
        :return: list of generator outputs: the current image of the first
            step followed by the next image of every step (described by the
            original author as horizon x bs x channel_dim x img_W x img_H)
        """
        with torch.no_grad():
            batch = c_start.size()[0]
            path = plan_traj_astar(
                kwargs['start_obs'],
                kwargs['goal_obs'],
                start_state=c_start[0].data.cpu().numpy(),
                goal_state=c_goal[0].data.cpu().numpy(),
                transition_function=self.continuous_transition_function,
                preprocess_function=self.preprocess_function,
                discriminator_function=self.discriminator_function_np,
                generator_function=self.conditional_generator_function)

            frames = []
            for step, (node, node_next) in enumerate(zip(path[:-1], path[1:])):
                bins, bounds = self.discretization_bins, self.P.unif_range
                code_np = undiscretize(node.state, bins, bounds)
                code_next_np = undiscretize(node_next.state, bins, bounds)
                # Repeat the single planned code for every batch row.
                c = from_numpy_to_var(code_np).repeat(batch, 1)
                c_next = from_numpy_to_var(code_next_np).repeat(batch, 1)
                noise = Variable(
                    torch.randn(c.size()[0], self.rand_z_dim)).cuda()

                img_now, img_after = self.G(noise, c, c_next)
                # The current image is only kept for the very first step.
                frames.extend([img_now, img_after] if step == 0 else [img_after])
                if verbose:
                    print("\t c_%d: %s" % (step, print_array(c[0].data)))
        return frames
Exemple #5
0
def get_c_loss(model, c_model, c_type, o, o_next, context, N, o_neg=None):
    """
    Compute the loss of the scoring model ``c_model``.

    Two families are supported, selected by the prefix of ``c_type``:

    * ``"cpc..."``: the log-sum-exp over a zero column and the differences
      between the negative and positive log densities, averaged over the
      batch.  For ``"cpc-sptm"`` the negated positive and the raw negative
      log densities are appended as extra columns.
    * ``"sptm..."``: ``c_model.loss`` on positive predictions (label 1)
      followed by negative predictions (label 0).

    Negatives come from ``model.inference`` when ``o_neg`` is None, and from
    the first output of ``model(o_neg, context)`` otherwise.

    :param model: generative model providing ``inference`` and ``__call__``
    :param c_model: scoring model (``log_density`` or ``__call__``/``loss``)
    :param c_type: loss type string
    :param o: current observations; ``o.size(0)`` is the batch size
    :param o_next: positive next observations
    :param context: context tensor
    :param N: number of negative repeats (used by the "cpc" family only)
    :param o_neg: optional observations used to produce negatives
    :return: scalar loss tensor
    :raises NotImplementedError: for any other ``c_type``
    """
    batch_size = o.size(0)

    if c_type.startswith("cpc"):
        pos_logp = c_model.log_density(o, o_next, context)

        tiled_context = context.repeat(N, 1, 1, 1)
        if o_neg is not None:
            neg_obs = model(o_neg, tiled_context)[0]
        else:
            neg_obs = model.inference(tiled_context,
                                      n_samples=1,
                                      layer_cond=False)
        tiled_o = o.repeat(N, 1, 1, 1)
        neg_logp = c_model.log_density(tiled_o, neg_obs, tiled_context)
        # One row per batch element, one column per negative repeat.
        neg_logp = neg_logp.view(N, batch_size).t()

        pos_column = pos_logp[:, None]
        columns = [
            from_numpy_to_var(np.zeros((batch_size, 1))),
            neg_logp - pos_column,
        ]
        if c_type == "cpc-sptm":
            columns += [-pos_column, neg_logp]
        c_loss = torch.mean(log_sum_exp(torch.cat(columns, dim=1)))
    elif c_type.startswith("sptm"):
        pos_pred = c_model(o, o_next, context)
        if o_neg is not None:
            neg_obs = model(o_neg, context)[0]
        else:
            neg_obs = model.inference(context, n_samples=1, layer_cond=False)
        neg_pred = c_model(o, neg_obs, context)

        predictions = torch.cat([pos_pred, neg_pred])
        targets = torch.cat([torch.ones(batch_size), torch.zeros(batch_size)])
        if torch.cuda.is_available():
            targets = targets.cuda()
        c_loss = c_model.loss(predictions, targets)
    else:
        raise NotImplementedError

    assert not torch.isnan(c_loss)
    return c_loss
Exemple #6
0
def localization(o_cur_pred, o_samples_npy, context, c_model):
    """
    Find the sampled observation that scores highest against ``o_cur_pred``.

    ``o_cur_pred`` and ``context`` are repeated once per sample and scored
    against all samples in a single ``get_score`` call; the position of the
    largest ``"raw"`` score is returned.

    :param o_cur_pred: query observation (torch)
    :param o_samples_npy: candidate observations, in numpy
    :param context: context tensor
    :param c_model: scoring model forwarded to ``get_score``
    :return: index (``np.argmax``) of the best-scoring sample
    """
    # TODO: do batching like build graph
    n_candidates = len(o_samples_npy)
    with torch.no_grad():
        tiled_query = o_cur_pred.repeat(n_candidates, 1, 1, 1)
        candidates = from_numpy_to_var(o_samples_npy)
        tiled_context = context.repeat(n_candidates, 1, 1, 1)
        all_scores = get_score(c_model, tiled_query, candidates,
                               tiled_context, type="all")
        raw_scores = all_scores["raw"]
    return np.argmax(raw_scores)
Exemple #7
0
def find_next_way_point(o_cur_pred, path, o_goal_pred, context, shortcut_thresh, c_model):
    """
    Return the entry at index 1 of ``path`` followed by the goal.

    :param o_cur_pred: not used by this implementation
    :param path: numpy array of path observations
    :param o_goal_pred: goal observation(s) appended after the path
    :param context: not used by this implementation
    :param shortcut_thresh: not used by this implementation
    :param c_model: not used by this implementation
    :return: element 1 of the concatenation ``[path, o_goal_pred]``
    """
    path_var = from_numpy_to_var(path)
    candidates = torch.cat([path_var, o_goal_pred])
    return candidates[1]
Exemple #8
0
def run_planning_and_inverse_model(env, envname, hp, n_test_locs, test_data, config, test_mode, model, c_model, actor,
                                   savepath):
    """
    Run graph-based planning plus an actor on ``n_test_locs`` test tasks.

    For each task the environment is reset to the task's start state, a graph
    over sampled observations is built (only for the first task, or for every
    task when ``test_mode == "eval"``), the goal is localized in that graph,
    and the actor is stepped for at most ``hp["T"]`` steps towards way points
    taken from the shortest path.  Per-step frames and periodic plan
    visualizations are written under ``savepath``.

    :param env: environment; ``reset``, ``render``, ``step_only``,
        ``get_current_img`` and ``get_current_obs`` are called on it
    :param envname: forwarded to ``generate_samples``
    :param hp: dict of hyper-parameters; keys read here are
        ``replanning_every``, ``use_edge_weight``, ``use_vae_samples``,
        ``threshold_edge``, ``threshold_shortcut``, ``n_planning_samples``,
        ``score_type``, ``T``, ``action_noise`` and ``block_size``
    :param n_test_locs: number of entries of ``test_data`` to run
    :param test_data: indexable; each entry unpacks to
        ``(curr_obs, goal_obs, start_state, goal_state, context)``
    :param config: forwarded to ``env.render``, ``generate_samples`` and
        ``env.get_current_img``
    :param test_mode: ``"eval"`` rebuilds the sample graph for every task
    :param model: called as ``model(obs, context, determ=True)``; its
        ``inference`` method supplies samples when ``hp["use_vae_samples"]``
    :param c_model: scoring model passed to the graph/localization helpers
    :param actor: called as ``actor(o_curr_pred, next_o_goal, context)``
    :param savepath: directory that receives the written images
    :return: ``(total_dist, total_success, n_test_locs)``

    NOTE(review): ``curr_dist`` and ``is_success`` are only assigned inside
    the step loop, so ``hp["T"] == 0`` would fail (or reuse the previous
    task's values) at the final print -- confirm ``T`` is always positive.
    """
    # Finding threshold
    # TODO: auto collect this & make sure all c_type's work
    replanning = hp["replanning_every"]
    edge_weight = hp["use_edge_weight"]
    using_vae_samples = hp["use_vae_samples"]
    threshold_edge = hp["threshold_edge"]
    threshold_wp = hp["threshold_shortcut"]
    # threshold = 10.5
    n_planning_samples = hp["n_planning_samples"]
    score_type = hp["score_type"]
    # Precompute the graph
    o_samples_npy = graph = node_goal = None
    total_dist = total_success = 0
    for i in range(n_test_locs):
        curr_obs, goal_obs, start_state, goal_state, context = test_data[i]
        env.reset(start_state)
        env.render(config=config)

        # Generate data, Build Graph, Localize goal
        # Outside "eval" mode the samples and graph of task 0 are re-used
        # for every later task.
        if i == 0 or test_mode == "eval":
            # TODO: do batching to get a larger graph
            if using_vae_samples:
                o_samples_npy = model.inference(context,
                                                n_samples=n_planning_samples,
                                                layer_cond=False).cpu().detach().numpy()
            else:
                o_samples_npy = generate_samples(envname, env, config, n_planning_samples).cpu().detach().numpy()
            # threshold = find_threshold(o_samples_npy, context, c_model, min_thres=0, max_thres=50, n_iters=10)
            graph = build_memory_graph(o_samples_npy, context, c_model, score_type, threshold_edge, edge_weight)
            save_image(from_numpy_to_var(o_samples_npy),
                       os.path.join(savepath, 'plan_samples.png'),
                       nrow=10)
        # Precompute goal node and recon o
        node_goal = localization(goal_obs, o_samples_npy, context, c_model)
        o_goal_pred, _, _, _ = model(goal_obs, context, determ=True)
        # Actor run
        for t in range(hp["T"]):
            o_curr_pred, _, _, _ = model(curr_obs, context, determ=True)
            # Localize and do planning
            # (t == 0 always replans, so shortest_path is defined before use)
            if t % replanning == 0:
                node_cur = localization(o_curr_pred, o_samples_npy, context, c_model)
                shortest_path, edge_weights, edge_raw = find_shortest_path(graph, node_cur, node_goal)
                # padding(edge_weights, edge_raw)

            # Next goal img
            # j counts steps since the last replanning; one more leading
            # path node is dropped every 10 steps, always keeping the last.
            j = t % replanning
            next_way_point = find_next_way_point(
                o_curr_pred,
                o_samples_npy[shortest_path][min(j // 10, len(shortest_path) - 1):],
                o_goal_pred,
                context,
                threshold_wp,
                c_model)
            # Add a leading axis to the single way point.
            next_o_goal = next_way_point[None, :]
            # Visualize plans
            if t % 50 == 0:
                img_seq = torch.cat([curr_obs, from_numpy_to_var(o_samples_npy[shortest_path]), goal_obs])
                all_score = get_score(
                    c_model,
                    o_curr_pred.repeat(len(img_seq), 1, 1, 1),
                    img_seq,
                    context.repeat(len(img_seq), 1, 1, 1),
                    type="all"
                )
                img_seq_np = img_seq.detach().cpu().numpy()
                # The i, j below are comprehension-local; in Python 3 they
                # do not overwrite the task index i or the step counter j.
                write_number_on_images(img_seq_np,
                                       ["%d, %.3f" % (i, j) for i, j in zip(all_score["raw"], all_score[score_type])],
                                       position="top-left")
                write_number_on_images(img_seq_np,
                                       ["", ""] + ["%d, %.3f" % (i, j) for i, j in zip(edge_raw, edge_weights)] + [""],
                                       position="bottom-left")
                save_image(torch.Tensor(img_seq_np),
                           os.path.join(savepath, 'plan_task_%d_step_%d.png' % (i, t)), nrow=len(shortest_path) + 2)
            # Get action
            action = actor(o_curr_pred, next_o_goal, context).cpu().numpy()
            # Gaussian noise with 2 components is added to the action
            # (presumably the action is 2-dimensional -- TODO confirm).
            _, _ = env.step_only(action + np.random.randn(2) * hp["action_noise"])
            next_img = env.get_current_img(config)
            curr_obs = get_torch_images_from_numpy(next_img[None, :], False, one_image=True)
            # print("Final State: %s; Goal State: %s" % (env.get_current_obs(), goal_state))
            # Save the frame of this step under <savepath>/<task index>/.
            import matplotlib.pyplot as plt
            if not os.path.exists(os.path.join(savepath, '%d' % i)):
                os.makedirs(os.path.join(savepath, '%d' % i))
            plt.imshow(next_img)
            plt.savefig(os.path.join(savepath, '%d/%03d.png' % (i, t)))
            plt.close()
            curr_dist = np.linalg.norm(env.get_current_obs() - goal_state)
            # Success: every coordinate is within hp["block_size"] of the goal.
            is_success = (np.abs(env.get_current_obs() - goal_state) < hp["block_size"]).all()
            if is_success:
                break
        print("Final Distance for Task %d: %f; Success %s" % (i, curr_dist, is_success))
        total_dist += curr_dist
        total_success += is_success
    print("Summary: total dist %f; success %d out of %d" % (total_dist, total_success, n_test_locs))
    return total_dist, total_success, n_test_locs
def get_torch_actions(npy_list):
    """
    Collect the flattened actions stored in column 1 of ``npy_list``.

    :param npy_list: 2-D array whose column 1 holds mappings with an
        ``'action'`` array
    :return: ``from_numpy_to_var`` of the stacked, flattened actions
    """
    flattened = [attrs['action'].reshape(-1) for attrs in npy_list[:, 1]]
    return from_numpy_to_var(np.array(flattened))
Exemple #10
0
 def forward(self, s, a=None):
     """
     Sample from the Gaussian given by ``self.get_mu_and_var(s)``.

     :param s: input batch; its first dimension is the batch size
     :param a: not used by this implementation
     :return: ``mu + sqrt(var) * eps`` with ``eps`` standard normal of
         shape (batch size, ``self.s_dim``)
     """
     batch = s.size()[0]
     mean, variance = self.get_mu_and_var(s)
     std = variance.sqrt()
     eps = from_numpy_to_var(np.random.randn(batch, self.s_dim))
     return mean + std * eps
Exemple #11
0
    def plan_hack(self,
                  data_start_loader,
                  data_goal_loader,
                  epoch,
                  metric,
                  keep_best=10):
        """
        Generate visual plans from starts to goals.
        First, find the closest codes for starts and goals.
        Then, generate the plans in the latent space.
        Finally, map the latent plans to visual plans and use the classifier to pick the top K.
        The start image is fixed. The goal image is loaded from data_goal_loader.

        Closest codes are cached as ``.pt`` files under ``<out_dir>/plans``
        and loaded from there when the file already exists.  Besides one
        image grid per goal, the collected confidences are pickled to
        ``<out_dir>/all_confidences.pkl`` and a box plot is saved to
        ``<out_dir>/boxplot.png``.

        :param data_start_loader: iterable of batches; only the first batch
            is used and element 0 of it is the start image
        :param data_goal_loader: iterable of batches; element 0 of each
            batch is a goal image
        :param epoch: used in the goal cache and image file names
        :param metric: forwarded to ``self.closest_code`` and used in file names
        :param keep_best: number of rollouts kept by ``self.get_best_k``
        :return: None

        NOTE(review): if ``data_start_loader`` yields nothing, ``c_start``
        and ``est_start_obs`` stay None and ``start_obs`` is never bound --
        confirm callers always provide at least one start batch.
        """
        all_confidences = []
        c_start = None
        est_start_obs = None
        # Only the first start batch is consumed (see the break below).
        for img in data_start_loader:
            if self.fcn:
                start_obs = self.apply_fcn_mse(img[0])
            else:
                start_obs = Variable(img[0]).cuda()
            # The start cache name depends on the metric only, not the epoch.
            pt_start = os.path.join(self.out_dir, 'plans',
                                    'c_min_start_%s.pt' % metric)
            if os.path.exists(pt_start):
                z_start, c_start, _, est_start_obs = torch.load(pt_start)
            else:
                z_start, c_start, _, est_start_obs = self.closest_code(
                    start_obs, 400, False, metric, 1)
                torch.save([z_start, c_start, _, est_start_obs], pt_start)
            break
        # Hacky for now
        # NOTE(review): a RuntimeError from the Variable wrapping is
        # swallowed and the values are then used as loaded/computed.
        try:
            c_start = Variable(c_start)
            est_start_obs = Variable(est_start_obs)
        except RuntimeError:
            pass

        for i, img in enumerate(data_goal_loader, 0):
            if self.fcn:
                goal_obs = self.apply_fcn_mse(img[0])
            else:
                goal_obs = Variable(img[0]).cuda()
            pt_goal = os.path.join(
                self.out_dir, 'plans',
                'c_min_goal_%s_%d_epoch_%d.pt' % (metric, i, epoch))
            if os.path.exists(pt_goal):
                z_goal, _, c_goal, est_goal_obs = torch.load(pt_goal)
            else:

                z_goal, _, c_goal, est_goal_obs = self.closest_code(
                    goal_obs, 400, True, metric, 1)
                torch.save([z_goal, _, c_goal, est_goal_obs], pt_goal)
            # Hacky for now
            try:
                c_goal = Variable(c_goal)
                est_goal_obs = Variable(est_goal_obs)
            except RuntimeError:
                pass
            # Plan using c_start and c_goal.
            rollout = self.planner(c_start.repeat(self.traj_eval_copies, 1),
                                   c_goal.repeat(self.traj_eval_copies, 1),
                                   start_obs=start_obs,
                                   goal_obs=goal_obs)

            # Insert closest start and goal.
            rollout.insert(
                0, est_start_obs.repeat(self.traj_eval_copies, 1, 1, 1))
            rollout.append(est_goal_obs.repeat(self.traj_eval_copies, 1, 1, 1))

            # Insert real start and goal.
            # Resulting order: real start, closest start, plan...,
            # closest goal, real goal.
            rollout.insert(0, start_obs.repeat(self.traj_eval_copies, 1, 1, 1))
            rollout.append(goal_obs.repeat(self.traj_eval_copies, 1, 1, 1))

            rollout_best_k, confidences = self.get_best_k(rollout, keep_best)
            rollout_data = torch.stack(rollout_best_k, dim=0)

            # Background of -1 onto which the confidences are written; the
            # element-wise max below overlays it on the rollout images.
            masks = -np.ones(
                (rollout_data.size()[0], keep_best, self.channel_dim, 64, 64),
                dtype=np.float32)
            write_number_on_images(masks, confidences)

            # save_image(torch.max(rollout_data, from_numpy_to_var(masks)).view(-1, self.channel_dim, 64, 64).data,
            #            os.path.join(self.out_dir, 'plans', '%s_min_%s_%d_epoch_%d.png'
            #                         % (self.planner.__name__, metric, i, epoch)),
            #            nrow=keep_best,
            #            normalize=True)
            # Swap the first two axes before flattening so that each
            # rollout occupies one row of the saved grid.
            pd = torch.max(rollout_data, from_numpy_to_var(masks)).permute(
                1, 0, 2, 3, 4).contiguous().view(-1, self.channel_dim, 64, 64)
            # confidences.T has size keep_best x rollout length
            all_confidences.append(confidences.T[-1][:-1])

            save_image(pd.data,
                       os.path.join(
                           self.out_dir, 'plans', '%s_min_%s_%d_epoch_%d.png' %
                           (self.planner.__name__, metric, i, epoch)),
                       nrow=int(pd.size()[0] / keep_best),
                       normalize=True)
        all_confidences = np.stack(all_confidences)
        # Number of goals whose first / last kept confidence exceeds 0.9.
        print((all_confidences[:, 0] > 0.9).sum(),
              (all_confidences[:, -1] > 0.9).sum())
        import pickle as pkl
        with open(os.path.join(self.out_dir, 'all_confidences.pkl'),
                  'wb') as f:
            pkl.dump(all_confidences, f)
        import matplotlib.pyplot as plt
        # Row means for all goals vs. only the goals whose last
        # confidence exceeds 0.9.
        plt.boxplot([
            all_confidences.mean(1),
            all_confidences[all_confidences[:, -1] > 0.9].mean(1)
        ])
        plt.savefig(os.path.join(self.out_dir, 'boxplot.png'))
Exemple #12
0
 def discriminator_function_np(self, obs, obs_next):
     """
     Numpy front end for ``self.discriminator_function``.

     :param obs: current observation(s), in numpy
     :param obs_next: next observation(s), in numpy
     :return: whatever ``self.discriminator_function`` returns for the
         converted inputs
     """
     obs_var = from_numpy_to_var(obs)
     obs_next_var = from_numpy_to_var(obs_next)
     return self.discriminator_function(obs_var, obs_next_var)
Exemple #13
0
 def log_prob(self, s):
     """
     Return a constant log-probability for every row of ``s``.

     The value is ``-log(unif_range[1] - unif_range[0])``; the content of
     ``s`` is not inspected, only its first dimension.

     :param s: input batch
     :return: ``from_numpy_to_var`` of a vector with one entry per row
     """
     n_rows = s.size()[0]
     width = self.unif_range[1] - self.unif_range[0]
     per_row = -np.ones(n_rows) * np.log(width)
     return from_numpy_to_var(per_row)
Exemple #14
0
 def sample(self, batch_size):
     """
     Draw uniform samples over ``self.unif_range``.

     :param batch_size: number of rows to draw
     :return: ``from_numpy_to_var`` of an array of shape
         (``batch_size``, ``self.s_dim``)
     """
     shape = (batch_size, self.s_dim)
     draws = np.random.uniform(*self.unif_range, size=shape)
     return from_numpy_to_var(draws)
Exemple #15
0
def build_memory_graph(o_samples_npy, context, c_model, score_type, edge_thresh=0, edge_weight=False):
    """
    Build a fully connected directed graph over the sampled observations.

    Every sample is scored against every other sample with ``get_score``
    (in chunks of at most 500 targets).  Each ordered pair ``(i, j)`` with
    ``i != j`` gets an edge whose ``raw`` attribute is the raw score and
    whose ``weight`` is ``sum_k exp(raw[i, k] - raw[i, j])``.

    The module-level globals ``MIN_SCORE`` and ``MAX_SCORE`` are set to the
    extremes of the ``score_type`` scores.

    :param o_samples_npy: samples, in numpy; its length must be a multiple
        of the chunk size ``min(len, 500)``
    :param context: context tensor, repeated once per chunk row
    :param c_model: scoring model forwarded to ``get_score``
    :param score_type: key of the ``get_score`` result used for the
        min/max statistics
    :param edge_thresh: not used by the current implementation
    :param edge_weight: not used by the current implementation
    :return: the ``nx.DiGraph``
    """
    global MIN_SCORE, MAX_SCORE

    n_nodes = len(o_samples_npy)
    graph = nx.DiGraph()
    graph.add_nodes_from(range(n_nodes))

    # Score every sample against all samples, one chunk of targets at a time.
    chunk = min(n_nodes, 500)
    assert n_nodes % chunk == 0
    n_chunks = int(n_nodes / chunk)
    typed_rows, raw_rows = [], []
    with torch.no_grad():
        for sample in o_samples_npy:
            typed_parts, raw_parts = [], []
            tiled_sample = from_numpy_to_var(tile(sample, chunk))
            for k in range(n_chunks):
                first = k * chunk
                targets = from_numpy_to_var(o_samples_npy[first:first + chunk])
                scores = get_score(c_model, tiled_sample, targets,
                                   context.repeat(chunk, 1, 1, 1), type="all")
                typed_parts.append(scores[score_type])
                raw_parts.append(scores["raw"])
            typed_rows.append(np.concatenate(typed_parts))
            raw_rows.append(np.concatenate(raw_parts))
    all_pair_scores = np.array(typed_rows)
    raw_scores = np.array(raw_rows)
    for matrix in (all_pair_scores, raw_scores):
        assert matrix.shape[0] == matrix.shape[1] == n_nodes

    # Record the score range in the module-level globals.
    MIN_SCORE = all_pair_scores.min()
    MAX_SCORE = all_pair_scores.max()

    for src in range(n_nodes):
        src_raw = raw_scores[src]
        for dst in range(n_nodes):
            if src == dst:
                continue
            pair_raw = raw_scores[src, dst]
            graph.add_edge(src, dst,
                           weight=np.exp(src_raw - pair_raw).sum(),
                           raw=pair_raw)
    print("W edge: min & max", MIN_SCORE, MAX_SCORE)
    print("Raw score: min & max", raw_scores.min(), raw_scores.max())
    return graph
Exemple #16
0
    def plan(self,
             data_start_loader,
             data_goal_loader,
             epoch,
             metric,
             keep_best=10):
        """
        Generate visual plans from starts to goals.

        First, find the closest codes for starts and goals.
        Then, generate the plans in the latent space.
        Finally, map the latent plans to visual plans and use the classifier to pick the top K.
        The start image is loaded from data_start_loader. The goal image is loaded from data_goal_loader.

        The closest codes of every (start, goal) pair are cached as a ``.pt``
        file under ``<out_dir>/plans`` and loaded from there when the file
        already exists.  One image grid per pair is written to the same
        directory.

        :param data_start_loader: iterable of batches; element 0 of each
            batch is the start image
        :param data_goal_loader: iterable of batches, paired with the start
            loader via ``zip``; element 0 of each batch is the goal image
        :param epoch: used in the cache and image file names
        :param metric: forwarded to ``self.closest_code`` and used in file names
        :param keep_best: number of rollouts kept by ``self.get_best_k``
        :return: None
        """
        copies = self.traj_eval_copies
        plans_dir = os.path.join(self.out_dir, 'plans')
        pairs = zip(data_start_loader, data_goal_loader)
        for i, (start_batch, goal_batch) in enumerate(pairs, 0):
            if self.fcn:
                start_obs = self.apply_fcn_mse(start_batch[0])
                goal_obs = self.apply_fcn_mse(goal_batch[0])
            else:
                # Without FCN pre-processing use the raw images on the GPU,
                # exactly as plan_hack does.  Previously this branch was
                # missing, leaving start_obs/goal_obs unbound (NameError) or
                # stale from the previous pair.
                start_obs = start_batch[0].cuda()
                goal_obs = goal_batch[0].cuda()

            # Compute c_start and c_goal, or load them from the cache.
            pt_path = os.path.join(
                plans_dir, 'c_min_%s_%d_epoch_%d.pt' % (metric, i, epoch))
            if os.path.exists(pt_path):
                c_start, c_goal, est_start_obs, est_goal_obs = torch.load(
                    pt_path)
            else:
                _, c_start, _, est_start_obs = self.closest_code(
                    start_obs, 400, False, metric, 1)
                _, _, c_goal, est_goal_obs = self.closest_code(
                    goal_obs, 400, True, metric, 1)
                torch.save([c_start, c_goal, est_start_obs, est_goal_obs],
                           pt_path)

            # Plan using c_start and c_goal.
            rollout = self.planner(c_start.repeat(copies, 1),
                                   c_goal.repeat(copies, 1),
                                   start_obs=start_obs,
                                   goal_obs=goal_obs)

            # Insert closest start and goal.
            rollout.insert(0, est_start_obs.repeat(copies, 1, 1, 1))
            rollout.append(est_goal_obs.repeat(copies, 1, 1, 1))

            # Insert real start and goal.  Resulting order: real start,
            # closest start, plan..., closest goal, real goal.
            rollout.insert(0, start_obs.repeat(copies, 1, 1, 1))
            rollout.append(goal_obs.repeat(copies, 1, 1, 1))

            rollout_best_k, confidences = self.get_best_k(rollout, keep_best)
            rollout_data = torch.stack(rollout_best_k, dim=0)

            # Background of -1 onto which the confidences are written; the
            # element-wise max below overlays it on the rollout images.
            masks = -np.ones(
                (rollout_data.size()[0], keep_best, self.channel_dim, 64, 64),
                dtype=np.float32)
            write_number_on_images(masks, confidences)

            # Swap the first two axes before flattening so that each
            # rollout occupies one row of the saved grid.
            grid = torch.max(rollout_data, from_numpy_to_var(masks)).permute(
                1, 0, 2, 3, 4).contiguous().view(-1, self.channel_dim, 64, 64)

            save_image(grid.data,
                       os.path.join(
                           plans_dir, '%s_min_%s_%d_epoch_%d.png' %
                           (self.planner.__name__, metric, i, epoch)),
                       nrow=int(grid.size()[0] / keep_best),
                       normalize=True)
Exemple #17
0
 def forward_soft(self, x):
     """
     Sample from the Gaussian given by ``self.forward(x)``.

     :param x: input forwarded to ``self.forward``, which must return
         ``(mu, var)``
     :return: ``mu + sqrt(var) * eps`` with ``eps`` standard normal of the
         same shape as ``var``
     """
     mean, variance = self.forward(x)
     std = variance.sqrt()
     eps = from_numpy_to_var(np.random.randn(*variance.size()))
     return mean + std * eps