Example #1
    def __init__(self, ag_params, policyparams, gpu_id, ngpu):
        """

        :param ag_params: agent parameters
        :param policyparams: policy parameters
        :param gpu_id: starting gpu id
        :param ngpu: number of gpus
        """
        self._hp = self._default_hparams()
        self._override_defaults(policyparams)

        self.agentparams = ag_params
        self.img_sz = (64, 64)

        learned_cost_testparams = self.setup_model_testparams(self._hp.learned_cost_model_path)

        self.learned_cost = DistFuncEvaluation(QFunctionTestTime, learned_cost_testparams)
        self.device = self.learned_cost.model.get_device()

        self._img_height, self._img_width = [ag_params['image_height'], ag_params['image_width']]

        self._adim = self.agentparams['adim']
        self._sdim = self.agentparams['sdim']

        self._n_cam = 1 #self.predictor.n_cam

        self._desig_pix = None
        self._goal_pix = None
        self._images = None

        self._goal_image = None
        self._start_image = None
        self._verbose_worker = None
Example #2
    def __init__(self, ag_params, policyparams, gpu_id, ngpu):
        """

        :param ag_params: agent parameters
        :param policyparams: policy parameters
        :param gpu_id: starting gpu id
        :param ngpu: number of gpus
        """
        self._hp = self._default_hparams()
        self._override_defaults(policyparams)

        self.agentparams = ag_params
        self.img_sz = (64, 64)
        learned_cost_testparams = {}
        learned_cost_testparams['batch_size'] = self._hp.num_samples
        learned_cost_testparams['data_conf'] = {'img_sz': self.img_sz}  #todo currently uses 64x64!!
        learned_cost_testparams['classifier_restore_path'] = self._hp.learned_cost_model_path
        learned_cost_testparams['classifier_restore_paths'] = ['']
        self.learned_cost = DistFuncEvaluation(GCBCTestTime, learned_cost_testparams)
        self.device = self.learned_cost.model.get_device()

        self._img_height, self._img_width = [ag_params['image_height'], ag_params['image_width']]

        self._adim = self.agentparams['adim']
        self._sdim = self.agentparams['sdim']

        self._n_cam = 1

        self._desig_pix = None
        self._goal_pix = None
        self._images = None

        self._goal_image = None
        self._start_image = None
        self._verbose_worker = None
Example #3
    def __init__(self, ag_params, policyparams, gpu_id, ngpu):
        """

        :param ag_params: agent parameters
        :param policyparams: policy parameters
        :param gpu_id: starting gpu id
        :param ngpu: number of gpus
        """
        CEMBaseController.__init__(self, ag_params, policyparams)

        predictor_hparams = {}
        predictor_hparams['run_batch_size'] = min(self._hp.vpred_batch_size,
                                                  self._hp.num_samples)
        self.predictor = VPredEvaluation(self._hp.vidpred_model_path,
                                         predictor_hparams,
                                         n_gpus=ngpu,
                                         first_gpu=gpu_id)
        self.predictor.restore(gpu_mem_limit=True)
        self._net_context = self.predictor.n_context
        if self._hp.start_planning < self._net_context - 1:
            self._hp.start_planning = self._net_context - 1
        self.img_sz = self.predictor._input_hparams['img_size']

        learned_cost_testparams = {}
        learned_cost_testparams['batch_size'] = self._hp.num_samples
        learned_cost_testparams['data_conf'] = {
            'img_sz': self.img_sz
        }  #todo currently uses 64x64!!
        learned_cost_testparams[
            'classifier_restore_path'] = self._hp.learned_cost_model_path
        self.learned_cost = DistFuncEvaluation(self._hp.learned_cost,
                                               learned_cost_testparams)
        self.device = self.learned_cost.model.get_device()

        self._img_height, self._img_width = [
            ag_params['image_height'], ag_params['image_width']
        ]

        self._n_cam = 1  #self.predictor.n_cam

        self._desig_pix = None
        self._goal_pix = None
        self._images = None

        self._goal_image = None
        self._start_image = None
        self._verbose_worker = None
Example #4
    def __init__(self, ag_params, policyparams, gpu_id, ngpu):
        """

        :param ag_params: agent parameters
        :param policyparams: policy parameters
        :param gpu_id: starting gpu id
        :param ngpu: number of gpus
        """
        self._hp = self._default_hparams()
        self._override_defaults(policyparams)

        self.agentparams = ag_params
        self.img_sz = (64, 64)
        learned_cost_testparams = self.setup_model_testparams(
            self._hp.learned_cost_model_path)

        self.learned_cost = DistFuncEvaluation(DistQFunctionTestTime,
                                               learned_cost_testparams)
        self.device = self.learned_cost.model.get_device()
        learned_cost_dir = os.path.dirname(
            learned_cost_testparams['classifier_restore_path'])
        graph_dir = learned_cost_dir + '/graph.pkl'
        if not os.path.isfile(graph_dir):
            self.preconstruct_graph(graph_dir)

        self.graph, self.graph_states = self.construct_graph(graph_dir)

        inv_model_testparams = self.setup_model_testparams(
            self._hp.inv_model_path)
        self.inverse_model = DistFuncEvaluation(GCBCTestTime,
                                                inv_model_testparams)

        self._img_height, self._img_width = [
            ag_params['image_height'], ag_params['image_width']
        ]

        self._adim = self.agentparams['adim']
        self._sdim = self.agentparams['sdim']

        self._n_cam = 1  #self.predictor.n_cam

        self._desig_pix = None
        self._goal_pix = None
        self._images = None

        self._goal_image = None
        self._start_image = None
        self._verbose_worker = None
Example #5
class BCController(Policy):
    """
    Use the goal-conditioned behavior cloning baseline model to perform control.
    """

    def __init__(self, ag_params, policyparams, gpu_id, ngpu):
        """

        :param ag_params: agent parameters
        :param policyparams: policy parameters
        :param gpu_id: starting gpu id
        :param ngpu: number of gpus
        """
        self._hp = self._default_hparams()
        self._override_defaults(policyparams)

        self.agentparams = ag_params
        self.img_sz = (64, 64)
        learned_cost_testparams = {}
        learned_cost_testparams['batch_size'] = self._hp.num_samples
        learned_cost_testparams['data_conf'] = {'img_sz': self.img_sz}  #todo currently uses 64x64!!
        learned_cost_testparams['classifier_restore_path'] = self._hp.learned_cost_model_path
        learned_cost_testparams['classifier_restore_paths'] = ['']
        self.learned_cost = DistFuncEvaluation(GCBCTestTime, learned_cost_testparams)
        self.device = self.learned_cost.model.get_device()

        self._img_height, self._img_width = [ag_params['image_height'], ag_params['image_width']]

        self._adim = self.agentparams['adim']
        self._sdim = self.agentparams['sdim']

        self._n_cam = 1

        self._desig_pix = None
        self._goal_pix = None
        self._images = None

        self._goal_image = None
        self._start_image = None
        self._verbose_worker = None

    def reset(self):
        self._expert_score = None
        self._images = None
        self._expert_images = None
        self._goal_image = None
        self._start_image = None
        self._verbose_worker = None
        return super(BCController, self).reset()

    def _default_hparams(self):
        default_dict = {
            'action_sample_batches': 1,
            'num_samples': 200,
            'learned_cost_model_path': None,
            'verbose_every_iter': False,
        }
        parent_params = super(BCController, self)._default_hparams()

        for k in default_dict.keys():
            parent_params.add_hparam(k, default_dict[k])
        return parent_params

    def get_best_action(self, t=None):
        resampled_imgs = resample_imgs(self._images, self.img_sz) / 255.
        input_images = ten2pytrch(resampled_imgs, self.device)[-1]
        input_images = input_images[None].repeat(self._hp.num_samples, 1, 1, 1)
        input_states = torch.from_numpy(self._states)[None].float().to(self.device).repeat(self._hp.num_samples, 1)
        goal_img = uint2pytorch(resample_imgs(self._goal_image, self.img_sz), self._hp.num_samples, self.device)

        inp_dict = {'current_img': input_images,
                    'current_state': input_states,
                    'goal_img': goal_img,}
        act = self.learned_cost.predict(inp_dict).action[0].cpu().detach().numpy()
        return act

    def act(self, t=None, i_tr=None, images=None, goal_image=None, verbose_worker=None, state=None):
        self._images = images
        self._states = state
        self._verbose_worker = verbose_worker

        ### Support for getting goal images from environment
        if goal_image.shape[0] == 1:
            self._goal_image = goal_image[0]
        else:
            self._goal_image = goal_image[-1, 0]  # pick the last time step as the goal image

        return {'actions': self.get_best_action(t)}
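# Hedged, self-contained sketch (not from the original repo): it mirrors the
# tensor preparation in BCController.get_best_action() above with dummy data so
# the expected shapes are explicit. The repo helpers (resample_imgs, ten2pytrch,
# uint2pytorch) are replaced by plain torch ops; the 64x64 image size and
# num_samples=200 follow the defaults above, everything else is made up.
import torch

num_samples, H, W = 200, 64, 64
current_img = torch.rand(3, H, W)        # last observed frame, values in [0, 1]
goal_img = torch.rand(3, H, W)           # goal frame, values in [0, 1]
state = torch.rand(2)                    # low-dimensional robot state

inp_dict = {
    'current_img': current_img[None].repeat(num_samples, 1, 1, 1),   # (N, 3, H, W)
    'current_state': state[None].repeat(num_samples, 1),             # (N, 2)
    'goal_img': goal_img[None].repeat(num_samples, 1, 1, 1),         # (N, 3, H, W)
}
# The GCBC model consumes a dict like this and returns a batch of actions;
# BCController then takes the first one: predict(inp_dict).action[0].
print({k: tuple(v.shape) for k, v in inp_dict.items()})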
Example #6
class SORBController(Policy):
    """
    Run Search on the Replay Buffer.
    Code largely based on the author's implementation in https://colab.research.google.com/github/google-research/google-research/blob/master/sorb/SoRB.ipynb.
    However, a key difference is that we use a goal-conditioned behavior cloning inverse model rather than an actor
    learned alongside the distance function, which we find performs much better empirically.
    """
    def __init__(self, ag_params, policyparams, gpu_id, ngpu):
        """

        :param ag_params: agent parameters
        :param policyparams: policy parameters
        :param gpu_id: starting gpu id
        :param ngpu: number of gpus
        """
        self._hp = self._default_hparams()
        self._override_defaults(policyparams)

        self.agentparams = ag_params
        self.img_sz = (64, 64)
        learned_cost_testparams = self.setup_model_testparams(
            self._hp.learned_cost_model_path)

        self.learned_cost = DistFuncEvaluation(DistQFunctionTestTime,
                                               learned_cost_testparams)
        self.device = self.learned_cost.model.get_device()
        learned_cost_dir = os.path.dirname(
            learned_cost_testparams['classifier_restore_path'])
        graph_dir = learned_cost_dir + '/graph.pkl'
        if not os.path.isfile(graph_dir):
            self.preconstruct_graph(graph_dir)

        self.graph, self.graph_states = self.construct_graph(graph_dir)

        inv_model_testparams = self.setup_model_testparams(
            self._hp.inv_model_path)
        self.inverse_model = DistFuncEvaluation(GCBCTestTime,
                                                inv_model_testparams)

        self._img_height, self._img_width = [
            ag_params['image_height'], ag_params['image_width']
        ]

        self._adim = self.agentparams['adim']
        self._sdim = self.agentparams['sdim']

        self._n_cam = 1  #self.predictor.n_cam

        self._desig_pix = None
        self._goal_pix = None
        self._images = None

        self._goal_image = None
        self._start_image = None
        self._verbose_worker = None

    def setup_model_testparams(self, model_dir):
        learned_cost_testparams = {
            'batch_size': self._hp.num_samples,
            'data_conf': {
                'img_sz': self.img_sz
            },
            'classifier_restore_path': model_dir,
            'classifier_restore_paths': ['']
        }
        return learned_cost_testparams

    def compute_pairwise_dist(self, v1, v2=None):
        if v2 is None:
            v2 = v1
        dists = []
        if not torch.is_tensor(v2):
            v2 = torch.FloatTensor(v2)
        if not torch.is_tensor(v1):
            v1 = torch.FloatTensor(v1)

        if v2.shape[0] == 1:
            curr = 0
            while curr < v1.shape[0]:
                batch = v1[curr:curr + self._hp.dloader_bs]
                inp_dict = {
                    'current_img': batch.cuda(),
                    'goal_img': v2.repeat(batch.shape[0], 1, 1, 1).cuda(),
                }
                score = self.learned_cost.predict(inp_dict)
                if not hasattr(score, '__len__'):
                    score = np.array([score])
                dists.append(score)
                curr += self._hp.dloader_bs
            dists = np.concatenate(dists)[None]
        else:
            for i, image in tqdm.tqdm(enumerate(v1)):
                inp_dict = {
                    'current_img': image[None].repeat(v2.shape[0], 1, 1,
                                                      1).cuda(),
                    'goal_img': v2.cuda(),
                }
                score = self.learned_cost.predict(inp_dict)
                dists.append(score)
        dists = np.stack(dists)
        return dists

    def preconstruct_graph(self, cache_fname):
        images = self.get_random_observations()
        dist = self.compute_pairwise_dist(images)
        graph = {'images': images.cpu().numpy(), 'dists': dist}
        with open(cache_fname, 'wb') as f:
            pkl.dump(graph, f)

    def construct_graph(self, cache_fname):
        # Load cache
        with open(cache_fname, 'rb') as f:
            data = pkl.load(f)
            images, dists = data['images'], data['dists']
        g = nx.DiGraph()
        for i, s_i in enumerate(images):
            for j, s_j in enumerate(images):
                length = dists[i, j]
                if self.dist_check(length):
                    g.add_edge(i, j, weight=length)
        return g, images
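    # Hedged toy illustration (comments only, not part of the original code):
    # construct_graph() keeps a directed edge i -> j only when the learned
    # distance passes dist_check(), i.e. min_dist < dists[i, j] < max_dist.
    # With made-up numbers and the default thresholds (0.0, 15.0):
    #
    #   dists = [[ 0.,  3., 20.],
    #            [ 4.,  0.,  2.],
    #            [25.,  6.,  0.]]
    #
    # the surviving edges are 0->1 (3.), 1->0 (4.), 1->2 (2.) and 2->1 (6.);
    # 0->2 and 2->0 are pruned as unreliable long-range estimates, so reaching
    # node 2 from node 0 requires the two-hop path 0 -> 1 -> 2.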

    def get_random_observations(self):
        hp = AttrDict(img_sz=(64, 64), sel_len=-1, T=31)
        dataset = FixLenVideoDataset(self._hp.graph_dataset,
                                     self.learned_cost.model._hp,
                                     hp).get_data_loader(self._hp.dloader_bs)
        total_images = []
        dl = iter(dataset)
        for i in range(self._hp.graph_size // self._hp.dloader_bs):
            try:
                batch = next(dl)
            except StopIteration:
                dl = iter(dataset)
                batch = next(dl)
            images = batch['demo_seq_images']
            selected_images = images[torch.arange(len(images)),
                                     torch.randint(0, images.shape[1],
                                                   (len(images), ))]
            total_images.append(selected_images)
        total_images = torch.cat(total_images)
        return total_images

    def reset(self):
        self._expert_score = None
        self._images = None
        self._expert_images = None
        self._goal_image = None
        self._start_image = None
        self._verbose_worker = None
        return super(SORBController, self).reset()

    def _default_hparams(self):
        default_dict = {
            'learned_cost_model_path': None,
            'inv_model_path': None,
            'verbose_every_iter': False,
            'dist_q': True,
            'graph_dataset': None,
            'graph_size': 5000,
            'dloader_bs': 500,
            'num_samples': 200,
            'max_dist': 15.0,
            'min_dist': 0.0,
        }
        parent_params = super(SORBController, self)._default_hparams()

        for k in default_dict.keys():
            parent_params.add_hparam(k, default_dict[k])
        return parent_params

    def dist_check(self, dist):
        return self._hp.min_dist < dist < self._hp.max_dist

    def get_waypoint(self, input_images, goal_img):
        g2 = self.graph.copy()
        start_to_rb = self.compute_pairwise_dist(input_images[None],
                                                 self.graph_states).flatten()
        rb_to_goal = self.compute_pairwise_dist(self.graph_states,
                                                goal_img).flatten()
        start_to_goal = self.compute_pairwise_dist(
            input_images[None], goal_img).flatten().squeeze()
        for i, (dist_from_start,
                dist_to_goal) in enumerate(zip(start_to_rb, rb_to_goal)):
            if self.dist_check(dist_from_start):
                g2.add_edge('start', i, weight=dist_from_start)
            if self.dist_check(dist_to_goal):
                g2.add_edge(i, 'goal', weight=dist_to_goal)
        try:
            path = nx.shortest_path(g2, 'start', 'goal', weight='weight')
            edge_lengths = []
            for (i, j) in zip(path[:-1], path[1:]):
                edge_lengths.append(g2[i][j]['weight'])
        except (nx.NetworkXNoPath, nx.NodeNotFound):  # no path through the graph; fall back to the direct edge
            path = ['start', 'goal']
            edge_lengths = [start_to_goal]

        wypt_to_goal_dist = np.cumsum(
            edge_lengths[::-1])[::-1]  # Reverse CumSum
        waypoint_vec = list(path)[1:-1]
        verbose_folder = self.traj_log_dir
        plan_imgs = [self.graph_states[i] for i in waypoint_vec]
        plan_imgs_cat = np.concatenate([input_images.cpu().numpy()] +
                                       plan_imgs + [goal_img[0].cpu().numpy()],
                                       axis=1)
        plan_imgs_cat = np.transpose((plan_imgs_cat + 1) / 2 * 255, [1, 2, 0])
        cv2.imwrite(verbose_folder + '/plan_{}.png'.format(self._t),
                    plan_imgs_cat[:, :, ::-1])

        return waypoint_vec, wypt_to_goal_dist[1:], edge_lengths[
            0], start_to_goal
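    # Hedged condensed sketch (comments only, not from the source) of the core
    # of get_waypoint() above: the replay-buffer graph is copied, 'start' and
    # 'goal' nodes are attached wherever the learned distance passes
    # dist_check(), and Dijkstra gives the waypoint sequence.
    #
    #   g2 = self.graph.copy()
    #   for i, (d_s, d_g) in enumerate(zip(start_to_rb, rb_to_goal)):
    #       if self.dist_check(d_s):
    #           g2.add_edge('start', i, weight=d_s)
    #       if self.dist_check(d_g):
    #           g2.add_edge(i, 'goal', weight=d_g)
    #   path = nx.shortest_path(g2, 'start', 'goal', weight='weight')
    #   waypoint_vec = path[1:-1]   # replay-buffer indices to visit, in order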

    def get_best_action(self, t=None):
        resampled_imgs = resample_imgs(self._images, self.img_sz) / 255.
        input_images = ten2pytrch(resampled_imgs, self.device)[-1]
        goal_img = uint2pytorch(resample_imgs(self._goal_image, self.img_sz),
                                self._hp.num_samples, self.device)

        waypoints, graph_dists, first_wp_dist, start_to_goal = self.get_waypoint(
            input_images, goal_img[0][None])
        if len(waypoints) > 0 and (first_wp_dist < start_to_goal
                                   or start_to_goal > self._hp.max_dist):
            wpt_goal = torch.FloatTensor(
                self.graph_states[waypoints[0]])[None].to(self.device)
        else:
            wpt_goal = goal_img[0][None]

        inp_dict = {
            'current_img': input_images[None],
            'goal_img': wpt_goal,
        }

        act = self.inverse_model.predict(
            inp_dict).action[0].cpu().detach().numpy()
        return act

    def act(self,
            t=None,
            i_tr=None,
            images=None,
            goal_image=None,
            verbose_worker=None,
            state=None):
        self._images = images
        self._states = state
        self._verbose_worker = verbose_worker
        self._t = t

        ### Support for getting goal images from environment
        if goal_image.shape[0] == 1:
            self._goal_image = goal_image[0]
        else:
            self._goal_image = goal_image[
                -1, 0]  # pick the last time step as the goal image

        action = {'actions': self.get_best_action(t)}
        print(action)
        return action
Example #7
class LearnedCostController(CEMBaseController):
    """
    Cross Entropy Method Stochastic Optimizer
    """
    def __init__(self, ag_params, policyparams, gpu_id, ngpu):
        """

        :param ag_params: agent parameters
        :param policyparams: policy parameters
        :param gpu_id: starting gpu id
        :param ngpu: number of gpus
        """
        CEMBaseController.__init__(self, ag_params, policyparams)

        predictor_hparams = {}
        predictor_hparams['run_batch_size'] = min(self._hp.vpred_batch_size,
                                                  self._hp.num_samples)
        self.predictor = VPredEvaluation(self._hp.vidpred_model_path,
                                         predictor_hparams,
                                         n_gpus=ngpu,
                                         first_gpu=gpu_id)
        self.predictor.restore(gpu_mem_limit=True)
        self._net_context = self.predictor.n_context
        if self._hp.start_planning < self._net_context - 1:
            self._hp.start_planning = self._net_context - 1
        self.img_sz = self.predictor._input_hparams['img_size']

        learned_cost_testparams = {}
        learned_cost_testparams['batch_size'] = self._hp.num_samples
        learned_cost_testparams['data_conf'] = {
            'img_sz': self.img_sz
        }  #todo currently uses 64x64!!
        learned_cost_testparams[
            'classifier_restore_path'] = self._hp.learned_cost_model_path
        self.learned_cost = DistFuncEvaluation(self._hp.learned_cost,
                                               learned_cost_testparams)
        self.device = self.learned_cost.model.get_device()

        self._img_height, self._img_width = [
            ag_params['image_height'], ag_params['image_width']
        ]

        self._n_cam = 1  #self.predictor.n_cam

        self._desig_pix = None
        self._goal_pix = None
        self._images = None

        self._goal_image = None
        self._start_image = None
        self._verbose_worker = None

    def reset(self):
        self._expert_score = None
        self._images = None
        self._expert_images = None
        self._goal_image = None
        self._start_image = None
        self._verbose_worker = None
        return super(LearnedCostController, self).reset()

    def _default_hparams(self):
        default_dict = {
            'finalweight': 10,
            'state_append': None,
            'compare_to_expert': False,
            'verbose_img_height': 128,
            'verbose_frac_display': 0.,
            'vidpred_model_path': '',
            'learned_cost_model_path': '',
            'vpred_batch_size': 200,
            'learned_cost': BaseTempDistClassifierTestTime
        }
        parent_params = super(LearnedCostController, self)._default_hparams()

        for k in default_dict.keys():
            parent_params.add_hparam(k, default_dict[k])
        return parent_params

    def evaluate_rollouts(self, actions, cem_itr):
        previous_actions = np.concatenate([
            x[None] for x in self._sampler.chosen_actions[-self._net_context:]
        ],
                                          axis=0)
        previous_actions = np.tile(previous_actions, [actions.shape[0], 1, 1])
        # input_actions = np.concatenate((previous_actions, actions), axis=1)[:, :self.predictor.sequence_length]

        resampled_imgs = resample_imgs(self._images, self.img_sz)
        last_frames, last_states = get_context(self._net_context, self._t,
                                               self._state, resampled_imgs,
                                               self._hp)
        context = {
            "context_frames": last_frames[0],  #only take first batch example
            "context_actions": previous_actions[0],
            "context_states": last_states[0]
        }
        prediction_dict = self.predictor(context, {'actions': actions})
        gen_images = prediction_dict['predicted_frames']

        scores = []

        for tpred in range(gen_images.shape[1]):
            input_images = ten2pytrch(gen_images[:, tpred], self.device)
            inp_dict = {
                'current_img':
                input_images,
                'goal_img':
                uint2pytorch(resample_imgs(self._goal_image, self.img_sz),
                             self._hp.num_samples, self.device)
            }

            print('perform prediction for ', tpred)
            scores.append(self.learned_cost.predict(inp_dict))

        # weight final time step by some number and average over time.
        scores = np.stack(scores, 1)
        scores = self._weight_scores(scores)

        if self._verbose_condition(cem_itr):
            verbose_folder = self.traj_log_dir + "/planning_{}_itr_{}".format(
                self._t, cem_itr)

            content_dict = OrderedDict()
            visualize_indices = scores.argsort()[:10]

            # start images
            for c in range(self._n_cam):
                name = 'cam_{}_start'.format(c)
                save_path = save_img_direct(verbose_folder, name,
                                            self._images[-1, c])
                content_dict[name] = [save_path for _ in visualize_indices]

            name = 'goal_img'
            save_path = save_img_direct(verbose_folder, name,
                                        (self._goal_image * 255).astype(
                                            np.uint8))
            content_dict[name] = [save_path for _ in visualize_indices]

            # render predicted images
            for c in range(self._n_cam):
                verbose_images = [
                    (gen_images[g_i, :, c] * 255).astype(np.uint8)
                    for g_i in visualize_indices
                ]
                row_name = 'cam_{}_pred_images'.format(c)
                content_dict[row_name] = save_gifs_direct(
                    verbose_folder, row_name, verbose_images)

            self.learned_cost.model.visualize_test_time(
                content_dict, visualize_indices, verbose_folder)

            # save scores
            content_dict['scores'] = scores[visualize_indices]

            html_page = fill_template(cem_itr,
                                      self._t,
                                      content_dict,
                                      img_height=self._hp.verbose_img_height)
            save_html_direct("{}/plan.html".format(verbose_folder), html_page)

            #todo make logger instead of verbose worker !!

        return scores

    def _weight_scores(self, raw_scores):
        if self._hp.finalweight >= 0:
            scores = raw_scores.copy()
            scores[:, -1] *= self._hp.finalweight
            scores = np.sum(
                scores,
                axis=1) / sum([1. for _ in range(self.predictor.horizon - 1)] +
                              [self._hp.finalweight])
        else:
            scores = raw_scores[:, -1].copy()
        return scores
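    # Hand-worked example (comments only, not from the source) of the weighting
    # in _weight_scores() above, assuming finalweight=10 and a prediction
    # horizon of 4 steps:
    #
    #   raw scores for one trajectory over 4 predicted steps: [1., 2., 3., 4.]
    #   after scaling the last step by finalweight:           [1., 2., 3., 40.]
    #   weighted average = (1 + 2 + 3 + 40) / ((4 - 1) * 1. + 10) = 46 / 13 ≈ 3.54
    #
    # so the final predicted frame dominates the ranking while earlier frames
    # still contribute; with finalweight < 0 only the last step is used.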

    def act(self,
            t=None,
            i_tr=None,
            images=None,
            goal_image=None,
            verbose_worker=None,
            state=None):
        self._images = images
        self._verbose_worker = verbose_worker
        ### Support for getting goal images from environment
        if goal_image.shape[0] == 1:
            self._goal_image = goal_image[0]
        else:
            self._goal_image = goal_image[
                -1, 0]  # pick the last time step as the goal image

        return super(LearnedCostController, self).act(t, i_tr, state)
Example #8
class LearnedCostController(CEMBaseController):
    """
    Cross Entropy Method Stochastic Optimizer
    """
    def __init__(self, ag_params, policyparams, gpu_id, ngpu):
        """

        :param ag_params: agent parameters
        :param policyparams: policy parameters
        :param gpu_id: starting gpu id
        :param ngpu: number of gpus
        """
        CEMBaseController.__init__(self, ag_params, policyparams)

        predictor_hparams = {}
        predictor_hparams['run_batch_size'] = min(self._hp.vpred_batch_size, self._hp.num_samples)

        self.predictor = VPredEvaluation(self._hp.vidpred_model_path, predictor_hparams, n_gpus=ngpu, first_gpu=gpu_id)
        self.predictor.restore(gpu_mem_limit=True)

        self._net_context = self.predictor.n_context
        if self._hp.start_planning < self._net_context - 1:
            self._hp.start_planning = self._net_context - 1
        self.img_sz = self.predictor._input_hparams['img_size']

        learned_cost_testparams = {}
        learned_cost_testparams['batch_size'] = self._hp.num_samples
        learned_cost_testparams['data_conf'] = {'img_sz': self.img_sz}  #todo currently uses 64x64!!
        learned_cost_testparams['classifier_restore_path'] = self._hp.learned_cost_model_path
        learned_cost_testparams['classifier_restore_paths'] = self._hp.learned_cost_model_paths

        self.learned_cost = DistFuncEvaluation(self._hp.learned_cost, learned_cost_testparams)
        self.device = self.learned_cost.model.get_device()

        self._img_height, self._img_width = [ag_params['image_height'], ag_params['image_width']]

        self._n_cam = 1 #self.predictor.n_cam

        self._desig_pix = None
        self._goal_pix = None
        self._images = None

        self._goal_image = None
        self._start_image = None
        self._verbose_worker = None


    def reset(self):
        self._expert_score = None
        self._images = None
        self._expert_images = None
        self._goal_image = None
        self._start_image = None
        self._verbose_worker = None
        return super(LearnedCostController, self).reset()

    def _default_hparams(self):
        default_dict = {
            'finalweight': 10,
            'state_append': None,
            'compare_to_expert': False,
            'verbose_img_height': 128,
            'verbose_frac_display': 0.,
            'vidpred_model_path': '',
            'learned_cost_model_path': '',
            'vpred_batch_size': 200,
            'learned_cost': QFunctionTestTime,
            'planning_horizon': 100,
            'log_raw_imgs': False,
        }
        parent_params = super(LearnedCostController, self)._default_hparams()

        for k in default_dict.keys():
            parent_params.add_hparam(k, default_dict[k])
        return parent_params

    @staticmethod
    def kendall_tau(scores_1, true_scores, num_pairs_ret=10):
        """
        Given two np arrays of scores for the same trajectories, return the normalized Kendall tau score
        (disagreement) between them
        """
        assert len(scores_1) == len(true_scores)
        ## Double argsort gives rankings
        total = len(scores_1)
        ranks_1, ranks_2 = scores_1.argsort().argsort(), true_scores.argsort().argsort()
        disagree = 0
        ret = [] #indices to return
        true_score_ordering = true_scores.argsort()

        print(f'Score of true best trajectory: {scores_1[true_score_ordering[0]]}')
        for number, i in enumerate(true_score_ordering): # Go through the indices based on how good they actually are
            for j in true_score_ordering[number:]:
                if i == j:
                    continue
                if (ranks_1[i] < ranks_1[j] and ranks_2[i] > ranks_2[j]) or\
                   (ranks_1[i] > ranks_1[j] and ranks_2[i] < ranks_2[j]):
                    disagree += 1
                    if len(ret) < num_pairs_ret:
                        # Return better trajectory by true ranking first
                        if ranks_2[i] < ranks_2[j]:
                            ret.append((i, j))
                        else:
                            ret.append((j, i))
                    continue
        num_pairs = total * (total - 1) / 2.0  # number of distinct pairs
        return 1.0 * disagree / num_pairs, ret
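    # Hand-worked example (comments only, not from the source) for kendall_tau():
    #
    #   scores_1    = np.array([0.1, 0.3, 0.2])   # e.g. learned-cost scores
    #   true_scores = np.array([1.0, 2.0, 3.0])   # e.g. ground-truth costs
    #
    # ranks_1 = [0, 2, 1] and ranks_2 = [0, 1, 2]; of the three distinct pairs
    # only (1, 2) is ordered differently by the two rankings, i.e. 1 disagreement
    # out of 3 pairs, and that pair is returned (better-by-true-ranking first).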

    def evaluate_rollouts(self, actions, cem_itr):
        previous_actions = np.concatenate([x[None] for x in self._sampler.chosen_actions[-self._net_context:]], axis=0)
        previous_actions = np.tile(previous_actions, [actions.shape[0], 1, 1])
        goal_state_rep = torch.FloatTensor(self._goal_state).to(self.device)
        goal_state_rep = goal_state_rep[None].repeat(actions.shape[0], 1)

        resampled_imgs = resample_imgs(self._images, self.img_sz)
        last_frames, last_states = get_context(self._net_context, self._t,
                                               self._state, resampled_imgs, self._hp)
        context = {
            "context_frames": last_frames[0],  #only take first batch example
            "context_actions": previous_actions[0],
            "context_states": last_states[0]
        }

        prediction_dict = self.predictor(context, {'actions': actions})
        gen_images = prediction_dict['predicted_frames']
        if 'predicted_states' in prediction_dict:
            gen_states = prediction_dict['predicted_states']
        else:
            gen_states = np.zeros((actions.shape[0], gen_images.shape[1], goal_state_rep.shape[-1]))

        scores = []

        for tpred in range(gen_images.shape[1]):
            input_images = ten2pytrch(gen_images[:, tpred], self.device)
            inp_dict = {'current_img': input_images,
                        'current_state': torch.FloatTensor(gen_states[:, tpred]).to(self.device),
                        'goal_state': goal_state_rep,
                        'goal_img': uint2pytorch(resample_imgs(self._goal_image, self.img_sz), gen_images.shape[0], self.device),}
            print('perform prediction for ', tpred)
            score = self.learned_cost.predict(inp_dict)
            scores.append(score)

        # weight final time step by some number and average over time.
        scores = np.stack(scores, 1)
        scores = self._weight_scores(scores).squeeze()

        if self._verbose_condition(cem_itr):
            verbose_folder = self.traj_log_dir + "/planning_{}_itr_{}".format(self._t, cem_itr)

            content_dict = OrderedDict()
            visualize_indices = scores.argsort()[:10]

            # start images
            for c in range(self._n_cam):
                name = 'cam_{}_start'.format(c)
                save_path = save_img_direct(verbose_folder, name, self._images[-1, c])
                content_dict[name] = [save_path for _ in visualize_indices]

            name = 'goal_img'
            save_path = save_img_direct(verbose_folder, name, (self._goal_image*255).astype(np.uint8))
            content_dict[name] = [save_path for _ in visualize_indices]

            # render predicted images
            for c in range(self._n_cam):
                verbose_images = [(gen_images[g_i, :]*255).astype(np.uint8) for g_i in visualize_indices]
                verbose_images = [resample_imgs(traj, self._goal_image.shape).squeeze() for traj in verbose_images]
                row_name = 'cam_{}_pred_images'.format(c)
                content_dict[row_name] = save_gifs_direct(verbose_folder,
                                                       row_name, verbose_images)
                if self._hp.log_raw_imgs:
                    for i, frames in enumerate(verbose_images):
                        save_imgs_direct(verbose_folder, '{}_{}_frame'.format(row_name, i), frames, fmt='png')

            self.learned_cost.model.visualize_test_time(content_dict, visualize_indices, verbose_folder)

        return scores

    def _weight_scores(self, raw_scores):
        scores = raw_scores.copy()
        # If the model is predicting longer sequences than the planner is actually going to use, truncate
        scores = scores[:, :self._planning_horizon]
        if self.last_plan:
            last_step = self.agentparams['T'] - self._curr_step - 1
            for i in range(last_step, scores.shape[1]):
                scores[:, i] = scores[:, last_step]

        if self._hp.finalweight >= 0:
            scores[:, -1] *= self._hp.finalweight
            scores = np.sum(scores, axis=1) / sum([1. for _ in range(self._planning_horizon - 1)] + [self._hp.finalweight])
        else:
            scores = scores[:, -1].copy()
        return scores
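    # Hedged illustration (comments only, not from the source) of the last_plan
    # clamp above: with agentparams['T'] = 30, a planning horizon of 10 and the
    # current step t = 25, only a few more actions will actually be executed, so
    # last_step = 30 - 25 - 1 = 4 and columns 4..9 of `scores` are overwritten
    # with column 4. Predicted steps the robot can never reach therefore cannot
    # influence the ranking; the finalweight average is then applied as before.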

    def act(self, t=None, i_tr=None, images=None, goal_image=None, verbose_worker=None, state=None, policy_out=None, goal_obj_pose=None, goal_state=None):
        self._images = np.array(images)
        self._verbose_worker = verbose_worker
        self._goal_obj_pos = np.array(goal_obj_pose)
        self._goal_state = np.array(goal_state)
        self._planning_horizon = min(self._hp.planning_horizon, self.predictor.horizon)
        self._curr_step = t

        if self.agentparams['T'] - t < self._planning_horizon:
            self.last_plan = True
        else:
            self.last_plan = False

        ### Support for getting goal images from environment
        if goal_image.shape[0] == 1:
            self._goal_image = goal_image[0]
        else:
            self._goal_image = goal_image[-1, 0]  # pick the last time step as the goal image

        self._goal_image = np.array(self._goal_image)

        return super(LearnedCostController, self).act(t, i_tr, state)
Example #9
class QFunctionController(Policy):
    """
    Cross Entropy Method Stochastic Optimizer
    """
    def __init__(self, ag_params, policyparams, gpu_id, ngpu):
        """

        :param ag_params: agent parameters
        :param policyparams: policy parameters
        :param gpu_id: starting gpu id
        :param ngpu: number of gpus
        """
        self._hp = self._default_hparams()
        self._override_defaults(policyparams)

        self.agentparams = ag_params
        self.img_sz = (64, 64)

        learned_cost_testparams = self.setup_model_testparams(self._hp.learned_cost_model_path)

        self.learned_cost = DistFuncEvaluation(QFunctionTestTime, learned_cost_testparams)
        self.device = self.learned_cost.model.get_device()

        self._img_height, self._img_width = [ag_params['image_height'], ag_params['image_width']]

        self._adim = self.agentparams['adim']
        self._sdim = self.agentparams['sdim']

        self._n_cam = 1 #self.predictor.n_cam

        self._desig_pix = None
        self._goal_pix = None
        self._images = None

        self._goal_image = None
        self._start_image = None
        self._verbose_worker = None

    def reset(self):
        self._expert_score = None
        self._images = None
        self._expert_images = None
        self._goal_image = None
        self._start_image = None
        self._verbose_worker = None
        return super(QFunctionController, self).reset()

    def setup_model_testparams(self, model_dir):
        learned_cost_testparams = {
            'batch_size': self._hp.num_samples,
            'data_conf': {
                'img_sz': self.img_sz
            },
            'classifier_restore_path': model_dir,
            'classifier_restore_paths': ['']
        }
        return learned_cost_testparams

    def _default_hparams(self):
        default_dict = {
            'action_sample_batches': 1,
            'num_samples': 200,
            'learned_cost_model_path': None,
            'verbose_every_iter': False,
        }
        parent_params = super(QFunctionController, self)._default_hparams()

        for k in default_dict.keys():
            parent_params.add_hparam(k, default_dict[k])
        return parent_params

    def get_best_action(self, t=None):
        resampled_imgs = resample_imgs(self._images, self.img_sz) / 255.
        input_images = ten2pytrch(resampled_imgs, self.device)[-1]
        input_images = input_images[None].repeat(self._hp.num_samples, 1, 1, 1)
        input_states = torch.from_numpy(self._states)[None].float().to(self.device).repeat(self._hp.num_samples, 1)
        goal_img = uint2pytorch(resample_imgs(self._goal_image, self.img_sz), self._hp.num_samples, self.device)

        try_actions = np.random.uniform(-1, 1, size=(self._hp.num_samples, self._adim))
        try_actions = np.clip(try_actions, -1, 1)
        try_actions_tensor = torch.FloatTensor(try_actions).cuda()
        inp_dict = {
                 'current_img': input_images,
                 'goal_img': goal_img,
                 'actions': try_actions_tensor
              }
        qvalues = self.learned_cost.predict(inp_dict)
        best_action_ind = np.argmin(qvalues, axis=0)
        act = try_actions[best_action_ind]

        return act

    def act(self, t=None, i_tr=None, images=None, goal_image=None, verbose_worker=None, state=None):
        self._images = images
        self._states = state[-1][:2]
        print(f'state {t}: {self._states}')
        self._verbose_worker = verbose_worker

        ### Support for getting goal images from environment
        if goal_image.shape[0] == 1:
            self._goal_image = goal_image[0]
        else:
            self._goal_image = goal_image[-1, 0]  # pick the last time step as the goal image

        return {'actions': self.get_best_action(t)}
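# Hedged, self-contained sketch (not from the original repo): the same
# sample-and-argmin action selection as QFunctionController.get_best_action(),
# with a dummy quadratic cost standing in for the learned Q-function so the
# snippet runs on its own. Only numpy is used; the action dimension, the
# [-1, 1] sampling range and num_samples=200 mirror the controller above,
# while `target` is made up.
import numpy as np

num_samples, adim = 200, 2
target = np.array([0.3, -0.5])                            # pretend optimal action

try_actions = np.random.uniform(-1, 1, size=(num_samples, adim))
qvalues = np.sum((try_actions - target) ** 2, axis=1)     # stand-in for learned_cost.predict
best_action = try_actions[np.argmin(qvalues)]
print(best_action)  # close to `target` for large enough num_samples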