Example #1
    def __init__(self, ag_params, policyparams, gpu_id, ngpu):
        """

        :param ag_params: agent parameters
        :param policyparams: policy parameters
        :param gpu_id: starting gpu id
        :param ngpu: number of gpus
        """
        self._hp = self._default_hparams()
        self._override_defaults(policyparams)

        self.agentparams = ag_params
        self.img_sz = (64, 64)

        learned_cost_testparams = self.setup_model_testparams(self._hp.learned_cost_model_path)

        self.learned_cost = DistFuncEvaluation(QFunctionTestTime, learned_cost_testparams)
        self.device = self.learned_cost.model.get_device()

        self._img_height, self._img_width = [ag_params['image_height'], ag_params['image_width']]

        self._adim = self.agentparams['adim']
        self._sdim = self.agentparams['sdim']

        self._n_cam = 1 #self.predictor.n_cam

        self._desig_pix = None
        self._goal_pix = None
        self._images = None

        self._goal_image = None
        self._start_image = None
        self._verbose_worker = None
Example #2
    def __init__(self, ag_params, policyparams, gpu_id, ngpu):
        """

        :param ag_params: agent parameters
        :param policyparams: policy parameters
        :param gpu_id: starting gpu id
        :param ngpu: number of gpus
        """
        self._hp = self._default_hparams()
        self._override_defaults(policyparams)

        self.agentparams = ag_params
        self.img_sz = (64, 64)
        learned_cost_testparams = {}
        learned_cost_testparams['batch_size'] = self._hp.num_samples
        learned_cost_testparams['data_conf'] = {'img_sz': self.img_sz}  #todo currently uses 64x64!!
        learned_cost_testparams['classifier_restore_path'] = self._hp.learned_cost_model_path
        learned_cost_testparams['classifier_restore_paths'] = ['']
        self.learned_cost = DistFuncEvaluation(GCBCTestTime, learned_cost_testparams)
        self.device = self.learned_cost.model.get_device()

        self._img_height, self._img_width = [ag_params['image_height'], ag_params['image_width']]

        self._adim = self.agentparams['adim']
        self._sdim = self.agentparams['sdim']

        self._n_cam = 1

        self._desig_pix = None
        self._goal_pix = None
        self._images = None

        self._goal_image = None
        self._start_image = None
        self._verbose_worker = None
Example #3
    def __init__(self, ag_params, policyparams, gpu_id, ngpu):
        """

        :param ag_params: agent parameters
        :param policyparams: policy parameters
        :param gpu_id: starting gpu id
        :param ngpu: number of gpus
        """
        CEMBaseController.__init__(self, ag_params, policyparams)

        predictor_hparams = {}
        predictor_hparams['run_batch_size'] = min(self._hp.vpred_batch_size,
                                                  self._hp.num_samples)
        self.predictor = VPredEvaluation(self._hp.vidpred_model_path,
                                         predictor_hparams,
                                         n_gpus=ngpu,
                                         first_gpu=gpu_id)
        self.predictor.restore(gpu_mem_limit=True)
        self._net_context = self.predictor.n_context
        if self._hp.start_planning < self._net_context - 1:
            self._hp.start_planning = self._net_context - 1
        self.img_sz = self.predictor._input_hparams['img_size']

        learned_cost_testparams = {}
        learned_cost_testparams['batch_size'] = self._hp.num_samples
        learned_cost_testparams['data_conf'] = {
            'img_sz': self.img_sz
        }  #todo currently uses 64x64!!
        learned_cost_testparams[
            'classifier_restore_path'] = self._hp.learned_cost_model_path
        self.learned_cost = DistFuncEvaluation(self._hp.learned_cost,
                                               learned_cost_testparams)
        self.device = self.learned_cost.model.get_device()

        self._img_height, self._img_width = [
            ag_params['image_height'], ag_params['image_width']
        ]

        self._n_cam = 1  #self.predictor.n_cam

        self._desig_pix = None
        self._goal_pix = None
        self._images = None

        self._goal_image = None
        self._start_image = None
        self._verbose_worker = None
Example #4
    def __init__(self, ag_params, policyparams, gpu_id, ngpu):
        """

        :param ag_params: agent parameters
        :param policyparams: policy parameters
        :param gpu_id: starting gpu id
        :param ngpu: number of gpus
        """
        self._hp = self._default_hparams()
        self._override_defaults(policyparams)

        self.agentparams = ag_params
        self.img_sz = (64, 64)
        learned_cost_testparams = self.setup_model_testparams(
            self._hp.learned_cost_model_path)

        self.learned_cost = DistFuncEvaluation(DistQFunctionTestTime,
                                               learned_cost_testparams)
        self.device = self.learned_cost.model.get_device()
        learned_cost_dir = os.path.dirname(
            learned_cost_testparams['classifier_restore_path'])
        graph_dir = learned_cost_dir + '/graph.pkl'
        if not os.path.isfile(graph_dir):
            self.preconstruct_graph(graph_dir)

        self.graph, self.graph_states = self.construct_graph(graph_dir)

        inv_model_testparams = self.setup_model_testparams(
            self._hp.inv_model_path)
        self.inverse_model = DistFuncEvaluation(GCBCTestTime,
                                                inv_model_testparams)

        self._img_height, self._img_width = [
            ag_params['image_height'], ag_params['image_width']
        ]

        self._adim = self.agentparams['adim']
        self._sdim = self.agentparams['sdim']

        self._n_cam = 1  #self.predictor.n_cam

        self._desig_pix = None
        self._goal_pix = None
        self._images = None

        self._goal_image = None
        self._start_image = None
        self._verbose_worker = None
Example #5
class BCController(Policy):
    """
    Use the goal-conditioned behavior cloning baseline model to perform control.
    """

    def __init__(self, ag_params, policyparams, gpu_id, ngpu):
        """

        :param ag_params: agent parameters
        :param policyparams: policy parameters
        :param gpu_id: starting gpu id
        :param ngpu: number of gpus
        """
        self._hp = self._default_hparams()
        self._override_defaults(policyparams)

        self.agentparams = ag_params
        self.img_sz = (64, 64)
        learned_cost_testparams = {}
        learned_cost_testparams['batch_size'] = self._hp.num_samples
        learned_cost_testparams['data_conf'] = {'img_sz': self.img_sz}  #todo currently uses 64x64!!
        learned_cost_testparams['classifier_restore_path'] = self._hp.learned_cost_model_path
        learned_cost_testparams['classifier_restore_paths'] = ['']
        self.learned_cost = DistFuncEvaluation(GCBCTestTime, learned_cost_testparams)
        self.device = self.learned_cost.model.get_device()

        self._img_height, self._img_width = [ag_params['image_height'], ag_params['image_width']]

        self._adim = self.agentparams['adim']
        self._sdim = self.agentparams['sdim']

        self._n_cam = 1

        self._desig_pix = None
        self._goal_pix = None
        self._images = None

        self._goal_image = None
        self._start_image = None
        self._verbose_worker = None

    def reset(self):
        self._expert_score = None
        self._images = None
        self._expert_images = None
        self._goal_image = None
        self._start_image = None
        self._verbose_worker = None
        return super(BCController, self).reset()

    def _default_hparams(self):
        default_dict = {
            'action_sample_batches': 1,
            'num_samples': 200,
            'learned_cost_model_path': None,
            'verbose_every_iter': False,
        }
        parent_params = super(BCController, self)._default_hparams()

        for k in default_dict.keys():
            parent_params.add_hparam(k, default_dict[k])
        return parent_params

    def get_best_action(self, t=None):
        resampled_imgs = resample_imgs(self._images, self.img_sz) / 255.
        input_images = ten2pytrch(resampled_imgs, self.device)[-1]
        input_images = input_images[None].repeat(self._hp.num_samples, 1, 1, 1)
        input_states = torch.from_numpy(self._states)[None].float().to(self.device).repeat(self._hp.num_samples, 1)
        goal_img = uint2pytorch(resample_imgs(self._goal_image, self.img_sz), self._hp.num_samples, self.device)

        inp_dict = {'current_img': input_images,
                    'current_state': input_states,
                    'goal_img': goal_img,}
        act = self.learned_cost.predict(inp_dict).action[0].cpu().detach().numpy()
        return act

    def act(self, t=None, i_tr=None, images=None, goal_image=None, verbose_worker=None, state=None):
        self._images = images
        self._states = state
        self._verbose_worker = verbose_worker

        ### Support for getting goal images from environment
        if goal_image.shape[0] == 1:
            self._goal_image = goal_image[0]
        else:
            self._goal_image = goal_image[-1, 0]  # pick the last time step as the goal image

        return {'actions': self.get_best_action(t)}
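# Hedged, self-contained sketch (not from the original repo): it mirrors the
# tensor preparation in BCController.get_best_action() above with dummy data so
# the expected shapes are explicit. The repo helpers (resample_imgs, ten2pytrch,
# uint2pytorch) are replaced by plain torch ops; the 64x64 image size and
# num_samples=200 follow the defaults above, everything else is made up.
import torch

num_samples, H, W = 200, 64, 64
current_img = torch.rand(3, H, W)        # last observed frame, values in [0, 1]
goal_img = torch.rand(3, H, W)           # goal frame, values in [0, 1]
state = torch.rand(2)                    # low-dimensional robot state

inp_dict = {
    'current_img': current_img[None].repeat(num_samples, 1, 1, 1),   # (N, 3, H, W)
    'current_state': state[None].repeat(num_samples, 1),             # (N, 2)
    'goal_img': goal_img[None].repeat(num_samples, 1, 1, 1),         # (N, 3, H, W)
}
# The GCBC model consumes a dict like this and returns a batch of actions;
# BCController then takes the first one: predict(inp_dict).action[0].
print({k: tuple(v.shape) for k, v in inp_dict.items()})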
Example #6
class SORBController(Policy):
    """
    Run Search on the Replay Buffer.
    Code largely based on the author's implementation in https://colab.research.google.com/github/google-research/google-research/blob/master/sorb/SoRB.ipynb.
    However, a key difference is that we use a goal-conditioned behavior cloning inverse model rather than an actor
    learned alongside the distance function, which we find performs much better empirically.
    """
    def __init__(self, ag_params, policyparams, gpu_id, ngpu):
        """

        :param ag_params: agent parameters
        :param policyparams: policy parameters
        :param gpu_id: starting gpu id
        :param ngpu: number of gpus
        """
        self._hp = self._default_hparams()
        self._override_defaults(policyparams)

        self.agentparams = ag_params
        self.img_sz = (64, 64)
        learned_cost_testparams = self.setup_model_testparams(
            self._hp.learned_cost_model_path)

        self.learned_cost = DistFuncEvaluation(DistQFunctionTestTime,
                                               learned_cost_testparams)
        self.device = self.learned_cost.model.get_device()
        learned_cost_dir = os.path.dirname(
            learned_cost_testparams['classifier_restore_path'])
        graph_dir = learned_cost_dir + '/graph.pkl'
        if not os.path.isfile(graph_dir):
            self.preconstruct_graph(graph_dir)

        self.graph, self.graph_states = self.construct_graph(graph_dir)

        inv_model_testparams = self.setup_model_testparams(
            self._hp.inv_model_path)
        self.inverse_model = DistFuncEvaluation(GCBCTestTime,
                                                inv_model_testparams)

        self._img_height, self._img_width = [
            ag_params['image_height'], ag_params['image_width']
        ]

        self._adim = self.agentparams['adim']
        self._sdim = self.agentparams['sdim']

        self._n_cam = 1  #self.predictor.n_cam

        self._desig_pix = None
        self._goal_pix = None
        self._images = None

        self._goal_image = None
        self._start_image = None
        self._verbose_worker = None

    def setup_model_testparams(self, model_dir):
        learned_cost_testparams = {
            'batch_size': self._hp.num_samples,
            'data_conf': {
                'img_sz': self.img_sz
            },
            'classifier_restore_path': model_dir,
            'classifier_restore_paths': ['']
        }
        return learned_cost_testparams

    def compute_pairwise_dist(self, v1, v2=None):
        if v2 is None:
            v2 = v1
        dists = []
        if not torch.is_tensor(v2):
            v2 = torch.FloatTensor(v2)
        if not torch.is_tensor(v1):
            v1 = torch.FloatTensor(v1)

        if v2.shape[0] == 1:
            curr = 0
            while curr < v1.shape[0]:
                batch = v1[curr:curr + self._hp.dloader_bs]
                inp_dict = {
                    'current_img': batch.cuda(),
                    'goal_img': v2.repeat(batch.shape[0], 1, 1, 1).cuda(),
                }
                score = self.learned_cost.predict(inp_dict)
                if not hasattr(score, '__len__'):
                    score = np.array([score])
                dists.append(score)
                curr += self._hp.dloader_bs
            dists = np.concatenate(dists)[None]
        else:
            for i, image in tqdm.tqdm(enumerate(v1)):
                inp_dict = {
                    'current_img': image[None].repeat(v2.shape[0], 1, 1,
                                                      1).cuda(),
                    'goal_img': v2.cuda(),
                }
                score = self.learned_cost.predict(inp_dict)
                dists.append(score)
        dists = np.stack(dists)
        return dists

    def preconstruct_graph(self, cache_fname):
        images = self.get_random_observations()
        dist = self.compute_pairwise_dist(images)
        graph = {'images': images.cpu().numpy(), 'dists': dist}
        with open(cache_fname, 'wb') as f:
            pkl.dump(graph, f)

    def construct_graph(self, cache_fname):
        # Load cache
        with open(cache_fname, 'rb') as f:
            data = pkl.load(f)
            images, dists = data['images'], data['dists']
        g = nx.DiGraph()
        for i, s_i in enumerate(images):
            for j, s_j in enumerate(images):
                length = dists[i, j]
                if self.dist_check(length):
                    g.add_edge(i, j, weight=length)
        return g, images
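    # Hedged toy illustration (comments only, not part of the original code):
    # construct_graph() keeps a directed edge i -> j only when the learned
    # distance passes dist_check(), i.e. min_dist < dists[i, j] < max_dist.
    # With made-up numbers and the default thresholds (0.0, 15.0):
    #
    #   dists = [[ 0.,  3., 20.],
    #            [ 4.,  0.,  2.],
    #            [25.,  6.,  0.]]
    #
    # the surviving edges are 0->1 (3.), 1->0 (4.), 1->2 (2.) and 2->1 (6.);
    # 0->2 and 2->0 are pruned as unreliable long-range estimates, so reaching
    # node 2 from node 0 requires the two-hop path 0 -> 1 -> 2.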

    def get_random_observations(self):
        hp = AttrDict(img_sz=(64, 64), sel_len=-1, T=31)
        dataset = FixLenVideoDataset(self._hp.graph_dataset,
                                     self.learned_cost.model._hp,
                                     hp).get_data_loader(self._hp.dloader_bs)
        total_images = []
        dl = iter(dataset)
        for i in range(self._hp.graph_size // self._hp.dloader_bs):
            try:
                batch = next(dl)
            except StopIteration:
                dl = iter(dataset)
                batch = next(dl)
            images = batch['demo_seq_images']
            selected_images = images[torch.arange(len(images)),
                                     torch.randint(0, images.shape[1],
                                                   (len(images), ))]
            total_images.append(selected_images)
        total_images = torch.cat(total_images)
        return total_images

    def reset(self):
        self._expert_score = None
        self._images = None
        self._expert_images = None
        self._goal_image = None
        self._start_image = None
        self._verbose_worker = None
        return super(SORBController, self).reset()

    def _default_hparams(self):
        default_dict = {
            'learned_cost_model_path': None,
            'inv_model_path': None,
            'verbose_every_iter': False,
            'dist_q': True,
            'graph_dataset': None,
            'graph_size': 5000,
            'dloader_bs': 500,
            'num_samples': 200,
            'max_dist': 15.0,
            'min_dist': 0.0,
        }
        parent_params = super(SORBController, self)._default_hparams()

        for k in default_dict.keys():
            parent_params.add_hparam(k, default_dict[k])
        return parent_params

    def dist_check(self, dist):
        return self._hp.min_dist < dist < self._hp.max_dist

    def get_waypoint(self, input_images, goal_img):
        g2 = self.graph.copy()
        start_to_rb = self.compute_pairwise_dist(input_images[None],
                                                 self.graph_states).flatten()
        rb_to_goal = self.compute_pairwise_dist(self.graph_states,
                                                goal_img).flatten()
        start_to_goal = self.compute_pairwise_dist(
            input_images[None], goal_img).flatten().squeeze()
        for i, (dist_from_start,
                dist_to_goal) in enumerate(zip(start_to_rb, rb_to_goal)):
            if self.dist_check(dist_from_start):
                g2.add_edge('start', i, weight=dist_from_start)
            if self.dist_check(dist_to_goal):
                g2.add_edge(i, 'goal', weight=dist_to_goal)
        try:
            path = nx.shortest_path(g2, 'start', 'goal', weight='weight')
            edge_lengths = []
            for (i, j) in zip(path[:-1], path[1:]):
                edge_lengths.append(g2[i][j]['weight'])
        except (nx.NetworkXNoPath, nx.NodeNotFound):  # no path through the graph; fall back to the direct edge
            path = ['start', 'goal']
            edge_lengths = [start_to_goal]

        wypt_to_goal_dist = np.cumsum(
            edge_lengths[::-1])[::-1]  # Reverse CumSum
        waypoint_vec = list(path)[1:-1]
        verbose_folder = self.traj_log_dir
        plan_imgs = [self.graph_states[i] for i in waypoint_vec]
        plan_imgs_cat = np.concatenate([input_images.cpu().numpy()] +
                                       plan_imgs + [goal_img[0].cpu().numpy()],
                                       axis=1)
        plan_imgs_cat = np.transpose((plan_imgs_cat + 1) / 2 * 255, [1, 2, 0])
        cv2.imwrite(verbose_folder + '/plan_{}.png'.format(self._t),
                    plan_imgs_cat[:, :, ::-1])

        return waypoint_vec, wypt_to_goal_dist[1:], edge_lengths[
            0], start_to_goal
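    # Hedged condensed sketch (comments only, not from the source) of the core
    # of get_waypoint() above: the replay-buffer graph is copied, 'start' and
    # 'goal' nodes are attached wherever the learned distance passes
    # dist_check(), and Dijkstra gives the waypoint sequence.
    #
    #   g2 = self.graph.copy()
    #   for i, (d_s, d_g) in enumerate(zip(start_to_rb, rb_to_goal)):
    #       if self.dist_check(d_s):
    #           g2.add_edge('start', i, weight=d_s)
    #       if self.dist_check(d_g):
    #           g2.add_edge(i, 'goal', weight=d_g)
    #   path = nx.shortest_path(g2, 'start', 'goal', weight='weight')
    #   waypoint_vec = path[1:-1]   # replay-buffer indices to visit, in order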

    def get_best_action(self, t=None):
        resampled_imgs = resample_imgs(self._images, self.img_sz) / 255.
        input_images = ten2pytrch(resampled_imgs, self.device)[-1]
        goal_img = uint2pytorch(resample_imgs(self._goal_image, self.img_sz),
                                self._hp.num_samples, self.device)

        waypoints, graph_dists, first_wp_dist, start_to_goal = self.get_waypoint(
            input_images, goal_img[0][None])
        if len(waypoints) > 0 and (first_wp_dist < start_to_goal
                                   or start_to_goal > self._hp.max_dist):
            wpt_goal = torch.FloatTensor(
                self.graph_states[waypoints[0]])[None].to(self.device)
        else:
            wpt_goal = goal_img[0][None]

        inp_dict = {
            'current_img': input_images[None],
            'goal_img': wpt_goal,
        }

        act = self.inverse_model.predict(
            inp_dict).action[0].cpu().detach().numpy()
        return act

    def act(self,
            t=None,
            i_tr=None,
            images=None,
            goal_image=None,
            verbose_worker=None,
            state=None):
        self._images = images
        self._states = state
        self._verbose_worker = verbose_worker
        self._t = t

        ### Support for getting goal images from environment
        if goal_image.shape[0] == 1:
            self._goal_image = goal_image[0]
        else:
            self._goal_image = goal_image[
                -1, 0]  # pick the last time step as the goal image

        action = {'actions': self.get_best_action(t)}
        print(action)
        return action
Example #7
class LearnedCostController(CEMBaseController):
    """
    Cross Entropy Method Stochastic Optimizer
    """
    def __init__(self, ag_params, policyparams, gpu_id, ngpu):
        """

        :param ag_params: agent parameters
        :param policyparams: policy parameters
        :param gpu_id: starting gpu id
        :param ngpu: number of gpus
        """
        CEMBaseController.__init__(self, ag_params, policyparams)

        predictor_hparams = {}
        predictor_hparams['run_batch_size'] = min(self._hp.vpred_batch_size,
                                                  self._hp.num_samples)
        self.predictor = VPredEvaluation(self._hp.vidpred_model_path,
                                         predictor_hparams,
                                         n_gpus=ngpu,
                                         first_gpu=gpu_id)
        self.predictor.restore(gpu_mem_limit=True)
        self._net_context = self.predictor.n_context
        if self._hp.start_planning < self._net_context - 1:
            self._hp.start_planning = self._net_context - 1
        self.img_sz = self.predictor._input_hparams['img_size']

        learned_cost_testparams = {}
        learned_cost_testparams['batch_size'] = self._hp.num_samples
        learned_cost_testparams['data_conf'] = {
            'img_sz': self.img_sz
        }  #todo currently uses 64x64!!
        learned_cost_testparams[
            'classifier_restore_path'] = self._hp.learned_cost_model_path
        self.learned_cost = DistFuncEvaluation(self._hp.learned_cost,
                                               learned_cost_testparams)
        self.device = self.learned_cost.model.get_device()

        self._img_height, self._img_width = [
            ag_params['image_height'], ag_params['image_width']
        ]

        self._n_cam = 1  #self.predictor.n_cam

        self._desig_pix = None
        self._goal_pix = None
        self._images = None

        self._goal_image = None
        self._start_image = None
        self._verbose_worker = None

    def reset(self):
        self._expert_score = None
        self._images = None
        self._expert_images = None
        self._goal_image = None
        self._start_image = None
        self._verbose_worker = None
        return super(LearnedCostController, self).reset()

    def _default_hparams(self):
        default_dict = {
            'finalweight': 10,
            'state_append': None,
            'compare_to_expert': False,
            'verbose_img_height': 128,
            'verbose_frac_display': 0.,
            'vidpred_model_path': '',
            'learned_cost_model_path': '',
            'vpred_batch_size': 200,
            'learned_cost': BaseTempDistClassifierTestTime
        }
        parent_params = super(LearnedCostController, self)._default_hparams()

        for k in default_dict.keys():
            parent_params.add_hparam(k, default_dict[k])
        return parent_params

    def evaluate_rollouts(self, actions, cem_itr):
        previous_actions = np.concatenate([
            x[None] for x in self._sampler.chosen_actions[-self._net_context:]
        ],
                                          axis=0)
        previous_actions = np.tile(previous_actions, [actions.shape[0], 1, 1])
        # input_actions = np.concatenate((previous_actions, actions), axis=1)[:, :self.predictor.sequence_length]

        resampled_imgs = resample_imgs(self._images, self.img_sz)
        last_frames, last_states = get_context(self._net_context, self._t,
                                               self._state, resampled_imgs,
                                               self._hp)
        context = {
            "context_frames": last_frames[0],  #only take first batch example
            "context_actions": previous_actions[0],
            "context_states": last_states[0]
        }
        prediction_dict = self.predictor(context, {'actions': actions})
        gen_images = prediction_dict['predicted_frames']

        scores = []

        for tpred in range(gen_images.shape[1]):
            input_images = ten2pytrch(gen_images[:, tpred], self.device)
            inp_dict = {
                'current_img':
                input_images,
                'goal_img':
                uint2pytorch(resample_imgs(self._goal_image, self.img_sz),
                             self._hp.num_samples, self.device)
            }

            print('perform prediction for ', tpred)
            scores.append(self.learned_cost.predict(inp_dict))

        # weight final time step by some number and average over time.
        scores = np.stack(scores, 1)
        scores = self._weight_scores(scores)

        if self._verbose_condition(cem_itr):
            verbose_folder = self.traj_log_dir + "/planning_{}_itr_{}".format(
                self._t, cem_itr)

            content_dict = OrderedDict()
            visualize_indices = scores.argsort()[:10]

            # start images
            for c in range(self._n_cam):
                name = 'cam_{}_start'.format(c)
                save_path = save_img_direct(verbose_folder, name,
                                            self._images[-1, c])
                content_dict[name] = [save_path for _ in visualize_indices]

            name = 'goal_img'
            save_path = save_img_direct(verbose_folder, name,
                                        (self._goal_image * 255).astype(
                                            np.uint8))
            content_dict[name] = [save_path for _ in visualize_indices]

            # render predicted images
            for c in range(self._n_cam):
                verbose_images = [
                    (gen_images[g_i, :, c] * 255).astype(np.uint8)
                    for g_i in visualize_indices
                ]
                row_name = 'cam_{}_pred_images'.format(c)
                content_dict[row_name] = save_gifs_direct(
                    verbose_folder, row_name, verbose_images)

            self.learned_cost.model.visualize_test_time(
                content_dict, visualize_indices, verbose_folder)

            # save scores
            content_dict['scores'] = scores[visualize_indices]

            html_page = fill_template(cem_itr,
                                      self._t,
                                      content_dict,
                                      img_height=self._hp.verbose_img_height)
            save_html_direct("{}/plan.html".format(verbose_folder), html_page)

            #todo make logger instead of verbose worker !!

        return scores

    def _weight_scores(self, raw_scores):
        if self._hp.finalweight >= 0:
            scores = raw_scores.copy()
            scores[:, -1] *= self._hp.finalweight
            scores = np.sum(
                scores,
                axis=1) / sum([1. for _ in range(self.predictor.horizon - 1)] +
                              [self._hp.finalweight])
        else:
            scores = raw_scores[:, -1].copy()
        return scores
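    # Hand-worked example (comments only, not from the source) of the weighting
    # in _weight_scores() above, assuming finalweight=10 and a prediction
    # horizon of 4 steps:
    #
    #   raw scores for one trajectory over 4 predicted steps: [1., 2., 3., 4.]
    #   after scaling the last step by finalweight:           [1., 2., 3., 40.]
    #   weighted average = (1 + 2 + 3 + 40) / ((4 - 1) * 1. + 10) = 46 / 13 ≈ 3.54
    #
    # so the final predicted frame dominates the ranking while earlier frames
    # still contribute; with finalweight < 0 only the last step is used.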

    def act(self,
            t=None,
            i_tr=None,
            images=None,
            goal_image=None,
            verbose_worker=None,
            state=None):
        self._images = images
        self._verbose_worker = verbose_worker
        ### Support for getting goal images from environment
        if goal_image.shape[0] == 1:
            self._goal_image = goal_image[0]
        else:
            self._goal_image = goal_image[
                -1, 0]  # pick the last time step as the goal image

        return super(LearnedCostController, self).act(t, i_tr, state)
Example #8
class LearnedCostController(CEMBaseController):
    """
    Cross Entropy Method Stochastic Optimizer
    """
    def __init__(self, ag_params, policyparams, gpu_id, ngpu):
        """

        :param ag_params: agent parameters
        :param policyparams: policy parameters
        :param gpu_id: starting gpu id
        :param ngpu: number of gpus
        """
        CEMBaseController.__init__(self, ag_params, policyparams)

        predictor_hparams = {}
        predictor_hparams['run_batch_size'] = min(self._hp.vpred_batch_size, self._hp.num_samples)

        self.predictor = VPredEvaluation(self._hp.vidpred_model_path, predictor_hparams, n_gpus=ngpu, first_gpu=gpu_id)
        self.predictor.restore(gpu_mem_limit=True)

        self._net_context = self.predictor.n_context
        if self._hp.start_planning < self._net_context - 1:
            self._hp.start_planning = self._net_context - 1
        self.img_sz = self.predictor._input_hparams['img_size']

        learned_cost_testparams = {}
        learned_cost_testparams['batch_size'] = self._hp.num_samples
        learned_cost_testparams['data_conf'] = {'img_sz': self.img_sz}  #todo currently uses 64x64!!
        learned_cost_testparams['classifier_restore_path'] = self._hp.learned_cost_model_path
        learned_cost_testparams['classifier_restore_paths'] = self._hp.learned_cost_model_paths

        self.learned_cost = DistFuncEvaluation(self._hp.learned_cost, learned_cost_testparams)
        self.device = self.learned_cost.model.get_device()

        self._img_height, self._img_width = [ag_params['image_height'], ag_params['image_width']]

        self._n_cam = 1 #self.predictor.n_cam

        self._desig_pix = None
        self._goal_pix = None
        self._images = None

        self._goal_image = None
        self._start_image = None
        self._verbose_worker = None


    def reset(self):
        self._expert_score = None
        self._images = None
        self._expert_images = None
        self._goal_image = None
        self._start_image = None
        self._verbose_worker = None
        return super(LearnedCostController, self).reset()

    def _default_hparams(self):
        default_dict = {
            'finalweight': 10,
            'state_append': None,
            'compare_to_expert': False,
            'verbose_img_height': 128,
            'verbose_frac_display': 0.,
            'vidpred_model_path': '',
            'learned_cost_model_path': '',
            'vpred_batch_size': 200,
            'learned_cost': QFunctionTestTime,
            'planning_horizon': 100,
            'log_raw_imgs': False,
        }
        parent_params = super(LearnedCostController, self)._default_hparams()

        for k in default_dict.keys():
            parent_params.add_hparam(k, default_dict[k])
        return parent_params

    @staticmethod
    def kendall_tau(scores_1, true_scores, num_pairs_ret=10):
        """
        Given two np arrays of scores for the same trajectories, return the normalized Kendall tau score
        (disagreement) between them
        """
        assert len(scores_1) == len(true_scores)
        ## Double argsort gives rankings
        total = len(scores_1)
        ranks_1, ranks_2 = scores_1.argsort().argsort(), true_scores.argsort().argsort()
        disagree = 0
        ret = [] #indices to return
        true_score_ordering = true_scores.argsort()

        print(f'Score of true best trajectory: {scores_1[true_score_ordering[0]]}')
        for number, i in enumerate(true_score_ordering): # Go through the indices based on how good they actually are
            for j in true_score_ordering[number:]:
                if i == j:
                    continue
                if (ranks_1[i] < ranks_1[j] and ranks_2[i] > ranks_2[j]) or\
                   (ranks_1[i] > ranks_1[j] and ranks_2[i] < ranks_2[j]):
                    disagree += 1
                    if len(ret) < num_pairs_ret:
                        # Return better trajectory by true ranking first
                        if ranks_2[i] < ranks_2[j]:
                            ret.append((i, j))
                        else:
                            ret.append((j, i))
                    continue
        num_pairs = total * (total - 1) / 2.0  # number of distinct pairs
        return 1.0 * disagree / num_pairs, ret
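    # Hand-worked example (comments only, not from the source) for kendall_tau():
    #
    #   scores_1    = np.array([0.1, 0.3, 0.2])   # e.g. learned-cost scores
    #   true_scores = np.array([1.0, 2.0, 3.0])   # e.g. ground-truth costs
    #
    # ranks_1 = [0, 2, 1] and ranks_2 = [0, 1, 2]; of the three distinct pairs
    # only (1, 2) is ordered differently by the two rankings, i.e. 1 disagreement
    # out of 3 pairs, and that pair is returned (better-by-true-ranking first).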

    def evaluate_rollouts(self, actions, cem_itr):
        previous_actions = np.concatenate([x[None] for x in self._sampler.chosen_actions[-self._net_context:]], axis=0)
        previous_actions = np.tile(previous_actions, [actions.shape[0], 1, 1])
        goal_state_rep = torch.FloatTensor(self._goal_state).to(self.device)
        goal_state_rep = goal_state_rep[None].repeat(actions.shape[0], 1)

        resampled_imgs = resample_imgs(self._images, self.img_sz)
        last_frames, last_states = get_context(self._net_context, self._t,
                                               self._state, resampled_imgs, self._hp)
        context = {
            "context_frames": last_frames[0],  #only take first batch example
            "context_actions": previous_actions[0],
            "context_states": last_states[0]
        }

        prediction_dict = self.predictor(context, {'actions': actions})
        gen_images = prediction_dict['predicted_frames']
        if 'predicted_states' in prediction_dict:
            gen_states = prediction_dict['predicted_states']
        else:
            gen_states = np.zeros((actions.shape[0], gen_images.shape[1], goal_state_rep.shape[-1]))

        scores = []

        for tpred in range(gen_images.shape[1]):
            input_images = ten2pytrch(gen_images[:, tpred], self.device)
            inp_dict = {'current_img': input_images,
                        'current_state': torch.FloatTensor(gen_states[:, tpred]).to(self.device),
                        'goal_state': goal_state_rep,
                        'goal_img': uint2pytorch(resample_imgs(self._goal_image, self.img_sz), gen_images.shape[0], self.device),}
            print('perform prediction for ', tpred)
            score = self.learned_cost.predict(inp_dict)
            scores.append(score)

        # weight final time step by some number and average over time.
        scores = np.stack(scores, 1)
        scores = self._weight_scores(scores).squeeze()

        if self._verbose_condition(cem_itr):
            verbose_folder = self.traj_log_dir + "/planning_{}_itr_{}".format(self._t, cem_itr)

            content_dict = OrderedDict()
            visualize_indices = scores.argsort()[:10]

            # start images
            for c in range(self._n_cam):
                name = 'cam_{}_start'.format(c)
                save_path = save_img_direct(verbose_folder, name, self._images[-1, c])
                content_dict[name] = [save_path for _ in visualize_indices]

            name = 'goal_img'
            save_path = save_img_direct(verbose_folder, name, (self._goal_image*255).astype(np.uint8))
            content_dict[name] = [save_path for _ in visualize_indices]

            # render predicted images
            for c in range(self._n_cam):
                verbose_images = [(gen_images[g_i, :]*255).astype(np.uint8) for g_i in visualize_indices]
                verbose_images = [resample_imgs(traj, self._goal_image.shape).squeeze() for traj in verbose_images]
                row_name = 'cam_{}_pred_images'.format(c)
                content_dict[row_name] = save_gifs_direct(verbose_folder,
                                                       row_name, verbose_images)
                if self._hp.log_raw_imgs:
                    for i, frames in enumerate(verbose_images):
                        save_imgs_direct(verbose_folder, '{}_{}_frame'.format(row_name, i), frames, fmt='png')

            self.learned_cost.model.visualize_test_time(content_dict, visualize_indices, verbose_folder)

        return scores

    def _weight_scores(self, raw_scores):
        scores = raw_scores.copy()
        # If the model is predicting longer sequences than the planner is actually going to use, truncate
        scores = scores[:, :self._planning_horizon]
        if self.last_plan:
            last_step = self.agentparams['T'] - self._curr_step - 1
            for i in range(last_step, scores.shape[1]):
                scores[:, i] = scores[:, last_step]

        if self._hp.finalweight >= 0:
            scores[:, -1] *= self._hp.finalweight
            scores = np.sum(scores, axis=1) / sum([1. for _ in range(self._planning_horizon - 1)] + [self._hp.finalweight])
        else:
            scores = scores[:, -1].copy()
        return scores
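    # Hedged illustration (comments only, not from the source) of the last_plan
    # clamp above: with agentparams['T'] = 30, a planning horizon of 10 and the
    # current step t = 25, only a few more actions will actually be executed, so
    # last_step = 30 - 25 - 1 = 4 and columns 4..9 of `scores` are overwritten
    # with column 4. Predicted steps the robot can never reach therefore cannot
    # influence the ranking; the finalweight average is then applied as before.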

    def act(self, t=None, i_tr=None, images=None, goal_image=None, verbose_worker=None, state=None, policy_out=None, goal_obj_pose=None, goal_state=None):
        self._images = np.array(images)
        self._verbose_worker = verbose_worker
        self._goal_obj_pos = np.array(goal_obj_pose)
        self._goal_state = np.array(goal_state)
        self._planning_horizon = min(self._hp.planning_horizon, self.predictor.horizon)
        self._curr_step = t

        if self.agentparams['T'] - t < self._planning_horizon:
            self.last_plan = True
        else:
            self.last_plan = False

        ### Support for getting goal images from environment
        if goal_image.shape[0] == 1:
            self._goal_image = goal_image[0]
        else:
            self._goal_image = goal_image[-1, 0]  # pick the last time step as the goal image

        self._goal_image = np.array(self._goal_image)

        return super(LearnedCostController, self).act(t, i_tr, state)
Example #9
class QFunctionController(Policy):
    """
    Cross Entropy Method Stochastic Optimizer
    """
    def __init__(self, ag_params, policyparams, gpu_id, ngpu):
        """

        :param ag_params: agent parameters
        :param policyparams: policy parameters
        :param gpu_id: starting gpu id
        :param ngpu: number of gpus
        """
        self._hp = self._default_hparams()
        self._override_defaults(policyparams)

        self.agentparams = ag_params
        self.img_sz = (64, 64)

        learned_cost_testparams = self.setup_model_testparams(self._hp.learned_cost_model_path)

        self.learned_cost = DistFuncEvaluation(QFunctionTestTime, learned_cost_testparams)
        self.device = self.learned_cost.model.get_device()

        self._img_height, self._img_width = [ag_params['image_height'], ag_params['image_width']]

        self._adim = self.agentparams['adim']
        self._sdim = self.agentparams['sdim']

        self._n_cam = 1 #self.predictor.n_cam

        self._desig_pix = None
        self._goal_pix = None
        self._images = None

        self._goal_image = None
        self._start_image = None
        self._verbose_worker = None

    def reset(self):
        self._expert_score = None
        self._images = None
        self._expert_images = None
        self._goal_image = None
        self._start_image = None
        self._verbose_worker = None
        return super(QFunctionController, self).reset()

    def setup_model_testparams(self, model_dir):
        learned_cost_testparams = {
            'batch_size': self._hp.num_samples,
            'data_conf': {
                'img_sz': self.img_sz
            },
            'classifier_restore_path': model_dir,
            'classifier_restore_paths': ['']
        }
        return learned_cost_testparams

    def _default_hparams(self):
        default_dict = {
            'action_sample_batches': 1,
            'num_samples': 200,
            'learned_cost_model_path': None,
            'verbose_every_iter': False,
        }
        parent_params = super(QFunctionController, self)._default_hparams()

        for k in default_dict.keys():
            parent_params.add_hparam(k, default_dict[k])
        return parent_params

    def get_best_action(self, t=None):
        resampled_imgs = resample_imgs(self._images, self.img_sz) / 255.
        input_images = ten2pytrch(resampled_imgs, self.device)[-1]
        input_images = input_images[None].repeat(self._hp.num_samples, 1, 1, 1)
        input_states = torch.from_numpy(self._states)[None].float().to(self.device).repeat(self._hp.num_samples, 1)
        goal_img = uint2pytorch(resample_imgs(self._goal_image, self.img_sz), self._hp.num_samples, self.device)

        try_actions = np.random.uniform(-1, 1, size=(self._hp.num_samples, self._adim))
        try_actions = np.clip(try_actions, -1, 1)
        try_actions_tensor = torch.FloatTensor(try_actions).cuda()
        inp_dict = {
                 'current_img': input_images,
                 'goal_img': goal_img,
                 'actions': try_actions_tensor
              }
        qvalues = self.learned_cost.predict(inp_dict)
        best_action_ind = np.argmin(qvalues, axis=0)
        act = try_actions[best_action_ind]

        return act

    def act(self, t=None, i_tr=None, images=None, goal_image=None, verbose_worker=None, state=None):
        self._images = images
        self._states = state[-1][:2]
        print(f'state {t}: {self._states}')
        self._verbose_worker = verbose_worker

        ### Support for getting goal images from environment
        if goal_image.shape[0] == 1:
            self._goal_image = goal_image[0]
        else:
            self._goal_image = goal_image[-1, 0]  # pick the last time step as the goal image

        return {'actions': self.get_best_action(t)}
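# Hedged, self-contained sketch (not from the original repo): the same
# sample-and-argmin action selection as QFunctionController.get_best_action(),
# with a dummy quadratic cost standing in for the learned Q-function so the
# snippet runs on its own. Only numpy is used; the action dimension, the
# [-1, 1] sampling range and num_samples=200 mirror the controller above,
# while `target` is made up.
import numpy as np

num_samples, adim = 200, 2
target = np.array([0.3, -0.5])                            # pretend optimal action

try_actions = np.random.uniform(-1, 1, size=(num_samples, adim))
qvalues = np.sum((try_actions - target) ** 2, axis=1)     # stand-in for learned_cost.predict
best_action = try_actions[np.argmin(qvalues)]
print(best_action)  # close to `target` for large enough num_samples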