Beispiel #1
0
 def generate_reachable_belief_points(self, belief, max_belief_points):
     """
     Collect belief points reachable from `belief` by simulating random
     state/action transitions and applying the belief update.

     :param belief: initial belief distribution (copied, never mutated)
     :param max_belief_points: stop once this many distinct beliefs are collected
     :return: list of belief points, starting with a copy of `belief`
     """
     m = self.model
     ntrials = 10
     # Cap the number of outer sampling rounds so the loop always terminates,
     # even when fewer than max_belief_points distinct beliefs are reachable.
     max_rounds = ntrials * 100
     n_rounds = 0
     beliefs = [belief.copy()]
     bel = belief.copy()
     while len(beliefs) < max_belief_points and n_rounds < max_rounds:
         for _ in range(ntrials):
             si = rand_choice(m.states)
             ai = rand_choice(m.get_legal_actions(si))
             # Only the observation is needed for the belief update; the
             # sampled next state, reward and cost are discarded.
             _, oj, _, _ = m.simulate_action(si, ai, debug=False)
             new_bel = self.update_belief(bel, ai, oj)
             if new_bel not in beliefs:
                 beliefs.append(new_bel.copy())
             if len(beliefs) >= max_belief_points:
                 break
             bel = new_bel.copy()
         n_rounds += 1
     return beliefs
Beispiel #2
0
    def rollout(self, state, h, depth, max_depth, budget):
        """
        Estimate the value of history 'h' by a random playout.

        Recursively picks a random legal action, simulates one step, and
        adds the discounted value of the remainder, stopping once the
        planning horizon is exceeded or the budget is exhausted.
        :param state: starting state's index
        :param h: history sequence
        :param depth: current planning horizon
        :param max_depth: max planning horizon
        :param budget: remaining cost budget; each simulated step subtracts its cost
        :return: discounted cumulative reward of the playout
        """
        if depth > max_depth or budget <= 0:
            return 0

        action = rand_choice(self.model.get_legal_actions(state))
        next_state, observation, reward, step_cost = self.model.simulate_action(state, action)

        future_value = self.rollout(next_state, h + [action, observation],
                                    depth + 1, max_depth, budget - step_cost)
        return reward + self.model.discount * future_value
Beispiel #3
0
 def sample_state(self):
     """Draw one state at random (via rand_choice) from the particle set self.B."""
     particles = self.B
     return rand_choice(particles)
Beispiel #4
0
    def update_belief(self, belief, action, obs):
        """
        Updates the belief tree given the environment feedback.
        extending the history, updating particle sets, etc

        Re-roots the search tree at the belief node reached by (action, obs),
        refills that node's particle set by rejection sampling, prunes the
        rest of the tree, and performs particle re-invigoration when the new
        belief contains zero-probability entries.

        :param belief: current belief distribution. NOTE(review): not read in
            this body -- the root's particle set is used instead; confirm
            whether this parameter is still needed.
        :param action: the action that was actually executed
        :param obs: the observation actually received from the environment
        :return: the belief distribution recomputed from the new root's particles
        """
        # NOTE(review): `m` is assigned but never used below; self.model is
        # referenced directly everywhere in this method.
        m, root = self.model, self.tree.root

        #####################
        # Find the new root #
        #####################
        # The new root is the belief node hanging off the executed action's
        # node under the observation we actually received.
        new_root = root.get_child(action).get_child(obs)
        if new_root is None:
            log.warning(
                "Warning: {} is not in the search tree".format(root.h +
                                                               [action, obs]))
            # The step result randomly produced a different observation
            action_node = root.get_child(action)
            if action_node.children:
                # grab any of the beliefs extending from the belief node's action node (i.e, the nearest belief node)
                log.info('grabing a bearest belief node...')
                new_root = rand_choice(action_node.children)
            else:
                # or create the new belief node and rollout from there
                log.info('creating a new belief node')
                particles = self.model.gen_particles(n=self.max_particles)
                new_root = self.tree.add(h=action_node.h + [obs],
                                         name=obs,
                                         parent=action_node,
                                         observation=obs,
                                         particle=particles,
                                         budget=root.budget - action_node.cost)

        ##################
        # Fill Particles #
        ##################
        # Top the new root's particle set back up to max_particles: simulate
        # `action` from states sampled out of the old root and keep only
        # successor states whose simulated observation matches `obs`.
        particle_slots = self.max_particles - len(new_root.B)
        if particle_slots > 0:
            # fill particles by Monte-Carlo using reject sampling
            particles = []
            # NOTE(review): this loop never terminates if `obs` cannot be
            # produced by simulating `action` -- presumably unreachable in
            # practice; confirm against the model's observation function.
            while len(particles) < particle_slots:
                si = root.sample_state()
                sj, oj, r, cost = self.model.simulate_action(si, action)

                if oj == obs:
                    particles.append(sj)
            new_root.B += particles

        #####################
        # Advance and Prune #
        #####################
        # Discard everything except the subtree rooted at new_root, then make
        # it the tree root and recompute the belief from its particles.
        self.tree.prune(root, exclude=new_root)
        self.tree.root = new_root
        new_belief = self.compute_belief()

        ###########################
        # Particle Reinvigoration #
        ###########################
        # A zero-probability entry means particle deprivation: some state has
        # no particles left, so inject fresh random particles.
        if any([prob == 0.0 for prob in new_belief]):
            # perform particle re-invigoration when particle deprivation happens
            mutations = self.model.gen_particles(
                n=int(self.max_particles * self.reinvigorated_particles_ratio))
            for particle in mutations:
                # NOTE(review): if `randint` is stdlib random.randint its upper
                # bound is inclusive, so this can index len(new_root.B) and
                # raise IndexError; numpy's randint excludes the upper bound.
                # Confirm which one is imported at the top of the file.
                new_root.B[randint(0, len(new_root.B))] = particle

            # re-compute the current belief distribution after reinvigoration
            new_belief = self.compute_belief()
            log.info(('*** {} random particles are added ***'.format(
                len(mutations))))
        return new_belief