Example #1
    def add_configs(self,
                    budget=float('inf'),
                    initial_belief=None,
                    simulation_time=0.5,
                    max_particles=350,
                    reinvigorated_particles_ratio=0.1,
                    utility_fn='ucb1',
                    C=0.5):
        # acquire the utility function used to choose the most promising action
        if utility_fn == 'ucb1':
            self.utility_fn = UtilityFunction.ucb1(C)
        elif utility_fn == 'sa_ucb':
            self.utility_fn = UtilityFunction.sa_ucb(C)
        elif utility_fn == 'mab_bv1':
            if self.model.costs is None:
                raise ValueError(
                    'Must specify action costs if utility function is MAB_BV1')
            self.utility_fn = UtilityFunction.mab_bv1(min(self.model.costs), C)
        else:
            raise ValueError('Unknown utility function: {}'.format(utility_fn))

        # other configs
        self.simulation_time = simulation_time
        self.max_particles = max_particles
        self.reinvigorated_particles_ratio = reinvigorated_particles_ratio

        # initialise belief search tree
        root_particles = self.model.gen_particles(n=self.max_particles,
                                                  prob=initial_belief)
        self.tree = BeliefTree(budget, root_particles)
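
For context, a minimal call might look like the following sketch; the POMCP constructor and the model's `costs` attribute are assumptions inferred from how add_configs uses them, not part of this page:

    # hypothetical usage sketch
    solver = POMCP(model)
    solver.add_configs(budget=20.0,
                       simulation_time=1.0,      # seconds of planning per step
                       max_particles=500,
                       utility_fn='mab_bv1',     # requires model.costs to be set
                       C=0.5)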
Example #2
    def add_configs(self, budget=float('inf'), initial_belief=None, simulation_time=0.5,
                    max_particles=350, reinvigorated_particles_ratio=0.1, utility_fn='ucb1', C=0.5):
        # acquire the utility function used to choose the most promising action;
        # wrap each entry in a lambda so only the selected one is evaluated
        # (mab_bv1 touches self.model.costs, which may be None)
        self.utility_fn = {
            'ucb1': lambda: UtilityFunction.ucb1(C),
            'mab_bv1': lambda: UtilityFunction.mab_bv1(min(self.model.costs), C),
            'sa_ucb': lambda: UtilityFunction.sa_ucb(C)
        }[utility_fn]()

        # other configs
        self.simulation_time = simulation_time
        self.max_particles = max_particles
        self.reinvigorated_particles_ratio = reinvigorated_particles_ratio
        
        # initialise belief search tree
        root_particles = self.model.gen_particles(n=self.max_particles, prob=initial_belief)
        self.tree = BeliefTree(budget, root_particles)
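
This variant swaps the if/elif chain for a dict dispatch. The lambdas matter: a plain dict literal evaluates every value before the lookup, so min(self.model.costs) would run (and fail when model.costs is None) even when 'ucb1' is requested. A minimal illustration of the difference:

    # eager = {'a': 1, 'b': 1 / 0}    # would raise ZeroDivisionError on construction
    deferred = {'a': lambda: 1, 'b': lambda: 1 / 0}['a']()   # returns 1, no error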
Example #3
import time
from random import randint

import numpy as np

# Solver, BeliefTree, UtilityFunction, rand_choice, elem_distribution and the
# `log` logger are project-internal helpers whose modules are not shown here


class POMCP(Solver):
    def __init__(self, model):
        Solver.__init__(self, model)
        self.tree = None

        self.simulation_time = None  # in seconds
        self.max_particles = None  # maximum number of particles a belief node may hold
        self.reinvigorated_particles_ratio = None  # ratio of max_particles to mutate
        self.utility_fn = None

    def add_configs(self,
                    budget=float('inf'),
                    initial_belief=None,
                    simulation_time=0.5,
                    max_particles=350,
                    reinvigorated_particles_ratio=0.1,
                    utility_fn='ucb1',
                    C=0.5):
        # acquire the utility function used to choose the most promising action
        if utility_fn == 'ucb1':
            self.utility_fn = UtilityFunction.ucb1(C)
        elif utility_fn == 'sa_ucb':
            self.utility_fn = UtilityFunction.sa_ucb(C)
        elif utility_fn == 'mab_bv1':
            if self.model.costs is None:
                raise ValueError(
                    'Must specify action costs if utility function is MAB_BV1')
            self.utility_fn = UtilityFunction.mab_bv1(min(self.model.costs), C)
        else:
            raise ValueError('Unknown utility function: {}'.format(utility_fn))

        # other configs
        self.simulation_time = simulation_time
        self.max_particles = max_particles
        self.reinvigorated_particles_ratio = reinvigorated_particles_ratio

        # initialise belief search tree
        root_particles = self.model.gen_particles(n=self.max_particles,
                                                  prob=initial_belief)
        self.tree = BeliefTree(budget, root_particles)

    def compute_belief(self):
        """
        Convert the root node's particle set into a probability vector over
        model.states, e.g. particles [s0, s0, s1, s0] -> [0.75, 0.25, 0.0, ...]
        """
        base = [0.0] * self.model.num_states
        particle_dist = elem_distribution(self.tree.root.B)
        for state, prob in particle_dist.items():
            base[self.model.states.index(state)] = round(prob, 6)
        return base

    def rollout(self, state, h, depth, max_depth, budget):
        """
        Perform randomized recursive rollout search starting from 'h' util the max depth has been achived
        :param state: starting state's index
        :param h: history sequence
        :param depth: current planning horizon
        :param max_depth: max planning horizon
        :return:
        """
        if depth > max_depth or budget <= 0:
            return 0

        ai = rand_choice(self.model.get_legal_actions(state))
        sj, oj, r, cost = self.model.simulate_action(state, ai)

        return r + self.model.discount * self.rollout(
            sj, h + [ai, oj], depth + 1, max_depth, budget - cost)

    def simulate(self,
                 state,
                 max_depth,
                 depth=0,
                 h=None,
                 parent=None,
                 budget=None):
        """
        Perform an MCTS simulation on the POMCP belief search tree
        :param state: starting state's index
        :param max_depth: max planning horizon
        :param depth: current depth in the tree
        :param h: history sequence (a fresh list when omitted, avoiding the
                  mutable-default pitfall)
        :param parent: parent node in the belief tree
        :param budget: remaining action budget
        :return: discounted return of the simulation
        """
        h = [] if h is None else h
        # Stop recursion once we are deep enough in our built tree
        if depth > max_depth:
            return 0

        obs_h = h[-1] if h else None
        node_h = self.tree.find_or_create(h,
                                          name='root' if obs_h is None else obs_h,
                                          parent=parent,
                                          budget=budget,
                                          observation=obs_h)

        # ===== ROLLOUT =====
        # Initialize child nodes and return an approximate reward for this
        # history by rolling out until max depth
        if not node_h.children:
            # this branch is always taken when node_h has just been created
            for ai in self.model.get_legal_actions(state):
                cost = self.model.cost_function(ai)
                # only add actions the remaining budget can afford
                if budget - cost >= 0:
                    self.tree.add(h + [ai],
                                  name=ai,
                                  parent=node_h,
                                  action=ai,
                                  cost=cost)

            return self.rollout(state, h, depth, max_depth, budget)

        # ===== SELECTION =====
        # Find the action that maximises the utility value; shuffle first so
        # that ties between equal utilities break randomly (sorted is stable)
        np.random.shuffle(node_h.children)
        node_ha = sorted(node_h.children, key=self.utility_fn, reverse=True)[0]

        # ===== SIMULATION =====
        # Perform monte-carlo simulation of the state under the action
        sj, oj, reward, cost = self.model.simulate_action(
            state, node_ha.action)
        R = reward + self.model.discount * self.simulate(
            sj,
            max_depth,
            depth + 1,
            h=h + [node_ha.action, oj],
            parent=node_ha,
            budget=budget - cost)
        # ===== BACK-PROPAGATION =====
        # Update the belief node for h
        node_h.B += [state]
        node_h.N += 1

        # Update the action node for this action
        node_ha.update_stats(cost, reward)
        node_ha.N += 1
        node_ha.V += (R - node_ha.V) / node_ha.N

        return R

    def solve(self, T, modo):
        """
        Repeatedly run simulations of depth at most T until the simulation
        time budget is exhausted
        """
        begin = time.time()
        n = 0
        while time.time() - begin < self.simulation_time:
            n += 1
            state = self.tree.root.sample_state()
            self.simulate(state,
                          max_depth=T,
                          h=self.tree.root.h,
                          budget=self.tree.root.budget)
        if modo == "Iterativo":
            log.info('# Simulations -> {}'.format(n))

    def get_action(self, belief):
        """
        Choose the action that maximises V.
        'belief' is part of the solver interface but is not used here
        """
        root = self.tree.root
        return max(root.children, key=lambda action: action.V).action

    def update_belief(self, belief, action, obs):
        """
        Updates the belief tree given the environment feedback:
        extends the history, updates particle sets, etc.
        """
        root = self.tree.root

        #####################
        # Find the new root #
        #####################
        new_root = root.get_child(action).get_child(obs)
        if new_root is None:
            log.warning(
                "Warning: {} is not in the search tree".format(root.h +
                                                               [action, obs]))
            # The step result randomly produced a different observation
            action_node = root.get_child(action)
            if action_node.children:
                # grab any belief node extending from this action node (i.e., the nearest belief node)
                log.info('grabbing the nearest belief node...')
                new_root = rand_choice(action_node.children)
            else:
                # or create the new belief node and rollout from there
                log.info('creating a new belief node')
                particles = self.model.gen_particles(n=self.max_particles)
                new_root = self.tree.add(h=action_node.h + [obs],
                                         name=obs,
                                         parent=action_node,
                                         observation=obs,
                                         particle=particles,
                                         budget=root.budget - action_node.cost)

        ##################
        # Fill Particles #
        ##################
        particle_slots = self.max_particles - len(new_root.B)
        if particle_slots > 0:
            # fill particles by Monte-Carlo using rejection sampling
            particles = []
            while len(particles) < particle_slots:
                si = root.sample_state()
                sj, oj, r, cost = self.model.simulate_action(si, action)

                if oj == obs:
                    particles.append(sj)
            new_root.B += particles

        #####################
        # Advance and Prune #
        #####################
        self.tree.prune(root, exclude=new_root)
        self.tree.root = new_root
        new_belief = self.compute_belief()

        ###########################
        # Particle Reinvigoration #
        ###########################
        if any(prob == 0.0 for prob in new_belief):
            # perform particle reinvigoration when particle deprivation happens
            mutations = self.model.gen_particles(
                n=int(self.max_particles * self.reinvigorated_particles_ratio))
            for particle in mutations:
                # random.randint is inclusive on both ends, hence len(...) - 1
                new_root.B[randint(0, len(new_root.B) - 1)] = particle

            # re-compute the current belief distribution after reinvigoration
            new_belief = self.compute_belief()
            log.info('*** {} random particles were added ***'.format(
                len(mutations)))
        return new_belief

    def draw(self, beliefs):
        """
        No-op placeholder; drawing is not implemented for this solver
        """
        pass
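
Putting the pieces together, a typical plan-act-update loop over this solver might look like the following sketch; `model`, `prior`, and the `env.step` call are assumptions, since only the solver itself appears on this page:

    # hypothetical end-to-end loop; the environment API is assumed
    solver = POMCP(model)
    solver.add_configs(budget=float('inf'), initial_belief=prior,
                       simulation_time=1.0, utility_fn='ucb1')

    belief = solver.compute_belief()
    for t in range(100):
        solver.solve(T=10, modo="Iterativo")   # grow the tree for simulation_time seconds
        action = solver.get_action(belief)     # greedy action at the root
        obs, reward, done = env.step(action)   # environment feedback (assumed API)
        belief = solver.update_belief(belief, action, obs)
        if done:
            break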