예제 #1
0
 def get_single_decision_index(self,point):
     subtree_desc = None
     if self.reuse_tree and self.tree:
         subtree_desc = self.tree.find_subtree(point,
                                            self.sim_thresh)
         
     if subtree_desc:
         (a_id,c_id) = subtree_desc
         self.tree.crop_subtree(a_id,c_id)
         explore_budget = self.budget\
                          - self.tree.num_visits()
     else:
         self.tree = MonteCarloTree(self.trans_fn,
                                    self.cost_fn,
                                    self.discount,
                                    self.actions,
                                    self.rollout_policy,
                                    self.initial_prob,
                                    self.val_fn,
                                    point,
                                    self.horizon,
                                    self.prob_scale)
         explore_budget = self.budget
         
         
     self.tree.grow_tree(int(explore_budget))
     a_id = np.argmax(self.tree.root_node.action_visits)
     return a_id
예제 #2
0
class MCTSPolicy(IndexPolicy):
    def __init__(self,problem,
                 actions,
                 rollout_policy,
                 initial_prob,
                 value_fn,
                 horizon,
                 prob_scale,
                 budget):
        self.trans_fn = problem.gen_model.trans_fn
        self.cost_fn = problem.gen_model.cost_fn
        self.discount = problem.discount
        self.actions = actions
        self.rollout_policy = rollout_policy
        self.initial_prob = initial_prob
        self.val_fn = value_fn
        self.horizon = horizon
        self.prob_scale = prob_scale
        self.budget = budget

        self.action_dim = actions.shape[1]

        self.reuse_tree = False
        self.sim_thresh = 1e-15

        self.tree = None

    def get_single_decision_index(self,point):
        subtree_desc = None
        if self.reuse_tree and self.tree:
            subtree_desc = self.tree.find_subtree(point,
                                               self.sim_thresh)
            
        if subtree_desc:
            (a_id,c_id) = subtree_desc
            self.tree.crop_subtree(a_id,c_id)
            explore_budget = self.budget\
                             - self.tree.num_visits()
        else:
            self.tree = MonteCarloTree(self.trans_fn,
                                       self.cost_fn,
                                       self.discount,
                                       self.actions,
                                       self.rollout_policy,
                                       self.initial_prob,
                                       self.val_fn,
                                       point,
                                       self.horizon,
                                       self.prob_scale)
            explore_budget = self.budget
            
            
        self.tree.grow_tree(int(explore_budget))
        a_id = np.argmax(self.tree.root_node.action_visits)
        return a_id
        
    def get_decision_indices(self,points):
        (N,D) = points.shape
        actions = np.empty(N)
        for i in xrange(N):
            actions[i] = self.get_single_decision_index(
                points[i,:])
        return actions
    
    def get_single_decision(self,point):
        aid = self.get_single_decision_index(point)
        return self.actions[aid,:]
    
    def get_decisions(self,points):
        aids = self.get_decision_indices(points).astype('i')
        return self.actions[aids,:]

    def get_action_dim(self):
        return self.actions.shape[1]