def set_new_node(self, node, depth, er_bound, prob, parent_likelihood):
    # Sets the fields of a new node. (Precisely, it does not instantiate the
    # node; it fills in the values of a newly instantiated node.)
    if self.continuous_belief:
        b = node.state
        # Probability of violating constraints in this state (never changes)
        node.risk = bound_prob(self.r(b))
        # Depth of a node is its distance to the root
        node.depth = depth
        node.set_prob(prob)
        # likelihood = parent likelihood * transition probability
        node.set_likelihood(prob * parent_likelihood)
        if self.term(node.state):
            self.set_terminal_node(node)
        else:
            # the value of a node is the heuristic only when it's first
            # created. After that, the value is given as a function of its
            # children
            node.value = self.h(node)
            node.terminal = False  # new node is non-terminal
            node.best_action = None  # no action associated yet
            node.exec_risk_bound = bound_prob(er_bound)  # execution risk bound
            # heuristic estimate of the execution risk at the node
            node.set_exec_risk(node.risk)
    else:
        b = node.state.belief
        # Probability of violating constraints in a belief state (never changes)
        node.risk = bound_prob(avg_func(b, self.r))
        # Depth of a node is its distance to the root
        node.depth = depth
        node.set_prob(prob)
        # likelihood = parent likelihood * transition probability
        node.set_likelihood(prob * parent_likelihood)
        if is_terminal_belief(b, self.term, self.terminal_prob):  # terminal belief
            self.set_terminal_node(node)
        elif node.depth == self.fixed_horizon:
            # a node at the fixed planning horizon is treated as terminal
            node.value = avg_func(b, self.h)
            node.terminal = True
            node.best_action = None  # no action associated yet
            node.exec_risk_bound = bound_prob(er_bound)  # execution risk bound
            # average heuristic estimate of the execution risk at the node
            node.set_exec_risk(node.risk)
            node.risk_upper = node.exec_risk
        else:
            # the value of a node is the average of the heuristic only when
            # it's first created. After that, the value is given as a function
            # of its children
            node.value = avg_func(b, self.h)
            node.terminal = False  # new node is non-terminal
            node.best_action = None  # no action associated yet
            node.exec_risk_bound = bound_prob(er_bound)  # execution risk bound
            # average heuristic estimate of the execution risk at the node
            node.set_exec_risk(node.risk)
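# Illustrative sketch only: `bound_prob` and `avg_func` are imported from
# elsewhere, so their definitions do not appear in this file. The semantics
# assumed by set_new_node are roughly the following hypothetical
# reconstruction (the `_sketch` names avoid shadowing the real helpers, and
# the dict-like belief {state: probability} is an assumption):
def bound_prob_sketch(p):
    # clamp to [0.0, 1.0] to guard against numerical drift outside the valid
    # probability range
    return min(max(p, 0.0), 1.0)


def avg_func_sketch(belief, func, *args):
    # expectation of func over the belief, E_{s ~ b}[func(s, *args)]; extra
    # positional args support calls like avg_func(belief, self.V, act)
    return sum(prob * func(state, *args) for state, prob in belief.items())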
def update_values_and_best_actions(self, expanded_nodes):
    # Updates the Q values of nodes on the graph and the current best policy,
    # one expanded node at a time.
    self.debug('\n ****************************')
    self.debug('Update values and best actions ')
    self.debug('****************************')
    for exp_idx, exp_node in enumerate(expanded_nodes):
        Z = self.build_ancestor_list(exp_node)
        # updates the best action at each node
        for node in Z:
            self.debug('\nupdate values and best action: ' +
                       str(node.state.state_print()))
            self.debug('current Q: ', node.value, "\n")
            # get all actions (operators) available at the node from the graph
            all_action_operators = [] if node.terminal \
                else self.graph.all_node_operators(node)
            # risk at the node's belief state (does not depend on the action
            # taken)
            risk = node.risk
            # current *admissible* (optimistic) estimate of the node's Q value
            current_Q = node.value
            # execution risk bound; the execution risk cap depends on the type
            # of chance constraint being imposed
            er_bound = min([node.exec_risk_bound, self.er_cap])
            if self.cc_type == 'everywhere':
                er_bound = self.er_cap
            best_action_idx = -1
            best_Q = self.initial_Q  # -inf or +inf, depending on the optimization direction
            best_D = -1  # depth
            exec_risk_for_best = -1.0
            # Estimates the value and risk of the current node for each
            # possible action
            for act_idx, act in enumerate(all_action_operators):
                probs = act.properties['prob']
                prob_safe = act.properties['prob_safe']
                children = self.graph.hyperedge_successors(node, act)
                # Estimate of Q for taking this action from the current node:
                # the immediate reward plus the average value of the children
                Q = act.op_value + \
                    np.sum([p * child.value for (p, child) in zip(probs, children)])
                # Average child depth
                D = 1 + np.sum([p * child.depth
                                for (p, child) in zip(probs, children)])
                # Estimate of the execution risk of taking this action from
                # the current node: the immediate risk plus the average
                # execution risk of the children
                if self.cc_type == 'overall':
                    exec_risk = risk + (1.0 - risk) * np.sum([
                        p * child.exec_risk
                        for (p, child) in zip(prob_safe, children)
                    ])
                # enforcing the same risk bound at every step of the policy
                elif self.cc_type == 'everywhere':
                    exec_risk = risk
                self.debug('action: ' + act.name + ' children: ' +
                           str(children[0].state.state_print()) +
                           ' risk ' + str(exec_risk))
                self.debug(' act_op_value: ', act.op_value)
                for child in children:
                    self.debug(' child_value: ', child.value)
                self.debug(' children Q: ' + str(Q))
                # If the execution risk bound has been violated, or the Q
                # value for this action is worse than the current best, we
                # should definitely not select it.
                if (exec_risk > er_bound) or self.is_worse(Q, best_Q):
                    select_action = False
                    if exec_risk > er_bound:
                        self.debug(' Action pruned by risk bound')
                # risk bound respected and Q value equal or better
                else:
                    select_action = True
                # Test whether the risk bound for the current node has been
                # violated
                if select_action:
                    # Updates the execution risk bounds for the children
                    child_er_bounds, cc_infeasible = self.compute_exec_risk_bounds(
                        er_bound, risk, children, prob_safe)
                    for child in children:
                        self.debug(' select_action: child ' +
                                   child.state.state_print() +
                                   " depth: " + str(child.depth) +
                                   " risk bound: " + str(child.exec_risk_bound) +
                                   ' infeasible: ' + str(cc_infeasible))
                    if not cc_infeasible:  # chance constraint has not been violated
                        for idx, child in enumerate(children):
                            child.exec_risk_bound = child_er_bounds[idx]
                        # Updates the best action at the node
                        best_Q = Q
                        best_action_idx = act_idx
                        best_D = D
                        exec_risk_for_best = exec_risk
            # Test whether some action has been selected
            if best_action_idx >= 0:
                # if (not np.isclose(best_Q, current_Q)) and self.is_better(best_Q, current_Q):
                #     print('current_Q', current_Q, 'best_Q', best_Q)
                #     print('WARNING: node Q value improved, which might indicate inadmissibility.')
                # Propagate the execution risk bound down to all descendants
                # (added by Sungkwoen)
                er_bound_updating_nodes = deque([node])
                while len(er_bound_updating_nodes) > 0:
                    updating_node = er_bound_updating_nodes.popleft()
                    if updating_node.best_action:
                        best_action_updating = updating_node.best_action
                        er_bound_updating = updating_node.exec_risk_bound
                        risk_updating = updating_node.risk
                        probs_updating = best_action_updating.properties['prob']
                        prob_safe_updating = best_action_updating.properties['prob_safe']
                        children_updating = self.graph.hyperedge_successors(
                            updating_node, best_action_updating)
                        child_er_bounds, er_bound_infeasible = self.compute_exec_risk_bounds(
                            er_bound_updating, risk_updating,
                            children_updating, prob_safe_updating)
                        for idx, child in enumerate(children_updating):
                            child.exec_risk_bound = child_er_bounds[idx]
                        er_bound_updating_nodes.extend(children_updating)
                # Updates the optimal value estimate and the execution risk
                # estimate, and marks the best action
                node.set_value(best_Q)
                node.set_exec_risk(exec_risk_for_best)
                node.set_best_action(all_action_operators[best_action_idx])
                self.debug('best action for ' + str(node.state.state_print()) +
                           ' set as ' +
                           str(all_action_operators[best_action_idx].name))
            else:
                # No action was selected, so this node is terminal
                self.debug('*\n*\n*\n*\n no best action for ' +
                           str(node.state.state_print()) + '\n*\n*\n*\n')
                # mdeyo: Finally got plans with deadends to work!
                # A deadend is a state with no actions available, either
                # because it's an actual deadend or because all actions were
                # too risky. If the deadend was on the optimal path, the
                # planner would just mark it terminal and planning would end
                # before the goal was achieved.
                # mdeyo: The current fix is to mark the deadend state as
                # having execution risk = 1.0, so that the planner removes the
                # previous action from the policy and properly picks the next
                # best action at the parent state.
                # node.risk = 1.0
                # node.set_exec_risk(node.risk)
                # mdeyo: An alternative, possibly better fix is to update the
                # value instead of the risk, setting the value to +inf when
                # minimizing.
                # Only mark an inf-value deadend if it's not actually the goal
                if not is_terminal_belief(node.state.belief, self.term,
                                          self.terminal_prob):
                    self.mark_deadend(node)
                if not node.terminal and not node.deadend:
                    self.set_terminal_node(node)
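# Illustrative sketch only: `compute_exec_risk_bounds` is defined elsewhere in
# the planner. The execution-risk recursion used above,
#     er(n) = risk(n) + (1 - risk(n)) * sum_i p_safe_i * er(child_i),
# can be inverted for a uniform child bound delta_c satisfying
#     risk + (1 - risk) * (sum_i p_safe_i) * delta_c <= parent_bound.
# The stand-in below assumes that uniform allocation, which is not necessarily
# the scheme the real method implements (numpy is already imported as np in
# this module):
def compute_exec_risk_bounds_sketch(parent_bound, risk, prob_safe_list):
    if parent_bound < risk and not np.isclose(parent_bound, risk):
        # the node's own risk already exceeds its bound: infeasible
        return [0.0] * len(prob_safe_list), True
    total_safe = float(np.sum(prob_safe_list))
    if np.isclose(total_safe, 0.0) or np.isclose(risk, 1.0):
        # children are unreachable through safe outcomes: leave them unconstrained
        return [1.0] * len(prob_safe_list), False
    delta_c = (parent_bound - risk) / ((1.0 - risk) * total_safe)
    return [min(delta_c, 1.0)] * len(prob_safe_list), False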
def expand_best_partial_solution(self):
    # Expands a node in the graph currently contained in the best partial
    # solution, adding new nodes and edges to the graph.
    # nodes_to_expand = self.opennodes
    # self.opennodes = None
    nodes_to_expand = [self.choose_node()]  # choose best node
    for node in nodes_to_expand:
        self.debug('\n ******* expanding node *******')
        self.debug(node.state.state_print())
        # print(node.state.state_print())
        self.debug('******************************\n')
        belief = node.state.belief  # belief state associated with the node
        parent_risk = node.risk  # execution risk for the current node
        parent_bound = node.exec_risk_bound  # er bound for the current node
        parent_depth = node.depth  # distance of the parent to the root
        parent_likelihood = node.likelihood  # likelihood that the node is reached in the policy
        if self.cc_type == 'everywhere':
            parent_bound = self.cc
        self.debug('compute_exec_risk_bounds: parent_bound ', parent_bound,
                   ' parent_risk ', parent_risk)
        # If the current node is guaranteed to violate constraints and a
        # violation is set to halt the process, make the node terminal
        if self.halt_on_violation and np.isclose(parent_risk, 1.0):
            all_node_actions = []
        else:
            # else get the available actions from the model
            all_node_actions = self.get_all_actions(belief, self.A)
        action_added = False  # flags whether a new action has been added
        if len(all_node_actions) > 0:
            added_count = 0
            for act in all_node_actions:
                self.debug("\n", act)
                child_obj_list, prob_list, prob_safe_list, new_child_idxs = \
                    self.obtain_child_objs_and_probs(belief, self.T, self.O,
                                                     self.r, act)
                # initializes the new child nodes
                for c_idx in new_child_idxs:
                    self.set_new_node(child_obj_list[c_idx], parent_depth + 1,
                                      0.0, prob_list[c_idx], parent_likelihood)
                # If the parent bound Delta is ~1.0, the child nodes are
                # guaranteed to have a risk bound equal to 1
                if not np.isclose(parent_bound, 1.0):
                    # computes execution risk bounds for the child nodes
                    er_bounds, er_bound_infeasible = self.compute_exec_risk_bounds(
                        parent_bound, parent_risk, child_obj_list,
                        prob_safe_list)
                else:
                    er_bounds = [1.0] * len(child_obj_list)
                    er_bound_infeasible = False
                # Only creates a new operator if all er bounds are non-negative
                if not er_bound_infeasible:
                    # updates the execution risk bounds for all children that
                    # will be added to the graph
                    for idx, child in enumerate(child_obj_list):
                        child.exec_risk_bound = er_bounds[idx]
                    # average instantaneous value (cost or reward)
                    avg_op_value = avg_func(belief, self.V, act)
                    # an "Action" object is created
                    act_obj = RAOStarGraphOperator(name=str(act),
                                                   op_value=avg_op_value,
                                                   properties={
                                                       'prob': prob_list,
                                                       'prob_safe': prob_safe_list
                                                   })
                    # add the edge (action) to the graph
                    self.graph.add_hyperedge(parent_obj=node,
                                             child_obj_list=child_obj_list,
                                             prob_list=prob_list,
                                             op_obj=act_obj)
                    action_added = True
                    added_count += 1
                else:
                    self.debug(' action not added - execution risk bound infeasible')
        if not action_added:
            # self.debug('action not added')
            # self.set_terminal_node(node)
            if not is_terminal_belief(node.state.belief, self.term,
                                      self.terminal_prob):
                self.mark_deadend(node)
            if not node.terminal and not node.deadend:
                self.set_terminal_node(node)
    # returns the list of nodes that either had actions added or were marked
    # terminal
    return nodes_to_expand
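# Illustrative sketch only: a minimal view of how the two methods above
# typically drive the top-level RAO* loop. `opennodes` is referenced in the
# comments of expand_best_partial_solution; `extract_policy` is a hypothetical
# name for the final policy-extraction step, not confirmed by this section.
def run_rao_star_loop_sketch(planner):
    # alternate expansion and value/policy updates until no open nodes remain
    while len(planner.opennodes) > 0:
        expanded_nodes = planner.expand_best_partial_solution()
        planner.update_values_and_best_actions(expanded_nodes)
    return planner.extract_policy()  # hypothetical policy-extraction step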