def call(self, req_type, params):
    """ Calls CLARK with different types of inputs and parameters. """
    req_type = req_type.lower()
    if req_type == 'pddl':
        parser = pddlc.get_argument_parser()
        call_func = pddlc.call_clark
    elif req_type in ['crmpl', 'rmpyl']:
        parser = rmpylc.get_argument_parser()
        call_func = rmpylc.call_clark
    elif req_type == 'ccpomdp':
        parser = ccpomdpc.get_argument_parser()
        call_func = ccpomdpc.call_clark
    else:
        print(req_type + ' is an invalid request type.')
        _print_clark_usage()
        return False, None  # No valid parser/handler for this request type.

    _add_common_parsing(parser)  # Adds common arguments
    try:
        args = parser.parse_args(params.split())
        success, output_dict = call_func(args)

        # If requested, writes the policy in graphical SVG format.
        if args.svg:
            policy_file = args.output[:args.output.rfind('.')]
            dot_policy = policy_to_dot(output_dict['explicit'], output_dict['policy'])
            dot_policy.write(policy_file + '.svg', format='svg')

        # Generates output TPN
        output_dict['rmpyl'].to_ptpn(filename=args.output)
    except:
        success, output_dict = False, None
        print('\n##### ERROR: could not process request\n\tType: %s\n\tParams: %s\n' % (req_type, params))
        if self.debug:
            import ipdb; ipdb.set_trace()
            raise

    return success, output_dict
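# A hedged alternative sketch (not from the source): the if/elif chain above can be
# expressed as a lookup table, which keeps the supported request types in one place
# and makes unknown types fail fast. The module and function names are the ones used
# above; the table and helper names (_DISPATCH, _select_handlers) are hypothetical.
_DISPATCH = {
    'pddl':    (pddlc.get_argument_parser, pddlc.call_clark),
    'crmpl':   (rmpylc.get_argument_parser, rmpylc.call_clark),
    'rmpyl':   (rmpylc.get_argument_parser, rmpylc.call_clark),
    'ccpomdp': (ccpomdpc.get_argument_parser, ccpomdpc.call_clark),
}

def _select_handlers(req_type):
    """Returns (parser, call_func) for a request type, or (None, None) if unsupported."""
    entry = _DISPATCH.get(req_type.lower())
    if entry is None:
        return None, None
    get_parser, call_func = entry
    return get_parser(), call_func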
init_tcs.append(TemporalConstraint(start=cc_fp.global_start_event, end=cc_fp.global_end_event,
                                   ctype='controllable', lb=0.0, ub=1000.0))

b0 = cc_fp.get_initial_belief(prior=prior, initial_site='start_point', goal_site='end_point',
                              init_tcs=init_tcs)

planner = RAOStar(cc_fp, node_name='id', cc=0.01, cc_type='overall',
                  terminal_prob=1.0, randomization=0.0, propagate_risk=True,
                  halt_on_violation=False, verbose=1)

# Searches for the optimal policy
policy, explicit, performance = planner.search(b0)

# Converts policy to graphical SVG format
dot_policy = policy_to_dot(explicit, policy)
dot_policy.write('flightgear_policy.svg', format='svg')

# Converts optimal exploration policy into an RMPyL program
exploration_policy = policy_to_rmpyl(explicit, policy)

# The flight policy has the additional actions of taking off and landing.
flight_policy = RMPyL(name='run()')
flight_policy *= flight_policy.sequence(Episode(action='(takeoff plane)'),
                                        exploration_policy,
                                        Episode(action='(land plane)'))

# Eliminates probabilistic choices from the policy, since Pike (in fact, the
# Lisp tpn package) cannot properly handle them.
for obs in flight_policy.observations:
    if obs.type == 'probabilistic':
        obs.type = 'uncontrollable'  # Assumed completion: the original snippet is truncated here.
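# Hedged follow-up sketch (not from the source): with the probabilistic choices removed,
# the wrapped flight program could be exported to a TPN for execution, using the same
# to_ptpn call as the other demos. The output filename here is an assumption.
flight_policy.to_ptpn(filename='flightgear_policy.tpn')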
#                              time_limit=5,drop_penalty=10.0,p_fail=0.1,verbose=0)

# Diagnostic demo, where the robot has to figure out if it is fit for the task.
# With this setting of parameters, the robot tries an action a couple of times
# before bugging the human for help.
param_dict = {'p_fail_fit': 0.2,          # Prob. of failing a task, while being fit
              'p_fail_unfit': 0.999,      # Prob. of failing a task, while not being fit
              'p_fit_fit': 1.0,           # Prob. of remaining fit, if fit before
              'p_fit_unfit': 0.0,         # Prob. of becoming fit, if unfit before
              'goal_drop_penalty': 100.0, # Penalty for not achieving a goal
              'robot_action_cost': 1.0,   # Cost of robot performing an action
              'human_action_cost': 5.0}   # Cost of human performing an action

mitsu_model = DiagnosticMitsubishi(domain_file=dom_file, prob_file=prob_file,
                                   time_limit=5, verbose=1, param_dict=param_dict)
b0 = mitsu_model.get_initial_belief()

planner = RAOStar(mitsu_model, node_name='id', cc=0.3, cc_type='overall',
                  terminal_prob=1.0, randomization=0.0, propagate_risk=True,
                  verbose=1, log=False)

policy, explicit, performance = planner.search(b0)

dot_policy = policy_to_dot(explicit, policy)
rmpyl_policy = policy_to_rmpyl(explicit, policy)

dot_policy.write('mitsubishi_policy.svg', format='svg')
rmpyl_policy.to_ptpn(filename='mitsubishi_policy_rmpyl_ptpn.tpn')
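# Hypothetical extension (not part of the demo): sweeping the chance constraint cc shows
# how risk-aversion trades expected cost against execution risk. It reuses the model, the
# initial belief b0, the constructor arguments, and the performance keys from above.
for cc in (0.01, 0.1, 0.3):
    sweep_planner = RAOStar(mitsu_model, node_name='id', cc=cc, cc_type='overall',
                            terminal_prob=1.0, randomization=0.0, propagate_risk=True,
                            verbose=0, log=False)
    _, _, perf = sweep_planner.search(b0)
    print('cc=%.2f -> value=%.3f, exec risk=%.3f' %
          (cc, perf['optimal_value'], perf['exec_risk_for_optimal_value']))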
def search(self, b0, time_limit=np.infty, iter_limit=np.infty):
    """Searches for the optimal path from start to goal on the hypergraph."""
    if self._verbose >= 1:
        print('\n##### Starting RAO* search!\n')

    self._start_time = time.time()
    self._init_search(b0)

    count = 0
    root = self._explicit.root
    # Initial objective at the root, which is the best possible (it can
    # only degrade with an admissible heuristic).
    prev_root_value = np.infty if self._model.is_maximization else -np.infty

    if self._log:  # Creates a log file, if necessary
        filename = time.strftime("%d_%m_%Y_%H_%M_%S") + '_log_rao.txt'
        if not os.path.exists('./log'):
            os.makedirs('./log')
        self._log_f = open('./log/' + filename, 'w')
        print("\nLogging into " + filename)

    if self._animation:
        if not os.path.exists('./animation'):
            os.makedirs('./animation')
        print("\nCreated animation directory")

    interrupted = False
    try:
        while len(self._opennodes) > 0 and (count <= iter_limit) and \
              (time.time() - self._start_time <= time_limit):
            count += 1

            ########### CORE FUNCTIONS
            # Expands the current best solution
            expanded_nodes = self._expand_best_partial_solution()
            # Updates the value estimates and policy
            self._update_values_and_best_actions(expanded_nodes)
            # NOTE: the value inconsistency here was coming from nodes that
            # were not contained in the best partial policy graph, but contained
            # children in it.
            #debug_all_hypegraph_values(self,'After value update')

            # Updates the mapping of ancestors on the best policy graph and also
            # the list of open nodes to be expanded.
            self._update_policy_open_nodes()
            #debug_policy_values(self,'Policy values after policy update')
            #debug_all_hypegraph_values(self,'After policy update')
            #######################################################

            # Performance measures and info
            root_value = root.value
            self.performance['root_value_series'].append(root_value)
            self.performance['root_exec_risk_series'].append(root.exec_risk)

            # Root node changed from its best value
            if not np.isclose(root_value, prev_root_value):
                # If the heuristic is really admissible, the root value can
                # only degrade (decrease for maximization, or increase for
                # minimization).
                if self._is_better(root_value, prev_root_value):
                    print('WARNING: root value improved, which might indicate inadmissibility.')
                else:
                    self.performance['time_to_best_value'] = time.time() - self._start_time
                    self.performance['iter_to_best_value'] = count
                prev_root_value = root_value

            if self._verbose >= 2:
                print("Expanded nodes [%s]" % (' '.join([str(n.name) for n in expanded_nodes])))
            if self._verbose >= 1:
                total_states = sum([len(node.state.belief) for node in expanded_nodes])
                print("Iter: %d, Open nodes: %d, States evaluated: %d, Root value: %.4f, Root ER: %.4f" %
                      (count, len(self._opennodes), total_states, root.value, root.exec_risk))

            if self._animation:  # If an animation should be generated
                partial_policy = self._extract_policy(partial=True)
                dot_policy = policy_to_dot(self._explicit, partial_policy)
                dot_policy.write('./animation/%d_rao.svg' % (count), format='svg')
    except KeyboardInterrupt:
        interrupted = True
        print("\n\n***** EXECUTION TERMINATED BY USER AT ITERATION %d. *****" % (count))

    self.performance['total_elapsed_time'] = time.time() - self._start_time
    self.performance['optimal_value'] = root.value
    self.performance['exec_risk_for_optimal_value'] = root.exec_risk

    if self._log:  # Closes the log file, if one has been created
        self._log_f.close()
        print("\nClosed " + filename)

    print("\nTotal elapsed time: %f s" % (self.performance['total_elapsed_time']))
    print("Time to optimal value: %f s" % (self.performance['time_to_best_value']))
    print("Iterations till optimal value: %d" % (self.performance['iter_to_best_value']))
    print("Number of expanded nodes: %d" % (self.performance['expanded_nodes']))
    print("Number of evaluated particles: %d" % (self.performance['evaluated_particles']))
    print("Optimal value: %f" % (self.performance['optimal_value']))
    print("Execution risk for optimal value: %f" % (self.performance['exec_risk_for_optimal_value']))

    policy = self._extract_policy(partial=interrupted)
    if len(policy) == 0:
        print('\n##### Failed to find policy (it is empty)...\n')
    elif self.performance['optimal_value'] == -float('inf'):
        print('\n##### Failed to find policy (probably due to chance constraint violation)...\n')
    else:
        print('\n##### Policy found!\n')

    return policy, self._explicit, self.performance