def __init__(self, domain_file, prob_file, pddl_pickle='', maximization=False, perform_scheduling=True, duration_func=None, paris_params={}, max_steps=50, verbose=0): #Initializes the generic PDDL model super(DurativePDDL, self).__init__(domain_file, prob_file, pddl_pickle, maximization, verbose) self.perform_scheduling = perform_scheduling self.duration_func = duration_func #Maximum number of actions allowed in any plan self.max_steps = max_steps #PDDL plan analyzer self._pa = PySATPlanAnalyzer(domain_file, prob_file, precompute_steps=max_steps, sequential=True, remove_static=True, verbose=verbose > 0) if self.perform_scheduling: self.scheduler = PARIS(**paris_params)
def strong_stnu_reformulation(rmpyl_policy,pseudocontrol=False,makespan=False,cc=-1.0): """ Performs the STNU reformulation for chance-constrained strong controllability. It also converts all uncontrollable durations to controllable, if pseudo-controllability is desired. It alters the RMPyL policy given as input. """ paris = PARIS() risk_bound,sc_schedule = paris.stnu_reformulation(rmpyl_policy,makespan=makespan,cc=cc) if pseudocontrol: for tc in rmpyl_policy.temporal_constraints: tc.type = 'controllable' return risk_bound,sc_schedule
def pddl_clark(dom_file,prob_file,out_file,use_pysat,maxsteps,cc,duration_func, schedule,makespan,timewindow,det_schedule,stnu_ref,animation): """ Calls CLARK as a PDDL planner. It assumes that STRIPS problem and domain files are given, and allows its to be extended with all types of temporal constraints supported by PARIS through a function pointer. """ if use_pysat: output_dict = pysat_planner(dom_file,prob_file,maxsteps,duration_func) else: output_dict = clark_planner(dom_file,prob_file,maxsteps,cc,duration_func, schedule,timewindow,animation) if output_dict['rmpyl'] != None: #Function to allow RMPyL policies to be manually modified to suit the needs #of whoever is calling CLARK (oh, Enterprise... why do you torture me?) rmpyl_post_processing(output_dict['rmpyl'],stnu_ref,det_schedule) if schedule: paris = PARIS() risk_bound,sc_schedule = paris.stnu_reformulation(output_dict['rmpyl'],makespan=makespan,cc=cc) if risk_bound != None: risk_bound = min(risk_bound,1.0) print('\nSuccessfully performed STNU reformulation with scheduling risk %f %%!'%(risk_bound*100.0)) print('\nThis is the schedule:') for e,t in sorted([(e,t) for e,t in sc_schedule.items()],key=lambda x: x[1]): print('\t%s: %.2f s'%(e,t)) else: print('\nFailed to perform STNU reformulation...') sc_schedule=risk_bound=None else: sc_schedule=risk_bound=None domain,problem,task = model_parser(dom_file,prob_file,remove_static=False) print('Plan not found!') initial_conditions = sorted(list(task.initial_state)) print('Initial conditions:') for p in initial_conditions: print('\t%s'%(str(p))) output_dict['sc_schedule']=sc_schedule output_dict['sc_risk_bound']=risk_bound return output_dict['rmpyl']!=None,output_dict
def __init__(self, path_planner, sites, perform_scheduling=True, prob_discovery=0.95, name='robot', paris_params={}, verbose=0): super(tCCRockSample, self).__init__(path_planner, sites, prob_discovery, name, verbose) #Events used to enforce global durations #self.global_start_event = Event(name='depart-from-start') #self.global_end_event = Event(name='arrive-at-goal') self.perform_scheduling = perform_scheduling #PARIS strong controllability checker self.scheduler = PARIS(**paris_params)
def __init__(self,path_planner,sites,perform_scheduling=True, prob_discovery=0.95,name='robot',paris_params={},verbose=0): super(tCCRockSample,self).__init__(path_planner,sites,prob_discovery,name,verbose) #Events used to enforce global durations #self.global_start_event = Event(name='depart-from-start') #self.global_end_event = Event(name='arrive-at-goal') self.perform_scheduling = perform_scheduling #PARIS strong controllability checker self.scheduler = PARIS(**paris_params)
def __init__(self,domain_file,prob_file,pddl_pickle='',maximization=False, perform_scheduling=True,duration_func=None,paris_params={}, max_steps=50,verbose=0): #Initializes the generic PDDL model super(DurativePDDL,self).__init__(domain_file,prob_file,pddl_pickle,maximization,verbose) self.perform_scheduling = perform_scheduling self.duration_func = duration_func #Maximum number of actions allowed in any plan self.max_steps = max_steps #PDDL plan analyzer self._pa = PySATPlanAnalyzer(domain_file,prob_file,precompute_steps=max_steps, sequential=True,remove_static=True, verbose=verbose>0) if self.perform_scheduling: self.scheduler = PARIS(**paris_params)
def _paris_service_cb(self,req): """Handler of PARIS service requests.""" resp = PARISSrvResponse() tcs = ut.ros_pstnu_to_rmpyl_temporal_constraints(req.pstnu) paris = PARIS(req.gaussian_div,req.gaussian_optimize_partition, req.gaussian_lr,req.gaussian_tol,req.gaussian_max_iter) if paris.set_minimum_risk_strong_controllability(tcs): setup_success = True if req.makespan: rospy.loginfo('Setting up makespan optimization.') paris.set_makespan_optimization() if req.cc>=0.0: rospy.loginfo('Setting up chance-constrained optimization.') paris.set_chance_constraint(req.cc) rospy.loginfo('Calling PARIS to solve scheduling problem.') squeeze_dict,risk_bound,sc_schedule = paris.solve() else: setup_success = False squeeze_dict=None if not setup_success: rospy.logwarn('Failed to set up strong controllability problem.') if squeeze_dict != None: resp.success = True resp.schedule = ut.schedule_to_schedule_msg(sc_schedule) resp.squeezes = ut.squeeze_dict_to_squeeze_msgs(squeeze_dict) resp.risk_upper_bound = risk_bound rospy.loginfo('Strong schedule found!') else: resp.success = False rospy.logwarn('Failed to solve strong controllability problem.') resp.header.stamp = rospy.Time.now() #Records time when solution was obtained return resp
def check_cc_strong_controllability(tcs,example_name='',write_tpns=False, gaussian_approx='smart_piecewise',gaussian_div=5, gaussian_optimize_partition=False,gaussian_lr=0.05, gaussian_tol=1e-4,gaussian_max_iter=1000, cc=-1.0,makespan=False): """ Checks the chance-constrained strong controllability of a pSTN. """ pt = PyTemporal() start_time = time.time() paris = PARIS(gaussian_div,gaussian_optimize_partition,gaussian_lr, gaussian_tol,gaussian_max_iter) paris.init_model() if paris.set_minimum_risk_strong_controllability(tcs): if makespan: paris.set_makespan_optimization() if cc>=0.0: paris.set_chance_constraint(cc) squeeze_dict,risk_bound,sc_schedule = paris.solve() else: squeeze_dict=None if squeeze_dict!=None: risk_bound = min(risk_bound,1.0) #The bound must saturate at 1 (almost sure to fail) elapsed = time.time()-start_time print('\n***** '+example_name) if squeeze_dict!=None: print('Found SC reformulation in %.3f ms!'%(elapsed*1000.0)) print('These are the squeezings:') prob_success=1.0 for tc,tc_dict in squeeze_dict.items(): print('\t%s [%.1f,%.1f]->[%.1f,%.1f]:\tRisk=%.4f%%'%(tc.name,tc.lb,tc.ub,tc_dict['lb'],tc_dict['ub'],tc_dict['risk']*100.0)) prob_success*=(1.0-tc_dict['risk']) print('\nThis is the schedule:') for ev_name, ev_time in sc_schedule.items(): print('\t%s: %.2f s'%(ev_name,ev_time)) risk_indep = (1.0-prob_success) print('\nTotal scheduling risk (assuming independent stochastic durations): %.4f%%'%(risk_indep*100.0)) print('Linear risk bound: %.4f%%'%(risk_bound*100.0)) if risk_bound<risk_indep: raise ValueError('Boole\'s bound %f less than (independent) risk %f'%(risk_bound,risk_indep)) else: risk_gap = risk_bound-risk_indep print('Bound gap: %f%%'%((risk_gap)*100.0)) if write_tpns: sc_stnu=[] for tc in tcs: if tc.type=='controllable': sc_stnu.append(tc) else: if tc in squeeze_dict: lb = tc_dict['lb'] ub = tc_dict['ub'] else: lb = tc.lb ub = tc.ub sc_stnu.append(TemporalConstraint(start=tc.start,end=tc.end, ctype='uncontrollable_bounded', lb=lb, ub=ub)) prog = pt.to_rmpyl(sc_stnu) prog.to_ptpn(filename='paris_sc_%s.tpn'%(example_name)) else: prob_success=risk_bound=risk_gap=-1 print('Strongly controllable reformulation failed in %.3f ms...'%(elapsed*1000.0)) if write_tpns: prog = pt.to_rmpyl(tcs) prog.to_ptpn(filename='paris_pstn_%s.tpn'%(example_name)) return prob_success,elapsed,risk_bound,risk_gap
class DurativePDDL(AbstractCCPDDL): """ Class encapsulating a PDDL model with durative actions and temporal constraints. Action durations can be controllable, set-bounded, or probabilistic. """ def __init__(self, domain_file, prob_file, pddl_pickle='', maximization=False, perform_scheduling=True, duration_func=None, paris_params={}, max_steps=50, verbose=0): #Initializes the generic PDDL model super(DurativePDDL, self).__init__(domain_file, prob_file, pddl_pickle, maximization, verbose) self.perform_scheduling = perform_scheduling self.duration_func = duration_func #Maximum number of actions allowed in any plan self.max_steps = max_steps #PDDL plan analyzer self._pa = PySATPlanAnalyzer(domain_file, prob_file, precompute_steps=max_steps, sequential=True, remove_static=True, verbose=verbose > 0) if self.perform_scheduling: self.scheduler = PARIS(**paris_params) def get_state(self, true_predicates, last_event, constraints, optimal_next_actions, min_pddl_length, delayed): """ Returns a proper state representation. """ state_dict = { 'true_predicates': frozenset(true_predicates), 'last_event': last_event, #Temporal event associated with state 'constraints': constraints, #Temporal constraints 'optimal_next_actions': optimal_next_actions, #Best actions (length-wise) to execute 'min_pddl_length': min_pddl_length, #Minimum length of the relaxed PDDL plan. 'delayed': delayed } #Whether the state is on an optimal PDDL plan return state_dict def get_initial_belief(self, constraints=[]): """ Proper initial representation of the initial belief state of the search. """ belief = {} #Determines the minimum length and the optimal first actions for the relaxed PDDL plan. min_length, op_next_actions = self._shortest_length_and_actions( self.task.initial_state, self.task.goals, self.max_steps) #Temporal constraint that makes sure that the start and end events of a #mission are correctly aligned. start_before_end = TemporalConstraint( start=self.global_start_event, end=self.global_end_event, ctype='controllable', lb=0.0, ub=float('inf')) s0 = self.get_state(true_predicates=self.task.initial_state, last_event=self.global_start_event, constraints=constraints + [start_before_end], optimal_next_actions=op_next_actions, min_pddl_length=min_length, delayed=False) belief[self.hash_state(s0)] = [s0, 1.0] return belief def actions(self, state): """ Actions available at a state. """ if not self.is_terminal(state): #If this state has has its expansion delayed, return the expansion action if state['delayed']: return ['__expand__'] #If not, returns all applicable operators. else: true_pred = state['true_predicates'] #True predicates if self.duration_func == None: return [ PDDLEpisode(op) for op in self.task.operators if op.applicable(true_pred) ] else: return [ PDDLEpisode(op, duration=self.duration_func(op.name)) for op in self.task.operators if op.applicable(true_pred) ] else: return [] def state_transitions(self, state, action): """ Returns the next state, after executing an operator. """ #If this node has been delayed, expands it. if action == '__expand__': #The only states that can be expanded are delayed states. if not state['delayed']: import ipdb ipdb.set_trace() pass next_min_length, next_op_actions = self._shortest_length_and_actions( state['true_predicates'], self.task.goals, self.max_steps) #Returns a non-delayed copy of the current state with updated minimum #PDDL plan length and optimal actions. next_state = self.get_state( true_predicates=state['true_predicates'], last_event=state['last_event'], constraints=state['constraints'], optimal_next_actions=next_op_actions, min_pddl_length=next_min_length, delayed=False) else: #DEBUG: states with no optimal actions to be executed should be deemed #terminal and should never be dequed. Also, delayed states can only #be expanded if state['optimal_next_actions'] in [None, [] ] or state['delayed']: import ipdb ipdb.set_trace() pass #Applies PDDL operator to true predicates new_pred = action.pddl_operator.apply(state['true_predicates']) #Temporal constraint representing the duration of the activity constraints = [action.duration] #Temporal constraint representing the fact that this action comes after #the previous one constraints.append( TemporalConstraint(start=state['last_event'], end=action.start, ctype='controllable', lb=0.0, ub=float('inf'))) #Temporal constraint representing the fact that this activity should end #before the end of the mission constraints.append( TemporalConstraint(start=action.end, end=self.global_end_event, ctype='controllable', lb=0.0, ub=float('inf'))) #If the current action is in the set of optimal next actions, do not #delay the expansion of the next state, for it is on a promising path if action.pddl_operator.name in state['optimal_next_actions']: delayed = False next_min_length = state['min_pddl_length'] - 1 next_op_actions = self._pa.first_actions( new_pred, self.task.goals, next_min_length) #However, if this is not an optimal action at the current state, #refrain from expanding the next state until strictly necessary. else: delayed = True next_min_length = state['min_pddl_length'] next_op_actions = None next_state = self.get_state( true_predicates=new_pred, last_event=action.end, constraints=state['constraints'] + constraints, optimal_next_actions=next_op_actions, min_pddl_length=next_min_length, delayed=delayed) return [[next_state, 1.0]] def value(self, state, action): """ Cost of performing an action at a state. """ return 0.0 if action == '__expand__' else 1.0 def heuristic(self, state): """ Number of unachieved goals, assuming that two goals cannot be achieved simultaneously. """ return state['min_pddl_length'] def state_risk(self, state): """ Returns the scheduling risk bound returned by PARIS. """ #Checks if scheduling should be performed if self.perform_scheduling: if not 'scheduling_risk' in state: squeeze_dict, risk_bound, sc_schedule = self.scheduler.schedule( state['constraints']) state[ 'scheduling_risk'] = 1.0 if squeeze_dict == None else min( 1.0, risk_bound) #If the scheduling risk is 1.0, it means that there is not scenario #in which the environment can choose non-violating values for the #durations. if self.is_terminal(state): return state['scheduling_risk'] else: return 0.0 else: return 0.0 #No scheduling risk def execution_risk_heuristic(self, state): """ The immediate risk is always an admissible estimate of the execution risk. """ if 'scheduling_risk' in state: return state['scheduling_risk'] else: return 0.0 def is_terminal(self, state): """A state is terminal if it has been expanded and reached the goal, if no action is available at it or if the best decision is to stay put (empty set of actions).""" return (not state['delayed']) and ((self.task.goals<=state['true_predicates']) or \ (state['optimal_next_actions'] in [None,[]])) def _shortest_length_and_actions(self, initial_state, goals, max_steps, min_steps=-1): """ Determines the minimum length and optimal first actions for the relaxed PDDL plan. """ min_length = self._pa.shortest_plan_length(initial_state, goals, max_steps, min_steps) if min_length < float('inf'): #Determines the set of first actions for all optimal plan lengths. optimal_next_actions = self._pa.first_actions( initial_state, goals, min_length) else: optimal_next_actions = None return min_length, optimal_next_actions
ub=float(time_bound))) global_prog = RMPyL(name='run()') global_prog *= global_prog.parallel(prog.plan,*activation_episodes,start=global_start) activation_tcs.append(TemporalConstraint(start=global_start,end=prog.first_event, ctype='controllable',lb=0.0,ub=0.005)) for tc in activation_tcs: global_prog.add_temporal_constraint(tc) # rmpyl_policy.to_ptpn(filename=output_file) #ipdb.set_trace() global_prog.to_ptpn(filename=output_file+'_test') paris = PARIS() # risk_bound,sc_schedule = paris.stnu_reformulation(rmpyl_policy,makespan=True,cc=0.001) risk_bound,sc_schedule = paris.stnu_reformulation(global_prog,makespan=True,cc=0.001) if risk_bound != None: risk_bound = min(risk_bound,1.0) print('\nSuccessfully performed STNU reformulation with scheduling risk %f %%!'%(risk_bound*100.0)) for tc in global_prog.temporal_constraints: if tc.type == 'uncontrollable_bounded': tc.type = 'controllable' for i in range(len(pddl_episodes)-1):
for t, action in enumerate(plan): print('%d: %s' % (t, action)) prog = RMPyL(name='run()') prog.plan = prog.sequence(*[ Episode(start=Event(name='start-of-' + op), end=Event(name='end-of-' + op), action=op, duration=rss_duration_func(op)) for op in plan ]) prog.add_overall_temporal_constraint(ctype='controllable', lb=0.0, ub=2000.0) prog.to_ptpn(filename='rss_pysat_before_stnu_reform.tpn') paris = PARIS() risk_bound, sc_sched = paris.stnu_reformulation(prog, makespan=True, cc=0.001) if risk_bound != None: risk_bound = min(risk_bound, 1.0) print( '\nSuccessfully performed STNU reformulation with scheduling risk %f %%!' % (risk_bound * 100.0)) prog.to_ptpn(filename='rss_pysat_after_stnu_reform.tpn') print('\nThis is the schedule:') for e, t in sorted([(e, t) for e, t in sc_sched.items()], key=lambda x: x[1]): print('\t%s: %.2f s' % (e, t)) else:
expand_all_open=True,verbose=1,log=False,animation=False) policy,explicit,performance = planner.search(b0) dot_policy = policy_to_dot(explicit,policy) dot_policy.write('mitsubishi_durative_policy.svg',format='svg') rmpyl_policy = policy_to_rmpyl(explicit,policy, constraint_fields=['constraints'], global_end=mitsubishi_model.global_end_event) #Combines controllable temporal constraints. rmpyl_policy.simplify_temporal_constraints() rmpyl_policy.to_ptpn(filename='mitsubishi_durative_policy_rmpyl_before_stnu_reform.tpn', exclude_op=['__stop__','__expand__']) paris = PARIS() risk_bound,sc_schedule = paris.stnu_reformulation(rmpyl_policy,makespan=True,cc=cc) if risk_bound != None: risk_bound = min(risk_bound,1.0) print('\nSuccessfully performed STNU reformulation with scheduling risk %f %%!'%(risk_bound*100.0)) rmpyl_policy.to_ptpn(filename='mitsubishi_durative_policy_rmpyl_after_stnu_reform.tpn') print('\nThis is the schedule:') for e,t in sorted([(e,t) for e,t in sc_schedule.items()],key=lambda x: x[1]): print('\t%s: %.2f s'%(e,t)) else: print('\nFailed to perform STNU reformulation...')
class tCCRockSample(CCRockSample): """ Extension of CCRockSample that requires the policies to be strongly controllable in a probabilistic sense. Uses PyTemporal for checking. """ def __init__(self, path_planner, sites, perform_scheduling=True, prob_discovery=0.95, name='robot', paris_params={}, verbose=0): super(tCCRockSample, self).__init__(path_planner, sites, prob_discovery, name, verbose) #Events used to enforce global durations #self.global_start_event = Event(name='depart-from-start') #self.global_end_event = Event(name='arrive-at-goal') self.perform_scheduling = perform_scheduling #PARIS strong controllability checker self.scheduler = PARIS(**paris_params) def get_state(self, position, site, crashed, visited, new_discovery, tcs=[]): """ Returns a proper state representation. """ state_dict = { 'position': position, #Position on map 'site': site, #Map site 'crashed': crashed, #Crashed against obstacle flag 'visited': visited, #Sets of visited locations 'tcs': tcs, #Temporal constraints 'new_discovery': new_discovery } #Dictionary containing if a site #contains a new discovery #Checks if the temporal network is still strongly controllable if len(tcs) > 0 and self.perform_scheduling: squeeze_dict, objective, sc_schedule = self.scheduler.schedule( tcs) if squeeze_dict == None: #No ccSC schedule found. state_dict['sched-risk'] = 1.0 #print('Strongly controllable reformulation failed...') else: #ccSC schedule found, so it records the scheduling risk prob_success = 1.0 for tc_dict in squeeze_dict.values(): prob_success *= (1.0 - tc_dict['risk']) state_dict['sched-risk'] = 1.0 - prob_success #print('Scheduling risk (assuming independent stochastic durations): %.4f%%'%(state_dict['sched-risk']*100.0)) else: #print('No constraints. Trivially schedulable.') state_dict['sched-risk'] = 0.0 return state_dict def get_initial_belief(self, prior, initial_site='_start_', initial_pos=None, init_tcs=[], goal_site='_goal_', goal_pos=None): """ Generates an initial belief distribution over the presence of new discoveries in the map, assuming independence of the prior probabilities. Morevoer, initializes the list of temporal constraints. """ scenarios = self._generate_scenarios(prior) self._add_new_site(initial_site, initial_pos) self.start_site = initial_site self._add_new_site(goal_site, goal_pos) self.goal = goal_site return self._scenarios_to_states( scenarios=scenarios, initial_pos=self.sites[self.start_site]['coords'], initial_site=self.start_site, init_tcs=init_tcs) def _scenarios_to_states(self, scenarios, initial_pos, initial_site, init_tcs): """" Generates the initial belief particles from a list of scenarios. """ belief = {} for s in scenarios: state = self.get_state(position=initial_pos, site=initial_site, crashed=False, visited=set(), new_discovery=s[0], tcs=init_tcs) belief[self.hash_state(state)] = [state, s[1]] return belief def state_transitions(self, state, action): """ Returns the next state, after executing an operator. """ if self._at_site(state['position'], state['site']): if state['crashed']: #If you're crashed, you stay crashed return [[state, 1.0]] else: crash_state = self.get_state(position=(), site='_crash_', crashed=True, visited=set(), new_discovery={}, tcs=[]) #The goal state has an additional temporal constraint related to the #duration of the traversal. goal_state = self.get_state( position=action.episode.properties['goal_coords'][0:2], site=action.goal_site, crashed=False, visited=state['visited'].union(set([state['site']])), new_discovery=state['new_discovery'], tcs=state['tcs'] + list(action.rmpyl.temporal_constraints)) return [[goal_state, 1.0 - action.risk], [crash_state, action.risk]] else: raise ValueError( 'Current position %s and current site %s diverged' % (str(state['position']), state['site'])) def is_terminal(self, state): """ A state is terminal if all sites have been visited or it has crashed. """ return state['crashed'] or (state['site'] == self.goal) def terminal_value(self, state): """ No extra reward at a terminal state. """ if state['crashed']: return 0.0 else: if state['site'] == self.goal: if state['site'] in state['new_discovery'] and state[ 'new_discovery'][state['site']]: return self.sites[state['site']]['value'] else: return 0.0 else: return -np.inf #You MUST end at the goal location, or crash def state_risk(self, state): """ Crash states have risk 1.0, while all others have risk related to scheduling. """ return 1.0 if state['crashed'] else ( state['sched-risk'] if 'sched-risk' in state else 0.0)
class tCCRockSample(CCRockSample): """ Extension of CCRockSample that requires the policies to be strongly controllable in a probabilistic sense. Uses PyTemporal for checking. """ def __init__(self,path_planner,sites,perform_scheduling=True, prob_discovery=0.95,name='robot',paris_params={},verbose=0): super(tCCRockSample,self).__init__(path_planner,sites,prob_discovery,name,verbose) #Events used to enforce global durations #self.global_start_event = Event(name='depart-from-start') #self.global_end_event = Event(name='arrive-at-goal') self.perform_scheduling = perform_scheduling #PARIS strong controllability checker self.scheduler = PARIS(**paris_params) def get_state(self,position,site,crashed,visited,new_discovery,tcs=[]): """ Returns a proper state representation. """ state_dict = {'position':position, #Position on map 'site':site, #Map site 'crashed':crashed, #Crashed against obstacle flag 'visited':visited, #Sets of visited locations 'tcs':tcs, #Temporal constraints 'new_discovery':new_discovery} #Dictionary containing if a site #contains a new discovery #Checks if the temporal network is still strongly controllable if len(tcs)>0 and self.perform_scheduling: squeeze_dict,objective,sc_schedule = self.scheduler.schedule(tcs) if squeeze_dict==None: #No ccSC schedule found. state_dict['sched-risk']=1.0 #print('Strongly controllable reformulation failed...') else: #ccSC schedule found, so it records the scheduling risk prob_success=1.0 for tc_dict in squeeze_dict.values(): prob_success*=(1.0-tc_dict['risk']) state_dict['sched-risk']=1.0-prob_success #print('Scheduling risk (assuming independent stochastic durations): %.4f%%'%(state_dict['sched-risk']*100.0)) else: #print('No constraints. Trivially schedulable.') state_dict['sched-risk']=0.0 return state_dict def get_initial_belief(self,prior,initial_site='_start_',initial_pos=None, init_tcs=[],goal_site='_goal_',goal_pos=None): """ Generates an initial belief distribution over the presence of new discoveries in the map, assuming independence of the prior probabilities. Morevoer, initializes the list of temporal constraints. """ scenarios=self._generate_scenarios(prior) self._add_new_site(initial_site,initial_pos); self.start_site = initial_site self._add_new_site(goal_site,goal_pos); self.goal = goal_site return self._scenarios_to_states(scenarios=scenarios, initial_pos=self.sites[self.start_site]['coords'], initial_site=self.start_site, init_tcs=init_tcs) def _scenarios_to_states(self,scenarios,initial_pos,initial_site,init_tcs): """" Generates the initial belief particles from a list of scenarios. """ belief = {} for s in scenarios: state = self.get_state(position=initial_pos,site=initial_site, crashed=False,visited=set(),new_discovery=s[0], tcs=init_tcs) belief[self.hash_state(state)] = [state,s[1]] return belief def state_transitions(self,state,action): """ Returns the next state, after executing an operator. """ if self._at_site(state['position'],state['site']): if state['crashed']:#If you're crashed, you stay crashed return [[state,1.0]] else: crash_state = self.get_state(position=(),site='_crash_',crashed=True, visited=set(),new_discovery={},tcs=[]) #The goal state has an additional temporal constraint related to the #duration of the traversal. goal_state = self.get_state(position=action.episode.properties['goal_coords'][0:2], site=action.goal_site,crashed=False, visited=state['visited'].union(set([state['site']])), new_discovery=state['new_discovery'], tcs=state['tcs']+list(action.rmpyl.temporal_constraints)) return [[goal_state,1.0-action.risk],[crash_state,action.risk]] else: raise ValueError('Current position %s and current site %s diverged'%(str(state['position']),state['site'])) def is_terminal(self,state): """ A state is terminal if all sites have been visited or it has crashed. """ return state['crashed'] or (state['site'] == self.goal) def terminal_value(self,state): """ No extra reward at a terminal state. """ if state['crashed']: return 0.0 else: if state['site'] == self.goal: if state['site'] in state['new_discovery'] and state['new_discovery'][state['site']]: return self.sites[state['site']]['value'] else: return 0.0 else: return -np.inf #You MUST end at the goal location, or crash def state_risk(self,state): """ Crash states have risk 1.0, while all others have risk related to scheduling. """ return 1.0 if state['crashed'] else (state['sched-risk'] if 'sched-risk' in state else 0.0)
class DurativePDDL(AbstractCCPDDL): """ Class encapsulating a PDDL model with durative actions and temporal constraints. Action durations can be controllable, set-bounded, or probabilistic. """ def __init__(self,domain_file,prob_file,pddl_pickle='',maximization=False, perform_scheduling=True,duration_func=None,paris_params={}, max_steps=50,verbose=0): #Initializes the generic PDDL model super(DurativePDDL,self).__init__(domain_file,prob_file,pddl_pickle,maximization,verbose) self.perform_scheduling = perform_scheduling self.duration_func = duration_func #Maximum number of actions allowed in any plan self.max_steps = max_steps #PDDL plan analyzer self._pa = PySATPlanAnalyzer(domain_file,prob_file,precompute_steps=max_steps, sequential=True,remove_static=True, verbose=verbose>0) if self.perform_scheduling: self.scheduler = PARIS(**paris_params) def get_state(self,true_predicates,last_event,constraints, optimal_next_actions,min_pddl_length,delayed): """ Returns a proper state representation. """ state_dict = {'true_predicates':frozenset(true_predicates), 'last_event':last_event, #Temporal event associated with state 'constraints':constraints, #Temporal constraints 'optimal_next_actions':optimal_next_actions, #Best actions (length-wise) to execute 'min_pddl_length':min_pddl_length, #Minimum length of the relaxed PDDL plan. 'delayed':delayed} #Whether the state is on an optimal PDDL plan return state_dict def get_initial_belief(self,constraints=[]): """ Proper initial representation of the initial belief state of the search. """ belief = {} #Determines the minimum length and the optimal first actions for the relaxed PDDL plan. min_length,op_next_actions = self._shortest_length_and_actions(self.task.initial_state, self.task.goals, self.max_steps) #Temporal constraint that makes sure that the start and end events of a #mission are correctly aligned. start_before_end = TemporalConstraint(start=self.global_start_event,end=self.global_end_event, ctype='controllable',lb=0.0,ub=float('inf')) s0 = self.get_state(true_predicates = self.task.initial_state, last_event = self.global_start_event, constraints = constraints+[start_before_end], optimal_next_actions = op_next_actions, min_pddl_length = min_length, delayed = False) belief[self.hash_state(s0)] = [s0,1.0] return belief def actions(self,state): """ Actions available at a state. """ if not self.is_terminal(state): #If this state has has its expansion delayed, return the expansion action if state['delayed']: return ['__expand__'] #If not, returns all applicable operators. else: true_pred = state['true_predicates'] #True predicates if self.duration_func == None: return [PDDLEpisode(op) for op in self.task.operators if op.applicable(true_pred)] else: return [PDDLEpisode(op,duration=self.duration_func(op.name)) for op in self.task.operators if op.applicable(true_pred)] else: return [] def state_transitions(self,state,action): """ Returns the next state, after executing an operator. """ #If this node has been delayed, expands it. if action == '__expand__': #The only states that can be expanded are delayed states. if not state['delayed']: import ipdb; ipdb.set_trace() pass next_min_length,next_op_actions = self._shortest_length_and_actions(state['true_predicates'], self.task.goals, self.max_steps) #Returns a non-delayed copy of the current state with updated minimum #PDDL plan length and optimal actions. next_state = self.get_state(true_predicates = state['true_predicates'], last_event = state['last_event'], constraints = state['constraints'], optimal_next_actions = next_op_actions, min_pddl_length = next_min_length, delayed=False) else: #DEBUG: states with no optimal actions to be executed should be deemed #terminal and should never be dequed. Also, delayed states can only #be expanded if state['optimal_next_actions'] in [None,[]] or state['delayed']: import ipdb; ipdb.set_trace() pass #Applies PDDL operator to true predicates new_pred = action.pddl_operator.apply(state['true_predicates']) #Temporal constraint representing the duration of the activity constraints = [action.duration] #Temporal constraint representing the fact that this action comes after #the previous one constraints.append(TemporalConstraint(start=state['last_event'],end=action.start, ctype='controllable',lb=0.0,ub=float('inf'))) #Temporal constraint representing the fact that this activity should end #before the end of the mission constraints.append(TemporalConstraint(start=action.end,end=self.global_end_event, ctype='controllable',lb=0.0,ub=float('inf'))) #If the current action is in the set of optimal next actions, do not #delay the expansion of the next state, for it is on a promising path if action.pddl_operator.name in state['optimal_next_actions']: delayed=False next_min_length = state['min_pddl_length']-1 next_op_actions = self._pa.first_actions(new_pred, self.task.goals, next_min_length) #However, if this is not an optimal action at the current state, #refrain from expanding the next state until strictly necessary. else: delayed=True next_min_length = state['min_pddl_length'] next_op_actions = None next_state = self.get_state(true_predicates = new_pred, last_event = action.end, constraints = state['constraints']+constraints, optimal_next_actions = next_op_actions, min_pddl_length = next_min_length, delayed=delayed) return [[next_state,1.0]] def value(self,state,action): """ Cost of performing an action at a state. """ return 0.0 if action == '__expand__' else 1.0 def heuristic(self,state): """ Number of unachieved goals, assuming that two goals cannot be achieved simultaneously. """ return state['min_pddl_length'] def state_risk(self,state): """ Returns the scheduling risk bound returned by PARIS. """ #Checks if scheduling should be performed if self.perform_scheduling: if not 'scheduling_risk' in state: squeeze_dict,risk_bound,sc_schedule = self.scheduler.schedule(state['constraints']) state['scheduling_risk'] = 1.0 if squeeze_dict==None else min(1.0,risk_bound) #If the scheduling risk is 1.0, it means that there is not scenario #in which the environment can choose non-violating values for the #durations. if self.is_terminal(state): return state['scheduling_risk'] else: return 0.0 else: return 0.0 #No scheduling risk def execution_risk_heuristic(self,state): """ The immediate risk is always an admissible estimate of the execution risk. """ if 'scheduling_risk' in state: return state['scheduling_risk'] else: return 0.0 def is_terminal(self,state): """A state is terminal if it has been expanded and reached the goal, if no action is available at it or if the best decision is to stay put (empty set of actions).""" return (not state['delayed']) and ((self.task.goals<=state['true_predicates']) or \ (state['optimal_next_actions'] in [None,[]])) def _shortest_length_and_actions(self,initial_state,goals,max_steps,min_steps=-1): """ Determines the minimum length and optimal first actions for the relaxed PDDL plan. """ min_length = self._pa.shortest_plan_length(initial_state,goals,max_steps,min_steps) if min_length < float('inf'): #Determines the set of first actions for all optimal plan lengths. optimal_next_actions = self._pa.first_actions(initial_state,goals,min_length) else: optimal_next_actions=None return min_length,optimal_next_actions
dot_policy = policy_to_dot(explicit, policy) dot_policy.write('rss_policy.svg', format='svg') rmpyl_policy = policy_to_rmpyl(explicit, policy, constraint_fields=['constraints'], global_end=rss_model.global_end_event) #Combines controllable temporal constraints. rmpyl_policy.simplify_temporal_constraints() rmpyl_policy.to_ptpn(filename='rss_policy_rmpyl_before_stnu_reform.tpn', exclude_op=['__stop__', '__expand__']) paris = PARIS() risk_bound, sc_schedule = paris.stnu_reformulation(rmpyl_policy, makespan=True, cc=cc) if risk_bound != None: risk_bound = min(risk_bound, 1.0) print( '\nSuccessfully performed STNU reformulation with scheduling risk %f %%!' % (risk_bound * 100.0)) rmpyl_policy.to_ptpn(filename='rss_policy_rmpyl_after_stnu_reform.tpn') print('\nThis is the schedule:') for e, t in sorted([(e, t) for e, t in sc_schedule.items()], key=lambda x: x[1]): print('\t%s: %.2f s' % (e, t))