def setUp(self):
    """Build a single-goal MDP behaviour tree, train it, then tick once in inference."""
    goalspec = 'F P_[IC][True,none,==]'
    start = (1, 3)
    env = init_mdp(start)
    keys = ['L', 'IC']
    actions = [0, 1, 2, 3]
    root = goalspec2BT(goalspec, planner=None)
    self.behaviour_tree = BehaviourTree(root)
    # The planner is attached to the (single) root node after tree construction.
    child = self.behaviour_tree.root
    planner = GenRecPropMDP(
        env, keys, None, dict(), actions=actions, max_trace=10, seed=123)
    child.setup(0, planner, True, 10)
    # Training ticks.
    for _ in range(10):
        self.behaviour_tree.tick(pre_tick_handler=reset_env(env))
    print(self.behaviour_tree.root.status)
    # Freeze learning and tick once in inference mode.
    child.train = False
    for _ in range(1):
        self.behaviour_tree.tick(pre_tick_handler=reset_env(env))
def setUp(self):
    """Build a two-goal (Until) behaviour tree, train each child, then run inference."""
    goalspec = 'F P_[NC][True,none,==] U F P_[L][03,none,==]'
    self.env = init_mdp((0, 3))
    keys = ['L', 'NC']
    actions = [0, 1, 2, 3]
    planner = GenRecPropMDPNear(
        self.env, keys, goalspec, dict(),
        actions=actions, max_trace=10, seed=123)
    root = goalspec2BT(goalspec, planner=planner)
    self.behaviour_tree = BehaviourTree(root)
    # Every child of the root shares the same planner instance.
    for child in self.behaviour_tree.root.children:
        child.setup(0, planner, True, 20)
    # Training ticks.
    for _ in range(20):
        self.behaviour_tree.tick(pre_tick_handler=reset_env(self.env))
    # Re-setup with a shorter budget and switch every child to inference.
    for child in self.behaviour_tree.root.children:
        child.setup(0, planner, True, 10)
        child.train = False
    for _ in range(2):
        self.behaviour_tree.tick(pre_tick_handler=reset_env(self.env))
def run_planner(planner, behaviour_tree, env, epoch=10):
    """Train the tree's root node for `epoch` ticks, then run a single inference tick."""
    node = behaviour_tree.root
    # Training setup and loop.
    node.setup(0, planner, True, epoch)
    for _ in range(epoch):
        behaviour_tree.tick(pre_tick_handler=reset_env(env))
    print(behaviour_tree.root.status)
    # Inference: freeze learning and tick once.
    node.train = False
    print(node, node.name, node.train)
    for _ in range(1):
        behaviour_tree.tick(pre_tick_handler=reset_env(env))
    print('inference', behaviour_tree.root.status)
    print(env.curr_loc)
def find_cheese(seed, max_trace_len=10, epoch=10):
    """Train a GenRecProp planner on the 10x10 cheese-finding MDP.

    Returns a tuple of (per-epoch success flags as a uint8 array,
    the planner's trace-length data).

    NOTE(review): `seed` is accepted but never forwarded to the planner
    here — confirm whether it should be passed through.
    """
    goalspec = 'F P_[IC][True,none,==]'
    env = init_10x10mdp((9, 0))
    keys = ['L', 'IC']
    actions = [0, 1, 2, 3]
    root = goalspec2BT(goalspec, planner=None)
    behaviour_tree = BehaviourTree(root)
    # Attach the planner to the single root node.
    node = behaviour_tree.root
    planner = GenRecPropMDP(
        env, keys, None, dict(), actions=actions,
        max_trace=max_trace_len, epoch=epoch)
    node.setup(0, planner, True, epoch=epoch)
    # Record a 1/0 success status per training epoch.
    data = np.zeros(epoch, dtype=np.uint8)
    for i in range(epoch):
        behaviour_tree.tick(pre_tick_handler=reset_env(env))
        data[i] = check_bt_status(behaviour_tree.root.status)
    # Inference: re-setup with the trace-length budget and freeze learning.
    node.setup(0, planner, True, max_trace_len)
    node.train = False
    for _ in range(1):
        behaviour_tree.tick(pre_tick_handler=reset_env(env))
    return (data, planner.trace_len_data)
def run_planner(planner, behaviour_tree, env, epoch=10, maxtracelen=10):
    """Train the root node over bounded-length episodes, then run one inference episode."""
    node = behaviour_tree.root
    # Training setup; the planner's goal spec mirrors the node name.
    node.setup(0, planner, True, epoch)
    node.planner.goalspec = node.name
    blackboard = Blackboard()
    blackboard.shared_content['current'] = dict()
    # Training: one episode per epoch, each capped at maxtracelen ticks.
    for i in range(epoch):
        reset_env(env)
        blackboard.shared_content['current']['epoch'] = i
        for _ in range(maxtracelen):
            behaviour_tree.tick()
            if blackboard.shared_content['current']['done']:
                break
        # Discard the episode trace before the next epoch.
        node.planner.trace = dict()
    # Inference: freeze learning and run a single episode.
    node.train = False
    for i in range(1):
        reset_env(env)
        blackboard.shared_content['current']['epoch'] = i
        for _ in range(maxtracelen):
            behaviour_tree.tick()
            if behaviour_tree.root.status == Status.SUCCESS:
                break
            if blackboard.shared_content['current']['done']:
                break
    print(env.curr_loc)
def run_planner_complex(
        Planner, behaviour_tree, env, keys, actions, epoch=(10, 10),
        seed=None):
    """Train each child of the root with its own planner, then run one
    inference tick and report overall success.

    Parameters
    ----------
    Planner : planner class; instantiated once per child node.
    behaviour_tree : tree whose root children each carry one sub-goal.
    env : environment shared by all planners.
    keys : perception keys forwarded to each planner.
    actions : per-child action lists, indexed like the root's children.
    epoch : per-child training-epoch counts, indexed like the children;
        training runs sum(epoch) ticks.  (Previous default of ``10``
        crashed on ``epoch[j]`` / ``sum(epoch)``; a sequence default
        matches the actual usage.)
    seed : forwarded to each planner.

    Returns
    -------
    bool : True iff the root reports Status.SUCCESS after inference.
    """
    # One planner per child, each with its own action set and epoch budget.
    for j, child in enumerate(behaviour_tree.root.children):
        planner = Planner(
            env, keys, child.name, dict(),
            # BUG FIX: previously always used actions[0] even though j
            # advanced per child; each child gets its own action list,
            # consistent with the epoch[j] indexing.
            actions=actions[j], max_trace=30, seed=seed)
        child.setup(0, planner, True, epoch[j])
    # Training loop across the combined epoch budget.
    for _ in range(sum(epoch)):
        behaviour_tree.tick(pre_tick_handler=reset_env(env))
    # Inference: freeze learning on every child, then tick once.
    for child in behaviour_tree.root.children:
        child.train = False
    for _ in range(1):
        behaviour_tree.tick(pre_tick_handler=reset_env(env))
    return behaviour_tree.root.status is Status.SUCCESS
def run_planner_complex(Planner, behaviour_tree, env, keys, actions,
                        epoch=(10, 10), seed=None, maxtracelen=10):
    """Train each child of the root with its own planner and return the
    per-epoch record of root success.

    Parameters
    ----------
    Planner : planner class; instantiated once per child node.
    behaviour_tree : tree whose root children each carry one sub-goal.
    env : shared environment, reset at the start of every epoch.
    keys : perception keys forwarded to each planner.
    actions : per-child action lists, indexed like the root's children.
    epoch : per-child epoch budgets; epoch[0] drives the outer training
        loop.  BUG FIX: the default was a mutable list (``[10, 10]``),
        which is shared across calls; a tuple is safe and the argument
        is only ever indexed, never mutated.
    seed : forwarded to each planner.
    maxtracelen : maximum ticks per training episode.

    Returns
    -------
    numpy array of length epoch[0], 1.0 where the root reached
    Status.SUCCESS during that training epoch, else 0.0.
    """
    # One planner per child, each with its own action list and budget.
    for j, child in enumerate(behaviour_tree.root.children):
        planner = Planner(env, keys, child.name, dict(),
                          actions=actions[j], max_trace=maxtracelen,
                          seed=seed, epoch=epoch[j])
        child.setup(0, planner, True, epoch[j])
    # Shared blackboard tracks the current epoch, completion flag, and
    # the per-epoch root success record.
    blackboard = Blackboard()
    blackboard.shared_content['current'] = dict()
    blackboard.shared_content['current']['root'] = np.zeros(epoch[0])
    for i in range(epoch[0]):
        reset_env(env)
        blackboard.shared_content['current']['epoch'] = i
        for _ in range(maxtracelen):
            behaviour_tree.tick()
            if blackboard.shared_content['current']['done']:
                break
        blackboard.shared_content['current']['root'][i] = (
            behaviour_tree.root.status == Status.SUCCESS) * 1.0
        # Reset per-child bookkeeping before the next epoch.
        for child in behaviour_tree.root.children:
            blackboard.shared_content['current'][child.planner.id] = 0
            child.planner.tcount = 0
            child.planner.trace = dict()
    # Freeze learning on every child.  The inference loop itself is
    # currently disabled; only the training success record is returned.
    for child in behaviour_tree.root.children:
        child.train = False
    return blackboard.shared_content['current']['root']