def cozmomain2():
    """Run the two-stage Cozmo mission: detect the cube, then find it.

    Builds a sequential (U) goal specification, compiles it to a
    behaviour tree, attaches one hand-crafted policy per sub-goal,
    and ticks the tree once.
    """
    goal1 = 'F(P_[DC][True,none,==])'
    goal2 = 'F(P_[FC][True,none,==])'
    # Sequential mission: goal1 must succeed before goal2 starts.
    goalspec = goal1+' U '+goal2
    print(goalspec)
    keys = ['P', 'DC', 'FC', 'CC', 'DD', 'FD', 'D', 'A']
    root = goalspec2BT(goalspec, planner=None)
    behaviour_tree = BehaviourTree(root)
    # One policy name per sub-goal, in mission order.
    policies = ['detect_cube', 'find_cube']
    j = 0
    # NOTE: removed dead `child = behaviour_tree.root` assignment that was
    # immediately shadowed by the loop variable below.
    for child in behaviour_tree.root.children:
        planner = CozmoPlanner(ComplexGoal, keys, child.name, policy=policies[j])
        j += 1
        # train=False: policies are scripted, not learned.
        child.setup(0, planner, False, 5)
    for i in range(1):
        behaviour_tree.tick(
            pre_tick_handler=reset_env(ComplexGoal)
        )
        print(i, behaviour_tree.root.status)
def __init__(
        self, expname='key', goalspecs='F P_[KE][1,none,==]',
        keys=None, actions=None,
        seed=None, maxtracelen=40, trainc=False, epoch=80):
    """Set up the MiniGrid goals environment and the goal behaviour tree.

    Args:
        expname: tag used to name experiment artefacts.
        goalspecs: LTL-style goal specification string.
        keys: propositions tracked by the planner
            (defaults to ['LO', 'FW', 'KE']).
        actions: available action indices (defaults to list(range(5))).
        seed: if not None, the environment is deterministically reseeded.
        maxtracelen: maximum trace length recorded by the planner.
        trainc: whether competency models are trained.
        epoch: training epochs per goal node.
    """
    # Fix: the original used mutable default arguments ([...] and
    # list(range(5))), which are shared across instances; use None
    # sentinels instead (backward-compatible).
    if keys is None:
        keys = ['LO', 'FW', 'KE']
    if actions is None:
        actions = list(range(5))
    env_name = 'MiniGrid-Goals-v0'
    env = gym.make(env_name)
    if seed is not None:
        env = ReseedWrapper(env, seeds=[seed])
    env = FullyObsWrapper(env)
    self.env = env
    self.env.max_steps = min(env.max_steps, 200)
    self.env.reset()
    self.expname = expname
    self.goalspecs = goalspecs
    self.epoch = epoch
    self.maxtracelen = maxtracelen
    self.trainc = trainc
    # Full proposition vocabulary known to the environment.
    self.allkeys = [
        'LO', 'FW', 'KE', 'DR',
        'BOB', 'BOR', 'BAB', 'BAR',
        'LV', 'GO', 'CK',
        'CBB', 'CBR', 'CAB', 'CAR',
        'DO', 'RM']
    self.keys = keys
    self.actions = actions
    root = goalspec2BT(goalspecs, planner=None, node=CompetentNode)
    self.behaviour_tree = BehaviourTree(root)
    self.blackboard = Blackboard()
def taxid():
    """Train a taxi pickup agent on the deterministic taxi task, then infer."""
    env = init_taxi_d(seed=1234)
    target = list(env.decode(env.s))
    print(target)
    goalspec = 'F P_[PI]['+str(3)+',none,==]'
    keys = ['L', 'PI', 'DI']
    actions = [0, 1, 2, 3, 5]
    behaviour_tree = BehaviourTree(goalspec2BT(goalspec, planner=None))
    node = behaviour_tree.root
    planner = GenRecPropTaxi(
        env, keys, node.name, dict(),
        actions=actions, max_trace=5, seed=123)
    node.setup(0, planner, True, 5)
    # Training phase.
    for step in range(5):
        behaviour_tree.tick(
            pre_tick_handler=reset_env_d(env)
        )
        print(step, behaviour_tree.root.status)
    # Inference phase: learning disabled.
    node.train = False
    for step in range(1):
        behaviour_tree.tick(
            pre_tick_handler=reset_env_d(env)
        )
        print(step, behaviour_tree.root.status)
def setUp(self):
    """Train a GenRecProp MDP planner on the cheese-finding goal."""
    goalspec = 'F P_[IC][True,none,==]'
    startpoc = (1, 3)
    env = init_mdp(startpoc)
    keys = ['L', 'IC']
    actions = [0, 1, 2, 3]
    self.behaviour_tree = BehaviourTree(goalspec2BT(goalspec, planner=None))
    # Attach the planner to the single root goal node and update its params.
    node = self.behaviour_tree.root
    planner = GenRecPropMDP(
        env, keys, None, dict(), actions=actions,
        max_trace=10, seed=123)
    node.setup(0, planner, True, 10)
    for _ in range(10):
        self.behaviour_tree.tick(
            pre_tick_handler=reset_env(env)
        )
        print(self.behaviour_tree.root.status)
    # Single inference pass with learning turned off.
    node.train = False
    for _ in range(1):
        self.behaviour_tree.tick(
            pre_tick_handler=reset_env(env)
        )
def setUp(self):
    """Train a near-cheese MDP planner on a two-part sequential goal."""
    goalspec = 'F P_[NC][True,none,==] U F P_[L][03,none,==]'
    startpoc = (0, 3)
    self.env = init_mdp(startpoc)
    keys = ['L', 'NC']
    actions = [0, 1, 2, 3]
    planner = GenRecPropMDPNear(
        self.env, keys, goalspec, dict(), actions=actions,
        max_trace=10, seed=123)
    root = goalspec2BT(goalspec, planner=planner)
    self.behaviour_tree = BehaviourTree(root)
    # Every sub-goal node shares the same planner instance.
    for node in self.behaviour_tree.root.children:
        node.setup(0, planner, True, 20)
    for _ in range(20):
        self.behaviour_tree.tick(
            pre_tick_handler=reset_env(self.env)
        )
    # Re-setup each node and flip it into inference mode.
    for node in self.behaviour_tree.root.children:
        node.setup(0, planner, True, 10)
        node.train = False
    for _ in range(2):
        self.behaviour_tree.tick(
            pre_tick_handler=reset_env(self.env)
        )
def cozmomain():
    """Run the full six-stage Cozmo cube-to-charger mission.

    The six sub-goals are chained left-associatively with U (until),
    producing the same nested spec as explicit concatenation.
    """
    subgoals = [
        'F(P_[DC][True,none,==])',
        'F(P_[FC][True,none,==])',
        'F(P_[CC][True,none,==])',
        'G(P_[CC][True,none,==]) & F(P_[DD][True,none,==])',
        'F(P_[FD][True,none,==])',
        'F(P_[CC][False,none,==])',
    ]
    # Fold into '(((((g1 U g2) U g3) U g4) U g5) U g6)'.
    goalspec = subgoals[0]
    for sub in subgoals[1:]:
        goalspec = '(' + goalspec + ' U ' + sub + ')'
    print(goalspec)
    # Pose, Detected Cube, Found Cube, Carried Cube, Detected Desk,
    # Found Desk, Drop, Action.
    keys = ['P', 'DC', 'FC', 'CC', 'DD', 'FD', 'D', 'A']
    behaviour_tree = BehaviourTree(goalspec2BT(goalspec, planner=None))
    policies = [
        'detect_cube', 'find_cube', 'carry_cube',
        'find_charger', 'move_to_charger', 'drop_cube']
    for idx, node in enumerate(behaviour_tree.root.children):
        planner = CozmoPlanner(ComplexGoal, keys, node.name, policy=policies[idx])
        node.setup(0, planner, False, 5)
    for i in range(1):
        behaviour_tree.tick(
            pre_tick_handler=reset_env(ComplexGoal)
        )
        print(i, behaviour_tree.root.status)
def setUp(self):
    """Train the single-goal taxi pickup task, then run inference once."""
    env = init_taxi_s(seed=1234)
    goalspec = 'F P_[PI]['+str(4)+',none,==]'
    keys = ['L', 'PI', 'DI']
    actions = [0, 1, 2, 3, 4, 5]
    self.behaviour_tree = BehaviourTree(goalspec2BT(goalspec, planner=None))
    node = self.behaviour_tree.root
    planner = GenRecPropTaxi(
        env, keys, node.name, dict(),
        actions=actions, max_trace=5, seed=123)
    node.setup(0, planner, True, 5)
    print(node.goalspec, node.planner.goalspec, node.planner.env)
    for _ in range(5):
        self.behaviour_tree.tick(
            pre_tick_handler=reset_env_s(env)
        )
    # Inference: one tick with learning disabled.
    node.train = False
    print(node, node.name, node.train)
    for _ in range(1):
        self.behaviour_tree.tick(
            pre_tick_handler=reset_env_s(env)
        )
        print('inference', self.behaviour_tree.root.status)
def find_bt(goalspec):
    """Build and return a behaviour tree compiled from *goalspec*."""
    return BehaviourTree(goalspec2BT(goalspec, planner=None))
def test_goal_0():
    """A sequential (U) goal parses to a Sequence node with two children."""
    spec = 'F(P_[IC][True,none,==]) U F(P_[L][13,none,==])'
    tree = goalspec2BT(spec)
    assert type(tree).__name__ == 'Sequence'
    assert tree.name == 'U'
    assert len(tree.children) == 2
def test_goal_1():
    """A bare atomic proposition parses to a leaf GoalNode."""
    spec = 'P_[IC][True,none,==]'
    tree = goalspec2BT(spec)
    assert type(tree).__name__ == 'GoalNode'
    assert tree.name == 'P_[IC][True,none,==]'
    assert len(tree.children) == 0
def test_goal_9():
    """Disjunction (|) parses to a Selector node with two children."""
    spec = '(F P_[IC][True,none,==]) | (G P_[L][13,none,==])'
    tree = goalspec2BT(spec)
    assert type(tree).__name__ == 'Selector'
    assert tree.name == '|'
    assert len(tree.children) == 2
def test_goal_7():
    """Conjunction (&) parses to a Parallel node with two children."""
    spec = '(F P_[IC][True,none,==]) & (G P_[L][13,none,==])'
    tree = goalspec2BT(spec)
    assert type(tree).__name__ == 'Parallel'
    assert tree.name == '&'
    assert len(tree.children) == 2
def test_goal_5():
    """Release (R) parses to a Selector node with two children."""
    spec = """((F(P_[IC][True,none,==]) U G(P_[L][13,none,==]))) R (F(P_[L][23,none,==]))"""
    tree = goalspec2BT(spec)
    assert type(tree).__name__ == 'Selector'
    assert tree.name == 'R'
    assert len(tree.children) == 2
def test_goal_paper():
    """Nested U children under the root U are flattened into one Sequence."""
    atomic = 'F(P_[IC][True,none,==])'
    goalspec = '(' + atomic + ' & ' + atomic + ') U (' + atomic + ' U ' + atomic + ')'
    print(goalspec)
    tree = goalspec2BT(goalspec)
    print(tree.children)
    # (a & a) U (a U a) -> root 'U' with 3 children: the Parallel plus
    # the two flattened atomics.
    assert type(tree).__name__ == 'Sequence'
    assert tree.name == 'U'
    assert len(tree.children) == 3
def find_cheese(seed, max_trace_len=10, epoch=10):
    """Train a cheese-finding MDP agent and record per-epoch success.

    Returns:
        tuple: (per-epoch success flags as uint8 array,
                planner trace-length data).
    """
    goalspec = 'F P_[IC][True,none,==]'
    startpoc = (9, 0)
    env = init_10x10mdp(startpoc)
    keys = ['L', 'IC']
    actions = [0, 1, 2, 3]
    behaviour_tree = BehaviourTree(goalspec2BT(goalspec, planner=None))
    # Single root goal node gets the planner.
    node = behaviour_tree.root
    planner = GenRecPropMDP(
        env, keys, None, dict(), actions=actions,
        max_trace=max_trace_len, epoch=epoch)
    node.setup(0, planner, True, epoch=epoch)
    # One success/failure flag per training epoch.
    data = np.zeros(epoch, dtype=np.uint8)
    for i in range(epoch):
        behaviour_tree.tick(
            pre_tick_handler=reset_env(env)
        )
        data[i] = check_bt_status(behaviour_tree.root.status)
    # One inference tick with learning disabled.
    node.setup(0, planner, True, max_trace_len)
    node.train = False
    for _ in range(1):
        behaviour_tree.tick(
            pre_tick_handler=reset_env(env)
        )
    return (data, planner.trace_len_data)
def taxi():
    """Train the four-stage taxi mission: drive, pick up, drive, drop off."""
    env = init_taxi(seed=1234)
    target = list(env.decode(env.s))
    print(target)
    goalspec = '((((F(P_[L]['+give_loc(target[2])+',none,==])) U (F(P_[PI]['+str(4)+',none,==]))) U (F(P_[L]['+give_loc(target[3])+',none,==]))) U (F(P_[PI]['+str(target[3])+',none,==])))'  # noqa: E501
    keys = ['L', 'PI', 'DI']
    actions = [0, 1, 2, 3, 4, 5]
    root = goalspec2BT(goalspec, planner=None)
    print('root', root)
    behaviour_tree = BehaviourTree(root)
    # Per-sub-goal training budget.
    epoch = [80, 50, 80, 50]
    for j, node in enumerate(behaviour_tree.root.children):
        print('children', node, node.name, node.id)
        planner = GenRecPropTaxi(
            env, keys, node.name, dict(),
            actions=actions, max_trace=40, seed=1)
        node.setup(0, planner, True, epoch[j])
    for _ in range(200):
        behaviour_tree.tick(
            pre_tick_handler=reset_env(env)
        )
        print('Training', behaviour_tree.root.status)
    # Inference: disable learning on every sub-goal node.
    for node in behaviour_tree.root.children:
        node.train = False
    for _ in range(2):
        behaviour_tree.tick(
            pre_tick_handler=reset_env(env)
        )
        print('Inference', behaviour_tree.root.status)
def setUp(self):
    """Train a taxi agent to reach the passenger's starting location."""
    env = init_taxi(seed=1234)
    target = list(env.decode(env.s))
    goalspec = 'F(P_[L]['+give_loc(target[2])+',none,==])'
    keys = ['L', 'PI', 'DI']
    actions = [0, 1, 2, 3]
    self.behaviour_tree = BehaviourTree(goalspec2BT(goalspec, planner=None))
    node = self.behaviour_tree.root
    planner = GenRecPropTaxi(
        env, keys, None, dict(),
        actions=actions, max_trace=40, seed=1234)
    node.setup(0, planner, True, 50)
    print(node.goalspec, node.planner.goalspec, node.planner.env)
    for _ in range(50):
        self.behaviour_tree.tick(
            pre_tick_handler=reset_env(env)
        )
def test_goal_standard():
    """Seven atomics nested under &/U flatten to six root children."""
    psi1, psi2, psi3, psi4, psi5, psi6, psi7 = [
        'F P_[' + prop + '][True,none,==]'
        for prop in ['A', 'B', 'C', 'D', 'E', 'F', 'G']]
    goalspec = '(' + psi1 + ' & ' + psi2 + ') U (' + psi3 + ' U ' + psi4 + ') U (' + psi5 + ' U ' + psi6 + ' U ' + psi7 + ')'  # noqa: E501
    print(goalspec)
    tree = goalspec2BT(goalspec)
    print(tree.children, [node.name for node in tree.children])
    assert type(tree).__name__ == 'Sequence'
    assert tree.name == 'U'
    assert len(tree.children) == 6
def keydoor2():
    """Train the three-stage key-door mission in MiniGrid DoorKey-8x8."""
    env_name = 'MiniGrid-DoorKey-8x8-v0'
    env = gym.make(env_name)
    env.max_steps = min(env.max_steps, 200)
    env.seed(12345)
    env.reset()
    env = env_setup(env)
    print((env.agent_pos, env.agent_dir))
    # Find the key, carry it, then open the door.
    goalspec = '(F(P_[K][1,none,==]) U F(P_[C][1,none,==])) U (F(P_[D][1,none,==]))'  # noqa: E501
    keys = ['L', 'F', 'K', 'D', 'C', 'G', 'O']
    # Per-stage action sets: navigate, navigate+pickup, navigate.
    actions = [[0, 1, 2, 3], [0, 1, 2, 3, 4], [0, 1, 2, 3]]
    behaviour_tree = BehaviourTree(goalspec2BT(goalspec, planner=None))
    # Per-sub-goal training budget.
    epoch = [70, 40, 70]
    for j, node in enumerate(behaviour_tree.root.children):
        planner = GenRecPropKeyDoor(
            env, keys, node.name, dict(),
            actions=actions[j], max_trace=40, seed=None)
        node.setup(0, planner, True, epoch[j])
        print(node.goalspec, node.planner.goalspec, type(node.planner.env))
    for i in range(150):
        behaviour_tree.tick(pre_tick_handler=reset_env(env))
        print(i, 'Training', behaviour_tree.root.status)
    # Inference with learning disabled.
    for node in behaviour_tree.root.children:
        node.train = False
    for i in range(2):
        behaviour_tree.tick(pre_tick_handler=reset_env(env))
        print(i, 'Inference', behaviour_tree.root.status)
def keydoor():
    """Train a single-goal key-finding agent in MiniGrid DoorKey-8x8."""
    env_name = 'MiniGrid-DoorKey-8x8-v0'
    env = gym.make(env_name)
    env.max_steps = min(env.max_steps, 200)
    env.seed(12345)
    env.reset()
    env = env_setup(env)
    print((env.agent_pos, env.agent_dir))
    # Single goal: find the key.
    goalspec = 'F P_[K][1,none,==]'
    keys = ['L', 'F', 'K', 'D', 'C', 'G', 'O']
    actions = [0, 1, 2, 3, 4, 5]
    behaviour_tree = BehaviourTree(goalspec2BT(goalspec, planner=None))
    node = behaviour_tree.root
    planner = GenRecPropKeyDoor(
        env, keys, node.name, dict(),
        actions=actions, max_trace=40, seed=None)
    node.setup(0, planner, True, 100)
    print(node.goalspec, node.planner.goalspec, type(node.planner.env))
    # Training.
    for i in range(50):
        behaviour_tree.tick(pre_tick_handler=reset_env(env))
        print(i, 'Training', behaviour_tree.root.status)
    # Inference.
    node.train = False
    for i in range(1):
        behaviour_tree.tick(pre_tick_handler=reset_env(env))
        print(i, 'Inference', behaviour_tree.root.status)
def setUp(self):
    """Train the full four-stage taxi mission, then run inference."""
    env = init_taxi(seed=1234)
    target = list(env.decode(env.s))
    print(target)
    goalspec = '((((F(P_[L]['+give_loc(target[2])+',none,==])) U (F(P_[PI]['+str(4)+',none,==]))) U (F(P_[L]['+give_loc(target[3])+',none,==]))) U (F(P_[PI]['+str(target[3])+',none,==])))'  # noqa: E501
    keys = ['L', 'PI', 'DI']
    actions = [0, 1, 2, 3, 4, 5]
    self.behaviour_tree = BehaviourTree(goalspec2BT(goalspec, planner=None))
    # Per-sub-goal training budget.
    epoch = [80, 50, 80, 50]
    for j, node in enumerate(self.behaviour_tree.root.children):
        planner = GenRecPropTaxi(
            env, keys, node.name, dict(),
            actions=actions, max_trace=40, seed=1)
        node.setup(0, planner, True, epoch[j])
    # Training.
    for _ in range(200):
        self.behaviour_tree.tick(
            pre_tick_handler=reset_env(env)
        )
        print('Training', self.behaviour_tree.root.status)
    # Inference.
    for node in self.behaviour_tree.root.children:
        node.train = False
    for _ in range(2):
        self.behaviour_tree.tick(
            pre_tick_handler=reset_env(env)
        )
        print('inference', self.behaviour_tree.root.status)
def find_bt(goalspec):
    """Compile *goalspec* into a behaviour tree and return it."""
    root = goalspec2BT(goalspec, planner=None)
    tree = BehaviourTree(root)
    return tree
def find_key():
    """Learn to find the key and plot competency curves over two runs."""
    env_name = 'MiniGrid-Goals-v0'
    env = gym.make(env_name)
    # Medium-difficulty layout (seed 5).
    env = ReseedWrapper(env, seeds=[5])
    env = FullyObsWrapper(env)
    env.max_steps = min(env.max_steps, 200)
    env.agent_view_size = 1
    env.reset()
    # Single goal: find the key.
    goalspec = 'F P_[KE][1,none,==]'
    allkeys = [
        'LO', 'FW', 'KE', 'DR',
        'BOB', 'BOR', 'BAB', 'BAR',
        'LV', 'GO', 'CK',
        'CBB', 'CBR', 'CAB', 'CAR',
        'DO', 'RM']
    keys = ['LO', 'FW', 'KE']
    actions = [0, 1, 2]
    behaviour_tree = BehaviourTree(
        goalspec2BT(goalspec, planner=None, node=CompetentNode))
    node = behaviour_tree.root
    planner = GenRecPropMultiGoal(
        env, keys, node.name, dict(), actions=actions,
        max_trace=50, seed=None, allkeys=allkeys)

    def run(pepoch=50, iepoch=10):
        # Training phase, then inference with a fresh trace count.
        node.setup(0, planner, True, pepoch)
        for _ in range(pepoch):
            behaviour_tree.tick(pre_tick_handler=reset_env(env))
        node.train = False
        node.planner.epoch = iepoch
        node.planner.tcount = 0
        for _ in range(iepoch):
            behaviour_tree.tick(pre_tick_handler=reset_env(env))

    competency = []
    datas = []
    for pepoch, iepoch in [(80, 10)] * 2:
        run(pepoch, iepoch)
        datas.append(
            np.mean(
                planner.blackboard.shared_content['ctdata'][planner.goalspec],
                axis=0))
        competency.append(planner.compute_competency())
    print(competency)
    compare_curve(competency, datas)
def carry_key():
    """Learn the two-stage find-then-carry key mission with competency."""
    env_name = 'MiniGrid-Goals-v0'
    env = gym.make(env_name)
    env = ReseedWrapper(env, seeds=[3])
    env = FullyObsWrapper(env)
    env.max_steps = min(env.max_steps, 200)
    env.agent_view_size = 1
    env.reset()
    state, reward, done, _ = env.step(2)
    # Find the key, then carry it.
    goalspec = 'F P_[KE][1,none,==] U F P_[CK][1,none,==]'
    allkeys = [
        'LO', 'FW', 'KE', 'DR',
        'BOB', 'BOR', 'BAB', 'BAR',
        'LV', 'GO', 'CK',
        'CBB', 'CBR', 'CAB', 'CAR',
        'DO', 'RM']
    keys = ['LO', 'FW', 'KE', 'CK']
    actions = [0, 1, 2, 3, 4, 5]
    behaviour_tree = BehaviourTree(
        goalspec2BT(goalspec, planner=None, node=CompetentNode))
    epoch = 80

    def noop(node):
        # Placeholder for recursive_setup's control-node callback.
        pass

    def attach_planner(node):
        # Give each goal node its own multi-goal planner, ready to train.
        planner = GenRecPropMultiGoal(
            env, keys, node.name, dict(), actions=actions,
            max_trace=40, seed=None, allkeys=allkeys)
        node.setup(0, planner, True, epoch)

    def to_inference(node):
        node.train = False
        node.planner.epoch = 5
        node.planner.tcount = 0

    def compute_comp(node):
        node.planner.compute_competency()

    recursive_setup(behaviour_tree.root, attach_planner, noop)
    # Training.
    for i in range(100):
        behaviour_tree.tick(pre_tick_handler=reset_env(env))
        print(i, 'Training', behaviour_tree.root.status)
    # Inference.
    recursive_setup(behaviour_tree.root, to_inference, noop)
    for i in range(5):
        behaviour_tree.tick(pre_tick_handler=reset_env(env))
        print(i, 'Inference', behaviour_tree.root.status)
    recursive_setup(behaviour_tree.root, compute_comp, noop)
    blackboard = Blackboard()
    print(recursive_com(behaviour_tree.root, blackboard))