def setUp(self):
    """Create the ``_NodeToPOMDP`` fixture for an alternative of two leaves.

    The alternative (named 'Do all') offers the actions 'Do a' and 'Do b',
    which differ in their numeric tuple and ``human_probability``.
    """
    action_a = CollaborativeAction('Do a', (3., 2., 5.),
                                   human_probability=.3)
    leaf_a = LeafCombination(action_a)
    action_b = CollaborativeAction('Do b', (2., 3., 4.),
                                   human_probability=.7)
    leaf_b = LeafCombination(action_b)
    alternative = AlternativeCombination([leaf_a, leaf_b], name='Do all')
    self.n2p = _NodeToPOMDP.from_node(alternative, 2., 1.)
def setUp(self):
    """Create leaf nodes and a ``SupportivePOMDP`` over a two-step sequence.

    ``self.htm`` only contains ``self.bt`` followed by ``self.af``;
    ``self.atj`` and ``self.alt`` are built as stand-alone leaves.
    """
    leg = 'leg-1'
    self.bt = LeafCombination(BringTop())
    self.af = LeafCombination(AssembleFoot(leg))
    self.atj = LeafCombination(AssembleTopJoint(leg))
    self.alt = LeafCombination(AssembleLegToTop(leg))
    self.htm = SequentialCombination([self.bt, self.af])
    pomdp = SupportivePOMDP(self.htm)
    # Both probabilities are forced to zero for the tests using this fixture.
    pomdp.p_changed_by_human = 0.
    pomdp.p_change_preference = 0.
    self.p = pomdp
def setUp(self):
    """Create four leaves ``self.l1`` .. ``self.l4`` for 'Do a' .. 'Do d'."""
    specs = (
        ('Do a', (3., 2., 5.)),
        ('Do b', (2., 3., 4.)),
        ('Do c', (2., 3., 4.)),
        ('Do d', (3., 2., 5.)),
    )
    # Build every action first, then wrap them, as two separate passes.
    actions = [CollaborativeAction(label, values) for label, values in specs]
    leaves = [LeafCombination(action) for action in actions]
    self.l1, self.l2, self.l3, self.l4 = leaves
def test_is_correct(self):
    """Check the shape of ``ParallelCombination.to_alternative()``.

    A parallel combination of three leaves must convert to an
    ``AlternativeCombination`` with six children, each having three
    children of its own (presumably the 3! orderings of the leaves; only
    the counts and the type of the first child are asserted here).
    """
    a = LeafCombination(AbstractAction('a'))
    b = LeafCombination(AbstractAction('b'))
    c = LeafCombination(AbstractAction('c'))
    p = ParallelCombination([a, b, c])
    alt = p.to_alternative()
    self.assertIsInstance(alt, AlternativeCombination)
    self.assertEqual(len(alt.children), 6)
    self.assertIsInstance(alt.children[0], SequentialCombination)
    # Generator with its own variable name: the leaf ``c`` above is not
    # shadowed or rebound, and no intermediate list is built.
    self.assertTrue(all(len(child.children) == 3 for child in alt.children))
def test_alt_to_pomdp(self):
    """Compare the POMDP built from a two-leaf alternative to fixed values.

    Checks the state/action/observation names, the start distribution and
    the ``T``, ``O`` and ``R`` arrays of the model returned by
    ``HTMToPOMDP.task_to_pomdp``.
    """
    # No probability of failure or human saying no here
    task = HierarchicalTask(root=AlternativeCombination([
        LeafCombination(CollaborativeAction('Bottom left', 'A1')),
        LeafCombination(CollaborativeAction('Top left', 'A2')),
        ], name='Do all'))
    h2p = HTMToPOMDP(2., 8., 5., ['A1', 'A2'], end_reward=50.)
    p = h2p.task_to_pomdp(task)
    self.assertEqual(p.states, ['before-bottom-left', 'before-top-left', 'end'])
    self.assertEqual(p.actions, ['get-A1', 'get-A2', 'ask-A1', 'ask-A2'])
    self.assertEqual(p.observations, ['none', 'yes', 'no', 'error'])
    np.testing.assert_array_equal(p.start, np.array([.5, .5, 0]))
    # checked manually:
    # One 3x3 matrix per action, in the order of p.actions
    # (presumably rows are start states and columns next states -- confirm).
    T = np.array([
        # get A1
        [[0., 0., 1.], [0., 1., 0.], [.5, .5, 0.]],
        # get A2
        [[1., 0., 0.], [0., 0., 1.], [.5, .5, 0.]],
        # ask A1
        [[1., 0., 0.], [0., 1., 0.], [.5, .5, 0.]],
        # ask A2
        [[1., 0., 0.], [0., 1., 0.], [.5, .5, 0.]],
        ])
    # T is compared with a tolerance; O and R below are compared exactly.
    np.testing.assert_allclose(T, p.T, atol=1.e-4)
    # One 3x4 matrix per action; the 4 columns match the 4 observations
    # asserted above (presumably in the same order -- confirm).
    O = np.array([
        # get A1
        [[0., 0., 0., 1.], [0., 0., 0., 1.], [.9, 0., 0., .1]],
        # get A2
        [[0., 0., 0., 1.], [0., 0., 0., 1.], [.9, 0., 0., .1]],
        # ask A1
        [[0., 1., 0., 0.], [0., 0., 1., 0.], [0., 0., 1., 0.]],
        # ask A2
        [[0., 0., 1., 0.], [0., 1., 0., 0.], [0., 0., 1., 0.]],
        ])
    np.testing.assert_array_equal(O, p.O)
    # The (4, 3, 3) table gets a trailing axis of length 1 and is repeated
    # along it to reach shape (4, 3, 3, 4).
    R = np.broadcast_to(
        np.array([
            # get A1
            [[-5, -5, -8], [-5] * 3, [50, 50, 50]],
            # get A2
            [[-5.] * 3, [-5, -5, -8], [50, 50, 50]],
            # ask A1
            [[-2.] * 3, [-2.] * 3, [50, 50, 50]],
            # ask A2
            [[-2.] * 3, [-2.] * 3, [50, 50, 50]],
            ])[..., None],
        (4, 3, 3, 4))
    np.testing.assert_array_equal(R, p.R)
def test_end_reward_on_seq(self):
    """The (wait, end, end) rewards of a two-step sequence equal 13.

    The converter is created with ``end_reward=13.``; every entry of
    ``R[wait, end, end, :]`` must carry exactly that value.
    """
    converter = HTMToPOMDP(1., 2., 1., 1., end_reward=13.)
    first = LeafCombination(
        CollaborativeAction('Do a', (3., 2., 5.),
                            fail_probability=0., no_probability=0.))
    second = LeafCombination(
        CollaborativeAction('Do b', (2., 3., 4.),
                            fail_probability=0., no_probability=0.))
    root = SequentialCombination([first, second], name='Do all')
    pomdp = converter.task_to_pomdp(HierarchicalTask(root=root))
    end_rewards = pomdp.R[converter.wait, converter.end, converter.end, :]
    self.assertTrue((end_rewards == 13.).all())
def test_leaf_to_pomdp(self):
    """Compare the POMDP built from a single leaf to fixed expected values."""
    converter = HTMToPOMDP(2., 8., 5., ['A1'], end_reward=50.)
    leaf = LeafCombination(CollaborativeAction('bottom left', 'A1'))
    pomdp = converter.task_to_pomdp(HierarchicalTask(root=leaf))

    self.assertEqual(pomdp.states, ['before-bottom-left', 'end'])
    self.assertEqual(pomdp.actions, ['get-A1', 'ask-A1'])
    self.assertEqual(pomdp.observations, ['none', 'yes', 'no', 'error'])
    np.testing.assert_array_equal(pomdp.start, np.array([1, 0.]))

    # The expected arrays below were checked manually.
    expected_T = np.array([
        [[0., 1.], [1., 0.]],  # get
        [[1., 0.], [1., 0.]],  # ask
    ])
    np.testing.assert_allclose(expected_T, pomdp.T)

    expected_O = np.array([
        [[0., 0., 0., 1.], [.9, 0., 0., .1]],  # get
        [[0., 1., 0., 0.], [0., 0., 1., 0.]],  # ask
    ])
    np.testing.assert_array_equal(expected_O, pomdp.O)

    # (2, 2, 2) table repeated along a new last axis of length 4.
    reward_table = np.array([
        [[-5., -8.], [50., 50.]],
        [[-2., -2.], [50., 50.]],
    ])
    expected_R = np.broadcast_to(reward_table[..., None], (2, 2, 2, 4))
    np.testing.assert_array_equal(expected_R, pomdp.R)
def test_with_no_probability(self):
    """Check ``T[3, 3, :]`` after ``update_T`` with ``no_probability=.2``.

    The row must hold .8 at index 4 and .2 at index 5, zeros elsewhere.
    """
    action = CollaborativeAction('Do it', (3., 2., 5.), no_probability=.2)
    converter = _NodeToPOMDP.from_node(LeafCombination(action), 2., 1.)
    transitions = np.zeros((7, 8, 8))
    converter.update_T(transitions, 0, 2, 3, [6, 7], [.2, .8],
                       list(range(7)))
    expected_row = np.zeros(8)
    expected_row[4:6] = [.8, .2]
    np.testing.assert_allclose(transitions[3, 3, :], expected_row)
def test_with_failure_probability(self):
    """Check ``T[2, 5, :]`` after ``update_T`` with ``fail_probability=.2``.

    The row must hold .2 at index 5, and .8 split as .2/.8 over indices
    6 and 7 (the same weights passed to ``update_T``).
    """
    action = CollaborativeAction('Do it', (3., 2., 5.), fail_probability=.2)
    converter = _NodeToPOMDP.from_node(LeafCombination(action), 2., 1.)
    transitions = np.zeros((7, 8, 8))
    converter.update_T(transitions, 0, 2, 3, [6, 7], [.2, .8],
                       list(range(7)))
    expected_row = np.zeros(8)
    expected_row[5:8] = [.2, .8 * .2, .8 * .8]
    np.testing.assert_allclose(transitions[2, 5, :], expected_row)
def sequential_combination_from_order(self, order, name=None):
    """Return a ``SequentialCombination`` with one leaf per item of `order`.

    Each leaf wraps ``CollaborativeAction(action_name, label)`` where
    ``label`` is ``str(item)`` and ``action_name`` is ``label`` followed by
    ``'-' + name`` when `name` is given. `name` is also used as the name of
    the returned combination.
    """
    leaves = []
    for item in order:
        label = str(item)
        if name is None:
            action_name = label
        else:
            action_name = label + '-' + name
        leaves.append(LeafCombination(CollaborativeAction(action_name, label)))
    return SequentialCombination(leaves, name=name)
rospy.loginfo("Got human message: '%s'", ans) ans = ans.lower() if 'yes' in ans: return self.model.observations[self.model.O_YES] if 'no' in ans or "don't" in ans: return self.model.observations[self.model.O_NO] else: self.say("I didn't get what you meant.", sync=False) return self.model.observations[self.model.O_NONE] # Problem definition leg_i = 'leg-{}'.format htm = SequentialCombination([ SequentialCombination([ LeafCombination(AssembleLeg(leg_i(i))), LeafCombination(AssembleLegToTop(leg_i(i), bring_top=(i == 0))) ]) for i in range(4) ]) p = SupportivePOMDP(htm) # TODO put as default p.r_subtask = 0. p.r_preference = 20. p.cost_hold = 3. p.cost_get = 20. try: pol = AsyncPOMCPPolicyRunner(p, iterations=ITERATIONS, horizon=NHTMHorizon.generator(p, n=HORIZON), exploration=EXPLORATION,
C_INTR = 1. C_ERR = 5. INF = 100. ## Tested scenarios: # 1. with full sequence of sequential actions R_END = 0.1 LOOP = False # 2. with full sequence of sequential actions # R_END = 100 # LOOP = True R_SUBTASK = None ## Define the task mount_central = SequentialCombination([ LeafCombination(CollaborativeAction('Get central frame', (INF, 20., 30.))), LeafCombination( CollaborativeAction('Start Hold central frame', (3., 10., INF))) ], name='Mount central frame') #mount_legs = ParallelCombination([ mount_legs = SequentialCombination([ SequentialCombination([ LeafCombination(CollaborativeAction('Get left leg', (INF, 20., 30.))), LeafCombination( CollaborativeAction('Snap left leg', (5., INF, INF), fail_probability=.1)), ], name='Mount left leg'), SequentialCombination([ LeafCombination(CollaborativeAction('Get right leg', (INF, 20., 30.))),
def setUp(self):
    """Create a two-step task, its ``SupportivePOMDP`` and an ``NHTMHorizon``.

    The horizon ``self.h`` is built from the model with second argument 1.
    """
    bring_top = LeafCombination(BringTop())
    assemble_foot = LeafCombination(AssembleFoot('leg-1'))
    self.bt, self.af = bring_top, assemble_foot
    self.htm = SequentialCombination([bring_top, assemble_foot])
    self.model = SupportivePOMDP(self.htm)
    self.h = NHTMHorizon(self.model, 1)
def _task_def(self):
    """Build ``self.main_task`` ('TaskA') and ``self.tf_task`` ('TaskB').

    Six 'gather parts then assemble' sequences are created (four legs, seat,
    back). The legs are grouped in one parallel node and back/seat in
    another. TaskA runs legs then rest, TaskB runs rest then legs; both
    tasks reuse the very same subtree objects and keyword settings.
    """
    def make_leaf(action_name, probs_key):
        # Leaf wrapping a PredAction configured from this object's tables.
        return LeafCombination(
            PredAction(action_name, self.NUM_FEATS, self.OBS_PROBS[probs_key]))

    # (gather action, gather probs key, assemble action, assemble probs key,
    #  sequence name)
    specs = [
        ('gatherparts_leg_1', 'gp_l1_probs',
         'assemble_leg_1', 'ass_l1_probs', 'finish_leg1'),
        ('gatherparts_leg_2', 'gp_l2_probs',
         'assemble_leg_2', 'ass_l2_probs', 'finish_leg2'),
        ('gatherparts_leg_3', 'gp_l3_probs',
         'assemble_leg_3', 'ass_l3_probs', 'finish_leg3'),
        ('gatherparts_leg_4', 'gp_l4_probs',
         'assemble_leg_4', 'ass_l4_probs', 'finish_leg4'),
        ('gatherparts_seat', 'gp_s_probs',
         'assemble_seat', 'ass_s_probs', 'finish_seat'),
        ('gatherparts_back', 'gp_b_probs',
         'assemble_back', 'ass_b_probs', 'finish_back'),
    ]

    # First pass: every leaf, gather before assemble, part after part.
    leaf_pairs = []
    for gather_name, gather_key, assemble_name, assemble_key, _ in specs:
        gather = make_leaf(gather_name, gather_key)
        assemble = make_leaf(assemble_name, assemble_key)
        leaf_pairs.append([gather, assemble])

    # Second pass: one named sequence per part.
    sequences = [
        SequentialCombination(pair, name=spec[-1])
        for spec, pair in zip(specs, leaf_pairs)
    ]
    leg_1, leg_2, leg_3, leg_4, seat, back = sequences

    legs = ParallelCombination([leg_1, leg_2, leg_3, leg_4],
                               name='finish_legs')
    rest = ParallelCombination([back, seat], name='finish_rest')

    shared_settings = dict(
        num_feats_action=self.NUM_FEATS,
        feats=self.FEAT,
        supp_bhvs=self.SUPP_BHVS,
        obj_presence=self.OBJ_PRESENCE,
        obj_count_idx=self.OBJ_COUNT_IDX,
        main_obj=self.MAIN_OBJ,
    )
    task_a = HierarchicalTaskHMMSuppRD(
        root=SequentialCombination([legs, rest], name='complete'),
        name='TaskA', **shared_settings)
    task_b = HierarchicalTaskHMMSuppRD(
        root=SequentialCombination([rest, legs], name='complete'),
        name='TaskB', **shared_settings)

    self.main_task = task_a
    self.tf_task = task_b
def setUp(self):
    """Create the ``_NodeToPOMDP`` fixture ``self.l2p`` for a single leaf."""
    action = CollaborativeAction('Do it', (3., 2., 5.), human_probability=.3)
    self.l2p = _NodeToPOMDP.from_node(LeafCombination(action), 2., 1.)
import json

from task_models.task import (HierarchicalTask, AbstractAction,
                              SequentialCombination, ParallelCombination,
                              LeafCombination)


# Example: describe a chair assembly as a hierarchical task and print it as
# JSON (base first, then the four legs in parallel, then the frame).


def _mount_leg(index):
    """Return the 'take then attach' sequence for leg number `index`."""
    steps = [
        LeafCombination(AbstractAction('Take leg {}'.format(index))),
        LeafCombination(AbstractAction('Attach leg {}'.format(index))),
    ]
    return SequentialCombination(steps, name='Mount leg {}'.format(index))


take_base = LeafCombination(AbstractAction('Take base'))

mount_leg_combinations = [_mount_leg(index) for index in range(4)]

mount_frame = SequentialCombination(
    [
        # Only this leaf is created with highlighted=True.
        LeafCombination(AbstractAction('Take frame'), highlighted=True),
        LeafCombination(AbstractAction('Attach frame')),
    ],
    name='Mount frame',
)

chair_task = HierarchicalTask(
    root=SequentialCombination(
        [
            take_base,
            ParallelCombination(mount_leg_combinations, name='Mount legs'),
            mount_frame,
        ],
        name='Mount chair',
    )
)

print(json.dumps(chair_task.as_dictionary(), indent=2))
def test_end_reward(self):
    """The (wait, end, end) rewards of a single-leaf task equal 13.

    The converter is created with ``end_reward=13.``; every entry of
    ``R[wait, end, end, :]`` must carry exactly that value.
    """
    converter = HTMToPOMDP(1., 2., 1., 1., end_reward=13.)
    leaf = LeafCombination(CollaborativeAction('Do it', (3., 2., 5.)))
    pomdp = converter.task_to_pomdp(HierarchicalTask(root=leaf))
    end_rewards = pomdp.R[converter.wait, converter.end, converter.end, :]
    self.assertTrue((end_rewards == 13.).all())
def test_alt_to_pomdp(self):
    """Compare the POMDP built from a two-leaf alternative to fixed values.

    The model comes from ``self.h2p.task_to_pomdp``. The test checks the
    state/action/observation names, the start distribution and the ``T``,
    ``O`` and ``R`` arrays (7 states, 9 actions, 4 observations).
    """
    # No probability of failure or human saying no here
    task = HierarchicalTask(root=AlternativeCombination([
        LeafCombination(
            CollaborativeAction('Do a', (3., 2., 5.), fail_probability=0.,
                                no_probability=0.)),
        LeafCombination(
            CollaborativeAction('Do b', (2., 3., 4.), fail_probability=0.,
                                no_probability=0.)),
        ], name='Do either'))
    p = self.h2p.task_to_pomdp(task)
    self.assertEqual(p.states, [
        'init-do-a', 'H-do-a', 'R-do-a', 'init-do-b', 'H-do-b', 'R-do-b', 'end'
    ])
    self.assertEqual(p.actions, [
        'wait', 'phy-do-a', 'com-ask-intention-do-a',
        'com-tell-intention-do-a', 'com-ask-finished-do-a', 'phy-do-b',
        'com-ask-intention-do-b', 'com-tell-intention-do-b',
        'com-ask-finished-do-b'
    ])
    self.assertEqual(p.observations, ['none', 'yes', 'no', 'error'])
    np.testing.assert_array_equal(p.start, np.array([.5, 0, 0, .5, 0, 0, 0]))
    # checked manually:
    # One 7x7 matrix per action, in the order of p.actions
    # (presumably rows are start states and columns next states -- confirm).
    T = np.array([
        # Wait
        [[1., 0., 0., 0., 0., 0., 0.],
         [0., 0.71653, 0., 0., 0., 0., 0.28347],
         [0., 0., 1., 0., 0., 0., 0.],
         [0., 0., 0., 1., 0., 0., 0.],
         [0., 0., 0., 0., 0.60653, 0., 0.39347],
         [0., 0., 0., 0., 0., 1., 0.],
         [0., 0., 0., 0., 0., 0., 1.]],
        # Physical a
        [[1., 0., 0., 0., 0., 0., 0.],
         [0., 0.18888, 0., 0., 0., 0., 0.81112],
         [0., 0., 0., 0., 0., 0., 1.],
         [0., 0., 0., 1., 0., 0., 0.],
         [0., 0., 0., 0., 0.08208, 0., 0.91792],
         [0., 0., 0., 0., 0., 1., 0.],
         [0., 0., 0., 0., 0., 0., 1.]],
        # Ask intention a
        [[0., 1., 0., 0., 0., 0., 0.],
         [0., 0.51342, 0., 0., 0., 0., 0.48658],
         [0., 0., 1., 0., 0., 0., 0.],
         [0., 0., 0., 1., 0., 0., 0.],
         [0., 0., 0., 0., 0.36788, 0., 0.63212],
         [0., 0., 0., 0., 0., 1., 0.],
         [0., 0., 0., 0., 0., 0., 1.]],
        # Tell intention a
        [[0., 0., 1., 0., 0., 0., 0.],
         [0., 0.71653, 0., 0., 0., 0., 0.28347],
         [0., 0., 1., 0., 0., 0., 0.],
         [0., 0., 0., 1., 0., 0., 0.],
         [0., 0., 0., 0., 0.60653, 0., 0.39347],
         [0., 0., 0., 0., 0., 1., 0.],
         [0., 0., 0., 0., 0., 0., 1.]],
        # Ask finished a
        [[1., 0., 0., 0., 0., 0., 0.],
         [0., 0.51342, 0., 0., 0., 0., 0.48658],
         [0., 0., 1., 0., 0., 0., 0.],
         [0., 0., 0., 1., 0., 0., 0.],
         [0., 0., 0., 0., 0.36788, 0., 0.63212],
         [0., 0., 0., 0., 0., 1., 0.],
         [0., 0., 0., 0., 0., 0., 1.]],
        # Physical b
        [[1., 0., 0., 0., 0., 0., 0.],
         [0., 0.2636, 0., 0., 0., 0., 0.7364],
         [0., 0., 1., 0., 0., 0., 0.],
         [0., 0., 0., 1., 0., 0., 0.],
         [0., 0., 0., 0., 0.13534, 0., 0.86466],
         [0., 0., 0., 0., 0., 0., 1.],
         [0., 0., 0., 0., 0., 0., 1.]],
        # Ask intention b
        [[1., 0., 0., 0., 0., 0., 0.],
         [0., 0.51342, 0., 0., 0., 0., 0.48658],
         [0., 0., 1., 0., 0., 0., 0.],
         [0., 0., 0., 0., 1., 0., 0.],
         [0., 0., 0., 0., 0.36788, 0., 0.63212],
         [0., 0., 0., 0., 0., 1., 0.],
         [0., 0., 0., 0., 0., 0., 1.]],
        # Tell intention b
        [[1., 0., 0., 0., 0., 0., 0.],
         [0., 0.71653, 0., 0., 0., 0., 0.28347],
         [0., 0., 1., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 1., 0.],
         [0., 0., 0., 0., 0.60653, 0., 0.39347],
         [0., 0., 0., 0., 0., 1., 0.],
         [0., 0., 0., 0., 0., 0., 1.]],
        # Ask finished b
        [[1., 0., 0., 0., 0., 0., 0.],
         [0., 0.51342, 0., 0., 0., 0., 0.48658],
         [0., 0., 1., 0., 0., 0., 0.],
         [0., 0., 0., 1., 0., 0., 0.],
         [0., 0., 0., 0., 0.36788, 0., 0.63212],
         [0., 0., 0., 0., 0., 1., 0.],
         [0., 0., 0., 0., 0., 0., 1.]],
        ])
    # The expected values are rounded to 5 decimals, hence the tolerance.
    np.testing.assert_allclose(T, p.T, atol=1.e-4)
    # One 7x4 matrix per action; the 4 columns match the 4 observations
    # asserted above (presumably in the same order -- confirm).
    O = np.array([
        # Wait
        [[1., 0., 0., 0.],
         [1., 0., 0., 0.],
         [1., 0., 0., 0.],
         [1., 0., 0., 0.],
         [1., 0., 0., 0.],
         [1., 0., 0., 0.],
         [1., 0., 0., 0.]],
        # Physical a
        [[0., 0., 0., 1.],
         [0., 0., 0., 1.],
         [0., 0., 0., 1.],
         [0., 0., 0., 1.],
         [0., 0., 0., 1.],
         [0., 0., 0., 1.],
         [1., 0., 0., 0.]],
        # Ask intention a
        [[1., 0., 0., 0.],
         [0., 1., 0., 0.],
         [0., 0., 1., 0.],
         [1., 0., 0., 0.],
         [1., 0., 0., 0.],
         [1., 0., 0., 0.],
         [1., 0., 0., 0.]],
        # Tell intention a
        [[1., 0., 0., 0.],
         [1., 0., 0., 0.],
         [1., 0., 0., 0.],
         [1., 0., 0., 0.],
         [1., 0., 0., 0.],
         [1., 0., 0., 0.],
         [1., 0., 0., 0.]],
        # Ask finished a
        [[0., 0., 1., 0.],
         [0., 0., 1., 0.],
         [0., 0., 1., 0.],
         [0., 0., 1., 0.],
         [0., 0., 1., 0.],
         [0., 0., 1., 0.],
         [0., 1., 0., 0.]],
        # Physical b
        [[0., 0., 0., 1.],
         [0., 0., 0., 1.],
         [0., 0., 0., 1.],
         [0., 0., 0., 1.],
         [0., 0., 0., 1.],
         [0., 0., 0., 1.],
         [1., 0., 0., 0.]],
        # Ask intention b
        [[1., 0., 0., 0.],
         [1., 0., 0., 0.],
         [1., 0., 0., 0.],
         [1., 0., 0., 0.],
         [0., 1., 0., 0.],
         [0., 0., 1., 0.],
         [1., 0., 0., 0.]],
        # Tell intention b
        [[1., 0., 0., 0.],
         [1., 0., 0., 0.],
         [1., 0., 0., 0.],
         [1., 0., 0., 0.],
         [1., 0., 0., 0.],
         [1., 0., 0., 0.],
         [1., 0., 0., 0.]],
        # Ask finished b
        [[0., 0., 1., 0.],
         [0., 0., 1., 0.],
         [0., 0., 1., 0.],
         [0., 0., 1., 0.],
         [0., 0., 1., 0.],
         [0., 0., 1., 0.],
         [0., 1., 0., 0.]],
        ])
    np.testing.assert_array_equal(O, p.O)
    # 9x7 table (one row per action, one column per state), repeated along
    # two new trailing axes to shape (9, 7, 7, 4) and then negated.
    R = -np.broadcast_to(
        np.array([[1] * 6 + [0],
                  [6, 6, 3, 6, 6, 6, 1],
                  [3] * 6 + [1],
                  [2] * 6 + [1],
                  [3] * 6 + [1],
                  [5, 5, 5, 5, 5, 4, 1],
                  [3] * 6 + [1],
                  [2] * 6 + [1],
                  [3] * 6 + [1]])[:, :, None, None],
        (9, 7, 7, 4))
    np.testing.assert_array_equal(R, p.R)
def test_leaf_to_pomdp(self):
    """Compare the POMDP built from a single leaf to fixed expected values.

    The model comes from ``self.h2p.task_to_pomdp``. The test checks the
    state/action/observation names, the start distribution and the ``T``,
    ``O`` and ``R`` arrays (4 states, 5 actions, 4 observations).
    """
    # No probability of failure or human saying no here
    task = HierarchicalTask(root=LeafCombination(
        CollaborativeAction(
            'Do it', (3., 2., 5.), fail_probability=0., no_probability=0.)))
    p = self.h2p.task_to_pomdp(task)
    self.assertEqual(p.states, ['init-do-it', 'H-do-it', 'R-do-it', 'end'])
    self.assertEqual(p.actions, [
        'wait', 'phy-do-it', 'com-ask-intention-do-it',
        'com-tell-intention-do-it', 'com-ask-finished-do-it'
    ])
    self.assertEqual(p.observations, ['none', 'yes', 'no', 'error'])
    np.testing.assert_array_equal(p.start, np.array([1, 0., 0., 0.]))
    # checked manually:
    # One 4x4 matrix per action (presumably in the order of p.actions, as
    # labelled for O below -- confirm).
    T = np.array([
        [[1., 0., 0., 0.],
         [0., 0.71653131, 0., 0.28346869],
         [0., 0., 1., 0.],
         [0., 0., 0., 1.]],
        [[1., 0., 0., 0.],
         [0., 0.1888756, 0., 0.8111244],
         [0., 0., 0., 1.],
         [0., 0., 0., 1.]],
        [[0., 1., 0., 0.],
         [0., 0.51341712, 0., 0.48658288],
         [0., 0., 1., 0.],
         [0., 0., 0., 1.]],
        [[0., 0., 1., 0.],
         [0., 0.71653131, 0., 0.28346869],
         [0., 0., 1., 0.],
         [0., 0., 0., 1.]],
        [[1., 0., 0., 0.],
         [0., 0.51341712, 0., 0.48658288],
         [0., 0., 1., 0.],
         [0., 0., 0., 1.]],
        ])
    np.testing.assert_allclose(T, p.T)
    # One 4x4 matrix per action; the 4 columns match the 4 observations
    # asserted above (presumably in the same order -- confirm).
    O = np.array([
        # Wait
        [[1., 0., 0., 0.],
         [1., 0., 0., 0.],
         [1., 0., 0., 0.],
         [1., 0., 0., 0.]],
        # Act
        [[0., 0., 0., 1.],
         [0., 0., 0., 1.],
         [0., 0., 0., 1.],
         [1., 0., 0., 0.]],
        # Ask intention
        [
            [1., 0., 0., 0.],  # not possible in T
            [0., 1., 0., 0.],
            [0., 0., 1., 0.],  # robot has told its int. to act
            [1., 0., 0., 0.]   # human acts while robot asks again
                               # TODO: change to Yes?
        ],
        # Tell intention
        [
            [1., 0., 0., 0.],  # not possible in T
            [1., 0., 0., 0.],  # TODO: No?
            [1., 0., 0., 0.],  # TODO: maybe H answers to R tell
            [1., 0., 0., 0.]   # TODO: answer?
        ],
        # Ask finished
        [
            [0., 0., 1., 0.],  # not started
            [0., 0., 1., 0.],
            [0., 0., 1., 0.],
            [0., 1., 0., 0.]
        ],
        ])
    np.testing.assert_array_equal(O, p.O)
    # 5x4 table (one row per action, one column per state), repeated along
    # two new trailing axes to shape (5, 4, 4, 4) and then negated.
    R = -np.broadcast_to(
        np.array([
            [1, 1, 1, 0],
            [6, 6, 3, 1],
            [3, 3, 3, 1],
            [2, 2, 2, 1],
            [3, 3, 3, 1],
            ])[:, :, None, None],
        (5, 4, 4, 4))
    np.testing.assert_array_equal(R, p.R)