def setUp(self):
    """Build a batch-mode compiler, simulation cell, initial state, and
    default action for each of the two test domains."""
    self.batch_size = 10

    # One (rddl, compiler, cell, initial_state, default_action) bundle
    # per domain, exposed as numbered attributes (rddl1, compiler1, ...).
    for idx, env in enumerate(('Reservoir-8', 'Navigation-v2'), start=1):
        rddl = rddlgym.make(env, mode=rddlgym.AST)
        compiler = Compiler(rddl, batch_mode=True)
        setattr(self, 'rddl{}'.format(idx), rddl)
        setattr(self, 'compiler{}'.format(idx), compiler)
        setattr(self, 'cell{}'.format(idx), ActionSimulationCell(compiler))
        setattr(self, 'initial_state{}'.format(idx),
                compiler.compile_initial_state(batch_size=self.batch_size))
        setattr(self, 'default_action{}'.format(idx),
                compiler.compile_default_action(batch_size=1))
Ejemplo n.º 2
0
    def setUpClass(cls):
        """Compile the deterministic Navigation domain and build a
        gradient-based action optimizer over an open-loop policy."""
        # hyper-parameters shared by all tests in the class
        cls.horizon = 40
        cls.batch_size = 64
        cls.epochs = 50
        cls.learning_rate = 0.01

        # read and parse the RDDL model file
        with open('rddl/deterministic/Navigation.rddl') as fp:
            rddl_parser = RDDLParser()
            rddl_parser.build()
            model = rddl_parser.parse(fp.read())
            model.build()

        # RDDL-to-TensorFlow compiler in batch mode
        cls.rddl2tf = Compiler(model, batch_mode=True)

        # open-loop policy over the planning horizon
        cls.policy = OpenLoopPolicy(cls.rddl2tf, cls.batch_size, cls.horizon)
        cls.policy.build('test')

        # optimizer that tunes the policy's action variables
        cls.optimizer = ActionOptimizer(cls.rddl2tf, cls.policy)
        cls.optimizer.build(cls.learning_rate, cls.batch_size, cls.horizon)
Ejemplo n.º 3
0
    def setUpClass(cls):
        """Compile the deterministic Navigation domain and unroll an
        open-loop policy from the initial state over the full horizon."""
        # hyper-parameters shared by all tests in the class
        cls.horizon = 40
        cls.batch_size = 64

        # read and parse the RDDL model file
        with open('rddl/deterministic/Navigation.rddl') as fp:
            rddl_parser = RDDLParser()
            rddl_parser.build()
            model = rddl_parser.parse(fp.read())
            model.build()

        # RDDL-to-TensorFlow compiler in batch mode
        cls.rddl2tf = Compiler(model, batch_mode=True)

        # open-loop policy over the planning horizon
        cls.policy = OpenLoopPolicy(cls.rddl2tf, cls.batch_size, cls.horizon)
        cls.policy.build('test')

        # execute the policy once per timestep; timesteps are fed in
        # decreasing order (horizon-1 down to 0), matching a steps-to-go
        # encoding
        with cls.rddl2tf.graph.as_default():
            cls.state = cls.rddl2tf.compile_initial_state(cls.batch_size)
            cls.actions = []
            for step in reversed(range(cls.horizon)):
                timestep = tf.constant(step,
                                       dtype=tf.float32,
                                       shape=(cls.batch_size, 1))
                cls.actions.append(cls.policy(cls.state, timestep))
Ejemplo n.º 4
0
    def setUpClass(cls):
        """Compile the deterministic Navigation domain, build an open-loop
        policy, and sample random variables to initialize it."""
        # hyper-parameters shared by all tests in the class
        cls.horizon = 40
        cls.batch_size = 1

        # read and parse the RDDL model file
        with open('rddl/deterministic/Navigation.rddl') as fp:
            rddl_parser = RDDLParser()
            rddl_parser.build()
            model = rddl_parser.parse(fp.read())
            model.build()

        # RDDL-to-TensorFlow compiler in batch mode
        cls.rddl2tf = Compiler(model, batch_mode=True)

        # open-loop policy over the planning horizon
        cls.policy = OpenLoopPolicy(cls.rddl2tf, cls.batch_size, cls.horizon)
        cls.policy.build('test')

        # one uniform random array per action fluent, with the horizon
        # prepended to each fluent's shape
        cls.policy_variables = [
            np.random.uniform(low=-1.0, high=1.0,
                              size=[cls.horizon] + list(shape))
            for shape in cls.rddl2tf.rddl.action_size
        ]

        # evaluator that executes the policy's actions
        cls.evaluator = ActionEvaluator(cls.rddl2tf, cls.policy)
Ejemplo n.º 5
0
    def setUp(self):
        """Compile Navigation-v3 and build a feedforward policy together
        with its value-function estimator."""
        self.rddl1 = rddlgym.make('Navigation-v3', mode=rddlgym.AST)
        self.compiler1 = Compiler(self.rddl1, batch_mode=True)

        # single hidden layer of 64 ELU units, no input layer normalization
        config = {
            'layers': [64],
            'activation': 'elu',
            'input_layer_norm': False
        }
        self.policy1 = FeedforwardPolicy(self.compiler1, config)
        self.policy1.build()
        self.valuefn1 = Value(self.compiler1, self.policy1)
Ejemplo n.º 6
0
 def setUp(self):
     """Build one non-batch compiler per benchmark domain, exposed as
     numbered attributes (rddl1/compiler1 through rddl7/compiler7)."""
     envs = ('Reservoir-8', 'Mars_Rover', 'HVAC-v1', 'CrossingTraffic-10',
             'GameOfLife-10', 'CarParking-v1', 'Navigation-v3')
     for idx, env in enumerate(envs, start=1):
         rddl = rddlgym.make(env, mode=rddlgym.AST)
         setattr(self, 'rddl{}'.format(idx), rddl)
         setattr(self, 'compiler{}'.format(idx), Compiler(rddl))
Ejemplo n.º 7
0
class TestCompiler(unittest.TestCase):
    """Unit tests for the RDDL-to-TensorFlow Compiler.

    Builds one compiler per benchmark domain and checks the shapes, dtypes,
    scopes, names, and initializer values of compiled fluents, constraints,
    CPFs, and the reward function.

    Fix: the original class defined ``test_compile_action_lower_bound_constraints``
    twice with identical bodies; the second definition shadowed the first,
    so one copy was dead code and only ran once. The duplicate is removed.
    """

    def setUp(self):
        """Build one non-batch compiler per benchmark domain."""
        self.rddl1 = rddlgym.make('Reservoir-8', mode=rddlgym.AST)
        self.rddl2 = rddlgym.make('Mars_Rover', mode=rddlgym.AST)
        self.rddl3 = rddlgym.make('HVAC-v1', mode=rddlgym.AST)
        self.rddl4 = rddlgym.make('CrossingTraffic-10', mode=rddlgym.AST)
        self.rddl5 = rddlgym.make('GameOfLife-10', mode=rddlgym.AST)
        self.rddl6 = rddlgym.make('CarParking-v1', mode=rddlgym.AST)
        self.rddl7 = rddlgym.make('Navigation-v3', mode=rddlgym.AST)
        self.compiler1 = Compiler(self.rddl1)
        self.compiler2 = Compiler(self.rddl2)
        self.compiler3 = Compiler(self.rddl3)
        self.compiler4 = Compiler(self.rddl4)
        self.compiler5 = Compiler(self.rddl5)
        self.compiler6 = Compiler(self.rddl6)
        self.compiler7 = Compiler(self.rddl7)

    def test_compile_state_action_constraints(self):
        """Check count, dtype, and batch shape of compiled state-action constraints."""
        batch_size = 1000
        compilers = [self.compiler4, self.compiler5]
        # (number of constraints, per-constraint batch-mode flags)
        expected_preconds = [(12, [True] + [False] * 11), (1, [False])]
        for compiler, expected in zip(compilers, expected_preconds):
            compiler.batch_mode_on()
            state = compiler.compile_initial_state(batch_size)
            action = compiler.compile_default_action(batch_size)
            constraints = compiler.compile_state_action_constraints(
                state, action)
            self.assertIsInstance(constraints, list)
            self.assertEqual(len(constraints), expected[0])
            for c, batch_mode in zip(constraints, expected[1]):
                self.assertIsInstance(c, TensorFluent)
                self.assertEqual(c.dtype, tf.bool)
                # batched constraints carry the full batch dimension;
                # non-batched ones collapse to a singleton batch
                if batch_mode:
                    self.assertEqual(c.shape.batch_size, batch_size)
                else:
                    self.assertEqual(c.shape.batch_size, 1)
                self.assertEqual(c.shape.batch, batch_mode)
                self.assertTupleEqual(c.shape.fluent_shape, ())

    def test_compile_action_preconditions(self):
        """Check count, dtype, and batch shape of compiled action preconditions."""
        batch_size = 1000
        compilers = [self.compiler1, self.compiler2]
        expected_preconds = [2, 1]
        for compiler, expected in zip(compilers, expected_preconds):
            compiler.batch_mode_on()
            state = compiler.compile_initial_state(batch_size)
            action = compiler.compile_default_action(batch_size)
            preconds = compiler.compile_action_preconditions(state, action)
            self.assertIsInstance(preconds, list)
            self.assertEqual(len(preconds), expected)
            for p in preconds:
                self.assertIsInstance(p, TensorFluent)
                self.assertEqual(p.dtype, tf.bool)
                self.assertEqual(p.shape.batch_size, batch_size)
                self.assertTrue(p.shape.batch)
                self.assertTupleEqual(p.shape.fluent_shape, ())

    def test_compile_state_invariants(self):
        """Check count and dtype of compiled state invariants."""
        batch_size = 1000
        compilers = [self.compiler1, self.compiler2]
        expected_invariants = [2, 0]
        for compiler, expected in zip(compilers, expected_invariants):
            compiler.batch_mode_on()
            state = compiler.compile_initial_state(batch_size)
            invariants = compiler.compile_state_invariants(state)
            self.assertIsInstance(invariants, list)
            self.assertEqual(len(invariants), expected)
            for p in invariants:
                self.assertIsInstance(p, TensorFluent)
                self.assertEqual(p.dtype, tf.bool)
                self.assertTupleEqual(p.shape.fluent_shape, ())

    def test_compile_action_preconditions_checking(self):
        """Precondition checking must yield one boolean per batch element."""
        batch_size = 1000
        compilers = [self.compiler1, self.compiler2]
        for compiler in compilers:
            compiler.batch_mode_on()
            state = compiler.compile_initial_state(batch_size)
            action = compiler.compile_default_action(batch_size)
            checking = compiler.compile_action_preconditions_checking(
                state, action)
            self.assertIsInstance(checking, tf.Tensor)
            self.assertEqual(checking.dtype, tf.bool)
            self.assertListEqual(checking.shape.as_list(), [batch_size])

    def test_compile_action_lower_bound_constraints(self):
        """Check dtype and shape of the lower bounds on action fluents."""
        batch_size = 1000
        compilers = [self.compiler1, self.compiler3]
        expected = [[('outflow/1', [], tf.int32)], [('AIR/1', [], tf.int32)]]

        for compiler, expected_bounds in zip(compilers, expected):
            compiler.batch_mode_on()
            initial_state = compiler.compile_initial_state(batch_size)
            # invoked only for its graph side effects; the bound constraints
            # below are derived from the initial state alone
            compiler.compile_default_action(batch_size)
            bounds = compiler.compile_action_bound_constraints(initial_state)
            self.assertIsInstance(bounds, dict)

            self.assertEqual(len(bounds), len(expected_bounds))
            for fluent_name, shape, dtype in expected_bounds:
                self.assertIn(fluent_name, bounds)
                # each entry is a (lower, upper) pair
                self.assertIsInstance(bounds[fluent_name], tuple)
                self.assertEqual(len(bounds[fluent_name]), 2)
                lower, _ = bounds[fluent_name]
                self.assertIsInstance(lower, TensorFluent)
                self.assertListEqual(lower.shape.as_list(), shape)
                self.assertEqual(lower.dtype, dtype)

    def test_compile_action_upper_bound_constraints(self):
        """Check dtype and shape of the upper bounds on action fluents."""
        batch_size = 1000
        compilers = [self.compiler1, self.compiler3]
        expected = [[('outflow/1', [batch_size, 8], tf.float32)],
                    [('AIR/1', [3], tf.float32)]]

        for compiler, expected_bounds in zip(compilers, expected):
            compiler.batch_mode_on()
            initial_state = compiler.compile_initial_state(batch_size)
            # invoked only for its graph side effects; the bound constraints
            # below are derived from the initial state alone
            compiler.compile_default_action(batch_size)
            bounds = compiler.compile_action_bound_constraints(initial_state)
            self.assertIsInstance(bounds, dict)

            self.assertEqual(len(bounds), len(expected_bounds))
            for fluent_name, shape, dtype in expected_bounds:
                self.assertIn(fluent_name, bounds)
                # each entry is a (lower, upper) pair
                self.assertIsInstance(bounds[fluent_name], tuple)
                self.assertEqual(len(bounds[fluent_name]), 2)
                _, upper = bounds[fluent_name]
                self.assertIsInstance(upper, TensorFluent)
                self.assertEqual(upper.dtype, dtype)
                self.assertListEqual(upper.shape.as_list(), shape)

    def test_initialize_non_fluents(self):
        """Check names, shapes, dtypes, and initializer values of non-fluents
        for the Reservoir-8 domain."""
        nf = dict(self.compiler1.compile_non_fluents())

        expected_non_fluents = {
            'MAX_RES_CAP/1': {
                'shape': [
                    8,
                ],
                'dtype': tf.float32
            },
            'UPPER_BOUND/1': {
                'shape': [
                    8,
                ],
                'dtype': tf.float32
            },
            'LOWER_BOUND/1': {
                'shape': [
                    8,
                ],
                'dtype': tf.float32
            },
            'RAIN_SHAPE/1': {
                'shape': [
                    8,
                ],
                'dtype': tf.float32
            },
            'RAIN_SCALE/1': {
                'shape': [
                    8,
                ],
                'dtype': tf.float32
            },
            'DOWNSTREAM/2': {
                'shape': [8, 8],
                'dtype': tf.bool
            },
            'SINK_RES/1': {
                'shape': [
                    8,
                ],
                'dtype': tf.bool
            },
            'MAX_WATER_EVAP_FRAC_PER_TIME_UNIT/0': {
                'shape': [],
                'dtype': tf.float32
            },
            'LOW_PENALTY/1': {
                'shape': [
                    8,
                ],
                'dtype': tf.float32
            },
            'HIGH_PENALTY/1': {
                'shape': [
                    8,
                ],
                'dtype': tf.float32
            }
        }
        self.assertIsInstance(nf, dict)
        self.assertEqual(len(nf), len(expected_non_fluents))
        for name, fluent in nf.items():
            self.assertIn(name, expected_non_fluents)
            shape = expected_non_fluents[name]['shape']
            dtype = expected_non_fluents[name]['dtype']
            # tensor names replace '/' with '-' and live under 'non_fluents'
            self.assertEqual(fluent.name,
                             'non_fluents/{}:0'.format(name.replace('/', '-')))
            self.assertIsInstance(fluent, TensorFluent)
            self.assertEqual(fluent.dtype, dtype)
            self.assertEqual(fluent.shape.as_list(), shape)

        expected_initializers = {
            'MAX_RES_CAP/1': [100., 100., 200., 300., 400., 500., 800., 1000.],
            'UPPER_BOUND/1': [80., 80., 180., 280., 380., 480., 780., 980.],
            'LOWER_BOUND/1': [20., 20., 20., 20., 20., 20., 20., 20.],
            'RAIN_SHAPE/1': [1., 1., 1., 1., 1., 1., 1., 1.],
            'RAIN_SCALE/1': [5., 3., 9., 7., 15., 13., 25., 30.],
            'DOWNSTREAM/2':
            [[False, False, False, False, False, True, False, False],
             [False, False, True, False, False, False, False, False],
             [False, False, False, False, True, False, False, False],
             [False, False, False, False, False, False, False, True],
             [False, False, False, False, False, False, True, False],
             [False, False, False, False, False, False, True, False],
             [False, False, False, False, False, False, False, True],
             [False, False, False, False, False, False, False, False]],
            'SINK_RES/1':
            [False, False, False, False, False, False, False, True],
            'MAX_WATER_EVAP_FRAC_PER_TIME_UNIT/0':
            0.05,
            'LOW_PENALTY/1': [-5., -5., -5., -5., -5., -5., -5., -5.],
            'HIGH_PENALTY/1': [-10., -10., -10., -10., -10., -10., -10., -10.]
        }
        # evaluate each tensor and compare element-wise with the RDDL values
        with tf.Session(graph=self.compiler1.graph) as sess:
            for name, fluent in nf.items():
                value = sess.run(fluent.tensor)
                list1 = list(value.flatten())
                list2 = list(np.array(expected_initializers[name]).flatten())
                for v1, v2 in zip(list1, list2):
                    self.assertAlmostEqual(v1, v2)

    def test_initialize_initial_state_fluents(self):
        """Check names, shapes, dtypes, and values of the initial state fluents."""
        sf = dict(self.compiler1.compile_initial_state())

        expected_state_fluents = {
            'rlevel/1': {
                'shape': [
                    8,
                ],
                'dtype': tf.float32
            }
        }
        self.assertIsInstance(sf, dict)
        self.assertEqual(len(sf), len(expected_state_fluents))
        for name, fluent in sf.items():
            self.assertIn(name, expected_state_fluents)
            shape = expected_state_fluents[name]['shape']
            dtype = expected_state_fluents[name]['dtype']
            # tensor names replace '/' with '-' and live under 'initial_state'
            self.assertEqual(
                fluent.name,
                'initial_state/{}:0'.format(name.replace('/', '-')))
            self.assertIsInstance(fluent, TensorFluent)
            self.assertEqual(fluent.dtype, dtype)
            self.assertEqual(fluent.shape.as_list(), shape)

        expected_initializers = {
            'rlevel/1': [75., 50., 50., 50., 50., 50., 50., 50.]
        }
        with tf.Session(graph=self.compiler1.graph) as sess:
            for name, fluent in sf.items():
                value = sess.run(fluent.tensor)
                list1 = list(value.flatten())
                list2 = list(np.array(expected_initializers[name]).flatten())
                for v1, v2 in zip(list1, list2):
                    self.assertAlmostEqual(v1, v2)

    def test_initialize_default_action_fluents(self):
        """Check names, shapes, dtypes, and values of the default action fluents."""
        action_fluents = self.compiler1.compile_default_action()
        # result is an ordered list of (name, TensorFluent) pairs
        self.assertIsInstance(action_fluents, list)
        for fluent in action_fluents:
            self.assertIsInstance(fluent, tuple)
            self.assertEqual(len(fluent), 2)
            self.assertIsInstance(fluent[0], str)
            self.assertIsInstance(fluent[1], TensorFluent)

        af = dict(action_fluents)

        expected_action_fluents = {
            'outflow/1': {
                'shape': [
                    8,
                ],
                'dtype': tf.float32
            }
        }
        self.assertEqual(len(af), len(expected_action_fluents))
        for name, fluent in af.items():
            self.assertIn(name, expected_action_fluents)
            shape = expected_action_fluents[name]['shape']
            dtype = expected_action_fluents[name]['dtype']
            # tensor names replace '/' with '-' and live under 'default_action'
            self.assertEqual(
                fluent.name,
                'default_action/{}:0'.format(name.replace('/', '-')))
            self.assertIsInstance(fluent, TensorFluent)
            self.assertEqual(fluent.dtype, dtype)
            self.assertEqual(fluent.shape.as_list(), shape)

        expected_initializers = {'outflow/1': [0., 0., 0., 0., 0., 0., 0., 0.]}
        with tf.Session(graph=self.compiler1.graph) as sess:
            for name, fluent in af.items():
                value = sess.run(fluent.tensor)
                list1 = list(value.flatten())
                list2 = list(np.array(expected_initializers[name]).flatten())
                for v1, v2 in zip(list1, list2):
                    self.assertAlmostEqual(v1, v2)

    def test_state_scope(self):
        """State fluents must follow the domain's state fluent ordering."""
        compilers = [
            self.compiler1, self.compiler2, self.compiler3, self.compiler4,
            self.compiler5, self.compiler6, self.compiler7
        ]
        for compiler in compilers:
            fluents = compiler.compile_initial_state()
            scope = dict(fluents)
            self.assertEqual(len(fluents), len(scope))
            for i, name in enumerate(
                    compiler.rddl.domain.state_fluent_ordering):
                self.assertIs(scope[name], fluents[i][1])

    def test_action_scope(self):
        """Action fluents must follow the domain's action fluent ordering."""
        compilers = [
            self.compiler1, self.compiler2, self.compiler3, self.compiler4,
            self.compiler5, self.compiler6, self.compiler7
        ]
        for compiler in compilers:
            fluents = compiler.compile_default_action()
            scope = dict(fluents)
            self.assertEqual(len(fluents), len(scope))
            for i, name in enumerate(
                    compiler.rddl.domain.action_fluent_ordering):
                self.assertIs(scope[name], fluents[i][1])

    def test_compile_expressions(self):
        """Compile each CPF expression and the reward; check shape/dtype/scope."""
        expected = {
            # rddl1: RESERVOIR ====================================================
            'rainfall/1': {
                'shape': [
                    8,
                ],
                'dtype': tf.float32,
                'scope': ['?r']
            },
            'evaporated/1': {
                'shape': [
                    8,
                ],
                'dtype': tf.float32,
                'scope': ['?r']
            },
            'overflow/1': {
                'shape': [
                    8,
                ],
                'dtype': tf.float32,
                'scope': ['?r']
            },
            'inflow/1': {
                'shape': [
                    8,
                ],
                'dtype': tf.float32,
                'scope': ['?r']
            },
            "rlevel'/1": {
                'shape': [
                    8,
                ],
                'dtype': tf.float32,
                'scope': ['?r']
            },

            # rddl2: MARS ROVER ===================================================
            "xPos'/0": {
                'shape': [],
                'dtype': tf.float32,
                'scope': []
            },
            "yPos'/0": {
                'shape': [],
                'dtype': tf.float32,
                'scope': []
            },
            "time'/0": {
                'shape': [],
                'dtype': tf.float32,
                'scope': []
            },
            "picTaken'/1": {
                'shape': [
                    3,
                ],
                'dtype': tf.bool,
                'scope': ['?p']
            }
        }

        compilers = [self.compiler1, self.compiler2]
        rddls = [self.rddl1, self.rddl2]
        for compiler, rddl in zip(compilers, rddls):
            # build the evaluation scope from non-fluents, state, and action
            nf = compiler.non_fluents_scope()
            sf = dict(compiler.compile_initial_state())
            af = dict(compiler.compile_default_action())
            scope = {}
            scope.update(nf)
            scope.update(sf)
            scope.update(af)

            _, cpfs = rddl.domain.cpfs
            for cpf in cpfs:
                name = cpf.name
                expr = cpf.expr
                t = compiler._compile_expression(expr, scope)
                # add the compiled fluent to the scope so later CPFs can use it
                scope[name] = t
                self.assertIsInstance(t, TensorFluent)
                self.assertEqual(t.shape.as_list(), expected[name]['shape'])
                self.assertEqual(t.dtype, expected[name]['dtype'])
                self.assertEqual(t.scope.as_list(), expected[name]['scope'])

            # the reward is a scalar float with empty scope
            reward_expr = rddl.domain.reward
            t = compiler._compile_expression(reward_expr, scope)
            self.assertIsInstance(t, TensorFluent)
            self.assertEqual(t.shape.as_list(), [])
            self.assertEqual(t.dtype, tf.float32)
            self.assertEqual(t.scope.as_list(), [])

    def test_compile_cpfs(self):
        """Check names and ordering of compiled intermediate and state CPFs."""
        compilers = [self.compiler1, self.compiler2]
        expected = [
            (['evaporated/1', 'overflow/1', 'rainfall/1',
              'inflow/1'], ["rlevel'/1"]),
            ([], ["picTaken'/1", "time'/0", "xPos'/0", "yPos'/0"]),
        ]
        for compiler, (expected_interm,
                       expected_state) in zip(compilers, expected):
            nf = compiler.non_fluents_scope()
            sf = dict(compiler.compile_initial_state())
            af = dict(compiler.compile_default_action())
            scope = {**nf, **sf, **af}

            interm_fluents, next_state_fluents = compiler.compile_cpfs(scope)

            self.assertIsInstance(interm_fluents, list)
            self.assertEqual(len(interm_fluents), len(expected_interm))
            for fluent, expected_fluent in zip(interm_fluents,
                                               expected_interm):
                self.assertEqual(fluent[0], expected_fluent)

            self.assertIsInstance(next_state_fluents, list)
            self.assertEqual(len(next_state_fluents), len(expected_state))
            for fluent, expected_fluent in zip(next_state_fluents,
                                               expected_state):
                self.assertEqual(fluent[0], expected_fluent)

    def test_compile_state_cpfs(self):
        """Every state fluent must have a compiled next-state CPF."""
        compilers = [
            self.compiler1, self.compiler2, self.compiler3, self.compiler4,
            self.compiler5, self.compiler6, self.compiler7
        ]
        for compiler in compilers:
            nf = compiler.non_fluents_scope()
            sf = dict(compiler.compile_initial_state())
            af = dict(compiler.compile_default_action())
            scope = {**nf, **sf, **af}

            # state CPFs may reference intermediate fluents, so compile
            # those first and extend the scope with them
            interm_fluents = compiler.compile_intermediate_cpfs(scope)
            scope.update(dict(interm_fluents))
            next_state_fluents = compiler.compile_state_cpfs(scope)

            self.assertIsInstance(next_state_fluents, list)
            for cpf in next_state_fluents:
                self.assertIsInstance(cpf, tuple)
            self.assertEqual(len(next_state_fluents), len(sf))

            next_state_fluents = dict(next_state_fluents)
            for fluent in sf:
                next_fluent = utils.rename_state_fluent(fluent)
                self.assertIn(next_fluent, next_state_fluents)
                self.assertIsInstance(next_state_fluents[next_fluent],
                                      TensorFluent)

    def test_compile_intermediate_cpfs(self):
        """Intermediate CPFs must match the domain's interm fluent ordering."""
        compilers = [
            self.compiler1, self.compiler2, self.compiler3, self.compiler4,
            self.compiler5, self.compiler6, self.compiler7
        ]
        for compiler in compilers:
            fluents = compiler.rddl.domain.interm_fluent_ordering

            nf = compiler.non_fluents_scope()
            sf = dict(compiler.compile_initial_state())
            af = dict(compiler.compile_default_action())
            scope = {**nf, **sf, **af}
            interm_fluents = compiler.compile_intermediate_cpfs(scope)
            self.assertIsInstance(interm_fluents, list)
            self.assertEqual(len(interm_fluents), len(fluents))
            for actual, expected in zip(interm_fluents, fluents):
                self.assertIsInstance(actual, tuple)
                self.assertEqual(len(actual), 2)
                self.assertIsInstance(actual[0], str)
                self.assertIsInstance(actual[1], TensorFluent)
                self.assertEqual(actual[0], expected)

    def test_compile_reward(self):
        """The compiled reward must be a TensorFluent with one value per batch."""
        # TODO: self.compiler4
        compilers = [
            self.compiler1, self.compiler2, self.compiler3, self.compiler5,
            self.compiler6, self.compiler7
        ]
        batch_size = 32
        for compiler in compilers:
            compiler.batch_mode_on()
            state = compiler.compile_initial_state(batch_size)
            action = compiler.compile_default_action(batch_size)
            scope = compiler.transition_scope(state, action)
            # the reward may depend on next-state fluents, so compile the
            # CPFs first and extend the scope with them
            _, next_state_fluents = compiler.compile_cpfs(scope)
            scope.update(next_state_fluents)
            reward = compiler.compile_reward(scope)
            self.assertIsInstance(reward, TensorFluent)
            self.assertEqual(reward.shape.as_list(), [batch_size])

    def test_compile_probabilistic_normal_random_variable(self):
        """Sampling a Normal(0, 1) expression must yield a batched sample fluent."""
        mean = Expression(('number', 0.0))
        var = Expression(('number', 1.0))
        normal = Expression(('randomvar', ('Normal', (mean, var))))

        expressions = [normal]
        self._test_random_variable_expressions(expressions)

    def test_compile_probabilistic_gamma_random_variable(self):
        """Sampling a Gamma(5, 1) expression must yield a batched sample fluent."""
        shape = Expression(('number', 5.0))
        scale = Expression(('number', 1.0))
        gamma = Expression(('randomvar', ('Gamma', (shape, scale))))

        expressions = [gamma]
        self._test_random_variable_expressions(expressions)

    def __get_batch_compiler_with_state_action_scope(self):
        """Yield (compiler, batch_size, scope) triples for batched sampling tests."""
        compilers = [self.compiler2]
        batch_sizes = [8]

        for compiler, batch_size in zip(compilers, batch_sizes):
            compiler.batch_mode_on()

            nf = compiler.non_fluents_scope()
            sf = dict(compiler.compile_initial_state())
            af = dict(compiler.compile_default_action())
            scope = {**nf, **sf, **af}

            yield (compiler, batch_size, scope)

    def _test_random_variable_expressions(self, expressions):
        """Compile and check each random-variable expression on every batch compiler."""
        for compiler, batch_size, scope in self.__get_batch_compiler_with_state_action_scope(
        ):
            for expr in expressions:
                self._test_random_variable_expression(expr, compiler, scope,
                                                      batch_size)

    def _test_random_variable_expression(self, expr, compiler, scope,
                                         batch_size):
        """Compile one random-variable expression and validate its sample."""
        sample = compiler._compile_random_variable_expression(
            expr, scope, batch_size)
        self._test_sample_fluents(sample, batch_size)
        self._test_sample_fluent(sample)

    def _test_sample_fluents(self, sample, batch_size=None):
        """Assert the sample is a TensorFluent with the expected batch dimension."""
        self.assertIsInstance(sample, TensorFluent)
        if batch_size is not None:
            self.assertEqual(sample.shape[0], batch_size)

    def _test_sample_fluent(self, sample):
        """Assert the underlying tensor was produced by a sampling op."""
        self.assertTrue(sample.tensor.name.startswith('sample'), sample.tensor)

    def _test_conditional_sample(self, sample):
        """Assert the sample is a 3-input conditional (mask, stop-grad, sample)."""
        inputs = sample.tensor.op.inputs
        self.assertEqual(len(inputs), 3)
        self.assertTrue(inputs[0].name.startswith('LogicalNot'), inputs[0])
        self.assertTrue(inputs[1].name.startswith('StopGradient'), inputs[1])
        self.assertTrue(inputs[2].name.startswith('sample'), inputs[2])
Ejemplo n.º 8
0
 def setUpClass(cls):
     """Build one batch-mode compiler each for the Reservoir-8 and
     Mars_Rover domains, exposed as numbered class attributes."""
     for idx, env in enumerate(('Reservoir-8', 'Mars_Rover'), start=1):
         rddl = rddlgym.make(env, mode=rddlgym.AST)
         setattr(cls, 'rddl{}'.format(idx), rddl)
         setattr(cls, 'compiler{}'.format(idx), Compiler(rddl, batch_mode=True))
Ejemplo n.º 9
0
def compile_model(filename):
    """Parse the RDDL model stored at *filename* and wrap it in a Compiler."""
    return Compiler(parse_model(filename))
Ejemplo n.º 10
0
 def __init__(self, compiler: Compiler, policy: Policy,
              batch_size: int) -> None:
     """Build the policy simulation cell and cache the non-fluent tensors."""
     self._cell = PolicySimulationCell(compiler, policy, batch_size)
     # keep only the underlying tensors; the fluent names are not needed here
     compiled = compiler.compile_non_fluents()
     self._non_fluents = [fluent.tensor for _, fluent in compiled]
class TestActionSimulationCell(unittest.TestCase):
    """Unit tests for ActionSimulationCell over two RDDL domains.

    Checks the cell's size properties (state, interm, output) and the
    shapes/dtypes of the tensors it produces for one transition step.
    """

    def setUp(self):
        """Build a batch-mode compiler, cell, initial state, and default
        action for the Reservoir-8 and Navigation-v2 domains."""
        self.batch_size = 10

        self.rddl1 = rddlgym.make('Reservoir-8', mode=rddlgym.AST)
        self.rddl2 = rddlgym.make('Navigation-v2', mode=rddlgym.AST)
        self.compiler1 = Compiler(self.rddl1, batch_mode=True)
        self.compiler2 = Compiler(self.rddl2, batch_mode=True)

        # note: actions use batch_size=1 while states use batch_size=10;
        # presumably the cell broadcasts the single action over the state
        # batch — confirm against ActionSimulationCell
        self.cell1 = ActionSimulationCell(self.compiler1)
        self.initial_state1 = self.compiler1.compile_initial_state(batch_size=self.batch_size)
        self.default_action1 = self.compiler1.compile_default_action(batch_size=1)

        self.cell2 = ActionSimulationCell(self.compiler2)
        self.initial_state2 = self.compiler2.compile_initial_state(batch_size=self.batch_size)
        self.default_action2 = self.compiler2.compile_default_action(batch_size=1)

    def test_state_size(self):
        """state_size must mirror each state tensor's shape minus the batch dim."""
        state_size1 = self.cell1.state_size
        self.assertIsInstance(state_size1, tuple)
        self.assertEqual(len(state_size1), len(self.initial_state1))
        for shape, tensor in zip(state_size1, self.initial_state1):
            # [1:] drops the leading batch dimension
            self.assertListEqual(list(shape), tensor.shape.as_list()[1:])

        state_size2 = self.cell2.state_size
        self.assertIsInstance(state_size2, tuple)
        self.assertEqual(len(state_size2), len(self.initial_state2))
        for shape, tensor in zip(state_size2, self.initial_state2):
            self.assertListEqual(list(shape), tensor.shape.as_list()[1:])

    def test_interm_size(self):
        """interm_size must match the known per-domain intermediate fluent shapes."""
        expected = [((8,), (8,), (8,), (8,)), ((2,), (2,))]
        cells = [self.cell1, self.cell2]
        for cell, sz in zip(cells, expected):
            interm_size = cell.interm_size
            self.assertIsInstance(interm_size, tuple)
            self.assertTupleEqual(interm_size, sz)

    def test_output_size(self):
        """output_size is the 4-tuple (state, action, interm, reward=1)."""
        cells = [self.cell1, self.cell2]
        for cell in cells:
            output_size = cell.output_size
            state_size = cell.state_size
            interm_size = cell.interm_size
            action_size = cell.action_size
            self.assertEqual(output_size, (state_size, action_size, interm_size, 1))

    def test_next_state(self):
        """One cell step must yield correctly-shaped next-state, action,
        interm, and reward tensors."""
        cells = [self.cell1, self.cell2]
        actions = [self.default_action1, self.default_action2]
        states = [self.initial_state1, self.initial_state2]
        for cell, inputs, state in zip(cells, actions, states):

            # the cell returns (output, next_state); output is itself a
            # 4-tuple and its first element rebinds next_state below
            output, next_state = cell(inputs, state)
            self.assertIsInstance(output, tuple)
            self.assertEqual(len(output), 4)

            next_state, action, interm, reward = output
            state_size, action_size, interm_size, reward_size = cell.output_size

            # interm_state
            # TO DO

            # next_state: batched to batch_size
            self.assertIsInstance(next_state, tuple)
            self.assertEqual(len(next_state), len(state_size))
            for s, sz in zip(next_state, state_size):
                self.assertIsInstance(s, tf.Tensor)
                self.assertListEqual(s.shape.as_list(), [self.batch_size] + list(sz))

            # action: keeps its own batch dimension of 1 (see setUp)
            self.assertIsInstance(action, tuple)
            self.assertEqual(len(action), len(action_size))
            for a, sz in zip(action, action_size):
                self.assertIsInstance(a, tf.Tensor)
                self.assertListEqual(a.shape.as_list(), [1] + list(sz))

            # reward: one float per batch element
            self.assertIsInstance(reward, tf.Tensor)
            self.assertListEqual(reward.shape.as_list(), [self.batch_size, reward_size])


    def test_output(self):
        """The reward output must be a float32 tensor of shape [batch_size, 1]."""
        (output1, next_state1) = self.cell1(self.default_action1, self.initial_state1)
        next_state1, action1, interm_state1, reward1 = output1
        self.assertIsInstance(reward1, tf.Tensor)
        self.assertListEqual(reward1.shape.as_list(), [self.batch_size, 1])
        self.assertEqual(reward1.dtype, tf.float32)

        (output2, next_state2) = self.cell2(self.default_action2, self.initial_state2)
        next_state2, action2, interm_state2, reward2 = output2
        self.assertIsInstance(reward2, tf.Tensor)
        self.assertListEqual(reward2.shape.as_list(), [self.batch_size, 1])
        self.assertEqual(reward2.dtype, tf.float32)
Ejemplo n.º 12
0
 def __init__(self, compiler: Compiler, batch_size: int) -> None:
     """Pre-compile and store the compiler's default action fluents."""
     default_action = compiler.compile_default_action(batch_size)
     self._default = default_action