def setUp(self):
    self.rddl1 = rddlgym.make('Navigation-v3', mode=rddlgym.AST)
    self.compiler1 = Compiler(self.rddl1, batch_mode=True)
    self.policy1 = FeedforwardPolicy(self.compiler1, {
        'layers': [64],
        'activation': 'elu',
        'input_layer_norm': False
    })
    self.policy1.build()
    self.valuefn1 = Value(self.compiler1, self.policy1)
@classmethod
def setUpClass(cls):
    # hyper-parameters
    cls.batch_size = 16
    cls.horizon = 15

    # model
    cls.compiler = rddlgym.make('Reservoir-8', mode=rddlgym.SCG)
    cls.compiler.init()
    cls.compiler.batch_size = cls.batch_size

    # initial state
    cls.initial_state = cls.compiler.initial_state()

    # default action
    cls.default_action = cls.compiler.default_action()

    # policy
    cls.config = {
        'layers': [128, 64, 32],
        'activation': 'elu',
        'input_layer_norm': True
    }
    cls.policy = FeedforwardPolicy(cls.compiler, cls.config)
    cls.policy.build()
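# Usage sketch (not part of the original fixture): a minimal smoke test for the
# built policy. It assumes FeedforwardPolicy is callable as
# policy(state, timestep) and returns one tensor per action fluent; both the
# call signature and the return structure are assumptions, not facts
# established by the fixture above.
def test_policy_returns_action_tensors(self):
    with self.compiler.graph.as_default():
        timestep = tf.constant([[self.horizon]] * self.batch_size, dtype=tf.float32)
        action = self.policy(self.initial_state, timestep)
        self.assertIsInstance(action, tuple)
        for tensor in action:
            self.assertIsInstance(tensor, tf.Tensor)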
def setUp(self):
    self.horizon = 40
    self.batch_size = 128
    self.rddl1 = rddlgym.make('Navigation-v3', mode=rddlgym.AST)
    self.compiler1 = rddl2tf.compilers.DefaultCompiler(
        self.rddl1, batch_size=self.batch_size)
    self.compiler1.init()
    self.policy1 = FeedforwardPolicy(self.compiler1, {
        'layers': [64],
        'activation': 'elu',
        'input_layer_norm': False
    })
    self.policy1.build()
    self.valuefn1 = Value(self.compiler1, self.policy1)
@classmethod
def setUpClass(cls):
    # hyper-parameters
    cls.horizon = 40
    cls.batch_size = 16

    # rddl
    cls.compiler = rddlgym.make('Reservoir-8', mode=rddlgym.SCG)
    cls.compiler.init()
    cls.compiler.batch_size = cls.batch_size

    # initial state
    cls.initial_state = cls.compiler.initial_state()

    # default action
    cls.default_action = cls.compiler.default_action()

    # policy
    cls.policy = FeedforwardPolicy(cls.compiler, {
        'layers': [64, 64],
        'activation': 'relu',
        'input_layer_norm': True
    })
    cls.policy.build()

    # cell
    cls.cell = BasicMarkovCell(cls.compiler, cls.policy)

    with cls.cell.graph.as_default():
        # timestep
        cls.timestep = tf.constant(cls.horizon, dtype=tf.float32)
        cls.timestep = tf.expand_dims(cls.timestep, -1)
        cls.timestep = tf.stack([cls.timestep] * cls.batch_size)
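# Usage sketch (hypothetical): a single step through the cell, assuming
# BasicMarkovCell follows the tf RNN-cell convention of
# __call__(inputs, state) -> (output, next_state) with the timestep tensor as
# input; that contract is an assumption, not shown by the fixture above.
def test_cell_single_step(self):
    with self.cell.graph.as_default():
        output, next_state = self.cell(self.timestep, self.initial_state)
        self.assertIsNotNone(output)
        self.assertIsNotNone(next_state)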
@classmethod
def setUpClass(cls):
    # hyper-parameters
    cls.horizon = 20
    cls.batch_size = 64

    # rddl
    cls.compiler = rddlgym.make('Navigation-v2', rddlgym.SCG)
    cls.compiler.batch_mode_on()

    # initial state
    cls.initial_state = cls.compiler.compile_initial_state(cls.batch_size)

    # default action
    cls.default_action = cls.compiler.compile_default_action(cls.batch_size)

    # policy
    cls.policy = FeedforwardPolicy(cls.compiler, {
        'layers': [32, 32],
        'activation': 'elu',
        'input_layer_norm': False
    })
    cls.policy.build()

    # model
    cls.config = {}
    cls.model = ReparameterizationSampling(cls.compiler, cls.config)
    cls.model.build(cls.policy)
    cls.output = cls.model(cls.initial_state, cls.horizon)
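# Usage sketch (hypothetical): evaluating the sampled trajectory in a session,
# mirroring the session pattern used by the value-function tests below. Only
# the existence of `cls.output` is established above; its exact structure is
# an assumption.
def test_output_is_evaluable(self):
    with tf.compat.v1.Session(graph=self.compiler.graph) as sess:
        sess.run(tf.compat.v1.global_variables_initializer())
        output = sess.run(self.output)
        self.assertIsNotNone(output)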
@classmethod
def setUpClass(cls):
    # hyper-parameters
    cls.batch_size = 64
    cls.horizon = 20
    cls.learning_rate = 0.001
    cls.regularization_rate = 0.1

    # rddl
    cls.compiler = rddlgym.make('Navigation-v2', rddlgym.SCG)
    cls.compiler.batch_mode_on()

    # policy
    cls.policy = FeedforwardPolicy(cls.compiler, {
        'layers': [256],
        'activation': 'elu',
        'input_layer_norm': False
    })
    cls.policy.build()

    # planner
    cls.config = {
        'batch_size': cls.batch_size,
        'horizon': cls.horizon,
        'learning_rate': cls.learning_rate,
        'regularization_rate': cls.regularization_rate
    }
    cls.planner = MinimaxOptimizationPlanner(cls.compiler, cls.config)
    cls.planner.build(cls.policy, loss='mse', optimizer='RMSProp')
@classmethod
def setUpClass(cls):
    # hyper-parameters
    cls.horizon = 40
    cls.batch_size = 128

    # rddl
    cls.compiler = rddlgym.make('Reservoir-8', rddlgym.SCG)
    cls.compiler.init()
    cls.compiler.batch_size = cls.batch_size

    # initial state
    cls.initial_state = cls.compiler.initial_state()

    # default action
    cls.default_action = cls.compiler.default_action()

    # policy
    cls.policy = FeedforwardPolicy(cls.compiler, {
        'layers': [32, 32],
        'activation': 'elu',
        'input_layer_norm': True
    })
    cls.policy.build()

    # model
    cls.config = {}
    cls.model = MonteCarloSampling(cls.compiler, cls.config)
    cls.model.build(cls.policy)
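# Usage sketch (hypothetical): by analogy with the ReparameterizationSampling
# fixture above, the built model is assumed to be callable as
# model(initial_state, horizon); that call signature is an assumption here.
def test_sample_trajectories(self):
    output = self.model(self.initial_state, self.horizon)
    self.assertIsNotNone(output)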
@classmethod
def setUpClass(cls):
    # hyper-parameters
    cls.batch_size = 64
    cls.horizon = 20
    cls.learning_rate = 0.001

    # rddl
    cls.compiler = rddlgym.make('Reservoir-8', rddlgym.SCG)
    cls.compiler.init()
    cls.compiler.batch_size = cls.batch_size

    # policy
    cls.policy = FeedforwardPolicy(cls.compiler, {
        'layers': [256],
        'activation': 'elu',
        'input_layer_norm': True
    })
    cls.policy.build()

    # planner
    cls.config = {
        'batch_size': cls.batch_size,
        'horizon': cls.horizon,
        'learning_rate': cls.learning_rate
    }
    cls.planner = PathwiseOptimizationPlanner(cls.compiler, cls.config)
    cls.planner.build(cls.policy, loss='mse', optimizer='RMSProp')
@classmethod
def setUpClass(cls):
    # hyper-parameters
    cls.horizon = 40
    cls.batch_size = 16

    # rddl
    rddl = rddlgym.make('Navigation-v2', mode=rddlgym.AST)
    cls.compiler = rddl2tf.compilers.ReparameterizationCompiler(
        rddl, batch_size=cls.batch_size)
    cls.compiler.init()

    # initial state
    cls.initial_state = cls.compiler.initial_state()

    # default action
    cls.default_action = cls.compiler.default_action()

    # policy
    cls.policy = FeedforwardPolicy(cls.compiler, {
        'layers': [64, 64],
        'activation': 'relu',
        'input_layer_norm': True
    })
    cls.policy.build()

    with cls.compiler.graph.as_default():
        # reparameterization
        cls.noise_shapes = cls.compiler.get_cpfs_reparameterization()
        cls.noise_variables = utils.get_noise_variables(
            cls.noise_shapes, cls.batch_size, cls.horizon)
        cls.noise_inputs, cls.encoding = utils.encode_noise_as_inputs(
            cls.noise_variables)

        # timestep
        cls.timestep = tf.constant(cls.horizon, dtype=tf.float32)
        cls.timestep = tf.expand_dims(cls.timestep, -1)
        cls.timestep = tf.stack([cls.timestep] * cls.batch_size)

        # inputs
        cls.inputs = tf.concat([cls.timestep, cls.noise_inputs[:, 0, :]], axis=1)

        # cell
        cls.config = {'encoding': cls.encoding}
        cls.cell = ReparameterizationCell(cls.compiler, cls.policy, cls.config)
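# Usage sketch (hypothetical): stepping the reparameterization cell once with
# the concatenated [timestep, noise] inputs built above. As with the basic
# cell sketch earlier, the __call__(inputs, state) -> (output, next_state)
# RNN-cell contract is an assumption, not something this fixture establishes.
def test_cell_single_step(self):
    with self.compiler.graph.as_default():
        output, next_state = self.cell(self.inputs, self.initial_state)
        self.assertIsNotNone(output)
        self.assertIsNotNone(next_state)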
# NOTE: a minimal import header, added for self-containment. The module paths
# for FeedforwardPolicy and Value below are assumptions; point them at
# wherever these classes are defined in your project.
import unittest

import numpy as np
import tensorflow as tf

import rddlgym
import rddl2tf
from tfplan.train.policy import FeedforwardPolicy  # assumed path
from tfplan.train.valuefn import Value             # assumed path


class TestValueFn(unittest.TestCase):

    def setUp(self):
        self.horizon = 40
        self.batch_size = 128
        self.rddl1 = rddlgym.make('Navigation-v3', mode=rddlgym.AST)
        self.compiler1 = rddl2tf.compilers.DefaultCompiler(
            self.rddl1, batch_size=self.batch_size)
        self.compiler1.init()
        self.policy1 = FeedforwardPolicy(self.compiler1, {
            'layers': [64],
            'activation': 'elu',
            'input_layer_norm': False
        })
        self.policy1.build()
        self.valuefn1 = Value(self.compiler1, self.policy1)

    def test_build_trajectory_graph(self):
        self.valuefn1.build(self.horizon, self.batch_size)

        states = self.valuefn1._states
        state_size = self.compiler1.rddl.state_size
        self.assertIsInstance(states, tuple)
        self.assertEqual(len(states), len(state_size))
        for fluent, size in zip(states, state_size):
            self.assertIsInstance(fluent, tf.Tensor)
            self.assertListEqual(fluent.shape.as_list(),
                                 [self.batch_size, self.horizon] + list(size))

        timesteps = self.valuefn1._timesteps
        self.assertIsInstance(timesteps, tf.Tensor)
        self.assertListEqual(timesteps.shape.as_list(),
                             [self.batch_size, self.horizon])

    def test_build_value_estimates_graph(self):
        self.valuefn1.build(self.horizon, self.batch_size)

        estimates = self.valuefn1._estimates
        self.assertIsInstance(estimates, tf.Tensor)
        self.assertListEqual(estimates.shape.as_list(),
                             [self.batch_size, self.horizon])

    def test_regression_graph(self):
        self.valuefn1.build(self.horizon, self.batch_size)

        features = self.valuefn1._dataset_features
        self.assertIsInstance(features, tuple)
        self.assertEqual(len(features), 2)

        timesteps, states = features
        self.assertIsInstance(timesteps, tf.Tensor)
        self.assertListEqual(timesteps.shape.as_list(),
                             [self.batch_size * self.horizon])

        state_size = self.compiler1.rddl.state_size
        self.assertIsInstance(states, tuple)
        self.assertEqual(len(states), len(state_size))
        for fluent, size in zip(states, state_size):
            self.assertIsInstance(fluent, tf.Tensor)
            self.assertListEqual(fluent.shape.as_list(),
                                 [self.batch_size * self.horizon] + list(size))

        targets = self.valuefn1._dataset_targets
        self.assertIsInstance(targets, tf.Tensor)
        self.assertListEqual(targets.shape.as_list(),
                             [self.batch_size * self.horizon])

    def test_prediction_graph(self):
        self.valuefn1.build(self.horizon, self.batch_size)

        predictions = self.valuefn1._predictions
        self.assertIsInstance(predictions, tf.Tensor)
        self.assertListEqual(predictions.shape.as_list(), [None])

    def test_loss_graph(self):
        # smoke test: building the value function should create the loss op
        self.valuefn1.build(self.horizon, self.batch_size)
        loss = self.valuefn1._loss

    def test_optimization_graph(self):
        # smoke test: building should create the optimizer, gradients, and train op
        self.valuefn1.build(self.horizon, self.batch_size)
        optimizer = self.valuefn1._optimizer
        grad_and_vars = self.valuefn1._grad_and_vars
        train_op = self.valuefn1._train_op

    def test_training_batch(self):
        self.valuefn1.build(self.horizon, self.batch_size)

        with tf.compat.v1.Session(graph=self.compiler1.graph) as sess:
            sess.run(tf.compat.v1.global_variables_initializer())

            training_batch = 2
            dataset = self.valuefn1._regression_dataset(sess)
            batches = self.valuefn1._training_batch_generator(
                training_batch, dataset)

            state_size = self.compiler1.rddl.state_size

            n = 0
            for state, timestep, target in batches:
                self.assertIsInstance(state, tuple)
                self.assertEqual(len(state), len(state_size))
                for fluent, size in zip(state, state_size):
                    self.assertIsInstance(fluent, np.ndarray)
                    self.assertListEqual(list(fluent.shape),
                                         [training_batch] + list(size))

                self.assertIsInstance(timestep, np.ndarray)
                self.assertListEqual(list(timestep.shape), [training_batch])

                self.assertIsInstance(target, np.ndarray)
                self.assertListEqual(list(target.shape), [training_batch])

                n += 1

            self.assertEqual(
                n, int(self.batch_size * self.horizon / training_batch))

    def test_value_fn_fitting(self):
        self.valuefn1.build(self.horizon, self.batch_size)

        with tf.compat.v1.Session(graph=self.compiler1.graph) as sess:
            sess.run(tf.compat.v1.global_variables_initializer())
            epochs = 20
            batch_size = 64
            loss = self.valuefn1.fit(sess, batch_size, epochs,
                                     show_progress=False)
            self.assertIsInstance(loss, list)
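# Runner sketch: with the import header above in place, the suite can be run
# directly via the standard unittest entry point.
if __name__ == '__main__':
    unittest.main()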