def setUp(self):
    self.batch_size = 10

    self.rddl1 = rddlgym.make('Reservoir-8', mode=rddlgym.AST)
    self.rddl2 = rddlgym.make('Navigation-v2', mode=rddlgym.AST)

    self.compiler1 = Compiler(self.rddl1, batch_mode=True)
    self.compiler2 = Compiler(self.rddl2, batch_mode=True)

    self.cell1 = ActionSimulationCell(self.compiler1)
    self.initial_state1 = self.compiler1.compile_initial_state(batch_size=self.batch_size)
    self.default_action1 = self.compiler1.compile_default_action(batch_size=1)

    self.cell2 = ActionSimulationCell(self.compiler2)
    self.initial_state2 = self.compiler2.compile_initial_state(batch_size=self.batch_size)
    self.default_action2 = self.compiler2.compile_default_action(batch_size=1)
def _session_run(planner, fetches):
    env = rddlgym.make(planner.rddl, mode=rddlgym.GYM)

    with tf.Session(graph=planner.compiler.graph) as sess:
        sess.run(tf.global_variables_initializer())
        feed_dict = _get_feed_dict(sess, planner, env)
        return sess.run(fetches, feed_dict=feed_dict)
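# The helper below is not part of the original snippets: _get_feed_dict is only
# called, never defined, in _session_run above. This is a hypothetical sketch of
# what such a helper could look like, modeled on the feed_dict built inline in
# the reparameterization test_get_action at the end of this section; the
# attribute names (initial_state, simulator.samples, simulator.noise,
# steps_to_go) and the HORIZON constant are assumptions borrowed from that test.
def _get_feed_dict(sess, planner, env):
    state = env.observation_space.sample()
    batch_state = planner._get_batch_initial_state(state)
    samples = utils.evaluate_noise_samples_as_inputs(sess, planner.simulator.samples)
    return {
        planner.initial_state: batch_state,
        planner.simulator.noise: samples,
        planner.steps_to_go: HORIZON,
    }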
def setUpClass(cls):
    # hyper-parameters
    cls.batch_size = 64
    cls.horizon = 20
    cls.learning_rate = 0.001
    cls.regularization_rate = 0.1

    # rddl
    cls.compiler = rddlgym.make('Navigation-v2', rddlgym.SCG)
    cls.compiler.batch_mode_on()

    # policy
    cls.policy = FeedforwardPolicy(cls.compiler, {
        'layers': [256],
        'activation': 'elu',
        'input_layer_norm': False
    })
    cls.policy.build()

    # planner
    cls.config = {
        'batch_size': cls.batch_size,
        'horizon': cls.horizon,
        'learning_rate': cls.learning_rate,
        'regularization_rate': cls.regularization_rate
    }
    cls.planner = MinimaxOptimizationPlanner(cls.compiler, cls.config)
    cls.planner.build(cls.policy, loss='mse', optimizer='RMSProp')
def run(config):
    # pylint: disable=import-outside-toplevel
    import os
    import psutil
    import rddlgym
    import tensorflow as tf

    import tfplan

    tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)

    # os.environ["CUDA_VISIBLE_DEVICES"] = "-1"
    os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"
    os.environ["OMP_NUM_THREADS"] = str(psutil.cpu_count(logical=False))

    planner = config["planner"]
    rddl = config["rddl"]
    filepath = os.path.join(config["logdir"], "data.csv")

    config["optimization"] = {
        "optimizer": config["optimizer"],
        "learning_rate": config["learning_rate"],
    }

    env = rddlgym.make(rddl, mode=rddlgym.GYM, config=config)
    env.set_horizon(config["horizon"])

    planner = tfplan.make(planner, rddl, config)

    with rddlgym.Runner(env, planner, debug=config["verbose"]) as runner:
        trajectory = runner.run()
        trajectory.save(filepath)

    print(trajectory.as_dataframe())
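# Hypothetical invocation of run() above. The keys mirror what run() reads from
# its config argument (plus "batch_size", which the planner base class further
# down reads); the concrete values ('tensorplan', the RDDL id, hyper-parameters,
# logdir) are illustrative assumptions, not defaults taken from the library.
config = {
    "planner": "tensorplan",
    "rddl": "Navigation-v2",
    "logdir": "/tmp/tfplan/navigation",
    "optimizer": "RMSProp",
    "learning_rate": 0.001,
    "horizon": 40,
    "batch_size": 64,
    "verbose": False,
}
run(config)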
def setUpClass(cls):
    # hyper-parameters
    cls.horizon = 20
    cls.batch_size = 64

    # rddl
    cls.compiler = rddlgym.make('Navigation-v2', rddlgym.SCG)
    cls.compiler.batch_mode_on()

    # initial state
    cls.initial_state = cls.compiler.compile_initial_state(cls.batch_size)

    # default action
    cls.default_action = cls.compiler.compile_default_action(cls.batch_size)

    # policy
    cls.policy = FeedforwardPolicy(cls.compiler, {
        'layers': [32, 32],
        'activation': 'elu',
        'input_layer_norm': False
    })
    cls.policy.build()

    # model
    cls.config = {}
    cls.model = ReparameterizationSampling(cls.compiler, cls.config)
    cls.model.build(cls.policy)

    cls.output = cls.model(cls.initial_state, cls.horizon)
def setUpClass(cls):
    # hyper-parameters
    cls.batch_size = 64
    cls.horizon = 20
    cls.learning_rate = 0.001

    # rddl
    cls.compiler = rddlgym.make('Reservoir-8', rddlgym.SCG)
    cls.compiler.init()
    cls.compiler.batch_size = cls.batch_size

    # policy
    cls.policy = FeedforwardPolicy(cls.compiler, {
        'layers': [256],
        'activation': 'elu',
        'input_layer_norm': True
    })
    cls.policy.build()

    # planner
    cls.config = {
        'batch_size': cls.batch_size,
        'horizon': cls.horizon,
        'learning_rate': cls.learning_rate
    }
    cls.planner = PathwiseOptimizationPlanner(cls.compiler, cls.config)
    cls.planner.build(cls.policy, loss='mse', optimizer='RMSProp')
def __init__(self, rddl, config=None):
    self._compiler = rddlgym.make(rddl, mode=rddlgym.SCG)
    self._compiler.init()

    self.config = config

    self._graph = self._compiler.graph
    self._config_proto = tf.ConfigProto(
        inter_op_parallelism_threads=1,
        intra_op_parallelism_threads=1,
        log_device_placement=False,
    )
    self._sess = tf.Session(graph=self._graph, config=self._config_proto)

    self.observation_space = self._create_observation_space()
    self.action_space = self._create_action_space()

    self.non_fluents = self._eval_non_fluents()

    with self._compiler.graph.as_default():
        self._state_inputs = self._build_state_inputs()
        self._action_inputs = self._build_action_inputs()
        self._interms, self._next_state, self._reward = self._build_model_ops()

    self._state = None
    self._timestep = None
    self._horizon = None
def setUpClass(cls):
    # hyper-parameters
    cls.horizon = 40
    cls.batch_size = 16

    # rddl
    cls.compiler = rddlgym.make('Reservoir-8', mode=rddlgym.SCG)
    cls.compiler.init()
    cls.compiler.batch_size = cls.batch_size

    # initial state
    cls.initial_state = cls.compiler.initial_state()

    # default action
    cls.default_action = cls.compiler.default_action()

    # policy
    cls.policy = FeedforwardPolicy(cls.compiler, {
        'layers': [64, 64],
        'activation': 'relu',
        'input_layer_norm': True
    })
    cls.policy.build()

    # cell
    cls.cell = BasicMarkovCell(cls.compiler, cls.policy)

    with cls.cell.graph.as_default():
        # timestep
        cls.timestep = tf.constant(cls.horizon, dtype=tf.float32)
        cls.timestep = tf.expand_dims(cls.timestep, -1)
        cls.timestep = tf.stack([cls.timestep] * cls.batch_size)
def setUpClass(cls):
    # hyper-parameters
    cls.horizon = 40
    cls.batch_size = 128

    # rddl
    cls.compiler = rddlgym.make('Reservoir-8', rddlgym.SCG)
    cls.compiler.init()
    cls.compiler.batch_size = cls.batch_size

    # initial state
    cls.initial_state = cls.compiler.initial_state()

    # default action
    cls.default_action = cls.compiler.default_action()

    # policy
    cls.policy = FeedforwardPolicy(cls.compiler, {
        'layers': [32, 32],
        'activation': 'elu',
        'input_layer_norm': True
    })
    cls.policy.build()

    # model
    cls.config = {}
    cls.model = MonteCarloSampling(cls.compiler, cls.config)
    cls.model.build(cls.policy)
def setUpClass(cls):
    # hyper-parameters
    cls.batch_size = 16
    cls.horizon = 15

    # model
    cls.compiler = rddlgym.make('Reservoir-8', mode=rddlgym.SCG)
    cls.compiler.init()
    cls.compiler.batch_size = cls.batch_size

    # initial state
    cls.initial_state = cls.compiler.initial_state()

    # default action
    cls.default_action = cls.compiler.default_action()

    # policy
    cls.config = {
        'layers': [128, 64, 32],
        'activation': 'elu',
        'input_layer_norm': True
    }
    cls.policy = FeedforwardPolicy(cls.compiler, cls.config)
    cls.policy.build()
def test_runner(planner):
    # pylint: disable=protected-access
    rddl = planner.rddl
    env = rddlgym.make(rddl, mode=rddlgym.GYM)
    env._horizon = 3

    runner = rddlgym.Runner(env, planner)
    trajectory = runner.run()

    assert len(trajectory) == env._horizon
def cell(request):
    rddl = request.param
    model = rddlgym.make(rddl, mode=rddlgym.AST)
    compiler = DefaultCompiler(model, batch_size=BATCH_SIZE)
    compiler.init()

    policy = OpenLoopPolicy(compiler, HORIZON, parallel_plans=True)
    policy.build("tensorplan")

    yield SimulationCell(compiler, policy)
def runner(request):
    rddl = request.param
    env = make(rddl, mode=GYM)

    def planner(state, timestep):
        # pylint: disable=unused-argument
        return env.action_space.sample()

    return Runner(env, planner)
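# Hypothetical test consuming the runner fixture above: the random-action
# planner closure is exercised for one episode, mirroring the assertion style
# of test_runner earlier in this section. The test name is illustrative.
def test_random_policy_runner(runner):
    trajectory = runner.run()
    assert len(trajectory) > 0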
def run(model_id, logdir, layers, activation, batch_size, learning_rate, horizon, epochs):
    compiler = rddlgym.make(model_id, mode=rddlgym.SCG)
    compiler.batch_mode_on()

    input_layer_norm = True
    hidden_layer_norm = False
    planner = PolicyOptimizationPlanner(compiler, layers, activation,
                                        input_layer_norm, hidden_layer_norm,
                                        logdir=logdir)
    planner.build(learning_rate, batch_size, horizon)

    rewards, policy, _ = planner.run(epochs)
    return rewards, policy
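# Hypothetical call to the policy-optimization run() helper above; every
# argument name comes from its signature, while the concrete values
# (domain id, logdir, layer sizes, hyper-parameters) are illustrative only.
rewards, policy = run(
    model_id='Navigation-v2',
    logdir='/tmp/tfplan/policy-opt',
    layers=[256, 128],
    activation='elu',
    batch_size=64,
    learning_rate=0.001,
    horizon=40,
    epochs=300,
)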
def setUp(self):
    self.rddl1 = rddlgym.make('Navigation-v3', mode=rddlgym.AST)
    self.compiler1 = Compiler(self.rddl1, batch_mode=True)

    self.policy1 = FeedforwardPolicy(self.compiler1, {
        'layers': [64],
        'activation': 'elu',
        'input_layer_norm': False
    })
    self.policy1.build()

    self.valuefn1 = Value(self.compiler1, self.policy1)
def setUpClass(cls):
    # hyper-parameters
    cls.batch_size = 16
    cls.horizon = 15

    # model
    cls.compiler = rddlgym.make('Reservoir-8', mode=rddlgym.SCG)
    cls.compiler.batch_mode_on()

    # initial state
    cls.initial_state = cls.compiler.compile_initial_state(cls.batch_size)
def test_get_batch_initial_state(planner):
    # pylint: disable=protected-access
    env = rddlgym.make(planner.rddl, mode=rddlgym.GYM)

    with planner.compiler.graph.as_default():
        state = env.observation_space.sample()
        batch_state = planner._get_batch_initial_state(state)

        assert len(state) == len(batch_state)
        for fluent, batch_fluent in zip(state.values(), batch_state):
            assert fluent.dtype == batch_fluent.dtype
            assert fluent.shape == batch_fluent.shape[1:]
            assert batch_fluent.shape[0] == planner.compiler.batch_size
def simulator(request):
    rddl = request.param
    model = rddlgym.make(rddl, mode=rddlgym.AST)
    compiler = ReparameterizationCompiler(model, batch_size=BATCH_SIZE)
    compiler.init()

    policy = OpenLoopPolicy(compiler, HORIZON, parallel_plans=True)
    policy.build("planning")

    simulator = Simulator(compiler, policy, config=None)
    simulator.build()

    yield simulator
def __init__(self, rddl, compiler_cls, config):
    self.rddl = rddl
    self.model = rddlgym.make(rddl, mode=rddlgym.AST)
    self.compiler = compiler_cls(self.model, batch_size=config["batch_size"])
    self.config = config

    self.compiler.init()

    # renamed from `config` to avoid shadowing the constructor argument
    config_proto = tf.ConfigProto(
        inter_op_parallelism_threads=1,
        intra_op_parallelism_threads=1,
        log_device_placement=False,
    )
    self._sess = tf.Session(graph=self.graph, config=config_proto)
def setUpClass(cls):
    # hyper-parameters
    cls.batch_size = 16
    cls.horizon = 15

    # model
    cls.compiler = rddlgym.make('Reservoir-8', mode=rddlgym.SCG)
    cls.compiler.init()
    cls.compiler.batch_size = cls.batch_size

    # initial state
    cls.initial_state = cls.compiler.initial_state()

    # default action
    cls.default_action = cls.compiler.default_action()
def setUpClass(cls):
    # hyper-parameters
    cls.batch_size = 16
    cls.horizon = 20

    cls.compiler = rddlgym.make('Navigation-v2', mode=rddlgym.SCG)
    cls.compiler.batch_mode_on()

    cls.noise_shapes = get_cpfs_reparameterization(cls.compiler.rddl)

    with cls.compiler.graph.as_default():
        cls.noise_variables = utils.get_noise_variables(
            cls.noise_shapes, cls.batch_size, cls.horizon)
        cls.inputs, cls.encoding = utils.encode_noise_as_inputs(
            cls.noise_variables)
def setUp(self):
    self.horizon = 40
    self.batch_size = 128

    self.rddl1 = rddlgym.make('Navigation-v3', mode=rddlgym.AST)
    self.compiler1 = rddl2tf.compilers.DefaultCompiler(
        self.rddl1, batch_size=self.batch_size)
    self.compiler1.init()

    self.policy1 = FeedforwardPolicy(self.compiler1, {
        'layers': [64],
        'activation': 'elu',
        'input_layer_norm': False
    })
    self.policy1.build()

    self.valuefn1 = Value(self.compiler1, self.policy1)
def setUpClass(cls):
    # hyper-parameters
    cls.horizon = 40
    cls.batch_size = 16

    # rddl
    rddl = rddlgym.make('Navigation-v2', mode=rddlgym.AST)
    cls.compiler = rddl2tf.compilers.ReparameterizationCompiler(
        rddl, batch_size=cls.batch_size)
    cls.compiler.init()

    # initial state
    cls.initial_state = cls.compiler.initial_state()

    # default action
    cls.default_action = cls.compiler.default_action()

    # policy
    cls.policy = FeedforwardPolicy(cls.compiler, {
        'layers': [64, 64],
        'activation': 'relu',
        'input_layer_norm': True
    })
    cls.policy.build()

    with cls.compiler.graph.as_default():
        # reparameterization
        cls.noise_shapes = cls.compiler.get_cpfs_reparameterization()
        cls.noise_variables = utils.get_noise_variables(
            cls.noise_shapes, cls.batch_size, cls.horizon)
        cls.noise_inputs, cls.encoding = utils.encode_noise_as_inputs(
            cls.noise_variables)

        # timestep
        cls.timestep = tf.constant(cls.horizon, dtype=tf.float32)
        cls.timestep = tf.expand_dims(cls.timestep, -1)
        cls.timestep = tf.stack([cls.timestep] * cls.batch_size)

        # inputs
        cls.inputs = tf.concat([cls.timestep, cls.noise_inputs[:, 0, :]], axis=1)

    # cell
    cls.config = {'encoding': cls.encoding}
    cls.cell = ReparameterizationCell(cls.compiler, cls.policy, cls.config)
def test_get_action(planner):
    # pylint: disable=protected-access
    env = rddlgym.make(planner.rddl, mode=rddlgym.GYM)

    with tf.Session(graph=planner.compiler.graph) as sess:
        sess.run(tf.global_variables_initializer())

        feed_dict = _get_feed_dict(sess, planner, env)
        actions_ = planner._get_action(planner.action, feed_dict)

        action_fluents = planner.compiler.default_action_fluents
        assert isinstance(actions_, OrderedDict)
        assert len(actions_) == len(action_fluents)
        for action_, action_fluent in zip(actions_.values(), action_fluents):
            assert tf.dtypes.as_dtype(action_.dtype) == action_fluent[1].dtype
            assert list(action_.shape) == list(action_fluent[1].shape.fluent_shape)
def setUp(self):
    self.rddl1 = rddlgym.make('Reservoir-8', mode=rddlgym.AST)
    self.rddl2 = rddlgym.make('Mars_Rover', mode=rddlgym.AST)
    self.rddl3 = rddlgym.make('HVAC-v1', mode=rddlgym.AST)
    self.rddl4 = rddlgym.make('CrossingTraffic-10', mode=rddlgym.AST)
    self.rddl5 = rddlgym.make('GameOfLife-10', mode=rddlgym.AST)
    self.rddl6 = rddlgym.make('CarParking-v1', mode=rddlgym.AST)
    self.rddl7 = rddlgym.make('Navigation-v3', mode=rddlgym.AST)

    self.compiler1 = Compiler(self.rddl1)
    self.compiler2 = Compiler(self.rddl2)
    self.compiler3 = Compiler(self.rddl3)
    self.compiler4 = Compiler(self.rddl4)
    self.compiler5 = Compiler(self.rddl5)
    self.compiler6 = Compiler(self.rddl6)
    self.compiler7 = Compiler(self.rddl7)
def reparameterization(request):
    rddl = request.param
    model = rddlgym.make(rddl, mode=rddlgym.AST)
    compiler = ReparameterizationCompiler(model, batch_size=BATCH_SIZE)
    compiler.init()

    with compiler.graph.as_default():
        mapping = compiler.get_cpfs_reparameterization()
        samples = utils.get_noise_samples(mapping, BATCH_SIZE, HORIZON)
        inputs, encoding = utils.encode_noise_samples_as_inputs(samples)
        decoded_samples = utils.decode_inputs_as_noise_samples(
            inputs[:, 0, ...], encoding)

    return Reparameterization(compiler, mapping, samples, inputs, encoding,
                              decoded_samples)
def setUpClass(cls):
    # hyper-parameters
    cls.batch_size = 16
    cls.horizon = 20

    rddl = rddlgym.make('Navigation-v2', mode=rddlgym.AST)
    cls.compiler = rddl2tf.compilers.ReparameterizationCompiler(
        rddl, batch_size=cls.batch_size)
    cls.compiler.init()

    cls.noise_shapes = cls.compiler.get_cpfs_reparameterization()

    with cls.compiler.graph.as_default():
        cls.noise_variables = utils.get_noise_variables(
            cls.noise_shapes, cls.batch_size, cls.horizon)
        cls.inputs, cls.encoding = utils.encode_noise_as_inputs(
            cls.noise_variables)
def cell(request):
    rddl = request.param
    model = rddlgym.make(rddl, mode=rddlgym.AST)
    compiler = ReparameterizationCompiler(model, batch_size=BATCH_SIZE)
    compiler.init()

    policy = OpenLoopPolicy(compiler, HORIZON, parallel_plans=True)
    policy.build("planning")

    with compiler.graph.as_default():
        reparameterization_map = compiler.get_cpfs_reparameterization()
        cell_samples = utils.get_noise_samples(reparameterization_map,
                                               BATCH_SIZE, horizon=1)
        cell_noise, encoding = utils.encode_noise_samples_as_inputs(cell_samples)

    cell = SimulationCell(compiler, policy, config={"encoding": encoding})
    cell.cell_noise = cell_noise

    yield cell
def _init_domain(self, model_id):
    compiler = rddlgym.make(model_id, mode=rddlgym.SCG)
    compiler.batch_mode_on()

    initial_state = compiler.compile_initial_state(batch_size=1)
    default_action = compiler.compile_default_action(batch_size=1)

    planner = OnlineOpenLoopPlanner(compiler, self.batch_size, self.horizon)
    planner.build(self.learning_rate, epochs=self.epochs, show_progress=False)

    online_planner = OnlinePlanning(compiler, planner)
    online_planner.build()

    return {
        'initial_state': initial_state,
        'default_action': default_action,
        'online_planner': online_planner
    }
def test_get_action(planner):
    # pylint: disable=protected-access
    env = rddlgym.make(planner.rddl, mode=rddlgym.GYM)

    with tf.Session(graph=planner.compiler.graph) as sess:
        sess.run(tf.global_variables_initializer())

        state = env.observation_space.sample()
        batch_state = planner._get_batch_initial_state(state)

        samples = utils.evaluate_noise_samples_as_inputs(
            sess, planner.simulator.samples)

        feed_dict = {
            planner.initial_state: batch_state,
            planner.simulator.noise: samples,
            planner.steps_to_go: HORIZON,
        }

        actions_ = planner._get_action(planner.trajectory.actions, feed_dict)

        action_fluents = planner.compiler.default_action_fluents
        assert isinstance(actions_, OrderedDict)
        assert len(actions_) == len(action_fluents)
        for action_, action_fluent in zip(actions_.values(), action_fluents):
            assert tf.dtypes.as_dtype(action_.dtype) == action_fluent[1].dtype
            assert list(action_.shape) == list(action_fluent[1].shape.fluent_shape)