Example #1
0
    def setUpClass(cls):
        """Shared fixture: compile the Navigation RDDL domain and build an
        open-loop policy together with the ActionOptimizer under test."""
        # hyper-parameters shared by all tests in this class
        cls.horizon = 40
        cls.batch_size = 64
        cls.epochs = 50
        cls.learning_rate = 0.01

        # read the RDDL source and parse it into a model
        with open('rddl/deterministic/Navigation.rddl') as rddl_file:
            rddl_source = rddl_file.read()
        rddl_parser = RDDLParser()
        rddl_parser.build()
        rddl = rddl_parser.parse(rddl_source)
        rddl.build()

        # compile the parsed model to TensorFlow ops in batch mode
        cls.rddl2tf = Compiler(rddl, batch_mode=True)

        # open-loop policy spanning the full planning horizon
        cls.policy = OpenLoopPolicy(cls.rddl2tf, cls.batch_size, cls.horizon)
        cls.policy.build('test')

        # optimizer that tunes the policy's action variables
        cls.optimizer = ActionOptimizer(cls.rddl2tf, cls.policy)
        cls.optimizer.build(cls.learning_rate, cls.batch_size, cls.horizon)
Example #2
0
    def setUpClass(cls):
        """Shared fixture: compile the Navigation RDDL domain, build an
        open-loop policy, and roll it out from the initial state."""
        # hyper-parameters shared by all tests in this class
        cls.horizon = 40
        cls.batch_size = 64

        # read the RDDL source and parse it into a model
        with open('rddl/deterministic/Navigation.rddl') as rddl_file:
            rddl_source = rddl_file.read()
        rddl_parser = RDDLParser()
        rddl_parser.build()
        rddl = rddl_parser.parse(rddl_source)
        rddl.build()

        # compile the parsed model to TensorFlow ops in batch mode
        cls.rddl2tf = Compiler(rddl, batch_mode=True)

        # open-loop policy spanning the full planning horizon
        cls.policy = OpenLoopPolicy(cls.rddl2tf, cls.batch_size, cls.horizon)
        cls.policy.build('test')

        # query the policy once per step, counting the timestep down to 0
        with cls.rddl2tf.graph.as_default():
            cls.state = cls.rddl2tf.compile_initial_state(cls.batch_size)
            cls.actions = []
            for step in reversed(range(cls.horizon)):
                timestep = tf.constant(step,
                                       dtype=tf.float32,
                                       shape=(cls.batch_size, 1))
                cls.actions.append(cls.policy(cls.state, timestep))
Example #3
0
    def setUpClass(cls):
        """Shared fixture: compile the Navigation RDDL domain, build an
        open-loop policy, and prepare random policy variables plus an
        ActionEvaluator."""
        # hyper-parameters shared by all tests in this class
        cls.horizon = 40
        cls.batch_size = 1

        # read the RDDL source and parse it into a model
        with open('rddl/deterministic/Navigation.rddl') as rddl_file:
            rddl_source = rddl_file.read()
        rddl_parser = RDDLParser()
        rddl_parser.build()
        rddl = rddl_parser.parse(rddl_source)
        rddl.build()

        # compile the parsed model to TensorFlow ops in batch mode
        cls.rddl2tf = Compiler(rddl, batch_mode=True)

        # open-loop policy spanning the full planning horizon
        cls.policy = OpenLoopPolicy(cls.rddl2tf, cls.batch_size, cls.horizon)
        cls.policy.build('test')

        # one uniform random tensor per action fluent, shaped (horizon, *fluent)
        cls.policy_variables = [
            np.random.uniform(low=-1.0, high=1.0,
                              size=[cls.horizon] + list(shape))
            for shape in cls.rddl2tf.rddl.action_size
        ]

        # evaluator that scores the sampled actions
        cls.evaluator = ActionEvaluator(cls.rddl2tf, cls.policy)
Example #4
0
    def _build_scenario_policy_ops(self):
        """Create the scenario policy: one step shorter than the planning
        horizon, with an independent plan per batch entry."""
        scenario_horizon = self.horizon - 1
        self.scenario_policy = OpenLoopPolicy(
            self.compiler, scenario_horizon, parallel_plans=True)
        self.scenario_policy.build("scenario_policy")
Example #5
0
def cell(request):
    """Fixture: yield a SimulationCell over a freshly compiled model and a
    parallel-plans open-loop policy."""
    model = rddlgym.make(request.param, mode=rddlgym.AST)
    compiler = DefaultCompiler(model, batch_size=BATCH_SIZE)
    compiler.init()
    sim_policy = OpenLoopPolicy(compiler, HORIZON, parallel_plans=True)
    sim_policy.build("tensorplan")
    yield SimulationCell(compiler, sim_policy)
Example #6
0
    def _build_policy_ops(self):
        """Build the planning policy (single plan shared across the batch)
        and, if configured, its warm-start op."""
        self.policy = OpenLoopPolicy(self.compiler,
                                     self.horizon,
                                     parallel_plans=False)
        self.policy.build("planning")

        # NOTE(review): reaches into the policy's private warm-start builder
        if "warm_start" in self.config:
            self.warm_start_op = self.policy._build_warm_start_op()
Example #7
0
def simulator(request):
    """Fixture: yield a built Simulator over a reparameterized compilation of
    the requested model."""
    model = rddlgym.make(request.param, mode=rddlgym.AST)

    compiler = ReparameterizationCompiler(model, batch_size=BATCH_SIZE)
    compiler.init()

    plan_policy = OpenLoopPolicy(compiler, HORIZON, parallel_plans=True)
    plan_policy.build("planning")

    sim = Simulator(compiler, plan_policy, config=None)
    sim.build()
    yield sim
Example #8
0
def cell(request):
    """Fixture: yield a SimulationCell whose noise inputs come from a single
    step of reparameterization samples."""
    model = rddlgym.make(request.param, mode=rddlgym.AST)

    compiler = ReparameterizationCompiler(model, batch_size=BATCH_SIZE)
    compiler.init()

    sim_policy = OpenLoopPolicy(compiler, HORIZON, parallel_plans=True)
    sim_policy.build("planning")

    # sample one step of reparameterization noise and encode it as cell inputs
    with compiler.graph.as_default():
        reparam_map = compiler.get_cpfs_reparameterization()
        samples = utils.get_noise_samples(reparam_map,
                                          BATCH_SIZE,
                                          horizon=1)
        noise, encoding = utils.encode_noise_samples_as_inputs(samples)

    sim_cell = SimulationCell(compiler, sim_policy,
                              config={"encoding": encoding})
    sim_cell.cell_noise = noise
    yield sim_cell
Example #9
0
 def _build_policy_graph(self) -> None:
     """Instantiate and build the open-loop policy used for planning."""
     self._policy = OpenLoopPolicy(
         self._compiler, self.batch_size, self.horizon, self.parallel_plans)
     self._policy.build('planning')
Example #10
0
def non_parallel_plans(compiler):
    """Return an open-loop policy that shares one plan across the batch."""
    shared_plan_policy = OpenLoopPolicy(compiler, HORIZON,
                                        parallel_plans=False)
    shared_plan_policy.build("non_parallel_plans")
    return shared_plan_policy
Example #11
0
def parallel_plans(compiler):
    """Return an open-loop policy with an independent plan per batch entry."""
    per_sample_policy = OpenLoopPolicy(compiler, HORIZON,
                                       parallel_plans=True)
    per_sample_policy.build("parallel_plans")
    return per_sample_policy
Example #12
0
 def _build_policy_ops(self):
     """Build the tensorplan policy over the configured horizon, with an
     independent plan per batch entry."""
     self.policy = OpenLoopPolicy(
         self.compiler, self.config["horizon"], parallel_plans=True)
     self.policy.build("tensorplan")
Example #13
0
 def _build_base_policy_ops(self):
     """Build the single-step base policy with one plan shared across the batch."""
     self.base_policy = OpenLoopPolicy(self.compiler, 1, parallel_plans=False)
     self.base_policy.build("base_policy")