Example #1
    def __init__(self, world, seed, base, add):
        # Convert the offset into a float Value matching the world's action shape
        add = Value.get_float(add, world.act_shape)
        # Build the base agent on a wrapped world that accounts for the added offset
        base = Agent.build(base, AddValueWorld(world, add), seed)

        def process_action(a):
            # Shift every action produced by the base agent by the offset
            return a + add.get()

        super().__init__(base, process_action=process_action)
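The snippet above follows a simple pattern: build a base agent, then wrap it so every action it produces is shifted by a fixed offset. Below is a minimal, self-contained sketch of that pattern in plain NumPy; the names ConstantAgent and OffsetAgent are hypothetical and are not part of the Agent/Value API used above.

    import numpy as np

    class ConstantAgent:
        # Hypothetical stand-in for a learned base agent: always emits the same action
        def __init__(self, action):
            self.action = np.array(action, dtype=float)

        def act(self, observation):
            return self.action

    class OffsetAgent:
        # Hypothetical wrapper: shifts every action of the base agent by a fixed offset,
        # analogous to process_action adding add.get() above
        def __init__(self, base_agent, offset):
            self.base_agent = base_agent
            self.offset = np.array(offset, dtype=float)

        def act(self, observation):
            return self.base_agent.act(observation) + self.offset

    agent = OffsetAgent(ConstantAgent([0.0, 1.0]), offset=[0.5, -0.5])
    print(agent.act(observation=None))  # -> [0.5 0.5]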
Example #2
    def __init__(self, world, seed, log_lr, n_steps, init):
        # Convert init into a float Value matching the world's action shape
        init = Value.get_float(init, world.act_shape)

        # Wrap the original world with a meta world of learning rates
        world = GradAscentLRWorld(world, n_steps, init)
        log_lr = Agent.build(log_lr, world, seed)

        # Run a single trajectory on GradAscentLRWorld and keep the final value it reaches
        value = world.final_value(log_lr)
        # Freeze the array so the shared action cannot be mutated later
        value.setflags(write=False)
        super().__init__(get_action=lambda _: value)
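Example #2 reduces to a different pattern: compute a value once at construction time, freeze the backing array, and serve it as a constant action from get_action. Here is a minimal sketch of that idea, again with a hypothetical class name (FrozenValueAgent) and plain NumPy instead of the library's Agent/World machinery.

    import numpy as np

    class FrozenValueAgent:
        # Hypothetical agent that always returns a precomputed, read-only action
        def __init__(self, value):
            value = np.array(value, dtype=float)
            # Mark the array read-only, mirroring value.setflags(write=False) above
            value.setflags(write=False)
            self.get_action = lambda _observation: value

    agent = FrozenValueAgent([0.1, 0.2])
    action = agent.get_action(None)
    print(action)        # -> [0.1 0.2]
    # action[0] = 9.0    # ValueError: assignment destination is read-only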