def __init__(self, world, seed, base, add):
    """Wrap *base* so that every action it produces is shifted by *add*.

    *add* is resolved to a float value matching the world's action shape,
    and the base agent is built against an ``AddValueWorld`` so it learns
    in the already-shifted action space.
    """
    add = Value.get_float(add, world.act_shape)
    base = Agent.build(base, AddValueWorld(world, add), seed)
    # Shift each action by the (possibly time-varying) offset at call time.
    super().__init__(base, process_action=lambda action: action + add.get())
def __init__(self, world, seed, log_lr, n_steps, init):
    """Build an agent that always emits the value found by gradient-ascent
    over learning rates.

    The original world is wrapped in a ``GradAscentLRWorld`` meta-world; a
    log-learning-rate agent is built on it and run for a single trajectory,
    and the resulting final value is served as a constant action.
    """
    init = Value.get_float(init, world.act_shape)
    # Meta-world whose "actions" are learning rates for n_steps of ascent.
    meta_world = GradAscentLRWorld(world, n_steps, init)
    lr_agent = Agent.build(log_lr, meta_world, seed)
    # One trajectory on the meta-world yields the optimized value.
    result = meta_world.final_value(lr_agent)
    result.setflags(write=False)  # freeze the array so callers cannot mutate it

    def constant_action(_observation):
        return result

    super().__init__(get_action=constant_action)