Esempio n. 1
0
def CFagent(defaults):
    env = Game(**defaults)
    mover = Mover(env, _extra_dim=1, **defaults)
    teleporter = Teleporter(env, **defaults)
    buffer = ReplayBuffer(**defaults)
    CFagent = CFAgent(env, **defaults)
    CFbuffer = CFReplayBuffer(**defaults)
    collector = Collector(**defaults)

    with Save(env, collector, mover, teleporter, CFagent, **defaults) as save:
        intervention_idx, modified_board = teleporter.pre_process(env)
        dones = CFagent.pre_process(env)
        CF_dones, cfs = None, None
        for frame in loop(env, collector, save, teleporter):
            CFagent.counterfact(env, dones, teleporter, CF_dones, cfs)
            modified_board = teleporter.interveen(env.board, intervention_idx, modified_board)
            actions = mover(modified_board)
            observations, rewards, dones, info = env.step(actions)
            modified_board, modified_rewards, modified_dones, teleport_rewards, intervention_idx = teleporter.modify(observations, rewards, dones, info)
            buffer.teleporter_save_data(teleporter.boards, observations, teleporter.interventions, teleport_rewards, dones, intervention_idx)
            mover.learn(modified_board, actions, modified_rewards, modified_dones)
            board_before, board_after, intervention, tele_rewards, tele_dones = buffer.sample_data()
            teleporter.learn(board_after, intervention, tele_rewards, tele_dones, board_before)
            collector.collect([rewards, modified_rewards, teleport_rewards], [dones, modified_dones])
            CF_dones, cfs = CFagent.counterfact_check(dones, env, **defaults)
            CFbuffer.CF_save_data(CFagent.boards, observations, CFagent.counterfactuals, rewards, dones, CF_dones)
            CFboard, CFobs, cf, CFrewards, CFdones1 = CFbuffer.sample_data()
            CFagent.learn(CFobs, cf, CFrewards, CFdones1, CFboard)
Esempio n. 2
0
def metateleport(defaults):
    collector = Collector(**defaults)
    env = Game(**defaults)
    mover = Mover(env, _extra_dim=1, **defaults)
    teleporter1 = Teleporter(env, _extra_dim=1, **defaults)
    teleporter2 = MetaTeleporter(env, **defaults)
    buffer1 = ReplayBuffer(**defaults)
    buffer2 = ReplayBuffer(**defaults)

    with Save(env, collector, mover, teleporter1, teleporter2, **defaults) as save:
        intervention_idx2, modified_board2 = teleporter2.pre_process(env)
        intervention_idx1, _ = teleporter1.pre_process(env)
        for frame in loop(env, collector, save, teleporter1, teleporter2):
            modified_board2 = teleporter2.interveen(env.board, intervention_idx2, modified_board2)
            modified_board1 = teleporter1.interveen(env.board, intervention_idx1, modified_board2)
            actions = mover(modified_board1)
            observations, rewards, dones, info = env.step(actions)
            modified_board1, modified_board2, modified_rewards1, modified_rewards2, modified_dones1, modified_dones2, tele_rewards, intervention_idx1, intervention_idx2 = teleporter2.metamodify(observations, rewards, dones, info, teleporter1.interventions)
            buffer1.teleporter_save_data(teleporter1.boards, modified_board2, teleporter1.interventions, modified_rewards2, modified_dones2, intervention_idx1)
            buffer2.teleporter_save_data(teleporter2.boards, observations, teleporter2.interventions, tele_rewards, dones, intervention_idx2)
            mover.learn(modified_board1, actions, modified_rewards1, modified_dones1)
            board_before, board_after, intervention, tel_rewards, tele_dones = buffer1.sample_data()
            teleporter1.learn(board_after, intervention, tel_rewards, tele_dones, board_before)
            board_before, board_after, intervention, tel_rewards, tele_dones = buffer2.sample_data()
            teleporter2.learn(board_after, intervention, tel_rewards, tele_dones, board_before)
            collector.collect([rewards, modified_rewards1, modified_rewards2, tele_rewards], [dones, modified_dones1, modified_dones2])
Esempio n. 3
0
def Load_Cfagent(defaults):
    with Load(defaults["load_name"], num=defaults['num']) as load:
        collector, env, mover, teleporter, CFagent = load.items(Collector, Game, Mover, Teleporter, CFAgent)
        buffer = ReplayBuffer(**defaults)
        CFbuffer = CFReplayBuffer(**defaults)

        with Save(env, collector, mover, teleporter, CFagent, **defaults) as save:
            intervention_idx, modified_board = teleporter.pre_process(env)
            dones = CFagent.pre_process(env)
            CF_dones, cfs = None, None
            CFagent.CF_count = 0
            for frame in loop(env, collector, save, teleporter):
                CFagent.counterfact(env, dones, teleporter, CF_dones, cfs)
                modified_board = teleporter.interveen(env.board, intervention_idx, modified_board)
                actions = mover(modified_board)
                observations, rewards, dones, info = env.step(actions)
                modified_board, modified_rewards, modified_dones, teleport_rewards, intervention_idx = teleporter.modify(observations, rewards, dones, info)
                buffer.teleporter_save_data(teleporter.boards, observations, teleporter.interventions, teleport_rewards, dones, intervention_idx)
                mover.learn(modified_board, actions, modified_rewards, modified_dones)
                board_before, board_after, intervention, tele_rewards, tele_dones = buffer.sample_data()
                teleporter.learn(board_after, intervention, tele_rewards, tele_dones, board_before)
                collector.collect([rewards, modified_rewards, teleport_rewards], [dones, modified_dones])
                CF_dones, cfs = CFagent.counterfact_check(dones, env, **defaults)
                CFbuffer.CF_save_data(CFagent.boards, observations, CFagent.counterfactuals, rewards, dones, CF_dones)
                CFboard, CFobs, cf, CFrewards, CFdones1 = CFbuffer.sample_data()
                CFagent.learn(CFobs, cf, CFrewards, CFdones1, CFboard)
Esempio n. 4
0
def teleport(defaults):
    collector = Collector(**defaults)
    env = Game(**defaults)
    mover = Mover(env, _extra_dim=1, **defaults)
    teleporter = Teleporter(env, **defaults)
    buffer = ReplayBuffer(**defaults)

    with Save(env, collector, mover, teleporter, **defaults) as save:
        intervention_idx, modified_board = teleporter.pre_process(env)
        for frame in loop(env, collector, save, teleporter):
            modified_board = teleporter.interveen(env.board, intervention_idx, modified_board)
            actions = mover(modified_board)
            observations, rewards, dones, info = env.step(actions)
            modified_board, modified_rewards, modified_dones, teleport_rewards, intervention_idx = teleporter.modify(observations, rewards, dones, info)
            buffer.teleporter_save_data(teleporter.boards, observations, teleporter.interventions, teleport_rewards, dones, intervention_idx)
            mover.learn(modified_board, actions, modified_rewards, modified_dones)
            board_before, board_after, intervention, tele_rewards, tele_dones = buffer.sample_data()
            teleporter.learn(board_after, intervention, tele_rewards, tele_dones, board_before)
            collector.collect([rewards, modified_rewards, teleport_rewards], [dones, modified_dones])