def CFagent(defaults):
    """Train a mover, a teleporter and a counterfactual (CF) agent from scratch.

    Builds every component from ``defaults``, then runs the shared training
    loop: the teleporter intervenes on the board, the mover acts on the
    modified board, and the CF agent learns from counterfactual checks made
    against the real trajectory. All progress is persisted via ``Save``.

    Args:
        defaults: configuration dict unpacked as keyword arguments into every
            component (Game, Mover, Teleporter, buffers, CFAgent, Collector,
            Save).
    """
    env = Game(**defaults)
    mover = Mover(env, _extra_dim=1, **defaults)
    teleporter = Teleporter(env, **defaults)
    buffer = ReplayBuffer(**defaults)
    # Renamed from ``CFagent`` — the original local shadowed this function's
    # own name and was easy to confuse with the ``CFAgent`` class.
    cf_agent = CFAgent(env, **defaults)
    CFbuffer = CFReplayBuffer(**defaults)
    collector = Collector(**defaults)
    with Save(env, collector, mover, teleporter, cf_agent, **defaults) as save:
        intervention_idx, modified_board = teleporter.pre_process(env)
        dones = cf_agent.pre_process(env)
        # No counterfactual results exist before the first step.
        CF_dones, cfs = None, None
        for frame in loop(env, collector, save, teleporter):
            cf_agent.counterfact(env, dones, teleporter, CF_dones, cfs)
            # Apply the teleporter's intervention to the current board,
            # then let the mover pick actions on the modified view.
            modified_board = teleporter.interveen(env.board, intervention_idx, modified_board)
            actions = mover(modified_board)
            observations, rewards, dones, info = env.step(actions)
            modified_board, modified_rewards, modified_dones, teleport_rewards, intervention_idx = teleporter.modify(observations, rewards, dones, info)
            buffer.teleporter_save_data(teleporter.boards, observations, teleporter.interventions, teleport_rewards, dones, intervention_idx)
            mover.learn(modified_board, actions, modified_rewards, modified_dones)
            # Off-policy update of the teleporter from replayed transitions.
            board_before, board_after, intervention, tele_rewards, tele_dones = buffer.sample_data()
            teleporter.learn(board_after, intervention, tele_rewards, tele_dones, board_before)
            collector.collect([rewards, modified_rewards, teleport_rewards], [dones, modified_dones])
            # Check the counterfactual predictions against reality and train
            # the CF agent on replayed counterfactual transitions.
            CF_dones, cfs = cf_agent.counterfact_check(dones, env, **defaults)
            CFbuffer.CF_save_data(cf_agent.boards, observations, cf_agent.counterfactuals, rewards, dones, CF_dones)
            CFboard, CFobs, cf, CFrewards, CFdones1 = CFbuffer.sample_data()
            cf_agent.learn(CFobs, cf, CFrewards, CFdones1, CFboard)
def metateleport(defaults):
    """Train a mover under a two-level teleporter hierarchy.

    ``teleporter2`` (the meta level) intervenes first; ``teleporter1`` then
    intervenes on top of that result, and the mover acts on the doubly
    modified board. Each teleporter learns from its own replay buffer, and
    everything is persisted via ``Save``.

    Args:
        defaults: configuration dict unpacked as keyword arguments into
            every component.
    """
    env = Game(**defaults)
    collector = Collector(**defaults)
    mover = Mover(env, _extra_dim=1, **defaults)
    teleporter1 = Teleporter(env, _extra_dim=1, **defaults)
    teleporter2 = MetaTeleporter(env, **defaults)
    buffer1 = ReplayBuffer(**defaults)
    buffer2 = ReplayBuffer(**defaults)

    def _replay_learn(replay, tele):
        # One off-policy update of a teleporter from its replay buffer.
        before, after, chosen, rew, done = replay.sample_data()
        tele.learn(after, chosen, rew, done, before)

    with Save(env, collector, mover, teleporter1, teleporter2, **defaults) as save:
        intervention_idx2, modified_board2 = teleporter2.pre_process(env)
        intervention_idx1, _ = teleporter1.pre_process(env)
        for _frame in loop(env, collector, save, teleporter1, teleporter2):
            # Meta intervention first, then the lower-level one on top of it.
            modified_board2 = teleporter2.interveen(env.board, intervention_idx2, modified_board2)
            modified_board1 = teleporter1.interveen(env.board, intervention_idx1, modified_board2)
            actions = mover(modified_board1)
            observations, rewards, dones, info = env.step(actions)
            (modified_board1, modified_board2,
             modified_rewards1, modified_rewards2,
             modified_dones1, modified_dones2,
             tele_rewards,
             intervention_idx1, intervention_idx2) = teleporter2.metamodify(observations, rewards, dones, info, teleporter1.interventions)
            # Level-1 transitions are stored against the meta-modified board;
            # level-2 transitions against the raw observations.
            buffer1.teleporter_save_data(teleporter1.boards, modified_board2, teleporter1.interventions, modified_rewards2, modified_dones2, intervention_idx1)
            buffer2.teleporter_save_data(teleporter2.boards, observations, teleporter2.interventions, tele_rewards, dones, intervention_idx2)
            mover.learn(modified_board1, actions, modified_rewards1, modified_dones1)
            _replay_learn(buffer1, teleporter1)
            _replay_learn(buffer2, teleporter2)
            collector.collect([rewards, modified_rewards1, modified_rewards2, tele_rewards], [dones, modified_dones1, modified_dones2])
def Load_Cfagent(defaults):
    """Resume counterfactual-agent training from a saved checkpoint.

    Restores the collector, environment, mover, teleporter and CF agent via
    ``Load``, resets the CF agent's counterfactual counter, then runs the
    same training loop as ``CFagent`` with fresh replay buffers.

    Args:
        defaults: configuration dict; must contain ``"load_name"`` and
            ``"num"`` identifying the checkpoint to restore, and is unpacked
            as keyword arguments into the buffers and ``Save``.
    """
    with Load(defaults["load_name"], num=defaults['num']) as load:
        # Renamed from ``CFagent`` — the original local shadowed the
        # module-level ``CFagent`` training function.
        collector, env, mover, teleporter, cf_agent = load.items(Collector, Game, Mover, Teleporter, CFAgent)
        buffer = ReplayBuffer(**defaults)
        CFbuffer = CFReplayBuffer(**defaults)
        with Save(env, collector, mover, teleporter, cf_agent, **defaults) as save:
            intervention_idx, modified_board = teleporter.pre_process(env)
            dones = cf_agent.pre_process(env)
            # No counterfactual results exist before the first step; the
            # restored agent's counterfactual counter is also reset.
            CF_dones, cfs = None, None
            cf_agent.CF_count = 0
            for frame in loop(env, collector, save, teleporter):
                cf_agent.counterfact(env, dones, teleporter, CF_dones, cfs)
                # Apply the teleporter's intervention, then act on the
                # modified view of the board.
                modified_board = teleporter.interveen(env.board, intervention_idx, modified_board)
                actions = mover(modified_board)
                observations, rewards, dones, info = env.step(actions)
                modified_board, modified_rewards, modified_dones, teleport_rewards, intervention_idx = teleporter.modify(observations, rewards, dones, info)
                buffer.teleporter_save_data(teleporter.boards, observations, teleporter.interventions, teleport_rewards, dones, intervention_idx)
                mover.learn(modified_board, actions, modified_rewards, modified_dones)
                # Off-policy update of the teleporter from replayed transitions.
                board_before, board_after, intervention, tele_rewards, tele_dones = buffer.sample_data()
                teleporter.learn(board_after, intervention, tele_rewards, tele_dones, board_before)
                collector.collect([rewards, modified_rewards, teleport_rewards], [dones, modified_dones])
                # Check counterfactual predictions against reality and train
                # the CF agent on replayed counterfactual transitions.
                CF_dones, cfs = cf_agent.counterfact_check(dones, env, **defaults)
                CFbuffer.CF_save_data(cf_agent.boards, observations, cf_agent.counterfactuals, rewards, dones, CF_dones)
                CFboard, CFobs, cf, CFrewards, CFdones1 = CFbuffer.sample_data()
                cf_agent.learn(CFobs, cf, CFrewards, CFdones1, CFboard)
def teleport(defaults):
    """Train a mover alongside a single intervening teleporter.

    The teleporter modifies the board each frame, the mover acts on the
    modified board, and both components learn from the resulting
    transitions; progress is persisted via ``Save``.

    Args:
        defaults: configuration dict unpacked as keyword arguments into
            every component.
    """
    env = Game(**defaults)
    collector = Collector(**defaults)
    mover = Mover(env, _extra_dim=1, **defaults)
    teleporter = Teleporter(env, **defaults)
    replay = ReplayBuffer(**defaults)
    with Save(env, collector, mover, teleporter, **defaults) as save:
        intervention_idx, modified_board = teleporter.pre_process(env)
        for _frame in loop(env, collector, save, teleporter):
            # Intervene on the current board, then act on the modified view.
            modified_board = teleporter.interveen(env.board, intervention_idx, modified_board)
            actions = mover(modified_board)
            observations, rewards, dones, info = env.step(actions)
            modified_board, modified_rewards, modified_dones, teleport_rewards, intervention_idx = teleporter.modify(observations, rewards, dones, info)
            replay.teleporter_save_data(teleporter.boards, observations, teleporter.interventions, teleport_rewards, dones, intervention_idx)
            mover.learn(modified_board, actions, modified_rewards, modified_dones)
            # Off-policy update of the teleporter from replayed transitions.
            prev_board, next_board, chosen, sampled_rewards, sampled_dones = replay.sample_data()
            teleporter.learn(next_board, chosen, sampled_rewards, sampled_dones, prev_board)
            collector.collect([rewards, modified_rewards, teleport_rewards], [dones, modified_dones])