def light_env_learning(base_dir="results/light-switches", structure="one_to_one", num=5,
                       rounds=50, l=1, experiments_per_structure=1, num_structures=10,
                       causal_order=True):
    from env.light_env import LightEnv
    exploration_steps = l
    env = LightEnv(structure=structure, num=num)
    base_path = os.path.join(base_dir, structure, str(num))
    create_dirs_results(base_path)
    p_bar_structures = tqdm.trange(num_structures)
    start_time = time.time()
    for s in p_bar_structures:
        results_data = dict()
        p_bar_structures.set_description("Learning Structure")
        # Sample a new causal structure for this run, then freeze it.
        env.keep_struct = False
        env.reset()
        env.keep_struct = True
        lights_on_model = generate_model_from_env(env)
        lights_off_model = generate_model_from_env(env, lights_off=True)
        unknown_model_on = deepcopy(lights_on_model)
        unknown_model_off = deepcopy(lights_off_model)
        variables = sorted(lights_on_model.get_graph_toposort())
        causal_order = variables if causal_order else []
        causes = lights_on_model.get_intervention_variables()
        invalid_edges = generate_invalid_edges_light(variables, causes)
        global_beliefs_results = dict()
        rewards_per_struct = []
        base_structure_filename = f"light_env_struct_{structure}_{s}"
        lights_on_model.save_digraph_as_img(
            os.path.join(base_path, "graphs", base_structure_filename + ".pdf"))
        g_truth = {e: 1 for e in lights_on_model.digraph.edges}
        for i in range(experiments_per_structure):
            # Initialize edge beliefs and build an approximate model from exploration data.
            connection_tables = create_pij(variables, causal_order, invalid_edges)
            adj_list = create_graph_from_beliefs(variables, connection_tables)
            ebunch, nodes = adj_list_to_ebunch_and_nodes(adj_list)
            data_on, data_off = explore_light_env(env, exploration_steps)
            df_on = pd.DataFrame.from_dict(data_on)
            df_off = pd.DataFrame.from_dict(data_off)
            approx_model_on = generate_approx_model_from_graph(ebunch, nodes, df_on)
            approx_model_off = generate_approx_model_from_graph(ebunch, nodes, df_off)
            unknown_model_on.reset(approx_model_on, ebunch, nodes)
            unknown_model_off.reset(approx_model_off, ebunch, nodes)
            connection_probs, rewards = training_ligh_env_learning(
                variables, rounds, connection_tables, data_on, data_off,
                unknown_model_on, unknown_model_off, env)
            rewards_per_struct.append(rewards)
            for key in connection_probs:
                if key not in global_beliefs_results:
                    global_beliefs_results[key] = []
                global_beliefs_results[key].append(connection_probs[key])
        results_data[f"gt_{s}"] = g_truth
        results_data[f"beliefs_{s}"] = global_beliefs_results
        results_data[f"training_time_{s}"] = time.time() - start_time
        results_data[f"rewards_{s}"] = rewards_per_struct
        dict_filename = os.path.join(base_path, "mats", base_structure_filename + ".pickle")
        with open(dict_filename, "wb") as handle:
            pickle.dump(results_data, handle, protocol=pickle.HIGHEST_PROTOCOL)
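

# Usage sketch (hypothetical; not part of the original module). It simply calls the
# function above for each switch structure used elsewhere in this repository, with
# illustrative argument values taken from the function's defaults.
def run_light_env_learning_sketch():
    for structure in ("one_to_one", "one_to_many", "many_to_one"):
        light_env_learning(structure=structure, num=5, rounds=50,
                           experiments_per_structure=1, num_structures=10)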
def main():
    logging.basicConfig(filename="logs/test_nature_light.log", filemode='w', level=logging.INFO)
    # model = BaseModel('configs/model_parameters.json')
    # tcm = TrueCausalModel(model)
    # r = tcm.action_simulator(['Tratamiento'], [1])
    # print(r)
    # r = tcm.action_simulator(['Tratamiento'], [0])
    # print(r)
    from utils.light_env_utils import generate_model_from_env
    from env.light_env import LightEnv
    # Sample a structure, freeze it, and query the "nature" model with one intervention.
    env = LightEnv(structure="one_to_one")
    env.keep_struct = False
    env.reset()
    env.keep_struct = True
    model = generate_model_from_env(env)
    nature_light_switch = TrueCausalModelEnv(model)
    variable = "cause_1"
    value = 1
    r = nature_light_switch.action_simulator(env, variable)
    print(r)
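

# Hypothetical entry point (assumption: this test script is meant to be run directly;
# the guard below is not part of the original fragment).
if __name__ == "__main__":
    main()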
parser.add_argument('--images', type=int, default=0, help='images or no')
parser.add_argument('--data-dir', type=str, help='data dir')
args = parser.parse_args()

gc = 1 - args.fixed_goal
# Episode length depends on the structure; "masterswitch" tasks use a longer horizon.
if args.structure == "masterswitch":
    st = (args.horizon * (2 * args.num + 1) + (args.horizon - 1) * (2 * args.num + 1))
else:
    st = (args.horizon * (2 * args.num + 1))
tj = "gt"

l = LightEnv(args.horizon, args.num, tj, args.structure, gc,
             filename="exp/" + str(gc) + "_" + args.method, seen=args.seen)
env = DummyVecEnv(1 * [lambda: l])

if args.images:
    addonn = "_I1"
else:
    addonn = ""

# Load the pretrained induction model for the chosen method.
if args.method == "trajF":
    F = th.load(args.data_dir + "cnn_Redo_L2_S" + str(args.seen) + "_h" + str(args.horizon) +
                "_" + str(args.structure) + addonn).cuda()
elif args.method == "trajFia":
    F = th.load(args.data_dir + "iter_attn_Redo_L2_S" + str(args.seen) + "_h" + str(args.horizon) + \
fname = args.data_dir + "polattn_" + str(gc) + "_" + args.method
memsize = 10000
memory = {'state': [], 'graph': [], 'action': []}

if args.method == 'trajlstm':
    pol = BCPolicyMemory(args.num, args.structure).cuda()
else:
    pol = BCPolicy(args.num, args.structure, True).cuda()
optimizer = th.optim.Adam(pol.parameters(), lr=0.0001)

## Using ground truth graph
if args.method == "gt":
    l = LightEnv(args.horizon * 2, args.num, "gt", args.structure, gc,
                 filename=fname, seen=args.seen)
    successes = []
    l.keep_struct = False
    l.train = True
    ## Per episode
    for mep in range(100000):
        l.train = True
        obs = l.reset()
        curr = np.zeros((args.num))
        obs = curr
        imobs = l._get_obs(images=True)
                new_state = tuple(new_state[:self.env.num])
                # Tabular Q-learning update.
                self.Q[state][action] = self.Q[state][action] + self.alpha * \
                    (reward + self.gamma * np.max(self.Q[new_state]) -
                     self.Q[state][action])
                state = new_state
                total_episode_reward += reward
            rewards_per_episode.append(total_episode_reward)
        results_data[f"rewards_{s}"] = rewards_per_episode
        print(results_data)
        with open(dict_filename + ".pickle", "wb") as handle:
            pickle.dump(results_data, handle, protocol=pickle.HIGHEST_PROTOCOL)
        return rewards_per_episode


if __name__ == "__main__":
    num = 5
    simulations = 5
    episodes = 500
    structures = ["one_to_one", "one_to_many", "many_to_one"]
    for structure in structures:
        env = LightEnv(num=num, structure=structure)
        for s in range(simulations):
            env.keep_struct = False
            env.reset()
            env.keep_struct = True
            policy = Policy(linear=False)
            q_agent = QLearningAgent(env, policy, episodes=episodes)
            rewards = q_agent.train(
                f"results/light-switches-q-learning-exp-decay/{structure}/{num}/mats/light_env_struct_{structure}_{s}",
                s=s)
""" Obtiene un diccionario donde las llaves son nodos y los valores son listas con los padres de cada nodo. """ parents = dict() for edge in ebunch: if not edge[1] in parents: parents[edge[1]] = [] parents[edge[1]].append(edge[0]) return parents if __name__ == "__main__": n = 5 logging.basicConfig(filename='logs/envToModel.log', filemode='w', level=logging.INFO) env = LightEnv(structure="many_to_one") env.keep_struct = False print(env._get_obs()[:n]) print(env.goal) env.reset() env.keep_struct = True print(env._get_obs()[:n]) print(env.goal) env.reset() print(env._get_obs()[:n]) print(env.goal) print(get_targets(env)) lights_on_model = generate_model_from_env(env) lights_off_model = generate_model_from_env(env, lights_off=True) data_on, data_off = explore_light_env(env, 10) # print(data_on)
                logging.info(nature_response)
                done = nature_response.get("done", False)
            self.rewards_per_round.append(episode_reward)
        return self.rewards_per_round


class LightsHalfBlindAgent(object):
    pass


if __name__ == "__main__":
    from utils.light_env_utils import generate_model_from_env
    from utils.vis_utils import plot_reward
    from true_causal_model import TrueCausalModelEnv
    env = LightEnv(structure="one_to_many")
    env.keep_struct = False
    env.reset()
    env.keep_struct = True
    model = generate_model_from_env(env)
    nature_light_switch = TrueCausalModelEnv(model)
    print(env.aj)
    print(env.goal)
    for cpd in model.pgmodel.get_cpds():
        print(cpd)
    episodes = 100
    print(
        model.conditional_probability("effect_1", {
            "cause_0": 1,
            "cause_1": 1,
            "cause_3": 0,
buffer = []
gtbuffer = []
num_episodes = 40000

## Set Horizon Based On Task
if args.structure == "masterswitch":
    st = (args.horizon * (2 * args.num + 1) + (args.horizon - 1) * (2 * args.num + 1))
else:
    st = (args.horizon * (2 * args.num + 1))

## Init Env
l = LightEnv(args.horizon, args.num, st, args.structure, gc,
             filename=str(gc) + "_traj", seen=args.seen)
env = DummyVecEnv(1 * [lambda: l])

for q in range(num_episodes):
    ## Reset Causal Structure
    l.keep_struct = False
    obs = env.reset()
    l.keep_struct = True

    ##### INDUCTION #####
    ##### OPTIMAL POLICY 1
    if args.structure == "masterswitch":
        it = None
        for i in range(args.num):