Example 1
def light_env_learning(base_dir="results/light-switches", structure="one_to_one", num=5,
						rounds=50, l=1, experiments_per_structure=1, num_structures=10,
						causal_order=True):
	from env.light_env import LightEnv
	exploration_steps = l
	env = LightEnv(structure=structure, num=num)
	base_path = os.path.join(base_dir, structure, str(num))
	create_dirs_results(base_path)
	p_bar_structures = tqdm.trange(num_structures)
	start_time = time.time()
	for s in p_bar_structures:
		results_data = dict()
		p_bar_structures.set_description("Learning Structure")
		env.keep_struct = False
		env.reset()
		env.keep_struct = True
		lights_on_model = generate_model_from_env(env)
		lights_off_model = generate_model_from_env(env, lights_off=True)
		unknown_model_on = deepcopy(lights_on_model)
		unknown_model_off = deepcopy(lights_off_model)
		variables = sorted(lights_on_model.get_graph_toposort())
		causal_order = variables if causal_order else []
		causes = lights_on_model.get_intervention_variables()
		invalid_edges = generate_invalid_edges_light(variables, causes)
		global_beliefs_results = dict()
		rewards_per_struct = []
		base_structure_filename = f"light_env_struct_{structure}_{s}"
		lights_on_model.save_digraph_as_img(os.path.join(base_path, "graphs", base_structure_filename + ".pdf"))
		g_truth = {e : 1 for e in lights_on_model.digraph.edges}
		for i in range(experiments_per_structure):
			connection_tables = create_pij(variables, causal_order, invalid_edges)
			adj_list = create_graph_from_beliefs(variables, connection_tables)
			ebunch, nodes = adj_list_to_ebunch_and_nodes(adj_list)
			data_on, data_off = explore_light_env(env, exploration_steps)
			df_on = pd.DataFrame.from_dict(data_on)
			df_off = pd.DataFrame.from_dict(data_off)
			approx_model_on = generate_approx_model_from_graph(ebunch, nodes, df_on)
			approx_model_off = generate_approx_model_from_graph(ebunch, nodes, df_off)
			unknown_model_on.reset(approx_model_on, ebunch, nodes)
			unknown_model_off.reset(approx_model_off, ebunch, nodes)
			connection_probs, rewards = training_ligh_env_learning(
				variables, rounds, connection_tables, data_on, data_off, unknown_model_on, unknown_model_off, env)
			rewards_per_struct.append(rewards)
			for key in connection_probs:
				if key not in global_beliefs_results:
					global_beliefs_results[key] = []
				global_beliefs_results[key].append(connection_probs[key])
		results_data[f"gt_{s}"] = g_truth
		results_data[f"beliefs_{s}"] = global_beliefs_results
		results_data[f"training_time_{s}"] = time.time() - start_time
		results_data[f"rewards_{s}"] = rewards_per_struct
		dict_filename = os.path.join(base_path, "mats", base_structure_filename + ".pickle")
		with open(dict_filename, "wb") as handle:
			pickle.dump(results_data, handle, protocol=pickle.HIGHEST_PROTOCOL)
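
# Minimal usage sketch (illustrative parameter values; assumes the module-level
# imports and helper functions used by light_env_learning are available in the
# original file):
if __name__ == "__main__":
	light_env_learning(structure="one_to_one", num=5, rounds=50, l=10,
					   experiments_per_structure=1, num_structures=10)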
Example 2
def main():
	logging.basicConfig(filename="logs/test_nature_light.log", filemode='w', level=logging.INFO)
	# model = BaseModel('configs/model_parameters.json')
	# tcm = TrueCausalModel(model)
	# r = tcm.action_simulator(['Tratamiento'], [1])
	# print(r)
	# r = tcm.action_simulator(['Tratamiento'], [0])
	# print(r)
	from utils.light_env_utils import generate_model_from_env
	from env.light_env import LightEnv
	env = LightEnv(structure="one_to_one")
	env.keep_struct = False
	env.reset()
	env.keep_struct = True
	model = generate_model_from_env(env)
	nature_light_switch = TrueCausalModelEnv(model)
	variable = "cause_1"
	value = 1
	r = nature_light_switch.action_simulator(env, variable)
	print(r)
Example 3
    parser.add_argument('--images', type=int, default=0, help='use image observations (1) or not (0)')
    parser.add_argument('--data-dir', type=str, help='data directory')
    args = parser.parse_args()
    
    
    gc = 1 - args.fixed_goal

    if args.structure == "masterswitch":
        st = (args.horizon*(2*args.num+1) + (args.horizon-1)*(2*args.num+1))
    else:
        st = (args.horizon*(2*args.num+1))
    tj = "gt"
    l = LightEnv(args.horizon,
                 args.num,
                 tj,
                 args.structure,
                 gc,
                 filename="exp/" + str(gc) + "_" + args.method,
                 seen=args.seen)
    env = DummyVecEnv(1 * [lambda: l])
        
    if args.images:
        addonn = "_I1"
    else:
        addonn = ""
    
    if args.method == "trajF":
        F = th.load(args.data_dir+"cnn_Redo_L2_S"+str(args.seen)+"_h"+str(args.horizon)+\
                        "_"+str(args.structure)+addonn).cuda()
    elif args.method == "trajFia":
        F = th.load(args.data_dir+"iter_attn_Redo_L2_S"+str(args.seen)+"_h"+str(args.horizon)+\
Example 4
    fname = args.data_dir + "polattn_" + str(gc) + "_" + args.method

    memsize = 10000
    memory = {'state': [], 'graph': [], 'action': []}
    if args.method == 'trajlstm':
        pol = BCPolicyMemory(args.num, args.structure).cuda()
    else:
        pol = BCPolicy(args.num, args.structure, True).cuda()
    optimizer = th.optim.Adam(pol.parameters(), lr=0.0001)

    ## Using ground truth graph
    if args.method == "gt":
        l = LightEnv(args.horizon * 2,
                     args.num,
                     "gt",
                     args.structure,
                     gc,
                     filename=fname,
                     seen=args.seen)

        successes = []
        l.keep_struct = False
        l.train = True
        ## Per episode
        for mep in range(100000):
            l.train = True
            obs = l.reset()

            curr = np.zeros(args.num)
            obs = curr
            imobs = l._get_obs(images=True)
Example 5
                new_state = tuple(new_state[:self.env.num])
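                # Tabular Q-learning update: move Q(s, a) toward the TD target
                # reward + gamma * max_a' Q(s', a') using learning rate alpha.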
                self.Q[state][action] = self.Q[state][action] + self.alpha * \
                                        (reward + self.gamma * np.max(self.Q[new_state]) -\
                                        self.Q[state][action])
                state = new_state
                total_episode_reward += reward
            rewards_per_episode.append(total_episode_reward)
        results_data[f"rewards_{s}"] = rewards_per_episode
        print(results_data)
        with open(dict_filename + ".pickle", "wb") as handle:
            pickle.dump(results_data, handle, protocol=pickle.HIGHEST_PROTOCOL)
        return rewards_per_episode


if __name__ == "__main__":
    num = 5
    simulations = 5
    episodes = 500
    structures = ["one_to_one", "one_to_many", "many_to_one"]
    for structure in structures:
        env = LightEnv(num=num, structure=structure)
        for s in range(simulations):
            env.keep_struct = False
            env.reset()
            env.keep_struct = True
            policy = Policy(linear=False)
            q_agent = QLearningAgent(env, policy, episodes=episodes)
            rewards = q_agent.train(
                f"results/light-switches-q-learning-exp-decay/{structure}/{num}/mats/light_env_struct_{structure}_{s}",
                s=s)
Example 6
    """
    Obtiene un diccionario donde las llaves son nodos
    y los valores son listas con los padres de cada nodo.
    """
    parents = dict()
    for edge in ebunch:
        if edge[1] not in parents:
            parents[edge[1]] = []
        parents[edge[1]].append(edge[0])
    return parents
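
# Illustrative behavior (hypothetical edge list): passing
# ebunch = [("cause_0", "effect_1"), ("cause_2", "effect_1")]
# returns {"effect_1": ["cause_0", "cause_2"]}.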

if __name__ == "__main__":
    n = 5
    logging.basicConfig(filename='logs/envToModel.log',
                        filemode='w', level=logging.INFO)
    env = LightEnv(structure="many_to_one")
    env.keep_struct = False
    print(env._get_obs()[:n])
    print(env.goal)
    env.reset()
    env.keep_struct = True
    print(env._get_obs()[:n])
    print(env.goal)
    env.reset()
    print(env._get_obs()[:n])
    print(env.goal)
    print(get_targets(env))
    lights_on_model = generate_model_from_env(env)
    lights_off_model = generate_model_from_env(env, lights_off=True)
    data_on, data_off = explore_light_env(env, 10)
    # print(data_on)
Example 7
                        logging.info(nature_response)
                        done = nature_response["done"]
                done = nature_response.get("done", False)
                self.rewards_per_round.append(episode_reward)
        return self.rewards_per_round


class LightsHalfBlindAgent(object):
    pass


if __name__ == "__main__":
    from utils.light_env_utils import generate_model_from_env
    from utils.vis_utils import plot_reward
    from true_causal_model import TrueCausalModelEnv
    env = LightEnv(structure="one_to_many")
    env.keep_struct = False
    env.reset()
    env.keep_struct = True
    model = generate_model_from_env(env)
    nature_light_switch = TrueCausalModelEnv(model)
    print(env.aj)
    print(env.goal)
    for cpd in model.pgmodel.get_cpds():
        print(cpd)
    episodes = 100
    print(
        model.conditional_probability("effect_1", {
            "cause_0": 1,
            "cause_1": 1,
            "cause_3": 0,
Example 8
    buffer = []
    gtbuffer = []
    num_episodes = 40000

    ## Set Horizon Based On Task
    if args.structure == "masterswitch":
        st = (args.horizon * (2 * args.num + 1) + (args.horizon - 1) *
              (2 * args.num + 1))
    else:
        st = (args.horizon * (2 * args.num + 1))

    ## Init Env
    l = LightEnv(args.horizon,
                 args.num,
                 st,
                 args.structure,
                 gc,
                 filename=str(gc) + "_traj",
                 seen=args.seen)
    env = DummyVecEnv(1 * [lambda: l])

    for q in range(num_episodes):
        ## Reset Causal Structure
        l.keep_struct = False
        obs = env.reset()
        l.keep_struct = True
        ##### INDUCTION #####
        ##### OPTIMAL POLICY 1
        if args.structure == "masterswitch":
            it = None
            for i in range(args.num):