def generate_causal_structures(max_delay: int = 0):
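    """Enumerate the causal chain hypothesis space for the CE3D training
    scenario and write it, along with the two- and three-solution schema
    spaces, to the default structure-space paths."""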
    params = {
        "use_physics": False,
        "train_scenario_name": "CE3D",
        "src_dir": None,
    }

    env = Agent.pre_instantiation_setup(params, bypass_confirmation=True)
    env.lever_index_mode = "position"

    attributes = [
        env.attribute_labels[attribute] for attribute in env.attribute_order
    ]
    structure = CAUSAL_CHAIN_EDGES

    (
        causal_chain_structure_space_path,
        two_solution_schemas_structure_space_path,
        three_solution_schemas_structure_space_path,
    ) = setup_structure_space_paths()
    generate_hypothesis_space(
        env=env,
        structure=structure,
        causal_chain_structure_space_path=causal_chain_structure_space_path,
        two_solution_schemas_structure_space_path=two_solution_schemas_structure_space_path,
        three_solution_schemas_structure_space_path=three_solution_schemas_structure_space_path,
        attributes=attributes,
        actions=ACTIONS,
        fluents=FLUENTS,
        fluent_states=FLUENT_STATES,
        perceptually_causal_relations=None,
        max_delay=max_delay,
    )


def main():
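    """Run a batch of OpenLockLearner agents on the CC3-CE4 transfer
    experiment (or the scenario given on the command line), training on
    the 3-lever trials and then testing on the 4-lever scenario."""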

    global_start_time = time.time()

    args = parse_arguments()

    ablation_params = AblationParams()

    if args.savedir is None:
        data_dir = "~/Desktop/Mass/OpenLockLearningResults/cc3-ce4_subjects"
    else:
        data_dir = args.savedir
    if args.scenario is None:
        param_scenario = "CC3-CE4"
    else:
        param_scenario = args.scenario
    bypass_confirmation = args.bypass_confirmation is not None
    # available ablations include INDEXED_DISTRIBUTIONS, PRUNING, and
    # TOP_DOWN_FIRST_TRIAL
    if args.ablations is not None:
        for ablation in args.ablations:
            ablation = ablation.upper()
            if hasattr(ablation_params, ablation):
                setattr(ablation_params, ablation, True)
            else:
                raise ValueError(
                    "Unknown ablation argument: {}".format(ablation))

    params = PARAMS[param_scenario]
    params["data_dir"] = data_dir
    params["train_attempt_limit"] = 30
    params["test_attempt_limit"] = 30
    # run to the full attempt limit, regardless of whether or not all solutions were found
    params["full_attempt_limit"] = False
    params["intervention_sample_size"] = 10
    params["chain_sample_size"] = 1000
    params["use_physics"] = False

    # openlock learner params
    params["lambda_multiplier"] = 1
    params["local_alpha_update"] = 1
    params["global_alpha_update"] = 1
    params["epsilon"] = 0.99
    params["epsilon_decay"] = 0.99
    params["epsilon_active"] = False
    # these params were extracted using matlab
    # params["epsilon_ratios"] = [0.5422, 0.3079, 0.1287, 0.1067, 0, 0]
    params["intervention_mode"] = "action"
    # params["intervention_mode"] = 'attempt'
    # setup ablations
    params["ablation_params"] = ablation_params
    params["effect_probabilities"] = generate_effect_probabilities(l0=1,
                                                                   l1=1,
                                                                   l2=1,
                                                                   door=1)

    params["using_ids"] = False
    params["multiproc"] = False
    params["deterministic"] = False
    params["num_agent_runs"] = 40
    params["src_dir"] = "/tmp/openlocklearner/" + str(hash(
        time.time())) + "/src/"
    params["print_messages"] = False

    env = Agent.pre_instantiation_setup(params, bypass_confirmation)
    env.lever_index_mode = "position"

    (
        causal_chain_structure_space_path,
        two_solution_schemas_structure_space_path,
        three_solution_schemas_structure_space_path,
    ) = setup_structure_space_paths()

    if not os.path.exists(causal_chain_structure_space_path):
        print(
            "WARNING: no hypothesis space files found, generating hypothesis spaces"
        )
        generate_causal_structures()

    interventions_predefined = []
    # interventions_predefined = [("push_LOWERLEFT", "push_UPPERRIGHT", "push_door")]

    # advance to the next trial after no chains have been pruned for
    # num_steps_with_no_pruning_to_finish_trial consecutive steps
    num_steps_with_no_pruning_to_finish_trial = 500
    num_agent_runs = params["num_agent_runs"]
    for i in range(num_agent_runs):
        agent_start_time = time.time()

        env = Agent.make_env(params)
        env.lever_index_mode = "position"

        (
            causal_chain_structure_space,
            two_solution_schemas,
            three_solution_schemas,
        ) = load_causal_structures_from_file(
            causal_chain_structure_space_path,
            two_solution_schemas_structure_space_path,
            three_solution_schemas_structure_space_path,
        )

        # setup agent
        agent = OpenLockLearnerAgent(
            env,
            causal_chain_structure_space,
            params,
            two_solution_schemas=two_solution_schemas,
            three_solution_schemas=three_solution_schemas,
        )

        possible_trials = agent.get_random_order_of_possible_trials(
            params["train_scenario_name"])

        agent.training_trial_order = possible_trials
        # training
        for trial_name in possible_trials:
            (
                trial_selected,
                chain_idxs_pruned_from_initial_observation,
            ) = agent.setup_trial(
                scenario_name=params["train_scenario_name"],
                action_limit=params["train_action_limit"],
                attempt_limit=params["train_attempt_limit"],
                specified_trial=trial_name,
            )

            agent.run_trial_openlock_learner(
                trial_selected,
                num_steps_with_no_pruning_to_finish_trial,
                interventions_predefined=interventions_predefined,
                chain_idxs_pruned_from_initial_observation=chain_idxs_pruned_from_initial_observation,
                intervention_mode=params["intervention_mode"],
            )

        # testing
        if params["test_scenario_name"] == "CE4" or params[
                "test_scenario_name"] == "CC4":
            (
                trial_selected,
                chain_idxs_pruned_from_initial_observation,
            ) = agent.setup_trial(
                scenario_name=params["test_scenario_name"],
                action_limit=params["test_action_limit"],
                attempt_limit=params["test_attempt_limit"],
            )

            agent.run_trial_openlock_learner(
                trial_selected,
                num_steps_with_no_pruning_to_finish_trial,
                interventions_predefined=interventions_predefined,
                chain_idxs_pruned_from_initial_observation=chain_idxs_pruned_from_initial_observation,
                intervention_mode=params["intervention_mode"],
            )

        agent.print_agent_summary()
        print("Finished agent. Total runtime: {}s".format(time.time() -
                                                          agent_start_time))
        agent.finish_subject("OpenLockLearner", "OpenLockLearner")

    print("Finished all agents for {}. Total runtime: {}s".format(
        param_scenario,
        time.time() - global_start_time))


def replot_training_results(path):
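    """Re-plot the reward curve of a previously saved agent from its
    serialized JSON results file."""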
    with open(path) as agent_file:
        agent_json = json.load(agent_file)
    agent_folder = os.path.dirname(path)
    Agent.plot_rewards(
        agent_json["rewards"],
        agent_json["epsilons"],
        os.path.join(agent_folder, "reward_plot.png"),
    )
    # params["data_dir"] = os.path.dirname(ROOT_DIR) + "/OpenLockResults/subjects"
    params["data_dir"] = human_config_data["HUMAN_SAVE_DIR"]
    params["src_dir"] = "/tmp/openlocklearner/" + str(hash(
        time.time())) + "/src/"
    params["use_physics"] = True
    params["effect_probabilities"] = generate_effect_probabilities()

    # this section randomly selects a testing and training scenario
    # train_scenario_name, test_scenario_name = select_random_scenarios()
    # params['train_scenario_name'] = train_scenario_name
    # params['test_scenario_name'] = test_scenario_name

    scenario = select_scenario(params["train_scenario_name"])

    # todo: this should not be part of OpenLockLearnerAgent
    env = Agent.pre_instantiation_setup(params)
    env.lever_index_mode = "role"

    # create session/trial/experiment manager
    agent = HumanAgent(params, env)

    atexit.register(agent.cleanup)

    # used for debugging, runs a specific scenario & trial
    # run_specific_trial_and_scenario(manager, 'CC3', 'trial5', params['train_action_limit'], params['train_attempt_limit'])

    for trial_num in range(0, params["train_num_trials"]):
        agent.run_trial_human(
            params["train_scenario_name"],
            params["train_action_limit"],
            params["train_attempt_limit"],
Esempio n. 5
0
def main():
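    """Run n_replications OpenLockLearner agents with logging, loading the
    hypothesis space once and sharing it across all runs."""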

    global_start_time = time.time()

    args = parse_arguments()

    logging.basicConfig(
        level=args.verbosity,
        format="%(asctime)s:%(filename)s:%(lineno)d:%(levelname)s %(message)s",
    )
    logging.info(args)

    ablation_params = AblationParams()

    if args.savedir is None:
        data_dir = "/home/joschnei/OpenLock/agent/data/OpenLockLearningResults/cc3-ce4_subjects"
    else:
        data_dir = args.savedir
    if args.scenario is None:
        param_scenario = "CC3-CE4"
    else:
        param_scenario = args.scenario
    bypass_confirmation = args.bypass_confirmation is not None
    if args.ablations is not None:
        # process ablations
        for ablation in args.ablations:
            ablation = ablation.upper()
            if hasattr(ablation_params, ablation):
                setattr(ablation_params, ablation, True)
            else:
                raise ValueError(
                    "Unknown ablation argument: {}".format(ablation))

    params = PARAMS[param_scenario]
    params["data_dir"] = data_dir
    params["train_attempt_limit"] = args.train_attempt_limit
    params["test_attempt_limit"] = args.test_attempt_limit
    # run to the full attempt limit, regardless of whether or not all solutions were found
    params["full_attempt_limit"] = False
    params["intervention_sample_size"] = 10  # doesn't matter
    params["chain_sample_size"] = 1000  # doesn't matter
    params["use_physics"] = False

    # openlock learner params
    params["lambda_multiplier"] = 1
    params["local_alpha_update"] = 1
    params["global_alpha_update"] = 1
    params["epsilon"] = 0.99
    params["epsilon_decay"] = 0.99
    params["epsilon_active"] = False
    params["intervention_mode"] = "action"
    # setup ablations
    params["ablation_params"] = ablation_params
    params["effect_probabilities"] = generate_effect_probabilities(l0=1.0,
                                                                   l1=1.0,
                                                                   l2=1.0,
                                                                   door=1.0)

    params["using_ids"] = False
    params["multiproc"] = False
    params["deterministic"] = True  # Why would you shuffle the chains????
    params["num_agent_runs"] = args.n_replications
    params["src_dir"] = None
    params["print_messages"] = False
    params["n_cpus"] = args.n_cpus

    logging.info(params)

    logging.info("Pre-instantiation setup")
    env = Agent.pre_instantiation_setup(params, bypass_confirmation)
    env.lever_index_mode = "position"

    (
        causal_chain_structure_space_path,
        two_solution_schemas_structure_space_path,
        three_solution_schemas_structure_space_path,
    ) = setup_structure_space_paths()

    if not os.path.exists(causal_chain_structure_space_path):
        logging.warning(
            "No hypothesis space files found, generating hypothesis spaces")
        generate_causal_structures(max_delay=params.get("max_delay", 0))

    interventions_predefined = []

    # advance to the next trial after no chains have been pruned for
    # num_steps_with_no_pruning_to_finish_trial consecutive steps
    num_steps_with_no_pruning_to_finish_trial = 500
    num_agent_runs = params["num_agent_runs"]

    logging.info("Loading structure and schemas")
    (
        causal_chain_structure_space,
        two_solution_schemas,
        three_solution_schemas,
    ) = load_causal_structures_from_file(
        causal_chain_structure_space_path,
        two_solution_schemas_structure_space_path,
        three_solution_schemas_structure_space_path,
    )

    logging.info("Starting trials")
    for i in range(num_agent_runs):
        logging.info(f"Starting agent run {i} of {num_agent_runs}")
        agent_start_time = time.time()

        env = Agent.make_env(params)
        env.lever_index_mode = "position"

        # setup agent
        agent = OpenLockLearnerAgent(
            env,
            causal_chain_structure_space,
            params,
            two_solution_schemas=two_solution_schemas,
            three_solution_schemas=three_solution_schemas,
        )

        possible_trials = agent.get_random_order_of_possible_trials(
            params["train_scenario_name"])

        agent.training_trial_order = possible_trials
        logging.info("Training agent")
        for trial_name in possible_trials:
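            # enable multiprocessing for the larger 4-lever scenarios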
            agent.multiproc = "4" in params["train_scenario_name"]
            (
                trial_selected,
                chain_idxs_pruned_from_initial_observation,
            ) = agent.setup_trial(
                scenario_name=params["train_scenario_name"],
                action_limit=params["train_action_limit"],
                attempt_limit=params["train_attempt_limit"],
                specified_trial=trial_name,
            )

            agent.run_trial_openlock_learner(
                trial_selected,
                num_steps_with_no_pruning_to_finish_trial,
                interventions_predefined=interventions_predefined,
                chain_idxs_pruned_from_initial_observation=chain_idxs_pruned_from_initial_observation,
                intervention_mode=params["intervention_mode"],
            )

        # testing
        if params["test_scenario_name"] in ("CE4", "CC4", "CE4D", "CC4D"):
            logging.info("Testing agent")

            agent.multiproc = True

            (
                trial_selected,
                chain_idxs_pruned_from_initial_observation,
            ) = agent.setup_trial(
                scenario_name=params["test_scenario_name"],
                action_limit=params["test_action_limit"],
                attempt_limit=params["test_attempt_limit"],
            )

            agent.run_trial_openlock_learner(
                trial_selected,
                num_steps_with_no_pruning_to_finish_trial,
                interventions_predefined=interventions_predefined,
                chain_idxs_pruned_from_initial_observation=chain_idxs_pruned_from_initial_observation,
                intervention_mode=params["intervention_mode"],
            )

        agent.print_agent_summary()
        logging.info(
            "Finished agent. Total runtime: {}s".format(time.time() -
                                                        agent_start_time))
        agent.finish_subject("OpenLockLearner", "OpenLockLearner")

    logging.info("Finished all agents for {}. Total runtime: {}s".format(
        param_scenario,
        time.time() - global_start_time))


def main():
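    """Run OpenLockLearner agents on a simplified CE3 scenario, optionally
    regenerating the simplified hypothesis space from scratch first."""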

    global_start_time = time.time()

    param_scenario = "CE3-CE4"
    params = PARAMS[param_scenario]
    params["data_dir"] = "~/Desktop/OpenLockLearningResultsTesting/subjects"
    params["train_scenario_name"] = "CE3_simplified"
    params["test_scenario_name"] = "CE3_simplified"
    params["train_attempt_limit"] = 10000
    params["test_attempt_limit"] = 10000
    # run to the full attempt limit, regardless of whether or not all solutions were found
    params["full_attempt_limit"] = False
    params["intervention_sample_size"] = 10
    params["chain_sample_size"] = 1000

    # openlock learner params
    params["lambda_multiplier"] = 1
    params["local_alpha_update"] = 2
    params["global_alpha_update"] = 1
    params["epsilon"] = 0.99
    params["epsilon_decay"] = 0.99
    params["epsilon_active"] = False
    params["intervention_mode"] = "action"
    # params["intervention_mode"] = 'attempt'
    # setup ablations
    ablation_params = AblationParams()
    # ablation_params.INDEXED_DISTRIBUTIONS = True
    # ablation_params.PRUNING = True
    # ablation_params.TOP_DOWN_FIRST_TRIAL = True
    params["ablation_params"] = ablation_params
    params["effect_probabilities"] = generate_effect_probabilities(l0=1,
                                                                   l1=1,
                                                                   l2=1,
                                                                   door=1)
    params["using_ids"] = False
    params["multiproc"] = False
    params["use_physics"] = False

    params["deterministic"] = False
    params["num_agent_runs"] = 40
    params["src_dir"] = "/tmp/openlocklearner/" + str(hash(
        time.time())) + "/src/"

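    # fix the RNG seed so stochastic choices are reproducible across runs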
    np.random.seed(1234)

    env = Agent.pre_instantiation_setup(params)
    env.lever_index_mode = "position"

    attributes = [
        env.attribute_labels[attribute] for attribute in env.attribute_order
    ]

    structure = CAUSAL_CHAIN_EDGES

    # set to True to regenerate the simplified structure spaces below
    # (renamed locally so it does not shadow the generate_causal_structures function)
    regenerate_causal_structures = False
    causal_chain_structure_space_path = os.path.expanduser(
        "~/Desktop/simplified_causal_chain_space.pickle")
    two_solution_schemas_structure_space_path = os.path.expanduser(
        "~/Desktop/simplified_two_solution_schemas.pickle")
    three_solution_schemas_structure_space_path = os.path.expanduser(
        "~/Desktop/simplified_three_solution_schemas.pickle")
    if regenerate_causal_structures:
        # perceptually_causal_relations = (
        #     generate_perceptually_causal_relations_simplified_testing_scenario()
        # )
        perceptually_causal_relations = None
        causal_chain_structure_space = generate_chain_structure_space(
            env=env,
            actions=ACTIONS,
            attributes=attributes,
            fluents=FLUENTS,
            fluent_states=FLUENT_STATES,
            perceptually_causal_relations=perceptually_causal_relations,
            structure=structure,
        )
        write_causal_structure_space(
            causal_chain_structure_space=causal_chain_structure_space,
            causal_chain_structure_space_path=causal_chain_structure_space_path,
        )

        t = time.time()

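        # enumerate the abstract schema spaces consistent with two and three
        # distinct solutions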
        two_solution_schemas = AbstractSchemaStructureSpace(
            causal_chain_structure_space.structure, 2, draw_chains=False)
        write_schema_structure_space(
            schema_structure_space=two_solution_schemas,
            schema_structure_space_path=two_solution_schemas_structure_space_path,
        )

        three_solution_schemas = AbstractSchemaStructureSpace(
            causal_chain_structure_space.structure, 3, draw_chains=False)
        write_schema_structure_space(
            schema_structure_space=three_solution_schemas,
            schema_structure_space_path=three_solution_schemas_structure_space_path,
        )

        print("Schema generation time: {}s".format(time.time() - t))

        return

    interventions_predefined = []
    # interventions_predefined = [
    #     ("push_LEFT", "pull_LEFT"),   # pushing and pulling the same lever
    #     ("push_UPPER", "pull_UPPER"), # unlocking and locking the door
    #     ("pull_LEFT", "pull_UPPER"),  # no state change
    #     ("push_LEFT", "pull_UPPER"),  # left lever state change
    #     ("push_LEFT", "push_UPPER"),  # door unlocks at the last stage
    #     ("push_LEFT", "push_UPPER"),  # door unlocks at the last stage
    #     # ("push_UPPER", "push_door"),  # only solution
    # ]

    # advance to the next trial after no chains have been pruned for
    # num_steps_with_no_pruning_to_finish_trial consecutive steps
    num_steps_with_no_pruning_to_finish_trial = 500
    num_agent_runs = params["num_agent_runs"]
    for i in range(num_agent_runs):
        agent_start_time = time.time()

        env = Agent.make_env(params)
        env.lever_index_mode = "position"

        (
            causal_chain_structure_space,
            two_solution_schemas,
            three_solution_schemas,
        ) = load_causal_structures_from_file(
            causal_chain_structure_space_path,
            two_solution_schemas_structure_space_path,
            three_solution_schemas_structure_space_path,
        )

        # setup agent
        agent = OpenLockLearnerAgent(
            env,
            causal_chain_structure_space,
            params,
            two_solution_schemas=two_solution_schemas,
            three_solution_schemas=three_solution_schemas,
        )

        possible_trials = agent.get_random_order_of_possible_trials(
            params["train_scenario_name"])

        # training
        agent.training_trial_order = possible_trials
        for trial_name in possible_trials:
            (
                trial_selected,
                chain_idxs_pruned_from_initial_observation,
            ) = agent.setup_trial(
                scenario_name=params["train_scenario_name"],
                action_limit=params["train_action_limit"],
                attempt_limit=params["train_attempt_limit"],
                specified_trial=trial_name,
            )

            agent.run_trial_openlock_learner(
                trial_selected,
                num_steps_with_no_pruning_to_finish_trial,
                interventions_predefined=interventions_predefined,
                chain_idxs_pruned_from_initial_observation=chain_idxs_pruned_from_initial_observation,
                intervention_mode=params["intervention_mode"],
            )

        # testing
        (
            trial_selected,
            chain_idxs_pruned_from_initial_observation,
        ) = agent.setup_trial(
            scenario_name=params["test_scenario_name"],
            action_limit=params["test_action_limit"],
            attempt_limit=params["test_attempt_limit"],
        )

        agent.run_trial_openlock_learner(
            trial_selected,
            num_steps_with_no_pruning_to_finish_trial,
            interventions_predefined=interventions_predefined,
            chain_idxs_pruned_from_initial_observation=chain_idxs_pruned_from_initial_observation,
            intervention_mode=params["intervention_mode"],
        )

        agent.print_agent_summary()
        print("Finished agent. Total runtime: {}s".format(time.time() -
                                                          agent_start_time))
        agent.finish_subject("OpenLockLearner", "OpenLockLearner")

    print("Finished all agents for {}. Total runtime: {}s".format(
        param_scenario,
        time.time() - global_start_time))


def replay_subject_data(subject_dir):
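    """Reload a recorded subject and replay every attempt of every trial
    in the rendered environment."""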
    subject_data = load_subject_data(subject_dir, use_json_pickle_for_trial=False)

    print("Replaying subject {}".format(subject_data.subject_id))

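    # effectively unlimited, so replay never cuts an attempt sequence short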
    max_attempts = 999999999
    # subject has agent, more recent model data
    if hasattr(subject_data, "agent"):
        train_scenario_name = subject_data.agent["params"]["train_scenario_name"]
        train_action_limit = subject_data.agent["params"]["train_action_limit"]
        train_attempt_limit = max_attempts
        test_action_limit = subject_data.agent["params"]["test_action_limit"]
        test_attempt_limit = max_attempts
        lever_index_mode = "position"
    else:
        train_scenario_name = subject_data.trial_seq[0]["scenario_name"]
        train_action_limit = 3
        train_attempt_limit = max_attempts
        test_action_limit = 3
        test_attempt_limit = max_attempts
        # human subjects use role
        lever_index_mode = "role"

    # minimal construction of params
    params = {
        "use_physics": True,
        "train_scenario_name": train_scenario_name,
        "src_dir": None,
    }

    env = Agent.pre_instantiation_setup(params, bypass_confirmation=True)
    env.lever_index_mode = lever_index_mode
    # setup dummy window so we can start recording
    env.setup_trial(
        "CC3",
        action_limit=3,
        attempt_limit=30,
        multiproc=False,
    )
    env.reset()

    input("Press enter to start")

    # for each trial, setup the env and execute all of the action sequences
    for trial in subject_data.trial_seq:
        trial_scenario_name = trial["scenario_name"]
        # 3 lever trial
        if trial_scenario_name == "CE3" or trial_scenario_name == "CC3":
            action_limit = train_action_limit
            attempt_limit = train_attempt_limit
        # 4 lever trial
        elif trial_scenario_name == "CE4" or trial_scenario_name == "CC4":
            action_limit = test_action_limit
            attempt_limit = test_attempt_limit
            # corrects a bug where some 4 lever testing trials did not properly save their trial name as a string
            if not isinstance(trial["name"], str):
                # we can directly reencode the bytes back into a python string
                raw_dtype, raw_bytes = jsonpickle.decode(json.dumps(trial["name"]["py/reduce"][1]))
                trial["name"] = decode_bad_jsonpickle_str(raw_bytes)
        else:
            raise ValueError("Unknown scenario name")

        # setup the env for the trial
        env.setup_trial(
            trial_scenario_name,
            action_limit=action_limit,
            attempt_limit=attempt_limit,
            specified_trial=trial["name"],
            multiproc=False,
        )

        # go through every attempt in the trial
        for attempt_seq in trial["attempt_seq"]:
            env.reset()
            # go through every action sequence in this attempt
            action_seq = attempt_seq["action_seq"]
            done = False
            action_num = 0
            # render in a loop
            while not done:
                # execute the next action if an action is not currently executing
                if not env.action_executing:
                    action_str = action_seq[action_num]["name"]
                    action_env = env.action_map[action_str]
                    env.step(action_env)
                    action_num += 1

                env.render(env)
                done = env.determine_attempt_finished()
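

# minimal entry-point sketch, assuming the subject directory is passed as the
# first command-line argument (the original excerpt did not include one)
if __name__ == "__main__":
    import sys

    replay_subject_data(sys.argv[1])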
