def run_do():
    """Solve the example RCPSP domain, replay the policy and plot the schedule.

    Builds ``MyExampleRCPSPDomain``, solves it with ``DOSolver`` (method
    ``LNS_CP_CALENDAR``, base policy ``SGS_PRECEDENCE``), rolls the solver out
    from the initial state, prints the summed rollout cost and finally shows
    the task Gantt, resource view and per-resource Gantt plots.
    """
    from skdecide.hub.solver.do_solver.do_solver_scheduling import (
        PolicyRCPSP,
        DOSolver,
        PolicyMethodParams,
        BasePolicyMethod,
        SolvingMethod,
    )

    # The original author left load_domain("j1010_2.mm") and
    # load_domain("j301_1.sm") commented out as alternative domains.
    rcpsp_domain = MyExampleRCPSPDomain()
    rcpsp_domain.set_inplace_environment(False)
    initial_state = rcpsp_domain.get_initial_state()
    print("Initial state : ", initial_state)

    policy_params = PolicyMethodParams(
        base_policy_method=BasePolicyMethod.SGS_PRECEDENCE,
        delta_index_freedom=0,
        delta_time_freedom=0,
    )
    solver = DOSolver(
        policy_method_params=policy_params,
        method=SolvingMethod.LNS_CP_CALENDAR,
    )
    solver.solve(domain_factory=lambda: rcpsp_domain)

    def describe_outcome(outcome):
        # Text shown by the rollout for every environment outcome.
        return f'{outcome.observation} - cost: {outcome.value.cost:.2f}'

    states, _actions, values = rollout_episode(
        domain=rcpsp_domain,
        max_steps=1000,
        solver=solver,
        from_memory=initial_state,
        action_formatter=str,
        outcome_formatter=describe_outcome,
    )
    total_cost = sum(step_value.cost for step_value in values)
    print("Cost :", total_cost)

    from skdecide.hub.solver.do_solver.sk_to_do_binding import (
        from_last_state_to_solution,
    )

    do_sol = from_last_state_to_solution(states[-1], rcpsp_domain)

    from skdecide.builders.discrete_optimization.rcpsp.rcpsp_utils import (
        plot_task_gantt,
        plot_ressource_view,
        plot_resource_individual_gantt,
        plt,
    )

    # Draw the three views in the same order as before, then display them.
    for draw in (plot_task_gantt, plot_ressource_view, plot_resource_individual_gantt):
        draw(do_sol.problem, do_sol)
    plt.show()
def do_multimode():
    """Solve the multi-mode instance ``j1010_2.mm`` with CP and print end times.

    The domain is solved with ``DOSolver`` (method ``CP``, base policy
    ``FOLLOW_GANTT``), rolled out for at most 1000 steps from the initial
    state, and the end time stored for every task in the last state is printed.
    """
    domain: RCPSP = load_domain(get_complete_path("j1010_2.mm"))
    initial_state = domain.get_initial_state()

    gantt_policy = PolicyMethodParams(
        base_policy_method=BasePolicyMethod.FOLLOW_GANTT,
        delta_index_freedom=0,
        delta_time_freedom=0,
    )
    solver = DOSolver(policy_method_params=gantt_policy, method=SolvingMethod.CP)
    solver.solve(domain_factory=lambda: domain)

    def describe_outcome(outcome):
        return f"{outcome.observation} - cost: {outcome.value.cost:.2f}"

    states, _actions, _values = rollout_episode(
        domain=domain,
        solver=solver,
        from_memory=initial_state,
        max_steps=1000,
        action_formatter=lambda action: f"{action}",
        outcome_formatter=describe_outcome,
    )
    print("rollout done")
    print("end times: ")
    final_details = states[-1].tasks_details
    for task_id in final_details.keys():
        print("end task", task_id, ": ", final_details[task_id].end)
def do_multiskill():
    """Solve the first available multi-skill instance with LNS-CP and report it.

    Loads the first entry returned by ``get_data_available_ms()``, solves it
    with ``DOSolver`` (method ``LNS_CP``, base policy ``SGS_PRECEDENCE``),
    rolls the solver out for at most 3000 steps, then prints the summed cost
    and the end time stored for every task in the last state.
    """
    domain: MSRCPSP = load_multiskill_domain(get_data_available_ms()[0])
    domain.set_inplace_environment(False)
    initial_state = domain.get_initial_state()

    precedence_policy = PolicyMethodParams(
        base_policy_method=BasePolicyMethod.SGS_PRECEDENCE,
        delta_index_freedom=0,
        delta_time_freedom=0,
    )
    solver = DOSolver(
        policy_method_params=precedence_policy,
        method=SolvingMethod.LNS_CP,
    )
    # The returned value is not used; the call is kept as in the original.
    solver.get_available_methods(domain)
    solver.solve(domain_factory=lambda: domain)

    def describe_outcome(outcome):
        return f"{outcome.observation} - cost: {outcome.value.cost:.2f}"

    states, _actions, values = rollout_episode(
        domain=domain,
        solver=solver,
        from_memory=initial_state,
        max_steps=3000,
        action_formatter=lambda action: f"{action}",
        outcome_formatter=describe_outcome,
    )
    print(sum(step_value.cost for step_value in values))
    print("rollout done")
    print("end times: ")
    final_details = states[-1].tasks_details
    for task_id in final_details.keys():
        print("end task", task_id, ": ", final_details[task_id].end)
def do_singlemode():
    """Solve the single-mode instance ``j301_1.sm`` with CP and report the result.

    The domain is solved with ``DOSolver`` (method ``CP``, base policy
    ``SGS_PRECEDENCE``) and rolled out for at most 500 steps; the summed cost
    and the end time stored for every task in the last state are printed.
    """
    chosen_method = SolvingMethod.CP
    domain: RCPSP = load_domain(get_complete_path("j301_1.sm"))
    domain.set_inplace_environment(False)
    initial_state = domain.get_initial_state()
    print("Initial state : ", initial_state)

    precedence_policy = PolicyMethodParams(
        base_policy_method=BasePolicyMethod.SGS_PRECEDENCE,
        delta_index_freedom=0,
        delta_time_freedom=0,
    )
    solver = DOSolver(policy_method_params=precedence_policy, method=chosen_method)
    solver.solve(domain_factory=lambda: domain)
    print(chosen_method)

    def describe_outcome(outcome):
        return f"{outcome.observation} - cost: {outcome.value.cost:.2f}"

    states, _actions, values = rollout_episode(
        domain=domain,
        solver=solver,
        from_memory=initial_state,
        max_steps=500,
        outcome_formatter=describe_outcome,
    )
    print(sum(step_value.cost for step_value in values))
    print("rollout done")
    print("end times: ")
    final_details = states[-1].tasks_details
    for task_id in final_details.keys():
        print("end task", task_id, ": ", final_details[task_id].end)
# Example #5
# 0
 def init_reference_permutations(
     self, reference_permutations=None, training_domains_names=None
 ) -> None:
     """Fill ``self.reference_permutations`` with one permutation per training domain.

     For each domain in ``self.training_domains`` the permutation is taken
     from ``reference_permutations`` when the domain's name is a key of that
     mapping; otherwise the domain is solved with the CP method and the
     permutation is derived from the solver's best solution.

     Args:
         reference_permutations: mapping from domain name to a precomputed
             permutation. ``None`` (the default) means no precomputed data.
         training_domains_names: names aligned by position with
             ``self.training_domains``. Missing trailing names are treated
             as ``None``, i.e. as unknown, so the permutation is computed.
     """
     # Fresh containers per call: never share mutable defaults between calls.
     known_permutations = (
         {} if reference_permutations is None else reference_permutations
     )
     names = [] if training_domains_names is None else list(training_domains_names)
     # Pad with None so a short (or omitted) name list no longer raises
     # IndexError; an unnamed domain simply gets its permutation computed.
     names += [None] * (len(self.training_domains) - len(names))

     self.reference_permutations = {}
     for td, td_name in zip(self.training_domains, names):
         if td_name in known_permutations:
             self.reference_permutations[td] = known_permutations[td_name]
             continue

         # No reference available: run CP on this training domain.
         td.set_inplace_environment(False)
         solver = DOSolver(
             policy_method_params=PolicyMethodParams(
                 base_policy_method=BasePolicyMethod.SGS_PRECEDENCE,
                 delta_index_freedom=0,
                 delta_time_freedom=0,
             ),
             method=SolvingMethod.CP,
         )
         # Bind td as a default so the factory cannot see a later loop value.
         solver.solve(domain_factory=lambda td=td: td)
         raw_permutation = solver.best_solution.rcpsp_permutation
         # Shift every entry by 2, then prepend 1 and append max + 1
         # (presumably the dummy source and sink tasks; confirm against
         # the rcpsp_permutation convention).
         full_permutation = [x + 2 for x in raw_permutation]
         full_permutation.insert(0, 1)
         full_permutation.append(np.max(full_permutation) + 1)
         print("full_perm: ", full_permutation)
         self.reference_permutations[td] = full_permutation
# Example #6
# 0
def test_optimality(domain, do_solver):
    """Check that the solver reaches the known optimal makespan on toy domains.

    The domain is solved with ``DOSolver`` using ``do_solver`` as method and
    rolled out silently. For ``ToyRCPSPDomain`` / ``ToyMS_RCPSPDomain``
    instances, the largest end time among completed tasks in the last state
    must equal the matching ``optimal_solutions[...]["makespan"]`` entry.
    """
    print("domain: ", domain)
    domain.set_inplace_environment(False)
    initial_state = domain.get_initial_state()
    print("Initial state : ", initial_state)

    precedence_policy = PolicyMethodParams(
        base_policy_method=BasePolicyMethod.SGS_PRECEDENCE,
        delta_index_freedom=0,
        delta_time_freedom=0,
    )
    solver = DOSolver(policy_method_params=precedence_policy, method=do_solver)
    solver.solve(domain_factory=lambda: domain)
    print(do_solver)

    states, _actions, _values = rollout_episode(
        domain=domain,
        max_steps=1000,
        solver=solver,
        from_memory=initial_state,
        action_formatter=None,
        outcome_formatter=None,
        verbose=False,
    )
    final_state = states[-1]

    def reached_makespan():
        # Latest end time over every task marked complete in the last state.
        return max(
            final_state.tasks_details[task].end
            for task in final_state.tasks_complete
        )

    if isinstance(domain, ToyRCPSPDomain):
        assert reached_makespan() == optimal_solutions["ToyRCPSPDomain"]["makespan"]
    if isinstance(domain, ToyMS_RCPSPDomain):
        assert reached_makespan() == optimal_solutions["ToyMS_RCPSPDomain"]["makespan"]
# Example #7
# 0
def test_do(domain, do_solver):
    """Solve ``domain`` with the given DO method and check the rollout.

    The domain is solved with ``DOSolver`` (base policy ``SGS_PRECEDENCE``),
    rolled out silently for at most 1000 steps, and the visited states are
    passed to ``check_rollout_consistency``.
    """
    print("domain: ", domain)
    domain.set_inplace_environment(False)
    initial_state = domain.get_initial_state()
    print("Initial state : ", initial_state)

    precedence_policy = PolicyMethodParams(
        base_policy_method=BasePolicyMethod.SGS_PRECEDENCE,
        delta_index_freedom=0,
        delta_time_freedom=0,
    )
    solver = DOSolver(policy_method_params=precedence_policy, method=do_solver)
    solver.solve(domain_factory=lambda: domain)
    print(do_solver)

    # Both formatters are explicitly disabled for this test rollout.
    states, _actions, _values = rollout_episode(
        domain=domain,
        max_steps=1000,
        solver=solver,
        from_memory=initial_state,
        action_formatter=None,
        outcome_formatter=None,
        verbose=False,
    )
    check_rollout_consistency(domain, states)
def run_and_compare_policies():
    """Compare SGS policy variants on a noisy version of ``j1201_1.sm``.

    Up to 30 randomly chosen tasks are passed as ``task_to_noise`` to build a
    stochastic domain. The deterministic domain is solved once with local
    search (``SolvingMethod.LS``); the best solution is then turned into one
    policy per parameter set (three plain SGS variants, ten index-freedom
    values and forty time-freedom values). For each policy,
    ``rollout_based_compute_expected_cost_for_policy_scheduling`` is run with
    30 rollouts and the value it reports for the initial state is printed.
    """
    import random

    # NOTE(review): sibling examples call load_domain(get_complete_path(...));
    # confirm that a bare file name is accepted here.
    domain: RCPSP = load_domain("j1201_1.sm")
    # random.sample() requires a sequence: sets are rejected since Python
    # 3.11, so materialise the task ids as a list first.
    task_ids = list(domain.get_tasks_ids())
    task_to_noise = set(random.sample(task_ids, min(30, len(task_ids))))
    stochastic_domain = build_stochastic_from_deterministic(
        domain, task_to_noise=task_to_noise
    )
    stochastic_domain.set_inplace_environment(False)
    state = domain.get_initial_state()
    domain.set_inplace_environment(False)

    solver = DOSolver(
        policy_method_params=PolicyMethodParams(
            base_policy_method=BasePolicyMethod.FOLLOW_GANTT,
            delta_index_freedom=0,
            delta_time_freedom=0,
        ),
        method=SolvingMethod.LS,
    )
    solver.solve(domain_factory=lambda: domain)

    policy_methods = [
        PolicyMethodParams(
            base_policy_method=method, delta_time_freedom=0, delta_index_freedom=0
        )
        for method in (
            BasePolicyMethod.SGS_PRECEDENCE,
            BasePolicyMethod.SGS_READY,
            BasePolicyMethod.SGS_STRICT,
        )
    ]
    policy_methods += [
        PolicyMethodParams(
            base_policy_method=BasePolicyMethod.SGS_INDEX_FREEDOM,
            delta_time_freedom=0,
            delta_index_freedom=i,
        )
        for i in range(10)
    ]
    policy_methods += [
        PolicyMethodParams(
            base_policy_method=BasePolicyMethod.SGS_TIME_FREEDOM,
            delta_time_freedom=t,
            delta_index_freedom=0,
        )
        for t in range(0, 200, 5)
    ]

    # One policy per parameter set, keyed by its position in policy_methods.
    policies = {
        i: from_solution_to_policy(
            solution=solver.best_solution,
            domain=stochastic_domain,
            policy_method_params=params,
        )
        for i, params in enumerate(policy_methods)
    }
    for key, policy in policies.items():
        (
            value_function_dict,
            _policy_dict,
            _preds,
            _succs,
        ) = rollout_based_compute_expected_cost_for_policy_scheduling(
            stochastic_domain, policy, nb_rollout=30
        )
        print("key : ", key, value_function_dict[state])
# Example #9
# 0
def compute_ref_permutations():
    """Compute and store CP reference permutations and makespans.

    Every available data file whose path contains ``"sm"`` is solved with the
    CP method. For each instance the solver's permutation (every entry shifted
    by 2, with 1 prepended and max + 1 appended) and the summed cost of one
    rollout are collected. Both mappings are printed and written as JSON to
    ``cp_reference_permutations`` and ``cp_reference_makespans`` in the
    current working directory.
    """
    import os

    files = get_data_available()
    all_single_mode = [os.path.basename(f) for f in files if "sm" in f]

    all_permutations = {}
    all_makespans = {}
    for td_name in all_single_mode:
        td = load_domain(get_complete_path(td_name))
        td.set_inplace_environment(False)
        solver = DOSolver(
            policy_method_params=PolicyMethodParams(
                base_policy_method=BasePolicyMethod.SGS_PRECEDENCE,
                delta_index_freedom=0,
                delta_time_freedom=0,
            ),
            method=SolvingMethod.CP,
        )
        # Bind td as a default so the factory cannot see a later loop value.
        solver.solve(domain_factory=lambda td=td: td)

        raw_permutation = solver.best_solution.rcpsp_permutation
        # Plain ints keep the permutation JSON-serialisable.
        full_permutation = [int(x + 2) for x in raw_permutation]
        full_permutation.insert(0, 1)
        full_permutation.append(int(np.max(full_permutation) + 1))
        print("full_perm: ", full_permutation)
        all_permutations[td_name] = full_permutation

        state = td.get_initial_state()
        states, actions, values = rollout_episode(
            domain=td,
            max_steps=1000,
            solver=solver,
            from_memory=state,
            verbose=False,
            outcome_formatter=lambda o: f"{o.observation} - cost: {o.value.cost:.2f}",
        )

        makespan = sum(v.cost for v in values)
        all_makespans[td_name] = makespan
        print("makespan: ", makespan)

    print("all_permutations: ", all_permutations)
    print("all_makespans: ", all_makespans)

    # Context managers guarantee both files are flushed and closed, even if
    # serialisation fails half-way.
    with open("cp_reference_permutations", "w") as permutations_file:
        json.dump(all_permutations, permutations_file, indent=2)
    with open("cp_reference_makespans", "w") as makespans_file:
        json.dump(all_makespans, makespans_file, indent=2)
# Example #10
# 0
def test_do_mskill(domain_multiskill, do_solver_multiskill):
    """Solve a multi-skill domain with the given DO method and check the rollout.

    The domain is solved with ``DOSolver`` (base policy ``SGS_PRECEDENCE``),
    rolled out silently for at most 1000 steps, and the visited states are
    passed to ``check_rollout_consistency``.
    """
    domain_multiskill.set_inplace_environment(False)
    initial_state = domain_multiskill.get_initial_state()
    print("Initial state : ", initial_state)

    precedence_policy = PolicyMethodParams(
        base_policy_method=BasePolicyMethod.SGS_PRECEDENCE,
        delta_index_freedom=0,
        delta_time_freedom=0,
    )
    solver = DOSolver(
        policy_method_params=precedence_policy,
        method=do_solver_multiskill,
    )
    solver.solve(domain_factory=lambda: domain_multiskill)
    print(do_solver_multiskill)

    states, _actions, _values = rollout_episode(
        domain=domain_multiskill,
        max_steps=1000,
        solver=solver,
        from_memory=initial_state,
        action_formatter=None,
        outcome_formatter=None,
        verbose=False,
    )
    check_rollout_consistency(domain_multiskill, states)
# Example #11
# 0
def run_comparaison():
    """Compare random walk, CP, CP+SGS, PILE and GPHH on RCPSP instances.

    For each test instance (currently only ``j1201_2.sm``, which is also the
    only GPHH training instance) every approach is rolled out and the summed
    rollout cost is recorded:

    * ``random_walk``: 5 rollouts without a solver,
    * ``cp``: CP method with the ``FOLLOW_GANTT`` policy,
    * ``cp_sgs``: CP method with the ``SGS_STRICT`` policy,
    * ``pile``: PILE method with the ``FOLLOW_GANTT`` policy,
    * ``gphh``: 5 independent GPHH trainings, each evaluated on the instance.

    GPHH reads its reference permutations from the JSON file
    ``cp_reference_permutations`` in the current working directory. All runs
    and their mean are printed at the end.
    """

    def episode_cost(rollout_domain, solver, state, max_steps):
        # Roll `solver` out on `rollout_domain` from `state`; return the summed cost.
        _states, _actions, values = rollout_episode(
            domain=rollout_domain,
            max_steps=max_steps,
            solver=solver,
            from_memory=state,
            verbose=False,
            outcome_formatter=lambda o: f"{o.observation} - cost: {o.value.cost:.2f}",
        )
        return sum(v.cost for v in values)

    def solve_and_rollout(target_domain, method, base_policy_method):
        # Solve `target_domain` with a DOSolver, then return the cost of one rollout.
        target_domain.set_inplace_environment(False)
        state = target_domain.get_initial_state()
        solver = DOSolver(
            policy_method_params=PolicyMethodParams(
                base_policy_method=base_policy_method,
                delta_index_freedom=0,
                delta_time_freedom=0,
            ),
            method=method,
        )
        solver.solve(domain_factory=lambda: target_domain)
        print(method)
        return episode_cost(target_domain, solver, state, max_steps=500)

    # Training and testing deliberately use the same single instance here.
    all_testing_domains_names = ["j1201_2.sm"]
    training_domains_names = all_testing_domains_names
    test_domain_names = all_testing_domains_names
    # Load every distinct instance exactly once.
    domains_loaded = {
        domain_name: load_domain(get_complete_path(domain_name))
        for domain_name in dict.fromkeys(test_domain_names + training_domains_names)
    }
    print("test_domain_names: ", test_domain_names)
    print("training_domains_names: ", training_domains_names)
    n_walks = 5

    all_results = {
        dom: {"random_walk": [], "cp": [], "cp_sgs": [], "gphh": [], "pile": []}
        for dom in test_domain_names
    }

    # RANDOM WALK
    for test_domain_str in test_domain_names:
        test_domain: RCPSP = domains_loaded[test_domain_str]
        test_domain.set_inplace_environment(False)
        for _ in range(n_walks):
            state = test_domain.get_initial_state()
            cost = episode_cost(test_domain, None, state, max_steps=1000)
            print("One random Walk complete")
            print("Cost :", cost)
            all_results[test_domain_str]["random_walk"].append(cost)
        print("All random Walk complete")

    # CP, CP SGS and PILE differ only in the method / policy pair.
    do_baselines = (
        ("cp", "CP done", SolvingMethod.CP, BasePolicyMethod.FOLLOW_GANTT),
        ("cp_sgs", "CP_SGS done", SolvingMethod.CP, BasePolicyMethod.SGS_STRICT),
        ("pile", "PILE done", SolvingMethod.PILE, BasePolicyMethod.FOLLOW_GANTT),
    )
    for result_key, done_message, method, base_policy_method in do_baselines:
        for test_domain_str in test_domain_names:
            cost = solve_and_rollout(
                domains_loaded[test_domain_str], method, base_policy_method
            )
            print("Cost: ", cost)
            print(done_message)
            all_results[test_domain_str][result_key].append(cost)

    # GPHH
    # Dedicated name: the factory below must hand this same domain to every
    # walk. Previously `domain` was rebound in the evaluation loop, so walks
    # after the first were given the last test domain instead.
    gphh_domain: RCPSP = load_domain(get_complete_path("j301_1.sm"))
    training_domains = [
        domains_loaded[training_domain] for training_domain in training_domains_names
    ]

    with open("cp_reference_permutations") as json_file:
        cp_reference_permutations = json.load(json_file)

    for _ in range(n_walks):
        gphh_domain.set_inplace_environment(False)

        set_feature = {
            FeatureEnum.EARLIEST_FINISH_DATE,
            FeatureEnum.EARLIEST_START_DATE,
            FeatureEnum.LATEST_FINISH_DATE,
            FeatureEnum.LATEST_START_DATE,
            FeatureEnum.N_PREDECESSORS,
            FeatureEnum.N_SUCCESSORS,
            FeatureEnum.ALL_DESCENDANTS,
            FeatureEnum.RESSOURCE_REQUIRED,
            FeatureEnum.RESSOURCE_AVG,
            FeatureEnum.RESSOURCE_MAX,
            FeatureEnum.RESSOURCE_NZ_MIN,
        }

        # Rebuilt for every walk so each GPHH gets its own primitive set.
        pset = PrimitiveSet("main", len(set_feature))
        pset.addPrimitive(operator.add, 2)
        pset.addPrimitive(operator.sub, 2)
        pset.addPrimitive(operator.mul, 2)
        pset.addPrimitive(protected_div, 2)
        pset.addPrimitive(max_operator, 2)
        pset.addPrimitive(min_operator, 2)
        pset.addPrimitive(operator.neg, 1)

        params_gphh = ParametersGPHH(
            set_feature=set_feature,
            set_primitves=pset,
            tournament_ratio=0.2,
            pop_size=20,
            n_gen=7,
            min_tree_depth=1,
            max_tree_depth=3,
            crossover_rate=0.7,
            mutation_rate=0.3,
            base_policy_method=BasePolicyMethod.SGS_READY,
            delta_index_freedom=0,
            delta_time_freedom=0,
            deap_verbose=True,
            evaluation=EvaluationGPHH.SGS_DEVIATION,
            permutation_distance=PermutationDistance.KTD,
        )

        solver = GPHH(
            training_domains=training_domains,
            weight=-1,
            verbose=False,
            reference_permutations=cp_reference_permutations,
            training_domains_names=training_domains_names,
            params_gphh=params_gphh,
        )
        solver.solve(domain_factory=lambda: gphh_domain)

        for test_domain_str in test_domain_names:
            test_domain = domains_loaded[test_domain_str]
            test_domain.set_inplace_environment(False)
            state = test_domain.get_initial_state()
            solver.set_domain(test_domain)
            cost = episode_cost(test_domain, solver, state, max_steps=1000)
            print("One GPHH done")
            print("Best evolved heuristic: ", solver.best_heuristic)
            print("Cost: ", cost)
            all_results[test_domain_str]["gphh"].append(cost)

    print("All GPHH done")

    print("##### ALL RESULTS #####")

    for test_domain_str in test_domain_names:
        print(test_domain_str, " :")
        for algo_key, runs in all_results[test_domain_str].items():
            print("\t", algo_key, ": ")
            print("\t\t all runs:", runs)
            print("\t\t mean:", np.mean(runs))
# Example #12
# 0
def run_comparaison_stochastic():
    """Compare random walk, CP, CP+SGS, PILE and GPHH on sampled scenarios.

    For each listed instance a stochastic domain is built with every task
    passed as ``task_to_noise``, and 6 deterministic instances are requested
    from it: all but the last are used for training, the last one for testing.
    The summed rollout cost on the test scenario is recorded for:

    * ``random_walk``: 5 rollouts without a solver,
    * ``cp``: CP solved on the test scenario, ``FOLLOW_GANTT`` policy,
    * ``cp_sgs``: CP solved on each training scenario, ``SGS_STRICT`` policy,
    * ``pile``: PILE solved on the test scenario, ``FOLLOW_GANTT`` policy,
    * ``gphh``: 5 independent GPHH trainings on the training scenarios.

    GPHH reads its reference permutations from the JSON file
    ``cp_reference_permutations`` in the current working directory. All runs
    and their mean are printed at the end.
    """
    import random

    from skdecide.hub.domain.rcpsp.rcpsp_sk import (
        RCPSP,
        build_n_determinist_from_stochastic,
        build_stochastic_from_deterministic,
    )

    def episode_cost(rollout_domain, solver, state, max_steps):
        # Roll `solver` out on `rollout_domain` from `state`; return the summed cost.
        _states, _actions, values = rollout_episode(
            domain=rollout_domain,
            max_steps=max_steps,
            solver=solver,
            from_memory=state,
            verbose=False,
            outcome_formatter=lambda o: f"{o.observation} - cost: {o.value.cost:.2f}",
        )
        return sum(v.cost for v in values)

    def solve_and_rollout(solve_domain, rollout_domain, method, base_policy_method):
        # Solve `solve_domain` with a DOSolver, then return the rollout cost on `rollout_domain`.
        solve_domain.set_inplace_environment(False)
        # NOTE(review): as in the original, the rollout starts from the
        # initial state of the domain that was solved, even when it is rolled
        # out on a different scenario; confirm that both initial states match.
        state = solve_domain.get_initial_state()
        solver = DOSolver(
            policy_method_params=PolicyMethodParams(
                base_policy_method=base_policy_method,
                delta_index_freedom=0,
                delta_time_freedom=0,
            ),
            method=method,
        )
        solver.solve(domain_factory=lambda: solve_domain)
        print(method)
        rollout_domain.set_inplace_environment(False)
        return episode_cost(rollout_domain, solver, state, max_steps=500)

    repeat_runs = 5

    test_domain_names = [
        "j301_1.sm",
        "j301_2.sm",
        "j301_3.sm",
        "j601_1.sm",
        "j601_2.sm",
        "j601_3.sm",
    ]

    all_results = {
        dom: {"random_walk": [], "cp": [], "cp_sgs": [], "gphh": [], "pile": []}
        for dom in test_domain_names
    }

    # The reference data does not depend on the instance: read it once, up
    # front, so a missing file fails before any expensive solving starts.
    # (The unused cp_reference_makespans file is no longer required.)
    with open("cp_reference_permutations") as json_file:
        cp_reference_permutations = json.load(json_file)

    for original_domain_name in test_domain_names:
        results = all_results[original_domain_name]
        original_domain: RCPSP = load_domain(get_complete_path(original_domain_name))
        # random.sample() requires a sequence: sets are rejected since Python
        # 3.11, so materialise the task ids as a list first.
        task_ids = list(original_domain.get_tasks_ids())
        task_to_noise = set(random.sample(task_ids, len(task_ids)))
        stochastic_domain = build_stochastic_from_deterministic(
            original_domain, task_to_noise=task_to_noise
        )
        deterministic_domains = build_n_determinist_from_stochastic(
            stochastic_domain, nb_instance=6
        )

        training_domains = deterministic_domains[0:-1]
        # None names have no entry in the reference permutations.
        training_domains_names = [None] * len(training_domains)
        test_domain: RCPSP = deterministic_domains[-1]
        print("training_domains:", training_domains)

        # RANDOM WALK
        test_domain.set_inplace_environment(False)
        for _ in range(repeat_runs):
            state = test_domain.get_initial_state()
            cost = episode_cost(test_domain, None, state, max_steps=1000)
            print("One random Walk complete")
            print("Cost :", cost)
            results["random_walk"].append(cost)
        print("All random Walk complete")

        # CP
        cost = solve_and_rollout(
            test_domain, test_domain, SolvingMethod.CP, BasePolicyMethod.FOLLOW_GANTT
        )
        print("Cost: ", cost)
        print("CP done")
        results["cp"].append(cost)

        # CP SGS: solve each training scenario, evaluate on the test scenario
        for train_dom in training_domains:
            cost = solve_and_rollout(
                train_dom, test_domain, SolvingMethod.CP, BasePolicyMethod.SGS_STRICT
            )
            print("Cost: ", cost)
            print("CP_SGS done")
            results["cp_sgs"].append(cost)

        # PILE
        cost = solve_and_rollout(
            test_domain, test_domain, SolvingMethod.PILE, BasePolicyMethod.FOLLOW_GANTT
        )
        print("Cost: ", cost)
        print("PILE done")
        results["pile"].append(cost)

        # GPHH
        for _ in range(repeat_runs):
            test_domain.set_inplace_environment(False)

            set_feature = {
                FeatureEnum.EARLIEST_FINISH_DATE,
                FeatureEnum.EARLIEST_START_DATE,
                FeatureEnum.LATEST_FINISH_DATE,
                FeatureEnum.LATEST_START_DATE,
                FeatureEnum.N_PREDECESSORS,
                FeatureEnum.N_SUCCESSORS,
                FeatureEnum.ALL_DESCENDANTS,
                FeatureEnum.RESSOURCE_REQUIRED,
                FeatureEnum.RESSOURCE_AVG,
                FeatureEnum.RESSOURCE_MAX,
                FeatureEnum.RESSOURCE_NZ_MIN,
            }

            # Rebuilt for every run so each GPHH gets its own primitive set.
            pset = PrimitiveSet("main", len(set_feature))
            pset.addPrimitive(operator.add, 2)
            pset.addPrimitive(operator.sub, 2)
            pset.addPrimitive(operator.mul, 2)
            pset.addPrimitive(protected_div, 2)
            pset.addPrimitive(max_operator, 2)
            pset.addPrimitive(min_operator, 2)
            pset.addPrimitive(operator.neg, 1)

            params_gphh = ParametersGPHH(
                set_feature=set_feature,
                set_primitves=pset,
                tournament_ratio=0.2,
                pop_size=40,
                n_gen=20,
                min_tree_depth=1,
                max_tree_depth=3,
                crossover_rate=0.7,
                mutation_rate=0.3,
                base_policy_method=BasePolicyMethod.SGS_READY,
                delta_index_freedom=0,
                delta_time_freedom=0,
                deap_verbose=True,
                evaluation=EvaluationGPHH.SGS_DEVIATION,
                permutation_distance=PermutationDistance.KTD,
            )

            solver = GPHH(
                training_domains=training_domains,
                weight=-1,
                verbose=False,
                reference_permutations=cp_reference_permutations,
                training_domains_names=training_domains_names,
                params_gphh=params_gphh,
            )
            # Bind the current test scenario so the factory cannot observe a
            # later iteration's value.
            solver.solve(domain_factory=lambda domain=test_domain: domain)

            test_domain.set_inplace_environment(False)
            state = test_domain.get_initial_state()
            solver.set_domain(test_domain)
            cost = episode_cost(test_domain, solver, state, max_steps=1000)
            print("One GPHH done")
            print("Best evolved heuristic: ", solver.best_heuristic)
            print("Cost: ", cost)
            results["gphh"].append(cost)

    print("##### ALL RESULTS #####")

    for test_domain_str in test_domain_names:
        print(test_domain_str, " :")
        for algo_key, runs in all_results[test_domain_str].items():
            print("\t", algo_key, ": ")
            print("\t\t all runs:", runs)
            print("\t\t mean:", np.mean(runs))
def run_and_compare_policies_sampled_scenarios():
    """Compare SGS policies and a meta policy on sampled scenarios of ``j601_1.sm``.

    Up to 30 randomly chosen tasks are passed as ``task_to_noise`` to build a
    stochastic domain, from which 5 deterministic instances are requested.
    The deterministic domain is solved once with local search (at most 20
    iterations); the best solution is turned into three SGS policies, which
    are also combined into a ``MetaPolicy``. For each policy (meta policy
    first), the value reported for the initial state by
    ``rollout_based_compute_expected_cost_for_policy_scheduling`` is summed
    over all sampled scenarios and printed.
    """
    import random

    domain: RCPSP = load_domain(get_complete_path("j601_1.sm"))
    # random.sample() requires a sequence: sets are rejected since Python
    # 3.11, so materialise the task ids as a list first.
    task_ids = list(domain.get_tasks_ids())
    task_to_noise = set(random.sample(task_ids, min(30, len(task_ids))))
    stochastic_domain = build_stochastic_from_deterministic(
        domain, task_to_noise=task_to_noise
    )
    deterministic_domains = build_n_determinist_from_stochastic(
        stochastic_domain, nb_instance=5
    )
    for scenario in deterministic_domains:
        scenario.set_inplace_environment(True)
    stochastic_domain.set_inplace_environment(True)
    state = domain.get_initial_state()
    domain.set_inplace_environment(True)

    solver = DOSolver(
        policy_method_params=PolicyMethodParams(
            base_policy_method=BasePolicyMethod.FOLLOW_GANTT,
            delta_index_freedom=0,
            delta_time_freedom=0,
        ),
        method=SolvingMethod.LS,
        dict_params={"nb_iteration_max": 20},
    )
    solver.solve(domain_factory=lambda: domain)

    policy_methods = [
        PolicyMethodParams(
            base_policy_method=method, delta_time_freedom=0, delta_index_freedom=0
        )
        for method in (
            BasePolicyMethod.SGS_PRECEDENCE,
            BasePolicyMethod.SGS_READY,
            BasePolicyMethod.SGS_STRICT,
        )
    ]
    # One policy per parameter set, keyed by its position in policy_methods.
    policies = {
        i: from_solution_to_policy(
            solution=solver.best_solution,
            domain=stochastic_domain,
            policy_method_params=params,
        )
        for i, params in enumerate(policy_methods)
    }
    # The meta policy receives its own copy of the mapping, so adding the
    # "meta" entry below does not make it reference itself.
    policies["meta"] = MetaPolicy(
        policies=dict(policies),
        execution_domain=domain,
        known_domain=domain,
        nb_rollout_estimation=1,
        verbose=True,
    )

    value_function_dict = {}
    # Reverse insertion order: the meta policy is evaluated first.
    for key in list(policies.keys())[::-1]:
        value_function_dict[key] = 0.0
        for scenario in deterministic_domains:
            (
                value_function_d,
                _policy_dict,
                _preds,
                _succs,
            ) = rollout_based_compute_expected_cost_for_policy_scheduling(
                scenario, policies[key], nb_rollout=1
            )
            value_function_dict[key] += value_function_d[state]
        print("key : ", key, value_function_dict[key])