def run_do():
    from skdecide.hub.solver.do_solver.do_solver_scheduling import (
        BasePolicyMethod,
        DOSolver,
        PolicyMethodParams,
        PolicyRCPSP,
        SolvingMethod,
    )

    # `MyExampleRCPSPDomain` is defined elsewhere in this example module.
    domain = MyExampleRCPSPDomain()
    # domain: RCPSP = load_domain("j1010_2.mm")
    # domain: RCPSP = load_domain("j301_1.sm")
    domain.set_inplace_environment(False)
    state = domain.get_initial_state()
    print("Initial state : ", state)
    solver = DOSolver(
        policy_method_params=PolicyMethodParams(
            base_policy_method=BasePolicyMethod.SGS_PRECEDENCE,
            delta_index_freedom=0,
            delta_time_freedom=0,
        ),
        method=SolvingMethod.LNS_CP_CALENDAR,
    )
    solver.solve(domain_factory=lambda: domain)
    states, actions, values = rollout_episode(
        domain=domain,
        max_steps=1000,
        solver=solver,
        from_memory=state,
        action_formatter=lambda o: str(o),
        outcome_formatter=lambda o: f"{o.observation} - cost: {o.value.cost:.2f}",
    )
    print("Cost :", sum([v.cost for v in values]))
    from skdecide.hub.solver.do_solver.sk_to_do_binding import (
        from_last_state_to_solution,
    )

    do_sol = from_last_state_to_solution(states[-1], domain)
    from skdecide.builders.discrete_optimization.rcpsp.rcpsp_utils import (
        plot_resource_individual_gantt,
        plot_ressource_view,
        plot_task_gantt,
        plt,
    )

    # Plot the resulting schedule as Gantt and resource-consumption views.
    plot_task_gantt(do_sol.problem, do_sol)
    plot_ressource_view(do_sol.problem, do_sol)
    plot_resource_individual_gantt(do_sol.problem, do_sol)
    plt.show()
def do_multimode():
    domain: RCPSP = load_domain(get_complete_path("j1010_2.mm"))
    state = domain.get_initial_state()
    solver = DOSolver(
        policy_method_params=PolicyMethodParams(
            base_policy_method=BasePolicyMethod.FOLLOW_GANTT,
            delta_index_freedom=0,
            delta_time_freedom=0,
        ),
        method=SolvingMethod.CP,
    )
    solver.solve(domain_factory=lambda: domain)
    states, actions, values = rollout_episode(
        domain=domain,
        solver=solver,
        from_memory=state,
        max_steps=1000,
        action_formatter=lambda a: f"{a}",
        outcome_formatter=lambda o: f"{o.observation} - cost: {o.value.cost:.2f}",
    )
    print("rollout done")
    print("end times: ")
    for task_id in states[-1].tasks_details.keys():
        print("end task", task_id, ": ", states[-1].tasks_details[task_id].end)
def init_reference_permutations(
    self, reference_permutations=None, training_domains_names=None
) -> None:
    # Use None defaults rather than mutable default arguments.
    reference_permutations = reference_permutations or {}
    training_domains_names = training_domains_names or []
    self.reference_permutations = {}
    for i in range(len(self.training_domains)):
        td = self.training_domains[i]
        td_name = training_domains_names[i]
        if td_name not in reference_permutations:
            # No stored reference for this instance: solve it with CP.
            td.set_inplace_environment(False)
            solver = DOSolver(
                policy_method_params=PolicyMethodParams(
                    base_policy_method=BasePolicyMethod.SGS_PRECEDENCE,
                    delta_index_freedom=0,
                    delta_time_freedom=0,
                ),
                method=SolvingMethod.CP,
            )
            solver.solve(domain_factory=lambda: td)
            raw_permutation = solver.best_solution.rcpsp_permutation
            # Shift to 1-based task ids and re-insert the dummy source (task 1)
            # and sink (last task) that the raw permutation omits
            # (see the sketch after this method).
            full_permutation = [x + 2 for x in raw_permutation]
            full_permutation.insert(0, 1)
            full_permutation.append(np.max(full_permutation) + 1)
            print("full_perm: ", full_permutation)
            self.reference_permutations[td] = full_permutation
        else:
            self.reference_permutations[td] = reference_permutations[td_name]
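# Minimal illustration (not part of the original module) of the permutation
# convention used above: the discrete-optimization solver returns a 0-based
# permutation over the real tasks only, while the scheduling domain numbers
# tasks from 1 and wraps them with a dummy source (task 1) and sink (last task).
def _example_complete_permutation():
    raw_permutation = [2, 0, 1]  # hypothetical solver output for 3 real tasks
    full_permutation = [x + 2 for x in raw_permutation]  # -> [4, 2, 3]
    full_permutation.insert(0, 1)  # prepend the dummy source task
    full_permutation.append(max(full_permutation) + 1)  # append the dummy sink
    assert full_permutation == [1, 4, 2, 3, 5]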
def do_singlemode():
    do_solver = SolvingMethod.CP
    domain: RCPSP = load_domain(get_complete_path("j301_1.sm"))
    domain.set_inplace_environment(False)
    state = domain.get_initial_state()
    print("Initial state : ", state)
    solver = DOSolver(
        policy_method_params=PolicyMethodParams(
            base_policy_method=BasePolicyMethod.SGS_PRECEDENCE,
            delta_index_freedom=0,
            delta_time_freedom=0,
        ),
        method=do_solver,
    )
    solver.solve(domain_factory=lambda: domain)
    print(do_solver)
    states, actions, values = rollout_episode(
        domain=domain,
        solver=solver,
        from_memory=state,
        max_steps=500,
        outcome_formatter=lambda o: f"{o.observation} - cost: {o.value.cost:.2f}",
    )
    print(sum([v.cost for v in values]))
    print("rollout done")
    print("end times: ")
    for task_id in states[-1].tasks_details.keys():
        print("end task", task_id, ": ", states[-1].tasks_details[task_id].end)
def test_optimality(domain, do_solver):
    print("domain: ", domain)
    domain.set_inplace_environment(False)
    state = domain.get_initial_state()
    print("Initial state : ", state)
    solver = DOSolver(
        policy_method_params=PolicyMethodParams(
            base_policy_method=BasePolicyMethod.SGS_PRECEDENCE,
            delta_index_freedom=0,
            delta_time_freedom=0,
        ),
        method=do_solver,
    )
    solver.solve(domain_factory=lambda: domain)
    print(do_solver)
    states, actions, values = rollout_episode(
        domain=domain,
        max_steps=1000,
        solver=solver,
        from_memory=state,
        action_formatter=None,
        outcome_formatter=None,
        verbose=False,
    )
    # `optimal_solutions` is expected to be defined at module level, mapping
    # domain names to their known optimal makespans.
    if isinstance(domain, ToyRCPSPDomain):
        makespan = max(
            [states[-1].tasks_details[x].end for x in states[-1].tasks_complete]
        )
        assert makespan == optimal_solutions["ToyRCPSPDomain"]["makespan"]
    if isinstance(domain, ToyMS_RCPSPDomain):
        makespan = max(
            [states[-1].tasks_details[x].end for x in states[-1].tasks_complete]
        )
        assert makespan == optimal_solutions["ToyMS_RCPSPDomain"]["makespan"]
def test_do(domain, do_solver):
    print("domain: ", domain)
    domain.set_inplace_environment(False)
    state = domain.get_initial_state()
    print("Initial state : ", state)
    solver = DOSolver(
        policy_method_params=PolicyMethodParams(
            base_policy_method=BasePolicyMethod.SGS_PRECEDENCE,
            delta_index_freedom=0,
            delta_time_freedom=0,
        ),
        method=do_solver,
    )
    solver.solve(domain_factory=lambda: domain)
    print(do_solver)
    states, actions, values = rollout_episode(
        domain=domain,
        max_steps=1000,
        solver=solver,
        from_memory=state,
        action_formatter=None,  # e.g. lambda o: str(o) for verbose runs
        outcome_formatter=None,  # e.g. lambda o: f"{o.observation} - cost: {o.value.cost:.2f}"
        verbose=False,
    )
    check_rollout_consistency(domain, states)
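# Hypothetical pytest wiring for the tests above (assumed, not from the
# original test module): `domain` and `do_solver` arrive as fixtures. A
# parametrized fixture like this one would exercise several solving methods;
# a matching `domain` fixture is assumed to be defined alongside it.
import pytest


@pytest.fixture(params=[SolvingMethod.CP, SolvingMethod.LS, SolvingMethod.PILE])
def do_solver(request):
    return request.param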
def run_and_compare_policies():
    import random

    domain: RCPSP = load_domain("j1201_1.sm")
    task_to_noise = set(
        random.sample(domain.get_tasks_ids(), min(30, len(domain.get_tasks_ids())))
    )
    stochastic_domain = build_stochastic_from_deterministic(
        domain, task_to_noise=task_to_noise
    )
    stochastic_domain.set_inplace_environment(False)
    state = domain.get_initial_state()
    domain.set_inplace_environment(False)
    solver = DOSolver(
        policy_method_params=PolicyMethodParams(
            base_policy_method=BasePolicyMethod.FOLLOW_GANTT,
            delta_index_freedom=0,
            delta_time_freedom=0,
        ),
        method=SolvingMethod.LS,
    )
    solver.solve(domain_factory=lambda: domain)
    policy_methods = [
        PolicyMethodParams(
            base_policy_method=method, delta_time_freedom=0, delta_index_freedom=0
        )
        for method in [
            BasePolicyMethod.SGS_PRECEDENCE,
            # BasePolicyMethod.SGS_READY,
            BasePolicyMethod.SGS_STRICT,
        ]
    ]
    policy_methods += [
        PolicyMethodParams(
            base_policy_method=BasePolicyMethod.SGS_INDEX_FREEDOM,
            delta_time_freedom=0,
            delta_index_freedom=i,
        )
        for i in range(10)
    ]
    policy_methods += [
        PolicyMethodParams(
            base_policy_method=BasePolicyMethod.SGS_TIME_FREEDOM,
            delta_time_freedom=t,
            delta_index_freedom=0,
        )
        for t in range(0, 200, 5)
    ]
    policies = {
        i: from_solution_to_policy(
            solution=solver.best_solution,
            domain=stochastic_domain,
            policy_method_params=policy_methods[i],
        )
        for i in range(len(policy_methods))
    }
    from skdecide.hub.solver.meta_policy.meta_policies import MetaPolicy

    keys = list(policies.keys())
    for key in keys:
        (
            value_function_dict,
            policy_dict,
            preds,
            succs,
        ) = rollout_based_compute_expected_cost_for_policy_scheduling(
            stochastic_domain, policies[key], nb_rollout=30
        )
        print("key : ", key, value_function_dict[state])
def compute_ref_permutations():
    import os

    files = get_data_available()
    all_single_mode = [os.path.basename(f) for f in files if "sm" in f]
    all_permutations = {}
    all_makespans = {}
    for td_name in all_single_mode:
        td = load_domain(get_complete_path(td_name))
        td.set_inplace_environment(False)
        solver = DOSolver(
            policy_method_params=PolicyMethodParams(
                base_policy_method=BasePolicyMethod.SGS_PRECEDENCE,
                delta_index_freedom=0,
                delta_time_freedom=0,
            ),
            method=SolvingMethod.CP,
        )
        solver.solve(domain_factory=lambda: td)
        raw_permutation = solver.best_solution.rcpsp_permutation
        # Shift to 1-based task ids and re-insert the dummy source/sink tasks.
        full_permutation = [int(x + 2) for x in raw_permutation]
        full_permutation.insert(0, 1)
        full_permutation.append(int(np.max(full_permutation) + 1))
        print("full_perm: ", full_permutation)
        all_permutations[td_name] = full_permutation
        state = td.get_initial_state()
        states, actions, values = rollout_episode(
            domain=td,
            max_steps=1000,
            solver=solver,
            from_memory=state,
            verbose=False,
            outcome_formatter=lambda o: f"{o.observation} - cost: {o.value.cost:.2f}",
        )
        makespan = sum([v.cost for v in values])
        all_makespans[td_name] = makespan
        print("makespan: ", makespan)
    print("all_permutations: ", all_permutations)
    print("all_makespans: ", all_makespans)
    # Use context managers so the dump files are closed properly.
    with open("cp_reference_permutations", "w") as f:
        json.dump(all_permutations, f, indent=2)
    with open("cp_reference_makespans", "w") as f:
        json.dump(all_makespans, f, indent=2)
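# Optional sanity check (assumed helper, not in the original file): reload the
# reference files written by compute_ref_permutations() and verify that both
# JSON dumps cover the same set of instances.
def _check_reference_files():
    import json

    with open("cp_reference_permutations") as f:
        perms = json.load(f)
    with open("cp_reference_makespans") as f:
        makespans = json.load(f)
    assert set(perms) == set(makespans)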
def do_multiskill():
    domain: MSRCPSP = load_multiskill_domain(get_data_available_ms()[0])
    domain.set_inplace_environment(False)
    state = domain.get_initial_state()
    solver = DOSolver(
        policy_method_params=PolicyMethodParams(
            base_policy_method=BasePolicyMethod.SGS_PRECEDENCE,
            delta_index_freedom=0,
            delta_time_freedom=0,
        ),
        method=SolvingMethod.LNS_CP,
    )
    solver.get_available_methods(domain)  # informational; result is not used here
    solver.solve(domain_factory=lambda: domain)
    states, actions, values = rollout_episode(
        domain=domain,
        solver=solver,
        from_memory=state,
        max_steps=3000,
        action_formatter=lambda a: f"{a}",
        outcome_formatter=lambda o: f"{o.observation} - cost: {o.value.cost:.2f}",
    )
    print(sum([v.cost for v in values]))
    print("rollout done")
    print("end times: ")
    for task_id in states[-1].tasks_details.keys():
        print("end task", task_id, ": ", states[-1].tasks_details[task_id].end)
def test_do_mskill(domain_multiskill, do_solver_multiskill):
    domain_multiskill.set_inplace_environment(False)
    state = domain_multiskill.get_initial_state()
    print("Initial state : ", state)
    solver = DOSolver(
        policy_method_params=PolicyMethodParams(
            base_policy_method=BasePolicyMethod.SGS_PRECEDENCE,
            delta_index_freedom=0,
            delta_time_freedom=0,
        ),
        method=do_solver_multiskill,
    )
    solver.solve(domain_factory=lambda: domain_multiskill)
    print(do_solver_multiskill)
    states, actions, values = rollout_episode(
        domain=domain_multiskill,
        max_steps=1000,
        solver=solver,
        from_memory=state,
        action_formatter=None,
        outcome_formatter=None,
        verbose=False,
    )
    check_rollout_consistency(domain_multiskill, states)
def run_comparaison():
    import os

    from examples.discrete_optimization.rcpsp_parser_example import get_data_available

    files = get_data_available()
    all_single_mode = [os.path.basename(f) for f in files if "sm" in f]
    # training_cphh = ["j1201_" + str(i) + ".sm" for i in range(2, 11)]
    training_cphh = ["j301_" + str(i) + ".sm" for i in range(1, 11)]
    # all_testing_domains_names = [
    #     f for f in all_single_mode if not (any(g in f for g in training_cphh))
    # ]
    all_testing_domains_names = ["j1201_2.sm"]
    # all_testing_domains_names = ["j601_2.sm"]
    # all_testing_domains_names = random.sample(all_testing_domains_names, 1)
    # training_domains_names = [
    #     f for f in all_single_mode if any(g in f for g in training_cphh)
    # ]
    training_domains_names = all_testing_domains_names
    domains_loaded = {
        domain_name: load_domain(get_complete_path(domain_name))
        for domain_name in all_testing_domains_names
    }
    test_domain_names = all_testing_domains_names
    # test_domain_names = [test_domain_names[-1]]
    # test_domain_names = ["j1201_1.sm"]
    print("test_domain_names: ", test_domain_names)
    print("training_domains_names: ", training_domains_names)
    n_walks = 5
    for td in training_domains_names:
        domains_loaded[td] = load_domain(get_complete_path(td))
    all_results = {}
    for dom in test_domain_names:
        all_results[dom] = {
            "random_walk": [],
            "cp": [],
            "cp_sgs": [],
            "gphh": [],
            "pile": [],
        }

    # RANDOM WALK
    for test_domain_str in test_domain_names:
        domain: RCPSP = domains_loaded[test_domain_str]
        domain.set_inplace_environment(False)
        n_walks = 5
        for i in range(n_walks):
            state = domain.get_initial_state()
            solver = None
            states, actions, values = rollout_episode(
                domain=domain,
                max_steps=1000,
                solver=solver,
                from_memory=state,
                verbose=False,
                outcome_formatter=lambda o: f"{o.observation} - cost: {o.value.cost:.2f}",
            )
            print("One random Walk complete")
            print("Cost :", sum([v.cost for v in values]))
            all_results[test_domain_str]["random_walk"].append(
                sum([v.cost for v in values])
            )
    print("All random Walk complete")

    # CP
    for test_domain_str in test_domain_names:
        domain: RCPSP = domains_loaded[test_domain_str]
        do_solver = SolvingMethod.CP
        domain.set_inplace_environment(False)
        state = domain.get_initial_state()
        solver = DOSolver(
            policy_method_params=PolicyMethodParams(
                base_policy_method=BasePolicyMethod.FOLLOW_GANTT,
                delta_index_freedom=0,
                delta_time_freedom=0,
            ),
            method=do_solver,
        )
        solver.solve(domain_factory=lambda: domain)
        print(do_solver)
        states, actions, values = rollout_episode(
            domain=domain,
            solver=solver,
            from_memory=state,
            max_steps=500,
            verbose=False,
            outcome_formatter=lambda o: f"{o.observation} - cost: {o.value.cost:.2f}",
        )
        print("Cost: ", sum([v.cost for v in values]))
        print("CP done")
        all_results[test_domain_str]["cp"].append(sum([v.cost for v in values]))

    # CP SGS
    for test_domain_str in test_domain_names:
        domain: RCPSP = domains_loaded[test_domain_str]
        do_solver = SolvingMethod.CP
        domain.set_inplace_environment(False)
        state = domain.get_initial_state()
        solver = DOSolver(
            policy_method_params=PolicyMethodParams(
                base_policy_method=BasePolicyMethod.SGS_STRICT,
                delta_index_freedom=0,
                delta_time_freedom=0,
            ),
            method=do_solver,
        )
        solver.solve(domain_factory=lambda: domain)
        print(do_solver)
        states, actions, values = rollout_episode(
            domain=domain,
            solver=solver,
            from_memory=state,
            max_steps=500,
            verbose=False,
            outcome_formatter=lambda o: f"{o.observation} - cost: {o.value.cost:.2f}",
        )
        print("Cost: ", sum([v.cost for v in values]))
        print("CP_SGS done")
        all_results[test_domain_str]["cp_sgs"].append(sum([v.cost for v in values]))

    # PILE
    for test_domain_str in test_domain_names:
        domain: RCPSP = domains_loaded[test_domain_str]
        do_solver = SolvingMethod.PILE
        domain.set_inplace_environment(False)
        state = domain.get_initial_state()
        solver = DOSolver(
            policy_method_params=PolicyMethodParams(
                base_policy_method=BasePolicyMethod.FOLLOW_GANTT,
                delta_index_freedom=0,
                delta_time_freedom=0,
            ),
            method=do_solver,
        )
        solver.solve(domain_factory=lambda: domain)
        print(do_solver)
        states, actions, values = rollout_episode(
            domain=domain,
            solver=solver,
            from_memory=state,
            max_steps=500,
            verbose=False,
            outcome_formatter=lambda o: f"{o.observation} - cost: {o.value.cost:.2f}",
        )
        print("Cost: ", sum([v.cost for v in values]))
        print("PILE done")
        all_results[test_domain_str]["pile"].append(sum([v.cost for v in values]))

    # GPHH
    domain: RCPSP = load_domain(get_complete_path("j301_1.sm"))
    training_domains = [
        domains_loaded[training_domain] for training_domain in training_domains_names
    ]
    with open("cp_reference_permutations") as json_file:
        cp_reference_permutations = json.load(json_file)
    # with open("cp_reference_makespans") as json_file:
    #     cp_reference_makespans = json.load(json_file)
    for i in range(n_walks):
        domain.set_inplace_environment(False)
        set_feature = {
            FeatureEnum.EARLIEST_FINISH_DATE,
            FeatureEnum.EARLIEST_START_DATE,
            FeatureEnum.LATEST_FINISH_DATE,
            FeatureEnum.LATEST_START_DATE,
            FeatureEnum.N_PREDECESSORS,
            FeatureEnum.N_SUCCESSORS,
            FeatureEnum.ALL_DESCENDANTS,
            FeatureEnum.RESSOURCE_REQUIRED,
            FeatureEnum.RESSOURCE_AVG,
            FeatureEnum.RESSOURCE_MAX,
            # FeatureEnum.RESSOURCE_MIN,
            FeatureEnum.RESSOURCE_NZ_MIN,
        }
        pset = PrimitiveSet("main", len(set_feature))
        pset.addPrimitive(operator.add, 2)
        pset.addPrimitive(operator.sub, 2)
        pset.addPrimitive(operator.mul, 2)
        pset.addPrimitive(protected_div, 2)
        pset.addPrimitive(max_operator, 2)
        pset.addPrimitive(min_operator, 2)
        pset.addPrimitive(operator.neg, 1)
        # pset.addPrimitive(operator.pow, 2)
        params_gphh = ParametersGPHH(
            set_feature=set_feature,
            set_primitves=pset,
            tournament_ratio=0.2,
            pop_size=20,
            n_gen=7,
            min_tree_depth=1,
            max_tree_depth=3,
            crossover_rate=0.7,
            mutation_rate=0.3,
            base_policy_method=BasePolicyMethod.SGS_READY,
            delta_index_freedom=0,
            delta_time_freedom=0,
            deap_verbose=True,
            evaluation=EvaluationGPHH.SGS_DEVIATION,
            permutation_distance=PermutationDistance.KTD,
            # permutation_distance=PermutationDistance.KTD_HAMMING,
        )
        solver = GPHH(
            training_domains=training_domains,
            weight=-1,
            verbose=False,
            reference_permutations=cp_reference_permutations,
            # reference_makespans=cp_reference_makespans,
            training_domains_names=training_domains_names,
            params_gphh=params_gphh,
        )
        solver.solve(domain_factory=lambda: domain)
        for test_domain_str in test_domain_names:
            domain: RCPSP = domains_loaded[test_domain_str]
            domain.set_inplace_environment(False)
            state = domain.get_initial_state()
            solver.set_domain(domain)
            states, actions, values = rollout_episode(
                domain=domain,
                max_steps=1000,
                solver=solver,
                from_memory=state,
                verbose=False,
                outcome_formatter=lambda o: f"{o.observation} - cost: {o.value.cost:.2f}",
            )
            print("One GPHH done")
            print("Best evolved heuristic: ", solver.best_heuristic)
            print("Cost: ", sum([v.cost for v in values]))
            all_results[test_domain_str]["gphh"].append(sum([v.cost for v in values]))
    print("All GPHH done")

    print("##### ALL RESULTS #####")
    for test_domain_str in test_domain_names:
        print(test_domain_str, " :")
        for algo_key in all_results[test_domain_str].keys():
            print("\t", algo_key, ": ")
            print("\t\t all runs:", all_results[test_domain_str][algo_key])
            print("\t\t mean:", np.mean(all_results[test_domain_str][algo_key]))
def run_comparaison_stochastic():
    import random

    from skdecide.hub.domain.rcpsp.rcpsp_sk import (
        RCPSP,
        build_n_determinist_from_stochastic,
        build_stochastic_from_deterministic,
    )

    repeat_runs = 5
    test_domain_names = [
        "j301_1.sm",
        "j301_2.sm",
        "j301_3.sm",
        "j601_1.sm",
        "j601_2.sm",
        "j601_3.sm",
    ]
    all_results = {}
    for dom in test_domain_names:
        all_results[dom] = {
            "random_walk": [],
            "cp": [],
            "cp_sgs": [],
            "gphh": [],
            "pile": [],
        }
    for original_domain_name in test_domain_names:
        original_domain: RCPSP = load_domain(get_complete_path(original_domain_name))
        task_to_noise = set(
            random.sample(
                original_domain.get_tasks_ids(), len(original_domain.get_tasks_ids())
            )
        )
        stochastic_domain = build_stochastic_from_deterministic(
            original_domain, task_to_noise=task_to_noise
        )
        deterministic_domains = build_n_determinist_from_stochastic(
            stochastic_domain, nb_instance=6
        )
        training_domains = deterministic_domains[0:-1]
        training_domains_names = [None for i in range(len(training_domains))]
        test_domain = deterministic_domains[-1]
        print("training_domains:", training_domains)

        # RANDOM WALK
        domain: RCPSP = test_domain
        domain.set_inplace_environment(False)
        # random_walk_costs = []
        for i in range(repeat_runs):
            state = domain.get_initial_state()
            solver = None
            states, actions, values = rollout_episode(
                domain=domain,
                max_steps=1000,
                solver=solver,
                from_memory=state,
                verbose=False,
                outcome_formatter=lambda o: f"{o.observation} - cost: {o.value.cost:.2f}",
            )
            print("One random Walk complete")
            print("Cost :", sum([v.cost for v in values]))
            all_results[original_domain_name]["random_walk"].append(
                sum([v.cost for v in values])
            )
        print("All random Walk complete")

        # CP
        domain = test_domain
        do_solver = SolvingMethod.CP
        domain.set_inplace_environment(False)
        state = domain.get_initial_state()
        solver = DOSolver(
            policy_method_params=PolicyMethodParams(
                base_policy_method=BasePolicyMethod.FOLLOW_GANTT,
                delta_index_freedom=0,
                delta_time_freedom=0,
            ),
            method=do_solver,
        )
        solver.solve(domain_factory=lambda: domain)
        print(do_solver)
        states, actions, values = rollout_episode(
            domain=domain,
            solver=solver,
            from_memory=state,
            max_steps=500,
            verbose=False,
            outcome_formatter=lambda o: f"{o.observation} - cost: {o.value.cost:.2f}",
        )
        print("Cost: ", sum([v.cost for v in values]))
        print("CP done")
        all_results[original_domain_name]["cp"].append(sum([v.cost for v in values]))

        # CP SGS
        for train_dom in training_domains:
            do_solver = SolvingMethod.CP
            train_dom.set_inplace_environment(False)
            state = train_dom.get_initial_state()
            solver = DOSolver(
                policy_method_params=PolicyMethodParams(
                    base_policy_method=BasePolicyMethod.SGS_STRICT,
                    delta_index_freedom=0,
                    delta_time_freedom=0,
                ),
                method=do_solver,
            )
            solver.solve(domain_factory=lambda: train_dom)
            print(do_solver)
            domain: RCPSP = test_domain
            domain.set_inplace_environment(False)
            # NB: the rollout replays the training solution on the test domain,
            # starting from the training domain's initial state (the sampled
            # instances share the same task structure).
            states, actions, values = rollout_episode(
                domain=domain,
                solver=solver,
                from_memory=state,
                max_steps=500,
                verbose=False,
                outcome_formatter=lambda o: f"{o.observation} - cost: {o.value.cost:.2f}",
            )
            print("Cost: ", sum([v.cost for v in values]))
            print("CP_SGS done")
            all_results[original_domain_name]["cp_sgs"].append(
                sum([v.cost for v in values])
            )

        # PILE
        domain: RCPSP = test_domain
        do_solver = SolvingMethod.PILE
        domain.set_inplace_environment(False)
        state = domain.get_initial_state()
        solver = DOSolver(
            policy_method_params=PolicyMethodParams(
                base_policy_method=BasePolicyMethod.FOLLOW_GANTT,
                delta_index_freedom=0,
                delta_time_freedom=0,
            ),
            method=do_solver,
        )
        solver.solve(domain_factory=lambda: domain)
        print(do_solver)
        states, actions, values = rollout_episode(
            domain=domain,
            solver=solver,
            from_memory=state,
            max_steps=500,
            verbose=False,
            outcome_formatter=lambda o: f"{o.observation} - cost: {o.value.cost:.2f}",
        )
        print("Cost: ", sum([v.cost for v in values]))
        print("PILE done")
        all_results[original_domain_name]["pile"].append(
            sum([v.cost for v in values])
        )

        # GPHH
        with open("cp_reference_permutations") as json_file:
            cp_reference_permutations = json.load(json_file)
        with open("cp_reference_makespans") as json_file:
            cp_reference_makespans = json.load(json_file)
        for i in range(repeat_runs):
            domain.set_inplace_environment(False)
            set_feature = {
                FeatureEnum.EARLIEST_FINISH_DATE,
                FeatureEnum.EARLIEST_START_DATE,
                FeatureEnum.LATEST_FINISH_DATE,
                FeatureEnum.LATEST_START_DATE,
                FeatureEnum.N_PREDECESSORS,
                FeatureEnum.N_SUCCESSORS,
                FeatureEnum.ALL_DESCENDANTS,
                FeatureEnum.RESSOURCE_REQUIRED,
                FeatureEnum.RESSOURCE_AVG,
                FeatureEnum.RESSOURCE_MAX,
                # FeatureEnum.RESSOURCE_MIN,
                FeatureEnum.RESSOURCE_NZ_MIN,
            }
            pset = PrimitiveSet("main", len(set_feature))
            pset.addPrimitive(operator.add, 2)
            pset.addPrimitive(operator.sub, 2)
            pset.addPrimitive(operator.mul, 2)
            pset.addPrimitive(protected_div, 2)
            pset.addPrimitive(max_operator, 2)
            pset.addPrimitive(min_operator, 2)
            pset.addPrimitive(operator.neg, 1)
            # pset.addPrimitive(operator.pow, 2)
            params_gphh = ParametersGPHH(
                set_feature=set_feature,
                set_primitves=pset,
                tournament_ratio=0.2,
                pop_size=40,
                n_gen=20,
                min_tree_depth=1,
                max_tree_depth=3,
                crossover_rate=0.7,
                mutation_rate=0.3,
                base_policy_method=BasePolicyMethod.SGS_READY,
                delta_index_freedom=0,
                delta_time_freedom=0,
                deap_verbose=True,
                evaluation=EvaluationGPHH.SGS_DEVIATION,
                permutation_distance=PermutationDistance.KTD,
                # permutation_distance=PermutationDistance.KTD_HAMMING,
            )
            solver = GPHH(
                training_domains=training_domains,
                weight=-1,
                verbose=False,
                reference_permutations=cp_reference_permutations,
                # reference_makespans=cp_reference_makespans,
                training_domains_names=training_domains_names,
                params_gphh=params_gphh,
                # set_feature=set_feature,
            )
            solver.solve(domain_factory=lambda: domain)
            domain: RCPSP = test_domain
            domain.set_inplace_environment(False)
            state = domain.get_initial_state()
            solver.set_domain(domain)
            states, actions, values = rollout_episode(
                domain=domain,
                max_steps=1000,
                solver=solver,
                from_memory=state,
                verbose=False,
                outcome_formatter=lambda o: f"{o.observation} - cost: {o.value.cost:.2f}",
            )
            print("One GPHH done")
            print("Best evolved heuristic: ", solver.best_heuristic)
            print("Cost: ", sum([v.cost for v in values]))
            all_results[original_domain_name]["gphh"].append(
                sum([v.cost for v in values])
            )

    print("##### ALL RESULTS #####")
    for test_domain_str in test_domain_names:
        print(test_domain_str, " :")
        for algo_key in all_results[test_domain_str].keys():
            print("\t", algo_key, ": ")
            print("\t\t all runs:", all_results[test_domain_str][algo_key])
            print("\t\t mean:", np.mean(all_results[test_domain_str][algo_key]))
def run_and_compare_policies_sampled_scenarios():
    import random

    domain: RCPSP = load_domain(get_complete_path("j601_1.sm"))
    task_to_noise = set(
        random.sample(domain.get_tasks_ids(), min(30, len(domain.get_tasks_ids())))
    )
    stochastic_domain = build_stochastic_from_deterministic(
        domain, task_to_noise=task_to_noise
    )
    deterministic_domains = build_n_determinist_from_stochastic(
        stochastic_domain, nb_instance=5
    )
    for d in deterministic_domains:
        d.set_inplace_environment(True)
    stochastic_domain.set_inplace_environment(True)
    state = domain.get_initial_state()
    domain.set_inplace_environment(True)
    solver = DOSolver(
        policy_method_params=PolicyMethodParams(
            base_policy_method=BasePolicyMethod.FOLLOW_GANTT,
            delta_index_freedom=0,
            delta_time_freedom=0,
        ),
        method=SolvingMethod.LS,
        dict_params={"nb_iteration_max": 20},
    )
    solver.solve(domain_factory=lambda: domain)
    policy_methods = [
        PolicyMethodParams(
            base_policy_method=method, delta_time_freedom=0, delta_index_freedom=0
        )
        for method in [
            BasePolicyMethod.SGS_PRECEDENCE,
            # BasePolicyMethod.SGS_READY,
            BasePolicyMethod.SGS_STRICT,
        ]
    ]
    # policy_methods += [
    #     PolicyMethodParams(
    #         base_policy_method=BasePolicyMethod.SGS_INDEX_FREEDOM,
    #         delta_time_freedom=0,
    #         delta_index_freedom=i,
    #     )
    #     for i in range(10)
    # ]
    # policy_methods += [
    #     PolicyMethodParams(
    #         base_policy_method=BasePolicyMethod.SGS_TIME_FREEDOM,
    #         delta_time_freedom=t,
    #         delta_index_freedom=0,
    #     )
    #     for t in range(0, 200, 5)
    # ]
    policies = {
        i: from_solution_to_policy(
            solution=solver.best_solution,
            domain=stochastic_domain,
            policy_method_params=policy_methods[i],
        )
        for i in range(len(policy_methods))
    }
    meta_policy = MetaPolicy(
        policies={k: policies[k] for k in policies},
        execution_domain=domain,
        known_domain=domain,
        nb_rollout_estimation=1,
        verbose=True,
    )
    policies["meta"] = meta_policy
    keys = list(policies.keys())[::-1]
    value_function_dict = {}
    for key in keys:
        value_function_dict[key] = 0.0
        for k, d in enumerate(deterministic_domains):
            (
                value_function_d,
                policy_dict,
                preds,
                succs,
            ) = rollout_based_compute_expected_cost_for_policy_scheduling(
                d, policies[key], nb_rollout=1
            )
            value_function_dict[key] += value_function_d[state]
        print("key : ", key, value_function_dict[key])
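# Hypothetical entry point (not in the original file): run one of the examples
# above directly; swap in any of the other run_*/do_* functions as needed.
if __name__ == "__main__":
    do_singlemode()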