def test_sgs_policies(domain):
    deterministic_domains = build_n_determinist_from_stochastic(domain, nb_instance=1)
    training_domains = deterministic_domains
    training_domains_names = ["my_toy_domain"]
    domain.set_inplace_environment(False)
    state = domain.get_initial_state()
    # Using a stochastic domain as reference + executing on stochastic domain
    solver = GPHH(
        training_domains=training_domains,
        domain_model=domain,
        weight=-1,
        verbose=False,
        training_domains_names=training_domains_names,
        params_gphh=ParametersGPHH.fast_test(),
    )
    solver.solve(domain_factory=lambda: domain)
    solver.set_domain(domain)
    states, actions, values = rollout_episode(
        domain=domain,
        max_steps=1000,
        solver=solver,
        from_memory=state,
        action_formatter=None,
        outcome_formatter=None,
        verbose=False,
    )
    print("Cost :", sum([v.cost for v in values]))
    check_rollout_consistency(domain, states)
    # Using a deterministic domain as reference + executing on deterministic domain
    solver = GPHH(
        training_domains=training_domains,
        domain_model=training_domains[0],
        weight=-1,
        verbose=False,
        training_domains_names=training_domains_names,
        params_gphh=ParametersGPHH.fast_test(),
    )
    solver.solve(domain_factory=lambda: training_domains[0])
    solver.set_domain(training_domains[0])
    states, actions, values = rollout_episode(
        domain=training_domains[0],
        max_steps=1000,
        solver=solver,
        from_memory=state,
        action_formatter=None,
        outcome_formatter=None,
        verbose=False,
    )
    print("Cost :", sum([v.cost for v in values]))
    check_rollout_consistency(domain, states)

def test_conditional_task_models(domain):
    n_rollout = 250
    counters = {'PROBLEM_OPERATION_2': 0, 'PROBLEM_OPERATION_3': 0}
    domain.set_inplace_environment(False)
    for i in range(n_rollout):
        state = domain.get_initial_state()
        states, actions, values = rollout_episode(
            domain=domain,
            max_steps=1000,
            solver=None,
            from_memory=state.copy(),
            outcome_formatter=None)
        # outcome_formatter=lambda o: f'{o.observation} - cost: {o.value.cost:.2f}')
        if ConditionElementsExample.PROBLEM_OPERATION_2 in states[-1]._current_conditions:
            counters['PROBLEM_OPERATION_2'] += 1
        if ConditionElementsExample.PROBLEM_OPERATION_3 in states[-1]._current_conditions:
            counters['PROBLEM_OPERATION_3'] += 1
    counters['PROBLEM_OPERATION_2'] = float(counters['PROBLEM_OPERATION_2']) / float(n_rollout)
    counters['PROBLEM_OPERATION_3'] = float(counters['PROBLEM_OPERATION_3']) / float(n_rollout)
    print('counters:', counters)
    assert 0.05 <= counters['PROBLEM_OPERATION_2'] <= 0.15
    assert 0.85 <= counters['PROBLEM_OPERATION_3'] <= 0.95

def random_walk():
    domain: RCPSP = load_domain(get_complete_path("j301_1.sm"))
    state = domain.get_initial_state()
    domain.set_inplace_environment(False)
    states, actions, values = rollout_episode(
        domain=domain,
        solver=None,
        from_memory=state,
        max_steps=500,
        outcome_formatter=lambda o: f"{o.observation} - cost: {o.value.cost:.2f}",
    )
    print(sum([v.cost for v in values]))
    print("rollout done")
    print("end times: ")
    for task_id in states[-1].tasks_details.keys():
        print("end task", task_id, ": ", states[-1].tasks_details[task_id].end)

    from skdecide.discrete_optimization.rcpsp.rcpsp_plot_utils import (
        plot_resource_individual_gantt,
        plot_ressource_view,
        plot_task_gantt,
        plt,
    )
    from skdecide.hub.solver.do_solver.sk_to_do_binding import (
        from_last_state_to_solution,
    )

    do_sol = from_last_state_to_solution(states[-1], domain)
    plot_task_gantt(do_sol.problem, do_sol)
    plot_ressource_view(do_sol.problem, do_sol)
    plot_resource_individual_gantt(do_sol.problem, do_sol)
    plt.show()

def _get_next_action(
    self, observation: D.T_agent[D.T_observation]
) -> D.T_agent[D.T_concurrency[D.T_event]]:
    results = {}
    actions_map = {}
    self.known_domain.set_inplace_environment(True)
    actions_c = [
        self.policies[method].get_next_action(observation) for method in self.policies
    ]
    if len(set(actions_c)) > 1:
        for method in self.policies:
            results[method] = 0.0
            for j in range(self.nb_rollout_estimation):
                states, actions, values = rollout_episode(
                    domain=self.known_domain,
                    solver=self.policies[method],
                    outcome_formatter=None,
                    action_formatter=None,
                    verbose=False,
                    from_memory=observation.copy(),
                )
                # cost = sum(v.cost for v in values)
                results[method] += states[-1].t - observation.t  # TODO, this is a trick...
            actions_map[method] = actions[0]
        if self.verbose:
            # print(results)
            print(actions_map[min(results, key=lambda x: results[x])])
        return actions_map[min(results, key=lambda x: results[x])]
    else:
        return actions_c[0]

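# NOTE: _get_next_action above is a method of a meta-policy object whose class body
# is not shown in this excerpt. The sketch below only illustrates the attributes the
# method relies on; the class and attribute names are inferred from the method, not
# taken from a documented skdecide API.
class _MetaPolicySketch:
    def __init__(self, policies, known_domain, nb_rollout_estimation=1, verbose=False):
        # mapping from a method identifier to a policy exposing get_next_action()
        self.policies = policies
        # domain used to roll out and score each candidate policy
        self.known_domain = known_domain
        # number of rollouts accumulated per candidate policy
        self.nb_rollout_estimation = nb_rollout_estimation
        self.verbose = verbose
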
def run_example():
    # domain = MyExampleRCPSPDomain()
    # domain = MyExampleMRCPSPDomain_WithCost()
    # domain = MyExampleSRCPSPDomain()
    # domain = MyExampleCondSRCPSPDomain()
    domain = MyExampleSimulatedCondSRCPSPDomain()
    state = domain.get_initial_state()
    print("Initial state : ", state)
    # actions = domain.get_applicable_actions(state)
    # print([str(action) for action in actions.get_elements()])
    # action = actions.get_elements()[0]
    # new_state = domain.get_next_state(state, action)
    # print("New state ", new_state)
    # actions = domain.get_applicable_actions(new_state)
    # print("New actions : ", [str(action) for action in actions.get_elements()])
    # action = actions.get_elements()[0]
    # print(action)
    # new_state = domain.get_next_state(new_state, action)
    # print("New state :", new_state)
    # print('_is_terminal: ', domain._is_terminal(state))

    # ONLY KEEP LINE BELOW FOR SIMPLE ROLLOUT
    solver = None

    # UNCOMMENT BELOW TO USE ASTAR
    # domain.set_inplace_environment(False)
    # solver = lazy_astar.LazyAstar(from_state=state, heuristic=None, verbose=True)
    # solver.solve(domain_factory=lambda: domain)

    states, actions, values = rollout_episode(
        domain=domain,
        max_steps=1000,
        solver=solver,
        from_memory=state,
        outcome_formatter=lambda o: f'{o.observation} - cost: {o.value.cost:.2f}',
    )
    print(states[-1])

def run_astar():
    from skdecide.hub.solver.lazy_astar import LazyAstar

    domain = MyExampleRCPSPDomain()
    # domain = MyExampleSRCPSPDomain()
    domain.set_inplace_environment(False)
    state = domain.get_initial_state()
    print("Initial state : ", state)
    solver = LazyAstar(from_state=state, heuristic=None, verbose=True)
    solver.solve(domain_factory=lambda: domain)
    states, actions, values = rollout_episode(
        domain=domain,
        max_steps=1000,
        solver=solver,
        from_memory=state,
        outcome_formatter=lambda o: f'{o.observation} - cost: {o.value.cost:.2f}',
    )
    print("Cost :", sum([v.cost for v in values]))

    from skdecide.hub.solver.do_solver.sk_to_do_binding import from_last_state_to_solution

    do_sol = from_last_state_to_solution(states[-1], domain)

    from skdecide.builders.discrete_optimization.rcpsp.rcpsp_utils import (
        plot_resource_individual_gantt,
        plot_ressource_view,
        plot_task_gantt,
        plt,
    )

    plot_task_gantt(do_sol.problem, do_sol)
    plot_ressource_view(do_sol.problem, do_sol)
    plot_resource_individual_gantt(do_sol.problem, do_sol)
    plt.show()

def do_singlemode():
    do_solver = SolvingMethod.CP
    domain: RCPSP = load_domain(get_complete_path("j301_1.sm"))
    domain.set_inplace_environment(False)
    state = domain.get_initial_state()
    print("Initial state : ", state)
    solver = DOSolver(
        policy_method_params=PolicyMethodParams(
            base_policy_method=BasePolicyMethod.SGS_PRECEDENCE,
            delta_index_freedom=0,
            delta_time_freedom=0,
        ),
        method=do_solver,
    )
    solver.solve(domain_factory=lambda: domain)
    print(do_solver)
    states, actions, values = rollout_episode(
        domain=domain,
        solver=solver,
        from_memory=state,
        max_steps=500,
        outcome_formatter=lambda o: f"{o.observation} - cost: {o.value.cost:.2f}",
    )
    print(sum([v.cost for v in values]))
    print("rollout done")
    print("end times: ")
    for task_id in states[-1].tasks_details.keys():
        print("end task", task_id, ": ", states[-1].tasks_details[task_id].end)

def do_multiskill():
    domain: MSRCPSP = load_multiskill_domain(get_data_available_ms()[0])
    domain.set_inplace_environment(False)
    state = domain.get_initial_state()
    solver = DOSolver(
        policy_method_params=PolicyMethodParams(
            base_policy_method=BasePolicyMethod.SGS_PRECEDENCE,
            delta_index_freedom=0,
            delta_time_freedom=0,
        ),
        method=SolvingMethod.LNS_CP,
    )
    solver.get_available_methods(domain)
    solver.solve(domain_factory=lambda: domain)
    states, actions, values = rollout_episode(
        domain=domain,
        solver=solver,
        from_memory=state,
        max_steps=3000,
        action_formatter=lambda a: f"{a}",
        outcome_formatter=lambda o: f"{o.observation} - cost: {o.value.cost:.2f}",
    )
    print(sum([v.cost for v in values]))
    print("rollout done")
    print("end times: ")
    for task_id in states[-1].tasks_details.keys():
        print("end task", task_id, ": ", states[-1].tasks_details[task_id].end)

def test_optimality(domain, do_solver):
    print("domain: ", domain)
    domain.set_inplace_environment(False)
    state = domain.get_initial_state()
    print("Initial state : ", state)
    solver = DOSolver(
        policy_method_params=PolicyMethodParams(
            base_policy_method=BasePolicyMethod.SGS_PRECEDENCE,
            delta_index_freedom=0,
            delta_time_freedom=0,
        ),
        method=do_solver,
    )
    solver.solve(domain_factory=lambda: domain)
    print(do_solver)
    states, actions, values = rollout_episode(
        domain=domain,
        max_steps=1000,
        solver=solver,
        from_memory=state,
        action_formatter=None,
        outcome_formatter=None,
        verbose=False,
    )
    if isinstance(domain, ToyRCPSPDomain):
        makespan = max(
            [states[-1].tasks_details[x].end for x in states[-1].tasks_complete]
        )
        assert makespan == optimal_solutions["ToyRCPSPDomain"]["makespan"]
    if isinstance(domain, ToyMS_RCPSPDomain):
        makespan = max(
            [states[-1].tasks_details[x].end for x in states[-1].tasks_complete]
        )
        assert makespan == optimal_solutions["ToyMS_RCPSPDomain"]["makespan"]

def test_do(domain, do_solver):
    print("domain: ", domain)
    domain.set_inplace_environment(False)
    state = domain.get_initial_state()
    print("Initial state : ", state)
    solver = DOSolver(
        policy_method_params=PolicyMethodParams(
            base_policy_method=BasePolicyMethod.SGS_PRECEDENCE,
            delta_index_freedom=0,
            delta_time_freedom=0,
        ),
        method=do_solver,
    )
    solver.solve(domain_factory=lambda: domain)
    print(do_solver)
    states, actions, values = rollout_episode(
        domain=domain,
        max_steps=1000,
        solver=solver,
        from_memory=state,
        action_formatter=None,
        outcome_formatter=None,
        verbose=False,
    )
    # action_formatter=lambda o: str(o),
    # outcome_formatter=lambda o: f'{o.observation} - cost: {o.value.cost:.2f}')
    check_rollout_consistency(domain, states)

def test_conditional_task_models(domain):
    n_rollout = 2000
    counters = {"PROBLEM_OPERATION_2": 0, "PROBLEM_OPERATION_3": 0}
    domain.set_inplace_environment(False)
    random.seed(42)
    import numpy as np

    np.random.seed(42)
    for i in range(n_rollout):
        state = domain.get_initial_state()
        states, actions, values = rollout_episode(
            domain=domain,
            max_steps=1000,
            solver=None,
            from_memory=state.copy(),
            verbose=False,
            outcome_formatter=None,
            action_formatter=None,
        )
        if ConditionElementsExample.PROBLEM_OPERATION_2 in states[-1]._current_conditions:
            counters["PROBLEM_OPERATION_2"] += 1
        if ConditionElementsExample.PROBLEM_OPERATION_3 in states[-1]._current_conditions:
            counters["PROBLEM_OPERATION_3"] += 1
    counters["PROBLEM_OPERATION_2"] = float(counters["PROBLEM_OPERATION_2"]) / float(n_rollout)
    counters["PROBLEM_OPERATION_3"] = float(counters["PROBLEM_OPERATION_3"]) / float(n_rollout)
    print("counters:", counters)
    assert 0.05 <= counters["PROBLEM_OPERATION_2"] <= 0.15
    assert 0.85 <= counters["PROBLEM_OPERATION_3"] <= 0.95

def run_do():
    from skdecide.hub.solver.do_solver.do_solver_scheduling import (
        BasePolicyMethod,
        DOSolver,
        PolicyMethodParams,
        PolicyRCPSP,
        SolvingMethod,
    )

    domain = MyExampleRCPSPDomain()
    # domain: RCPSP = load_domain("j1010_2.mm")
    # domain: RCPSP = load_domain("j301_1.sm")
    domain.set_inplace_environment(False)
    state = domain.get_initial_state()
    print("Initial state : ", state)
    solver = DOSolver(
        policy_method_params=PolicyMethodParams(
            base_policy_method=BasePolicyMethod.SGS_PRECEDENCE,
            delta_index_freedom=0,
            delta_time_freedom=0,
        ),
        method=SolvingMethod.LNS_CP_CALENDAR,
    )
    solver.solve(domain_factory=lambda: domain)
    states, actions, values = rollout_episode(
        domain=domain,
        max_steps=1000,
        solver=solver,
        from_memory=state,
        action_formatter=lambda o: str(o),
        outcome_formatter=lambda o: f'{o.observation} - cost: {o.value.cost:.2f}',
    )
    print("Cost :", sum([v.cost for v in values]))

    from skdecide.hub.solver.do_solver.sk_to_do_binding import from_last_state_to_solution

    do_sol = from_last_state_to_solution(states[-1], domain)

    from skdecide.builders.discrete_optimization.rcpsp.rcpsp_utils import (
        plot_resource_individual_gantt,
        plot_ressource_view,
        plot_task_gantt,
        plt,
    )

    plot_task_gantt(do_sol.problem, do_sol)
    plot_ressource_view(do_sol.problem, do_sol)
    plot_resource_individual_gantt(do_sol.problem, do_sol)
    plt.show()

def do_multimode():
    domain: RCPSP = load_domain(get_complete_path("j1010_2.mm"))
    state = domain.get_initial_state()
    solver = DOSolver(
        policy_method_params=PolicyMethodParams(
            base_policy_method=BasePolicyMethod.FOLLOW_GANTT,
            delta_index_freedom=0,
            delta_time_freedom=0,
        ),
        method=SolvingMethod.CP,
    )
    solver.solve(domain_factory=lambda: domain)
    states, actions, values = rollout_episode(
        domain=domain,
        solver=solver,
        from_memory=state,
        max_steps=1000,
        action_formatter=lambda a: f"{a}",
        outcome_formatter=lambda o: f"{o.observation} - cost: {o.value.cost:.2f}",
    )
    print("rollout done")
    print("end times: ")
    for task_id in states[-1].tasks_details.keys():
        print("end task", task_id, ": ", states[-1].tasks_details[task_id].end)

def test_rollout(domain):
    state = domain.get_initial_state()
    states, actions, values = rollout_episode(
        domain=domain,
        max_steps=1000,
        solver=None,
        from_memory=state,
        outcome_formatter=lambda o: f'{o.observation} - cost: {o.value.cost:.2f}',
    )
    check_rollout_consistency(domain, states)

def test_rollout(domain):
    state = domain.get_initial_state()
    states, actions, values = rollout_episode(
        domain=domain,
        max_steps=1000,
        solver=None,
        from_memory=state,
        action_formatter=None,
        outcome_formatter=None,
        verbose=False,
    )
    check_rollout_consistency(domain, states)

def compute_ref_permutations():
    # Solve each single-mode instance with CP, then store the resulting reference
    # permutations and makespans on disk (consumed later as GPHH references).
    import os

    files = get_data_available()
    all_single_mode = [os.path.basename(f) for f in files if "sm" in f]
    all_permutations = {}
    all_makespans = {}
    for td_name in all_single_mode:
        td = load_domain(get_complete_path(td_name))
        td.set_inplace_environment(False)
        solver = DOSolver(
            policy_method_params=PolicyMethodParams(
                base_policy_method=BasePolicyMethod.SGS_PRECEDENCE,
                delta_index_freedom=0,
                delta_time_freedom=0,
            ),
            method=SolvingMethod.CP,
        )
        solver.solve(domain_factory=lambda: td)
        raw_permutation = solver.best_solution.rcpsp_permutation
        full_permutation = [int(x + 2) for x in raw_permutation]
        full_permutation.insert(0, 1)
        full_permutation.append(int(np.max(full_permutation) + 1))
        print("full_perm: ", full_permutation)
        all_permutations[td_name] = full_permutation
        state = td.get_initial_state()
        states, actions, values = rollout_episode(
            domain=td,
            max_steps=1000,
            solver=solver,
            from_memory=state,
            verbose=False,
            outcome_formatter=lambda o: f"{o.observation} - cost: {o.value.cost:.2f}",
        )
        makespan = sum([v.cost for v in values])
        all_makespans[td_name] = makespan
        print("makespan: ", makespan)
    print("all_permutations: ", all_permutations)
    print("all_makespans: ", all_makespans)
    json.dump(all_permutations, open("cp_reference_permutations", "w"), indent=2)
    json.dump(all_makespans, open("cp_reference_makespans", "w"), indent=2)

def random_walk_multiskill():
    domain: MSRCPSP = load_multiskill_domain(get_data_available_ms()[0])
    state = domain.get_initial_state()
    states, actions, values = rollout_episode(
        domain=domain,
        solver=None,
        from_memory=state,
        max_steps=1000,
        action_formatter=lambda a: f"{a}",
        outcome_formatter=lambda o: f"{o.observation} - cost: {o.value.cost:.2f}",
    )
    print("rollout done")
    print("end times: ")
    for task_id in states[-1].tasks_details.keys():
        print("end task", task_id, ": ", states[-1].tasks_details[task_id].end)

def test_planning_algos(domain, solver_str):
    domain.set_inplace_environment(False)
    state = domain.get_initial_state()
    print("Initial state : ", state)
    if solver_str == 'LazyAstar':
        solver = LazyAstar(from_state=state, heuristic=None, verbose=True)
    solver.solve(domain_factory=lambda: domain)
    states, actions, values = rollout_episode(
        domain=domain,
        max_steps=1000,
        solver=solver,
        from_memory=state,
        action_formatter=lambda o: str(o),
        outcome_formatter=lambda o: f'{o.observation} - cost: {o.value.cost:.2f}',
    )
    check_rollout_consistency(domain, states)

def test_planning_algos(domain, solver_str):
    domain.set_inplace_environment(False)
    state = domain.get_initial_state()
    print("Initial state : ", state)
    if solver_str == "LazyAstar":
        solver = LazyAstar(from_state=state, heuristic=None, verbose=False)
    solver.solve(domain_factory=lambda: domain)
    states, actions, values = rollout_episode(
        domain=domain,
        max_steps=1000,
        solver=solver,
        from_memory=state,
        action_formatter=None,
        outcome_formatter=None,
        verbose=False,
    )
    check_rollout_consistency(domain, states)

def test_basic():
    domain_rcpsp = load_domain("j1201_1.sm")
    domain_mrcpsp = load_domain("j1010_2.mm")
    assert isinstance(domain_rcpsp, RCPSP)
    assert isinstance(domain_mrcpsp, MRCPSP)
    # domain: RCPSP = load_domain("j1201_1.sm")
    state: State = domain_rcpsp.get_initial_state()
    print("Initial state : ", state)
    assert len(state.tasks_ongoing) == 0
    assert len(state.tasks_complete) == 0
    assert len(state.tasks_paused) == 0
    actions = domain_rcpsp.get_applicable_actions(state)
    action_list: List[SchedulingAction] = actions.get_elements()
    assert len(action_list) == 2
    action_start_source = [
        ac
        for ac in action_list
        if ac.action == SchedulingActionEnum.START and ac.task == 1
    ]
    assert len(action_start_source) == 1
    next_state = domain_rcpsp.get_next_state(state, action_start_source[0])
    print("New state ", next_state)
    assert len(next_state.tasks_complete) == 1  # it is a dummy task, it should be completed now.
    action_list = domain_rcpsp.get_applicable_actions(state).get_elements()
    states, actions, values = rollout_episode(
        domain=domain_rcpsp,
        solver=None,
        from_memory=state,
        max_steps=500,
        outcome_formatter=lambda o: f'{o.observation} - cost: {o.value.cost:.2f}',
    )
    print("rollout done")
    print('end times: ')
    for task_id in states[-1].tasks_details.keys():
        print('end task', task_id, ': ', states[-1].tasks_details[task_id].end)

def test_do_mskill(domain_multiskill, do_solver_multiskill):
    domain_multiskill.set_inplace_environment(False)
    state = domain_multiskill.get_initial_state()
    print("Initial state : ", state)
    solver = DOSolver(
        policy_method_params=PolicyMethodParams(
            base_policy_method=BasePolicyMethod.SGS_PRECEDENCE,
            delta_index_freedom=0,
            delta_time_freedom=0,
        ),
        method=do_solver_multiskill,
    )
    solver.solve(domain_factory=lambda: domain_multiskill)
    print(do_solver_multiskill)
    states, actions, values = rollout_episode(
        domain=domain_multiskill,
        max_steps=1000,
        solver=solver,
        from_memory=state,
        action_formatter=None,
        outcome_formatter=None,
        verbose=False,
    )
    check_rollout_consistency(domain_multiskill, states)

def run_comparaison():
    import os

    from examples.discrete_optimization.rcpsp_parser_example import get_data_available

    files = get_data_available()
    all_single_mode = [os.path.basename(f) for f in files if "sm" in f]
    # training_cphh = ["j1201_" + str(i) + ".sm" for i in range(2, 11)]
    training_cphh = ["j301_" + str(i) + ".sm" for i in range(1, 11)]
    # all_testing_domains_names = [f for f in all_single_mode
    #                              if not (any(g in f for g in training_cphh))]
    all_testing_domains_names = ["j1201_2.sm"]
    # all_testing_domains_names = ["j601_2.sm"]
    # all_testing_domains_names = random.sample(all_testing_domains_names, 1)
    # training_domains_names = [f for f in all_single_mode
    #                           if any(g in f for g in training_cphh)]
    training_domains_names = all_testing_domains_names
    domains_loaded = {
        domain_name: load_domain(get_complete_path(domain_name))
        for domain_name in all_testing_domains_names
    }
    test_domain_names = all_testing_domains_names
    # test_domain_names = [test_domain_names[-1]]
    # test_domain_names = ["j1201_1.sm"]
    print("test_domain_names: ", test_domain_names)
    print("training_domains_names: ", training_domains_names)
    n_walks = 5
    for td in training_domains_names:
        domains_loaded[td] = load_domain(get_complete_path(td))
    all_results = {}
    for dom in test_domain_names:
        all_results[dom] = {
            "random_walk": [],
            "cp": [],
            "cp_sgs": [],
            "gphh": [],
            "pile": [],
        }
    # RANDOM WALK
    for test_domain_str in test_domain_names:
        domain: RCPSP = domains_loaded[test_domain_str]
        domain.set_inplace_environment(False)
        n_walks = 5
        for i in range(n_walks):
            state = domain.get_initial_state()
            solver = None
            states, actions, values = rollout_episode(
                domain=domain,
                max_steps=1000,
                solver=solver,
                from_memory=state,
                verbose=False,
                outcome_formatter=lambda o: f"{o.observation} - cost: {o.value.cost:.2f}",
            )
            print("One random Walk complete")
            print("Cost :", sum([v.cost for v in values]))
            all_results[test_domain_str]["random_walk"].append(
                sum([v.cost for v in values])
            )
    print("All random Walk complete")
    # CP
    for test_domain_str in test_domain_names:
        domain: RCPSP = domains_loaded[test_domain_str]
        do_solver = SolvingMethod.CP
        domain.set_inplace_environment(False)
        state = domain.get_initial_state()
        solver = DOSolver(
            policy_method_params=PolicyMethodParams(
                base_policy_method=BasePolicyMethod.FOLLOW_GANTT,
                delta_index_freedom=0,
                delta_time_freedom=0,
            ),
            method=do_solver,
        )
        solver.solve(domain_factory=lambda: domain)
        print(do_solver)
        states, actions, values = rollout_episode(
            domain=domain,
            solver=solver,
            from_memory=state,
            max_steps=500,
            verbose=False,
            outcome_formatter=lambda o: f"{o.observation} - cost: {o.value.cost:.2f}",
        )
        print("Cost: ", sum([v.cost for v in values]))
        print("CP done")
        all_results[test_domain_str]["cp"].append(sum([v.cost for v in values]))
    # CP SGS
    for test_domain_str in test_domain_names:
        domain: RCPSP = domains_loaded[test_domain_str]
        do_solver = SolvingMethod.CP
        domain.set_inplace_environment(False)
        state = domain.get_initial_state()
        solver = DOSolver(
            policy_method_params=PolicyMethodParams(
                base_policy_method=BasePolicyMethod.SGS_STRICT,
                delta_index_freedom=0,
                delta_time_freedom=0,
            ),
            method=do_solver,
        )
        solver.solve(domain_factory=lambda: domain)
        print(do_solver)
        states, actions, values = rollout_episode(
            domain=domain,
            solver=solver,
            from_memory=state,
            max_steps=500,
            verbose=False,
            outcome_formatter=lambda o: f"{o.observation} - cost: {o.value.cost:.2f}",
        )
        print("Cost: ", sum([v.cost for v in values]))
        print("CP_SGS done")
        all_results[test_domain_str]["cp_sgs"].append(sum([v.cost for v in values]))
    # PILE
    for test_domain_str in test_domain_names:
        domain: RCPSP = domains_loaded[test_domain_str]
        do_solver = SolvingMethod.PILE
        domain.set_inplace_environment(False)
        state = domain.get_initial_state()
        solver = DOSolver(
            policy_method_params=PolicyMethodParams(
                base_policy_method=BasePolicyMethod.FOLLOW_GANTT,
                delta_index_freedom=0,
                delta_time_freedom=0,
            ),
            method=do_solver,
        )
        solver.solve(domain_factory=lambda: domain)
        print(do_solver)
        states, actions, values = rollout_episode(
            domain=domain,
            solver=solver,
            from_memory=state,
            max_steps=500,
            verbose=False,
            outcome_formatter=lambda o: f"{o.observation} - cost: {o.value.cost:.2f}",
        )
        print("Cost: ", sum([v.cost for v in values]))
        print("PILE done")
        all_results[test_domain_str]["pile"].append(sum([v.cost for v in values]))
    # GPHH
    domain: RCPSP = load_domain(get_complete_path("j301_1.sm"))
    training_domains = [
        domains_loaded[training_domain] for training_domain in training_domains_names
    ]
    with open("cp_reference_permutations") as json_file:
        cp_reference_permutations = json.load(json_file)
    # with open('cp_reference_makespans') as json_file:
    #     cp_reference_makespans = json.load(json_file)
    for i in range(n_walks):
        domain.set_inplace_environment(False)
        set_feature = {
            FeatureEnum.EARLIEST_FINISH_DATE,
            FeatureEnum.EARLIEST_START_DATE,
            FeatureEnum.LATEST_FINISH_DATE,
            FeatureEnum.LATEST_START_DATE,
            FeatureEnum.N_PREDECESSORS,
            FeatureEnum.N_SUCCESSORS,
            FeatureEnum.ALL_DESCENDANTS,
            FeatureEnum.RESSOURCE_REQUIRED,
            FeatureEnum.RESSOURCE_AVG,
            FeatureEnum.RESSOURCE_MAX,
            # FeatureEnum.RESSOURCE_MIN,
            FeatureEnum.RESSOURCE_NZ_MIN,
        }
        pset = PrimitiveSet("main", len(set_feature))
        pset.addPrimitive(operator.add, 2)
        pset.addPrimitive(operator.sub, 2)
        pset.addPrimitive(operator.mul, 2)
        pset.addPrimitive(protected_div, 2)
        pset.addPrimitive(max_operator, 2)
        pset.addPrimitive(min_operator, 2)
        pset.addPrimitive(operator.neg, 1)
        # pset.addPrimitive(operator.pow, 2)
        params_gphh = ParametersGPHH(
            set_feature=set_feature,
            set_primitves=pset,
            tournament_ratio=0.2,
            pop_size=20,
            n_gen=7,
            min_tree_depth=1,
            max_tree_depth=3,
            crossover_rate=0.7,
            mutation_rate=0.3,
            base_policy_method=BasePolicyMethod.SGS_READY,
            delta_index_freedom=0,
            delta_time_freedom=0,
            deap_verbose=True,
            evaluation=EvaluationGPHH.SGS_DEVIATION,
            permutation_distance=PermutationDistance.KTD,
            # permutation_distance=PermutationDistance.KTD_HAMMING,
        )
        solver = GPHH(
            training_domains=training_domains,
            weight=-1,
            verbose=False,
            reference_permutations=cp_reference_permutations,
            # reference_makespans=cp_reference_makespans,
            training_domains_names=training_domains_names,
            params_gphh=params_gphh,
        )
        solver.solve(domain_factory=lambda: domain)
        for test_domain_str in test_domain_names:
            domain: RCPSP = domains_loaded[test_domain_str]
            domain.set_inplace_environment(False)
            state = domain.get_initial_state()
            solver.set_domain(domain)
            states, actions, values = rollout_episode(
                domain=domain,
                max_steps=1000,
                solver=solver,
                from_memory=state,
                verbose=False,
                outcome_formatter=lambda o: f"{o.observation} - cost: {o.value.cost:.2f}",
            )
            print("One GPHH done")
            print("Best evolved heuristic: ", solver.best_heuristic)
            print("Cost: ", sum([v.cost for v in values]))
            all_results[test_domain_str]["gphh"].append(sum([v.cost for v in values]))
    print("All GPHH done")
    print("##### ALL RESULTS #####")
    for test_domain_str in test_domain_names:
        print(test_domain_str, " :")
        for algo_key in all_results[test_domain_str].keys():
            print("\t", algo_key, ": ")
            print("\t\t all runs:", all_results[test_domain_str][algo_key])
            print("\t\t mean:", np.mean(all_results[test_domain_str][algo_key]))

def run_comparaison_stochastic():
    import random

    from skdecide.hub.domain.rcpsp.rcpsp_sk import (
        RCPSP,
        build_n_determinist_from_stochastic,
        build_stochastic_from_deterministic,
    )

    repeat_runs = 5
    test_domain_names = [
        "j301_1.sm",
        "j301_2.sm",
        "j301_3.sm",
        "j601_1.sm",
        "j601_2.sm",
        "j601_3.sm",
    ]
    all_results = {}
    for dom in test_domain_names:
        all_results[dom] = {
            "random_walk": [],
            "cp": [],
            "cp_sgs": [],
            "gphh": [],
            "pile": [],
        }
    for original_domain_name in test_domain_names:
        original_domain: RCPSP = load_domain(get_complete_path(original_domain_name))
        task_to_noise = set(
            random.sample(
                original_domain.get_tasks_ids(), len(original_domain.get_tasks_ids())
            )
        )
        stochastic_domain = build_stochastic_from_deterministic(
            original_domain, task_to_noise=task_to_noise
        )
        deterministic_domains = build_n_determinist_from_stochastic(
            stochastic_domain, nb_instance=6
        )
        training_domains = deterministic_domains[0:-1]
        training_domains_names = [None for i in range(len(training_domains))]
        test_domain = deterministic_domains[-1]
        print("training_domains:", training_domains)
        # RANDOM WALK
        domain: RCPSP = test_domain
        domain.set_inplace_environment(False)
        # random_walk_costs = []
        for i in range(repeat_runs):
            state = domain.get_initial_state()
            solver = None
            states, actions, values = rollout_episode(
                domain=domain,
                max_steps=1000,
                solver=solver,
                from_memory=state,
                verbose=False,
                outcome_formatter=lambda o: f"{o.observation} - cost: {o.value.cost:.2f}",
            )
            print("One random Walk complete")
            print("Cost :", sum([v.cost for v in values]))
            all_results[original_domain_name]["random_walk"].append(
                sum([v.cost for v in values])
            )
        print("All random Walk complete")
        # CP
        domain = test_domain
        do_solver = SolvingMethod.CP
        domain.set_inplace_environment(False)
        state = domain.get_initial_state()
        solver = DOSolver(
            policy_method_params=PolicyMethodParams(
                base_policy_method=BasePolicyMethod.FOLLOW_GANTT,
                delta_index_freedom=0,
                delta_time_freedom=0,
            ),
            method=do_solver,
        )
        solver.solve(domain_factory=lambda: domain)
        print(do_solver)
        states, actions, values = rollout_episode(
            domain=domain,
            solver=solver,
            from_memory=state,
            max_steps=500,
            verbose=False,
            outcome_formatter=lambda o: f"{o.observation} - cost: {o.value.cost:.2f}",
        )
        print("Cost: ", sum([v.cost for v in values]))
        print("CP done")
        all_results[original_domain_name]["cp"].append(sum([v.cost for v in values]))
        # CP SGS
        for train_dom in training_domains:
            do_solver = SolvingMethod.CP
            train_dom.set_inplace_environment(False)
            state = train_dom.get_initial_state()
            solver = DOSolver(
                policy_method_params=PolicyMethodParams(
                    base_policy_method=BasePolicyMethod.SGS_STRICT,
                    delta_index_freedom=0,
                    delta_time_freedom=0,
                ),
                method=do_solver,
            )
            solver.solve(domain_factory=lambda: train_dom)
            print(do_solver)
            domain: RCPSP = test_domain
            domain.set_inplace_environment(False)
            states, actions, values = rollout_episode(
                domain=domain,
                solver=solver,
                from_memory=state,
                max_steps=500,
                verbose=False,
                outcome_formatter=lambda o: f"{o.observation} - cost: {o.value.cost:.2f}",
            )
            print("Cost: ", sum([v.cost for v in values]))
            print("CP_SGS done")
            all_results[original_domain_name]["cp_sgs"].append(
                sum([v.cost for v in values])
            )
        # PILE
        domain: RCPSP = test_domain
        do_solver = SolvingMethod.PILE
        domain.set_inplace_environment(False)
        state = domain.get_initial_state()
        solver = DOSolver(
            policy_method_params=PolicyMethodParams(
                base_policy_method=BasePolicyMethod.FOLLOW_GANTT,
                delta_index_freedom=0,
                delta_time_freedom=0,
            ),
            method=do_solver,
        )
        solver.solve(domain_factory=lambda: domain)
        print(do_solver)
        states, actions, values = rollout_episode(
            domain=domain,
            solver=solver,
            from_memory=state,
            max_steps=500,
            verbose=False,
            outcome_formatter=lambda o: f"{o.observation} - cost: {o.value.cost:.2f}",
        )
        print("Cost: ", sum([v.cost for v in values]))
        print("PILE done")
        all_results[original_domain_name]["pile"].append(sum([v.cost for v in values]))
        # GPHH
        with open("cp_reference_permutations") as json_file:
            cp_reference_permutations = json.load(json_file)
        with open("cp_reference_makespans") as json_file:
            cp_reference_makespans = json.load(json_file)
        for i in range(repeat_runs):
            domain.set_inplace_environment(False)
            set_feature = {
                FeatureEnum.EARLIEST_FINISH_DATE,
                FeatureEnum.EARLIEST_START_DATE,
                FeatureEnum.LATEST_FINISH_DATE,
                FeatureEnum.LATEST_START_DATE,
                FeatureEnum.N_PREDECESSORS,
                FeatureEnum.N_SUCCESSORS,
                FeatureEnum.ALL_DESCENDANTS,
                FeatureEnum.RESSOURCE_REQUIRED,
                FeatureEnum.RESSOURCE_AVG,
                FeatureEnum.RESSOURCE_MAX,
                # FeatureEnum.RESSOURCE_MIN,
                FeatureEnum.RESSOURCE_NZ_MIN,
            }
            pset = PrimitiveSet("main", len(set_feature))
            pset.addPrimitive(operator.add, 2)
            pset.addPrimitive(operator.sub, 2)
            pset.addPrimitive(operator.mul, 2)
            pset.addPrimitive(protected_div, 2)
            pset.addPrimitive(max_operator, 2)
            pset.addPrimitive(min_operator, 2)
            pset.addPrimitive(operator.neg, 1)
            # pset.addPrimitive(operator.pow, 2)
            params_gphh = ParametersGPHH(
                set_feature=set_feature,
                set_primitves=pset,
                tournament_ratio=0.2,
                pop_size=40,
                n_gen=20,
                min_tree_depth=1,
                max_tree_depth=3,
                crossover_rate=0.7,
                mutation_rate=0.3,
                base_policy_method=BasePolicyMethod.SGS_READY,
                delta_index_freedom=0,
                delta_time_freedom=0,
                deap_verbose=True,
                evaluation=EvaluationGPHH.SGS_DEVIATION,
                permutation_distance=PermutationDistance.KTD,
                # permutation_distance=PermutationDistance.KTD_HAMMING,
            )
            solver = GPHH(
                training_domains=training_domains,
                weight=-1,
                verbose=False,
                reference_permutations=cp_reference_permutations,
                # reference_makespans=cp_reference_makespans,
                training_domains_names=training_domains_names,
                params_gphh=params_gphh,
                # set_feature=set_feature,
            )
            solver.solve(domain_factory=lambda: domain)
            domain: RCPSP = test_domain
            domain.set_inplace_environment(False)
            state = domain.get_initial_state()
            solver.set_domain(domain)
            states, actions, values = rollout_episode(
                domain=domain,
                max_steps=1000,
                solver=solver,
                from_memory=state,
                verbose=False,
                outcome_formatter=lambda o: f"{o.observation} - cost: {o.value.cost:.2f}",
            )
            print("One GPHH done")
            print("Best evolved heuristic: ", solver.best_heuristic)
            print("Cost: ", sum([v.cost for v in values]))
            all_results[original_domain_name]["gphh"].append(
                sum([v.cost for v in values])
            )
    print("##### ALL RESULTS #####")
    for test_domain_str in test_domain_names:
        print(test_domain_str, " :")
        for algo_key in all_results[test_domain_str].keys():
            print("\t", algo_key, ": ")
            print("\t\t all runs:", all_results[test_domain_str][algo_key])
            print("\t\t mean:", np.mean(all_results[test_domain_str][algo_key]))

def fitness_makespan_correlation():
    # domain: RCPSP = load_domain("j301_1.sm")
    domain: RCPSP = load_domain(file_path=get_complete_path("j1201_9.sm"))
    training_domains_names = ["j301_" + str(i) + ".sm" for i in range(1, 11)]
    # training_domains_names = ["j1201_9.sm"]
    # evaluation = EvaluationGPHH.PERMUTATION_DISTANCE
    # evaluation = EvaluationGPHH.SGS
    evaluation = EvaluationGPHH.SGS_DEVIATION
    training_domains = []
    for td in training_domains_names:
        training_domains.append(load_domain(file_path=get_complete_path(td)))
    with open("cp_reference_permutations") as json_file:
        cp_reference_permutations = json.load(json_file)
    with open("cp_reference_makespans") as json_file:
        cp_reference_makespans = json.load(json_file)
    set_feature = {
        FeatureEnum.EARLIEST_FINISH_DATE,
        FeatureEnum.EARLIEST_START_DATE,
        FeatureEnum.LATEST_FINISH_DATE,
        FeatureEnum.LATEST_START_DATE,
        FeatureEnum.N_PREDECESSORS,
        FeatureEnum.N_SUCCESSORS,
        FeatureEnum.ALL_DESCENDANTS,
        FeatureEnum.RESSOURCE_REQUIRED,
        FeatureEnum.RESSOURCE_AVG,
        FeatureEnum.RESSOURCE_MAX,
        # FeatureEnum.RESSOURCE_MIN,
        FeatureEnum.RESSOURCE_NZ_MIN,
    }
    pset = PrimitiveSet("main", len(set_feature))
    pset.addPrimitive(operator.add, 2)
    pset.addPrimitive(operator.sub, 2)
    pset.addPrimitive(operator.mul, 2)
    pset.addPrimitive(protected_div, 2)
    pset.addPrimitive(max_operator, 2)
    pset.addPrimitive(min_operator, 2)
    pset.addPrimitive(operator.neg, 1)
    params_gphh = ParametersGPHH(
        set_feature=set_feature,
        set_primitves=pset,
        tournament_ratio=0.1,
        pop_size=10,
        n_gen=1,
        min_tree_depth=1,
        max_tree_depth=3,
        crossover_rate=0.7,
        mutation_rate=0.3,
        base_policy_method=BasePolicyMethod.SGS_READY,
        delta_index_freedom=0,
        delta_time_freedom=0,
        deap_verbose=True,
        evaluation=evaluation,
        permutation_distance=PermutationDistance.KTD,
        # permutation_distance=PermutationDistance.KTD_HAMMING,
    )
    solver = GPHH(
        training_domains=training_domains,
        weight=-1,
        verbose=True,
        reference_permutations=cp_reference_permutations,
        # reference_makespans=cp_reference_makespans,
        training_domains_names=training_domains_names,
        params_gphh=params_gphh,
    )
    solver.solve(domain_factory=lambda: domain)
    solver.permutation_distance = PermutationDistance.KTD
    solver.init_reference_permutations(cp_reference_permutations, training_domains_names)
    random_pop = pop = solver.toolbox.population(n=100)
    print(random_pop)
    out = "f_sgs_train\tf_sgs_dev_train\tf_perm_train\tmk_test\n"
    for ind in random_pop:
        fitness_sgs = solver.evaluate_heuristic(ind, solver.training_domains)[0]
        fitness_sgs_dev = solver.evaluate_heuristic_sgs_deviation(
            ind, solver.training_domains
        )[0]
        fitness_perm = solver.evaluate_heuristic_permutation(
            ind, solver.training_domains
        )[0]
        gphh_policy = GPHHPolicy(
            domain=domain,
            func_heuristic=solver.toolbox.compile(expr=ind),
            features=list(set_feature),
            params_gphh=params_gphh,
        )
        domain.set_inplace_environment(False)
        state = domain.get_initial_state()
        states, actions, values = rollout_episode(
            domain=domain,
            max_steps=1000,
            solver=gphh_policy,
            from_memory=state,
            verbose=False,
            outcome_formatter=lambda o: f"{o.observation} - cost: {o.value.cost:.2f}",
        )
        policy_makespan = states[-1].t
        out += (
            str(fitness_sgs)
            + "\t"
            + str(fitness_sgs_dev)
            + "\t"
            + str(fitness_perm)
            + "\t"
            + str(policy_makespan)
            + "\n"
        )
        print(out)
        print("---------")
    print("DONE")
    print(out)

def compare_settings():
    test_domain_names = ["j1201_1.sm"]
    training_domains_names = ["j301_" + str(i) + ".sm" for i in range(2, 11)]
    domains_loaded = []
    for td in training_domains_names:
        domains_loaded.append(load_domain(get_complete_path(td)))
    n_walks = 5
    all_settings = []
    params1 = ParametersGPHH.default()
    params1.base_policy_method = BasePolicyMethod.SGS_PRECEDENCE
    all_settings.append(params1)
    params2 = ParametersGPHH.default()
    params2.base_policy_method = BasePolicyMethod.SGS_INDEX_FREEDOM
    params2.delta_index_freedom = 5
    all_settings.append(params2)
    params3 = ParametersGPHH.default()
    params3.base_policy_method = BasePolicyMethod.SGS_READY
    all_settings.append(params3)
    params4 = ParametersGPHH.default()
    params4.base_policy_method = BasePolicyMethod.SGS_STRICT
    all_settings.append(params4)
    params5 = ParametersGPHH.default()
    params5.base_policy_method = BasePolicyMethod.SGS_TIME_FREEDOM
    params5.delta_time_freedom = 5
    all_settings.append(params5)
    all_results = {}
    for dom in test_domain_names:
        all_results[dom] = {}
        for par in all_settings:
            print("par: ", par.base_policy_method)
            all_results[dom][par.base_policy_method] = []
    for params in all_settings:
        for i in range(n_walks):
            print("params: ", params.base_policy_method)
            print("walk #", i)
            domain: RCPSP = load_domain(get_complete_path("j301_1.sm"))
            domain.set_inplace_environment(False)
            solver = GPHH(
                training_domains=domains_loaded,
                weight=-1,
                verbose=False,
                params_gphh=params,
            )
            solver.solve(domain_factory=lambda: domain)
            for test_domain_str in test_domain_names:
                domain: RCPSP = load_domain(get_complete_path(test_domain_str))
                domain.set_inplace_environment(False)
                state = domain.get_initial_state()
                solver.set_domain(domain)
                states, actions, values = rollout_episode(
                    domain=domain,
                    max_steps=1000,
                    solver=solver,
                    from_memory=state,
                    verbose=False,
                    outcome_formatter=lambda o: f"{o.observation} - cost: {o.value.cost:.2f}",
                )
                print("One GPHH done")
                print("Best evolved heuristic: ", solver.best_heuristic)
                print("Cost: ", sum([v.cost for v in values]))
                all_results[test_domain_str][params.base_policy_method].append(
                    sum([v.cost for v in values])
                )
    print("##### ALL RESULTS #####")
    for test_domain_str in test_domain_names:
        print(test_domain_str, " :")
        for param_key in all_results[test_domain_str].keys():
            print("\t", param_key, ": ")
            print("\t\t all runs:", all_results[test_domain_str][param_key])
            print("\t\t mean:", np.mean(all_results[test_domain_str][param_key]))

def run_gphh_with_settings():
    domain: RCPSP = load_domain(get_complete_path("j301_1.sm"))
    training_domains = [
        load_domain(get_complete_path("j301_2.sm")),
        load_domain(get_complete_path("j301_3.sm")),
        load_domain(get_complete_path("j301_4.sm")),
        load_domain(get_complete_path("j301_5.sm")),
        load_domain(get_complete_path("j301_6.sm")),
        load_domain(get_complete_path("j301_7.sm")),
        load_domain(get_complete_path("j301_8.sm")),
        load_domain(get_complete_path("j301_9.sm")),
        load_domain(get_complete_path("j301_10.sm")),
    ]
    domain.set_inplace_environment(False)
    state = domain.get_initial_state()
    set_feature = {
        FeatureEnum.EARLIEST_FINISH_DATE,
        FeatureEnum.EARLIEST_START_DATE,
        FeatureEnum.LATEST_FINISH_DATE,
        FeatureEnum.LATEST_START_DATE,
        FeatureEnum.PRECEDENCE_DONE,
        FeatureEnum.ALL_DESCENDANTS,
        FeatureEnum.RESSOURCE_AVG,
    }
    pset = PrimitiveSet("main", len(set_feature))
    pset.addPrimitive(operator.add, 2)
    pset.addPrimitive(operator.sub, 2)
    pset.addPrimitive(operator.mul, 2)
    pset.addPrimitive(protected_div, 2)
    pset.addPrimitive(max_operator, 2)
    pset.addPrimitive(min_operator, 2)
    pset.addPrimitive(operator.neg, 1)
    params_gphh = ParametersGPHH(
        set_feature=set_feature,
        set_primitves=pset,
        tournament_ratio=0.25,
        pop_size=20,
        n_gen=20,
        min_tree_depth=1,
        max_tree_depth=5,
        crossover_rate=0.7,
        mutation_rate=0.1,
        base_policy_method=BasePolicyMethod.SGS_READY,
        delta_index_freedom=0,
        delta_time_freedom=0,
        deap_verbose=True,
    )
    solver = GPHH(
        training_domains=training_domains,
        weight=-1,
        verbose=True,
        params_gphh=params_gphh,
    )
    solver.solve(domain_factory=lambda: domain)
    states, actions, values = rollout_episode(
        domain=domain,
        max_steps=1000,
        solver=solver,
        from_memory=state,
        verbose=False,
        outcome_formatter=lambda o: f"{o.observation} - cost: {o.value.cost:.2f}",
    )
    print("Cost :", sum([v.cost for v in values]))

def run_pooled_gphh():
    # Load pre-trained GPHH heuristics from disk and aggregate them in a pooled policy.
    n_runs = 1
    pool_size = 5
    remove_extreme_values = 1
    makespans = []
    domain: RCPSP = load_domain(file_path=get_complete_path("j1201_9.sm"))
    # domain: RCPSP = load_domain("j1201_9.sm")
    training_domains_names = ["j301_" + str(i) + ".sm" for i in range(1, 11)]
    training_domains = []
    for td in training_domains_names:
        training_domains.append(load_domain(file_path=get_complete_path(td)))
    for i in range(n_runs):
        domain.set_inplace_environment(False)
        state = domain.get_initial_state()
        with open("cp_reference_permutations") as json_file:
            cp_reference_permutations = json.load(json_file)
        heuristics = []
        func_heuristics = []
        folder = "./trained_gphh_heuristics"
        files = os.listdir(folder)
        solver = GPHH(
            training_domains=training_domains,
            domain_model=training_domains[0],
            weight=-1,
            verbose=True,
            reference_permutations=cp_reference_permutations,
            training_domains_names=training_domains_names,
        )
        print("files: ", files)
        for f in files:
            full_path = folder + "/" + f
            print("f: ", full_path)
            tmp = pickle.load(open(full_path, "rb"))
            heuristics.append(tmp)
            func_heuristics.append(solver.toolbox.compile(expr=tmp))
        # for pool in range(pool_size):
        #     solver = GPHH(training_domains=training_domains,
        #                   weight=-1,
        #                   verbose=True,
        #                   reference_permutations=cp_reference_permutations,
        #                   training_domains_names=training_domains_names)
        #     solver.solve(domain_factory=lambda: domain)
        #     func_heuristics.append(solver.func_heuristic)
        pooled_gphh_solver = PooledGPHHPolicy(
            domain=domain,
            domain_model=training_domains[0],
            func_heuristics=func_heuristics,
            features=list(solver.params_gphh.set_feature),
            params_gphh=solver.params_gphh,
            pool_aggregation_method=PoolAggregationMethod.MEAN,
            remove_extremes_values=remove_extreme_values,
        )
        states, actions, values = rollout_episode(
            domain=domain,
            max_steps=1000,
            solver=pooled_gphh_solver,
            from_memory=state,
            verbose=False,
            outcome_formatter=lambda o: f"{o.observation} - cost: {o.value.cost:.2f}",
        )
        print("Cost :", sum([v.cost for v in values]))
        makespans.append(sum([v.cost for v in values]))
    print("makespans: ", makespans)

def run_gphh():
    import time

    n_runs = 1
    makespans = []
    domain: RCPSP = load_domain(file_path=get_complete_path("j601_1.sm"))
    training_domains_names = ["j601_" + str(i) + ".sm" for i in range(1, 11)]
    training_domains = []
    for td in training_domains_names:
        training_domains.append(load_domain(file_path=get_complete_path(td)))
    runtimes = []
    for i in range(n_runs):
        domain.set_inplace_environment(False)
        state = domain.get_initial_state()
        with open("cp_reference_permutations") as json_file:
            cp_reference_permutations = json.load(json_file)
        # with open('cp_reference_makespans') as json_file:
        #     cp_reference_makespans = json.load(json_file)
        start = time.time()
        solver = GPHH(
            training_domains=training_domains,
            domain_model=training_domains[3],
            weight=-1,
            verbose=True,
            reference_permutations=cp_reference_permutations,
            # reference_makespans=cp_reference_makespans,
            training_domains_names=training_domains_names,
            params_gphh=ParametersGPHH.fast_test(),
            # params_gphh=ParametersGPHH.default(),
        )
        solver.solve(domain_factory=lambda: domain)
        end = time.time()
        runtimes.append(end - start)
        heuristic = solver.hof
        print("ttype:", solver.best_heuristic)
        folder = "./trained_gphh_heuristics"
        if not os.path.exists(folder):
            os.makedirs(folder)
        file = open(os.path.join(folder, "test_gphh_" + str(i) + ".pkl"), "wb")
        pickle.dump(dict(hof=heuristic), file)
        file.close()
        solver.set_domain(domain)
        states, actions, values = rollout_episode(
            domain=domain,
            max_steps=1000,
            solver=solver,
            from_memory=state,
            verbose=False,
            outcome_formatter=lambda o: f"{o.observation} - cost: {o.value.cost:.2f}",
        )
        print("Cost :", sum([v.cost for v in values]))
        makespans.append(sum([v.cost for v in values]))
    print("makespans: ", makespans)
    print("runtimes: ", runtimes)
    print("runtime - mean: ", np.mean(runtimes))

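# Hypothetical entry point, not part of the original module: a minimal sketch of how
# one of the example functions above could be run directly as a script.
if __name__ == "__main__":
    run_gphh()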