def __init__(self, model, bounds=None):
    """Set up the solver around a constrained, Lagrangian LAO* instance.

    Args:
        model: Problem model, passed unchanged to ``LAOStar``.
        bounds: Constraint bounds forwarded to ``LAOStar``. When omitted, a
            fresh empty list is created for this instance.
    """
    self.model = model
    # A literal ``[]`` default is created once and shared by every instance
    # built without ``bounds``; use a None sentinel and allocate per call.
    self.bounds = [] if bounds is None else bounds
    self.algo = LAOStar(self.model,
                        constrained=True,
                        bounds=self.bounds,
                        Lagrangian=True)
    # Expose the search graph owned by the underlying LAO* instance.
    self.graph = self.algo.graph
def draw_lower_envelop():
    """Plot the weighted root value of the grid problem over a sweep of alpha.

    For each of 100 alpha values in [0, 0.6] a fresh constrained, Lagrangian
    ``LAOStar`` is solved on a (5, 5) grid model with a single bound of 1.5.
    The quantity plotted against alpha is
    ``value[0] + alpha * (value[1] - bound)`` read from the root node.
    """
    bound = 1.5
    grid_size = (5, 5)
    start_cell = (0, 0)
    goal_cell = (4, 4)
    model = GRIDModel(grid_size, start_cell, goal_cell,
                      prob_right_transition=0.85)

    alpha_list = list(linspace(0, 0.6, 100))
    weighted_value_list = []

    for a in alpha_list:
        print(a)  # progress indicator

        algo = LAOStar(model,
                       constrained=True,
                       bounds=[bound],
                       alpha=[a],
                       Lagrangian=True)
        policy = algo.solve()

        root_value = algo.graph.root.value
        weighted_value_list.append(root_value[0] + a * (root_value[1] - bound))

    # NOTE(review): the original indentation was lost; this call is assumed to
    # follow the loop (printing the policy of the last alpha) - confirm.
    model.print_policy(policy)

    print(alpha_list)
    print(weighted_value_list)

    plt.plot(alpha_list, weighted_value_list, '*')
    # Red marker: reference point recorded for bound = 1.5.
    # Points noted for other bounds:
    #   bound = 2: (0.05775379446627887, 9.357673380261254)
    #   bound = 3: (0.013834705882, 9.3420861953)
    plt.plot(0.15374170009084218, 9.42260840432311, 'r*')
    plt.show()
def draw_lower_envelop_multiple_bounds():
    """Plot the weighted root value over a 2-D grid of multipliers.

    This is the two-separate-constraints variant: for every pair
    ``(a_1, a_2)`` taken from a 20 x 20 grid (``a_1`` in [0, 100], ``a_2`` in
    [0, 20]) a constrained, Lagrangian ``LAOStar`` is solved on a (5, 5)
    ``GRIDModel_multiple_bounds`` with bounds ``[1.5, 10]``. The value plotted
    in 3-D is
    ``value[0] + a_1 * (value[1] - bounds[0]) + a_2 * (value[2] - bounds[1])``
    read from the root node.
    """
    init_state = (0, 0)
    size = (5, 5)
    goal = (4, 4)
    model = GRIDModel_multiple_bounds(size, init_state, goal,
                                      prob_right_transition=0.85)

    alpha_1_range = list(linspace(0.0, 100, 20))
    alpha_2_range = list(linspace(0.0, 20, 20))
    bounds = [1.5, 10]

    alpha_1_list = []
    alpha_2_list = []
    weighted_value_list = []

    for a_1 in alpha_1_range:
        for a_2 in alpha_2_range:
            print([a_1, a_2])  # progress indicator
            alpha_1_list.append(a_1)
            alpha_2_list.append(a_2)

            algo = LAOStar(model, constrained=True, bounds=bounds,
                           alpha=[a_1, a_2], Lagrangian=True)
            policy = algo.solve()
            # Retry with a brand-new solver while solve() reports False.
            # NOTE(review): presumably False signals a failed solve; if the
            # solver is deterministic this loop never terminates - confirm.
            while policy == False:  # noqa: E712 - keep the original comparison
                algo = LAOStar(model, constrained=True, bounds=bounds,
                               alpha=[a_1, a_2], Lagrangian=True)
                policy = algo.solve()

            value = algo.graph.root.value
            weighted_value = (value[0]
                              + a_1 * (value[1] - bounds[0])
                              + a_2 * (value[2] - bounds[1]))
            weighted_value_list.append(weighted_value)

    print(alpha_1_list)
    print(alpha_2_list)
    print(weighted_value_list)

    fig = plt.figure()
    # ``Axes3D(fig)`` no longer attaches itself to the figure on current
    # matplotlib, which left ``plt.plot`` drawing on a new 2-D axes. Create
    # the 3-D axes through the figure and plot on it explicitly.
    ax = fig.add_subplot(111, projection='3d')
    ax.plot(alpha_1_list, alpha_2_list, weighted_value_list, '*')
    plt.show()
def test_LAOStar():
    """Solve one two-constraint grid instance and print its root values.

    Runs a constrained, Lagrangian ``LAOStar`` with fixed multipliers
    ``[0.2, 0.1]`` and bounds ``[1.5, -0.7]`` on a (5, 5)
    ``GRIDModel_multiple_bounds``, then prints ``value[0]``, ``value[1]`` and
    the weighted value
    ``value[0] + alpha[0] * (value[1] - bounds[0]) + alpha[1] * (value[2] - bounds[1])``.
    """
    alpha = [0.2, 0.1]
    bounds = [1.5, -0.7]

    model = GRIDModel_multiple_bounds((5, 5), (0, 0), (4, 4),
                                      prob_right_transition=0.85)
    algo = LAOStar(model,
                   constrained=True,
                   bounds=bounds,
                   alpha=alpha,
                   Lagrangian=True)
    algo.solve()

    root_value = algo.graph.root.value
    first_penalty = alpha[0] * (root_value[1] - bounds[0])
    second_penalty = alpha[1] * (root_value[2] - bounds[1])
    # Same left-to-right summation order as the single-expression form.
    weighted_value = root_value[0] + first_penalty + second_penalty

    for item in (root_value[0], root_value[1], weighted_value):
        print(item)
def draw_lower_envelop_multiple_bounds_lb_ub():
    """Plot the weighted root value over a 2-D multiplier grid, bounds [1.5, -0.9].

    This is the two-separate-constraints variant: for every pair
    ``(a_1, a_2)`` from a 15 x 15 grid (``a_1`` in [0.01, 1.0], ``a_2`` in
    [0.01, 0.3]) a constrained, Lagrangian ``LAOStar`` is solved on a (5, 5)
    ``GRIDModel_multiple_bounds``. When ``solve()`` returns ``None`` the point
    is recorded with the placeholder value -200 and "seems unbounded below" is
    printed; otherwise the recorded value is
    ``value[0] + a_1 * (value[1] - bounds[0]) + a_2 * (value[2] - bounds[1])``.
    The 3-D plot restricts the z-axis to [9.0, 9.4], so placeholder points lie
    outside the displayed range.
    """
    init_state = (0, 0)
    size = (5, 5)
    goal = (4, 4)
    model = GRIDModel_multiple_bounds(size, init_state, goal,
                                      prob_right_transition=0.85)

    alpha_1_range = list(linspace(0.01, 1.0, 15))
    alpha_2_range = list(linspace(0.01, 0.3, 15))
    bounds = [1.5, -0.9]

    alpha_1_list = []
    alpha_2_list = []
    weighted_value_list = []

    for a_1 in alpha_1_range:
        for a_2 in alpha_2_range:
            print([a_1, a_2])  # progress indicator
            alpha_1_list.append(a_1)
            alpha_2_list.append(a_2)

            algo = LAOStar(model, constrained=True, bounds=bounds,
                           alpha=[a_1, a_2], Lagrangian=True)
            policy = algo.solve()

            if policy is None:
                # Placeholder for multiplier pairs without a policy.
                weighted_value = -200
                weighted_value_list.append(weighted_value)
                print("seems unbounded below")
            else:
                value = algo.graph.root.value
                weighted_value = (value[0]
                                  + a_1 * (value[1] - bounds[0])
                                  + a_2 * (value[2] - bounds[1]))
                weighted_value_list.append(weighted_value)

    print(alpha_1_list)
    print(alpha_2_list)
    print(weighted_value_list)

    fig = plt.figure()
    # ``Axes3D(fig)`` no longer attaches itself to the figure on current
    # matplotlib, which left ``plt.plot`` drawing on a new 2-D axes. Create
    # the 3-D axes through the figure and plot on it explicitly.
    ax = fig.add_subplot(111, projection='3d')
    ax.plot(alpha_1_list, alpha_2_list, weighted_value_list, '*')
    ax.set_zlim(bottom=9.0, top=9.4)
    plt.show()
class CSSPSolver(object):
    """Dual (Lagrangian multiplier) search built on a constrained LAO* solver.

    The instance owns one ``LAOStar`` created with ``constrained=True`` and
    ``Lagrangian=True``. The search methods repeatedly change
    ``self.algo.alpha`` and re-solve, reading the objective and constraint
    components from ``self.algo.graph.root.value``.
    """

    # Tolerance used to decide that L(u) matches the predicted value L.
    _LINE_TOLERANCE = 0.1**10
    # Tolerance for the outer coordinate sweep (about 1e-300, i.e. the sweep
    # stops only when the value is numerically unchanged).
    _SWEEP_TOLERANCE = 0.1**300

    def __init__(self, model, bounds=None):
        """Create the solver.

        Args:
            model: Problem model, passed unchanged to ``LAOStar``.
            bounds: Constraint bounds forwarded to ``LAOStar``. When omitted,
                a fresh empty list is created for this instance.
        """
        self.model = model
        # A literal ``[]`` default would be a single list shared by every
        # instance created without ``bounds``.
        self.bounds = [] if bounds is None else bounds
        self.algo = LAOStar(self.model,
                            constrained=True,
                            bounds=self.bounds,
                            Lagrangian=True)
        self.graph = self.algo.graph

    def solve(self, initial_alpha_set):
        """Run the multi-constraint dual search.

        Args:
            initial_alpha_set: One ``(low, high)`` multiplier pair per
                constraint; see ``find_dual_multiple_bounds``.
        """
        self.find_dual_multiple_bounds(initial_alpha_set)
        # self.anytime_update()

    def find_dual(self, initial_alpha_set):
        """Search the multiplier of a single constraint and print the result.

        The problem is solved at ``initial_alpha_set[0][0]`` (the "zero
        case") and at ``initial_alpha_set[0][1]`` (the "infinite case"). Each
        solution defines a line ``f + alpha * g`` with ``f = value[0]`` and
        ``g = value[1] - bounds[0]``. The loop then re-solves at the alpha
        where the two current lines intersect and replaces one of them until
        the re-solved value matches the intersection value.

        Args:
            initial_alpha_set: Sequence whose first item is the
                ``(low, high)`` multiplier pair.

        Raises:
            ValueError: If a re-solve yields ``g == 0`` (not implemented), or
                yields a value above the intersection value.
        """
        start_time = time.time()

        # ---- phase 1 ----
        # zero case
        self.algo.alpha = [initial_alpha_set[0][0]]
        self.algo.solve()
        value = self.algo.graph.root.value
        f_plus = value[0]
        g_plus = value[1] - self.bounds[0]

        print(time.time() - start_time)
        print("-------------------------------")
        print(f_plus + self.algo.alpha[0] * g_plus)
        print("-------------------------------")

        # infinite case
        self.resolve_LAOStar([initial_alpha_set[0][1]])
        value = self.algo.graph.root.value
        f_minus = value[0]
        g_minus = value[1] - self.bounds[0]

        print(time.time() - start_time)
        print("-------------------------------")
        print(f_minus + self.algo.alpha[0] * g_minus)
        print("-------------------------------")

        # Best cost seen among solutions with g < 0. This used to be reset to
        # infinity on every iteration, which discarded the running minimum.
        UB = float('inf')

        # phase 1 iteration to compute alpha
        while True:
            # Alpha at which the "plus" and "minus" lines intersect.
            alpha = (f_minus - f_plus) / (g_plus - g_minus)
            L = f_plus + alpha * g_plus

            # evaluate L(u), f, g at the new alpha
            self.resolve_LAOStar([alpha])
            print(time.time() - start_time)

            value = self.algo.graph.root.value
            f = value[0]
            g = value[1] - self.bounds[0]
            L_u = f + alpha * g

            print("-------------------------------")
            print(L_u)
            print("-------------------------------")

            matches_line = abs(L_u - L) < self._LINE_TOLERANCE
            if matches_line and g < 0:
                LB = L_u
                UB = min(f, UB)
                break
            elif matches_line and g > 0:
                LB = L_u
                UB = min(UB, f_minus)
                break
            elif L_u < L and g > 0:
                f_plus = f
                g_plus = g
            elif L_u < L and g < 0:
                f_minus = f
                g_minus = g
                UB = min(UB, f)
            elif g == 0:
                raise ValueError(
                    "opt solution found during phase 1. not implented for this case yet."
                )
            elif L_u > L:
                print(L_u)
                print(L)
                raise ValueError("impossible case. Something must be wrong")

        if LB >= UB:
            # optimal solution found during phase 1
            print("optimal solution found during phase 1!")
        else:
            print("dual optima with the following values:")
            print(" alpha:" + str(alpha))
            print(" L: " + str(L))
            print(" f: " + str(f))
            print(" g: " + str(g))

        # NOTE(review): the original indentation was lost; the elapsed time is
        # assumed to be reported in both cases - confirm.
        print("elapsed time: " + str(time.time() - start_time))

    def find_dual_multiple_bounds(self, initial_alpha_set):
        """Coordinate-wise multiplier search for several constraints.

        After solving once with every multiplier at its low value and once
        with every multiplier at its high value, the method sweeps over the
        constraints. For constraint ``k`` it re-solves at the low and high
        value of ``alpha[k]`` (the other multipliers stay fixed), then
        repeatedly moves ``alpha[k]`` to the point where the two resulting
        planes ``f + dot(alpha, g)`` intersect along coordinate ``k``. Sweeps
        repeat until the value after a sweep equals the value before it.

        Args:
            initial_alpha_set: One ``(low, high)`` multiplier pair per
                constraint, in the same order as ``self.bounds``.

        Raises:
            ValueError: If a re-solve yields ``g[k] == 0`` (not implemented),
                or yields a value above the predicted intersection value.
        """
        # TODO: functionize "solve_LAOStar with 'resolve' as an option"
        num_bounds = len(initial_alpha_set)

        # overall zero case
        self.algo.alpha = [alpha_set[0] for alpha_set in initial_alpha_set]
        self.algo.solve()
        f_plus, g_plus = self._root_objective_and_slack()

        print("---------- zero case --------")
        for g_temp in g_plus:
            print(g_temp)
        print(f_plus + dot(self.algo.alpha, g_plus))
        # TODO: Need to check whether this solution is feasible, which is the
        # case that we already found optima.

        # overall infinite case
        self.algo.alpha = [alpha_set[1] for alpha_set in initial_alpha_set]
        self.resolve_LAOStar()
        f_minus, g_minus = self._root_objective_and_slack()

        print("---------- infinite case --------")
        print(self.algo.alpha)
        for g_temp in g_minus:
            print(g_temp)
        print(f_minus + dot(self.algo.alpha, g_minus))
        # TODO: Need to check whether this solution is infeasible, which is
        # the case that the problem is infeasible, or alpha_max is not large
        # enough.

        self.algo.alpha = [alpha_set[0] for alpha_set in initial_alpha_set]
        L_new = f_plus + dot(self.algo.alpha, g_plus)

        while True:
            L_prev = L_new

            # looping over each bound (coordinate)
            for bound_idx in range(num_bounds):
                print("*" * 20)

                # zero case for this coordinate
                self.algo.alpha[bound_idx] = initial_alpha_set[bound_idx][0]
                self.resolve_LAOStar()
                f_plus, g_plus = self._root_objective_and_slack()
                print(self.algo.alpha)
                print(f_plus + dot(self.algo.alpha, g_plus))

                # infinite case for this coordinate
                self.algo.alpha[bound_idx] = initial_alpha_set[bound_idx][1]
                self.resolve_LAOStar()
                f_minus, g_minus = self._root_objective_and_slack()
                print(self.algo.alpha)
                print(f_minus + dot(self.algo.alpha, g_minus))

                while True:
                    # alpha[bound_idx] at which the "plus" and "minus" planes
                    # meet, holding every other multiplier fixed.
                    new_alpha_comp = (f_plus - f_minus)
                    for bound_idx_inner in range(num_bounds):
                        if bound_idx_inner == bound_idx:
                            continue
                        new_alpha_comp += self.algo.alpha[bound_idx_inner] * (
                            g_plus[bound_idx_inner] - g_minus[bound_idx_inner])
                    new_alpha_comp = new_alpha_comp / (g_minus[bound_idx] -
                                                       g_plus[bound_idx])

                    self.algo.alpha[bound_idx] = new_alpha_comp
                    print(self.algo.alpha)

                    L = f_plus + dot(self.algo.alpha, g_plus)
                    print(L)

                    # evaluate L(u), f, g at the new multipliers
                    self.resolve_LAOStar()
                    f, g = self._root_objective_and_slack()
                    L_u = f + dot(self.algo.alpha, g)

                    g_k = g[bound_idx]
                    matches_plane = abs(L_u - L) < self._LINE_TOLERANCE
                    if matches_plane and (g_k < 0 or g_k > 0):
                        # This coordinate has converged.
                        break
                    elif L_u < L and g_k > 0:
                        f_plus = f
                        g_plus = g
                    elif L_u < L and g_k < 0:
                        f_minus = f
                        g_minus = g
                    elif g_k == 0:
                        raise ValueError(
                            "opt solution found during phase 1. not implented for this case yet."
                        )
                    elif L_u > L:
                        print(L_u)
                        print(L)
                        raise ValueError(
                            "impossible case. Something must be wrong")

            # optimality check for this entire envelop
            L_new = L_u
            if abs(L_new - L_prev) < self._SWEEP_TOLERANCE:
                break

        print("optimal solution found during phase 1!")
        print("dual optima with the following values:")
        # The multipliers live on the LAO* instance; this method has no local
        # ``alpha`` (the previous ``str(alpha)`` raised NameError here).
        print(" alpha:" + str(self.algo.alpha))
        print(" L: " + str(L))
        print(" f: " + str(f))
        print(" g: " + str(g))

    def resolve_LAOStar(self, new_alpha=None):
        """Re-solve on the existing search graph, optionally with new multipliers.

        Args:
            new_alpha: Replacement for ``self.algo.alpha``; ``None`` keeps the
                current multipliers.
        """
        if new_alpha is not None:
            self.algo.alpha = new_alpha

        # Refresh the values of every node already in the graph, then update
        # the fringe and continue solving.
        nodes = list(self.algo.graph.nodes.values())
        self.algo.value_iteration(nodes)
        self.algo.update_fringe()
        self.algo.solve()

    def _root_objective_and_slack(self):
        """Return ``(f, g)`` at the root: ``value[0]`` and ``ptw_sub(value[1:], bounds)``."""
        # NOTE(review): ptw_sub is presumably an element-wise subtraction -
        # confirm against its definition.
        value = self.algo.graph.root.value
        return value[0], ptw_sub(value[1:], self.bounds)
# Sweep alpha over [0, 0.6] for a single bound of 2. Each alpha gets a fresh
# constrained, Lagrangian LAO* solve; the root value is printed and the
# weighted value value[0] + a * (value[1] - bound) is collected.
# ``size``, ``init_state`` and ``goal`` are defined before this fragment.
model = GRIDModel(size, init_state, goal, prob_right_transition=0.85)

bound = 2
weighted_value_list = []
alpha_list = list(linspace(0, 0.6, 100))

for a in alpha_list:
    algo = LAOStar(model,
                   constrained=True,
                   bounds=[bound],
                   alpha=[a],
                   Lagrangian=True)
    policy = algo.solve()

    value = algo.graph.root.value
    print(value)

    weighted_value = value[0] + a * (value[1] - bound)
    weighted_value_list.append(weighted_value)

# NOTE(review): the original indentation was lost; this call is assumed to
# follow the loop (printing the policy of the last alpha) - confirm.
model.print_policy(policy)
#!/usr/bin/env python
import sys

from graph import Node, Graph
from LAOStar import LAOStar
from models.grid_model import GRIDModel
from models.LAO_paper_model import LAOModel


def main():
    """Solve the (5, 5) grid problem with a default LAOStar and print the policy."""
    start_cell = (0, 0)
    grid_size = (5, 5)
    goal_cell = (4, 4)

    # ``LAOModel()`` is the alternative model imported above.
    model = GRIDModel(grid_size, start_cell, goal_cell,
                      prob_right_transition=0.85)

    solver = LAOStar(model)
    model.print_policy(solver.solve())


if __name__ == '__main__':
    main()