def VF_obj_fct(x):
    '''Negated, norm-scaled value-function objective for scipy.minimize.

    Converts the flat decision array *x* into a Decision, builds the
    post-decision feature vector [post-decision storage, energy, price],
    normalizes it, and returns the negated VF prediction scaled back by
    the feature norm (so minimizing this maximizes the VF).

    Closure over `self`, `model`, `state`, `t` from the enclosing scope.
    '''
    decision = md.convert_array_to_decision(x)
    features = np.array([
        model.transition(self, decision, state),
        state.energy,
        state.price,
    ])
    norm = np.linalg.norm(features)
    # BUG FIX: guard against division by (near-)zero norm,
    # consistent with the eps guard used in obj_fct.
    normalized_features = features / norm if norm > self.eps else features
    return -norm * self.VF_approximation[t].predict(
        np.array([normalized_features]))
def obj_fct(x):
    '''Negated total objective (contribution plus norm-scaled VF prediction)
    for scipy.minimize.

    Closure over `self`, `model`, `state`, `t` from the enclosing scope.
    '''
    decision = md.convert_array_to_decision(x)
    features = [
        model.transition(self, decision, state),
        state.energy,
        state.price,
    ]
    norm = np.linalg.norm(features)
    # Normalize the feature vector unless its norm is (near-)zero.
    if norm > self.eps:
        scaled_features = features / norm
    else:
        scaled_features = features
    contribution = model.contribution(self, decision, state)
    vf_prediction = self.VF_approximation[t].predict(
        np.array([scaled_features]))
    return -contribution - norm * vf_prediction
def contr_obj_fct(x):
    '''Negated contribution of decision array *x* in the current state.

    Closure over `self`, `model`, `state` from the enclosing scope.
    '''
    decision = md.convert_array_to_decision(x)
    return -model.contribution(self, decision, state)
def approximate_policy_iteration(self, model, dataset, dataset_type):
    '''Performs approximate policy iteration step (policy improvement is invoked within policy evaluation step)
    To apply the final policy to a state, policy_improvement_for_this_state needs to be applied first.

    Parameters:
        model -- optimization model providing transition/contribution etc.
        dataset -- name of the dataset directory (used in input/output paths)
        dataset_type -- prefix of the dataset directory ('<dataset_type> datasets/')
    '''
    for iteration in range(self.max_iter):
        print("ITERATION ", iteration)
        # Policy evaluation (which internally invokes policy improvement)
        # and value-function update for this iteration.
        res = self.approx_policy_evaluation_and_update_VF(model, iteration)
        # Evaluate current optimal policy.
        # NOTE(review): this zeros array is immediately overwritten by the
        # evaluate_policy result below — the assignment looks dead.
        solution_storage = np.zeros(model.t_max)
        solution_storage = self.evaluate_policy(
            model, dataset_type + ' datasets/' + dataset + '/txt/e.txt',
            dataset_type + ' datasets/' + dataset + '/txt/p.txt',
            dataset_type + ' datasets/' + dataset + '/txt/D.txt')
        # Plot and persist each evaluated sample path.
        for k in range(model.number_samplepaths):
            outputfcts.plot_data_and_save(
                model, solution_storage[k], 'R' + '-' + dataset,
                'Results/' + dataset + '/solution_storage plot, ' + dataset +
                ', ' + str(iteration) + 'out of' + str(self.max_iter) +
                ' iterations, m = ' + str(self.episode_max) + ', sample ' +
                str(k) + ' .pdf')
            outputfcts.save_data_in_file(
                solution_storage[k],
                'Results/' + dataset + '/solution_storage, ' + dataset + ', ' +
                str(iteration) + ' out of' + str(self.max_iter) +
                ' iterations, m = ' + str(self.episode_max) + ', sample ' +
                str(k) + ' .txt')
        # Plot slices of value function
        plot_bool_3D = 0  # TODO: pass as a parameter?
        plot_bool_2D = 0
        if plot_bool_3D == 1:
            # 3D surface plots of the VF approximation over (x_wr, R)
            # at fixed exemplary state/decision values.
            plot_tmax = 10 if model.t_max > 11 else model.t_max  # TODO: set accordingly
            for t in range(plot_tmax):
                fig = plt.figure()
                ax = fig.add_subplot(111, projection='3d')
                ax.set_xlabel('x_wr')
                ax.set_ylabel('R')
                ax.set_zlabel('VF')
                X_test = np.array(
                    [[x for i in np.arange(0, model.R_max, 1)]
                     for x in np.arange(0, model.max_discharge, 0.001)])
                Y_test = np.array(
                    [[y for y in np.arange(0, model.R_max, 1)]
                     for i in np.arange(0, model.max_discharge, 0.001)])
                # VF prediction on the grid; the feature vector
                # [post-decision storage, 0.05, 28] is normalized before
                # prediction, matching the normalization used in training.
                Z_test = np.array([[
                    self.VF_approximation[t].predict(
                        np.array([[
                            model.transition(
                                self, md.convert_array_to_decision([
                                    0.019, 0, 0, x, 0, 0
                                ]), md.State(y, 0.05, 23, 0.019)), 0.05, 28
                        ] / np.linalg.norm(
                            np.array([
                                model.transition(
                                    self,
                                    md.convert_array_to_decision(
                                        [0.019, 0, 0, x, 0, 0]),
                                    md.State(y, 0.05, 23, 0.019)), 0.05, 28
                            ]))]))[0][0]
                    for y in np.arange(0, model.R_max, 1)
                ] for x in np.arange(0, model.max_discharge, 0.001)])
                # GPy Version
                # Z_test = np.array([[ self.VF_approximation[t].predict(np.array([[model.transition(self, md.convert_array_to_decision([0.019, 0, 0, x, 0, 0]), md.State(y, 0.05, 28, 0.019)), 0.05, 28]]))[0] for y in np.arange(0, model.R_max, 1)] for x in np.arange(0, model.max_discharge, 0.001)])
                ax.plot_surface(X_test, Y_test, Z_test, rstride=10, cstride=10)
                fig.canvas.set_window_title('VF approximation at time ' +
                                            str(t))
                fig.savefig("Results/" + dataset + "/VF_plot, " +
                            str(self.max_iter) + "iterations, " +
                            str(self.episode_max) + "samples, time" + str(t) +
                            " at iteration " + str(iteration) + ".pdf",
                            bbox_inches='tight')
                plt.show()
        if plot_bool_2D == 1:
            # 2D plots of the post-decision VF over the storage level R
            # at fixed exemplary state/decision values.
            for t in range(model.t_max):
                plt.plot([
                    model.transition(self, md.Decision(1, 0, 0, 3, 0, 1),
                                     md.State(R, 6, 37, 2))
                    for R in range(0, model.R_max, 1)
                ], [
                    self.VF_approximation[t].predict([[
                        model.transition(self, md.Decision(
                            1, 0, 0, 3, 0, 1), md.State(R, 6, 37, 2)), 6, 37
                    ]])[0] for R in range(0, model.R_max, 1)
                ])
                plt.xlabel('Storage R')
                plt.ylabel('Post-decision VF')
                plt.show()
def scipy_policy_improvement_for_this_state(self,
                                            model,
                                            state,
                                            t,
                                            initial_guess,
                                            VF_initial_guess,
                                            contr_initial_guess,
                                            iteration=0):
    '''Policy improvement step with scipy.

    Maximizes (by minimizing the negation with SLSQP) three objectives,
    each from its own initial guess: the full objective (contribution plus
    norm-scaled VF prediction), the value function alone, and the
    contribution alone.

    Returns:
        (solution, VF_solution, contr_solution) -- the three optimal
        decision arrays.
    '''
    # Box-bound building blocks derived from the current storage level and
    # the charge/discharge limits.
    const1 = min(state.storage, model.max_discharge)
    print("const1, storage, max_discharge:", const1, state.storage,
          model.max_discharge)
    const2 = min(model.R_max - state.storage, model.max_charge)
    print("const2, R_max, storage, max_discharge:", const2, model.R_max,
          state.storage, model.max_discharge)
    const3 = min(state.energy, state.demand)

    def obj_fct(x):
        # Negated total objective: contribution + norm-scaled VF prediction.
        decision = md.convert_array_to_decision(x)
        features = [
            model.transition(self, decision, state), state.energy, state.price
        ]
        norm = np.linalg.norm(features)
        normalized_features = features / norm if norm > self.eps else features
        return -model.contribution(
            self, decision, state) - norm * self.VF_approximation[t].predict(
                np.array([normalized_features]))

    def VF_obj_fct(x):
        # Negated norm-scaled VF prediction only.
        decision = md.convert_array_to_decision(x)
        features = np.array([
            model.transition(self, decision, state), state.energy, state.price
        ])
        norm = np.linalg.norm(features)
        # BUG FIX: guard against division by (near-)zero norm, consistent
        # with obj_fct above (previously an unguarded division).
        normalized_features = features / norm if norm > self.eps else features
        return -norm * self.VF_approximation[t].predict(
            np.array([normalized_features]))

    def contr_obj_fct(x):
        # Negated contribution only.
        return -model.contribution(self, md.convert_array_to_decision(x),
                                   state)

    # Approximation: open intervals (since closed intervals are not
    # available as bounds).
    bnds = ((0, const3), (0, const1), (0, state.demand), (0, const2),
            (0, const2), (0, const1))
    beta_d = model.discharge_efficiency
    cons = (
        # Equality: x[0] + beta_d*x[1] + x[2] must equal the demand.
        {
            'type': 'eq',
            'fun': lambda x: x[0] + beta_d * x[1] + x[2] - state.demand
        },
        # x[1] + x[5] must not exceed const1 (discharge limit).
        {
            'type': 'ineq',
            'fun': lambda x: -x[1] - x[5] + const1
        },
        # x[3] + x[4] must not exceed const2 (charge limit).
        {
            'type': 'ineq',
            'fun': lambda x: -x[3] - x[4] + const2
        },
        # x[0] + x[3] must not exceed the available energy.
        {
            'type': 'ineq',
            'fun': lambda x: -x[0] - x[3] + state.energy
        })
    print("Initial guess: ", initial_guess)
    print(
        "Initial guess is feasible:",
        model.is_feasible(self, md.convert_array_to_decision(initial_guess),
                          state))
    solution = minimize(obj_fct,
                        initial_guess,
                        method='SLSQP',
                        bounds=bnds,
                        constraints=cons,
                        options={
                            'eps': self.eps,
                            'ftol': self.ftol
                        }).x
    VF_solution = minimize(VF_obj_fct,
                           VF_initial_guess,
                           method='SLSQP',
                           bounds=bnds,
                           constraints=cons,
                           options={
                               'ftol': self.ftol
                           }).x
    contr_solution = minimize(contr_obj_fct,
                              contr_initial_guess,
                              method='SLSQP',
                              bounds=bnds,
                              constraints=cons,
                              options={
                                  'ftol': self.ftol
                              }).x
    return solution, VF_solution, contr_solution
def policy_improvement_for_this_state(self, model, state, t, iteration=0):
    '''Policy improvement step for one particular state at time t (policy stored as dictionary).

    Depending on self.optimizer_choice, the decision maximizing the
    objective is found with scipy SLSQP (0), a grid search refined by
    SLSQP (1), or the Artelys Knitro based solvers (otherwise).

    Returns:
        The improved decision (md.Decision) for this state.
    '''
    const1 = min(state.storage, model.max_discharge)
    const2 = min(model.R_max - state.storage, model.max_charge)
    const3 = min(state.energy, state.demand)
    if self.optimizer_choice == 0:
        # Set initial guess and optimize directly with scipy (SLSQP).
        initial_guess = model.initial_guess_for_policy_improvement(state)
        res = self.scipy_policy_improvement_for_this_state(
            model, state, t, initial_guess, initial_guess, initial_guess,
            iteration)
        solution = res[0]
        VF_solution = res[1]
        contr_solution = res[2]
    ###########################
    # use gridsearch
    elif self.optimizer_choice == 1:
        # TODO: Use finish function?
        params = (state.storage, state.energy, state.price, state.demand, t)

        def infeasibility_indicator(x, *params):
            # Penalty weight: 0 if feasible, 0.1 for a small violation of
            # constraint 1 (|feas[1]| < 0.01), 1 otherwise.
            x0, x1, x2, x3, x4, x5 = x
            stor, ener, price, dem, t = params
            feas = model.is_feasible(self, md.Decision(x0, x1, x2, x3, x4,
                                                       x5),
                                     md.State(stor, ener, price, dem))
            if feas != 0:
                if feas[0] == 1 and abs(feas[1]) < 0.01:
                    return 0.1
                return 1
            return 0

        # NOTE: The objective functions are not the same as for the other
        # solvers, as they include the constraints (as penalty terms)!
        def objective(x, *params):
            # Penalized negated total objective (contribution + scaled VF).
            x0, x1, x2, x3, x4, x5 = x
            stor, ener, price, dem, t = params
            state = md.State(stor, ener, price, dem)
            decision = md.Decision(x0, x1, x2, x3, x4, x5)
            features = [
                model.transition(self, decision, state), state.energy,
                state.price
            ]
            norm = np.linalg.norm(features)
            # Guard against division by (near-)zero norm.
            normalized = features / norm if norm > self.eps else features
            return (infeasibility_indicator(x, *params) * 1e6 -
                    model.contribution(self, decision, state) -
                    norm * self.VF_approximation[t].predict(
                        np.array([normalized])))

        def contr_objective(x, *params):
            # Penalized negated contribution only.
            x0, x1, x2, x3, x4, x5 = x
            stor, ener, price, dem, t = params
            state = md.State(stor, ener, price, dem)
            decision = md.Decision(x0, x1, x2, x3, x4, x5)
            return (infeasibility_indicator(x, *params) * 1e6 -
                    model.contribution(self, decision, state))

        # TODO: Adjust to scaling
        def VF_objective(x, *params):
            # Penalized negated norm-scaled VF prediction only.
            x0, x1, x2, x3, x4, x5 = x
            stor, ener, price, dem, t = params
            state = md.State(stor, ener, price, dem)
            decision = md.Decision(x0, x1, x2, x3, x4, x5)
            features = [
                model.transition(self, decision, state), state.energy,
                state.price
            ]
            norm = np.linalg.norm(features)
            # Guard against division by (near-)zero norm.
            normalized = features / norm if norm > self.eps else features
            return (infeasibility_indicator(x, *params) * 1e6 -
                    norm * self.VF_approximation[t].predict(
                        np.array([normalized])))

        # TODO: different stepsize?
        stepsize = 0.1 * max(const1, const2, const3)
        print("const1:", const1, ", const2:", const2, ", const3:", const3)
        rranges = (slice(0, const3 + stepsize, stepsize),
                   slice(0, const1 + stepsize, stepsize),
                   slice(0, state.demand + stepsize, stepsize),
                   slice(0, const2 + stepsize, stepsize),
                   slice(0, const2 + stepsize, stepsize),
                   slice(0, const1 + stepsize, stepsize))
        # pt = outputfcts.progress_timer(description = 'Progress', n_iter = 1)
        solution = brute(objective, rranges, args=params, finish=None)
        # BUG FIX: the two grid searches below were swapped (VF_objective
        # was assigned to contr_solution and contr_objective to
        # VF_solution), so each was refined from the wrong initial guess.
        VF_solution = brute(VF_objective, rranges, args=params, finish=None)
        contr_solution = brute(contr_objective,
                               rranges,
                               args=params,
                               finish=None)
        # Refine the grid-search results with SLSQP.
        res = self.scipy_policy_improvement_for_this_state(
            model, state, t, solution, VF_solution, contr_solution, iteration)
        solution = res[0]
        VF_solution = res[1]
        contr_solution = res[2]
        # pt.update()
        # pt.finish()
    ################################
    # solve maximization problem with Artelys Knitro Solver (student free
    # trial version)
    else:
        solution = policy_improvement.maximize(
            model, self, state,
            lambda decision: self.VF_approximation[t].predict(
                np.array([[
                    model.transition(
                        self, md.convert_array_to_decision(decision), state
                    ), state.energy, state.price
                ]])), t)  # GPy version
        VF_solution = policy_improvement_VF_only.maximize(
            model, state,
            lambda decision: self.VF_approximation[t].predict(
                np.array([[
                    model.transition(
                        self, md.convert_array_to_decision(decision), state
                    ), state.energy, state.price
                ]])), t)  # GPy version
        contr_solution = policy_improvement_contr_only.maximize(
            model, self, state, t)  # GPy version

    # Diagnostic output for the chosen solutions.
    print("Iteration", iteration, " at time", t, solution)
    print("State: st, p, e, d ", state.storage, state.price, state.energy,
          state.demand)
    print(
        "Post-decision storage after", t, ":",
        model.transition(self, md.convert_array_to_decision(solution), state))
    print(
        "Solution is feasible:",
        model.is_feasible(self, md.convert_array_to_decision(solution),
                          state))
    features1 = [
        model.transition(self, md.convert_array_to_decision(solution), state),
        state.energy, state.price
    ]
    norm1 = np.linalg.norm(features1)
    # Guard against division by (near-)zero norm.
    normalized_features1 = features1 / norm1 if norm1 > self.eps else features1
    features2 = [
        model.transition(self, md.convert_array_to_decision(VF_solution),
                         state), state.energy, state.price
    ]
    norm2 = np.linalg.norm(features2)
    normalized_features2 = features2 / norm2 if norm2 > self.eps else features2
    print("VF_approximation at solution:",
          norm1 * self.VF_approximation[t].predict(
              np.array([normalized_features1])))  # GPy version
    print(
        "Contribution at solution:",
        model.contribution(self, md.convert_array_to_decision(solution),
                           state))
    print(
        "Optimal contribution obtained at ", contr_solution,
        "with contribution value:",
        model.contribution(self, md.convert_array_to_decision(contr_solution),
                           state))
    print(
        "Optimal VF obtained at ", VF_solution, "with VF value:", norm2 *
        self.VF_approximation[t].predict(np.array([normalized_features2])))
    return md.convert_array_to_decision(solution)