def contr_objective(x, *params):
    '''Objective built from the one-step contribution of a decision.

    x: sequence of exactly six decision components; they are passed
        positionally, in order, to md.Decision.
    params: tuple (stor, ener, price, dem, t). The first four build the
        md.State; t is unpacked but not used in this function.

    Reads `self` and `model` from the enclosing scope.

    Returns infeasibility_indicator(x, *params) * 1e6 minus
    model.contribution(self, decision, state), so a decision flagged by
    the indicator is penalised with a large positive term (presumably
    this value is handed to a minimiser - confirm at the call site).
    '''
    d0, d1, d2, d3, d4, d5 = x
    storage, energy, cost, demand, _t = params

    current_state = md.State(storage, energy, cost, demand)
    candidate = md.Decision(d0, d1, d2, d3, d4, d5)

    # Penalty first, then the contribution: same evaluation order as before.
    penalty = infeasibility_indicator(x, *params) * 1e6
    reward = model.contribution(self, candidate, current_state)
    return penalty - reward
def infeasibility_indicator(x, *params):
    '''Map the result of model.is_feasible to a penalty weight.

    x: sequence of exactly six decision components (passed positionally
        to md.Decision).
    params: tuple (stor, ener, price, dem, t); t is unpacked but unused.

    Reads `self` and `model` from the enclosing scope.

    Returns
        0    if model.is_feasible(...) compares equal to 0,
        0.1  if the result has result[0] == 1 and abs(result[1]) < 0.01,
        1    for every other result.
    '''
    d0, d1, d2, d3, d4, d5 = x
    storage, energy, cost, demand, _t = params

    verdict = model.is_feasible(self, md.Decision(d0, d1, d2, d3, d4, d5),
                                md.State(storage, energy, cost, demand))

    flagged = verdict != 0
    if not flagged:
        return 0

    # presumably verdict[0] identifies the violated constraint and
    # verdict[1] the size of the violation - confirm in model.is_feasible
    nearly_ok = verdict[0] == 1 and abs(verdict[1]) < 0.01
    if nearly_ok:
        return 0.1
    return 1
def VF_objective(x, *params):
    '''Objective built from the fitted post-decision value function.

    x: sequence of exactly six decision components (passed positionally
        to md.Decision).
    params: tuple (stor, ener, price, dem, t). The first four build the
        md.State; t selects the approximation self.VF_approximation[t].

    Reads `self` and `model` from the enclosing scope.

    The feature vector [model.transition(...), state.energy, state.price]
    is divided by its Euclidean norm before prediction and the prediction
    is multiplied by that norm again.

    Returns infeasibility_indicator(x, *params) * 1e6 minus the rescaled
    prediction; the result has whatever shape predict() returns.
    '''
    d0, d1, d2, d3, d4, d5 = x
    storage, energy, cost, demand, t = params

    current_state = md.State(storage, energy, cost, demand)
    candidate = md.Decision(d0, d1, d2, d3, d4, d5)

    features = [
        model.transition(self, candidate, current_state),
        current_state.energy,
        current_state.price,
    ]
    scale = np.linalg.norm(features)
    unit_features = features / scale

    # Penalty first, then the prediction: same evaluation order as before.
    penalty = infeasibility_indicator(x, *params) * 1e6
    prediction = self.VF_approximation[t].predict(np.array([unit_features]))
    return penalty - scale * prediction
def approximate_policy_iteration(self, model, dataset, dataset_type,
                                 plot_bool_3D=0, plot_bool_2D=0):
    '''Run self.max_iter rounds of approximate policy iteration.

    Every round
      1. calls approx_policy_evaluation_and_update_VF (policy improvement
         is invoked within that policy evaluation step),
      2. evaluates the current policy on the e/p/D text files of the
         dataset via self.evaluate_policy,
      3. plots and saves the resulting storage trajectory of every sample
         path through outputfcts,
      4. optionally plots slices of the fitted value function.

    To apply the final policy to a state, policy_improvement_for_this_state
    needs to be applied first.

    model: problem model; this method reads number_samplepaths, t_max,
        R_max, max_discharge and calls transition.
    dataset: dataset name, used inside input and output paths.
    dataset_type: prefix of the dataset directory
        (dataset_type + ' datasets/' + dataset + '/txt/').
    plot_bool_3D: set to 1 to draw and save a 3D surface of the value
        function per time step. Defaults to 0 (off), the value that was
        previously hard-coded.
    plot_bool_2D: set to 1 to draw a 2D slice of the value function per
        time step. Defaults to 0 (off), the value that was previously
        hard-coded.

    Returns None.
    '''
    txt_dir = dataset_type + ' datasets/' + dataset + '/txt/'
    results_dir = 'Results/' + dataset + '/'

    for iteration in range(self.max_iter):
        print("ITERATION ", iteration)
        # Only the side effect (refitting self.VF_approximation) is needed;
        # the return value was never used.
        self.approx_policy_evaluation_and_update_VF(model, iteration)

        # Evaluate current optimal policy.
        solution_storage = self.evaluate_policy(model, txt_dir + 'e.txt',
                                                txt_dir + 'p.txt',
                                                txt_dir + 'D.txt')

        for k in range(model.number_samplepaths):
            run_suffix = (str(self.max_iter) + ' iterations, m = ' +
                          str(self.episode_max) + ', sample ' + str(k))
            # The two file names differ in the blank before 'out of'; both
            # are kept exactly as they were so existing result files and
            # downstream scripts keep matching.
            outputfcts.plot_data_and_save(
                model, solution_storage[k], 'R' + '-' + dataset,
                results_dir + 'solution_storage plot, ' + dataset + ', ' +
                str(iteration) + 'out of' + run_suffix + ' .pdf')
            outputfcts.save_data_in_file(
                solution_storage[k],
                results_dir + 'solution_storage, ' + dataset + ', ' +
                str(iteration) + ' out of' + run_suffix + ' .txt')

        # Plot slices of value function.
        # NOTE(review): the original layout was lost; plotting is assumed to
        # run once per iteration because the file name embeds `iteration`.
        if plot_bool_3D == 1:
            # TODO: set accordingly
            plot_tmax = 10 if model.t_max > 11 else model.t_max
            storage_axis = np.arange(0, model.R_max, 1)
            x_wr_axis = np.arange(0, model.max_discharge, 0.001)
            # Rows follow x_wr, columns follow R.
            Y_test, X_test = np.meshgrid(storage_axis, x_wr_axis)

            for t in range(plot_tmax):
                fig = plt.figure()
                ax = fig.add_subplot(111, projection='3d')
                ax.set_xlabel('x_wr')
                ax.set_ylabel('R')
                ax.set_zlabel('VF')

                vf_t = self.VF_approximation[t]
                z_rows = []
                for x in x_wr_axis:
                    z_row = []
                    for y in storage_axis:
                        # Evaluate the transition once per grid point
                        # (assumes model.transition is deterministic).
                        post_storage = model.transition(
                            self,
                            md.convert_array_to_decision(
                                [0.019, 0, 0, x, 0, 0]),
                            md.State(y, 0.05, 23, 0.019))
                        features = np.array([post_storage, 0.05, 28])
                        unit_features = features / np.linalg.norm(features)
                        z_row.append(
                            vf_t.predict(np.array([unit_features]))[0][0])
                    z_rows.append(z_row)
                Z_test = np.array(z_rows)

                ax.plot_surface(X_test, Y_test, Z_test, rstride=10,
                                cstride=10)
                # FigureCanvasBase.set_window_title no longer exists in
                # current Matplotlib; the figure manager owns the title.
                manager = getattr(fig.canvas, 'manager', None)
                if manager is not None:
                    manager.set_window_title('VF approximation at time ' +
                                             str(t))
                fig.savefig("Results/" + dataset + "/VF_plot, " +
                            str(self.max_iter) + "iterations, " +
                            str(self.episode_max) + "samples, time" +
                            str(t) + " at iteration " + str(iteration) +
                            ".pdf",
                            bbox_inches='tight')
                plt.show()

        if plot_bool_2D == 1:
            # The post-decision storage values do not depend on t, so they
            # are computed once and reused for both plot axes.
            post_storages = [
                model.transition(self, md.Decision(1, 0, 0, 3, 0, 1),
                                 md.State(R, 6, 37, 2))
                for R in range(0, model.R_max, 1)
            ]
            for t in range(model.t_max):
                vf_t = self.VF_approximation[t]
                # NOTE(review): unlike the 3D plot, these inputs are not
                # normalised before predict() - kept as it was; confirm.
                plt.plot(post_storages, [
                    vf_t.predict([[post_storage, 6, 37]])[0]
                    for post_storage in post_storages
                ])
                plt.xlabel('Storage R')
                plt.ylabel('Post-decision VF')
                plt.show()
def approx_policy_evaluation_and_update_VF(self, model, iteration):
    '''Approximate policy evaluation step; refits self.VF_approximation.

    Simulates self.episode_max sample paths of length model.t_max. For
    iteration 0 the decisions come from a randomised initial policy, for
    later iterations from self.policy_improvement_for_this_state. For each
    time t the post-decision states (storage, energy, price) are
    L2-normalised, the aggregated contributions from t onwards are divided
    by the same norms, and self.VF_approximation[t] is fitted on the pair.

    model: problem model (state sampling ranges, get_state, contribution,
        transition, charge/discharge limits).
    iteration: index of the current policy iteration round.

    NOTE(review): the code visible here has no return statement; the fitted
    approximations are only available through self.VF_approximation.
    '''
    n_episodes = self.episode_max

    # agg_values[m][t]: v^m_t (in the paper), the undiscounted sum of
    # contributions from time t on in sample m.
    agg_values = {}
    # pdstates[m][t]: post-decision state as an array (demand is left out,
    # as demand is modelled deterministically here). np.array(3) is only a
    # 0-d placeholder; every entry is overwritten in the simulation below.
    pdstates = {}
    for episode in range(n_episodes):
        agg_values[episode] = np.zeros(model.t_max)
        pdstates[episode] = {t: np.array(3) for t in range(model.t_max)}

    # Simulated sample paths
    for episode in range(n_episodes):
        # Sample initial state.
        if model.is_deterministic:
            # TODO: maybe change back to a uniformly sampled initial storage?
            state = md.State(0, 0, 0, 0)
        else:
            # The three draws are made in this order to keep the random
            # stream unchanged.
            # NOTE(review): elsewhere md.State is called as (storage,
            # energy, price, demand); here the second argument is drawn
            # from the P range and the third from the E range - confirm.
            first_draw = np.random.choice(
                np.arange(0, model.R_max, model.R_stepsize))
            second_draw = np.random.choice(
                np.arange(model.P_min, model.P_max, model.P_stepsize))
            third_draw = np.random.choice(
                np.arange(model.E_min, model.E_max, model.E_stepsize))
            state = md.State(first_draw, second_draw, third_draw, 0)

        returns_to_go = agg_values[episode]

        # Go through the sample path.
        for t in range(model.t_max):
            # Update exogenous information (excluding storage value).
            model.get_state(state, t)
            if model.is_deterministic:  # TODO: change?
                state.energy = np.random.uniform(model.E_min, model.E_max)
                state.price = np.random.uniform(model.P_min, model.P_max)

            if iteration == 0:
                # Set initial policy (randomised, while satisfying
                # feasibility constraints). Draw order: wd, rd, wr, rg, gr.
                wd = np.random.uniform(0, min(state.demand, state.energy))
                rd_cap = min(state.storage, model.max_discharge,
                             (state.demand - wd))
                rd = np.random.uniform(0,
                                       rd_cap / model.discharge_efficiency)
                # Not randomised, in order to satisfy demand / be feasible.
                gd = max(0,
                         state.demand - wd - rd * model.discharge_efficiency)
                # NOTE(review): the third bound uses state.energy; a storage
                # capacity bound (R_max - storage) may have been intended -
                # left unchanged, confirm.
                wr_cap = min(state.energy - wd, model.max_charge,
                             model.R_max - state.energy)
                wr = np.random.uniform(0, wr_cap)
                rg_cap = min(state.storage - rd, model.max_discharge - rd)
                rg = np.random.uniform(0, max(0, rg_cap))
                free_capacity = (model.R_max - state.storage -
                                 model.charge_efficiency * wr + rd + rg)
                gr_cap = min(free_capacity / model.charge_efficiency,
                             model.max_charge - wr)
                gr = np.random.uniform(0, max(0, gr_cap))
                decision = md.Decision(wd, rd, gd, wr, gr, rg)
            else:
                print("Episode ", episode)  # TODO: delete
                decision = self.policy_improvement_for_this_state(
                    model, state, t, iteration)

            reward = model.contribution(self, decision, state)
            # Update storage value: state now holds the post-decision storage.
            state.storage = model.transition(self, decision, state)
            pdstates[episode][t] = np.array(
                [state.storage, state.energy, state.price])

            # The contribution earned at t counts towards every start
            # time s <= t.
            for s in range(t + 1):
                returns_to_go[s] += reward

    # Calculate the VF approximations.
    for t in range(model.t_max):
        raw_states = np.array(
            [pdstates[episode][t] for episode in range(n_episodes)])
        # presumably sklearn.preprocessing.normalize, which returns
        # (normalised rows, row norms) when return_norm=True - confirm import
        unit_states, row_norms = normalize(raw_states,
                                           norm='l2',
                                           return_norm=True)
        # Targets get the same scaling as their inputs; shape (episodes, 1).
        scaled_values = np.array(
            [[agg_values[episode][t] / row_norms[episode]]
             for episode in range(n_episodes)])

        self.VF_approximation[t].fit(unit_states, scaled_values)
        print("for iteration", iteration, "fitted approximation for time",
              t)