Example #1
File: API.py Project: miluuu/Appolit
 def contr_objective(x, *params):
     # Nested objective for the policy-improvement minimizer; `self`,
     # `model`, and `md` come from the enclosing scope.
     x0, x1, x2, x3, x4, x5 = x
     stor, ener, price, dem, t = params
     state = md.State(stor, ener, price, dem)
     decision = md.Decision(x0, x1, x2, x3, x4, x5)
     # A large penalty steers the minimizer away from infeasible decisions;
     # the contribution is negated because maximizing it means minimizing here.
     return (infeasibility_indicator(x, *params) * 1e6 -
             model.contribution(self, decision, state))
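These nested objectives are evidently handed to a numerical minimizer; the 1e6 term is a soft-constraint penalty. A minimal sketch of the pattern with SciPy, using an illustrative stand-alone objective (the feasibility check and contribution below are toy stand-ins, not the project's model):

    import numpy as np
    from scipy.optimize import minimize

    # Toy stand-in for contr_objective: same shape (large penalty when
    # infeasible, minus the contribution to be maximized).
    def penalized_objective(x, *params):
        (target,) = params
        infeasible = float(np.any(x < 0))           # toy feasibility check
        contribution = -np.sum((x - target) ** 2)   # toy concave contribution
        return infeasible * 1e6 - contribution

    x0 = np.full(6, 0.5)
    res = minimize(penalized_objective, x0, args=(1.0,), bounds=[(0, 5)] * 6)
    print(res.x)  # approaches the all-ones maximizer of the toy contribution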
Example #2
File: API.py Project: miluuu/Appolit
 def infeasibility_indicator(x, *params):
     # Graded feasibility penalty weight: 0 if the decision is feasible,
     # 0.1 for a mild violation of constraint 1 (|violation| < 0.01),
     # and 1 otherwise. `model.is_feasible` returns 0 when feasible and a
     # (constraint index, violation) pair otherwise.
     x0, x1, x2, x3, x4, x5 = x
     stor, ener, price, dem, t = params
     feas = model.is_feasible(self,
                              md.Decision(x0, x1, x2, x3, x4, x5),
                              md.State(stor, ener, price, dem))
     if feas != 0:
         if feas[0] == 1 and abs(feas[1]) < 0.01:
             return 0.1
         return 1
     return 0
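Read off the branches above, `model.is_feasible` appears to return 0 for a feasible decision and a (constraint index, violation magnitude) pair otherwise. A toy demonstration of the three penalty tiers (the stub below is hypothetical, not the project's implementation):

    # Hypothetical stub mirroring the inferred return convention.
    def is_feasible_stub(violation):
        return 0 if violation == 0 else (1, violation)

    def indicator(feas):
        if feas != 0:
            if feas[0] == 1 and abs(feas[1]) < 0.01:
                return 0.1  # mild violation: reduced penalty weight
            return 1        # clear violation: full penalty weight
        return 0            # feasible: no penalty

    print(indicator(is_feasible_stub(0.0)))    # 0
    print(indicator(is_feasible_stub(0.005)))  # 0.1
    print(indicator(is_feasible_stub(0.5)))    # 1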
Example #3
File: API.py Project: miluuu/Appolit
 def VF_objective(x, *params):
     x0, x1, x2, x3, x4, x5 = x
     stor, ener, price, dem, t = params
     state = md.State(stor, ener, price, dem)
     decision = md.Decision(x0, x1, x2, x3, x4, x5)
     # Post-decision features fed to the value-function approximation; built
     # as an ndarray (this avoids shadowing the built-in `input` and makes
     # the division by `norm` explicitly elementwise).
     vf_input = np.array([
         model.transition(self, decision, state), state.energy,
         state.price
     ])
     norm = np.linalg.norm(vf_input)
     normalized_input = vf_input / norm
     # Penalize infeasibility and negate the rescaled predicted value, since
     # the surrounding optimizer minimizes.
     return (infeasibility_indicator(x, *params) * 1e6 -
             norm * self.VF_approximation[t].predict(
                 np.array([normalized_input])))
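The trailing `[0][0]` indexing seen in Example #4 and the 2-D target arrays in Example #5 match GPy's GPRegression, whose predict returns a (mean, variance) pair of arrays. A minimal sketch of fitting and querying such a model on unit-norm inputs, assuming GPy (the data is illustrative):

    import numpy as np
    import GPy

    # Illustrative samples: rows are post-decision states (storage, energy, price).
    X = np.random.rand(20, 3) + 0.1
    norms = np.linalg.norm(X, axis=1, keepdims=True)
    X_unit = X / norms                     # unit-norm inputs, as in VF_objective
    y = np.random.rand(20, 1) / norms      # targets rescaled by the same norms

    gp = GPy.models.GPRegression(X_unit, y, GPy.kern.RBF(input_dim=3))
    gp.optimize()
    mean, var = gp.predict(np.array([X_unit[0]]))  # mean has shape (1, 1)
    value = norms[0, 0] * mean[0, 0]               # undo the normalization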
Example #4
File: API.py Project: miluuu/Appolit
    def approximate_policy_iteration(self, model, dataset, dataset_type):
        '''Performs one approximate policy iteration run.

        Policy improvement is invoked within the policy evaluation step.
        To apply the final policy to a state, policy_improvement_for_this_state
        must be called on that state first.
        '''

        for iteration in range(self.max_iter):
            print("ITERATION ", iteration)
            res = self.approx_policy_evaluation_and_update_VF(model, iteration)

            # Evaluate the current optimal policy on the dataset.
            solution_storage = self.evaluate_policy(
                model, dataset_type + ' datasets/' + dataset + '/txt/e.txt',
                dataset_type + ' datasets/' + dataset + '/txt/p.txt',
                dataset_type + ' datasets/' + dataset + '/txt/D.txt')
            for k in range(model.number_samplepaths):
                outputfcts.plot_data_and_save(
                    model, solution_storage[k], 'R' + '-' + dataset,
                    'Results/' + dataset + '/solution_storage plot, ' +
                    dataset + ', ' + str(iteration) + ' out of ' +
                    str(self.max_iter) + ' iterations, m = ' +
                    str(self.episode_max) + ', sample ' + str(k) + '.pdf')
                outputfcts.save_data_in_file(
                    solution_storage[k], 'Results/' + dataset +
                    '/solution_storage, ' + dataset + ', ' + str(iteration) +
                    ' out of ' + str(self.max_iter) + ' iterations, m = ' +
                    str(self.episode_max) + ', sample ' + str(k) + '.txt')

            # Plot slices of the value function.
            plot_bool_3D = 0  #TODO: pass as a parameter?
            plot_bool_2D = 0

            if plot_bool_3D == 1:

                plot_tmax = 10 if model.t_max > 11 else model.t_max  #TODO: set accordingly
                for t in range(plot_tmax):
                    fig = plt.figure()
                    ax = fig.add_subplot(111, projection='3d')
                    ax.set_xlabel('x_wr')
                    ax.set_ylabel('R')
                    ax.set_zlabel('VF')
                    X_test = np.array(
                        [[x for i in np.arange(0, model.R_max, 1)]
                         for x in np.arange(0, model.max_discharge, 0.001)])
                    Y_test = np.array(
                        [[y for y in np.arange(0, model.R_max, 1)]
                         for i in np.arange(0, model.max_discharge, 0.001)])
                    Z_test = np.array([[
                        self.VF_approximation[t].predict(
                            np.array([[
                                model.transition(
                                    self,
                                    md.convert_array_to_decision([
                                        0.019, 0, 0, x, 0, 0
                                    ]), md.State(y, 0.05, 23, 0.019)), 0.05, 28
                            ] / np.linalg.norm(
                                np.array([
                                    model.transition(
                                        self,
                                        md.convert_array_to_decision(
                                            [0.019, 0, 0, x, 0, 0]),
                                        md.State(y, 0.05, 23, 0.019)), 0.05, 28
                                ]))]))[0][0]
                        for y in np.arange(0, model.R_max, 1)
                    ] for x in np.arange(0, model.max_discharge, 0.001)
                                       ])  #GPy Version
                    ax.plot_surface(X_test,
                                    Y_test,
                                    Z_test,
                                    rstride=10,
                                    cstride=10)
                    fig.canvas.manager.set_window_title(
                        'VF approximation at time ' + str(t))
                    fig.savefig("Results/" + dataset + "/VF_plot, " +
                                str(self.max_iter) + " iterations, " +
                                str(self.episode_max) + " samples, time " +
                                str(t) + " at iteration " + str(iteration) +
                                ".pdf",
                                bbox_inches='tight')
                    plt.show()

            if plot_bool_2D == 1:
                for t in range(model.t_max):
                    plt.plot([
                        model.transition(self, md.Decision(1, 0, 0, 3, 0, 1),
                                         md.State(R, 6, 37, 2))
                        for R in range(0, model.R_max, 1)
                    ], [
                        self.VF_approximation[t].predict(np.array([[
                            model.transition(self, md.Decision(
                                1, 0, 0, 3, 0, 1), md.State(R, 6, 37, 2)), 6,
                            37
                        ]]))[0] for R in range(0, model.R_max, 1)
                    ])
                    plt.xlabel('Storage R')
                    plt.ylabel('Post-decision VF')
                    plt.show()
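The grids behind the 3-D plot above can be built more compactly with np.meshgrid; a minimal, self-contained sketch of the same surface-plot pattern with placeholder ranges and a placeholder value function (all numbers are illustrative; assumes matplotlib >= 3.2 for the '3d' projection):

    import numpy as np
    import matplotlib.pyplot as plt

    xs = np.arange(0, 0.05, 0.001)  # stand-in for the model.max_discharge range
    ys = np.arange(0, 30, 1.0)      # stand-in for the model.R_max range
    X, Y = np.meshgrid(xs, ys, indexing='ij')
    Z = -(X - 0.02) ** 2 - 0.001 * (Y - 15) ** 2  # placeholder VF surface

    fig = plt.figure()
    ax = fig.add_subplot(111, projection='3d')
    ax.set_xlabel('x_wr')
    ax.set_ylabel('R')
    ax.set_zlabel('VF')
    ax.plot_surface(X, Y, Z, rstride=10, cstride=10)
    plt.show()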
Example #5
File: API.py Project: miluuu/Appolit
    def approx_policy_evaluation_and_update_VF(self, model, iteration):
        '''Performs the approximate policy evaluation step; returns a
        dictionary mapping each time t (key) to a function (value) that
        approximates the post-decision value function at time t.'''
        agg_values = {
            m: np.zeros(model.t_max)
            for m in range(self.episode_max)
        }  # for each m and t, stores v^m_t (as in the paper): the aggregated, undiscounted sum of contributions from time t onward in sample m
        pdstates = {
            m: {t: np.zeros(3)
                for t in range(model.t_max)}
            for m in range(self.episode_max)
        }  # stores each post-decision state as a (storage, energy, price) array; demand is excluded, since it is modelled deterministically here

        # Simulated sample paths
        for m in range(self.episode_max):
            # Sample the initial state.
            if model.is_deterministic:
                state_m = md.State(0, 0, 0, 0)
            else:
                # Argument order matches md.State(storage, energy, price, demand).
                state_m = md.State(
                    np.random.choice(
                        np.arange(0, model.R_max, model.R_stepsize)),
                    np.random.choice(
                        np.arange(model.E_min, model.E_max, model.E_stepsize)),
                    np.random.choice(
                        np.arange(model.P_min, model.P_max, model.P_stepsize)),
                    0)
            # Go through the sample path.
            for t in range(model.t_max):
                model.get_state(
                    state_m,
                    t)  # update exogenous information (excluding storage value)
                if model.is_deterministic:  #TODO: change?
                    state_m.energy = np.random.uniform(model.E_min,
                                                       model.E_max)
                    state_m.price = np.random.uniform(model.P_min, model.P_max)
                if iteration != 0:
                    print("Episode ", m)
                    decision_m = self.policy_improvement_for_this_state(
                        model, state_m, t, iteration)
                else:
                    # Set initial policy (randomized, while satisfying feasibility constraints)
                    wd = np.random.uniform(0,
                                           min(state_m.demand, state_m.energy))
                    rd = np.random.uniform(
                        0,
                        min(state_m.storage, model.max_discharge,
                            (state_m.demand - wd)) /
                        model.discharge_efficiency)
                    gd = max(
                        0,
                        state_m.demand - wd - rd * model.discharge_efficiency
                    )  #not randomized, in order to satisfy demand/be feasible
                    wr = np.random.uniform(
                        0,
                        min(state_m.energy - wd, model.max_charge,
                            model.R_max - state_m.energy))
                    rg = np.random.uniform(
                        0,
                        max(
                            0,
                            min(state_m.storage - rd,
                                model.max_discharge - rd)))
                    gr = np.random.uniform(
                        0,
                        max(
                            0,
                            min((model.R_max - state_m.storage -
                                 model.charge_efficiency * wr + rd + rg) /
                                model.charge_efficiency,
                                model.max_charge - wr)))

                    decision_m = md.Decision(wd, rd, gd, wr, gr, rg)

                current_contribution = model.contribution(
                    self, decision_m, state_m)
                state_m.storage = model.transition(
                    self, decision_m, state_m)  #update storage value
                pdstates[m][t] = np.array(
                    [state_m.storage, state_m.energy, state_m.price])

                for s in range(t + 1):
                    agg_values[m][s] += current_contribution

        # Calculate the VF approximations:
        for t in range(model.t_max):
            pd_samples_t = np.array(
                [pdstates[m][t] for m in range(self.episode_max)])
            value_samples_t = np.array([[agg_values[m][t]]
                                        for m in range(self.episode_max)
                                        ])  # GPy version: targets must be 2-D

            pd_samples_t, norms = normalize(pd_samples_t,
                                            norm='l2',
                                            return_norm=True)
            value_samples_t = np.array([[value_samples_t[i][0] / norms[i]]
                                        for i in range(self.episode_max)])

            self.VF_approximation[t].fit(pd_samples_t, value_samples_t)
            print("for iteration", iteration, "fitted approximation for time",
                  t)
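Two pieces of this method are easy to sanity-check in isolation: the inner `for s in range(t + 1)` loop accumulates, for each episode, the undiscounted suffix sums of the contributions, and sklearn's normalize with return_norm=True yields both the unit-norm samples and the norms used to rescale the targets. A toy check of both (all values illustrative):

    import numpy as np
    from sklearn.preprocessing import normalize

    # Suffix sums: agg[s] = sum of contributions from time s onward, which is
    # exactly what the `for s in range(t + 1)` accumulation produces.
    contributions = np.array([1.0, 2.0, 3.0])
    agg = np.cumsum(contributions[::-1])[::-1]
    print(agg)  # [6. 5. 3.]

    # Row-wise L2 normalization plus the norms, as used before fitting.
    pd_samples = np.array([[3.0, 0.5, 25.0],
                           [1.0, 0.2, 30.0]])
    values = np.array([[10.0], [7.0]])
    unit_samples, norms = normalize(pd_samples, norm='l2', return_norm=True)
    scaled_values = values / norms[:, None]  # same rescaling as in the method
    # self.VF_approximation[t].fit(unit_samples, scaled_values) would follow.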