コード例 #1
0
ファイル: API.py プロジェクト: miluuu/Appolit
 def VF_obj_fct(x):
     '''Negative norm-scaled VF prediction for the decision encoded in x.

     Builds the post-decision feature vector (post-decision storage,
     energy, price), normalizes it, and returns minus the scaled VF
     approximation so that scipy.minimize maximizes the VF.
     '''
     decision = md.convert_array_to_decision(x)
     # 'features' instead of 'input' to avoid shadowing the builtin.
     features = np.array([
         model.transition(self, decision, state), state.energy,
         state.price
     ])
     norm = np.linalg.norm(features)
     # BUGFIX: guard against a zero norm (division by zero), consistent
     # with the eps guard used in obj_fct.
     normalized_features = features / norm if norm > self.eps else features
     return -norm * self.VF_approximation[t].predict(
         np.array([normalized_features]))
コード例 #2
0
ファイル: API.py プロジェクト: miluuu/Appolit
 def obj_fct(x):
     '''Negative (contribution + norm-scaled VF prediction) for decision x.

     The VF approximation is evaluated on the normalized post-decision
     feature vector and rescaled by its norm; the sign is flipped so a
     minimizer maximizes contribution + VF.
     '''
     decision = md.convert_array_to_decision(x)
     # 'features' instead of 'input' to avoid shadowing the builtin.
     features = [
         model.transition(self, decision, state), state.energy,
         state.price
     ]
     norm = np.linalg.norm(features)
     # Normalize only when the norm is safely nonzero.
     normalized_features = features / norm if norm > self.eps else features
     return -model.contribution(
         self, decision,
         state) - norm * self.VF_approximation[t].predict(
             np.array([normalized_features]))
コード例 #3
0
ファイル: API.py プロジェクト: miluuu/Appolit
 def contr_obj_fct(x):
     '''Negated contribution of the decision encoded in x (for minimize).'''
     decision = md.convert_array_to_decision(x)
     return -model.contribution(self, decision, state)
コード例 #4
0
ファイル: API.py プロジェクト: miluuu/Appolit
    def approximate_policy_iteration(self, model, dataset, dataset_type):
        '''Performs approximate policy iteration step

        (policy improvement is invoked within policy evaluation step)
        To apply the final policy to a state, policy_improvement_for_this_state needs to be applied first.

        Args:
            model: problem model (provides t_max, number_samplepaths, R_max,
                max_discharge, transition, ...).
            dataset: dataset name; used to build the input txt paths and the
                Results/ output paths.
            dataset_type: directory prefix, joined as
                '<dataset_type> datasets/<dataset>/txt/...'.
        '''

        for iteration in range(self.max_iter):
            print("ITERATION ", iteration)
            # One policy-evaluation sweep; also refits the VF approximation.
            res = self.approx_policy_evaluation_and_update_VF(model, iteration)

            # Evaluate current optimal policy.
            # NOTE(review): this zeros array is immediately overwritten by
            # evaluate_policy below -- the initialization is dead code.
            solution_storage = np.zeros(model.t_max)
            solution_storage = self.evaluate_policy(
                model, dataset_type + ' datasets/' + dataset + '/txt/e.txt',
                dataset_type + ' datasets/' + dataset + '/txt/p.txt',
                dataset_type + ' datasets/' + dataset + '/txt/D.txt')
            # Plot and persist the evaluated policy for every sample path.
            # NOTE(review): the 'out of' fragments in these file names have
            # inconsistent spacing; kept as-is since they are runtime strings.
            for k in range(model.number_samplepaths):
                outputfcts.plot_data_and_save(
                    model, solution_storage[k], 'R' + '-' + dataset,
                    'Results/' + dataset + '/solution_storage plot, ' +
                    dataset + ', ' + str(iteration) + 'out of' +
                    str(self.max_iter) + ' iterations, m = ' +
                    str(self.episode_max) + ', sample ' + str(k) + ' .pdf')
                outputfcts.save_data_in_file(
                    solution_storage[k], 'Results/' + dataset +
                    '/solution_storage, ' + dataset + ', ' + str(iteration) +
                    ' out of' + str(self.max_iter) + ' iterations, m = ' +
                    str(self.episode_max) + ', sample ' + str(k) + ' .txt')

            # Plot slices of value function
            plot_bool_3D = 0  #TODO: pass as a parameter? (currently hard-coded off)
            plot_bool_2D = 0

            if plot_bool_3D == 1:

                plot_tmax = 10 if model.t_max > 11 else model.t_max  #TODO: set accordingly
                for t in range(plot_tmax):
                    fig = plt.figure()
                    ax = fig.add_subplot(111, projection='3d')
                    ax.set_xlabel('x_wr')
                    ax.set_ylabel('R')
                    ax.set_zlabel('VF')
                    # Mesh: x over [0, max_discharge) step 0.001,
                    # y over storage levels [0, R_max) step 1.
                    X_test = np.array(
                        [[x for i in np.arange(0, model.R_max, 1)]
                         for x in np.arange(0, model.max_discharge, 0.001)])
                    Y_test = np.array(
                        [[y for y in np.arange(0, model.R_max, 1)]
                         for i in np.arange(0, model.max_discharge, 0.001)])
                    # VF predicted on the normalized post-decision feature
                    # vector and indexed [0][0] to unwrap the GPy output.
                    Z_test = np.array([[
                        self.VF_approximation[t].predict(
                            np.array([[
                                model.transition(
                                    self,
                                    md.convert_array_to_decision([
                                        0.019, 0, 0, x, 0, 0
                                    ]), md.State(y, 0.05, 23, 0.019)), 0.05, 28
                            ] / np.linalg.norm(
                                np.array([
                                    model.transition(
                                        self,
                                        md.convert_array_to_decision(
                                            [0.019, 0, 0, x, 0, 0]),
                                        md.State(y, 0.05, 23, 0.019)), 0.05, 28
                                ]))]))[0][0]
                        for y in np.arange(0, model.R_max, 1)
                    ] for x in np.arange(0, model.max_discharge, 0.001)
                                       ])  #GPy Version
                    #            Z_test = np.array([[  self.VF_approximation[t].predict(np.array([[model.transition(self, md.convert_array_to_decision([0.019, 0, 0, x, 0, 0]), md.State(y, 0.05, 28, 0.019)), 0.05, 28]]))[0] for y in np.arange(0, model.R_max, 1)] for x in np.arange(0, model.max_discharge, 0.001)])
                    ax.plot_surface(X_test,
                                    Y_test,
                                    Z_test,
                                    rstride=10,
                                    cstride=10)
                    # NOTE(review): set_window_title moved to
                    # fig.canvas.manager in newer matplotlib -- confirm the
                    # pinned version still exposes it on the canvas.
                    fig.canvas.set_window_title('VF approximation at time ' +
                                                str(t))
                    fig.savefig("Results/" + dataset + "/VF_plot, " +
                                str(self.max_iter) + "iterations, " +
                                str(self.episode_max) + "samples, time" +
                                str(t) + " at iteration " + str(iteration) +
                                ".pdf",
                                bbox_inches='tight')
                    plt.show()

            if plot_bool_2D == 1:
                # 2D slice: post-decision VF over the storage level R for a
                # fixed decision and fixed exogenous values.
                for t in range(model.t_max):
                    plt.plot([
                        model.transition(self, md.Decision(1, 0, 0, 3, 0, 1),
                                         md.State(R, 6, 37, 2))
                        for R in range(0, model.R_max, 1)
                    ], [
                        self.VF_approximation[t].predict([[
                            model.transition(self, md.Decision(
                                1, 0, 0, 3, 0, 1), md.State(R, 6, 37, 2)), 6,
                            37
                        ]])[0] for R in range(0, model.R_max, 1)
                    ])
                    plt.xlabel('Storage R')
                    plt.ylabel('Post-decision VF')
                    plt.show()
コード例 #5
0
ファイル: API.py プロジェクト: miluuu/Appolit
    def scipy_policy_improvement_for_this_state(self,
                                                model,
                                                state,
                                                t,
                                                initial_guess,
                                                VF_initial_guess,
                                                contr_initial_guess,
                                                iteration=0):
        '''Policy improvement step with scipy.

        Runs three SLSQP minimizations under identical bounds/constraints:
        the full objective (contribution + norm-scaled VF approximation),
        the VF part alone, and the contribution alone.

        Args:
            model: problem model (transition, contribution, limits).
            state: current pre-decision state.
            t: time index into self.VF_approximation.
            initial_guess: start point for the full objective.
            VF_initial_guess: start point for the VF-only objective.
            contr_initial_guess: start point for the contribution-only
                objective.
            iteration: API iteration counter (diagnostics only).

        Returns:
            Tuple (solution, VF_solution, contr_solution) of decision arrays.
        '''
        #Optimization of objective function, value function and contribution function with scipy.minimize
        # Box-constraint limits derived from the current state.
        const1 = min(state.storage, model.max_discharge)
        print("const1, storage, max_discharge:", const1, state.storage,
              model.max_discharge)
        const2 = min(model.R_max - state.storage, model.max_charge)
        print("const2, R_max, storage, max_discharge:", const2, model.R_max,
              state.storage, model.max_discharge)
        const3 = min(state.energy, state.demand)

        def obj_fct(x):
            '''Negative (contribution + norm-scaled VF prediction).'''
            decision = md.convert_array_to_decision(x)
            # 'features' instead of 'input' to avoid shadowing the builtin.
            features = [
                model.transition(self, decision, state), state.energy,
                state.price
            ]
            norm = np.linalg.norm(features)
            normalized_features = features / norm if norm > self.eps else features
            return -model.contribution(
                self, decision,
                state) - norm * self.VF_approximation[t].predict(
                    np.array([normalized_features]))

        def VF_obj_fct(x):
            '''Negative norm-scaled VF prediction only.'''
            decision = md.convert_array_to_decision(x)
            features = np.array([
                model.transition(self, decision, state), state.energy,
                state.price
            ])
            norm = np.linalg.norm(features)
            # BUGFIX: guard against a zero norm (division by zero),
            # consistent with obj_fct above.
            normalized_features = features / norm if norm > self.eps else features
            return -norm * self.VF_approximation[t].predict(
                np.array([normalized_features]))

        def contr_obj_fct(x):
            '''Negative contribution only.'''
            return -model.contribution(self, md.convert_array_to_decision(x),
                                       state)

        bnds = (
            (0, const3), (0, const1), (0, state.demand), (0, const2),
            (0, const2), (0, const1)
        )  #approximation: open intervals (since closed intervals not available as bounds)
        beta_d = model.discharge_efficiency
        # One equality (demand balance) and three inequality constraints in
        # the g(x) >= 0 form that SLSQP expects.
        cons = ({
            'type': 'eq',
            'fun': lambda x: x[0] + beta_d * x[1] + x[2] - state.demand
        }, {
            'type': 'ineq',
            'fun': lambda x: -x[1] - x[5] + const1
        }, {
            'type': 'ineq',
            'fun': lambda x: -x[3] - x[4] + const2
        }, {
            'type': 'ineq',
            'fun': lambda x: -x[0] - x[3] + state.energy
        })

        print("Initial guess: ", initial_guess)
        print(
            "Initial guess is feasible:",
            model.is_feasible(self,
                              md.convert_array_to_decision(initial_guess),
                              state))

        solution = minimize(obj_fct,
                            initial_guess,
                            method='SLSQP',
                            bounds=bnds,
                            constraints=cons,
                            options={
                                'eps': self.eps,
                                'ftol': self.ftol
                            }).x
        VF_solution = minimize(VF_obj_fct,
                               VF_initial_guess,
                               method='SLSQP',
                               bounds=bnds,
                               constraints=cons,
                               options={
                                   'ftol': self.ftol
                               }).x
        contr_solution = minimize(contr_obj_fct,
                                  contr_initial_guess,
                                  method='SLSQP',
                                  bounds=bnds,
                                  constraints=cons,
                                  options={
                                      'ftol': self.ftol
                                  }).x
        return solution, VF_solution, contr_solution
コード例 #6
0
ファイル: API.py プロジェクト: miluuu/Appolit
    def policy_improvement_for_this_state(self, model, state, t, iteration=0):
        '''Policy improvement step for one particular state at time t (policy stored as dictionary)'''
        const1 = min(state.storage, model.max_discharge)
        const2 = min(model.R_max - state.storage, model.max_charge)
        const3 = min(state.energy, state.demand)

        if self.optimizer_choice == 0:
            # Set initial guess
            initial_guess = model.initial_guess_for_policy_improvement(state)
            res = self.scipy_policy_improvement_for_this_state(
                model, state, t, initial_guess, initial_guess, initial_guess,
                iteration)
            solution = res[0]
            VF_solution = res[1]
            contr_solution = res[2]
###########################
#use gridsearch
        elif self.optimizer_choice == 1:
            #TODO: Use finish function?
            params = (state.storage, state.energy, state.price, state.demand,
                      t)

            def infeasibility_indicator(x, *params):
                x0, x1, x2, x3, x4, x5 = x
                stor, ener, price, dem, t = params
                feas = model.is_feasible(self,
                                         md.Decision(x0, x1, x2, x3, x4, x5),
                                         md.State(stor, ener, price, dem))
                if feas != 0:
                    if feas[0] == 1 and abs(feas[1]) < 0.01:
                        return 0.1
                    return 1
                return 0

            #NOTE: The objective functions are not the same as for the  other solvers, as they include the constraints!
            def objective(x, *params):
                x0, x1, x2, x3, x4, x5 = x
                stor, ener, price, dem, t = params
                state = md.State(stor, ener, price, dem)
                decision = md.Decision(x0, x1, x2, x3, x4, x5)
                input = [
                    model.transition(self, decision, state), state.energy,
                    state.price
                ]
                norm = np.linalg.norm(input)
                normalized_input = input / norm
                return (infeasibility_indicator(x, *params) * 1e6 -
                        model.contribution(self, decision, state) -
                        norm * self.VF_approximation[t].predict(
                            np.array([normalized_input])))

            def contr_objective(x, *params):
                x0, x1, x2, x3, x4, x5 = x
                stor, ener, price, dem, t = params
                state = md.State(stor, ener, price, dem)
                decision = md.Decision(x0, x1, x2, x3, x4, x5)
                return (infeasibility_indicator(x, *params) * 1e6 -
                        model.contribution(self, decision, state))

            #TODO: Adjust to scaling
            def VF_objective(x, *params):
                x0, x1, x2, x3, x4, x5 = x
                stor, ener, price, dem, t = params
                state = md.State(stor, ener, price, dem)
                decision = md.Decision(x0, x1, x2, x3, x4, x5)
                input = [
                    model.transition(self, decision, state), state.energy,
                    state.price
                ]
                norm = np.linalg.norm(input)
                normalized_input = input / norm
                return (infeasibility_indicator(x, *params) * 1e6 -
                        norm * self.VF_approximation[t].predict(
                            np.array([normalized_input])))

            #TODO: different stepsize?
            stepsize = 0.1 * max(const1, const2, const3)
            print("const1:", const1, ", const2:", const2, ", const3:", const3)
            rranges = (slice(0, const3 + stepsize,
                             stepsize), slice(0, const1 + stepsize, stepsize),
                       slice(0, state.demand + stepsize,
                             stepsize), slice(0, const2 + stepsize, stepsize),
                       slice(0, const2 + stepsize,
                             stepsize), slice(0, const1 + stepsize, stepsize))
            #    pt = outputfcts.progress_timer(description = 'Progress', n_iter = 1)
            solution = brute(objective, rranges, args=params, finish=None)
            contr_solution = brute(VF_objective,
                                   rranges,
                                   args=params,
                                   finish=None)
            VF_solution = brute(contr_objective,
                                rranges,
                                args=params,
                                finish=None)

            res = self.scipy_policy_improvement_for_this_state(
                model, state, t, solution, VF_solution, contr_solution,
                iteration)
            solution = res[0]
            VF_solution = res[1]
            contr_solution = res[2]

        #    pt.update()
        #    pt.finish()
################################
#solve maximization problem with Artelys Knitro Solver (student free trial version)
        else:
            solution = policy_improvement.maximize(
                model, self, state,
                lambda decision: self.VF_approximation[t].predict(
                    np.array([[
                        model.transition(
                            self, md.convert_array_to_decision(decision), state
                        ), state.energy, state.price
                    ]])), t)  #GPy version
            VF_solution = policy_improvement_VF_only.maximize(
                model, state,
                lambda decision: self.VF_approximation[t].predict(
                    np.array([[
                        model.transition(
                            self, md.convert_array_to_decision(decision), state
                        ), state.energy, state.price
                    ]])), t)  #GPy version
            contr_solution = policy_improvement_contr_only.maximize(
                model, self, state, t)  #GPy version

        print("Iteration", iteration, " at time", t, solution)
        print("State: st, p, e, d ", state.storage, state.price, state.energy,
              state.demand)
        print(
            "Post-decision storage after", t, ":",
            model.transition(self, md.convert_array_to_decision(solution),
                             state))
        print(
            "Solution is feasible:",
            model.is_feasible(self, md.convert_array_to_decision(solution),
                              state))
        input1 = [
            model.transition(self, md.convert_array_to_decision(solution),
                             state), state.energy, state.price
        ]
        norm1 = np.linalg.norm(input1)
        normalized_input1 = input1 / norm1
        input2 = [
            model.transition(self, md.convert_array_to_decision(VF_solution),
                             state), state.energy, state.price
        ]
        norm2 = np.linalg.norm(input2)
        normalized_input2 = input2 / norm2
        print("VF_approximation at solution:",
              norm1 * self.VF_approximation[t].predict(
                  np.array([normalized_input1])))  #GPy version
        print(
            "Contribution at solution:",
            model.contribution(self, md.convert_array_to_decision(solution),
                               state))
        print(
            "Optimal contribution obtained at ", contr_solution,
            "with contribution value:",
            model.contribution(self,
                               md.convert_array_to_decision(contr_solution),
                               state))
        print(
            "Optimal VF obtained at ", VF_solution, "with VF value:", norm2 *
            self.VF_approximation[t].predict(np.array([normalized_input2])))
        return md.convert_array_to_decision(solution)