Beispiel #1
0
    def step(self, prices):
        """Run one step: query the controller, score every player, update.

        The controller proposes points for ``prices``; each player in
        ``self.players_dict`` responds to those points with an energy
        profile, which is scored by a ``Reward`` object.  The summed reward
        is fed back to the controller through ``controller.update``.

        Args:
            prices: Prices handed to ``controller.get_points``, to every
                ``Reward`` and to ``controller.update``.

        Returns:
            A 5-tuple ``(controllers_points, last_player_energy,
            last_player_ideal, total_reward, end)``.  The two ``last_*``
            entries belong to the last player iterated and are ``None`` when
            ``players_dict`` is empty.  ``end`` is True once
            ``self.current_iter`` (read before it is incremented) has reached
            ``self.num_iters``.
        """
        controller = self.controller
        controllers_points = controller.get_points(prices)

        # Bound up front so the return statement is well-defined even when
        # there are no players (previously an UnboundLocalError).
        last_player_energy = None
        last_player_ideal = None
        player_rewards = []

        for player in self.players_dict.values():
            # The player consumes the points via ``.numpy()``
            # (presumably a tensor -> array conversion; confirm with caller).
            player_energy = player.threshold_exp_response(
                controllers_points.numpy())

            player_reward = Reward(player_energy, prices,
                                   player.get_min_demand(),
                                   player.get_max_demand())
            player_ideal_demands = player_reward.ideal_use_calculation()

            # Scored with the scaled cost distance; Reward's
            # neg_distance_from_ideal was the alternative tried here before.
            player_rewards.append(
                player_reward.scaled_cost_distance_neg(player_ideal_demands))

            last_player_energy = player_energy
            last_player_ideal = player_ideal_demands

        total_reward = sum(player_rewards)

        # The summed reward goes back into the controller as its update signal.
        controller.update(total_reward, prices, controllers_points)

        # Advance the clock, wrapping to the start once past the end stamp.
        self._timestep = self._timestep + self._time_interval
        if self._timestep > self._end_timestamp:
            self._timestep = self._start_timestamp

        # The iteration budget is checked before the counter is bumped.
        end = bool(self.current_iter >= self.num_iters)
        self.current_iter += 1

        return (controllers_points, last_player_energy, last_player_ideal,
                total_reward, end)
Beispiel #2
0
    def step(self, prices):
        """Score all players against the controller's points for one step.

        The controller is asked for points given ``prices``; each player in
        ``self.players_dict`` turns those points into an energy profile via
        ``energy_output_simple_linear``, and a ``Reward`` object measures the
        negative distance of that profile from its ideal demands.

        Args:
            prices: Prices handed to ``controller.get_points`` and to every
                ``Reward``.

        Returns:
            ``(total_distance, end)``: the sum of the per-player distances
            and a flag that is True once the advanced ``self._timestep``
            exceeds ``self._end_timestamp``.
        """
        points = self.controller.get_points(prices)

        distances = []
        for participant in self.players_dict.values():
            # Player's energy response to the controller's points.
            energy = participant.energy_output_simple_linear(points)

            # Score the response against the ideal demand profile.
            lower_bound = participant.get_min_demand()
            upper_bound = participant.get_max_demand()
            scorer = Reward(energy, prices, lower_bound, upper_bound)
            ideal = scorer.ideal_use_calculation()
            distances.append(scorer.neg_distance_from_ideal(ideal))

        total_distance = sum(distances)

        # NOTE: the controller is not updated in this variant; the
        # controller.update call is disabled.

        # Advance the clock; the episode ends once it passes the end stamp.
        self._timestep = self._timestep + self._time_interval
        end = bool(self._timestep > self._end_timestamp)

        return total_distance, end