def step(self, prices):
    """Advance the simulation by one time interval.

    Steps performed:
      - ask the controller for its points given ``prices``
      - collect every player's energy response to those points
      - score each response with ``Reward`` and sum the scores
      - pass the summed reward back through ``controller.update``
      - advance (and, if needed, wrap) the internal timestep

    Args:
        prices: Price signal for this step. It is forwarded unchanged to
            the controller and to every ``Reward`` instance.

    Returns:
        A 5-tuple ``(controllers_points, last_player_energy,
        last_player_ideal, total_reward, end)``. The energy and ideal
        entries belong to the last player iterated and are ``None`` when
        ``self.players_dict`` is empty. ``end`` is True when
        ``self.current_iter`` had already reached ``self.num_iters``
        before this call incremented it.
    """
    controller = self.controller
    controllers_points = controller.get_points(prices)

    # Bound up front so an empty players_dict returns None for these
    # instead of raising UnboundLocalError at the return statement.
    last_player_energy = None
    last_player_ideal = None
    rewards_dict = {}

    for player_name, player in self.players_dict.items():
        # Player's energy response to the controller's points.
        # NOTE(review): presumably controllers_points is a tensor-like
        # object exposing .numpy() -- confirm against the controller.
        player_energy = player.threshold_exp_response(
            controllers_points.numpy())
        last_player_energy = player_energy

        # Reward derived from the player's response.
        player_reward = Reward(
            player_energy,
            prices,
            player.get_min_demand(),
            player.get_max_demand(),
        )
        player_ideal_demands = player_reward.ideal_use_calculation()
        last_player_ideal = player_ideal_demands

        # The scaled cost distance is the metric in use here;
        # neg_distance_from_ideal is the alternative on the same object.
        rewards_dict[player_name] = player_reward.scaled_cost_distance_neg(
            player_ideal_demands)

    total_reward = sum(rewards_dict.values())

    # The summed reward goes back into the controller as its update signal.
    controller.update(total_reward, prices, controllers_points)

    # Advance the clock, wrapping to the start once past the end timestamp.
    self._timestep = self._timestep + self._time_interval
    if self._timestep > self._end_timestamp:
        self._timestep = self._start_timestamp

    # NOTE(review): the iteration check and increment are reconstructed as
    # per-step (not nested under the wrap-around branch) -- confirm.
    end = False
    if self.current_iter >= self.num_iters:
        end = True
    self.current_iter += 1

    return (controllers_points, last_player_energy, last_player_ideal,
            total_reward, end)
def step(self, prices):
    """Advance the simulation by one time interval and score the players.

    Asks the controller for its points given ``prices``, collects each
    player's energy output for those points, and sums every player's
    negative distance from its ideal demand. The controller is not
    updated with the result in this variant.

    Args:
        prices: Price signal for this step. It is forwarded unchanged to
            the controller and to every ``Reward`` instance.

    Returns:
        A 2-tuple ``(total_distance, end)``. ``total_distance`` is the sum
        of the per-player ``neg_distance_from_ideal`` values (``0`` when
        ``self.players_dict`` is empty). ``end`` is True once the advanced
        timestep exceeds ``self._end_timestamp``.
    """
    controllers_points = self.controller.get_points(prices)

    rewards_dict = {}
    for player_name, player in self.players_dict.items():
        # Player's energy output for the controller's points.
        player_energy = player.energy_output_simple_linear(controllers_points)

        # Reward derived from the player's output.
        player_reward = Reward(
            player_energy,
            prices,
            player.get_min_demand(),
            player.get_max_demand(),
        )
        player_ideal_demands = player_reward.ideal_use_calculation()
        rewards_dict[player_name] = player_reward.neg_distance_from_ideal(
            player_ideal_demands)

    total_distance = sum(rewards_dict.values())

    # Advance the clock; the episode ends once it passes the end timestamp.
    self._timestep = self._timestep + self._time_interval
    end = False
    if self._timestep > self._end_timestamp:
        end = True

    return total_distance, end