class Location:
    """A rental location with Poisson-distributed daily demand and returns.

    For every possible number of starting cars (``0..max_cars``) the class
    precomputes the joint distribution of ``(ending_cars, cars_rented)``
    outcomes and a set of summaries derived from it.  Call :meth:`build`
    after construction and before using any of the query methods.
    """

    def __init__(
        self,
        max_cars: int,
        rental_rate: float,
        return_rate: float,
        excess_parking_cost: float,
        free_parking_limit: int = 10,
    ):
        """Store the location parameters and create empty result containers.

        Args:
            max_cars: Largest number of cars the location can hold; ending
                car counts are capped at this value.
            rental_rate: Poisson mean of the daily rental demand.
            return_rate: Poisson mean of the daily returns.
            excess_parking_cost: Cost returned by :meth:`parking_costs` when
                the ending car count exceeds ``free_parking_limit``.
            free_parking_limit: Number of cars that can be kept without
                incurring ``excess_parking_cost`` (defaults to 10, the value
                that used to be hard-coded).
        """
        self._max_cars: int = max_cars
        self._rental_rate: float = rental_rate
        self._return_rate: float = return_rate
        self._excess_parking_cost: float = excess_parking_cost
        self._free_parking_limit: int = free_parking_limit

        self._car_count: list[int] = []
        self._demand_distribution: Multinoulli[int] = Multinoulli()
        self._return_distribution: Multinoulli[int] = Multinoulli()

        # for each starting_cars find possible outcomes
        # dict[starting_cars, dict[LocationOutcome, probability]]
        self.outcome_distributions: dict[int, Multinoulli[LocationOutcome]] = {}
        # total number of (starting_cars, outcome) pairs found by build()
        self._counter: int = 0

        # summaries
        # dict[starting_cars, cars_rented * probability]
        self.expected_cars_rented: dict[int, float] = {}
        # dict[starting_cars, dict[ending_cars, probability]]
        self.ending_cars_distribution: dict[int, Multinoulli[int]] = {}
        # dict[starting_cars, dict[ending_cars, cars_rented_x_probability]]
        self.cars_rented_x_probability_by_ending_cars: dict[int, dict[int, float]] = {}
        # dict[starting_cars, dict[ending_cars, expected_cars_rented]]
        self.expected_cars_rented_by_ending_cars: dict[int, dict[int, float]] = {}

    def build(self):
        """Compute the demand/return distributions, outcomes and summaries."""
        self._build_rental_return_distributions()
        self._build_outcome_distributions()
        # Recount from scratch so that calling build() more than once does
        # not inflate the total.
        self._counter = sum(
            1
            for distribution in self.outcome_distributions.values()
            for _ in distribution.keys()
        )
        self._build_summaries()

    def _build_rental_return_distributions(self):
        """Populate the car-count range and the demand/return distributions."""
        self._car_count = list(range(self._max_cars + 1))
        self._demand_distribution = self._build_truncated_poisson(self._rental_rate)
        self._return_distribution = self._build_truncated_poisson(self._return_rate)

    def _build_truncated_poisson(self, rate: float) -> "Multinoulli[int]":
        """Return a Poisson(rate) distribution over ``0..max_cars``."""
        distribution = Multinoulli(
            {c: self._poisson(rate, c) for c in range(self._max_cars + 1)}
        )
        # Fold the probability mass above max_cars into the last bucket so
        # that the probabilities sum to 1.
        distribution[self._max_cars] += 1.0 - sum(distribution.values())
        # NOTE(review): enable() presumably finalises the distribution for
        # use - confirm against Multinoulli.
        distribution.enable()
        return distribution

    def _poisson(self, lambda_: float, n: int) -> float:
        """Return the Poisson probability mass at ``n`` for mean ``lambda_``."""
        return stats.poisson.pmf(k=n, mu=lambda_)

    def _build_outcome_distributions(self):
        """Build the outcome distribution for every starting car count."""
        for starting_cars in self._car_count:
            self._build_outcome_distribution(starting_cars)

    def _build_outcome_distribution(self, starting_cars: int):
        """Enumerate all demand/return pairs for one starting car count.

        Demand and returns are treated as independent, so each pair
        contributes ``demand_probability * return_probability`` to the
        resulting ``LocationOutcome(ending_cars, cars_rented)``.
        """
        outcome_distribution: Multinoulli[LocationOutcome] = Multinoulli()
        for car_demand, demand_probability in self._demand_distribution.items():
            cars_rented = self._get_cars_rented(starting_cars, car_demand)
            for cars_returned, return_probability in self._return_distribution.items():
                ending_cars = self._get_ending_cars(starting_cars, cars_rented, cars_returned)
                probability = demand_probability * return_probability
                # Zero-probability outcomes are left out of the distribution.
                if probability > 0.0:
                    location_outcome = LocationOutcome(ending_cars, cars_rented)
                    outcome_distribution[location_outcome] += probability
        outcome_distribution.enable()
        self.outcome_distributions[starting_cars] = outcome_distribution

    def _get_cars_rented(self, starting_cars: int, car_demand: int) -> int:
        """Return the cars actually rented: demand limited by availability."""
        return min(starting_cars, car_demand)

    def _get_ending_cars(self, starting_cars: int, cars_rented: int, cars_returned: int) -> int:
        """Return the end-of-day car count, capped at ``max_cars``."""
        return min(starting_cars - cars_rented + cars_returned, self._max_cars)

    def _build_summaries(self):
        """Derive per-starting-cars summaries from the outcome distributions."""
        for starting_cars, outcome_distribution in self.outcome_distributions.items():
            expected_cars_rented: float = 0.0
            ending_cars_distribution: Multinoulli[int] = Multinoulli()
            cars_rented_x_probability_by_ending_cars: DictZero[int, float] = DictZero()
            for outcome, probability in outcome_distribution.items():
                cars_rented_x_probability = outcome.cars_rented * probability
                expected_cars_rented += cars_rented_x_probability
                ending_cars_distribution[outcome.ending_cars] += probability
                cars_rented_x_probability_by_ending_cars[outcome.ending_cars] += cars_rented_x_probability
            ending_cars_distribution.enable()

            expected_cars_rented_by_ending_cars: DictZero[int, float] = DictZero()
            for ending_cars, ending_cars_probability in ending_cars_distribution.items():
                cars_rented_x_probability = cars_rented_x_probability_by_ending_cars[ending_cars]
                # E[r|s,a,s'] = Sum_over_r( p(r,s'|s,a).r ) / p(s'|s,a)
                expected_cars_rented_by_ending_cars[ending_cars] = (
                    cars_rented_x_probability / ending_cars_probability
                )

            self.expected_cars_rented[starting_cars] = expected_cars_rented
            self.ending_cars_distribution[starting_cars] = ending_cars_distribution
            self.cars_rented_x_probability_by_ending_cars[starting_cars] = cars_rented_x_probability_by_ending_cars
            self.expected_cars_rented_by_ending_cars[starting_cars] = expected_cars_rented_by_ending_cars

    def get_outcome_distribution(self, starting_cars: int) -> "Multinoulli[LocationOutcome]":
        """Return the outcome distribution for ``starting_cars``."""
        return self.outcome_distributions[starting_cars]

    def get_ending_cars_distribution(self, starting_cars: int) -> "Multinoulli[int]":
        """Return the distribution of ending cars for ``starting_cars``."""
        return self.ending_cars_distribution[starting_cars]

    def get_transition_probability(self, starting_cars: int, ending_cars: int) -> float:
        """Return the probability of ending with ``ending_cars`` given ``starting_cars``."""
        return self.ending_cars_distribution[starting_cars][ending_cars]

    def get_expected_cars_rented_given_ending_cars(self, starting_cars: int, ending_cars: int) -> float:
        """Return the expected cars rented given both starting and ending cars."""
        return self.expected_cars_rented_by_ending_cars[starting_cars][ending_cars]

    def draw_outcome(self, starting_cars: int) -> "LocationOutcome":
        """Draw one outcome from the distribution for ``starting_cars``."""
        return self.outcome_distributions[starting_cars].draw_one()

    def parking_costs(self, end_cars: int) -> float:
        """Return the parking cost for holding ``end_cars`` cars.

        The cost is ``excess_parking_cost`` when ``end_cars`` is strictly
        greater than ``free_parking_limit`` and ``0.0`` otherwise.
        """
        if end_cars > self._free_parking_limit:
            return self._excess_parking_cost
        return 0.0
from __future__ import annotations # from typing import TYPE_CHECKING from scipy import stats from mdp.common import Multinoulli # _car_count: list[int] = [] # _demand_distribution: Distribution[int] = Distribution() _max_cars = 5 def _poisson(lambda_: float, n: int) -> float: return stats.poisson.pmf(k=n, mu=lambda_) _car_count = [c for c in range(_max_cars + 1)] print(_car_count) _demand_distribution = Multinoulli( {c: _poisson(4.0, c) for c in range(_max_cars + 1)}) print(_demand_distribution) _demand_distribution[_max_cars] += 1.0 - sum( _demand_distribution.dict.values()) print(_demand_distribution) _demand_distribution.enable()