Beispiel #1
0
    def _build_rental_return_distributions(self):
        """
        Build the demand and return distributions over the car counts 0.._max_cars.

        Each one is a Poisson pmf (mean _rental_rate / _return_rate) evaluated on
        every car count. The mass missing from 1.0 is then added to the
        _max_cars entry before the distribution is enabled.
        """
        counts = range(self._max_cars + 1)
        self._car_count = list(counts)

        demand = Multinoulli({n: self._poisson(self._rental_rate, n) for n in counts})
        self._demand_distribution = demand
        # whatever the truncated pmf is short of 1.0 goes into the last bucket
        demand[self._max_cars] += 1.0 - sum(demand.values())
        demand.enable()

        returns = Multinoulli({n: self._poisson(self._return_rate, n) for n in counts})
        self._return_distribution = returns
        returns[self._max_cars] += 1.0 - sum(returns.values())
        returns.enable()
Beispiel #2
0
    def __init__(self, environment: Environment, environment_parameters: EnvironmentParameters):
        """
        Initialise the base class, keep typed references to the environment and
        its parameters, and create the card distribution with no entries passed in.
        """
        super().__init__(environment, environment_parameters)
        # the same two objects handed to the base initialiser, re-declared with their concrete types
        self._environment: Environment = environment
        self._environment_parameters: EnvironmentParameters = environment_parameters

        self._max_card: int = 10    # 10, J, Q or K combined
        # constructed empty here; nothing in this method populates it
        self._card_distribution: Multinoulli[int] = Multinoulli()
Beispiel #3
0
    def _build_summaries(self):
        """
        Derive the per-starting_cars summary tables from outcome_distributions.

        For every starting_cars this stores:
          expected_cars_rented                     : Sum( p(outcome) * cars_rented )
          ending_cars_distribution                 : probability summed per ending_cars
          cars_rented_x_probability_by_ending_cars : Sum( p * cars_rented ) per ending_cars
          expected_cars_rented_by_ending_cars      : the previous entry divided by p(ending_cars)
        """
        for starting_cars, outcomes in self.outcome_distributions.items():
            total_rented: float = 0.0
            ending_dist: Multinoulli[int] = Multinoulli()
            weighted_rented: DictZero[int, float] = DictZero()

            for outcome, p in outcomes.items():
                contribution = outcome.cars_rented * p
                ending = outcome.ending_cars

                total_rented += contribution
                ending_dist[ending] += p
                weighted_rented[ending] += contribution
            # enabled before it is read back in the loop below
            ending_dist.enable()

            conditional_rented: DictZero[int, float] = DictZero()
            for ending, p_ending in ending_dist.items():
                # E[r|s,a,s'] = Sum_over_r( p(r,s'|s,a).r ) / p(s'|s,a)
                conditional_rented[ending] = weighted_rented[ending] / p_ending

            self.expected_cars_rented[starting_cars] = total_rented
            self.ending_cars_distribution[starting_cars] = ending_dist
            self.cars_rented_x_probability_by_ending_cars[starting_cars] = weighted_rented
            self.expected_cars_rented_by_ending_cars[starting_cars] = conditional_rented
Beispiel #4
0
    def get_all_outcomes(self, state: State,
                         action: Action) -> Multinoulli[Response]:
        """
        distribution over (reward, next_state) for a single state and action

        every outcome of location 1 is paired with every outcome of location 2,
        so this is workable for one (state, action) but potentially too large
        to be useful across all states and actions
        """
        self._calc_start_of_day(state, action)

        distribution_1: Multinoulli[LocationOutcome] = \
            self._location_1.get_outcome_distribution(self._starting_cars_1)
        distribution_2: Multinoulli[LocationOutcome] = \
            self._location_2.get_outcome_distribution(self._starting_cars_2)

        # collate (s', r): probabilities of pairs that give the same key are summed
        responses: Multinoulli[Response] = Multinoulli()
        for first, p_first in distribution_1.items():
            for second, p_second in distribution_2.items():
                next_state = State(is_terminal=False,
                                   ending_cars_1=first.ending_cars,
                                   ending_cars_2=second.ending_cars)
                reward = self._calc_reward(first.cars_rented + second.cars_rented)
                # joint probability is the product of the two per-location probabilities
                responses[reward, next_state] += p_first * p_second
        responses.enable()
        return responses
Beispiel #5
0
    def get_summary_outcomes(self, state: State,
                             action: Action) -> Multinoulli[Response]:
        """
        distribution over (reward, next_state) for a single state and action,
        where the reward is computed from the expected cars rented given the
        ending cars at each location rather than from individual outcomes
        """
        self._calc_start_of_day(state, action)
        first_location = self._location_1
        second_location = self._location_2
        start_1 = self._starting_cars_1
        start_2 = self._starting_cars_2

        ending_dist_1: Multinoulli[int] = first_location.get_ending_cars_distribution(start_1)
        ending_dist_2: Multinoulli[int] = second_location.get_ending_cars_distribution(start_2)

        responses: Multinoulli[Response] = Multinoulli()
        for end_1, p_1 in ending_dist_1.items():
            rented_1 = first_location.get_expected_cars_rented_given_ending_cars(start_1, end_1)
            for end_2, p_2 in ending_dist_2.items():
                rented_2 = second_location.get_expected_cars_rented_given_ending_cars(start_2, end_2)
                next_state = State(is_terminal=False,
                                   ending_cars_1=end_1,
                                   ending_cars_2=end_2)
                reward = self._calc_reward(rented_1 + rented_2)
                # joint probability is the product of the two per-location probabilities
                responses[reward, next_state] += p_1 * p_2
        responses.enable()
        return responses
Beispiel #6
0
    def __init__(self, max_cars: int, rental_rate: float, return_rate: float, excess_parking_cost: float):
        """
        Store the location parameters and create empty containers for the
        distributions and summary tables; nothing is computed here.

        :param max_cars: stored as _max_cars
        :param rental_rate: stored as _rental_rate
        :param return_rate: stored as _return_rate
        :param excess_parking_cost: stored as _excess_parking_cost
        """
        self._max_cars: int = max_cars
        self._rental_rate: float = rental_rate
        self._return_rate: float = return_rate
        self._excess_parking_cost: float = excess_parking_cost

        # car counts and the demand / return distributions over them (empty at this point)
        self._car_count: list[int] = []
        self._demand_distribution: Multinoulli[int] = Multinoulli()
        self._return_distribution: Multinoulli[int] = Multinoulli()

        # for each starting_cars find possible outcomes
        # dict[starting_cars, dict[LocationOutcome, probability]]
        self.outcome_distributions: dict[int, Multinoulli[LocationOutcome]] = {}
        # integer counter, starts at zero
        self._counter: int = 0

        # summaries, all keyed by starting_cars
        # dict[starting_cars, sum over outcomes of cars_rented * probability]
        self.expected_cars_rented: dict[int, float] = {}
        # dict[starting_cars, dict[ending_cars, probability]]
        self.ending_cars_distribution: dict[int, Multinoulli[int]] = {}
        # dict[starting_cars, dict[ending_cars, cars_rented_x_probability]]
        self.cars_rented_x_probability_by_ending_cars: dict[int, dict[int, float]] = {}
        # dict[starting_cars, dict[ending_cars, expected_cars_rented]]
        self.expected_cars_rented_by_ending_cars: dict[int, dict[int, float]] = {}
Beispiel #7
0
    def _build_outcome_distribution(self, starting_cars: int):
        """
        Build and store the distribution of LocationOutcome for one starting_cars.

        Every (car_demand, cars_returned) pair is mapped to a
        LocationOutcome(ending_cars, cars_rented). Its probability is the
        product of the demand and return probabilities, and pairs that give
        the same outcome are summed. The result is stored in
        outcome_distributions[starting_cars].

        :param starting_cars: number of cars at the location at the start of the day
        """
        # Multinoulli takes a single type argument (the outcome type) everywhere else
        outcome_distribution: Multinoulli[LocationOutcome] = Multinoulli()

        for car_demand, demand_probability in self._demand_distribution.items():
            # independent of cars_returned, so computed once per demand value
            cars_rented = self._get_cars_rented(starting_cars, car_demand)
            for cars_returned, return_probability in self._return_distribution.items():
                ending_cars = self._get_ending_cars(starting_cars, cars_rented, cars_returned)
                probability = demand_probability * return_probability

                # zero-probability pairs are left out of the distribution entirely
                if probability > 0.0:
                    location_outcome = LocationOutcome(ending_cars, cars_rented)
                    outcome_distribution[location_outcome] += probability
        outcome_distribution.enable()

        self.outcome_distributions[starting_cars] = outcome_distribution
Beispiel #8
0
    def get_state_transition_distribution(
            self, state: State, action: Action) -> Multinoulli[State]:
        """
        dict[ s', p(s'|s,a) ]
        distribution of next states for a (state, action)

        A HEADS toss adds action.stake to state.capital and a TAILS toss
        subtracts it. A next state is terminal when its capital is 0 or equals
        environment_parameters.max_capital.
        """
        max_capital = self._environment_parameters.max_capital

        distribution: Multinoulli[State] = Multinoulli()
        for toss in (Toss.HEADS, Toss.TAILS):
            probability = self._toss_distribution[toss]
            if toss == Toss.HEADS:
                new_capital = state.capital + action.stake
            else:
                new_capital = state.capital - action.stake
            is_terminal: bool = new_capital == 0 or new_capital == max_capital
            next_state = State(is_terminal=is_terminal, capital=new_capital)
            # Accumulate rather than assign: when both tosses lead to the same
            # next state (e.g. a stake of 0, assuming State compares by value)
            # plain assignment would keep only the TAILS probability.
            distribution[next_state] += probability
        distribution.enable(do_self_check=False)

        return distribution
Beispiel #9
0
    def _calc_next_state_distribution(self, state: State,
                                      action: Action) -> Multinoulli[State]:
        """
        dict[ s', p(s'|s,a) ]
        distribution of next states for a (state, action), built from the
        ending-cars distributions of the two locations
        """
        self._calc_start_of_day(state, action)
        dist_1 = self._location_1.get_ending_cars_distribution(self._starting_cars_1)
        dist_2 = self._location_2.get_ending_cars_distribution(self._starting_cars_2)

        result: Multinoulli[State] = Multinoulli()
        for cars_1, p_1 in dist_1.items():
            for cars_2, p_2 in dist_2.items():
                successor = State(is_terminal=False,
                                  ending_cars_1=cars_1,
                                  ending_cars_2=cars_2)
                # joint probability is the product of the two per-location probabilities
                result[successor] = p_1 * p_2
        result.enable()
        return result
from __future__ import annotations

from mdp.common import Multinoulli
from mdp.task.jacks.model.state import State

# Small manual check of Multinoulli keyed by State objects.
x: Multinoulli[State] = Multinoulli()

# three distinct states; only the first two are given a probability below
my_state = State(is_terminal=False, ending_cars_1=5, ending_cars_2=6)
my_state2 = State(is_terminal=False, ending_cars_1=7, ending_cars_2=6)
my_state3 = State(is_terminal=False, ending_cars_1=9, ending_cars_2=6)

# plain assignment and in-place addition are both exercised before enable()
x[my_state] = 0.8
x[my_state2] += 0.2

# my_state3 was never assigned, so its line shows what an unset key reads as
for state in (my_state, my_state2, my_state3):
    print(x[state])

x.enable()

print(x.draw_one())
Beispiel #11
0
class Location:
    """
    One rental location: demand/return distributions over car counts and the
    outcome tables derived from them, all indexed by the number of cars at the
    start of the day.

    Call build() after construction; the get_* accessors and draw_outcome()
    read the tables that build() populates.
    """

    # parking_costs() charges the excess cost only when more cars than this remain
    _FREE_PARKING_CARS: int = 10

    def __init__(self, max_cars: int, rental_rate: float, return_rate: float, excess_parking_cost: float):
        """
        Store the parameters and create empty containers; nothing is computed here.

        :param max_cars: largest car count the location can hold (ending cars are capped at it)
        :param rental_rate: mean of the Poisson pmf used for rental demand
        :param return_rate: mean of the Poisson pmf used for returns
        :param excess_parking_cost: amount returned by parking_costs() above the free limit
        """
        self._max_cars: int = max_cars
        self._rental_rate: float = rental_rate
        self._return_rate: float = return_rate
        self._excess_parking_cost: float = excess_parking_cost

        self._car_count: list[int] = []
        self._demand_distribution: Multinoulli[int] = Multinoulli()
        self._return_distribution: Multinoulli[int] = Multinoulli()

        # for each starting_cars find possible outcomes
        # dict[starting_cars, dict[LocationOutcome, probability]]
        self.outcome_distributions: dict[int, Multinoulli[LocationOutcome]] = {}
        # total number of (starting_cars, outcome) entries, set by build()
        self._counter: int = 0

        # summaries, all keyed by starting_cars
        # dict[starting_cars, sum over outcomes of cars_rented * probability]
        self.expected_cars_rented: dict[int, float] = {}
        # dict[starting_cars, dict[ending_cars, probability]]
        self.ending_cars_distribution: dict[int, Multinoulli[int]] = {}
        # dict[starting_cars, dict[ending_cars, cars_rented_x_probability]]
        self.cars_rented_x_probability_by_ending_cars: dict[int, dict[int, float]] = {}
        # dict[starting_cars, dict[ending_cars, expected_cars_rented]]
        self.expected_cars_rented_by_ending_cars: dict[int, dict[int, float]] = {}

    def build(self):
        """Populate the demand/return distributions, the outcome tables and the summaries."""
        self._build_rental_return_distributions()
        self._build_outcome_distributions()
        # assigned rather than accumulated so that calling build() again does not double-count
        self._counter = sum(1
                            for distribution in self.outcome_distributions.values()
                            for _ in distribution.keys())
        self._build_summaries()

    def _build_rental_return_distributions(self):
        """Build the list of car counts and the demand and return distributions over them."""
        self._car_count = list(range(self._max_cars + 1))
        self._demand_distribution = self._build_truncated_poisson(self._rental_rate)
        self._return_distribution = self._build_truncated_poisson(self._return_rate)

    def _build_truncated_poisson(self, rate: float) -> Multinoulli[int]:
        """Poisson(rate) pmf over 0.._max_cars, with the mass missing from 1.0 added to _max_cars."""
        distribution: Multinoulli[int] = Multinoulli(
            {c: self._poisson(rate, c) for c in range(self._max_cars + 1)})
        distribution[self._max_cars] += 1.0 - sum(distribution.values())
        distribution.enable()
        return distribution

    def _poisson(self, lambda_: float, n: int) -> float:
        """Probability of exactly n events under a Poisson distribution with mean lambda_."""
        return stats.poisson.pmf(k=n, mu=lambda_)

    def _build_outcome_distributions(self):
        """Build the outcome distribution for every car count in _car_count."""
        for starting_cars in self._car_count:
            self._build_outcome_distribution(starting_cars)

    def _build_outcome_distribution(self, starting_cars: int):
        """
        Build and store the distribution of LocationOutcome for one starting_cars.

        Every (car_demand, cars_returned) pair is mapped to a
        LocationOutcome(ending_cars, cars_rented) whose probability is the
        product of the two probabilities; pairs giving the same outcome are summed.
        """
        # Multinoulli takes a single type argument (the outcome type) everywhere else
        outcome_distribution: Multinoulli[LocationOutcome] = Multinoulli()

        for car_demand, demand_probability in self._demand_distribution.items():
            cars_rented = self._get_cars_rented(starting_cars, car_demand)
            for cars_returned, return_probability in self._return_distribution.items():
                ending_cars = self._get_ending_cars(starting_cars, cars_rented, cars_returned)
                probability = demand_probability * return_probability

                # zero-probability pairs are left out of the distribution entirely
                if probability > 0.0:
                    location_outcome = LocationOutcome(ending_cars, cars_rented)
                    outcome_distribution[location_outcome] += probability
        outcome_distribution.enable()

        self.outcome_distributions[starting_cars] = outcome_distribution

    def _get_cars_rented(self, starting_cars: int, car_demand: int) -> int:
        """Cars actually rented: demand, limited by the cars available."""
        return min(starting_cars, car_demand)

    def _get_ending_cars(self, starting_cars: int, cars_rented: int, cars_returned: int) -> int:
        """Cars left after rentals and returns, capped at _max_cars."""
        return min(starting_cars - cars_rented + cars_returned, self._max_cars)

    def _build_summaries(self):
        """
        Derive the per-starting_cars summary tables from outcome_distributions.

        For every starting_cars this stores the expected cars rented, the
        distribution of ending cars, Sum(p * cars_rented) per ending_cars, and
        the expected cars rented conditional on ending_cars.
        """
        for starting_cars, outcomes in self.outcome_distributions.items():
            expected_cars_rented: float = 0.0
            ending_cars_distribution: Multinoulli[int] = Multinoulli()
            cars_rented_x_probability_by_ending_cars: DictZero[int, float] = DictZero()

            for outcome, probability in outcomes.items():
                cars_rented_x_probability = outcome.cars_rented * probability

                expected_cars_rented += cars_rented_x_probability
                ending_cars_distribution[outcome.ending_cars] += probability
                cars_rented_x_probability_by_ending_cars[outcome.ending_cars] += cars_rented_x_probability
            # enabled before it is read back in the loop below
            ending_cars_distribution.enable()

            expected_cars_rented_by_ending_cars: DictZero[int, float] = DictZero()
            for ending_cars, ending_cars_probability in ending_cars_distribution.items():
                # E[r|s,a,s'] = Sum_over_r( p(r,s'|s,a).r ) / p(s'|s,a)
                expected_cars_rented_by_ending_cars[ending_cars] = (
                    cars_rented_x_probability_by_ending_cars[ending_cars] / ending_cars_probability)

            self.expected_cars_rented[starting_cars] = expected_cars_rented
            self.ending_cars_distribution[starting_cars] = ending_cars_distribution
            self.cars_rented_x_probability_by_ending_cars[starting_cars] = cars_rented_x_probability_by_ending_cars
            self.expected_cars_rented_by_ending_cars[starting_cars] = expected_cars_rented_by_ending_cars

    def get_outcome_distribution(self, starting_cars: int) -> Multinoulli[LocationOutcome]:
        """Distribution of LocationOutcome for the given starting_cars."""
        return self.outcome_distributions[starting_cars]

    def get_ending_cars_distribution(self, starting_cars: int) -> Multinoulli[int]:
        """Distribution of ending cars for the given starting_cars."""
        return self.ending_cars_distribution[starting_cars]

    def get_transition_probability(self, starting_cars: int, ending_cars: int) -> float:
        """Probability of finishing with ending_cars when starting with starting_cars."""
        return self.ending_cars_distribution[starting_cars][ending_cars]

    def get_expected_cars_rented_given_ending_cars(self, starting_cars: int, ending_cars: int) -> float:
        """Expected cars rented, conditional on both starting_cars and ending_cars."""
        return self.expected_cars_rented_by_ending_cars[starting_cars][ending_cars]

    def draw_outcome(self, starting_cars: int) -> LocationOutcome:
        """Sample one LocationOutcome from the outcome distribution for starting_cars."""
        return self.outcome_distributions[starting_cars].draw_one()

    def parking_costs(self, end_cars: int) -> float:
        """Return the excess parking cost when end_cars exceeds the free limit, otherwise 0.0."""
        if end_cars > self._FREE_PARKING_CARS:
            return self._excess_parking_cost
        return 0.0
Beispiel #12
0
from __future__ import annotations
# from typing import TYPE_CHECKING

from scipy import stats

from mdp.common import Multinoulli

# Largest car count used by this script; the code below works with the counts 0.._max_cars.
_max_cars = 5


def _poisson(lambda_: float, n: int) -> float:
    """Probability of exactly n events under a Poisson distribution with mean lambda_."""
    return stats.poisson.pmf(n, lambda_)


# every car count from 0 up to and including _max_cars
_car_count = list(range(_max_cars + 1))
print(_car_count)

# Poisson pmf with mean 4.0, evaluated on those counts only
_demand_distribution = Multinoulli({c: _poisson(4.0, c) for c in _car_count})
print(_demand_distribution)

# whatever the truncated pmf is short of 1.0 is added to the last count
_missing_mass = 1.0 - sum(_demand_distribution.dict.values())
_demand_distribution[_max_cars] += _missing_mass
print(_demand_distribution)

_demand_distribution.enable()