Ejemplo n.º 1
0
from Assignment11.td_scratch import td_prediction
from dataclasses import dataclass, replace, field
from Assignment11.monte_carlo_scratch import mc_prediction
from Assignment12.td_nbootstrap import td_nbootstrap_tabular_prediction
from Assignment12.td_lambda import td_lambda_tabular_prediction
import matplotlib.pyplot as plt

# Parameters handed to the SimpleInventoryMRPFinite constructor below.
user_capacity = 2
user_poisson_lambda = 1.0
user_holding_cost = 1.0
user_stockout_cost = 10.0

# Discount factor passed to the value-function computation.
user_gamma = 0.9

si_mrp = SimpleInventoryMRPFinite(
    capacity=user_capacity,
    poisson_lambda=user_poisson_lambda,
    holding_cost=user_holding_cost,
    stockout_cost=user_stockout_cost,
)

print("-----MRP Value Function-----:\n")
print(si_mrp.get_value_function_vec(user_gamma))

# Build the stream of transition steps (Iterable[TransitionStep[S]]):
# every non-terminal state gets the same starting weight.
non_terminal_states = si_mrp.non_terminal_states
start_distribution = {
    nt_state: 1 / len(non_terminal_states) for nt_state in non_terminal_states
}
transitions = si_mrp.simulate_reward(Categorical(start_distribution))


def count_to_weight_func(n: int):
Ejemplo n.º 2
0
    mc_prediction_learning_rate,
    td_prediction_learning_rate,
)
import numpy as np
from itertools import islice

# Parameters handed to the SimpleInventoryMRPFinite constructor below.
capacity: int = 2
poisson_lambda: float = 1.0
holding_cost: float = 1.0
stockout_cost: float = 10.0

# Discount factor used when asking the model for its value function.
gamma: float = 0.9

si_mrp: SimpleInventoryMRPFinite = SimpleInventoryMRPFinite(
    capacity=capacity,
    poisson_lambda=poisson_lambda,
    holding_cost=holding_cost,
    stockout_cost=stockout_cost,
)
nt_states: Sequence[InventoryState] = si_mrp.non_terminal_states
# Value-function vector reported by the model itself for this gamma.
true_vf: np.ndarray = si_mrp.get_value_function_vec(gamma=gamma)

# NOTE(review): presumably Monte Carlo settings (episode-length tolerance and
# number of episodes); the code that consumes them is outside this view.
mc_episode_length_tol: float = 1e-6
num_episodes: int = 10000

# NOTE(review): presumably TD settings (episode length plus the parameters of
# a decaying learning-rate schedule) - confirm against the consuming call.
td_episode_length: int = 100
initial_learning_rate: float = 0.03
half_life: float = 1000.0
exponent: float = 0.5

ffs: Sequence[Callable[[InventoryState],
                       float]] = [(lambda x, s=s: float(x == s))
Ejemplo n.º 3
0
                })
        return d
    
if __name__ == '__main__':

    print("Testing our implementations for Problems 1 and 2 and solving problem 3")
    # Parameters handed to the SimpleInventoryMRPFinite constructor below.
    user_capacity = 2
    user_poisson_lambda = 1.0
    user_holding_cost = 1.0
    user_stockout_cost = 10.0

    # Discount factor used when displaying the value function.
    user_gamma = 0.9

    si_mrp = SimpleInventoryMRPFinite(
        capacity=user_capacity,
        poisson_lambda=user_poisson_lambda,
        holding_cost=user_holding_cost,
        stockout_cost=user_stockout_cost
    )
    print("Value Function")
    print("--------------")
    si_mrp.display_value_function(gamma=user_gamma)
    print()
    
    states:List[InventoryState] = si_mrp.non_terminal_states
    # Every non-terminal state gets the same weight (1).
    # NOTE(review): assumes Categorical normalises the weights - confirm.
    start_state_distrib: Categorical[InventoryState] = Categorical({i:1 for i in states})
    # Two simulation sources built from the same start distribution; judging by
    # the names, one yields whole episodes and the other individual transitions.
    simulation_episodes = si_mrp.reward_traces(start_state_distrib)
    simulation_transitions = si_mrp.simulate_reward(start_state_distrib)
    # Tabular approximation initialised with the value 0 for every state.
    approx_0 = Tabular({i : 0 for i in states})
    value_mc = mc_prediction_scratch(
                traces = simulation_episodes,
                states = states,