Beispiel #1
0
def prob_3():
    """Run parts (a) and (b) of problem 5.3 and report the results.

    Part (a) solves the problem through ``utils_mdp.value`` with a
    single-point disturbance distribution, prints the resulting cost and
    plots a cost histogram for the schedule derived from the policy.

    Part (b) builds a grid of prices, solves through
    ``utils_mdp.value_info_pat`` with a uniform distribution over that grid,
    prints the cost and plots the real-time cost histogram.

    Takes no arguments and has no ``return`` statement; everything it
    produces goes through the print/plot helpers it calls.
    """
    horizon, cap, e_c, _t, price_mu, price_var = problem_data.hw5_p3_data()

    # Problem dimensions handed to the model builders
    # (presumably state / input / disturbance counts -- confirm in get_f).
    dim_n = cap + 1
    dim_m = 2
    dim_p = 1

    transition = get_f(dim_n, dim_m, dim_p)
    stage_cost = get_g(dim_n, dim_m, horizon, price_mu, e_c)
    terminal_value = get_v_final(dim_n)
    noise_dist = np.array([1])

    # part a
    policy_a, value_a = utils_mdp.value(
        transition,
        stage_cost,
        terminal_value,
        noise_dist,
        horizon,
        g_is_time_dependent=True,
    )
    utils_io.label('5.3a')
    print_cost(value_a, 'part a')
    schedule = get_schedule(policy_a)
    normal_mu = get_underlying_normal_mu(price_mu, price_var)
    normal_var = get_underlying_normal_var(price_mu, price_var)
    plot_cost_histogram(schedule, price_mu, price_var, e_c)

    # part b
    grid_dim = 10
    prices = get_prices_over_grid(grid_dim, normal_mu, normal_var)
    stage_cost_known = get_g_w_known(
        dim_n, dim_m, horizon, prices, price_mu, e_c
    )

    # Uniform weights over however many prices the grid helper returned.
    n_prices = len(prices)
    prices_dist = np.ones(n_prices) / n_prices

    # Copy each transition entry once per grid price, then add a trailing
    # singleton axis.
    repeated = transition.repeat(grid_dim)
    transition_known = repeated.reshape([dim_n, dim_m, grid_dim, 1])

    policy_b, value_b = utils_mdp.value_info_pat(
        transition_known,
        stage_cost_known,
        terminal_value,
        prices_dist,
        noise_dist,
        horizon,
        g_is_time_dependent=True,
    )
    utils_io.label('5.3b')
    print_cost(value_b, 'part b')
    plot_cost_histogram_real_time(policy_b, prices, e_c)
Beispiel #2
0
def prob_1():
    """Run problem 5.1: solve for the policy, report it, then run part (c).

    Loads the problem data, assembles the order, storage, revenue and
    unmet-demand cost terms into one stage cost, and solves through
    ``utils_mdp.value`` using the salvage term as the final-stage value.
    It then prints the value at the initial inventory ``q_0`` (column 0 of
    the value array), plots the policy over time, and hands the policy and
    the individual cost terms to ``part_c``.

    Takes no arguments and has no ``return`` statement.
    """
    # NOTE(review): ``D`` is unpacked but never used in this function.
    (d_t_dist, p_fixed, p_whole, p_disc, u_disc, s_lin, s_quad, p_rev,
     p_unmet, p_sal, T, C, D, q_0) = problem_data.hw5_p1_data()
    n = C + 2  # 0, 1, ..., C plus a dummy state
    m = C + 1  # 0, 1, ..., C
    p = len(d_t_dist)
    f = get_f(n, m, p)
    g_order = get_g_order(n, m, p_fixed, p_whole, p_disc, u_disc)  # n by m
    g_store = get_g_store(n, s_lin, s_quad)  # n
    g_rev = get_g_rev(n, m, p, p_rev)  # n by m by p
    g_unmet = get_g_unmet(n, m, p, p_unmet)
    g_sal = get_g_sal(n, p_sal)
    g_total = get_g_total(g_order, g_store, g_rev, g_unmet)
    pol, v = utils_mdp.value(f, g_total, g_sal, d_t_dist, T, g_is_w_dependent=True)
    utils_io.label('5.1a')
    # Single-argument, parenthesised print: valid on Python 2 and 3 alike.
    # The two spaces after the colon reproduce the output of the original
    # statement ``print 'j_star: ', value`` (literal's trailing space plus
    # the separator print inserted). The 1-tuple keeps '%s' safe should the
    # value itself ever be a tuple.
    print('j_star:  %s' % (v[q_0, 0],))
    # part b; yes the optimal policy converges; for all but the last two time periods it is optimal to buy 7 when x = 0,
    #  buy 5 when x = 1, and buy 0 otherwise
    ax = pd.DataFrame(pol).T.plot(title='5.1b: Optimal Policy for various current inventory values (most = 0)', legend=True)
    ax.set_xlabel('time')
    ax.set_ylabel('Number of items to buy')
    plt.ion()
    part_c(pol, g_order, g_store, g_rev, g_unmet, g_sal, q_0, d_t_dist)