Example #1
0
def _report_probability_unsold(f, g, pol, price_pdf, buy_not_buy, n, T, part):
    """Run print_probability_unsold for the closed loop formed by `pol`.

    Closes the loop around `pol` with utils_mdp.cloop_info_pat, feeds the
    closed-loop dynamics to utils_mdp.ftop_info_pat, and passes the result to
    print_probability_unsold together with a freshly built initial state.

    Returns whatever print_probability_unsold returns (prob_2 uses it as the
    probability of ending with unsold stocks).
    """
    # Only the closed-loop dynamics are needed; the closed-loop cost is unused.
    fcl, _ = utils_mdp.cloop_info_pat(f, g, pol, buy_not_buy)
    P = utils_mdp.ftop_info_pat(fcl, price_pdf, buy_not_buy)
    # Build a fresh initial state for every evaluation so that no call depends
    # on what an earlier print_probability_unsold call did with its argument.
    return print_probability_unsold(get_initial_state(n), P, T, part)


def prob_2():
    """Run parts b-e of problem 5.2, printing and plotting the results.

    Part b solves for the revenue-maximizing policy, part c repeats the
    computation with modified prices (reported as the "threshold policy"),
    part d reports the probability of unsold stocks for both policies, and
    part e re-solves with a terminal penalty on unsold stocks.

    Takes no arguments and returns None; all results go to stdout and to the
    plotting helpers.
    """
    # NOTE(review): show() is called before anything is plotted in this
    # function; kept exactly where the original had it -- confirm intent.
    plt.show()
    prices = np.arange(0.6, 2.1, 0.1)
    mean_price = 0
    std_dev_price = 0.2
    price_pdf = get_price_pdf(prices, mean_price, std_dev_price)
    buy_not_buy = np.array([0.6, 0.4])
    T = 50
    S = 10  # number of stocks
    n = S + 1  # we can hold 0, ..., S stocks, at one of len(prices) prices
    m = 2  # we can not offer to sell (0), or offer to sell (1)
    p1 = len(prices)  # 15 different prices
    p2 = 2  # there may not be a buyer (0), or there may be a buyer (1)
    f = get_f(n, m, p1, p2)
    g = get_g(n, m, p1, p2, prices)

    # part b
    g_final = np.zeros(n)
    # pass -g, -g_final since we are maximizing revenue
    pol, v = utils_mdp.value_info_pat(f, -g, -g_final, price_pdf, buy_not_buy, T)
    v = -v
    utils_io.label('5.2b')
    # Single-string print(...) produces the same output on Python 2 and 3.
    print('expected revenue, optimal policy: ' + str(v[-1, 0]))
    plot_pol_info_pat('5.2b', pol, [0, 20, 40, 45])
    plot_val('5.2b', v, [0, 45, 48, 49, 50])

    # part c
    prices_modified = get_prices_modified(prices, price_pdf)
    g_modified = get_g(n, m, p1, p2, prices_modified)
    pol_mod, v_mod = utils_mdp.value_info_pat(f, -g_modified, -g_final, price_pdf, buy_not_buy, T)
    v_mod = -v_mod
    utils_io.label('5.2c')
    print('expected revenue, threshold policy: ' + str(v_mod[-1, 0]))
    plot_pol_info_pat('5.2c', pol_mod, [0, 20, 40, 45])
    plot_val('5.2c', v_mod, [0, 45, 48, 49, 50])

    # part d
    _report_probability_unsold(f, g, pol, price_pdf, buy_not_buy, n, T, 'b')
    _report_probability_unsold(f, g_modified, pol_mod, price_pdf, buy_not_buy, n, T, 'c')

    # part e
    UNSOLD_PENALTY = -100
    # add a penalty for unsold stocks, to incentivize selling
    g_final_mod = UNSOLD_PENALTY * np.ones([n])
    g_final_mod[0] = 0.
    pol_e, v_e = utils_mdp.value_info_pat(f, -g, -g_final_mod, price_pdf, buy_not_buy, T)
    v_e = -v_e
    utils_io.label('5.2e')
    prob_unsold = _report_probability_unsold(f, g, pol_e, price_pdf, buy_not_buy, n, T, 'e')
    # add back the penalty times the probability we have unsold stocks
    revenue_e = v_e[-1, 0] - prob_unsold * UNSOLD_PENALTY
    print('expected revenue, policy e: ' + str(revenue_e))
Example #2
0
def prob_3():
    """Run parts a and b of problem 5.3, printing costs and plotting histograms.

    Part a computes a policy with utils_mdp.value using a single-valued
    disturbance distribution, then prints its cost and plots a cost histogram
    for the resulting schedule.  Part b repeats the computation with
    utils_mdp.value_info_pat over a uniform distribution on a 10-point price
    grid and plots the real-time cost histogram.

    Takes no arguments and returns None.
    """
    # The fourth value returned by hw5_p3_data is not used in this function.
    T, C, e_c, _unused_t, p_mu, p_var = problem_data.hw5_p3_data()
    dim_n = C + 1
    dim_m = 2
    dim_p = 1
    f = get_f(dim_n, dim_m, dim_p)
    g = get_g(dim_n, dim_m, T, p_mu, e_c)
    v_final = get_v_final(dim_n)
    w_dist = np.array([1])

    # part a
    policy_a, value_a = utils_mdp.value(
        f, g, v_final, w_dist, T, g_is_time_dependent=True)
    utils_io.label('5.3a')
    print_cost(value_a, 'part a')
    schedule = get_schedule(policy_a)
    # Parameters of the underlying normal; only consumed by part b below.
    normal_mu = get_underlying_normal_mu(p_mu, p_var)
    normal_var = get_underlying_normal_var(p_mu, p_var)
    plot_cost_histogram(schedule, p_mu, p_var, e_c)

    # part b
    grid_dim = 10
    grid = get_prices_over_grid(grid_dim, normal_mu, normal_var)
    g_known = get_g_w_known(dim_n, dim_m, T, grid, p_mu, e_c)
    # Uniform weights over the grid entries.
    grid_len = len(grid)
    grid_dist = np.ones(grid_len) / grid_len
    # Repeat every entry of f once per grid point, then add the grid axis.
    f_repeated = f.repeat(grid_dim)
    f_known = f_repeated.reshape([dim_n, dim_m, grid_dim, 1])
    policy_b, value_b = utils_mdp.value_info_pat(
        f_known, g_known, v_final, grid_dist, w_dist, T,
        g_is_time_dependent=True)
    utils_io.label('5.3b')
    print_cost(value_b, 'part b')
    plot_cost_histogram_real_time(policy_b, grid, e_c)