def prob_2():
    """Run parts b-e of problem 5.2 and print/plot the results.

    Builds f and g for holding 0..S stocks with a sell / don't-sell action,
    then:

    * part b: solves with ``utils_mdp.value_info_pat`` on the original prices
      and reports the expected revenue plus policy/value plots;
    * part c: repeats part b with the prices returned by
      ``get_prices_modified``;
    * part d: closes the loop under both policies and prints the probability
      of ending with unsold stock;
    * part e: re-solves with a terminal penalty on unsold stock and reports
      the revenue with that penalty added back.

    The ``print`` calls take a single pre-joined string so the function parses
    on both Python 2 and Python 3 while emitting the same text as the former
    ``print 'label' + ':', str(x)`` statements.
    """
    plt.show()  # NOTE(review): presumably flushes figures queued earlier -- confirm

    prices = np.arange(0.6, 2.1, 0.1)
    # NOTE(review): a mean of 0 on a 0.6..2.0 grid suggests these parameterise
    # the log-price -- confirm against get_price_pdf.
    mean_price = 0
    std_dev_price = 0.2
    price_pdf = get_price_pdf(prices, mean_price, std_dev_price)
    # NOTE(review): presumably [P(no buyer), P(buyer)], matching the 0/1
    # convention documented for p2 below -- confirm.
    buy_not_buy = np.array([0.6, 0.4])

    T = 50
    S = 10  # number of stocks
    n = S + 1  # we can hold 0, ..., S stocks, at one of len(prices) prices
    m = 2  # we can not offer to sell (0), or offer to sell (1)
    p1 = len(prices)  # 15 different prices
    p2 = 2  # there may not be a buyer (0), or there may be a buyer (1)

    f = get_f(n, m, p1, p2)
    g = get_g(n, m, p1, p2, prices)

    # part b
    g_final = np.zeros(n)
    # pass -g, -g_final since we are maximizing revenue
    pol, v = utils_mdp.value_info_pat(
        f, -g, -g_final, price_pdf, buy_not_buy, T)
    v = -v  # undo the sign flip so v is a revenue again
    utils_io.label('5.2b')
    print('expected revenue, optimal policy' + ': ' + str(v[-1, 0]))
    plot_pol_info_pat('5.2b', pol, [0, 20, 40, 45])
    plot_val('5.2b', v, [0, 45, 48, 49, 50])

    # part c
    prices_modified = get_prices_modified(prices, price_pdf)
    g_modified = get_g(n, m, p1, p2, prices_modified)
    pol_mod, v_mod = utils_mdp.value_info_pat(
        f, -g_modified, -g_final, price_pdf, buy_not_buy, T)
    v_mod = -v_mod
    utils_io.label('5.2c')
    print('expected revenue, threshold policy' + ': ' + str(v_mod[-1, 0]))
    plot_pol_info_pat('5.2c', pol_mod, [0, 20, 40, 45])
    plot_val('5.2c', v_mod, [0, 45, 48, 49, 50])

    # part d
    # Only the closed-loop dynamics are needed; the closed-loop cost is unused.
    fcl, _ = utils_mdp.cloop_info_pat(f, g, pol, buy_not_buy)
    P = utils_mdp.ftop_info_pat(fcl, price_pdf, buy_not_buy)
    initial_state = get_initial_state(n)
    print_probability_unsold(initial_state, P, T, 'b')

    fcl_mod, _ = utils_mdp.cloop_info_pat(f, g_modified, pol_mod, buy_not_buy)
    P_mod = utils_mdp.ftop_info_pat(fcl_mod, price_pdf, buy_not_buy)
    # Rebuilt rather than reused, as in the original call sequence.
    initial_state = get_initial_state(n)
    print_probability_unsold(initial_state, P_mod, T, 'c')

    # part e
    UNSOLD_PENALTY = -100
    # add a penalty for unsold stocks, to incentivize selling
    g_final_mod = UNSOLD_PENALTY * np.ones([n])
    g_final_mod[0] = 0.  # no penalty when nothing is left unsold
    pol_e, v_e = utils_mdp.value_info_pat(
        f, -g, -g_final_mod, price_pdf, buy_not_buy, T)
    v_e = -v_e
    utils_io.label('5.2e')
    fcl_e, _ = utils_mdp.cloop_info_pat(f, g, pol_e, buy_not_buy)
    P_e = utils_mdp.ftop_info_pat(fcl_e, price_pdf, buy_not_buy)
    prob_unsold = print_probability_unsold(initial_state, P_e, T, 'e')
    # add back the penalty times the probability we have unsold stocks
    revenue_e = v_e[-1, 0] - prob_unsold * UNSOLD_PENALTY
    print('expected revenue, policy ' + 'e' + ': ' + str(revenue_e))
def prob_3():
    """Run parts a and b of problem 5.3, printing costs and drawing histograms.

    Part a loads the homework data, builds f, g and the terminal value, solves
    with ``utils_mdp.value`` (time-dependent g, single-point disturbance
    distribution), prints the cost and plots a cost histogram for the schedule
    derived from the policy.

    Part b builds a price grid from the underlying-normal parameters, weights
    the grid points uniformly, tiles f across the grid, solves with
    ``utils_mdp.value_info_pat`` and plots the real-time cost histogram.
    """
    # The fourth field of the data tuple is not used in this function.
    T, C, e_c, _t, p_mu, p_var = problem_data.hw5_p3_data()

    n, m, p = C + 1, 2, 1
    f = get_f(n, m, p)
    g = get_g(n, m, T, p_mu, e_c)
    v_final = get_v_final(n)
    w_dist = np.array([1])  # single outcome with probability one

    # part a
    policy_a, value_a = utils_mdp.value(
        f, g, v_final, w_dist, T, g_is_time_dependent=True)
    utils_io.label('5.3a')
    print_cost(value_a, 'part a')
    schedule = get_schedule(policy_a)
    normal_mu = get_underlying_normal_mu(p_mu, p_var)
    normal_var = get_underlying_normal_var(p_mu, p_var)
    plot_cost_histogram(schedule, p_mu, p_var, e_c)

    # part b
    grid_size = 10
    price_grid = get_prices_over_grid(grid_size, normal_mu, normal_var)
    g_w_known = get_g_w_known(n, m, T, price_grid, p_mu, e_c)

    # Uniform weights over the grid points.
    num_grid_prices = len(price_grid)
    price_grid_dist = np.ones(num_grid_prices) / num_grid_prices

    # Repeat every entry of f grid_size times, then view the result as
    # [n, m, grid_size, 1].
    f_repeated = f.repeat(grid_size)
    f_w_known = f_repeated.reshape([n, m, grid_size, 1])

    policy_b, value_b = utils_mdp.value_info_pat(
        f_w_known, g_w_known, v_final, price_grid_dist, w_dist, T,
        g_is_time_dependent=True)
    utils_io.label('5.3b')
    print_cost(value_b, 'part b')
    plot_cost_histogram_real_time(policy_b, price_grid, e_c)