def prob_3():
    m, n, k, T, p, A, B, c, P, q, r, pT, qT, rT, x0 = problem_data.hw6_p3_data()
    val_fns = []
    controllers = []
    next_cost = utils_fns.QuadraticFunction(pT, qT, rT)
    for t_val in range(T)[::-1]:
        total_cost = utils_fns.QuadraticFunction(np.zeros([n + m, n + m]), np.zeros([n + m, 1]), np.zeros(1))
        for k_val in range(k):
            A_val = A[t_val, k_val, :, :]
            B_val = B[t_val, k_val, :, :]
            c_val = c[t_val, k_val, :, :]
            P_val = P[t_val, k_val, :, :]
            q_val = q[t_val, k_val, :, :]
            r_val = r[t_val, k_val, 0]
            this_stage_quadratic = utils_fns.QuadraticFunction(P_val, q_val, r_val)
            linear_A = np.hstack((A_val, B_val))
            linear_to_next_stage = utils_fns.LinearFunction(linear_A, c_val)
            next_stage_quadratic = next_cost.precompose_linear(linear_to_next_stage)
            quadratic_this_k = this_stage_quadratic.plus_quadratic(next_stage_quadratic)
            total_cost = total_cost.plus_quadratic(quadratic_this_k)
        total_cost = total_cost.div_by_num(k)
        qfx, lfu = total_cost.partial_minimization(m)
        val_fns.append(qfx)
        controllers.append(lfu)
        next_cost = qfx
    utils_io.label('6.3')
    print 'Optimal expected total cost:', val_fns[-1].evaluate(x0)
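
# Hedged sketch (not the actual utils_fns implementation) of the precomposition step used in the
# backward recursion above. Under the assumed convention that QuadraticFunction(P, q, r) represents
# g(z) = z'Pz + q'z + r and LinearFunction(A, b) represents f(y) = Ay + b, composing gives
#     g(f(y)) = y'(A'PA)y + (A'(P + P')b + A'q)'y + (b'Pb + q'b + r),
# i.e. another quadratic in y. A minimal standalone version of that algebra:
def precompose_linear_sketch(P, q, r, A, b):
    """Return (P2, q2, r2) with g(Ay + b) = y'P2y + q2'y + r2, under the assumed convention."""
    P2 = A.T.dot(P).dot(A)
    q2 = A.T.dot((P + P.T).dot(b)) + A.T.dot(q)
    r2 = float(b.T.dot(P).dot(b) + q.T.dot(b) + r)
    return P2, q2, r2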
def prob_1():
    W, s, t = problem_data.hw4_p1_data()
    utils_io.label('4.1b')
    print 'Shortest paths for each matrix in \'matrix_name, [path], weight\' format:'
    for i in range(W.shape[0]):
        p, wp = bellman_ford(W[i], s[i], t[i])
        print 'matrix_{0:s}, {1:s}, {2:s}'.format(str(i), str(p), str(wp))
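
# Hedged sketch of the bellman_ford helper used above (the real implementation lives elsewhere in
# this repo): standard Bellman-Ford over a dense weight matrix W, where W[i, j] = np.inf means no
# edge from i to j, returning a shortest s-to-t path and its total weight. Assumes t is reachable
# from s and that the module-level numpy import (np) used throughout this file is available.
def bellman_ford_sketch(W, s, t):
    n = W.shape[0]
    dist = np.full(n, np.inf)
    pred = -np.ones(n, dtype=int)
    dist[s] = 0.
    for _ in range(n - 1):                      # at most n - 1 relaxation passes
        for i in range(n):
            for j in range(n):
                if dist[i] + W[i, j] < dist[j]:
                    dist[j] = dist[i] + W[i, j]
                    pred[j] = i
    path = [t]                                  # walk predecessors back from t to s
    while path[-1] != s:
        path.append(pred[path[-1]])
    return path[::-1], dist[t]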
def prob_3():
    T, C, e_c, t, p_mu, p_var = problem_data.hw5_p3_data()
    n = C + 1
    m = 2
    p = 1
    f = get_f(n, m, p)
    g = get_g(n, m, T, p_mu, e_c)
    v_final = get_v_final(n)
    w_dist = np.array([1])

    # part a
    pol_a, v_a = utils_mdp.value(f, g, v_final, w_dist, T, g_is_time_dependent=True)
    utils_io.label('5.3a')
    print_cost(v_a, 'part a')
    job_times = get_schedule(pol_a)
    p_mu_normal = get_underlying_normal_mu(p_mu, p_var)
    p_var_normal = get_underlying_normal_var(p_mu, p_var)
    plot_cost_histogram(job_times, p_mu, p_var, e_c)

    # part b
    price_grid_dim = 10
    price_grid = get_prices_over_grid(price_grid_dim, p_mu_normal, p_var_normal)
    g_w_known = get_g_w_known(n, m, T, price_grid, p_mu, e_c)
    price_grid_dist = np.ones(len(price_grid)) / len(price_grid)
    f_w_known = f.repeat(price_grid_dim).reshape([n, m, price_grid_dim, 1])
    pol_b, v_b = utils_mdp.value_info_pat(f_w_known, g_w_known, v_final, price_grid_dist, w_dist, T,
                                          g_is_time_dependent=True)
    utils_io.label('5.3b')
    print_cost(v_b, 'part b')
    plot_cost_histogram_real_time(pol_b, price_grid, e_c)
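
# Hedged sketch of the get_underlying_normal_* helpers called above, under the assumption (suggested
# by the names, not confirmed by this file) that prices are modeled as lognormal with mean p_mu and
# variance p_var; the standard moment-matching conversion to the underlying normal's parameters is:
def get_underlying_normal_mu_sketch(p_mu, p_var):
    return np.log(p_mu ** 2 / np.sqrt(p_var + p_mu ** 2))


def get_underlying_normal_var_sketch(p_mu, p_var):
    return np.log(1. + p_var / p_mu ** 2)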
def p_1():
    A, b, x, _, _, _, _, _, _ = problem_data.hw7_p1_data()
    lin_func = utils_fns.LinearFunction(A, b)
    first_result = np.linalg.norm(lin_func.evaluate(x), ord=2) ** 2
    utils_io.label('7.1')
    print 'norm squared direct calc result: ', str(first_result)
    second_result = lin_func.norm_squared_linear().evaluate(x)
    print 'norm squared calc via quadratic function: ', str(second_result)
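
# For reference, the identity behind norm_squared_linear (a sketch, assuming LinearFunction(A, b)
# represents f(x) = Ax + b and QuadraticFunction(P, q, r) represents x'Px + q'x + r):
#     ||Ax + b||^2 = x'(A'A)x + (2A'b)'x + b'b,
# so the quadratic-function route above should agree with the direct norm computation.
def norm_squared_linear_sketch(A, b):
    return utils_fns.QuadraticFunction(A.T.dot(A), 2. * A.T.dot(b), float(b.T.dot(b)))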
def part_a(trans_mat):
    n = trans_mat.shape[0]
    dist = float(1) / n * np.ones(n)
    utils_io.label('3.4a')
    for t in range(101):
        dist = np.dot(dist, trans_mat)
        if t == 10 or t == 100:
            print 'Surfer\'s most likely page at time %d is %d' % (t, np.argmax(dist))
def prob_5a():
    pi, P, T = problem_data.hw2_p5_data()
    probs = np.zeros(T)
    for time in range(T):
        probs[time] = pi[0]
        pi = np.dot(pi, P)
    utils_io.label('2.5a')
    print 'p_T for T={0:d} equals: {1:f}'.format(T - 1, np.mean(probs))
def part_e(P):
    x = np.array([1.] + 25 * [0])
    for T in range(100):
        x = np.dot(x, P)
    utils_io.label('3.1e')
    print 'pct of time in slow mode: ', str(np.sum(x[:16]))
    print 'pct of time in normal mode: ', str(np.sum(x[16:23]))
    print 'pct of time in fast mode: ', str(np.sum(x[23:]))
def p_2b():
    _, _, x, P, q, r, y, _, _ = problem_data.hw7_p1_data()
    quad_func = utils_fns.QuadraticFunction(P, q, r)
    quad_func_partial = quad_func.partial_evaluation(y, len(y))
    first_result = quad_func_partial.evaluate(x)
    utils_io.label('7.2b')
    print 'quadratic function partial evaluation result: ', str(first_result)
    second_result = quad_func.evaluate(np.concatenate((x, y)))
    print 'quadratic function direct evaluation result: ', str(second_result)
def p_3():
    A, B, P, q, r, w_bar, w_var, n, m = problem_data.hw7_p3_data()
    g = utils_fns.QuadraticFunction(P, q, r)
    A_f = np.hstack((A, B, np.eye(n)))
    b_f = np.zeros([A_f.shape[0], 1])
    f = utils_fns.LinearFunction(A_f, b_f)
    val, pol, _ = ss_lqsc(f, g, w_bar, w_var, n, m)
    utils_io.label('7.3')
    print 'optimal steady-state controller values:'
    print pol
    print 'optimal steady-state value function (disregard r-value as we are only interested in quadratic and linear components):'
    print val
def p_2e():
    _, _, x, P, q, r, _, y_vals, y_pmf = problem_data.hw7_p1_data()
    y_mean = np.dot(y_vals.T, y_pmf).T[0]
    devs = y_vals - y_mean
    y_cov = np.dot(y_pmf.T * devs.T, devs)
    quad_func = utils_fns.QuadraticFunction(P, q, r)
    h_x = quad_func.partial_expectation(y_mean, y_cov)
    first_result = h_x.evaluate(x)
    utils_io.label('7.2e')
    print 'partial expectation via quadratic function: ', str(first_result)
    second_result = 0.
    for idx, y_val in enumerate(y_vals):
        eval_point = np.vstack((x, y_val[:, np.newaxis]))
        second_result += y_pmf[idx] * quad_func.evaluate(eval_point)
    print 'partial expectation via direct evaluation: ', str(second_result[0])
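
# Hedged sketch of the algebra behind partial_expectation, under the assumed convention
# Q(z) = z'Pz + q'z + r with z = [x; y], block partition P = [[Pxx, Pxy], [Pyx, Pyy]], q = [qx; qy],
# and y having mean y_mean and covariance y_cov. Then E_y Q([x; y]) is the quadratic in x with
#     P_h = Pxx,
#     q_h = qx + (Pxy + Pyx') y_mean,
#     r_h = y_mean'Pyy y_mean + tr(Pyy y_cov) + qy'y_mean + r.
def partial_expectation_sketch(P, q, r, y_mean, y_cov, n_x):
    Pxx, Pxy = P[:n_x, :n_x], P[:n_x, n_x:]
    Pyx, Pyy = P[n_x:, :n_x], P[n_x:, n_x:]
    qx, qy = q[:n_x], q[n_x:]
    ym = y_mean.reshape(-1, 1)
    q_h = qx + (Pxy + Pyx.T).dot(ym)
    r_h = float(ym.T.dot(Pyy).dot(ym) + np.trace(Pyy.dot(y_cov)) + qy.T.dot(ym) + r)
    return utils_fns.QuadraticFunction(Pxx, q_h, r_h)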
def part_b_mc(trans_mat, R):
    num_samples = 1000
    values = []
    for sample in range(num_samples):
        value = 0
        state = int(np.floor(random.random() * 100))  # uniformly random starting page
        for t in range(50):
            next_state_dist = trans_mat[state, :]
            next_state = np.min(np.where(random.random() < next_state_dist.cumsum()))
            value += R[state, next_state]
            state = next_state
        values.append(value)
    utils_io.label('3.4b')
    print 'J is expected total payment...'
    print 'MC estimate of J, t=0,...,50:', str(np.mean(values))
def prob_3():
    n, P = problem_data.hw3_p3_data()
    R = reachable_states(P)
    C = communication_matrix(R)
    t = transience_vector(C, R)
    C_no_dupes = remove_duplicate_rows(C)
    num_transient_classes, C_no_dupes_sorted = move_transient_rows_up(C_no_dupes, t)
    C_adjacency = get_C_adjacency(R, C_no_dupes_sorted)
    L = topological_sort(C_adjacency[:num_transient_classes, :num_transient_classes])
    L = np.concatenate((L, range(len(L), C_no_dupes.shape[0])))
    C_ordered = C_no_dupes[L]
    P_index_order = get_index_order(C_ordered)
    P_ordered = P[P_index_order]
    utils_io.label('3.3')
    print 'Transition matrix formatted for class decomposition:'
    print P_ordered
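
# Hedged sketch of the reachable_states helper used above (the actual implementation lives
# elsewhere in this repo): one common construction is the reflexive-transitive closure of the
# adjacency pattern of P, computed by repeated boolean matrix multiplication, so that
# R[i, j] = 1 iff state j is reachable from state i in zero or more steps.
def reachable_states_sketch(P):
    n = P.shape[0]
    R = (P > 0).astype(int) | np.eye(n, dtype=int)   # one-step adjacency plus self-loops
    for _ in range(n):                               # repeated squaring reaches the closure
        R = (R.dot(R) > 0).astype(int)
    return R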
def prob_5b():
    num_samples = np.array([10, 100, 1000, 10000])
    avg_ones = np.zeros(len(num_samples))
    pi, P, T = problem_data.hw2_p5_data()
    for idx, num in enumerate(num_samples):
        ones = []
        for sample in range(num):
            number_of_ones = 0
            next_dist = pi
            for time in range(T):
                next_idx = get_random_idx(next_dist)
                if next_idx == 0:
                    number_of_ones += 1
                next_dist = P[next_idx]
            ones.append(number_of_ones)
        avg_ones[idx] = np.mean(ones) / T
    utils_io.label('2.5b')
    print 'num in sample: ', num_samples
    print 'p_T estimate for T={0:d} equals: {1:s}'.format(T - 1, str(avg_ones))
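
# Hedged sketch of the get_random_idx helper used above: draw an index from a discrete
# distribution by comparing a uniform draw against the cumulative sum, mirroring the inline
# sampling pattern used in part_b_mc (assumes the module-level random/np imports in this file).
def get_random_idx_sketch(dist):
    u = random.random()
    return int(np.min(np.where(u < np.cumsum(dist))[0]))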
def part_d(P):
    T = 101
    x = np.array([1.] + 25 * [0])
    for time in range(T + 1000):
        x = np.dot(x, P)
    x_ss = x
    profit_vec = get_profit_vec()
    v_t = x_ss * profit_vec
    for time in range(T):
        v_t = x_ss * profit_vec + np.dot(P, v_t)
    v_1 = v_t
    v_0 = x_ss * profit_vec + np.dot(P, v_1)
    utils_io.label('3.1d')
    print 'alpha: ', str((v_0 - v_1)[0])
    ax = pd.Series(v_0).plot(title='Value Function')
    ax.set_xlabel('$x$')
    ax.set_ylabel('$v_0(x)$')
    plt.show()
def prob_3():
    a, b, weights, n = problem_data.hw4_p3_data()
    num_nodes = n + 2
    b = b[np.argsort(a)]
    b = np.concatenate((np.array([0]), b, np.array([np.inf])))
    weights = weights[np.argsort(a)]
    weights = np.concatenate((np.array([0]), weights, np.array([0])))
    a = a[np.argsort(a)]
    a = np.concatenate((np.array([0]), a, np.array([np.inf])))
    W = np.inf * np.ones([num_nodes, num_nodes])
    for i in range(W.shape[0]):
        for j in range(W.shape[0]):
            if a[j] >= b[i] and j > i:
                W[i, j] = -weights[i]
            elif a[j] >= a[i] and j > i:
                W[i, j] = 0
    p, wp = bellman_ford(W, 0, num_nodes - 1)
    utils_io.label('4.3b')
    print 'Maximum weight job schedule (assuming jobs labeled 1 to n): ', p[:-1]
    print 'Total weight: ', -wp
def prob_1():
    d_t_dist, p_fixed, p_whole, p_disc, u_disc, s_lin, s_quad, p_rev, p_unmet, p_sal, T, C, D, q_0 = \
        problem_data.hw5_p1_data()
    n = C + 2  # 0, 1, ..., C plus a dummy state
    m = C + 1  # 0, 1, ..., C
    p = len(d_t_dist)
    f = get_f(n, m, p)
    g_order = get_g_order(n, m, p_fixed, p_whole, p_disc, u_disc)  # n by m
    g_store = get_g_store(n, s_lin, s_quad)  # n
    g_rev = get_g_rev(n, m, p, p_rev)  # n by m by p
    g_unmet = get_g_unmet(n, m, p, p_unmet)
    g_sal = get_g_sal(n, p_sal)
    g_total = get_g_total(g_order, g_store, g_rev, g_unmet)
    pol, v = utils_mdp.value(f, g_total, g_sal, d_t_dist, T, g_is_w_dependent=True)
    utils_io.label('5.1a')
    print 'j_star: ', v[q_0, 0]

    # part b; yes, the optimal policy converges: for all but the last two time periods it is optimal to buy 7
    # when x = 0, buy 5 when x = 1, and buy 0 otherwise
    ax = pd.DataFrame(pol).T.plot(title='5.1b: Optimal Policy for various current inventory values (most = 0)',
                                  legend=True)
    ax.set_xlabel('time')
    ax.set_ylabel('Number of items to buy')
    plt.ion()
    part_c(pol, g_order, g_store, g_rev, g_unmet, g_sal, q_0, d_t_dist)
def prob_3():
    num_samples, transition_probabilities, T = problem_data.hw2_p3_data()
    final_grids = []
    num_deceased = []
    for sample in range(int(num_samples)):
        grids = []
        grid = get_new_grid_with_protected()
        for time in range(T):
            grids.append(grid)
            grid_has_inf_neighbor = get_grid_has_inf_neighbor(grid)
            transition_indices = (4 * grid_has_inf_neighbor + grid).ravel()
            next_state_vars = np.random.random([10, 10]).ravel()
            next_states = []
            for idx, transition_idx in enumerate(transition_indices):
                next_state_var = next_state_vars[idx]
                next_states.append(get_next_grid_val(transition_idx, next_state_var, transition_probabilities))
            grid = np.array(next_states).reshape(grid.shape)
        final_grids.append(grids[-1])
        num_deceased.append(len(np.where(grids[-1] == 2)[0]))
    utils_io.label('2.3')
    print 'Mean num deceased at T={0:d}, for {1:d} by {1:d} grid: '.format(T, int(num_samples)), \
        str(np.mean(num_deceased))
def prob_2():
    plt.show()
    prices = np.arange(0.6, 2.1, 0.1)
    mean_price = 0
    std_dev_price = 0.2
    price_pdf = get_price_pdf(prices, mean_price, std_dev_price)
    buy_not_buy = np.array([0.6, 0.4])
    T = 50
    S = 10  # number of stocks
    n = S + 1  # we can hold 0, ..., S stocks, at one of len(prices) prices
    m = 2  # we can not offer to sell (0), or offer to sell (1)
    p1 = len(prices)  # 15 different prices
    p2 = 2  # there may not be a buyer (0), or there may be a buyer (1)
    f = get_f(n, m, p1, p2)
    g = get_g(n, m, p1, p2, prices)

    # part b
    g_final = np.zeros(n)
    # pass -g, -g_final since we are maximizing revenue
    pol, v = utils_mdp.value_info_pat(f, -g, -g_final, price_pdf, buy_not_buy, T)
    v = -v
    utils_io.label('5.2b')
    print 'expected revenue, optimal policy:', str(v[-1, 0])
    plot_pol_info_pat('5.2b', pol, [0, 20, 40, 45])
    plot_val('5.2b', v, [0, 45, 48, 49, 50])

    # part c
    prices_modified = get_prices_modified(prices, price_pdf)
    g_modified = get_g(n, m, p1, p2, prices_modified)
    pol_mod, v_mod = utils_mdp.value_info_pat(f, -g_modified, -g_final, price_pdf, buy_not_buy, T)
    v_mod = -v_mod
    utils_io.label('5.2c')
    print 'expected revenue, threshold policy:', str(v_mod[-1, 0])
    plot_pol_info_pat('5.2c', pol_mod, [0, 20, 40, 45])
    plot_val('5.2c', v_mod, [0, 45, 48, 49, 50])

    # part d
    fcl, gcl = utils_mdp.cloop_info_pat(f, g, pol, buy_not_buy)
    P = utils_mdp.ftop_info_pat(fcl, price_pdf, buy_not_buy)
    initial_state = get_initial_state(n)
    print_probability_unsold(initial_state, P, T, 'b')
    fcl_mod, gcl_mod = utils_mdp.cloop_info_pat(f, g_modified, pol_mod, buy_not_buy)
    P_mod = utils_mdp.ftop_info_pat(fcl_mod, price_pdf, buy_not_buy)
    initial_state = get_initial_state(n)
    print_probability_unsold(initial_state, P_mod, T, 'c')

    # part e
    UNSOLD_PENALTY = -100
    # add a penalty for unsold stocks, to incentivize selling
    g_final_mod = UNSOLD_PENALTY * np.ones([n])
    g_final_mod[0] = 0.
    pol_e, v_e = utils_mdp.value_info_pat(f, -g, -g_final_mod, price_pdf, buy_not_buy, T)
    v_e = -v_e
    utils_io.label('5.2e')
    fcl_e, gcl_e = utils_mdp.cloop_info_pat(f, g, pol_e, buy_not_buy)
    P_e = utils_mdp.ftop_info_pat(fcl_e, price_pdf, buy_not_buy)
    prob_unsold = print_probability_unsold(initial_state, P_e, T, 'e')
    # add back the penalty times the probability we have unsold stocks
    print 'expected revenue, policy e:', str(v_e[-1, 0] - prob_unsold * UNSOLD_PENALTY)
def p4_d_e(params):
    mn = params.m * params.n
    Ax = np.eye(mn)
    Au = np.hstack([-np.eye(mn), np.zeros([mn, params.m + params.n])])
    Aw = np.eye(mn)
    A_f = np.hstack([Ax, Au, Aw])
    b_f = np.zeros(mn)
    f = utils_fns.LinearFunction(A_f, b_f)
    rho_1_vals = np.linspace(0.2, 1, 5)
    rho_2_vals = np.linspace(0.2, 1, 5)
    rho_3_vals = np.linspace(0.2, 1, 5)
    df = pd.DataFrame(columns=['strategy_name', 'rho_1', 'rho_2', 'rho_3', 'avg_cost_ss', 'avg_cost_adp'])
    for rho_1 in rho_1_vals:
        for rho_2 in rho_2_vals:
            for rho_3 in rho_3_vals:
                g = get_g_fn(params, rho_1, rho_2, rho_3)
                constraints_A, constraints_b = get_constraints(params)
                value_func, lfu, r_change = ss_lqsc(f, g, params.lambda_.ravel(), np.diag(params.lambda_.ravel()),
                                                    mn, mn + params.m + params.n, constraints_A, constraints_b)
                strategy_ss = get_strategy_4d(lfu)
                avg_cost_ss = simulate_strategy(strategy_ss, params)
                strategy_name = '7.4, ' + str(rho_1) + ',' + str(rho_2) + ',' + str(rho_3)
                strategy_adp = get_strategy_4e(value_func)
                avg_cost_adp = simulate_strategy(strategy_adp, params)
                df = df.append({'strategy_name': strategy_name, 'rho_1': rho_1, 'rho_2': rho_2, 'rho_3': rho_3,
                                'avg_cost_ss': avg_cost_ss, 'avg_cost_adp': avg_cost_adp}, ignore_index=True)
    # df.to_csv(os.path.expanduser('~') + '/crossbar_switch.csv')
    strat_ss = df.sort('avg_cost_ss').iloc[0]
    utils_io.label('7.4d')
    print 'best optimal steady-state strategy has avg_cost={0:.3f} with rho_1={1:.2f}, rho_2={2:.2f}, rho_3={3:.2f}'.\
        format(strat_ss['avg_cost_ss'], strat_ss['rho_1'], strat_ss['rho_2'], strat_ss['rho_3'])
    strat_adp = df.sort('avg_cost_adp').iloc[0]
    utils_io.label('7.4e')
    print 'best optimal adp strategy has avg_cost={0:.3f} with rho_1={1:.2f}, rho_2={2:.2f}, rho_3={3:.2f}'.\
        format(strat_adp['avg_cost_adp'], strat_adp['rho_1'], strat_adp['rho_2'], strat_adp['rho_3'])
def prob_4():
    T, p, a, w_seq, pw, phi_cl, ct = problem_data.hw1_p4_data()
    all_costs, all_cost_arrs, min_prescient_costs = get_all_costs(T, a, w_seq, pw, phi_cl, ct)

    # 4a
    min_ol_cost = np.inf
    min_ol_idx = None
    min_cl_cost = np.inf
    min_cl_idx = None
    for idx, pc in enumerate(phi_cl):
        # a policy is open loop if it prescribes the same action regardless of the observed w
        is_open_loop = np.array_equal(pc[0, :, :], pc[1, :, :])
        if is_open_loop:
            if all_costs[idx] < min_ol_cost:
                min_ol_cost = all_costs[idx]
                min_ol_idx = idx
        if all_costs[idx] < min_cl_cost:
            min_cl_cost = all_costs[idx]
            min_cl_idx = idx
    utils_io.label('1.4a')
    print 'min_ol_cost: ', str(min_ol_cost)
    print 'min_ol_policy: ', phi_cl[min_ol_idx]
    # print 'min_ol_costs_by_w: ', all_cost_arrs[min_ol_idx]
    df = pd.DataFrame({'min_costs': all_cost_arrs[min_ol_idx], 'open loop costs': pw})
    df = df.groupby('min_costs').sum()

    # 4b
    utils_io.label('1.4b')
    print 'expected cost, optimal prescient policy: ', str(np.sum(min_prescient_costs * pw))
    # print 'min_prescient_costs_by_w: ', min_prescient_costs
    df_prescient = pd.DataFrame({'min_costs': min_prescient_costs, 'prescient costs': pw})
    df_prescient = df_prescient.groupby('min_costs').sum()
    df = df.join(df_prescient, how='outer')

    # 4c
    utils_io.label('1.4c')
    print 'min_cl_cost: ', str(min_cl_cost)
    print 'min_cl_policy: ', phi_cl[min_cl_idx]
    df_cl = pd.DataFrame({'min_costs': all_cost_arrs[min_cl_idx], 'closed loop costs': pw})
    df_cl = df_cl.groupby('min_costs').sum()
    df = df.join(df_cl, how='outer')
    df = df.join(pd.DataFrame(index=range(int(df.index.min()), int(df.index.max() + 1))), how='outer')
    ax = df.plot(kind='bar', sharex=True, sharey=True, title='1.4c Probability of costs by strategy')
    ax.set_xlabel('cost')
    ax.set_ylabel('probability')
    plt.ion()
def part_f(P):
    x = np.array([1.] + 25 * [0])
    for T in range(100):
        x = np.dot(x, P)
    utils_io.label('3.1f')
    print 'mean profit per step, large T: ', str(np.dot(x, get_profit_vec()))
def p_4():
    p4_params = P4Params(*problem_data.hw7_p4_data())
    utils_io.label('7.4b')
    print 'average heuristic policy stage cost is: {0:.3f}'.format(simulate_strategy(strategy_4b, p4_params))
    p4_d_e(p4_params)
def part_g(P):
    x = np.array([1.] + 25 * [0])
    for T in range(100):
        x = np.dot(x, P)
    utils_io.label('3.1g')
    print 'mean profit per step, large T with $10 penalty: ', str(np.dot(x, get_profit_vec(True)))