Esempio n. 1
0
def prob_3():
    m, n, k, T, p, A, B, c, P, q, r, pT, qT, rT, x0 = problem_data.hw6_p3_data()
    val_fns = []
    controllers = []
    next_cost = utils_fns.QuadraticFunction(pT, qT, rT)
    for t_val in range(T)[::-1]:
        total_cost = utils_fns.QuadraticFunction(np.zeros([n+m, n+m]), np.zeros([n+m, 1]), np.zeros(1))
        for k_val in range(k):
            A_val = A[t_val, k_val, :, :]
            B_val = B[t_val, k_val, :, :]
            c_val = c[t_val, k_val, :, :]
            P_val = P[t_val, k_val, :, :]
            q_val = q[t_val, k_val, :, :]
            r_val = r[t_val, k_val, 0]
            this_stage_quadratic = utils_fns.QuadraticFunction(P_val, q_val, r_val)
            linear_A = np.hstack((A_val, B_val))
            linear_to_next_stage = utils_fns.LinearFunction(linear_A, c_val)
            next_stage_quadratic = next_cost.precompose_linear(linear_to_next_stage)
            quadratic_this_k = this_stage_quadratic.plus_quadratic(next_stage_quadratic)
            total_cost = total_cost.plus_quadratic(quadratic_this_k)
        total_cost = total_cost.div_by_num(k)
        qfx, lfu = total_cost.partial_minimization(m)
        val_fns.append(qfx)
        controllers.append(lfu)
        next_cost = qfx
    utils_io.label('6.3')
    print 'Optimal expected total cost:', val_fns[-1].evaluate(x0)
Esempio n. 2
0
def prob_1():
    W, s, t = problem_data.hw4_p1_data()
    utils_io.label('4.1b')
    print 'Shortest paths for each matrix in \'matrix_name, [path], weight\' format:'
    for i in range(W.shape[0]):
        p, wp = bellman_ford(W[i], s[i], t[i])
        print 'matrix_{0:s}, {1:s}, {2:s}'.format(str(i), str(p), str(wp))
Esempio n. 3
0
def prob_3():
    """Solve parts a and b of HW5 problem 3, printing costs and plotting histograms.

    Part a calls utils_mdp.value with a one-point disturbance distribution.
    Part b calls utils_mdp.value_info_pat with a uniform distribution over a
    10-point price grid produced by get_prices_over_grid.
    """
    T, C, e_c, t, p_mu, p_var = problem_data.hw5_p3_data()
    num_states = C + 1
    num_actions = 2
    num_disturbances = 1
    dynamics = get_f(num_states, num_actions, num_disturbances)
    stage_cost = get_g(num_states, num_actions, T, p_mu, e_c)
    terminal_cost = get_v_final(num_states)
    single_point_dist = np.array([1])

    # part a
    policy_a, value_a = utils_mdp.value(dynamics, stage_cost, terminal_cost, single_point_dist, T,
                                        g_is_time_dependent=True)
    utils_io.label('5.3a')
    print_cost(value_a, 'part a')
    schedule = get_schedule(policy_a)
    normal_mu = get_underlying_normal_mu(p_mu, p_var)
    normal_var = get_underlying_normal_var(p_mu, p_var)
    plot_cost_histogram(schedule, p_mu, p_var, e_c)

    # part b
    grid_size = 10
    price_grid = get_prices_over_grid(grid_size, normal_mu, normal_var)
    stage_cost_known = get_g_w_known(num_states, num_actions, T, price_grid, p_mu, e_c)
    num_prices = len(price_grid)
    uniform_price_dist = np.ones(num_prices) / num_prices
    # Replicate the dynamics once per grid price so they carry a price axis.
    dynamics_known = dynamics.repeat(grid_size).reshape([num_states, num_actions, grid_size, 1])
    policy_b, value_b = utils_mdp.value_info_pat(dynamics_known, stage_cost_known, terminal_cost,
                                                 uniform_price_dist, single_point_dist, T,
                                                 g_is_time_dependent=True)
    utils_io.label('5.3b')
    print_cost(value_b, 'part b')
    plot_cost_histogram_real_time(policy_b, price_grid, e_c)
Esempio n. 4
0
def p_1():
    A, b, x, _, _, _, _, _, _ = problem_data.hw7_p1_data()
    lin_func = utils_fns.LinearFunction(A, b)
    first_result = np.linalg.norm(lin_func.evaluate(x), ord=2) ** 2
    utils_io.label('7.1')
    print 'norm squared direct calc result: ', str(first_result)
    second_result = lin_func.norm_squared_linear().evaluate(x)
    print 'norm squared calc via quadratic function: ', str(second_result)
Esempio n. 5
0
def part_a(trans_mat):
    n = trans_mat.shape[0]
    dist = float(1) / n * np.ones(n)
    utils_io.label('3.4a')
    for t in range(101):
        dist = np.dot(dist, trans_mat)
        if t == 10 or t == 100:
            print 'Surfer\'s most likely page at time %d is %d' % (t, np.argmax(dist))
Esempio n. 6
0
def prob_5a():
    pi, P, T = problem_data.hw2_p5_data()
    probs = np.zeros(T)
    for time in range(T):
        probs[time] = pi[0]
        pi = np.dot(pi, P)
    utils_io.label('2.5a')
    print 'p_T for T={0:d} equals: {1:f}'.format(T-1, np.mean(probs))
Esempio n. 7
0
def part_e(P):
    x = np.array([1.] + 25 * [0])
    for T in range(100):
        x = np.dot(x, P)
    utils_io.label('3.1e')
    print 'pct of time in slow mode: ', str(np.sum(x[:16]))
    print 'pct of time in normal mode: ', str(np.sum(x[16:23]))
    print 'pct of time in fast mode: ', str(np.sum(x[23:]))
Esempio n. 8
0
def p_2b():
    _, _, x, P, q, r, y, _, _ = problem_data.hw7_p1_data()
    quad_func = utils_fns.QuadraticFunction(P, q, r)
    quad_func_partial = quad_func.partial_evaluation(y, len(y))
    first_result = quad_func_partial.evaluate(x)
    utils_io.label('7.2b')
    print 'quadratic function partial evaluation result: ', str(first_result)
    second_result = quad_func.evaluate(np.concatenate((x, y)))
    print 'quadratic function direct evaluation result: ', str(second_result)
Esempio n. 9
0
def p_3():
    A, B, P, q, r, w_bar, w_var, n, m = problem_data.hw7_p3_data()
    g = utils_fns.QuadraticFunction(P, q, r)
    A_f = np.hstack((A, B, np.eye(n)))
    b_f = np.zeros([A_f.shape[0], 1])
    f = utils_fns.LinearFunction(A_f, b_f)
    val, pol, _ = ss_lqsc(f, g, w_bar, w_var, n, m)
    utils_io.label('7.3')
    print 'optimal steady-state controller values:'
    print pol
    print 'optimal steady-state value function (disregard r-value as we are only interested in quadratic and linear components):'
    print val
Esempio n. 10
0
def p_2e():
    _, _, x, P, q, r, _, y_vals, y_pmf = problem_data.hw7_p1_data()
    y_mean = np.dot(y_vals.T, y_pmf).T[0]
    devs = y_vals - y_mean
    y_cov = np.dot(y_pmf.T * devs.T, devs)
    quad_func = utils_fns.QuadraticFunction(P, q, r)
    h_x = quad_func.partial_expectation(y_mean, y_cov)
    first_result = h_x.evaluate(x)
    utils_io.label('7.2e')
    print 'partial expectation via quadratic function: ', str(first_result)
    second_result = 0.
    for idx, y_val in enumerate(y_vals):
        eval_point = np.vstack((x, y_val[:, np.newaxis]))
        second_result += y_pmf[idx] * quad_func.evaluate(eval_point)
    print 'partial expectation via direct evaluation: ', str(second_result[0])
Esempio n. 11
0
def part_b_mc(trans_mat, R):
    num_samples = 1000
    values = []
    for sample in range(num_samples):
        value = 0
        state = np.floor(random.random() * 100)
        for t in range(50):
            next_state_dist = trans_mat[state, :]
            next_state = np.min(np.where(random.random() < next_state_dist.cumsum()))
            value += R[state, next_state]
            state = next_state
        values.append(value)
    utils_io.label('3.4b')
    print 'J is expected total payment...'
    print 'MC estimate of J, t=0,...,50:', str(np.mean(values))
Esempio n. 12
0
def prob_3():
    n, P = problem_data.hw3_p3_data()
    R = reachable_states(P)
    C = communication_matrix(R)
    t = transience_vector(C, R)
    C_no_dupes = remove_duplicate_rows(C)
    num_transient_classes, C_no_dupes_sorted = move_transient_rows_up(C_no_dupes, t)
    C_adjacency = get_C_adjacency(R, C_no_dupes_sorted)
    L = topological_sort(C_adjacency[:num_transient_classes, :num_transient_classes])
    L = np.concatenate((L, range(len(L), C_no_dupes.shape[0])))
    C_ordered = C_no_dupes[L]
    P_index_order = get_index_order(C_ordered)
    P_ordered = P[P_index_order]
    utils_io.label('3.3')
    print 'Transition matrix formatted for class decomposition:'
    print P_ordered
Esempio n. 13
0
def prob_5b():
    num_samples = np.array([10, 100, 1000, 10000])
    avg_ones = np.zeros(len(num_samples))
    pi, P, T = problem_data.hw2_p5_data()
    for idx, num in enumerate(num_samples):
        ones = []
        for sample in range(num):
            number_of_ones = 0
            next_dist = pi
            for time in range(T):
                next_idx = get_random_idx(next_dist)
                if next_idx == 0:
                    number_of_ones += 1
                next_dist = P[next_idx]
            ones.append(number_of_ones)
        avg_ones[idx] = np.mean(ones) / T
    utils_io.label('2.5b')
    print 'num in sample: ', num_samples
    print 'p_T estimate for T={0:d} equals: {1:s}'.format(T-1, avg_ones)
Esempio n. 14
0
def part_d(P):
    T = 101
    x = np.array([1.] + 25 * [0])
    for time in range(T + 1000):
        x = np.dot(x, P)
    x_ss = x
    profit_vec = get_profit_vec()

    v_t = x_ss * profit_vec
    for time in range(T):
        v_t = x_ss * profit_vec + np.dot(P, v_t)

    v_1 = v_t
    v_0 = x_ss * profit_vec + np.dot(P, v_1)
    utils_io.label('3.1d')
    print 'alpha: ', str((v_0 - v_1)[0])
    ax = pd.Series(v_0).plot(title='Value Function')
    ax.set_xlabel('$x$')
    ax.set_ylabel('$v_0(x)$')
    plt.show()
Esempio n. 15
0
def prob_3():
    a, b, weights, n = problem_data.hw4_p3_data()
    num_nodes = n + 2
    b = b[np.argsort(a)]
    b = np.concatenate((np.array([0]), b, np.array([np.inf])))
    weights = weights[np.argsort(a)]
    weights = np.concatenate((np.array([0]), weights, np.array([0])))
    a = a[np.argsort(a)]
    a = np.concatenate((np.array([0]), a, np.array([np.inf])))
    W = np.inf * np.ones([num_nodes, num_nodes])
    for i in range(W.shape[0]):
        for j in range(W.shape[0]):
            if a[j] >= b[i] and j > i:
                W[i, j] = -weights[i]
            elif a[j] >= a[i] and j > i:
                W[i, j] = 0
    p, wp = bellman_ford(W, 0, num_nodes - 1)
    utils_io.label('4.3b')
    print 'Maximum weight job schedule (assuming jobs labeled 1 to n): ', p[:-1]
    print 'Total weight: ', -wp
Esempio n. 16
0
def prob_1():
    d_t_dist, p_fixed, p_whole, p_disc, u_disc, s_lin, s_quad, p_rev, p_unmet, p_sal, T, C, D, q_0 = problem_data.hw5_p1_data()
    n = C + 2  # 0, 1, ..., C plus a dummy state
    m = C + 1  # 0, 1, ..., C
    p = len(d_t_dist)
    f = get_f(n, m, p)
    g_order = get_g_order(n, m, p_fixed, p_whole, p_disc, u_disc)  # n by m
    g_store = get_g_store(n, s_lin, s_quad)  # n
    g_rev = get_g_rev(n, m, p, p_rev)  # n by m by p
    g_unmet = get_g_unmet(n, m, p, p_unmet)
    g_sal = get_g_sal(n, p_sal)
    g_total = get_g_total(g_order, g_store, g_rev, g_unmet)
    pol, v = utils_mdp.value(f, g_total, g_sal, d_t_dist, T, g_is_w_dependent=True)
    utils_io.label('5.1a')
    print 'j_star: ', v[q_0, 0]
    # part b; yes the optimal policy converges; for all but the last two time periods it is optimal to buy 7 when x = 0,
    #  buy 5 when x = 1, and buy 0 otherwise
    ax = pd.DataFrame(pol).T.plot(title='5.1b: Optimal Policy for various current inventory values (most = 0)', legend=True)
    ax.set_xlabel('time')
    ax.set_ylabel('Number of items to buy')
    plt.ion()
    part_c(pol, g_order, g_store, g_rev, g_unmet, g_sal, q_0, d_t_dist)
Esempio n. 17
0
def prob_3():
    num_samples, transition_probabilities, T = problem_data.hw2_p3_data()
    final_grids = []
    num_deceased = []

    for sample in range(int(num_samples)):
        grids = []
        grid = get_new_grid_with_protected()
        for time in range(T):
            grids.append(grid)
            grid_has_inf_neighbor = get_grid_has_inf_neighbor(grid)
            transition_indices = (4 * grid_has_inf_neighbor + grid).ravel()
            next_state_vars = np.random.random([10, 10]).ravel()
            next_states = []
            for idx, transition_idx in enumerate(transition_indices):
                next_state_var = next_state_vars[idx]
                next_states.append(get_next_grid_val(transition_idx, next_state_var, transition_probabilities))
            grid = np.array(next_states).reshape(grid.shape)
        final_grids.append(grids[-1])
        num_deceased.append(len(np.where(grids[-1] == 2)[0]))
    utils_io.label('2.3')
    print 'Mean num deceased at T={0:d}, for {1:d} by {1:d} grid: '.format(T, int(num_samples)),\
        str(np.mean(num_deceased))
Esempio n. 18
0
def prob_2():
    """Run parts b through e of HW5 problem 2 (selling stocks) and print/plot the results.

    Every part solves the problem with utils_mdp.value_info_pat.  Because the
    goal is to maximize revenue, negated stage and final costs are passed in
    and the returned value is negated back.  Parts d and e also build the
    closed-loop system (cloop_info_pat, ftop_info_pat) and pass the resulting
    matrix to print_probability_unsold.
    """
    # NOTE(review): nothing has been plotted in this function yet; presumably this
    # displays figures left pending by earlier code -- confirm.
    plt.show()
    prices = np.arange(0.6, 2.1, 0.1)
    # NOTE(review): mean 0 with prices in [0.6, 2.0] suggests these parameterize the
    # log of the price -- verify against get_price_pdf.
    mean_price = 0
    std_dev_price = 0.2
    price_pdf = get_price_pdf(prices, mean_price, std_dev_price)
    buy_not_buy = np.array([0.6, 0.4])
    T = 50
    S = 10  # number of stocks
    n = S + 1  # we can hold 0, ..., S stocks, at one of len(prices) prices
    m = 2  # we can not offer to sell (0), or offer to sell (1)
    p1 = len(prices)  # 15 different prices
    p2 = 2  # there may not be a buyer (0), or there may be a buyer (1)
    f = get_f(n, m, p1, p2)
    g = get_g(n, m, p1, p2, prices)

    # part b
    g_final = np.zeros(n)
    pol, v = utils_mdp.value_info_pat(f, -g, -g_final, price_pdf, buy_not_buy, T)  # pass -g, -g_final since we are maximizing revenue
    v = -v
    utils_io.label('5.2b')
    print 'expected revenue, optimal policy' + ':', str(v[-1, 0])
    plot_pol_info_pat('5.2b', pol, [0, 20, 40, 45])
    plot_val('5.2b', v, [0, 45, 48, 49, 50])

    # part c
    # Same solve as part b, but with the stage revenue built from the modified prices.
    prices_modified = get_prices_modified(prices, price_pdf)
    g_modified = get_g(n, m, p1, p2, prices_modified)
    pol_mod, v_mod = utils_mdp.value_info_pat(f, -g_modified, -g_final, price_pdf, buy_not_buy, T)
    v_mod = -v_mod
    utils_io.label('5.2c')
    print 'expected revenue, threshold policy' + ':', str(v_mod[-1, 0])
    plot_pol_info_pat('5.2c', pol_mod, [0, 20, 40, 45])
    plot_val('5.2c', v_mod, [0, 45, 48, 49, 50])

    # part d
    # Close the loop with each policy, then report the probability of unsold stock.
    fcl, gcl = utils_mdp.cloop_info_pat(f, g, pol, buy_not_buy)
    P = utils_mdp.ftop_info_pat(fcl, price_pdf, buy_not_buy)
    initial_state = get_initial_state(n)
    print_probability_unsold(initial_state, P, T, 'b')
    fcl_mod, gcl_mod = utils_mdp.cloop_info_pat(f, g_modified, pol_mod, buy_not_buy)
    P_mod = utils_mdp.ftop_info_pat(fcl_mod, price_pdf, buy_not_buy)
    initial_state = get_initial_state(n)
    print_probability_unsold(initial_state, P_mod, T, 'c')

    # part e
    UNSOLD_PENALTY = -100
    g_final_mod = UNSOLD_PENALTY * np.ones([n])  # add a penalty for unsold stocks, to incentivize selling
    g_final_mod[0] = 0.  # no penalty in state 0
    pol_e, v_e = utils_mdp.value_info_pat(f, -g, -g_final_mod, price_pdf, buy_not_buy, T)
    v_e = -v_e
    utils_io.label('5.2e')
    fcl_e, gcl_e = utils_mdp.cloop_info_pat(f, g, pol_e, buy_not_buy)
    P_e = utils_mdp.ftop_info_pat(fcl_e, price_pdf, buy_not_buy)
    # The return value of print_probability_unsold is used as the unsold probability below.
    prob_unsold = print_probability_unsold(initial_state, P_e, T, 'e')
    print 'expected revenue, policy ' + 'e' + ':', str(v_e[-1, 0] - prob_unsold * UNSOLD_PENALTY)  # add back the penalty times the probability we have unsold stocks
Esempio n. 19
0
def p4_d_e(params):
    mn = params.m * params.n
    Ax = np.eye(mn)
    Au = np.hstack([-np.eye(mn), np.zeros([mn, params.m + params.n])])
    Aw = np.eye(mn)
    A_f = np.hstack([Ax, Au, Aw])
    b_f = np.zeros(mn)
    f = utils_fns.LinearFunction(A_f, b_f)
    rho_1_vals = np.linspace(0.2, 1, 5)
    rho_2_vals = np.linspace(0.2, 1, 5)
    rho_3_vals = np.linspace(0.2, 1, 5)
    df = pd.DataFrame(columns=['strategy_name', 'rho_1', 'rho_2', 'rho_3', 'avg_cost_ss', 'avg_cpst_adp'])

    for rho_1 in rho_1_vals:
        for rho_2 in rho_2_vals:
            for rho_3 in rho_3_vals:
                g = get_g_fn(params, rho_1, rho_2, rho_3)
                constraints_A, constraints_b = get_constraints(params)
                value_func, lfu, r_change = ss_lqsc(f, g, params.lambda_.ravel(), np.diag(params.lambda_.ravel()), mn,
                                                    mn + params.m + params.n, constraints_A, constraints_b)
                strategy_ss = get_strategy_4d(lfu)
                avg_cost_ss = simulate_strategy(strategy_ss, params)
                strategy_name = '7.4, ' + str(rho_1) + ',' + str(rho_2) + ',' + str(rho_3)
                strategy_adp = get_strategy_4e(value_func)
                avg_cost_adp = simulate_strategy(strategy_adp, params)
                df = df.append({'strategy_name': strategy_name, 'rho_1': rho_1, 'rho_2': rho_2, 'rho_3': rho_3,
                                'avg_cost_ss': avg_cost_ss, 'avg_cost_adp': avg_cost_adp}, ignore_index=True)
    # df.to_csv(os.path.expanduser('~') + '/crossbar_switch.csv')
    strat_ss = df.sort('avg_cost_ss').iloc[0]
    utils_io.label('7.4d')
    print 'best optimal steady-state strategy has avg_cost={0:.3f} with rho_1={1:.2f}, rho_2={2:.2f}, rho_3={3:.2f}'. \
        format(strat_ss['avg_cost_ss'], strat_ss['rho_1'], strat_ss['rho_2'], strat_ss['rho_3'])
    strat_adp = df.sort('avg_cost_adp').iloc[0]
    utils_io.label('7.4e')
    print 'best optimal adp strategy has avg_cost={0:.3f} with rho_1={1:.2f}, rho_2={2:.2f}, rho_3={3:.2f}'. \
        format(strat_adp['avg_cost_adp'], strat_adp['rho_1'], strat_adp['rho_2'], strat_adp['rho_3'])
Esempio n. 20
0
def prob_4():
    T, p, a, w_seq, pw, phi_cl, ct = problem_data.hw1_p4_data()
    all_costs, all_cost_arrs, min_prescient_costs = get_all_costs(T, a, w_seq, pw, phi_cl, ct)

    # 4a
    min_ol_cost = np.inf
    min_ol_idx = None
    min_cl_cost = np.inf
    min_cl_idx = None
    for idx, pc in enumerate(phi_cl):
        if np.array_equal(pc[0, :, :], pc[1, :, :]):
            is_open_loop = True
        else:
            is_open_loop = False
        if is_open_loop:
            if all_costs[idx] < min_ol_cost:
                min_ol_cost = all_costs[idx]
                min_ol_idx = idx
        if all_costs[idx] < min_cl_cost:
            min_cl_cost = all_costs[idx]
            min_cl_idx = idx
    utils_io.label('1.4a')
    print 'min_ol_cost: ', str(min_ol_cost)
    print 'min_ol_policy: ', phi_cl[min_ol_idx]
    # print 'min_ol_costs_by_w: ', all_cost_arrs[min_ol_idx]

    df = pd.DataFrame({'min_costs': all_cost_arrs[min_ol_idx], 'open loop costs': pw})
    df = df.groupby('min_costs').sum()

    # 4b
    utils_io.label('1.4b')
    print 'expected cost, optimal prescient policy: ', str(np.sum(min_prescient_costs * pw))
    # print 'min_prescient_costs_by_w: ', min_prescient_costs

    df_prescient = pd.DataFrame({'min_costs': min_prescient_costs, 'prescient costs': pw})
    df_prescient = df_prescient.groupby('min_costs').sum()
    df = df.join(df_prescient, how='outer')

    # 4c
    utils_io.label('1.4c')
    print 'min_cl_cost: ', str(min_cl_cost)
    print 'min_cl_policy: ', phi_cl[min_cl_idx]

    df_cl = pd.DataFrame({'min_costs': all_cost_arrs[min_cl_idx], 'closed loop costs': pw})
    df_cl = df_cl.groupby('min_costs').sum()
    df = df.join(df_cl, how='outer')
    df = df.join(pd.DataFrame(index=range(int(df.index.min()), int(df.index.max() + 1))), how='outer')
    ax = df.plot(kind='bar', sharex=True, sharey=True, title='1.4c Probability of costs by strategy')
    ax.set_xlabel('cost')
    ax.set_ylabel('probability')
    plt.ion()
Esempio n. 21
0
def part_f(P):
    x = np.array([1.] + 25 * [0])
    for T in range(100):
        x = np.dot(x, P)
    utils_io.label('3.1f')
    print 'mean profit per step, large T: ', str(np.dot(x, get_profit_vec()))
Esempio n. 22
0
def p_4():
    p4_params = P4Params(*problem_data.hw7_p4_data())
    utils_io.label('7.4b')
    print 'average heuristic policy stage cost is: {0:.3f}'.format(simulate_strategy(strategy_4b, p4_params))
    p4_d_e(p4_params)
Esempio n. 23
0
def part_g(P):
    x = np.array([1.] + 25 * [0])
    for T in range(100):
        x = np.dot(x, P)
    utils_io.label('3.1g')
    print 'mean profit per step, large T with $10 penalty: ', str(np.dot(x, get_profit_vec(True)))