コード例 #1
0
def run_experiment(args):
    """Run the spiral-vs-tabular TD comparison on a cyclic MRP.

    A ``spiral.Spiral`` value function is trained first, then a
    ``simple.Tabular`` one that starts from the first entry of the ``Vs``
    returned by the spiral run. Training uses ``online_td0.TD0`` when
    ``args.online`` is truthy and ``numpy_expected_tdk.TDk`` otherwise.

    Args:
        args: Namespace-like object. Attributes read here: ``seed``, ``n``,
            ``delta``, ``gamma``, ``k``, ``online``, ``stepsize``, ``steps``,
            ``log_idx``, ``plot_step`` and ``plot_start``.

    Returns:
        A 5-tuple ``(tabular_Vs, spiral_Vs, condition_numbers, bound,
        smoothness)`` where

        * ``tabular_Vs`` / ``spiral_Vs`` are the outputs of
          ``utils.dist_mu(env.mu, env.V_star, ...)`` for the two runs,
        * ``condition_numbers`` holds ``utils.jac_cond`` of the spiral
          Jacobian at every ``plot_step``-th parameter checkpoint starting at
          ``plot_start``,
        * ``bound`` is ``utils.overparam_cond_number_bound(...)``,
        * ``smoothness`` is the largest absolute difference between
          cyclically adjacent entries of ``env.V_star``.
    """
    np.random.seed(args.seed)

    # Zero-reward MRP on the cyclic transition matrix.
    transition = P_matrices.build_cyclic_P(args.n, args.delta)
    env = environment.MRP(args.gamma, transition, np.zeros_like(transition))

    bound = utils.overparam_cond_number_bound(env.P, env.mu, env.gamma, args.k)

    def train(value_fn):
        """Train ``value_fn`` on ``env`` and return ``(thetas, Vs)``."""
        if args.online:
            # TD0 returns the values first; reorder to match TDk.
            values, params, _, _ = online_td0.TD0(
                value_fn, env, args.stepsize, args.steps,
                args.log_idx, args.plot_step)
            return params, values
        return numpy_expected_tdk.TDk(
            args.k, value_fn, env, args.stepsize, args.steps,
            args.log_idx, args.plot_step)

    # Orientation vector: -10 everywhere except the last entry, 10 * (n - 1).
    orientation = np.full(args.n, -1.0)
    orientation[-1] = args.n - 1
    orientation *= 10.0
    start = -20.0
    print(orientation)
    eps = 0.05
    spiral_P = P_matrices.build_cyclic_P(args.n, 0.5)

    # --- spiral run ---------------------------------------------------------
    V = spiral.Spiral(start, spiral_P, orientation, eps)
    thetas, Vs = train(V)
    spiral_Vs = utils.dist_mu(env.mu, env.V_star, np.array(Vs))

    # Jacobian conditioning along the sub-sampled spiral trajectory.
    condition_numbers = []
    for checkpoint in thetas[args.plot_start::args.plot_step]:
        V.theta = checkpoint
        condition_numbers.append(utils.jac_cond(V.jacobian()))

    # --- tabular run, started from the spiral run's first value vector -----
    V = simple.Tabular(Vs[0])
    thetas, Vs = train(V)
    tabular_Vs = utils.dist_mu(env.mu, env.V_star, np.array(Vs))

    # Index i - 1 wraps around at i == 0, so the first/last pair is included.
    smoothness = max(
        abs(env.V_star[i] - env.V_star[i - 1]) for i in range(args.n))

    return tabular_Vs, spiral_Vs, condition_numbers, bound, smoothness
コード例 #2
0
def run_experiment(args):
    """Train a ReLU MLP value function on a square gridworld MRP.

    The ``args.n`` states are laid out on a ``sqrt(n) x sqrt(n)`` grid. Each
    state is described by its two integer grid coordinates plus a constant
    column of ones, and an ``mlp.MLP`` on those features is trained with
    ``online_td0.TD0`` (when ``args.online`` is truthy) or
    ``expected_tdk.TDk`` (otherwise).

    Args:
        args: Namespace-like object. Attributes read here: ``n``, ``gamma``,
            ``seed``, ``width``, ``depth``, ``online``, ``k``, ``stepsize``,
            ``steps``, ``log_idx``, ``plot_step`` and ``plot_start``.

    Returns:
        A 5-tuple ``(mlp_V_to_V_star, mlp_V_to_origin, condition_numbers,
        bound, smoothness)`` where

        * ``mlp_V_to_V_star`` is ``utils.dist_mu(env.mu, env.V_star, Vs)``,
        * ``mlp_V_to_origin`` is ``utils.mu_norm(env.mu, Vs)``,
        * ``condition_numbers`` holds ``utils.jac_cond`` of the MLP Jacobian
          at every ``plot_step``-th parameter checkpoint starting at
          ``plot_start``,
        * ``bound`` is ``utils.overparam_cond_number_bound(...)``,
        * ``smoothness`` is the largest absolute difference between
          consecutive entries of ``env.V_star`` (index ``i - 1`` wraps
          around at ``i == 0``).

    Raises:
        ValueError: If ``args.n`` is not a perfect square, since the feature
            matrix could not then have one row per state.
    """
    grid_n = int(np.sqrt(args.n))
    # Fail fast: a truncated side length would otherwise only surface later
    # as a shape mismatch when the bias column is appended to the features.
    if grid_n * grid_n != args.n:
        raise ValueError(
            "args.n must be a perfect square to form a grid, got {}".format(
                args.n))

    # NOTE(review): the five constants are passed through unchanged to
    # constant_gridworld; their meaning is defined in P_matrices.
    P = P_matrices.constant_gridworld(grid_n, 0.3, 0.1, 0.1, 0.3, 0.2)

    # Reward matrix is zero except for its last column, which is all ones.
    R_mat = np.zeros_like(P)
    R_mat[:, -1] = 1
    env = environment.MRP(args.gamma, P, R_mat)

    # Features: first coordinate varies slowly (0,0,...,1,1,...), second
    # varies quickly (0,1,...,0,1,...), followed by a constant column.
    coords = np.arange(grid_n)
    Phi = np.stack(
        [np.repeat(coords, grid_n), np.tile(coords, grid_n)], axis=1)
    Phi = np.concatenate([Phi, np.ones((args.n, 1))], axis=1)

    # Seed immediately before building the MLP.
    np.random.seed(args.seed)
    V = mlp.MLP(Phi, [args.width] * args.depth, biases=False,
                activation='ReLU')
    if args.online:
        Vs, thetas, _, _ = online_td0.TD0(
            V, env, args.stepsize, args.steps, args.log_idx, args.plot_step)
    else:
        thetas, Vs = expected_tdk.TDk(
            args.k, V, env, args.stepsize, args.steps, args.log_idx,
            args.plot_step)
    mlp_V_to_V_star = utils.dist_mu(env.mu, env.V_star, jnp.array(Vs))
    mlp_V_to_origin = utils.mu_norm(env.mu, jnp.array(Vs))

    # Jacobian conditioning along the sub-sampled parameter trajectory.
    condition_numbers = []
    for theta in thetas[args.plot_start::args.plot_step]:
        V.theta = theta
        condition_numbers.append(utils.jac_cond(V.jacobian()))

    bound = utils.overparam_cond_number_bound(env.P, env.mu, env.gamma, args.k)
    smoothness = max(
        abs(env.V_star[i] - env.V_star[i - 1]) for i in range(args.n))

    return mlp_V_to_V_star, mlp_V_to_origin, condition_numbers, bound, smoothness
コード例 #3
0
def run_experiment(args):
    """Compare tabular and MLP value functions on a cyclic MRP.

    The environment is built from a cyclic transition matrix and a target
    value vector that is passed to ``environment.MRP`` directly via
    ``V_star`` (1 at even indices, 0 at odd ones) instead of through a reward
    matrix. A ``simple.Tabular`` value function initialised at zero is
    trained first, then an ``mlp.MLP`` on sine/cosine/constant features.
    Training uses ``online_td0.TD0`` when ``args.online`` is truthy and
    ``expected_tdk.TDk`` otherwise.

    Args:
        args: Namespace-like object. Attributes read here: ``seed``, ``n``,
            ``delta``, ``gamma``, ``width``, ``depth``, ``online``, ``k``,
            ``stepsize``, ``steps``, ``log_idx``, ``plot_step`` and
            ``plot_start``.

    Returns:
        A 6-tuple ``(tabular_Vs, mlp_V_to_V_star, mlp_V_to_origin,
        condition_numbers, bound, smoothness)`` where

        * ``tabular_Vs`` / ``mlp_V_to_V_star`` are the outputs of
          ``utils.dist_mu(env.mu, env.V_star, ...)`` for the two runs,
        * ``mlp_V_to_origin`` is ``utils.mu_norm(env.mu, ...)`` for the MLP
          run,
        * ``condition_numbers`` holds ``utils.jac_cond`` of the MLP Jacobian
          at every ``plot_step``-th parameter checkpoint starting at
          ``plot_start``,
        * ``bound`` is ``utils.overparam_cond_number_bound(...)``,
        * ``smoothness`` is the largest absolute difference between
          cyclically adjacent entries of ``env.V_star``.
    """
    np.random.seed(args.seed)

    transition = P_matrices.build_cyclic_P(args.n, args.delta)

    # Alternating target: ones at even state indices, zeros elsewhere.
    V_star = np.zeros(args.n)
    V_star[::2] = 1
    env = environment.MRP(args.gamma, transition, V_star=V_star)

    # Features: sin and cos of n evenly spaced angles, plus a constant column.
    angles = np.linspace(0, 2 * np.pi, args.n, endpoint=False)
    Phi = np.column_stack((np.sin(angles), np.cos(angles), np.ones(args.n)))

    def train(value_fn):
        """Train ``value_fn`` on ``env`` and return ``(thetas, Vs)``."""
        if args.online:
            # TD0 returns the values first; reorder to match TDk.
            values, params, _, _ = online_td0.TD0(
                value_fn, env, args.stepsize, args.steps,
                args.log_idx, args.plot_step)
            return params, values
        return expected_tdk.TDk(
            args.k, value_fn, env, args.stepsize, args.steps,
            args.log_idx, args.plot_step)

    # --- tabular run --------------------------------------------------------
    V = simple.Tabular(np.zeros(args.n))
    thetas, Vs = train(V)
    tabular_Vs = utils.dist_mu(env.mu, env.V_star, jnp.array(Vs))

    # --- MLP run ------------------------------------------------------------
    V = mlp.MLP(Phi, [args.width] * args.depth, biases=False)
    thetas, Vs = train(V)
    mlp_V_to_V_star = utils.dist_mu(env.mu, env.V_star, jnp.array(Vs))
    mlp_V_to_origin = utils.mu_norm(env.mu, jnp.array(Vs))

    # Jacobian conditioning along the sub-sampled MLP trajectory.
    condition_numbers = []
    for checkpoint in thetas[args.plot_start::args.plot_step]:
        V.theta = checkpoint
        condition_numbers.append(utils.jac_cond(V.jacobian()))

    bound = utils.overparam_cond_number_bound(env.P, env.mu, env.gamma, args.k)
    # Index i - 1 wraps around at i == 0, so the first/last pair is included.
    smoothness = max(
        abs(env.V_star[i] - env.V_star[i - 1]) for i in range(args.n))

    return (tabular_Vs, mlp_V_to_V_star, mlp_V_to_origin, condition_numbers,
            bound, smoothness)
コード例 #4
0
def run_experiment(args):
    """Train a ReLU MLP on a cyclic MRP and record dynamics and parameter norms.

    The MRP uses a cyclic transition matrix and a reward matrix whose only
    non-zero entry is a 1 at index ``(0, 1)``. An ``mlp.MLP`` on
    sine/cosine/constant features is trained with ``online_td0.TD0`` (when
    ``args.online`` is truthy) or ``expected_tdk.TDk`` (otherwise).

    The matching tanh-activation experiment is currently disabled; its three
    outputs are returned as empty lists so the return shape stays the same.

    Args:
        args: Namespace-like object. Attributes read here: ``n``, ``delta``,
            ``gamma``, ``seed``, ``width``, ``depth``, ``online``, ``k``,
            ``stepsize``, ``steps``, ``log_idx``, ``plot_step`` and
            ``plot_start``.

    Returns:
        A 7-tuple ``(tanh_mlp_V_to_V_star, tanh_dynamics, tanh_params,
        mlp_V_to_V_star, dynamics, params, bound)`` where

        * the three ``tanh_*`` entries are empty lists,
        * ``mlp_V_to_V_star`` is ``utils.dist_mu(env.mu, env.V_star, Vs)``,
        * ``dynamics`` holds ``utils.norm_dynamics(V, env.A, env.V_star)`` at
          every ``plot_step``-th parameter checkpoint starting at
          ``plot_start``,
        * ``params`` holds the Euclidean norm of the flattened parameters at
          the same checkpoints,
        * ``bound`` is ``utils.overparam_cond_number_bound(...)``.
    """
    transition = P_matrices.build_cyclic_P(args.n, args.delta)

    rewards = np.zeros_like(transition)
    rewards[0, 1] = 1
    env = environment.MRP(args.gamma, transition, rewards)

    # Features: sin and cos of n evenly spaced angles, plus a constant column.
    angles = np.linspace(0, 2 * np.pi, args.n, endpoint=False)
    Phi = np.column_stack((np.sin(angles), np.cos(angles), np.ones(args.n)))

    # Placeholders for the disabled tanh run.
    tanh_params = []
    tanh_dynamics = []
    tanh_mlp_V_to_V_star = []

    # Seed immediately before building the MLP.
    np.random.seed(args.seed)
    V = mlp.MLP(Phi, [args.width] * args.depth, biases=False,
                activation='ReLU')
    if not args.online:
        thetas, Vs = expected_tdk.TDk(
            args.k, V, env, args.stepsize, args.steps, args.log_idx,
            args.plot_step)
    else:
        Vs, thetas, _, _ = online_td0.TD0(
            V, env, args.stepsize, args.steps, args.log_idx, args.plot_step)
    mlp_V_to_V_star = utils.dist_mu(env.mu, env.V_star, jnp.array(Vs))

    checkpoints = thetas[args.plot_start::args.plot_step]

    # Load each checkpoint into V before evaluating the dynamics norm.
    dynamics = []
    for checkpoint in checkpoints:
        V.theta = checkpoint
        dynamics.append(utils.norm_dynamics(V, env.A, env.V_star))

    # Each checkpoint is a collection of arrays; flatten them into one vector.
    params = [
        np.linalg.norm(
            np.concatenate([layer.flatten() for layer in checkpoint]).ravel())
        for checkpoint in checkpoints
    ]

    bound = utils.overparam_cond_number_bound(env.P, env.mu, env.gamma, args.k)

    return (tanh_mlp_V_to_V_star, tanh_dynamics, tanh_params,
            mlp_V_to_V_star, dynamics, params, bound)