def run_experiment(args):
    """Compare a Spiral value function with a Tabular one on a cyclic MRP.

    A zero-reward MRP is built from ``P_matrices.build_cyclic_P(args.n,
    args.delta)``. TD is run first with a ``spiral.Spiral`` value function,
    then with a ``simple.Tabular`` one initialised from the first logged
    value vector of the spiral run.

    Args:
        args: Namespace providing ``seed``, ``n``, ``delta``, ``gamma``, ``k``,
            ``online``, ``stepsize``, ``steps``, ``log_idx``, ``plot_start``
            and ``plot_step``.

    Returns:
        A tuple ``(tabular_Vs, spiral_Vs, condition_numbers, bound,
        smoothness)`` where
        * ``tabular_Vs`` / ``spiral_Vs`` are the results of
          ``utils.dist_mu(env.mu, env.V_star, ...)`` on the values logged in
          the respective run,
        * ``condition_numbers`` holds ``utils.jac_cond`` of the spiral's
          Jacobian at every parameter snapshot in
          ``thetas[plot_start::plot_step]``,
        * ``bound`` is ``utils.overparam_cond_number_bound(...)`` for the MRP,
        * ``smoothness`` is the largest absolute difference between
          ``env.V_star`` entries at cyclically adjacent indices.
    """
    np.random.seed(args.seed)

    transition = P_matrices.build_cyclic_P(args.n, args.delta)
    env = environment.MRP(args.gamma, transition, np.zeros_like(transition))
    bound = utils.overparam_cond_number_bound(
        env.P, env.mu, env.gamma, args.k)

    def train(value_fn):
        # Run online TD(0) or expected TD(k), normalising the return order
        # to (thetas, Vs).
        if args.online:
            logged_Vs, logged_thetas, _, _ = online_td0.TD0(
                value_fn, env, args.stepsize, args.steps,
                args.log_idx, args.plot_step)
        else:
            logged_thetas, logged_Vs = numpy_expected_tdk.TDk(
                args.k, value_fn, env, args.stepsize, args.steps,
                args.log_idx, args.plot_step)
        return logged_thetas, logged_Vs

    # Orientation vector: -10 everywhere except the last entry, 10 * (n - 1).
    orientation = -np.ones(args.n)
    orientation[-1] = args.n - 1
    orientation = orientation * 10.0
    print(orientation)

    spiral_P = P_matrices.build_cyclic_P(args.n, 0.5)
    spiral_fn = spiral.Spiral(-20.0, spiral_P, orientation, 0.05)
    spiral_thetas, spiral_logged = train(spiral_fn)
    spiral_Vs = utils.dist_mu(env.mu, env.V_star, np.array(spiral_logged))

    # Replay the selected parameter snapshots through the spiral to measure
    # the conditioning of its Jacobian along the trajectory.
    condition_numbers = []
    for snapshot in spiral_thetas[args.plot_start::args.plot_step]:
        spiral_fn.theta = snapshot
        condition_numbers.append(utils.jac_cond(spiral_fn.jacobian()))

    # The tabular run starts from the spiral run's first logged values.
    _, tabular_logged = train(simple.Tabular(spiral_logged[0]))
    tabular_Vs = utils.dist_mu(env.mu, env.V_star, np.array(tabular_logged))

    # Index i - 1 wraps to the last state for i == 0 (cyclic neighbours).
    smoothness = max(
        abs(env.V_star[i] - env.V_star[i - 1]) for i in range(args.n))

    return tabular_Vs, spiral_Vs, condition_numbers, bound, smoothness
def run_experiment(args):
    """Train a ReLU MLP value function with TD on a constant gridworld MRP.

    The MRP has ``sqrt(args.n)`` cells per side and a reward of 1 on every
    transition into the last state. Each state is described by three
    features: its first grid coordinate, its second grid coordinate and a
    constant 1.

    Args:
        args: Namespace providing ``n`` (a perfect square), ``gamma``,
            ``seed``, ``width``, ``depth``, ``online``, ``k``, ``stepsize``,
            ``steps``, ``log_idx``, ``plot_start`` and ``plot_step``.

    Returns:
        A tuple ``(mlp_V_to_V_star, mlp_V_to_origin, condition_numbers,
        bound, smoothness)`` where
        * ``mlp_V_to_V_star`` is ``utils.dist_mu(env.mu, env.V_star, Vs)``,
        * ``mlp_V_to_origin`` is ``utils.mu_norm(env.mu, Vs)``,
        * ``condition_numbers`` holds ``utils.jac_cond`` of the MLP Jacobian
          at every snapshot in ``thetas[plot_start::plot_step]``,
        * ``bound`` is ``utils.overparam_cond_number_bound(...)``,
        * ``smoothness`` is the largest absolute difference between
          ``env.V_star`` entries at indices ``i`` and ``i - 1`` (index 0 is
          paired with the last state).

    Raises:
        ValueError: If ``args.n`` is not a perfect square.
    """
    grid_n = int(np.sqrt(args.n))
    # Fail fast: a truncated square root would otherwise only surface later
    # as an opaque shape mismatch when the feature columns are concatenated.
    if grid_n * grid_n != args.n:
        raise ValueError(
            f"args.n must be a perfect square to form a grid, got {args.n}")

    P = P_matrices.constant_gridworld(grid_n, 0.3, 0.1, 0.1, 0.3, 0.2)
    R_mat = np.zeros_like(P)
    R_mat[:, -1] = 1
    env = environment.MRP(args.gamma, P, R_mat)

    # Build features: (slow-varying coordinate, fast-varying coordinate, 1).
    cells = np.arange(grid_n)
    Phi = np.stack([np.repeat(cells, grid_n), np.tile(cells, grid_n)], axis=1)
    Phi = np.concatenate([Phi, np.ones((args.n, 1))], axis=1)

    # Seed right before the network is constructed.
    np.random.seed(args.seed)
    V = mlp.MLP(Phi, [args.width] * args.depth, biases=False,
                activation='ReLU')

    if args.online:
        Vs, thetas, _, _ = online_td0.TD0(
            V, env, args.stepsize, args.steps, args.log_idx, args.plot_step)
    else:
        thetas, Vs = expected_tdk.TDk(
            args.k, V, env, args.stepsize, args.steps,
            args.log_idx, args.plot_step)

    logged_Vs = jnp.array(Vs)
    mlp_V_to_V_star = utils.dist_mu(env.mu, env.V_star, logged_Vs)
    mlp_V_to_origin = utils.mu_norm(env.mu, logged_Vs)

    # Replay the selected parameter snapshots to measure Jacobian
    # conditioning along the training trajectory.
    condition_numbers = []
    for theta in thetas[args.plot_start::args.plot_step]:
        V.theta = theta
        condition_numbers.append(utils.jac_cond(V.jacobian()))

    bound = utils.overparam_cond_number_bound(
        env.P, env.mu, env.gamma, args.k)
    smoothness = max(
        abs(env.V_star[i] - env.V_star[i - 1]) for i in range(args.n))

    return (mlp_V_to_V_star, mlp_V_to_origin, condition_numbers, bound,
            smoothness)
def run_experiment(args):
    """Compare a Tabular value function with an MLP on a cyclic MRP.

    The MRP is built from ``P_matrices.build_cyclic_P(args.n, args.delta)``
    with a prescribed ``V_star`` that is 1 at even state indices and 0
    elsewhere. TD is run first with a zero-initialised ``simple.Tabular``
    value function and then with an ``mlp.MLP`` over (sin, cos, 1) features
    of evenly spaced angles.

    Args:
        args: Namespace providing ``seed``, ``n``, ``delta``, ``gamma``,
            ``width``, ``depth``, ``online``, ``k``, ``stepsize``, ``steps``,
            ``log_idx``, ``plot_start`` and ``plot_step``.

    Returns:
        A tuple ``(tabular_Vs, mlp_V_to_V_star, mlp_V_to_origin,
        condition_numbers, bound, smoothness)`` where
        * ``tabular_Vs`` / ``mlp_V_to_V_star`` are the results of
          ``utils.dist_mu(env.mu, env.V_star, ...)`` for each run,
        * ``mlp_V_to_origin`` is ``utils.mu_norm(env.mu, ...)`` for the MLP,
        * ``condition_numbers`` holds ``utils.jac_cond`` of the MLP Jacobian
          at every snapshot in ``thetas[plot_start::plot_step]``,
        * ``bound`` is ``utils.overparam_cond_number_bound(...)``,
        * ``smoothness`` is the largest absolute difference between
          ``env.V_star`` entries at cyclically adjacent indices.
    """
    np.random.seed(args.seed)
    transition = P_matrices.build_cyclic_P(args.n, args.delta)

    # The target values are prescribed directly and handed to the MRP through
    # its V_star keyword rather than being derived from a reward matrix.
    V_star = np.zeros(args.n)
    V_star[::2] = 1
    env = environment.MRP(args.gamma, transition, V_star=V_star)

    # Build features: one (sin, cos, 1) row per evenly spaced angle.
    angles = np.linspace(0, 2 * np.pi, args.n, endpoint=False)
    Phi = np.stack(
        [np.sin(angles), np.cos(angles), np.ones(args.n)], axis=1)

    def train(value_fn):
        # Run online TD(0) or expected TD(k), normalising the return order
        # to (thetas, Vs).
        if args.online:
            logged_Vs, logged_thetas, _, _ = online_td0.TD0(
                value_fn, env, args.stepsize, args.steps,
                args.log_idx, args.plot_step)
        else:
            logged_thetas, logged_Vs = expected_tdk.TDk(
                args.k, value_fn, env, args.stepsize, args.steps,
                args.log_idx, args.plot_step)
        return logged_thetas, logged_Vs

    # Tabular baseline, started from all zeros.
    _, tabular_logged = train(simple.Tabular(np.zeros(args.n)))
    tabular_Vs = utils.dist_mu(env.mu, env.V_star, jnp.array(tabular_logged))

    # The MLP is constructed only after the tabular run has finished.
    network = mlp.MLP(Phi, [args.width] * args.depth, biases=False)
    mlp_thetas, mlp_logged = train(network)
    mlp_V_to_V_star = utils.dist_mu(env.mu, env.V_star, jnp.array(mlp_logged))
    mlp_V_to_origin = utils.mu_norm(env.mu, jnp.array(mlp_logged))

    # Replay the selected parameter snapshots to measure Jacobian
    # conditioning along the training trajectory.
    condition_numbers = []
    for snapshot in mlp_thetas[args.plot_start::args.plot_step]:
        network.theta = snapshot
        condition_numbers.append(utils.jac_cond(network.jacobian()))

    bound = utils.overparam_cond_number_bound(
        env.P, env.mu, env.gamma, args.k)

    # Index i - 1 wraps to the last state for i == 0 (cyclic neighbours).
    smoothness = max(
        abs(env.V_star[i] - env.V_star[i - 1]) for i in range(args.n))

    return (tabular_Vs, mlp_V_to_V_star, mlp_V_to_origin, condition_numbers,
            bound, smoothness)
def run_experiment(args):
    """Track dynamics and parameter norms of a ReLU MLP trained with TD.

    The MRP is built from ``P_matrices.build_cyclic_P(args.n, args.delta)``
    with a single reward of 1 at ``R_mat[0, 1]``. The value function is an
    ``mlp.MLP`` over (sin, cos, 1) features of evenly spaced angles.

    Args:
        args: Namespace providing ``n``, ``delta``, ``gamma``, ``seed``,
            ``width``, ``depth``, ``online``, ``k``, ``stepsize``, ``steps``,
            ``log_idx``, ``plot_start`` and ``plot_step``.

    Returns:
        A tuple ``(tanh_mlp_V_to_V_star, tanh_dynamics, tanh_params,
        mlp_V_to_V_star, dynamics, params, bound)``. The three ``tanh_*``
        entries are always empty lists because the tanh run is disabled.
        ``mlp_V_to_V_star`` is ``utils.dist_mu(env.mu, env.V_star, Vs)``;
        ``dynamics`` and ``params`` hold, for every snapshot in
        ``thetas[plot_start::plot_step]``, ``utils.norm_dynamics(V, env.A,
        env.V_star)`` and the Euclidean norm of the flattened parameters;
        ``bound`` is ``utils.overparam_cond_number_bound(...)``.
    """
    transition = P_matrices.build_cyclic_P(args.n, args.delta)
    rewards = np.zeros_like(transition)
    rewards[0, 1] = 1
    env = environment.MRP(args.gamma, transition, rewards)

    # Build features: one (sin, cos, 1) row per evenly spaced angle.
    angles = np.linspace(0, 2 * np.pi, args.n, endpoint=False)
    Phi = np.stack(
        [np.sin(angles), np.cos(angles), np.ones(args.n)], axis=1)

    # NOTE: the tanh-activation variant of this experiment is disabled; empty
    # placeholders keep the layout of the returned tuple unchanged.
    tanh_mlp_V_to_V_star = []
    tanh_dynamics = []
    tanh_params = []

    # Seed right before the network is constructed.
    np.random.seed(args.seed)
    network = mlp.MLP(Phi, [args.width] * args.depth, biases=False,
                      activation='ReLU')

    if args.online:
        Vs, thetas, _, _ = online_td0.TD0(
            network, env, args.stepsize, args.steps,
            args.log_idx, args.plot_step)
    else:
        thetas, Vs = expected_tdk.TDk(
            args.k, network, env, args.stepsize, args.steps,
            args.log_idx, args.plot_step)
    mlp_V_to_V_star = utils.dist_mu(env.mu, env.V_star, jnp.array(Vs))

    snapshots = thetas[args.plot_start::args.plot_step]

    # Replay each snapshot through the network to evaluate the dynamics.
    dynamics = []
    for snapshot in snapshots:
        network.theta = snapshot
        dynamics.append(utils.norm_dynamics(network, env.A, env.V_star))

    # Euclidean norm of all parameters of a snapshot, flattened into one
    # vector.
    params = [
        np.linalg.norm(np.concatenate([layer.flatten() for layer in snapshot]))
        for snapshot in snapshots
    ]

    bound = utils.overparam_cond_number_bound(
        env.P, env.mu, env.gamma, args.k)

    return (tanh_mlp_V_to_V_star, tanh_dynamics, tanh_params,
            mlp_V_to_V_star, dynamics, params, bound)