states,_,_,_,_ = mdp.samples_cached(n_iter=15000, n_restarts=1, policy=policy,seed=8000) def make_slice(l, u, n): return slice(l, u + float(u - l) / (n - 1) / 2., float(u - l) / (n - 1)) n_slices = [3, 5, 7,10] bounds = [[-0.012, 0.012], [-0.02, 0.02], [-.6, .6], [-.6, .6]] s = [make_slice(b[0], b[1], n) for b, n in zip(bounds, n_slices)] bounds = np.array(bounds, dtype="float") means = np.mgrid[s[0], s[1], s[2], s[3]].reshape(4, -1).T sigmas = np.ones_like(means) * ( (bounds[:, 1] - bounds[:, 0]) / 2. / (np.array(n_slices) - 1)).flatten() phi = features.gaussians(means, sigmas, constant=False) A = util.apply_rowise(arr=states, f=phi) a = np.nonzero(np.sum(A > 0.05, axis=0) > 20)[0] phi = features.gaussians(means[a], sigmas[a], constant=True) print phi.dim, "features are used" theta0 = np.zeros(phi.dim) task = LinearContinuousValuePredictionTask( mdp, gamma, phi, theta0, policy=policy, normalize_phi=False, mu_next=200) methods = [] alpha = 0.001 mu = .01
# Experiment-setup fragment: creates a MarcsPolicy with noise 0.05, samples
# states from the MDP under it (200 iterations, 30 restarts, seed 8000), lays
# a grid of Gaussian features over four state dimensions (3 x 5 x 7 x 10
# points within `bounds`, sigma = half the grid spacing per dimension), keeps
# only the Gaussians whose value exceeds 0.05 on more than 20 rows of A, and
# begins constructing the LinearContinuousValuePredictionTask from them.
# NOTE(review): presumably A holds one row per sampled state -- confirm
# against util.apply_rowise.
# NOTE(review): the whole statement run is collapsed onto one physical line
# and the final LinearContinuousValuePredictionTask(...) call is cut off after
# `mu_iter=200,`; the remaining arguments are not visible here, so the code
# below is left exactly as found.  `make_slice`, `mdp` and `gamma` are not
# defined on this line and must come from elsewhere.
policy = policies.MarcsPolicy(noise=np.array([.05])) states, _, _, _, _ = mdp.samples_cached(n_iter=200, n_restarts=30, policy=policy, seed=8000) n_slices = [3, 5, 7, 10] bounds = [[0, 35], [-3, 4], [-12, 12], [-3, 3]] s = [make_slice(b[0], b[1], n) for b, n in zip(bounds, n_slices)] bounds = np.array(bounds, dtype="float") means = np.mgrid[s[0], s[1], s[2], s[3]].reshape(4, -1).T sigmas = np.ones_like(means) * ((bounds[:, 1] - bounds[:, 0]) / 2. / (np.array(n_slices) - 1)).flatten() phi = features.gaussians(means, sigmas, constant=False) A = util.apply_rowise(arr=states, f=phi) a = np.nonzero(np.sum(A > 0.05, axis=0) > 20)[0] phi = features.gaussians(means[a], sigmas[a], constant=True) print phi.dim, "features are used" theta0 = 0. * np.ones(phi.dim) task = LinearContinuousValuePredictionTask(mdp, gamma, phi, theta0, policy=policy, normalize_phi=False, mu_seed=1100, mu_subsample=1, mu_iter=200,
states, _, _, _, _ = mdp.samples_cached(n_iter=200, n_restarts=30, policy=policy, seed=8000) n_slices = [3, 5, 7, 10] n_slices2 = [5, 5, 14, 20] bounds = [[0, 35], [-3, 4], [-12, 12], [-3, 3]] means, sigmas = features.make_grid(n_slices, bounds) means2, sigmas2 = features.make_grid(n_slices2, bounds) #means = np.vstack([means,means2]) #sigmas = np.vstack([sigmas, sigmas2]) #phi = features.gaussians(means, sigmas, constant=False) #A = util.apply_rowise(arr=states, f=phi) #a = np.nonzero(np.sum(A > 0.05, axis=0) > 5)[0] phi = features.gaussians(means, sigmas, constant=True) print phi.dim, "features are used" theta0 = 0. * np.ones(phi.dim) task = LinearContinuousValuePredictionTask( mdp, gamma, phi, theta0, policy=policy, normalize_phi=False, mu_seed=1100, mu_subsample=1, mu_iter=200, mu_restarts=150, mu_next=300) methods = [] lam = 0.0 alpha = 0.3 mu = .1 tdc = td.TDCLambda(alpha=alpha, mu=mu, lam=lam, phi=phi, gamma=gamma)