def test_basis_and_tensors():
    """Check that a list of scalar RBF basis functions and the equivalent
    tensor-based RBF implementation produce the same feature values."""
    lower_bound = np.array([0., -.5])
    upper_bound = np.array([1., .5])

    # Build the same 3x3 RBF grid twice: once as plain basis functions,
    # once as a tensor implementation.
    rbf_tensors = GaussianRBFTensor.generate([3, 3], lower_bound, upper_bound)
    rbf_basis = GaussianRBF.generate([3, 3], lower_bound, upper_bound)

    phi_tensor = Features(tensor_list=rbf_tensors)
    phi_basis = Features(basis_list=rbf_basis)

    # Random points shifted into the state bounds.
    states = np.random.rand(10, 2) + [0., -.5]

    out_tensor = phi_tensor(states)
    out_basis = phi_basis(states)

    # Both implementations must agree (up to floating-point tolerance).
    assert np.allclose(out_tensor, out_basis)
def experiment():
    """Run one LSPI experiment on the CartPole MDP.

    Builds an epsilon-greedy LSPI agent over a polynomial + Gaussian-RBF
    feature set, trains it for 100 episodes, then evaluates the greedy
    policy and returns the mean episode length of the test run.
    """
    # Reseed from OS entropy so each invocation draws fresh randomness.
    np.random.seed()

    # MDP
    mdp = CartPole()

    # Policy: fully exploratory during learning (epsilon = 1).
    exploration = Parameter(value=1.)
    policy = EpsGreedy(epsilon=exploration)

    # Agent: constant polynomial term plus a 3x3 grid of RBFs over
    # (angle, angular velocity).
    angle_means = np.array([-np.pi, 0, np.pi]) * .25
    velocity_means = np.array([-1, 0, 1])
    basis_list = [PolynomialBasis()] + [
        GaussianRBF(np.array([mu_a, mu_v]), np.array([1.]))
        for mu_a in angle_means
        for mu_v in velocity_means
    ]
    features = Features(basis_list=basis_list)

    fit_params = dict()
    approximator_params = dict(
        input_shape=(features.size,),
        output_shape=(mdp.info.action_space.n,),
        n_actions=mdp.info.action_space.n
    )
    agent = LSPI(mdp.info, policy,
                 approximator_params=approximator_params,
                 fit_params=fit_params, features=features)

    # Algorithm: render a few episodes before training as a baseline.
    core = Core(agent, mdp)
    core.evaluate(n_episodes=3, render=True)

    # Train
    core.learn(n_episodes=100, n_episodes_per_fit=100)

    # Test with the greedy policy (epsilon = 0).
    test_epsilon = Parameter(0.)
    agent.policy.set_epsilon(test_epsilon)
    dataset = core.evaluate(n_episodes=1, quiet=True)
    core.evaluate(n_steps=100, render=True)

    return np.mean(episodes_length(dataset))
def test_basis():
    """Check that Features built from a list of Gaussian RBF basis functions
    gives consistent results for batched, single-sample, and split-column
    input forms.
    """
    low = np.array([0., -.5])
    high = np.array([1., .5])
    # BUG FIX: the bounds were passed as (high, low); the signature is
    # generate(n_centers, low, high) — see the matching call in
    # test_basis_and_tensors.
    rbf = GaussianRBF.generate([3, 3], low, high)
    features = Features(basis_list=rbf)

    # Random points shifted into the state bounds.
    x = np.random.rand(10, 2) + [0., -.5]

    y = features(x)

    # Evaluating one sample at a time must match the batched result.
    for i, x_i in enumerate(x):
        assert np.all(features(x_i) == y[i])

    # Passing the two state dimensions as separate column arrays must
    # also match, both batched and per-sample.
    x_1 = x[:, 0].reshape(-1, 1)
    x_2 = x[:, 1].reshape(-1, 1)
    assert np.all(features(x_1, x_2) == y)

    for i, x_i in enumerate(zip(x_1, x_2)):
        assert np.all(features(x_i[0], x_i[1]) == y[i])