def experiment(alg, n_runs, n_iterations, ep_per_run, use_tensorflow):
    np.random.seed()

    # MDP
    mdp = ShipSteering()

    # Policy
    if use_tensorflow:
        tensor_list = gaussian_tensor.generate(
            [3, 3, 6, 2],
            [[0., 150.], [0., 150.], [-np.pi, np.pi],
             [-np.pi / 12, np.pi / 12]])
        phi = Features(tensor_list=tensor_list, name='phi',
                       input_dim=mdp.info.observation_space.shape[0])
    else:
        basis = GaussianRBF.generate(
            [3, 3, 6, 2],
            [[0., 150.], [0., 150.], [-np.pi, np.pi],
             [-np.pi / 12, np.pi / 12]])
        phi = Features(basis_list=basis)

    input_shape = (phi.size,)

    approximator_params = dict(input_dim=phi.size)
    approximator = Regressor(LinearApproximator, input_shape=input_shape,
                             output_shape=mdp.info.action_space.shape,
                             params=approximator_params)

    sigma = np.array([[.05]])
    policy = MultivariateGaussianPolicy(mu=approximator, sigma=sigma)

    # Agent
    learning_rate = AdaptiveParameter(value=.01)
    algorithm_params = dict(learning_rate=learning_rate)
    fit_params = dict()
    agent_params = {'algorithm_params': algorithm_params,
                    'fit_params': fit_params}
    agent = alg(policy, mdp.info, agent_params, phi)

    # Train
    core = Core(agent, mdp)
    dataset_eval = core.evaluate(n_episodes=ep_per_run)
    J = compute_J(dataset_eval, gamma=mdp.info.gamma)
    print('J at start : ' + str(np.mean(J)))

    for i in range(n_runs):
        core.learn(n_episodes=n_iterations * ep_per_run,
                   n_episodes_per_fit=ep_per_run)
        dataset_eval = core.evaluate(n_episodes=ep_per_run)
        J = compute_J(dataset_eval, gamma=mdp.info.gamma)
        print('J at iteration ' + str(i) + ': ' + str(np.mean(J)))

    np.save('ship_steering.npy', dataset_eval)
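
# A minimal driver sketch for the policy-gradient variant above. The
# algorithm classes and their import path are assumptions based on the
# mushroom policy-search API, and the run sizes are illustrative only.
if __name__ == '__main__':
    from mushroom.algorithms.policy_search import REINFORCE, GPOMDP  # assumed path

    for algorithm in (REINFORCE, GPOMDP):
        experiment(algorithm, n_runs=25, n_iterations=10, ep_per_run=20,
                   use_tensorflow=False)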
def experiment(alg, params, experiment_params, subdir, i):
    np.random.seed()

    # Learning parameters, assumed to be provided via experiment_params
    n_runs = experiment_params['n_runs']
    n_iterations = experiment_params['n_iterations']
    ep_per_run = experiment_params['ep_per_run']

    # MDP
    mdp = ShipSteering(small=True, n_steps_action=3)

    # Policy
    high = np.array([150, 150, np.pi], dtype=float)
    low = np.array([0, 0, -np.pi], dtype=float)
    n_tiles = [5, 5, 6]
    n_tilings = 1

    tilings = Tiles.generate(n_tilings=n_tilings, n_tiles=n_tiles,
                             low=low, high=high)
    phi = Features(tilings=tilings)

    input_shape = (phi.size,)

    approximator_params = dict(input_dim=phi.size)
    approximator = Regressor(LinearApproximator, input_shape=input_shape,
                             output_shape=mdp.info.action_space.shape,
                             params=approximator_params)

    std = np.array([3e-2])
    policy = DiagonalGaussianPolicy(mu=approximator, std=std)
    # Alternative: full-covariance Gaussian policy
    # sigma = np.array([[1e-4]])
    # policy = GaussianPolicy(mu=approximator, sigma=sigma)

    # Agent
    agent = alg(policy, mdp.info, features=phi, **params)

    # Train
    parameter_dataset = CollectPolicyParameter(policy)
    core = Core(agent, mdp, callbacks=[parameter_dataset])

    dataset_eval = list()
    dataset_eval_run = core.evaluate(n_episodes=ep_per_run)
    J = compute_J(dataset_eval_run, gamma=mdp.info.gamma)
    dataset_eval += dataset_eval_run
    print('J at start : ' + str(np.mean(J)))

    for n in range(n_runs):
        print('ITERATION :', n)
        core.learn(n_episodes=n_iterations * ep_per_run,
                   n_episodes_per_fit=ep_per_run)
        dataset_eval_run = core.evaluate(n_episodes=ep_per_run)
        J = compute_J(dataset_eval_run, gamma=mdp.info.gamma)
        print('J at iteration ' + str(n) + ': ' + str(np.mean(J)))
        dataset_eval += dataset_eval_run

    mk_dir_recursive('./' + subdir + str(i))
    np.save(subdir + str(i) + '/dataset_eval_file', dataset_eval)
    np.save(subdir + str(i) + '/parameter_dataset_file', parameter_dataset)
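
# Hypothetical caller for the variant above, showing the experiment_params
# dict whose keys the function reads; the GPOMDP import path and its
# learning_rate argument are assumptions to check against the installed
# mushroom version.
if __name__ == '__main__':
    from mushroom.algorithms.policy_search import GPOMDP  # assumed path

    experiment_params = dict(n_runs=25, n_iterations=10, ep_per_run=20)
    params = dict(learning_rate=AdaptiveParameter(value=1e-2))
    experiment(GPOMDP, params, experiment_params, subdir='ship_gpomdp_', i=0)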
def experiment(alg, params, subdir, exp_no, n_runs, n_iterations, ep_per_run):
    np.random.seed()

    # MDP
    mdp = ShipSteering(small=True, n_steps_action=3)

    # Policy
    high = np.array([150, 150, np.pi], dtype=float)
    low = np.array([0, 0, -np.pi], dtype=float)
    n_tiles = [5, 5, 6]
    n_tilings = 1

    tilings = Tiles.generate(n_tilings=n_tilings, n_tiles=n_tiles,
                             low=low, high=high)
    phi = Features(tilings=tilings)

    input_shape = (phi.size,)

    approximator_params = dict(input_dim=phi.size)
    approximator = Regressor(LinearApproximator, input_shape=input_shape,
                             output_shape=mdp.info.action_space.shape,
                             params=approximator_params)

    policy = DeterministicPolicy(mu=approximator)

    mu = np.zeros(policy.weights_size)
    sigma = 4e-1 * np.ones(policy.weights_size)
    distribution = GaussianDiagonalDistribution(mu, sigma)

    # Agent
    agent = alg(distribution, policy, mdp.info, features=phi, **params)

    # Train
    dataset_eval = list()
    core = Core(agent, mdp)

    dataset_eval_run = core.evaluate(n_episodes=ep_per_run)
    # print('distribution parameters: ', distribution.get_parameters())
    J = compute_J(dataset_eval_run, gamma=mdp.info.gamma)
    print('J at start : ' + str(np.mean(J)))
    dataset_eval += dataset_eval_run

    for n in range(n_runs):
        core.learn(n_episodes=n_iterations * ep_per_run,
                   n_episodes_per_fit=ep_per_run)
        dataset_eval_run = core.evaluate(n_episodes=ep_per_run)
        J = compute_J(dataset_eval_run, gamma=mdp.info.gamma)
        print('J at iteration ' + str(n) + ': ' + str(np.mean(J)))
        dataset_eval += dataset_eval_run

    mk_dir_recursive('./' + subdir + str(exp_no))
    np.save(subdir + str(exp_no) + '/dataset_eval_file', dataset_eval)
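
# Sketch of a caller for the distribution-based variant above. PGPE and its
# learning_rate argument mirror the mushroom black-box optimization API, but
# the import path and hyperparameter values are assumptions.
if __name__ == '__main__':
    from mushroom.algorithms.policy_search import PGPE  # assumed path

    params = dict(learning_rate=AdaptiveParameter(value=1.5))
    experiment(PGPE, params, subdir='ship_pgpe_', exp_no=0,
               n_runs=25, n_iterations=10, ep_per_run=20)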
def experiment(alg, params, n_epochs, n_iterations, ep_per_run):
    np.random.seed()

    # MDP
    mdp = ShipSteering()

    # Policy
    high = np.array([150, 150, np.pi], dtype=float)
    low = np.array([0, 0, -np.pi], dtype=float)
    n_tiles = [5, 5, 6]
    n_tilings = 1

    tilings = Tiles.generate(n_tilings=n_tilings, n_tiles=n_tiles,
                             low=low, high=high)
    phi = Features(tilings=tilings)

    input_shape = (phi.size,)

    approximator = Regressor(LinearApproximator, input_shape=input_shape,
                             output_shape=mdp.info.action_space.shape)

    policy = DeterministicPolicy(approximator)

    mu = np.zeros(policy.weights_size)
    sigma = 4e-1 * np.ones(policy.weights_size)
    distribution = GaussianDiagonalDistribution(mu, sigma)

    # Agent
    agent = alg(distribution, policy, mdp.info, features=phi, **params)

    # Train
    print(alg.__name__)
    core = Core(agent, mdp)
    dataset_eval = core.evaluate(n_episodes=ep_per_run)
    J = compute_J(dataset_eval, gamma=mdp.info.gamma)
    print('J at start : ' + str(np.mean(J)))

    for i in range(n_epochs):
        core.learn(n_episodes=n_iterations * ep_per_run,
                   n_episodes_per_fit=ep_per_run)
        dataset_eval = core.evaluate(n_episodes=ep_per_run)
        J = compute_J(dataset_eval, gamma=mdp.info.gamma)
        print('J at iteration ' + str(i) + ': ' + str(np.mean(J)))
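
# The epoch-based variant above lends itself to comparing several
# distribution-based algorithms in one go. The classes and their
# hyperparameter names (eps for REPS, beta for RWR, learning_rate for PGPE)
# follow the mushroom black-box API; import path and values are assumptions.
if __name__ == '__main__':
    from mushroom.algorithms.policy_search import REPS, RWR, PGPE  # assumed path

    algs_params = [(REPS, dict(eps=1.)),
                   (RWR, dict(beta=.7)),
                   (PGPE, dict(learning_rate=AdaptiveParameter(value=1.5)))]
    for alg, params in algs_params:
        experiment(alg, params, n_epochs=25, n_iterations=10, ep_per_run=20)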
def build_approximator(mdp):
    # Uniform tilings over position (x, y) and heading angle
    high = np.array([150, 150, np.pi], dtype=float)
    low = np.array([0, 0, -np.pi], dtype=float)
    n_tiles = [5, 5, 8]
    n_tilings = 1

    tilings = Tiles.generate(n_tilings=n_tilings, n_tiles=n_tiles,
                             low=low, high=high, uniform=True)
    phi = Features(tilings=tilings)

    input_shape = (phi.size,)

    approximator = Regressor(LinearApproximator, input_shape=input_shape,
                             output_shape=mdp.info.action_space.shape)

    return phi, approximator
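
# Usage sketch: the helper factors out the tiles/regressor construction that
# the experiment functions above repeat inline.
mdp = ShipSteering(small=True, n_steps_action=3)
phi, approximator = build_approximator(mdp)
policy = DeterministicPolicy(mu=approximator)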
from mushroom.utils.parameters import Parameter, AdaptiveParameter

# Learning parameters
n_runs = 4
n_iterations = 10
ep_per_run = 3

# Environment
mdp = TurtlebotGazebo()

# Policy
tensor_list = gaussian_tensor.generate(
    [10, 10, 6], [[-5.0, 5.0], [-5.0, 5.0], [-np.pi, np.pi]])
phi = Features(tensor_list=tensor_list, name='phi',
               input_dim=mdp.info.observation_space.shape[0])

input_shape = (phi.size,)

approximator_params = dict(input_dim=phi.size)
approximator = Regressor(LinearApproximator, input_shape=input_shape,
                         output_shape=mdp.info.action_space.shape,
                         params=approximator_params)

sigma = np.eye(2) * 1e-1
policy = MultivariateGaussianPolicy(mu=approximator, sigma=sigma)

# Agent
learning_rate = AdaptiveParameter(value=5)
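
# A plausible continuation, modelled on the ship steering experiments above:
# build the agent and run the evaluate/learn loop. The GPOMDP import path and
# its constructor signature are assumptions.
from mushroom.algorithms.policy_search import GPOMDP  # assumed path

agent = GPOMDP(policy, mdp.info, learning_rate=learning_rate, features=phi)

# Train
core = Core(agent, mdp)
dataset_eval = core.evaluate(n_episodes=ep_per_run)
J = compute_J(dataset_eval, gamma=mdp.info.gamma)
print('J at start : ' + str(np.mean(J)))

for i in range(n_runs):
    core.learn(n_episodes=n_iterations * ep_per_run,
               n_episodes_per_fit=ep_per_run)
    dataset_eval = core.evaluate(n_episodes=ep_per_run)
    J = compute_J(dataset_eval, gamma=mdp.info.gamma)
    print('J at iteration ' + str(i) + ': ' + str(np.mean(J)))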