def setup_pilco_experiment(params, pol=None, dyn=None):
    """Assemble the components needed to run a PILCO experiment.

    Parameters
    ----------
    params : dict
        Experiment configuration. Must provide the keys 'state0_dist',
        'policy', 'dynamics_model' and 'optimizer'; may optionally
        provide 'dynmodel_class' to override the dynamics model type.
    pol : optional
        Pre-built policy. When None, an RBF policy is constructed from
        ``params['policy']``.
    dyn : optional
        Pre-built dynamics model. When None, one is constructed from
        ``params['dynamics_model']`` (class taken from
        ``params['dynmodel_class']``, defaulting to ``regression.SSGP_UI``).

    Returns
    -------
    tuple
        ``(p0, pol, dyn, exp, polopt, learner)`` — initial state
        distribution, policy, dynamics model, empty experience dataset,
        policy optimizer, and the learner module (``algorithms.pilco``,
        which defines ``get_loss`` and ``build_rollout``).
    """
    # initial state distribution
    p0 = params['state0_dist']

    # default policy: RBF controller built from the config
    if pol is None:
        pol = control.RBFPolicy(**params['policy'])

    # default dynamics model: class is configurable, SSGP_UI otherwise
    if dyn is None:
        dyn_class = params.get('dynmodel_class', regression.SSGP_UI)
        dyn = dyn_class(**params['dynamics_model'])

    # fresh experience dataset and the policy optimizer
    exp = ExperienceDataset()
    polopt = optimizers.ScipyOptimizer(**params['optimizer'])

    # module (or class) providing get_loss and build_rollout
    learner = algorithms.pilco

    return p0, pol, dyn, exp, polopt, learner
def _mlp_policy_spec():
    # network spec for a plain (deterministic) MLP policy
    return dict(hidden_dims=[200] * 2,
                nonlinearities=regression.nonlinearities.rectify,
                W_init=lasagne.init.GlorotNormal(),
                build_fn=regression.mlp)


def _dropout_mlp_spec(dropout_class):
    # network spec for a dropout MLP; shared by BNN dynamics models and
    # dropout policies. For log-normal dropout layers the dropout
    # probabilities are dummy variables that merely enable dropout
    # (not actual dropout probabilities).
    return dict(hidden_dims=[200] * 2, p=0.1, p_input=0.0,
                nonlinearities=regression.nonlinearities.rectify,
                W_init=lasagne.init.GlorotNormal(),
                dropout_class=dropout_class,
                build_fn=regression.dropout_mlp)


def get_scenario(experiment_id, *args, **kwargs):
    """Build the (params, policy, dynamics model, setup fn) for a scenario.

    Parameters
    ----------
    experiment_id : int
        Which experiment configuration to build (1-9, see the branch
        comments below).
    *args, **kwargs
        Forwarded to the corresponding ``experimentN_params`` function.

    Returns
    -------
    tuple
        ``(scenario_params, pol, dyn, learner_setup)``. ``pol`` and/or
        ``dyn`` may be None, in which case ``learner_setup`` builds the
        defaults.

    Raises
    ------
    ValueError
        If ``experiment_id`` does not match a known scenario.
    """
    pol = None
    dyn = None
    if experiment_id == 1:
        # PILCO with rbf controller
        scenario_params = experiment1_params(*args, **kwargs)
        learner_setup = experiment_utils.setup_pilco_experiment
        params = scenario_params[0]
        pol = control.RBFPolicy(**params['policy'])
    elif experiment_id == 2:
        # PILCO with nn controller 1
        scenario_params = experiment1_params(*args, **kwargs)
        learner_setup = experiment_utils.setup_pilco_experiment
        params = scenario_params[0]
        # NOTE: the dynamics model is not built in this branch, so the
        # policy input dimensionality is taken from the initial state
        # distribution (as in experiment 9) rather than from dyn.E,
        # which would dereference None here.
        pol = control.NNPolicy(params['state0_dist'].mean.size,
                               network_spec=_mlp_policy_spec(),
                               **params['policy'])
    elif experiment_id == 3:
        # mc PILCO with RBF controller and binary dropout mlp dynamics
        scenario_params = experiment2_params(*args, **kwargs)
        learner_setup = experiment_utils.setup_mc_pilco_experiment
        params = scenario_params[0]
        pol = control.RBFPolicy(**params['policy'])
        # init dyn to use binary dropout
        dyn_spec = _dropout_mlp_spec(regression.layers.DenseDropoutLayer)
        dyn = regression.BNN(network_spec=dyn_spec,
                             **params['dynamics_model'])
    elif experiment_id == 4:
        # mc PILCO with NN controller and binary dropout mlp dynamics
        scenario_params = experiment2_params(*args, **kwargs)
        learner_setup = experiment_utils.setup_mc_pilco_experiment
        params = scenario_params[0]
        # init dyn to use binary dropout
        dyn_spec = _dropout_mlp_spec(regression.layers.DenseDropoutLayer)
        dyn = regression.BNN(network_spec=dyn_spec,
                             **params['dynamics_model'])
        # init policy (deterministic mlp)
        pol = control.NNPolicy(dyn.E, network_spec=_mlp_policy_spec(),
                               **params['policy'])
    elif experiment_id == 5:
        # mc PILCO with RBF controller and log normal dropout dynamics
        scenario_params = experiment2_params(*args, **kwargs)
        learner_setup = experiment_utils.setup_mc_pilco_experiment
        params = scenario_params[0]
        pol = control.RBFPolicy(**params['policy'])
        # init dyn to use log-normal dropout
        dyn_spec = _dropout_mlp_spec(
            regression.layers.DenseLogNormalDropoutLayer)
        dyn = regression.BNN(network_spec=dyn_spec,
                             **params['dynamics_model'])
    elif experiment_id == 6:
        # mc PILCO with NN controller and log normal dropout dynamics
        scenario_params = experiment2_params(*args, **kwargs)
        learner_setup = experiment_utils.setup_mc_pilco_experiment
        params = scenario_params[0]
        # init dyn to use log-normal dropout
        dyn_spec = _dropout_mlp_spec(
            regression.layers.DenseLogNormalDropoutLayer)
        dyn = regression.BNN(network_spec=dyn_spec,
                             **params['dynamics_model'])
        # init policy (deterministic mlp)
        pol = control.NNPolicy(dyn.E, network_spec=_mlp_policy_spec(),
                               **params['policy'])
    elif experiment_id == 7:
        # mc PILCO with dropout controller and binary dropout mlp dynamics
        scenario_params = experiment2_params(*args, **kwargs)
        learner_setup = experiment_utils.setup_mc_pilco_experiment
        params = scenario_params[0]
        # init dyn to use binary dropout
        dyn_spec = _dropout_mlp_spec(regression.layers.DenseDropoutLayer)
        dyn = regression.BNN(network_spec=dyn_spec,
                             **params['dynamics_model'])
        # init policy with binary dropout as well
        pol_spec = _dropout_mlp_spec(regression.layers.DenseDropoutLayer)
        pol = control.NNPolicy(dyn.E, network_spec=pol_spec,
                               **params['policy'])
    elif experiment_id == 8:
        # mc PILCO with dropout controller and log-normal dropout dynamics
        scenario_params = experiment2_params(*args, **kwargs)
        learner_setup = experiment_utils.setup_mc_pilco_experiment
        params = scenario_params[0]
        # init dyn to use log-normal dropout
        dyn_spec = _dropout_mlp_spec(
            regression.layers.DenseLogNormalDropoutLayer)
        dyn = regression.BNN(network_spec=dyn_spec,
                             **params['dynamics_model'])
        # init policy with binary dropout
        pol_spec = _dropout_mlp_spec(regression.layers.DenseDropoutLayer)
        pol = control.NNPolicy(dyn.E, network_spec=pol_spec,
                               **params['policy'])
    elif experiment_id == 9:
        # mc PILCO with dropout controller and concrete dropout dynamics
        scenario_params = experiment2_params(*args, **kwargs)
        learner_setup = experiment_utils.setup_mc_pilco_experiment
        params = scenario_params[0]
        p0 = params['state0_dist']
        # init dyn to use concrete dropout; heteroscedastic models
        # predict both a mean and a variance per output dimension
        dyn = regression.BNN(**params['dynamics_model'])
        odims = 2 * dyn.E if dyn.heteroscedastic else dyn.E
        dyn_spec = regression.dropout_mlp(
            input_dims=dyn.D,
            output_dims=odims,
            hidden_dims=[200] * 2,
            p=0.1, p_input=0.0,
            nonlinearities=regression.nonlinearities.rectify,
            W_init=lasagne.init.GlorotNormal(),
            dropout_class=regression.layers.DenseConcreteDropoutLayer,
            name=dyn.name)
        dyn.build_network(dyn_spec)
        # init policy with binary dropout; output squashed by sat_func
        pol = control.NNPolicy(p0.mean.size, **params['policy'])
        pol_spec = regression.dropout_mlp(
            input_dims=pol.D,
            output_dims=pol.E,
            hidden_dims=[200] * 2,
            p=0.1, p_input=0.0,
            nonlinearities=regression.nonlinearities.rectify,
            W_init=lasagne.init.GlorotNormal(),
            output_nonlinearity=pol.sat_func,
            dropout_class=regression.layers.DenseDropoutLayer,
            name=pol.name)
        pol.build_network(pol_spec)
    else:
        # previously an unknown id fell through and raised NameError on
        # scenario_params; fail explicitly instead
        raise ValueError('Unknown experiment_id: %s' % experiment_id)

    return scenario_params, pol, dyn, learner_setup