def predict_symbolic(self, mx, Sx=None, **kwargs):
    """Build the symbolic policy prediction for input state ``mx``.

    Lazily constructs a default dropout-MLP network spec and the network
    itself on first use, then delegates the actual prediction to the
    parent class.

    Parameters
    ----------
    mx: symbolic mean of the input state.
    Sx: symbolic covariance of the input state, or None for a
        deterministic/sample-based evaluation.
    **kwargs: forwarded to the parent class' ``predict_symbolic``.

    Returns
    -------
    ``M`` (predicted mean only) when ``Sx`` is None, otherwise the
    ``(M, S, V)`` tuple produced by the parent class.
    """
    if self.network_spec is None:
        # Default architecture: 2 hidden layers of 50 rectifier units
        # with 10% binary dropout and the policy's saturating output.
        self.network_spec = dropout_mlp(
            input_dims=self.D,
            output_dims=self.E,
            hidden_dims=[50] * 2,
            p=0.1,
            p_input=0.0,
            nonlinearities=lasagne.nonlinearities.rectify,
            output_nonlinearity=self.sat_func,
            dropout_class=layers.DenseDropoutLayer,
            name=self.name)

    if self.network is None:
        # Reuse previously saved parameters when available.
        params = self.network_params if self.network_params is not None else {}
        self.network = self.build_network(
            self.network_spec, params=params, name=self.name)

    ret = super(NNPolicy, self).predict_symbolic(mx, Sx, **kwargs)

    if Sx is None:
        # The parent may return a sequence of outputs; keep only the mean.
        # (single isinstance call with a tuple of types is the idiomatic
        # form of the original two-clause check)
        if isinstance(ret, (list, tuple)):
            ret = ret[0]
        M = ret
        return M
    else:
        M, S, V = ret
        return M, S, V
def setup_mc_pilco_experiment(params, pol=None, dyn=None):
    """Assemble the components needed to run an MC-PILCO experiment.

    Parameters
    ----------
    params: configuration dict. Reads 'state0_dist', 'policy',
        'dynamics_model' and 'optimizer'; optionally 'pol_spec' and
        'dyn_spec' for pre-built network specifications.
    pol: optional pre-constructed policy; a default NN policy is
        created when None.
    dyn: optional pre-constructed dynamics model; a default BNN is
        created when None.

    Returns
    -------
    tuple ``(p0, pol, dyn, exp, polopt, learner)``.
    """
    # initial state distribution and its dimensionality
    p0 = params['state0_dist']
    D = p0.mean.size
    pol_spec = params.get('pol_spec', None)
    dyn_spec = params.get('dyn_spec', None)

    # policy: default to an MLP with two 50-unit rectifier layers
    if pol is None:
        pol = control.NNPolicy(D, **params['policy'])
        if pol_spec is None:
            pol_spec = regression.mlp(
                input_dims=pol.D,
                output_dims=pol.E,
                hidden_dims=[50] * 2,
                p=0.05,
                p_input=0.0,
                nonlinearities=nonlinearities.rectify,
                output_nonlinearity=pol.sat_func,
                dropout_class=regression.DenseDropoutLayer,
                name=pol.name)
        pol.network = pol.build_network(pol_spec)

    # dynamics model: default to a log-normal dropout MLP BNN
    if dyn is None:
        dyn = regression.BNN(**params['dynamics_model'])
        if dyn_spec is None:
            # heteroscedastic models output a mean and a variance per
            # dimension, hence the doubled output size
            odims = 2 * dyn.E if dyn.heteroscedastic else dyn.E
            dyn_spec = regression.dropout_mlp(
                input_dims=dyn.D,
                output_dims=odims,
                hidden_dims=[200] * 2,
                p=0.1,
                p_input=0.1,
                nonlinearities=nonlinearities.rectify,
                dropout_class=regression.DenseLogNormalDropoutLayer,
                name=dyn.name)
        dyn.network = dyn.build_network(dyn_spec)

    # empty experience dataset for collected rollouts
    exp = ExperienceDataset()

    # policy optimizer
    polopt = optimizers.SGDOptimizer(**params['optimizer'])

    # module where get_loss and build_rollout are defined
    # (can also be a class)
    learner = algorithms.mc_pilco

    return p0, pol, dyn, exp, polopt, learner
def _dropout_dyn_spec(dropout_class):
    # Shared dynamics-model spec for the dropout-MLP BNNs (experiments 3-8).
    return dict(hidden_dims=[200] * 2,
                p=0.1,
                p_input=0.0,
                nonlinearities=regression.nonlinearities.rectify,
                W_init=lasagne.init.GlorotNormal(),
                dropout_class=dropout_class,
                build_fn=regression.dropout_mlp)


def _mlp_pol_spec():
    # Deterministic MLP policy spec (experiments 2, 4 and 6).
    return dict(hidden_dims=[200] * 2,
                nonlinearities=regression.nonlinearities.rectify,
                W_init=lasagne.init.GlorotNormal(),
                build_fn=regression.mlp)


def _dropout_pol_spec():
    # Binary-dropout MLP policy spec (experiments 7 and 8).
    return dict(hidden_dims=[200] * 2,
                p=0.1,
                p_input=0.0,
                nonlinearities=regression.nonlinearities.rectify,
                W_init=lasagne.init.GlorotNormal(),
                dropout_class=regression.layers.DenseDropoutLayer,
                build_fn=regression.dropout_mlp)


def get_scenario(experiment_id, *args, **kwargs):
    """Build the policy, dynamics model and learner setup for a scenario.

    Parameters
    ----------
    experiment_id: int in 1..9 selecting the policy/dynamics combination.
    *args, **kwargs: forwarded to the experiment parameter constructors.

    Returns
    -------
    tuple ``(scenario_params, pol, dyn, learner_setup)``; ``pol`` and/or
    ``dyn`` may be None, in which case ``learner_setup`` builds defaults.

    Raises
    ------
    ValueError: for an unknown ``experiment_id`` (the original code
        raised a confusing NameError instead).
    """
    pol = None
    dyn = None
    if experiment_id == 1:
        # PILCO with rbf controller
        scenario_params = experiment1_params(*args, **kwargs)
        learner_setup = experiment_utils.setup_pilco_experiment
        params = scenario_params[0]
        pol = control.RBFPolicy(**params['policy'])
    elif experiment_id == 2:
        # PILCO with nn controller 1
        scenario_params = experiment1_params(*args, **kwargs)
        learner_setup = experiment_utils.setup_pilco_experiment
        params = scenario_params[0]
        pol_spec = _mlp_pol_spec()
        # BUG FIX: the original passed dyn.E here, but dyn is still None
        # in this branch (AttributeError). Derive the policy input
        # dimension from the initial state distribution instead, as the
        # experiment 9 branch does. TODO(review): confirm
        # experiment1_params provides 'state0_dist' like experiment2_params.
        D = params['state0_dist'].mean.size
        pol = control.NNPolicy(D, network_spec=pol_spec, **params['policy'])
    elif experiment_id == 3:
        # mc PILCO with RBF controller and binary dropout mlp dynamics
        scenario_params = experiment2_params(*args, **kwargs)
        learner_setup = experiment_utils.setup_mc_pilco_experiment
        params = scenario_params[0]
        pol = control.RBFPolicy(**params['policy'])
        # init dyn to use dropout
        dyn_spec = _dropout_dyn_spec(regression.layers.DenseDropoutLayer)
        dyn = regression.BNN(network_spec=dyn_spec,
                             **params['dynamics_model'])
    elif experiment_id == 4:
        # mc PILCO with NN controller and binary dropout mlp dynamics
        scenario_params = experiment2_params(*args, **kwargs)
        learner_setup = experiment_utils.setup_mc_pilco_experiment
        params = scenario_params[0]
        # init dyn to use dropout
        dyn_spec = _dropout_dyn_spec(regression.layers.DenseDropoutLayer)
        dyn = regression.BNN(network_spec=dyn_spec,
                             **params['dynamics_model'])
        # init policy (dyn.E == state dims, the policy's input size)
        pol_spec = _mlp_pol_spec()
        pol = control.NNPolicy(dyn.E, network_spec=pol_spec,
                               **params['policy'])
    elif experiment_id == 5:
        # mc PILCO with RBF controller and log normal dropout dynamics
        scenario_params = experiment2_params(*args, **kwargs)
        learner_setup = experiment_utils.setup_mc_pilco_experiment
        params = scenario_params[0]
        pol = control.RBFPolicy(**params['policy'])
        # init dyn to use dropout
        # for the log normal dropout layers, the dropout probabilities
        # are dummy variables to enable dropout (not actual dropout probs)
        dyn_spec = _dropout_dyn_spec(
            regression.layers.DenseLogNormalDropoutLayer)
        dyn = regression.BNN(network_spec=dyn_spec,
                             **params['dynamics_model'])
    elif experiment_id == 6:
        # mc PILCO with NN controller and log normal dropout dynamics
        scenario_params = experiment2_params(*args, **kwargs)
        learner_setup = experiment_utils.setup_mc_pilco_experiment
        params = scenario_params[0]
        # init dyn to use dropout
        dyn_spec = _dropout_dyn_spec(
            regression.layers.DenseLogNormalDropoutLayer)
        dyn = regression.BNN(network_spec=dyn_spec,
                             **params['dynamics_model'])
        # init policy
        pol_spec = _mlp_pol_spec()
        pol = control.NNPolicy(dyn.E, network_spec=pol_spec,
                               **params['policy'])
    elif experiment_id == 7:
        # mc PILCO with dropout controller and binary dropout mlp dynamics
        scenario_params = experiment2_params(*args, **kwargs)
        learner_setup = experiment_utils.setup_mc_pilco_experiment
        params = scenario_params[0]
        # init dyn to use dropout
        dyn_spec = _dropout_dyn_spec(regression.layers.DenseDropoutLayer)
        dyn = regression.BNN(network_spec=dyn_spec,
                             **params['dynamics_model'])
        # init policy
        pol_spec = _dropout_pol_spec()
        pol = control.NNPolicy(dyn.E, network_spec=pol_spec,
                               **params['policy'])
    elif experiment_id == 8:
        # mc PILCO with dropout controller and log-normal dropout dynamics
        scenario_params = experiment2_params(*args, **kwargs)
        learner_setup = experiment_utils.setup_mc_pilco_experiment
        params = scenario_params[0]
        # init dyn to use dropout
        dyn_spec = _dropout_dyn_spec(
            regression.layers.DenseLogNormalDropoutLayer)
        dyn = regression.BNN(network_spec=dyn_spec,
                             **params['dynamics_model'])
        # init policy
        pol_spec = _dropout_pol_spec()
        pol = control.NNPolicy(dyn.E, network_spec=pol_spec,
                               **params['policy'])
    elif experiment_id == 9:
        # mc PILCO with dropout controller and concrete dropout dynamics
        scenario_params = experiment2_params(*args, **kwargs)
        learner_setup = experiment_utils.setup_mc_pilco_experiment
        params = scenario_params[0]
        p0 = params['state0_dist']
        # init dyn to use concrete dropout; this branch builds the full
        # network spec (not just a dict) and the network itself
        dyn = regression.BNN(**params['dynamics_model'])
        odims = 2 * dyn.E if dyn.heteroscedastic else dyn.E
        dyn_spec = regression.dropout_mlp(
            input_dims=dyn.D,
            output_dims=odims,
            hidden_dims=[200] * 2,
            p=0.1,
            p_input=0.0,
            nonlinearities=regression.nonlinearities.rectify,
            W_init=lasagne.init.GlorotNormal(),
            dropout_class=regression.layers.DenseConcreteDropoutLayer,
            name=dyn.name)
        dyn.build_network(dyn_spec)
        # init policy
        pol = control.NNPolicy(p0.mean.size, **params['policy'])
        pol_spec = regression.dropout_mlp(
            input_dims=pol.D,
            output_dims=pol.E,
            hidden_dims=[200] * 2,
            p=0.1,
            p_input=0.0,
            nonlinearities=regression.nonlinearities.rectify,
            W_init=lasagne.init.GlorotNormal(),
            output_nonlinearity=pol.sat_func,
            dropout_class=regression.layers.DenseDropoutLayer,
            name=pol.name)
        pol.build_network(pol_spec)
    else:
        raise ValueError('Unknown experiment_id: %s' % (experiment_id,))

    return scenario_params, pol, dyn, learner_setup