# Code example #1
def setup_pilco_experiment(params, pol=None, dyn=None):
    """Assemble the components needed to run a PILCO experiment.

    Parameters
    ----------
    params : dict
        Experiment configuration; must provide the keys ``'state0_dist'``,
        ``'policy'``, ``'dynamics_model'`` and ``'optimizer'`` (and may
        provide ``'dynmodel_class'``).
    pol : policy object, optional
        Pre-built policy. When ``None``, an RBF policy is created from
        ``params['policy']``.
    dyn : dynamics model, optional
        Pre-built dynamics model. When ``None``, one is created from
        ``params['dynamics_model']`` using ``params['dynmodel_class']``
        (default: ``regression.SSGP_UI``).

    Returns
    -------
    tuple
        ``(p0, pol, dyn, exp, polopt, learner)`` — initial state
        distribution, policy, dynamics model, empty experience dataset,
        policy optimizer, and the learner module (``algorithms.pilco``,
        which defines ``get_loss`` and ``build_rollout``; could also be
        a class).
    """
    # initial state distribution
    p0 = params['state0_dist']

    # default to an RBF policy when the caller supplied none
    if pol is None:
        pol = control.RBFPolicy(**params['policy'])

    # default to the configured dynamics-model class (SSGP_UI if unset)
    if dyn is None:
        dyn = params.get('dynmodel_class',
                         regression.SSGP_UI)(**params['dynamics_model'])

    # fresh, empty experience dataset for collected rollouts
    exp = ExperienceDataset()

    # policy optimizer configured from the experiment params
    polopt = optimizers.ScipyOptimizer(**params['optimizer'])

    # module where get_loss and build_rollout are defined
    # (can also be a class)
    learner = algorithms.pilco

    return p0, pol, dyn, exp, polopt, learner
# Code example #2
def _make_dropout_dynmodel(params, dropout_class):
    """Build a BNN dynamics model with the standard 2x200 dropout-mlp spec."""
    dyn_spec = dict(hidden_dims=[200] * 2,
                    # for log-normal dropout layers the probabilities are
                    # dummy variables to enable dropout (not actual dropout
                    # probs)
                    p=0.1,
                    p_input=0.0,
                    nonlinearities=regression.nonlinearities.rectify,
                    W_init=lasagne.init.GlorotNormal(),
                    dropout_class=dropout_class,
                    build_fn=regression.dropout_mlp)
    return regression.BNN(network_spec=dyn_spec, **params['dynamics_model'])


def _make_nn_policy(input_dims, params, dropout=False):
    """Build an NN policy with the standard 2x200 mlp spec.

    When ``dropout`` is True, the network uses binary-dropout dense layers
    (built by ``regression.dropout_mlp``) instead of a plain mlp.
    """
    pol_spec = dict(hidden_dims=[200] * 2,
                    nonlinearities=regression.nonlinearities.rectify,
                    W_init=lasagne.init.GlorotNormal(),
                    build_fn=regression.mlp)
    if dropout:
        pol_spec.update(p=0.1,
                        p_input=0.0,
                        dropout_class=regression.layers.DenseDropoutLayer,
                        build_fn=regression.dropout_mlp)
    return control.NNPolicy(input_dims,
                            network_spec=pol_spec,
                            **params['policy'])


def get_scenario(experiment_id, *args, **kwargs):
    """Build the components for one of the predefined experiment scenarios.

    Parameters
    ----------
    experiment_id : int
        Scenario selector (1..9); see the per-branch comments below.
    *args, **kwargs
        Forwarded to the scenario's parameter-builder
        (``experiment1_params`` or ``experiment2_params``).

    Returns
    -------
    tuple
        ``(scenario_params, pol, dyn, learner_setup)`` where ``pol`` and/or
        ``dyn`` may be ``None`` (the learner setup function then builds its
        own defaults).

    Raises
    ------
    ValueError
        If ``experiment_id`` does not name a known scenario.  (The original
        code fell through and crashed with ``NameError`` on unbound
        ``scenario_params``.)
    """
    pol = None
    dyn = None

    if experiment_id == 1:
        # PILCO with rbf controller
        scenario_params = experiment1_params(*args, **kwargs)
        learner_setup = experiment_utils.setup_pilco_experiment
        params = scenario_params[0]
        pol = control.RBFPolicy(**params['policy'])

    elif experiment_id == 2:
        # PILCO with nn controller 1
        scenario_params = experiment1_params(*args, **kwargs)
        learner_setup = experiment_utils.setup_pilco_experiment
        params = scenario_params[0]

        # BUG FIX: the original read ``dyn.E`` here while ``dyn`` was still
        # None (AttributeError).  Build the dynamics model first — using the
        # same default class as setup_pilco_experiment — so its output
        # dimension is available for the policy.
        dynmodel_class = params.get('dynmodel_class', regression.SSGP_UI)
        dyn = dynmodel_class(**params['dynamics_model'])
        pol = _make_nn_policy(dyn.E, params)

    elif experiment_id == 3:
        # mc PILCO with RBF controller and binary dropout mlp dynamics
        scenario_params = experiment2_params(*args, **kwargs)
        learner_setup = experiment_utils.setup_mc_pilco_experiment
        params = scenario_params[0]
        pol = control.RBFPolicy(**params['policy'])
        dyn = _make_dropout_dynmodel(
            params, regression.layers.DenseDropoutLayer)

    elif experiment_id == 4:
        # mc PILCO with NN controller and binary dropout mlp dynamics
        scenario_params = experiment2_params(*args, **kwargs)
        learner_setup = experiment_utils.setup_mc_pilco_experiment
        params = scenario_params[0]
        dyn = _make_dropout_dynmodel(
            params, regression.layers.DenseDropoutLayer)
        pol = _make_nn_policy(dyn.E, params)

    elif experiment_id == 5:
        # mc PILCO with RBF controller and log normal dropout dynamics
        scenario_params = experiment2_params(*args, **kwargs)
        learner_setup = experiment_utils.setup_mc_pilco_experiment
        params = scenario_params[0]
        pol = control.RBFPolicy(**params['policy'])
        dyn = _make_dropout_dynmodel(
            params, regression.layers.DenseLogNormalDropoutLayer)

    elif experiment_id == 6:
        # mc PILCO with NN controller and log normal dropout dynamics
        scenario_params = experiment2_params(*args, **kwargs)
        learner_setup = experiment_utils.setup_mc_pilco_experiment
        params = scenario_params[0]
        dyn = _make_dropout_dynmodel(
            params, regression.layers.DenseLogNormalDropoutLayer)
        pol = _make_nn_policy(dyn.E, params)

    elif experiment_id == 7:
        # mc PILCO with dropout controller and binary dropout mlp dynamics
        scenario_params = experiment2_params(*args, **kwargs)
        learner_setup = experiment_utils.setup_mc_pilco_experiment
        params = scenario_params[0]
        dyn = _make_dropout_dynmodel(
            params, regression.layers.DenseDropoutLayer)
        pol = _make_nn_policy(dyn.E, params, dropout=True)

    elif experiment_id == 8:
        # mc PILCO with dropout controller and log-normal dropout dynamics
        scenario_params = experiment2_params(*args, **kwargs)
        learner_setup = experiment_utils.setup_mc_pilco_experiment
        params = scenario_params[0]
        dyn = _make_dropout_dynmodel(
            params, regression.layers.DenseLogNormalDropoutLayer)
        pol = _make_nn_policy(dyn.E, params, dropout=True)

    elif experiment_id == 9:
        # mc PILCO with dropout controller and concrete dropout dynamics
        scenario_params = experiment2_params(*args, **kwargs)
        learner_setup = experiment_utils.setup_mc_pilco_experiment
        params = scenario_params[0]
        p0 = params['state0_dist']

        # init dyn to use concrete dropout; unlike the other scenarios the
        # network is built explicitly and attached via build_network
        dyn = regression.BNN(**params['dynamics_model'])
        # heteroscedastic models predict a variance per output dimension
        odims = 2 * dyn.E if dyn.heteroscedastic else dyn.E
        dyn_spec = regression.dropout_mlp(
            input_dims=dyn.D,
            output_dims=odims,
            hidden_dims=[200] * 2,
            p=0.1,
            p_input=0.0,
            nonlinearities=regression.nonlinearities.rectify,
            W_init=lasagne.init.GlorotNormal(),
            dropout_class=regression.layers.DenseConcreteDropoutLayer,
            name=dyn.name)
        dyn.build_network(dyn_spec)

        # init policy; input dims come from the initial state distribution
        pol = control.NNPolicy(p0.mean.size, **params['policy'])
        pol_spec = regression.dropout_mlp(
            input_dims=pol.D,
            output_dims=pol.E,
            hidden_dims=[200] * 2,
            p=0.1,
            p_input=0.0,
            nonlinearities=regression.nonlinearities.rectify,
            W_init=lasagne.init.GlorotNormal(),
            output_nonlinearity=pol.sat_func,
            dropout_class=regression.layers.DenseDropoutLayer,
            name=pol.name)
        pol.build_network(pol_spec)

    else:
        raise ValueError('Unknown experiment_id: %s' % (experiment_id,))

    return scenario_params, pol, dyn, learner_setup