Example #1
def objective(objective_args):
    """Train a set of z-axis quad-rotor controllers for one hyper-parameter
    candidate and return the cumulative z-error over the look-back window."""
    (z_scale, zdot_reward, action_reward, exploration, tolerance, max_mem_a,
     k_a, alpha_a, k_c, alpha_c, max_mem_pm, k_pm, pred_tol, lambda_trace,
     gamma) = objective_args

    (df_x, df_z, df_xdot, df_zdot, df_theta, df_thetadot, df_u1,
     df_u3) = QuadRotor2DPlant.get_default_feature_set()

    feature_z = Feature(r"$z$ [m]", scale=z_scale, bounds=np.array([-25, 0]))
    quad_rotor_plant = QuadRotor2DPlant(1. / FREQUENCY,
                                        blade_flapping=BLADE_FLAPPING,
                                        init_mean=DEFAULT_INIT_STATE_MEAN,
                                        feature_set=FeatureSet([
                                            df_x,
                                            feature_z,
                                            df_xdot,
                                            df_zdot,
                                            df_theta,
                                            df_thetadot,
                                            df_u1,
                                            df_u3,
                                        ]))

    train_args = (
        Actor(
            FeatureSet([feature_z, df_zdot]),
            FeatureSet([df_u1]),
            quad_rotor_plant.get_feature_set(),
            k_a,
            max_mem_a * 50,
            alpha_a,
            tolerance,
        ),
        Critic(
            FeatureSet([feature_z, df_zdot]),
            quad_rotor_plant.get_feature_set(),
            k_c,
            max_mem_a * 50,
            lambda_trace,
            alpha_c,
            gamma,
            QuadraticErrorRewardFunction([action_reward, 0],
                                         [0, 10., 0, zdot_reward, 0, 0],
                                         desired_state=DESIRED_STATE),
            tolerance,
        ),
        PlantModel(
            quad_rotor_plant.get_feature_set(),
            FeatureSet([feature_z, df_zdot, df_u1]),
            k_pm,
            max_mem_pm * 50,
            pred_tol,
        ),
        quad_rotor_plant,
        DEFAULT_LENGTH,
        DEFAULT_ADD_METHOD,
        DEFAULT_PURGE_METHOD,
        ExplorationStrategy({1: exploration}),
    )

    cs = ControllerSet(
        parallelize(
            parsed_args.j,
            train,
            [train_args + (SEED + i, ) for i in range(parsed_args.p)],
        ))

    result = SimulationResult(
        cs.lookback_result(
            LOOK_BACK_WINDOW,
            look_back_metric="median",
        ),
        metric=parsed_args.metric,
    )
    training_message = "Finished training with cumulative z-error {:.2f}".format(
        result.get_cum_state_error().flatten()[1])
    print(training_message)
    return result.get_cum_state_error().flatten()[1]
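
The objective above returns a single scalar (the cumulative z-error), so it can be handed to any black-box optimiser. Below is a minimal random-search driver as an illustration; the parameter order follows the tuple unpacked at the top of objective, but the search ranges and budget are placeholders, not values from the original experiment.

import numpy as np

# Hyper-parameters in the order they are unpacked inside objective().
PARAM_NAMES = [
    "z_scale", "zdot_reward", "action_reward", "exploration", "tolerance",
    "max_mem_a", "k_a", "alpha_a", "k_c", "alpha_c", "max_mem_pm", "k_pm",
    "pred_tol", "lambda_trace", "gamma",
]
# Placeholder (low, high) ranges; the real bounds are not part of this snippet.
BOUNDS = {name: (0.0, 1.0) for name in PARAM_NAMES}

rng = np.random.default_rng(0)
best_error, best_candidate = np.inf, None
for _ in range(20):  # small illustrative search budget
    candidate = tuple(rng.uniform(*BOUNDS[name]) for name in PARAM_NAMES)
    error = objective(candidate)
    if error < best_error:
        best_error, best_candidate = error, candidate
print("Best cumulative z-error: {:.2f}".format(best_error))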
Example #2
def higher_discount_rate():
    """Sensitivity run: retrain the z-axis controller with the discount rate
    raised to 0.99 and dump the resulting reward set."""
    quad_rotor_plant = QuadRotor2DPlant(
        1. / FREQUENCY,
        blade_flapping=BLADE_FLAPPING,
        init_mean=DEFAULT_INIT_STATE_MEAN,
    )
    actor_critic_args = (
        Actor(
            FeatureSet([feature_z, feature_zdot]),
            FeatureSet([feature_u1]),
            quad_rotor_plant.get_feature_set(),
            K_ACTOR,
            STAGE_ONE_AC_MEMORY,
            ALPHA_ACTOR,
            TOLERANCE_ACTOR,
        ),
        Critic(
            FeatureSet([feature_z, feature_zdot]),
            quad_rotor_plant.get_feature_set(),
            K_CRITIC,
            STAGE_ONE_AC_MEMORY,
            LAMBDA_TRACE,
            ALPHA_CRITIC,
            0.99,  # discount rate raised for this sensitivity run
            QuadraticErrorRewardFunction(
                ACTION_REWARDS,
                STATE_REWARDS,
                desired_state=DESIRED_STATE
            ),
            TOLERANCE_CRITIC,
        ),
        PlantModel(
            quad_rotor_plant.get_feature_set(),
            FeatureSet([feature_z, feature_zdot, feature_u1]),
            K_PLANT_MODEL,
            STAGE_ONE_PM_MEMORY,
            PREDICTION_TOLERANCE,
        ),
        quad_rotor_plant,
        DEFAULT_LENGTH,
        DEFAULT_ADD_METHOD,
        DEFAULT_PURGE_METHOD,
        ExplorationStrategy(EXPLORATION_DICT),
    )

    print("Starting training of quad-rotor with higher discount rate.")
    trained_cs = ControllerSet(
        parallelize(
            parsed_args.j,
            train,
            [actor_critic_args + (SEED + i,) for i in range(parsed_args.p)],
        )
    )
    print("Finished higher discount rate with {:.2f} (id={})".format(
        SimulationResult(
            trained_cs.lookback_result(LOOK_BACK_WINDOW),
            metric=parsed_args.metric
        ).get_cum_state_error()[1:2].sum(),
        trained_cs.get_id(),
    ))
    trained_cs.notes = "Sensitivity analysis: higher discount rate"
    # trained_cs.dump()
    RewardSet(trained_cs).dump()
Example #3
            ),
            quad_rotor_plant,
            DEFAULT_LENGTH,
            DEFAULT_ADD_METHOD,
            DEFAULT_PURGE_METHOD,
            ExplorationStrategy(EXPLORATION_DICT),
        )

        print("Starting training of quad-rotor.")

        # STAGE ONE
        first_stage_cs = ControllerSet(
            parallelize(
                parsed_args.j,
                train_stage_one,
                [
                    actor_critic_args + (SEED + i, )
                    for i in range(parsed_args.p)
                ],
            ))
        print("Finished stage one with {:.2f}".format(
            SimulationResult(
                first_stage_cs.lookback_result(LOOK_BACK_WINDOW),
                metric=parsed_args.metric).get_cum_state_error()[1:2].sum()))

        # ZERO EXPANSION
        zero_expansion = ControllerSet(
            parallelize(
                parsed_args.j,
                train_zero_expansion,
                [deepcopy(ac) for ac in first_stage_cs],
            ))
Example #4
def objective(objective_args):
    """Two-stage hyper-parameter objective: train the z-axis controller
    (stage one), extend each trained controller with the pitch (theta) loop
    (stage two), and return the stage-two cumulative z-error."""
    (
        a2_scale,
        theta_scale,
        thetadot_scale,
        max_mem_ac,
        max_mem_pm,
        theta_spread,
        thetadot_spread,
        exploration,
        theta_reward,
        thetadot_reward,
        u_3_reward,
        k_a,
        k_c,
        k_pm,
    ) = objective_args

    feature_theta = Feature(r"$\theta$ [rad]", scale=theta_scale)
    feature_thetadot = Feature(r"$\dot{\theta}$ [rad/s]",
                               scale=thetadot_scale,
                               derivative=True)
    feature_a2 = Feature(r"$a_2$ [-]",
                         feature_type="action",
                         scale=0.760859,
                         bounds=0.3 * np.array([-1, 1]))
    quad_rotor_plant = QuadRotor2DPlant(
        1. / FREQUENCY,
        blade_flapping=BLADE_FLAPPING,
        init_mean=DEFAULT_INIT_STATE_MEAN,
        feature_set=FeatureSet([
            df_x, df_z, df_xdot, df_zdot, feature_theta, feature_thetadot,
            df_a1, feature_a2
        ]),
    )

    stage_one_args = [
        Actor(
            FeatureSet([df_z, df_zdot]),
            FeatureSet([df_a1]),
            quad_rotor_plant.get_feature_set(),
            K_ACTOR,
            STAGE_ONE_AC_MEMORY,
            ALPHA_ACTOR,
            TOLERANCE_ACTOR,
        ),
        Critic(
            FeatureSet([df_z, df_zdot]),
            quad_rotor_plant.get_feature_set(),
            K_CRITIC,
            STAGE_ONE_AC_MEMORY,
            LAMBDA_TRACE,
            ALPHA_CRITIC,
            DISCOUNT,
            QuadraticErrorRewardFunction(ACTION_REWARDS,
                                         STATE_REWARDS,
                                         desired_state=DESIRED_STATE),
            TOLERANCE_CRITIC,
        ),
        PlantModel(
            quad_rotor_plant.get_feature_set(),
            FeatureSet([df_z, df_zdot, df_a1]),
            K_PLANT_MODEL,
            STAGE_ONE_PM_MEMORY,
            PREDICTION_TOLERANCE,
        ),
        quad_rotor_plant,
        DEFAULT_LENGTH,
        DEFAULT_ADD_METHOD,
        DEFAULT_PURGE_METHOD,
        ExplorationStrategy(STAGE_ONE_EXPLORATION_DICT),
    ]

    print("Training basic quad-rotor")

    # STAGE ONE
    cs_stage_one = ControllerSet(
        parallelize(
            parsed_args.j,
            train_stage_one,
            [stage_one_args + [SEED + i] for i in range(parsed_args.p)],
        ))

    _, z_error_stage_one, _, _, _, _ = SimulationResult(
        cs_stage_one.lookback_result(LOOK_BACK_WINDOW),
        metric=parsed_args.metric).get_cum_state_error().flatten()
    print("Finished stage one with {:s} cumulative z-error of {:.2f}".format(
        parsed_args.metric, z_error_stage_one))

    stage_two_args = [
        max_mem_ac, max_mem_pm, theta_spread, thetadot_spread, exploration,
        theta_reward, thetadot_reward, u_3_reward, k_a, k_c, k_pm,
        feature_theta, feature_thetadot, feature_a2
    ]
    cs_stage_two = ControllerSet(
        parallelize(
            parsed_args.j,
            train_stage_two,
            [stage_two_args + [deepcopy(ac)] for ac in cs_stage_one],
        ))
    x_error, z_error, _, _, theta_error, _ = SimulationResult(
        cs_stage_two.lookback_result(LOOK_BACK_WINDOW),
        metric=parsed_args.metric).get_cum_state_error().flatten()
    return z_error
Example #5
                STAGE_ONE_PM_MEMORY,
                PREDICTION_TOLERANCE,
            ),
            quad_rotor_plant,
            DEFAULT_LENGTH,
            DEFAULT_ADD_METHOD,
            DEFAULT_PURGE_METHOD,
            ExplorationStrategy(EXPLORATION_DICT),
        )

        # STAGE ONE
        trained_cs = ControllerSet(
            parallelize(
                parsed_args.j,
                train,
                [
                    actor_critic_args + (SEED + i, )
                    for i in range(parsed_args.p)
                ],
            ))

        print("Finished Gaussian clone expansion (id={}) with {:.2f}".format(
            trained_cs.get_id(),
            SimulationResult(
                trained_cs.lookback_result(LOOK_BACK_WINDOW),
                metric=parsed_args.metric).get_cum_state_error()[1:2].sum()))
        trained_cs.dump()
        RewardSet(trained_cs).dump()

    except KeyboardInterrupt:
        print("Shutdown requested... exiting")
    finally: