コード例 #1
0
def objective(objective_args):
    """Hyper-parameter search objective for the 2-D quad-rotor z-controller.

    Unpacks a flat tuple of hyper-parameters, trains ``parsed_args.p``
    controllers in parallel, and returns the cumulative z-error over the
    look-back window (lower is better).
    """
    (z_scale, zdot_reward, action_reward, exploration, tolerance, max_mem_a,
     k_a, alpha_a, k_c, alpha_c, max_mem_pm, k_pm, pred_tol, lambda_trace,
     gamma) = objective_args

    # Default plant features; only z gets a custom, tunable scale below.
    default_features = QuadRotor2DPlant.get_default_feature_set()
    (df_x, df_z, df_xdot, df_zdot, df_theta, df_thetadot, df_u1,
     df_u3) = default_features

    feature_z = Feature(r"$z$ [m]", scale=z_scale, bounds=np.array([-25, 0]))

    plant_feature_set = FeatureSet([
        df_x, feature_z, df_xdot, df_zdot, df_theta, df_thetadot, df_u1, df_u3
    ])
    quad_rotor_plant = QuadRotor2DPlant(1. / FREQUENCY,
                                        blade_flapping=BLADE_FLAPPING,
                                        init_mean=DEFAULT_INIT_STATE_MEAN,
                                        feature_set=plant_feature_set)

    # Actor/critic operate on (z, z-dot); actor outputs thrust u1 only.
    actor = Actor(
        FeatureSet([feature_z, df_zdot]),
        FeatureSet([df_u1]),
        quad_rotor_plant.get_feature_set(),
        k_a,
        max_mem_a * 50,
        alpha_a,
        tolerance,
    )
    critic = Critic(
        FeatureSet([feature_z, df_zdot]),
        quad_rotor_plant.get_feature_set(),
        k_c,
        max_mem_a * 50,
        lambda_trace,
        alpha_c,
        gamma,
        QuadraticErrorRewardFunction([action_reward, 0],
                                     [0, 10., 0, zdot_reward, 0, 0],
                                     desired_state=DESIRED_STATE),
        tolerance,
    )
    plant_model = PlantModel(
        quad_rotor_plant.get_feature_set(),
        FeatureSet([feature_z, df_zdot, df_u1]),
        k_pm,
        max_mem_pm * 50,
        pred_tol,
    )

    train_args = (
        actor,
        critic,
        plant_model,
        quad_rotor_plant,
        DEFAULT_LENGTH,
        DEFAULT_ADD_METHOD,
        DEFAULT_PURGE_METHOD,
        ExplorationStrategy({1: exploration}),
    )

    # One training run per seed, fanned out over parsed_args.j workers.
    cs = ControllerSet(
        parallelize(
            parsed_args.j,
            train,
            [train_args + (SEED + i, ) for i in range(parsed_args.p)],
        ))

    result = SimulationResult(
        cs.lookback_result(
            LOOK_BACK_WINDOW,
            look_back_metric="median",
        ),
        metric=parsed_args.metric,
    )
    # Index 1 of the flattened cumulative state error is the z component.
    cum_z_error = result.get_cum_state_error().flatten()[1]
    print("Finished training with cumulative z-error {:.2f}".format(
        cum_z_error))
    return cum_z_error
コード例 #2
0
def higher_discount_rate():
    """Sensitivity-analysis run: retrain the quad-rotor controllers with a
    discount rate of 0.99 and persist the resulting reward set."""
    plant = QuadRotor2DPlant(
        1. / FREQUENCY,
        blade_flapping=BLADE_FLAPPING,
        init_mean=DEFAULT_INIT_STATE_MEAN,
    )

    actor = Actor(
        FeatureSet([feature_z, feature_zdot]),
        FeatureSet([feature_u1]),
        plant.get_feature_set(),
        K_ACTOR,
        STAGE_ONE_AC_MEMORY,
        ALPHA_ACTOR,
        TOLERANCE_ACTOR,
    )
    # Discount rate is hard-coded to 0.99 here — the quantity under study
    # in this sensitivity analysis.
    critic = Critic(
        FeatureSet([feature_z, feature_zdot]),
        plant.get_feature_set(),
        K_CRITIC,
        STAGE_ONE_AC_MEMORY,
        LAMBDA_TRACE,
        ALPHA_CRITIC,
        0.99,
        QuadraticErrorRewardFunction(
            ACTION_REWARDS,
            STATE_REWARDS,
            desired_state=DESIRED_STATE,
        ),
        TOLERANCE_CRITIC,
    )
    plant_model = PlantModel(
        plant.get_feature_set(),
        FeatureSet([feature_z, feature_zdot, feature_u1]),
        K_PLANT_MODEL,
        STAGE_ONE_PM_MEMORY,
        PREDICTION_TOLERANCE,
    )
    actor_critic_args = (
        actor,
        critic,
        plant_model,
        plant,
        DEFAULT_LENGTH,
        DEFAULT_ADD_METHOD,
        DEFAULT_PURGE_METHOD,
        ExplorationStrategy(EXPLORATION_DICT),
    )

    print("Starting training of quad-rotor with higher discount rate.")
    # One training run per seed, fanned out over parsed_args.j workers.
    trained_cs = ControllerSet(
        parallelize(
            parsed_args.j,
            train,
            [actor_critic_args + (SEED + i,) for i in range(parsed_args.p)],
        )
    )

    # Cumulative state error at index 1 summarizes the run.
    cum_error = SimulationResult(
        trained_cs.lookback_result(LOOK_BACK_WINDOW),
        metric=parsed_args.metric,
    ).get_cum_state_error()[1:2].sum()
    print("Finished higher discount rate with {:.2f} (id={})".format(
        cum_error,
        trained_cs.get_id(),
    ))

    trained_cs.notes = "Sensitivity analysis: higher discount rate"
    # NOTE: dumping the full controller set is intentionally disabled;
    # only the reward set is persisted.
    RewardSet(trained_cs).dump()
コード例 #3
0
            ),
            quad_rotor_plant,
            DEFAULT_LENGTH,
            DEFAULT_ADD_METHOD,
            DEFAULT_PURGE_METHOD,
            ExplorationStrategy(EXPLORATION_DICT),
        )

        print("Starting training of quad-rotor.")

        # STAGE ONE
        first_stage_cs = ControllerSet(
            parallelize(
                parsed_args.j,
                train_stage_one,
                [
                    actor_critic_args + (SEED + i, )
                    for i in range(parsed_args.p)
                ],
            ))
        print("Finished stage one with {:.2f}".format(
            SimulationResult(
                first_stage_cs.lookback_result(LOOK_BACK_WINDOW),
                metric=parsed_args.metric).get_cum_state_error()[1:2].sum()))

        # ZERO EXPANSION
        zero_expansion = ControllerSet(
            parallelize(
                parsed_args.j,
                train_zero_expansion,
                [deepcopy(ac) for ac in first_stage_cs],
コード例 #4
0
# Output names for the four sensitivity-analysis figures
# (alpha and gamma, each varied lower/higher).
targets = [
    "sensitivity-analysis-alpha-lower",
    "sensitivity-analysis-alpha-higher",
    "sensitivity-analysis-gamma-lower",
    "sensitivity-analysis-gamma-higher",
]

# Figure export settings (destination directory, file type, resolution).
BASE_PATH = FIGURE_PATH
file_format = "pdf"
dpi = 300

for i, t in zip(ids, targets):
    try:
        rc = RewardSet.load(i)
    except IOError:
        rc = RewardSet(ControllerSet.load(i))
        rc.dump()
    print("ID: {}".format(i))
    vis = rc.plot(
        conf=68,
        bounds=False,
        metric="median",
        minimum=9000,
    )
    vis.save(
        "report-3pp",
        target=os.path.join(BASE_PATH,
                            t + "-{}.{}".format(len(rc), file_format)),
        format=file_format,
        transparant=True,
        dpi=dpi,
コード例 #5
0
                STAGE_ONE_PM_MEMORY,
                PREDICTION_TOLERANCE,
            ),
            quad_rotor_plant,
            DEFAULT_LENGTH,
            DEFAULT_ADD_METHOD,
            DEFAULT_PURGE_METHOD,
            ExplorationStrategy(EXPLORATION_DICT),
        )

        # STAGE ONE
        trained_cs = ControllerSet(
            parallelize(
                parsed_args.j,
                train,
                [
                    actor_critic_args + (SEED + i, )
                    for i in range(parsed_args.p)
                ],
            ))

        print("Finished Gaussian clone expansion (id={}) with {:.2f}".format(
            trained_cs.get_id(),
            SimulationResult(
                trained_cs.lookback_result(LOOK_BACK_WINDOW),
                metric=parsed_args.metric).get_cum_state_error()[1:2].sum()))
        trained_cs.dump()
        RewardSet(trained_cs).dump()

    except KeyboardInterrupt:
        print("Shutdown requested... exiting")