def objective(objective_args):
    """Black-box optimization objective for the 2D quad-rotor z-controller.

    Unpacks a flat hyper-parameter tuple, trains ``parsed_args.p``
    controllers in parallel (one seed each), and returns the cumulative
    z-position error of the look-back result — lower is better.

    Args:
        objective_args: flat tuple of 15 hyper-parameters; see the
            unpacking below for the expected order.

    Returns:
        The cumulative z-error (scalar) of the trained controller set.
    """
    # Hyper-parameter vector supplied by the outer optimizer.
    (z_scale, zdot_reward, action_reward, exploration, tolerance, max_mem_a,
     k_a, alpha_a, k_c, alpha_c, max_mem_pm, k_pm, pred_tol, lambda_trace,
     gamma) = objective_args

    # Start from the plant's default features; the z-feature is rebuilt so
    # the tuned scale takes effect.
    (df_x, df_z, df_xdot, df_zdot, df_theta, df_thetadot, df_u1,
     df_u3) = QuadRotor2DPlant.get_default_feature_set()
    feature_z = Feature(r"$z$ [m]", scale=z_scale, bounds=np.array([-25, 0]))

    plant = QuadRotor2DPlant(
        1. / FREQUENCY,
        blade_flapping=BLADE_FLAPPING,
        init_mean=DEFAULT_INIT_STATE_MEAN,
        feature_set=FeatureSet([
            df_x, feature_z, df_xdot, df_zdot, df_theta, df_thetadot,
            df_u1, df_u3,
        ]),
    )

    actor = Actor(
        FeatureSet([feature_z, df_zdot]),
        FeatureSet([df_u1]),
        plant.get_feature_set(),
        k_a,
        max_mem_a * 50,
        alpha_a,
        tolerance,
    )
    # NOTE(review): the critic memory is sized from max_mem_a, not a
    # dedicated critic parameter — presumably actor and critic share a
    # memory budget; confirm against the optimizer's parameter space.
    critic = Critic(
        FeatureSet([feature_z, df_zdot]),
        plant.get_feature_set(),
        k_c,
        max_mem_a * 50,
        lambda_trace,
        alpha_c,
        gamma,
        QuadraticErrorRewardFunction(
            [action_reward, 0],
            [0, 10., 0, zdot_reward, 0, 0],
            desired_state=DESIRED_STATE,
        ),
        tolerance,
    )
    plant_model = PlantModel(
        plant.get_feature_set(),
        FeatureSet([feature_z, df_zdot, df_u1]),
        k_pm,
        max_mem_pm * 50,
        pred_tol,
    )
    train_args = (
        actor,
        critic,
        plant_model,
        plant,
        DEFAULT_LENGTH,
        DEFAULT_ADD_METHOD,
        DEFAULT_PURGE_METHOD,
        ExplorationStrategy({1: exploration}),
    )

    # One run per seed, fanned out over parsed_args.j workers.
    per_seed_args = [train_args + (SEED + i, ) for i in range(parsed_args.p)]
    cs = ControllerSet(parallelize(parsed_args.j, train, per_seed_args))

    result = SimulationResult(
        cs.lookback_result(
            LOOK_BACK_WINDOW,
            look_back_metric="median",
        ),
        metric=parsed_args.metric,
    )
    # Index 1 of the flattened error vector is the z-position component.
    print("Finished training with cumulative z-error {:.2f}".format(
        result.get_cum_state_error().flatten()[1]))
    return result.get_cum_state_error().flatten()[1]
def higher_discount_rate():
    """Sensitivity analysis: retrain the z-controller with a 0.99 discount.

    Mirrors the baseline stage-one training setup but hard-codes the
    critic's discount rate to 0.99, then persists the resulting rewards
    (the full controller dump stays disabled).
    """
    plant = QuadRotor2DPlant(
        1. / FREQUENCY,
        blade_flapping=BLADE_FLAPPING,
        init_mean=DEFAULT_INIT_STATE_MEAN,
    )
    actor = Actor(
        FeatureSet([feature_z, feature_zdot]),
        FeatureSet([feature_u1]),
        plant.get_feature_set(),
        K_ACTOR,
        STAGE_ONE_AC_MEMORY,
        ALPHA_ACTOR,
        TOLERANCE_ACTOR,
    )
    critic = Critic(
        FeatureSet([feature_z, feature_zdot]),
        plant.get_feature_set(),
        K_CRITIC,
        STAGE_ONE_AC_MEMORY,
        LAMBDA_TRACE,
        ALPHA_CRITIC,
        0.99,  # raised discount rate — the variable under test here
        QuadraticErrorRewardFunction(
            ACTION_REWARDS, STATE_REWARDS, desired_state=DESIRED_STATE
        ),
        TOLERANCE_CRITIC,
    )
    model = PlantModel(
        plant.get_feature_set(),
        FeatureSet([feature_z, feature_zdot, feature_u1]),
        K_PLANT_MODEL,
        STAGE_ONE_PM_MEMORY,
        PREDICTION_TOLERANCE,
    )
    shared_args = (
        actor,
        critic,
        model,
        plant,
        DEFAULT_LENGTH,
        DEFAULT_ADD_METHOD,
        DEFAULT_PURGE_METHOD,
        ExplorationStrategy(EXPLORATION_DICT),
    )

    print("Starting training of quad-rotor with higher discount rate.")
    controllers = ControllerSet(
        parallelize(
            parsed_args.j,
            train,
            [shared_args + (SEED + i,) for i in range(parsed_args.p)],
        )
    )

    # Cumulative z-error of the look-back result (index 1 of the error
    # vector), reported together with the set's id.
    z_error = SimulationResult(
        controllers.lookback_result(LOOK_BACK_WINDOW),
        metric=parsed_args.metric,
    ).get_cum_state_error()[1:2].sum()
    print("Finished higher discount rate with {:.2f} (id={})".format(
        z_error,
        controllers.get_id(),
    ))

    controllers.notes = "Sensitivity analysis: higher discount rate"
    # controllers.dump()  # full controller dump intentionally disabled
    RewardSet(controllers).dump()
), quad_rotor_plant, DEFAULT_LENGTH, DEFAULT_ADD_METHOD, DEFAULT_PURGE_METHOD, ExplorationStrategy(EXPLORATION_DICT), ) print("Starting training of quad-rotor.") # STAGE ONE first_stage_cs = ControllerSet( parallelize( parsed_args.j, train_stage_one, [ actor_critic_args + (SEED + i, ) for i in range(parsed_args.p) ], )) print("Finished stage one with {:.2f}".format( SimulationResult( first_stage_cs.lookback_result(LOOK_BACK_WINDOW), metric=parsed_args.metric).get_cum_state_error()[1:2].sum())) # ZERO EXPANSION zero_expansion = ControllerSet( parallelize( parsed_args.j, train_zero_expansion, [deepcopy(ac) for ac in first_stage_cs],
targets = [ "sensitivity-analysis-alpha-lower", "sensitivity-analysis-alpha-higher", "sensitivity-analysis-gamma-lower", "sensitivity-analysis-gamma-higher", ] BASE_PATH = FIGURE_PATH file_format = "pdf" dpi = 300 for i, t in zip(ids, targets): try: rc = RewardSet.load(i) except IOError: rc = RewardSet(ControllerSet.load(i)) rc.dump() print("ID: {}".format(i)) vis = rc.plot( conf=68, bounds=False, metric="median", minimum=9000, ) vis.save( "report-3pp", target=os.path.join(BASE_PATH, t + "-{}.{}".format(len(rc), file_format)), format=file_format, transparant=True, dpi=dpi,
STAGE_ONE_PM_MEMORY, PREDICTION_TOLERANCE, ), quad_rotor_plant, DEFAULT_LENGTH, DEFAULT_ADD_METHOD, DEFAULT_PURGE_METHOD, ExplorationStrategy(EXPLORATION_DICT), ) # STAGE ONE trained_cs = ControllerSet( parallelize( parsed_args.j, train, [ actor_critic_args + (SEED + i, ) for i in range(parsed_args.p) ], )) print("Finished Gaussian clone expansion (id={}) with {:.2f}".format( trained_cs.get_id(), SimulationResult( trained_cs.lookback_result(LOOK_BACK_WINDOW), metric=parsed_args.metric).get_cum_state_error()[1:2].sum())) trained_cs.dump() RewardSet(trained_cs).dump() except KeyboardInterrupt: print("Shutdown requested... exiting")