def __init__(self, motor='SynRM', reward_function=None, reference_generator=None, **kwargs):
    """Assemble the environment around a ``SynchronousMotorSystem``.

    Args:
        motor(ElectricMotor): Electric Motor used in the PhysicalSystem
        reward_function(RewardFunction): Reward Function for the environment.
            A falsy value selects a ``WeightedSumOfErrors`` built from ``kwargs``.
        reference_generator(ReferenceGenerator): Reference Generator for the environment.
            A falsy value selects a ``WienerProcessReferenceGenerator`` built from ``kwargs``.
        kwargs(dict): Further kwargs to pass to the superclass and the submodules
    """
    system = SynchronousMotorSystem(motor=motor, **kwargs)

    # Fall back to the default submodules only when none was supplied.
    # The same kwargs are handed to every submodule and to the superclass.
    if not reference_generator:
        reference_generator = WienerProcessReferenceGenerator(**kwargs)
    if not reward_function:
        reward_function = WeightedSumOfErrors(**kwargs)

    super().__init__(
        system,
        reference_generator=reference_generator,
        reward_function=reward_function,
        **kwargs
    )
def __init__(self, motor='SynRM', reward_function=None, reference_generator=None, constraints=None, **kwargs):
    """Assemble the environment around a ``SynchronousMotorSystem``.

    Args:
        motor(ElectricMotor): Electric Motor used in the PhysicalSystem
        reward_function(RewardFunction): Reward Function for the environment.
            A falsy value selects a ``WeightedSumOfErrors`` built from ``kwargs``.
        reference_generator(ReferenceGenerator): Reference Generator for the environment.
            A falsy value selects a ``WienerProcessReferenceGenerator`` built from ``kwargs``.
        constraints: Constraints forwarded to the superclass. ``None`` selects
            ``('i_a', 'i_b', 'i_c', SquaredConstraint(('i_sd', 'i_sq')))``.
        kwargs(dict): Further kwargs to pass to the superclass and the submodules
    """
    system = SynchronousMotorSystem(motor=motor, **kwargs)

    # Fall back to the default submodules only when none was supplied.
    if not reference_generator:
        reference_generator = WienerProcessReferenceGenerator(**kwargs)
    if not reward_function:
        reward_function = WeightedSumOfErrors(**kwargs)

    # Only an explicit None triggers the default; an empty collection passed
    # by the caller is forwarded unchanged.
    if constraints is None:
        constraints = ('i_a', 'i_b', 'i_c', SquaredConstraint(('i_sd', 'i_sq')))

    super().__init__(
        system,
        reference_generator=reference_generator,
        reward_function=reward_function,
        constraints=constraints,
        **kwargs
    )
converter='Disc-1QC', # Define which states will be shown in the state observation (what we can "measure") state_filter=['omega', 'i'], # Define the reward function and to which state variable it applies # Here, we define it for current control reward_function=WeightedSumOfErrors(observed_states='i'), # Defines which numerical solver is to be used for the simulation # euler is fastest but not most precise ode_solver='euler', solver_kwargs={}, # Define and parameterize the reference generator for the current reference reference_generator=WienerProcessReferenceGenerator( reference_state='i', sigma_range=(3e-3, 3e-2)), # Defines which variables to plot via the builtin dashboard monitor visualization=MotorDashboard(state_plots=['i', 'omega']), ) # Now, the environment will output states and references separately state, ref = env.reset() # For data processing we sometimes want to flatten the env output, # which means that the env will only output one array that contains states and references consecutively env = FlattenObservation(env) obs = env.reset() # Read the number of possible actions for the given env # this allows us to define a proper learning agent for this task
'emotor-dc-series-disc-v1', state_filter=['i'], # Pass an instance visualization=MotorDashboard(visu_period=0.5, plotted_variables=['omega', 'i', 'u']), converter='Disc-4QC', # Take standard class and pass parameters (Load) a=0, b=.1, c=1.1, j_load=0.4, # Pass a string (with extra parameters) ode_solver='euler', solver_kwargs={}, # Pass a Class with extra parameters reference_generator=WienerProcessReferenceGenerator( reference_state='i', sigma_range=(5e-3, 5e-1))) nb_actions = env.action_space.n env = FlattenObservation(env) model = Sequential() model.add(Flatten(input_shape=(1, ) + env.observation_space.shape)) model.add(Dense(4)) model.add(LeakyReLU(alpha=0.05)) model.add(Dense(4)) model.add(LeakyReLU(alpha=0.05)) model.add(Dense(nb_actions)) model.add(Activation('linear')) memory = SequentialMemory(limit=15000, window_length=1) policy = LinearAnnealedPolicy(EpsGreedyQPolicy(eps=0.5), 'eps', 0.5, 0.01, 0, 20000) dqn = DQNAgent(model=model,
r_e=4.5, l_a=9.7e-3, l_e_prime=9.2e-3, l_e=9.2e-3, j_rotor=0.001), # Take standard class and pass parameters (Load) load_parameter=dict(a=0, b=.0, c=0.01, j_load=.001), reward_weights={'omega': 1000}, reward_power=0.5, observed_states= None, # Constraint violation monitoring is disabled for presentation purpose # Pass a string (with extra parameters) ode_solver='scipy.solve_ivp', solver_kwargs=dict(method='BDF'), # Pass an instance reference_generator=WienerProcessReferenceGenerator( reference_state='omega', sigma_range=(5e-3, 1e-2))) # Keras-rl DDPG-agent accepts flat observations only env = FlattenObservation(env) nb_actions = env.action_space.shape[0] # CAUTION: Do not use layers that behave differently in training and # testing # (e.g. dropout, batch-normalization, etc..) # Reason is a bug in TF2 where not the learning_phase_tensor is extractable # in order to put as an input to keras models # https://stackoverflow.com/questions/58987264/how-to-get-learning-phase-in-tensorflow-2-eager # https://stackoverflow.com/questions/58279628/what-is-the-difference-between-tf-keras-and-tf-python-keras?noredirect=1&lq=1 # https://github.com/tensorflow/tensorflow/issues/34508 window_length = 1 actor = Sequential() actor.add(
# Create the environment
env = gem.make(
    'emotor-dc-series-cont-v1',
    # Pass a class with extra parameters
    visualization=MotorDashboard, visu_period=1,
    # Take standard class and pass parameters (Load)
    load_parameter=dict(a=0, b=.0, c=0.0, j_load=.5),
    # Pass a string (with extra parameters)
    ode_solver='euler', solver_kwargs={},
    # Pass an instance
    reference_generator=WienerProcessReferenceGenerator(reference_state='i'),
)

# Keras-rl DDPG-agent only accepts flat observations
env = FlattenObservation(env)
nb_actions = env.action_space.shape[0]

# Actor network: flattened observation -> three hidden layers of 16 ReLU
# units -> one linear output per action dimension.
actor = Sequential()
actor.add(Flatten(input_shape=(1, ) + env.observation_space.shape))
for _ in range(3):
    actor.add(Dense(16))
    actor.add(Activation('relu'))
actor.add(Dense(nb_actions))
actor.add(Activation('linear'))
# Keras' summary() prints the table itself and returns None, so wrapping it
# in print() would emit an extra "None" line.
actor.summary()
def set_env(time_limit=True, gamma=0.99, N=0, M=0, training=True, callbacks=None):
    """Build the wrapped PMSM current-control environment.

    Creates a ``"PMSMDisc-v1"`` environment with references on ``i_sq`` and
    ``i_sd``, overrides its action space with ``Discrete(7)`` and stacks the
    observation/action wrappers on top.

    Args:
        time_limit(bool): If True, the result is additionally wrapped in a
            ``TimeLimit`` of 10000 steps.
        gamma(float): ``gamma`` handed to the reward function when
            ``training`` is True. With ``training=False`` a fixed 0.99 is used.
        N(int): Second argument of ``AppendNLastOberservationsWrapper``.
        M(int): Second argument of ``AppendNLastActionsWrapper``.
        training(bool): True selects a uniform motor initializer and reward
            weights of 10; False selects a gaussian initializer and reward
            weights of 0.5.
        callbacks(list): Callbacks forwarded to ``gem.make``. ``None``
            (default) is treated as an empty list.

    Returns:
        The wrapped environment.
    """
    # A fresh list per call: a mutable default would be one shared object
    # across all calls and could leak callbacks between environments.
    if callbacks is None:
        callbacks = []

    # define motor arguments
    motor_parameter = dict(
        p=3,  # [p] = 1, nb of pole pairs
        r_s=17.932e-3,  # [r_s] = Ohm, stator resistance
        l_d=0.37e-3,  # [l_d] = H, d-axis inductance
        l_q=1.2e-3,  # [l_q] = H, q-axis inductance
        psi_p=65.65e-3,  # [psi_p] = Vs, magnetic flux of the permanent magnet
    )
    u_sup = 350
    nominal_values = dict(omega=4000 * 2 * np.pi / 60, i=230, u=u_sup)
    limit_values = dict(omega=4000 * 2 * np.pi / 60, i=1.5 * 230, u=u_sup)

    # one reference per controlled current component
    q_generator = WienerProcessReferenceGenerator(reference_state='i_sq')
    d_generator = WienerProcessReferenceGenerator(reference_state='i_sd')
    rg = MultipleReferenceGenerator([q_generator, d_generator])

    tau = 1e-5
    max_eps_steps = 10000

    if training:
        motor_initializer = {
            'random_init': 'uniform',
            'interval': [[-230, 230], [-230, 230], [-np.pi, np.pi]],
        }
        reward_weight = 10
        reward_gamma = gamma
    else:
        motor_initializer = {'random_init': 'gaussian'}
        # fixed weight and gamma keep the reward comparable between runs
        reward_weight = 0.5
        reward_gamma = 0.99

    reward_function = gem.reward_functions.WeightedSumOfErrors(
        observed_states=['i_sq', 'i_sd'],
        reward_weights={'i_sq': reward_weight, 'i_sd': reward_weight},
        constraint_monitor=SqdCurrentMonitor(),
        gamma=reward_gamma,
        reward_power=1,
    )

    # creating gem environment
    env = gem.make(
        # define a PMSM with discrete action space
        "PMSMDisc-v1",
        # visualize the results
        visualization=MotorDashboard(plots=['i_sq', 'i_sd', 'reward']),
        # parameterize the PMSM and update limitations
        motor_parameter=motor_parameter,
        limit_values=limit_values,
        nominal_values=nominal_values,
        # define the random initialisation for load and motor
        load='ConstSpeedLoad',
        load_initializer={
            'random_init': 'uniform',
        },
        motor_initializer=motor_initializer,
        reward_function=reward_function,
        # define the duration of one sampling step
        tau=tau,
        u_sup=u_sup,
        # reference generator, solver and callbacks
        reference_generator=rg,
        ode_solver='euler',
        callbacks=callbacks,
    )

    # applying wrappers and modifying environment
    env.action_space = Discrete(7)
    state_names = env.physical_system.state_names
    eps_idx = state_names.index('epsilon')
    i_sd_idx = state_names.index('i_sd')
    i_sq_idx = state_names.index('i_sq')

    # The wrapper stack is the same in both modes; only the outer TimeLimit
    # is optional.
    gem_env = AppendNLastActionsWrapper(
        AppendNLastOberservationsWrapper(
            EpsilonWrapper(FlattenObservation(env), eps_idx, i_sd_idx, i_sq_idx),
            N),
        M)
    if time_limit:
        gem_env = TimeLimit(gem_env, max_eps_steps)
    return gem_env