def __init__(self, action_space, name='SACAgent', training_param=TrainingParamSAC()):
    AgentWithConverter.__init__(self, action_space, action_space_converter=IdToAct)

    # Class with parameters for training
    self.training_param = training_param
    self.name = name

    # Exploration parameter epsilon
    self.epsilon = training_param.INITIAL_EPSILON

    self.replay_buffer = ReplayBuffer(training_param.BUFFER_SIZE)
    self.deep_q = None
    self.tf_writer = None
    self.graph_saved = False

    # Statistics
    self.epoch_num_steps_alive = None
    self.epoch_rewards = None
    self.actions_per_1000steps = np.zeros((1000, self.action_space.size()), dtype=int)
    self.illegal_actions_per_1000steps = np.zeros(1000, dtype=int)
    self.ambiguous_actions_per_1000steps = np.zeros(1000, dtype=int)
    self._tmp_obs = None
    self.total_load_100 = deque(maxlen=100)
    self.total_prod_100 = deque(maxlen=100)
    self.q_selected_100 = deque(maxlen=100)
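# A possible way to use the constructor above inside a grid2op environment.
# This is only a usage sketch: the enclosing class name SACAgent is inferred
# from the default `name` argument, and "rte_case14_realistic" is one of the
# environment names that appears elsewhere in this file.
import grid2op

env = grid2op.make("rte_case14_realistic")
agent = SACAgent(env.action_space)            # uses the default TrainingParamSAC()

obs = env.reset()
reward, done = 0.0, False
while not done:
    # AgentWithConverter.act() converts the observation, picks an action id
    # with my_act() and converts it back to a grid2op action via IdToAct
    action = agent.act(obs, reward, done)
    obs, reward, done, info = env.step(action)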
def __init__(self, env, action_space, name=__name__, num_frames=4, is_training=False, batch_size=32, lr=1e-5):
    # Call parent constructor
    AgentWithConverter.__init__(self, action_space, action_space_converter=IdToAct)

    # Store constructor params
    self.env = env
    self.name = name
    self.num_frames = num_frames
    self.is_training = is_training
    self.batch_size = batch_size
    self.lr = lr

    # Declare required vars
    self.Qmain = None
    self.obs = None
    self.state = []
    self.frames = []

    # Declare training vars
    self.replay_buffer = None
    self.done = False
    self.frames2 = None
    self.epoch_rewards = None
    self.epoch_alive = None
    self.Qtarget = None

    # Setup training vars if needed
    if self.is_training:
        self._init_training()

    # Setup initial state
    self._reset_state()
    self._reset_frame_buffer()

    # Compute dimensions from initial state
    self.observation_size = self.state.shape[0]
    self.action_size = self.action_space.size()

    # Load network graph
    self.Qmain = DoubleDuelingDQN(self.action_size,
                                  self.observation_size,
                                  num_frames=self.num_frames,
                                  learning_rate=self.lr)
    if self.is_training:
        self.Qtarget = DoubleDuelingDQN(self.action_size,
                                        self.observation_size,
                                        num_frames=self.num_frames,
                                        learning_rate=self.lr)
def __init__(self, env, n_states, actions, seed=0):
    AgentWithConverter.__init__(self, env.action_space)
    np.random.seed(seed)

    self.actions = actions
    self.n_states = n_states
    self.n_actions = len(self.actions)

    observation_encoded = np.random.rand(3, self.n_states).astype(np.float32)  # (None, n_states)
    _ = self.my_act(observation_encoded)  # (None, n_actions)
def __init__(self, observation_space, action_space, name=__name__, is_training=False):
    # Call parent constructor
    AgentWithConverter.__init__(self, action_space, action_space_converter=IdToAct)
    self.obs_space = observation_space

    # Filter
    # print("Actions filtering...")
    self.action_space.filter_action(self._filter_action)
    # print("..Done")

    # Store constructor params
    self.name = name
    self.num_frames = cfg.N_FRAMES
    self.is_training = is_training
    self.batch_size = cfg.BATCH_SIZE
    self.lr = cfg.LR

    # Declare required vars
    self.Qmain = None
    self.obs = None
    self.state = []
    self.frames = []

    # Declare training vars
    self.per_buffer = None
    self.done = False
    self.frames2 = None
    self.epoch_rewards = None
    self.epoch_alive = None
    self.Qtarget = None
    self.epsilon = 0.0

    # Compute dimensions from initial spaces
    self.observation_size = self.obs_space.size_obs()
    self.action_size = self.action_space.size()

    # Load network graph
    self.Qmain = DoubleDuelingDQN_NN(self.action_size,
                                     self.observation_size,
                                     num_frames=self.num_frames,
                                     learning_rate=self.lr,
                                     learning_rate_decay_steps=cfg.LR_DECAY_STEPS,
                                     learning_rate_decay_rate=cfg.LR_DECAY_RATE)

    # Setup training vars if needed
    if self.is_training:
        self._init_training()
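# The _filter_action predicate handed to filter_action() above is not shown in
# this file. A minimal sketch of what such a predicate can look like follows;
# the MAX_SUB budget and the exact criterion are assumptions, not the actual
# implementation. filter_action() keeps the actions for which the callback
# returns True.
def _filter_action(self, action):
    MAX_SUB = 2  # assumed budget on the number of impacted substations
    lines_impacted, subs_impacted = action.get_topological_impact()
    # keep actions that reconfigure at most MAX_SUB substations and no powerline
    return subs_impacted.sum() <= MAX_SUB and not lines_impacted.any()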
def __init__(self, observation_space, action_space):
    self.observation_space = observation_space
    self.action_space = action_space
    AgentWithConverter.__init__(self, action_space, action_space_converter=IdToAct)

    self.observation_size = self.observation_space.size_obs()
    self.action_size = self.action_space.size()
    self.done = False
def __init__(self, env, n_states, action_set, n_hidden, alpha, rho_threshold=0.8):
    self.n_states = n_states
    self.n_actions = len(action_set)
    self.action_set = action_set
    self.rho_threshold = rho_threshold

    tf.keras.Model.__init__(self)
    AgentWithConverter.__init__(self, action_space=env.action_space)

    # Actor
    self.layer_input = tf.keras.layers.Dense(
        n_hidden[0],
        input_shape=(self.n_states,),
        activation=tf.nn.relu,
        name="actor_layer_input",
        trainable=True,
    )
    self.layers_hidden = []
    for layer, layer_n_hidden in enumerate(n_hidden[1:]):
        self.layers_hidden.append(
            tf.keras.layers.Dense(
                layer_n_hidden,
                activation=tf.nn.relu,
                name=f"actor_layer_hidden_{layer}",
                trainable=True,
            ))
    self.layer_output = tf.keras.layers.Dense(self.n_actions,
                                              activation=None,
                                              name="actor_layer_output",
                                              trainable=True)

    # Training
    self.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=alpha),
        loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    )

    # Initialize variables
    observation_encoded = np.random.rand(10, self.n_states).astype(np.float32)  # (None, n_states)
    _ = self.my_act(observation_encoded)  # (None, n_actions) logits
    print_trainable_variables(self)
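# my_act() is called above only so that Keras builds the layer variables; its
# body is not shown in this file. A minimal forward pass consistent with the
# layers defined in the constructor could look like the sketch below (an
# assumption, not the actual implementation):
def my_act(self, observation_encoded):
    x = self.layer_input(observation_encoded)   # (batch, n_hidden[0])
    for layer_hidden in self.layers_hidden:
        x = layer_hidden(x)
    return self.layer_output(x)                 # (batch, n_actions) unnormalized logits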
def __init__(self, env, action_space, name=__name__, trace_length=1, batch_size=1, is_training=False, lr=1e-5):
    # Call parent constructor
    AgentWithConverter.__init__(self, action_space, action_space_converter=IdToAct)

    # Store constructor params
    self.env = env
    self.name = name
    self.trace_length = trace_length
    self.batch_size = batch_size
    self.is_training = is_training
    self.lr = lr

    # Declare required vars
    self.Qmain = None
    self.obs = None
    self.state = []
    self.mem_state = None
    self.carry_state = None

    # Declare training vars
    self.exp_buffer = None
    self.done = False
    self.epoch_rewards = None
    self.epoch_alive = None
    self.Qtarget = None

    # Compute dimensions from initial state
    self.obs = self.env.reset()
    self.state = self.convert_obs(self.obs)
    self.observation_size = self.state.shape[0]
    self.action_size = self.action_space.size()

    # Load network graph
    self.Qmain = DoubleDuelingRDQN(self.action_size,
                                   self.observation_size,
                                   learning_rate=self.lr)

    # Setup initial state
    self._reset_state()

    # Setup training vars if needed
    if self.is_training:
        self._init_training()
def __init__(self, action_space):
    AgentWithConverter.__init__(self, action_space, action_space_converter=IdToAct)

    self.deep_q = None
    self.replay_buffer = ReplayBuffer(BUFFER_SIZE)
    self.process_buffer = []
    self.id = self.__class__.__name__

    # Statistics
    self.action_history = []
    self.reward_history = []
    self.cumulative_reward = 0
    self.smallest_loss = np.inf
    self.run_step_count = 0
    self.run_tf_writer = None
def __init__(self, observation_space, action_space, name=__name__, is_training=False):
    # Call parent constructor
    AgentWithConverter.__init__(self, action_space, action_space_converter=IdToAct)

    # Store constructor params
    self.observation_space = observation_space
    self.name = name
    self.trace_length = cfg.TRACE_LENGTH
    self.batch_size = cfg.BATCH_SIZE
    self.is_training = is_training
    self.lr = cfg.LR

    # Declare required vars
    self.Qmain = None
    self.obs = None
    self.state = []
    self.mem_state = None
    self.carry_state = None

    # Declare training vars
    self.exp_buffer = None
    self.done = False
    self.epoch_rewards = None
    self.epoch_alive = None
    self.Qtarget = None

    # Compute dimensions from initial state
    self.observation_size = self.observation_space.size_obs()
    self.action_size = self.action_space.size()

    # Load network graph
    self.Qmain = DoubleDuelingRDQN_NN(self.action_size,
                                      self.observation_size,
                                      learning_rate=self.lr)

    # Setup training vars if needed
    if self.is_training:
        self._init_training()
def __init__(self, action_space, mode="DDQN", lr=1e-5, training_param=TrainingParam()): # this function has been adapted. # to built a AgentWithConverter, we need an action_space. # No problem, we add it in the constructor. AgentWithConverter.__init__(self, action_space, action_space_converter=IdToAct) # and now back to the origin implementation self.replay_buffer = ReplayBuffer(training_param.BUFFER_SIZE) # compare to original implementation, i don't know the observation space size. # Because it depends on the component of the observation we want to look at. So these neural network will # be initialized the first time an observation is observe. self.deep_q = None self.mode = mode self.lr = lr self.training_param = training_param
def __init__(self, action_space, name="DeepQAgent", lr=1e-3, learning_rate_decay_steps=3000, learning_rate_decay_rate=0.99, store_action=False, istraining=False, nb_env=1): AgentWithConverter.__init__(self, action_space, action_space_converter=IdToAct) # and now back to the origin implementation self.replay_buffer = None self.__nb_env = nb_env self.deep_q = None self.training_param = None self.tf_writer = None self.name = name self.losses = None self.graph_saved = False self.lr = lr self.learning_rate_decay_steps = learning_rate_decay_steps self.learning_rate_decay_rate = learning_rate_decay_rate self.store_action = store_action self.dict_action = {} self.istraining = istraining self.actions_per_1000steps = np.zeros((1000, self.action_space.size()), dtype=np.int) self.illegal_actions_per_1000steps = np.zeros(1000, dtype=np.int) self.ambiguous_actions_per_1000steps = np.zeros(1000, dtype=np.int) self.train_lr = lr self.epsilon = 1.0 self.obs_as_vect = None self._tmp_obs = None self.reset_num = None
def __init__(self, action_space, env_name, action_space_converter=IdToAct, nb_quiet=2, **kwargs_converter):
    AgentWithConverter.__init__(self, action_space,
                                action_space_converter=action_space_converter,
                                **kwargs_converter)
    self.action_space.all_actions = []

    # do nothing
    all_actions_tmp = [action_space()]

    # powerline switch: disconnection
    for i in range(action_space.n_line):
        if env_name == "rte_case14_realistic":
            if i == 18:
                continue
        elif env_name == "rte_case5_example":
            pass
        elif env_name == "rte_case118_example":
            if i == 6:
                continue
            if i == 26:
                continue
            if i == 72:
                continue
            if i == 73:
                continue
            if i == 80:
                continue
            if i == 129:
                continue
            if i == 140:
                continue
            if i == 176:
                continue
            if i == 177:
                continue
        all_actions_tmp.append(action_space.disconnect_powerline(line_id=i))

    # other types of actions
    all_actions_tmp += action_space.get_all_unitary_topologies_set(action_space)
    # self.action_space.all_actions += action_space.get_all_unitary_redispatch(action_space)

    if env_name == "rte_case14_realistic":
        # remove actions that make the powerflow diverge
        breaking_acts = [
            action_space({"set_bus": {"lines_or_id": [(7, 2), (8, 1), (9, 1)],
                                      "lines_ex_id": [(17, 2)],
                                      "generators_id": [(2, 2)],
                                      "loads_id": [(4, 1)]}}),
            action_space({"set_bus": {"lines_or_id": [(10, 2), (11, 1), (19, 2)],
                                      "lines_ex_id": [(16, 2)],
                                      "loads_id": [(5, 1)]}}),
            action_space({"set_bus": {"lines_or_id": [(5, 1)],
                                      "lines_ex_id": [(2, 2)],
                                      "generators_id": [(1, 2)],
                                      "loads_id": [(1, 1)]}}),
            action_space({"set_bus": {"lines_or_id": [(6, 2), (15, 2), (16, 1)],
                                      "lines_ex_id": [(3, 2), (5, 2)],
                                      "loads_id": [(2, 1)]}}),
            action_space({"set_bus": {"lines_or_id": [(18, 1)],
                                      "lines_ex_id": [(15, 2), (19, 2)]}}),
        ]
    elif env_name == "rte_case118_example":
        breaking_acts = [
            action_space({"set_bus": {"lines_or_id": [(100, 2), (129, 1), (173, 2)],
                                      # "lines_ex_id": [(17, 2)],
                                      "generators_id": [(2, 2)],
                                      "loads_id": [(6, 1)]}}),
            action_space({"set_bus": {"lines_or_id": [(100, 2), (129, 1), (173, 2)],
                                      # "lines_ex_id": [(17, 2)],
                                      "generators_id": [(2, 2)],
                                      "loads_id": [(6, 2)]}}),
            action_space({"set_bus": {"lines_or_id": [(100, 2), (129, 1), (173, 2)],
                                      # "lines_ex_id": [(17, 2)],
                                      "generators_id": [(2, 1)],
                                      "loads_id": [(6, 1)]}}),
            action_space({"set_bus": {"lines_or_id": [(140, 1)],
                                      "lines_ex_id": [(129, 2)],
                                      # "generators_id": [(2, 1)],
                                      # "loads_id": [(6, 1)]
                                      }}),
            action_space({"set_bus": {"lines_or_id": [(57, 2), (80, 1), (83, 2)],
                                      "lines_ex_id": [(2, 2), (13, 2), (24, 2), (35, 2)],
                                      "generators_id": [(6, 2)],
                                      "loads_id": [(8, 2)]}}),
            action_space({"set_bus": {"lines_or_id": [(57, 2), (80, 1), (83, 2)],
                                      "lines_ex_id": [(2, 2), (13, 2), (24, 2), (35, 2)],
                                      "generators_id": [(6, 2)],
                                      "loads_id": [(8, 1)]}}),
            action_space({"set_bus": {"lines_or_id": [(57, 2), (80, 1), (83, 2)],
                                      "lines_ex_id": [(2, 2), (13, 2), (24, 2), (35, 2)],
                                      "generators_id": [(6, 1)],
                                      "loads_id": [(8, 2)]}}),
            action_space({"set_bus": {"lines_or_id": [(57, 2), (80, 1), (83, 2)],
                                      "lines_ex_id": [(2, 2), (13, 2), (24, 2), (35, 2)],
                                      "generators_id": [(6, 1)],
                                      "loads_id": [(8, 1)]}}),
        ]
    else:
        breaking_acts = [
            action_space({"set_bus": {"lines_or_id": [(0, 2), (1, 2), (2, 2), (3, 1)],
                                      "generators_id": [(0, 1)],
                                      "loads_id": [(0, 1)]}}),
        ]

    # filter out actions that break everything
    all_actions = []
    for el in all_actions_tmp:
        if el not in breaking_acts:
            all_actions.append(el)

    # set the curated actions on the action space
    self.action_space.all_actions = all_actions

    # add the action "reset everything to bus 1"
    self.action_space.all_actions.append(
        action_space({"set_bus": np.ones(action_space.dim_topo, dtype=int),
                      "set_line_status": np.ones(action_space.n_line, dtype=int)}))

    self.nb_act_done = 0
    self.act_this = 0
    self.nb_quiet = nb_quiet
    self._nb_quiet_1 = self.nb_quiet - 1
    self.nb_act = len(self.action_space.all_actions)
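# A possible instantiation of the constructor above. This is only a sketch:
# the enclosing class name is not given in this file, so CuratedActionAgent is
# a hypothetical placeholder.
import grid2op

env = grid2op.make("rte_case14_realistic")
agent = CuratedActionAgent(env.action_space, env_name="rte_case14_realistic")
print(agent.nb_act, "actions kept after filtering")
do_nothing = agent.convert_act(0)   # id 0 maps to the do-nothing action built first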
def __init__(self, observation_space, action_space, num_frames=4, batch_size=32,
             learning_rate=1e-5, learning_rate_decay_steps=10000, learning_rate_decay_rate=0.95,
             discount_factor=0.95, tau=1e-2, erb_size=50000,
             epsilon=0.99, decay_epsilon=1024*32, final_epsilon=0.0001):
    # initialize the AgentWithConverter base class to handle action conversions
    AgentWithConverter.__init__(self, action_space, action_space_converter=IdToAct)

    self.obs_space = observation_space
    self.act_space = action_space
    self.num_frames = num_frames
    self.batch_size = batch_size
    self.lr = learning_rate
    self.lr_decay_steps = learning_rate_decay_steps
    self.lr_decay_rate = learning_rate_decay_rate
    self.gamma = discount_factor
    self.tau = tau

    # epsilon is the degree of exploration
    self.initial_epsilon = epsilon
    # adaptive epsilon decay constants
    self.decay_epsilon = decay_epsilon
    self.final_epsilon = final_epsilon

    self.buff_size = erb_size
    self.observation_size = self.obs_space.size_obs()
    self.action_size = self.act_space.size()
    self.dqn = DQNet(self.action_size, self.observation_size, self.num_frames,
                     self.lr, self.lr_decay_steps, self.lr_decay_rate,
                     self.batch_size, self.gamma, self.tau)

    # State variables
    self.obs = None
    self.done = None
    self.epsilon = self.initial_epsilon
    self.state = []
    self.frames = []
    self.next_frames = []
    self.replay_buffer = BaseReplayBuffer(self.buff_size)
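# The decay schedule itself is not part of this constructor. A common schedule
# consistent with the attributes above (initial_epsilon, final_epsilon,
# decay_epsilon) is an exponential decay towards final_epsilon; the sketch
# below is one possible implementation, not necessarily the one used by this
# agent.
import numpy as np

def decayed_epsilon(step, initial_epsilon=0.99, final_epsilon=0.0001, decay_epsilon=1024 * 32):
    """Exploration rate after `step` training steps (assumed schedule)."""
    return final_epsilon + (initial_epsilon - final_epsilon) * np.exp(-step / decay_epsilon)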
def __init__(self, observation_space, action_space, name=__name__, is_training=False):
    # Call parent constructor
    AgentWithConverter.__init__(self, action_space, action_space_converter=IdToAct)

    # Store constructor params
    self.observation_space = observation_space
    self.name = name
    self.trace_length = cfg.TRACE_LENGTH
    self.batch_size = cfg.BATCH_SIZE
    self.is_training = is_training
    self.lr = cfg.LR

    # Declare required vars
    self.Qmain = None
    self.obs = None
    self.state = []
    self.mem_state = None
    self.carry_state = None

    # Declare training vars
    self.exp_buffer = None
    self.done = False
    self.epoch_rewards = None
    self.epoch_alive = None
    self.Qtarget = None
    self.epsilon = cfg.INITIAL_EPSILON

    # Compute dimensions from initial state
    self.action_size = self.action_space.n
    self.observation_shape = shape_obs(self.observation_space)

    # Slices dict
    self.slices = {
        "lines": {
            "indexes": [1, 3, 4, 9, 10, 11, 14, 15, 18, 20, 23, 24],
            "q_len": lines_q_len(self.action_space)
        },
        "sub": {
            "indexes": [1, 2, 3, 4, 9, 10, 11, 12, 14, 15, 18, 20, 23, 24],
            "q_len": topo_q_len(self.action_space)
        },
        # "disp": {
        #     "indexes": [4, 7, 8, 9, 10, 11, 12, 14, 18, 23, 24],
        #     "q_len": disp_q_len(self.action_space)
        # }
    }
    self.n_slices = len(self.slices.keys())

    # Load network graph
    self.Qmain = SliceRDQN_NN(self.action_size,
                              self.observation_shape,
                              self.slices,
                              learning_rate=self.lr)

    # Setup training vars if needed
    if self.is_training:
        self._init_training()
def __init__(self, action_space, nn_archi, name="DeepQAgent", store_action=True, istraining=False, filter_action_fun=None, verbose=False, observation_space=None, **kwargs_converters): AgentWithConverter.__init__(self, action_space, action_space_converter=IdToAct, **kwargs_converters) self.filter_action_fun = filter_action_fun if self.filter_action_fun is not None: self.action_space.filter_action(self.filter_action_fun) # and now back to the origin implementation self.replay_buffer = None self.__nb_env = None self.deep_q = None self._training_param = None self._tf_writer = None self.name = name self._losses = None self.__graph_saved = False self.store_action = store_action self.dict_action = {} self.istraining = istraining self.epsilon = 1.0 # for tensorbaord self._train_lr = None self._reset_num = None self._max_iter_env_ = 1000000 self._curr_iter_env = 0 self._max_reward = 0. # action type self.nb_injection = 0 self.nb_voltage = 0 self.nb_topology = 0 self.nb_line = 0 self.nb_redispatching = 0 self.nb_do_nothing = 0 # for over sampling the hard scenarios self._prev_obs_num = 0 self._time_step_lived = None self._nb_chosen = None self._proba = None self._prev_id = 0 # this is for the "limit the episode length" depending on your previous success self._total_sucesses = 0 # neural network architecture self._nn_archi = nn_archi # observation tranformers self._obs_as_vect = None self._tmp_obs = None self._indx_obs = None self.verbose = verbose if observation_space is None: pass else: self.init_obs_extraction(observation_space) # for the frequency of action type self.current_ = 0 self.nb_ = 10 self._nb_this_time = np.zeros((self.nb_, 6)) # self._vector_size = None self._actions_per_ksteps = None self._illegal_actions_per_ksteps = None self._ambiguous_actions_per_ksteps = None