Example no. 1
    def __init__(self,
                 action_space,
                 name='SACAgent',
                 training_param=TrainingParamSAC()):

        AgentWithConverter.__init__(self,
                                    action_space,
                                    action_space_converter=IdToAct)
        # Class with parameters for training
        self.training_param = training_param
        self.name = name

        # Exploration parameter epsilon
        self.epsilon = training_param.INITIAL_EPSILON

        self.replay_buffer = ReplayBuffer(training_param.BUFFER_SIZE)
        self.deep_q = None
        self.tf_writer = None
        self.graph_saved = False

        # Statistics
        self.epoch_num_steps_alive = None
        self.epoch_rewards = None

        self.actions_per_1000steps = np.zeros((1000, self.action_space.size()),
                                              dtype=int)
        self.illegal_actions_per_1000steps = np.zeros(1000, dtype=int)
        self.ambiguous_actions_per_1000steps = np.zeros(1000, dtype=int)

        self._tmp_obs = None

        self.total_load_100 = deque(maxlen=100)
        self.total_prod_100 = deque(maxlen=100)
        self.q_selected_100 = deque(maxlen=100)
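For context, a minimal usage sketch (not one of the retrieved examples): it assumes grid2op is installed, that the constructor shown in Example no. 1 belongs to an importable class called SACAgent (its default name suggests so), and that the dataset name is only illustrative.

    import grid2op

    # Hypothetical instantiation of the agent above; the dataset name
    # "rte_case14_realistic" and the class name SACAgent are assumptions.
    env = grid2op.make("rte_case14_realistic")
    agent = SACAgent(env.action_space, name="SACAgent")
    # Thanks to the IdToAct converter, the agent internally works with
    # integer action ids instead of full Grid2Op action objects.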
Example no. 2
    def __init__(self,
                 env,
                 action_space,
                 name=__name__,
                 num_frames=4,
                 is_training=False,
                 batch_size=32,
                 lr=1e-5):
        # Call parent constructor
        AgentWithConverter.__init__(self,
                                    action_space,
                                    action_space_converter=IdToAct)

        # Store constructor params
        self.env = env
        self.name = name
        self.num_frames = num_frames
        self.is_training = is_training
        self.batch_size = batch_size
        self.lr = lr

        # Declare required vars
        self.Qmain = None
        self.obs = None
        self.state = []
        self.frames = []

        # Declare training vars
        self.replay_buffer = None
        self.done = False
        self.frames2 = None
        self.epoch_rewards = None
        self.epoch_alive = None
        self.Qtarget = None

        # Setup training vars if needed
        if self.is_training:
            self._init_training()

        # Setup initial state
        self._reset_state()
        self._reset_frame_buffer()
        # Compute dimensions from initial state
        self.observation_size = self.state.shape[0]
        self.action_size = self.action_space.size()

        # Load network graph
        self.Qmain = DoubleDuelingDQN(self.action_size,
                                      self.observation_size,
                                      num_frames=self.num_frames,
                                      learning_rate=self.lr)
        if self.is_training:
            self.Qtarget = DoubleDuelingDQN(self.action_size,
                                            self.observation_size,
                                            num_frames=self.num_frames,
                                            learning_rate=self.lr)
Example no. 3
    def __init__(self, env, n_states, actions, seed=0):
        AgentWithConverter.__init__(self, env.action_space)
        np.random.seed(seed)

        self.actions = actions
        self.n_states = n_states
        self.n_actions = len(self.actions)

        observation_encoded = np.random.rand(3, self.n_states).astype(
            np.float32)  # (None, n_states)
        _ = self.my_act(observation_encoded)  # (None, n_actions)
Example no. 4
    def __init__(self,
                 observation_space,
                 action_space,
                 name=__name__,
                 is_training=False):
        # Call parent constructor
        AgentWithConverter.__init__(self,
                                    action_space,
                                    action_space_converter=IdToAct)
        self.obs_space = observation_space

        # Filter
        #print("Actions filtering...")
        self.action_space.filter_action(self._filter_action)
        #print("..Done")

        # Store constructor params
        self.name = name
        self.num_frames = cfg.N_FRAMES
        self.is_training = is_training
        self.batch_size = cfg.BATCH_SIZE
        self.lr = cfg.LR

        # Declare required vars
        self.Qmain = None
        self.obs = None
        self.state = []
        self.frames = []

        # Declare training vars
        self.per_buffer = None
        self.done = False
        self.frames2 = None
        self.epoch_rewards = None
        self.epoch_alive = None
        self.Qtarget = None
        self.epsilon = 0.0

        # Compute dimensions from initial spaces
        self.observation_size = self.obs_space.size_obs()
        self.action_size = self.action_space.size()

        # Load network graph
        self.Qmain = DoubleDuelingDQN_NN(
            self.action_size,
            self.observation_size,
            num_frames=self.num_frames,
            learning_rate=self.lr,
            learning_rate_decay_steps=cfg.LR_DECAY_STEPS,
            learning_rate_decay_rate=cfg.LR_DECAY_RATE)
        # Setup training vars if needed
        if self.is_training:
            self._init_training()
Example no. 5
    def __init__(self, observation_space, action_space):
        self.observation_space = observation_space
        self.action_space = action_space

        AgentWithConverter.__init__(self,
                                    action_space,
                                    action_space_converter=IdToAct)

        self.observation_size = self.observation_space.size_obs()
        self.action_size = self.action_space.size()

        self.done = False
Example no. 6
    def __init__(self,
                 env,
                 n_states,
                 action_set,
                 n_hidden,
                 alpha,
                 rho_threshold=0.8):
        self.n_states = n_states
        self.n_actions = len(action_set)
        self.action_set = action_set
        self.rho_threshold = rho_threshold

        tf.keras.Model.__init__(self)
        AgentWithConverter.__init__(self, action_space=env.action_space)

        # Actor
        self.layer_input = tf.keras.layers.Dense(
            n_hidden[0],
            input_shape=(self.n_states, ),
            activation=tf.nn.relu,
            name="actor_layer_input",
            trainable=True,
        )

        self.layers_hidden = []
        for layer, layer_n_hidden in enumerate(n_hidden[1:]):
            self.layers_hidden.append(
                tf.keras.layers.Dense(
                    layer_n_hidden,
                    activation=tf.nn.relu,
                    name=f"actor_layer_hidden_{layer}",
                    trainable=True,
                ))

        self.layer_output = tf.keras.layers.Dense(self.n_actions,
                                                  activation=None,
                                                  name="actor_layer_output",
                                                  trainable=True)

        # Training
        self.compile(
            optimizer=tf.keras.optimizers.Adam(learning_rate=alpha),
            loss=tf.keras.losses.SparseCategoricalCrossentropy(
                from_logits=True),
        )

        # Initialize variables
        observation_encoded = np.random.rand(10, self.n_states).astype(
            np.float32)  # (None, n_states)
        _ = self.my_act(observation_encoded)  # (None, n_actions) logits

        print_trainable_variables(self)
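A small, self-contained sketch of how the (None, n_actions) logits produced by such an actor head can be turned into a single action id. This is only an illustration under assumed behavior; the internals of my_act are not shown in the example.

    import numpy as np
    import tensorflow as tf

    logits = tf.constant([[0.1, 2.0, -1.0]])            # shape (1, n_actions)
    probs = tf.nn.softmax(logits, axis=-1).numpy()[0]    # shape (n_actions,)
    probs = probs / probs.sum()                          # guard against float32 rounding
    action_id = int(np.random.choice(len(probs), p=probs))  # sampled action id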
Example no. 7
    def __init__(self,
                 env,
                 action_space,
                 name=__name__,
                 trace_length=1,
                 batch_size=1,
                 is_training=False,
                 lr=1e-5):
        # Call parent constructor
        AgentWithConverter.__init__(self,
                                    action_space,
                                    action_space_converter=IdToAct)

        # Store constructor params
        self.env = env
        self.name = name
        self.trace_length = trace_length
        self.batch_size = batch_size
        self.is_training = is_training
        self.lr = lr

        # Declare required vars
        self.Qmain = None
        self.obs = None
        self.state = []
        self.mem_state = None
        self.carry_state = None

        # Declare training vars
        self.exp_buffer = None
        self.done = False
        self.epoch_rewards = None
        self.epoch_alive = None
        self.Qtarget = None

        # Compute dimensions from initial state
        self.obs = self.env.reset()
        self.state = self.convert_obs(self.obs)
        self.observation_size = self.state.shape[0]
        self.action_size = self.action_space.size()

        # Load network graph
        self.Qmain = DoubleDuelingRDQN(self.action_size,
                                       self.observation_size,
                                       learning_rate=self.lr)
        # Setup initial state
        self._reset_state()
        # Setup training vars if needed
        if self.is_training:
            self._init_training()
Example no. 8
    def __init__(self, action_space):
        AgentWithConverter.__init__(self,
                                    action_space,
                                    action_space_converter=IdToAct)

        self.deep_q = None
        self.replay_buffer = ReplayBuffer(BUFFER_SIZE)
        self.process_buffer = []
        self.id = self.__class__.__name__

        # Statistics
        self.action_history = []
        self.reward_history = []
        self.cumulative_reward = 0
        self.smallest_loss = np.inf
        self.run_step_count = 0
        self.run_tf_writer = None
Example no. 9
    def __init__(self,
                 observation_space,
                 action_space,
                 name=__name__,
                 is_training=False):
        # Call parent constructor
        AgentWithConverter.__init__(self,
                                    action_space,
                                    action_space_converter=IdToAct)

        # Store constructor params
        self.observation_space = observation_space
        self.name = name
        self.trace_length = cfg.TRACE_LENGTH
        self.batch_size = cfg.BATCH_SIZE
        self.is_training = is_training
        self.lr = cfg.LR

        # Declare required vars
        self.Qmain = None
        self.obs = None
        self.state = []
        self.mem_state = None
        self.carry_state = None

        # Declare training vars
        self.exp_buffer = None
        self.done = False
        self.epoch_rewards = None
        self.epoch_alive = None
        self.Qtarget = None

        # Compute dimensions from initial state
        self.observation_size = self.observation_space.size_obs()
        self.action_size = self.action_space.size()

        # Load network graph
        self.Qmain = DoubleDuelingRDQN_NN(self.action_size,
                                          self.observation_size,
                                          learning_rate=self.lr)
        # Setup training vars if needed
        if self.is_training:
            self._init_training()
Example no. 10
    def __init__(self,
                 action_space,
                 mode="DDQN",
                 lr=1e-5,
                 training_param=TrainingParam()):
        # this function has been adapted.

        # to build an AgentWithConverter, we need an action_space.
        # No problem, we add it in the constructor.
        AgentWithConverter.__init__(self,
                                    action_space,
                                    action_space_converter=IdToAct)

        # and now back to the original implementation
        self.replay_buffer = ReplayBuffer(training_param.BUFFER_SIZE)

        # Compared to the original implementation, the observation space size is not known here,
        # because it depends on which components of the observation we want to look at. So this
        # neural network will be initialized the first time an observation is observed.
        self.deep_q = None
        self.mode = mode
        self.lr = lr
        self.training_param = training_param
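The comment above describes a lazy-initialization pattern: the Q-network can only be built once the first converted observation fixes the input size. Below is a minimal standalone sketch of that idea, with hypothetical names (LazyQNetHolder, ensure_network, input_dim) that are not taken from the example.

    import numpy as np

    class LazyQNetHolder:
        def __init__(self):
            # The network does not exist yet: its input size is unknown.
            self.deep_q = None

        def ensure_network(self, transformed_observation):
            if self.deep_q is None:
                # Build the network the first time an observation is seen,
                # now that the observation size is known.
                self.deep_q = {"input_dim": transformed_observation.shape[0]}
            return self.deep_q

    holder = LazyQNetHolder()
    net = holder.ensure_network(np.zeros(10, dtype=np.float32))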
Example no. 11
    def __init__(self,
                 action_space,
                 name="DeepQAgent",
                 lr=1e-3,
                 learning_rate_decay_steps=3000,
                 learning_rate_decay_rate=0.99,
                 store_action=False,
                 istraining=False,
                 nb_env=1):
        AgentWithConverter.__init__(self, action_space, action_space_converter=IdToAct)

        # and now back to the original implementation
        self.replay_buffer = None
        self.__nb_env = nb_env

        self.deep_q = None
        self.training_param = None
        self.tf_writer = None
        self.name = name
        self.losses = None
        self.graph_saved = False
        self.lr = lr
        self.learning_rate_decay_steps = learning_rate_decay_steps
        self.learning_rate_decay_rate = learning_rate_decay_rate
        self.store_action = store_action
        self.dict_action = {}
        self.istraining = istraining
        self.actions_per_1000steps = np.zeros((1000, self.action_space.size()), dtype=int)
        self.illegal_actions_per_1000steps = np.zeros(1000, dtype=int)
        self.ambiguous_actions_per_1000steps = np.zeros(1000, dtype=int)
        self.train_lr = lr
        self.epsilon = 1.0

        self.obs_as_vect = None
        self._tmp_obs = None
        self.reset_num = None
Example no. 12
    def __init__(self,
                 action_space,
                 env_name,
                 action_space_converter=IdToAct,
                 nb_quiet=2,
                 **kwargs_converter):
        AgentWithConverter.__init__(self, action_space, action_space_converter=action_space_converter, **kwargs_converter)
        self.action_space.all_actions = []

        # do nothing
        all_actions_tmp = [action_space()]

        # powerline switch: disconnection
        for i in range(action_space.n_line):
            if env_name == "rte_case14_realistic":
                if i == 18:
                    continue
            elif env_name == "rte_case5_example":
                pass
            elif env_name == "rte_case118_example":
                if i == 6:
                    continue
                if i == 26:
                    continue
                if i == 72:
                    continue
                if i == 73:
                    continue
                if i == 80:
                    continue
                if i == 129:
                    continue
                if i == 140:
                    continue
                if i == 176:
                    continue
                if i == 177:
                    continue
            all_actions_tmp.append(action_space.disconnect_powerline(line_id=i))

        # other types of actions
        all_actions_tmp += action_space.get_all_unitary_topologies_set(action_space)
        # self.action_space.all_actions += action_space.get_all_unitary_redispatch(action_space)

        if env_name == "rte_case14_realistic":
            # remove actions that make the powerflow diverge
            breaking_acts = [action_space({"set_bus": {"lines_or_id": [(7,2), (8,1), (9,1)],
                                                       "lines_ex_id": [(17,2)],
                                                       "generators_id": [(2,2)],
                                                       "loads_id": [(4,1)]}}),
                             action_space({"set_bus": {"lines_or_id": [(10, 2), (11, 1), (19,2)],
                                                       "lines_ex_id": [(16, 2)],
                                                       "loads_id": [(5, 1)]}}),
                             action_space({"set_bus": {"lines_or_id": [(5, 1)],
                                                       "lines_ex_id": [(2, 2)],
                                                       "generators_id": [(1, 2)],
                                                       "loads_id": [(1, 1)]}}),
                             action_space({"set_bus": {"lines_or_id": [(6, 2), (15, 2), (16, 1)],
                                                       "lines_ex_id": [(3, 2), (5, 2)],
                                                       "loads_id": [(2, 1)]}}),
                            action_space({"set_bus": {"lines_or_id": [(18, 1)],
                                                      "lines_ex_id": [(15, 2), (19, 2)],
                                                      }})
            ]
        elif env_name == "rte_case118_example":
            breaking_acts = [action_space({"set_bus": {"lines_or_id": [(100, 2), (129, 1), (173, 2)],
                                                       # "lines_ex_id": [(17,2)],
                                                       "generators_id": [(2, 2)],
                                                       "loads_id": [(6, 1)]
                                                       }}),
                             action_space({"set_bus": {"lines_or_id": [(100, 2), (129, 1), (173, 2)],
                                                       # "lines_ex_id": [(17,2)],
                                                       "generators_id": [(2, 2)],
                                                       "loads_id": [(6, 2)]
                                                       }}),
                             action_space({"set_bus": {"lines_or_id": [(100, 2), (129, 1), (173, 2)],
                                                       # "lines_ex_id": [(17,2)],
                                                       "generators_id": [(2, 1)],
                                                       "loads_id": [(6, 1)]
                                                       }}),
                             action_space({"set_bus": {"lines_or_id": [(140, 1)],
                                                       "lines_ex_id": [(129, 2)],
                                                       # "generators_id": [(2, 1)],
                                                       # "loads_id": [(6, 1)]
                                                       }}),
                             action_space({"set_bus": {"lines_or_id": [(57, 2), (80, 1), (83, 2)],
                                                       "lines_ex_id": [(2, 2), (13, 2), (24, 2), (35, 2)],
                                                       "generators_id": [(6, 2)],
                                                       "loads_id": [(8, 2)]
                                                       }}),
                             action_space({"set_bus": {"lines_or_id": [(57, 2), (80, 1), (83, 2)],
                                                       "lines_ex_id": [(2, 2), (13, 2), (24, 2), (35, 2)],
                                                       "generators_id": [(6, 2)],
                                                       "loads_id": [(8, 1)]
                                                       }}),
                             action_space({"set_bus": {"lines_or_id": [(57, 2), (80, 1), (83, 2)],
                                                       "lines_ex_id": [(2, 2), (13, 2), (24, 2), (35, 2)],
                                                       "generators_id": [(6, 1)],
                                                       "loads_id": [(8, 2)]
                                                       }}),
                             action_space({"set_bus": {"lines_or_id": [(57, 2), (80, 1), (83, 2)],
                                                       "lines_ex_id": [(2, 2), (13, 2), (24, 2), (35, 2)],
                                                       "generators_id": [(6, 1)],
                                                       "loads_id": [(8, 1)]
                                                       }}),
            ]
        else:
            breaking_acts = [action_space({"set_bus": {"lines_or_id": [(0,2), (1,2), (2,2), (3,1)],
                                                       "generators_id": [(0,1)],
                                                       "loads_id": [(0,1)]}}),
                             ]

        # filter out actions that break everything
        all_actions = []
        for el in all_actions_tmp:
            if el not in breaking_acts:
                all_actions.append(el)

        # set the action to the action space
        self.action_space.all_actions = all_actions

        # add the action "reset everything to 1 bus"
        self.action_space.all_actions.append(action_space({"set_bus": np.ones(action_space.dim_topo, dtype=int),
                                                           "set_line_status": np.ones(action_space.n_line, dtype=int)}))
        self.nb_act_done = 0
        self.act_this = 0
        self.nb_quiet = nb_quiet
        self._nb_quiet_1 = self.nb_quiet - 1
        self.nb_act = len(self.action_space.all_actions)
Example no. 13
    def __init__(self, 
                 observation_space,
                 action_space,
                 num_frames=4,
                 batch_size=32,
                 learning_rate=1e-5,
                 learning_rate_decay_steps=10000,
                 learning_rate_decay_rate=0.95,
                 discount_factor=0.95,
                 tau=1e-2,
                 erb_size=50000,
                 epsilon=0.99,
                 decay_epsilon=1024*32,
                 final_epsilon=0.0001):
        
        # initializes the AgentWithConverter class to handle action conversions
        AgentWithConverter.__init__(self, action_space,
                                    action_space_converter=IdToAct)
        
        self.obs_space = observation_space
        self.act_space = action_space
        self.num_frames = num_frames
        
        self.batch_size = batch_size
        self.lr = learning_rate
        self.lr_decay_steps = learning_rate_decay_steps
        self.lr_decay_rate = learning_rate_decay_rate
        self.gamma = discount_factor
        self.tau = tau
        # epsilon is the degree of exploration
        self.initial_epsilon = epsilon
        # Adaptive epsilon decay constants
        self.decay_epsilon = decay_epsilon
        self.final_epsilon = final_epsilon
        

        self.buff_size = erb_size
        
        self.observation_size = self.obs_space.size_obs()
        self.action_size = self.act_space.size()
        
        self.dqn = DQNet(self.action_size,
                         self.observation_size,
                         self.num_frames,
                         self.lr,
                         self.lr_decay_steps,
                         self.lr_decay_rate,
                         self.batch_size,
                         self.gamma,
                         self.tau)
        
        #State variables
        self.obs = None
        self.done = None
        self.epsilon = self.initial_epsilon
        
        self.state = []
        self.frames = []
        self.next_frames = []
        self.replay_buffer = BaseReplayBuffer(self.buff_size)
        
        return
Example no. 14
    def __init__(self,
                 observation_space,
                 action_space,
                 name=__name__,
                 is_training=False):
        # Call parent constructor
        AgentWithConverter.__init__(self,
                                    action_space,
                                    action_space_converter=IdToAct)

        # Store constructor params
        self.observation_space = observation_space
        self.name = name
        self.trace_length = cfg.TRACE_LENGTH
        self.batch_size = cfg.BATCH_SIZE
        self.is_training = is_training
        self.lr = cfg.LR

        # Declare required vars
        self.Qmain = None
        self.obs = None
        self.state = []
        self.mem_state = None
        self.carry_state = None

        # Declare training vars
        self.exp_buffer = None
        self.done = False
        self.epoch_rewards = None
        self.epoch_alive = None
        self.Qtarget = None
        self.epsilon = cfg.INITIAL_EPSILON

        # Compute dimensions from initial state
        self.action_size = self.action_space.n
        self.observation_shape = shape_obs(self.observation_space)

        # Slices dict
        self.slices = {
            "lines": {
                "indexes": [1, 3, 4, 9, 10, 11, 14, 15, 18, 20, 23, 24],
                "q_len": lines_q_len(self.action_space)
            },
            "sub": {
                "indexes": [1, 2, 3, 4, 9, 10, 11, 12, 14, 15, 18, 20, 23, 24],
                "q_len": topo_q_len(self.action_space)
            },
            #"disp": {
            #    "indexes": [4,7,8,9,10,11,12,14,18,23,24],
            #    "q_len": disp_q_len(self.action_space)
            #}
        }
        self.n_slices = len(self.slices.keys())

        # Load network graph
        self.Qmain = SliceRDQN_NN(self.action_size,
                                  self.observation_shape,
                                  self.slices,
                                  learning_rate=self.lr)
        # Setup training vars if needed
        if self.is_training:
            self._init_training()
Example no. 15
    def __init__(self,
                 action_space,
                 nn_archi,
                 name="DeepQAgent",
                 store_action=True,
                 istraining=False,
                 filter_action_fun=None,
                 verbose=False,
                 observation_space=None,
                 **kwargs_converters):
        AgentWithConverter.__init__(self,
                                    action_space,
                                    action_space_converter=IdToAct,
                                    **kwargs_converters)
        self.filter_action_fun = filter_action_fun
        if self.filter_action_fun is not None:
            self.action_space.filter_action(self.filter_action_fun)

        # and now back to the original implementation
        self.replay_buffer = None
        self.__nb_env = None

        self.deep_q = None
        self._training_param = None
        self._tf_writer = None
        self.name = name
        self._losses = None
        self.__graph_saved = False
        self.store_action = store_action
        self.dict_action = {}
        self.istraining = istraining
        self.epsilon = 1.0

        # for tensorboard
        self._train_lr = None

        self._reset_num = None

        self._max_iter_env_ = 1000000
        self._curr_iter_env = 0
        self._max_reward = 0.

        # action type
        self.nb_injection = 0
        self.nb_voltage = 0
        self.nb_topology = 0
        self.nb_line = 0
        self.nb_redispatching = 0
        self.nb_do_nothing = 0

        # for over sampling the hard scenarios
        self._prev_obs_num = 0
        self._time_step_lived = None
        self._nb_chosen = None
        self._proba = None
        self._prev_id = 0
        # this is for limiting the episode length depending on your previous successes
        self._total_sucesses = 0

        # neural network architecture
        self._nn_archi = nn_archi

        # observation transformers
        self._obs_as_vect = None
        self._tmp_obs = None
        self._indx_obs = None
        self.verbose = verbose
        if observation_space is not None:
            self.init_obs_extraction(observation_space)

        # for the frequency of action type
        self.current_ = 0
        self.nb_ = 10
        self._nb_this_time = np.zeros((self.nb_, 6))

        #
        self._vector_size = None
        self._actions_per_ksteps = None
        self._illegal_actions_per_ksteps = None
        self._ambiguous_actions_per_ksteps = None