Example #1
    def __init__(self,
                 model_class,
                 model=None,
                 env=None,
                 exploration=None,
                 gamma=0.99,
                 memory_size=100000,
                 batch_size=1,
                 target_update_frequency=1000,
                 saving_dir=None,
                 min_mem=10000):
        """
        Base class for an LSTM DQN agent.
        :param model_class: subclass of torch.nn.Module; class reference of the model
        :param model: initial model for the policy net; may be None when loading from a checkpoint
        :param env: environment
        :param exploration: exploration object; must provide a value(step) method that returns epsilon
        :param gamma: discount factor
        :param memory_size: capacity of the replay memory
        :param batch_size: size of the mini-batch for one update step
        :param target_update_frequency: how often (in steps) to update the target net
        :param saving_dir: directory for saving checkpoints
        :param min_mem: minimum number of stored transitions before learning starts
        """
        DQNAgent.__init__(self, model_class, model, env, exploration, gamma,
                          memory_size, batch_size, target_update_frequency,
                          saving_dir)
        self.memory = EpisodicReplayMemory(memory_size)
        self.hidden = None
        self.min_mem = min_mem
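
The exploration argument above only needs to expose a value(step) method that returns the current epsilon. A minimal sketch of a schedule satisfying that contract (the class name and the linear decay shape are assumptions, not taken from the original repository):

class LinearEpsilonSchedule:
    """Linearly anneal epsilon from `start` to `end` over `decay_steps` steps."""
    def __init__(self, start=1.0, end=0.02, decay_steps=10000):
        self.start = start
        self.end = end
        self.decay_steps = decay_steps

    def value(self, step):
        # fraction of the decay completed, clipped to [0, 1]
        frac = min(step / self.decay_steps, 1.0)
        return self.start + frac * (self.end - self.start)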
Example #2
    def __init__(self, state_size, action_size, input_shape, memory_size,
                 replay_start_step, load_model):
        DQNAgent.__init__(self, state_size, action_size, replay_start_step,
                          memory_size)

        self.input_shape = input_shape
        self.initializer = he_normal()  # He-normal weight initialization
        # Either restore a saved model from the given path or build a fresh one.
        if load_model is not False:
            self.load_model(load_model)
        else:
            self.__build_model()
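
__build_model is not shown in this excerpt. A hedged sketch of what a Keras Q-network built with the initializer and input shape stored above could look like (the function name, layer sizes, and architecture are all assumptions, not the repository's actual model):

from tensorflow.keras import layers, models
from tensorflow.keras.initializers import he_normal

def build_q_network(input_shape, action_size, initializer=he_normal()):
    # Illustrative convolutional Q-network; every layer size here is assumed.
    model = models.Sequential([
        layers.Conv2D(32, 8, strides=4, activation="relu",
                      kernel_initializer=initializer, input_shape=input_shape),
        layers.Conv2D(64, 4, strides=2, activation="relu",
                      kernel_initializer=initializer),
        layers.Flatten(),
        layers.Dense(256, activation="relu", kernel_initializer=initializer),
        layers.Dense(action_size, kernel_initializer=initializer),
    ])
    model.compile(optimizer="adam", loss="mse")
    return model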
Example #3
    def __init__(self,
                 env,
                 units=[32, 32],
                 buffer_size=5000,
                 learning_rate=0.01,
                 init_epsilon=1.,
                 epsilon_decay=0.99,
                 min_epsilon=0.01,
                 gamma=0.98,
                 batch_size=16,
                 target_update_iter=300,
                 train_nums=5000,
                 start_learning=64,
                 max_iter=200):
        # =============== Init Observation Space =================
        self.env = env
        self.gym_obs_space = GymObservationSpace(self.env)
        self.all_obs = []
        obs = self.env.reset()
        gym_obs = self.gym_obs_space.to_gym(obs)
        for key in gym_obs.keys():
            self.all_obs.append(key)
        self.obs_list = [
            "prod_p", "prod_v", "load_p", "load_q", "rho", "topo_vect"
        ]
        init_obs = self.convert_obs(obs)
        #print("obs_shape = ", init_obs.shape) # (39,)

        # =============== Init Action Space =================
        self.converter = grid2op.Converter.IdToAct(self.env.action_space)
        # action number 0 = DoNothing
        self.converter.init_converter(
            set_line_status=False,  # 40 
            change_line_status=True,  # 8
            change_bus_vect=False,  # 59
            set_topo_vect=True,  # 58
        )
        self.gym_action_space = GymActionSpace(action_space=self.converter)
        ACT_SIZE = len(self.converter.all_actions)
        #print("action space size= ", ACT_SIZE) # 68
        #print("gym_action_space = ", self.gym_action_space) # Dict(action:Discrete(68))
        gym_action = self.gym_action_space.sample()
        #print("sample_action = ", gym_action) # OrderedDict([('action', 34)])
        # sanity check: a sampled gym action converts back to an encoded action id
        encoded_action = self.gym_action_space.from_gym(gym_action)  # 34
        self.num_actions = ACT_SIZE
        '''
        print("=======================alll action ==================")
        for i in range(ACT_SIZE):
            print("action number = ", i)
            print(self.converter.convert_act(i))
        print("buffer_size : ", buffer_size)
        print("env : ", self.env)
        '''
        self.units = units
        DQNAgent.__init__(self,
                          self.env,
                          obs=init_obs,
                          num_actions=ACT_SIZE,
                          buffer_size=buffer_size,
                          learning_rate=learning_rate,
                          init_epsilon=init_epsilon,
                          epsilon_decay=epsilon_decay,
                          min_epsilon=min_epsilon,
                          gamma=gamma,
                          batch_size=batch_size,
                          target_update_iter=target_update_iter,
                          train_nums=train_nums,
                          start_learning=start_learning)
        #print(self.gym_action_space)
        self.max_iter = max_iter
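
convert_obs is called in the constructor above but not shown. A plausible sketch, assuming it simply flattens and concatenates the attributes named in self.obs_list into one 1-D feature vector (which would be consistent with the (39,) shape comment):

import numpy as np

def convert_obs(self, obs):
    # Illustrative only: concatenate the selected observation attributes
    # (prod_p, prod_v, load_p, load_q, rho, topo_vect) into one flat vector.
    parts = [np.asarray(getattr(obs, attr), dtype=np.float32).ravel()
             for attr in self.obs_list]
    return np.concatenate(parts)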
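The IdToAct converter used above can also be exercised on its own. A minimal usage sketch; the environment name is an assumption (any installed grid2op environment works), while the converter calls mirror the ones in the example:

import grid2op
from grid2op.Converter import IdToAct

env = grid2op.make("rte_case14_realistic")  # assumed environment name
converter = IdToAct(env.action_space)
converter.init_converter(set_line_status=False,
                         change_line_status=True,
                         change_bus_vect=False,
                         set_topo_vect=True)
print(len(converter.all_actions))  # total number of discrete actions
print(converter.convert_act(0))    # action id 0 is DoNothing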