def __init__(self, model_class, model=None, env=None, exploration=None,
             gamma=0.99, memory_size=100000, batch_size=1,
             target_update_frequency=1000, saving_dir=None, min_mem=10000):
    """
    Base class for an LSTM DQN agent.

    :param model_class: subclass of torch.nn.Module; class reference of the model
    :param model: initial model for the policy net; may be None when loading from a checkpoint
    :param env: environment
    :param exploration: exploration object; must provide value(step), which returns epsilon
    :param gamma: discount factor
    :param memory_size: capacity of the replay memory
    :param batch_size: size of the mini-batch for one update step
    :param target_update_frequency: frequency (in steps) at which the target net is updated
    :param saving_dir: directory for saving checkpoints
    :param min_mem: minimum amount of stored experience required before learning starts
    """
    DQNAgent.__init__(self, model_class, model, env, exploration, gamma,
                      memory_size, batch_size, target_update_frequency, saving_dir)
    # Episodic memory stores whole episodes so the LSTM can be replayed
    # through complete sequences rather than isolated transitions.
    self.memory = EpisodicReplayMemory(memory_size)
    self.hidden = None  # recurrent hidden state, reset between episodes
    self.min_mem = min_mem
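# Usage sketch, not from this file (assumptions: the enclosing class is named
# RecurrentDQNAgent here; MyLSTMNet, env, and exploration are user-supplied;
# none of these names appear above):
#
#   net = MyLSTMNet()
#   agent = RecurrentDQNAgent(MyLSTMNet, model=net, env=env,
#                             exploration=exploration, gamma=0.99,
#                             saving_dir="checkpoints/")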
def __init__(self, state_size, action_size, input_shape, memory_size,
             replay_start_step, load_model):
    DQNAgent.__init__(self, state_size, action_size, replay_start_step, memory_size)
    self.input_shape = input_shape
    # He-normal weight initialization, well suited to ReLU activations.
    self.initializer = he_normal()
    if load_model is not False:
        # load_model carries a path to saved weights; anything but False loads them
        self.load_model(load_model)
    else:
        self.__build_model()
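# Usage sketch, not from this file (assumptions: the enclosing class is named
# ConvDQNAgent, and he_normal is imported from keras.initializers at module
# level; neither appears above):
#
#   agent = ConvDQNAgent(state_size=4, action_size=2, input_shape=(84, 84, 4),
#                        memory_size=50000, replay_start_step=1000,
#                        load_model=False)          # build a fresh model
#   agent = ConvDQNAgent(state_size=4, action_size=2, input_shape=(84, 84, 4),
#                        memory_size=50000, replay_start_step=1000,
#                        load_model="weights.h5")   # or restore saved weights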
def __init__(self, env, units=[32, 32], buffer_size=5000, learning_rate=0.01,
             init_epsilon=1., epsilon_decay=0.99, min_epsilon=0.01, gamma=0.98,
             batch_size=16, target_update_iter=300, train_nums=5000,
             start_learning=64, max_iter=200):
    # =============== Init Observation Space =================
    self.env = env
    self.gym_obs_space = GymObservationSpace(self.env)
    obs = self.env.reset()
    gym_obs = self.gym_obs_space.to_gym(obs)
    # Record the names of all attributes exposed by the gym observation.
    self.all_obs = list(gym_obs.keys())
    # Only these attributes are kept when flattening an observation.
    self.obs_list = [
        "prod_p", "prod_v", "load_p", "load_q", "rho", "topo_vect"
    ]
    init_obs = self.convert_obs(obs)  # obs_shape: (39,)

    # =============== Init Action Space =================
    # Map discrete action ids to grid2op actions; action id 0 is DoNothing.
    self.converter = grid2op.Converter.IdToAct(self.env.action_space)
    self.converter.init_converter(
        set_line_status=False,    # 40 actions
        change_line_status=True,  # 8 actions
        change_bus_vect=False,    # 59 actions
        set_topo_vect=True,       # 58 actions
    )
    self.gym_action_space = GymActionSpace(action_space=self.converter)
    ACT_SIZE = len(self.converter.all_actions)  # 68; Dict(action:Discrete(68))
    # Sanity check: sampling yields e.g. OrderedDict([('action', 34)]), which
    # from_gym() decodes back to the integer action id (34).
    gym_action = self.gym_action_space.sample()
    encoded_action = self.gym_action_space.from_gym(gym_action)
    self.num_actions = ACT_SIZE

    self.units = units
    DQNAgent.__init__(self, self.env, obs=init_obs, num_actions=ACT_SIZE,
                      buffer_size=buffer_size, learning_rate=learning_rate,
                      init_epsilon=init_epsilon, epsilon_decay=epsilon_decay,
                      min_epsilon=min_epsilon, gamma=gamma, batch_size=batch_size,
                      target_update_iter=target_update_iter, train_nums=train_nums,
                      start_learning=start_learning)
    self.max_iter = max_iter
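# Usage sketch, not from this file (assumptions: the enclosing class is named
# GridDQNAgent, GymObservationSpace / GymActionSpace come from
# grid2op.gym_compat, and "l2rpn_case14_sandbox" is one of the standard
# grid2op test cases; the class name is a guess, it does not appear above):
#
#   import grid2op
#   env = grid2op.make("l2rpn_case14_sandbox")
#   agent = GridDQNAgent(env, units=[64, 64], buffer_size=5000, max_iter=200)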