import gym
import numpy as np
import tensorflow as tf

import replaymemory


def __init__(self, sess, dis=0.99, REPLAY_MEMORY=100000, batch_size=64,
             max_steps=10000000, max_episodes=500000,
             layer_size_Q1=300, layer_size_Q2=400,
             learning_rate_Q=0.0001, learning_rate_A=0.01,
             training_step=1, copy_step=1, action_copy_step=1, repu_num=1,
             beta_max_step=1500000, action_res=None, random_action=True,
             weighted_train=False, ending_cond_epis=100, ending_cond_reward=195,
             alpha=0.6, beta_init=0.4, eps=0.01, eps_div=256, s_scale=1,
             size_action_batch=500, Double=True, Prioritized=True,
             Exp='softmax', DDPG=False, action_train=False, seed_n=0,
             Game='CartPole-v0', file_name='steps', save_epi=100,
             save_network=False):
    # Build the environment and seed every source of randomness.
    env = gym.make(Game)
    tf.set_random_seed(seed_n)
    rng = np.random.RandomState(seed_n)

    input_size = env.observation_space.shape[0]
    self.action_dim = env.action_space.shape[0]
    output_size = self.action_dim
    action_map = []
    action_scale = env.action_space.high[0]
    """
    for o in range(self.action_dim):
        print(env.action_space.low[o])
        print(env.action_space.high[o])
    """

    # When DDPG is enabled, build the actor (Action_Network) as well.
    if DDPG:
        self.Action_Network = Action_Network(
            sess, input_size, output_size, seed_n,
            layer_size_Q1, layer_size_Q2, learning_rate_A,
            action_scale=action_scale)

    # When actions are not sampled randomly, use get_action_information
    # to slice the action range into uniform bins.
    if not random_action:
        size_action_batch, conti_action_flag, action_map = \
            self.get_action_information(env, Game, action_res=action_res)

    replay_memory = replaymemory.ReplayMemory(
        rng=rng, memory_size=REPLAY_MEMORY,
        per_alpha=alpha, per_beta0=beta_init)

    ############### copy parameters ################
    self.sess = sess
    self.dis = dis
    self.REPLAY_MEMORY = REPLAY_MEMORY
    self.replay_memory_size = REPLAY_MEMORY
    self.replay_memory = replay_memory
    self.batch_size = batch_size
    self.size_action_batch = size_action_batch
    self.alpha = alpha
    self.beta_init = beta_init
    self.beta_max_step = beta_max_step
    self.eps = eps
    self.eps_div = eps_div
    self.s_scale = s_scale
    self.action_scale = action_scale
    self.layer_size_Q1 = layer_size_Q1
    self.layer_size_Q2 = layer_size_Q2
    self.learning_rate_Q = learning_rate_Q
    self.learning_rate_A = learning_rate_A
    self.training_step = training_step
    self.copy_step = copy_step
    self.action_copy_step = action_copy_step
    self.action_train = action_train
    self.weighted_train = weighted_train
    self.repu_num = repu_num
    self.Double = Double
    self.Prioritized = Prioritized
    self.Exp = Exp
    self.DDPG = DDPG
    self.seed_n = seed_n
    self.Game = Game
    self.save_epi = save_epi
    self.max_episodes = max_episodes
    self.max_steps = max_steps
    self.env = env
    self.file_name = file_name
    self.save_network = save_network
    self.random_action = random_action
    self.input_size = input_size
    self.output_size = output_size
    self.ending_cond_epis = ending_cond_epis
    self.ending_cond_reward = ending_cond_reward
    #################################################

    self.Q_Network = Q_Network(
        sess, input_size, output_size, size_action_batch, batch_size,
        seed_n, layer_size_Q1, layer_size_Q2,
        learning_rate_Q, learning_rate_A,
        action_map, random_action, action_scale)

    # Run run_DQN (training starts here).
    self.run_DQN(seed_n=seed_n, Exp=Exp, Double=Double, Prioritized=Prioritized)
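
# ---------------------------------------------------------------------------
# Usage sketch: the class that owns the __init__ above is not named in this
# excerpt, so "ContinuousQAgent" below is a hypothetical stand-in. Because
# __init__ ends by calling run_DQN, constructing the object immediately
# starts training.
#
#     with tf.Session() as sess:
#         ContinuousQAgent(sess,                # hypothetical class name
#                          Game='Pendulum-v0',  # any continuous-action Box env
#                          DDPG=True,           # also builds the Action_Network
#                          max_episodes=1000,
#                          save_network=True)
# ---------------------------------------------------------------------------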

def __init__(self, sess, dis=0.99, REPLAY_MEMORY=10000, batch_size=64,
             max_episodes=500, layer_size_Q1=64, layer_size_Q2=64,
             learning_rate_Q=0.0001, training_step=1, copy_step=1, repu_num=1,
             action_res=None, ending_cond_epis=100, ending_cond_reward=195,
             alpha=0.6, beta_init=0.4, eps=0.01, eps_div=256, s_scale=1,
             Double=True, Dueling=True, Prioritized=True, Exp='softmax',
             seed_n=0, Game='CartPole-v0', file_name='steps', case_n=0,
             save_epi=100):
    # Build the environment and seed every source of randomness.
    env = gym.make(Game)
    tf.set_random_seed(seed_n)
    rng = np.random.RandomState(seed_n)

    input_size = env.observation_space.shape[0]
    self.action_dim = env.action_space.shape[0]
    # self.action_dim = 1

    # Map the action space to a discrete set of actions.
    output_size, conti_action_flag, action_map = \
        self.get_action_information(env, Game, action_res=action_res)

    replay_memory = replaymemory.ReplayMemory(
        rng=rng, memory_size=REPLAY_MEMORY,
        per_alpha=alpha, per_beta0=beta_init)

    # Copy parameters onto the instance.
    self.sess = sess
    self.dis = dis
    self.REPLAY_MEMORY = REPLAY_MEMORY
    self.replay_memory_size = REPLAY_MEMORY
    self.replay_memory = replay_memory
    self.batch_size = batch_size
    self.alpha = alpha
    self.beta_init = beta_init
    self.eps = eps
    self.eps_div = eps_div
    self.s_scale = s_scale
    self.layer_size_Q1 = layer_size_Q1
    self.layer_size_Q2 = layer_size_Q2
    self.learning_rate_Q = learning_rate_Q
    self.training_step = training_step
    self.copy_step = copy_step
    self.repu_num = repu_num
    self.Double = Double
    self.Dueling = Dueling
    self.Prioritized = Prioritized
    self.Exp = Exp
    self.seed_n = seed_n
    self.Game = Game
    self.save_epi = save_epi
    self.max_episodes = max_episodes
    self.env = env
    self.file_name = file_name
    self.input_size = input_size
    self.output_size = output_size
    self.conti_action_flag = conti_action_flag
    self.action_map = action_map

    # Choose between the dueling and the plain Q-network architecture.
    if Dueling:
        self.Q_Network = Q_Network_D(sess, input_size, output_size, seed_n,
                                     layer_size_Q1, layer_size_Q2,
                                     learning_rate_Q)
    else:
        self.Q_Network = Q_Network(sess, input_size, output_size, seed_n,
                                   layer_size_Q1, layer_size_Q2,
                                   learning_rate_Q)

    self.ending_cond_epis = ending_cond_epis
    self.ending_cond_reward = ending_cond_reward

    # Run run_DQN (training starts here).
    self.run_DQN(case_n=case_n, seed_n=seed_n, Exp=Exp, Double=Double,
                 Dueling=Dueling, Prioritized=Prioritized)
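
# ---------------------------------------------------------------------------
# Sketch of the discretization that get_action_information appears to perform.
# Its body is not shown in this excerpt, so this is a minimal reconstruction
# under the assumption that, for a 1-D continuous (Box) action space, it
# slices [low, high] into action_res evenly spaced actions and returns the
# same (output_size, conti_action_flag, action_map) triple used above.
# ---------------------------------------------------------------------------
def uniform_action_map(env, action_res=10):
    """Hypothetical helper: map a 1-D Box action space to discrete actions."""
    low, high = env.action_space.low[0], env.action_space.high[0]
    action_map = np.linspace(low, high, action_res)  # evenly spaced actions
    output_size = len(action_map)                    # one Q-output per action
    conti_action_flag = True                         # source space was continuous
    return output_size, conti_action_flag, action_map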

def __init__(self, dis=0.99, REPLAY_MEMORY=100000, batch_size=64,
             max_steps=10000000, max_episodes=500000,
             layer_size_Q1=300, layer_size_Q2=400,
             learning_rate_Q=0.0001, learning_rate_A=0.01,
             training_step=1, copy_step=1, action_copy_step=1, repu_num=1,
             beta_max_step=1500000, ending_cond_epis=100,
             ending_cond_reward=195, min_distance=0.1, alpha=0.6,
             beta_init=0.4, eps=0.01, scale=1, size_action_batch=500,
             seed_n=0, Game='CartPole-v0', file_name='steps', save_epi=100,
             save_network=False):
    # Build the environment and seed the numpy RNG. Unlike the variants
    # above, this __init__ takes no external TF session.
    env = gym.make(Game)
    rng = np.random.RandomState(seed_n)

    input_size = env.observation_space.shape[0]
    self.action_dim = env.action_space.shape[0]
    output_size = self.action_dim
    action_map = []
    action_scale = env.action_space.high[0]
    """
    for o in range(self.action_dim):
        print(env.action_space.low[o])
        print(env.action_space.high[o])
    """

    replay_memory = replaymemory.ReplayMemory(
        rng=rng, memory_size=REPLAY_MEMORY,
        per_alpha=alpha, per_beta0=beta_init)

    ############### copy parameters ################
    self.dis = dis
    self.REPLAY_MEMORY = REPLAY_MEMORY
    self.replay_memory_size = REPLAY_MEMORY
    self.replay_memory = replay_memory
    self.batch_size = batch_size
    self.size_action_batch = size_action_batch
    self.alpha = alpha
    self.beta_init = beta_init
    self.beta_max_step = beta_max_step
    self.min_distance = min_distance
    self.eps = eps
    self.scale = scale
    self.action_scale = action_scale
    self.layer_size_Q1 = layer_size_Q1
    self.layer_size_Q2 = layer_size_Q2
    self.learning_rate_Q = learning_rate_Q
    self.learning_rate_A = learning_rate_A
    self.training_step = training_step
    self.copy_step = copy_step
    self.action_copy_step = action_copy_step
    self.repu_num = repu_num
    self.seed_n = seed_n
    self.Game = Game
    self.save_epi = save_epi
    self.max_episodes = max_episodes
    self.max_steps = max_steps
    self.env = env
    self.file_name = file_name
    self.save_network = save_network
    self.input_size = input_size
    self.output_size = output_size
    self.ending_cond_epis = ending_cond_epis
    self.ending_cond_reward = ending_cond_reward
    #################################################

    self.Q_Network = Q_Network(seed_n, input_size, output_size,
                               size_action_batch, action_scale, batch_size,
                               layer_size_Q1, layer_size_Q2,
                               learning_rate_Q, learning_rate_A)

    # Run run_DQN (training starts here).
    self.run_DQN(seed_n=seed_n)
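
# ---------------------------------------------------------------------------
# Sketch of the prioritized-replay beta schedule implied by beta_init and
# beta_max_step. This assumes the standard PER annealing (Schaul et al.,
# 2016), where the importance-sampling exponent rises linearly from beta_init
# to 1.0 over beta_max_step steps; the schedule actually used inside run_DQN
# is not shown in this excerpt.
# ---------------------------------------------------------------------------
def per_beta(step, beta_init=0.4, beta_max_step=1500000):
    """Linearly anneal the PER importance-sampling exponent toward 1.0."""
    return min(1.0, beta_init + (1.0 - beta_init) * step / beta_max_step)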