BATCH_SIZE = 32
POLICY_UPDATE = 4        # update the policy network every 4 steps
TARGET_UPDATE = 10_000   # sync the target network every 10,000 steps
WARM_STEPS = 50_000
MAX_STEPS = 5_000_000
EVALUATE_FREQ = 100_000

rand = random.Random()
rand.seed(GLOBAL_SEED)
new_seed = lambda: rand.randint(0, 1_000_000)
os.mkdir(SAVE_PREFIX)  # directory for saving trained models

torch.manual_seed(new_seed())
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
env = MyEnv(device)  # create the environment that runs the Atari game
agent = Agent(  # create the agent
    env.get_action_dim(),  # number of actions in the game: three in total (left, right, no-op)
    device,                # device used for training
    GAMMA,
    new_seed(),
    EPS_START,             # initial epsilon value
    EPS_END,               # minimum epsilon value
    EPS_DECAY,             # epsilon decay rate
)
memory = ReplayMemory(STACK_SIZE + 1, MEM_SIZE, device)  # records transitions between the agent's actions and their outcomes, used later to train the network

#### Training ####
obs_queue: deque = deque(maxlen=5)
done = True
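# --- Illustrative sketch (not part of the original script) -----------------
# The constants above gate a standard DQN loop: POLICY_UPDATE steps between
# gradient updates, TARGET_UPDATE steps between target-network syncs,
# WARM_STEPS of random play before learning starts, and EVALUATE_FREQ steps
# between checkpoints. The method names below (env.reset, env.make_state,
# env.make_folded_state, agent.run, agent.learn, agent.sync, agent.save,
# memory.push) are assumptions for illustration and may differ from the
# actual MyEnv/Agent/ReplayMemory API.
for step in range(MAX_STEPS):
    if done:                                    # start a new episode
        observations, _, _ = env.reset()
        for obs in observations:
            obs_queue.append(obs)

    training = step > WARM_STEPS                # act randomly until the buffer warms up
    state = env.make_state(obs_queue).to(device).float()
    action = agent.run(state, training)
    obs, reward, done = env.step(action)
    obs_queue.append(obs)
    memory.push(env.make_folded_state(obs_queue), action, reward, done)

    if training and step % POLICY_UPDATE == 0:  # gradient step on the policy network
        agent.learn(memory, BATCH_SIZE)
    if step % TARGET_UPDATE == 0:               # copy policy weights into the target network
        agent.sync()
    if step % EVALUATE_FREQ == 0:               # periodic evaluation / checkpointing
        agent.save(os.path.join(SAVE_PREFIX, f"model_{step // EVALUATE_FREQ:03d}"))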
BATCH_SIZE = 32
POLICY_UPDATE = 32
TARGET_UPDATE = 10_000
WARM_STEPS = 50_000
MAX_STEPS = 500_000      # 50_000_000
EVALUATE_FREQ = 10_000   # 100_000

rand = random.Random()
rand.seed(GLOBAL_SEED)
new_seed = lambda: rand.randint(0, 1_000_000)
os.mkdir(SAVE_PREFIX)  # create the directory for saving models

torch.manual_seed(new_seed())
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)
env = MyEnv(device)  # environment
agent = Agent(  # agent
    env.get_action_dim(),  # 3
    device,                # cuda
    GAMMA,                 # 0.99
    new_seed(),
    EPS_START,             # 1
    EPS_END,               # 0.1
    EPS_DECAY,             # 1e6
)
memory = ReplayMemory(STACK_SIZE + 1, MEM_SIZE, device)  # initialize the replay buffer

#### Training ####
obs_queue: deque = deque(maxlen=5)
done = True
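# --- Illustrative sketch (not part of the original script) -----------------
# The annotations above (EPS_START = 1, EPS_END = 0.1, EPS_DECAY = 1e6)
# describe the epsilon-greedy exploration schedule. A linear decay consistent
# with those values would look like the helper below; the Agent class may
# implement its schedule differently, so treat this as an illustration only.
def epsilon_at(step, eps_start=1.0, eps_end=0.1, eps_decay=1e6):
    """Exploration rate after `step` environment steps (linear decay, then flat)."""
    return max(eps_end, eps_start - (eps_start - eps_end) * step / eps_decay)

# epsilon_at(0) -> 1.0; epsilon_at(500_000) -> 0.55; epsilon_at(2_000_000) -> 0.1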
TARGET_UPDATE = 10_000
WARM_STEPS = 50_000
MAX_STEPS = 50_000_000
EVALUATE_FREQ = 100_000

rand = random.Random()
rand.seed(GLOBAL_SEED)
new_seed = lambda: rand.randint(0, 1_000_000)
if not os.path.exists(SAVE_PREFIX):
    os.mkdir(SAVE_PREFIX)

torch.manual_seed(new_seed())
# The number of threads here needs to be adjusted based on the number of CPU cores available
torch.set_num_threads(4)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
env = MyEnv(device)
agent = Agent(
    env.get_action_dim(),
    device,
    GAMMA,
    new_seed(),
    EPS_START,
    EPS_END,
    EPS_DECAY,
    restore=restore,
    rlmodel=rlmodel,
)
memory = ReplayMemory(STACK_SIZE + 1, MEM_SIZE, device)

#### Training ####
obs_queue: deque = deque(maxlen=5)
BATCH_SIZE = 32
POLICY_UPDATE = 4
TARGET_UPDATE = 10_000
WARM_STEPS = 50_000
MAX_STEPS = 50_000_000
EVALUATE_FREQ = 100_000

rand = random.Random()
rand.seed(GLOBAL_SEED)
new_seed = lambda: rand.randint(0, 1_000_000)
os.mkdir(SAVE_PREFIX)

torch.manual_seed(new_seed())
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
env = MyEnv(device)
agent = Agent(
    env.get_action_dim(),
    device,
    GAMMA,
    new_seed(),
    EPS_START,
    EPS_END,
    EPS_DECAY,
)
# memory = ReplayMemory(STACK_SIZE + 1, MEM_SIZE, device)
memory = Experience({
    'size': MEM_SIZE,
    'batch_size': BATCH_SIZE,
    'learn_start': WARM_STEPS,
    'steps': MAX_STEPS,
    action_queue = action_queues[i]
    ve = versions[i]
    print('current lab:', ve, 'shade rate:', get_shade_time(action_queue))


if __name__ == "__main__":
    versions = vs0
    for version in versions:
        # set_trace()
        print(version)
        dueling = 'dueling' in version
        stable = 'stable' in version
        if stable:
            action_queue = []
        env = MyEnv(device)
        agent = Agent(env.get_action_dim(), device, GAMMA, new_seed(), EPS_START, EPS_END,
                      EPS_DECAY, dueling, pretrained, stable * 0.1)
        if 'PER' in version:
            memory = PERMemory(STACK_SIZE + 1, MEM_SIZE, device)
            # memory = Memory_Buffer_PER(MEM_SIZE)
        else:
            memory = ReplayMemory(STACK_SIZE + 1, MEM_SIZE, device)
            # memory = Memory_Buffer_PER(MEM_SIZE)

        #### Training ####
        obs_queue: deque = deque(maxlen=5)
        done = True
        avg_reward_arr = []
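# --- Illustrative sketch (not part of the original script) -----------------
# The flags above are derived purely from the version name. With hypothetical
# version strings (not taken from the source), the parsing behaves like this:
for v in ["dqn_base", "dqn_dueling", "dqn_dueling_PER_stable"]:
    print(v, "dueling" in v, "stable" in v, "PER" in v)
# dqn_base False False False
# dqn_dueling True False False
# dqn_dueling_PER_stable True True True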
import pathlib
import shutil
from collections import deque

from IPython import display as ipydisplay
import torch

from utils_env import MyEnv
from utils_drl import Agent


# In[2]:

target = 78
model_name = f"model_{target:03d}"
model_path = f"./models/{model_name}"
device = torch.device("cpu")
env = MyEnv(device)
agent = Agent(env.get_action_dim(), device, 0.99, 0, 0, 0, 1, model_path,
              use_dueling=True, use_PR=True, use_DDQN=True)


# In[3]:

obs_queue = deque(maxlen=5)
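# --- Illustrative sketch (not part of the original script) -----------------
# A typical next step is an evaluation rollout of the loaded checkpoint.
# `env.evaluate` and its (avg_reward, frames) return value are assumptions,
# as is the frames being PIL images; the real notebook may use different helpers.
avg_reward, frames = env.evaluate(obs_queue, agent, render=True)
print(f"Average reward of {model_name}: {avg_reward:.1f}")

# Dump the rendered frames so they can later be stitched into a GIF/video.
frames_dir = pathlib.Path("eval_frames")
shutil.rmtree(frames_dir, ignore_errors=True)
frames_dir.mkdir(parents=True)
for idx, frame in enumerate(frames):
    frame.save(frames_dir / f"{idx:06d}.png", format="png")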
POLICY_UPDATE = 4
TARGET_UPDATE = 10_000
WARM_STEPS = 50_000
# WARM_STEPS = 50
MAX_STEPS = 50_000_000
EVALUATE_FREQ = 100_000  # evaluation frequency: pause every 100_000 steps to evaluate

rand = random.Random()                          # pseudo-random number generator
rand.seed(GLOBAL_SEED)                          # seeding makes the random sequence reproducible for a given seed
new_seed = lambda: rand.randint(0, 1_000_000)   # pick an integer uniformly from [0, 1_000_000]
os.mkdir(SAVE_PREFIX)                           # create the "./models" directory
torch.manual_seed(new_seed())                   # seed PyTorch's CPU RNG with new_seed()
# device = torch.device("cuda" if torch.cuda.is_available() else "cpu")  # choose the device: GPU if available, else CPU
device = torch.device("cpu")
env = MyEnv(device)
agent = Agent(  # initialize from the preset parameters
    env.get_action_dim(),  # returns 3; the three actions are ["NOOP", "RIGHT", "LEFT"]
    device,
    GAMMA,
    new_seed(),
    EPS_START,
    EPS_END,
    EPS_DECAY,
)
memory = ReplayMemory(STACK_SIZE + 1, MEM_SIZE, device)  # circular buffer; arguments are channels, capacity, and device, with capacity MEM_SIZE = 100_000

#### Training ####
obs_queue: deque = deque(maxlen=5)  # observation queue
done = True
BATCH_SIZE = 32
POLICY_UPDATE = 4
TARGET_UPDATE = 1_000
WARM_STEPS = 1_000
MAX_STEPS = 100_000
EVALUATE_FREQ = 1_000

rand = random.Random()
rand.seed(GLOBAL_SEED)
new_seed = lambda: rand.randint(0, 1_000_000)
# os.mkdir(SAVE_PREFIX)

torch.manual_seed(new_seed())
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
env = MyEnv(device)
agent = Agent(
    env.get_action_dim(),
    device,
    GAMMA,
    new_seed(),
    EPS_START,
    EPS_END,
    EPS_DECAY,
)
prioritized = True
if prioritized:
    memory = MemoryBufferPER(STACK_SIZE + 1, MEM_SIZE, device)
else:
    memory = ReplayMemory(STACK_SIZE + 1, MEM_SIZE, device)
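# --- Illustrative sketch (not part of the original script) -----------------
# Rough illustration of proportional prioritized sampling (Schaul et al., 2016):
# P(i) is proportional to p_i**alpha, and each sample gets an importance weight
# w_i = (N * P(i))**(-beta) normalised by the largest weight. This is NOT the
# MemoryBufferPER implementation used above, only a toy version of the idea.
import numpy as np

def per_sample(priorities, batch_size, alpha=0.6, beta=0.4):
    priorities = np.asarray(priorities, dtype=np.float64)
    probs = priorities ** alpha
    probs /= probs.sum()                                   # sampling distribution P(i)
    idxs = np.random.choice(len(priorities), size=batch_size, p=probs)
    weights = (len(priorities) * probs[idxs]) ** (-beta)   # importance-sampling weights
    weights /= weights.max()
    return idxs, weights

# e.g. per_sample([1.0, 0.5, 2.0, 0.1], batch_size=2)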