Example 1
BATCH_SIZE = 32
POLICY_UPDATE = 4  # update the policy network every 4 steps
TARGET_UPDATE = 10_000  # sync the target network every 10,000 steps
WARM_STEPS = 50_000
MAX_STEPS = 5_000_000
EVALUATE_FREQ = 100_000

rand = random.Random()
rand.seed(GLOBAL_SEED)
new_seed = lambda: rand.randint(0, 1000_000)
os.mkdir(SAVE_PREFIX)  # directory for saving the trained models

torch.manual_seed(new_seed())
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
env = MyEnv(device)  # create the environment that runs the Atari game
agent = Agent(  # create an agent
    env.get_action_dim(),  # number of actions in the game: three in total (left, right, and no-op)
    device,  # device used for training
    GAMMA,
    new_seed(),
    EPS_START,  # initial epsilon value
    EPS_END,  # minimum epsilon value
    EPS_DECAY,  # epsilon decay
)
memory = ReplayMemory(STACK_SIZE + 1, MEM_SIZE,
                      device)  # records the link between the agent's actions and their outcomes, used later to train the network

#### Training ####
obs_queue: deque = deque(maxlen=5)
done = True
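The EPS_START, EPS_END and EPS_DECAY arguments above describe an epsilon-greedy exploration schedule: epsilon starts at EPS_START and is annealed toward EPS_END over roughly EPS_DECAY steps. A minimal, self-contained sketch of such a linear schedule (the linear_epsilon helper below is hypothetical and not part of the Agent class shown in these examples; the default values follow the comments in Example 2):

def linear_epsilon(step, eps_start=1.0, eps_end=0.1, eps_decay=1_000_000):
    """Anneal epsilon linearly from eps_start down to eps_end over eps_decay steps."""
    fraction = min(step / eps_decay, 1.0)
    return eps_start + fraction * (eps_end - eps_start)

print(linear_epsilon(0), linear_epsilon(500_000), linear_epsilon(2_000_000))
# -> 1.0 0.55 0.1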
Example 2
BATCH_SIZE = 32
POLICY_UPDATE = 32
TARGET_UPDATE = 10_000
WARM_STEPS = 50_000
MAX_STEPS = 500_000  # 50000000
EVALUATE_FREQ = 10_000  # 100000

rand = random.Random()
rand.seed(GLOBAL_SEED)
new_seed = lambda: rand.randint(0, 1000_000)
os.mkdir(SAVE_PREFIX)  # create the directory for saving models

torch.manual_seed(new_seed())
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)
env = MyEnv(device)  # environment
agent = Agent(  # agent
    env.get_action_dim(),  # 3
    device,  # cuda
    GAMMA,  # 0.99
    new_seed(),
    EPS_START,  # 1
    EPS_END,  # 0.1
    EPS_DECAY,  # 1e6
)
memory = ReplayMemory(STACK_SIZE + 1, MEM_SIZE, device)  # initialize the replay memory

#### Training ####
obs_queue: deque = deque(maxlen=5)
done = True
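The constants in Examples 1 and 2 imply a fixed update cadence: the replay memory is only filled during the first WARM_STEPS steps, the policy network is updated every POLICY_UPDATE steps, the target network is synced every TARGET_UPDATE steps, and an evaluation runs every EVALUATE_FREQ steps. A rough sketch of that cadence, assuming placeholder do_learn / do_sync / do_eval callbacks rather than the real Agent and ReplayMemory API:

def training_cadence(max_steps, warm_steps, policy_update, target_update,
                     evaluate_freq, do_learn, do_sync, do_eval):
    # Illustrative loop only: environment interaction and memory filling are omitted.
    for step in range(1, max_steps + 1):
        if step <= warm_steps:
            continue  # warm-up phase: only collect transitions, no learning
        if step % policy_update == 0:
            do_learn()  # sample a batch and update the policy network
        if step % target_update == 0:
            do_sync()   # copy the policy weights into the target network
        if step % evaluate_freq == 0:
            do_eval()   # run evaluation episodes and save a checkpoint

counts = {"learn": 0, "sync": 0, "eval": 0}
training_cadence(200_000, 50_000, 4, 10_000, 100_000,
                 do_learn=lambda: counts.update(learn=counts["learn"] + 1),
                 do_sync=lambda: counts.update(sync=counts["sync"] + 1),
                 do_eval=lambda: counts.update(eval=counts["eval"] + 1))
print(counts)  # {'learn': 37500, 'sync': 15, 'eval': 2}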
Example 3
TARGET_UPDATE = 10_000
WARM_STEPS = 50_000
MAX_STEPS = 50_000_000
EVALUATE_FREQ = 100_000

rand = random.Random()
rand.seed(GLOBAL_SEED)
new_seed = lambda: rand.randint(0, 1000_000)
if not os.path.exists(SAVE_PREFIX):
    os.mkdir(SAVE_PREFIX)

torch.manual_seed(new_seed())
# The number of threads here needs to be adjusted based on the number of CPU cores available
torch.set_num_threads(4)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
env = MyEnv(device)
agent = Agent(
    env.get_action_dim(),
    device,
    GAMMA,
    new_seed(),
    EPS_START,
    EPS_END,
    EPS_DECAY,
    restore=restore,
    rlmodel=rlmodel,
)
memory = ReplayMemory(STACK_SIZE + 1, MEM_SIZE, device)

#### Training ####
obs_queue: deque = deque(maxlen=5)
Example 4
BATCH_SIZE = 32
POLICY_UPDATE = 4
TARGET_UPDATE = 10_000
WARM_STEPS = 50_000
MAX_STEPS = 50_000_000
EVALUATE_FREQ = 100_000

rand = random.Random()
rand.seed(GLOBAL_SEED)
new_seed = lambda: rand.randint(0, 1000_000)
os.mkdir(SAVE_PREFIX)

torch.manual_seed(new_seed())
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
env = MyEnv(device)
agent = Agent(
    env.get_action_dim(),
    device,
    GAMMA,
    new_seed(),
    EPS_START,
    EPS_END,
    EPS_DECAY,
)
# memory = ReplayMemory(STACK_SIZE + 1, MEM_SIZE, device)
memory = Experience({
    'size': MEM_SIZE,
    'batch_size': BATCH_SIZE,
    'learn_start': WARM_STEPS,
    'steps': MAX_STEPS,
Example 5
        action_queue = action_queues[i]
        ve = versions[i]
        print('current lab:', ve, 'shade rate:', get_shade_time(action_queue))


if __name__ == "__main__":

    versions = vs0
    for version in versions:
        #set_trace()
        print(version)
        dueling = False if version.find('dueling') == -1 else True
        stable = False if version.find('stable') == -1 else True
        if stable:
            action_queue = []
        env = MyEnv(device)
        agent = Agent(env.get_action_dim(), device, GAMMA, new_seed(),
                      EPS_START, EPS_END, EPS_DECAY, dueling, pretrained,
                      stable * 0.1)
        if version.find('PER') != -1:
            memory = PERMemory(STACK_SIZE + 1, MEM_SIZE, device)
            #memory = Memory_Buffer_PER(MEM_SIZE)
        else:
            memory = ReplayMemory(STACK_SIZE + 1, MEM_SIZE, device)
            #memory = Memory_Buffer_PER(MEM_SIZE)

        #### Training ####
        obs_queue: deque = deque(maxlen=5)
        done = True

        avg_reward_arr = []
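Example 5 derives its configuration flags from substrings of the version name (the `False if version.find(...) == -1 else True` expressions). The same parsing can be written more compactly with the `in` operator; parse_flags below is a made-up helper used only for illustration:

def parse_flags(version):
    # Same checks as above: a flag is on when its name appears in the version string.
    return {
        "dueling": "dueling" in version,
        "stable": "stable" in version,
        "per": "PER" in version,
    }

print(parse_flags("dqn_dueling_PER"))
# -> {'dueling': True, 'stable': False, 'per': True}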
Example 6
import pathlib
import shutil
from collections import deque

from IPython import display as ipydisplay
import torch

from utils_env import MyEnv
from utils_drl import Agent

# In[2]:

target = 78
model_name = f"model_{target:03d}"
model_path = f"./models/{model_name}"
device = torch.device("cpu")
env = MyEnv(device)
agent = Agent(env.get_action_dim(),
              device,
              0.99,
              0,
              0,
              0,
              1,
              model_path,
              use_dueling=True,
              use_PR=True,
              use_DDQN=True)

# In[3]:

obs_queue = deque(maxlen=5)
Example 7
POLICY_UPDATE = 4
TARGET_UPDATE = 10_000
WARM_STEPS = 50_000
# WARM_STEPS = 50
MAX_STEPS = 50_000_000
EVALUATE_FREQ = 100_000  # evaluation frequency: pause to evaluate every 100_000 steps

rand = random.Random()  # independent random number generator
rand.seed(GLOBAL_SEED)  # seed it so the same sequence of random numbers is reproduced
new_seed = lambda: rand.randint(0, 1000_000)  # pick an integer in [0, 1_000_000]
os.mkdir(SAVE_PREFIX)  # create the "./models" directory

torch.manual_seed(new_seed())  # seed PyTorch's CPU random number generator with new_seed()
#device = torch.device("cuda" if torch.cuda.is_available() else "cpu")  # pick the device: GPU or CPU
device = torch.device("cpu")
env = MyEnv(device)
agent = Agent(  # initialize with the preset parameters
    env.get_action_dim(),  # returns 3; the three actions are ["NOOP", "RIGHT", "LEFT"]
    device,
    GAMMA,
    new_seed(),
    EPS_START,
    EPS_END,
    EPS_DECAY,
)
memory = ReplayMemory(STACK_SIZE + 1, MEM_SIZE,
                      device)  # circular buffer; the arguments are channels, capacity, and device, with capacity MEM_SIZE = 100_000

#### Training ####
obs_queue: deque = deque(maxlen=5)  # create the observation queue
done = True

BATCH_SIZE = 32
POLICY_UPDATE = 4
TARGET_UPDATE = 1_000
WARM_STEPS = 1_000
MAX_STEPS = 100_000
EVALUATE_FREQ = 1_000

rand = random.Random()
rand.seed(GLOBAL_SEED)
new_seed = lambda: rand.randint(0, 1000_000)
#os.mkdir(SAVE_PREFIX)

torch.manual_seed(new_seed())
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
env = MyEnv(device)
agent = Agent(
    env.get_action_dim(),
    device,
    GAMMA,
    new_seed(),
    EPS_START,
    EPS_END,
    EPS_DECAY,
)

prioritized = True
if prioritized:
    memory = MemoryBufferPER(STACK_SIZE + 1, MEM_SIZE, device)
else:
    memory = ReplayMemory(STACK_SIZE + 1, MEM_SIZE, device)
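The last snippet switches between MemoryBufferPER and the uniform ReplayMemory with the prioritized flag, but neither class's internals are shown here. As a rough, generic sketch of what proportional prioritized replay does (the TinyPER class below is illustrative only, not MemoryBufferPER's actual implementation): transitions are sampled with probability proportional to priority**alpha, and priorities are refreshed from the TD errors after each learning step.

import random

class TinyPER:
    """Generic sketch of proportional prioritized experience replay."""

    def __init__(self, capacity, alpha=0.6):
        self.capacity = capacity
        self.alpha = alpha      # how strongly priorities skew the sampling
        self.data = []          # stored transitions (circular buffer)
        self.priorities = []    # one priority per stored transition
        self.pos = 0            # next write position once the buffer is full

    def push(self, transition):
        # New transitions get the current maximum priority so they are replayed at least once.
        max_p = max(self.priorities, default=1.0)
        if len(self.data) < self.capacity:
            self.data.append(transition)
            self.priorities.append(max_p)
        else:
            self.data[self.pos] = transition
            self.priorities[self.pos] = max_p
            self.pos = (self.pos + 1) % self.capacity

    def sample(self, batch_size):
        # Sample indices with probability proportional to priority ** alpha.
        weights = [p ** self.alpha for p in self.priorities]
        indices = random.choices(range(len(self.data)), weights=weights, k=batch_size)
        return indices, [self.data[i] for i in indices]

    def update_priorities(self, indices, td_errors, eps=1e-5):
        # After a learning step, refresh the priorities from the new TD errors.
        for i, err in zip(indices, td_errors):
            self.priorities[i] = abs(err) + eps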