from Env_DQN import Environment
import torch
# Agent variants; import exactly one DQN implementation:
# from DQN import DQN
# from Dueling_DQN import DQN
# from DDQN import DQN
# from n_stepDQN import DQN
from Advance_DQN import DQN
from matplotlib import pyplot as plt
import numpy as np

plt.rcParams['font.sans-serif'] = ['SimHei']  # SimHei font so Chinese plot labels render correctly
plt.rcParams['axes.unicode_minus'] = False  # keep minus signs readable with a non-ASCII font
env = Environment()
flag = False
current = 10  # checkpoint step counter, advanced by 10 before each load
price_recode = list()  # values collected for the final plot
x_index = list()  # x-axis values for the final plot
model_reward = 0
for ij in range(0, 38):
    current += 10
    print(current)
    MODEL_NAME = 'model5/advanceDQN/' + str(env.name) + '_AE_Advance_dqn_' + str(current) + '.pt'  # checkpoint saved at this training step
    # Alternative checkpoint paths for the other agent variants:
    # MODEL_NAME = 'model5/advanceDQN/'+str(env.name)+'_Advance_dqn_'+str(current)+'.pt'
    # MODEL_NAME = 'model3/dqn/advance_dqn/AE_Advance_dqn2_'+str(current)+'.pt'
    # MODEL_NAME = 'model3/dqn/advance_dqn/Advance_dqn2_'+str(current)+'.pt'
    # MODEL_NAME = 'model2/dqn/advance_dqn/Use_AE_n-step_double_dueling_dqn_'+str(current)+'.pt'
    # MODEL_NAME = 'model2/dqn/advance_dqn/AE_n_stepDQN_'+str(current)+'.pt'
    # MODEL_NAME = 'model2/dqn/advance_dqn/dueling_dqn_'+str(current)+'.pt'
    # MODEL_NAME = 'model2/dqn/advance_dqn/AE_dueling_dqn_'+str(current)+'.pt'
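    # The original example is truncated here. What follows is a minimal sketch of
    # how the loop might continue — it assumes the checkpoint stores a state_dict,
    # that DQN exposes an eval_net (as in Example #2 below), and that Environment
    # follows a reset()/step() protocol returning (state, reward, done). All of
    # these are assumptions, not part of the original code.
    dqn = DQN()
    dqn.eval_net.load_state_dict(torch.load(MODEL_NAME))
    dqn.eval_net.eval()
    s = env.reset()
    done, episode_reward = False, 0.0
    while not done:
        with torch.no_grad():
            q_values = dqn.eval_net(torch.FloatTensor(s).unsqueeze(0))
        a = int(q_values.argmax())  # act greedily during evaluation
        s, r, done = env.step(a)
        episode_reward += r
    price_recode.append(episode_reward)
    x_index.append(current)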
Example #2
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
from Env_DQN import Environment
from matplotlib import pyplot as plt
from Net import Net

plt.rcParams['font.sans-serif'] = ['SimHei']
plt.rcParams['axes.unicode_minus'] = False
BATCH_SIZE = 128  # number of transitions sampled per training batch
LR = 0.01  # learning rate
TAU = 0.001  # soft-update coefficient for the target network
EPSILON = 0.9  # epsilon-greedy parameter: with probability EPSILON the agent exploits, otherwise it explores
GAMMA = 1  # reward discount factor
MEMORY_CAPACITY = 10000  # replay memory size
env = Environment()
Hidden_num = 128  # hidden-layer width of the Q-network
N_ACTIONS = env.action_dim  # size of the action space
N_STATES = env.state_dim + 2  # state dimension (environment state plus two extra features)

# Build the Q-learning (DQN) agent
class DQN(object):
    def __init__(self):
        # The two networks are identical; eval_net is updated on every step,
        # while target_net is only refreshed every 100 steps.
        self.eval_net, self.target_net = Net(N_STATES, N_ACTIONS, Hidden_num), Net(N_STATES, N_ACTIONS, Hidden_num)

        self.learn_step_counter = 0  # update target_net once this counter reaches the threshold
        self.memory_counter = 0  # number of transitions stored so far
        self.memory = np.zeros((MEMORY_CAPACITY, N_STATES * 2 + 2))  # replay memory; each row holds (s, a, r, s_)
        self.optimizer = torch.optim.Adam(self.eval_net.parameters(), lr=LR)
        self.loss_func = nn.MSELoss()
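    # Example #2 is also truncated at __init__. The methods below are a minimal
    # sketch of how such an agent is usually completed — epsilon-greedy action
    # selection, a ring-buffer replay memory, and a TD(0) update with a soft
    # (Polyak) target-network update driven by TAU. This is an assumption about
    # the missing code, not the original implementation.
    def choose_action(self, x):
        x = torch.unsqueeze(torch.FloatTensor(x), 0)
        if np.random.uniform() < EPSILON:  # exploit with probability EPSILON
            action = int(torch.argmax(self.eval_net(x), dim=1).item())
        else:  # otherwise explore uniformly at random
            action = np.random.randint(0, N_ACTIONS)
        return action

    def store_transition(self, s, a, r, s_):
        transition = np.hstack((s, [a, r], s_))
        index = self.memory_counter % MEMORY_CAPACITY  # overwrite the oldest entry
        self.memory[index, :] = transition
        self.memory_counter += 1

    def learn(self):
        # soft target update: target <- TAU * eval + (1 - TAU) * target
        for t_param, e_param in zip(self.target_net.parameters(), self.eval_net.parameters()):
            t_param.data.copy_(TAU * e_param.data + (1.0 - TAU) * t_param.data)
        self.learn_step_counter += 1

        # sample a random minibatch of stored transitions
        sample_index = np.random.choice(min(self.memory_counter, MEMORY_CAPACITY), BATCH_SIZE)
        b_memory = self.memory[sample_index, :]
        b_s = torch.FloatTensor(b_memory[:, :N_STATES])
        b_a = torch.LongTensor(b_memory[:, N_STATES:N_STATES + 1].astype(int))
        b_r = torch.FloatTensor(b_memory[:, N_STATES + 1:N_STATES + 2])
        b_s_ = torch.FloatTensor(b_memory[:, -N_STATES:])

        q_eval = self.eval_net(b_s).gather(1, b_a)  # Q(s, a) from the online network
        q_next = self.target_net(b_s_).detach()  # bootstrap target, gradients blocked
        q_target = b_r + GAMMA * q_next.max(1)[0].view(BATCH_SIZE, 1)
        loss = self.loss_func(q_eval, q_target)

        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()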