# Evaluate a sequence of saved DQN checkpoints and record the results for plotting.
from Env_DQN import Environment
import torch
# from DQN import DQN
# from Dueling_DQN import DQN
# from DDQN import DQN
# from n_stepDQN import DQN
from Advance_DQN import DQN
from matplotlib import pyplot as plt
import numpy as np

# Allow Chinese characters and minus signs to render correctly in matplotlib figures.
plt.rcParams['font.sans-serif'] = ['SimHei']
plt.rcParams['axes.unicode_minus'] = False

env = Environment()
flag = False
current = 10               # checkpoint index, advanced in steps of 10 per iteration
price_recode = list()      # recorded results for plotting
x_index = list()           # x-axis values for plotting
model_reward = 0

for ij in range(0, 38):
    current += 10
    print(current)
    MODEL_NAME = 'model5/advanceDQN/' + str(env.name) + '_AE_Advance_dqn_' + str(current) + '.pt'
    # MODEL_NAME = 'model5/advanceDQN/' + str(env.name) + '_Advance_dqn_' + str(current) + '.pt'
    # MODEL_NAME = 'model3/dqn/advance_dqn/AE_Advance_dqn2_' + str(current) + '.pt'
    # MODEL_NAME = 'model3/dqn/advance_dqn/Advance_dqn2_' + str(current) + '.pt'
    # MODEL_NAME = 'model2/dqn/advance_dqn/Use_AE_n-step_double_dueling_dqn_' + str(current) + '.pt'
    # MODEL_NAME = 'model2/dqn/advance_dqn/AE_n_stepDQN_' + str(current) + '.pt'
    # MODEL_NAME = 'model2/dqn/advance_dqn/dueling_dqn_' + str(current) + '.pt'
    # MODEL_NAME = 'model2/dqn/advance_dqn/AE_dueling_dqn_' + str(current) + '.pt'
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
from Env_DQN import Environment
from matplotlib import pyplot as plt
from Net import Net

# Allow Chinese characters and minus signs to render correctly in matplotlib figures.
plt.rcParams['font.sans-serif'] = ['SimHei']
plt.rcParams['axes.unicode_minus'] = False

BATCH_SIZE = 128         # number of transitions per training batch
LR = 0.01                # learning rate
TAU = 0.001              # soft-update rate for the target network
EPSILON = 0.9            # epsilon-greedy threshold: probability of exploiting rather than exploring
GAMMA = 1                # reward discount factor
MEMORY_CAPACITY = 10000  # replay-buffer capacity

env = Environment()
Hidden_num = 128
N_ACTIONS = env.action_dim
N_STATES = env.state_dim + 2


# Q-learning (DQN) agent
class DQN(object):
    def __init__(self):
        # The two networks are identical; eval_net is updated at every learning step,
        # while target_net is only refreshed from eval_net every 100 steps.
        self.eval_net, self.target_net = Net(N_STATES, N_ACTIONS, Hidden_num), Net(N_STATES, N_ACTIONS, Hidden_num)

        self.learn_step_counter = 0  # update target_net once this counter reaches the threshold
        self.memory_counter = 0      # for storing memory
        self.memory = np.zeros((MEMORY_CAPACITY, N_STATES * 2 + 2))  # initialize replay memory: one (s, a, r, s') transition per row
        self.optimizer = torch.optim.Adam(self.eval_net.parameters(), lr=LR)
        self.loss_func = nn.MSELoss()