class DQN(object):
    """Deep Q-Network agent with a flat replay buffer and TAU soft target
    updates (target_net is pulled toward eval_net on every learn() call)."""

    def __init__(self):
        # eval_net is trained each learn() step; target_net tracks it softly.
        self.eval_net, self.target_net = Net(N_STATES, N_ACTIONS, Hidden_num), Net(
            N_STATES, N_ACTIONS, Hidden_num)
        self.learn_step_counter = 0  # number of learn() calls so far
        self.memory_counter = 0  # total transitions ever stored
        # one row per transition: [state, action, reward, next_state]
        self.memory = np.zeros((MEMORY_CAPACITY, N_STATES * 2 + 2))
        self.optimizer = torch.optim.Adam(self.eval_net.parameters(), lr=LR)
        self.loss_func = nn.MSELoss()

    def choose_action(self, x):
        """Epsilon-greedy action selection.

        Returns the greedy action from eval_net with probability EPSILON,
        otherwise a uniformly random action index.
        """
        x = torch.unsqueeze(torch.FloatTensor(x), 0)
        if np.random.uniform() < EPSILON:  # greedy policy
            actions_value = self.eval_net.forward(x, False).detach()
            action = torch.max(actions_value, 1)[1].data.numpy()
            action = action[0]
        else:  # random exploration
            action = np.random.randint(0, N_ACTIONS)
        return action

    def store_transition(self, s, a, r, s_):
        """Store one (s, a, r, s_) transition, overwriting the oldest row
        once the buffer is full."""
        transition = np.hstack((s, [a, r], s_))  # pack into one flat row
        # replace the old memory with new memory (ring-buffer index)
        index = self.memory_counter % MEMORY_CAPACITY
        self.memory[index, :] = transition
        self.memory_counter += 1

    def learn(self):
        """One TD-learning step on a random minibatch; returns the loss value."""
        # soft target update: target <- (1 - TAU) * target + TAU * eval
        for target_param, param in zip(self.target_net.parameters(),
                                       self.eval_net.parameters()):
            target_param.data.copy_(target_param.data * (1.0 - TAU) +
                                    param.data * TAU)
        self.learn_step_counter += 1
        # BUG FIX: sample only rows that have actually been written; the
        # original sampled over the full MEMORY_CAPACITY and could train on
        # all-zero placeholder transitions before the buffer filled up.
        pool = min(self.memory_counter, MEMORY_CAPACITY)
        sample_index = np.random.choice(pool, BATCH_SIZE)
        b_memory = self.memory[sample_index, :]
        b_s = torch.FloatTensor(b_memory[:, :N_STATES])
        b_a = torch.LongTensor(b_memory[:, N_STATES:N_STATES + 1].astype(int))
        b_r = torch.FloatTensor(b_memory[:, N_STATES + 1:N_STATES + 2])
        b_s_ = torch.FloatTensor(b_memory[:, -N_STATES:])
        q_eval = self.eval_net(b_s)
        # gather along columns to pick the Q-value of the action actually taken
        q_eval = torch.gather(q_eval, 1, b_a)
        # detach: the target net must not receive gradients from this loss;
        # its parameters are only moved by the soft update above
        q_target = self.target_net(b_s_).detach()
        y = b_r + GAMMA * q_target.max(1)[0].view(BATCH_SIZE, 1)  # shape (batch, 1)
        loss = self.loss_func(q_eval, y)
        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()
        return loss.data.item()
def train_Net(self, data_files_dic="./data/", normalized=False, percent=0.9):
    """Train a Net model and save it to ./models/Net_model_<stride>.pth,
    logging progress to ./log/train_Net_<stride>.txt.

    Args:
        data_files_dic: directory containing the training data files.
        normalized: forwarded to getData; whether to normalize the data.
        percent: forwarded to getData; train/test split ratio.
    """
    # BUG FIX: the log file was opened with open() and never closed; the
    # context manager guarantees it is flushed and closed even on error.
    with open("./log/train_Net_" + str(self.stride) + ".txt", "w") as f:
        print("-------------------------------------------")
        f.write("\n" + "-------------------------------------------")
        f.write("\n" + "Train Net Model...")

        # Get train data and test data (the test split is unused here).
        train_data_x, train_data_y, test_data_x, test_data_y = self.getData(
            data_files_dic=data_files_dic, normalized=normalized,
            percent=percent)

        # Report the size of the training data.
        print("Train data shape:")
        f.write("\n" + "Train data shape:")
        print(train_data_x.shape)
        f.write("\n" + str(train_data_x.shape))
        print(train_data_y.shape)
        f.write("\n" + str(train_data_y.shape))

        train_data_x = torch.Tensor(train_data_x)
        train_data_y = torch.Tensor(train_data_y)
        trainset = DataSet(train_data_x, train_data_y)
        trainloader = DataLoader(trainset, batch_size=16, shuffle=False)

        # Build a fresh Net model (a saved model could be loaded instead).
        net = Net(self.input_size * self.stride, self.output_size)
        # Optimize all net parameters with Adam.
        optimizer = torch.optim.Adam(net.parameters(), lr=self.learning_rate)
        # The loss function uses mean square error (MSE).
        loss_func = nn.MSELoss()

        for step in range(self.EPOCH):
            total_loss = 0
            for tx, ty in trainloader:
                output = net.forward(torch.unsqueeze(tx, dim=0))
                loss = loss_func(torch.squeeze(output), ty)
                # clear gradients for this training step
                optimizer.zero_grad()
                # back propagation, compute gradients
                loss.backward()
                optimizer.step()
                total_loss += float(loss)
            # log per-epoch total loss
            f.write("\n" + str(step) + " " + str(float(total_loss)))

        torch.save(net, "./models/Net_model_" + str(self.stride) + ".pth")
        f.write("\n" + "Save Net Model")
        print("Save Net Model")
def online_train_Net(self, train_path, model_path, stride, EPOCH):
    """Retrain a Net model online on data under train_path and save it to
    model_path/Net_model_<stride>.pth.

    Args:
        train_path: directory with the online training data files.
        model_path: directory the trained model is saved into.
        stride: window stride; also stored on self and used in the filename.
        EPOCH: number of training epochs; stored on self.
    """
    print("-------------------------------------------")
    print("Online Train Net Model...")
    self.stride = stride
    self.EPOCH = EPOCH

    # Fetch the (normalized) online training set.
    train_data_x, train_data_y = self.getOnlineData(
        data_files_dic=train_path, normalized=True)

    # Report the size of the training data.
    print("Train data shape:")
    print(train_data_x.shape)
    print(train_data_y.shape)

    loader = DataLoader(
        DataSet(torch.Tensor(train_data_x), torch.Tensor(train_data_y)),
        batch_size=16, shuffle=False)

    # Fresh model, Adam optimizer, mean-squared-error objective.
    net = Net(self.input_size * self.stride, self.output_size)
    optimizer = torch.optim.Adam(net.parameters(), lr=self.learning_rate)
    loss_func = nn.MSELoss()

    for _ in range(self.EPOCH):
        epoch_loss = 0
        for batch_x, batch_y in loader:
            prediction = net.forward(torch.unsqueeze(batch_x, dim=0))
            loss = loss_func(torch.squeeze(prediction), batch_y)
            optimizer.zero_grad()  # clear gradients for this step
            loss.backward()        # back-propagate
            optimizer.step()
            epoch_loss += float(loss)

    torch.save(net, model_path + "/Net_model_" + str(self.stride) + ".pth")
    print("Save Online Net Model")
# ----------------------------- net, opt, lr, loss ----------------------------
net = Net()
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=args.init_lr, momentum=0.9)

# -------------------------------- train & test -------------------------------
for epoch in range(50):
    sum_loss = 0.0
    adjust_lr_by_epoch(optimizer, epoch, args.init_lr)
    for batch_no in range(1, provider.train_batch_num + 1):
        imgs, labels = provider.next()
        # NHWC uint8 -> NCHW float in [0, 1]
        imgs = imgs.permute(0, 3, 1, 2).float() / 255
        pred = net.forward(imgs)
        # Equivalent to loss = criterion.forward(pred, labels).
        # Do NOT write loss = nn.CrossEntropyLoss(pred, labels) — that
        # constructs a new module instead of computing the loss.
        loss = criterion(pred, labels)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        sum_loss += loss.item()
        if batch_no % WATCH_LOSS_PER_BATCH == 0:
            print("[{}, {}] loss: {:.5f} lr: {:.5f}".format(
                epoch, batch_no, sum_loss / WATCH_LOSS_PER_BATCH,
                get_lr(optimizer)))
            # BUG FIX: reset the window accumulator so the printed value is
            # the mean loss over the last WATCH_LOSS_PER_BATCH batches; the
            # original kept accumulating, so later prints showed a cumulative
            # epoch sum divided by the window size.
            sum_loss = 0.0
class DQN(object):
    """DQN agent variant with split replay buffers (separate state / action /
    reward / next-state arrays), an n-step-return update in learn() and a
    Double-DQN update in learn2()."""

    def __init__(self):
        # The two nets share one architecture; eval_net is trained every step.
        # (Original comment said target_net updates every 100 steps, but the
        # code actually performs a TAU soft update on every learn() call.)
        self.eval_net, self.target_net = Net(N_STATES, N_ACTIONS, Hidden_num), Net(
            N_STATES, N_ACTIONS, Hidden_num)
        self.learn_step_counter = 0  # counts learn() calls
        self.memory_counter = 0  # for storing memory
        self.memory = np.zeros((MEMORY_CAPACITY, N_STATES * 2 + 2))  # initialize memory
        # Split replay storage; the combined self.memory above is kept but
        # no longer written by store_transition below.
        self.memory_state = np.zeros((MEMORY_CAPACITY, N_STATES))
        self.memory_next_state = np.zeros((MEMORY_CAPACITY, N_STATES))
        self.memory_action = np.zeros((MEMORY_CAPACITY, 1))
        self.memory_reward = np.zeros((MEMORY_CAPACITY, 1))
        self.optimizer = torch.optim.Adam(self.eval_net.parameters(), lr=LR)
        self.loss_func = nn.MSELoss()

    # choose an action
    def choose_action(self, x):
        """Epsilon-greedy: greedy action w.p. EPSILON, else uniform random."""
        x = torch.unsqueeze(torch.FloatTensor(x), 0)
        if np.random.uniform() < EPSILON:  # greedy policy
            actions_value = self.eval_net.forward(x, False).detach()
            action = torch.max(actions_value, 1)[1].data.numpy()
            action = action[0]
        else:  # random
            action = np.random.randint(0, N_ACTIONS)
        return action

    # store a transition
    def store_transition(self, s, a, r, s_):
        """Write one (s, a, r, s_) transition into the split ring buffers."""
        transition = np.hstack((s, [a, r], s_))  # pack the parameters together
        # replace the old memory with new memory
        index = self.memory_counter % MEMORY_CAPACITY
        self.memory_state[index, :] = s
        self.memory_next_state[index, :] = s_
        self.memory_action[index, :] = a
        self.memory_reward[index, :] = r
        # self.memory[index, :] = transition
        self.memory_counter += 1

    def learn(self):
        """N-step TD update on a random minibatch; returns the loss value."""
        # target parameter update (soft update toward eval_net)
        for target_param, param in zip(self.target_net.parameters(),
                                       self.eval_net.parameters()):
            target_param.data.copy_(target_param.data * (1.0 - TAU) +
                                    param.data * TAU)
        self.learn_step_counter += 1
        # learning step
        # NOTE(review): sampling over the full MEMORY_CAPACITY can return
        # all-zero rows while memory_counter < MEMORY_CAPACITY — confirm the
        # buffer is warmed up before learn() is called.
        sample_index = np.random.choice(MEMORY_CAPACITY, BATCH_SIZE)
        # bound = number of extra reward steps to accumulate; depends on the
        # globals T and N, which are defined elsewhere in the file —
        # presumably episode horizon and max n-step length; TODO confirm.
        bound = min(T - sample_index.max(), N) - 1
        s_memory = list()
        next_s_memory = list()
        a_memory = list()
        r_memory = list()
        for index in sample_index:
            # per-sample slice of the next (bound + 1) rewards
            # NOTE(review): the reward slice is summed undiscounted below —
            # a discounted n-step return would weight by GAMMA**k; confirm.
            r_memory.append(self.memory_reward[index:index + bound + 1, ])
        s_memory.append(self.memory_state[sample_index])
        next_s_memory.append(self.memory_next_state[sample_index + bound])
        a_memory.append(self.memory_action[sample_index])
        # b_memory = self.memory[sample_index, :]
        b_s = torch.tensor(s_memory, dtype=torch.float32).squeeze()
        b_s_ = torch.tensor(next_s_memory, dtype=torch.float32).squeeze()
        b_r = torch.tensor(r_memory, dtype=torch.float32)
        b_r = torch.sum(b_r, dim=1)  # sum the n-step rewards per sample
        b_a = torch.tensor(a_memory, dtype=torch.long).squeeze(0)
        # q_eval w.r.t the action in experience
        q_eval = self.eval_net(b_s)
        q_eval = q_eval.gather(1, b_a)  # shape (batch, 1)
        q_next = self.target_net(
            b_s_).detach()  # detach: do not backprop into the target net;
        # its update is the soft update above
        q_target = b_r + GAMMA * q_next.max(1)[0].view(BATCH_SIZE,
                                                       1)  # shape (batch, 1)
        loss = self.loss_func(q_eval, q_target)
        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()
        return loss.data.item()

    def learn2(self):
        """Double-DQN update: eval_net selects the argmax action, target_net
        evaluates it. No target-network soft update is performed here."""
        # NOTE(review): once memory_counter exceeds MEMORY_CAPACITY these
        # indices can run past the buffers — confirm callers cap it.
        sample_index = np.random.choice(self.memory_counter, BATCH_SIZE)
        s_memory = list()
        next_s_memory = list()
        a_memory = list()
        r_memory = list()
        r_memory.append(self.memory_reward[sample_index])
        s_memory.append(self.memory_state[sample_index])
        next_s_memory.append(self.memory_next_state[sample_index])
        a_memory.append(self.memory_action[sample_index])
        b_s = torch.tensor(s_memory, dtype=torch.float32).squeeze()
        b_s_ = torch.tensor(next_s_memory, dtype=torch.float32).squeeze()
        b_r = torch.tensor(r_memory, dtype=torch.float32)
        # NOTE(review): b_r is (1, BATCH, 1) here, so summing dim=1 collapses
        # the whole batch to a single total reward that then broadcasts over
        # q_target — this differs from learn() and looks unintended; verify.
        b_r = torch.sum(b_r, dim=1)
        b_a = torch.tensor(a_memory, dtype=torch.long).squeeze(0)
        q_eval = self.eval_net(b_s)
        q_eval = q_eval.gather(1, b_a)  # shape (batch, 1)
        # Double DQN: action chosen by eval_net, value read from target_net.
        action = torch.argmax(self.eval_net(b_s_), dim=1).view(BATCH_SIZE, 1)
        q_next = self.target_net(b_s_).gather(
            dim=1, index=action).detach()  # detach: no backprop into target
        q_target = b_r + GAMMA * q_next.view(BATCH_SIZE, 1)
        loss = self.loss_func(q_eval, q_target)
        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()
        return loss.data.item()
# per-epoch counters
batch_loss = 0
batch_acc = 0
val_acc = 0
val_loss = 0

# train
total = 0
train_acc = 0
train_loss = 0

for i in range(60000 // batch_size):
    imgs, labs = data.next_batch(batch_size)

    # training: forward pass, backward pass, then parameter update
    sf_out = net.forward(imgs)
    net.backward(sf_out - labs)
    net.Gradient(alpha=learning_rate, weight_decay=0.01)

    # statistics: count correct argmax predictions in this batch
    train_acc += sum(
        1 for j in range(batch_size)
        if np.argmax(sf_out[j]) == np.argmax(labs[j]))
    total += batch_size

    # report and reset the running accuracy every 100 batches
    if i % 100 == 0:
        print("epoch=%d batchs=%d train_acc: %.4f " % (epoch, i, train_acc / total))
        train_acc = 0
        total = 0
teach = gen_labeled(input_size)
test = gen_labeled(test_size)

# view
viewTest = prepareView("XOR Test")
points = drawPoints(viewTest, test)

# learn: train indefinitely, evaluating/redrawing every draw_epoch epochs
epoch = 0
while(True):
    for point in teach:
        y = net.forward(point.data)
        net.backwards([point.label], y, learning_rate)
    if epoch % draw_epoch == 0:
        # test: accumulate squared error over the test set
        losses = 0
        for point in test:
            y = net.forward(point.data)[0]
            loss = 0.5*(point.label - y)**2
            # NOTE(review): overwriting point.label with the prediction feeds
            # drawPoints below, but it also destroys the ground-truth label,
            # so on later evaluations the loss compares against the previous
            # prediction rather than the true label — confirm this is intended.
            point.label = y
            losses += loss
        print("Loss({}): {}".format(epoch, losses))
        points = drawPoints(viewTest, test, points)
    # BUG FIX: epoch was never incremented, so `epoch % draw_epoch == 0` was
    # always true and the loop evaluated/redrew every pass, printing
    # "Loss(0)" forever.
    epoch += 1