# Computing actions by using DRL
delta = drl(fragments).double().squeeze(-1)

# Computing reward
pad_delta = F.pad(delta, [1, 0])
delta_diff = (pad_delta[:, 1:] - pad_delta[:, :-1])
reward = torch.sum(delta * returns - c * torch.abs(delta_diff))

# Updating DRL
optimizer.zero_grad()
(-reward).backward()
optimizer.step()

# Recording and showing the information
reward_meter.append(reward.item())
progress_bar.set_description(
    '[Epoch %d][Iteration %d][Reward: %.4f]'
    % (e, i, reward_meter.get_average(-1)))
progress_bar.update()

if e % save_per_epoch == 0:
    torch.save(drl.state_dict(), os.path.join(log_src, 'drl.pkl'))

reward_meter.step()

# Save the model and reward history
torch.save(drl.state_dict(), os.path.join(log_src, 'drl.pkl'))
np.save(os.path.join(log_src, 'drl_reward.npy'), reward_meter.get_average())

# Plot the reward curve
plt.plot(reward_meter.get_average())
plt.show()
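The reward maximised above is the additive trading profit: the position delta_t is multiplied by the return r_t, and every change of position is charged a transaction-cost penalty c * |delta_t - delta_{t-1}|. F.pad prepends a zero position so that opening the very first trade is also penalised. The following is a minimal, self-contained sketch of the same reward on dummy tensors; the (batch, T) shapes and the value of c are assumptions for illustration, not values taken from the training script.

import torch
import torch.nn.functional as F

# Dummy positions and returns of shape (batch, T); in the loop above,
# delta comes from the network and returns from the dataloader.
delta = torch.tanh(torch.randn(4, 50, dtype=torch.float64))   # positions in [-1, 1]
returns = torch.randn(4, 50, dtype=torch.float64) * 0.01      # per-step price returns
c = 1e-3                                                       # assumed transaction-cost factor

# Prepend a zero so delta_diff[:, t] = delta[:, t] - delta[:, t-1], with an
# initial position of zero before the first step.
pad_delta = F.pad(delta, [1, 0])
delta_diff = pad_delta[:, 1:] - pad_delta[:, :-1]

# Profit from holding the position minus the cost of changing it.
reward = torch.sum(delta * returns - c * torch.abs(delta_diff))
print(reward.item())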
# Computing reward
pad_delta = F.pad(delta, [1, 0])
delta_diff = (pad_delta[:, 1:] - pad_delta[:, :-1])
reward = torch.sum(delta * returns - c * torch.abs(delta_diff))

# Updating FDDR
optimizer.zero_grad()
(-reward).backward()
optimizer.step()

# Recording and showing the information
train_reward_meter.append(reward.item())
progress_bar.set_description(
    '[Epoch %d][Iteration %d][Reward: train = %.4f]'
    % (e, i, train_reward_meter.get_average(-1)))
progress_bar.update()

fddr.eval()
with torch.no_grad():
    for i, (returns, fragments, mean, var) in enumerate(test_dataloader):
        # Computing actions by using FDDR
        delta = fddr(fragments, running_mean=mean, running_var=var).double().squeeze(-1)

        # Computing reward
        pad_delta = F.pad(delta, [1, 0])
        delta_diff = (pad_delta[:, 1:] - pad_delta[:, :-1])
        reward = torch.sum(delta * returns - c * torch.abs(delta_diff))
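The evaluation pass above switches the model to eval() and wraps the loop in torch.no_grad(), so layers such as dropout and batch normalisation use their inference behaviour and no autograd graph is built. A small stand-alone illustration of that pattern follows; the tiny nn.Sequential network is only a stand-in for the real FDDR model.

import torch
import torch.nn as nn

# Stand-in network; dropout and batch norm are the layers whose behaviour
# changes between training and evaluation mode.
net = nn.Sequential(nn.Linear(8, 16), nn.BatchNorm1d(16),
                    nn.Dropout(0.5), nn.Linear(16, 1))
x = torch.randn(4, 8)

net.train()
out_train = net(x)              # dropout active, batch statistics used
print(out_train.requires_grad)  # True: autograd tracks the forward pass

net.eval()
with torch.no_grad():
    out_eval = net(x)           # dropout off, running statistics used
print(out_eval.requires_grad)   # False: no graph is built, saving memory

After such an evaluation pass, the model would normally be switched back with fddr.train() before the next training epoch.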
# Computing actions by using DDRL
delta = ddrl(fragments).double().squeeze(-1)

# Computing reward
pad_delta = F.pad(delta, [1, 0])
delta_diff = (pad_delta[:, 1:] - pad_delta[:, :-1])
reward = torch.sum(delta * returns - c * torch.abs(delta_diff))

# Updating DDRL
optimizer.zero_grad()
(-reward).backward()
optimizer.step()

# Recording and showing the information
reward_meter.append(reward.item())
progress_bar.set_description(
    '[Epoch %d][Iteration %d][Reward: %.4f]'
    % (e, i, reward_meter.get_average(-1)))
progress_bar.update()

if e % save_per_epoch == 0:
    torch.save(ddrl.state_dict(), os.path.join(log_src, 'ddrl.pkl'))

reward_meter.step()

# Save the model and reward history
torch.save(ddrl.state_dict(), os.path.join(log_src, 'ddrl.pkl'))
np.save(os.path.join(log_src, 'ddrl_reward.npy'), reward_meter.get_average())

# Plot the reward curve
plt.plot(reward_meter.get_average())
plt.show()
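The training loops rely on a reward_meter helper whose implementation is not shown here: append() collects per-iteration rewards, step() closes an epoch, and get_average() returns averages for the progress bar, for np.save, and for plotting. The class below is only a hypothetical reconstruction of that interface, assuming get_average() with no argument returns the per-epoch history and get_average(-1) returns the running average of the epoch in progress.

import numpy as np

class RewardMeter:
    """Hypothetical reward meter matching the calls used above."""

    def __init__(self):
        self.epoch_averages = []   # mean reward of each finished epoch
        self.current = []          # rewards collected in the epoch in progress

    def append(self, reward):
        # Record the reward of one training iteration.
        self.current.append(float(reward))

    def step(self):
        # Close the current epoch and store its mean reward.
        if self.current:
            self.epoch_averages.append(float(np.mean(self.current)))
        self.current = []

    def get_average(self, index=None):
        # No argument: full per-epoch history (what gets saved and plotted).
        if index is None:
            return np.array(self.epoch_averages)
        # With an index (-1 in the progress bar): running average of the
        # epoch in progress, falling back to the stored history if empty.
        if self.current:
            return float(np.mean(self.current))
        return self.epoch_averages[index] if self.epoch_averages else 0.0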