# One actor-critic update step: estimate a reward and pick crop actions from
# the feature map, measure the reward obtained from the resulting saliency
# map, and regress the estimate towards the measured value.

# Keep the first 32 entries along the last axis of the feature map, then
# adaptive-max-pool the result to a 32x32 spatial output.
AC_input = F.adaptive_max_pool2d(F_map_1[..., :32], 32)

# based on feature map get reward and actions
actions, estimated_reward = ActorCritic.forward(AC_input)

# crop the feature map
# NOTE(review): the actions are turned into plain Python scalars with
# .item(), so no gradient flows back through the crop itself -- confirm the
# actor is meant to be trained only via `estimated_reward`.
cropped_F_map = Assistant.crop_feature_map(
    actions[0].squeeze().item(),
    actions[1].squeeze().item(),
    F_map_1,
).to(device)

# pass the cropped feature map to FasteNet to get the saliency map
# .detach() is required before .numpy(): PyTorch refuses to convert a tensor
# that requires grad, and it is a no-op when the tensor does not.
saliency_map = (
    FasteNet.module_two(cropped_F_map).detach().to('cpu').squeeze().numpy()
)

# calculate 'reward' from saliency map
Assistant.parse_saliency_map(saliency_map)
reward = Assistant.calculate_loss()

# zero the gradients of both actor and critic
Actor_optimizer.zero_grad()
Critic_optimizer.zero_grad()

# calculate loss: squared error between estimated and measured reward
loss = (estimated_reward - reward) ** 2

# backpropagate loss and update both networks
loss.backward()
Actor_optimizer.step()
Critic_optimizer.step()

# checkpoint our training