Example #1
    # run the actor-critic on the feature map to get actions and an estimated reward
    actions, estimated_reward = ActorCritic(AC_input)
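    # actions[0] and actions[1] are used as the crop parameters below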

    # crop the feature map
    cropped_F_map = Assistant.crop_feature_map(actions[0].squeeze().item(),
                                               actions[1].squeeze().item(),
                                               F_map_1).to(device)

    # pass the cropped feature map through FasteNet to get a saliency map
    saliency_map = FasteNet.module_two(cropped_F_map).detach().cpu().squeeze().numpy()

    # calculate 'reward' from saliency map
    Assistant.parse_saliency_map(saliency_map)
    reward = Assistant.calculate_loss()

    # zero the gradients of both actor and critic
    Actor_optimizer.zero_grad()
    Critic_optimizer.zero_grad()

    # loss: squared error between the estimated and actual reward
    loss = (estimated_reward - reward)**2

    # backpropagate the loss and update both networks
    loss.backward()
    Actor_optimizer.step()
    Critic_optimizer.step()

    # checkpoint our training
    weights_file = ActorCritic_helper.training_checkpoint(loss=loss,