Example #1
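    # Record stats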
    training_info["epoch mean rewards"].append(epoch_rewards[-1])
    training_info["value net loss"].append(critic_loss_total)
    training_info["AE Hash loss"].append(ae_hash_loss)
    # Refresh the rolling average once every num_avg_epoch epochs.
    if (i_epoch + 1) % num_avg_epoch == 0:
        training_info["past %d epochs mean reward" % num_avg_epoch] = \
            (sum(training_info["epoch mean rewards"][-num_avg_epoch:]) / num_avg_epoch) \
            if len(training_info["epoch mean rewards"]) >= num_avg_epoch else 0

    # Print stats
    print("\n\n=============  Epoch: %d  =============" % (i_epoch + 1))
    print("epoch mean durations: %f" % (epoch_durations[-1]))
    print("epoch mean rewards: %f" % (epoch_rewards[-1]))
    print("Max reward achieved: %f" % training_info["max reward achieved"])
    print("value net loss: %f" % critic_loss_total)
    print("Autoencoder Hashing model loss: %f" % ae_hash_loss)

    # Plot stats
    if plot:
        plot_durations(training_info["epoch mean rewards"],
                       training_info["value net loss"],
                       training_info["AE Hash loss"])

    # Update counter
    i_epoch += 1

    # Every save_ckpt_interval epochs, save a checkpoint tagged with the current i_epoch.
    if i_epoch % save_ckpt_interval == 0:
        save_checkpoint(ckpt_dir, actor_critic, ae_hash, ae_hash_optim,
                        i_epoch, **training_info)
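The save_checkpoint helper is not defined in these snippets. A minimal sketch consistent with the call in Example #1, assuming the models and optimizer are PyTorch objects (the exact signature and file layout are assumptions, not the project's actual implementation):

import os
import torch

def save_checkpoint(ckpt_dir, actor_critic, ae_hash, ae_hash_optim, i_epoch, **training_info):
    # Persist model/optimizer state dicts plus the accumulated training statistics.
    os.makedirs(ckpt_dir, exist_ok=True)
    checkpoint = {
        "i_epoch": i_epoch,
        "actor_critic": actor_critic.state_dict(),
        "ae_hash": ae_hash.state_dict(),
        "ae_hash_optim": ae_hash_optim.state_dict(),
        "training_info": training_info,
    }
    torch.save(checkpoint, os.path.join(ckpt_dir, "ckpt_epoch_%d.pt" % i_epoch))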
Example #2
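    # Record stats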
    training_info["epoch mean durations"].append(epoch_durations[-1])
    training_info["epoch mean rewards"].append(epoch_rewards[-1])
    training_info["extrinsic value net loss"].append(ex_value_net_mse)
    training_info["intrinsic value net loss"].append(in_value_net_mse)
    # Refresh the rolling average once every num_avg_epoch epochs.
    if (i_epoch + 1) % num_avg_epoch == 0:
        training_info["past %d epochs mean reward" % num_avg_epoch] = \
            (sum(training_info["epoch mean rewards"][-num_avg_epoch:]) / num_avg_epoch) \
            if len(training_info["epoch mean rewards"]) >= num_avg_epoch else 0

    # Print stats
    print("\n\n=============  Epoch: %d  =============" % (i_epoch + 1))
    print("epoch mean durations: %f" % (epoch_durations[-1]))
    print("epoch mean rewards: %f" % (epoch_rewards[-1]))
    print("Max reward achieved: %f" % training_info["max reward achieved"])
    print("extrinsic value net loss: %f" % ex_value_net_mse)
    print("intrinsic value net loss: %f" % in_value_net_mse)

    # Plot stats
    if plot:
        plot_durations(training_info["epoch mean rewards"],
                       training_info["extrinsic value net loss"],
                       training_info["intrinsic value net loss"])

    # Update counter
    i_epoch += 1

    # Every save_ckpt_interval epochs, save a checkpoint tagged with the current i_epoch.
    # if i_epoch % save_ckpt_interval == 0:
    #     save_checkpoint(ckpt_dir, policy_net, value_net_in, value_net_ex,
    #                     valuenet_in_optimizer, valuenet_ex_optimizer, simhash, i_epoch, **training_info)
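plot_durations is likewise not shown; the examples only pass it one or more metric lists each epoch. A hedged sketch of such a live-plotting helper, assuming a matplotlib backend (the name and behavior are inferred from the call sites, not taken from the project):

import matplotlib.pyplot as plt

def plot_durations(*series, labels=None):
    # Redraw a single figure with one line per metric series passed in.
    plt.figure(1)
    plt.clf()
    plt.xlabel("Epoch")
    for i, values in enumerate(series):
        plt.plot(values, label=labels[i] if labels else "series %d" % i)
    plt.legend()
    plt.pause(0.001)  # let the GUI backend render the updated figure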
Example #3
    # Record stats
    training_info["epoch mean durations"].append(epoch_durations[-1])
    training_info["epoch mean rewards"].append(epoch_rewards[-1])
    # Refresh the rolling average once every num_avg_epoch epochs.
    if (i_epoch + 1) % num_avg_epoch == 0:
        training_info["past %d epochs mean reward" % num_avg_epoch] = \
            (sum(training_info["epoch mean rewards"][-num_avg_epoch:]) / num_avg_epoch) \
            if len(training_info["epoch mean rewards"]) >= num_avg_epoch else 0

    # Print stats
    print("\n\n=============  Epoch: %d  =============" % (i_epoch + 1))
    print("epoch mean durations: %f" % (epoch_durations[-1]))
    print("epoch mean rewards: %f" % (epoch_rewards[-1]))
    print("Max reward achieved: %f" % training_info["max reward achieved"])

    # Plot stats
    if plot:
        plot_durations(training_info["epoch mean rewards"])

    # Update counter
    i_epoch += 1

    # Every save_ckpt_interval epochs, save a checkpoint tagged with the current i_epoch.
    if i_epoch % save_ckpt_interval == 0:
        save_checkpoint(ckpt_dir,
                        policy_net,
                        policynet_optimizer,
                        i_epoch,
                        policy_lr=policy_lr,
                        **training_info)
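All three examples treat training_info as a plain dict of metric lists plus a few scalar entries. A minimal initialization consistent with the keys used in Example #3 (an assumption; the real project may seed further keys, e.g. the per-loss lists of Examples #1 and #2):

num_avg_epoch = 10  # hypothetical value
training_info = {
    "epoch mean durations": [],
    "epoch mean rewards": [],
    "max reward achieved": 0.0,
    "past %d epochs mean reward" % num_avg_epoch: 0,
}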