Ejemplo n.º 1
0
    
    # Two policies built from the same learner, action count and RNG; they
    # differ only in the last argument (1. for training, 0.1 for testing).
    # NOTE(review): presumably that argument is the exploration rate epsilon
    # -- confirm against EpsilonGreedyPolicy.
    train_policy = EpsilonGreedyPolicy(learning_algo, env.nActions(), rng, 1.)
    test_policy = EpsilonGreedyPolicy(learning_algo, env.nActions(), rng, 0.1)

    # --- Instantiate agent ---
    agent = NeuralAgent(
        env,
        learning_algo,
        parameters.replay_memory_size,
        # Largest first component over all entries of env.inputDimensions().
        # NOTE(review): presumably the longest history any input needs -- confirm.
        max(env.inputDimensions()[i][0] for i in range(len(env.inputDimensions()))),
        parameters.batch_size,
        rng,
        train_policy=train_policy,
        test_policy=test_policy)

    # --- Load the saved network and test it ---
    # A hard-coded checkpoint name can be used instead, e.g.:
    # agent.setNetwork("test_4165747fe50541da92a5ea2698b190b90bc006d5.epoch=97")
    # input_nnet is defined outside this excerpt.
    agent.setNetwork(input_nnet)

    # Report the agent's counters before anything has been run.
    # NOTE(review): avg here is the raw total reward, not a per-episode mean,
    # even though it is printed under "average per episode" -- presumably to
    # avoid dividing by an episode count of zero; confirm.
    avg = agent._total_mode_reward
    print(" _total_mode_reward: ", agent._total_mode_reward, ", nmbr of episode: ", agent._totalModeNbrEpisode, ", average per episode: ", avg)

    # The same step budget is passed to both startMode and run below.
    Epoch_length = 500
    # Original author's note: mode 3 has planning depth 6, mode 2 has planning 3.
    mode = parameters.mode
    agent.startMode(mode, Epoch_length)
    # NOTE(review): presumably one epoch of Epoch_length steps -- confirm
    # against NeuralAgent.run.
    agent.run(1, Epoch_length)

    # Report again after the run, this time as total reward divided by the
    # number of episodes.
    # NOTE(review): raises ZeroDivisionError if _totalModeNbrEpisode is still 0
    # after the run -- confirm the agent always counts at least one episode.
    avg = agent._total_mode_reward / agent._totalModeNbrEpisode
    print(" _total_mode_reward: ", agent._total_mode_reward, ", nmbr of episode: ", agent._totalModeNbrEpisode, ", average per episode: ", avg)
    
    #just testing the saved nnet (possibly by visualizing the actions in the env)
Ejemplo n.º 2
0
        show_score=True,
        summarize_every=1))

    # --- Run the experiment ---
    # Make sure the "params" output directory exists before anything is
    # dumped into it.
    try:
        os.mkdir("params")
    except OSError:
        # An already-existing directory is fine. Any other failure (e.g. a
        # permission problem or a regular file named "params") is re-raised
        # instead of being silently swallowed.
        if not os.path.isdir("params"):
            raise

    # --- Load / save network weights ---
    # The final network for this experiment is stored under "<fname>_final".
    savedPath = fname + "_final"
    # The "nnets/" prefix is hard-coded for the existence check only;
    # setNetwork receives the bare name. The original author calls this ugly
    # but notes it mirrors what dumpNetwork does.
    if os.path.exists("nnets/" + savedPath):
        print("Loading saved net: " + savedPath)
        agent.setNetwork(savedPath)
    # In test mode, switch the agent to the environment's validation mode
    # before running.
    # NOTE(review): 10000 is presumably the validation epoch length in steps
    # -- confirm against startMode.
    if test:
        agent.startMode(PLE_env.VALIDATION_MODE, 10000)

    # Persist the attributes of `parameters` next to the results so the run
    # can be identified later.
    # NOTE(review): dump/load are presumably joblib's (".jldump") -- confirm
    # against the file's imports.
    dump(vars(parameters), "params/" + fname + ".jldump")
    agent.run(parameters.epochs, parameters.steps_per_epoch)
    if test:
        # Test run: only summarize performance; the network is not saved.
        agent.summarizeTestPerformance()

    else:
        # Training run: save the network under the same name that is looked
        # up above.
        agent.dumpNetwork(savedPath)

    # --- Show results ---
    # Load the scores stored at "scores/<fname>_scores.jldump" and plot the
    # 'vs' series against 1-based indices.
    # NOTE(review): the x axis is presumably the epoch number -- confirm
    # against whatever writes the scores file.
    basename = "scores/" + fname
    scores = load(basename + "_scores.jldump")
    plt.plot(range(1, len(scores['vs'])+1), scores['vs'], label="VS", color='b')
    plt.legend()