# --- Instantiate policies ---
# eps=1.0 -> fully random action selection while training;
# eps=0.1 -> mostly greedy behaviour when evaluating the learned network.
train_policy = EpsilonGreedyPolicy(learning_algo, env.nActions(), rng, 1.)
test_policy = EpsilonGreedyPolicy(learning_algo, env.nActions(), rng, 0.1)

# --- Instantiate agent ---
agent = NeuralAgent(
    env,
    learning_algo,
    parameters.replay_memory_size,
    # Longest history length over all input dimensions of the environment.
    max(dims[0] for dims in env.inputDimensions()),
    parameters.batch_size,
    rng,
    train_policy=train_policy,
    test_policy=test_policy)


def _report_mode_stats(agent):
    """Print the agent's cumulative test-mode reward, the number of test
    episodes run so far, and the per-episode average.

    Reports an average of 0 when no episode has completed yet; the original
    code printed the raw total mislabeled as the average (and a naive
    division would raise ZeroDivisionError before the first episode).
    """
    episodes = agent._totalModeNbrEpisode
    avg = agent._total_mode_reward / episodes if episodes else 0.
    print(" _total_mode_reward: ", agent._total_mode_reward,
          ", nmbr of episode: ", episodes,
          ", average per episode: ", avg)


# --- Load saved network and test it (possibly by visualizing the actions
# in the env) ---
# agent.setNetwork("test_4165747fe50541da92a5ea2698b190b90bc006d5.epoch=97")
agent.setNetwork(input_nnet)

# Baseline stats before any test episode has run.
_report_mode_stats(agent)

Epoch_length = 500
# NOTE(review): per the original comment, mode 3 has planning depth 6 and
# mode 2 has planning depth 3 — confirm against the environment definition.
mode = parameters.mode
agent.startMode(mode, Epoch_length)
agent.run(1, Epoch_length)

# Stats after one evaluation epoch.
_report_mode_stats(agent)
# NOTE(review): this is the tail of a call (likely attaching a controller)
# whose head lies outside this chunk — the closing arguments/parentheses
# must stay as-is.
show_score=True, summarize_every=1))

# --- Run the experiment ---
# Best-effort creation of the params/ directory; an existing directory (or
# any other failure) is deliberately ignored.
try:
    os.mkdir("params")
except Exception:
    pass

# handle loading / saving weights
savedPath = fname + "_final"
# NOTE(review): original indentation was lost in this chunk; the nesting of
# the two statements and the `if test:` under this `if` is reconstructed —
# confirm against the original script.
if os.path.exists("nnets/" + savedPath):  # ugly, but as in dumpNetwork
    print("Loading saved net: " + savedPath)
    agent.setNetwork(savedPath)
    if test:
        # Evaluate the loaded net in validation mode for 10000 steps.
        agent.startMode(PLE_env.VALIDATION_MODE, 10000)

# Persist the run's hyper-parameters next to the weights.
dump(vars(parameters), "params/" + fname + ".jldump")
agent.run(parameters.epochs, parameters.steps_per_epoch)

if test:
    agent.summarizeTestPerformance()
else:
    # -- save network
    agent.dumpNetwork(savedPath)

# --- Show results ---
# Scores are expected to have been dumped by a controller during the run;
# 'vs' presumably holds per-epoch validation scores — verify against the
# controller that writes this file.
basename = "scores/" + fname
scores = load(basename + "_scores.jldump")
plt.plot(range(1, len(scores['vs'])+1), scores['vs'], label="VS", color='b')
plt.legend()