Example #1
    
    train_policy = EpsilonGreedyPolicy(learning_algo, env.nActions(), rng, 1.)
    test_policy = EpsilonGreedyPolicy(learning_algo, env.nActions(), rng, 0.1)

    # --- Instantiate agent ---
    agent = NeuralAgent(
        env,
        learning_algo,
        parameters.replay_memory_size,
        max(env.inputDimensions()[i][0] for i in range(len(env.inputDimensions()))),
        parameters.batch_size,
        rng,
        train_policy=train_policy,
        test_policy=test_policy)

    # --- load saved network and test
    # agent.setNetwork("test_4165747fe50541da92a5ea2698b190b90bc006d5.epoch=97")
    agent.setNetwork(input_nnet)  # test01

    # No episodes have been run in this mode yet, so the raw total is reported rather than a per-episode average.
    avg = agent._total_mode_reward
    print(" _total_mode_reward: ", agent._total_mode_reward, ", number of episodes: ", agent._totalModeNbrEpisode, ", average per episode: ", avg)

    Epoch_length = 500
    mode = parameters.mode  # mode 3 has planning depth 6; mode 2 has planning depth 3
    agent.startMode(mode, Epoch_length)
    agent.run(1, Epoch_length)

    avg = agent._total_mode_reward / agent._totalModeNbrEpisode
    print(" _total_mode_reward: ", agent._total_mode_reward, ", nmbr of episode: ", agent._totalModeNbrEpisode, ", average per episode: ", avg)
    
    #just testing the saved nnet (possibly by visualizing the actions in the env)
    print("The parameters are: {}".format(parameters))

    # As with the discount factor and the learning rate, one can periodically update the epsilon parameter of the
    # epsilon-greedy policy implemented by the agent. This controller has a few more capabilities, as it lets one
    # choose more precisely when to update epsilon: after every X actions, episodes or epochs. The parameter can also
    # be reset every episode or epoch (or never, hence reset_every='none').
    agent.attach(
        bc.EpsilonController(initial_e=parameters.epsilon_start,
                             e_decays=parameters.epsilon_decay,
                             e_min=parameters.epsilon_min,
                             evaluate_on='action',
                             periodicity=1,
                             reset_every='none'))

    agent.setNetwork(
        "./backup_maze_lowdim/test_70460bbfb88bb08e2c4c9f4352805f62760b7d2d.epoch=48"
    )
    agent._learning_algo.freezeAllLayersExceptEncoder()
    agent._learning_algo.resetEncoder()
    # TODO: compare transfer training time with, for instance, relearning from scratch

    agent.run(10, 500)  # 10 epochs of 500 steps each, i.e. 5000 random steps
    print("end gathering data")

    # --- Bind controllers to the agent ---
    # Before every training epoch (periodicity=1), we want to print a summary of the agent's epsilon, discount and
    # learning rate as well as the training epoch number.
    agent.attach(bc.VerboseController(evaluate_on='epoch', periodicity=1))

    # At the end of every epoch, one has the possibility to modify the learning rate using a LearningRateController.
    # Here we wish to update the learning rate after every training epoch (periodicity=1), according to the parameters given.
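    # (Sketch) Attaching the LearningRateController described above; this is a minimal example, assuming the
    # usual deer parameters `parameters.learning_rate` and `parameters.learning_rate_decay` are defined in this script:
    agent.attach(
        bc.LearningRateController(initial_learning_rate=parameters.learning_rate,
                                  learning_rate_decay=parameters.learning_rate_decay,
                                  periodicity=1))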

    # --- Instantiate learning algorithm ---
    learning_algo = CRAR(env,
                         rng,
                         double_Q=True,
                         high_int_dim=HIGH_INT_DIM,
                         internal_dim=3)

    test_policy = EpsilonGreedyPolicy(learning_algo, env.nActions(), rng,
                                      0.1)  #1.)

    # --- Instantiate agent ---
    agent = NeuralAgent(env,
                        learning_algo,
                        parameters.replay_memory_size,
                        max(env.inputDimensions()[i][0]
                            for i in range(len(env.inputDimensions()))),
                        parameters.batch_size,
                        rng,
                        test_policy=test_policy)

    #set name of nnet and planning depth:
    agent.setNetwork("test_71c8fc5b085cd8aa090e8e8e63d0a9450a3b7a27.epoch=35")
    # agent.setNetwork("test_964ccb7a9490cf3c3309a90d07485a77c3ec6486")

    #just running to check its behaviour:
    Epoch_length = 200
    mode = 3  #mode 3 has planning depth 6
    agent.startMode(mode, Epoch_length)
    agent.run(1, Epoch_length)
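    # (Sketch) As in the earlier snippet, the average test reward per episode could be reported afterwards,
    # assuming at least one episode finished during the test run:
    avg = agent._total_mode_reward / agent._totalModeNbrEpisode
    print(" _total_mode_reward: ", agent._total_mode_reward,
          ", number of episodes: ", agent._totalModeNbrEpisode,
          ", average per episode: ", avg)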
Example #4
elif args.network == 'DDPG':
    network = MyACNetwork(environment=env, batch_size=32, random_state=rng)

agent = NeuralAgent(env,
                    network,
                    train_policy=EpsilonGreedyPolicy(network, env.nActions(),
                                                     rng, 0.0),
                    replay_memory_size=1000,
                    batch_size=32,
                    random_state=rng)

#agent.attach(bc.VerboseController())
if args.fname == 'baseline':
    agent = EmpiricalTreatmentAgent(env)
else:
    agent.setNetwork(args.fname)

count = 0
length_success = []
avg_rad = []
avg_h_cell_killed = []
avg_percentage = []
avg_doses = []
k = 1000
# Evaluate the agent over k episodes, counting episodes that end with end_type 'W'
# and recording each episode's length and total dose.
for i in range(k):
    #print(i)
    agent._runEpisode(100000)
    if env.end_type == 'W':
        count += 1
    length_success.append(env.get_tick() - 350)
    avg_rad.append(env.total_dose)
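# (Sketch) Hypothetical summary of the evaluation loop above, not part of the original script:
# success rate over the k episodes and the mean episode length and total dose.
print("success rate:", count / k)
print("mean length:", sum(length_success) / len(length_success))
print("mean total dose:", sum(avg_rad) / len(avg_rad))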
Example #5
        controllers_to_disable=[0, 1, 2, 3, 4],
        periodicity=2,
        show_score=True,
        summarize_every=1))

    # --- Run the experiment ---
    try:
        os.mkdir("params")
    except Exception:
        pass

    # handle loading / saving weights
    savedPath = fname + "_final"
    if os.path.exists("nnets/" + savedPath): # ugly, but as in dumpNetwork
        print("Loading saved net: " + savedPath)
        agent.setNetwork(savedPath)
    if test:
        agent.startMode(PLE_env.VALIDATION_MODE, 10000)

    dump(vars(parameters), "params/" + fname + ".jldump")
    agent.run(parameters.epochs, parameters.steps_per_epoch)
    if test:
        agent.summarizeTestPerformance()

    else:
        # -- save network
        agent.dumpNetwork(savedPath)

    # --- Show results ---
    basename = "scores/" + fname
    scores = load(basename + "_scores.jldump")
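    # (Sketch) A minimal way to visualize the loaded scores, assuming matplotlib is available and `scores`
    # is a per-epoch sequence of validation scores (adapt the indexing to the actual dump format):
    import matplotlib.pyplot as plt
    plt.plot(range(1, len(scores) + 1), scores)
    plt.xlabel("Epoch")
    plt.ylabel("Validation score")
    plt.savefig(basename + "_scores.pdf")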
Example #6
                        parameters.replay_memory_size,
                        max(env.inputDimensions()[i][0]
                            for i in range(len(env.inputDimensions()))),
                        parameters.batch_size,
                        rng,
                        test_policy=test_policy)

    # --- Create unique filename for FindBestController ---
    h = hash(vars(parameters), hash_name="sha1")
    fname = "test_" + h
    print("The parameters hash is: {}".format(h))
    print("The parameters are: {}".format(parameters))

    # test saved network
    # --- load saved network and test
    agent.setNetwork("test_4165747fe50541da92a5ea2698b190b90bc006d5.epoch=97")

    # No test episodes have been run yet at this point, so the raw total is reported rather than a per-episode average.
    avg = agent._total_mode_reward
    print(" _total_mode_reward: ", agent._total_mode_reward,
          ", number of episodes: ", agent._totalModeNbrEpisode,
          ", average per episode: ", avg)

    Epoch_length = 200
    mode = 3  #mode 3 has planning depth 6
    agent.startMode(mode, Epoch_length)
    agent.run(1, Epoch_length)

    avg = agent._total_mode_reward / agent._totalModeNbrEpisode
    print(" _total_mode_reward: ", agent._total_mode_reward,
          ", nmbr of episode: ", agent._totalModeNbrEpisode,
          ", average per episode: ", avg)
Example #7
    print("The parameters are: {}".format(parameters))

    # As with the discount factor and the learning rate, one can periodically update the epsilon parameter of the
    # epsilon-greedy policy implemented by the agent. This controller has a few more capabilities, as it lets one
    # choose more precisely when to update epsilon: after every X actions, episodes or epochs. The parameter can also
    # be reset every episode or epoch (or never, hence reset_every='none').
    agent.attach(
        bc.EpsilonController(initial_e=parameters.epsilon_start,
                             e_decays=parameters.epsilon_decay,
                             e_min=parameters.epsilon_min,
                             evaluate_on='action',
                             periodicity=1,
                             reset_every='none'))

    # input_nnet = "backup_maze_lowdim/test_70460bbfb88bb08e2c4c9f4352805f62760b7d2d.epoch=48"
    agent.setNetwork(input_nnet)
    agent._learning_algo.freezeAllLayersExceptEncoder()
    if parameters.mode == 1:
        agent._learning_algo.resetEncoder()
    if parameters.mode == 2:
        agent._learning_algo.freezeAllLayersExceptEncoderPartially()
    # if parameters.mode == 3:
    #     # TODO: don't freeze, but use a very small learning rate for the other models

    agent.run(10, 500)  # 10 epochs of 500 steps each, i.e. 5000 random steps
    print("end gathering data")

    # --- Bind controllers to the agent ---
    # Before every training epoch (periodicity=1), we want to print a summary of the agent's epsilon, discount and
    # learning rate as well as the training epoch number.
    agent.attach(bc.VerboseController(evaluate_on='epoch', periodicity=1))
Example #8
            discount_factor_max=parameters.discount_max,
            periodicity=1))

    agent.attach(
        bc.FindBestController(validationID=catcher_env.VALIDATION_MODE,
                              testID=None,
                              unique_fname=fname))
    # All previous controllers control the agent during the epochs it goes through. However, we want to interleave a
    # "validation epoch" after each training epoch (hence the periodicity=1 below). We do not want these
    # validation epochs to interfere with the training of the agent, which is handled by the TrainerController,
    # EpsilonController and the like. Therefore, those controllers can be disabled for the whole duration of the
    # validation epochs interleaved this way, using the controllers_to_disable argument of the
    # InterleavedTestEpochController. For each validation epoch, we also want to display the sum of all rewards
    # obtained, hence show_score=True. Finally, we want to call the summarizePerformance method of the environment
    # every summarize_every *validation* epochs.
    agent.attach(
        bc.InterleavedTestEpochController(
            id=catcher_env.VALIDATION_MODE,
            epoch_length=parameters.steps_per_test,
            periodicity=1,
            show_score=True,
            summarize_every=1))

    # agent.setNetwork("test_71c8fc5b085cd8aa090e8e8e63d0a9450a3b7a27.epoch=35")
    agent.setNetwork("test_964ccb7a9490cf3c3309a90d07485a77c3ec6486")
    # freeze network except encoder
    agent._learning_algo.freezeAllLayersExceptEncoder()

    #agent.gathering_data=False
    agent.run(parameters.epochs, parameters.steps_per_epoch)
Example #9
        env,
        learning_algo2,
        parameters.replay_memory_size,
        max(env.inputDimensions()[i][0] for i in range(len(env.inputDimensions()))),
        parameters.batch_size,
        rng,
        train_policy=train_policy2,
        test_policy=test_policy2)

    print("The parameters are: {}".format(parameters))

    # 1. Load the normal and the transferred network
    # agent.setNetwork("test_4165747fe50541da92a5ea2698b190b90bc006d5.epoch=97")
    # agent2.setNetwork("nnet.epoch=40")

    agent.setNetwork(input_normal)  # IMPORTANT: the first agent gets the normal visuals
    agent2.setNetwork(input_transferred)  # IMPORTANT: this one gets the inverted visuals
    # NOTE: if you want to compare two normal nnets, remove the True in agent2.getAbstractState() below!
    # agent.setNetwork("test31")
    # agent2.setNetwork("test36")
    iterations = 40
    totaldiff = 0
    for i in range(iterations):
        # 2. Reset the environment and get the abstract states from both agents
        agent.resetEnv()
        abstract_state = agent.getAbstractState()
        # print("abstract_state= ", abstract_state)

        abstract_state2 = agent2.getAbstractState(True)
        # abstract_state2 = agent2.getAbstractState()
        # print("abstract_state2= ", abstract_state2)