Ejemplo n.º 1
0
                              testID=None,
                              unique_fname=fname))

    # All previous controllers control the agent during the epochs it goes through. However, we want to interleave a
    # "validation epoch" between each training epoch ("one of two epochs", hence the periodicity=2). We do not want
    # these validation epochs to interfere with the training of the agent, which is well established by the
    # TrainerController, EpsilonController and the like. Therefore, we will disable these controllers for the whole
    # duration of the validation epochs interleaved this way, using the controllersToDisable argument of the
    # InterleavedTestEpochController. For each validation epoch, we also want to display the sum of all rewards
    # obtained, hence the showScore=True. Finally, we want to call the summarizePerformance method of ALE_env after
    # every *validation* epoch, hence the summarizeEvery=1.
    validation_controller = bc.InterleavedTestEpochController(
        id=ALE_env.VALIDATION_MODE,
        epochLength=parameters.steps_per_test,
        controllersToDisable=[0, 1, 2, 3, 4],  # indices of controllers attached earlier
        periodicity=2,
        showScore=True,
        summarizeEvery=1,
    )
    agent.attach(validation_controller)

    # --- Run the experiment ---
    # The parameter dump below needs a "params" directory. Create it if it is missing and tolerate the case where it
    # already exists, but let any other failure (e.g. a permission error, or "params" being a regular file) surface
    # here instead of being silently swallowed.
    try:
        os.mkdir("params")
    except OSError:
        if not os.path.isdir("params"):
            raise
    # Save this run's parameters, as a plain dict, under params/<fname>.jldump.
    dump(vars(parameters), "params/" + fname + ".jldump")
    # Run the agent with the configured number of epochs and steps per epoch.
    agent.run(parameters.epochs, parameters.steps_per_epoch)

    # --- Show results ---
    # Load the scores stored under "scores/<fname>_scores.jldump".
    # NOTE(review): presumably this file is written during agent.run() by a controller that was given
    # unique_fname=fname -- confirm against the controller's implementation.
    basename = "scores/" + fname
    scores = load(basename + "_scores.jldump")
Ejemplo n.º 2
0
            initialLearningRate=parameters.learning_rate,
            learningRateDecay=parameters.learning_rate_decay,
            periodicity=1))

    # Discount-factor controller: initial value, growth and maximum all come from `parameters`.
    discount_controller = bc.DiscountFactorController(
        initialDiscountFactor=parameters.discount,
        discountFactorGrowth=parameters.discount_inc,
        discountFactorMax=parameters.discount_max,
        periodicity=1,
    )
    agent.attach(discount_controller)

    # Epsilon controller: initial value, decay and minimum all come from `parameters`.
    epsilon_controller = bc.EpsilonController(
        initialE=parameters.epsilon_start,
        eDecays=parameters.epsilon_decay,
        eMin=parameters.epsilon_min,
        evaluateOn='action',
        periodicity=1,
        resetEvery='none',
    )
    agent.attach(epsilon_controller)

    # Interleaved test-epoch controller; controllers 0-4 (attached earlier) are listed in controllersToDisable.
    test_epoch_controller = bc.InterleavedTestEpochController(
        id=0,
        epochLength=parameters.steps_per_test,
        controllersToDisable=[0, 1, 2, 3, 4],
        periodicity=2,
        showScore=True,
        summarizeEvery=parameters.period_btw_summary_perfs,
    )
    agent.attach(test_epoch_controller)

    # --- Run the experiment ---
    # Both run arguments are read from the parsed parameters.
    epoch_count = parameters.epochs
    steps_in_epoch = parameters.steps_per_epoch
    agent.run(epoch_count, steps_in_epoch)
Ejemplo n.º 3
0
                              testID=MG_two_storages_env.TEST_MODE,
                              unique_fname=fname))

    # All previous controllers control the agent during the epochs it goes through. However, we want to interleave a
    # "validation epoch" between each training epoch ("one of two epochs", hence the periodicity=2). We do not want
    # these validation epochs to interfere with the training of the agent, which is well established by the
    # TrainerController, EpsilonController and the like, nor with its testing (see next controller). Therefore, we will
    # disable these controllers for the whole duration of the validation epochs interleaved this way, using the
    # controllersToDisable argument of the InterleavedTestEpochController. For each validation epoch, we also want to
    # display the sum of all rewards obtained, hence the showScore=True. Finally, we never want this controller to call
    # the summarizePerformance method of MG_two_storages_env (summarizeEvery=-1).
    validation_controller = bc.InterleavedTestEpochController(
        id=MG_two_storages_env.VALIDATION_MODE,
        epochLength=parameters.steps_per_test,
        controllersToDisable=[0, 1, 2, 3, 4, 7],  # indices of the controllers to switch off meanwhile
        periodicity=2,
        showScore=True,
        summarizeEvery=-1,
    )
    agent.attach(validation_controller)

    # Besides inserting a validation epoch (required if one wants to find the best neural network over all training
    # epochs), we also wish to interleave a "test epoch" between each training epoch ("one of two epochs", hence the
    # periodicity=2). We do not want these test epochs to interfere with the training of the agent nor with its
    # validation. Therefore, we will disable these controllers for the whole duration of the test epochs interleaved
    # this way, using the controllersToDisable argument of the InterleavedTestEpochController. For each test epoch, we
    # also want to display the sum of all rewards obtained, hence the showScore=True. Finally, we want to call the
    # summarizePerformance method of MG_two_storages_env every [parameters.period_btw_summary_perfs] *test* epochs.
    agent.attach(
        bc.InterleavedTestEpochController(
            id=MG_two_storages_env.TEST_MODE,
            epochLength=parameters.steps_per_test,