Python QLearning.learnOneEpisode Examples

Programming Language: Python

Namespace/Package Name: QLearning

Class/Type: QLearning

Method/Function: learnOneEpisode

Examples at hotexamples.com: 2

Python QLearning.learnOneEpisode - 2 examples found. These are the top-rated real-world Python examples of QLearning.QLearning.learnOneEpisode, extracted from open-source projects. You can rate the examples to help us improve their quality.

Frequently Used Methods

Show Hide

QLearning(30)

train(6)

run(3)

evaluateOneEpisode(2)

learnOneEpisode(2)

learn(2)

DebugTrainNet(1)

update_q(1)

updateQFunction(1)

test(1)

setterExploration(1)

save(1)

q_learning_until_convergence(1)

max_q_action(1)

learning(1)

intiQtable(1)

getterTables(1)

initQ(1)

getterExploration(1)

get_action(1)

format_q_function(1)

format_policy(1)

exploration_pure(1)

exploration_infaillible(1)

exploration_exigente(1)

exploitationFromTable(1)

exploitation(1)

execute_policy(1)

execute(1)

epsilonGreedy(1)

StartTrial(1)

update_q_table(1)

Example #1

Show file

File: main.py Project: wwxFromTju/options

def _runEpisodes(learner, numEpisodes, timestepLimit):
    """Alternate learning and evaluation episodes and collect eval returns.

    For each of ``numEpisodes`` iterations the learner first runs one
    learning episode and then one evaluation episode (with ``eps=0.01``),
    both capped at ``timestepLimit`` steps.

    Returns:
        A list with the value returned by ``evaluateOneEpisode`` for each
        iteration, in order.
    """
    evalReturns = []
    for _ in range(numEpisodes):
        # The learning return is not reported by the caller; the call is kept
        # because it is what (presumably) updates the learner between
        # evaluations.
        learner.learnOneEpisode(timestepLimit=timestepLimit)
        evalReturns.append(
            learner.evaluateOneEpisode(eps=0.01, timestepLimit=timestepLimit))
    return evalReturns


def qLearningWithOptions(env,
                         alpha,
                         gamma,
                         options_eps,
                         epsilon,
                         nSeeds,
                         maxLengthEp,
                         nEpisodes,
                         verbose,
                         useNegation,
                         genericNumOptionsToEvaluate,
                         loadedOptions=None):
    """Evaluate Q-learning with several option-set sizes and without options.

    Options are either discovered with ``discoverOptions`` (when
    ``loadedOptions`` is None) or taken from ``loadedOptions``. For every
    admissible number of options, and for every seed, a ``QLearning`` agent
    is built whose action set is the environment's action set extended with
    the first N options; it then alternates learning and evaluation episodes.
    The same procedure is finally repeated with agents that receive no
    options at all.

    Args:
        env: environment object; must provide ``getActionSet()``.
        alpha, gamma, epsilon: forwarded unchanged to ``QLearning``.
        options_eps: forwarded to ``discoverOptions``.
        nSeeds: number of seeds; seeds ``0 .. nSeeds - 1`` are used.
        maxLengthEp: timestep limit for every learning/evaluation episode.
        nEpisodes: number of learn/evaluate iterations per agent.
        verbose: when truthy, prints progress and asks ``discoverOptions``
            to plot graphs.
        useNegation: forwarded to ``discoverOptions``; when options are
            discovered (not loaded) it also halves the number of options
            considered available.
        genericNumOptionsToEvaluate: candidate option counts, scanned in
            order; scanning stops at the first count that exceeds the number
            of available options.
        loadedOptions: optional pre-computed options. When given, every
            option gets the environment's action set plus ``'terminate'``.

    Returns:
        A tuple ``(returns_eval_primitive, returns_eval, totalOptionsToUse)``:
        ``returns_eval_primitive[seed][episode]`` holds the evaluation
        returns of the option-free agents, ``returns_eval[k][seed][episode]``
        those of the agents using ``totalOptionsToUse[k]`` options, and
        ``totalOptionsToUse`` is the list of option counts actually used.
    """
    if loadedOptions is None:
        # Graphs are plotted only in verbose mode.
        options, actionSetPerOption = discoverOptions(env,
                                                      options_eps,
                                                      verbose,
                                                      useNegation,
                                                      plotGraphs=bool(verbose))
    else:
        options = loadedOptions
        # Copy the environment's action set so that adding 'terminate' can
        # never modify a list the environment may be sharing.
        actionSetPerOption = [list(env.getActionSet()) + ['terminate']
                              for _ in loadedOptions]

    # NOTE(review): with negation, discovered options presumably come in
    # pairs, hence only half of them count -- confirm against discoverOptions.
    if useNegation and loadedOptions is None:
        maxNumOptions = len(options) // 2
    else:
        maxNumOptions = len(options)

    # Keep the leading candidates that fit; stop at the first that does not.
    totalOptionsToUse = []
    for numCandidate in genericNumOptionsToEvaluate:
        if numCandidate > maxNumOptions:
            break
        totalOptionsToUse.append(numCandidate)

    returns_eval = []
    for numOptionsToUse in totalOptionsToUse:
        if verbose:
            print('Using %s options' % (numOptionsToUse,))

        evalPerSeed = []
        for s in range(nSeeds):
            if verbose:
                print('Seed:  %s' % (s + 1,))

            # Fresh copy per agent: primitive actions followed by the first
            # numOptionsToUse options.
            actionSet = list(env.getActionSet())
            actionSet.extend(options[:numOptionsToUse])

            learner = QLearning(alpha=alpha,
                                gamma=gamma,
                                epsilon=epsilon,
                                environment=env,
                                seed=s,
                                useOnlyPrimActions=True,
                                actionSet=actionSet,
                                actionSetPerOption=actionSetPerOption)
            evalPerSeed.append(_runEpisodes(learner, nEpisodes, maxLengthEp))
        returns_eval.append(evalPerSeed)

    # Baseline: agents built without any explicit action set or options.
    returns_eval_primitive = []
    for s in range(nSeeds):
        learner = QLearning(alpha=alpha,
                            gamma=gamma,
                            epsilon=epsilon,
                            environment=env,
                            seed=s)
        returns_eval_primitive.append(
            _runEpisodes(learner, nEpisodes, maxLengthEp))

    return returns_eval_primitive, returns_eval, totalOptionsToUse

Example #2

Show file

File: main.py Project: wwxFromTju/options

        getExpectedNumberOfStepsFromOption(env=env,
                                           eps=epsilon,
                                           verbose=verbose,
                                           discoverNegation=bothDirections,
                                           loadedOptions=loadedOptions)

    elif taskToPerform == 5:  #Solve for a given goal (q-learning)
        returns_learn = []
        returns_eval = []
        learner = QLearning(alpha=0.1,
                            gamma=0.9,
                            epsilon=1.00,
                            environment=env)
        for i in xrange(num_episodes):
            returns_learn.append(
                learner.learnOneEpisode(timestepLimit=max_length_episode))
            returns_eval.append(
                learner.evaluateOneEpisode(eps=0.01,
                                           timestepLimit=max_length_episode))

        plt.plot(returns_eval)
        plt.show()

    elif taskToPerform == 6:  #Solve for a given goal w/ primitive actions (q-learning) following options
        returns_eval_primitive, returns_eval, totalOptionsToUse = qLearningWithOptions(
            env=env,
            alpha=0.1,
            gamma=0.9,
            options_eps=0.0,
            epsilon=1.0,
            nSeeds=num_seeds,