Python Actor.createSAPsの例

プログラミング言語: Python

名前空間/パッケージ名: actor

クラス/型: Actor

メソッド/関数: createSAPs

hotexamples.comのコード掲載数: 1

Python Actor.createSAPs - 1件のコード例が見つかりました。すべてオープンソースプロジェクトから抽出されたPythonのactor.Actor.createSAPsの実例で、最も評価が高いものを厳選しています。コード例の評価を行っていただくことで、より質の高いコード例が表示されるようになります。

よく使われるメソッド

表示 / 非表示

Actor(30)

__init__(30)

eval(11)

choose_action(4)

get_will_save(4)

get_reflex_save(4)

get_fortitude_save(4)

get_action(4)

forward(4)

draw(4)

take_damage(4)

add_movie(4)

act(3)

from_SQLiteRow(3)

action(3)

build(2)

get_actions(2)

get_base_attack_bonus(2)

get_full_attack(2)

create_actor_model(2)

copy_weights(2)

from_string(2)

characterid(2)

mat_name(1)

get_details(1)

setstate(1)

route(1)

push_task(1)

get_alignment_var(1)

get_attack_bonus(1)

get_attack_damage(1)

position(1)

get_base_attribute_score(1)

get_date_debut(1)

get_filter(1)

move_to(1)

get_first_name(1)

act_one_episode(1)

get_full_name(1)

get_rect(1)

mover(1)

_from_string(1)

move_towards(1)

has_private_parking(1)

isAlive(1)

get_action_for_train(1)

attack(1)

getArtistByid(1)

decayEligibilities(1)

build_train_op(1)

コード例 #1

ファイルを表示

class Agent:
    """Actor-critic learner that drives an environment with an actor and a critic.

    The agent owns one ``Actor`` and one critic. ``criticType == 0`` selects a
    ``CriticTable``; any other value selects a ``CriticNN`` whose input layer
    size is the length of the environment's current state.

    NOTE(review): ``Actor`` and ``plt`` are referenced but not imported inside
    this class; they are presumably imported at module level (``plt`` looks
    like ``matplotlib.pyplot``) -- confirm against the full file.
    """

    def __init__(self, env, alphaActor, alphaCritic, lam, gamma, criticType, hiddenLayerSizes):
        """Create the actor and the critic selected by ``criticType``.

        Args:
            env: Environment object; must provide ``getState``, ``reset``,
                ``execute``, ``draw`` and ``numberOfPegsLeft``.
            alphaActor: Forwarded to ``Actor`` as its first argument.
            alphaCritic: Forwarded to the critic as its first argument.
            lam: Forwarded to both actor and critic.
            gamma: Forwarded to both actor and critic.
            criticType: ``0`` for the table critic, anything else for the
                neural-network critic.
            hiddenLayerSizes: Forwarded to ``CriticNN``; unused for the table
                critic.
        """
        self.__env = env
        self.__actor = Actor(alphaActor, lam, gamma)
        self.__criticType = criticType
        if self.__criticType == 0:  # use criticTable
            # Imported lazily so only the selected critic module is loaded.
            from criticTable import CriticTable
            self.__critic = CriticTable(alphaCritic, lam, gamma)
        else:  # use criticNN
            from criticNN import CriticNN
            inputLayerSize = len(env.getState())
            self.__critic = CriticNN(alphaCritic, lam, gamma, hiddenLayerSizes, inputLayerSize)

    def _registerState(self, state, validActions):
        """Make ``state`` known to the table critic (if used) and to the actor."""
        if self.__criticType == 0:  # only needed for table critic
            self.__critic.createEligibility(state)
            self.__critic.createStateValues(state)
        self.__actor.createSAPs(state, validActions)

    def _selectAction(self, state, validActions, eps):
        """Pick the next action for ``state`` and update the actor's eligibility for it."""
        action = self.__actor.findNextAction(state, validActions, eps)
        self.__actor.updateEligibility(state, action)
        return action

    # Actor-Critic learning
    def learn(self, runs, eps, epsDecay, verbose=False):
        """Run ``runs`` training episodes and plot the pegs left per episode.

        Args:
            runs: Number of episodes to run.
            eps: Initial exploration value passed to the actor's
                ``findNextAction``; multiplied by ``epsDecay`` after every
                episode.
            epsDecay: Multiplicative decay applied to ``eps`` per episode.
            verbose: When true, print one summary line per episode; otherwise
                show a ``tqdm`` progress bar instead.
        """
        pegsLeft = []
        iterationNumber = []
        if verbose:
            runList = range(runs)
        else:  # display progressbar instead
            from tqdm import tqdm
            runList = tqdm(range(runs))

        for i in runList:  # for each episode
            self.__actor.resetEligibilities()
            self.__critic.resetEligibilities()
            state, validActions = self.__env.reset()
            self._registerState(state, validActions)
            action = self._selectAction(state, validActions, eps)
            # NOTE(review): this aborts the whole training run (not just this
            # episode) when the initial state has no valid moves; kept as in
            # the original -- confirm whether ``continue`` was intended.
            if len(validActions) == 0:
                break

            while len(validActions) > 0:  # while there exist a valid next move
                lastState, state, reinforcement, validActions = self.__env.execute(action)
                self._registerState(state, validActions)
                # The next action is chosen before the TD update, as in the original.
                action = self._selectAction(state, validActions, eps)
                td_error = self.__critic.findTDError(reinforcement, lastState, state)
                if self.__criticType == 0:
                    self.__critic.updateStateValues()
                else:
                    self.__critic.fit(reinforcement, lastState, state, td_error)
                self.__critic.updateEligibilities()
                self.__actor.updateSAPs(td_error)
                self.__actor.decayEligibilities()

            if verbose:  # print valuation of each state
                print("ep", i,"  Pegs", self.__env.numberOfPegsLeft(), " LastState Value", "%.3f" % self.__critic.stateValue(lastState), " eps", "%.3f" % eps)
            pegsLeft.append(self.__env.numberOfPegsLeft())
            iterationNumber.append(i)
            eps = eps * epsDecay  # decrease exploration

        plt.plot(iterationNumber, pegsLeft)  # plot the development for each episode
        plt.show()

    # runs a greedy search through the best states and actions
    def runGreedy(self, animation_delay):
        """Play one episode with exploration value 0, drawing every step.

        Args:
            animation_delay: Passed to ``env.draw`` for every intermediate
                frame; the first and last frames are drawn without it.
        """
        state, validActions = self.__env.reset()
        self.__env.draw()
        # Register the initial state before querying it, exactly as is done
        # for every later state below and in ``learn``; otherwise a greedy
        # run on an actor that has not seen this state would look up
        # state-action pairs that were never created.
        self.__actor.createSAPs(state, validActions)
        action = self.__actor.findNextAction(state, validActions, 0)
        while len(validActions) > 0:  # while there exist a valid next move
            self.__env.draw(animation_delay)
            _, state, _, validActions = self.__env.execute(action)
            # if game is not won, greedy run may encounter new states.
            self.__actor.createSAPs(state, validActions)
            action = self.__actor.findNextAction(state, validActions, 0)
        self.__env.draw()