Esempi in Python per Horde.demons

Linguaggio di programmazione: Python

Spazio dei nomi/nome del pacchetto: rlpark.plugin.rltoys.horde

Classe/tipologia: Horde

Metodo/funzione: demons

Esempi su hotexamples.com: 2

Horde.demons in Python: 2 esempi trovati. Questi sono i migliori esempi reali in Python di rlpark.plugin.rltoys.horde.Horde.demons, estratti da progetti open source. Puoi valutarli per aiutarci a migliorare la qualità dei nostri esempi.

Metodi utilizzati di frequente

Mostra / Nascondi

beforeFunctions(2)

demons(2)

update(2)

Metodi utilizzati di frequente

beforeFunctions (2)

demons (2)

update (2)

Esempio n. 1

Mostra file

File: DemonsPredictionOnPolicy.py Progetto: rlpark/critterbot

class DemonExperiment(object):
    """Run a Horde of on-policy prediction demons on the Critterbot simulator.

    One PredictionDemon is created per sensor label found in the
    environment's legend; all of them are updated from the same stream of
    observations while a random behaviour policy selects the actions.
    """

    # Target duration of one control step, in milliseconds: the value is
    # compared with latencyTimer.getCurrentMillis() and divided by 1000
    # before being handed to time.sleep().
    Latency = 100

    def __init__(self):
        """Start the simulator and build the representation, demons and horde."""
        command = CritterbotSimulator.startSimulator()
        self.environment = CritterbotSimulator(command)
        self.latencyTimer = Chrono()
        self.rewards = self.createRewardFunction()
        self.actions = XYThetaAction.sevenActions()
        self.behaviourPolicy = RandomPolicy(Random(0), self.actions)
        self.representation = TileCodersNoHashing(self.environment.legend().nbLabels(), -2000, 2000)
        self.representation.includeActiveFeature()
        # One demon per reward function, in the same order as self.rewards.
        self.demons = [self.createOnPolicyPredictionDemon(rewardFunction)
                       for rewardFunction in self.rewards]
        self.horde = Horde()
        self.horde.demons().addAll(self.demons)
        self.horde.beforeFunctions().addAll(self.rewards)
        # No previous feature vector exists until the first call to learn().
        self.x_t = None
        self.clock = zepy.clock("Nexting Clock")

    def createRewardFunction(self):
        """Return a list with one SensorRewardFunction per label of the legend."""
        legend = self.environment.legend()
        return [SensorRewardFunction(legend, label) for label in legend.getLabels()]

    def createOnPolicyPredictionDemon(self, rewardFunction):
        """Return a PredictionDemon that learns rewardFunction with TD(lambda).

        The step size is 0.1 divided by the representation's vector norm;
        gamma is 0.9 and lambda is 0.3.
        """
        gamma = .9
        alpha = .1 / self.representation.vectorNorm()
        nbFeatures = self.representation.vectorSize()
        lambda_ = .3
        td = TDLambda(lambda_, gamma, alpha, nbFeatures)
        return PredictionDemon(rewardFunction, td)

    def learn(self, a_t, o_tp1):
        """Update the horde with the transition that ends in observation o_tp1.

        a_t is the action that was sent before o_tp1 was received (None on
        the first step). The projected features are then stored in self.x_t
        so that they serve as the previous features on the next call.
        """
        x_tp1 = self.representation.project(o_tp1.doubleValues())
        self.horde.update(o_tp1, self.x_t, a_t, x_tp1)
        # NOTE(review): bufferedCopy presumably reuses self.x_t as the
        # destination buffer when it is not None -- confirm against RLPark.
        self.x_t = Vectors.bufferedCopy(x_tp1, self.x_t)

    def run(self):
        """Run the observe/learn/act loop until the clock stops ticking.

        Each iteration is padded with a sleep so that it lasts at least
        Latency milliseconds. The environment is closed when the loop ends,
        including when an exception interrupts it, so the simulator is not
        left running after a failure.
        """
        a_t = None
        try:
            while self.clock.tick():
                self.latencyTimer.start()
                o_tp1 = self.environment.waitNewRawObs()
                self.learn(a_t, o_tp1)
                self.behaviourPolicy.update(None)
                a_tp1 = self.behaviourPolicy.sampleAction()
                self.environment.sendAction(a_tp1)
                a_t = a_tp1
                waitingTime = self.Latency - self.latencyTimer.getCurrentMillis()
                if waitingTime > 0:
                    # time.sleep() takes seconds; waitingTime is in milliseconds.
                    time.sleep(waitingTime / 1000.0)
        finally:
            self.environment.close()

    def zephyrize(self):
        """Advertise the environment and the horde, and monitor every reward.

        Each reward function's 'rewardValue' attribute is registered with
        zepy.monattr under the reward function's own label.
        """
        zepy.advertise(self.clock, self.environment)
        zepy.advertise(self.clock, self.horde)
        for rewardFunction in self.rewards:
            zepy.monattr(self.clock, rewardFunction, 'rewardValue', label = rewardFunction.label)

Esempio n. 2

Mostra file

File: DemonsControlOffPolicy.py Progetto: rlpark/critterbot

class DemonExperiment(object):
    """Run a Horde of off-policy control demons on the Critterbot simulator.

    One ControlOffPolicyDemon is created for each of the three motor-current
    reward functions; every demon learns a greedy target policy with GQ
    while a shared random behaviour policy selects the actions.
    """

    # Target duration of one control step, in milliseconds: the value is
    # compared with latencyTimer.getCurrentMillis() and divided by 1000
    # before being handed to time.sleep().
    Latency = 100

    def __init__(self):
        """Start the simulator and build the representation, demons and horde."""
        command = CritterbotSimulator.startSimulator()
        self.environment = CritterbotSimulator(command)
        self.latencyTimer = Chrono()
        self.rewards = self.createRewardFunction()
        self.actions = XYThetaAction.sevenActions()
        self.behaviourPolicy = RandomPolicy(Random(0), self.actions)
        self.representation = TileCodersNoHashing(self.environment.legend().nbLabels(), -2000, 2000)
        self.representation.includeActiveFeature()
        # One demon per reward function, in the same order as self.rewards.
        self.demons = [self.createOffPolicyControlDemon(rewardFunction)
                       for rewardFunction in self.rewards]
        self.horde = Horde()
        self.horde.demons().addAll(self.demons)
        self.horde.beforeFunctions().addAll(self.rewards)
        # No previous feature vector exists until the first call to learn().
        self.x_t = None
        self.clock = zepy.clock("Horde Off-policy Control demons")

    def createRewardFunction(self):
        """Return the reward functions for the MotorCurrent0..2 sensor labels."""
        legend = self.environment.legend()
        return [ SensorRewardFunction(legend, 'MotorCurrent0'),
                 SensorRewardFunction(legend, 'MotorCurrent1'),
                 SensorRewardFunction(legend, 'MotorCurrent2') ]

    def createOffPolicyControlDemon(self, rewardFunction):
        """Return a ControlOffPolicyDemon that learns rewardFunction with GreedyGQ.

        State features are combined with the actions through a TabularAction;
        both step sizes are divided by the resulting vector norm
        (alpha_v = 0.1 / norm, alpha_w = 0.001 / norm). The target policy is
        greedy with respect to the GQ predictor and the behaviour policy is
        the experiment's shared behaviourPolicy.
        """
        toStateAction = TabularAction(self.actions, self.representation.vectorNorm(), self.representation.vectorSize())
        nbFeatures = toStateAction.vectorSize()
        lambda_ = 0.1
        beta = .1
        alpha_v = .1 / toStateAction.vectorNorm()
        alpha_w = .001 / toStateAction.vectorNorm()
        gq = GQ(alpha_v, alpha_w, beta , lambda_, nbFeatures)
        targetPolicy = Greedy(gq, self.actions, toStateAction)
        controlGQ = GreedyGQ(gq, self.actions, toStateAction, targetPolicy, self.behaviourPolicy)
        return ControlOffPolicyDemon(rewardFunction, controlGQ)

    def learn(self, a_t, o_tp1):
        """Update the horde with the transition that ends in observation o_tp1.

        a_t is the action that was sent before o_tp1 was received (None on
        the first step). The projected features are then stored in self.x_t
        so that they serve as the previous features on the next call.
        """
        x_tp1 = self.representation.project(o_tp1.doubleValues())
        self.horde.update(o_tp1, self.x_t, a_t, x_tp1)
        # NOTE(review): bufferedCopy presumably reuses self.x_t as the
        # destination buffer when it is not None -- confirm against RLPark.
        self.x_t = Vectors.bufferedCopy(x_tp1, self.x_t)

    def run(self):
        """Run the observe/learn/act loop until the clock stops ticking.

        Each iteration is padded with a sleep so that it lasts at least
        Latency milliseconds. The environment is closed when the loop ends,
        including when an exception interrupts it, so the simulator is not
        left running after a failure.
        """
        a_t = None
        try:
            while self.clock.tick():
                self.latencyTimer.start()
                o_tp1 = self.environment.waitNewRawObs()
                self.learn(a_t, o_tp1)
                self.behaviourPolicy.update(None)
                a_tp1 = self.behaviourPolicy.sampleAction()
                self.environment.sendAction(a_tp1)
                a_t = a_tp1
                waitingTime = self.Latency - self.latencyTimer.getCurrentMillis()
                if waitingTime > 0:
                    # time.sleep() takes seconds; waitingTime is in milliseconds.
                    time.sleep(waitingTime / 1000.0)
        finally:
            self.environment.close()

    def zephyrize(self):
        """Advertise the environment and the horde, and monitor every reward.

        Each reward function's 'rewardValue' attribute is registered with
        zepy.monattr under the reward function's own label.
        """
        zepy.advertise(self.clock, self.environment)
        zepy.advertise(self.clock, self.horde)
        for rewardFunction in self.rewards:
            zepy.monattr(self.clock, rewardFunction, 'rewardValue', label = rewardFunction.label)