Python CritterbotSimulator.sendAction 예제들

프로그래밍 언어: Python

네임스페이스/패키지 이름: critterbot.environment

클래스/타입: CritterbotSimulator

메소드/함수: sendAction

hotexamples.com에서의 예제들: 3

Python CritterbotSimulator.sendAction - 3개의 예제가 발견되었습니다. 이것들은 오픈소스 프로젝트에서 추출된 Python의 critterbot.environment.CritterbotSimulator.sendAction에 대한 실세계 최고 등급의 예제들입니다. 예제들을 평가하여 예제의 품질 향상에 도움을 줄 수 있습니다.

자주 사용되는 메소드들

보기 숨기기

clock(2)

isClosed(2)

legend(2)

sendAction(2)

waitNewObs(2)

CritterbotSimulator(1)

예제 #1

파일 보기

파일: DemonsControlOffPolicy.py 프로젝트: denizonural/rlpark

class DemonExperiment(object):
    Latency = 100 #s
    
    def __init__(self):
        self.environment = CritterbotSimulator()
        self.latencyTimer = Chrono()
        self.rewards = self.createRewardFunction()
        self.actions = XYThetaAction.sevenActions()
        self.behaviourPolicy = RandomPolicy(Random(0), self.actions)
        self.representation = TileCodersNoHashing(self.environment.legend().nbLabels(), -2000, 2000)
        self.representation.includeActiveFeature()
        self.demons = DemonScheduler()
        for rewardFunction in self.rewards:
            self.demons.add(self.createOffPolicyControlDemon(rewardFunction))
        self.x_t = None

    def createRewardFunction(self):
        legend = self.environment.legend()
        return [ SensorRewardFunction(legend, 'MotorCurrent0'),
                 SensorRewardFunction(legend, 'MotorCurrent1'),
                 SensorRewardFunction(legend, 'MotorCurrent2') ]

    def createOffPolicyControlDemon(self, rewardFunction):
        toStateAction = TabularAction(self.actions, self.representation.vectorSize())
        nbFeatures = toStateAction.actionStateFeatureSize()
        lambda_ = 0.1
        beta = .1
        alpha_v = .1 / self.representation.nbActive()
        alpha_w = .1 / self.representation.nbActive()
        gq = GQ(alpha_v, alpha_w, beta , lambda_, nbFeatures)
        targetPolicy = Greedy(gq, self.actions, toStateAction)
        controlGQ = ExpectedGQ(gq, self.actions, toStateAction, targetPolicy, self.behaviourPolicy)
        return ControlOffPolicyDemon(rewardFunction, controlGQ)
        
    def learn(self, a_t, o_tp1):
        for rewardFunction in self.rewards:
            rewardFunction.update(o_tp1)
        x_tp1 = self.representation.project(o_tp1)
        self.demons.update(self.x_t, a_t, x_tp1)
        self.x_t = x_tp1
        
    def run(self):
        a_t = None
        while not self.environment.isClosed():
            self.latencyTimer.start()
            o_tp1 = self.environment.waitNewObs()
            self.learn(a_t, o_tp1)
            a_tp1 = self.behaviourPolicy.decide(None)
            self.environment.sendAction(a_tp1)
            a_t = a_tp1
            waitingTime = self.Latency - self.latencyTimer.getCurrentMillis()
            if waitingTime > 0:
                time.sleep(waitingTime / 1000.0)
                
    def zephyrize(self):
        clock = self.environment.clock()
        zepy.advertise(self.environment, clock)
        zepy.advertise(self.demons, clock)
        for rewardFunction in self.rewards:
            zepy.monattr(rewardFunction, 'rewardValue', clock = clock, label = rewardFunction.label)

예제 #2

파일 보기

파일: DemonsPredictionOffPolicy.py 프로젝트: amw8/rlpark

class DemonExperiment(object):
    Latency = 100 #s
    
    def __init__(self):
        self.environment = CritterbotSimulator()
        self.latencyTimer = Chrono()
        self.rewards = self.createRewardFunction()
        self.actions = XYThetaAction.sevenActions()
        self.behaviourPolicy = RandomPolicy(Random(0), self.actions)
        self.representation = TileCodersNoHashing(self.environment.legend().nbLabels(), -2000, 2000)
        self.representation.includeActiveFeature()
        self.demons = DemonScheduler()
        for rewardFunction in self.rewards:
            targetPolicy = SingleActionPolicy(XYThetaAction.Left)
            demon = self.createOffPolicyPredictionDemon(rewardFunction, targetPolicy)
            self.demons.add(demon)
        self.x_t = None

    def createRewardFunction(self):
        legend = self.environment.legend()
        return [ SensorRewardFunction(legend, 'MotorCurrent0'),
                 SensorRewardFunction(legend, 'MotorCurrent1'),
                 SensorRewardFunction(legend, 'MotorCurrent2') ]

    def createOffPolicyPredictionDemon(self, rewardFunction, targetPolicy):
        gamma = .9
        alpha_v = .1 / self.representation.nbActive()
        alpha_w = .1 / self.representation.nbActive() 
        nbFeatures = self.representation.vectorSize()
        gtd = GTD(gamma, alpha_v, alpha_w, nbFeatures)
        return PredictionOffPolicyDemon(rewardFunction, gtd, targetPolicy, self.behaviourPolicy)
        
    def learn(self, a_t, o_tp1):
        for rewardFunction in self.rewards:
            rewardFunction.update(o_tp1)
        x_tp1 = self.representation.project(o_tp1)
        self.demons.update(self.x_t, a_t, x_tp1)
        self.x_t = x_tp1
        
    def run(self):
        a_t = None
        while not self.environment.isClosed():
            self.latencyTimer.start()
            o_tp1 = self.environment.waitNewObs()
            self.learn(a_t, o_tp1)
            a_tp1 = self.behaviourPolicy.decide(None)
            self.environment.sendAction(a_tp1)
            a_t = a_tp1
            waitingTime = self.Latency - self.latencyTimer.getCurrentMillis()
            if waitingTime > 0:
                time.sleep(waitingTime / 1000.0)
                
    def zephyrize(self):
        clock = self.environment.clock()
        zepy.advertise(self.environment, clock)
        zepy.advertise(self.demons, clock)
        for rewardFunction in self.rewards:
            zepy.monattr(rewardFunction, 'rewardValue', clock = clock, label = rewardFunction.label)

예제 #3

파일 보기

파일: DemonsPredictionOnPolicy.py 프로젝트: denizonural/rlpark

class DemonExperiment(object):
    Latency = 100 #s
    
    def __init__(self):
        self.environment = CritterbotSimulator()
        self.latencyTimer = Chrono()
        self.rewards = self.createRewardFunction()
        self.actions = XYThetaAction.sevenActions()
        self.behaviourPolicy = RandomPolicy(Random(0), self.actions)
        self.representation = TileCodersNoHashing(self.environment.legend().nbLabels(), -2000, 2000)
        self.representation.includeActiveFeature()
        self.demons = DemonScheduler()
        for rewardFunction in self.rewards:
            demon = self.createOnPolicyPredictionDemon(rewardFunction)
            self.demons.add(demon)
        self.x_t = None

    def createRewardFunction(self):
        legend = self.environment.legend()
        return list(SensorRewardFunction(legend, label) for label in legend.getLabels())

    def createOnPolicyPredictionDemon(self, rewardFunction):
        gamma = .9
        alpha = .1 / self.representation.nbActive()
        nbFeatures = self.representation.vectorSize()
        lambda_= .3
        td = TDLambda(lambda_, gamma, alpha, nbFeatures)
        return PredictionDemon(rewardFunction, td)
        
    def learn(self, a_t, o_tp1):
        for rewardFunction in self.rewards:
            rewardFunction.update(o_tp1)
        x_tp1 = self.representation.project(o_tp1)
        self.demons.update(self.x_t, a_t, x_tp1)
        self.x_t = x_tp1
        
    def run(self):
        a_t = None
        while not self.environment.isClosed():
            self.latencyTimer.start()
            o_tp1 = self.environment.waitNewObs()
            self.learn(a_t, o_tp1)
            a_tp1 = self.behaviourPolicy.decide(None)
            self.environment.sendAction(a_tp1)
            a_t = a_tp1
            waitingTime = self.Latency - self.latencyTimer.getCurrentMillis()
            if waitingTime > 0:
                time.sleep(waitingTime / 1000.0)
                
    def zephyrize(self):
        clock = self.environment.clock()
        zepy.advertise(self.environment, clock)
        zepy.advertise(self.demons, clock)
        for rewardFunction in self.rewards:
            zepy.monattr(rewardFunction, 'rewardValue', clock = clock, label = rewardFunction.label)