def __init__(self):
    """Start the Critterbot simulator and create the off-policy control demons.

    Builds, in order: the simulator environment, a latency timer, the
    reward functions, the action set, a random behaviour policy, the
    tile-coded representation, a demon scheduler, and one demon per
    reward function (via ``createOffPolicyControlDemon``).
    """
    # startSimulator() returns the value the CritterbotSimulator
    # constructor is given.
    self.environment = CritterbotSimulator(CritterbotSimulator.startSimulator())
    self.latencyTimer = Chrono()
    self.rewards = self.createRewardFunction()
    self.actions = XYThetaAction.sevenActions()
    # NOTE(review): Random(0) presumably fixes the seed so runs are
    # repeatable -- confirm against the Random class in use.
    self.behaviourPolicy = RandomPolicy(Random(0), self.actions)

    # The tile coder is sized from the number of labels in the
    # environment legend.
    # NOTE(review): -2000 / 2000 look like the expected value range of
    # the inputs -- confirm against TileCodersNoHashing.
    nbInputs = self.environment.legend().nbLabels()
    self.representation = TileCodersNoHashing(nbInputs, -2000, 2000)
    self.representation.includeActiveFeature()

    self.demonsScheduler = DemonScheduler()
    # One demon per reward function, kept in the order of self.rewards.
    self.demons = []
    for reward in self.rewards:
        self.demons.append(self.createOffPolicyControlDemon(reward))

    # x_t starts out unset.
    self.x_t = None
    self.clock = zepy.clock("Horde Off-policy Control demons")
def __init__(self):
    """Start the Critterbot simulator and assemble a Horde of prediction demons.

    Builds, in order: the simulator environment, a latency timer, the
    reward functions, the action set, a random behaviour policy, the
    tile-coded representation, one demon per reward function (via
    ``createOnPolicyPredictionDemon``), and a ``Horde`` that receives
    both the demons and the reward functions.
    """
    simulatorCommand = CritterbotSimulator.startSimulator()
    self.environment = CritterbotSimulator(simulatorCommand)
    self.latencyTimer = Chrono()
    self.rewards = self.createRewardFunction()
    self.actions = XYThetaAction.sevenActions()
    # NOTE(review): Random(0) presumably fixes the seed so runs are
    # repeatable -- confirm against the Random class in use.
    self.behaviourPolicy = RandomPolicy(Random(0), self.actions)

    # The tile coder is sized from the number of labels in the
    # environment legend.
    # NOTE(review): -2000 / 2000 look like the expected value range of
    # the inputs -- confirm against TileCodersNoHashing.
    legend = self.environment.legend()
    self.representation = TileCodersNoHashing(legend.nbLabels(), -2000, 2000)
    self.representation.includeActiveFeature()

    # One demon per reward function, kept in the order of self.rewards.
    self.demons = []
    for reward in self.rewards:
        predictionDemon = self.createOnPolicyPredictionDemon(reward)
        self.demons.append(predictionDemon)

    # The horde receives every demon, and the reward functions are added
    # to its "before" functions.
    self.horde = Horde()
    hordeDemons = self.horde.demons()
    hordeDemons.addAll(self.demons)
    hordeBeforeFunctions = self.horde.beforeFunctions()
    hordeBeforeFunctions.addAll(self.rewards)

    # x_t starts out unset.
    self.x_t = None
    self.clock = zepy.clock("Nexting Clock")
def run(self):
    """Cycle ``monitoredAttribute`` through 0..9 for as long as the clock ticks.

    A fresh zepy clock is created; ``monitoredAttribute`` and a function
    returning its negation (named "Function") are passed to zepy together
    with that clock. Each successful ``tick()`` then advances the
    attribute by one, wrapping with modulo 10.
    """
    ticker = zepy.clock()
    # NOTE(review): monattr/monfunc presumably register the values for
    # monitoring on this clock -- confirm against the zepy module.
    zepy.monattr(ticker, self, 'monitoredAttribute')
    zepy.monfunc(ticker, (lambda: -self.monitoredAttribute), name="Function")
    # The loop ends as soon as tick() returns a falsy value.
    while ticker.tick():
        incremented = self.monitoredAttribute + 1
        self.monitoredAttribute = incremented % 10