def process(self, please_stop):
    """
    Rebuild the work queue (self.todo) and run every queued chunk in order.

    Each queue entry is a (start, end, branch) tuple; one entry is queued per
    branch in config.branches for every interval still outstanding. Chunks
    after done.max are queued first (walking forwards), followed by chunks
    before done.min (walking backwards).

    :param please_stop: truth-tested before each chunk; when truthy no further
        chunks are started. It is also handed to process_one() unchanged.

    Any Exception raised while working through the queue is reported with
    Log.warning(). When no exception was raised, which includes the case of
    stopping early on please_stop, destination.merge_shards() is called.
    """
    finished = self.done
    settings = self.config

    # START WITH AN EMPTY QUEUE
    self.todo = []

    # FORWARDS: FROM THE FLOOR OF done.max UP TO THE END OF THE RANGE
    # NOTE(review): PRESUMABLY Date.floor ALIGNS TO AN INTERVAL BOUNDARY - CONFIRM
    if finished.max < settings.range.max:
        lower = Date.floor(finished.max, settings.interval)
        while lower < settings.range.max:
            upper = lower + settings.interval
            self.todo.extend(
                (lower, upper, branch) for branch in settings.branches
            )
            lower = upper

    # BACKWARDS: FROM THE CEILING OF done.min DOWN TO THE START OF THE RANGE
    if settings.range.min < finished.min:
        upper = Date.ceiling(finished.min, settings.interval)
        while settings.range.min < upper:
            lower = upper - settings.interval
            self.todo.extend(
                (lower, upper, branch) for branch in settings.branches
            )
            upper = lower

    try:
        for lower, upper, branch in self.todo:
            if please_stop:
                break
            self.process_one(lower, upper, branch, please_stop)
    except Exception as cause:
        Log.warning("Could not complete the etl", cause=cause)
    else:
        # REACHED WHENEVER NOTHING WAS RAISED, EVEN IF THE LOOP STOPPED EARLY
        self.destination.merge_shards()
def next(self, value):
    """
    Consume one value, returning the current batch key followed by whatever
    the child returns for the remaining elements.

    :param value: sequence; value[0] is converted with Date(), value[1:] is
        forwarded to self.child.next()
    :return: [self.batch] + the child's result

    Log.error("Expecting strictly increasing") is called when the new date is
    earlier than the previous date floored to self.duration. When the computed
    batch key differs from self.batch, the child is reset before it is used.
    """
    stamp = Date(value[0])
    previous_floor = self.last_value.floor(self.duration)
    if previous_floor > stamp:
        Log.error("Expecting strictly increasing")
    self.last_value = stamp

    # NUMBER OF WHOLE DURATIONS BETWEEN self.start AND THE FLOORED DATE
    elapsed = stamp.floor(self.duration) - self.start
    batch_key = Math.round(elapsed / self.duration, decimal=0)
    if batch_key != self.batch:
        # NEW BATCH: THE CHILD STARTS OVER
        self.child.reset()
        self.batch = batch_key

    tail = self.child.next(value[1:])
    return [self.batch] + tail