def requires(self):
    """
    Return instances of the ``self.of`` task for datetimes missing in the range.

    The range is bounded by ``self.moving_start(now)`` / ``self.moving_stop(now)``
    and narrowed further by ``self.start`` / ``self.stop`` when those are set.
    At most ``self.task_limit`` missing datetimes are required, taken from the
    end of the range when ``self.reverse`` is true and from the beginning
    otherwise. The result is stored on ``self._cached_requires`` and returned
    as-is on later calls.

    Raises ParameterException when neither start nor stop is given, when start
    is absent and reverse is false, or when start > stop.
    """
    # The computation below is expected to be costly, so reuse a prior result.
    if hasattr(self, '_cached_requires'):
        return self._cached_requires

    if not (self.start or self.stop):
        raise ParameterException(
            "At least one of start and stop needs to be specified")
    if not (self.start or self.reverse):
        raise ParameterException(
            "Either start needs to be specified or reverse needs to be True")
    if self.start and self.stop and self.start > self.stop:
        raise ParameterException("Can't have start > stop")
    # TODO check overridden complete() and exists()

    timestamp = time.time() if self.now is None else self.now
    now = datetime.utcfromtimestamp(timestamp)

    # Lower bound: the moving window start, tightened by an explicit start.
    moving_start = self.moving_start(now)
    if self.start is None:
        finite_start = moving_start
    else:
        finite_start = max(self.parameter_to_datetime(self.start),
                           moving_start)

    # Upper bound: the moving window stop, tightened by an explicit stop.
    moving_stop = self.moving_stop(now)
    if self.stop is None:
        finite_stop = moving_stop
    else:
        finite_stop = min(self.parameter_to_datetime(self.stop),
                          moving_stop)

    if finite_start <= finite_stop:
        datetimes = self.finite_datetimes(finite_start, finite_stop)
    else:
        datetimes = []

    task_cls = Register.get_task_cls(self.of)
    if not datetimes:
        missing_datetimes = []
        logger.debug('Empty range. No %s instances expected' % (self.of, ))
    else:
        logger.debug('Actually checking if range %s of %s is complete' %
                     (self._format_range(datetimes), self.of))
        missing_datetimes = sorted(
            self.missing_datetimes(task_cls, datetimes))
        logger.debug('Range %s lacked %d of expected %d %s instances' %
                     (self._format_range(datetimes), len(missing_datetimes),
                      len(datetimes), self.of))

    self._emit_metrics(missing_datetimes, finite_start, finite_stop)

    # Pick the batch from whichever end of the sorted list we work from.
    if self.reverse:
        required_datetimes = missing_datetimes[-self.task_limit:]
    else:
        required_datetimes = missing_datetimes[:self.task_limit]

    if required_datetimes:
        logger.debug('Requiring %d missing %s instances in range %s' %
                     (len(required_datetimes), self.of,
                      self._format_range(required_datetimes)))
    if self.reverse:
        # TODO priorities, so that within the batch tasks are ordered too
        required_datetimes.reverse()

    self._cached_requires = [
        task_cls(self.datetime_to_parameter(moment))
        for moment in required_datetimes
    ]
    return self._cached_requires
def finite_datetimes(self, finite_start, finite_stop):
    """
    Return the points in time in [finite_start, finite_stop) that fall on a
    whole number of ``self.minutes_interval`` minutes within the hour.

    The returned datetimes are built from the year/month/day/hour of
    ``finite_start`` plus an interval-aligned minute, so seconds and
    microseconds are always zero.

    Raises ParameterException if ``self.minutes_interval`` is not strictly
    between 0 and 60, or does not evenly divide 60.
    """
    interval = self.minutes_interval
    # Validate that the interval is greater than 0, lesser than 60 and divides 60.
    if not (0 < interval < 60):
        raise ParameterException('minutes-interval must be within 0..60')
    # Use the modulo operator: with true division, (60 / interval) * interval
    # rounds back to 60 for non-divisors such as 7 and the check never fires.
    if 60 % interval != 0:
        raise ParameterException(
            'minutes-interval does not evenly divide 60')

    # start of a complete interval, e.g. 20:13 and the interval is 5 -> 20:10
    start_minute = finite_start.minute // interval * interval
    current = datetime(year=finite_start.year,
                       month=finite_start.month,
                       day=finite_start.day,
                       hour=finite_start.hour,
                       minute=start_minute)
    step = timedelta(minutes=interval)

    datehours = []
    while current < finite_stop:
        # The aligned start may lie before finite_start; skip such points.
        if current >= finite_start:
            datehours.append(current)
        current += step
    return datehours
def requires(self):
    """
    Return instances of the ``self.of`` task for datehours missing in the range.

    Candidate hours run from ``self.hours_back`` hours before the current
    hour to ``self.hours_forward`` hours after it (inclusive), restricted to
    ``self.start <= h < self.stop`` when those bounds are set. At most
    ``self.task_limit`` missing datehours are required, taken from the end
    when ``self.reverse`` is true and from the beginning otherwise. The
    result is stored on ``self._cached_requires`` and returned as-is on later
    calls.

    Raises ParameterException when neither start nor stop is given, or when
    start is absent and reverse is false.
    """
    # cache because we anticipate a fair amount of computation
    if hasattr(self, '_cached_requires'):
        return self._cached_requires

    if not self.start and not self.stop:
        raise ParameterException(
            "At least one of start and stop needs to be specified")
    if not self.start and not self.reverse:
        raise ParameterException(
            "Either start needs to be specified or reverse needs to be True")
    # TODO check overridden complete() and exists()

    now = datetime.utcfromtimestamp(
        time.time() if self.now is None else self.now)
    # Truncate to the start of the current hour.
    now = datetime(now.year, now.month, now.day, now.hour)

    candidates = (
        now + timedelta(hours=offset)
        for offset in range(-self.hours_back, self.hours_forward + 1)
    )
    # Build a real list: on Python 3 filter() yields a lazy iterator that is
    # always truthy and supports neither indexing nor len(), which the code
    # below relies on.
    datehours = [
        h for h in candidates
        if (not self.start or h >= self.start)
        and (not self.stop or h < self.stop)
    ]

    task_cls = Register.get_task_cls(self.of)
    if datehours:
        logger.debug(
            'Actually checking if range [%s, %s] of %s is complete' %
            (datehours[0], datehours[-1], self.of))
        missing_datehours = sorted(
            self.missing_datehours(task_cls, datehours))
        logger.debug(
            'Range [%s, %s] lacked %d of expected %d %s instances' %
            (datehours[0], datehours[-1], len(missing_datehours),
             len(datehours), self.of))
    else:
        missing_datehours = []

    self._emit_metrics(missing_datehours, now)

    if self.reverse:
        required_datehours = missing_datehours[-self.task_limit:]
    else:
        required_datehours = missing_datehours[:self.task_limit]

    if required_datehours:
        logger.debug(
            'Requiring %d missing %s instances in range [%s, %s]' %
            (len(required_datehours), self.of, required_datehours[0],
             required_datehours[-1]))
    if self.reverse:
        # I wish this determined the order tasks were scheduled or executed,
        # but it doesn't. No priorities in Luigi yet
        required_datehours.reverse()

    self._cached_requires = [task_cls(d) for d in required_datehours]
    return self._cached_requires
def parse(self, s):
    """
    Convert ``s`` to an int and return it if it is one of ``self.flatten_modes``.

    Raises ParameterException when the converted value is not an allowed mode.
    """
    mode = int(s)
    if mode in self.flatten_modes:
        return mode
    allowed = ' '.join(str(m) for m in self.flatten_modes)
    raise ParameterException('Flatten mode must be one of %s' % allowed)
def normalize(self, x):
    """
    Return ``x`` unchanged when it names an existing directory.

    Raises ParameterException when ``os.path.isdir(x)`` is false.
    """
    if os.path.isdir(x):
        return x
    raise ParameterException(f"Folder parameter {x} can't be found")