예제 #1
0
    def requires(self):
        """
        Work out which ``self.of`` task instances are still missing in the range.

        The effective range starts at the later of ``start`` and the moving
        start, and ends at the earlier of ``stop`` and the moving stop, both
        evaluated relative to ``self.now`` (or the current clock time when
        ``self.now`` is ``None``). At most ``task_limit`` missing instances are
        required; with ``reverse`` set, the last ones are taken and returned
        in reversed order. The result is stored on the instance as
        ``_cached_requires`` and reused by later calls.

        Raises:
            ParameterException: if neither ``start`` nor ``stop`` is given, if
                ``start`` is missing while ``reverse`` is not set, or if
                ``start`` is greater than ``stop``.
        """
        # A fair amount of computation is expected, so reuse an earlier answer.
        try:
            return self._cached_requires
        except AttributeError:
            pass

        start_given = bool(self.start)
        stop_given = bool(self.stop)
        if not (start_given or stop_given):
            raise ParameterException(
                "At least one of start and stop needs to be specified")
        if not (start_given or self.reverse):
            raise ParameterException(
                "Either start needs to be specified or reverse needs to be True"
            )
        if start_given and stop_given and self.start > self.stop:
            raise ParameterException("Can't have start > stop")
        # TODO check overridden complete() and exists()

        timestamp = self.now if self.now is not None else time.time()
        now = datetime.utcfromtimestamp(timestamp)

        # Clamp the explicit bounds (when given) by the moving window.
        finite_start = self.moving_start(now)
        if self.start is not None:
            finite_start = max(
                self.parameter_to_datetime(self.start), finite_start)
        finite_stop = self.moving_stop(now)
        if self.stop is not None:
            finite_stop = min(
                self.parameter_to_datetime(self.stop), finite_stop)

        if finite_start <= finite_stop:
            datetimes = self.finite_datetimes(finite_start, finite_stop)
        else:
            datetimes = []

        task_cls = Register.get_task_cls(self.of)
        if not datetimes:
            missing_datetimes = []
            logger.debug('Empty range. No %s instances expected' % (self.of, ))
        else:
            logger.debug('Actually checking if range %s of %s is complete' %
                         (self._format_range(datetimes), self.of))
            missing_datetimes = list(
                self.missing_datetimes(task_cls, datetimes))
            missing_datetimes.sort()
            logger.debug('Range %s lacked %d of expected %d %s instances' %
                         (self._format_range(datetimes),
                          len(missing_datetimes), len(datetimes), self.of))

        self._emit_metrics(missing_datetimes, finite_start, finite_stop)

        # Take the batch from the end of the missing list when running in
        # reverse, otherwise from the beginning.
        limit = self.task_limit
        if self.reverse:
            required_datetimes = missing_datetimes[-limit:]
        else:
            required_datetimes = missing_datetimes[:limit]

        if required_datetimes:
            logger.debug('Requiring %d missing %s instances in range %s' %
                         (len(required_datetimes), self.of,
                          self._format_range(required_datetimes)))
        if self.reverse:
            # TODO priorities, so that within the batch tasks are ordered too
            required_datetimes.reverse()

        required_tasks = []
        for moment in required_datetimes:
            required_tasks.append(task_cls(self.datetime_to_parameter(moment)))
        self._cached_requires = required_tasks
        return self._cached_requires
예제 #2
0
 def finite_datetimes(self, finite_start, finite_stop):
     """
     Return the interval-aligned points in time within the given bounds.

     The points are whole multiples of ``self.minutes_interval`` minutes
     past the hour, e.g. with an interval of 5: 20:10, 20:15, 20:20, ...

     :param finite_start: inclusive lower bound (a datetime).
     :param finite_stop: exclusive upper bound (a datetime).
     :return: ascending list of datetimes ``t`` with
         ``finite_start <= t < finite_stop``.
     :raises ParameterException: if ``minutes_interval`` is not strictly
         between 0 and 60, or does not evenly divide 60.
     """
     interval = self.minutes_interval
     if not (0 < interval < 60):
         raise ParameterException('minutes-interval must be within 0..60')
     # Test the remainder directly. With Python 3 true division,
     # (60 / interval) * interval is float arithmetic and may round back to
     # 60 for values that do not divide 60, letting them through.
     if 60 % interval != 0:
         raise ParameterException(
             'minutes-interval does not evenly divide 60')
     # start of a complete interval, e.g. 20:13 and the interval is 5 -> 20:10
     start_minute = (finite_start.minute // interval) * interval
     current = datetime(year=finite_start.year,
                        month=finite_start.month,
                        day=finite_start.day,
                        hour=finite_start.hour,
                        minute=start_minute)
     step = timedelta(minutes=interval)
     datehours = []
     while current < finite_stop:
         # The aligned start can lie before finite_start; skip such points.
         if current >= finite_start:
             datehours.append(current)
         current += step
     return datehours
예제 #3
0
    def requires(self):
        """
        Return the missing ``self.of`` task instances inside the hourly window.

        The window covers every whole hour from ``hours_back`` hours before
        to ``hours_forward`` hours after the current hour (UTC, taken from
        ``self.now`` or the clock when that is ``None``), restricted to hours
        ``h`` with ``start <= h < stop`` for whichever bounds are set. At most
        ``task_limit`` missing hours are required; with ``reverse`` set, the
        last ones are taken and returned in reversed order. The result is
        stored on the instance as ``_cached_requires`` and reused by later
        calls.

        Raises:
            ParameterException: if neither ``start`` nor ``stop`` is given, or
                if ``start`` is missing while ``reverse`` is not set.
        """
        # cache because we anticipate a fair amount of computation
        if hasattr(self, '_cached_requires'):
            return self._cached_requires

        if not self.start and not self.stop:
            raise ParameterException(
                "At least one of start and stop needs to be specified")
        if not self.start and not self.reverse:
            raise ParameterException(
                "Either start needs to be specified or reverse needs to be True"
            )
        # TODO check overridden complete() and exists()

        now = datetime.utcfromtimestamp(
            time.time() if self.now is None else self.now)
        # Truncate to the start of the current hour.
        now = datetime(now.year, now.month, now.day, now.hour)
        window = (now + timedelta(hours=h)
                  for h in range(-self.hours_back, self.hours_forward + 1))
        # Build a real list: in Python 3 filter() yields a lazy iterator that
        # is always truthy and supports neither indexing nor len(), all of
        # which are relied upon below.
        datehours = [
            h for h in window
            if (not self.start or h >= self.start) and
            (not self.stop or h < self.stop)
        ]

        task_cls = Register.get_task_cls(self.of)
        if datehours:
            logger.debug(
                'Actually checking if range [%s, %s] of %s is complete' %
                (datehours[0], datehours[-1], self.of))
            missing_datehours = sorted(
                self.missing_datehours(task_cls, datehours))
            logger.debug(
                'Range [%s, %s] lacked %d of expected %d %s instances' %
                (datehours[0], datehours[-1], len(missing_datehours),
                 len(datehours), self.of))
        else:
            missing_datehours = []

        self._emit_metrics(missing_datehours, now)

        if self.reverse:
            required_datehours = missing_datehours[-self.task_limit:]
        else:
            required_datehours = missing_datehours[:self.task_limit]
        if required_datehours:
            logger.debug(
                'Requiring %d missing %s instances in range [%s, %s]' %
                (len(required_datehours), self.of, required_datehours[0],
                 required_datehours[-1]))
        if self.reverse:
            # I wish this determined the order tasks were scheduled or
            # executed, but it doesn't. No priorities in Luigi yet
            required_datehours.reverse()

        self._cached_requires = [task_cls(d) for d in required_datehours]
        return self._cached_requires
예제 #4
0
    def parse(self, s):
        """
        Convert ``s`` to an int and check that it is an allowed flatten mode.

        :param s: value accepted by ``int()``.
        :return: the integer mode.
        :raises ParameterException: if the integer is not contained in
            ``self.flatten_modes``.
        """
        mode = int(s)
        if mode in self.flatten_modes:
            return mode

        allowed = ' '.join(str(m) for m in self.flatten_modes)
        raise ParameterException('Flatten mode must be one of %s' % allowed)
예제 #5
0
    def normalize(self, x):
        """
        Check that ``x`` names an existing directory and return it unchanged.

        :param x: path to check with ``os.path.isdir``.
        :return: ``x`` itself.
        :raises ParameterException: if ``x`` is not an existing directory.
        """
        if os.path.isdir(x):
            return x
        raise ParameterException(f"Folder parameter {x} can't be found")