Example 1
# imports needed to run this test standalone
import os
import tempfile
from multiprocessing.pool import ThreadPool

import cloudpickle

from prefect.utilities.notifications import callback_factory


def test_callback_factory_generates_pickleable_objs():
    """
    This test does some heavy lifting to create two lambda functions,
    and then passes them through callback_factory to generate a state-handler.
    We then pickle the state handler to a temporary file, delete the functions,
    and try to unpickle the file in a new Process, asserting that the result is what we
    think it should be.
    """
    def load_bytes(fname):
        import cloudpickle

        with open(fname, "rb") as f:
            obj = cloudpickle.load(f)
        return obj(1, 2, 3)

    fn = lambda obj, state: None
    check = lambda state: True
    handler = callback_factory(fn, check)

    sd, tmp_file = tempfile.mkstemp()
    os.close(sd)
    try:
        with open(tmp_file, "wb") as bit_file:
            cloudpickle.dump(handler, bit_file)
        del fn
        del check
        pool = ThreadPool(processes=1)
        result = pool.apply_async(load_bytes, (tmp_file, ))
        value = result.get()
        assert value == 3
    except Exception as exc:
        raise exc
    finally:
        os.unlink(tmp_file)
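
The `value == 3` assertion makes sense if the factory returns a handler with the usual Prefect state-handler signature `(obj, old_state, new_state)` that fires `fn` when `check` passes and then returns the new state. Below is a minimal sketch of a factory with that behaviour, written only to illustrate why the test passes; it is not Prefect's implementation.

def callback_factory_sketch(fn, check):
    """Illustrative only: wrap `fn` and `check` into a state handler."""

    def state_handler(obj, old_state, new_state):
        # Fire the callback only when the predicate accepts the new state.
        if check(new_state):
            fn(obj, new_state)
        return new_state

    return state_handler


# With check=lambda state: True and fn returning None, the handler simply
# returns its third argument, so handler(1, 2, 3) == 3 as the test expects.
handler = callback_factory_sketch(lambda obj, state: None, lambda state: True)
assert handler(1, 2, 3) == 3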
Example 2
    def __init__(
        self,
        name: str,
        schedule: prefect.schedules.Schedule = None,
        environment: Environment = None,
        storage: Storage = None,
        tasks: Iterable[Task] = None,
        edges: Iterable[Edge] = None,
        reference_tasks: Iterable[Task] = None,
        state_handlers: List[Callable] = None,
        on_failure: Callable = None,
        validate: bool = None,
        result_handler: ResultHandler = None,
    ):
        self._cache = {}  # type: dict

        if not name:
            raise ValueError("A name must be provided for the flow.")

        self.name = name
        self.logger = logging.get_logger("Flow: {}".format(self.name))
        self.schedule = schedule
        self.environment = environment or prefect.environments.RemoteEnvironment()
        self.storage = storage
        self.result_handler = (
            result_handler or prefect.engine.get_default_result_handler_class()()
        )

        self.tasks = set()  # type: Set[Task]
        self.edges = set()  # type: Set[Edge]
        self.constants = collections.defaultdict(dict)  # type: Dict[Task, Dict[str, Any]]

        for t in tasks or []:
            self.add_task(t)

        self.set_reference_tasks(reference_tasks or [])
        for e in edges or []:
            self.add_edge(
                upstream_task=e.upstream_task,
                downstream_task=e.downstream_task,
                key=e.key,
                mapped=e.mapped,
                validate=validate,
            )

        self._prefect_version = prefect.__version__

        if state_handlers and not isinstance(state_handlers, collections.abc.Sequence):
            raise TypeError("state_handlers should be iterable.")
        self.state_handlers = state_handlers or []
        if on_failure is not None:
            self.state_handlers.append(
                callback_factory(on_failure, check=lambda s: s.is_failed()))

        super().__init__()
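
In practice this means a failure callback can be attached to a Flow without writing a full state handler: the constructor wraps `on_failure` with `callback_factory` as shown above. A hedged usage sketch follows; the flow and the alert body are hypothetical.

from prefect import Flow, task


def alert_on_failure(flow, state):
    # Invoked only when the flow reaches a failed state, because Flow.__init__
    # wraps this function with callback_factory(on_failure, check=lambda s: s.is_failed()).
    flow.logger.error("Flow %r failed with state %s", flow.name, state)


@task
def boom():
    raise ValueError("forced failure")


with Flow("on-failure-demo", on_failure=alert_on_failure) as flow:
    boom()

# flow.run() would now call alert_on_failure once the flow fails.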
Example 3
    def __init__(
        self,
        name: str = None,
        slug: str = None,
        tags: Iterable[str] = None,
        max_retries: int = None,
        retry_delay: timedelta = None,
        timeout: int = None,
        trigger: "Callable[[Dict[Edge, State]], bool]" = None,
        skip_on_upstream_skip: bool = True,
        cache_for: timedelta = None,
        cache_validator: Callable = None,
        cache_key: str = None,
        checkpoint: bool = None,
        result_handler: "ResultHandler" = None,
        state_handlers: List[Callable] = None,
        on_failure: Callable = None,
        log_stdout: bool = False,
        result: "Result" = None,
        target: str = None,
    ):
        self.name = name or type(self).__name__
        self.slug = slug

        self.logger = logging.get_logger(self.name)

        # avoid silently iterating over a string
        if isinstance(tags, str):
            raise TypeError("Tags should be a set of tags, not a string.")
        current_tags = set(prefect.context.get("tags", set()))
        self.tags = (set(tags) if tags is not None else set()) | current_tags

        max_retries = (max_retries if max_retries is not None else
                       prefect.config.tasks.defaults.max_retries)
        retry_delay = (retry_delay if retry_delay is not None else
                       prefect.config.tasks.defaults.retry_delay)
        timeout = (timeout if timeout is not None else
                   prefect.config.tasks.defaults.timeout)

        if max_retries > 0 and retry_delay is None:
            raise ValueError(
                "A datetime.timedelta `retry_delay` must be provided if max_retries > 0"
            )
        # use `not max_retries` so the config default of 0 retries is also rejected
        if retry_delay is not None and not max_retries:
            raise ValueError(
                "A `max_retries` argument greater than 0 must be provided if specifying "
                "a retry delay.")
        if timeout is not None and not isinstance(timeout, int):
            raise TypeError(
                "Only integer timeouts (representing seconds) are supported.")
        self.max_retries = max_retries
        self.retry_delay = retry_delay
        self.timeout = timeout

        self.trigger = trigger or prefect.triggers.all_successful
        self.skip_on_upstream_skip = skip_on_upstream_skip

        if cache_for is None and (
            cache_validator is not None
            and cache_validator is not prefect.engine.cache_validators.never_use
        ):
            warnings.warn(
                "cache_validator provided without specifying cache expiration "
                "(cache_for); this Task will not be cached.")

        self.cache_for = cache_for
        self.cache_key = cache_key
        default_validator = (prefect.engine.cache_validators.never_use
                             if cache_for is None else
                             prefect.engine.cache_validators.duration_only)
        self.cache_validator = cache_validator or default_validator
        self.checkpoint = checkpoint
        if result_handler:
            warnings.warn(
                "Result Handlers are deprecated; please use the new style Result classes instead."
            )
            self.result = ResultHandlerResult.from_result_handler(
                result_handler)  # type: Optional[Result]
        else:
            self.result = result

        self.target = target

        # if both a target and a result were provided, update the result location
        # to point at the target
        if self.target and self.result:
            if (getattr(self.result, "location", None)
                    and self.result.location != self.target):
                warnings.warn(
                    "Both `result.location` and `target` were provided. "
                    "The `target` value will be used.")
            self.result = self.result.copy()
            self.result.location = self.target

        if state_handlers and not isinstance(state_handlers,
                                             collections.abc.Sequence):
            raise TypeError("state_handlers should be iterable.")
        self.state_handlers = state_handlers or []
        if on_failure is not None:
            self.state_handlers.append(
                callback_factory(on_failure, check=lambda s: s.is_failed()))
        self.auto_generated = False

        self.log_stdout = log_stdout

        # if new task creations are being tracked, add this task
        # this makes it possible to give guidance to users that forget
        # to add tasks to a flow
        if "_unused_task_tracker" in prefect.context:
            if not isinstance(self, prefect.tasks.core.constants.Constant):
                prefect.context._unused_task_tracker.add(self)
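
The `target`/`result` handling above means a task given both options is checkpointed to the `target` path: the result is copied and its `location` is overridden. A hedged sketch of how that is commonly used follows; the paths and task are illustrative, assuming Prefect's `LocalResult` from `prefect.engine.results`.

from prefect import task
from prefect.engine.results import LocalResult


# Illustrative only: with both `target` and `result` supplied, Task.__init__
# copies the result and sets result.location = target, so this task is
# checkpointed to (and re-read from) ./results/answer.bin on later runs.
@task(
    checkpoint=True,
    target="answer.bin",
    result=LocalResult(dir="./results"),
)
def expensive_computation() -> int:
    return 42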
Example 4
    def __init__(
        self,
        name: str = None,
        slug: str = None,
        tags: Iterable[str] = None,
        max_retries: int = None,
        retry_delay: timedelta = None,
        timeout: int = None,
        trigger: Callable[[Set["State"]], bool] = None,
        skip_on_upstream_skip: bool = True,
        cache_for: timedelta = None,
        cache_validator: Callable = None,
        checkpoint: bool = None,
        result_handler: "ResultHandler" = None,
        state_handlers: List[Callable] = None,
        on_failure: Callable = None,
    ):

        self.name = name or type(self).__name__
        self.slug = slug

        self.id = str(uuid.uuid4())
        self.logger = logging.get_logger("Task")

        # avoid silently iterating over a string
        if isinstance(tags, str):
            raise TypeError("Tags should be a set of tags, not a string.")
        current_tags = set(prefect.context.get("tags", set()))
        self.tags = (set(tags) if tags is not None else set()) | current_tags

        max_retries = (
            max_retries
            if max_retries is not None
            else prefect.config.tasks.defaults.max_retries
        )
        retry_delay = (
            retry_delay
            if retry_delay is not None
            else prefect.config.tasks.defaults.retry_delay
        )
        timeout = (
            timeout if timeout is not None else prefect.config.tasks.defaults.timeout
        )

        if max_retries > 0 and retry_delay is None:
            raise ValueError(
                "A datetime.timedelta `retry_delay` must be provided if max_retries > 0"
            )
        if timeout is not None and not isinstance(timeout, int):
            raise TypeError(
                "Only integer timeouts (representing seconds) are supported."
            )
        self.max_retries = max_retries
        self.retry_delay = retry_delay
        self.timeout = timeout

        self.trigger = trigger or prefect.triggers.all_successful
        self.skip_on_upstream_skip = skip_on_upstream_skip

        if cache_for is None and (
            cache_validator is not None
            and cache_validator is not prefect.engine.cache_validators.never_use
        ):
            warnings.warn(
                "cache_validator provided without specifying cache expiration (cache_for); this Task will not be cached."
            )

        self.cache_for = cache_for
        default_validator = (
            prefect.engine.cache_validators.never_use
            if cache_for is None
            else prefect.engine.cache_validators.duration_only
        )
        self.cache_validator = cache_validator or default_validator
        self.checkpoint = (
            checkpoint
            if checkpoint is not None
            else prefect.config.tasks.defaults.checkpoint
        )
        self.result_handler = result_handler

        if state_handlers and not isinstance(state_handlers, collections.abc.Sequence):
            raise TypeError("state_handlers should be iterable.")
        self.state_handlers = state_handlers or []
        if on_failure is not None:
            self.state_handlers.append(
                callback_factory(on_failure, check=lambda s: s.is_failed())
            )
Example 5
    def __init__(
        self,
        name: str = None,
        slug: str = None,
        tags: Iterable[str] = None,
        max_retries: int = None,
        retry_delay: timedelta = None,
        timeout: int = None,
        trigger: Callable[[Set["State"]], bool] = None,
        skip_on_upstream_skip: bool = True,
        cache_for: timedelta = None,
        cache_validator: Callable = None,
        cache_key: str = None,
        checkpoint: bool = None,
        result_handler: Optional["ResultHandler"] = None,
        state_handlers: List[Callable] = None,
        on_failure: Callable = None,
        log_stdout: bool = False,
        result: Optional["Result"] = None,
    ):
        self.name = name or type(self).__name__
        self.slug = slug or str(uuid.uuid4())

        self.logger = logging.get_logger("Task: {}".format(self.name))

        # avoid silently iterating over a string
        if isinstance(tags, str):
            raise TypeError("Tags should be a set of tags, not a string.")
        current_tags = set(prefect.context.get("tags", set()))
        self.tags = (set(tags) if tags is not None else set()) | current_tags

        max_retries = (
            max_retries
            if max_retries is not None
            else prefect.config.tasks.defaults.max_retries
        )
        retry_delay = (
            retry_delay
            if retry_delay is not None
            else prefect.config.tasks.defaults.retry_delay
        )
        timeout = (
            timeout if timeout is not None else prefect.config.tasks.defaults.timeout
        )

        if max_retries > 0 and retry_delay is None:
            raise ValueError(
                "A datetime.timedelta `retry_delay` must be provided if max_retries > 0"
            )
        # use `not max_retries` so the config default of 0 retries is also rejected
        if retry_delay is not None and not max_retries:
            raise ValueError(
                "A `max_retries` argument greater than 0 must be provided if specifying a retry delay."
            )
        if timeout is not None and not isinstance(timeout, int):
            raise TypeError(
                "Only integer timeouts (representing seconds) are supported."
            )
        self.max_retries = max_retries
        self.retry_delay = retry_delay
        self.timeout = timeout

        self.trigger = trigger or prefect.triggers.all_successful
        self.skip_on_upstream_skip = skip_on_upstream_skip

        if cache_for is None and (
            cache_validator is not None
            and cache_validator is not prefect.engine.cache_validators.never_use
        ):
            warnings.warn(
                "cache_validator provided without specifying cache expiration (cache_for); this Task will not be cached."
            )

        self.cache_for = cache_for
        self.cache_key = cache_key
        default_validator = (
            prefect.engine.cache_validators.never_use
            if cache_for is None
            else prefect.engine.cache_validators.duration_only
        )
        self.cache_validator = cache_validator or default_validator
        self.checkpoint = checkpoint
        self.result_handler = result_handler

        if cache_for or cache_key or cache_validator:
            warnings.warn(
                "DEPRECATED: all cache_* options on a Task will be deprecated in 0.11.0, and removed in 0.12.0; the options will be moved to a Task's prefect.engine.Result object.",
                UserWarning,
            )

        if result_handler:
            warnings.warn(
                "DEPRECATED: the result_handler Task option will be deprecated in 0.11.0, and removed in 0.12.0, in favor of the `result` option instead.",
                UserWarning,
            )

        if state_handlers and not isinstance(state_handlers, collections.abc.Sequence):
            raise TypeError("state_handlers should be iterable.")
        self.state_handlers = state_handlers or []
        if on_failure is not None:
            self.state_handlers.append(
                callback_factory(on_failure, check=lambda s: s.is_failed())
            )
        self.auto_generated = False

        self.log_stdout = log_stdout
Example 6
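        # Excerpt: the lines below are the body of a Task subclass's run()
        # method (it references self.min, self.max and the task's logger).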
        r = Random()
        v = r.randint(a=self.min, b=self.max)
        if v > threshold:
            raise signals.FAIL(message=f'{v} is greater than {threshold}')
        self.logger.info(f'Value is {v}')
        return v


def notify_on_retry(task: Task, new_state: State):
    logger: Logger = prefect.context.get('logger')
    logger.warning(
        f'Task {task.name}/{task.slug} is retrying at {new_state.start_time}',
    )


log_on_retry = callback_factory(notify_on_retry, lambda s: s.is_retrying())
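

# Hypothetical attachment point, shown only for illustration: any Task accepts
# a `state_handlers` list (see the constructors above), so the retry notifier
# built with callback_factory can be wired into a task like this
# (assumes `from prefect import task`):
@task(max_retries=3, retry_delay=timedelta(seconds=10), state_handlers=[log_on_retry])
def flaky() -> int:
    return 1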

name = Parameter('name', default='potato')
threshold = Parameter('threshold', default=8)

schedule = IntervalSchedule(
    start_date=datetime.fromisoformat('2021-01-01'),
    interval=timedelta(minutes=2),
)

with Flow(
        name='My first flow!',
        schedule=schedule,
        validate=True,
        executor=LocalDaskExecutor(),
) as flow: