def test_callback_factory_generates_pickleable_objs():
    """
    This test does some heavy lifting to create two lambda functions, and
    then passes them through callback_factory to generate a state-handler.
    We then pickle the state handler to a temporary file, delete the
    functions, and try to unpickle the file in a worker thread, asserting
    that the result is what we think it should be.
    """

    def load_bytes(fname):
        # cloudpickle is imported inside the worker so the unpickling
        # happens without relying on the enclosing scope
        import cloudpickle

        with open(fname, "rb") as f:
            obj = cloudpickle.load(f)
        # a callback_factory handler has signature (obj, old_state, new_state)
        # and returns the new state, so this should evaluate to 3
        return obj(1, 2, 3)

    fn = lambda obj, state: None
    check = lambda state: True
    handler = callback_factory(fn, check)

    # mkstemp returns an open OS-level file descriptor; close it immediately
    # since we reopen the path with a normal file object below
    fd, tmp_file = tempfile.mkstemp()
    os.close(fd)
    try:
        with open(tmp_file, "wb") as bit_file:
            cloudpickle.dump(handler, bit_file)

        # delete the source lambdas to prove the pickle is self-contained
        del fn
        del check

        # use the pool as a context manager so it is always torn down
        # (the original leaked the pool and re-raised exceptions pointlessly)
        with ThreadPool(processes=1) as pool:
            result = pool.apply_async(load_bytes, (tmp_file,))
            value = result.get()
        assert value == 3
    finally:
        os.unlink(tmp_file)
def __init__(
    self,
    name: str,
    schedule: prefect.schedules.Schedule = None,
    environment: Environment = None,
    storage: Storage = None,
    tasks: Iterable[Task] = None,
    edges: Iterable[Edge] = None,
    reference_tasks: Iterable[Task] = None,
    state_handlers: List[Callable] = None,
    on_failure: Callable = None,
    validate: bool = None,
    result_handler: ResultHandler = None,
):
    """
    Initialize a Flow.

    Args:
        name: a required, non-empty name for the flow
        schedule: an optional schedule controlling when the flow runs
        environment: execution environment; defaults to RemoteEnvironment
        storage: optional storage backend for the flow
        tasks: optional iterable of tasks to add to the flow
        edges: optional iterable of edges to add between tasks
        reference_tasks: tasks used to determine final flow state
        state_handlers: iterable of callables invoked on state changes
        on_failure: convenience callback wrapped into a failed-state handler
        validate: whether to validate the graph while adding edges
        result_handler: handler for task results; a default is used if None

    Raises:
        ValueError: if `name` is empty
        TypeError: if `state_handlers` is provided but not a sequence
    """
    self._cache = {}  # type: dict

    if not name:
        raise ValueError("A name must be provided for the flow.")

    self.name = name
    self.logger = logging.get_logger("Flow: {}".format(self.name))
    self.schedule = schedule
    self.environment = environment or prefect.environments.RemoteEnvironment()
    self.storage = storage
    self.result_handler = (
        result_handler or prefect.engine.get_default_result_handler_class()()
    )

    self.tasks = set()  # type: Set[Task]
    self.edges = set()  # type: Set[Edge]
    # per-task mapping of constant (non-Task) inputs keyed by argument name
    self.constants = collections.defaultdict(dict)  # type: Dict[Task, Dict[str, Any]]

    for t in tasks or []:
        self.add_task(t)

    self.set_reference_tasks(reference_tasks or [])
    for e in edges or []:
        self.add_edge(
            upstream_task=e.upstream_task,
            downstream_task=e.downstream_task,
            key=e.key,
            mapped=e.mapped,
            validate=validate,
        )

    self._prefect_version = prefect.__version__

    # NOTE: collections.Sequence was a deprecated alias removed in
    # Python 3.10; collections.abc.Sequence is the correct location
    if state_handlers and not isinstance(state_handlers, collections.abc.Sequence):
        raise TypeError("state_handlers should be iterable.")
    self.state_handlers = state_handlers or []
    if on_failure is not None:
        self.state_handlers.append(
            callback_factory(on_failure, check=lambda s: s.is_failed())
        )

    super().__init__()
def __init__(
    self,
    name: str = None,
    slug: str = None,
    tags: Iterable[str] = None,
    max_retries: int = None,
    retry_delay: timedelta = None,
    timeout: int = None,
    trigger: "Callable[[Dict[Edge, State]], bool]" = None,
    skip_on_upstream_skip: bool = True,
    cache_for: timedelta = None,
    cache_validator: Callable = None,
    cache_key: str = None,
    checkpoint: bool = None,
    result_handler: "ResultHandler" = None,
    state_handlers: List[Callable] = None,
    on_failure: Callable = None,
    log_stdout: bool = False,
    result: "Result" = None,
    target: str = None,
):
    """
    Initialize a Task.

    Args:
        name: display name; defaults to the class name
        slug: optional unique identifier for this task within a flow
        tags: a set (never a string) of tags attached to the task; merged
            with any tags found in the current prefect context
        max_retries: number of retries; falls back to the config default
        retry_delay: delay between retries; falls back to the config default
        timeout: integer timeout in seconds; falls back to the config default
        trigger: callable deciding whether the task should run based on
            upstream states; defaults to `all_successful`
        skip_on_upstream_skip: whether this task skips when upstreams skip
        cache_for: duration to cache the task's output
        cache_validator: callable validating cached state; warns and is
            ignored if given without `cache_for`
        cache_key: key under which cached output is stored
        checkpoint: whether to checkpoint the task's result
        result_handler: DEPRECATED; converted into a Result via
            `ResultHandlerResult.from_result_handler`
        state_handlers: sequence of callables invoked on state changes
        on_failure: convenience callback wrapped into a failed-state handler
        log_stdout: whether to capture and log the task's stdout
        result: a Result instance describing how output is persisted
        target: location template; overrides `result.location` when both set

    Raises:
        TypeError: if `tags` is a string, `timeout` is not an int, or
            `state_handlers` is not a sequence
        ValueError: if the retry configuration is inconsistent
    """
    self.name = name or type(self).__name__
    self.slug = slug

    self.logger = logging.get_logger(self.name)

    # avoid silently iterating over a string
    if isinstance(tags, str):
        raise TypeError("Tags should be a set of tags, not a string.")
    current_tags = set(prefect.context.get("tags", set()))
    self.tags = (set(tags) if tags is not None else set()) | current_tags

    # fall back to configured defaults for any unspecified retry/timeout values
    max_retries = (max_retries if max_retries is not None else
                   prefect.config.tasks.defaults.max_retries)
    retry_delay = (retry_delay if retry_delay is not None else
                   prefect.config.tasks.defaults.retry_delay)
    timeout = (timeout if timeout is not None else
               prefect.config.tasks.defaults.timeout)

    if max_retries > 0 and retry_delay is None:
        raise ValueError(
            "A datetime.timedelta `retry_delay` must be provided if max_retries > 0"
        )
    # specify not max retries because the default is false
    if retry_delay is not None and not max_retries:
        raise ValueError(
            "A `max_retries` argument greater than 0 must be provided if specifying "
            "a retry delay.")
    if timeout is not None and not isinstance(timeout, int):
        raise TypeError(
            "Only integer timeouts (representing seconds) are supported.")
    self.max_retries = max_retries
    self.retry_delay = retry_delay
    self.timeout = timeout

    self.trigger = trigger or prefect.triggers.all_successful
    self.skip_on_upstream_skip = skip_on_upstream_skip

    # a cache_validator without a cache duration has no effect; warn rather
    # than raise so existing configurations keep working
    if cache_for is None and (
            cache_validator is not None and
            cache_validator is not prefect.engine.cache_validators.never_use):
        warnings.warn(
            "cache_validator provided without specifying cache expiration "
            "(cache_for); this Task will not be cached.")
    self.cache_for = cache_for
    self.cache_key = cache_key
    default_validator = (prefect.engine.cache_validators.never_use
                         if cache_for is None else
                         prefect.engine.cache_validators.duration_only)
    self.cache_validator = cache_validator or default_validator
    self.checkpoint = checkpoint

    # result handlers are deprecated in favor of Result classes; convert
    # a provided handler into the new-style Result
    if result_handler:
        warnings.warn(
            "Result Handlers are deprecated; please use the new style Result classes instead."
        )
        self.result = ResultHandlerResult.from_result_handler(
            result_handler)  # type: Optional[Result]
    else:
        self.result = result

    self.target = target

    # if both a target and a result were provided, update the result location
    # to point at the target
    if self.target and self.result:
        if (getattr(self.result, "location", None)
                and self.result.location != self.target):
            warnings.warn(
                "Both `result.location` and `target` were provided. "
                "The `target` value will be used.")
        # copy before mutating so a shared Result instance is not clobbered
        self.result = self.result.copy()
        self.result.location = self.target

    if state_handlers and not isinstance(state_handlers,
                                         collections.abc.Sequence):
        raise TypeError("state_handlers should be iterable.")
    self.state_handlers = state_handlers or []
    if on_failure is not None:
        self.state_handlers.append(
            callback_factory(on_failure, check=lambda s: s.is_failed()))
    self.auto_generated = False

    self.log_stdout = log_stdout

    # if new task creations are being tracked, add this task
    # this makes it possible to give guidance to users that forget
    # to add tasks to a flow
    if "_unused_task_tracker" in prefect.context:
        if not isinstance(self, prefect.tasks.core.constants.Constant):
            prefect.context._unused_task_tracker.add(self)
def __init__(
    self,
    name: str = None,
    slug: str = None,
    tags: Iterable[str] = None,
    max_retries: int = None,
    retry_delay: timedelta = None,
    timeout: int = None,
    trigger: Callable[[Set["State"]], bool] = None,
    skip_on_upstream_skip: bool = True,
    cache_for: timedelta = None,
    cache_validator: Callable = None,
    checkpoint: bool = None,
    result_handler: "ResultHandler" = None,
    state_handlers: List[Callable] = None,
    on_failure: Callable = None,
):
    """
    Initialize a Task.

    Args:
        name: display name; defaults to the class name
        slug: optional identifier for this task within a flow
        tags: a set (never a string) of tags; merged with context tags
        max_retries: number of retries; falls back to the config default
        retry_delay: delay between retries; falls back to the config default
        timeout: integer timeout in seconds; falls back to the config default
        trigger: callable deciding whether to run based on upstream states;
            defaults to `all_successful`
        skip_on_upstream_skip: whether this task skips when upstreams skip
        cache_for: duration to cache the task's output
        cache_validator: callable validating cached state; warns and is
            ignored if given without `cache_for`
        checkpoint: whether to checkpoint results; config default if None
        result_handler: handler used to persist task results
        state_handlers: sequence of callables invoked on state changes
        on_failure: convenience callback wrapped into a failed-state handler

    Raises:
        TypeError: if `tags` is a string, `timeout` is not an int, or
            `state_handlers` is not a sequence
        ValueError: if `max_retries > 0` without a `retry_delay`
    """
    self.name = name or type(self).__name__
    self.slug = slug
    self.id = str(uuid.uuid4())
    self.logger = logging.get_logger("Task")

    # avoid silently iterating over a string
    if isinstance(tags, str):
        raise TypeError("Tags should be a set of tags, not a string.")
    current_tags = set(prefect.context.get("tags", set()))
    self.tags = (set(tags) if tags is not None else set()) | current_tags

    # fall back to configured defaults for any unspecified retry/timeout values
    max_retries = (
        max_retries if max_retries is not None
        else prefect.config.tasks.defaults.max_retries
    )
    retry_delay = (
        retry_delay if retry_delay is not None
        else prefect.config.tasks.defaults.retry_delay
    )
    timeout = (
        timeout if timeout is not None else prefect.config.tasks.defaults.timeout
    )

    if max_retries > 0 and retry_delay is None:
        raise ValueError(
            "A datetime.timedelta `retry_delay` must be provided if max_retries > 0"
        )
    if timeout is not None and not isinstance(timeout, int):
        raise TypeError(
            "Only integer timeouts (representing seconds) are supported."
        )
    self.max_retries = max_retries
    self.retry_delay = retry_delay
    self.timeout = timeout

    self.trigger = trigger or prefect.triggers.all_successful
    self.skip_on_upstream_skip = skip_on_upstream_skip

    # a cache_validator without a cache duration has no effect; warn rather
    # than raise so existing configurations keep working
    if cache_for is None and (
        cache_validator is not None
        and cache_validator is not prefect.engine.cache_validators.never_use
    ):
        warnings.warn(
            "cache_validator provided without specifying cache expiration (cache_for); this Task will not be cached."
        )
    self.cache_for = cache_for
    default_validator = (
        prefect.engine.cache_validators.never_use
        if cache_for is None
        else prefect.engine.cache_validators.duration_only
    )
    self.cache_validator = cache_validator or default_validator
    self.checkpoint = (
        checkpoint
        if checkpoint is not None
        else prefect.config.tasks.defaults.checkpoint
    )
    self.result_handler = result_handler

    # FIX: collections.Sequence was a deprecated alias removed in
    # Python 3.10; collections.abc.Sequence is the correct location
    if state_handlers and not isinstance(state_handlers, collections.abc.Sequence):
        raise TypeError("state_handlers should be iterable.")
    self.state_handlers = state_handlers or []
    if on_failure is not None:
        self.state_handlers.append(
            callback_factory(on_failure, check=lambda s: s.is_failed())
        )
def __init__(
    self,
    name: str = None,
    slug: str = None,
    tags: Iterable[str] = None,
    max_retries: int = None,
    retry_delay: timedelta = None,
    timeout: int = None,
    trigger: Callable[[Set["State"]], bool] = None,
    skip_on_upstream_skip: bool = True,
    cache_for: timedelta = None,
    cache_validator: Callable = None,
    cache_key: str = None,
    checkpoint: bool = None,
    result_handler: Optional["ResultHandler"] = None,
    state_handlers: List[Callable] = None,
    on_failure: Callable = None,
    log_stdout: bool = False,
    result: Optional["Result"] = None,
):
    """
    Initialize a Task.

    Args:
        name: display name; defaults to the class name
        slug: unique identifier; a random UUID is generated if not given
        tags: a set (never a string) of tags; merged with context tags
        max_retries: number of retries; falls back to the config default
        retry_delay: delay between retries; falls back to the config default
        timeout: integer timeout in seconds; falls back to the config default
        trigger: callable deciding whether to run based on upstream states;
            defaults to `all_successful`
        skip_on_upstream_skip: whether this task skips when upstreams skip
        cache_for: DEPRECATED cache duration for the task's output
        cache_validator: DEPRECATED callable validating cached state
        cache_key: DEPRECATED key under which cached output is stored
        checkpoint: whether to checkpoint the task's result
        result_handler: DEPRECATED handler for persisting results; use
            `result` instead
        state_handlers: sequence of callables invoked on state changes
        on_failure: convenience callback wrapped into a failed-state handler
        log_stdout: whether to capture and log the task's stdout
        result: a Result instance describing how output is persisted

    Raises:
        TypeError: if `tags` is a string, `timeout` is not an int, or
            `state_handlers` is not a sequence
        ValueError: if the retry configuration is inconsistent
    """
    self.name = name or type(self).__name__
    self.slug = slug or str(uuid.uuid4())

    self.logger = logging.get_logger("Task: {}".format(self.name))

    # avoid silently iterating over a string
    if isinstance(tags, str):
        raise TypeError("Tags should be a set of tags, not a string.")
    current_tags = set(prefect.context.get("tags", set()))
    self.tags = (set(tags) if tags is not None else set()) | current_tags

    # fall back to configured defaults for any unspecified retry/timeout values
    max_retries = (
        max_retries
        if max_retries is not None
        else prefect.config.tasks.defaults.max_retries
    )
    retry_delay = (
        retry_delay
        if retry_delay is not None
        else prefect.config.tasks.defaults.retry_delay
    )
    timeout = (
        timeout if timeout is not None else prefect.config.tasks.defaults.timeout
    )

    if max_retries > 0 and retry_delay is None:
        raise ValueError(
            "A datetime.timedelta `retry_delay` must be provided if max_retries > 0"
        )
    # specify not max retries because the default is false
    if retry_delay is not None and not max_retries:
        raise ValueError(
            "A `max_retries` argument greater than 0 must be provided if specifying a retry delay."
        )
    if timeout is not None and not isinstance(timeout, int):
        raise TypeError(
            "Only integer timeouts (representing seconds) are supported."
        )
    self.max_retries = max_retries
    self.retry_delay = retry_delay
    self.timeout = timeout

    self.trigger = trigger or prefect.triggers.all_successful
    self.skip_on_upstream_skip = skip_on_upstream_skip

    # a cache_validator without a cache duration has no effect; warn rather
    # than raise so existing configurations keep working
    if cache_for is None and (
        cache_validator is not None
        and cache_validator is not prefect.engine.cache_validators.never_use
    ):
        warnings.warn(
            "cache_validator provided without specifying cache expiration (cache_for); this Task will not be cached."
        )
    self.cache_for = cache_for
    self.cache_key = cache_key
    default_validator = (
        prefect.engine.cache_validators.never_use
        if cache_for is None
        else prefect.engine.cache_validators.duration_only
    )
    self.cache_validator = cache_validator or default_validator
    self.checkpoint = checkpoint
    self.result_handler = result_handler

    if cache_for or cache_key or cache_validator:
        warnings.warn(
            "DEPRECATED: all cache_* options on a Task will be deprecated in 0.11.0, and removed in 0.12.0; the options will be moved to a Task's prefect.engine.Result object.",
            UserWarning,
        )

    if result_handler:
        warnings.warn(
            "DEPRECATED: the result_handler Task option will be deprecated in 0.11.0, and removed in 0.12.0, in favor of the `result` option instead.",
            UserWarning,
        )

    # FIX: collections.Sequence was a deprecated alias removed in
    # Python 3.10; collections.abc.Sequence is the correct location
    if state_handlers and not isinstance(state_handlers, collections.abc.Sequence):
        raise TypeError("state_handlers should be iterable.")
    self.state_handlers = state_handlers or []
    if on_failure is not None:
        self.state_handlers.append(
            callback_factory(on_failure, check=lambda s: s.is_failed())
        )
    self.auto_generated = False

    self.log_stdout = log_stdout
r = Random() v = r.randint(a=self.min, b=self.max) if v > threshold: raise signals.FAIL(message=f'{v} is greater than {threshold}') self.logger.info(f'Value is {v}') return v def notify_on_retry(task: Task, new_state: State): logger: Logger = prefect.context.get('logger') logger.warning( f'Task {task.name}/{task.slug} is retrying at {new_state.start_time}', ) log_on_retry = callback_factory(notify_on_retry, lambda s: s.is_retrying()) name = Parameter('name', default='potato') threshold = Parameter('threshold', default=8) schedule = IntervalSchedule( start_date=datetime.fromisoformat('2021-01-01'), interval=timedelta(minutes=2), ) with Flow( name='My first flow!', schedule=schedule, validate=True, executor=LocalDaskExecutor(), ) as flow: