def map_execute_task_cmd(
    inputs,
    output_prefix,
    raw_output_data_prefix,
    max_concurrency,
    test,
    dynamic_addl_distro,
    dynamic_dest_dir,
    resolver,
    resolver_args,
    prev_checkpoint,
    checkpoint_path,
):
    logger.info(get_version_message())

    raw_output_data_prefix, checkpoint_path, prev_checkpoint = normalize_inputs(
        raw_output_data_prefix, checkpoint_path, prev_checkpoint
    )

    _execute_map_task(
        inputs=inputs,
        output_prefix=output_prefix,
        raw_output_data_prefix=raw_output_data_prefix,
        max_concurrency=max_concurrency,
        test=test,
        dynamic_addl_distro=dynamic_addl_distro,
        dynamic_dest_dir=dynamic_dest_dir,
        resolver=resolver,
        resolver_args=resolver_args,
        checkpoint_path=checkpoint_path,
        prev_checkpoint=prev_checkpoint,
    )
def _execute_map_task(
    inputs,
    output_prefix,
    raw_output_data_prefix,
    max_concurrency,
    test,
    resolver: str,
    resolver_args: List[str],
    checkpoint_path: Optional[str] = None,
    prev_checkpoint: Optional[str] = None,
    dynamic_addl_distro: Optional[str] = None,
    dynamic_dest_dir: Optional[str] = None,
):
    """
    This function should be called by map tasks and aws-batch tasks.

    resolver should be something like:
        flytekit.core.python_auto_container.default_task_resolver
    resolver args should be something like:
        task_module app.workflows task_name task_1
    Using dashes seems to mess up click; e.g. --task_module seems to interfere.

    :param inputs: Where to read inputs
    :param output_prefix: Where to write primitive outputs
    :param raw_output_data_prefix: Where to write offloaded data (files, directories, dataframes).
    :param max_concurrency: The maximum number of subtasks allowed to run in parallel
    :param test: Dry run
    :param resolver: The task resolver to use. This needs to be loadable directly from importlib (and thus cannot be
      nested).
    :param resolver_args: Args that will be passed to the aforementioned resolver's load_task function
    :return:
    """
    if len(resolver_args) < 1:
        raise Exception(f"Resolver args cannot be <1, got {resolver_args}")

    with setup_execution(
        raw_output_data_prefix, checkpoint_path, prev_checkpoint, dynamic_addl_distro, dynamic_dest_dir
    ) as ctx:
        resolver_obj = load_object_from_module(resolver)
        # Use the resolver to load the actual task object
        _task_def = resolver_obj.load_task(loader_args=resolver_args)
        if not isinstance(_task_def, PythonFunctionTask):
            raise Exception("Map tasks can only be run with PythonFunctionTasks, not instance tasks.")
        map_task = MapPythonTask(_task_def, max_concurrency)

        task_index = _compute_array_job_index()
        output_prefix = os.path.join(output_prefix, str(task_index))

        if test:
            logger.info(
                f"Test detected, returning. Inputs: {inputs} Computed task index: {task_index} "
                f"New output prefix: {output_prefix} Raw output path: {raw_output_data_prefix} "
                f"Resolver and args: {resolver} {resolver_args}"
            )
            return

        _handle_annotated_task(ctx, map_task, inputs, output_prefix)
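# A minimal, hypothetical sketch of the array-index arithmetic above: each subtask of a map
# task discovers its own index from the environment and writes outputs under its own numbered
# subdirectory of output_prefix. The env var names below are illustrative assumptions, not
# necessarily what _compute_array_job_index actually reads.
import os


def _sketch_subtask_output_prefix(output_prefix: str) -> str:
    # AWS Batch array jobs expose the subtask index via AWS_BATCH_JOB_ARRAY_INDEX,
    # possibly shifted by an offset (both assumed here for illustration).
    offset = int(os.environ.get("BATCH_JOB_ARRAY_INDEX_OFFSET", "0"))
    index = offset + int(os.environ.get("AWS_BATCH_JOB_ARRAY_INDEX", "0"))
    # Subtask 3 of a map task with output_prefix s3://bucket/out writes to s3://bucket/out/3
    return os.path.join(output_prefix, str(index))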
def _execute_task(
    inputs: str,
    output_prefix: str,
    test: bool,
    raw_output_data_prefix: str,
    resolver: str,
    resolver_args: List[str],
    checkpoint_path: Optional[str] = None,
    prev_checkpoint: Optional[str] = None,
    dynamic_addl_distro: Optional[str] = None,
    dynamic_dest_dir: Optional[str] = None,
):
    """
    This function should be called for new API tasks (those only available in 0.16 and later that leverage Python
    native typing).

    resolver should be something like:
        flytekit.core.python_auto_container.default_task_resolver
    resolver args should be something like:
        task_module app.workflows task_name task_1
    Using dashes seems to mess up click; e.g. --task_module seems to interfere.

    :param inputs: Where to read inputs
    :param output_prefix: Where to write primitive outputs
    :param raw_output_data_prefix: Where to write offloaded data (files, directories, dataframes).
    :param test: Dry run
    :param resolver: The task resolver to use. This needs to be loadable directly from importlib (and thus cannot be
      nested).
    :param resolver_args: Args that will be passed to the aforementioned resolver's load_task function
    :param dynamic_addl_distro: In the case of parent tasks executed using the 'fast' mode this captures where the
      compressed code archive has been uploaded.
    :param dynamic_dest_dir: In the case of parent tasks executed using the 'fast' mode this captures where compressed
      code archives should be installed in the flyte task container.
    :return:
    """
    if len(resolver_args) < 1:
        raise Exception(f"Resolver args cannot be <1, got {resolver_args}")

    with setup_execution(
        raw_output_data_prefix,
        checkpoint_path,
        prev_checkpoint,
        dynamic_addl_distro,
        dynamic_dest_dir,
    ) as ctx:
        resolver_obj = load_object_from_module(resolver)
        # Use the resolver to load the actual task object
        _task_def = resolver_obj.load_task(loader_args=resolver_args)

        if test:
            logger.info(
                f"Test detected, returning. Args were {inputs} {output_prefix} {raw_output_data_prefix} "
                f"{resolver} {resolver_args}"
            )
            return

        _handle_annotated_task(ctx, _task_def, inputs, output_prefix)
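# A minimal sketch of what the resolver plumbing above amounts to, assuming a module
# app/workflows.py that defines a @task named task_1 (both names are hypothetical, taken
# from the docstring's example). The loader_args format follows that docstring.
from flytekit.core.python_auto_container import default_task_resolver

# load_object_from_module("flytekit.core.python_auto_container.default_task_resolver")
# returns this resolver object; its load_task consumes the resolver_args from the CLI.
task_def = default_task_resolver.load_task(
    loader_args=["task_module", "app.workflows", "task_name", "task_1"]
)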
def execute_task_cmd(
    inputs,
    output_prefix,
    raw_output_data_prefix,
    test,
    prev_checkpoint,
    checkpoint_path,
    dynamic_addl_distro,
    dynamic_dest_dir,
    resolver,
    resolver_args,
):
    logger.info(get_version_message())
    # We get weird errors if there are no click echo messages at all, so emit an empty string so that unit tests pass.
    _click.echo("")

    raw_output_data_prefix, checkpoint_path, prev_checkpoint = normalize_inputs(
        raw_output_data_prefix, checkpoint_path, prev_checkpoint
    )

    # For new API tasks (as of 0.16.x), we need to call a different function.
    # Use the presence of the resolver to differentiate between old API tasks and new API tasks.
    # Adding a new top-level command seemed out of scope at the time of this writing, given how
    # pervasive this top-level command already is (mostly in plugins).
    logger.debug(f"Running task execution with resolver {resolver}...")
    _execute_task(
        inputs=inputs,
        output_prefix=output_prefix,
        raw_output_data_prefix=raw_output_data_prefix,
        test=test,
        resolver=resolver,
        resolver_args=resolver_args,
        dynamic_addl_distro=dynamic_addl_distro,
        dynamic_dest_dir=dynamic_dest_dir,
        checkpoint_path=checkpoint_path,
        prev_checkpoint=prev_checkpoint,
    )
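# For reference, execute_task_cmd backs the container entrypoint (registered as the
# pyflyte-execute console script in flytekit). A hypothetical invocation might look like
# the following; the exact flag spellings come from click decorators that are not shown
# here, so treat this as an illustrative assumption:
#
#   pyflyte-execute \
#       --inputs s3://bucket/metadata/inputs.pb \
#       --output-prefix s3://bucket/metadata/output \
#       --raw-output-data-prefix s3://bucket/raw \
#       --resolver flytekit.core.python_auto_container.default_task_resolver \
#       -- task_module app.workflows task_name task_1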
def _dispatch_execute(
    ctx: FlyteContext,
    task_def: PythonTask,
    inputs_path: str,
    output_prefix: str,
):
    """
    Dispatches execute to PythonTask
        Step1: Download inputs and load into a literal map
        Step2: Invoke task - dispatch_execute
        Step3:
            a: [Optional] Record outputs to output_prefix
            b: OR if IgnoreOutputs is raised, then ignore uploading outputs
            c: OR if an unhandled exception is raised, record it as an errors.pb
    """
    output_file_dict = {}
    logger.debug(f"Starting _dispatch_execute for {task_def.name}")
    try:
        # Step 1: Download inputs and load them into a LiteralMap
        local_inputs_file = os.path.join(ctx.execution_state.working_dir, "inputs.pb")
        ctx.file_access.get_data(inputs_path, local_inputs_file)
        input_proto = utils.load_proto_from_file(_literals_pb2.LiteralMap, local_inputs_file)
        idl_input_literals = _literal_models.LiteralMap.from_flyte_idl(input_proto)

        # Step 2: Invoke the task.
        # Decorate the dispatch execute function before calling it, this wraps all exceptions into one
        # of the FlyteScopedExceptions
        outputs = _scoped_exceptions.system_entry_point(task_def.dispatch_execute)(ctx, idl_input_literals)

        # Step 3a: Record outputs to output_prefix
        if isinstance(outputs, VoidPromise):
            logger.warning("Task produces no outputs")
            output_file_dict = {_constants.OUTPUT_FILE_NAME: _literal_models.LiteralMap(literals={})}
        elif isinstance(outputs, _literal_models.LiteralMap):
            output_file_dict = {_constants.OUTPUT_FILE_NAME: outputs}
        elif isinstance(outputs, _dynamic_job.DynamicJobSpec):
            output_file_dict = {_constants.FUTURES_FILE_NAME: outputs}
        else:
            logger.error(f"SystemError: received unknown outputs from task {outputs}")
            output_file_dict[_constants.ERROR_FILE_NAME] = _error_models.ErrorDocument(
                _error_models.ContainerError(
                    "UNKNOWN_OUTPUT",
                    f"Type of output received not handled {type(outputs)} outputs: {outputs}",
                    _error_models.ContainerError.Kind.RECOVERABLE,
                    _execution_models.ExecutionError.ErrorKind.SYSTEM,
                )
            )

    # Handle user-scoped errors
    except _scoped_exceptions.FlyteScopedUserException as e:
        if isinstance(e.value, IgnoreOutputs):
            logger.warning(f"User-scoped IgnoreOutputs received! Outputs.pb will not be uploaded. reason {e}!!")
            return
        output_file_dict[_constants.ERROR_FILE_NAME] = _error_models.ErrorDocument(
            _error_models.ContainerError(
                e.error_code, e.verbose_message, e.kind, _execution_models.ExecutionError.ErrorKind.USER
            )
        )
        logger.error("!! Begin User Error Captured by Flyte !!")
        logger.error(e.verbose_message)
        logger.error("!! End Error Captured by Flyte !!")

    # Handle system-scoped errors
    except _scoped_exceptions.FlyteScopedSystemException as e:
        if isinstance(e.value, IgnoreOutputs):
            logger.warning(f"System-scoped IgnoreOutputs received! Outputs.pb will not be uploaded. reason {e}!!")
            return
        output_file_dict[_constants.ERROR_FILE_NAME] = _error_models.ErrorDocument(
            _error_models.ContainerError(
                e.error_code, e.verbose_message, e.kind, _execution_models.ExecutionError.ErrorKind.SYSTEM
            )
        )
        logger.error("!! Begin System Error Captured by Flyte !!")
        logger.error(e.verbose_message)
        logger.error("!! End Error Captured by Flyte !!")

    # Interpret all other exceptions (some of which may be caused by the code in the try block outside of
    # dispatch_execute) as recoverable system exceptions.
    except Exception as e:
        # Step 3c: Record an unhandled exception as an errors.pb document
        exc_str = _traceback.format_exc()
        output_file_dict[_constants.ERROR_FILE_NAME] = _error_models.ErrorDocument(
            _error_models.ContainerError(
                "SYSTEM:Unknown",
                exc_str,
                _error_models.ContainerError.Kind.RECOVERABLE,
                _execution_models.ExecutionError.ErrorKind.SYSTEM,
            )
        )
        logger.error(f"Exception when executing task {task_def.name or task_def.id.name}, reason {str(e)}")
        logger.error("!! Begin Unknown System Error Captured by Flyte !!")
        logger.error(exc_str)
        logger.error("!! End Error Captured by Flyte !!")

    for k, v in output_file_dict.items():
        utils.write_proto_to_file(v.to_flyte_idl(), os.path.join(ctx.execution_state.engine_dir, k))

    ctx.file_access.put_data(ctx.execution_state.engine_dir, output_prefix, is_multipart=True)
    logger.info(f"Engine folder written successfully to the output prefix {output_prefix}")
    logger.debug("Finished _dispatch_execute")
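# Step 3b above is what gives IgnoreOutputs its meaning in user code: raising it skips the
# upload of outputs.pb without recording an error document. A minimal sketch, assuming
# IgnoreOutputs is importable from flytekit.core.base_task (the task below is hypothetical):
from flytekit import task
from flytekit.core.base_task import IgnoreOutputs


@task
def maybe_skip(x: int) -> int:
    if x < 0:
        # _dispatch_execute sees this wrapped in a FlyteScopedUserException whose .value is
        # IgnoreOutputs, logs a warning, and returns without writing any output files.
        raise IgnoreOutputs("negative input, nothing to record")
    return x * 2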
# Note: setup_execution yields a single context, so it must be wrapped as a context manager
# for the `with setup_execution(...) as ctx:` calls above to work.
@contextlib.contextmanager
def setup_execution(
    raw_output_data_prefix: str,
    checkpoint_path: Optional[str] = None,
    prev_checkpoint: Optional[str] = None,
    dynamic_addl_distro: Optional[str] = None,
    dynamic_dest_dir: Optional[str] = None,
):
    """
    :param raw_output_data_prefix: Where to write offloaded data (files, directories, dataframes).
    :param checkpoint_path: Where checkpoints for this execution should be written.
    :param prev_checkpoint: Where checkpoints from the previous attempt, if any, can be read.
    :param dynamic_addl_distro: Works in concert with the other dynamic arg. If present, indicates that if a dynamic
      task were to run, it should set fast serialize to true and use these values in FastSerializationSettings
    :param dynamic_dest_dir: See above.
    :return:
    """
    exe_project = get_one_of("FLYTE_INTERNAL_EXECUTION_PROJECT", "_F_PRJ")
    exe_domain = get_one_of("FLYTE_INTERNAL_EXECUTION_DOMAIN", "_F_DM")
    exe_name = get_one_of("FLYTE_INTERNAL_EXECUTION_ID", "_F_NM")
    exe_wf = get_one_of("FLYTE_INTERNAL_EXECUTION_WORKFLOW", "_F_WF")
    exe_lp = get_one_of("FLYTE_INTERNAL_EXECUTION_LAUNCHPLAN", "_F_LP")
    tk_project = get_one_of("FLYTE_INTERNAL_TASK_PROJECT", "_F_TK_PRJ")
    tk_domain = get_one_of("FLYTE_INTERNAL_TASK_DOMAIN", "_F_TK_DM")
    tk_name = get_one_of("FLYTE_INTERNAL_TASK_NAME", "_F_TK_NM")
    tk_version = get_one_of("FLYTE_INTERNAL_TASK_VERSION", "_F_TK_V")

    compressed_serialization_settings = os.environ.get(SERIALIZED_CONTEXT_ENV_VAR, "")

    ctx = FlyteContextManager.current_context()

    # Create directories
    user_workspace_dir = ctx.file_access.get_random_local_directory()
    logger.info(f"Using user directory {user_workspace_dir}")
    pathlib.Path(user_workspace_dir).mkdir(parents=True, exist_ok=True)
    from flytekit import __version__ as _api_version

    checkpointer = None
    if checkpoint_path is not None:
        checkpointer = SyncCheckpoint(checkpoint_dest=checkpoint_path, checkpoint_src=prev_checkpoint)
        logger.debug(f"Checkpointer created with source {prev_checkpoint} and dest {checkpoint_path}")

    execution_parameters = ExecutionParameters(
        execution_id=_identifier.WorkflowExecutionIdentifier(
            project=exe_project,
            domain=exe_domain,
            name=exe_name,
        ),
        execution_date=_datetime.datetime.utcnow(),
        stats=_get_stats(
            cfg=StatsConfig.auto(),
            # Stats metric path will be:
            # registration_project.registration_domain.app.module.task_name.user_stats
            # and it will be tagged with execution-level values for project/domain/wf/lp
            prefix=f"{tk_project}.{tk_domain}.{tk_name}.user_stats",
            tags={
                "exec_project": exe_project,
                "exec_domain": exe_domain,
                "exec_workflow": exe_wf,
                "exec_launchplan": exe_lp,
                "api_version": _api_version,
            },
        ),
        logging=user_space_logger,
        tmp_dir=user_workspace_dir,
        raw_output_prefix=raw_output_data_prefix,
        checkpoint=checkpointer,
        task_id=_identifier.Identifier(_identifier.ResourceType.TASK, tk_project, tk_domain, tk_name, tk_version),
    )

    try:
        file_access = FileAccessProvider(
            local_sandbox_dir=tempfile.mkdtemp(prefix="flyte"),
            raw_output_prefix=raw_output_data_prefix,
        )
    except TypeError:  # would be thrown from DataPersistencePlugins.find_plugin
        logger.error(f"No data plugin found for raw output prefix {raw_output_data_prefix}")
        raise

    es = ctx.new_execution_state().with_params(
        mode=ExecutionState.Mode.TASK_EXECUTION,
        user_space_params=execution_parameters,
    )
    cb = ctx.new_builder().with_file_access(file_access).with_execution_state(es)

    if compressed_serialization_settings:
        ss = SerializationSettings.from_transport(compressed_serialization_settings)
        ssb = ss.new_builder()
        ssb.project = exe_project
        ssb.domain = exe_domain
        ssb.version = tk_version
        if dynamic_addl_distro:
            ssb.fast_serialization_settings = FastSerializationSettings(
                enabled=True,
                destination_dir=dynamic_dest_dir,
                distribution_location=dynamic_addl_distro,
            )
        cb = cb.with_serialization_settings(ssb.build())

    with FlyteContextManager.with_context(cb) as ctx:
        yield ctx
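# setup_execution derives all execution/task identity from environment variables (the long
# FLYTE_INTERNAL_* names with short _F_* fallbacks read by get_one_of above). A minimal,
# illustrative sketch of driving it by hand; every value below is a hypothetical placeholder,
# and a real statsd/data setup may still be needed for _get_stats and FileAccessProvider:
import os

os.environ.update(
    {
        "FLYTE_INTERNAL_EXECUTION_PROJECT": "flytesnacks",
        "FLYTE_INTERNAL_EXECUTION_DOMAIN": "development",
        "FLYTE_INTERNAL_EXECUTION_ID": "f8a9b2c1d",
        "FLYTE_INTERNAL_EXECUTION_WORKFLOW": "app.workflows.my_wf",
        "FLYTE_INTERNAL_EXECUTION_LAUNCHPLAN": "app.workflows.my_wf",
        "FLYTE_INTERNAL_TASK_PROJECT": "flytesnacks",
        "FLYTE_INTERNAL_TASK_DOMAIN": "development",
        "FLYTE_INTERNAL_TASK_NAME": "app.workflows.task_1",
        "FLYTE_INTERNAL_TASK_VERSION": "abc123",
    }
)

with setup_execution(raw_output_data_prefix="/tmp/flyte-raw") as ctx:
    # ctx is a FlyteContext whose execution state is in TASK_EXECUTION mode, with the
    # user-space ExecutionParameters (tmp_dir, stats, checkpointing) already wired up.
    print(ctx.execution_state.mode)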