Example #1
def _execute_map_task(
    inputs,
    output_prefix,
    raw_output_data_prefix,
    max_concurrency,
    test,
    dynamic_addl_distro: str,
    dynamic_dest_dir: str,
    resolver: str,
    resolver_args: List[str],
):
    if len(resolver_args) < 1:
        raise Exception(f"Resolver args cannot be <1, got {resolver_args}")

    with _TemporaryConfiguration(_internal_config.CONFIGURATION_PATH.get()):
        with setup_execution(raw_output_data_prefix, dynamic_addl_distro,
                             dynamic_dest_dir) as ctx:
            resolver_obj = load_object_from_module(resolver)
            # Use the resolver to load the actual task object
            _task_def = resolver_obj.load_task(loader_args=resolver_args)
            if not isinstance(_task_def, PythonFunctionTask):
                raise Exception("Map tasks cannot be run with instance tasks.")
            map_task = MapPythonTask(_task_def, max_concurrency)

            task_index = _compute_array_job_index()
            output_prefix = _os.path.join(output_prefix, str(task_index))

            if test:
                _click.echo(
                    f"Test detected, returning. Inputs: {inputs} Computed task index: {task_index} "
                    f"New output prefix: {output_prefix} Raw output path: {raw_output_data_prefix} "
                    f"Resolver and args: {resolver} {resolver_args}")
                return

            _handle_annotated_task(ctx, map_task, inputs, output_prefix)
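
A note on the helper used above: _compute_array_job_index() is not shown in this example. A minimal sketch of what it plausibly does, assuming AWS Batch-style BATCH_JOB_ARRAY_INDEX_VAR_NAME / BATCH_JOB_ARRAY_INDEX_OFFSET environment variables (an approximation, not flytekit's exact code):

import os

def _compute_array_job_index() -> int:
    # BATCH_JOB_ARRAY_INDEX_OFFSET is set when the map task runs in fewer
    # slots than the size of its input; default to 0 when absent.
    offset = int(os.environ.get("BATCH_JOB_ARRAY_INDEX_OFFSET", "0"))
    # BATCH_JOB_ARRAY_INDEX_VAR_NAME names the environment variable that
    # holds this container's index (e.g. AWS_BATCH_JOB_ARRAY_INDEX).
    index_var = os.environ["BATCH_JOB_ARRAY_INDEX_VAR_NAME"]
    return offset + int(os.environ[index_var])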
Example #2
def _execute_map_task(
    inputs,
    output_prefix,
    raw_output_data_prefix,
    max_concurrency,
    test,
    resolver: str,
    resolver_args: List[str],
    checkpoint_path: Optional[str] = None,
    prev_checkpoint: Optional[str] = None,
    dynamic_addl_distro: Optional[str] = None,
    dynamic_dest_dir: Optional[str] = None,
):
    """
    This function should be called by map tasks and AWS Batch tasks.
    resolver should be something like:
        flytekit.core.python_auto_container.default_task_resolver
    resolver args should be something like:
        task_module app.workflows task_name task_1
    Having dashes seems to mess up click, e.g. --task_module seems to interfere.

    :param inputs: Where to read inputs
    :param output_prefix: Where to write primitive outputs
    :param raw_output_data_prefix: Where to write offloaded data (files, directories, dataframes).
    :param test: Dry run
    :param resolver: The task resolver to use. This needs to be loadable directly from importlib (and thus cannot be
      nested).
    :param resolver_args: Args that will be passed to the aforementioned resolver's load_task function
    :return:
    """
    if len(resolver_args) < 1:
        raise Exception(f"Resolver args cannot be <1, got {resolver_args}")

    with setup_execution(
        raw_output_data_prefix, checkpoint_path, prev_checkpoint, dynamic_addl_distro, dynamic_dest_dir
    ) as ctx:
        resolver_obj = load_object_from_module(resolver)
        # Use the resolver to load the actual task object
        _task_def = resolver_obj.load_task(loader_args=resolver_args)
        if not isinstance(_task_def, PythonFunctionTask):
            raise Exception("Map tasks cannot be run with instance tasks.")
        map_task = MapPythonTask(_task_def, max_concurrency)

        task_index = _compute_array_job_index()
        output_prefix = os.path.join(output_prefix, str(task_index))

        if test:
            logger.info(
                f"Test detected, returning. Inputs: {inputs} Computed task index: {task_index} "
                f"New output prefix: {output_prefix} Raw output path: {raw_output_data_prefix} "
                f"Resolver and args: {resolver} {resolver_args}"
            )
            return

        _handle_annotated_task(ctx, map_task, inputs, output_prefix)
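
To make the resolver convention from the docstring concrete, an invocation of this entry point might look like the following; all paths and names here are hypothetical:

_execute_map_task(
    inputs="s3://my-bucket/inputs.pb",            # hypothetical input path
    output_prefix="s3://my-bucket/outputs",       # hypothetical output prefix
    raw_output_data_prefix="s3://my-bucket/raw",  # hypothetical raw-data prefix
    max_concurrency=0,
    test=False,
    resolver="flytekit.core.python_auto_container.default_task_resolver",
    resolver_args=["task_module", "app.workflows", "task_name", "task_1"],
)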
Example #3
def _execute_task(
    inputs: str,
    output_prefix: str,
    test: bool,
    raw_output_data_prefix: str,
    resolver: str,
    resolver_args: List[str],
    checkpoint_path: Optional[str] = None,
    prev_checkpoint: Optional[str] = None,
    dynamic_addl_distro: Optional[str] = None,
    dynamic_dest_dir: Optional[str] = None,
):
    """
    This function should be called for new API tasks (those only available in 0.16 and later that leverage Python
    native typing).

    resolver should be something like:
        flytekit.core.python_auto_container.default_task_resolver
    resolver args should be something like:
        task_module app.workflows task_name task_1
    Having dashes seems to mess up click, e.g. --task_module seems to interfere.

    :param inputs: Where to read inputs
    :param output_prefix: Where to write primitive outputs
    :param raw_output_data_prefix: Where to write offloaded data (files, directories, dataframes).
    :param test: Dry run
    :param resolver: The task resolver to use. This needs to be loadable directly from importlib (and thus cannot be
      nested).
    :param resolver_args: Args that will be passed to the aforementioned resolver's load_task function
    :param dynamic_addl_distro: In the case of parent tasks executed using the 'fast' mode this captures where the
        compressed code archive has been uploaded.
    :param dynamic_dest_dir: In the case of parent tasks executed using the 'fast' mode this captures where compressed
        code archives should be installed in the flyte task container.
    :return:
    """
    if len(resolver_args) < 1:
        raise Exception("cannot be <1")

    with setup_execution(
        raw_output_data_prefix,
        checkpoint_path,
        prev_checkpoint,
        dynamic_addl_distro,
        dynamic_dest_dir,
    ) as ctx:
        resolver_obj = load_object_from_module(resolver)
        # Use the resolver to load the actual task object
        _task_def = resolver_obj.load_task(loader_args=resolver_args)
        if test:
            logger.info(
                f"Test detected, returning. Args were {inputs} {output_prefix} {raw_output_data_prefix} {resolver} {resolver_args}"
            )
            return
        _handle_annotated_task(ctx, _task_def, inputs, output_prefix)
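
The only contract this code exercises on the resolver is a load_task(loader_args) method returning a task object. A minimal sketch of a custom resolver under that assumption (the class and its dotted-path convention are hypothetical):

import importlib
from typing import List

class DotPathResolver:
    """Hypothetical resolver: loads a task from ["some.module", "attr"]."""

    def load_task(self, loader_args: List[str]):
        # Import the module named by the first arg, then fetch the task
        # object named by the second.
        module = importlib.import_module(loader_args[0])
        return getattr(module, loader_args[1])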
Example #4
    def load_task(self, loader_args: List[str]) -> ExecutableTemplateShimTask:
        logger.info(f"Task template loader args: {loader_args}")
        ctx = FlyteContext.current_context()
        task_template_local_path = os.path.join(
            ctx.execution_state.working_dir, "task_template.pb")
        ctx.file_access.get_data(loader_args[0], task_template_local_path)
        task_template_proto = common_utils.load_proto_from_file(
            _tasks_pb2.TaskTemplate, task_template_local_path)
        task_template_model = _task_model.TaskTemplate.from_flyte_idl(
            task_template_proto)

        executor_class = load_object_from_module(loader_args[1])
        return ExecutableTemplateShimTask(task_template_model, executor_class)
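
Reading load_task above, loader_args is positional: index 0 is the remote path of the serialized TaskTemplate proto and index 1 is the dotted path of the executor class. A hypothetical call:

shim_task = resolver.load_task(
    loader_args=[
        "s3://my-bucket/metadata/task_template.pb",  # hypothetical proto URI
        "my_plugin.executor.MyExecutor",             # hypothetical executor class
    ]
)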
Example #5
def load_naive_entity(
    module_name: str, entity_name: str, project_root: str
) -> typing.Union[WorkflowBase, PythonTask]:
    """
    Load the workflow (or task) of a script file.
    N.B.: it assumes that the file is self-contained, in other words, there are no relative imports.
    """
    flyte_ctx_builder = context_manager.FlyteContextManager.current_context().new_builder()
    with context_manager.FlyteContextManager.with_context(flyte_ctx_builder):
        with module_loader.add_sys_path(project_root):
            importlib.import_module(module_name)
    return module_loader.load_object_from_module(
        f"{module_name}.{entity_name}")
Example #6
def test_load_object():
    loader_self = module_loader.load_object_from_module(
        f"{module_loader.__name__}.load_object_from_module")
    assert loader_self.__module__ == f"{module_loader.__name__}"
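
The test loads load_object_from_module with itself as the target. Its behavior, importing the module portion of a dotted path and returning the final attribute, can be approximated as follows (a sketch, not flytekit's exact implementation):

import importlib

def load_object_from_module(object_location: str):
    # Split "pkg.module.attr" into a module path and an attribute name,
    # import the module, and return the named attribute.
    module_path, _, attr = object_location.rpartition(".")
    module = importlib.import_module(module_path)
    return getattr(module, attr)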