def test_single_step_entrypoint_out_of_proc():
    """Run ``execute_task_cmd`` through ``CliRunner`` and verify the outputs it writes.

    The inputs ``{'a': 9}`` for ``_task_defs.add_one`` are serialized to
    ``inputs.pb`` in a temporary directory, the command is invoked with a second
    temporary directory as the output prefix, and the resulting output literal
    map must unpack to exactly one entry, ``b == 10``.
    """
    config_path = os.path.join(os.path.dirname(__file__), 'fake.config')
    overrides = {'project': 'test', 'domain': 'development'}

    with _TemporaryConfiguration(config_path, internal_overrides=overrides), \
            _utils.AutoDeletingTempDir("in") as input_dir:
        add_one = _task_defs.add_one

        # Serialize the task inputs to the file the command will read.
        packed_inputs = _type_helpers.pack_python_std_map_to_literal_map(
            {'a': 9},
            _type_map_from_variable_map(add_one.interface.inputs),
        )
        input_file = os.path.join(input_dir.name, "inputs.pb")
        _utils.write_proto_to_file(packed_inputs.to_flyte_idl(), input_file)

        with _utils.AutoDeletingTempDir("out") as output_dir:
            cli_args = [
                "--task-module", add_one.task_module,
                "--task-name", add_one.task_function_name,
                "--inputs", input_file,
                "--output-prefix", output_dir.name,
            ]
            result = CliRunner().invoke(execute_task_cmd, cli_args)
            assert result.exit_code == 0

            # Read back what the task wrote and convert it to plain Python values.
            output_path = os.path.join(output_dir.name, _constants.OUTPUT_FILE_NAME)
            output_proto = _utils.load_proto_from_file(_literals_pb2.LiteralMap, output_path)
            outputs = _type_helpers.unpack_literal_map_to_sdk_python_std(
                _literal_models.LiteralMap.from_flyte_idl(output_proto),
                _type_map_from_variable_map(add_one.interface.outputs),
            )
            assert outputs['b'] == 10
            assert len(outputs) == 1
def _execute_map_task(
    inputs,
    output_prefix,
    raw_output_data_prefix,
    max_concurrency,
    test,
    dynamic_addl_distro: str,
    dynamic_dest_dir: str,
    resolver: str,
    resolver_args: List[str],
):
    """
    Resolve a task, wrap it in a ``MapPythonTask`` and run it for the current array job index.

    The index returned by ``_compute_array_job_index()`` is appended to
    ``output_prefix`` before the task is handed to ``_handle_annotated_task``.

    :param inputs: Forwarded unchanged to ``_handle_annotated_task``.
    :param output_prefix: Base output location; the computed task index is joined onto it.
    :param raw_output_data_prefix: Forwarded unchanged to ``_handle_annotated_task``.
    :param max_concurrency: Passed to the ``MapPythonTask`` constructor.
    :param test: When truthy, echo the computed values and return without running the task.
    :param dynamic_addl_distro: Forwarded unchanged to ``_handle_annotated_task``.
    :param dynamic_dest_dir: Forwarded unchanged to ``_handle_annotated_task``.
    :param resolver: Identifier given to ``_load_resolver`` to obtain the resolver object.
    :param resolver_args: Arguments passed to the resolver's ``load_task``; must be non-empty.
    :raises Exception: If ``resolver_args`` is empty, or if the resolved task is not a
        ``PythonFunctionTask``.
    """
    if len(resolver_args) == 0:
        raise Exception(f"Resolver args cannot be <1, got {resolver_args}")

    task_resolver = _load_resolver(resolver)
    with _TemporaryConfiguration(_internal_config.CONFIGURATION_PATH.get()):
        # Ask the resolver for the actual task object.
        resolved_task = task_resolver.load_task(loader_args=resolver_args)
        if isinstance(resolved_task, PythonFunctionTask):
            map_task = MapPythonTask(resolved_task, max_concurrency)
        else:
            raise Exception("Map tasks cannot be run with instance tasks.")

        # Each array element writes beneath its own index-named sub-prefix.
        task_index = _compute_array_job_index()
        output_prefix = _os.path.join(output_prefix, str(task_index))

        if not test:
            _handle_annotated_task(
                map_task,
                inputs,
                output_prefix,
                raw_output_data_prefix,
                dynamic_addl_distro,
                dynamic_dest_dir,
            )
            return

        _click.echo(
            f"Test detected, returning. Inputs: {inputs} Computed task index: {task_index} "
            f"New output prefix: {output_prefix} Raw output path: {raw_output_data_prefix} "
            f"Resolver and args: {resolver} {resolver_args}"
        )
def test_single_step_entrypoint_in_proc():
    """Call ``_execute_task`` directly and verify the outputs it writes.

    Inputs ``{'a': 9}`` for ``_task_defs.add_one`` are written to ``inputs.pb``
    in a temporary directory; after execution the output literal map found
    under the temporary output directory must contain only ``b == 10``.
    """
    fake_config = os.path.join(os.path.dirname(__file__), 'fake.config')

    with _TemporaryConfiguration(
        fake_config,
        internal_overrides={'project': 'test', 'domain': 'development'},
    ):
        with _utils.AutoDeletingTempDir("in") as input_dir:
            input_types = _type_map_from_variable_map(_task_defs.add_one.interface.inputs)
            literal_inputs = _type_helpers.pack_python_std_map_to_literal_map({'a': 9}, input_types)

            input_file = os.path.join(input_dir.name, "inputs.pb")
            _utils.write_proto_to_file(literal_inputs.to_flyte_idl(), input_file)

            with _utils.AutoDeletingTempDir("out") as output_dir:
                # Final positional argument False: actually execute rather than dry-run.
                _execute_task(
                    _task_defs.add_one.task_module,
                    _task_defs.add_one.task_function_name,
                    input_file,
                    output_dir.name,
                    False,
                )

                written = _utils.load_proto_from_file(
                    _literals_pb2.LiteralMap,
                    os.path.join(output_dir.name, _constants.OUTPUT_FILE_NAME),
                )
                output_types = _type_map_from_variable_map(_task_defs.add_one.interface.outputs)
                result_map = _type_helpers.unpack_literal_map_to_sdk_python_std(
                    _literal_models.LiteralMap.from_flyte_idl(written),
                    output_types,
                )
                assert result_map['b'] == 10
                assert len(result_map) == 1
def execute_task(task_module, task_name, inputs, output_prefix, test):
    """
    Load a task from user code and, unless this is a dry run, execute it.

    :param task_module: Name of the module to import with ``importlib.import_module``.
    :param task_name: Attribute name of the task definition inside that module.
    :param inputs: Location handed to ``_data_proxy.Data.get_data``. When the
        ``BATCH_JOB_ARRAY_INDEX_VAR_NAME`` environment variable is set, it is treated as a
        prefix and ``<job index>/inputs.pb`` is joined onto it.
    :param output_prefix: Passed to the task as ``context['output_prefix']``. When the
        ``BATCH_JOB_ARRAY_INDEX_VAR_NAME`` environment variable is set, the job index is
        joined onto it.
    :param test: When truthy, only the task definition is loaded; nothing is downloaded or
        executed.
    """
    with _TemporaryConfiguration(_internal_config.CONFIGURATION_PATH.get()), \
            _utils.AutoDeletingTempDir('input_dir') as scratch_dir:
        # Load user code
        user_module = _importlib.import_module(task_module)
        task_def = getattr(user_module, task_name)

        if test:
            return

        local_inputs_file = scratch_dir.get_named_tempfile('inputs.pb')

        # Handle inputs/outputs for array job.
        if _os.environ.get('BATCH_JOB_ARRAY_INDEX_VAR_NAME'):
            env_job_index = _compute_array_job_index()

            # TODO: Perhaps remove. This is a workaround to an issue we perceived with limited
            # TODO: entropy in AWS batch array jobs.
            seed_material = "{} {} {}".format(
                _random.random(),
                _datetime.datetime.utcnow(),
                env_job_index,
            )
            _flyte_random.seed_flyte_random(seed_material)

            # If an ArrayTask is discoverable, the original job index may differ from the one
            # in the environment variable, so translate it through the index lookup mapping.
            child_index = _map_job_index_to_child_index(scratch_dir, inputs, env_job_index)

            inputs = _os.path.join(inputs, str(child_index), 'inputs.pb')
            output_prefix = _os.path.join(output_prefix, str(child_index))

        _data_proxy.Data.get_data(inputs, local_inputs_file)
        input_proto = _utils.load_proto_from_file(_literals_pb2.LiteralMap, local_inputs_file)
        literal_inputs = _literal_models.LiteralMap.from_flyte_idl(input_proto)

        engine_task = _engine_loader.get_engine().get_task(task_def)
        engine_task.execute(literal_inputs, context={'output_prefix': output_prefix})
def _execute_task(inputs, output_prefix, raw_output_data_prefix, test, resolver: str, resolver_args: List[str]):
    """
    This function should be called for new API tasks (those only available in 0.16 and later that leverage Python
    native typing).

    resolver should be something like:
        flytekit.core.python_auto_container.default_task_resolver
    resolver args should be something like:
        task_module app.workflows task_name task_1
    Leading dashes in the resolver args (e.g. ``--task_module``) seem to interfere with click, so they are passed
    without dashes.

    :param inputs: Where to read inputs
    :param output_prefix: Where to write primitive outputs
    :param raw_output_data_prefix: Where to write offloaded data (files, directories, dataframes).
    :param test: Dry run
    :param resolver: The task resolver to use. This needs to be loadable directly from importlib (and thus cannot be
      nested).
    :param resolver_args: Args that will be passed to the aforementioned resolver's load_task function
    :raises Exception: If ``resolver_args`` is empty.
    :return:
    """
    if len(resolver_args) < 1:
        # Name the offending argument and echo its value so the failure is actionable.
        raise Exception(f"Resolver args cannot be <1, got {resolver_args}")

    resolver_obj = _load_resolver(resolver)
    with _TemporaryConfiguration(_internal_config.CONFIGURATION_PATH.get()):
        # Use the resolver to load the actual task object
        _task_def = resolver_obj.load_task(loader_args=resolver_args)
        if test:
            _click.echo(
                f"Test detected, returning. Args were {inputs} {output_prefix} {raw_output_data_prefix} {resolver} {resolver_args}"
            )
            return
        _handle_annotated_task(_task_def, inputs, output_prefix, raw_output_data_prefix)
def test_configuration_file():
    """Check that ``configs/good.config`` is visible only inside ``_TemporaryConfiguration``.

    Inside the context the ``sdk``/``workflow_packages`` setting must read
    ``'this.module,that.module'``; after the context exits it must read ``None``.
    """
    here = _os.path.dirname(_os.path.realpath(__file__))
    config_path = _os.path.join(here, 'configs/good.config')

    with _TemporaryConfiguration(config_path):
        packages = _common.CONFIGURATION_SINGLETON.get_string('sdk', 'workflow_packages')
        assert packages == 'this.module,that.module'

    # Outside the context the setting must no longer be present.
    packages_after = _common.CONFIGURATION_SINGLETON.get_string('sdk', 'workflow_packages')
    assert packages_after is None
def test_configuration_file():
    """Verify the temporary configuration is applied inside the context and gone afterwards.

    ``sdk``/``workflow_packages`` must equal ``"this.module,that.module"`` while
    ``configs/good.config`` is active, and must be ``None`` once the context has exited.
    """
    section, option = "sdk", "workflow_packages"
    good_config = _os.path.join(
        _os.path.dirname(_os.path.realpath(__file__)),
        "configs/good.config",
    )

    with _TemporaryConfiguration(good_config):
        value_inside = _common.CONFIGURATION_SINGLETON.get_string(section, option)
        assert value_inside == "this.module,that.module"

    value_outside = _common.CONFIGURATION_SINGLETON.get_string(section, option)
    assert value_outside is None
def test_internal_overrides():
    """Verify internal overrides are exposed as ``FLYTE_INTERNAL_*`` environment variables.

    With the override ``{"foo": "bar"}`` active, ``FLYTE_INTERNAL_FOO`` must be
    ``"bar"``; once the context exits the variable must be absent again.
    """
    test_dir = _os.path.dirname(_os.path.realpath(__file__))
    config_path = _os.path.join(test_dir, "configs/good.config")
    overrides = {"foo": "bar"}

    with _TemporaryConfiguration(config_path, overrides):
        value_inside = _os.environ.get("FLYTE_INTERNAL_FOO")
        assert value_inside == "bar"

    value_outside = _os.environ.get("FLYTE_INTERNAL_FOO")
    assert value_outside is None
def test_arrayjob_entrypoint_in_proc():
    """Run ``execute_task`` as a faked array job and verify the outputs it writes.

    The test lays out ``<dir>/1/inputs.pb`` and an ``indexlookup.pb`` holding the
    single entry ``1``, sets ``AWS_BATCH_JOB_ARRAY_INDEX`` to ``'0'``, and expects
    the output file to appear under ``<dir>/1`` with exactly ``b == 10``.

    The two environment variables are always put back to their previous state
    (or removed if they were previously unset), even when an assertion fails,
    so this test cannot leak array-job settings into other tests.
    """
    index_var_name_key = 'BATCH_JOB_ARRAY_INDEX_VAR_NAME'
    array_index_key = 'AWS_BATCH_JOB_ARRAY_INDEX'

    with _TemporaryConfiguration(
        os.path.join(os.path.dirname(__file__), 'fake.config'),
        internal_overrides={'project': 'test', 'domain': 'development'},
    ):
        with _utils.AutoDeletingTempDir("dir") as work_dir:
            literal_map = _type_helpers.pack_python_std_map_to_literal_map(
                {'a': 9},
                _type_map_from_variable_map(_task_defs.add_one.interface.inputs),
            )

            input_dir = os.path.join(work_dir.name, "1")
            os.mkdir(input_dir)  # auto cleanup will take this subdir into account

            input_file = os.path.join(input_dir, "inputs.pb")
            _utils.write_proto_to_file(literal_map.to_flyte_idl(), input_file)

            # construct indexlookup.pb which has array: [1]
            mapped_index = _literals.Literal(
                _literals.Scalar(primitive=_literals.Primitive(integer=1))
            )
            index_lookup_collection = _literals.LiteralCollection([mapped_index])
            index_lookup_file = os.path.join(work_dir.name, "indexlookup.pb")
            _utils.write_proto_to_file(index_lookup_collection.to_flyte_idl(), index_lookup_file)

            # fake arrayjob task by setting environment variables, remembering the
            # previous values (None means "was not set") so they can be restored.
            saved_env = {
                key: os.environ.get(key)
                for key in (index_var_name_key, array_index_key)
            }
            os.environ[index_var_name_key] = array_index_key
            os.environ[array_index_key] = '0'

            try:
                execute_task(
                    _task_defs.add_one.task_module,
                    _task_defs.add_one.task_function_name,
                    work_dir.name,
                    work_dir.name,
                    False,
                )

                raw_map = _type_helpers.unpack_literal_map_to_sdk_python_std(
                    _literal_models.LiteralMap.from_flyte_idl(
                        _utils.load_proto_from_file(
                            _literals_pb2.LiteralMap,
                            os.path.join(input_dir, _constants.OUTPUT_FILE_NAME),
                        )
                    ),
                    _type_map_from_variable_map(_task_defs.add_one.interface.outputs),
                )
                assert raw_map['b'] == 10
                assert len(raw_map) == 1
            finally:
                # reset the env vars; remove the ones that did not exist before
                for key, previous in saved_env.items():
                    if previous is None:
                        os.environ.pop(key, None)
                    else:
                        os.environ[key] = previous
def execute(self, inputs, context=None):
    """
    Run the user code under the unit-test configuration and return its outputs in user-readable form.

    Execution happens with ``unit.config`` (next to this file) applied, the ``image`` internal
    override set to ``"unit_image"``, and a temporary directory installed as the local working
    directory context.

    :param flytekit.models.literals.LiteralMap inputs:
    :param context: Not referenced by this implementation.
    :rtype: dict[Text,flytekit.models.common.FlyteIdlEntity]
    """
    unit_config = _os.path.join(_os.path.dirname(__file__), "unit.config")

    with _TemporaryConfiguration(unit_config, internal_overrides={"image": "unit_image"}):
        with _common_utils.AutoDeletingTempDir("unit_test_dir") as scratch_dir:
            with _data_proxy.LocalWorkingDirectoryContext(scratch_dir):
                raw_outputs = self._execute_user_code(inputs)
                return self._transform_for_user_output(raw_outputs)
def test_backwards_compatible_replacement(mock_execute_task):
    """Check what the mocked executor receives for an unrendered raw-output-prefix template.

    ``execute_task_cmd`` is invoked with ``--raw-output-data-prefix`` set to the
    literal ``{{.rawOutputDataPrefix}}``; the mock's side effect asserts that its
    fifth positional argument is ``None``, and the command must exit with code 0.

    :param mock_execute_task: Mock object whose ``side_effect`` is replaced by this test
        (presumably injected by a patch decorator that is not visible here).
    """
    def _expect_fifth_arg_none(*args, **kwargs):
        assert args[4] is None

    mock_execute_task.side_effect = _expect_fifth_arg_none

    config_path = os.path.join(os.path.dirname(__file__), "fake.config")
    overrides = {"project": "test", "domain": "development"}

    with _TemporaryConfiguration(config_path, internal_overrides=overrides):
        with _utils.AutoDeletingTempDir("in"), _utils.AutoDeletingTempDir("out"):
            cli_args = [
                "--task-module", "fake",
                "--task-name", "fake",
                "--inputs", "fake",
                "--output-prefix", "fake",
                "--raw-output-data-prefix", "{{.rawOutputDataPrefix}}",
            ]
            result = CliRunner().invoke(execute_task_cmd, cli_args)
            assert result.exit_code == 0
def test_internal_overrides():
    """Check that an internal override is exported to the environment only inside the context.

    While ``_TemporaryConfiguration`` is active with ``{'foo': 'bar'}``, the
    ``FLYTE_INTERNAL_FOO`` environment variable must be ``'bar'``; afterwards it
    must not be set.
    """
    env_key = 'FLYTE_INTERNAL_FOO'
    good_config = _os.path.join(
        _os.path.dirname(_os.path.realpath(__file__)),
        'configs/good.config',
    )

    with _TemporaryConfiguration(good_config, {'foo': 'bar'}):
        assert _os.environ.get(env_key) == 'bar'

    # The override must be removed once the context has exited.
    assert _os.environ.get(env_key) is None