def inputs(self) -> Dict[str, Any]:
    """
    Returns the inputs to the execution in the standard python format as dictated by the type engine.

    The converted inputs are stored on ``self._inputs`` the first time they are computed and that
    stored value is returned on later calls.

    :return: A dictionary mapping input names to python native values.
    """
    if self._inputs is None:
        client = _flyte_engine.get_client()
        execution_data = client.get_execution_data(self.id)

        # Inputs are returned inline unless they are too big, in which case a url blob pointing to them is returned.
        input_map: _literal_models.LiteralMap = _literal_models.LiteralMap({})
        if bool(execution_data.full_inputs.literals):
            input_map = execution_data.full_inputs
        elif execution_data.inputs.bytes > 0:
            with _common_utils.AutoDeletingTempDir() as tmp_dir:
                tmp_name = _os.path.join(tmp_dir.name, "inputs.pb")
                _data_proxy.Data.get_data(execution_data.inputs.url, tmp_name)
                # The protobuf message class is spelled ``LiteralMap``; the previous ``Literalmap``
                # spelling does not exist on the generated module and failed on this code path.
                input_map = _literal_models.LiteralMap.from_flyte_idl(
                    _common_utils.load_proto_from_file(_literals_pb2.LiteralMap, tmp_name)
                )

        # Python types are guessed from the interface of the entity fetched with the launch plan's identifier.
        lp_id = self.spec.launch_plan
        workflow = _workflow.FlyteWorkflow.fetch(lp_id.project, lp_id.domain, lp_id.name, lp_id.version)
        self._inputs = TypeEngine.literal_map_to_kwargs(
            ctx=FlyteContextManager.current_context(),
            lm=input_map,
            python_types=TypeEngine.guess_python_types(workflow.interface.inputs),
        )
    return self._inputs
def test_engine_file_output():
    """Round-trip a local text file through the type engine as a single blob using a local sandbox."""
    expected_line = "Hello World\n"
    single_blob = _core_types.BlobType(
        format="",
        dimensionality=_core_types.BlobType.BlobDimensionality.SINGLE,
    )
    provider = FileAccessProvider(local_sandbox_dir="/tmp/flytetesting")

    current = context_manager.FlyteContext.current_context()
    with current.new_file_access_context(file_access_provider=provider) as ctx:
        # Write some text to a file not in that directory above
        source_path = "/tmp/sample.txt"
        with open(source_path, "w") as writer:
            writer.write(expected_line)

        literal = TypeEngine.to_literal(ctx, source_path, os.PathLike, LiteralType(blob=single_blob))

        # Since we're using local as remote, we should be able to just read the file from the 'remote' location.
        with open(literal.scalar.blob.uri, "r") as reader:
            assert reader.readline() == expected_line

        # We should also be able to turn the thing back into regular python native thing.
        local_copy = TypeEngine.to_python_value(ctx, literal, os.PathLike)
        with open(local_copy, "r") as reader:
            assert reader.readline() == expected_line
def test_bad_conversion():
    """Guessing a python type raises ValueError when a schema column carries an unknown type value."""
    literal_type = TypeEngine.to_literal_type(FlyteSchema[kwtypes(my_custom=bool)])

    # Make a not real column type
    first_column = literal_type.schema.columns[0]
    first_column._type = 15

    with pytest.raises(ValueError):
        TypeEngine.guess_python_type(literal_type)
def register(cls, h: Handlers, default_for_type: Optional[bool] = True, override: Optional[bool] = False):
    """
    Register a handler with this dataframe meta-transformer.

    The string "://" should not be present in any handler's protocol, so it is not checked for here.

    :param h: The handler to register.
    :param default_for_type: When truthy, the handler's format and protocol are recorded as the defaults
        for the handler's python type.
    :param override: When this is exactly ``False``, registering a second handler for an already
        registered format is an error.
    :raises ValueError: If a handler is already registered for the format and ``override`` is ``False``.
    """
    handlers_by_format = cls._handler_finder(h)
    already_registered = h.supported_format in handlers_by_format
    if already_registered and override is False:
        raise ValueError(
            f"Already registered a handler for {(h.python_type, h.protocol, h.supported_format)}"
        )
    handlers_by_format[h.supported_format] = h
    logger.debug(
        f"Registered {h} as handler for {h.python_type}, protocol {h.protocol}, fmt {h.supported_format}"
    )

    if default_for_type:
        # TODO: Add logging, think about better ux, maybe default False and warn if doesn't exist.
        cls.DEFAULT_FORMATS[h.python_type] = h.supported_format
        cls.DEFAULT_PROTOCOLS[h.python_type] = h.protocol

    # Register with the type engine as well.
    # The semantics as of now are such that it doesn't matter which order these transformers are loaded in, as
    # long as the older Pandas/FlyteSchema transformer do not also specify the override
    TypeEngine.register_additional_type(StructuredDatasetTransformerEngine(), h.python_type, override=True)
def inputs(self) -> Dict[str, Any]:
    """
    Returns the inputs of the task execution in the standard Python format that is produced by the type engine.

    The result is computed once and kept on ``self._inputs``.
    """
    from flytekit.control_plane.tasks.task import FlyteTask

    if self._inputs is not None:
        return self._inputs

    execution_data = _flyte_engine.get_client().get_task_execution_data(self.id)

    # Small inputs arrive inline; big ones are only referenced by a url blob that has to be downloaded.
    literal_inputs = _literal_models.LiteralMap({})
    if bool(execution_data.full_inputs.literals):
        literal_inputs = execution_data.full_inputs
    elif execution_data.inputs.bytes > 0:
        with _common_utils.AutoDeletingTempDir() as scratch:
            local_path = os.path.join(scratch.name, "inputs.pb")
            _data_proxy.Data.get_data(execution_data.inputs.url, local_path)
            proto = _common_utils.load_proto_from_file(_literals_pb2.LiteralMap, local_path)
            literal_inputs = _literal_models.LiteralMap.from_flyte_idl(proto)

    task = FlyteTask.fetch(
        self.id.task_id.project,
        self.id.task_id.domain,
        self.id.task_id.name,
        self.id.task_id.version,
    )
    guessed_types = TypeEngine.guess_python_types(task.interface.inputs)
    self._inputs = TypeEngine.literal_map_to_kwargs(
        ctx=FlyteContextManager.current_context(),
        lm=literal_inputs,
        python_types=guessed_types,
    )
    return self._inputs
def extract_value(
    ctx: FlyteContext, input_val: Any, val_type: type, flyte_literal_type: _type_models.LiteralType
) -> _literal_models.Literal:
    """
    Convert a single python-side argument into a Flyte literal.

    Lists and (non-struct) dicts are converted element by element by recursing into this function,
    a ``Promise`` is unwrapped to the value it holds, and every other value is handed to the type engine.

    :param ctx: The Flyte context passed through to the type engine.
    :param input_val: The value to convert.
    :param val_type: The python type declared for ``input_val``.
    :param flyte_literal_type: The Flyte literal type the value is expected to conform to.
    :return: The literal produced for ``input_val``.
    :raises Exception: If a list is given for a non-collection type, or a dict is given for a type that is
        neither a map nor a STRUCT.
    :raises ValueError: Re-raised when the list's element type cannot be determined and the list is empty.
    :raises AssertionError: If the value is a ``VoidPromise`` or a tuple.
    """
    if isinstance(input_val, list):
        if flyte_literal_type.collection_type is None:
            raise Exception(f"Not a collection type {flyte_literal_type} but got a list {input_val}")
        try:
            sub_type = ListTransformer.get_sub_type(val_type)
        except ValueError:
            # Without a declared element type, fall back to the type of the first element; an empty
            # list gives nothing to inspect, so the original error is propagated.
            if len(input_val) == 0:
                raise
            sub_type = type(input_val[0])
        literals = [extract_value(ctx, v, sub_type, flyte_literal_type.collection_type) for v in input_val]
        return _literal_models.Literal(collection=_literal_models.LiteralCollection(literals=literals))
    elif isinstance(input_val, dict):
        if (
            flyte_literal_type.map_value_type is None
            and flyte_literal_type.simple != _type_models.SimpleType.STRUCT
        ):
            raise Exception(f"Not a map type {flyte_literal_type} but got a map {input_val}")
        # Only the value type is needed below; the key type is intentionally discarded.
        _, sub_type = DictTransformer.get_dict_types(val_type)
        if flyte_literal_type.simple == _type_models.SimpleType.STRUCT:
            return TypeEngine.to_literal(ctx, input_val, type(input_val), flyte_literal_type)
        else:
            literals = {
                k: extract_value(ctx, v, sub_type, flyte_literal_type.map_value_type)
                for k, v in input_val.items()
            }
            return _literal_models.Literal(map=_literal_models.LiteralMap(literals=literals))
    elif isinstance(input_val, Promise):
        # In the example above, this handles the "in2=a" type of argument
        return input_val.val
    elif isinstance(input_val, VoidPromise):
        raise AssertionError(
            f"Outputs of a non-output producing task {input_val.task_name} cannot be passed to another task."
        )
    elif isinstance(input_val, tuple):
        # The adjacent literals are concatenated, so each piece carries its own separating space.
        raise AssertionError(
            "Tuples are not a supported type for individual values in Flyte - got a tuple -"
            f" {input_val}. If using named tuple in an inner task, please, de-reference the"
            " actual attribute that you want to use. For example, in NamedTuple('OP', x=int) then"
            " return v.x, instead of v, even if this has a single element"
        )
    else:
        # This handles native values, the 5 example
        return TypeEngine.to_literal(ctx, input_val, val_type, flyte_literal_type)
def test_zero_floats():
    """An integer 0 literal and a float 0.0 literal both convert, with ``float`` as the target, to 0."""
    ctx = FlyteContext.current_context()
    integer_zero = Literal(scalar=Scalar(primitive=Primitive(integer=0)))
    float_zero = Literal(scalar=Scalar(primitive=Primitive(float_value=0.0)))

    for zero_literal in (integer_zero, float_zero):
        assert TypeEngine.to_python_value(ctx, zero_literal, float) == 0
def test_type_engine():
    """Check literal types produced for ``int`` and for a nested dict/list/dict of timedeltas."""
    int_literal_type = TypeEngine.to_literal_type(int)
    assert int_literal_type.simple == model_types.SimpleType.INTEGER

    nested_python_type = typing.Dict[str, typing.List[typing.Dict[str, timedelta]]]
    nested_literal_type = TypeEngine.to_literal_type(nested_python_type)
    innermost = nested_literal_type.map_value_type.collection_type.map_value_type
    assert innermost.simple == model_types.SimpleType.DURATION
def test_jsondc_schemaize():
    """The python type guessed back from ``Foo``'s literal type is currently a plain ``dict``."""
    guessed_type = TypeEngine.guess_python_type(TypeEngine.to_literal_type(Foo))

    # When postponed annotations are enabled, dataclass_json will not work and we'll end up with a
    # schemaless generic.
    # This test basically tests the broken behavior. Remove this test if
    # https://github.com/lovasoa/marshmallow_dataclass/issues/13 is ever fixed.
    assert guessed_type is dict
def binding_data_from_python_std(
    ctx: _flyte_context.FlyteContext,
    expected_literal_type: _type_models.LiteralType,
    t_value: typing.Any,
    t_value_type: type,
) -> _literals_models.BindingData:
    """
    Build the binding data for a python value that is being bound to an input.

    Lists and (non-struct) dicts are bound element by element by recursing into this function; any
    other value is converted by the type engine and bound as a scalar.

    :param ctx: The Flyte context passed through to the type engine.
    :param expected_literal_type: The literal type the value is expected to conform to.
    :param t_value: The python value (or promise) being bound.
    :param t_value_type: The python type declared for ``t_value``.
    :raises AssertionError: For a ``VoidPromise``, for a list bound to a non-collection type, or for a
        dict bound to a type that is neither a map nor a STRUCT.
    """
    if isinstance(t_value, Promise):
        # The given value is the output of another task. A promise that is not ready is bound by
        # reference; one that is ready falls through to the scalar conversion at the bottom.
        if not t_value.is_ready:
            return _literals_models.BindingData(promise=t_value.ref)

    elif isinstance(t_value, VoidPromise):
        raise AssertionError(
            f"Cannot pass output from task {t_value.task_name} that produces no outputs to a downstream task"
        )

    elif isinstance(t_value, list):
        if expected_literal_type.collection_type is None:
            raise AssertionError(
                f"this should be a list and it is not: {type(t_value)} vs {expected_literal_type}"
            )
        element_type = ListTransformer.get_sub_type(t_value_type)
        element_bindings = []
        for element in t_value:
            element_bindings.append(
                binding_data_from_python_std(ctx, expected_literal_type.collection_type, element, element_type)
            )
        return _literals_models.BindingData(
            collection=_literals_models.BindingDataCollection(bindings=element_bindings)
        )

    elif isinstance(t_value, dict):
        if (
            expected_literal_type.map_value_type is None
            and expected_literal_type.simple != _type_models.SimpleType.STRUCT
        ):
            raise AssertionError(
                f"this should be a Dictionary type and it is not: {type(t_value)} vs {expected_literal_type}"
            )
        _key_type, value_type = DictTransformer.get_dict_types(t_value_type)
        if expected_literal_type.simple == _type_models.SimpleType.STRUCT:
            # A struct is converted as a whole by the type engine rather than entry by entry.
            struct_literal = TypeEngine.to_literal(ctx, t_value, type(t_value), expected_literal_type)
            return _literals_models.BindingData(scalar=struct_literal.scalar)

        entry_bindings = {}
        for key, entry in t_value.items():
            entry_bindings[key] = binding_data_from_python_std(
                ctx, expected_literal_type.map_value_type, entry, value_type
            )
        return _literals_models.BindingData(map=_literals_models.BindingDataMap(bindings=entry_bindings))

    # This is the scalar case - e.g. my_task(in1=5)
    literal = TypeEngine.to_literal(ctx, t_value, t_value_type, expected_literal_type)
    return _literals_models.BindingData(scalar=literal.scalar)
def test_assert_type():
    """Converting an ``int`` against a FlyteSchema type raises the 'not supported' ValueError."""
    base_ctx = context_manager.FlyteContextManager.current_context()
    task_execution_ctx = base_ctx.with_execution_state(
        base_ctx.new_execution_state().with_params(mode=ExecutionState.Mode.TASK_EXECUTION)
    )
    with context_manager.FlyteContextManager.with_context(task_execution_ctx) as ctx:
        schema_type = FlyteSchema[kwtypes(x=int, y=float)]
        transformer = FlyteSchemaTransformer()
        literal_type = transformer.get_literal_type(schema_type)
        with pytest.raises(ValueError, match="DataFrames of type <class 'int'> are not supported currently"):
            TypeEngine.to_literal(ctx, 3, schema_type, literal_type)
def test_bad_tag():
    """Guessing a python type for a STRUCT with an unloadable or missing ``pb_type`` raises ValueError."""
    # Will not be able to load this
    with pytest.raises(ValueError):
        TypeEngine.guess_python_type(LiteralType(simple=SimpleType.STRUCT, metadata={"pb_type": "bad.tag"}))

    # Doesn't match pb field key
    with pytest.raises(ValueError):
        TypeEngine.guess_python_type(LiteralType(simple=SimpleType.STRUCT, metadata={}))
def test_guessing_containers():
    """Collection and map literal types are guessed back as ``typing.List`` and ``typing.Dict[str, ...]``."""
    boolean_type = model_types.LiteralType(simple=model_types.SimpleType.BOOLEAN)
    list_of_booleans = model_types.LiteralType(collection_type=boolean_type)
    assert TypeEngine.guess_python_type(list_of_booleans) == typing.List[bool]

    duration_type = model_types.LiteralType(simple=model_types.SimpleType.DURATION)
    map_of_durations = model_types.LiteralType(map_value_type=duration_type)
    assert TypeEngine.guess_python_type(map_of_durations) == typing.Dict[str, timedelta]
def test_pb_guess_python_type():
    """A protobuf message type survives guessing, and a message round-trips via the guessed type."""
    tag = catalog_pb2.CatalogArtifactTag(artifact_id="artifact_1", name="artifact_name")
    native_values = {"a": tag}

    literal_type = TypeEngine.to_literal_type(catalog_pb2.CatalogArtifactTag)
    guessed_type = TypeEngine.guess_python_type(literal_type)
    assert guessed_type == catalog_pb2.CatalogArtifactTag

    ctx = FlyteContextManager.current_context()
    literal_map = TypeEngine.dict_to_literal_map(ctx, native_values, {"a": guessed_type})
    round_tripped = TypeEngine.to_python_value(ctx, literal_map.literals["a"], guessed_type)
    assert round_tripped == tag
def extract_value(
    ctx: FlyteContext, input_val: Any, val_type: type, flyte_literal_type: _type_models.LiteralType
) -> _literal_models.Literal:
    """
    Turn one python-side argument into a Flyte literal.

    Lists and (non-struct) dicts recurse into this function per element, a ``Promise`` yields the value
    it holds, a ``VoidPromise`` is rejected, and all remaining values go through the type engine.

    :raises Exception: When a list is given for a non-collection type, or a dict for a type that is
        neither a map nor a STRUCT.
    :raises AssertionError: When the value is a ``VoidPromise``.
    """
    if isinstance(input_val, list):
        element_literal_type = flyte_literal_type.collection_type
        if element_literal_type is None:
            raise Exception(f"Not a collection type {flyte_literal_type} but got a list {input_val}")
        try:
            element_type = ListTransformer.get_sub_type(val_type)
        except ValueError:
            # Fall back to the first element's type; with an empty list the error is propagated.
            if len(input_val) > 0:
                element_type = type(input_val[0])
            else:
                raise
        converted = []
        for item in input_val:
            converted.append(extract_value(ctx, item, element_type, flyte_literal_type.collection_type))
        return _literal_models.Literal(collection=_literal_models.LiteralCollection(literals=converted))

    if isinstance(input_val, dict):
        if (
            flyte_literal_type.map_value_type is None
            and flyte_literal_type.simple != _type_models.SimpleType.STRUCT
        ):
            raise Exception(f"Not a map type {flyte_literal_type} but got a map {input_val}")
        _key_type, entry_type = DictTransformer.get_dict_types(val_type)
        if flyte_literal_type.simple == _type_models.SimpleType.STRUCT:
            return TypeEngine.to_literal(ctx, input_val, type(input_val), flyte_literal_type)
        converted_entries = {}
        for key, entry in input_val.items():
            converted_entries[key] = extract_value(ctx, entry, entry_type, flyte_literal_type.map_value_type)
        return _literal_models.Literal(map=_literal_models.LiteralMap(literals=converted_entries))

    if isinstance(input_val, Promise):
        # In the example above, this handles the "in2=a" type of argument
        return input_val.val

    if isinstance(input_val, VoidPromise):
        raise AssertionError(
            f"Outputs of a non-output producing task {input_val.task_name} cannot be passed to another task."
        )

    # This handles native values, the 5 example
    return TypeEngine.to_literal(ctx, input_val, val_type, flyte_literal_type)
def test_engine():
    """FlyteDirectory maps to a multipart blob whose format matches the subscript, or is empty."""

    def assert_multipart_blob(directory_type, expected_format):
        literal_type = TypeEngine.to_literal_type(directory_type)
        assert literal_type.blob is not None
        assert literal_type.blob.dimensionality == BlobType.BlobDimensionality.MULTIPART
        assert literal_type.blob.format == expected_format

    assert_multipart_blob(FlyteDirectory, "")
    assert_multipart_blob(FlyteDirectory["csv"], "csv")
def test_format_correct():
    """The "avro" format annotation is carried into the literal type, the literal and the task output."""

    class TempEncoder(StructuredDatasetEncoder):
        def __init__(self):
            super().__init__(pd.DataFrame, S3, "avro")

        def encode(
            self,
            ctx: FlyteContext,
            structured_dataset: StructuredDataset,
            structured_dataset_type: StructuredDatasetType,
        ) -> literals.StructuredDataset:
            metadata = StructuredDatasetMetadata(structured_dataset_type)
            return literals.StructuredDataset(uri="/tmp/avro", metadata=metadata)

    ctx = FlyteContextManager.current_context()
    df = pd.DataFrame({"name": ["Tom", "Joseph"], "age": [20, 22]})

    annotated_sd_type = Annotated[StructuredDataset, "avro", kwtypes(name=str, age=int)]
    df_literal_type = TypeEngine.to_literal_type(annotated_sd_type)
    assert df_literal_type.structured_dataset_type is not None
    assert len(df_literal_type.structured_dataset_type.columns) == 2
    for position, column_name in enumerate(("name", "age")):
        assert df_literal_type.structured_dataset_type.columns[position].name == column_name
        assert df_literal_type.structured_dataset_type.columns[position].literal_type.simple is not None
    assert df_literal_type.structured_dataset_type.format == "avro"

    # Conversion fails with ValueError until the encoder is registered below.
    first_dataset = annotated_sd_type(df)
    with pytest.raises(ValueError):
        TypeEngine.to_literal(ctx, first_dataset, python_type=annotated_sd_type, expected=df_literal_type)

    StructuredDatasetTransformerEngine.register(TempEncoder(), default_for_type=False)
    second_dataset = annotated_sd_type(df)
    sd_literal = TypeEngine.to_literal(
        ctx, second_dataset, python_type=annotated_sd_type, expected=df_literal_type
    )
    assert sd_literal.scalar.structured_dataset.metadata.structured_dataset_type.format == "avro"

    @task
    def t1() -> Annotated[StructuredDataset, "avro"]:
        return StructuredDataset(dataframe=df)

    assert t1().file_format == "avro"
def test_create_native_named_tuple():
    """Exercise create_native_named_tuple with no, one and two promises, named and unnamed."""
    ctx = FlyteContextManager.current_context()

    nothing = create_native_named_tuple(ctx, promises=None, entity_interface=Interface())
    assert nothing is None

    def int_promise(name, value):
        literal = TypeEngine.to_literal(ctx, value, int, LiteralType(simple=SimpleType.INTEGER))
        return Promise(var=name, val=literal)

    p1 = int_promise("x", 1)
    p2 = int_promise("y", 2)

    single = create_native_named_tuple(ctx, promises=p1, entity_interface=Interface(outputs={"x": int}))
    assert single
    assert single == 1

    empty = create_native_named_tuple(ctx, promises=[], entity_interface=Interface())
    assert empty is None

    two_int_outputs = {"x": int, "y": int}

    pair = create_native_named_tuple(
        ctx, promises=[p1, p2], entity_interface=Interface(outputs=two_int_outputs)
    )
    assert pair
    assert pair == (1, 2)

    named_pair = create_native_named_tuple(
        ctx,
        promises=[p1, p2],
        entity_interface=Interface(outputs={"x": int, "y": int}, output_tuple_name="Tup"),
    )
    assert named_pair
    assert named_pair == (1, 2)
    assert named_pair.__class__.__name__ == "Tup"

    # Two promises against an interface that only declares "x" raises KeyError.
    with pytest.raises(KeyError):
        create_native_named_tuple(
            ctx,
            promises=[p1, p2],
            entity_interface=Interface(outputs={"x": int}, output_tuple_name="Tup"),
        )
def test_transformer_to_literal_local():
    """Convert local directories to multipart blob literals using a local directory as the raw output prefix."""
    random_dir = context_manager.FlyteContext.current_context().file_access.get_random_local_directory()
    provider = FileAccessProvider(local_sandbox_dir=random_dir, raw_output_prefix=os.path.join(random_dir, "raw"))
    outer_ctx = context_manager.FlyteContext.current_context()

    def recreate_empty_dir(directory):
        if os.path.exists(directory):
            shutil.rmtree(directory)
        pathlib.Path(directory).mkdir(parents=True)

    with context_manager.FlyteContextManager.with_context(outer_ctx.with_file_access(provider)) as ctx:
        # Use a separate directory that we know won't be the same as anything generated by flytekit itself, lest we
        # accidentally try to cp -R /some/folder /some/folder/sub which causes exceptions obviously.
        p = "/tmp/flyte/test_fd_transformer"

        # Create an empty directory and call to literal on it
        recreate_empty_dir(p)
        transformer = FlyteDirToMultipartBlobTransformer()
        literal_type = transformer.get_literal_type(FlyteDirectory)
        literal = transformer.to_literal(ctx, FlyteDirectory(p), FlyteDirectory, literal_type)
        assert literal.scalar.blob.uri.startswith(random_dir)

        # Create a directory with one file in it
        recreate_empty_dir(p)
        with open(os.path.join(p, "xyz"), "w") as fh:
            fh.write("Hello world\n")
        literal = transformer.to_literal(ctx, FlyteDirectory(p), FlyteDirectory, literal_type)

        uploaded_names = os.listdir(literal.scalar.blob.uri)
        assert uploaded_names == ["xyz"]

        # The only primitives allowed are strings
        with pytest.raises(AssertionError):
            transformer.to_literal(ctx, 3, FlyteDirectory, literal_type)

        with pytest.raises(TypeError, match="No automatic conversion from <class 'int'>"):
            TypeEngine.to_literal(ctx, 3, FlyteDirectory, literal_type)

        # Can't use if it's not a directory
        with pytest.raises(FlyteAssertion):
            p = "/tmp/flyte/xyz"
            plain_file = pathlib.Path(p)
            try:
                plain_file.unlink()
            except OSError:
                ...
            with open(p, "w") as fh:
                fh.write("hello world\n")
            transformer.to_literal(ctx, FlyteDirectory(p), FlyteDirectory, literal_type)
def test_comparison_lits():
    """Comparison expressions built from two integer promises (5 and 8) evaluate as expected."""
    five = Promise("x", TypeEngine.to_literal(None, 5, int, None))
    eight = Promise("y", TypeEngine.to_literal(None, 8, int, None))

    def check(expr, expected: bool):
        print(f"{expr} evals to {expr.eval()}")
        assert expected == expr.eval()

    check(five == eight, False)
    check(five < eight, True)

    both = (five == eight) & (five < eight)
    check(both, False)

    check(((five == eight) & (five < eight)) | (five > eight), False)

    # Comparisons against a plain python value
    check(five < 5, False)
    check(five >= 5, True)
    check(eight >= 5, True)
def unwrap_literal_map_and_execute(
    self, ctx: FlyteContext, input_literal_map: _literal_models.LiteralMap
) -> Union[VoidPromise, _literal_models.LiteralMap, _dynamic_job.DynamicJobSpec]:
    """
    Please see the implementation of the dispatch_execute function in the real task.

    Converts the input literals to python values, calls ``self.execute`` with them, and converts what it
    returns back into a literal map keyed by the declared output names.

    :raises AssertionError: If the value for any single output is a tuple.
    """
    # Invoked before the task is executed
    # Translate the input literals to Python native
    native_inputs = TypeEngine.literal_map_to_kwargs(ctx, input_literal_map, self.python_interface.inputs)

    logger.info(f"Invoking {self.name} with inputs: {native_inputs}")
    try:
        native_outputs = self.execute(**native_inputs)
    except Exception as e:
        logger.exception(f"Exception when executing {e}")
        raise e
    logger.info(f"Task executed successfully in user level, outputs: {native_outputs}")

    # Pair what was returned with the declared output names.
    output_names = list(self.python_interface.outputs.keys())
    outputs_by_name = {}
    if len(output_names) == 0:
        pass
    elif len(output_names) == 1:
        outputs_by_name[output_names[0]] = native_outputs
    else:
        for i, _ in enumerate(native_outputs):
            name = output_names[i]
            outputs_by_name[name] = native_outputs[i]

    # We manually construct a LiteralMap here because task inputs and outputs actually violate the assumption
    # built into the IDL that all the values of a literal map are of the same type.
    converted = {}
    for k, v in outputs_by_name.items():
        literal_type = self.interface.outputs[k].type
        py_type = self.python_interface.outputs[k]
        if isinstance(v, tuple):
            raise AssertionError(f"Output({k}) in task{self.name} received a tuple {v}, instead of {py_type}")
        converted[k] = TypeEngine.to_literal(ctx, v, py_type, literal_type)

    # After the execute has been successfully completed
    return _literal_models.LiteralMap(literals=converted)
def test_to_python_value_with_incoming_columns():
    """Read a two-column structured dataset literal back with one-column and unannotated target types."""
    # make a literal with a type that has two columns
    two_column_type = Annotated[pd.DataFrame, kwtypes(name=str, age=int)]
    ctx = FlyteContextManager.current_context()
    literal_type = TypeEngine.to_literal_type(two_column_type)
    df = generate_pandas()
    engine = StructuredDatasetTransformerEngine()
    literal = engine.to_literal(ctx, df, python_type=two_column_type, expected=literal_type)
    literal_columns = literal.scalar.structured_dataset.metadata.structured_dataset_type.columns
    assert len(literal_columns) == 2

    # declare a new type that only has one column
    # get the dataframe, make sure it has the column that was asked for.
    age_only_sd_type = Annotated[StructuredDataset, kwtypes(age=int)]
    dataset = engine.to_python_value(ctx, literal, age_only_sd_type)
    assert dataset.metadata.structured_dataset_type.columns[0].name == "age"
    age_only_df = dataset.open(pd.DataFrame).all()
    assert age_only_df.shape[1] == 1

    # check when columns are not specified, should pull both and add column information.
    dataset = engine.to_python_value(ctx, literal, StructuredDataset)
    assert len(dataset.metadata.structured_dataset_type.columns) == 2

    # should also work if subset type is just an annotated pd.DataFrame
    age_only_pd_type = Annotated[pd.DataFrame, kwtypes(age=int)]
    age_only_df = engine.to_python_value(ctx, literal, age_only_pd_type)
    assert age_only_df.shape[1] == 1
def test_dont_convert_remotes():
    """A remote FlyteFile passed through a dynamic task keeps its original URI in the node binding."""

    @task
    def t1(in1: FlyteFile):
        print(in1)

    @dynamic
    def dyn(in1: FlyteFile):
        t1(in1=in1)

    remote_file = FlyteFile("s3://anything")

    with context_manager.FlyteContext.current_context().new_serialization_settings(
        serialization_settings=context_manager.SerializationSettings(
            project="test_proj",
            domain="test_domain",
            version="abc",
            image_config=ImageConfig(Image(name="name", fqn="image", tag="name")),
            env={},
        )
    ) as serialization_ctx:
        with serialization_ctx.new_execution_context(mode=ExecutionState.Mode.TASK_EXECUTION) as ctx:
            single_blob = BlobType("", dimensionality=BlobType.BlobDimensionality.SINGLE)
            file_literal = TypeEngine.to_literal(ctx, remote_file, FlyteFile, single_blob)
            input_map = LiteralMap(literals={"in1": file_literal})

            spec = dyn.dispatch_execute(ctx, input_map)
            first_binding = spec.nodes[0].inputs[0].binding
            assert first_binding.scalar.blob.uri == "s3://anything"
def test_to_python_value_without_incoming_columns():
    """Read a column-less structured dataset literal back with one-column and unannotated target types."""
    # make a literal with a type with no columns
    ctx = FlyteContextManager.current_context()
    literal_type = TypeEngine.to_literal_type(pd.DataFrame)
    df = generate_pandas()
    engine = StructuredDatasetTransformerEngine()

    def fresh_literal():
        return engine.to_literal(ctx, df, python_type=pd.DataFrame, expected=literal_type)

    literal = fresh_literal()
    literal_columns = literal.scalar.structured_dataset.metadata.structured_dataset_type.columns
    assert len(literal_columns) == 0

    # declare a new type that only has one column
    # get the dataframe, make sure it has the column that was asked for.
    age_only_sd_type = Annotated[StructuredDataset, kwtypes(age=int)]
    dataset = engine.to_python_value(ctx, literal, age_only_sd_type)
    assert dataset.metadata.structured_dataset_type.columns[0].name == "age"
    age_only_df = dataset.open(pd.DataFrame).all()
    assert age_only_df.shape[1] == 1

    # check when columns are not specified, should pull both and add column information.
    # todo: see the todos in the open_as, and iter_as functions in StructuredDatasetTransformerEngine
    #  we have to recreate the literal because the test case above filled in the metadata
    literal = fresh_literal()
    dataset = engine.to_python_value(ctx, literal, StructuredDataset)
    assert dataset.metadata.structured_dataset_type.columns == []
    full_df = dataset.open(pd.DataFrame).all()
    assert full_df.shape[1] == 2

    # should also work if subset type is just an annotated pd.DataFrame
    literal = fresh_literal()
    age_only_pd_type = Annotated[pd.DataFrame, kwtypes(age=int)]
    age_only_df = engine.to_python_value(ctx, literal, age_only_pd_type)
    assert age_only_df.shape[1] == 1
def test_two(two_sample_inputs):
    """A dynamic task returning each input's ``main_product`` yields a two-element output collection."""
    first_input = two_sample_inputs[0]
    second_input = two_sample_inputs[1]

    @dynamic
    def dt1(a: List[MyInput]) -> List[FlyteFile]:
        return [aa.main_product for aa in a]

    with FlyteContextManager.with_context(
        FlyteContextManager.current_context().with_serialization_settings(
            SerializationSettings(
                project="test_proj",
                domain="test_domain",
                version="abc",
                image_config=ImageConfig(Image(name="name", fqn="image", tag="name")),
                env={},
            )
        )
    ) as serialization_ctx:
        task_execution_state = serialization_ctx.execution_state.with_params(
            mode=ExecutionState.Mode.TASK_EXECUTION,
        )
        with FlyteContextManager.with_context(
            serialization_ctx.with_execution_state(task_execution_state)
        ) as ctx:
            input_literal_map = TypeEngine.dict_to_literal_map(
                ctx, d={"a": [first_input, second_input]}, type_hints={"a": List[MyInput]}
            )
            dynamic_job_spec = dt1.dispatch_execute(ctx, input_literal_map)
            produced = dynamic_job_spec.literals["o0"].collection.literals
            assert len(produced) == 2
def test_named_tuple():
    """Fields of a NamedTuple become variables with the matching simple literal types."""
    outputs_tuple = typing.NamedTuple("Outputs", [("x_str", str), ("y_int", int)])
    variable_map = TypeEngine.named_tuple_to_variable_map(outputs_tuple)

    x_variable = variable_map.variables["x_str"]
    assert x_variable.type.simple == model_types.SimpleType.STRING

    y_variable = variable_map.variables["y_int"]
    assert y_variable.type.simple == model_types.SimpleType.INTEGER
def test_protos():
    """A protobuf message round-trips as a STRUCT literal; a non-struct literal is rejected."""
    ctx = FlyteContext.current_context()
    proto_type = errors_pb2.ContainerError

    message = proto_type(code="code", message="message")
    literal_type = TypeEngine.to_literal_type(proto_type)
    assert literal_type.simple == SimpleType.STRUCT
    assert literal_type.metadata["pb_type"] == "flyteidl.core.errors_pb2.ContainerError"

    literal = TypeEngine.to_literal(ctx, message, proto_type, literal_type)
    round_tripped = TypeEngine.to_python_value(ctx, literal, proto_type)
    assert round_tripped == message

    # Test error
    integer_literal = Literal(scalar=Scalar(primitive=Primitive(integer=4)))
    with pytest.raises(AssertionError):
        TypeEngine.to_python_value(ctx, integer_literal, proto_type)
def _workflow_fn_outputs_to_promise( ctx: FlyteContext, native_outputs: typing.Dict[str, type], # Actually an orderedDict typed_outputs: Dict[str, _interface_models.Variable], outputs: Union[Any, Tuple[Any]], ) -> List[Promise]: if len(native_outputs) == 1: if isinstance(outputs, tuple): if len(outputs) != 1: raise AssertionError( f"The Workflow specification indicates only one return value, received {len(outputs)}" ) else: outputs = (outputs, ) if len(native_outputs) > 1: if not isinstance(outputs, tuple) or len(native_outputs) != len(outputs): # Length check, clean up exception raise AssertionError( f"The workflow specification indicates {len(native_outputs)} return vals, but received {len(outputs)}" ) # This recasts the Promises provided by the outputs of the workflow's tasks into the correct output names # of the workflow itself return_vals = [] for (k, t), v in zip(native_outputs.items(), outputs): if isinstance(v, Promise): return_vals.append(v.with_var(k)) else: # Found a return type that is not a promise, so we need to transform it var = typed_outputs[k] return_vals.append( Promise(var=k, val=TypeEngine.to_literal(ctx, v, t, var.type))) return return_vals
def add_workflow_output(
    self, output_name: str, p: Union[Promise, List[Promise], Dict[str, Promise]], python_type: Optional[Type] = None
):
    """
    Add an output with the given name from the given node output.

    :param output_name: Name of the new workflow output; must not already exist on this workflow.
    :param p: The promise, or list/dict of promises, to bind to the output.
    :param python_type: The python type of the output. May only be omitted when ``p`` is a single
        promise, in which case the type is looked up on the entity that produced the promise.
    :raises FlyteValidationException: If the output name already exists, or if ``p`` is a list or dict
        and no ``python_type`` was given.
    :raises Exception: If the current context already has a compilation state.
    """
    if output_name in self._python_interface.outputs:
        raise FlyteValidationException(f"Output {output_name} already exists in workflow {self.name}")

    if python_type is None:
        # isinstance rather than an exact type comparison, so list/dict subclasses also get the
        # validation error instead of failing on the ``p.ref`` lookup below.
        if isinstance(p, (list, dict)):
            raise FlyteValidationException(
                f"If specifying a list or dict of Promises, you must specify the python_type type for {output_name}"
                " starting with the container type (e.g. List[int])"
            )
        python_type = p.ref.node.flyte_entity.python_interface.outputs[p.var]
        logger.debug(f"Inferring python type for wf output {output_name} from Promise provided {python_type}")

    flyte_type = TypeEngine.to_literal_type(python_type=python_type)

    ctx = FlyteContext.current_context()
    if ctx.compilation_state is not None:
        raise Exception("Can't already be compiling")
    with FlyteContextManager.with_context(ctx.with_compilation_state(self.compilation_state)) as ctx:
        b = binding_from_python_std(
            ctx, output_name, expected_literal_type=flyte_type, t_value=p, t_value_type=python_type
        )
        self._output_bindings.append(b)
        # Both interfaces are refreshed so they include the newly added output.
        self._python_interface = self._python_interface.with_outputs(extra_outputs={output_name: python_type})
        self._interface = transform_interface_to_typed_interface(self._python_interface)
def transform_inputs_to_parameters(
    ctx: context_manager.FlyteContext, interface: Interface
) -> _interface_models.ParameterMap:
    """
    Transforms the given interface (with inputs) to a Parameter Map with defaults set

    An input whose default is ``None`` becomes a required parameter without a default; any other
    default is converted to a literal by the type engine and the parameter is marked as not required.

    :param ctx: the Flyte context passed to the type engine when converting defaults
    :param interface: the interface object
    """
    if interface is None or interface.inputs_with_defaults is None:
        return _interface_models.ParameterMap({})

    variables = transform_variable_map(interface.inputs)
    parameters = {}
    defaults_by_name = interface.inputs_with_defaults
    for name, variable in variables.items():
        _declared_type, default_value = defaults_by_name[name]
        if default_value is None:
            parameter = _interface_models.Parameter(var=variable, default=None, required=True)
        else:
            default_literal = TypeEngine.to_literal(
                ctx, default_value, python_type=interface.inputs[name], expected=variable.type
            )
            parameter = _interface_models.Parameter(var=variable, default=default_literal, required=False)
        parameters[name] = parameter
    return _interface_models.ParameterMap(parameters)