コード例 #1
0
    def inputs(self) -> Dict[str, Any]:
        """
        Returns the inputs to the execution in the standard python format as dictated by the type engine.

        The value is computed on first access and cached on ``self._inputs``; later calls return
        the cached object without contacting the client again.
        """
        if self._inputs is None:
            client = _flyte_engine.get_client()
            execution_data = client.get_execution_data(self.id)

            # Inputs are returned inline unless they are too big, in which case a url blob pointing to them is returned.
            input_map: _literal_models.LiteralMap = _literal_models.LiteralMap(
                {})
            if bool(execution_data.full_inputs.literals):
                input_map = execution_data.full_inputs
            elif execution_data.inputs.bytes > 0:
                # Download the serialized inputs into a temporary directory and parse them from there.
                with _common_utils.AutoDeletingTempDir() as tmp_dir:
                    tmp_name = _os.path.join(tmp_dir.name, "inputs.pb")
                    _data_proxy.Data.get_data(execution_data.inputs.url,
                                              tmp_name)
                    # The protobuf message class is ``LiteralMap``; the previous spelling
                    # ``Literalmap`` does not exist and raised AttributeError on this path.
                    input_map = _literal_models.LiteralMap.from_flyte_idl(
                        _common_utils.load_proto_from_file(
                            _literals_pb2.LiteralMap, tmp_name))
            # The python types are guessed from the interface of the workflow fetched with the
            # launch plan's project/domain/name/version.
            lp_id = self.spec.launch_plan
            workflow = _workflow.FlyteWorkflow.fetch(lp_id.project,
                                                     lp_id.domain, lp_id.name,
                                                     lp_id.version)
            self._inputs = TypeEngine.literal_map_to_kwargs(
                ctx=FlyteContextManager.current_context(),
                lm=input_map,
                python_types=TypeEngine.guess_python_types(
                    workflow.interface.inputs),
            )
        return self._inputs
コード例 #2
0
def test_engine_file_output():
    """Round-trip a local text file through the type engine as a single-part blob."""
    single_blob = _core_types.BlobType(
        format="",
        dimensionality=_core_types.BlobType.BlobDimensionality.SINGLE,
    )
    expected_line = "Hello World\n"

    provider = FileAccessProvider(local_sandbox_dir="/tmp/flytetesting")
    base_ctx = context_manager.FlyteContext.current_context()
    with base_ctx.new_file_access_context(file_access_provider=provider) as ctx:
        # The source file deliberately lives outside the sandbox directory configured above.
        source_path = "/tmp/sample.txt"
        with open(source_path, "w") as writer:
            writer.write(expected_line)

        literal = TypeEngine.to_literal(
            ctx,
            source_path,
            os.PathLike,
            LiteralType(blob=single_blob),
        )

        # Local storage stands in for the remote store, so the blob uri can be opened directly.
        with open(literal.scalar.blob.uri, "r") as reader:
            assert reader.readline() == expected_line

        # Converting the literal back should give a path to a readable copy of the same content.
        local_copy = TypeEngine.to_python_value(ctx, literal, os.PathLike)
        with open(local_copy, "r") as reader:
            assert reader.readline() == expected_line
コード例 #3
0
def test_bad_conversion():
    """Guessing a python type from a schema with an unknown column type raises ValueError."""
    schema_type = FlyteSchema[kwtypes(my_custom=bool)]
    literal_type = TypeEngine.to_literal_type(schema_type)

    # Overwrite the first column's type with a value that is not a real column type.
    first_column = literal_type.schema.columns[0]
    first_column._type = 15

    with pytest.raises(ValueError):
        TypeEngine.guess_python_type(literal_type)
コード例 #4
0
    def register(cls,
                 h: Handlers,
                 default_for_type: Optional[bool] = True,
                 override: Optional[bool] = False):
        """
        Register a handler with this dataframe meta-transformer and with the type engine.

        The string "://" should not be present in any handler's protocol so we don't check for it.

        :param h: the handler to register; it is stored under its ``supported_format``.
        :param default_for_type: when truthy, the handler's format and protocol become the
            defaults for its python type.
        :param override: unless this is exactly ``False``, an existing handler for the same
            format is replaced instead of raising.
        :raises ValueError: if a handler is already registered for the format and ``override`` is ``False``.
        """
        handlers_by_format = cls._handler_finder(h)
        already_registered = h.supported_format in handlers_by_format
        if already_registered and override is False:
            raise ValueError(
                f"Already registered a handler for {(h.python_type, h.protocol, h.supported_format)}"
            )

        handlers_by_format[h.supported_format] = h
        logger.debug(
            f"Registered {h} as handler for {h.python_type}, protocol {h.protocol}, fmt {h.supported_format}"
        )

        if default_for_type:
            # TODO: Add logging, think about better ux, maybe default False and warn if doesn't exist.
            py_type = h.python_type
            cls.DEFAULT_FORMATS[py_type] = h.supported_format
            cls.DEFAULT_PROTOCOLS[py_type] = h.protocol

        # The type engine needs to know about the python type too. Load order of the transformers
        # does not matter for now, provided the older Pandas/FlyteSchema transformers do not
        # specify the override as well.
        TypeEngine.register_additional_type(
            StructuredDatasetTransformerEngine(),
            h.python_type,
            override=True,
        )
コード例 #5
0
    def inputs(self) -> Dict[str, Any]:
        """
        Return the task execution's inputs as native Python values produced by the type engine.

        The converted inputs are cached on ``self._inputs`` after the first call.
        """
        from flytekit.control_plane.tasks.task import FlyteTask

        if self._inputs is not None:
            return self._inputs

        execution_data = _flyte_engine.get_client().get_task_execution_data(self.id)

        # Small inputs come back inline; large ones are only available through a url blob.
        if bool(execution_data.full_inputs.literals):
            literal_inputs = execution_data.full_inputs
        elif execution_data.inputs.bytes > 0:
            with _common_utils.AutoDeletingTempDir() as scratch:
                local_copy = os.path.join(scratch.name, "inputs.pb")
                _data_proxy.Data.get_data(execution_data.inputs.url, local_copy)
                proto = _common_utils.load_proto_from_file(
                    _literals_pb2.LiteralMap, local_copy)
                literal_inputs = _literal_models.LiteralMap.from_flyte_idl(proto)
        else:
            literal_inputs = _literal_models.LiteralMap({})

        # The python types are guessed from the interface of the task identified by this execution.
        task_id = self.id.task_id
        task = FlyteTask.fetch(task_id.project, task_id.domain, task_id.name,
                               task_id.version)
        guessed_types = TypeEngine.guess_python_types(task.interface.inputs)
        self._inputs = TypeEngine.literal_map_to_kwargs(
            ctx=FlyteContextManager.current_context(),
            lm=literal_inputs,
            python_types=guessed_types,
        )
        return self._inputs
コード例 #6
0
 def extract_value(
     ctx: FlyteContext, input_val: Any, val_type: type,
     flyte_literal_type: _type_models.LiteralType
 ) -> _literal_models.Literal:
     """
     Convert ``input_val`` into a Flyte ``Literal``, recursing into lists and dicts.

     :param ctx: context handed through to ``TypeEngine.to_literal``.
     :param input_val: a list, dict, ``Promise`` or native value to convert.
     :param val_type: the python type declared for ``input_val``.
     :param flyte_literal_type: the literal type the result is expected to match.
     :return: the literal for ``input_val``; for a ``Promise`` this is its ``val``.
     :raises Exception: if a list/dict is given but the literal type is not a collection/map
         (or STRUCT, for dicts).
     :raises AssertionError: if ``input_val`` is a ``VoidPromise`` or a tuple.
     :raises ValueError: re-raised from ``ListTransformer.get_sub_type`` when the list is empty,
         because there is no element to infer the type from.
     """
     if isinstance(input_val, list):
         if flyte_literal_type.collection_type is None:
             raise Exception(
                 f"Not a collection type {flyte_literal_type} but got a list {input_val}"
             )
         try:
             sub_type = ListTransformer.get_sub_type(val_type)
         except ValueError:
             # Fall back to the runtime type of the first element when the declared type
             # does not yield an element type; impossible for an empty list.
             if len(input_val) == 0:
                 raise
             sub_type = type(input_val[0])
         literals = [
             extract_value(ctx, v, sub_type,
                           flyte_literal_type.collection_type)
             for v in input_val
         ]
         return _literal_models.Literal(
             collection=_literal_models.LiteralCollection(
                 literals=literals))
     elif isinstance(input_val, dict):
         if (flyte_literal_type.map_value_type is None
                 and flyte_literal_type.simple !=
                 _type_models.SimpleType.STRUCT):
             raise Exception(
                 f"Not a map type {flyte_literal_type} but got a map {input_val}"
             )
         # Only the value type is needed; the key type is discarded.
         _, sub_type = DictTransformer.get_dict_types(val_type)
         if flyte_literal_type.simple == _type_models.SimpleType.STRUCT:
             # STRUCT dicts are converted as a whole by the type engine rather than per value.
             return TypeEngine.to_literal(ctx, input_val, type(input_val),
                                          flyte_literal_type)
         else:
             literals = {
                 k: extract_value(ctx, v, sub_type,
                                  flyte_literal_type.map_value_type)
                 for k, v in input_val.items()
             }
             return _literal_models.Literal(map=_literal_models.LiteralMap(
                 literals=literals))
     elif isinstance(input_val, Promise):
         # A promise already carries its literal value (e.g. an argument like "in2=a").
         return input_val.val
     elif isinstance(input_val, VoidPromise):
         raise AssertionError(
             f"Outputs of a non-output producing task {input_val.task_name} cannot be passed to another task."
         )
     elif isinstance(input_val, tuple):
         # The adjacent literals are concatenated, so each continuation starts with a space;
         # previously the message rendered as "theactual" and "thenreturn".
         raise AssertionError(
             "Tuples are not a supported type for individual values in Flyte - got a tuple -"
             f" {input_val}. If using named tuple in an inner task, please, de-reference the"
             " actual attribute that you want to use. For example, in NamedTuple('OP', x=int) then"
             " return v.x, instead of v, even if this has a single element")
     else:
         # Plain native values (e.g. the literal 5) go straight to the type engine.
         return TypeEngine.to_literal(ctx, input_val, val_type,
                                      flyte_literal_type)
コード例 #7
0
ファイル: test_type_engine.py プロジェクト: jaychia/flytekit
def test_zero_floats():
    """Integer and float zero literals both convert to a python value equal to 0 when asked for float."""
    ctx = FlyteContext.current_context()

    int_zero = Literal(scalar=Scalar(primitive=Primitive(integer=0)))
    float_zero = Literal(scalar=Scalar(primitive=Primitive(float_value=0.0)))

    for zero_literal in (int_zero, float_zero):
        assert TypeEngine.to_python_value(ctx, zero_literal, float) == 0
コード例 #8
0
ファイル: test_type_engine.py プロジェクト: jaychia/flytekit
def test_type_engine():
    """Check literal types derived from ``int`` and from a nested dict/list/dict of timedeltas."""
    int_literal_type = TypeEngine.to_literal_type(int)
    assert int_literal_type.simple == model_types.SimpleType.INTEGER

    nested = typing.Dict[str, typing.List[typing.Dict[str, timedelta]]]
    nested_literal_type = TypeEngine.to_literal_type(nested)
    # Walk down map -> collection -> map to reach the innermost value type.
    innermost = nested_literal_type.map_value_type.collection_type.map_value_type
    assert innermost.simple == model_types.SimpleType.DURATION
コード例 #9
0
def test_jsondc_schemaize():
    """
    Guessing the python type back from ``Foo``'s literal type yields plain ``dict``.

    When postponed annotations are enabled, dataclass_json will not work and we'll end up with a
    schemaless generic. This test basically tests the broken behavior. Remove this test if
    https://github.com/lovasoa/marshmallow_dataclass/issues/13 is ever fixed.
    """
    guessed = TypeEngine.guess_python_type(TypeEngine.to_literal_type(Foo))
    assert guessed is dict
コード例 #10
0
def binding_data_from_python_std(
    ctx: _flyte_context.FlyteContext,
    expected_literal_type: _type_models.LiteralType,
    t_value: typing.Any,
    t_value_type: type,
) -> _literals_models.BindingData:
    """
    Build the ``BindingData`` for ``t_value``, recursing into lists and dicts.

    :param ctx: context handed through to ``TypeEngine.to_literal``.
    :param expected_literal_type: the literal type the binding is expected to match.
    :param t_value: a promise, list, dict or native value.
    :param t_value_type: the python type declared for ``t_value``.
    :raises AssertionError: for a ``VoidPromise``, or when a list/dict is given but the expected
        literal type is not a collection/map (or STRUCT, for dicts).
    """
    if isinstance(t_value, Promise):
        # Output of another task: bind by reference while it is not ready. A ready promise
        # is not returned here and continues to the scalar conversion at the bottom.
        if not t_value.is_ready:
            return _literals_models.BindingData(promise=t_value.ref)

    elif isinstance(t_value, VoidPromise):
        raise AssertionError(
            f"Cannot pass output from task {t_value.task_name} that produces no outputs to a downstream task"
        )

    elif isinstance(t_value, list):
        element_literal_type = expected_literal_type.collection_type
        if element_literal_type is None:
            raise AssertionError(
                f"this should be a list and it is not: {type(t_value)} vs {expected_literal_type}"
            )

        element_type = ListTransformer.get_sub_type(t_value_type)
        item_bindings = [
            binding_data_from_python_std(ctx, element_literal_type, item,
                                         element_type) for item in t_value
        ]
        return _literals_models.BindingData(
            collection=_literals_models.BindingDataCollection(
                bindings=item_bindings))

    elif isinstance(t_value, dict):
        is_struct = expected_literal_type.simple == _type_models.SimpleType.STRUCT
        if expected_literal_type.map_value_type is None and not is_struct:
            raise AssertionError(
                f"this should be a Dictionary type and it is not: {type(t_value)} vs {expected_literal_type}"
            )
        _, value_type = DictTransformer.get_dict_types(t_value_type)
        if is_struct:
            # STRUCT dicts are converted as a whole and bound as a scalar.
            struct_literal = TypeEngine.to_literal(ctx, t_value,
                                                   type(t_value),
                                                   expected_literal_type)
            return _literals_models.BindingData(scalar=struct_literal.scalar)

        entry_bindings = {
            key: binding_data_from_python_std(
                ctx, expected_literal_type.map_value_type, entry, value_type)
            for key, entry in t_value.items()
        }
        return _literals_models.BindingData(
            map=_literals_models.BindingDataMap(bindings=entry_bindings))

    # Scalar case - e.g. my_task(in1=5)
    converted = TypeEngine.to_literal(ctx, t_value, t_value_type,
                                      expected_literal_type)
    return _literals_models.BindingData(scalar=converted.scalar)
コード例 #11
0
def test_assert_type():
    """Converting an ``int`` to a FlyteSchema literal in task-execution mode raises ValueError."""
    base_ctx = context_manager.FlyteContextManager.current_context()
    task_state = base_ctx.new_execution_state().with_params(mode=ExecutionState.Mode.TASK_EXECUTION)
    task_ctx = base_ctx.with_execution_state(task_state)

    with context_manager.FlyteContextManager.with_context(task_ctx) as ctx:
        schema = FlyteSchema[kwtypes(x=int, y=float)]
        literal_type = FlyteSchemaTransformer().get_literal_type(schema)
        with pytest.raises(ValueError, match="DataFrames of type <class 'int'> are not supported currently"):
            TypeEngine.to_literal(ctx, 3, schema, literal_type)
コード例 #12
0
ファイル: test_protobuf.py プロジェクト: flyteorg/flytekit
def test_bad_tag():
    """STRUCT literal types whose metadata has a bad or missing ``pb_type`` raise ValueError."""
    unusable_metadata = (
        {"pb_type": "bad.tag"},  # Will not be able to load this
        {},  # Doesn't match pb field key
    )
    for metadata in unusable_metadata:
        with pytest.raises(ValueError):
            literal_type = LiteralType(simple=SimpleType.STRUCT, metadata=metadata)
            TypeEngine.guess_python_type(literal_type)
コード例 #13
0
def test_guessing_containers():
    """Collection and map literal types are guessed as ``List[...]`` and ``Dict[str, ...]``."""
    bool_type = model_types.LiteralType(simple=model_types.SimpleType.BOOLEAN)
    list_of_bool = model_types.LiteralType(collection_type=bool_type)
    assert TypeEngine.guess_python_type(list_of_bool) == typing.List[bool]

    duration_type = model_types.LiteralType(simple=model_types.SimpleType.DURATION)
    map_of_duration = model_types.LiteralType(map_value_type=duration_type)
    assert TypeEngine.guess_python_type(map_of_duration) == typing.Dict[str, timedelta]
コード例 #14
0
ファイル: test_protobuf.py プロジェクト: flyteorg/flytekit
def test_pb_guess_python_type():
    """A protobuf message type survives literal-type guessing and a literal round trip."""
    tag = catalog_pb2.CatalogArtifactTag(artifact_id="artifact_1", name="artifact_name")
    native_inputs = {"a": tag}

    literal_type = TypeEngine.to_literal_type(catalog_pb2.CatalogArtifactTag)
    guessed_type = TypeEngine.guess_python_type(literal_type)
    assert guessed_type == catalog_pb2.CatalogArtifactTag

    ctx = FlyteContextManager.current_context()
    literal_map = TypeEngine.dict_to_literal_map(ctx, native_inputs, {"a": guessed_type})
    round_tripped = TypeEngine.to_python_value(ctx, literal_map.literals["a"], guessed_type)
    assert round_tripped == tag
コード例 #15
0
 def extract_value(
     ctx: FlyteContext, input_val: Any, val_type: type,
     flyte_literal_type: _type_models.LiteralType
 ) -> _literal_models.Literal:
     """
     Turn ``input_val`` into a Flyte ``Literal``, descending into lists and dicts.

     Lists become literal collections, dicts become literal maps (or a single STRUCT literal),
     a ``Promise`` contributes its ``val``, and anything else is converted by the type engine.
     A ``VoidPromise`` is rejected with an ``AssertionError``.
     """
     if isinstance(input_val, list):
         element_literal_type = flyte_literal_type.collection_type
         if element_literal_type is None:
             raise Exception(
                 f"Not a collection type {flyte_literal_type} but got a list {input_val}"
             )
         try:
             element_type = ListTransformer.get_sub_type(val_type)
         except ValueError:
             # Without a usable declared element type, infer it from the first element;
             # an empty list offers nothing to infer from, so the error propagates.
             if len(input_val) == 0:
                 raise
             element_type = type(input_val[0])
         items = [
             extract_value(ctx, item, element_type, element_literal_type)
             for item in input_val
         ]
         collection = _literal_models.LiteralCollection(literals=items)
         return _literal_models.Literal(collection=collection)

     if isinstance(input_val, dict):
         is_struct = flyte_literal_type.simple == _type_models.SimpleType.STRUCT
         if flyte_literal_type.map_value_type is None and not is_struct:
             raise Exception(
                 f"Not a map type {flyte_literal_type} but got a map {input_val}"
             )
         _, value_type = DictTransformer.get_dict_types(val_type)
         if is_struct:
             return TypeEngine.to_literal(ctx, input_val, type(input_val),
                                          flyte_literal_type)
         entries = {
             key: extract_value(ctx, entry, value_type,
                                flyte_literal_type.map_value_type)
             for key, entry in input_val.items()
         }
         return _literal_models.Literal(
             map=_literal_models.LiteralMap(literals=entries))

     if isinstance(input_val, Promise):
         # Covers arguments of the "in2=a" kind, where the value is another node's output.
         return input_val.val

     if isinstance(input_val, VoidPromise):
         raise AssertionError(
             f"Outputs of a non-output producing task {input_val.task_name} cannot be passed to another task."
         )

     # Native values such as the literal 5.
     return TypeEngine.to_literal(ctx, input_val, val_type,
                                  flyte_literal_type)
コード例 #16
0
def test_engine():
    """``FlyteDirectory`` maps to a multipart blob whose format follows the subscript."""

    def check_multipart_blob(python_type, expected_format):
        literal_type = TypeEngine.to_literal_type(python_type)
        assert literal_type.blob is not None
        assert literal_type.blob.dimensionality == BlobType.BlobDimensionality.MULTIPART
        assert literal_type.blob.format == expected_format

    check_multipart_blob(FlyteDirectory, "")
    check_multipart_blob(FlyteDirectory["csv"], "csv")
コード例 #17
0
def test_format_correct():
    """
    An "avro"-annotated StructuredDataset keeps its format in the literal type, fails to
    convert before an avro encoder is registered, and converts once ``TempEncoder`` is registered.
    """

    class TempEncoder(StructuredDatasetEncoder):
        def __init__(self):
            super().__init__(pd.DataFrame, S3, "avro")

        def encode(
            self,
            ctx: FlyteContext,
            structured_dataset: StructuredDataset,
            structured_dataset_type: StructuredDatasetType,
        ) -> literals.StructuredDataset:
            metadata = StructuredDatasetMetadata(structured_dataset_type)
            return literals.StructuredDataset(uri="/tmp/avro", metadata=metadata)

    ctx = FlyteContextManager.current_context()
    df = pd.DataFrame({"name": ["Tom", "Joseph"], "age": [20, 22]})

    annotated_sd_type = Annotated[StructuredDataset, "avro",
                                  kwtypes(name=str, age=int)]
    df_literal_type = TypeEngine.to_literal_type(annotated_sd_type)

    def convert(dataset):
        return TypeEngine.to_literal(ctx,
                                     dataset,
                                     python_type=annotated_sd_type,
                                     expected=df_literal_type)

    sd_type = df_literal_type.structured_dataset_type
    assert sd_type is not None
    assert len(sd_type.columns) == 2
    for position, column_name in enumerate(("name", "age")):
        assert sd_type.columns[position].name == column_name
        assert sd_type.columns[position].literal_type.simple is not None
    assert sd_type.format == "avro"

    # Conversion is expected to fail until the avro encoder is registered below.
    unregistered_sd = annotated_sd_type(df)
    with pytest.raises(ValueError):
        convert(unregistered_sd)

    StructuredDatasetTransformerEngine.register(TempEncoder(),
                                                default_for_type=False)
    sd_literal = convert(annotated_sd_type(df))
    literal_metadata = sd_literal.scalar.structured_dataset.metadata
    assert literal_metadata.structured_dataset_type.format == "avro"

    @task
    def t1() -> Annotated[StructuredDataset, "avro"]:
        return StructuredDataset(dataframe=df)

    assert t1().file_format == "avro"
コード例 #18
0
def test_create_native_named_tuple():
    """Exercise ``create_native_named_tuple`` with no, one and several promises."""
    ctx = FlyteContextManager.current_context()
    int_literal_type = LiteralType(simple=SimpleType.INTEGER)

    # No promises at all -> nothing to return.
    result = create_native_named_tuple(ctx, promises=None, entity_interface=Interface())
    assert result is None

    p1 = Promise(var="x", val=TypeEngine.to_literal(ctx, 1, int, int_literal_type))
    p2 = Promise(var="y", val=TypeEngine.to_literal(ctx, 2, int, int_literal_type))

    # A single promise comes back as the bare value.
    single_output = Interface(outputs={"x": int})
    result = create_native_named_tuple(ctx, promises=p1, entity_interface=single_output)
    assert result
    assert result == 1

    # An empty list behaves like no promises.
    result = create_native_named_tuple(ctx, promises=[], entity_interface=Interface())
    assert result is None

    # Two promises come back as a tuple.
    two_outputs = Interface(outputs={"x": int, "y": int})
    result = create_native_named_tuple(ctx, promises=[p1, p2], entity_interface=two_outputs)
    assert result
    assert result == (1, 2)

    # With an output tuple name the tuple class carries that name.
    named_outputs = Interface(outputs={"x": int, "y": int}, output_tuple_name="Tup")
    result = create_native_named_tuple(ctx, promises=[p1, p2], entity_interface=named_outputs)
    assert result
    assert result == (1, 2)
    assert result.__class__.__name__ == "Tup"

    # A promise without a matching output in the interface raises KeyError.
    with pytest.raises(KeyError):
        create_native_named_tuple(
            ctx,
            promises=[p1, p2],
            entity_interface=Interface(outputs={"x": int}, output_tuple_name="Tup"),
        )
コード例 #19
0
def test_transformer_to_literal_local():
    """Convert local directories to multipart blob literals and check the rejected inputs."""

    def recreate_empty_dir(directory):
        # Start from a clean, existing directory.
        if os.path.exists(directory):
            shutil.rmtree(directory)
        pathlib.Path(directory).mkdir(parents=True)

    random_dir = context_manager.FlyteContext.current_context().file_access.get_random_local_directory()
    provider = FileAccessProvider(local_sandbox_dir=random_dir, raw_output_prefix=os.path.join(random_dir, "raw"))
    base_ctx = context_manager.FlyteContext.current_context()
    with context_manager.FlyteContextManager.with_context(base_ctx.with_file_access(provider)) as ctx:
        # Use a separate directory that we know won't be the same as anything generated by flytekit itself, lest we
        # accidentally try to cp -R /some/folder /some/folder/sub which causes exceptions obviously.
        source_dir = "/tmp/flyte/test_fd_transformer"

        transformer = FlyteDirToMultipartBlobTransformer()
        literal_type = transformer.get_literal_type(FlyteDirectory)

        # An empty directory is uploaded under the random local directory.
        recreate_empty_dir(source_dir)
        literal = transformer.to_literal(ctx, FlyteDirectory(source_dir), FlyteDirectory, literal_type)
        assert literal.scalar.blob.uri.startswith(random_dir)

        # A directory with one file ends up with exactly that file at the blob uri.
        recreate_empty_dir(source_dir)
        with open(os.path.join(source_dir, "xyz"), "w") as writer:
            writer.write("Hello world\n")
        literal = transformer.to_literal(ctx, FlyteDirectory(source_dir), FlyteDirectory, literal_type)
        assert os.listdir(literal.scalar.blob.uri) == ["xyz"]

        # The only primitives allowed are strings
        with pytest.raises(AssertionError):
            transformer.to_literal(ctx, 3, FlyteDirectory, literal_type)

        with pytest.raises(TypeError, match="No automatic conversion from <class 'int'>"):
            TypeEngine.to_literal(ctx, 3, FlyteDirectory, literal_type)

        # Can't use if it's not a directory
        with pytest.raises(FlyteAssertion):
            file_path = "/tmp/flyte/xyz"
            try:
                pathlib.Path(file_path).unlink()
            except OSError:
                pass
            with open(file_path, "w") as writer:
                writer.write("hello world\n")
            transformer.to_literal(ctx, FlyteDirectory(file_path), FlyteDirectory, literal_type)
コード例 #20
0
def test_comparison_lits():
    """Comparison expressions built from two integer promises (5 and 8) evaluate as expected."""
    five = Promise("x", TypeEngine.to_literal(None, 5, int, None))
    eight = Promise("y", TypeEngine.to_literal(None, 8, int, None))

    def check(expr, expected: bool):
        print(f"{expr} evals to {expr.eval()}")
        assert expected == expr.eval()

    # Promise vs promise
    check(five == eight, False)
    check(five < eight, True)
    check((five == eight) & (five < eight), False)
    check(((five == eight) & (five < eight)) | (five > eight), False)
    # Promise vs plain integer
    check(five < 5, False)
    check(five >= 5, True)
    check(eight >= 5, True)
コード例 #21
0
    def unwrap_literal_map_and_execute(
        self, ctx: FlyteContext, input_literal_map: _literal_models.LiteralMap
    ) -> Union[VoidPromise, _literal_models.LiteralMap,
               _dynamic_job.DynamicJobSpec]:
        """
        Convert the literal inputs to native values, call ``self.execute`` with them and convert
        the native outputs back into a ``LiteralMap``.

        Please see the implementation of the dispatch_execute function in the real task.

        :raises AssertionError: if any single output value is a tuple.
        """
        # Before execution: literals -> Python native keyword arguments.
        native_inputs = TypeEngine.literal_map_to_kwargs(
            ctx, input_literal_map, self.python_interface.inputs)

        logger.info(f"Invoking {self.name} with inputs: {native_inputs}")
        try:
            native_outputs = self.execute(**native_inputs)
        except Exception as e:
            logger.exception(f"Exception when executing {e}")
            raise e
        logger.info(
            f"Task executed successfully in user level, outputs: {native_outputs}"
        )

        # Pair the declared output names with what execute returned.
        output_names = list(self.python_interface.outputs.keys())
        if len(output_names) == 0:
            outputs_by_name = {}
        elif len(output_names) == 1:
            outputs_by_name = {output_names[0]: native_outputs}
        else:
            outputs_by_name = {
                output_names[position]: native_outputs[position]
                for position, _ in enumerate(native_outputs)
            }

        # The LiteralMap is assembled by hand: task inputs and outputs break the IDL's assumption
        # that every value in a literal map has the same type.
        converted = {}
        for k, v in outputs_by_name.items():
            py_type = self.python_interface.outputs[k]
            expected_type = self.interface.outputs[k].type
            if isinstance(v, tuple):
                raise AssertionError(
                    f"Output({k}) in task{self.name} received a tuple {v}, instead of {py_type}"
                )
            converted[k] = TypeEngine.to_literal(ctx, v, py_type, expected_type)

        # After execution has completed successfully.
        return _literal_models.LiteralMap(literals=converted)
コード例 #22
0
def test_to_python_value_with_incoming_columns():
    """Read back a two-column structured dataset literal as a subset, in full, and as a DataFrame."""
    ctx = FlyteContextManager.current_context()
    engine = StructuredDatasetTransformerEngine()

    # Build a literal whose type declares two columns.
    two_column_type = Annotated[pd.DataFrame, kwtypes(name=str, age=int)]
    literal_type = TypeEngine.to_literal_type(two_column_type)
    literal = engine.to_literal(ctx, generate_pandas(), python_type=two_column_type, expected=literal_type)
    literal_sd_type = literal.scalar.structured_dataset.metadata.structured_dataset_type
    assert len(literal_sd_type.columns) == 2

    # Asking for a one-column StructuredDataset should expose only the requested column.
    age_only_sd = Annotated[StructuredDataset, kwtypes(age=int)]
    dataset = engine.to_python_value(ctx, literal, age_only_sd)
    assert dataset.metadata.structured_dataset_type.columns[0].name == "age"
    assert dataset.open(pd.DataFrame).all().shape[1] == 1

    # check when columns are not specified, should pull both and add column information.
    dataset = engine.to_python_value(ctx, literal, StructuredDataset)
    assert len(dataset.metadata.structured_dataset_type.columns) == 2

    # should also work if subset type is just an annotated pd.DataFrame
    age_only_df = Annotated[pd.DataFrame, kwtypes(age=int)]
    frame = engine.to_python_value(ctx, literal, age_only_df)
    assert frame.shape[1] == 1
コード例 #23
0
def test_dont_convert_remotes():
    """A remote ``FlyteFile`` passed through a dynamic task keeps its original uri in the binding."""

    @task
    def t1(in1: FlyteFile):
        print(in1)

    @dynamic
    def dyn(in1: FlyteFile):
        t1(in1=in1)

    remote_file = FlyteFile("s3://anything")

    base_ctx = context_manager.FlyteContext.current_context()
    settings = context_manager.SerializationSettings(
        project="test_proj",
        domain="test_domain",
        version="abc",
        image_config=ImageConfig(Image(name="name", fqn="image", tag="name")),
        env={},
    )
    with base_ctx.new_serialization_settings(serialization_settings=settings) as ctx:
        with ctx.new_execution_context(mode=ExecutionState.Mode.TASK_EXECUTION) as ctx:
            single_blob = BlobType("", dimensionality=BlobType.BlobDimensionality.SINGLE)
            literal = TypeEngine.to_literal(ctx, remote_file, FlyteFile, single_blob)
            literal_inputs = LiteralMap(literals={"in1": literal})

            spec = dyn.dispatch_execute(ctx, literal_inputs)
            first_binding = spec.nodes[0].inputs[0].binding
            assert first_binding.scalar.blob.uri == "s3://anything"
コード例 #24
0
def test_to_python_value_without_incoming_columns():
    """Read back a structured dataset literal that carries no column information."""
    ctx = FlyteContextManager.current_context()
    engine = StructuredDatasetTransformerEngine()
    literal_type = TypeEngine.to_literal_type(pd.DataFrame)
    df = generate_pandas()

    def fresh_literal():
        # Each scenario gets its own literal; see the todo further down for why.
        return engine.to_literal(ctx, df, python_type=pd.DataFrame, expected=literal_type)

    # The literal's type has no columns.
    literal = fresh_literal()
    assert len(literal.scalar.structured_dataset.metadata.structured_dataset_type.columns) == 0

    # Asking for a one-column StructuredDataset should expose only the requested column.
    age_only_sd = Annotated[StructuredDataset, kwtypes(age=int)]
    dataset = engine.to_python_value(ctx, literal, age_only_sd)
    assert dataset.metadata.structured_dataset_type.columns[0].name == "age"
    assert dataset.open(pd.DataFrame).all().shape[1] == 1

    # check when columns are not specified, should pull both and add column information.
    # todo: see the todos in the open_as, and iter_as functions in StructuredDatasetTransformerEngine
    #  we have to recreate the literal because the test case above filled in the metadata
    literal = fresh_literal()
    dataset = engine.to_python_value(ctx, literal, StructuredDataset)
    assert dataset.metadata.structured_dataset_type.columns == []
    assert dataset.open(pd.DataFrame).all().shape[1] == 2

    # should also work if subset type is just an annotated pd.DataFrame
    literal = fresh_literal()
    age_only_df = Annotated[pd.DataFrame, kwtypes(age=int)]
    frame = engine.to_python_value(ctx, literal, age_only_df)
    assert frame.shape[1] == 1
コード例 #25
0
def test_two(two_sample_inputs):
    """Dispatch a dynamic task over two ``MyInput`` samples and expect two output literals."""
    first_input = two_sample_inputs[0]
    second_input = two_sample_inputs[1]

    @dynamic
    def dt1(a: List[MyInput]) -> List[FlyteFile]:
        return [aa.main_product for aa in a]

    settings = SerializationSettings(
        project="test_proj",
        domain="test_domain",
        version="abc",
        image_config=ImageConfig(Image(name="name", fqn="image", tag="name")),
        env={},
    )
    with_settings = FlyteContextManager.current_context().with_serialization_settings(settings)

    with FlyteContextManager.with_context(with_settings) as ctx:
        # Switch the execution state to task-execution mode before dispatching.
        task_execution_state = ctx.execution_state.with_params(
            mode=ExecutionState.Mode.TASK_EXECUTION,
        )
        with FlyteContextManager.with_context(ctx.with_execution_state(task_execution_state)) as ctx:
            literal_inputs = TypeEngine.dict_to_literal_map(
                ctx,
                d={"a": [first_input, second_input]},
                type_hints={"a": List[MyInput]},
            )
            result = dt1.dispatch_execute(ctx, literal_inputs)
            assert len(result.literals["o0"].collection.literals) == 2
コード例 #26
0
ファイル: test_type_engine.py プロジェクト: jaychia/flytekit
def test_named_tuple():
    """Each NamedTuple field must map to a variable of the matching simple literal type."""
    outputs_type = typing.NamedTuple("Outputs", [("x_str", str), ("y_int", int)])
    variables = TypeEngine.named_tuple_to_variable_map(outputs_type).variables

    expected_simple_types = {
        "x_str": model_types.SimpleType.STRING,
        "y_int": model_types.SimpleType.INTEGER,
    }
    for field_name, simple_type in expected_simple_types.items():
        assert variables[field_name].type.simple == simple_type
コード例 #27
0
def test_protos():
    """Round-trip a protobuf message through the type engine and reject a mismatched literal."""
    ctx = FlyteContext.current_context()
    proto_cls = errors_pb2.ContainerError

    original = proto_cls(code="code", message="message")
    literal_type = TypeEngine.to_literal_type(proto_cls)
    assert literal_type.simple == SimpleType.STRUCT
    assert literal_type.metadata["pb_type"] == "flyteidl.core.errors_pb2.ContainerError"

    # Python value -> literal -> python value must give back an equal message.
    literal = TypeEngine.to_literal(ctx, original, proto_cls, literal_type)
    round_tripped = TypeEngine.to_python_value(ctx, literal, proto_cls)
    assert round_tripped == original

    # Test error: an integer primitive literal is not a valid source for the proto type.
    integer_literal = Literal(scalar=Scalar(primitive=Primitive(integer=4)))
    with pytest.raises(AssertionError):
        TypeEngine.to_python_value(ctx, integer_literal, proto_cls)
コード例 #28
0
def _workflow_fn_outputs_to_promise(
    ctx: FlyteContext,
    native_outputs: typing.Dict[str, type],  # Actually an orderedDict
    typed_outputs: Dict[str, _interface_models.Variable],
    outputs: Union[Any, Tuple[Any]],
) -> List[Promise]:
    if len(native_outputs) == 1:
        if isinstance(outputs, tuple):
            if len(outputs) != 1:
                raise AssertionError(
                    f"The Workflow specification indicates only one return value, received {len(outputs)}"
                )
        else:
            outputs = (outputs, )

    if len(native_outputs) > 1:
        if not isinstance(outputs,
                          tuple) or len(native_outputs) != len(outputs):
            # Length check, clean up exception
            raise AssertionError(
                f"The workflow specification indicates {len(native_outputs)} return vals, but received {len(outputs)}"
            )

    # This recasts the Promises provided by the outputs of the workflow's tasks into the correct output names
    # of the workflow itself
    return_vals = []
    for (k, t), v in zip(native_outputs.items(), outputs):
        if isinstance(v, Promise):
            return_vals.append(v.with_var(k))
        else:
            # Found a return type that is not a promise, so we need to transform it
            var = typed_outputs[k]
            return_vals.append(
                Promise(var=k, val=TypeEngine.to_literal(ctx, v, t, var.type)))
    return return_vals
コード例 #29
0
ファイル: workflow.py プロジェクト: dylanwilder/flytekit
    def add_workflow_output(
        self, output_name: str, p: Union[Promise, List[Promise], Dict[str, Promise]], python_type: Optional[Type] = None
    ):
        """
        Add an output with the given name from the given node output.

        :param output_name: name of the new workflow output; must not already be an output of this workflow.
        :param p: a Promise, or a list/dict of Promises, to bind to the new output.
        :param python_type: python type of the output. May be omitted for a single Promise, in which case it
            is looked up in the python interface of the entity behind the promise's node. Required when ``p``
            is a list or dict.
        :raises FlyteValidationException: if the output name is already taken, or if a list/dict is given
            without ``python_type``.
        :raises Exception: if the current context already has a compilation state.
        """
        if output_name in self._python_interface.outputs:
            raise FlyteValidationException(f"Output {output_name} already exists in workflow {self.name}")

        if python_type is None:
            # isinstance (rather than an exact type comparison) also rejects list/dict subclasses such as
            # OrderedDict, which have no ``ref`` attribute to infer a type from.
            if isinstance(p, (list, dict)):
                raise FlyteValidationException(
                    f"If specifying a list or dict of Promises, you must specify the python_type type for {output_name}"
                    " starting with the container type (e.g. List[int])"
                )
            python_type = p.ref.node.flyte_entity.python_interface.outputs[p.var]
            logger.debug(f"Inferring python type for wf output {output_name} from Promise provided {python_type}")

        flyte_type = TypeEngine.to_literal_type(python_type=python_type)

        ctx = FlyteContext.current_context()
        if ctx.compilation_state is not None:
            raise Exception("Can't already be compiling")
        # Create the binding under this workflow's own compilation state.
        with FlyteContextManager.with_context(ctx.with_compilation_state(self.compilation_state)) as ctx:
            b = binding_from_python_std(
                ctx, output_name, expected_literal_type=flyte_type, t_value=p, t_value_type=python_type
            )
            self._output_bindings.append(b)
            # Record the new output on the python interface and rebuild the typed interface from it.
            self._python_interface = self._python_interface.with_outputs(extra_outputs={output_name: python_type})
            self._interface = transform_interface_to_typed_interface(self._python_interface)
コード例 #30
0
ファイル: interface.py プロジェクト: xquek-fn/flytekit
def transform_inputs_to_parameters(
        ctx: context_manager.FlyteContext,
        interface: Interface) -> _interface_models.ParameterMap:
    """
    Build a Parameter Map from the inputs of the given interface, carrying over any defaults.

    An input whose default is ``None`` becomes a required parameter without a default; any other
    default is converted to a literal and the parameter is marked as not required.

    :param ctx: context passed to the type engine when converting default values to literals
    :param interface: the interface object
    :return: the parameter map, empty when there is no interface or it has no inputs-with-defaults
    """
    if interface is None or interface.inputs_with_defaults is None:
        return _interface_models.ParameterMap({})

    variables = transform_variable_map(interface.inputs)
    defaults_by_name = interface.inputs_with_defaults

    parameters = {}
    for name, variable in variables.items():
        # Each entry is a pair; only its second element (the default value) is needed here.
        _, default_value = defaults_by_name[name]
        if default_value is None:
            literal_default = None
        else:
            literal_default = TypeEngine.to_literal(
                ctx,
                default_value,
                python_type=interface.inputs[name],
                expected=variable.type,
            )
        parameters[name] = _interface_models.Parameter(
            var=variable,
            default=literal_default,
            required=default_value is None,
        )
    return _interface_models.ParameterMap(parameters)