Code example #1
    def execute(self, context, inputs):
        """
        :param flytekit.engines.common.EngineContext context:
        :param flytekit.models.literals.LiteralMap inputs:
        :rtype: dict[Text, flytekit.models.common.FlyteIdlEntity]
        :returns: This function must return a dictionary mapping 'filenames' to Flyte Interface Entities.  These
            entities will be used by the engine to pass data from node to node, populate metadata, etc.  Each
            engine will have different behavior.  For instance, the Flyte engine will upload the entities to a remote
            working directory (with the names provided), which will in turn allow Flyte Propeller to push the
            workflow along, whereas the local engine will merely feed the outputs directly into the next node.
        """
        inputs_dict = _type_helpers.unpack_literal_map_to_sdk_python_std(
            inputs,
            {
                k: _type_helpers.get_sdk_type_from_literal_type(v.type)
                for k, v in _six.iteritems(self.interface.inputs)
            },
        )
        outputs_dict = {
            name: _task_output.OutputReference(
                _type_helpers.get_sdk_type_from_literal_type(variable.type))
            for name, variable in _six.iteritems(self.interface.outputs)
        }
        inputs_dict.update(outputs_dict)

        self._execute_user_code(context, inputs_dict)

        return {
            _constants.OUTPUT_FILE_NAME:
            _literal_models.LiteralMap(literals={
                k: v.sdk_value
                for k, v in _six.iteritems(outputs_dict)
            })
        }
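
A minimal sketch of the user-code side of this contract, assuming the legacy decorator API that the docstrings further down in these examples use (`flytekit.sdk.tasks` / `flytekit.sdk.types`): the engine merges inputs and OutputReference objects into one dictionary before calling the function, so user code reads inputs as plain Python values and populates outputs via `.set(...)`.

    from flytekit.sdk.tasks import inputs, outputs, python_task
    from flytekit.sdk.types import Types

    @inputs(a=Types.Integer)
    @outputs(b=Types.Integer)
    @python_task
    def double(wf_params, a, b):
        # `b` is an OutputReference; setting it populates outputs_dict above,
        # which execute() then packs into the returned LiteralMap.
        b.set(a * 2)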
Code example #2
    def _produce_dynamic_job_spec(self, context, inputs):
        """
        Runs user code and produces future task nodes to run sub-tasks.
        :param context:
        :param flytekit.models.literals.LiteralMap inputs:
        :rtype: flytekit.models.dynamic_job.DynamicJobSpec
        """
        inputs_dict = _type_helpers.unpack_literal_map_to_sdk_python_std(
            inputs,
            {
                k: _type_helpers.get_sdk_type_from_literal_type(v.type)
                for k, v in _six.iteritems(self.interface.inputs)
            },
        )
        outputs_dict = {
            name: _task_output.OutputReference(
                _type_helpers.get_sdk_type_from_literal_type(variable.type))
            for name, variable in _six.iteritems(self.interface.outputs)
        }

        # Add outputs to inputs
        inputs_dict.update(outputs_dict)

        nodes = []
        tasks = []
        # One node per query
        generated_queries = self._generate_plugin_objects(context, inputs_dict)

        # Create output bindings always - this has to happen after user code has run
        output_bindings = [
            _literal_models.Binding(
                var=name,
                binding=_interface.BindingData.from_python_std(
                    b.sdk_type.to_flyte_literal_type(), b.value),
            ) for name, b in _six.iteritems(outputs_dict)
        ]

        for i, qubole_hive_job in enumerate(generated_queries):
            hive_job_node = _create_hive_job_node(
                "HiveQuery_{}".format(i),
                qubole_hive_job.to_flyte_idl(),
                self.metadata)
            nodes.append(hive_job_node)
            tasks.append(hive_job_node.executable_sdk_object)

        dynamic_job_spec = _dynamic_job.DynamicJobSpec(
            min_successes=len(nodes),
            tasks=tasks,
            nodes=nodes,
            outputs=output_bindings,
            subworkflows=[],
        )

        return dynamic_job_spec
Code example #3
File: interface.py Project: sauravsrijan/flytekit
 def __repr__(self):
     return "({inputs}) -> ({outputs})".format(
         inputs=", ".join([
             "{}: {}".format(
                 k, _type_helpers.get_sdk_type_from_literal_type(v.type))
             for k, v in _six.iteritems(self.inputs)
         ]),
         outputs=", ".join([
             "{}: {}".format(
                 k, _type_helpers.get_sdk_type_from_literal_type(v.type))
             for k, v in _six.iteritems(self.outputs)
         ]))
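
As a hedged illustration, an interface with a single integer input and a single string output would render along these lines (the exact token for each type depends on that SDK type's own repr):

    # >>> repr(interface)   # hypothetical TypedInterface instance
    # '(a: Integer) -> (b: String)'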
Code example #4
    def _produce_dynamic_job_spec(self, context, inputs):
        """
        Runs user code and produces future task nodes to run sub-tasks.
        :param context:
        :param flytekit.models.literals.LiteralMap inputs:
        :rtype: flytekit.models.dynamic_job.DynamicJobSpec
        """
        inputs_dict = _type_helpers.unpack_literal_map_to_sdk_python_std(
            inputs, {
                k: _type_helpers.get_sdk_type_from_literal_type(v.type)
                for k, v in _six.iteritems(self.interface.inputs)
            })
        outputs_dict = {
            name: _task_output.OutputReference(
                _type_helpers.get_sdk_type_from_literal_type(variable.type))
            for name, variable in _six.iteritems(self.interface.outputs)
        }

        # Add outputs to inputs
        inputs_dict.update(outputs_dict)

        # Note: Today a hive task corresponds to a dynamic job spec with one node, which contains multiple
        # queries. We may change this in future.
        nodes = []
        tasks = []
        generated_queries = self._generate_hive_queries(context, inputs_dict)

        # Create output bindings always - this has to happen after user code has run
        output_bindings = [
            _literal_models.Binding(
                var=name,
                binding=_interface.BindingData.from_python_std(
                    b.sdk_type.to_flyte_literal_type(), b.value))
            for name, b in _six.iteritems(outputs_dict)
        ]

        if len(generated_queries.query_collection.queries) > 0:
            hive_job_node = _create_hive_job_node(
                "HiveQueries", generated_queries.to_flyte_idl(), self.metadata)
            nodes.append(hive_job_node)
            tasks.append(hive_job_node.executable_sdk_object)

        dynamic_job_spec = _dynamic_job.DynamicJobSpec(
            min_successes=len(nodes),  # At most one node for now; see comment above
            tasks=tasks,
            nodes=nodes,
            outputs=output_bindings,
            subworkflows=[])

        return dynamic_job_spec
Code example #5
    def execute(self, context, inputs):
        """
        :param flytekit.engines.common.EngineContext context:
        :param flytekit.models.literals.LiteralMap inputs:
        :rtype: dict[Text, flytekit.models.common.FlyteIdlEntity]
        :returns: This function must return a dictionary mapping 'filenames' to Flyte Interface Entities.  These
            entities will be used by the engine to pass data from node to node, populate metadata, etc.  Each
            engine will have different behavior.  For instance, the Flyte engine will upload the entities to a remote
            working directory (with the names provided), which will in turn allow Flyte Propeller to push the
            workflow along, whereas the local engine will merely feed the outputs directly into the next node.
        """
        inputs_dict = _type_helpers.unpack_literal_map_to_sdk_python_std(
            inputs,
            {
                k: _type_helpers.get_sdk_type_from_literal_type(v.type)
                for k, v in _six.iteritems(self.interface.inputs)
            },
        )

        input_notebook_path = self._notebook_path
        # Execute Notebook via Papermill.
        output_notebook_path = input_notebook_path.split(
            ".ipynb")[0] + "-out.ipynb"
        _pm.execute_notebook(input_notebook_path,
                             output_notebook_path,
                             parameters=inputs_dict)

        # Parse outputs from the executed notebook.
        outputs = None
        with open(output_notebook_path) as json_file:
            data = _json.load(json_file)
            for p in data["cells"]:
                meta = p["metadata"]
                if "outputs" in meta.get("tags", []):
                    outputs = " ".join(p["outputs"][0]["data"]["text/plain"])

        # Parse into an empty LiteralMap proto (avoid shadowing the built-in `dict`) so the
        # conversion below is safe even if no cell was tagged "outputs".
        output_literal_map_pb2 = _literal_models._literals_pb2.LiteralMap()
        if outputs is not None:
            _text_format.Parse(outputs, output_literal_map_pb2)

        # Add output_notebook as an output to the task.
        output_notebook = _task_output.OutputReference(
            _type_helpers.get_sdk_type_from_literal_type(
                _Types.Blob.to_flyte_literal_type()))
        output_notebook.set(output_notebook_path)

        output_literal_map = _literal_models.LiteralMap.from_flyte_idl(
            output_literal_map_pb2)
        output_literal_map.literals[OUTPUT_NOTEBOOK] = output_notebook.sdk_value

        return {_constants.OUTPUT_FILE_NAME: output_literal_map}
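
A hedged sketch of the notebook-side contract this parser implies: papermill injects `inputs_dict` into the cell tagged "parameters", and the code above looks for a cell tagged "outputs" whose first text/plain output parses as a text-format flyteidl LiteralMap. The cell contents below are illustrative only; the essential point is that the tagged cell must print a parseable LiteralMap.

    # --- notebook cell tagged "parameters" (papermill overwrites these values) ---
    x = 1
    y = 2

    # --- notebook cell tagged "outputs" ---
    # Build a flyteidl LiteralMap proto holding the outputs and print it in text format,
    # e.g. print(google.protobuf.text_format.MessageToString(literal_map_proto)),
    # where literal_map_proto is assumed to be constructed by the notebook author.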
Code example #6
File: flyte.py Project: vglocus/flytekit
 def get_sdk_type_from_literal_type(self, literal_type):
     """
     :param flytekit.models.types.LiteralType literal_type:
     :rtype: flytekit.common.types.base_sdk_types.FlyteSdkType
     """
     if literal_type.collection_type is not None:
         return _container_types.List(
             _helpers.get_sdk_type_from_literal_type(
                 literal_type.collection_type))
     elif literal_type.map_value_type is not None:
         raise NotImplementedError("TODO: Implement map")
     elif literal_type.schema is not None:
         return _schema.schema_instantiator_from_proto(literal_type.schema)
     elif literal_type.blob is not None:
         return self._get_blob_impl_from_type(literal_type.blob)
     elif literal_type.simple is not None:
          if (literal_type.simple == _literal_type_models.SimpleType.BINARY
                  and literal_type.metadata
                  and _proto.Protobuf.PB_FIELD_KEY in literal_type.metadata):
             return _proto_sdk_type_from_tag(
                 literal_type.metadata[_proto.Protobuf.PB_FIELD_KEY])
         if (literal_type.simple == _literal_type_models.SimpleType.STRUCT
                 and literal_type.metadata
                 and _proto.Protobuf.PB_FIELD_KEY in literal_type.metadata):
             return _generic_proto_sdk_type_from_tag(
                 literal_type.metadata[_proto.Protobuf.PB_FIELD_KEY])
         sdk_type = self._SIMPLE_TYPE_LOOKUP_TABLE.get(literal_type.simple)
         if sdk_type is None:
             raise NotImplementedError(
                 "We haven't implemented this type yet:  Simple type={}".
                 format(literal_type.simple))
         return sdk_type
     else:
         raise _system_exceptions.FlyteSystemAssertion(
             "An unrecognized literal type was received: {}".format(
                 literal_type))
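
A small usage sketch of this dispatcher, assuming the legacy module layout these examples come from (`flytekit.models.types` for the literal type models, `flytekit.common.types.helpers` for the module-level helper):

    from flytekit.models import types as _type_models
    from flytekit.common.types import helpers as _type_helpers

    # A simple integer literal type resolves through the simple-type lookup table.
    int_sdk_type = _type_helpers.get_sdk_type_from_literal_type(
        _type_models.LiteralType(simple=_type_models.SimpleType.INTEGER))

    # A collection literal type resolves recursively to a typed List.
    list_sdk_type = _type_helpers.get_sdk_type_from_literal_type(
        _type_models.LiteralType(
            collection_type=_type_models.LiteralType(
                simple=_type_models.SimpleType.INTEGER)))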
Code example #7
def test_hive_task_query_generation():
    with _common_utils.AutoDeletingTempDir(
            "user_dir") as user_working_directory:
        context = _common_engine.EngineContext(
            execution_id=WorkflowExecutionIdentifier(project="unit_test",
                                                     domain="unit_test",
                                                     name="unit_test"),
            execution_date=_datetime.utcnow(),
            stats=None,  # TODO: A mock stats object that we can read later.
            logging=_logging,  # TODO: A mock logging object that we can read later.
            tmp_dir=user_working_directory,
        )
        references = {
            name: _task_output.OutputReference(
                _type_helpers.get_sdk_type_from_literal_type(variable.type))
            for name, variable in _six.iteritems(two_queries.interface.outputs)
        }

        qubole_hive_jobs = two_queries._generate_plugin_objects(
            context, references)
        assert len(qubole_hive_jobs) == 2

        # deprecated, collection is only here for backwards compatibility
        assert len(qubole_hive_jobs[0].query_collection.queries) == 1
        assert len(qubole_hive_jobs[1].query_collection.queries) == 1

        # The output references should now have the same fake S3 path as the formatted queries
        assert references["hive_results"].value[0].uri != ""
        assert references["hive_results"].value[1].uri != ""
        assert references["hive_results"].value[0].uri in qubole_hive_jobs[
            0].query.query
        assert references["hive_results"].value[1].uri in qubole_hive_jobs[
            1].query.query
Code example #8
File: interface.py Project: sauravsrijan/flytekit
 def from_python_std(cls, literal_type, t_value, upstream_nodes=None):
     """
     :param flytekit.models.types.LiteralType literal_type:
     :param T t_value:
     :param list[flytekit.common.nodes.SdkNode] upstream_nodes: [Optional] Keeps track of the nodes upstream,
         if applicable.
     :rtype: BindingData
     """
     scalar = None
     collection = None
     promise = None
     map = None
     downstream_sdk_type = _type_helpers.get_sdk_type_from_literal_type(
         literal_type)
     if isinstance(t_value, _promise.Input):
         if not downstream_sdk_type.is_castable_from(t_value.sdk_type):
              raise _user_exceptions.FlyteTypeException(
                 t_value.sdk_type,
                 downstream_sdk_type,
                 additional_msg="When binding workflow input: {}".format(
                     t_value))
         promise = t_value.promise
     elif isinstance(t_value, _promise.NodeOutput):
         if not downstream_sdk_type.is_castable_from(t_value.sdk_type):
              raise _user_exceptions.FlyteTypeException(
                 t_value.sdk_type,
                 downstream_sdk_type,
                 additional_msg="When binding node output: {}".format(
                     t_value))
         promise = t_value
         if upstream_nodes is not None:
             upstream_nodes.append(t_value.sdk_node)
     elif isinstance(t_value, list):
         if not issubclass(downstream_sdk_type, _containers.ListImpl):
             raise _user_exceptions.FlyteTypeException(
                 type(t_value),
                 downstream_sdk_type,
                 received_value=t_value,
                 additional_msg="Cannot bind a list to a non-list type.")
         collection = _literal_models.BindingDataCollection([
             BindingData.from_python_std(
                 downstream_sdk_type.sub_type.to_flyte_literal_type(),
                 v,
                 upstream_nodes=upstream_nodes) for v in t_value
         ])
     elif isinstance(t_value, dict) and \
             (not issubclass(downstream_sdk_type, _primitives.Generic) or BindingData._has_sub_bindings(t_value)):
         # TODO: This behavior should be embedded in the type engine.  Someone should be able to alter behavior of
         # TODO: binding logic by injecting their own type engine.  The same goes for the list check above.
         raise NotImplementedError(
             "TODO: Cannot use map bindings at the moment")
     else:
         sdk_value = downstream_sdk_type.from_python_std(t_value)
         scalar = sdk_value.scalar
         collection = sdk_value.collection
         map = sdk_value.map
     return cls(scalar=scalar,
                collection=collection,
                map=map,
                promise=promise)
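
A hedged sketch of the two main paths through from_python_std: binding a plain Python value (the final `else` branch, which converts through the downstream SDK type) versus binding a node output (the promise branch, which also records the producing node). `some_node` and its `result` output are hypothetical.

    # Plain value: converted and stored as a scalar.
    bd = BindingData.from_python_std(_primitives.Integer.to_flyte_literal_type(), 5)

    # Node output: stored as a promise; the producing node is appended to upstream_nodes.
    upstream = []
    bd = BindingData.from_python_std(
        _primitives.Integer.to_flyte_literal_type(),
        some_node.outputs.result,
        upstream_nodes=upstream)
    # upstream now contains some_node's SdkNode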
Code example #9
 def __init__(self, type_map, node):
     """
     :param dict[Text, flytekit.models.interface.Variable] type_map:
     :param SdkNode node:
     """
     super(ParameterMapper, self).__init__()
     for key, var in _six.iteritems(type_map):
         self[key] = self._return_mapping_object(node, _type_helpers.get_sdk_type_from_literal_type(var.type), key)
     self._initialized = True
Code example #10
 def _python_std_input_map_to_literal_map(self, inputs):
     """
     :param dict[Text,Any] inputs: A dictionary of Python standard inputs that will be type-checked and compiled
         to a LiteralMap
     :rtype: flytekit.models.literals.LiteralMap
     """
     return _type_helpers.pack_python_std_map_to_literal_map(inputs, {
         k: _type_helpers.get_sdk_type_from_literal_type(v.type)
         for k, v in _six.iteritems(self.interface.inputs)
     })
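
For example, a task whose interface declares a single integer input named `num` could be compiled like this (names illustrative):

    literal_map = task._python_std_input_map_to_literal_map({"num": 42})
    # literal_map is a flytekit.models.literals.LiteralMap, ready to hand to an engine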
Code example #11
def test_sdk_output_references_construction():
    references = {
        name: _task_output.OutputReference(_type_helpers.get_sdk_type_from_literal_type(variable.type))
        for name, variable in _six.iteritems(two_queries.interface.outputs)
    }
    # Before user code is run, the outputs passed to the user code should not have values
    assert references['hive_results'].sdk_value == _base_sdk_types.Void()

    # Should be a list of schemas
    assert isinstance(references['hive_results'].sdk_type, _containers.TypedCollectionType)
    assert isinstance(references['hive_results'].sdk_type.sub_type, _schema.SchemaInstantiator)
Code example #12
 def unit_test(self, **input_map):
     """
     :param dict[Text, T] input_map: Python Std input from users.  We will cast these to the appropriate Flyte
         literals.
     :returns: Depends on the behavior of the specific task in the unit engine.
     """
     return _engine_loader.get_engine('unit').get_task(self).execute(
         _type_helpers.pack_python_std_map_to_literal_map(
             input_map, {
                 k: _type_helpers.get_sdk_type_from_literal_type(v.type)
                 for k, v in _six.iteritems(self.interface.inputs)
             }))
Code example #13
 def _validate_inputs(self, inputs):
     """
     :param dict[Text, flytekit.models.interface.Variable] inputs:  Input variables to validate
     :raises: flytekit.common.exceptions.user.FlyteValidationException
     """
     for k, v in _six.iteritems(inputs):
         sdk_type = _helpers.get_sdk_type_from_literal_type(v.type)
         if sdk_type not in input_types_supported:
             raise _user_exceptions.FlyteValidationException(
                 "Input Type '{}' not supported.  Only Primitives are supported for Scala/Java Spark."
                 .format(sdk_type))
     super(SdkGenericSparkTask, self)._validate_inputs(inputs)
Code example #14
 def promote_from_model(cls, model):
     """
     :param flytekit.models.types.SchemaType model:
     :rtype: SchemaType
     """
     _PROTO_ENUM_TO_SDK_TYPE = {
         _type_models.SchemaType.SchemaColumn.SchemaColumnType.INTEGER:
         _helpers.get_sdk_type_from_literal_type(
             _primitives.Integer.to_flyte_literal_type()),
         _type_models.SchemaType.SchemaColumn.SchemaColumnType.FLOAT:
         _helpers.get_sdk_type_from_literal_type(
             _primitives.Float.to_flyte_literal_type()),
         _type_models.SchemaType.SchemaColumn.SchemaColumnType.BOOLEAN:
         _helpers.get_sdk_type_from_literal_type(
             _primitives.Boolean.to_flyte_literal_type()),
         _type_models.SchemaType.SchemaColumn.SchemaColumnType.DATETIME:
         _helpers.get_sdk_type_from_literal_type(
             _primitives.Datetime.to_flyte_literal_type()),
         _type_models.SchemaType.SchemaColumn.SchemaColumnType.DURATION:
         _helpers.get_sdk_type_from_literal_type(
             _primitives.Timedelta.to_flyte_literal_type()),
         _type_models.SchemaType.SchemaColumn.SchemaColumnType.STRING:
         _helpers.get_sdk_type_from_literal_type(
             _primitives.String.to_flyte_literal_type()),
     }
     return cls([(c.name, _PROTO_ENUM_TO_SDK_TYPE[c.type])
                 for c in model.columns])
Code example #15
File: engine.py Project: xquek-fn/flytekit
 def _transform_for_user_output(self, outputs):
     """
     Just return the outputs as a user-readable dictionary.
     :param dict[Text,flytekit.models.common.FlyteIdlEntity] outputs:
     :rtype: T
     """
     literal_map = outputs[_sdk_constants.OUTPUT_FILE_NAME]
     return {
         name: _type_helpers.get_sdk_value_from_literal(
             literal_map.literals[name], sdk_type=_type_helpers.get_sdk_type_from_literal_type(variable.type),
         ).to_python_std()
         for name, variable in _six.iteritems(self.sdk_task.interface.outputs)
     }
Code example #16
 def local_execute(self, **input_map):
     """
     :param dict[Text, T] input_map: Python Std input from users.  We will cast these to the appropriate Flyte
         literals.
     :rtype: dict[Text, T]
     :returns: The output produced by this task in Python standard format.
     """
     return _engine_loader.get_engine('local').get_task(self).execute(
         _type_helpers.pack_python_std_map_to_literal_map(
             input_map, {
                 k: _type_helpers.get_sdk_type_from_literal_type(v.type)
                 for k, v in _six.iteritems(self.interface.inputs)
             }))
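
Usage has the same shape as unit_test above: keyword arguments are type-checked, packed into a LiteralMap, executed through the local engine, and returned as plain Python values (task and variable names here are illustrative):

    results = my_task.local_execute(a=1, b=2)
    print(results["c"])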
Code example #17
 def _validate_inputs(self, inputs):
     """
     This method should be overridden in sub-classes that intend to do additional checks on inputs.  If validation
     fails, this function should raise an informative exception.
     :param dict[Text, flytekit.models.interface.Variable] inputs:  Input variables to validate
     :raises: flytekit.common.exceptions.user.FlyteValidationException
     """
     super(SdkRunnableTask, self)._validate_inputs(inputs)
     for k, v in _six.iteritems(inputs):
         if not self._is_argname_in_function_definition(k):
             raise _user_exceptions.FlyteValidationException(
                 "The input named '{}' was not specified in the task function.  Therefore, this input cannot be "
                 "provided to the task.".format(k))
          if _type_helpers.get_sdk_type_from_literal_type(
                  v.type) in type(self)._banned_inputs:
              raise _user_exceptions.FlyteValidationException(
                  "The input named '{}' is not an accepted input type.".format(k))
Code example #18
    def promote_from_model(cls, model):
        """
        :param flytekit.models.interface.Parameter model:
        :rtype: Parameter
        """
        sdk_type = _type_helpers.get_sdk_type_from_literal_type(model.var.type)

        if model.default is not None:
            default_value = sdk_type.from_flyte_idl(
                model.default.to_flyte_idl()).to_python_std()
            return cls("",
                       sdk_type,
                       help=model.var.description,
                       required=False,
                       default=default_value)
        else:
            return cls("", sdk_type, help=model.var.description, required=True)
Code example #19
    def _validate_outputs(self, outputs):
        """
        :param dict[Text, flytekit.models.interface.Variable] outputs:  Output variables to validate
        :raises: flytekit.common.exceptions.user.FlyteValidationException
        """

        # output_notebook is added as an implicit output by the task itself, so users
        # may not declare an output with that reserved name.
        for k, v in _six.iteritems(outputs):
            if k == OUTPUT_NOTEBOOK:
                raise ValueError(
                    "{} is a reserved output keyword. Please use a different output name."
                    .format(OUTPUT_NOTEBOOK))

            sdk_type = _type_helpers.get_sdk_type_from_literal_type(v.type)
            if sdk_type not in _notebook_types_map.values():
                raise _user_exceptions.FlyteValidationException(
                    "Output Type '{}' not supported.  Only Primitives are supported for notebook."
                    .format(sdk_type))
        super(SdkNotebookTask, self)._validate_outputs(outputs)
Code example #20
def _type_map_from_variable_map(variable_map):
    return {
        k: _type_helpers.get_sdk_type_from_literal_type(v.type) for k, v in
        six.iteritems(variable_map)
    }
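
This factors out the same inline dict comprehension used throughout the examples above. A hedged usage sketch:

    type_map = _type_map_from_variable_map(task.interface.inputs)
    # e.g. {"a": <Integer SDK type>, "b": <String SDK type>}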
Code example #21
File: launch_plan.py Project: forkkit/flytekit
    def __init__(
        self,
        sdk_workflow,
        default_inputs=None,
        fixed_inputs=None,
        role=None,
        schedule=None,
        notifications=None,
        labels=None,
        annotations=None,
        auth=None,
    ):
        """
        :param flytekit.common.workflow.SdkWorkflow sdk_workflow:
        :param dict[Text,flytekit.common.promise.Input] default_inputs:
        :param dict[Text,Any] fixed_inputs: These inputs will be fixed and not need to be set when executing this
            launch plan.
        :param Text role: Deprecated. IAM role to execute this launch plan with.
        :param flytekit.models.schedule.Schedule schedule: Schedule to apply to this workflow.
        :param list[flytekit.models.common.Notification] notifications: List of notifications to apply to this launch
            plan.
        :param flytekit.models.common.Labels labels: Any custom kubernetes labels to apply to workflows executed by this
            launch plan.
        :param flytekit.models.common.Annotations annotations: Any custom kubernetes annotations to apply to workflows
            executed by this launch plan.
        :param flytekit.models.launch_plan.Auth auth: The auth method with which to execute the workflow.
        """
        if role and auth:
            raise ValueError(
                "Cannot set both role and auth. Role is deprecated, use auth instead."
            )

        fixed_inputs = fixed_inputs or {}
        default_inputs = default_inputs or {}

        if role:
            auth = _launch_plan_models.Auth(assumable_iam_role=role)

        super(SdkRunnableLaunchPlan, self).__init__(
            _identifier.Identifier(_identifier_model.ResourceType.WORKFLOW,
                                   _internal_config.PROJECT.get(),
                                   _internal_config.DOMAIN.get(),
                                   _uuid.uuid4().hex,
                                   _internal_config.VERSION.get()),
            _launch_plan_models.LaunchPlanMetadata(
                schedule=schedule or _schedule_model.Schedule(''),
                notifications=notifications or []),
            _interface_models.ParameterMap(default_inputs),
            _type_helpers.pack_python_std_map_to_literal_map(
                fixed_inputs, {
                    k: _type_helpers.get_sdk_type_from_literal_type(var.type)
                    for k, var in _six.iteritems(sdk_workflow.interface.inputs)
                    if k in fixed_inputs
                }),
            labels or _common_models.Labels({}),
            annotations or _common_models.Annotations({}),
            auth,
        )
        self._interface = _interface.TypedInterface(
            {k: v.var
             for k, v in _six.iteritems(default_inputs)},
            sdk_workflow.interface.outputs)
        self._upstream_entities = {sdk_workflow}
        self._sdk_workflow = sdk_workflow
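
A hedged construction sketch, assuming `my_workflow` is an SdkWorkflow with a string input named `region` (both names are illustrative); the Auth model usage mirrors what __init__ itself does with the deprecated `role` argument:

    lp = SdkRunnableLaunchPlan(
        sdk_workflow=my_workflow,
        fixed_inputs={"region": "us-east-1"},  # packed to a LiteralMap at init time
        auth=_launch_plan_models.Auth(
            assumable_iam_role="arn:aws:iam::123456789012:role/flyte-execution"),
    )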
Code example #22
class Types(object):
    Integer = _helpers.get_sdk_type_from_literal_type(
        _primitives.Integer.to_flyte_literal_type())
    """
    Use this to specify a simple integer type.

    When used with an SDK-decorated method, expect this behavior from the default type engine:

        As input:
            1) If set, a Python int will be received.
            2) Otherwise, a None value will be received.

        As output:
            1) User code may pass an int or long value.
            2) Output can also be nulled with a None value.

        From command-line:
            Specify an integer or integer string.

    .. code-block:: python

        @inputs(a=Types.Integer)
        @outputs(b=Types.Integer)
        @python_task
        def double(wf_params, a, b):
            b.set(a * 2)
    """

    Float = _helpers.get_sdk_type_from_literal_type(
        _primitives.Float.to_flyte_literal_type())
    """
    Use this to specify a simple floating point type.

    When used with an SDK-decorated method, expect this behavior from the default type engine:

        As input:
            A Python float will be received, if set.  Otherwise, a None value will be received.

        As output:
            User code may pass a float value.  It can also be nulled with a None value.

        From command-line:
            Specify a float or floating-point string.

    .. code-block:: python

        @inputs(a=Types.Float)
        @outputs(b=Types.Float)
        @python_task
        def square(wf_params, a, b):
            b.set(a * a)
    """

    String = _helpers.get_sdk_type_from_literal_type(
        _primitives.String.to_flyte_literal_type())
    """
    Use this to specify a simple string type.

    When used with an SDK-decorated method, expect this behavior from the default type engine:

        As input:
            A Python unicode (Python 2) or str (Python 3) will be received, if set.  Otherwise, a None value will be
            received.

        As output:
            User code may pass a unicode value (Python 2) or a str value (Python 3).  It can also be nulled with a None
            value.

        From command-line:
            Specify a string.

    .. code-block:: python

        @inputs(a=Types.String, b=Types.String)
        @outputs(c=Types.String)
        @python_task
        def concat(wf_params, a, b):
            c.set(a + b)
    """

    Boolean = _helpers.get_sdk_type_from_literal_type(
        _primitives.Boolean.to_flyte_literal_type())
    """
    Use this to specify a simple bool type.

    When used with an SDK-decorated method, expect this behavior from the default type engine:

        As input:
            A Python bool will be received, if set.  Otherwise, a None value will be received.

        As output:
            User code may pass a bool value.  It can also be nulled with a None value.

        From command-line:
            Specify 0, 1, true, or false.

    .. code-block:: python

        @inputs(a=Types.Boolean)
        @outputs(b=Types.Boolean)
        @python_task
        def invert(wf_params, a, b):
            b.set(not a)
    """

    Datetime = _helpers.get_sdk_type_from_literal_type(
        _primitives.Datetime.to_flyte_literal_type())
    """
    Use this to specify a simple datetime type.

    When used with an SDK-decorated method, expect this behavior from the default type engine:

        As input:
            A Python timezone-aware datetime.datetime will be received with a UTC time, if set.  Otherwise,
            a None value will be received.

        As output:
            User code may pass a timezone-aware datetime.datetime value.  It can also be nulled with a None value.

        From command-line:
            Specify a timezone-aware, parsable datestring, e.g. 2019-01-01T00:00+00:00

    .. note::

        The engine requires that datetimes be timezone aware.  By default, Python datetime.datetime is not timezone
        aware.

    .. code-block:: python

        @inputs(a=Types.Datetime)
        @outputs(b=Types.Datetime)
        @python_task
        def tomorrow(wf_params, a, b):
            b.set(a + datetime.timedelta(days=1))
    """

    Timedelta = _helpers.get_sdk_type_from_literal_type(
        _primitives.Timedelta.to_flyte_literal_type())
    """
    Use this to specify a simple timedelta type.

    When used with an SDK-decorated method, expect this behavior from the default type engine:

        As input:
            A Python datetime.timedelta will be received, if set.  Otherwise, a None value will be received.

        As output:
            User code may pass a datetime.timedelta value.  It can also be nulled with a None value.

        From command-line:
            Specify a parsable duration string, e.g. 1h30m24s

    .. code-block:: python

        @inputs(a=Types.Timedelta)
        @outputs(b=Types.Timedelta)
        @python_task
        def hundred_times_longer(wf_params, a, b):
            b.set(a * 100)
    """

    Generic = _helpers.get_sdk_type_from_literal_type(
        _primitives.Generic.to_flyte_literal_type())
    """
    Use this to specify a simple JSON type. The Generic type offers a flexible (but loose) extension to Flyte's typing
    system by allowing custom types/objects to be passed through. Producers and consumers of a Generic type are
    strongly encouraged to perform their own integrity checks on the object.

    When used with an SDK-decorated method, expect this behavior from the default type engine:

        As input:
            1) If set, a Python dict with JSON-ifiable primitives and nested lists or maps.
            2) Otherwise, a None value will be received.

        As output:
            1) User code may pass a Python dict with arbitrarily nested lists and dictionaries.  JSON-ifiable
               primitives may also be specified.
            2) Output can also be nulled with a None value.

        From command-line:
            Specify a JSON string.

    .. code-block:: python

        @inputs(a=Types.Generic)
        @outputs(b=Types.Generic)
        @python_task
        def operate(wf_params, a, b):
            if a['operation'] == 'add':
                a['value'] += a['operand']  # a['value'] is a number
            elif a['operation'] == 'merge':
                a['value'].update(a['some']['nested'][0]['field'])
            b.set(a)

        # For better readability, it's strongly advised to leverage python's type aliasing.
        MyTypeA = Types.Generic
        MyTypeB = Types.Generic

        # This makes it clearer that it received a certain type and produces a different one. Other tasks that consume
        # MyTypeB should do so in their input declaration.
        @inputs(a=MyTypeA)
        @outputs(b=MyTypeB)
        @python_task
        def operate(wf_params, a, b):
            if a['operation'] == 'add':
                a['value'] += a['operand']  # a['value'] is a number
            elif a['operation'] == 'merge':
                a['value'].update(a['some']['nested'][0]['field'])
            b.set(a)
    """

    Blob = _blobs.Blob
    """
    Use this to specify a Blob object which is essentially a managed file.

    When used with an SDK-decorated method, expect this behavior from the default type engine:

        As input:
            1) If set, a :py:class:`flytekit.common.types.impl.blobs.Blob` object will be received.
            2) If not set, a None value.

        As output:
            1) A user may specify a path string.
            2) A user may construct a :py:class:`flytekit.common.types.impl.blobs.Blob` object and pass it as output.
            3) Output can be nulled with a None value.

        From command-line:
            Specify a path to the blob. This path must be accessible from the container when executing--either by
            being downloaded from an accessible remote location like s3 or as a local file.

    .. code-block:: python

        @inputs(a=Types.Blob)
        @outputs(b=Types.Blob)
        @python_task
        def copy(wf_params, a, b):
            with a as reader:
                txt = reader.read()

            out = Types.Blob()  # Create at a random location specified in flytekit configuration
            with out as writer:
                writer.write(txt)
            b.set(out)
    """

    CSV = _blobs.CSV
    """
    Use this to specify a CSV blob object which is essentially a managed file in the CSV format.

    When used with an SDK-decorated method, expect this behavior from the default type engine:

        As input:
            1) If set, a :py:class:`flytekit.common.types.impl.blobs.CSV` object will be received.
            2) If not set, a None value.

        As output:
            1) A user may specify a path string.
            2) A user may construct a :py:class:`flytekit.common.types.impl.blobs.CSV` object and pass it as output.
            3) Output can be nulled with a None value.

        From command-line:
            Specify a path to the CSV. This path must be accessible from the container when executing--either by
            being downloaded from an accessible remote location like s3 or as a local file.

    .. code-block:: python

        @inputs(a=Types.CSV)
        @outputs(b=Types.CSV)
        @python_task
        def copy(wf_params, a, b):
            with a as reader:
                txt = reader.read()

            out = Types.CSV()  # Create at a random location specified in flytekit configuration
            with out as writer:
                writer.write(txt)
            b.set(out)
    """

    MultiPartBlob = _blobs.MultiPartBlob
    """
    Use this to specify a multi-part blob object which is essentially a chunked file in a non-recursive directory.

    When used with an SDK-decorated method, expect this behavior from the default type engine:

        As input:
            1) If set, a :py:class:`flytekit.common.types.impl.blobs.MultiPartBlob` object will be received.
            2) If not set, a None value.

        As output:
            1) A user may specify a path string.
            2) A user may construct a :py:class:`flytekit.common.types.impl.blobs.MultiPartBlob` object and pass it as
               output.
            3) Output can be nulled with a None value.

        From command-line:
            Specify a path to the multi-part blob. This path must be accessible from the container when
            executing--either by being downloaded from an accessible remote location like s3 or as a local file.

    .. code-block:: python

        @inputs(a=Types.MultiPartBlob)
        @outputs(b=Types.MultiPartBlob)
        @python_task
        def concat_then_split(wf_params, a, b):
            txt = ""
            with a as chunks:
                for chunk in chunks:
                    txt += chunk.read()

            out = Types.MultiPartBlob()  # Create at a random location specified in flytekit configuration
            with out.create_part('000000') as writer:
                writer.write("Chunk1")
            with out.create_part('000001') as writer:
                writer.write("Chunk2")
            b.set(out)
    """

    MultiPartCSV = _blobs.MultiPartCSV
    """
    See :py:attr:`flytekit.sdk.types.Types.MultiPartBlob`, but in CSV format
    """

    Schema = staticmethod(_schema.schema_instantiator)
    """
    Use this to specify a Schema blob object which is essentially a chunked stream of Parquet dataframes.

    When used with an SDK-decorated method, expect this behavior from the default type engine:

        Cast behavior:
            1) A generic schema (specified as `Types.Schema()`) can receive input from any schema type regardless of
               column definitions.
            2) A schema can receive as input any schema object as long as the upstream schema has a superset of the
               column names defined and the types match for paired columns.  Ordering does not matter.

        As input:
            1) If set, a :py:class:`flytekit.common.types.impl.schema.Schema` object will be received.
            2) If not set, a None value.

        As output:
            1) A user may specify a path string to a chunked dataframe non-recursive directory.
            2) A user may construct a :py:class:`flytekit.common.types.impl.schema.Schema` object (with the correct
               column definitions) and pass it as output.
            3) Output can be nulled with a None value.

        From command-line:
            Specify a path to the schema object. This path must be accessible from the container when
            executing--either by being downloaded from an accessible remote location like s3 or as a local file.

    .. code-block:: python

        @inputs(generic=Types.Schema(), typed=Types.Schema([('a', Types.Integer), ('b', Types.Float)]))
        @outputs(b=Types.Schema([('a', Types.Integer), ('b', Types.Float)]))
        @python_task
        def concat_then_split(wf_params, generic, typed):
            with typed as reader:
                # Each chunk is loaded as a pandas.DataFrame object
                for df in reader.iter_chunks():
                    pass  # Operate on the dataframe

            # Create at a random location specified in flytekit configuration
            out = Types.Schema([('a', Types.Integer), ('b', Types.Float)])()
            with out as writer:
                writer.write(
                    pandas.DataFrame.from_dict(
                        {
                            'a': [1, 2, 3],
                            'b': [5.0, 6.0, 7.0]
                        }
                    )
                )
            b.set(out)
    """

    Proto = staticmethod(_proto.create_protobuf)
    """
    Proto type wraps a protobuf type to provide interoperability between protobuf and flyte typing system. Using this
    type, you can define custom input/output variable types of flyte entities and continue to provide strong typing
    syntax. Proto type serializes proto objects as binary (leveraging `flyteidl's Binary literal <https://github.com/lyft/flyteidl/blob/793b09d190148236f41ad8160b5cec9a3325c16f/protos/flyteidl/core/literals.proto#L45>`_).
    Binary serialization of protobufs is the most space-efficient serialization form. Because of the way protobufs are
    designed, unmarshalling the serialized proto requires access to the corresponding type. In order to use/visualize
    the serialized proto, you will generally need to write custom code in the corresponding component.

    .. note::

        The protobuf Python library should be installed on the PYTHONPATH to ensure the type engine can access the
        appropriate Python code to deserialize the protobuf.

    When used with an SDK-decorated method, expect this behavior from the default type engine:

        As input:
            1) If set, a Python protobuf object of the type specified in the definition.
            2) If not set, a None value.

        As output:
            1) A Python protobuf object matching the type specified by the users.
            2) Set None to null the output.

        From command-line:
            A base-64 encoded string of the serialized protobuf.

    .. code-block:: python

        from protos import my_protos_pb2

        @inputs(a=Types.Proto(my_protos_pb2.Custom))
        @outputs(b=Types.Proto(my_protos_pb2.Custom))
        @python_task
        def assert_and_create(wf_params, a, b):
            assert a.field1 == 1
            assert a.field2 == 'abc'
            b.set(
                my_protos_pb2.Custom(
                    field1=100,
                    field2='hello'
                )
            )
    """

    GenericProto = staticmethod(_proto.create_generic)
    """
    GenericProto type wraps a protobuf type to provide interoperability between protobuf and flyte typing system. Using
    this type, you can define custom input/output variable types of flyte entities and continue to provide strong typing
    syntax. Proto type serializes proto objects as binary (leveraging `flyteidl's Binary literal <https://github.com/lyft/flyteidl/blob/793b09d190148236f41ad8160b5cec9a3325c16f/protos/flyteidl/core/literals.proto#L63>`_).
    A generic proto is a specialization of the Generic type with added convenience functions to support marshalling/
    unmarshalling of the underlying protobuf object using the official protobuf JSON marshaller. While the GenericProto
    type does not produce the most space-efficient representation of protobufs, it is a suitable solution for making
    protobufs easily accessible (i.e. human-readable) in other flyte components (e.g. console, cli, etc.).

    .. note::

        The protobuf Python library should be installed on the PYTHONPATH to ensure the type engine can access the
        appropriate Python code to deserialize the protobuf.

    When used with an SDK-decorated method, expect this behavior from the default type engine:

        As input:
            1) If set, a Python protobuf object of the type specified in the definition.
            2) If not set, a None value.

        As output:
            1) A Python protobuf object matching the type specified by the users.
            2) Set None to null the output.

        From command-line:
            A base-64 encoded string of the serialized protobuf.

    .. code-block:: python

        from protos import my_protos_pb2

        @inputs(a=Types.GenericProto(my_protos_pb2.Custom))
        @outputs(b=Types.GenericProto(my_protos_pb2.Custom))
        @python_task
        def assert_and_create(wf_params, a, b):
            assert a.field1 == 1
            assert a.field2 == 'abc'
            b.set(
                my_protos_pb2.Custom(
                    field1=100,
                    field2='hello'
                )
            )
    """

    List = staticmethod(_containers.List)
    """
Code example #23
File: sdk_dynamic.py Project: pingsutw/flytekit
    def _produce_dynamic_job_spec(self, context, inputs):
        """
        Runs user code and produces future task nodes to run sub-tasks.
        :param context:
        :param flytekit.models.literals.LiteralMap inputs:
        :rtype: (_dynamic_job.DynamicJobSpec, dict[Text, flytekit.models.common.FlyteIdlEntity])
        """
        inputs_dict = _type_helpers.unpack_literal_map_to_sdk_python_std(
            inputs, {
                k: _type_helpers.get_sdk_type_from_literal_type(v.type)
                for k, v in _six.iteritems(self.interface.inputs)
            })
        outputs_dict = {
            name: PromiseOutputReference(
                _type_helpers.get_sdk_type_from_literal_type(variable.type))
            for name, variable in _six.iteritems(self.interface.outputs)
        }

        # Because users declare both inputs and outputs in their functions signatures, merge them together
        # before calling user code
        inputs_dict.update(outputs_dict)
        yielded_sub_tasks = [
            sub_task
            for sub_task in super(SdkDynamicTask, self)._execute_user_code(
                context, inputs_dict) or []
        ]

        upstream_nodes = list()
        output_bindings = [
            _literal_models.Binding(
                var=name,
                binding=_interface.BindingData.from_python_std(
                    b.sdk_type.to_flyte_literal_type(),
                    b.raw_value,
                    upstream_nodes=upstream_nodes))
            for name, b in _six.iteritems(outputs_dict)
        ]
        upstream_nodes = set(upstream_nodes)

        generated_files = {}
        # Keeping future-tasks in original order. We don't use upstream_nodes exclusively because the parent task can
        # yield sub-tasks that it never uses to produce final outputs but they need to execute nevertheless.
        array_job_index = {}
        tasks = set()
        nodes = []
        sub_workflows = set()
        visited_nodes = set()
        generated_ids = {}
        effective_failure_ratio = self._allowed_failure_ratio or 0.0

        # TODO: This function needs to be cleaned up.
        # The reason we chain these two together is because we allow users to not have to explicitly "yield" the
        # node. As long as the subtask/lp/subwf has an output that's referenced, it'll get picked up.
        for sub_task_node in _itertools.chain(yielded_sub_tasks,
                                              upstream_nodes):
            if sub_task_node in visited_nodes:
                continue
            visited_nodes.add(sub_task_node)
            executable = sub_task_node.executable_sdk_object

            # If the executable object that we're dealing with is registerable (ie, SdkRunnableLaunchPlan, SdkWorkflow
            # SdkTask, or SdkRunnableTask), then it should have the ability to give itself a name. After assigning
            # itself the name, also make sure the id is properly set according to current config values.
            if isinstance(executable, _registerable.RegisterableEntity):
                executable.auto_assign_name()
                executable._id = _identifier.Identifier(
                    executable.resource_type,
                    _internal_config.TASK_PROJECT.get()
                    or _internal_config.PROJECT.get(),
                    _internal_config.TASK_DOMAIN.get()
                    or _internal_config.DOMAIN.get(),
                    executable.platform_valid_name,
                    _internal_config.TASK_VERSION.get()
                    or _internal_config.VERSION.get())

            # Generate an id that's unique in the document (if the same task is used multiple times with
            # different resources, executable_sdk_object.id will be the same but generated node_ids should
            # not be).
            safe_task_id = _six.text_type(sub_task_node.executable_sdk_object.id)
            if safe_task_id in generated_ids:
                generated_ids[safe_task_id] += 1
            else:
                generated_ids[safe_task_id] = 0
            new_count = generated_ids[safe_task_id]
            unique_node_id = _dnsify("{}-{}".format(safe_task_id, new_count))

            # Handling case where the yielded node is launch plan
            if isinstance(sub_task_node.executable_sdk_object,
                          _launch_plan.SdkLaunchPlan):
                node = sub_task_node.assign_id_and_return(unique_node_id)
                _append_node(generated_files, node, nodes, sub_task_node)
            # Handling case where the yielded node is launching a sub-workflow
            elif isinstance(sub_task_node.executable_sdk_object,
                            _workflow.SdkWorkflow):
                node = sub_task_node.assign_id_and_return(unique_node_id)
                _append_node(generated_files, node, nodes, sub_task_node)
                # Add the workflow itself to the yielded sub-workflows
                sub_workflows.add(sub_task_node.executable_sdk_object)
                # Recursively discover statically defined upstream entities (tasks, wfs)
                SdkDynamicTask._add_upstream_entities(
                    sub_task_node.executable_sdk_object, sub_workflows, tasks)
            # Handling tasks
            else:
                # If the task can run as an array job, group its instances together. Otherwise, keep each
                # invocation as a separate node.
                if SdkDynamicTask._can_run_as_array(
                        sub_task_node.executable_sdk_object.type):
                    if sub_task_node.executable_sdk_object in array_job_index:
                        array_job, node = array_job_index[
                            sub_task_node.executable_sdk_object]
                        array_job.size += 1
                        array_job.min_successes = int(
                            math.ceil((1 - effective_failure_ratio) *
                                      array_job.size))
                    else:
                        array_job = self._create_array_job(
                            inputs_prefix=unique_node_id)
                        node = sub_task_node.assign_id_and_return(
                            unique_node_id)
                        array_job_index[
                            sub_task_node.executable_sdk_object] = (array_job,
                                                                    node)

                    node_index = _six.text_type(array_job.size - 1)
                    for k, node_output in _six.iteritems(
                            sub_task_node.outputs):
                        if not node_output.sdk_node.id:
                            node_output.sdk_node.assign_id_and_return(node.id)
                        node_output.var = "[{}].{}".format(
                            node_index, node_output.var)

                    # Upload inputs to working directory under /array_job.input_ref/<index>/inputs.pb
                    input_path = _os.path.join(node.id, node_index,
                                               _constants.INPUT_FILE_NAME)
                    generated_files[input_path] = _literal_models.LiteralMap(
                        literals={
                            binding.var: binding.binding.to_literal_model()
                            for binding in sub_task_node.inputs
                        })
                else:
                    node = sub_task_node.assign_id_and_return(unique_node_id)
                    tasks.add(sub_task_node.executable_sdk_object)
                    _append_node(generated_files, node, nodes, sub_task_node)

        # Assign the computed ArrayJob properties to each grouped task's custom field.
        for task, (array_job, _) in _six.iteritems(array_job_index):
            # TODO: Reconstruct task template object instead of modifying an existing one?
            tasks.add(
                task.assign_custom_and_return(
                    array_job.to_dict()).assign_type_and_return(
                        _constants.SdkTaskType.CONTAINER_ARRAY_TASK))

        # For array jobs, min_successes is absolute: it is computed as (1 - allowed_failure_ratio)
        # multiplied by the array size to get an absolute count (see the loop above).
        nodes.extend([
            array_job_node for (_, array_job_node) in array_job_index.values()
        ])
        dynamic_job_spec = _dynamic_job.DynamicJobSpec(
            min_successes=len(nodes),
            tasks=list(tasks),
            nodes=nodes,
            outputs=output_bindings,
            subworkflows=list(sub_workflows))

        return dynamic_job_spec, generated_files
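
The user-facing counterpart of this machinery is a dynamic task that yields sub-task nodes; outputs referenced from the yielded nodes become the bindings computed above, and un-yielded sub-tasks whose outputs are referenced are picked up through upstream_nodes. A hedged sketch using the legacy decorator API (all names illustrative):

    from flytekit.sdk.tasks import dynamic_task, inputs, outputs, python_task
    from flytekit.sdk.types import Types

    @inputs(in1=Types.Integer)
    @outputs(out1=Types.Integer)
    @python_task
    def square(wf_params, in1, out1):
        out1.set(in1 * in1)

    @inputs(n=Types.Integer)
    @outputs(result=Types.Integer)
    @dynamic_task
    def grow(wf_params, n, result):
        node = square(in1=n)  # creates a future sub-task node
        yield node            # optional when node.outputs.out1 is referenced below
        result.set(node.outputs.out1)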
コード例 #24
0
class Types(object):
    Integer = _helpers.get_sdk_type_from_literal_type(
        _primitives.Integer.to_flyte_literal_type())
    """
    Use this to specify a simple integer type.

    When used with an SDK-decorated method, expect this behavior from the default type engine:

        As input:
            1) If set, A Python int will be received, if set.
            2) Otherwise, a None value will be received.

        As output:
            1) User code may pass an int or long value.
            2) Output can also be nulled with a None value.

        From command-line:
            Specify an integer or integer string.

    .. code-block:: python

        @inputs(a=Types.Integer)
        @outputs(b=Types.Integer)
        @python_task
        def double(wf_params, a, b):
            b.set(a * 2)
    """

    Float = _helpers.get_sdk_type_from_literal_type(
        _primitives.Float.to_flyte_literal_type())
    """
    Use this to specify a simple floating point type.

    When used with an SDK-decorated method, expect this behavior from the default type engine:

        As input:
            A Python float will be received, if set.  Otherwise, a None value will be received.

        As output:
            User code may pass a float value.  It can also be nulled with a None value.

        From command-line:
            Specify a float or floating-point string.

    .. code-block:: python

        @inputs(a=Types.Float)
        @outputs(b=Types.Float)
        @python_task
        def square(wf_params, a, b):
            b.set(a * a)
    """

    String = _helpers.get_sdk_type_from_literal_type(
        _primitives.String.to_flyte_literal_type())
    """
    Use this to specify a simple string type.

    When used with an SDK-decorated method, expect this behavior from the default type engine:

        As input:
            A Python str (Python 2) or unicode (Python 3) will be received, if set.  Otherwise, a None value will be
            received.

        As output:
            User code may pass a str value (Python 2) or a unicode value (Python 3).  It can also be nulled with a None
            value.

        From command-line:
            Specify a string.

    .. code-block:: python

        @inputs(a=Types.String, b=Types.String)
        @outputs(c=Types.String)
        @python_task
        def concat(wf_params, a, b):
            c.set(a + b)
    """

    Boolean = _helpers.get_sdk_type_from_literal_type(
        _primitives.Boolean.to_flyte_literal_type())
    """
    Use this to specify a simple bool type.

    When used with an SDK-decorated method, expect this behavior from the default type engine:

        As input:
            A Python bool will be received, if set.  Otherwise, a None value will be received.

        As output:
            User code may pass a bool value.  It can also be nulled with a None value.

        From command-line:
            Specify 0, 1, true, or false.

    .. code-block:: python

        @inputs(a=Types.Boolean)
        @outputs(b=Types.Boolean)
        @python_task
        def invert(wf_params, a, b):
            b.set(not a)
    """

    Datetime = _helpers.get_sdk_type_from_literal_type(
        _primitives.Datetime.to_flyte_literal_type())
    """
    Use this to specify a simple datetime type.

    When used with an SDK-decorated method, expect this behavior from the default type engine:

        As input:
            A Python timezone-aware datetime.datetime will be received with a UTC time, if set.  Otherwise,
            a None value will be received.

        As output:
            User code may pass a timezone-aware datetime.datetime value.  It can also be nulled with a None value.

        From command-line:
            Specify a timezone-aware, parsable datestring, e.g. 2019-01-01T00:00+00:00

    .. note::

        The engine requires that datetimes be timezone aware.  By default, Python datetime.datetime is not timezone
        aware.

    .. code-block:: python

        @inputs(a=Types.Datetime)
        @outputs(b=Types.Datetime)
        @python_task
        def tomorrow(wf_params, a, b):
            b.set(a + datetime.timedelta(days=1))
    """

    Timedelta = _helpers.get_sdk_type_from_literal_type(
        _primitives.Timedelta.to_flyte_literal_type())
    """
    Use this to specify a simple timedelta type.

    When used with an SDK-decorated method, expect this behavior from the default type engine:

        As input:
            A Python datetime.timedelta will be received, if set.  Otherwise, a None value will be received.

        As output:
            User code may pass a datetime.timedelta value.  It can also be nulled with a None value.

        From command-line:
            Specify a parsable duration string, e.g. 1h30m24s

    .. code-block:: python

        @inputs(a=Types.Timedelta)
        @outputs(b=Types.Timedelta)
        @python_task
        def hundred_times_longer(wf_params, a, b):
            b.set(a * 100)
    """

    Generic = _helpers.get_sdk_type_from_literal_type(
        _primitives.Generic.to_flyte_literal_type())
    """
    Use this to specify a simple JSON type.

    When used with an SDK-decorated method, expect this behavior from the default type engine:

        As input:
            1) If set, a Python dict with JSON-ifiable primitives and nested lists or maps will be received.
            2) Otherwise, a None value will be received.

        As output:
            1) User code may pass a Python dict with arbitrarily nested lists and dictionaries.  JSON-ifiable
               primitives may also be specified.
            2) Output can also be nulled with a None value.

        From command-line:
            Specify a JSON string.

    .. code-block:: python

        @inputs(a=Types.Generic)
        @outputs(b=Types.Generic)
        @python_task
        def operate(wf_params, a, b):
            if a['operation'] == 'add':
                a['value'] += a['operand']  # a['value'] is a number
            elif a['operation'] == 'merge':
                a['value'].update(a['some']['nested'][0]['field'])
            b.set(a)
    """

    Blob = _blobs.Blob
    """
    Use this to specify a Blob object which is essentially a managed file.

    When used with an SDK-decorated method, expect this behavior from the default type engine:

        As input:
            1) If set, a :py:class:`flytekit.common.types.impl.blobs.Blob` object will be received.
            2) If not set, a None value.

        As output:
            1) A user may specify a path string.
            2) A user may construct a :py:class:`flytekit.common.types.impl.blobs.Blob` object and pass it as output.
            3) Output can be nulled with a None value.

        From command-line:
            Specify a path to the blob. This path must be accessible from the container when executing--either by
            being downloaded from an accessible remote location like s3 or as a local file.

    .. code-block:: python

        @inputs(a=Types.Blob)
        @outputs(b=Types.Blob)
        @python_task
        def copy(wf_params, a, b):
            with a as reader:
                txt = reader.read()

            out = Types.Blob()  # Create at a random location specified in flytekit configuration
            with out as writer:
                writer.write(txt)
            b.set(out)
    """

    CSV = _blobs.CSV
    """
    Use this to specify a CSV blob object which is essentially a managed file in the CSV format.

    When used with an SDK-decorated method, expect this behavior from the default type engine:

        As input:
            1) If set, a :py:class:`flytekit.common.types.impl.blobs.CSV` object will be received.
            2) If not set, a None value.

        As output:
            1) A user may specify a path string.
            2) A user may construct a :py:class:`flytekit.common.types.impl.blobs.CSV` object and pass it as output.
            3) Output can be nulled with a None value.

        From command-line:
            Specify a path to the CSV. This path must be accessible from the container when executing--either by
            being downloaded from an accessible remote location like s3 or as a local file.

    .. code-block:: python

        @inputs(a=Types.CSV)
        @outputs(b=Types.CSV)
        @python_task
        def copy(wf_params, a, b):
            with a as reader:
                txt = reader.read()

            out = Types.CSV()  # Create at a random location specified in flytekit configuration
            with out as writer:
                writer.write(txt)
            b.set(out)
    """

    MultiPartBlob = _blobs.MultiPartBlob
    """
    Use this to specify a multi-part blob object which is essentially a chunked file in a non-recursive directory.

    When used with an SDK-decorated method, expect this behavior from the default type engine:

        As input:
            1) If set, a :py:class:`flytekit.common.types.impl.blobs.MultiPartBlob` object will be received.
            2) If not set, a None value.

        As output:
            1) A user may specify a path string.
            2) A user may construct a :py:class:`flytekit.common.types.impl.blobs.MultiPartBlob` object and pass it as
               output.
            3) Output can be nulled with a None value.

        From command-line:
            Specify a path to the multi-part blob. This path must be accessible from the container when
            executing--either by being downloaded from an accessible remote location like s3 or as a local file.

    .. code-block:: python

        @inputs(a=Types.MultiPartBlob)
        @outputs(b=Types.MultiPartBlob)
        @python_task
        def concat_then_split(wf_params, a, b):
            txt = ""
            with a as chunks:
                for chunk in chunks:
                    txt += chunk.read()

            out = Types.MultiPartBlob()  # Create at a random location specified in flytekit configuration
            with out.create_part('000000') as writer:
                writer.write("Chunk1")
            with out.create_part('000001') as writer:
                writer.write("Chunk2")
            b.set(out)
    """

    MultiPartCSV = _blobs.MultiPartCSV
    """
    See :py:attr:`flytekit.sdk.types.Types.MultiPartBlob`, but in CSV format.
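
    A minimal sketch (assumed, mirroring the MultiPartBlob example above; each part holds CSV-formatted text):

    .. code-block:: python

        @inputs(a=Types.MultiPartCSV)
        @outputs(b=Types.MultiPartCSV)
        @python_task
        def copy_parts(wf_params, a, b):
            rows = ""
            with a as parts:
                for part in parts:
                    rows += part.read()

            out = Types.MultiPartCSV()  # Create at a random location specified in flytekit configuration
            with out.create_part('000000') as writer:
                writer.write(rows)
            b.set(out)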
    """

    Schema = staticmethod(_schema.schema_instantiator)
    """
    Use this to specify a Schema blob object which is essentially a chunked stream of Parquet dataframes.

    When used with an SDK-decorated method, expect this behavior from the default type engine:

        Cast behavior:
            1) A generic schema (specified as `Types.Schema()`) can receive input from any schema type regardless of
               column definitions.
            2) A schema can receive as input any schema object as long as the upstream schema has a superset of the
               column names defined and the types match for paired columns.  Ordering does not matter.

        As input:
            1) If set, a :py:class:`flytekit.common.types.impl.schema.Schema` object will be received.
            2) If not set, a None value.

        As output:
            1) A user may specify a path string to a non-recursive directory of chunked dataframes.
            2) A user may construct a :py:class:`flytekit.common.types.impl.schema.Schema` object (with the correct
               column definitions) and pass it as output.
            3) Output can be nulled with a None value.

        From command-line:
            Specify a path to the schema object. This path must be accessible from the container when
            executing--either by being downloaded from an accessible remote location like s3 or as a local file.

    .. code-block:: python

        @inputs(generic=Types.Schema(), typed=Types.Schema([('a', Types.Integer), ('b', Types.Float)]))
        @outputs(b=Types.Schema([('a', Types.Integer), ('b', Types.Float)]))
        @python_task
        def concat_then_split(wf_params, generic, typed, b):
            with typed as reader:
                # Each chunk is loaded as a pandas.DataFrame object
                for df in reader.iter_chunks():
                    # Operate on the dataframe
                    pass

            # Create at a random location specified in flytekit configuration
            out = Types.Schema([('a', Types.Integer), ('b', Types.Float)])()
            with out as writer:
                writer.write(
                    pandas.DataFrame.from_dict(
                        {
                            'a': [1, 2, 3],
                            'b': [5.0, 6.0, 7.0]
                        }
                    )
                )
            b.set(out)
    """

    Proto = staticmethod(_proto.create_protobuf)
    """
    Use this to specify a custom protobuf type.

    .. note::

        The protobuf Python library must be available on the PYTHONPATH so the type engine can load the
        appropriate Python code to deserialize the protobuf.

    When used with an SDK-decorated method, expect this behavior from the default type engine:

        As input:
            1) If set, a Python protobuf object of the type specified in the definition.
            2) If not set, a None value.

        As output:
            1) A Python protobuf object matching the type specified by the user.
            2) Set None to null the output.

        From command-line:
            A base-64 encoded string of the serialized protobuf.

    .. code-block:: python

        from protos import my_protos_pb2

        @inputs(a=Types.Proto(my_protos_pb2.Custom))
        @outputs(b=Types.Proto(my_protos_pb2.Custom))
        @python_task
        def assert_and_create(wf_params, a, b):
            assert a.field1 == 1
            assert a.field2 == 'abc'
            b.set(
                my_protos_pb2.Custom(
                    field1=100,
                    field2='hello'
                )
            )
    """

    List = staticmethod(_containers.List)
    """
    Use this to specify a strongly-typed list of another type, for example Types.List(Types.Integer).
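
    A minimal sketch (assumed; by analogy with the scalar examples above, the default type engine
    passes a Python list in and a Python list may be set as output):

    .. code-block:: python

        @inputs(a=Types.List(Types.Integer))
        @outputs(b=Types.List(Types.Integer))
        @python_task
        def double_all(wf_params, a, b):
            b.set([x * 2 for x in a])
    """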

Code example #25
File: test_helpers.py  Project: vglocus/flytekit
def test_get_sdk_type_from_literal_type():
    o = _type_helpers.get_sdk_type_from_literal_type(
        _model_types.LiteralType(simple=_model_types.SimpleType.FLOAT))
    assert o == _sdk_types.Types.Float
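
A companion round-trip sketch (assumed, not part of the original test file; it uses
to_flyte_literal_type(), which appears throughout the examples above):

def test_literal_type_round_trip():
    # Serializing the SDK type should recover the FLOAT literal type it was derived from.
    lt = _sdk_types.Types.Float.to_flyte_literal_type()
    assert lt.simple == _model_types.SimpleType.FLOAT
    assert _type_helpers.get_sdk_type_from_literal_type(lt) == _sdk_types.Types.Float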
Code example #26
    def __init__(
        self,
        sdk_workflow,
        default_inputs=None,
        fixed_inputs=None,
        role=None,
        schedule=None,
        notifications=None,
        labels=None,
        annotations=None,
        auth_role=None,
        raw_output_data_config=None,
    ):
        """
        :param flytekit.common.local_workflow.SdkRunnableWorkflow sdk_workflow:
        :param dict[Text,flytekit.common.promise.Input] default_inputs:
        :param dict[Text,Any] fixed_inputs: These inputs will be fixed and not need to be set when executing this
            launch plan.
        :param Text role: Deprecated. IAM role to execute this launch plan with.
        :param flytekit.models.schedule.Schedule schedule: Schedule to apply to this workflow.
        :param list[flytekit.models.common.Notification] notifications: List of notifications to apply to this
            launch plan.
        :param flytekit.models.common.Labels labels: Any custom kubernetes labels to apply to workflows executed by this
            launch plan.
        :param flytekit.models.common.Annotations annotations: Any custom kubernetes annotations to apply to workflows
            executed by this launch plan.
        :param flytekit.models.common.AuthRole auth_role: The auth method with which to execute the workflow.
        :param flytekit.models.common.RawOutputDataConfig raw_output_data_config: Config for offloading data
        """
        if role and auth_role:
            raise ValueError(
                "Cannot set both role and auth. Role is deprecated, use auth instead."
            )

        fixed_inputs = fixed_inputs or {}
        default_inputs = default_inputs or {}

        if role:
            auth_role = _common_models.AuthRole(assumable_iam_role=role)

        # The constructor for SdkLaunchPlan sets the id to None anyway, so we don't bother passing in an ID. The ID
        # should be set in one of three places:
        #   1) When the object is registered (in the code above)
        #   2) By the dynamic task code after this runnable object has already been __call__'ed. The SdkNode produced
        #      maintains a link to this object and will set the ID according to the configuration variables present.
        #   3) When SdkLaunchPlan.fetch() is run
        super(SdkRunnableLaunchPlan, self).__init__(
            None,
            _launch_plan_models.LaunchPlanMetadata(
                schedule=schedule or _schedule_model.Schedule(""),
                notifications=notifications or [],
            ),
            _interface_models.ParameterMap(default_inputs),
            _type_helpers.pack_python_std_map_to_literal_map(
                fixed_inputs,
                {
                    k: _type_helpers.get_sdk_type_from_literal_type(var.type)
                    for k, var in _six.iteritems(sdk_workflow.interface.inputs)
                    if k in fixed_inputs
                },
            ),
            labels or _common_models.Labels({}),
            annotations or _common_models.Annotations({}),
            auth_role,
            raw_output_data_config or _common_models.RawOutputDataConfig(""),
        )
        self._interface = _interface.TypedInterface(
            {k: v.var
             for k, v in _six.iteritems(default_inputs)},
            sdk_workflow.interface.outputs,
        )
        self._upstream_entities = {sdk_workflow}
        self._sdk_workflow = sdk_workflow
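
A construction sketch (assumed; my_workflow is a hypothetical SdkRunnableWorkflow with a
'region' input, and the label values are illustrative):

launch_plan = SdkRunnableLaunchPlan(
    sdk_workflow=my_workflow,
    # Fixed inputs are packed into a LiteralMap against the workflow's interface.
    fixed_inputs={'region': 'us-east-1'},
    labels=_common_models.Labels({'team': 'data'}),
    raw_output_data_config=_common_models.RawOutputDataConfig('s3://my-bucket/offloaded'),
)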
Code example #27
    def _produce_dynamic_job_spec(self, context, inputs):
        """
        Runs user code and produces future task nodes to run sub-tasks.
        :param context:
        :param flytekit.models.literals.LiteralMap inputs:
        :rtype: (_dynamic_job.DynamicJobSpec, dict[Text, flytekit.models.common.FlyteIdlEntity])
        """
        inputs_dict = _type_helpers.unpack_literal_map_to_sdk_python_std(
            inputs, {
                k: _type_helpers.get_sdk_type_from_literal_type(v.type)
                for k, v in _six.iteritems(self.interface.inputs)
            })
        outputs_dict = {
            name: PromiseOutputReference(
                _type_helpers.get_sdk_type_from_literal_type(variable.type))
            for name, variable in _six.iteritems(self.interface.outputs)
        }

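        # Merge output references into the inputs dict so user code receives them as
        # settable parameters alongside the unpacked inputs.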
        inputs_dict.update(outputs_dict)
        yielded_sub_tasks = [
            sub_task
            for sub_task in super(SdkDynamicTask, self)._execute_user_code(
                context, inputs_dict) or []
        ]

        upstream_nodes = list()
        output_bindings = [
            _literal_models.Binding(
                var=name,
                binding=_interface.BindingData.from_python_std(
                    b.sdk_type.to_flyte_literal_type(),
                    b.raw_value,
                    upstream_nodes=upstream_nodes))
            for name, b in _six.iteritems(outputs_dict)
        ]
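        # from_python_std appended every node referenced by the bindings; de-duplicate them.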
        upstream_nodes = set(upstream_nodes)

        generated_files = {}
        # Keeping future-tasks in original order. We don't use upstream_nodes exclusively because the parent task can
        # yield sub-tasks that it never uses to produce final outputs but they need to execute nevertheless.
        array_job_index = {}
        tasks = []
        nodes = []
        visited_nodes = set()
        generated_ids = {}
        effective_failure_ratio = self._allowed_failure_ratio or 0.0
        for sub_task_node in _itertools.chain(yielded_sub_tasks,
                                              upstream_nodes):
            if sub_task_node in visited_nodes:
                continue
            visited_nodes.add(sub_task_node)

            # Generate an id that's unique in the document (if the same task is used multiple times with
            # different resources, executable_sdk_object.id will be the same but generated node_ids
            # should not be).
            safe_task_id = _six.text_type(
                sub_task_node.executable_sdk_object.id)
            if safe_task_id in generated_ids:
                generated_ids[safe_task_id] += 1
            else:
                generated_ids[safe_task_id] = 0
            new_count = generated_ids[safe_task_id]
            unique_node_id = _dnsify("{}-{}".format(safe_task_id, new_count))

            # If the task can run as an array job, group its instances together. Otherwise, keep each invocation as a
            # separate node.
            if SdkDynamicTask._can_run_as_array(
                    sub_task_node.executable_sdk_object.type):
                if sub_task_node.executable_sdk_object in array_job_index:
                    array_job, node = array_job_index[
                        sub_task_node.executable_sdk_object]
                    array_job.size += 1
                    array_job.min_successes = int(
                        math.ceil(
                            (1 - effective_failure_ratio) * array_job.size))
                else:
                    array_job = self._create_array_job(
                        inputs_prefix=unique_node_id)
                    node = sub_task_node.assign_id_and_return(unique_node_id)
                    array_job_index[sub_task_node.executable_sdk_object] = (
                        array_job, node)

                node_index = _six.text_type(array_job.size - 1)
                for k, node_output in _six.iteritems(sub_task_node.outputs):
                    if not node_output.sdk_node.id:
                        node_output.sdk_node.assign_id_and_return(node.id)
                    node_output.var = "[{}].{}".format(node_index,
                                                       node_output.var)

                # Upload inputs to working directory under /array_job.input_ref/<index>/inputs.pb
                input_path = _os.path.join(node.id, node_index,
                                           _constants.INPUT_FILE_NAME)
                generated_files[input_path] = _literal_models.LiteralMap(
                    literals={
                        binding.var: binding.binding.to_literal_model()
                        for binding in sub_task_node.inputs
                    })
            else:
                node = sub_task_node.assign_id_and_return(unique_node_id)

                tasks.append(sub_task_node.executable_sdk_object)
                nodes.append(node)

                for k, node_output in _six.iteritems(sub_task_node.outputs):
                    if not node_output.sdk_node.id:
                        node_output.sdk_node.assign_id_and_return(node.id)

                # Upload inputs to working directory under /node_id/inputs.pb
                input_path = _os.path.join(node.id, _constants.INPUT_FILE_NAME)
                generated_files[input_path] = _literal_models.LiteralMap(
                    literals={
                        binding.var: binding.binding.to_literal_model()
                        for binding in sub_task_node.inputs
                    })

        # Assign the computed ArrayJob properties to each array task's custom field.
        for task, (array_job, _) in _six.iteritems(array_job_index):
            # TODO: Reconstruct task template object instead of modifying an existing one?
            tasks.append(
                task.assign_custom_and_return(
                    array_job.to_dict()).assign_type_and_return(
                        _constants.SdkTaskType.CONTAINER_ARRAY_TASK))

        # min_successes is absolute: it is computed as (1 - allowed_failure_ratio) multiplied by the
        # total number of tasks, rounded up, to get an absolute count.
        nodes.extend([
            array_job_node for (_, array_job_node) in array_job_index.values()
        ])
        dynamic_job_spec = _dynamic_job.DynamicJobSpec(
            min_successes=len(nodes),
            tasks=tasks,
            nodes=nodes,
            outputs=output_bindings,
            subworkflows=[])

        return dynamic_job_spec, generated_files
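
A user-facing sketch of the pattern this method supports (assumed; double is a hypothetical
sub-task like the Integer example earlier, and each yielded call becomes a future task node):

@inputs(n=Types.Integer)
@outputs(results=Types.List(Types.Integer))
@dynamic_task
def fan_out(wf_params, n, results):
    collected = []
    for i in range(n):
        node = double(a=i)
        yield node  # consumed by _execute_user_code and converted into a sub-task node
        collected.append(node.outputs.b)
    results.set(collected)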