def execute(self, context, inputs):
    """
    Unpack the inputs, run the user code, and return the resulting outputs.

    :param flytekit.engines.common.EngineContext context:
    :param flytekit.models.literals.LiteralMap inputs:
    :rtype: dict[Text, flytekit.models.common.FlyteIdlEntity]
    :returns: A dictionary mapping 'filenames' to Flyte Interface Entities. The engine uses these entities to
        pass data from node to node, populate metadata, etc. Each engine behaves differently: the Flyte engine
        uploads the entities to a remote working directory (with the names provided), which in turn allows
        Flyte Propeller to push along the workflow, whereas the local engine merely feeds the outputs directly
        into the next node.
    """
    input_sdk_types = {}
    for input_name, input_variable in _six.iteritems(self.interface.inputs):
        input_sdk_types[input_name] = _type_helpers.get_sdk_type_from_literal_type(input_variable.type)
    user_arguments = _type_helpers.unpack_literal_map_to_sdk_python_std(inputs, input_sdk_types)

    output_references = {}
    for output_name, output_variable in _six.iteritems(self.interface.outputs):
        output_references[output_name] = _task_output.OutputReference(
            _type_helpers.get_sdk_type_from_literal_type(output_variable.type)
        )

    # User code receives its inputs and its output references through the same mapping.
    user_arguments.update(output_references)
    self._execute_user_code(context, user_arguments)

    output_literals = {
        output_name: reference.sdk_value
        for output_name, reference in _six.iteritems(output_references)
    }
    return {_constants.OUTPUT_FILE_NAME: _literal_models.LiteralMap(literals=output_literals)}
def _produce_dynamic_job_spec(self, context, inputs):
    """
    Runs user code and produces future task nodes to run sub-tasks.

    One node is created per generated plugin object (query), named ``HiveQuery_<index>``.

    :param context:
    :param flytekit.models.literals.LiteralMap literal_map inputs:
    :rtype: flytekit.models.dynamic_job.DynamicJobSpec
    """
    input_sdk_types = {
        input_name: _type_helpers.get_sdk_type_from_literal_type(input_variable.type)
        for input_name, input_variable in _six.iteritems(self.interface.inputs)
    }
    inputs_dict = _type_helpers.unpack_literal_map_to_sdk_python_std(inputs, input_sdk_types)

    outputs_dict = {}
    for output_name, output_variable in _six.iteritems(self.interface.outputs):
        outputs_dict[output_name] = _task_output.OutputReference(
            _type_helpers.get_sdk_type_from_literal_type(output_variable.type)
        )

    # Add outputs to inputs
    inputs_dict.update(outputs_dict)

    # One node per query
    generated_queries = self._generate_plugin_objects(context, inputs_dict)

    # Create output bindings always - this has to happen after user code has run
    output_bindings = []
    for output_name, reference in _six.iteritems(outputs_dict):
        binding_data = _interface.BindingData.from_python_std(
            reference.sdk_type.to_flyte_literal_type(), reference.value
        )
        output_bindings.append(_literal_models.Binding(var=output_name, binding=binding_data))

    nodes = []
    tasks = []
    for index, qubole_hive_job in enumerate(generated_queries):
        hive_job_node = _create_hive_job_node(
            "HiveQuery_{}".format(index), qubole_hive_job.to_flyte_idl(), self.metadata
        )
        nodes.append(hive_job_node)
        tasks.append(hive_job_node.executable_sdk_object)

    return _dynamic_job.DynamicJobSpec(
        min_successes=len(nodes),
        tasks=tasks,
        nodes=nodes,
        outputs=output_bindings,
        subworkflows=[],
    )
def __repr__(self):
    """
    Render the interface as ``(name: type, ...) -> (name: type, ...)``, inputs first, then outputs.

    :rtype: Text
    """
    rendered_inputs = []
    for name, variable in _six.iteritems(self.inputs):
        rendered_inputs.append(
            "{}: {}".format(name, _type_helpers.get_sdk_type_from_literal_type(variable.type))
        )

    rendered_outputs = []
    for name, variable in _six.iteritems(self.outputs):
        rendered_outputs.append(
            "{}: {}".format(name, _type_helpers.get_sdk_type_from_literal_type(variable.type))
        )

    return "({inputs}) -> ({outputs})".format(
        inputs=", ".join(rendered_inputs),
        outputs=", ".join(rendered_outputs),
    )
def _produce_dynamic_job_spec(self, context, inputs):
    """
    Runs user code and produces future task nodes to run sub-tasks.

    Note: Today a hive task corresponds to a dynamic job spec with one node, which contains multiple
    queries. We may change this in future.

    :param context:
    :param flytekit.models.literals.LiteralMap literal_map inputs:
    :rtype: flytekit.models.dynamic_job.DynamicJobSpec
    """
    input_sdk_types = {}
    for input_name, input_variable in _six.iteritems(self.interface.inputs):
        input_sdk_types[input_name] = _type_helpers.get_sdk_type_from_literal_type(input_variable.type)
    inputs_dict = _type_helpers.unpack_literal_map_to_sdk_python_std(inputs, input_sdk_types)

    outputs_dict = {}
    for output_name, output_variable in _six.iteritems(self.interface.outputs):
        outputs_dict[output_name] = _task_output.OutputReference(
            _type_helpers.get_sdk_type_from_literal_type(output_variable.type)
        )

    # Add outputs to inputs
    inputs_dict.update(outputs_dict)

    generated_queries = self._generate_hive_queries(context, inputs_dict)

    # Create output bindings always - this has to happen after user code has run
    output_bindings = []
    for output_name, reference in _six.iteritems(outputs_dict):
        binding_data = _interface.BindingData.from_python_std(
            reference.sdk_type.to_flyte_literal_type(), reference.value
        )
        output_bindings.append(_literal_models.Binding(var=output_name, binding=binding_data))

    nodes = []
    tasks = []
    # A node is only emitted when user code actually generated at least one query.
    if len(generated_queries.query_collection.queries) > 0:
        hive_job_node = _create_hive_job_node(
            "HiveQueries", generated_queries.to_flyte_idl(), self.metadata
        )
        nodes.append(hive_job_node)
        tasks.append(hive_job_node.executable_sdk_object)

    return _dynamic_job.DynamicJobSpec(
        min_successes=len(nodes),  # At most we only have one node for now, see the note in the docstring
        tasks=tasks,
        nodes=nodes,
        outputs=output_bindings,
        subworkflows=[],
    )
def execute(self, context, inputs):
    """
    Execute the notebook via Papermill and collect the outputs it recorded.

    The executed notebook is written next to the input notebook with an ``-out.ipynb`` suffix. The text output
    of the cell tagged ``outputs`` (the last one, if several are tagged) is parsed as a text-format LiteralMap
    proto. If no cell is tagged, the literal map starts out empty. The executed notebook itself is always
    added to the outputs under ``OUTPUT_NOTEBOOK``.

    :param flytekit.engines.common.EngineContext context:
    :param flytekit.models.literals.LiteralMap inputs:
    :rtype: dict[Text, flytekit.models.common.FlyteIdlEntity]
    :returns: A dictionary mapping 'filenames' to Flyte Interface Entities. The engine uses these entities to
        pass data from node to node, populate metadata, etc. Each engine behaves differently: the Flyte engine
        uploads the entities to a remote working directory (with the names provided), which in turn allows
        Flyte Propeller to push along the workflow, whereas the local engine merely feeds the outputs directly
        into the next node.
    """
    inputs_dict = _type_helpers.unpack_literal_map_to_sdk_python_std(
        inputs,
        {
            k: _type_helpers.get_sdk_type_from_literal_type(v.type)
            for k, v in _six.iteritems(self.interface.inputs)
        },
    )

    input_notebook_path = self._notebook_path
    # Execute Notebook via Papermill.
    output_notebook_path = input_notebook_path.split(".ipynb")[0] + "-out.ipynb"
    _pm.execute_notebook(input_notebook_path, output_notebook_path, parameters=inputs_dict)

    # Parse Outputs from Notebook.
    outputs = None
    with open(output_notebook_path) as json_file:
        data = _json.load(json_file)
    for cell in data["cells"]:
        # Tolerate cells that carry no "tags" entry in their metadata.
        if "outputs" in cell["metadata"].get("tags", ()):
            outputs = " ".join(cell["outputs"][0]["data"]["text/plain"])

    # Always create the proto so that a notebook without an "outputs" cell yields an empty literal map
    # rather than failing on an unassigned local further down.
    outputs_proto = _literal_models._literals_pb2.LiteralMap()
    if outputs is not None:
        _text_format.Parse(outputs, outputs_proto)

    # Add output_notebook as an output to the task.
    output_notebook = _task_output.OutputReference(
        _type_helpers.get_sdk_type_from_literal_type(_Types.Blob.to_flyte_literal_type())
    )
    output_notebook.set(output_notebook_path)

    output_literal_map = _literal_models.LiteralMap.from_flyte_idl(outputs_proto)
    output_literal_map.literals[OUTPUT_NOTEBOOK] = output_notebook.sdk_value

    return {_constants.OUTPUT_FILE_NAME: output_literal_map}
def get_sdk_type_from_literal_type(self, literal_type):
    """
    Resolve a literal type model to the SDK type that implements it.

    The fields of ``literal_type`` are checked in this order: collection, map value, schema, blob, simple.
    For simple BINARY and STRUCT types, a protobuf tag stored in the type's metadata selects a proto-backed
    SDK type; otherwise the simple type is looked up in ``self._SIMPLE_TYPE_LOOKUP_TABLE``.

    :param flytekit.models.types.LiteralType literal_type:
    :rtype: flytekit.common.types.base_sdk_types.FlyteSdkType
    :raises NotImplementedError: For map types, and for simple types missing from the lookup table.
    :raises flytekit.common.exceptions.system.FlyteSystemAssertion: If none of the known fields is set.
    """
    if literal_type.collection_type is not None:
        return _container_types.List(
            _helpers.get_sdk_type_from_literal_type(literal_type.collection_type)
        )
    elif literal_type.map_value_type is not None:
        raise NotImplementedError("TODO: Implement map")
    elif literal_type.schema is not None:
        return _schema.schema_instantiator_from_proto(literal_type.schema)
    elif literal_type.blob is not None:
        return self._get_blob_impl_from_type(literal_type.blob)
    elif literal_type.simple is not None:
        # Guard against absent metadata once, so both proto checks below treat it the same way
        # (previously only the STRUCT check did, and BINARY failed on a None metadata).
        metadata = literal_type.metadata
        has_proto_tag = bool(metadata) and _proto.Protobuf.PB_FIELD_KEY in metadata

        if literal_type.simple == _literal_type_models.SimpleType.BINARY and has_proto_tag:
            return _proto_sdk_type_from_tag(metadata[_proto.Protobuf.PB_FIELD_KEY])
        if literal_type.simple == _literal_type_models.SimpleType.STRUCT and has_proto_tag:
            return _generic_proto_sdk_type_from_tag(metadata[_proto.Protobuf.PB_FIELD_KEY])

        sdk_type = self._SIMPLE_TYPE_LOOKUP_TABLE.get(literal_type.simple)
        if sdk_type is None:
            raise NotImplementedError(
                "We haven't implemented this type yet: Simple type={}".format(literal_type.simple)
            )
        return sdk_type
    else:
        raise _system_exceptions.FlyteSystemAssertion(
            "An unrecognized literal type was received: {}".format(literal_type)
        )
def test_hive_task_query_generation():
    """Each generated hive job carries exactly one query that references its output's URI."""
    with _common_utils.AutoDeletingTempDir("user_dir") as user_working_directory:
        execution_id = WorkflowExecutionIdentifier(
            project="unit_test", domain="unit_test", name="unit_test"
        )
        context = _common_engine.EngineContext(
            execution_id=execution_id,
            execution_date=_datetime.utcnow(),
            stats=None,  # TODO: A mock stats object that we can read later.
            logging=_logging,  # TODO: A mock logging object that we can read later.
            tmp_dir=user_working_directory,
        )

        references = {}
        for name, variable in _six.iteritems(two_queries.interface.outputs):
            references[name] = _task_output.OutputReference(
                _type_helpers.get_sdk_type_from_literal_type(variable.type)
            )

        qubole_hive_jobs = two_queries._generate_plugin_objects(context, references)
        assert len(qubole_hive_jobs) == 2

        # deprecated, collection is only here for backwards compatibility
        for job_index in (0, 1):
            assert len(qubole_hive_jobs[job_index].query_collection.queries) == 1

        # The output references should now have the same fake S3 path as the formatted queries
        for result_index in (0, 1):
            assert references["hive_results"].value[result_index].uri != ""
        for result_index in (0, 1):
            result_uri = references["hive_results"].value[result_index].uri
            assert result_uri in qubole_hive_jobs[result_index].query.query
def from_python_std(cls, literal_type, t_value, upstream_nodes=None):
    """
    Build binding data for ``t_value`` against the expected ``literal_type``.

    Workflow inputs and node outputs become promise bindings, Python lists become collection bindings
    (built recursively per element), and any other value is converted through the SDK type resolved from
    ``literal_type``.

    :param flytekit.models.types.LiteralType literal_type:
    :param T t_value:
    :param list[flytekit.common.nodes.SdkNode] upstream_nodes: [Optional] Keeps track of the nodes upstream,
        if applicable. Node outputs append their node to this list.
    :rtype: BindingData
    :raises flytekit.common.exceptions.user.FlyteTypeException: If a promise's type cannot be cast to the
        expected type, or a list is bound to a non-list type.
    :raises NotImplementedError: For dict values that would require map bindings.
    """
    scalar = None
    collection = None
    promise = None
    map_value = None  # named to avoid shadowing the builtin ``map``
    downstream_sdk_type = _type_helpers.get_sdk_type_from_literal_type(literal_type)

    if isinstance(t_value, _promise.Input):
        if not downstream_sdk_type.is_castable_from(t_value.sdk_type):
            # The exception used to be constructed without being raised, which silently accepted
            # mismatched bindings.
            raise _user_exceptions.FlyteTypeException(
                t_value.sdk_type,
                downstream_sdk_type,
                additional_msg="When binding workflow input: {}".format(t_value),
            )
        promise = t_value.promise
    elif isinstance(t_value, _promise.NodeOutput):
        if not downstream_sdk_type.is_castable_from(t_value.sdk_type):
            raise _user_exceptions.FlyteTypeException(
                t_value.sdk_type,
                downstream_sdk_type,
                additional_msg="When binding node output: {}".format(t_value),
            )
        promise = t_value
        if upstream_nodes is not None:
            upstream_nodes.append(t_value.sdk_node)
    elif isinstance(t_value, list):
        if not issubclass(downstream_sdk_type, _containers.ListImpl):
            raise _user_exceptions.FlyteTypeException(
                type(t_value),
                downstream_sdk_type,
                received_value=t_value,
                additional_msg="Cannot bind a list to a non-list type.",
            )
        collection = _literal_models.BindingDataCollection(
            [
                BindingData.from_python_std(
                    downstream_sdk_type.sub_type.to_flyte_literal_type(),
                    v,
                    upstream_nodes=upstream_nodes,
                )
                for v in t_value
            ]
        )
    elif isinstance(t_value, dict) and (
        not issubclass(downstream_sdk_type, _primitives.Generic)
        or BindingData._has_sub_bindings(t_value)
    ):
        # TODO: This behavior should be embedded in the type engine. Someone should be able to alter behavior of
        # TODO: binding logic by injecting their own type engine. The same goes for the list check above.
        raise NotImplementedError("TODO: Cannot use map bindings at the moment")
    else:
        sdk_value = downstream_sdk_type.from_python_std(t_value)
        scalar = sdk_value.scalar
        collection = sdk_value.collection
        map_value = sdk_value.map

    return cls(scalar=scalar, collection=collection, map=map_value, promise=promise)
def __init__(self, type_map, node):
    """
    Populate the mapper with one mapping object per variable in ``type_map``.

    :param dict[Text, flytekit.models.interface.Variable] type_map:
    :param SdkNode node:
    """
    super(ParameterMapper, self).__init__()
    for name, variable in _six.iteritems(type_map):
        self[name] = self._return_mapping_object(
            node,
            _type_helpers.get_sdk_type_from_literal_type(variable.type),
            name,
        )
    # Set only after every entry has been stored.
    self._initialized = True
def _python_std_input_map_to_literal_map(self, inputs):
    """
    Pack Python standard inputs into a LiteralMap using the SDK types of this entity's interface inputs.

    :param dict[Text,Any] inputs: A dictionary of Python standard inputs that will be type-checked and compiled
        to a LiteralMap
    :rtype: flytekit.models.literals.LiteralMap
    """
    input_sdk_types = {}
    for name, variable in _six.iteritems(self.interface.inputs):
        input_sdk_types[name] = _type_helpers.get_sdk_type_from_literal_type(variable.type)
    return _type_helpers.pack_python_std_map_to_literal_map(inputs, input_sdk_types)
def test_sdk_output_references_construction():
    """Output references built from the task interface start out void and are typed as a list of schemas."""
    references = {}
    for name, variable in _six.iteritems(two_queries.interface.outputs):
        references[name] = _task_output.OutputReference(
            _type_helpers.get_sdk_type_from_literal_type(variable.type)
        )

    # Before user code is run, the outputs passed to the user code should not have values
    assert references['hive_results'].sdk_value == _base_sdk_types.Void()

    # Should be a list of schemas
    assert isinstance(references['hive_results'].sdk_type, _containers.TypedCollectionType)
    assert isinstance(references['hive_results'].sdk_type.sub_type, _schema.SchemaInstantiator)
def unit_test(self, **input_map):
    """
    Execute this task through the 'unit' engine.

    :param dict[Text, T] input_map: Python Std input from users. We will cast these to the appropriate Flyte
        literals.
    :returns: Depends on the behavior of the specific task in the unit engine.
    """
    engine_task = _engine_loader.get_engine('unit').get_task(self)
    input_sdk_types = {
        name: _type_helpers.get_sdk_type_from_literal_type(variable.type)
        for name, variable in _six.iteritems(self.interface.inputs)
    }
    literal_inputs = _type_helpers.pack_python_std_map_to_literal_map(input_map, input_sdk_types)
    return engine_task.execute(literal_inputs)
def _validate_inputs(self, inputs):
    """
    Reject any input whose SDK type is not in ``input_types_supported``, then defer to the parent class.

    :param dict[Text, flytekit.models.interface.Variable] inputs: Input variables to validate
    :raises: flytekit.common.exceptions.user.FlyteValidationException
    """
    for _name, variable in _six.iteritems(inputs):
        sdk_type = _helpers.get_sdk_type_from_literal_type(variable.type)
        if sdk_type in input_types_supported:
            continue
        raise _user_exceptions.FlyteValidationException(
            "Input Type '{}' not supported.  Only Primitives are supported for Scala/Java Spark."
            .format(sdk_type))

    super(SdkGenericSparkTask, self)._validate_inputs(inputs)
def promote_from_model(cls, model):
    """
    Build the SDK schema type from its model, mapping each column's proto enum type to an SDK type.

    :param flytekit.models.types.SchemaType model:
    :rtype: SchemaType
    """
    column_types = _type_models.SchemaType.SchemaColumn.SchemaColumnType
    primitive_by_column_type = (
        (column_types.INTEGER, _primitives.Integer),
        (column_types.FLOAT, _primitives.Float),
        (column_types.BOOLEAN, _primitives.Boolean),
        (column_types.DATETIME, _primitives.Datetime),
        (column_types.DURATION, _primitives.Timedelta),
        (column_types.STRING, _primitives.String),
    )
    sdk_type_by_column_type = {}
    for column_type, primitive in primitive_by_column_type:
        sdk_type_by_column_type[column_type] = _helpers.get_sdk_type_from_literal_type(
            primitive.to_flyte_literal_type()
        )

    columns = [(column.name, sdk_type_by_column_type[column.type]) for column in model.columns]
    return cls(columns)
def _transform_for_user_output(self, outputs):
    """
    Just return the outputs as a user-readable dictionary.

    Each literal stored under ``OUTPUT_FILE_NAME`` is converted to its Python standard value using the SDK
    type declared for that output in the task's interface.

    :param dict[Text,flytekit.models.common.FlyteIdlEntity] outputs:
    :rtype: T
    """
    literal_map = outputs[_sdk_constants.OUTPUT_FILE_NAME]

    user_outputs = {}
    for name, variable in _six.iteritems(self.sdk_task.interface.outputs):
        literal = literal_map.literals[name]
        sdk_type = _type_helpers.get_sdk_type_from_literal_type(variable.type)
        sdk_value = _type_helpers.get_sdk_value_from_literal(literal, sdk_type=sdk_type)
        user_outputs[name] = sdk_value.to_python_std()
    return user_outputs
def local_execute(self, **input_map):
    """
    Execute this task through the 'local' engine.

    :param dict[Text, T] input_map: Python Std input from users. We will cast these to the appropriate Flyte
        literals.
    :rtype: dict[Text, T]
    :returns: The output produced by this task in Python standard format.
    """
    engine_task = _engine_loader.get_engine('local').get_task(self)
    input_sdk_types = {
        name: _type_helpers.get_sdk_type_from_literal_type(variable.type)
        for name, variable in _six.iteritems(self.interface.inputs)
    }
    literal_inputs = _type_helpers.pack_python_std_map_to_literal_map(input_map, input_sdk_types)
    return engine_task.execute(literal_inputs)
def _validate_inputs(self, inputs):
    """
    Check that every input is named in the task function and does not use a banned type.

    This method should be overridden in sub-classes that intend to do additional checks on inputs. If
    validation fails, this function should raise an informative exception.

    :param dict[Text, flytekit.models.interface.Variable] inputs: Input variables to validate
    :raises: flytekit.common.exceptions.user.FlyteValidationException
    """
    super(SdkRunnableTask, self)._validate_inputs(inputs)
    for name, variable in _six.iteritems(inputs):
        if not self._is_argname_in_function_definition(name):
            raise _user_exceptions.FlyteValidationException(
                "The input named '{}' was not specified in the task function.  Therefore, this input cannot be "
                "provided to the task.".format(name))

        sdk_type = _type_helpers.get_sdk_type_from_literal_type(variable.type)
        if sdk_type in type(self)._banned_inputs:
            raise _user_exceptions.FlyteValidationException(
                "The input '{}' is not an accepted input type.".format(variable))
def promote_from_model(cls, model):
    """
    Build the SDK parameter from its model.

    A parameter without a default is required; otherwise the default is converted to its Python standard
    value and the parameter is optional.

    :param flytekit.models.interface.Parameter model:
    :rtype: Parameter
    """
    sdk_type = _type_helpers.get_sdk_type_from_literal_type(model.var.type)

    if model.default is None:
        return cls("", sdk_type, help=model.var.description, required=True)

    default_literal = sdk_type.from_flyte_idl(model.default.to_flyte_idl())
    return cls(
        "",
        sdk_type,
        help=model.var.description,
        required=False,
        default=default_literal.to_python_std(),
    )
def _validate_outputs(self, outputs):
    """
    Reject the reserved notebook output name and unsupported output types, then defer to the parent class.

    :param dict[Text, flytekit.models.interface.Variable] outputs: Output variables to validate
    :raises ValueError: If an output is named ``OUTPUT_NOTEBOOK``.
    :raises: flytekit.common.exceptions.user.FlyteValidationException
    """
    # OUTPUT_NOTEBOOK is reserved because the output notebook is added as an implicit output of the task.
    for name, variable in _six.iteritems(outputs):
        if name == OUTPUT_NOTEBOOK:
            raise ValueError(
                "{} is a reserved output keyword. Please use a different output name."
                .format(OUTPUT_NOTEBOOK))

        sdk_type = _type_helpers.get_sdk_type_from_literal_type(variable.type)
        if sdk_type in _notebook_types_map.values():
            continue
        raise _user_exceptions.FlyteValidationException(
            "Output Type '{}' not supported.  Only Primitives are supported for notebook."
            .format(sdk_type))

    super(SdkNotebookTask, self)._validate_outputs(outputs)
def _type_map_from_variable_map(variable_map):
    """
    Map each variable name to the SDK type resolved from that variable's literal type.

    :param variable_map: Mapping of names to objects exposing a ``type`` attribute.
    :returns: Dictionary with the same keys and the resolved SDK types as values.
    """
    type_map = {}
    for name, variable in six.iteritems(variable_map):
        type_map[name] = _type_helpers.get_sdk_type_from_literal_type(variable.type)
    return type_map
def __init__(
    self,
    sdk_workflow,
    default_inputs=None,
    fixed_inputs=None,
    role=None,
    schedule=None,
    notifications=None,
    labels=None,
    annotations=None,
    auth=None,
):
    """
    :param flytekit.common.workflow.SdkWorkflow sdk_workflow:
    :param dict[Text,flytekit.common.promise.Input] default_inputs:
    :param dict[Text,Any] fixed_inputs: These inputs will be fixed and not need to be set when executing this
        launch plan.
    :param Text role: Deprecated. IAM role to execute this launch plan with.
    :param flytekit.models.schedule.Schedule schedule: Schedule to apply to this workflow.
    :param list[flytekit.models.common.Notification] notifications: List of notifications to apply to this
        launch plan.
    :param flytekit.models.common.Labels labels: Any custom kubernetes labels to apply to workflows executed
        by this launch plan.
    :param flytekit.models.common.Annotations annotations: Any custom kubernetes annotations to apply to
        workflows executed by this launch plan.
    :param flytekit.models.launch_plan.Auth auth: The auth method with which to execute the workflow.
    :raises ValueError: If both role and auth are given.
    """
    if role and auth:
        raise ValueError("Cannot set both role and auth. Role is deprecated, use auth instead.")

    if not fixed_inputs:
        fixed_inputs = {}
    if not default_inputs:
        default_inputs = {}

    # The deprecated role argument is translated into an auth object.
    if role:
        auth = _launch_plan_models.Auth(assumable_iam_role=role)

    identifier = _identifier.Identifier(
        _identifier_model.ResourceType.WORKFLOW,
        _internal_config.PROJECT.get(),
        _internal_config.DOMAIN.get(),
        _uuid.uuid4().hex,
        _internal_config.VERSION.get(),
    )
    metadata = _launch_plan_models.LaunchPlanMetadata(
        schedule=schedule or _schedule_model.Schedule(''),
        notifications=notifications or [],
    )
    default_parameter_map = _interface_models.ParameterMap(default_inputs)

    # Only the workflow inputs that were actually fixed take part in packing the fixed literal map.
    fixed_input_types = {}
    for name, variable in _six.iteritems(sdk_workflow.interface.inputs):
        if name in fixed_inputs:
            fixed_input_types[name] = _type_helpers.get_sdk_type_from_literal_type(variable.type)
    fixed_literal_map = _type_helpers.pack_python_std_map_to_literal_map(fixed_inputs, fixed_input_types)

    super(SdkRunnableLaunchPlan, self).__init__(
        identifier,
        metadata,
        default_parameter_map,
        fixed_literal_map,
        labels or _common_models.Labels({}),
        annotations or _common_models.Annotations({}),
        auth,
    )

    interface_inputs = {name: parameter.var for name, parameter in _six.iteritems(default_inputs)}
    self._interface = _interface.TypedInterface(interface_inputs, sdk_workflow.interface.outputs)
    self._upstream_entities = {sdk_workflow}
    self._sdk_workflow = sdk_workflow
class Types(object): Integer = _helpers.get_sdk_type_from_literal_type( _primitives.Integer.to_flyte_literal_type()) """ Use this to specify a simple integer type. When used with an SDK-decorated method, expect this behavior from the default type engine: As input: 1) If set, A Python int will be received, if set. 2) Otherwise, a None value will be received. As output: 1) User code may pass an int or long value. 2) Output can also be nulled with a None value. From command-line: Specify an integer or integer string. .. code-block:: python @inputs(a=Types.Integer) @outputs(b=Types.Integer) @python_task def double(wf_params, a, b): b.set(a * 2) """ Float = _helpers.get_sdk_type_from_literal_type( _primitives.Float.to_flyte_literal_type()) """ Use this to specify a simple floating point type. When used with an SDK-decorated method, expect this behavior from the default type engine: As input: A Python float will be received, if set. Otherwise, a None value will be received. As output: User code may pass a float value. It can also be nulled with a None value. From command-line: Specify a float or floating-point string. .. code-block:: python @inputs(a=Types.Float) @outputs(b=Types.Float) @python_task def square(wf_params, a, b): b.set(a * a) """ String = _helpers.get_sdk_type_from_literal_type( _primitives.String.to_flyte_literal_type()) """ Use this to specify a simple string type. When used with an SDK-decorated method, expect this behavior from the default type engine: As input: A Python str (Python 2) or unicode (Python 3) will be received, if set. Otherwise, a None value will be received. As output: User code may pass a str value (Python 2) or a unicode value (Python 3). It can also be nulled with a None value. From command-line: Specify a string. .. 
code-block:: python @inputs(a=Types.String, b=Types.String) @outputs(c=Types.String) @python_task def concat(wf_params, a, b): c.set(a + b) """ Boolean = _helpers.get_sdk_type_from_literal_type( _primitives.Boolean.to_flyte_literal_type()) """ Use this to specify a simple bool type. When used with an SDK-decorated method, expect this behavior from the default type engine: As input: A Python bool will be received, if set. Otherwise, a None value will be received. As output: User code may pass a bool value. It can also be nulled with a None value. From command-line: Specify 0, 1, true, or false. .. code-block:: python @inputs(a=Types.Boolean) @outputs(b=Types.Boolean) @python_task def invert(wf_params, a, b): b.set(not a) """ Datetime = _helpers.get_sdk_type_from_literal_type( _primitives.Datetime.to_flyte_literal_type()) """ Use this to specify a simple datetime type. When used with an SDK-decorated method, expect this behavior from the default type engine: As input: A Python timezone-aware datetime.datetime will be received with a UTC time, if set. Otherwise, a None value will be received. As output: User code may pass a timezone-aware datetime.datetime value. It can also be nulled with a None value. From command-line: Specify a timezone-aware, parsable datestring. i.e. 2019-01-01T00:00+00:00 .. note:: The engine requires that datetimes be timezone aware. By default, Python datetime.datetime is not timezone aware. .. code-block:: python @inputs(a=Types.Datetime) @outputs(b=Types.Datetime) @python_task def tomorrow(wf_params, a, b): b.set(a + datetime.timedelta(days=1)) """ Timedelta = _helpers.get_sdk_type_from_literal_type( _primitives.Timedelta.to_flyte_literal_type()) """ Use this to specify a simple timedelta type. When used with an SDK-decorated method, expect this behavior from the default type engine: As input: A Python datetime.timedelta will be received, if set. Otherwise, a None value will be received. 
As output: User code may pass a datetime.timedelta value. It can also be nulled with a None value. From command-line: Specify a parsable duration string. i.e. 1h30m24s .. code-block:: python @inputs(a=Types.Timedelta) @outputs(b=Types.Timedelta) @python_task def hundred_times_longer(wf_params, a, b): b.set(a * 100) """ Generic = _helpers.get_sdk_type_from_literal_type( _primitives.Generic.to_flyte_literal_type()) """ Use this to specify a simple JSON type. The Generic type offer a flexible (but loose) extension to flyte's typing system by allowing custom types/objects to be passed through. It's strongly recommended for producers & consumers of entities that produce or consume a Generic type to perform their own expectations checks on the integrity of the object. When used with an SDK-decorated method, expect this behavior from the default type engine: As input: 1) If set, a Python dict with JSON-ifiable primitives and nested lists or maps. 2) Otherwise, a None value will be received. As output: 1) User code may pass a Python dict with arbitrarily nested lists and dictionaries. JSON-ifiable primitives may also be specified. 2) Output can also be nulled with a None value. From command-line: Specify a JSON string. .. code-block:: python @inputs(a=Types.Generic) @outputs(b=Types.Generic) @python_task def operate(wf_params, a, b): if a['operation'] == 'add': a['value'] += a['operand'] # a['value'] is a number elif a['operation'] == 'merge': a['value'].update(a['some']['nested'][0]['field']) b.set(a) # For better readability, it's strongly advised to leverage python's type aliasing. MyTypeA = Types.Generic MyTypeB = Types.Generic # This makes it clearer that it received a certain type and produces a different one. Other tasks that consume # MyTypeB should do so in their input declaration. 
@inputs(a=MyTypeA) @outputs(b=MyTypeB) @python_task def operate(wf_params, a, b): if a['operation'] == 'add': a['value'] += a['operand'] # a['value'] is a number elif a['operation'] == 'merge': a['value'].update(a['some']['nested'][0]['field']) b.set(a) """ Blob = _blobs.Blob """ Use this to specify a Blob object which is essentially a managed file. When used with an SDK-decorated method, expect this behavior from the default type engine: As input: 1) If set, a :py:class:`flytekit.common.types.impl.blobs.Blob` object will be received. 2) If not set, a None value. As output: 1) A user may specify a path string. 2) A user may construct a :py:class:`flytekit.common.types.impl.blobs.Blob` object and pass it as output. 3) Output can be nulled with a None value. From command-line: Specify a path to the blob. This path must be accessible from the container when executing--either by being downloaded from an accessible remote location like s3 or as a local file. .. code-block:: python @inputs(a=Types.Blob) @outputs(b=Types.Blob) @python_task def copy(wf_params, a, b): with a as reader: txt = reader.read() out = Types.Blob() # Create at a random location specified in flytekit configuration with out as writer: writer.write(txt) b.set(out) """ CSV = _blobs.CSV """ Use this to specify a CSV blob object which is essentially a managed file in the CSV format. When used with an SDK-decorated method, expect this behavior from the default type engine: As input: 1) If set, a :py:class:`flytekit.common.types.impl.blobs.CSV` object will be received. 2) If not set, a None value. As output: 1) A user may specify a path string. 2) A user may construct a :py:class:`flytekit.common.types.impl.blobs.CSV` object and pass it as output. 3) Output can be nulled with a None value. From command-line: Specify a path to the CSV. This path must be accessible from the container when executing--either by being downloaded from an accessible remote location like s3 or as a local file. .. 
code-block:: python @inputs(a=Types.CSV) @outputs(b=Types.CSV) @python_task def copy(wf_params, a, b): with a as reader: txt = reader.read() out = Types.CSV() # Create at a random location specified in flytekit configuration with out as writer: writer.write(txt) b.set(out) """ MultiPartBlob = _blobs.MultiPartBlob """ Use this to specify a multi-part blob object which is essentially a chunked file in a non-recursive directory. When used with an SDK-decorated method, expect this behavior from the default type engine: As input: 1) If set, a :py:class:`flytekit.common.types.impl.blobs.MultiPartBlob` object will be received. 2) If not set, a None value. As output: 1) A user may specify a path string. 2) A user may construct a :py:class:`flytekit.common.types.impl.blobs.MultiPartBlob` object and pass it as output. 3) Output can be nulled with a None value. From command-line: Specify a path to the multi-part blob. This path must be accessible from the container when executing--either by being downloaded from an accessible remote location like s3 or as a local file. .. code-block:: python @inputs(a=Types.MultiPartBlob) @outputs(b=Types.MultiPartBlob) @python_task def concat_then_split(wf_params, a, b): txt = "" with a as chunks: for chunk in chunks: txt += chunk.read() out = Types.MultiPartBlob() # Create at a random location specified in flytekit configuration with out.create_part('000000') as writer: writer.write("Chunk1") with out.create_part('000001') as writer: writer.write("Chunk2") b.set(out) """ MultiPartCSV = _blobs.MultiPartCSV """ See :py:attr:`flytekit.sdk.types.Types.MultiPartBlob`, but in CSV format """ Schema = staticmethod(_schema.schema_instantiator) """ Use this to specify a Schema blob object which is essentially a chunked stream of Parquet dataframes. 
When used with an SDK-decorated method, expect this behavior from the default type engine: Cast behavior: 1) A generic schema (specified as `Types.Schema()`) can receive input from any schema type regardless of column definitions. 2) A schema can receive as input any schema object as long as the upstream schema has a superset of the column names defined and the types match for paired columns. Ordering does not matter. As input: 1) If set, a :py:class:`flytekit.common.types.impl.schema.Schema` object will be received. 2) If not set, a None value. As output: 1) A user may specify a path string to a chunked dataframe non-recursive directory. 2) A user may construct a :py:class:`flytekit.common.types.impl.schema.Schema` object (with the correct column definitions) and pass it as output. 3) Output can be nulled with a None value. From command-line: Specify a path to the schema object. This path must be accessible from the container when executing--either by being downloaded from an accessible remote location like s3 or as a local file. .. code-block:: python @inputs(generic=Types.Schema(), typed=Types.Schema([('a', Types.Integer), ('b', Types.Float)])) @outputs(b=Types.Schema([('a', Types.Integer), ('b', Types.Float)])) @python_task def concat_then_split(wf_params, generic, typed,): with typed as reader: # Each chunk is loaded as a pandas.DataFrame object for df in reader.iter_chunks(): # Operate on the dataframe # Create at a random location specified in flytekit configuration out = Types.Schema([('a', Types.Integer), ('b', Types.Float)])() with out as writer: writer.write( pandas.DataFrame.from_dict( { 'a': [1, 2, 3], 'b': [5.0, 6.0, 7.0] } ) ) b.set(out) """ Proto = staticmethod(_proto.create_protobuf) """ Proto type wraps a protobuf type to provide interoperability between protobuf and flyte typing system. Using this type, you can define custom input/output variable types of flyte entities and continue to provide strong typing syntax. 
Proto type serializes proto objects as binary (leveraging `flyteidl's Binary literal <https://github.com/lyft/flyteidl/blob/793b09d190148236f41ad8160b5cec9a3325c16f/protos/flyteidl/core/literals.proto#L45>`_). Binary serialization of protobufs is the most space-efficient serialization form. Because of the way protobufs are designed, unmarshalling the serialized proto requires access to the corresponding type. In order to use/visualize the serialized proto, you will generally need to write custom code in the corresponding component. .. note:: The protobuf Python library should be installed on the PYTHONPATH to ensure the type engine can access the appropriate Python code to deserialize the protobuf. When used with an SDK-decorated method, expect this behavior from the default type engine: As input: 1) If set, a Python protobuf object of the type specified in the definition. 2) If not set, a None value. As output: 1) A Python protobuf object matching the type specified by the users. 2) Set None to null the output. From command-line: A base-64 encoded string of the serialized protobuf. .. code-block:: python from protos import my_protos_pb2 @inputs(a=Types.Proto(my_protos_pb2.Custom)) @outputs(b=Types.Proto(my_protos_pb2.Custom)) @python_task def assert_and_create(wf_params, a, b): assert a.field1 == 1 assert a.field2 == 'abc' b.set( my_protos_pb2.Custom( field1=100, field2='hello' ) ) """ GenericProto = staticmethod(_proto.create_generic) """ GenericProto type wraps a protobuf type to provide interoperability between protobuf and flyte typing system. Using this type, you can define custom input/output variable types of flyte entities and continue to provide strong typing syntax. Proto type serializes proto objects as binary (leveraging `flyteidl's Binary literal <https://github.com/lyft/flyteidl/blob/793b09d190148236f41ad8160b5cec9a3325c16f/protos/flyteidl/core/literals.proto#L63>`_). 
A generic proto is a specialization of the Generic type with added convenience functions to support marshalling/ unmarshalling of the underlying protobuf object using the protobuf official json marshaller. While GenericProto type does not produce the most space-efficient representation of protobufs, it's a suitable solution for making protobufs easily accessible (i.e. humanly readable) in other flyte components (e.g. console, cli... etc.). .. note:: The protobuf Python library should be installed on the PYTHONPATH to ensure the type engine can access the appropriate Python code to deserialize the protobuf. When used with an SDK-decorated method, expect this behavior from the default type engine: As input: 1) If set, a Python protobuf object of the type specified in the definition. 2) If not set, a None value. As output: 1) A Python protobuf object matching the type specified by the users. 2) Set None to null the output. From command-line: A base-64 encoded string of the serialized protobuf. .. code-block:: python from protos import my_protos_pb2 @inputs(a=Types.GenericProto(my_protos_pb2.Custom)) @outputs(b=Types.GenericProto(my_protos_pb2.Custom)) @python_task def assert_and_create(wf_params, a, b): assert a.field1 == 1 assert a.field2 == 'abc' b.set( my_protos_pb2.Custom( field1=100, field2='hello' ) ) """ List = staticmethod(_containers.List) """
def _produce_dynamic_job_spec(self, context, inputs):
    """
    Run the user-defined body of this dynamic task and turn every node it yielded (or referenced from an output)
    into a dynamic job spec.

    :param context: Forwarded unchanged to ``_execute_user_code``.
    :param flytekit.models.literals.LiteralMap inputs: Literal inputs, unpacked according to
        ``self.interface.inputs``.
    :rtype: (_dynamic_job.DynamicJobSpec, dict[Text, flytekit.models.common.FlyteIdlEntity])
    :returns: The assembled spec together with the dict of generated files. For array jobs the dict maps
        ``<node id>/<index>/<input file name>`` to the literal map holding that instance's inputs; the same dict
        is also handed to ``_append_node`` for every non-array node.
    """
    input_sdk_types = {
        name: _type_helpers.get_sdk_type_from_literal_type(variable.type)
        for name, variable in _six.iteritems(self.interface.inputs)
    }
    inputs_dict = _type_helpers.unpack_literal_map_to_sdk_python_std(
        inputs, input_sdk_types)

    outputs_dict = {}
    for name, variable in _six.iteritems(self.interface.outputs):
        outputs_dict[name] = PromiseOutputReference(
            _type_helpers.get_sdk_type_from_literal_type(variable.type))

    # User functions declare inputs and outputs in one signature, so both are passed in a single dict.
    inputs_dict.update(outputs_dict)
    yielded_sub_tasks = list(
        super(SdkDynamicTask, self)._execute_user_code(context, inputs_dict)
        or [])

    # from_python_std receives this list through the upstream_nodes keyword; it is converted to a set below.
    upstream_nodes = []
    output_bindings = []
    for name, reference in _six.iteritems(outputs_dict):
        binding_data = _interface.BindingData.from_python_std(
            reference.sdk_type.to_flyte_literal_type(),
            reference.raw_value,
            upstream_nodes=upstream_nodes)
        output_bindings.append(
            _literal_models.Binding(var=name, binding=binding_data))
    referenced_nodes = set(upstream_nodes)

    generated_files = {}
    generated_ids = {}
    visited_nodes = set()
    array_job_index = {}
    nodes = []
    tasks = set()
    sub_workflows = set()
    effective_failure_ratio = self._allowed_failure_ratio or 0.0

    # TODO: This function needs to be cleaned up.
    # Explicitly yielded nodes come first so their order is kept; nodes that are only referenced through an
    # output follow, which is what lets users skip the explicit "yield".
    for future_node in _itertools.chain(yielded_sub_tasks, referenced_nodes):
        if future_node in visited_nodes:
            continue
        visited_nodes.add(future_node)
        executable = future_node.executable_sdk_object

        # Registerable entities name themselves; their identifier is then rebuilt from the current
        # configuration, preferring the TASK_* settings over the general ones.
        if isinstance(executable, _registerable.RegisterableEntity):
            executable.auto_assign_name()
            executable._id = _identifier.Identifier(
                executable.resource_type,
                _internal_config.TASK_PROJECT.get()
                or _internal_config.PROJECT.get(),
                _internal_config.TASK_DOMAIN.get()
                or _internal_config.DOMAIN.get(),
                executable.platform_valid_name,
                _internal_config.TASK_VERSION.get()
                or _internal_config.VERSION.get())

        # Node ids must be unique within the document even when the same executable is used repeatedly, so
        # a per-executable counter is appended to the executable id.
        safe_task_id = _six.text_type(executable.id)
        new_count = generated_ids.get(safe_task_id, -1) + 1
        generated_ids[safe_task_id] = new_count
        unique_node_id = _dnsify("{}-{}".format(safe_task_id, new_count))

        # Launch plan: becomes a plain node.
        if isinstance(executable, _launch_plan.SdkLaunchPlan):
            node = future_node.assign_id_and_return(unique_node_id)
            _append_node(generated_files, node, nodes, future_node)
            continue

        # Sub-workflow: becomes a node, and the workflow plus its statically defined upstream entities
        # are recorded as well.
        if isinstance(executable, _workflow.SdkWorkflow):
            node = future_node.assign_id_and_return(unique_node_id)
            _append_node(generated_files, node, nodes, future_node)
            sub_workflows.add(executable)
            SdkDynamicTask._add_upstream_entities(executable, sub_workflows,
                                                  tasks)
            continue

        # Task that cannot be batched: one node per invocation.
        if not SdkDynamicTask._can_run_as_array(executable.type):
            node = future_node.assign_id_and_return(unique_node_id)
            tasks.add(executable)
            _append_node(generated_files, node, nodes, future_node)
            continue

        # Array-capable task: every invocation of the same executable shares one array job and one node.
        existing_entry = array_job_index.get(executable)
        if existing_entry is None:
            array_job = self._create_array_job(inputs_prefix=unique_node_id)
            node = future_node.assign_id_and_return(unique_node_id)
            array_job_index[executable] = (array_job, node)
        else:
            array_job, node = existing_entry
            array_job.size += 1
            array_job.min_successes = int(
                math.ceil((1 - effective_failure_ratio) * array_job.size))

        node_index = _six.text_type(array_job.size - 1)
        for _output_name, node_output in _six.iteritems(future_node.outputs):
            if not node_output.sdk_node.id:
                node_output.sdk_node.assign_id_and_return(node.id)
            node_output.var = "[{}].{}".format(node_index, node_output.var)

        # Inputs for this instance go under <node id>/<index>/<input file name>.
        input_path = _os.path.join(node.id, node_index,
                                   _constants.INPUT_FILE_NAME)
        instance_literals = {}
        for binding in future_node.inputs:
            instance_literals[binding.var] = binding.binding.to_literal_model()
        generated_files[input_path] = _literal_models.LiteralMap(
            literals=instance_literals)

    # Store each array job's computed properties in its task's custom field, switch the task type to the
    # container array type, and append the shared array node after all other nodes.
    # TODO: Reconstruct task template object instead of modifying an existing one?
    for task, (array_job, array_job_node) in _six.iteritems(array_job_index):
        customized = task.assign_custom_and_return(array_job.to_dict())
        tasks.add(
            customized.assign_type_and_return(
                _constants.SdkTaskType.CONTAINER_ARRAY_TASK))
        nodes.append(array_job_node)

    dynamic_job_spec = _dynamic_job.DynamicJobSpec(
        min_successes=len(nodes),
        tasks=list(tasks),
        nodes=nodes,
        outputs=output_bindings,
        subworkflows=list(sub_workflows))

    return dynamic_job_spec, generated_files
class Types(object): Integer = _helpers.get_sdk_type_from_literal_type( _primitives.Integer.to_flyte_literal_type()) """ Use this to specify a simple integer type. When used with an SDK-decorated method, expect this behavior from the default type engine: As input: 1) If set, A Python int will be received, if set. 2) Otherwise, a None value will be received. As output: 1) User code may pass an int or long value. 2) Output can also be nulled with a None value. From command-line: Specify an integer or integer string. .. code-block:: python @inputs(a=Types.Integer) @outputs(b=Types.Integer) @python_task def double(wf_params, a, b): b.set(a * 2) """ Float = _helpers.get_sdk_type_from_literal_type( _primitives.Float.to_flyte_literal_type()) """ Use this to specify a simple floating point type. When used with an SDK-decorated method, expect this behavior from the default type engine: As input: A Python float will be received, if set. Otherwise, a None value will be received. As output: User code may pass a float value. It can also be nulled with a None value. From command-line: Specify a float or floating-point string. .. code-block:: python @inputs(a=Types.Float) @outputs(b=Types.Float) @python_task def square(wf_params, a, b): b.set(a * a) """ String = _helpers.get_sdk_type_from_literal_type( _primitives.String.to_flyte_literal_type()) """ Use this to specify a simple string type. When used with an SDK-decorated method, expect this behavior from the default type engine: As input: A Python str (Python 2) or unicode (Python 3) will be received, if set. Otherwise, a None value will be received. As output: User code may pass a str value (Python 2) or a unicode value (Python 3). It can also be nulled with a None value. From command-line: Specify a string. .. 
code-block:: python @inputs(a=Types.String, b=Types.String) @outputs(c=Types.String) @python_task def concat(wf_params, a, b): c.set(a + b) """ Boolean = _helpers.get_sdk_type_from_literal_type( _primitives.Boolean.to_flyte_literal_type()) """ Use this to specify a simple bool type. When used with an SDK-decorated method, expect this behavior from the default type engine: As input: A Python bool will be received, if set. Otherwise, a None value will be received. As output: User code may pass a bool value. It can also be nulled with a None value. From command-line: Specify 0, 1, true, or false. .. code-block:: python @inputs(a=Types.Boolean) @outputs(b=Types.Boolean) @python_task def invert(wf_params, a, b): b.set(not a) """ Datetime = _helpers.get_sdk_type_from_literal_type( _primitives.Datetime.to_flyte_literal_type()) """ Use this to specify a simple datetime type. When used with an SDK-decorated method, expect this behavior from the default type engine: As input: A Python timezone-aware datetime.datetime will be received with a UTC time, if set. Otherwise, a None value will be received. As output: User code may pass a timezone-aware datetime.datetime value. It can also be nulled with a None value. From command-line: Specify a timezone-aware, parsable datestring. i.e. 2019-01-01T00:00+00:00 .. note:: The engine requires that datetimes be timezone aware. By default, Python datetime.datetime is not timezone aware. .. code-block:: python @inputs(a=Types.Datetime) @outputs(b=Types.Datetime) @python_task def tomorrow(wf_params, a, b): b.set(a + datetime.timedelta(days=1)) """ Timedelta = _helpers.get_sdk_type_from_literal_type( _primitives.Timedelta.to_flyte_literal_type()) """ Use this to specify a simple timedelta type. When used with an SDK-decorated method, expect this behavior from the default type engine: As input: A Python datetime.timedelta will be received, if set. Otherwise, a None value will be received. 
As output: User code may pass a datetime.timedelta value. It can also be nulled with a None value. From command-line: Specify a parsable duration string. i.e. 1h30m24s .. code-block:: python @inputs(a=Types.Timedelta) @outputs(b=Types.Timedelta) @python_task def hundred_times_longer(wf_params, a, b): b.set(a * 100) """ Generic = _helpers.get_sdk_type_from_literal_type( _primitives.Generic.to_flyte_literal_type()) """ Use this to specify a simple JSON type. When used with an SDK-decorated method, expect this behavior from the default type engine: As input: 1) If set, a Python dict with JSON-ifiable primitives and nested lists or maps. 2) Otherwise, a None value will be received. As output: 1) User code may pass a Python dict with arbitrarily nested lists and dictionaries. JSON-ifiable primitives may also be specified. 2) Output can also be nulled with a None value. From command-line: Specify a JSON string. .. code-block:: python @inputs(a=Types.Generic) @outputs(b=Types.Generic) @python_task def operate(wf_params, a, b): if a['operation'] == 'add': a['value'] += a['operand'] # a['value'] is a number elif a['operation'] == 'merge': a['value'].update(a['some']['nested'][0]['field']) b.set(a) """ Blob = _blobs.Blob """ Use this to specify a Blob object which is essentially a managed file. When used with an SDK-decorated method, expect this behavior from the default type engine: As input: 1) If set, a :py:class:`flytekit.common.types.impl.blobs.Blob` object will be received. 2) If not set, a None value. As output: 1) A user may specify a path string. 2) A user may construct a :py:class:`flytekit.common.types.impl.blobs.Blob` object and pass it as output. 3) Output can be nulled with a None value. From command-line: Specify a path to the blob. This path must be accessible from the container when executing--either by being downloaded from an accessible remote location like s3 or as a local file. .. 
code-block:: python @inputs(a=Types.Blob) @outputs(b=Types.Blob) @python_task def copy(wf_params, a, b): with a as reader: txt = reader.read() out = Types.Blob() # Create at a random location specified in flytekit configuration with out as writer: writer.write(txt) b.set(out) """ CSV = _blobs.CSV """ Use this to specify a CSV blob object which is essentially a managed file in the CSV format. When used with an SDK-decorated method, expect this behavior from the default type engine: As input: 1) If set, a :py:class:`flytekit.common.types.impl.blobs.CSV` object will be received. 2) If not set, a None value. As output: 1) A user may specify a path string. 2) A user may construct a :py:class:`flytekit.common.types.impl.blobs.CSV` object and pass it as output. 3) Output can be nulled with a None value. From command-line: Specify a path to the CSV. This path must be accessible from the container when executing--either by being downloaded from an accessible remote location like s3 or as a local file. .. code-block:: python @inputs(a=Types.CSV) @outputs(b=Types.CSV) @python_task def copy(wf_params, a, b): with a as reader: txt = reader.read() out = Types.CSV() # Create at a random location specified in flytekit configuration with out as writer: writer.write(txt) b.set(out) """ MultiPartBlob = _blobs.MultiPartBlob """ Use this to specify a multi-part blob object which is essentially a chunked file in a non-recursive directory. When used with an SDK-decorated method, expect this behavior from the default type engine: As input: 1) If set, a :py:class:`flytekit.common.types.impl.blobs.MultiPartBlob` object will be received. 2) If not set, a None value. As output: 1) A user may specify a path string. 2) A user may construct a :py:class:`flytekit.common.types.impl.blobs.MultiPartBlob` object and pass it as output. 3) Output can be nulled with a None value. From command-line: Specify a path to the multi-part blob. 
This path must be accessible from the container when executing--either by being downloaded from an accessible remote location like s3 or as a local file. .. code-block:: python @inputs(a=Types.MultiPartBlob) @outputs(b=Types.MultiPartBlob) @python_task def concat_then_split(wf_params, a, b): txt = "" with a as chunks: for chunk in chunks: txt += chunk.read() out = Types.MultiPartBlob() # Create at a random location specified in flytekit configuration with out.create_part('000000') as writer: writer.write("Chunk1") with out.create_part('000001') as writer: writer.write("Chunk2") b.set(out) """ MultiPartCSV = _blobs.MultiPartCSV """ See :py:attr:`flytekit.sdk.types.Types.MultiPartBlob`, but in CSV format """ Schema = staticmethod(_schema.schema_instantiator) """ Use this to specify a Schema blob object which is essentially a chunked stream of Parquet dataframes. When used with an SDK-decorated method, expect this behavior from the default type engine: Cast behavior: 1) A generic schema (specified as `Types.Schema()`) can receive input from any schema type regardless of column definitions. 2) A schema can receive as input any schema object as long as the upstream schema has a superset of the column names defined and the types match for paired columns. Ordering does not matter. As input: 1) If set, a :py:class:`flytekit.common.types.impl.schema.Schema` object will be received. 2) If not set, a None value. As output: 1) A user may specify a path string to a chunked dataframe non-recursive directory. 2) A user may construct a :py:class:`flytekit.common.types.impl.schema.Schema` object (with the correct column definitions) and pass it as output. 3) Output can be nulled with a None value. From command-line: Specify a path to the schema object. This path must be accessible from the container when executing--either by being downloaded from an accessible remote location like s3 or as a local file. .. 
code-block:: python @inputs(generic=Types.Schema(), typed=Types.Schema([('a', Types.Integer), ('b', Types.Float)])) @outputs(b=Types.Schema([('a', Types.Integer), ('b', Types.Float)])) @python_task def concat_then_split(wf_params, generic, typed,): with typed as reader: # Each chunk is loaded as a pandas.DataFrame object for df in reader.iter_chunks(): # Operate on the dataframe # Create at a random location specified in flytekit configuration out = Types.Schema([('a', Types.Integer), ('b', Types.Float)])() with out as writer: writer.write( pandas.DataFrame.from_dict( { 'a': [1, 2, 3], 'b': [5.0, 6.0, 7.0] } ) ) b.set(out) """ Proto = staticmethod(_proto.create_protobuf) """ Use this to specify a custom protobuf type. .. note:: The protobuf Python library should be installed on the PYTHONPATH to ensure the type engine can access the appropriate Python code to deserialize the protobuf. When used with an SDK-decorated method, expect this behavior from the default type engine: As input: 1) If set, a Python protobuf object of the type specified in the definition. 2) If not set, a None value. As output: 1) A Python protobuf object matching the type specified by the users. 2) Set None to null the output. From command-line: A base-64 encoded string of the serialized protobuf. .. code-block:: python from protos import my_protos_pb2 @inputs(a=Types.Proto(my_protos_pb2.Custom)) @outputs(b=Types.Proto(my_protos_pb2.Custom)) @python_task def assert_and_create(wf_params, a, b): assert a.field1 == 1 assert a.field2 == 'abc' b.set( my_protos_pb2.Custom( field1=100, field2='hello' ) ) """ List = staticmethod(_containers.List) """
def test_get_sdk_type_from_literal_type():
    """A literal type whose simple type is FLOAT must resolve to ``Types.Float``."""
    float_literal_type = _model_types.LiteralType(
        simple=_model_types.SimpleType.FLOAT)
    resolved_sdk_type = _type_helpers.get_sdk_type_from_literal_type(
        float_literal_type)
    assert resolved_sdk_type == _sdk_types.Types.Float
def __init__(
    self,
    sdk_workflow,
    default_inputs=None,
    fixed_inputs=None,
    role=None,
    schedule=None,
    notifications=None,
    labels=None,
    annotations=None,
    auth_role=None,
    raw_output_data_config=None,
):
    """
    Build a runnable launch plan around ``sdk_workflow``.

    :param flytekit.common.local_workflow.SdkRunnableWorkflow sdk_workflow: Workflow this launch plan executes.
    :param dict[Text,flytekit.common.promise.Input] default_inputs: Inputs that carry defaults; they also make
        up the input side of this launch plan's interface.
    :param dict[Text,Any] fixed_inputs: These inputs will be fixed and not need to be set when executing this
        launch plan.
    :param Text role: Deprecated. IAM role to execute this launch plan with. Mutually exclusive with
        ``auth_role``.
    :param flytekit.models.schedule.Schedule schedule: Schedule to apply to this workflow.
    :param list[flytekit.models.common.Notification] notifications: List of notifications to apply to this
        launch plan.
    :param flytekit.models.common.Labels labels: Any custom kubernetes labels to apply to workflows executed by
        this launch plan.
    :param flytekit.models.common.Annotations annotations: Any custom kubernetes annotations to apply to
        workflows executed by this launch plan.
    :param flytekit.models.common.Authrole auth_role: The auth method with which to execute the workflow.
    :param flytekit.models.common.RawOutputDataConfig raw_output_data_config: Config for offloading data
    :raises ValueError: If both ``role`` and ``auth_role`` are supplied.
    """
    if role:
        if auth_role:
            raise ValueError(
                "Cannot set both role and auth. Role is deprecated, use auth instead."
            )
        # The deprecated plain role is wrapped into the auth model.
        auth_role = _common_models.AuthRole(assumable_iam_role=role)

    if not fixed_inputs:
        fixed_inputs = {}
    if not default_inputs:
        default_inputs = {}

    metadata = _launch_plan_models.LaunchPlanMetadata(
        schedule=schedule or _schedule_model.Schedule(""),
        notifications=notifications or [],
    )
    default_parameter_map = _interface_models.ParameterMap(default_inputs)

    # Only the workflow inputs that are actually being fixed contribute a type for packing.
    fixed_input_types = {}
    for input_name, variable in _six.iteritems(sdk_workflow.interface.inputs):
        if input_name in fixed_inputs:
            fixed_input_types[input_name] = (
                _type_helpers.get_sdk_type_from_literal_type(variable.type))
    fixed_literal_map = _type_helpers.pack_python_std_map_to_literal_map(
        fixed_inputs, fixed_input_types)

    effective_labels = labels or _common_models.Labels({})
    effective_annotations = annotations or _common_models.Annotations({})
    effective_raw_output = (raw_output_data_config
                            or _common_models.RawOutputDataConfig(""))

    # No id is passed (None): the SdkLaunchPlan constructor sets it to None anyway. The id is expected to be
    # set later in one of three places:
    #   1) when the object is registered,
    #   2) by the dynamic task code after this runnable object has been __call__'ed -- the SdkNode produced
    #      keeps a link to this object and sets the id from the configuration variables present,
    #   3) when SdkLaunchPlan.fetch() is run.
    super(SdkRunnableLaunchPlan, self).__init__(
        None,
        metadata,
        default_parameter_map,
        fixed_literal_map,
        effective_labels,
        effective_annotations,
        auth_role,
        effective_raw_output,
    )

    interface_inputs = {
        input_name: default_input.var
        for input_name, default_input in _six.iteritems(default_inputs)
    }
    self._interface = _interface.TypedInterface(
        interface_inputs,
        sdk_workflow.interface.outputs,
    )
    self._upstream_entities = {sdk_workflow}
    self._sdk_workflow = sdk_workflow
def _produce_dynamic_job_spec(self, context, inputs):
    """
    Runs user code and produces future task nodes to run sub-tasks.

    Each task template is listed only once in the returned spec's ``tasks``, in first-seen order, even when the
    user code yields the same non-array task several times. Every yielded invocation still gets its own node.

    :param context: Forwarded unchanged to ``_execute_user_code``.
    :param flytekit.models.literals.LiteralMap inputs: Literal inputs, unpacked according to
        ``self.interface.inputs``.
    :rtype: (_dynamic_job.DynamicJobSpec, dict[Text, flytekit.models.common.FlyteIdlEntity])
    :returns: The assembled spec and a dict mapping relative input-file paths (``<node id>/<input file name>``
        for plain nodes, ``<node id>/<index>/<input file name>`` for array job instances) to the literal map
        holding that node's inputs.
    """
    inputs_dict = _type_helpers.unpack_literal_map_to_sdk_python_std(
        inputs, {
            k: _type_helpers.get_sdk_type_from_literal_type(v.type)
            for k, v in _six.iteritems(self.interface.inputs)
        })
    outputs_dict = {
        name: PromiseOutputReference(
            _type_helpers.get_sdk_type_from_literal_type(variable.type))
        for name, variable in _six.iteritems(self.interface.outputs)
    }

    # User functions declare inputs and outputs in one signature, so merge them before calling user code.
    inputs_dict.update(outputs_dict)
    yielded_sub_tasks = list(
        super(SdkDynamicTask, self)._execute_user_code(context, inputs_dict)
        or [])

    # from_python_std receives this list through the upstream_nodes keyword; it is converted to a set below.
    upstream_nodes = list()
    output_bindings = [
        _literal_models.Binding(
            var=name,
            binding=_interface.BindingData.from_python_std(
                b.sdk_type.to_flyte_literal_type(),
                b.raw_value,
                upstream_nodes=upstream_nodes))
        for name, b in _six.iteritems(outputs_dict)
    ]
    upstream_nodes = set(upstream_nodes)

    generated_files = {}
    # Keeping future-tasks in original order. We don't use upstream_nodes exclusively because the parent task can
    # yield sub-tasks that it never uses to produce final outputs but they need to execute nevertheless.
    array_job_index = {}
    tasks = []
    # Non-array tasks already placed in `tasks`; used to avoid emitting the same template more than once.
    seen_tasks = set()
    nodes = []
    visited_nodes = set()
    generated_ids = {}
    effective_failure_ratio = self._allowed_failure_ratio or 0.0
    for sub_task_node in _itertools.chain(yielded_sub_tasks, upstream_nodes):
        if sub_task_node in visited_nodes:
            continue
        visited_nodes.add(sub_task_node)
        executable = sub_task_node.executable_sdk_object

        # Generate an id that's unique in the document (if the same task is used multiple times with
        # different resources, executable_sdk_object.id will be the same but generated node_ids should not
        # be.
        safe_task_id = _six.text_type(executable.id)
        if safe_task_id in generated_ids:
            new_count = generated_ids[safe_task_id] = generated_ids[safe_task_id] + 1
        else:
            new_count = generated_ids[safe_task_id] = 0
        unique_node_id = _dnsify("{}-{}".format(safe_task_id, new_count))

        # If the task can run as an array job, group its instances together. Otherwise, keep each invocation as a
        # separate node.
        if SdkDynamicTask._can_run_as_array(executable.type):
            if executable in array_job_index:
                array_job, node = array_job_index[executable]
                array_job.size += 1
                array_job.min_successes = int(
                    math.ceil((1 - effective_failure_ratio) * array_job.size))
            else:
                array_job = self._create_array_job(inputs_prefix=unique_node_id)
                node = sub_task_node.assign_id_and_return(unique_node_id)
                array_job_index[executable] = (array_job, node)

            node_index = _six.text_type(array_job.size - 1)
            for node_output in _six.itervalues(sub_task_node.outputs):
                if not node_output.sdk_node.id:
                    node_output.sdk_node.assign_id_and_return(node.id)
                node_output.var = "[{}].{}".format(node_index, node_output.var)

            # Upload inputs to working directory under /array_job.input_ref/<index>/inputs.pb
            input_path = _os.path.join(node.id, node_index,
                                       _constants.INPUT_FILE_NAME)
            generated_files[input_path] = _literal_models.LiteralMap(
                literals={
                    binding.var: binding.binding.to_literal_model()
                    for binding in sub_task_node.inputs
                })
        else:
            node = sub_task_node.assign_id_and_return(unique_node_id)

            # A task yielded several times produces several nodes but should appear only once in the
            # spec's task list.
            if executable not in seen_tasks:
                seen_tasks.add(executable)
                tasks.append(executable)
            nodes.append(node)

            for node_output in _six.itervalues(sub_task_node.outputs):
                if not node_output.sdk_node.id:
                    node_output.sdk_node.assign_id_and_return(node.id)

            # Upload inputs to working directory under /array_job.input_ref/inputs.pb
            input_path = _os.path.join(node.id, _constants.INPUT_FILE_NAME)
            generated_files[input_path] = _literal_models.LiteralMap(
                literals={
                    binding.var: binding.binding.to_literal_model()
                    for binding in sub_task_node.inputs
                })

    # assign custom field to the ArrayJob properties computed.
    for task, (array_job, _) in _six.iteritems(array_job_index):
        # TODO: Reconstruct task template object instead of modifying an existing one?
        tasks.append(
            task.assign_custom_and_return(
                array_job.to_dict()).assign_type_and_return(
                    _constants.SdkTaskType.CONTAINER_ARRAY_TASK))

    # Each array job contributes a single shared node, appended after the per-invocation nodes.
    nodes.extend(
        [array_job_node for (_, array_job_node) in array_job_index.values()])

    # min_successes is passed as the node count, i.e. every node is required to succeed.
    dynamic_job_spec = _dynamic_job.DynamicJobSpec(
        min_successes=len(nodes),
        tasks=tasks,
        nodes=nodes,
        outputs=output_bindings,
        subworkflows=[])

    return dynamic_job_spec, generated_files