Example #1
    def inputs(self):
        """
        Returns the inputs of the task execution in the standard Python format that is produced by
        the type engine.
        :rtype: dict[Text, T]
        """
        if self._inputs is None:
            client = _flyte_engine.get_client()
            execution_data = client.get_task_execution_data(self.id)

            # Inputs are returned inline unless they are too big, in which case a url blob pointing to them is returned.
            if bool(execution_data.full_inputs.literals):
                input_map = execution_data.full_inputs
            elif execution_data.inputs.bytes > 0:
                with _common_utils.AutoDeletingTempDir() as t:
                    tmp_name = _os.path.join(t.name, "inputs.pb")
                    _data_proxy.Data.get_data(execution_data.inputs.url, tmp_name)
                    input_map = _literal_models.LiteralMap.from_flyte_idl(
                        _common_utils.load_proto_from_file(_literals_pb2.LiteralMap, tmp_name)
                    )
            else:
                input_map = _literal_models.LiteralMap({})

            self._inputs = _type_helpers.unpack_literal_map_to_sdk_python_std(input_map)
        return self._inputs
Example #2
    def outputs(self):
        """
        Returns the outputs of the task execution, if available, in the standard Python format that is produced by
        the type engine. If not available, perhaps due to execution being in progress or an error being produced,
        this will raise an exception.
        :rtype: dict[Text, T]
        """
        if not self.is_complete:
            raise _user_exceptions.FlyteAssertion(
                "Please what until the task execution has completed before requesting the outputs."
            )
        if self.error:
            raise _user_exceptions.FlyteAssertion("Outputs could not be found because the execution ended in failure.")

        if self._outputs is None:
            client = _flyte_engine.get_client()
            execution_data = client.get_task_execution_data(self.id)

            # Outputs are returned inline unless they are too big, in which case a url blob pointing to them is returned.
            if bool(execution_data.full_outputs.literals):
                output_map = execution_data.full_outputs

            elif execution_data.outputs.bytes > 0:
                with _common_utils.AutoDeletingTempDir() as t:
                    tmp_name = _os.path.join(t.name, "outputs.pb")
                    _data_proxy.Data.get_data(execution_data.outputs.url, tmp_name)
                    output_map = _literal_models.LiteralMap.from_flyte_idl(
                        _common_utils.load_proto_from_file(_literals_pb2.LiteralMap, tmp_name)
                    )
            else:
                output_map = _literal_models.LiteralMap({})
            self._outputs = _type_helpers.unpack_literal_map_to_sdk_python_std(output_map)
        return self._outputs
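The two properties above resolve the literal map the same way: use the inline full_inputs/full_outputs when present, download the offloaded protobuf when the url blob reports a non-zero size, and otherwise fall back to an empty map. Below is a minimal, flytekit-free sketch of that three-way decision, using hypothetical stand-in types rather than the real client objects.

from dataclasses import dataclass, field

@dataclass
class UrlBlob:
    # Hypothetical stand-in for the url/bytes pair on execution data.
    url: str = ""
    bytes: int = 0

@dataclass
class ExecutionData:
    # Hypothetical stand-in for the result of get_task_execution_data().
    full_inputs: dict = field(default_factory=dict)
    inputs: UrlBlob = field(default_factory=UrlBlob)

def resolve_literals(execution_data, download_blob):
    """Prefer inline literals, then the offloaded blob, then an empty map."""
    if execution_data.full_inputs:
        return execution_data.full_inputs
    if execution_data.inputs.bytes > 0:
        return download_blob(execution_data.inputs.url)
    return {}

# Inline data wins even when a url is also present.
data = ExecutionData(full_inputs={"a": 9}, inputs=UrlBlob(url="s3://bucket/inputs.pb", bytes=12))
assert resolve_literals(data, download_blob=lambda url: {}) == {"a": 9}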
Example #3
def test_single_step_entrypoint_in_proc():
    with _TemporaryConfiguration(os.path.join(os.path.dirname(__file__), 'fake.config'),
                                 internal_overrides={
                                     'project': 'test',
                                     'domain': 'development'
                                 }):
        with _utils.AutoDeletingTempDir("in") as input_dir:
            literal_map = _type_helpers.pack_python_std_map_to_literal_map(
                {'a': 9}, _type_map_from_variable_map(_task_defs.add_one.interface.inputs))
            input_file = os.path.join(input_dir.name, "inputs.pb")
            _utils.write_proto_to_file(literal_map.to_flyte_idl(), input_file)

            with _utils.AutoDeletingTempDir("out") as output_dir:
                _execute_task(
                    _task_defs.add_one.task_module,
                    _task_defs.add_one.task_function_name,
                    input_file,
                    output_dir.name,
                    False
                )

                p = _utils.load_proto_from_file(
                    _literals_pb2.LiteralMap,
                    os.path.join(output_dir.name, _constants.OUTPUT_FILE_NAME)
                )
                raw_map = _type_helpers.unpack_literal_map_to_sdk_python_std(
                    _literal_models.LiteralMap.from_flyte_idl(p),
                    _type_map_from_variable_map(_task_defs.add_one.interface.outputs)
                )
                assert raw_map['b'] == 10
                assert len(raw_map) == 1
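The test hinges on writing a LiteralMap protobuf to disk and reading it back through _utils.write_proto_to_file and _utils.load_proto_from_file. Assuming those helpers are thin wrappers over standard protobuf binary serialization (an assumption, not confirmed by the snippet), the round trip looks roughly like this:

import os
import tempfile

from flyteidl.core import literals_pb2

def write_proto_to_file(proto, path):
    # Plausible equivalent of _utils.write_proto_to_file: binary protobuf serialization.
    with open(path, "wb") as f:
        f.write(proto.SerializeToString())

def load_proto_from_file(pb2_type, path):
    # Plausible equivalent of _utils.load_proto_from_file.
    msg = pb2_type()
    with open(path, "rb") as f:
        msg.ParseFromString(f.read())
    return msg

with tempfile.TemporaryDirectory() as d:
    path = os.path.join(d, "inputs.pb")
    original = literals_pb2.LiteralMap()
    write_proto_to_file(original, path)
    assert load_proto_from_file(literals_pb2.LiteralMap, path) == original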
Example #4
def test_single_step_entrypoint_out_of_proc():
    with _TemporaryConfiguration(os.path.join(os.path.dirname(__file__), 'fake.config'),
                                 internal_overrides={
                                     'project': 'test',
                                     'domain': 'development'
                                 }):
        with _utils.AutoDeletingTempDir("in") as input_dir:
            literal_map = _type_helpers.pack_python_std_map_to_literal_map({'a': 9}, _type_map_from_variable_map(
                _task_defs.add_one.interface.inputs))
            input_file = os.path.join(input_dir.name, "inputs.pb")
            _utils.write_proto_to_file(literal_map.to_flyte_idl(), input_file)

            with _utils.AutoDeletingTempDir("out") as output_dir:
                cmd = []
                cmd.extend(["--task-module", _task_defs.add_one.task_module])
                cmd.extend(["--task-name", _task_defs.add_one.task_function_name])
                cmd.extend(["--inputs", input_file])
                cmd.extend(["--output-prefix", output_dir.name])
                result = CliRunner().invoke(execute_task_cmd, cmd)

                assert result.exit_code == 0
                p = _utils.load_proto_from_file(
                    _literals_pb2.LiteralMap,
                    os.path.join(output_dir.name, _constants.OUTPUT_FILE_NAME)
                )
                raw_map = _type_helpers.unpack_literal_map_to_sdk_python_std(
                    _literal_models.LiteralMap.from_flyte_idl(p),
                    _type_map_from_variable_map(_task_defs.add_one.interface.outputs)
                )
                assert raw_map['b'] == 10
                assert len(raw_map) == 1
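This variant drives the same entrypoint through its command-line wrapper using click's CliRunner. The snippet below is a self-contained illustration of that testing pattern with a hypothetical command, not the real execute_task_cmd:

import click
from click.testing import CliRunner

@click.command()
@click.option("--task-module", required=True)
@click.option("--task-name", required=True)
def fake_execute_task_cmd(task_module, task_name):
    # Hypothetical stand-in: just report what would be executed.
    click.echo("executing {}.{}".format(task_module, task_name))

result = CliRunner().invoke(
    fake_execute_task_cmd,
    ["--task-module", "my_tasks", "--task-name", "add_one"],
)
assert result.exit_code == 0
assert "my_tasks.add_one" in result.output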
Example #5
    def execute(self, context, inputs):
        """
        :param flytekit.engines.common.EngineContext context:
        :param flytekit.models.literals.LiteralMap inputs:
        :rtype: dict[Text, flytekit.models.common.FlyteIdlEntity]
        :returns: This function must return a dictionary mapping 'filenames' to Flyte Interface Entities.  These
            entities will be used by the engine to pass data from node to node, populate metadata, etc.  Each
            engine will have different behavior.  For instance, the Flyte engine will upload the entities to a remote
            working directory (with the names provided), which will in turn allow Flyte Propeller to push along the
            workflow, whereas the local engine will merely feed the outputs directly into the next node.
        """
        inputs_dict = _type_helpers.unpack_literal_map_to_sdk_python_std(
            inputs,
            {
                k: _type_helpers.get_sdk_type_from_literal_type(v.type)
                for k, v in _six.iteritems(self.interface.inputs)
            },
        )
        outputs_dict = {
            name: _task_output.OutputReference(
                _type_helpers.get_sdk_type_from_literal_type(variable.type))
            for name, variable in _six.iteritems(self.interface.outputs)
        }
        inputs_dict.update(outputs_dict)

        self._execute_user_code(context, inputs_dict)

        return {
            _constants.OUTPUT_FILE_NAME:
            _literal_models.LiteralMap(literals={
                k: v.sdk_value
                for k, v in _six.iteritems(outputs_dict)
            })
        }
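execute() above merges writable output references into the dict of unpacked inputs, so user code sees a single keyword namespace and assigns results by setting the reference. A flytekit-free sketch of that pattern, with a hypothetical OutputReference:

class OutputReference:
    # Hypothetical stand-in: a mutable slot that user code fills in by calling set().
    def __init__(self):
        self.value = None

    def set(self, value):
        self.value = value

def run_user_code(kwargs):
    # User code reads inputs and writes outputs through the same mapping.
    kwargs["b"].set(kwargs["a"] + 1)

inputs_dict = {"a": 9}
outputs_dict = {"b": OutputReference()}
inputs_dict.update(outputs_dict)  # one namespace handed to user code
run_user_code(inputs_dict)
assert outputs_dict["b"].value == 10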
Example #6
def test_arrayjob_entrypoint_in_proc():
    with _TemporaryConfiguration(os.path.join(os.path.dirname(__file__),
                                              'fake.config'),
                                 internal_overrides={
                                     'project': 'test',
                                     'domain': 'development'
                                 }):
        with _utils.AutoDeletingTempDir("dir") as dir:
            literal_map = _type_helpers.pack_python_std_map_to_literal_map(
                {'a': 9},
                _type_map_from_variable_map(
                    _task_defs.add_one.interface.inputs))

            input_dir = os.path.join(dir.name, "1")
            os.mkdir(
                input_dir)  # auto cleanup will take this subdir into account

            input_file = os.path.join(input_dir, "inputs.pb")
            _utils.write_proto_to_file(literal_map.to_flyte_idl(), input_file)

            # construct indexlookup.pb which has array: [1]
            mapped_index = _literals.Literal(
                _literals.Scalar(primitive=_literals.Primitive(integer=1)))
            index_lookup_collection = _literals.LiteralCollection(
                [mapped_index])
            index_lookup_file = os.path.join(dir.name, "indexlookup.pb")
            _utils.write_proto_to_file(index_lookup_collection.to_flyte_idl(),
                                       index_lookup_file)

            # fake arrayjob task by setting environment variables
            orig_env_index_var_name = os.environ.get(
                'BATCH_JOB_ARRAY_INDEX_VAR_NAME')
            orig_env_array_index = os.environ.get('AWS_BATCH_JOB_ARRAY_INDEX')
            os.environ[
                'BATCH_JOB_ARRAY_INDEX_VAR_NAME'] = 'AWS_BATCH_JOB_ARRAY_INDEX'
            os.environ['AWS_BATCH_JOB_ARRAY_INDEX'] = '0'

            execute_task(_task_defs.add_one.task_module,
                         _task_defs.add_one.task_function_name, dir.name,
                         dir.name, False)

            raw_map = _type_helpers.unpack_literal_map_to_sdk_python_std(
                _literal_models.LiteralMap.from_flyte_idl(
                    _utils.load_proto_from_file(
                        _literals_pb2.LiteralMap,
                        os.path.join(input_dir, _constants.OUTPUT_FILE_NAME))),
                _type_map_from_variable_map(
                    _task_defs.add_one.interface.outputs))
            assert raw_map['b'] == 10
            assert len(raw_map) == 1

            # reset the env vars (and remove them again if they were not set originally)
            if orig_env_index_var_name is not None:
                os.environ['BATCH_JOB_ARRAY_INDEX_VAR_NAME'] = orig_env_index_var_name
            else:
                del os.environ['BATCH_JOB_ARRAY_INDEX_VAR_NAME']
            if orig_env_array_index is not None:
                os.environ['AWS_BATCH_JOB_ARRAY_INDEX'] = orig_env_array_index
            else:
                del os.environ['AWS_BATCH_JOB_ARRAY_INDEX']
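The test fakes the AWS Batch array-job indirection: BATCH_JOB_ARRAY_INDEX_VAR_NAME names the environment variable that carries the raw array index, and indexlookup.pb maps that index to the sub-job whose inputs live in the matching subdirectory. A small sketch of that lookup, assuming this is how the entrypoint picks its working subdirectory (the helper name is hypothetical):

import os

def resolve_array_subdir(index_lookup):
    # The variable named by BATCH_JOB_ARRAY_INDEX_VAR_NAME holds the raw array index.
    index_var = os.environ["BATCH_JOB_ARRAY_INDEX_VAR_NAME"]
    raw_index = int(os.environ[index_var])
    # indexlookup.pb maps the raw index to the logical sub-job index (here the collection [1]).
    return str(index_lookup[raw_index])

os.environ["BATCH_JOB_ARRAY_INDEX_VAR_NAME"] = "AWS_BATCH_JOB_ARRAY_INDEX"
os.environ["AWS_BATCH_JOB_ARRAY_INDEX"] = "0"
assert resolve_array_subdir([1]) == "1"  # inputs would then be read from <dir>/1/inputs.pb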
Example #7
    def inputs(self):
        """
        Returns the inputs to the execution in the standard Python format as dictated by the type engine.
        :rtype: dict[Text, T]
        """
        if self._inputs is None:
            self._inputs = _type_helpers.unpack_literal_map_to_sdk_python_std(
                _engine_loader.get_engine().get_node_execution(self).get_inputs())
        return self._inputs
Example #8
    def _produce_dynamic_job_spec(self, context, inputs):
        """
        Runs user code and produces future task nodes to run sub-tasks.
        :param context:
        :param flytekit.models.literals.LiteralMap inputs:
        :rtype: flytekit.models.dynamic_job.DynamicJobSpec
        """
        inputs_dict = _type_helpers.unpack_literal_map_to_sdk_python_std(
            inputs,
            {
                k: _type_helpers.get_sdk_type_from_literal_type(v.type)
                for k, v in _six.iteritems(self.interface.inputs)
            },
        )
        outputs_dict = {
            name: _task_output.OutputReference(
                _type_helpers.get_sdk_type_from_literal_type(variable.type))
            for name, variable in _six.iteritems(self.interface.outputs)
        }

        # Add outputs to inputs
        inputs_dict.update(outputs_dict)

        nodes = []
        tasks = []
        # One node per query
        generated_queries = self._generate_plugin_objects(context, inputs_dict)

        # Create output bindings always - this has to happen after user code has run
        output_bindings = [
            _literal_models.Binding(
                var=name,
                binding=_interface.BindingData.from_python_std(
                    b.sdk_type.to_flyte_literal_type(), b.value),
            ) for name, b in _six.iteritems(outputs_dict)
        ]

        for i, qubole_hive_job in enumerate(generated_queries):
            hive_job_node = _create_hive_job_node("HiveQuery_{}".format(i),
                                                  qubole_hive_job.to_flyte_idl(),
                                                  self.metadata)
            nodes.append(hive_job_node)
            tasks.append(hive_job_node.executable_sdk_object)

        dynamic_job_spec = _dynamic_job.DynamicJobSpec(
            min_successes=len(nodes),
            tasks=tasks,
            nodes=nodes,
            outputs=output_bindings,
            subworkflows=[],
        )

        return dynamic_job_spec
Example #9
    def _produce_dynamic_job_spec(self, context, inputs):
        """
        Runs user code and produces future task nodes to run sub-tasks.
        :param context:
        :param flytekit.models.literals.LiteralMap inputs:
        :rtype: flytekit.models.dynamic_job.DynamicJobSpec
        """
        inputs_dict = _type_helpers.unpack_literal_map_to_sdk_python_std(
            inputs, {
                k: _type_helpers.get_sdk_type_from_literal_type(v.type)
                for k, v in _six.iteritems(self.interface.inputs)
            })
        outputs_dict = {
            name: _task_output.OutputReference(
                _type_helpers.get_sdk_type_from_literal_type(variable.type))
            for name, variable in _six.iteritems(self.interface.outputs)
        }

        # Add outputs to inputs
        inputs_dict.update(outputs_dict)

        # Note: Today a hive task corresponds to a dynamic job spec with one node, which contains multiple
        # queries. We may change this in future.
        nodes = []
        tasks = []
        generated_queries = self._generate_hive_queries(context, inputs_dict)

        # Create output bindings always - this has to happen after user code has run
        output_bindings = [
            _literal_models.Binding(
                var=name,
                binding=_interface.BindingData.from_python_std(
                    b.sdk_type.to_flyte_literal_type(), b.value))
            for name, b in _six.iteritems(outputs_dict)
        ]

        if len(generated_queries.query_collection.queries) > 0:
            hive_job_node = _create_hive_job_node(
                "HiveQueries", generated_queries.to_flyte_idl(), self.metadata)
            nodes.append(hive_job_node)
            tasks.append(hive_job_node.executable_sdk_object)

        dynamic_job_spec = _dynamic_job.DynamicJobSpec(
            min_successes=len(
                nodes
            ),  # At most we only have one node for now, see above comment
            tasks=tasks,
            nodes=nodes,
            outputs=output_bindings,
            subworkflows=[])

        return dynamic_job_spec
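Examples #8 and #9 differ mainly in how generated queries are grouped into nodes: one node per query versus a single node carrying the whole query collection, with min_successes set to the node count in both cases. A schematic, flytekit-free sketch of the two grouping strategies (hypothetical names throughout):

def one_node_per_query(queries):
    # Example #8 strategy: each query becomes its own node.
    nodes = ["HiveQuery_{}".format(i) for i, _ in enumerate(queries)]
    return {"nodes": nodes, "min_successes": len(nodes)}

def single_node_for_all(queries):
    # Example #9 strategy: one node carries the entire query collection (or none at all).
    nodes = ["HiveQueries"] if queries else []
    return {"nodes": nodes, "min_successes": len(nodes)}

queries = ["SELECT 1", "SELECT 2"]
assert one_node_per_query(queries)["min_successes"] == 2
assert single_node_for_all(queries)["min_successes"] == 1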
Example #10
    def execute(self, context, inputs):
        """
        :param flytekit.engines.common.EngineContext context:
        :param flytekit.models.literals.LiteralMap inputs:
        :rtype: dict[Text, flytekit.models.common.FlyteIdlEntity]
        :returns: This function must return a dictionary mapping 'filenames' to Flyte Interface Entities.  These
            entities will be used by the engine to pass data from node to node, populate metadata, etc.  Each
            engine will have different behavior.  For instance, the Flyte engine will upload the entities to a remote
            working directory (with the names provided), which will in turn allow Flyte Propeller to push along the
            workflow, whereas the local engine will merely feed the outputs directly into the next node.
        """
        inputs_dict = _type_helpers.unpack_literal_map_to_sdk_python_std(
            inputs,
            {
                k: _type_helpers.get_sdk_type_from_literal_type(v.type)
                for k, v in _six.iteritems(self.interface.inputs)
            },
        )

        input_notebook_path = self._notebook_path
        # Execute Notebook via Papermill.
        output_notebook_path = input_notebook_path.split(
            ".ipynb")[0] + "-out.ipynb"
        _pm.execute_notebook(input_notebook_path,
                             output_notebook_path,
                             parameters=inputs_dict)

        # Parse Outputs from Notebook.
        outputs = None
        with open(output_notebook_path) as json_file:
            data = _json.load(json_file)
            for p in data["cells"]:
                meta = p["metadata"]
                if "outputs" in meta["tags"]:
                    outputs = " ".join(p["outputs"][0]["data"]["text/plain"])

        # Build the output literal map proto; leave it empty if the notebook produced no tagged outputs.
        output_literals_pb = _literal_models._literals_pb2.LiteralMap()
        if outputs is not None:
            _text_format.Parse(outputs, output_literals_pb)

        # Add output_notebook as an output to the task.
        output_notebook = _task_output.OutputReference(
            _type_helpers.get_sdk_type_from_literal_type(
                _Types.Blob.to_flyte_literal_type()))
        output_notebook.set(output_notebook_path)

        output_literal_map = _literal_models.LiteralMap.from_flyte_idl(output_literals_pb)
        output_literal_map.literals[
            OUTPUT_NOTEBOOK] = output_notebook.sdk_value

        return {_constants.OUTPUT_FILE_NAME: output_literal_map}
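The notebook task above locates the cell tagged "outputs" in the executed notebook's JSON and joins its text/plain payload before parsing it as a text-format LiteralMap. A minimal sketch of that cell lookup over a papermill-style notebook structure (hypothetical data, no papermill needed):

notebook = {
    "cells": [
        {"metadata": {"tags": []}, "outputs": []},
        {
            "metadata": {"tags": ["outputs"]},
            "outputs": [{"data": {"text/plain": ["literals {\n", "  key: \"x\"\n", "}\n"]}}],
        },
    ]
}

def extract_tagged_output(nb):
    # Return the joined text/plain payload of the first cell tagged "outputs", if any.
    for cell in nb["cells"]:
        if "outputs" in cell["metadata"].get("tags", []):
            return " ".join(cell["outputs"][0]["data"]["text/plain"])
    return None

assert extract_tagged_output(notebook).startswith("literals {")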
Example #11
    def test_with_custom_predicate_with_valid_dist_context(self):
        with mock.patch.dict(
            os.environ,
            {
                _sm_distribution.SM_ENV_VAR_CURRENT_HOST: "algo-1",
                _sm_distribution.SM_ENV_VAR_HOSTS: '["algo-0", "algo-1", "algo-2"]',
                _sm_distribution.SM_ENV_VAR_NETWORK_INTERFACE_NAME: "eth0",
            },
            clear=True,
        ):
            # fill in the distributed_training_context to the context object and execute again
            self._my_distributed_task._output_persist_predicate = predicate
            ret = self._my_distributed_task.execute(self._context, self._task_input)
            assert _common_constants.OUTPUT_FILE_NAME in ret.keys()
            python_std_output_map = _type_helpers.unpack_literal_map_to_sdk_python_std(
                ret[_common_constants.OUTPUT_FILE_NAME])
            assert "model" in python_std_output_map.keys()
Example #12
    def outputs(self):
        """
        Returns the outputs to the execution in the standard Python format as dictated by the type engine.  If the
        execution ended in error or the execution is in progress, an exception will be raised.
        :rtype: dict[Text, T]
        """
        if not self.is_complete:
            raise _user_exceptions.FlyteAssertion(
                "Please what until the node execution has completed before "
                "requesting the outputs.")
        if self.error:
            raise _user_exceptions.FlyteAssertion(
                "Outputs could not be found because the execution ended in failure."
            )

        if self._outputs is None:
            self._outputs = _type_helpers.unpack_literal_map_to_sdk_python_std(
                _engine_loader.get_engine().get_node_execution(
                    self).get_outputs())
        return self._outputs
Example #13
    def outputs(self):
        """
        Returns the outputs of the task execution, if available, in the standard Python format that is produced by
        the type engine. If not available, perhaps due to execution being in progress or an error being produced,
        this will raise an exception.
        :rtype: dict[Text, T]
        """
        if not self.is_complete:
            raise _user_exceptions.FlyteAssertion(
                "Please what until the task execution has completed before "
                "requesting the outputs.")
        if self.error:
            raise _user_exceptions.FlyteAssertion(
                "Outputs could not be found because the execution ended in failure."
            )

        if self._outputs is None:
            self._outputs = _type_helpers.unpack_literal_map_to_sdk_python_std(
                _engine_loader.get_engine().get_task_execution(
                    self).get_outputs())
        return self._outputs
Example #14
    def _produce_dynamic_job_spec(self, context, inputs):
        """
        Runs user code and produces future task nodes to run sub-tasks.
        :param context:
        :param flytekit.models.literals.LiteralMap inputs:
        :rtype: (_dynamic_job.DynamicJobSpec, dict[Text, flytekit.models.common.FlyteIdlEntity])
        """
        inputs_dict = _type_helpers.unpack_literal_map_to_sdk_python_std(
            inputs, {
                k: _type_helpers.get_sdk_type_from_literal_type(v.type)
                for k, v in _six.iteritems(self.interface.inputs)
            })
        outputs_dict = {
            name: PromiseOutputReference(
                _type_helpers.get_sdk_type_from_literal_type(variable.type))
            for name, variable in _six.iteritems(self.interface.outputs)
        }

        inputs_dict.update(outputs_dict)
        yielded_sub_tasks = [
            sub_task
            for sub_task in super(SdkDynamicTask, self)._execute_user_code(
                context, inputs_dict) or []
        ]

        upstream_nodes = list()
        output_bindings = [
            _literal_models.Binding(
                var=name,
                binding=_interface.BindingData.from_python_std(
                    b.sdk_type.to_flyte_literal_type(),
                    b.raw_value,
                    upstream_nodes=upstream_nodes))
            for name, b in _six.iteritems(outputs_dict)
        ]
        upstream_nodes = set(upstream_nodes)

        generated_files = {}
        # Keeping future-tasks in original order. We don't use upstream_nodes exclusively because the parent task can
        # yield sub-tasks that it never uses to produce final outputs but they need to execute nevertheless.
        array_job_index = {}
        tasks = []
        nodes = []
        visited_nodes = set()
        generated_ids = {}
        effective_failure_ratio = self._allowed_failure_ratio or 0.0
        for sub_task_node in _itertools.chain(yielded_sub_tasks,
                                              upstream_nodes):
            if sub_task_node in visited_nodes:
                continue
            visited_nodes.add(sub_task_node)

            # Generate an id that's unique in the document (if the same task is used multiple times with
            # different resources, executable_sdk_object.id will be the same but generated node_ids should not
            # be).
            safe_task_id = _six.text_type(
                sub_task_node.executable_sdk_object.id)
            if safe_task_id in generated_ids:
                new_count = generated_ids[
                    safe_task_id] = generated_ids[safe_task_id] + 1
            else:
                new_count = generated_ids[safe_task_id] = 0
            unique_node_id = _dnsify("{}-{}".format(safe_task_id, new_count))

            # If the task can run as an array job, group its instances together. Otherwise, keep each invocation as a
            # separate node.
            if SdkDynamicTask._can_run_as_array(
                    sub_task_node.executable_sdk_object.type):
                if sub_task_node.executable_sdk_object in array_job_index:
                    array_job, node = array_job_index[
                        sub_task_node.executable_sdk_object]
                    array_job.size += 1
                    array_job.min_successes = int(
                        math.ceil(
                            (1 - effective_failure_ratio) * array_job.size))
                else:
                    array_job = self._create_array_job(
                        inputs_prefix=unique_node_id)
                    node = sub_task_node.assign_id_and_return(unique_node_id)
                    array_job_index[sub_task_node.executable_sdk_object] = (
                        array_job, node)

                node_index = _six.text_type(array_job.size - 1)
                for k, node_output in _six.iteritems(sub_task_node.outputs):
                    if not node_output.sdk_node.id:
                        node_output.sdk_node.assign_id_and_return(node.id)
                    node_output.var = "[{}].{}".format(node_index,
                                                       node_output.var)

                # Upload inputs to working directory under /array_job.input_ref/<index>/inputs.pb
                input_path = _os.path.join(node.id, node_index,
                                           _constants.INPUT_FILE_NAME)
                generated_files[input_path] = _literal_models.LiteralMap(
                    literals={
                        binding.var: binding.binding.to_literal_model()
                        for binding in sub_task_node.inputs
                    })
            else:
                node = sub_task_node.assign_id_and_return(unique_node_id)

                tasks.append(sub_task_node.executable_sdk_object)
                nodes.append(node)

                for k, node_output in _six.iteritems(sub_task_node.outputs):
                    if not node_output.sdk_node.id:
                        node_output.sdk_node.assign_id_and_return(node.id)

                # Upload inputs to working directory under /array_job.input_ref/inputs.pb
                input_path = _os.path.join(node.id, _constants.INPUT_FILE_NAME)
                generated_files[input_path] = _literal_models.LiteralMap(
                    literals={
                        binding.var: binding.binding.to_literal_model()
                        for binding in sub_task_node.inputs
                    })

        # assign custom field to the ArrayJob properties computed.
        for task, (array_job, _) in _six.iteritems(array_job_index):
            # TODO: Reconstruct task template object instead of modifying an existing one?
            tasks.append(
                task.assign_custom_and_return(
                    array_job.to_dict()).assign_type_and_return(
                        _constants.SdkTaskType.CONTAINER_ARRAY_TASK))

        # min_successes is absolute; it is computed as the complement of allowed_failure_ratio
        # (i.e. 1 - ratio) multiplied by the total count to yield an absolute number.
        nodes.extend([
            array_job_node for (_, array_job_node) in array_job_index.values()
        ])
        dynamic_job_spec = _dynamic_job.DynamicJobSpec(
            min_successes=len(nodes),
            tasks=tasks,
            nodes=nodes,
            outputs=output_bindings,
            subworkflows=[])

        return dynamic_job_spec, generated_files
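A detail worth isolating is how the loop keeps node ids unique when the same task id is yielded more than once: a per-id counter feeds a DNS-safe suffix. A small sketch of that counting, with a simplified stand-in for _dnsify (the real helper may normalize names differently):

def dnsify(name):
    # Simplified stand-in for _dnsify: lowercase the id and replace separators with hyphens.
    return name.lower().replace("_", "-").replace(".", "-")

def unique_node_id(task_id, generated_ids):
    # Count repeats of the same task id so every generated node gets a distinct, DNS-safe id.
    new_count = generated_ids.get(task_id, -1) + 1
    generated_ids[task_id] = new_count
    return dnsify("{}-{}".format(task_id, new_count))

ids = {}
assert unique_node_id("my_project.add_one", ids) == "my-project-add-one-0"
assert unique_node_id("my_project.add_one", ids) == "my-project-add-one-1"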
Example #15
    def _produce_dynamic_job_spec(self, context, inputs):
        """
        Runs user code and produces future task nodes to run sub-tasks.
        :param context:
        :param flytekit.models.literals.LiteralMap inputs:
        :rtype: (_dynamic_job.DynamicJobSpec, dict[Text, flytekit.models.common.FlyteIdlEntity])
        """
        inputs_dict = _type_helpers.unpack_literal_map_to_sdk_python_std(
            inputs, {
                k: _type_helpers.get_sdk_type_from_literal_type(v.type)
                for k, v in _six.iteritems(self.interface.inputs)
            })
        outputs_dict = {
            name: PromiseOutputReference(
                _type_helpers.get_sdk_type_from_literal_type(variable.type))
            for name, variable in _six.iteritems(self.interface.outputs)
        }

        # Because users declare both inputs and outputs in their functions signatures, merge them together
        # before calling user code
        inputs_dict.update(outputs_dict)
        yielded_sub_tasks = [
            sub_task
            for sub_task in super(SdkDynamicTask, self)._execute_user_code(
                context, inputs_dict) or []
        ]

        upstream_nodes = list()
        output_bindings = [
            _literal_models.Binding(
                var=name,
                binding=_interface.BindingData.from_python_std(
                    b.sdk_type.to_flyte_literal_type(),
                    b.raw_value,
                    upstream_nodes=upstream_nodes))
            for name, b in _six.iteritems(outputs_dict)
        ]
        upstream_nodes = set(upstream_nodes)

        generated_files = {}
        # Keeping future-tasks in original order. We don't use upstream_nodes exclusively because the parent task can
        # yield sub-tasks that it never uses to produce final outputs but they need to execute nevertheless.
        array_job_index = {}
        tasks = set()
        nodes = []
        sub_workflows = set()
        visited_nodes = set()
        generated_ids = {}
        effective_failure_ratio = self._allowed_failure_ratio or 0.0

        # TODO: This function needs to be cleaned up.
        # The reason we chain these two together is because we allow users to not have to explicitly "yield" the
        # node. As long as the subtask/lp/subwf has an output that's referenced, it'll get picked up.
        for sub_task_node in _itertools.chain(yielded_sub_tasks,
                                              upstream_nodes):
            if sub_task_node in visited_nodes:
                continue
            visited_nodes.add(sub_task_node)
            executable = sub_task_node.executable_sdk_object

            # If the executable object that we're dealing with is registerable (ie, SdkRunnableLaunchPlan, SdkWorkflow
            # SdkTask, or SdkRunnableTask), then it should have the ability to give itself a name. After assigning
            # itself the name, also make sure the id is properly set according to current config values.
            if isinstance(executable, _registerable.RegisterableEntity):
                executable.auto_assign_name()
                executable._id = _identifier.Identifier(
                    executable.resource_type,
                    _internal_config.TASK_PROJECT.get()
                    or _internal_config.PROJECT.get(),
                    _internal_config.TASK_DOMAIN.get()
                    or _internal_config.DOMAIN.get(),
                    executable.platform_valid_name,
                    _internal_config.TASK_VERSION.get()
                    or _internal_config.VERSION.get())

            # Generate an id that's unique in the document (if the same task is used multiple times with
            # different resources, executable_sdk_object.id will be the same but generated node_ids should not
            # be).
            safe_task_id = _six.text_type(
                sub_task_node.executable_sdk_object.id)
            if safe_task_id in generated_ids:
                new_count = generated_ids[
                    safe_task_id] = generated_ids[safe_task_id] + 1
            else:
                new_count = generated_ids[safe_task_id] = 0
            unique_node_id = _dnsify("{}-{}".format(safe_task_id, new_count))

            # Handling case where the yielded node is launch plan
            if isinstance(sub_task_node.executable_sdk_object,
                          _launch_plan.SdkLaunchPlan):
                node = sub_task_node.assign_id_and_return(unique_node_id)
                _append_node(generated_files, node, nodes, sub_task_node)
            # Handling case where the yielded node is launching a sub-workflow
            elif isinstance(sub_task_node.executable_sdk_object,
                            _workflow.SdkWorkflow):
                node = sub_task_node.assign_id_and_return(unique_node_id)
                _append_node(generated_files, node, nodes, sub_task_node)
                # Add the workflow itself to the yielded sub-workflows
                sub_workflows.add(sub_task_node.executable_sdk_object)
                # Recursively discover statically defined upstream entities (tasks, wfs)
                SdkDynamicTask._add_upstream_entities(
                    sub_task_node.executable_sdk_object, sub_workflows, tasks)
            # Handling tasks
            else:
                # If the task can run as an array job, group its instances together. Otherwise, keep each
                # invocation as a separate node.
                if SdkDynamicTask._can_run_as_array(
                        sub_task_node.executable_sdk_object.type):
                    if sub_task_node.executable_sdk_object in array_job_index:
                        array_job, node = array_job_index[
                            sub_task_node.executable_sdk_object]
                        array_job.size += 1
                        array_job.min_successes = int(
                            math.ceil((1 - effective_failure_ratio) *
                                      array_job.size))
                    else:
                        array_job = self._create_array_job(
                            inputs_prefix=unique_node_id)
                        node = sub_task_node.assign_id_and_return(
                            unique_node_id)
                        array_job_index[
                            sub_task_node.executable_sdk_object] = (array_job,
                                                                    node)

                    node_index = _six.text_type(array_job.size - 1)
                    for k, node_output in _six.iteritems(
                            sub_task_node.outputs):
                        if not node_output.sdk_node.id:
                            node_output.sdk_node.assign_id_and_return(node.id)
                        node_output.var = "[{}].{}".format(
                            node_index, node_output.var)

                    # Upload inputs to working directory under /array_job.input_ref/<index>/inputs.pb
                    input_path = _os.path.join(node.id, node_index,
                                               _constants.INPUT_FILE_NAME)
                    generated_files[input_path] = _literal_models.LiteralMap(
                        literals={
                            binding.var: binding.binding.to_literal_model()
                            for binding in sub_task_node.inputs
                        })
                else:
                    node = sub_task_node.assign_id_and_return(unique_node_id)
                    tasks.add(sub_task_node.executable_sdk_object)
                    _append_node(generated_files, node, nodes, sub_task_node)

        # assign custom field to the ArrayJob properties computed.
        for task, (array_job, _) in _six.iteritems(array_job_index):
            # TODO: Reconstruct task template object instead of modifying an existing one?
            tasks.add(
                task.assign_custom_and_return(
                    array_job.to_dict()).assign_type_and_return(
                        _constants.SdkTaskType.CONTAINER_ARRAY_TASK))

        # min_successes is absolute; it is computed as the complement of allowed_failure_ratio
        # (i.e. 1 - ratio) multiplied by the total count to yield an absolute number.
        nodes.extend([
            array_job_node for (_, array_job_node) in array_job_index.values()
        ])
        dynamic_job_spec = _dynamic_job.DynamicJobSpec(
            min_successes=len(nodes),
            tasks=list(tasks),
            nodes=nodes,
            outputs=output_bindings,
            subworkflows=list(sub_workflows))

        return dynamic_job_spec, generated_files
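Both dynamic-task variants grow an array job one sub-task at a time and recompute its min_successes as the ceiling of (1 - allowed_failure_ratio) times the current size. The arithmetic in isolation:

import math

def min_successes(size, allowed_failure_ratio=0.0):
    # Absolute success count derived from the tolerated failure ratio.
    return int(math.ceil((1 - allowed_failure_ratio) * size))

assert min_successes(10) == 10       # no failures tolerated
assert min_successes(10, 0.25) == 8  # up to a quarter of the sub-tasks may fail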