def __init__(self, type, metadata, interface, custom, container=None): """ :param Text type: This is used to define additional extensions for use by Propeller or SDK. :param TaskMetadata metadata: This contains information needed at runtime to determine behavior such as whether or not outputs are discoverable, timeouts, and retries. :param flytekit.common.interface.TypedInterface interface: The interface definition for this task. :param dict[Text, T] custom: Arbitrary type for use by plugins. :param Container container: Provides the necessary entrypoint information for execution. For instance, a Container might be specified with the necessary command line arguments. """ # TODO: Remove the identifier portion and fill in with local values. super(SdkTask, self).__init__( _identifier.Identifier( _identifier_model.ResourceType.TASK, _internal_config.PROJECT.get(), _internal_config.DOMAIN.get(), _uuid.uuid4().hex, _internal_config.VERSION.get(), ), type, metadata, interface, custom, container=container, )
def __init__(self, inputs, outputs, nodes):
    """
    Build an SdkWorkflow from resolved inputs, outputs, and nodes.

    :param list[flytekit.common.promise.Input] inputs:
    :param list[Output] outputs:
    :param list[flytekit.common.nodes.SdkNode] nodes:
    """
    # Every upstream dependency must already carry an id, otherwise the workflow
    # description is incomplete and cannot be compiled.
    for node in nodes:
        for parent in node.upstream_nodes:
            if parent.id is None:
                raise _user_exceptions.FlyteAssertion(
                    "Some nodes contained in the workflow were not found in the workflow description. Please "
                    "ensure all nodes are either assigned to attributes within the class or an element in a "
                    "list, dict, or tuple which is stored as an attribute in the class."
                )

    # Placeholder identifier: config-derived project/domain/version, random name.
    workflow_id = _identifier.Identifier(
        _identifier_model.ResourceType.WORKFLOW,
        _internal_config.PROJECT.get(),
        _internal_config.DOMAIN.get(),
        _uuid.uuid4().hex,
        _internal_config.VERSION.get(),
    )
    typed_interface = _interface.TypedInterface(
        {i.name: i.var for i in inputs},
        {o.name: o.var for o in outputs},
    )
    bindings = [_literal_models.Binding(o.name, o.binding_data) for o in outputs]

    super(SdkWorkflow, self).__init__(
        id=workflow_id,
        metadata=_workflow_models.WorkflowMetadata(),
        interface=typed_interface,
        nodes=nodes,
        outputs=bindings,
    )
    self._user_inputs = inputs
    self._upstream_entities = {node.executable_sdk_object for node in nodes}
def __init__(self, inputs, outputs, nodes, id=None, metadata=None, metadata_defaults=None, interface=None,
             output_bindings=None):
    """
    Build an SdkWorkflow, allowing each parent-constructor argument to be overridden.

    :param list[flytekit.common.promise.Input] inputs:
    :param list[Output] outputs:
    :param list[flytekit.common.nodes.SdkNode] nodes:
    :param flytekit.models.core.identifier.Identifier id: This is an autogenerated id by the system. The id is
        globally unique across Flyte.
    :param WorkflowMetadata metadata: This contains information on how to run the workflow.
    :param flytekit.models.core.workflow.WorkflowMetadataDefaults metadata_defaults: Defaults to be passed to
        nodes contained within the workflow.
    :param flytekit.models.interface.TypedInterface interface: Defines a strongly typed interface for the
        Workflow (inputs, outputs). This can include some optional parameters.
    :param list[flytekit.models.literals.Binding] output_bindings: A list of output bindings that specify how to
        construct workflow outputs. Bindings can pull node outputs or specify literals. All workflow outputs
        specified in the interface field must be bound in order for the workflow to be validated. A workflow has
        an implicit dependency on all of its nodes to execute successfully in order to bind final outputs.
    """
    # Every upstream dependency must already carry an id, otherwise the workflow
    # description is incomplete and cannot be compiled.
    for n in nodes:
        for upstream in n.upstream_nodes:
            if upstream.id is None:
                raise _user_exceptions.FlyteAssertion(
                    "Some nodes contained in the workflow were not found in the workflow description. Please "
                    "ensure all nodes are either assigned to attributes within the class or an element in a "
                    "list, dict, or tuple which is stored as an attribute in the class."
                )

    # Allow overrides if specified for all the arguments to the parent class constructor
    id = id if id is not None else _identifier.Identifier(
        _identifier_model.ResourceType.WORKFLOW,
        _internal_config.PROJECT.get(),
        _internal_config.DOMAIN.get(),
        _uuid.uuid4().hex,
        _internal_config.VERSION.get(),
    )
    metadata = metadata if metadata is not None else _workflow_models.WorkflowMetadata()
    # Bug fix: the metadata_defaults argument was previously accepted but silently
    # discarded — a fresh WorkflowMetadataDefaults() was always passed to the parent.
    # Honor the caller's override like every other parameter.
    metadata_defaults = (
        metadata_defaults if metadata_defaults is not None else _workflow_models.WorkflowMetadataDefaults()
    )
    interface = interface if interface is not None else _interface.TypedInterface(
        {v.name: v.var for v in inputs}, {v.name: v.var for v in outputs})
    output_bindings = output_bindings if output_bindings is not None else \
        [_literal_models.Binding(v.name, v.binding_data) for v in outputs]

    super(SdkWorkflow, self).__init__(
        id=id,
        metadata=metadata,
        metadata_defaults=metadata_defaults,
        interface=interface,
        nodes=nodes,
        outputs=output_bindings,
    )
    self._user_inputs = inputs
    self._upstream_entities = set(n.executable_sdk_object for n in nodes)
def serialize_tasks_only(pkgs, folder=None):
    """
    Serialize only the SdkTask entities discovered in ``pkgs`` to protobuf files,
    writing a companion ``.identifier.pb`` per task, optionally under ``folder``.

    :param list[Text] pkgs:
    :param Text folder:
    :return:
    """
    # m = module (i.e. python file)
    # k = value of dir(m), type str
    # o = object (e.g. SdkWorkflow)
    discovered_tasks = []
    for m, k, o in iterate_registerable_entities_in_order(pkgs, include_entities={_sdk_task.SdkTask}):
        fqdn = _utils.fqdn(m.__name__, k, entity_type=o.resource_type)
        _logging.debug("Found module {}\n K: {} Instantiated in {}".format(m, k, o._instantiated_in))
        # Placeholder ids: concrete project/domain/version are chosen at registration time.
        o._id = _identifier.Identifier(
            o.resource_type, _PROJECT_PLACEHOLDER, _DOMAIN_PLACEHOLDER, fqdn, _VERSION_PLACEHOLDER
        )
        discovered_tasks.append(o)

    pad = _determine_text_chars(len(discovered_tasks))
    for index, entity in enumerate(discovered_tasks):
        serialized = entity.serialize()
        prefix = str(index).zfill(pad)

        fname = "{}_{}.pb".format(prefix, entity._id.name)
        click.echo(" Writing {} to\n {}".format(entity._id, fname))
        if folder:
            fname = _os.path.join(folder, fname)
        _write_proto_to_file(serialized, fname)

        # Also persist the identifier itself so the serialized artifact is unambiguous.
        identifier_fname = "{}_{}.identifier.pb".format(prefix, entity._id.name)
        if folder:
            identifier_fname = _os.path.join(folder, identifier_fname)
        _write_proto_to_file(entity._id.to_flyte_idl(), identifier_fname)
def register_and_launch(self, project, domain, name=None, version=None, inputs=None):
    """
    Register this task under a (possibly auto-generated) name/version, then launch it.

    :param Text project: The project in which to register and launch this task.
    :param Text domain: The domain in which to register and launch this task.
    :param Text name: The name to give this task.
    :param Text version: The version in which to register this task
    :param dict[Text, Any] inputs: A dictionary of Python standard inputs that will be type-checked, then
        compiled to a LiteralMap.
    :rtype: flytekit.common.workflow_execution.SdkWorkflowExecution
    """
    self.validate()
    version = self._produce_deterministic_version(version)

    # Bug fix: the original `if name is None:` guard left `generated_name` undefined
    # when name was an empty string (falsy but not None), which raised NameError at
    # the fallback assignment. Treat any falsy name as "not provided".
    if not name:
        try:
            self.auto_assign_name()
            generated_name = self._platform_valid_name
        except _system_exceptions.FlyteSystemException:
            # If we're not able to assign a platform valid name, use the deterministically-produced version instead.
            generated_name = version
        name = generated_name

    id_to_register = _identifier.Identifier(_identifier_model.ResourceType.TASK, project, domain, name, version)
    old_id = self.id
    try:
        self._id = id_to_register
        _engine_loader.get_engine().get_task(self).register(id_to_register)
    except BaseException:
        # Restore the previous id on ANY failure (including KeyboardInterrupt,
        # matching the original bare `except:`) before re-raising.
        self._id = old_id
        raise
    return self.launch(project, domain, inputs=inputs)
def register(self, project, domain, name, version): """ :param Text project: The project in which to register this task. :param Text domain: The domain in which to register this task. :param Text name: The name to give this task. :param Text version: The version in which to register this task. """ # TODO: Revisit the notion of supplying the project, domain, name, version, as opposed to relying on the # current ID. self.validate() id_to_register = _identifier.Identifier( _identifier_model.ResourceType.TASK, project, domain, name, version) old_id = self.id client = _flyte_engine.get_client() try: self._id = id_to_register client.create_task(id_to_register, _task_model.TaskSpec(self)) self._id = old_id self._has_registered = True return str(id_to_register) except _user_exceptions.FlyteEntityAlreadyExistsException: pass except Exception: self._id = old_id raise
def fetch(cls, project, domain, name, version=None):
    """
    Fetch a workflow from Admin and return it as a hydrated SdkWorkflow.

    :param Text project:
    :param Text domain:
    :param Text name:
    :param Text version:
    :rtype: SdkWorkflow
    """
    resolved_version = version or _internal_config.VERSION.get()
    workflow_id = _identifier.Identifier(
        _identifier_model.ResourceType.WORKFLOW, project, domain, name, resolved_version
    )
    admin_workflow = _flyte_engine.get_client().get_workflow(workflow_id)
    compiled = admin_workflow.closure.compiled_workflow

    # Index sub-workflows and tasks by their template ids for promotion.
    sub_workflows_by_id = {sub.template.id: sub.template for sub in compiled.sub_workflows}
    tasks_by_id = {task.template.id: task.template for task in compiled.tasks}

    workflow = cls.promote_from_model(compiled.primary.template, sub_workflows_by_id, tasks_by_id)
    workflow._id = workflow_id
    workflow._has_registered = True
    return workflow
def fetch(cls, project, domain, name, version=None):
    """
    Fetch a launch plan from Admin and return it as a hydrated SdkLaunchPlan.

    :param Text project:
    :param Text domain:
    :param Text name:
    :param Text version: [Optional] If not set, the SDK will fetch the active launch plan for the given
        project, domain, and name.
    :rtype: SdkLaunchPlan
    """
    from flytekit.common import workflow as _workflow

    launch_plan_id = _identifier.Identifier(
        _identifier_model.ResourceType.LAUNCH_PLAN, project, domain, name, version
    )
    client = _flyte_engine.get_client()
    if launch_plan_id.version:
        # Exact version requested.
        lp = client.get_launch_plan(launch_plan_id)
    else:
        # No version given: resolve the currently-active launch plan by name.
        lp = client.get_active_launch_plan(
            _common_models.NamedEntityIdentifier(
                launch_plan_id.project, launch_plan_id.domain, launch_plan_id.name
            )
        )

    sdk_lp = cls.promote_from_model(lp.spec)
    sdk_lp._id = lp.id

    # TODO: Add a test for this, and this function as a whole
    wf_id = sdk_lp.workflow_id
    backing_wf = _workflow.SdkWorkflow.fetch(wf_id.project, wf_id.domain, wf_id.name, wf_id.version)
    sdk_lp._interface = backing_wf.interface
    sdk_lp._has_registered = True
    return sdk_lp
def register_all(project, domain, pkgs, test, version):
    """
    Discover every registerable entity in ``pkgs`` and register each one with Admin
    under the given project/domain/version. When ``test`` is set, only report what
    would be registered.
    """
    if test:
        click.echo('Test switch enabled, not doing anything...')

    click.echo('Running task, workflow, and launch plan registration for {}, {}, {} with version {}'.format(
        project, domain, pkgs, version))

    # m = module (i.e. python file)
    # k = value of dir(m), type str
    # o = object (e.g. SdkWorkflow)
    discovered = []
    for module, attr, entity in iterate_registerable_entities_in_order(pkgs):
        fqdn = _utils.fqdn(module.__name__, attr, entity_type=entity.resource_type)
        _logging.debug("Found module {}\n K: {} Instantiated in {}".format(module, attr, entity._instantiated_in))
        entity._id = _identifier.Identifier(entity.resource_type, project, domain, fqdn, version)
        discovered.append(entity)

    for entity in discovered:
        label = "{}:".format(entity.entity_type_text)
        if test:
            click.echo("Would register {:20} {}".format(label, entity.id.name))
        else:
            click.echo("Registering {:20} {}".format(label, entity.id.name))
            entity.register(project, domain, entity.id.name, version)
def register(self, project, domain, name, version):
    """
    Register this workflow (and its sub-workflows) with the Flyte Admin service.

    :param Text project:
    :param Text domain:
    :param Text name:
    :param Text version:
    """
    self.validate()
    id_to_register = _identifier.Identifier(
        _identifier_model.ResourceType.WORKFLOW, project, domain, name, version)
    old_id = self.id
    # Adopt the registration id up-front so the serialized WorkflowSpec carries it.
    self._id = id_to_register
    try:
        client = _flyte_engine.get_client()
        sub_workflows = self.get_sub_workflows()
        client.create_workflow(
            id_to_register,
            _admin_workflow_model.WorkflowSpec(
                self,
                sub_workflows,
            ),
        )
        # NOTE(review): redundant — self._id was already set to id_to_register above.
        self._id = id_to_register
        self._has_registered = True
        return str(id_to_register)
    except _user_exceptions.FlyteEntityAlreadyExistsException:
        # NOTE(review): falls through and implicitly returns None, with _has_registered
        # left unset — confirm this is intentional for the already-registered case.
        pass
    except Exception:
        # Any other failure: restore the previous id before propagating.
        self._id = old_id
        raise
def __init__( self, inputs: List[_promise.Input], nodes: List[_nodes.SdkNode], interface, output_bindings, id=None, metadata=None, metadata_defaults=None, disable_default_launch_plan=False, ): """ :param list[flytekit.common.nodes.SdkNode] nodes: :param flytekit.models.interface.TypedInterface interface: Defines a strongly typed interface for the Workflow (inputs, outputs). This can include some optional parameters. :param list[flytekit.models.literals.Binding] output_bindings: A list of output bindings that specify how to construct workflow outputs. Bindings can pull node outputs or specify literals. All workflow outputs specified in the interface field must be bound in order for the workflow to be validated. A workflow has an implicit dependency on all of its nodes to execute successfully in order to bind final outputs. :param flytekit.models.core.identifier.Identifier id: This is an autogenerated id by the system. The id is globally unique across Flyte. :param WorkflowMetadata metadata: This contains information on how to run the workflow. :param flytekit.models.core.workflow.WorkflowMetadataDefaults metadata_defaults: Defaults to be passed to nodes contained within workflow. :param bool disable_default_launch_plan: Determines whether to create a default launch plan for the workflow. """ # Save the promise.Input objects for future use. 
self._user_inputs = inputs # Set optional settings id = ( id if id is not None else _identifier.Identifier( _identifier_model.ResourceType.WORKFLOW, _internal_config.PROJECT.get(), _internal_config.DOMAIN.get(), _uuid.uuid4().hex, _internal_config.VERSION.get(), ) ) metadata = metadata if metadata is not None else _workflow_models.WorkflowMetadata() metadata_defaults = ( metadata_defaults if metadata_defaults is not None else _workflow_models.WorkflowMetadataDefaults() ) super(SdkRunnableWorkflow, self).__init__( nodes=nodes, interface=interface, output_bindings=output_bindings, id=id, metadata=metadata, metadata_defaults=metadata_defaults, ) # Set this last as it's set in constructor self._upstream_entities = set(n.executable_sdk_object for n in nodes) self._should_create_default_launch_plan = not disable_default_launch_plan
def construct_from_class_definition(
    cls,
    inputs: List[_promise.Input],
    outputs: List[Output],
    nodes: List[_nodes.SdkNode],
    metadata: _workflow_models.WorkflowMetadata = None,
    metadata_defaults: _workflow_models.WorkflowMetadataDefaults = None,
    disable_default_launch_plan: bool = False,
) -> "SdkRunnableWorkflow":
    """
    Backwards-compatible constructor for class-defined Workflows.

    :param list[flytekit.common.promise.Input] inputs:
    :param list[Output] outputs:
    :param list[flytekit.common.nodes.SdkNode] nodes:
    :param WorkflowMetadata metadata: This contains information on how to run the workflow.
    :param flytekit.models.core.workflow.WorkflowMetadataDefaults metadata_defaults: Defaults to be passed to
        nodes contained within workflow.
    :param bool disable_default_launch_plan: Determines whether to create a default launch plan for the
        workflow or not.
    :rtype: SdkRunnableWorkflow
    """
    # Reject the workflow if any node depends on an upstream node with no id,
    # i.e. one that was never captured in the class definition.
    if any(upstream.id is None for node in nodes for upstream in node.upstream_nodes):
        raise _user_exceptions.FlyteAssertion(
            "Some nodes contained in the workflow were not found in the workflow description. Please "
            "ensure all nodes are either assigned to attributes within the class or an element in a "
            "list, dict, or tuple which is stored as an attribute in the class."
        )

    workflow_id = _identifier.Identifier(
        _identifier_model.ResourceType.WORKFLOW,
        _internal_config.PROJECT.get(),
        _internal_config.DOMAIN.get(),
        _uuid.uuid4().hex,
        _internal_config.VERSION.get(),
    )
    typed_interface = _interface.TypedInterface(
        {i.name: i.var for i in inputs}, {o.name: o.var for o in outputs}
    )
    bindings = [_literal_models.Binding(o.name, o.binding_data) for o in outputs]

    return cls(
        inputs=inputs,
        nodes=nodes,
        interface=typed_interface,
        output_bindings=bindings,
        id=workflow_id,
        metadata=metadata,
        metadata_defaults=metadata_defaults,
        disable_default_launch_plan=disable_default_launch_plan,
    )
def serialize_all(project, domain, pkgs, version, folder=None):
    """
    Serialize every registerable entity discovered in ``pkgs`` to protobuf files.

    In order to register, we have to comply with Admin's endpoints. Those endpoints take the following objects:

        flyteidl.admin.launch_plan_pb2.LaunchPlanSpec
        flyteidl.admin.workflow_pb2.WorkflowSpec
        flyteidl.admin.task_pb2.TaskSpec

    However, if we were to merely call .to_flyte_idl() on all the discovered entities, what we would get are:

        flyteidl.admin.launch_plan_pb2.LaunchPlanSpec
        flyteidl.core.workflow_pb2.WorkflowTemplate
        flyteidl.core.tasks_pb2.TaskTemplate

    For Workflows and Tasks therefore, there is special logic in the serialize function that translates these
    objects.

    :param Text project:
    :param Text domain:
    :param list[Text] pkgs:
    :param Text version:
    :param Text folder:
    :return:
    """
    # m = module (i.e. python file)
    # k = value of dir(m), type str
    # o = object (e.g. SdkWorkflow)
    loaded_entities = []
    for m, k, o in iterate_registerable_entities_in_order(pkgs):
        name = _utils.fqdn(m.__name__, k, entity_type=o.resource_type)
        _logging.debug("Found module {}\n K: {} Instantiated in {}".format(
            m, k, o._instantiated_in))
        o._id = _identifier.Identifier(o.resource_type, project, domain,
                                       name, version)
        loaded_entities.append(o)

    zero_padded_length = _determine_text_chars(len(loaded_entities))
    for i, entity in enumerate(loaded_entities):
        serialized = entity.serialize()
        fname_index = str(i).zfill(zero_padded_length)
        fname = '{}_{}.pb'.format(fname_index, entity._id.name)
        click.echo(' Writing {} to\n {}'.format(entity._id, fname))
        if folder:
            fname = _os.path.join(folder, fname)
        _write_proto_to_file(serialized, fname)

        # Not everything serialized will necessarily have an identifier field in it, even though some do (like the
        # TaskTemplate). To be more rigorous, we write an explicit identifier file that reflects the choices (like
        # project/domain, etc.) made for this serialize call. We should not allow users to specify a different
        # project for instance come registration time, to avoid mismatches between potential internal ids like
        # the TaskTemplate and the registered entity.
        identifier_fname = '{}_{}.identifier.pb'.format(
            fname_index, entity._id.name)
        if folder:
            identifier_fname = _os.path.join(folder, identifier_fname)
        _write_proto_to_file(entity._id.to_flyte_idl(), identifier_fname)
def fetch(cls, project, domain, name, version):
    """
    Fetch a task from Admin via the engine loader and return it as a hydrated SdkTask.

    :param Text project:
    :param Text domain:
    :param Text name:
    :param Text version:
    :rtype: SdkTask
    """
    task_id = _identifier.Identifier(
        _identifier_model.ResourceType.TASK, project, domain, name, version
    )
    fetched = _engine_loader.get_engine().fetch_task(task_id=task_id)
    task = cls.promote_from_model(fetched.closure.compiled_task.template)
    task._id = task_id
    return task
def register(self, project, domain, name, version):
    """
    Register this launch plan with the configured engine under the given identifier.

    :param Text project:
    :param Text domain:
    :param Text name:
    :param Text version:
    """
    self.validate()
    new_id = _identifier.Identifier(
        _identifier_model.ResourceType.LAUNCH_PLAN, project, domain, name, version
    )
    _engine_loader.get_engine().get_launch_plan(self).register(new_id)
    self._id = new_id
    return _six.text_type(self.id)
def register(self, project, domain, name, version):
    """
    Register this task with the configured engine under the given identifier.

    :param Text project: The project in which to register this task.
    :param Text domain: The domain in which to register this task.
    :param Text name: The name to give this task.
    :param Text version: The version in which to register this task.
    :rtype: Text: string form of the registered identifier.
    """
    self.validate()
    id_to_register = _identifier.Identifier(
        _identifier_model.ResourceType.TASK, project, domain, name, version
    )
    old_id = self.id
    try:
        self._id = id_to_register
        _engine_loader.get_engine().get_task(self).register(id_to_register)
        return _six.text_type(self.id)
    except BaseException:
        # Fix: the original bare `except:` is a lint violation (PEP 8 / E722).
        # `except BaseException:` is semantically identical — restore the previous
        # id on ANY failure (including KeyboardInterrupt) and re-raise.
        self._id = old_id
        raise
def fetch(cls, project, domain, name, version=None):
    """
    Fetch a launch plan from Admin via the engine loader and return it as a hydrated SdkLaunchPlan.

    :param Text project:
    :param Text domain:
    :param Text name:
    :param Text version:
    :rtype: SdkLaunchPlan
    """
    # Fall back to the configured version when none is supplied.
    resolved_version = version or _internal_config.VERSION.get()
    lp_id = _identifier.Identifier(
        _identifier_model.ResourceType.LAUNCH_PLAN, project, domain, name, resolved_version
    )
    fetched = _engine_loader.get_engine().fetch_launch_plan(lp_id)
    sdk_lp = cls.promote_from_model(fetched.spec)
    sdk_lp._id = lp_id
    return sdk_lp
def fast_register_all(
    project: str,
    domain: str,
    pkgs: _List[str],
    test: bool,
    version: str,
    source_dir: _os.PathLike,
    dest_dir: _os.PathLike = None,
):
    """
    Fast-register every registerable entity found in ``pkgs``: the source tree is packaged and uploaded
    once, then each runnable task is fast-registered against that uploaded distribution (all other
    entities go through normal registration).

    :param project: The project under which to register.
    :param domain: The domain under which to register.
    :param pkgs: Dotted python packages to scan for registerable entities.
    :param test: When True, only print what would be registered.
    :param version: Explicit version; when falsy, a digest of ``source_dir`` is used instead.
    :param source_dir: Root of the code tree to package and upload.
    :param dest_dir: Optional destination directory for the unpacked code at execution time.
    """
    if test:
        click.echo("Test switch enabled, not doing anything...")

    # Version defaults to a content digest of the source tree, so identical code
    # always maps to the same version.
    if not version:
        digest = _compute_digest(source_dir)
    else:
        digest = version
    remote_package_path = _upload_package(
        source_dir, digest, _sdk_config.FAST_REGISTRATION_DIR.get())

    click.echo(
        "Running task, workflow, and launch plan fast registration for {}, {}, {} with version {} and code dir {}"
        .format(project, domain, pkgs, digest, source_dir))

    # m = module (i.e. python file)
    # k = value of dir(m), type str
    # o = object (e.g. SdkWorkflow)
    for m, k, o in iterate_registerable_entities_in_order(pkgs):
        name = _utils.fqdn(m.__name__, k, entity_type=o.resource_type)
        o._id = _identifier.Identifier(o.resource_type, project, domain,
                                       name, digest)
        if test:
            click.echo("Would fast register {:20} {}".format(
                "{}:".format(o.entity_type_text), o.id.name))
        else:
            click.echo("Fast registering {:20} {}".format(
                "{}:".format(o.entity_type_text), o.id.name))
            # NOTE(review): the return value of this call is discarded — confirm whether
            # the computed distribution location should be passed along or the call removed.
            _get_additional_distribution_loc(
                _sdk_config.FAST_REGISTRATION_DIR.get(), digest)
            if isinstance(o, _sdk_runnable_task.SdkRunnableTask):
                o.fast_register(project, domain, o.id.name, digest,
                                remote_package_path, dest_dir)
            else:
                o.register(project, domain, o.id.name, digest)
def fetch(cls, project, domain, name, version=None):
    """
    Fetch a workflow from Admin via the engine loader and return it as a hydrated SdkWorkflow.

    :param Text project:
    :param Text domain:
    :param Text name:
    :param Text version:
    :rtype: SdkWorkflow
    """
    # Fall back to the configured version when none is supplied.
    resolved_version = version or _internal_config.VERSION.get()
    workflow_id = _identifier.Identifier(
        _identifier_model.ResourceType.WORKFLOW, project, domain, name, resolved_version
    )
    fetched = _engine_loader.get_engine().fetch_workflow(workflow_id)
    workflow = cls.promote_from_model(
        fetched.closure.compiled_workflow.primary.template
    )
    workflow._id = workflow_id
    return workflow
def register(self, project, domain, name, version):
    """
    Register this launch plan with the Flyte Admin service under the given identifier.

    :param Text project:
    :param Text domain:
    :param Text name:
    :param Text version:
    """
    self.validate()
    new_id = _identifier.Identifier(
        _identifier_model.ResourceType.LAUNCH_PLAN, project, domain, name, version
    )
    try:
        _flyte_engine.get_client().create_launch_plan(new_id, self)
    except _user_exceptions.FlyteEntityAlreadyExistsException:
        # Already registered under this exact identifier — treat as success.
        pass
    self._id = new_id
    return str(self.id)
def register(self, project, domain, name, version):
    """
    Register this workflow with the configured engine under the given identifier.

    :param Text project:
    :param Text domain:
    :param Text name:
    :param Text version:
    :rtype: Text: string form of the registered identifier.
    """
    self.validate()
    id_to_register = _identifier.Identifier(
        _identifier_model.ResourceType.WORKFLOW, project, domain, name, version
    )
    old_id = self.id
    try:
        self._id = id_to_register
        _engine_loader.get_engine().get_workflow(self).register(id_to_register)
        return _six.text_type(self.id)
    except BaseException:
        # Fix: the original bare `except:` is a lint violation (PEP 8 / E722).
        # `except BaseException:` is semantically identical — restore the previous
        # id on ANY failure (including KeyboardInterrupt) and re-raise.
        self._id = old_id
        raise
def serialize_all(
    pkgs: List[str] = None,
    local_source_root: str = None,
    folder: str = None,
    mode: SerializationMode = None,
    image: str = None,
    config_path: str = None,
    flytekit_virtualenv_root: str = None,
):
    """
    Serialize every discovered task, workflow, and launch plan to protobuf files.

    In order to register, we have to comply with Admin's endpoints. Those endpoints take the following objects:

        flyteidl.admin.launch_plan_pb2.LaunchPlanSpec
        flyteidl.admin.workflow_pb2.WorkflowSpec
        flyteidl.admin.task_pb2.TaskSpec

    However, if we were to merely call .to_flyte_idl() on all the discovered entities, what we would get are:

        flyteidl.admin.launch_plan_pb2.LaunchPlanSpec
        flyteidl.core.workflow_pb2.WorkflowTemplate
        flyteidl.core.tasks_pb2.TaskTemplate

    For Workflows and Tasks therefore, there is special logic in the serialize function that translates these
    objects.

    :param list[Text] pkgs:
    :param Text folder:
    :return:
    """
    # m = module (i.e. python file)
    # k = value of dir(m), type str
    # o = object (e.g. SdkWorkflow)
    env = {
        _internal_config.CONFIGURATION_PATH.env_var: config_path
        if config_path
        else _internal_config.CONFIGURATION_PATH.get(),
        _internal_config.IMAGE.env_var: image,
    }
    # Serialization uses placeholder project/domain/version; the real values are
    # substituted at registration time.
    serialization_settings = flyte_context.SerializationSettings(
        project=_PROJECT_PLACEHOLDER,
        domain=_DOMAIN_PLACEHOLDER,
        version=_VERSION_PLACEHOLDER,
        image_config=flyte_context.get_image_config(img_name=image),
        env=env,
        flytekit_virtualenv_root=flytekit_virtualenv_root,
        entrypoint_settings=flyte_context.EntrypointSettings(
            path=_os.path.join(flytekit_virtualenv_root, _DEFAULT_FLYTEKIT_RELATIVE_ENTRYPOINT_LOC)),
    )
    with flyte_context.FlyteContext.current_context().new_serialization_settings(
            serialization_settings=serialization_settings) as ctx:
        loaded_entities = []
        for m, k, o in iterate_registerable_entities_in_order(
                pkgs, local_source_root=local_source_root):
            name = _utils.fqdn(m.__name__, k, entity_type=o.resource_type)
            _logging.debug(
                "Found module {}\n K: {} Instantiated in {}".format(
                    m, k, o._instantiated_in))
            o._id = _identifier.Identifier(o.resource_type,
                                           _PROJECT_PLACEHOLDER,
                                           _DOMAIN_PLACEHOLDER, name,
                                           _VERSION_PLACEHOLDER)
            loaded_entities.append(o)
            ctx.serialization_settings.add_instance_var(
                InstanceVar(module=m, name=k, o=o))

        click.echo(
            f"Found {len(flyte_context.FlyteEntities.entities)} tasks/workflows"
        )

        mode = mode if mode else SerializationMode.DEFAULT
        # TODO: Clean up the copy() - it's here because we call get_default_launch_plan, which may create a LaunchPlan
        #       object, which gets added to the FlyteEntities.entities list, which we're iterating over.
        for entity in flyte_context.FlyteEntities.entities.copy():
            # TODO: Add a reachable check. Since these entities are always added by the constructor, weird things can
            #       happen. If someone creates a workflow inside a workflow, we don't actually want the inner workflow
            #       to be registered. Or do we? Certainly, we don't want inner tasks to be registered because we don't
            #       know how to reach them, but perhaps workflows should be okay to take into account generated
            #       workflows. Also a user may import dir_b.workflows from dir_a.workflows but workflow packages might
            #       only specify dir_a
            if isinstance(entity, PythonTask) or isinstance(
                    entity, Workflow) or isinstance(entity, LaunchPlan):
                if isinstance(entity, PythonTask):
                    if mode == SerializationMode.DEFAULT:
                        serializable = get_serializable(
                            ctx.serialization_settings, entity)
                    elif mode == SerializationMode.FAST:
                        serializable = get_serializable(
                            ctx.serialization_settings, entity, fast=True)
                    else:
                        raise AssertionError(
                            f"Unrecognized serialization mode: {mode}")
                else:
                    serializable = get_serializable(ctx.serialization_settings,
                                                    entity)
                loaded_entities.append(serializable)
                # Workflows additionally get a default launch plan serialized alongside them.
                if isinstance(entity, Workflow):
                    lp = LaunchPlan.get_default_launch_plan(ctx, entity)
                    launch_plan = get_serializable(ctx.serialization_settings,
                                                   lp)
                    loaded_entities.append(launch_plan)

        zero_padded_length = _determine_text_chars(len(loaded_entities))
        for i, entity in enumerate(loaded_entities):
            if entity.has_registered:
                _logging.info(
                    f"Skipping entity {entity.id} because already registered")
                continue
            serialized = entity.serialize()
            fname_index = str(i).zfill(zero_padded_length)
            fname = "{}_{}_{}.pb".format(fname_index, entity.id.name,
                                         entity.id.resource_type)
            click.echo(
                f" Writing type: {entity.id.resource_type_name()}, {entity.id.name} to\n {fname}"
            )
            if folder:
                fname = _os.path.join(folder, fname)
            _write_proto_to_file(serialized, fname)
        click.secho(
            f"Successfully serialized {len(loaded_entities)} flyte objects",
            fg="green")
def __init__(
    self,
    sdk_workflow,
    default_inputs=None,
    fixed_inputs=None,
    role=None,
    schedule=None,
    notifications=None,
    labels=None,
    annotations=None,
    auth=None,
):
    """
    Build a runnable launch plan bound to the given workflow.

    :param flytekit.common.workflow.SdkWorkflow sdk_workflow:
    :param dict[Text,flytekit.common.promise.Input] default_inputs:
    :param dict[Text,Any] fixed_inputs: These inputs will be fixed and not need to be set when executing this
        launch plan.
    :param Text role: Deprecated. IAM role to execute this launch plan with.
    :param flytekit.models.schedule.Schedule: Schedule to apply to this workflow.
    :param list[flytekit.models.common.Notification]: List of notifications to apply to this launch plan.
    :param flytekit.models.common.Labels labels: Any custom kubernetes labels to apply to workflows executed by
        this launch plan.
    :param flytekit.models.common.Annotations annotations: Any custom kubernetes annotations to apply to workflows
        executed by this launch plan.
    :param flytekit.models.launch_plan.Auth auth: The auth method with which to execute the workflow.
    """
    # role and auth are mutually exclusive: role is just the deprecated spelling of auth.
    if role and auth:
        raise ValueError(
            "Cannot set both role and auth. Role is deprecated, use auth instead."
        )

    fixed_inputs = fixed_inputs or {}
    default_inputs = default_inputs or {}

    if role:
        auth = _launch_plan_models.Auth(assumable_iam_role=role)

    # NOTE(review): the identifier below uses ResourceType.WORKFLOW even though this is a
    # launch plan — confirm this is intentional (the id is a placeholder replaced at
    # registration time).
    super(SdkRunnableLaunchPlan, self).__init__(
        _identifier.Identifier(_identifier_model.ResourceType.WORKFLOW,
                               _internal_config.PROJECT.get(),
                               _internal_config.DOMAIN.get(),
                               _uuid.uuid4().hex,
                               _internal_config.VERSION.get()),
        _launch_plan_models.LaunchPlanMetadata(
            schedule=schedule or _schedule_model.Schedule(''),
            notifications=notifications or []),
        _interface_models.ParameterMap(default_inputs),
        # Fixed inputs are type-checked against the workflow interface and packed into a
        # LiteralMap up-front.
        _type_helpers.pack_python_std_map_to_literal_map(
            fixed_inputs, {
                k: _type_helpers.get_sdk_type_from_literal_type(var.type)
                for k, var in _six.iteritems(sdk_workflow.interface.inputs)
                if k in fixed_inputs
            }),
        labels or _common_models.Labels({}),
        annotations or _common_models.Annotations({}),
        auth,
    )
    # The launch plan's inputs are the remaining (default) inputs; outputs mirror the workflow's.
    self._interface = _interface.TypedInterface(
        {k: v.var for k, v in _six.iteritems(default_inputs)},
        sdk_workflow.interface.outputs)
    self._upstream_entities = {sdk_workflow}
    self._sdk_workflow = sdk_workflow
def _produce_dynamic_job_spec(self, context, inputs):
    """
    Runs user code and produces future task nodes to run sub-tasks.

    :param context: Execution context handed to the user code.
    :param flytekit.models.literals.LiteralMap inputs: Literal inputs for this dynamic task.
    :rtype: (_dynamic_job.DynamicJobSpec, dict[Text, flytekit.models.common.FlyteIdlEntity])
    """
    # Unpack incoming literals into Python-native values keyed by input name.
    inputs_dict = _type_helpers.unpack_literal_map_to_sdk_python_std(
        inputs, {
            k: _type_helpers.get_sdk_type_from_literal_type(v.type)
            for k, v in _six.iteritems(self.interface.inputs)
        })
    # One promise per declared output; user code assigns values into these references.
    outputs_dict = {
        name: PromiseOutputReference(
            _type_helpers.get_sdk_type_from_literal_type(variable.type))
        for name, variable in _six.iteritems(self.interface.outputs)
    }

    # Because users declare both inputs and outputs in their functions signatures, merge them together
    # before calling user code
    inputs_dict.update(outputs_dict)
    yielded_sub_tasks = [
        sub_task for sub_task in super(SdkDynamicTask, self)._execute_user_code(
            context, inputs_dict) or []
    ]

    # NOTE: from_python_std appends to upstream_nodes as a side effect while building bindings,
    # so the list must exist before the comprehension runs.
    upstream_nodes = list()
    output_bindings = [
        _literal_models.Binding(
            var=name,
            binding=_interface.BindingData.from_python_std(
                b.sdk_type.to_flyte_literal_type(),
                b.raw_value,
                upstream_nodes=upstream_nodes))
        for name, b in _six.iteritems(outputs_dict)
    ]
    upstream_nodes = set(upstream_nodes)

    generated_files = {}
    # Keeping future-tasks in original order. We don't use upstream_nodes exclusively because the parent task can
    # yield sub-tasks that it never uses to produce final outputs but they need to execute nevertheless.
    array_job_index = {}
    tasks = set()
    nodes = []
    sub_workflows = set()
    visited_nodes = set()
    generated_ids = {}
    effective_failure_ratio = self._allowed_failure_ratio or 0.0

    # TODO: This function needs to be cleaned up.
    # The reason we chain these two together is because we allow users to not have to explicitly "yield" the
    # node. As long as the subtask/lp/subwf has an output that's referenced, it'll get picked up.
    for sub_task_node in _itertools.chain(yielded_sub_tasks, upstream_nodes):
        # A node may appear both in the yielded list and the upstream set; process it once.
        if sub_task_node in visited_nodes:
            continue
        visited_nodes.add(sub_task_node)
        executable = sub_task_node.executable_sdk_object

        # If the executable object that we're dealing with is registerable (ie, SdkRunnableLaunchPlan, SdkWorkflow
        # SdkTask, or SdkRunnableTask), then it should have the ability to give itself a name. After assigning
        # itself the name, also make sure the id is properly set according to current config values.
        if isinstance(executable, _registerable.RegisterableEntity):
            executable.auto_assign_name()
            executable._id = _identifier.Identifier(
                executable.resource_type,
                _internal_config.TASK_PROJECT.get() or _internal_config.PROJECT.get(),
                _internal_config.TASK_DOMAIN.get() or _internal_config.DOMAIN.get(),
                executable.platform_valid_name,
                _internal_config.TASK_VERSION.get() or _internal_config.VERSION.get())

        # Generate an id that's unique in the document (if the same task is used multiple times with
        # different resources, executable_sdk_object.id will be the same but generated node_ids should not
        # be).
        safe_task_id = _six.text_type(
            sub_task_node.executable_sdk_object.id)
        if safe_task_id in generated_ids:
            new_count = generated_ids[
                safe_task_id] = generated_ids[safe_task_id] + 1
        else:
            new_count = generated_ids[safe_task_id] = 0
        unique_node_id = _dnsify("{}-{}".format(safe_task_id, new_count))

        # Handling case where the yielded node is launch plan
        if isinstance(sub_task_node.executable_sdk_object,
                      _launch_plan.SdkLaunchPlan):
            node = sub_task_node.assign_id_and_return(unique_node_id)
            _append_node(generated_files, node, nodes, sub_task_node)
        # Handling case where the yielded node is launching a sub-workflow
        elif isinstance(sub_task_node.executable_sdk_object,
                        _workflow.SdkWorkflow):
            node = sub_task_node.assign_id_and_return(unique_node_id)
            _append_node(generated_files, node, nodes, sub_task_node)
            # Add the workflow itself to the yielded sub-workflows
            sub_workflows.add(sub_task_node.executable_sdk_object)
            # Recursively discover statically defined upstream entities (tasks, wfs)
            SdkDynamicTask._add_upstream_entities(
                sub_task_node.executable_sdk_object, sub_workflows, tasks)
        # Handling tasks
        else:
            # If the task can run as an array job, group its instances together. Otherwise, keep each
            # invocation as a separate node.
            if SdkDynamicTask._can_run_as_array(
                    sub_task_node.executable_sdk_object.type):
                if sub_task_node.executable_sdk_object in array_job_index:
                    # Existing array job: grow it and recompute the absolute success threshold.
                    array_job, node = array_job_index[
                        sub_task_node.executable_sdk_object]
                    array_job.size += 1
                    array_job.min_successes = int(
                        math.ceil((1 - effective_failure_ratio) * array_job.size))
                else:
                    array_job = self._create_array_job(
                        inputs_prefix=unique_node_id)
                    node = sub_task_node.assign_id_and_return(
                        unique_node_id)
                    array_job_index[
                        sub_task_node.executable_sdk_object] = (array_job, node)

                # Rewrite this invocation's outputs to index into the array job's output list.
                node_index = _six.text_type(array_job.size - 1)
                for k, node_output in _six.iteritems(
                        sub_task_node.outputs):
                    if not node_output.sdk_node.id:
                        node_output.sdk_node.assign_id_and_return(node.id)
                    node_output.var = "[{}].{}".format(
                        node_index, node_output.var)

                # Upload inputs to working directory under /array_job.input_ref/<index>/inputs.pb
                input_path = _os.path.join(node.id, node_index,
                                           _constants.INPUT_FILE_NAME)
                generated_files[input_path] = _literal_models.LiteralMap(
                    literals={
                        binding.var: binding.binding.to_literal_model()
                        for binding in sub_task_node.inputs
                    })
            else:
                node = sub_task_node.assign_id_and_return(unique_node_id)
                tasks.add(sub_task_node.executable_sdk_object)
                _append_node(generated_files, node, nodes, sub_task_node)

    # assign custom field to the ArrayJob properties computed.
    for task, (array_job, _) in _six.iteritems(array_job_index):
        # TODO: Reconstruct task template object instead of modifying an existing one?
        tasks.add(
            task.assign_custom_and_return(
                array_job.to_dict()).assign_type_and_return(
                    _constants.SdkTaskType.CONTAINER_ARRAY_TASK))

    # min_successes is absolute, it's computed as the reverse of allowed_failure_ratio and multiplied by the
    # total length of tasks to get an absolute count.
    nodes.extend([
        array_job_node for (_, array_job_node) in array_job_index.values()
    ])
    dynamic_job_spec = _dynamic_job.DynamicJobSpec(
        min_successes=len(nodes),
        tasks=list(tasks),
        nodes=nodes,
        outputs=output_bindings,
        subworkflows=list(sub_workflows))

    return dynamic_job_spec, generated_files
def serialize_all(
    pkgs: List[str] = None,
    local_source_root: str = None,
    folder: str = None,
    mode: SerializationMode = None,
    image: str = None,
    config_path: str = None,
    flytekit_virtualenv_root: str = None,
    python_interpreter: str = None,
):
    """
    This function will write to the folder specified the following protobuf types ::

        flyteidl.admin.launch_plan_pb2.LaunchPlan
        flyteidl.admin.workflow_pb2.WorkflowSpec
        flyteidl.admin.task_pb2.TaskSpec

    These can be inspected by calling (in the launch plan case) ::

        flyte-cli parse-proto -f filename.pb -p flyteidl.admin.launch_plan_pb2.LaunchPlan

    See :py:class:`flytekit.models.core.identifier.ResourceType` to match the trailing index in the file name with
    the entity type.

    :param pkgs: Dot-delimited Python packages/subpackages to look into for serialization.
    :param local_source_root: Where to start looking for the code.
    :param folder: Where to write the output protobuf files
    :param mode: Regular vs fast; ``None`` is treated as regular (``SerializationMode.DEFAULT``).
    :param image: The fully qualified and versioned default image to use
    :param config_path: Path to the config file, if any, to be used during serialization
    :param flytekit_virtualenv_root: The full path of the virtual env in the container.
    :param python_interpreter: Path to the python interpreter inside the container.
    """
    # m = module (i.e. python file)
    # k = value of dir(m), type str
    # o = object (e.g. SdkWorkflow)
    env = {
        _internal_config.CONFIGURATION_PATH.env_var:
        config_path
        if config_path else _internal_config.CONFIGURATION_PATH.get(),
        _internal_config.IMAGE.env_var:
        image,
    }

    # BUGFIX: the signature defaults mode to None, but the validity check below would raise for
    # None. Treat a missing mode as regular serialization, consistent with the sibling
    # serialize_all implementation.
    mode = mode if mode else SerializationMode.DEFAULT
    if not (mode == SerializationMode.DEFAULT
            or mode == SerializationMode.FAST):
        raise AssertionError(f"Unrecognized serialization mode: {mode}")
    fast_serialization_settings = flyte_context.FastSerializationSettings(
        enabled=mode == SerializationMode.FAST,
        # TODO: if we want to move the destination dir as a serialization argument, we should initialize it here
    )
    serialization_settings = flyte_context.SerializationSettings(
        project=_PROJECT_PLACEHOLDER,
        domain=_DOMAIN_PLACEHOLDER,
        version=_VERSION_PLACEHOLDER,
        image_config=flyte_context.get_image_config(img_name=image),
        env=env,
        flytekit_virtualenv_root=flytekit_virtualenv_root,
        python_interpreter=python_interpreter,
        entrypoint_settings=flyte_context.EntrypointSettings(
            path=_os.path.join(flytekit_virtualenv_root,
                               _DEFAULT_FLYTEKIT_RELATIVE_ENTRYPOINT_LOC)),
        fast_serialization_settings=fast_serialization_settings,
    )
    ctx = flyte_context.FlyteContextManager.current_context(
    ).with_serialization_settings(serialization_settings)
    with flyte_context.FlyteContextManager.with_context(ctx) as ctx:
        old_style_entities = []
        # This first for loop is for legacy API entities - SdkTask, SdkWorkflow, etc. The _get_entity_to_module
        # function that this iterate calls only works on legacy objects
        for m, k, o in iterate_registerable_entities_in_order(
                pkgs, local_source_root=local_source_root):
            name = _utils.fqdn(m.__name__, k, entity_type=o.resource_type)
            _logging.debug(
                "Found module {}\n   K: {} Instantiated in {}".format(
                    m, k, o._instantiated_in))
            # Assign a placeholder identifier; the real project/domain/version are substituted at
            # registration time.
            o._id = _identifier.Identifier(o.resource_type,
                                           _PROJECT_PLACEHOLDER,
                                           _DOMAIN_PLACEHOLDER, name,
                                           _VERSION_PLACEHOLDER)
            old_style_entities.append(o)

        serialized_old_style_entities = []
        for entity in old_style_entities:
            # Entities already registered with the backend don't need to be serialized again.
            if entity.has_registered:
                _logging.info(
                    f"Skipping entity {entity.id} because already registered")
                continue
            serialized_old_style_entities.append(entity.serialize())

        click.echo(
            f"Found {len(flyte_context.FlyteEntities.entities)} tasks/workflows"
        )

        new_api_model_values = get_registrable_entities(ctx)
        loaded_entities = serialized_old_style_entities + new_api_model_values
        if folder is None:
            folder = "."
        persist_registrable_entities(loaded_entities, folder)

        click.secho(
            f"Successfully serialized {len(loaded_entities)} flyte objects",
            fg="green")
def serialize_all(
    pkgs: List[str] = None,
    local_source_root: str = None,
    folder: str = None,
    mode: SerializationMode = None,
    image: str = None,
    config_path: str = None,
    flytekit_virtualenv_root: str = None,
    python_interpreter: str = None,
):
    """
    This function will write to the folder specified the following protobuf types ::

        flyteidl.admin.launch_plan_pb2.LaunchPlan
        flyteidl.admin.workflow_pb2.WorkflowSpec
        flyteidl.admin.task_pb2.TaskSpec

    These can be inspected by calling (in the launch plan case) ::

        flyte-cli parse-proto -f filename.pb -p flyteidl.admin.launch_plan_pb2.LaunchPlan

    See :py:class:`flytekit.models.core.identifier.ResourceType` to match the trailing index in the file name with
    the entity type.

    :param pkgs: Dot-delimited Python packages/subpackages to look into for serialization.
    :param local_source_root: Where to start looking for the code.
    :param folder: Where to write the output protobuf files
    :param mode: Regular vs fast; ``None`` is treated as regular (``SerializationMode.DEFAULT``).
    :param image: The fully qualified and versioned default image to use
    :param config_path: Path to the config file, if any, to be used during serialization
    :param flytekit_virtualenv_root: The full path of the virtual env in the container.
    :param python_interpreter: Path to the python interpreter inside the container.
    """
    # m = module (i.e. python file)
    # k = value of dir(m), type str
    # o = object (e.g. SdkWorkflow)
    env = {
        _internal_config.CONFIGURATION_PATH.env_var:
        config_path
        if config_path else _internal_config.CONFIGURATION_PATH.get(),
        _internal_config.IMAGE.env_var:
        image,
    }
    serialization_settings = flyte_context.SerializationSettings(
        project=_PROJECT_PLACEHOLDER,
        domain=_DOMAIN_PLACEHOLDER,
        version=_VERSION_PLACEHOLDER,
        image_config=flyte_context.get_image_config(img_name=image),
        env=env,
        flytekit_virtualenv_root=flytekit_virtualenv_root,
        python_interpreter=python_interpreter,
        entrypoint_settings=flyte_context.EntrypointSettings(
            path=_os.path.join(flytekit_virtualenv_root,
                               _DEFAULT_FLYTEKIT_RELATIVE_ENTRYPOINT_LOC)),
    )
    ctx = flyte_context.FlyteContextManager.current_context(
    ).with_serialization_settings(serialization_settings)
    with flyte_context.FlyteContextManager.with_context(ctx) as ctx:
        old_style_entities = []
        # This first for loop is for legacy API entities - SdkTask, SdkWorkflow, etc. The _get_entity_to_module
        # function that this iterate calls only works on legacy objects
        for m, k, o in iterate_registerable_entities_in_order(
                pkgs, local_source_root=local_source_root):
            name = _utils.fqdn(m.__name__, k, entity_type=o.resource_type)
            _logging.debug(
                "Found module {}\n   K: {} Instantiated in {}".format(
                    m, k, o._instantiated_in))
            # Assign a placeholder identifier; real project/domain/version are filled in at registration.
            o._id = _identifier.Identifier(o.resource_type,
                                           _PROJECT_PLACEHOLDER,
                                           _DOMAIN_PLACEHOLDER, name,
                                           _VERSION_PLACEHOLDER)
            old_style_entities.append(o)

        serialized_old_style_entities = []
        for entity in old_style_entities:
            # Entities already registered with the backend don't need to be serialized again.
            if entity.has_registered:
                _logging.info(
                    f"Skipping entity {entity.id} because already registered")
                continue
            serialized_old_style_entities.append(entity.serialize())

        click.echo(
            f"Found {len(flyte_context.FlyteEntities.entities)} tasks/workflows"
        )

        # A missing mode means regular (non-fast) serialization.
        mode = mode if mode else SerializationMode.DEFAULT

        new_api_serializable_entities = OrderedDict()
        # TODO: Clean up the copy() - it's here because we call get_default_launch_plan, which may create a LaunchPlan
        # object, which gets added to the FlyteEntities.entities list, which we're iterating over.
        for entity in flyte_context.FlyteEntities.entities.copy():
            # TODO: Add a reachable check. Since these entities are always added by the constructor, weird things can
            #  happen. If someone creates a workflow inside a workflow, we don't actually want the inner workflow to be
            #  registered. Or do we? Certainly, we don't want inner tasks to be registered because we don't know how
            #  to reach them, but perhaps workflows should be okay to take into account generated workflows.
            #  Also a user may import dir_b.workflows from dir_a.workflows but workflow packages might only
            #  specify dir_a
            if isinstance(entity, PythonTask) or isinstance(
                    entity, WorkflowBase) or isinstance(entity, LaunchPlan):
                if isinstance(entity, PythonTask):
                    # Only tasks differ between fast and regular serialization.
                    if mode == SerializationMode.DEFAULT:
                        get_serializable(new_api_serializable_entities,
                                         ctx.serialization_settings, entity)
                    elif mode == SerializationMode.FAST:
                        get_serializable(new_api_serializable_entities,
                                         ctx.serialization_settings,
                                         entity,
                                         fast=True)
                    else:
                        raise AssertionError(
                            f"Unrecognized serialization mode: {mode}")
                else:
                    get_serializable(new_api_serializable_entities,
                                     ctx.serialization_settings, entity)
                # Every workflow also gets a default launch plan serialized alongside it.
                if isinstance(entity, WorkflowBase):
                    lp = LaunchPlan.get_default_launch_plan(ctx, entity)
                    get_serializable(new_api_serializable_entities,
                                     ctx.serialization_settings, lp)

        new_api_model_values = list(new_api_serializable_entities.values())
        new_api_model_values = list(
            filter(_should_register_with_admin, new_api_model_values))
        new_api_model_values = [v.to_flyte_idl() for v in new_api_model_values]

        loaded_entities = serialized_old_style_entities + new_api_model_values
        # Zero-pad file indices so that lexicographic order matches serialization order.
        zero_padded_length = _determine_text_chars(len(loaded_entities))
        for i, entity in enumerate(loaded_entities):
            fname_index = str(i).zfill(zero_padded_length)
            # The trailing _1/_2/_3 suffix encodes the entity type (see ResourceType in the docstring).
            if isinstance(entity, _idl_admin_TaskSpec):
                fname = "{}_{}_1.pb".format(fname_index,
                                            entity.template.id.name)
            elif isinstance(entity, _idl_admin_WorkflowSpec):
                fname = "{}_{}_2.pb".format(fname_index,
                                            entity.template.id.name)
            elif isinstance(entity, _idl_admin_LaunchPlan):
                fname = "{}_{}_3.pb".format(fname_index, entity.id.name)
            else:
                raise Exception(f"Bad format {type(entity)}")
            click.echo(f" Writing to file: {fname}")
            if folder:
                fname = _os.path.join(folder, fname)
            _write_proto_to_file(entity, fname)

        click.secho(
            f"Successfully serialized {len(loaded_entities)} flyte objects",
            fg="green")