Ejemplo n.º 1
0
def test_dataloadingconfig():
    """Check that ``DataLoadingConfig`` survives a round trip through its IDL form.

    Two variants are exercised: one built from positional arguments only, and
    one that additionally passes an ``io_strategy``.
    """
    config_cls = task.DataLoadingConfig
    base_args = (
        "s3://input/path",
        "s3://output/path",
        True,
        config_cls.LITERALMAP_FORMAT_YAML,
    )

    def check_round_trip(config):
        # Serialize to the IDL message, rebuild, and compare with the source object.
        restored = config_cls.from_flyte_idl(config.to_flyte_idl())
        assert restored == config

    check_round_trip(config_cls(*base_args))
    check_round_trip(config_cls(*base_args, io_strategy=task.IOStrategy()))
Ejemplo n.º 2
0
 def get_container(
         self, settings: SerializationSettings) -> _task_model.Container:
     """Build the container definition for this task.

     The container environment is ``settings.env`` overlaid with
     ``self.environment``; on duplicate keys the task-level value wins.
     When the task declares no environment, ``settings.env`` is passed
     through unchanged.

     :param settings: serialization settings; only ``settings.env`` is read here.
     :return: whatever ``_get_container_definition`` produces for this task's
         image, command, args, data-loading config, environment and
         cpu / memory / ephemeral-storage requests and limits.
     """
     if self.environment:
         # A missing (None) settings.env is treated as empty so that the
         # merge below cannot fail with a TypeError on ``**None``.
         env = {**(settings.env or {}), **self.environment}
     else:
         env = settings.env

     # Data loading is always enabled; the IO strategy is optional.
     data_loading_config = _task_model.DataLoadingConfig(
         input_path=self._input_data_dir,
         output_path=self._output_data_dir,
         format=self._md_format.value,
         enabled=True,
         io_strategy=self._io_strategy.value if self._io_strategy else None,
     )

     requests = self.resources.requests
     limits = self.resources.limits
     return _get_container_definition(
         image=self._image,
         command=self._cmd,
         args=self._args,
         data_loading_config=data_loading_config,
         environment=env,
         cpu_request=requests.cpu,
         cpu_limit=limits.cpu,
         memory_request=requests.mem,
         memory_limit=limits.mem,
         ephemeral_storage_request=requests.ephemeral_storage,
         ephemeral_storage_limit=limits.ephemeral_storage,
     )
Ejemplo n.º 3
0
 def get_container(self, settings: SerializationSettings) -> _task_model.Container:
     """Build the container definition for this task.

     The container environment is ``settings.env`` overlaid with
     ``self.environment``; on duplicate keys the task-level value wins.
     When the task declares no environment, ``settings.env`` is passed
     through unchanged.

     :param settings: serialization settings; only ``settings.env`` is read here.
     :return: whatever ``_get_container_definition`` produces for this task's
         image, command, args, data-loading config and environment.
     """
     if self.environment:
         # A missing (None) settings.env is treated as empty so that the
         # merge below cannot fail with a TypeError on ``**None``.
         env = {**(settings.env or {}), **self.environment}
     else:
         env = settings.env

     # Data loading is always enabled; the IO strategy is optional.
     data_loading_config = _task_model.DataLoadingConfig(
         input_path=self._input_data_dir,
         output_path=self._output_data_dir,
         format=self._md_format.value,
         enabled=True,
         io_strategy=self._io_strategy.value if self._io_strategy else None,
     )
     return _get_container_definition(
         image=self._image,
         command=self._cmd,
         args=self._args,
         data_loading_config=data_loading_config,
         environment=env,
     )
Ejemplo n.º 4
0
    def __init__(
        self,
        inputs: Dict[str, FlyteSdkType],
        image: str,
        outputs: Dict[str, FlyteSdkType] = None,
        input_data_dir: str = None,
        output_data_dir: str = None,
        metadata_format: int = METADATA_FORMAT_JSON,
        io_strategy: _task_models.IOStrategy = None,
        command: List[str] = None,
        args: List[str] = None,
        storage_request: str = None,
        cpu_request: str = None,
        gpu_request: str = None,
        memory_request: str = None,
        storage_limit: str = None,
        cpu_limit: str = None,
        gpu_limit: str = None,
        memory_limit: str = None,
        environment: Dict[str, str] = None,
        interruptible: bool = False,
        discoverable: bool = False,
        discovery_version: str = None,
        retries: int = 1,
        timeout: _datetime.timedelta = None,
    ):
        """
        Assemble the data-loading config, task metadata, typed interface and
        container definition, then initialise the base task with them.

        :param inputs: Input names mapped to SDK types; converted with ``types_to_variable``
        :param image: Container image handed to the container definition
        :param outputs: Output names mapped to SDK types; converted with ``types_to_variable``
        :param input_data_dir: This is the directory where data will be downloaded to
        :param output_data_dir: This is the directory where data will be uploaded from
        :param metadata_format: Format in which the metadata will be available for the script
        :param io_strategy: Forwarded as-is to the data-loading config
        :param command: Command forwarded to the container definition
        :param args: Arguments forwarded to the container definition
        :param storage_request: Forwarded to the container definition
        :param cpu_request: Forwarded to the container definition
        :param gpu_request: Forwarded to the container definition
        :param memory_request: Forwarded to the container definition
        :param storage_limit: Forwarded to the container definition
        :param cpu_limit: Forwarded to the container definition
        :param gpu_limit: Forwarded to the container definition
        :param memory_limit: Forwarded to the container definition
        :param environment: Environment forwarded to the container definition
        :param interruptible: Recorded in the task metadata
        :param discoverable: Recorded in the task metadata
        :param discovery_version: Recorded in the task metadata
        :param retries: Wrapped in a ``RetryStrategy`` for the task metadata
        :param timeout: Recorded in the task metadata; a falsy value becomes a zero ``timedelta``
        """

        # Stored on the instance; the same object is also given to the
        # container definition built further down.
        self._data_loading_config = _task_models.DataLoadingConfig(
            input_path=input_data_dir,
            output_path=output_data_dir,
            format=metadata_format,
            enabled=True,
            io_strategy=io_strategy,
        )

        # This needs to have the proper version reflected in it
        runtime_metadata = _task_models.RuntimeMetadata(
            _task_models.RuntimeMetadata.RuntimeType.FLYTE_SDK,
            flytekit.__version__,
            "python",
        )
        effective_timeout = timeout or _datetime.timedelta(seconds=0)
        task_metadata = _task_models.TaskMetadata(
            discoverable,
            runtime_metadata,
            effective_timeout,
            _literals.RetryStrategy(retries),
            interruptible,
            discovery_version,
            None,
        )

        # The interface is defined using the inputs and outputs
        typed_interface = _interface.TypedInterface(
            inputs=types_to_variable(inputs),
            outputs=types_to_variable(outputs),
        )

        container_definition = _get_container_definition(
            image=image,
            args=args,
            command=command,
            data_loading_config=self._data_loading_config,
            storage_request=storage_request,
            cpu_request=cpu_request,
            gpu_request=gpu_request,
            memory_request=memory_request,
            storage_limit=storage_limit,
            cpu_limit=cpu_limit,
            gpu_limit=gpu_limit,
            memory_limit=memory_limit,
            environment=environment,
        )

        # This sets the base SDKTask with container etc
        super(SdkRawContainerTask, self).__init__(
            _constants.SdkTaskType.RAW_CONTAINER_TASK,
            task_metadata,
            typed_interface,
            None,
            container=container_definition,
        )