Example #1
            external_schema_type="arrow" if pa_schema else None,
            external_schema_bytes=typing.cast(pa.lib.Schema,
                                              pa_schema).to_string().encode()
            if pa_schema else None,
        )

    def get_literal_type(
            self, t: typing.Union[Type[StructuredDataset],
                                  typing.Any]) -> LiteralType:
        """
        Provide a concrete implementation so that writers of custom dataframe handlers don't have to supply one
        themselves; there's nothing special about the literal type here. Any dataframe type will always be associated
        with the structured dataset type. The other aspects of it (columns, external schema type, etc.) can be read
        from the associated metadata.

        :param t: The python dataframe type, which is mostly ignored.
        """
        return LiteralType(structured_dataset_type=self._get_dataset_type(t))

    def guess_python_type(self, literal_type: LiteralType) -> Type[T]:
        # todo: technically we should return the dataframe type specified in the constructor, but to do that,
        #   we'd have to store that, which we don't do today. See possibly #1363
        if literal_type.structured_dataset_type is not None:
            return StructuredDataset
        raise ValueError(
            f"StructuredDatasetTransformerEngine cannot reverse {literal_type}"
        )


flyte_dataset_transformer = StructuredDatasetTransformerEngine()
TypeEngine.register(flyte_dataset_transformer)
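
A minimal usage sketch, not part of the snippet above: once the engine is registered, a task can hand any supported dataframe to it as a StructuredDataset. The column annotation via kwtypes and the task name are assumptions for illustration.

import pandas as pd
from typing_extensions import Annotated

from flytekit import kwtypes, task
from flytekit.types.structured import StructuredDataset

# Hypothetical task: the columns land in the structured dataset metadata,
# while the literal type stays the generic structured dataset type.
@task
def make_dataset() -> Annotated[StructuredDataset, kwtypes(name=str, age=int)]:
    df = pd.DataFrame({"name": ["a", "b"], "age": [1, 2]})
    return StructuredDataset(dataframe=df)
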
Example #2
                blob=Blob(metadata=meta, uri=remote_path or source_path)))

    def to_python_value(
            self, ctx: FlyteContext, lv: Literal,
            expected_python_type: typing.Type[FlyteFile]) -> FlyteFile:

        uri = lv.scalar.blob.uri

        # This is a local file path, like /usr/local/my_file; don't mess with it.
        # Downloading it wouldn't make any sense.
        if not ctx.file_access.is_remote(uri):
            return expected_python_type(uri)

        # For the remote case, return a FlyteFile object that can download the data on demand
        local_path = ctx.file_access.get_random_local_path(uri)

        def _downloader():
            return ctx.file_access.get_data(uri,
                                            local_path,
                                            is_multipart=False)

        expected_format = FlyteFilePathTransformer.get_format(
            expected_python_type)
        ff = FlyteFile[expected_format](local_path, _downloader)
        ff._remote_source = uri

        return ff


TypeEngine.register(FlyteFilePathTransformer())
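
For context, a minimal sketch (task name assumed, not from the source) of consuming the lazily-downloading FlyteFile that to_python_value returns:

from flytekit import task
from flytekit.types.file import FlyteFile

# Hypothetical consumer: for a remote literal, download() invokes the
# _downloader attached by the transformer and returns the local path.
@task
def count_lines(f: FlyteFile) -> int:
    local_path = f.download()
    with open(local_path) as fh:
        return sum(1 for _ in fh)
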
Example #3
    def get_literal_type(self, t: Type[PipelineModel]) -> LiteralType:
        return LiteralType(blob=self._TYPE_INFO)

    def to_literal(
        self,
        ctx: FlyteContext,
        python_val: PipelineModel,
        python_type: Type[PipelineModel],
        expected: LiteralType,
    ) -> Literal:
        local_path = ctx.file_access.get_random_local_path()
        pathlib.Path(local_path).parent.mkdir(parents=True, exist_ok=True)
        python_val.save(local_path)

        remote_dir = ctx.file_access.get_random_remote_directory()
        ctx.file_access.upload_directory(local_path, remote_dir)

        return Literal(scalar=Scalar(blob=Blob(
            uri=remote_dir, metadata=BlobMetadata(type=self._TYPE_INFO))))

    def to_python_value(
            self, ctx: FlyteContext, lv: Literal,
            expected_python_type: Type[PipelineModel]) -> PipelineModel:
        local_dir = ctx.file_access.get_random_local_directory()
        ctx.file_access.download_directory(lv.scalar.blob.uri, local_dir)

        return PipelineModel.load(local_dir)


TypeEngine.register(PySparkPipelineModelTransformer())
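
A minimal sketch, assuming the Spark plugin is installed, of a task that receives the reloaded PipelineModel (the task name is illustrative):

from flytekit import task
from pyspark.ml import PipelineModel

# Hypothetical consumer: to_python_value above has already downloaded the
# saved directory and called PipelineModel.load before this body runs.
@task
def stage_count(model: PipelineModel) -> int:
    return len(model.stages)
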
Example #4
                               fmt=SchemaFormat.PARQUET)
        w.write(python_val)
        remote_path = ctx.file_access.get_random_remote_directory()
        ctx.file_access.put_data(local_dir, remote_path, is_multipart=True)
        return Literal(scalar=Scalar(
            schema=Schema(remote_path, self._get_schema_type())))

    def to_python_value(
            self, ctx: FlyteContext, lv: Literal,
            expected_python_type: Type[pandas.DataFrame]) -> pandas.DataFrame:
        if not (lv and lv.scalar and lv.scalar.schema):
            return pandas.DataFrame()
        local_dir = ctx.file_access.get_random_local_directory()
        ctx.file_access.get_data(lv.scalar.schema.uri,
                                 local_dir,
                                 is_multipart=True)
        r = PandasSchemaReader(local_dir=local_dir,
                               cols=None,
                               fmt=SchemaFormat.PARQUET)
        return r.all()

    def to_html(self, ctx: FlyteContext, python_val: pandas.DataFrame,
                expected_python_type: Type[T]):
        return python_val.describe().to_html()


SchemaEngine.register_handler(
    SchemaHandler("pandas-dataframe-schema", pandas.DataFrame,
                  PandasSchemaReader, PandasSchemaWriter))
TypeEngine.register(PandasDataFrameTransformer())
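
A minimal usage sketch (task and workflow names assumed): with the handler and transformer registered, plain pandas.DataFrame annotations round-trip through Parquet without extra code.

import pandas

from flytekit import task, workflow

@task
def produce() -> pandas.DataFrame:
    return pandas.DataFrame({"a": [1, 2, 3]})

@task
def total(df: pandas.DataFrame) -> int:
    # df is re-read here from the Parquet written by PandasSchemaWriter.
    return int(df["a"].sum())

@workflow
def wf() -> int:
    return total(df=produce())
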
Example #5
                format=self.ONNX_FORMAT,
                dimensionality=BlobType.BlobDimensionality.SINGLE)),
        )))

    def to_python_value(
        self,
        ctx: FlyteContext,
        lv: Literal,
        expected_python_type: Type[ONNXFile],
    ) -> ONNXFile:
        if not (lv.scalar.blob.uri
                and lv.scalar.blob.metadata.format == self.ONNX_FORMAT):
            raise TypeTransformerFailedError(
                f"ONNX format isn't of the expected type {expected_python_type}"
            )

        return ONNXFile(path=lv.scalar.blob.uri)

    def guess_python_type(self,
                          literal_type: LiteralType) -> Type[PyTorch2ONNX]:
        if (literal_type.blob is not None and literal_type.blob.dimensionality
                == BlobType.BlobDimensionality.SINGLE
                and literal_type.blob.format == self.ONNX_FORMAT):
            return PyTorch2ONNX

        raise TypeTransformerFailedError(
            f"Transformer {self} cannot reverse {literal_type}")


TypeEngine.register(PyTorch2ONNXTransformer())
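
A minimal sketch of a downstream task reading the ONNXFile that to_python_value returns (the task itself is an assumption, not part of the plugin):

import os

from flytekit import task
from flytekit.types.file import ONNXFile

# Hypothetical consumer: ONNXFile behaves like a FlyteFile, so download()
# materializes the .onnx blob locally.
@task
def model_size(model: ONNXFile) -> int:
    return os.path.getsize(model.download())
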
Example #6
            metadata=BlobMetadata(type=BlobType(
                format=self.ONNX_FORMAT,
                dimensionality=BlobType.BlobDimensionality.SINGLE)),
        )))

    def to_python_value(
        self,
        ctx: FlyteContext,
        lv: Literal,
        expected_python_type: Type[ONNXFile],
    ) -> ONNXFile:
        if not lv.scalar.blob.uri:
            raise TypeTransformerFailedError(
                f"ONNX format isn't of the expected type {expected_python_type}"
            )

        return ONNXFile(path=lv.scalar.blob.uri)

    def guess_python_type(self,
                          literal_type: LiteralType) -> Type[ScikitLearn2ONNX]:
        if (literal_type.blob is not None and literal_type.blob.dimensionality
                == BlobType.BlobDimensionality.SINGLE
                and literal_type.blob.format == self.ONNX_FORMAT):
            return ScikitLearn2ONNX

        raise TypeTransformerFailedError(
            f"Transformer {self} cannot reverse {literal_type}")


TypeEngine.register(ScikitLearn2ONNXTransformer())
Example #7
        local_dir = ctx.file_access.get_random_local_directory()
        os.makedirs(local_dir, exist_ok=True)
        local_path = ctx.file_access.get_random_local_path()
        uri = os.path.join(local_dir, local_path)
        with open(uri, "w+b") as outfile:
            cloudpickle.dump(python_val, outfile)

        remote_path = ctx.file_access.get_random_remote_path(uri)
        ctx.file_access.put_data(uri, remote_path, is_multipart=False)
        return Literal(scalar=Scalar(blob=Blob(metadata=meta, uri=remote_path)))

    def guess_python_type(self, literal_type: LiteralType) -> typing.Type[FlytePickle[typing.Any]]:
        if (
            literal_type.blob is not None
            and literal_type.blob.dimensionality == _core_types.BlobType.BlobDimensionality.SINGLE
            and literal_type.blob.format == FlytePickleTransformer.PYTHON_PICKLE_FORMAT
        ):
            return FlytePickle

        raise ValueError(f"Transformer {self} cannot reverse {literal_type}")

    def get_literal_type(self, t: Type[T]) -> LiteralType:
        return LiteralType(
            blob=_core_types.BlobType(
                format=self.PYTHON_PICKLE_FORMAT, dimensionality=_core_types.BlobType.BlobDimensionality.SINGLE
            )
        )


TypeEngine.register(FlytePickleTransformer())
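
A minimal sketch of the fallback this transformer provides (the Widget class is purely illustrative): a type with no registered transformer is routed through FlytePickle and the cloudpickle dump shown above.

from flytekit import task

class Widget:
    def __init__(self, size: int):
        self.size = size

# Hypothetical task: Widget has no transformer of its own, so flytekit
# falls back to the pickle transformer in both directions.
@task
def make_widget() -> Widget:
    return Widget(3)
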
Example #8
        # make any sense.
        if not ctx.file_access.is_remote(uri):
            return expected_python_type(uri)

        # For the remote case, return a FlyteDirectory object that can download the data on demand
        local_folder = ctx.file_access.get_random_local_directory()

        def _downloader():
            return ctx.file_access.get_data(uri,
                                            local_folder,
                                            is_multipart=True)

        expected_format = self.get_format(expected_python_type)

        fd = FlyteDirectory.__class_getitem__(expected_format)(local_folder,
                                                               _downloader)
        fd._remote_source = uri

        return fd

    def guess_python_type(
            self, literal_type: LiteralType
    ) -> typing.Type[FlyteDirectory[typing.Any]]:
        if (literal_type.blob is not None and literal_type.blob.dimensionality
                == _core_types.BlobType.BlobDimensionality.MULTIPART):
            return FlyteDirectory.__class_getitem__(literal_type.blob.format)
        raise ValueError(f"Transformer {self} cannot reverse {literal_type}")


TypeEngine.register(FlyteDirToMultipartBlobTransformer())
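
A minimal sketch (task name assumed) of consuming the lazily-downloading FlyteDirectory built above:

import os

from flytekit import task
from flytekit.types.directory import FlyteDirectory

# Hypothetical consumer: download() runs the _downloader, pulling the whole
# multipart blob into the local folder chosen by the transformer.
@task
def file_count(d: FlyteDirectory) -> int:
    return len(os.listdir(d.download()))
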
Example #9
                    metadata=BlobMetadata(
                        type=BlobType(format=self.ONNX_FORMAT, dimensionality=BlobType.BlobDimensionality.SINGLE)
                    ),
                )
            )
        )

    def to_python_value(
        self,
        ctx: FlyteContext,
        lv: Literal,
        expected_python_type: Type[ONNXFile],
    ) -> ONNXFile:
        if not lv.scalar.blob.uri:
            raise TypeTransformerFailedError(f"ONNX format isn't of the expected type {expected_python_type}")

        return ONNXFile(path=lv.scalar.blob.uri)

    def guess_python_type(self, literal_type: LiteralType) -> Type[TensorFlow2ONNX]:
        if (
            literal_type.blob is not None
            and literal_type.blob.dimensionality == BlobType.BlobDimensionality.SINGLE
            and literal_type.blob.format == self.ONNX_FORMAT
        ):
            return TensorFlow2ONNX

        raise TypeTransformerFailedError(f"Transformer {self} cannot reverse {literal_type}")


TypeEngine.register(TensorFlow2ONNXTransformer())
Example #10
            local_path=ctx.file_access.get_random_local_directory(),
            remote_path=lv.scalar.schema.uri,
            downloader=downloader,
            supported_mode=SchemaOpenMode.READ,
        )

    def guess_python_type(self, literal_type: LiteralType) -> Type[T]:
        if not literal_type.schema:
            raise ValueError(f"Cannot reverse {literal_type}")
        columns = {}
        for literal_column in literal_type.schema.columns:
            if literal_column.type == SchemaType.SchemaColumn.SchemaColumnType.INTEGER:
                columns[literal_column.name] = int
            elif literal_column.type == SchemaType.SchemaColumn.SchemaColumnType.FLOAT:
                columns[literal_column.name] = float
            elif literal_column.type == SchemaType.SchemaColumn.SchemaColumnType.STRING:
                columns[literal_column.name] = str
            elif literal_column.type == SchemaType.SchemaColumn.SchemaColumnType.DATETIME:
                columns[literal_column.name] = _datetime.datetime
            elif literal_column.type == SchemaType.SchemaColumn.SchemaColumnType.DURATION:
                columns[literal_column.name] = _datetime.timedelta
            elif literal_column.type == SchemaType.SchemaColumn.SchemaColumnType.BOOLEAN:
                columns[literal_column.name] = bool
            else:
                raise ValueError(
                    f"Unknown schema column type {literal_column}")
        return FlyteSchema[columns]


TypeEngine.register(FlyteSchemaTransformer())
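
A minimal sketch of the annotation that guess_python_type reconstructs: a column-typed FlyteSchema built with kwtypes (the task is illustrative):

from flytekit import kwtypes, task
from flytekit.types.schema import FlyteSchema

# Mirrors FlyteSchema[columns] as returned by guess_python_type above.
MySchema = FlyteSchema[kwtypes(name=str, age=int)]

# Hypothetical consumer: open() in READ mode returns a reader whose all()
# loads the schema into a dataframe.
@task
def row_count(s: MySchema) -> int:
    return len(s.open().all())
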
Example #11
        # This is a local file path, like /usr/local/my_file; don't mess with it.
        # Downloading it wouldn't make any sense.
        if not ctx.file_access.is_remote(uri):
            return expected_python_type(uri)

        # For the remote case, return a FlyteFile object that can download the data on demand
        local_path = ctx.file_access.get_random_local_path(uri)

        def _downloader():
            return ctx.file_access.get_data(uri, local_path, is_multipart=False)

        expected_format = FlyteFilePathTransformer.get_format(expected_python_type)
        ff = FlyteFile.__class_getitem__(expected_format)(local_path, _downloader)
        ff._remote_source = uri

        return ff

    def guess_python_type(self, literal_type: LiteralType) -> typing.Type[FlyteFile[typing.Any]]:
        if (
            literal_type.blob is not None
            and literal_type.blob.dimensionality == BlobType.BlobDimensionality.SINGLE
            and literal_type.blob.format != FlytePickleTransformer.PYTHON_PICKLE_FORMAT
        ):
            return FlyteFile.__class_getitem__(literal_type.blob.format)

        raise ValueError(f"Transformer {self} cannot reverse {literal_type}")


TypeEngine.register(FlyteFilePathTransformer(), additional_types=[os.PathLike])
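
Because the transformer is also registered for os.PathLike, a plain path return value goes through the same upload path; a minimal sketch (the task is an assumption):

import os
import tempfile

from flytekit import task

# Hypothetical task: the returned path is handled by FlyteFilePathTransformer
# and uploaded as a single-part blob.
@task
def emit_path() -> os.PathLike:
    fd, local = tempfile.mkstemp(suffix=".txt")
    with os.fdopen(fd, "w") as f:
        f.write("ok")
    return local
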
Example #12
        remote_path = ctx.file_access.get_random_remote_path(local_path)
        ctx.file_access.put_data(local_path, remote_path, is_multipart=False)
        return Literal(scalar=Scalar(
            blob=Blob(metadata=meta, uri=remote_path)))

    def to_python_value(self, ctx: FlyteContext, lv: Literal,
                        expected_python_type: Type[np.ndarray]) -> np.ndarray:
        try:
            uri = lv.scalar.blob.uri
        except AttributeError:
            raise TypeTransformerFailedError(
                f"Cannot convert from {lv} to {expected_python_type}")

        local_path = ctx.file_access.get_random_local_path()
        ctx.file_access.get_data(uri, local_path, is_multipart=False)

        # load numpy array from a file
        return np.load(file=local_path)

    def guess_python_type(
            self, literal_type: LiteralType) -> typing.Type[np.ndarray]:
        if (literal_type.blob is not None and literal_type.blob.dimensionality
                == _core_types.BlobType.BlobDimensionality.SINGLE
                and literal_type.blob.format == self.NUMPY_ARRAY_FORMAT):
            return np.ndarray

        raise ValueError(f"Transformer {self} cannot reverse {literal_type}")


TypeEngine.register(NumpyArrayTransformer())
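
A minimal sketch (names assumed) of passing np.ndarray through the registered transformer:

import numpy as np

from flytekit import task, workflow

@task
def make_array() -> np.ndarray:
    return np.arange(10)

@task
def array_sum(arr: np.ndarray) -> int:
    # arr is rehydrated by np.load in to_python_value above.
    return int(arr.sum())

@workflow
def wf() -> int:
    return array_sum(arr=make_array())
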
Example #13
    ) -> PyTorchCheckpoint:
        try:
            uri = lv.scalar.blob.uri
        except AttributeError:
            raise TypeTransformerFailedError(f"Cannot convert from {lv} to {expected_python_type}")

        local_path = ctx.file_access.get_random_local_path()
        ctx.file_access.get_data(uri, local_path, is_multipart=False)

        # cpu <-> gpu conversion
        if torch.cuda.is_available():
            map_location = "cuda:0"
        else:
            map_location = torch.device("cpu")

        # load checkpoint from a file
        return typing.cast(PyTorchCheckpoint, torch.load(local_path, map_location=map_location))

    def guess_python_type(self, literal_type: LiteralType) -> Type[PyTorchCheckpoint]:
        if (
            literal_type.blob is not None
            and literal_type.blob.dimensionality == _core_types.BlobType.BlobDimensionality.SINGLE
            and literal_type.blob.format == self.PYTORCH_CHECKPOINT_FORMAT
        ):
            return PyTorchCheckpoint

        raise ValueError(f"Transformer {self} cannot reverse {literal_type}")


TypeEngine.register(PyTorchCheckpointTransformer())
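
A minimal sketch of producing a checkpoint for this transformer; the PyTorchCheckpoint fields used here (module, hyperparameters) are assumptions based on typical usage, and the model is illustrative.

import torch.nn as nn

from flytekit import task
from flytekit.extras.pytorch import PyTorchCheckpoint

# Hypothetical task: the transformer serializes the checkpoint to a
# single-part blob and later reloads it with torch.load, using the
# cpu/gpu map_location logic shown above.
@task
def save_checkpoint() -> PyTorchCheckpoint:
    model = nn.Linear(4, 2)
    return PyTorchCheckpoint(module=model, hyperparameters={"epochs": 5})
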