Example #1
0
        python_type: Type[pandas.DataFrame],
        expected: LiteralType,
    ) -> Literal:
        local_dir = ctx.file_access.get_random_local_directory()
        w = PandasSchemaWriter(local_dir=local_dir,
                               cols=None,
                               fmt=SchemaFormat.PARQUET)
        w.write(python_val)
        remote_path = ctx.file_access.get_random_remote_directory()
        ctx.file_access.put_data(local_dir, remote_path, is_multipart=True)
        return Literal(scalar=Scalar(
            schema=Schema(remote_path, self._get_schema_type())))

    def to_python_value(
            self, ctx: FlyteContext, lv: Literal,
            expected_python_type: Type[pandas.DataFrame]) -> pandas.DataFrame:
        """
        Materialise a schema literal as a pandas DataFrame.

        The directory referenced by ``lv.scalar.schema.uri`` is downloaded into
        a random local directory and read back through ``PandasSchemaReader``
        using the Parquet schema format.

        :param ctx: context whose ``file_access`` performs the download.
        :param lv: literal expected to carry ``scalar.schema``.
        :param expected_python_type: not consulted by this method.
        :return: the result of the reader's ``all()`` call, or an empty
            DataFrame when the literal carries no schema.
        """
        has_schema = bool(lv and lv.scalar and lv.scalar.schema)
        if not has_schema:
            # Nothing to download: hand back an empty frame instead of failing.
            return pandas.DataFrame()

        download_target = ctx.file_access.get_random_local_directory()
        ctx.file_access.download_directory(lv.scalar.schema.uri,
                                           download_target)
        reader = PandasSchemaReader(
            local_dir=download_target,
            cols=None,
            fmt=SchemaFormat.PARQUET,
        )
        return reader.all()


# Register a schema handler that pairs pandas.DataFrame with
# PandasSchemaReader and PandasSchemaWriter.
SchemaEngine.register_handler(
    SchemaHandler("pandas-dataframe-schema", pandas.DataFrame,
                  PandasSchemaReader, PandasSchemaWriter))
# Register an instance of the DataFrame transformer with the type engine.
TypeEngine.register(PandasDataFrameTransformer())
Example #2
0
        )
        writer = schema.open(type(python_val))
        writer.write(python_val)
        h = SchemaEngine.get_handler(type(python_val))
        if not h.handles_remote_io:
            ctx.file_access.put_data(schema.local_path,
                                     schema.remote_path,
                                     is_multipart=True)
        return Literal(scalar=Scalar(schema=Schema(
            schema.remote_path, self._get_schema_type(python_type))))

    def to_python_value(
            self, ctx: FlyteContext, lv: Literal,
            expected_python_type: Type[FlyteSchema]) -> FlyteSchema:
        """
        Build a ``FlyteSchema`` handle from a schema literal.

        This method does not call the downloader itself; it only hands the
        new object a callback that delegates to
        ``ctx.file_access.download_directory``.

        :param ctx: context whose ``file_access`` supplies the local directory
            and performs downloads when the callback is invoked.
        :param lv: literal that must carry ``scalar.schema``.
        :param expected_python_type: class instantiated to produce the result.
        :return: an ``expected_python_type`` instance whose remote path is the
            literal's schema URI, whose local path is a random local
            directory, and whose supported mode is ``SchemaOpenMode.READ``.
        :raises AssertionError: when ``lv`` has no schema scalar.
        """
        if not (lv and lv.scalar and lv.scalar.schema):
            raise AssertionError(
                "Can only convert a literal schema to a FlyteSchema")

        def downloader(x, y):
            # Thin wrapper so downloads go through this context's file access;
            # both arguments are forwarded unchanged.
            ctx.file_access.download_directory(x, y)

        return expected_python_type(
            local_path=ctx.file_access.get_random_local_directory(),
            remote_path=lv.scalar.schema.uri,
            downloader=downloader,
            supported_mode=SchemaOpenMode.READ,
        )


# Register an instance of the FlyteSchema transformer with the type engine.
TypeEngine.register(FlyteSchemaTransformer())
Example #3
0
                blob=Blob(metadata=meta, uri=remote_path or source_path)))

    def to_python_value(
            self, ctx: FlyteContext, lv: Literal,
            expected_python_type: typing.Type[FlyteFile]) -> FlyteFile:
        """
        Turn a blob literal into a ``FlyteFile``.

        :param ctx: context whose ``file_access`` decides whether the URI is
            remote, picks the local path and performs the download.
        :param lv: literal carrying ``scalar.blob.uri``.
        :param expected_python_type: type used directly for local URIs and
            passed to ``get_format`` for remote ones.
        :return: ``expected_python_type(uri)`` for a non-remote URI; otherwise
            a ``FlyteFile[format]`` built from a random local path and a
            download callback, with ``_remote_source`` set to the URI.
        """
        source_uri = lv.scalar.blob.uri

        if ctx.file_access.is_remote(source_uri):
            # Remote blob: hand the FlyteFile a callback that fetches the
            # data rather than fetching it here.
            destination = ctx.file_access.get_random_local_path(source_uri)
            file_format = FlyteFilePathTransformer.get_format(
                expected_python_type)

            def _fetch():
                return ctx.file_access.get_data(source_uri,
                                                destination,
                                                is_multipart=False)

            remote_file = FlyteFile[file_format](destination, _fetch)
            remote_file._remote_source = source_uri
            return remote_file

        # A local path such as /usr/local/my_file is used as-is; there is
        # nothing to download.
        return expected_python_type(source_uri)


# Register an instance of the FlyteFile path transformer with the type engine.
TypeEngine.register(FlyteFilePathTransformer())
Example #4
0
    def get_literal_type(self,
                         t: Type[_params.ParameterRangeOneOf]) -> LiteralType:
        """
        Report the Flyte literal type used for ``ParameterRangeOneOf`` values.

        :param t: the Python type being described; not consulted here.
        :return: whatever ``primitives.Generic.to_flyte_literal_type()``
            produces.
        """
        generic_literal_type = primitives.Generic.to_flyte_literal_type()
        return generic_literal_type

    def to_literal(
        self,
        ctx: FlyteContext,
        python_val: _params.ParameterRangeOneOf,
        python_type: Type[_params.ParameterRangeOneOf],
        expected: LiteralType,
    ) -> Literal:
        """
        Convert a ``ParameterRangeOneOf`` into a generic literal.

        :param ctx: current Flyte context; not used by this method.
        :param python_val: value to convert; its ``to_flyte_idl()`` result is
            serialised.
        :param python_type: Python type of ``python_val``; not consulted.
        :param expected: expected literal type; not consulted.
        :return: the literal produced by
            ``DictTransformer.dict_to_generic_literal`` from the dict form of
            the value's IDL message.
        """
        # Go through the IDL message so the dict mirrors its field layout.
        d = MessageToDict(python_val.to_flyte_idl())
        return DictTransformer.dict_to_generic_literal(d)

    def to_python_value(
        self, ctx: FlyteContext, lv: Literal,
        expected_python_type: Type[_params.ParameterRangeOneOf]
    ) -> _params.ParameterRangeOneOf:
        """
        Rebuild a ``ParameterRangeOneOf`` from a generic literal.

        :param ctx: current Flyte context; not used by this method.
        :param lv: literal expected to carry ``scalar.generic``.
        :param expected_python_type: not consulted by this method.
        :return: the object produced by
            ``_params.ParameterRangeOneOf.from_flyte_idl``, or ``None`` when
            the literal has no generic scalar.
        """
        if not (lv and lv.scalar and lv.scalar.generic is not None):
            return None

        # Generic message -> JSON text -> plain dict -> protobuf message.
        as_dict = json.loads(json_format.MessageToJson(lv.scalar.generic))
        pb_message = json_format.ParseDict(
            as_dict, _pb2_params.ParameterRangeOneOf())
        return _params.ParameterRangeOneOf.from_flyte_idl(pb_message)


# %%
# Register an instance of each transformer (HPO tuning job config and
# parameter ranges) with the type engine.
TypeEngine.register(HPOTuningJobConfigTransformer())
TypeEngine.register(ParameterRangesTransformer())
Example #5
0
    def to_python_value(
            self, ctx: FlyteContext, lv: Literal,
            expected_python_type: typing.Type[FlyteDirectory]
    ) -> FlyteDirectory:
        """
        Turn a blob literal into a ``FlyteDirectory``.

        :param ctx: context whose ``file_access`` decides whether the URI is
            remote, picks the local directory and performs the download.
        :param lv: literal carrying ``scalar.blob.uri``.
        :param expected_python_type: type used directly for local URIs and
            passed to ``self.get_format`` for remote ones.
        :return: ``expected_python_type(uri)`` for a non-remote URI; otherwise
            a ``FlyteDirectory[format]`` built from a random local directory
            and a multipart download callback, with ``_remote_source`` set to
            the URI.
        """
        source_uri = lv.scalar.blob.uri

        if ctx.file_access.is_remote(source_uri):
            # Remote blob: hand the FlyteDirectory a callback that fetches the
            # data rather than fetching it here.
            destination_dir = ctx.file_access.get_random_local_directory()

            def _fetch():
                return ctx.file_access.get_data(source_uri,
                                                destination_dir,
                                                is_multipart=True)

            dir_format = self.get_format(expected_python_type)
            remote_dir = FlyteDirectory[dir_format](destination_dir, _fetch)
            remote_dir._remote_source = source_uri
            return remote_dir

        # A local path is used as-is; there is nothing to download.
        return expected_python_type(source_uri)


# Register an instance of the FlyteDirectory transformer with the type engine.
TypeEngine.register(FlyteDirToMultipartBlobTransformer())
Example #6
0
        self, ctx: FlyteContext, lv: Literal, expected_python_type: Type[MyDataset]
    ) -> MyDataset:
        """
        In this function we want to be able to re-hydrate the custom object from Flyte Literal value
        """
        # Step 1: lets download remote data locally
        local_dir = ctx.file_access.get_random_local_directory()
        ctx.file_access.download_directory(lv.scalar.blob.uri, local_dir)
        # Step 2: create the MyDataset object
        return MyDataset(base_dir=local_dir)


# %%
# Before ``MyDataset`` can be used in tasks, flytekit has to be told that it is a valid type. This is done by
# registering an instance of its transformer with :py:func:`flytekit.annotated.type_engine.TypeEngine.register`.
TypeEngine.register(MyDatasetTransformer())


# %%
# Now the new type is ready to use. Let us write an example generator and consumer for this new datatype.


@task
def generate() -> MyDataset:
    """
    Create a ``MyDataset`` and fill it with three small text files.

    Each file path comes from ``new_file`` with the names ``x0``, ``x1`` and
    ``x2``; each file receives one line of text naming its index.
    """
    dataset = MyDataset()
    for idx in range(3):
        with open(dataset.new_file(f"x{idx}"), "w") as handle:
            handle.write(f"Contents of file{idx}")

    return dataset