python_type: Type[pandas.DataFrame], expected: LiteralType, ) -> Literal:
    # NOTE(review): the head of this signature lies outside the visible chunk;
    # only the tail of the method is documented here.
    # Write the value with a parquet-configured PandasSchemaWriter into a
    # freshly obtained local directory.
    local_dir = ctx.file_access.get_random_local_directory()
    w = PandasSchemaWriter(local_dir=local_dir, cols=None, fmt=SchemaFormat.PARQUET)
    w.write(python_val)
    # Hand the local directory to put_data with is_multipart=True, targeting a
    # freshly obtained remote directory.
    remote_path = ctx.file_access.get_random_remote_directory()
    ctx.file_access.put_data(local_dir, remote_path, is_multipart=True)
    # The literal carries only the remote path and the schema type.
    return Literal(scalar=Scalar(
        schema=Schema(remote_path, self._get_schema_type())))


def to_python_value(
        self, ctx: FlyteContext, lv: Literal,
        expected_python_type: Type[pandas.DataFrame]) -> pandas.DataFrame:
    """Turn a schema literal back into a ``pandas.DataFrame``.

    :param ctx: context whose ``file_access`` supplies the local directory
        and performs the download.
    :param lv: literal expected to carry ``scalar.schema``.
    :param expected_python_type: not consulted by this implementation.
    :return: an empty ``pandas.DataFrame()`` when ``lv``, ``lv.scalar`` or
        ``lv.scalar.schema`` is falsy; otherwise the result of
        ``PandasSchemaReader.all()`` over the downloaded directory.
    """
    # Missing literal/scalar/schema is answered with an empty frame rather
    # than an error.
    if not (lv and lv.scalar and lv.scalar.schema):
        return pandas.DataFrame()
    # Download the directory at the schema URI, then read it with a
    # parquet-configured reader.
    local_dir = ctx.file_access.get_random_local_directory()
    ctx.file_access.download_directory(lv.scalar.schema.uri, local_dir)
    r = PandasSchemaReader(local_dir=local_dir, cols=None, fmt=SchemaFormat.PARQUET)
    return r.all()


# Register the pandas reader/writer pair with the SchemaEngine for
# pandas.DataFrame, and the transformer with the TypeEngine.
SchemaEngine.register_handler(
    SchemaHandler("pandas-dataframe-schema", pandas.DataFrame, PandasSchemaReader, PandasSchemaWriter))
TypeEngine.register(PandasDataFrameTransformer())
)
    # NOTE(review): the start of this method (including where ``schema`` is
    # built) lies outside the visible chunk.
    # Open the schema for the concrete type of the value and write it.
    writer = schema.open(type(python_val))
    writer.write(python_val)
    h = SchemaEngine.get_handler(type(python_val))
    # Only when the handler's ``handles_remote_io`` flag is falsy is the local
    # copy passed to put_data here (with is_multipart=True).
    if not h.handles_remote_io:
        ctx.file_access.put_data(schema.local_path, schema.remote_path, is_multipart=True)
    # The literal carries the schema's remote path and the schema type derived
    # from python_type.
    return Literal(scalar=Scalar(schema=Schema(
        schema.remote_path, self._get_schema_type(python_type))))


def to_python_value(
        self, ctx: FlyteContext, lv: Literal,
        expected_python_type: Type[FlyteSchema]) -> FlyteSchema:
    """Wrap a schema literal in an instance of ``expected_python_type``.

    Nothing is downloaded by this method itself: the returned object is given
    a ``downloader`` callable that forwards to
    ``ctx.file_access.download_directory``.

    :param ctx: context whose ``file_access`` supplies the local path and
        backs the downloader.
    :param lv: literal that must carry ``scalar.schema``.
    :param expected_python_type: class that is instantiated and returned.
    :return: ``expected_python_type`` built with a random local directory,
        the literal's schema URI as ``remote_path``, the downloader, and
        ``SchemaOpenMode.READ``.
    :raises AssertionError: if ``lv``, ``lv.scalar`` or ``lv.scalar.schema``
        is falsy.
    """
    if not (lv and lv.scalar and lv.scalar.schema):
        # NOTE(review): "covert" in the message below is presumably a typo for
        # "convert"; left untouched because it is a runtime string.
        raise AssertionError(
            "Can only covert a literal schema to a FlyteSchema")

    def downloader(x, y):
        # Forwards both arguments unchanged; returns None.
        ctx.file_access.download_directory(x, y)

    return expected_python_type(
        local_path=ctx.file_access.get_random_local_directory(),
        remote_path=lv.scalar.schema.uri,
        downloader=downloader,
        supported_mode=SchemaOpenMode.READ,
    )


# Make the transformer known to the TypeEngine.
TypeEngine.register(FlyteSchemaTransformer())
blob=Blob(metadata=meta, uri=remote_path or source_path)))
# NOTE(review): the line above is the tail of a statement whose start lies
# outside the visible chunk; the blob URI is ``remote_path`` when truthy,
# otherwise ``source_path``.


def to_python_value(
        self, ctx: FlyteContext, lv: Literal,
        expected_python_type: typing.Type[FlyteFile]) -> FlyteFile:
    """Build a file object from a blob literal.

    :param ctx: context whose ``file_access`` decides whether the URI is
        remote, picks the local path, and backs the downloader.
    :param lv: literal; ``lv.scalar.blob.uri`` is read without any guard.
    :param expected_python_type: used directly as the constructor for
        non-remote URIs, and to look up the format for remote ones.
    :return: ``expected_python_type(uri)`` when the URI is not remote;
        otherwise a ``FlyteFile[expected_format]`` created with a random local
        path and a downloader, with ``_remote_source`` set to the URI.
    """
    uri = lv.scalar.blob.uri
    # This is a local file path, like /usr/local/my_file; don't mess with it.
    # Downloading it certainly doesn't make any sense.
    if not ctx.file_access.is_remote(uri):
        return expected_python_type(uri)

    # For the remote case, return a FlyteFile object that can download.
    local_path = ctx.file_access.get_random_local_path(uri)

    def _downloader():
        # Not called in this method; it is only handed to the FlyteFile
        # constructor below.
        return ctx.file_access.get_data(uri, local_path, is_multipart=False)

    expected_format = FlyteFilePathTransformer.get_format(
        expected_python_type)
    ff = FlyteFile[expected_format](local_path, _downloader)
    # Remember where the data came from.
    ff._remote_source = uri
    return ff


# Make the transformer known to the TypeEngine.
TypeEngine.register(FlyteFilePathTransformer())
def get_literal_type(self, t: Type[_params.ParameterRangeOneOf]) -> LiteralType:
    """Return the literal type used to carry ``ParameterRangeOneOf`` values.

    :param t: not consulted; the result is always
        ``primitives.Generic.to_flyte_literal_type()``.
    """
    return primitives.Generic.to_flyte_literal_type()


def to_literal(
    self,
    ctx: FlyteContext,
    python_val: _params.ParameterRangeOneOf,
    python_type: Type[_params.ParameterRangeOneOf],
    expected: LiteralType,
) -> Literal:
    """Convert a ``ParameterRangeOneOf`` into a generic literal.

    The value's IDL message is turned into a plain dict and passed to
    ``DictTransformer.dict_to_generic_literal``.

    :param ctx: not consulted by this implementation.
    :param python_val: value to convert; must provide ``to_flyte_idl()``.
    :param python_type: not consulted by this implementation.
    :param expected: not consulted by this implementation.
    :return: whatever ``DictTransformer.dict_to_generic_literal`` produces.
    """
    d = MessageToDict(python_val.to_flyte_idl())
    return DictTransformer.dict_to_generic_literal(d)


def to_python_value(
    self, ctx: FlyteContext, lv: Literal, expected_python_type: Type[_params.ParameterRangeOneOf]
) -> _params.ParameterRangeOneOf:
    """Rebuild a ``ParameterRangeOneOf`` from a generic literal.

    :param ctx: not consulted by this implementation.
    :param lv: literal expected to carry ``scalar.generic``.
    :param expected_python_type: not consulted by this implementation.
    :return: the reconstructed value, or ``None`` when ``lv`` or
        ``lv.scalar`` is falsy or ``lv.scalar.generic`` is ``None``.
    """
    if not (lv and lv.scalar and lv.scalar.generic is not None):
        return None
    # MessageToDict yields the same structure that dumping to a JSON string
    # and parsing it back would, without the intermediate string.
    d = json_format.MessageToDict(lv.scalar.generic)
    o = json_format.ParseDict(d, _pb2_params.ParameterRangeOneOf())
    return _params.ParameterRangeOneOf.from_flyte_idl(o)


# %%
# Register the types
TypeEngine.register(HPOTuningJobConfigTransformer())
TypeEngine.register(ParameterRangesTransformer())
def to_python_value(
    self, ctx: FlyteContext, lv: Literal, expected_python_type: typing.Type[FlyteDirectory]
) -> FlyteDirectory:
    """Build a directory object from a blob literal.

    :param ctx: context whose ``file_access`` decides whether the URI is
        remote, picks the local folder, and backs the fetch callable.
    :param lv: literal; ``lv.scalar.blob.uri`` is read without any guard.
    :param expected_python_type: used directly as the constructor for
        non-remote URIs, and to look up the format for remote ones.
    :return: ``expected_python_type(uri)`` when the URI is not remote;
        otherwise a ``FlyteDirectory[format]`` created with a random local
        folder and a fetch callable, with ``_remote_source`` set to the URI.
    """
    source_uri = lv.scalar.blob.uri

    if ctx.file_access.is_remote(source_uri):
        # Remote source: pair a fresh local folder with a callable that pulls
        # the data (multipart) when something invokes it. Nothing is fetched
        # inside this method.
        target_dir = ctx.file_access.get_random_local_directory()

        def _fetch():
            return ctx.file_access.get_data(source_uri, target_dir, is_multipart=True)

        directory_cls = FlyteDirectory[self.get_format(expected_python_type)]
        result = directory_cls(target_dir, _fetch)
        result._remote_source = source_uri
        return result

    # A local path (e.g. /usr/local/my_file) is wrapped as-is; there is
    # nothing to download.
    return expected_python_type(source_uri)


TypeEngine.register(FlyteDirToMultipartBlobTransformer())
self, ctx: FlyteContext, lv: Literal, expected_python_type: Type[MyDataset] ) -> MyDataset:
    """
    Re-hydrate the custom object from a Flyte Literal value: the directory at
    ``lv.scalar.blob.uri`` is downloaded into a random local directory and a
    ``MyDataset`` rooted at that directory is returned.
    """
    # Step 1: let's download the remote data locally
    local_dir = ctx.file_access.get_random_local_directory()
    ctx.file_access.download_directory(lv.scalar.blob.uri, local_dir)
    # Step 2: create the MyDataset object
    return MyDataset(base_dir=local_dir)


# %%
# Before we can use MyDataset in our tasks, we need to let flytekit know that ``MyDataset`` should be considered as a
# valid type. This is done using the :py:func:`flytekit.annotated.type_engine.TypeEngine.register` function.
TypeEngine.register(MyDatasetTransformer())


# %%
# Now the new type should be ready to use. Let us write an example generator and consumer for this new datatype.
@task
def generate() -> MyDataset:
    """Create a ``MyDataset`` and fill it with three small text files.

    For ``i`` in 0..2, a path is requested with ``d.new_file(f"x{i}")`` and the
    text ``Contents of file{i}`` is written to it.

    :return: the dataset object the files were requested from.
    """
    d = MyDataset()
    for i in range(3):
        # new_file supplies the path to write to; where it points is decided
        # by MyDataset, which is not visible in this chunk.
        fp = d.new_file(f"x{i}")
        with open(fp, "w") as f:
            f.write(f"Contents of file{i}")
    return d