def dbnd_setup_plugin():
    """Register the TensorFlow/Keras marshallers and value types.

    Registers a marshaller for Keras models under ``FileFormat.tfmodel`` and
    one for Keras training histories under ``FileFormat.tfhistory``, then
    registers the matching value types.
    """
    # Imports are function-local, so TensorFlow and the dbnd_tensorflow
    # modules are only imported when this hook actually runs.
    from tensorflow.keras.callbacks import History
    from tensorflow.keras.models import Model

    from dbnd_tensorflow.marshalling.tensorflow_marshaller import (
        TensorflowKerasHistoryMarshaller,
        TensorflowKerasModelMarshaller,
    )
    from dbnd_tensorflow.marshalling.tensorflow_values import (
        TensorflowHistoryValueType,
        TensorflowModelValueType,
    )

    # Register against the public ``tensorflow.keras`` classes. The private
    # ``tensorflow.python.keras.*`` module path is not part of TensorFlow's
    # public API: it may be missing, or may expose different classes than the
    # ones users build through ``tf.keras``, depending on the release.
    register_marshaller(Model, FileFormat.tfmodel, TensorflowKerasModelMarshaller())
    register_marshaller(
        History, FileFormat.tfhistory, TensorflowKerasHistoryMarshaller()
    )

    register_value_type(TensorflowModelValueType())
    register_value_type(TensorflowHistoryValueType())
def dbnd_register_spark_types():
    """Register the Spark DataFrame value type and its per-format marshallers.

    The value type is registered first; afterwards one marshaller is
    registered for ``pyspark.sql.DataFrame`` for each of the txt, csv, json
    and parquet file formats, in that order.
    """
    register_value_type(SparkDataFrameValueType())

    # (file format, marshaller) pairs; every marshaller is built up front,
    # before the first registration call is made.
    registrations = [
        (FileFormat.txt, SparkMarshaller(fmt=FileFormat.txt)),
        (FileFormat.csv, SparkDataFrameToCsv()),
        (FileFormat.json, SparkMarshaller(fmt=FileFormat.json)),
        (FileFormat.parquet, SparkMarshaller(fmt=FileFormat.parquet)),
    ]
    for format_and_marshaller in registrations:
        register_marshaller(pyspark.sql.DataFrame, *format_and_marshaller)
def dbnd_setup_plugin():
    """Hook that wires the Keras model/history types into the registries.

    A marshaller is registered for ``tensorflow.keras`` models
    (``FileFormat.tfmodel``) and for training ``History`` objects
    (``FileFormat.tfhistory``); the two corresponding value types are
    registered afterwards.
    """
    # All imports stay inside the function body, as in the original hook.
    from tensorflow.keras import models as keras_models
    from tensorflow.keras import callbacks as keras_callbacks

    from dbnd_tensorflow.marshalling.tensorflow_marshaller import (
        TensorflowKerasHistoryMarshaller,
        TensorflowKerasModelMarshaller,
    )
    from dbnd_tensorflow.marshalling.tensorflow_values import (
        TensorflowHistoryValueType,
        TensorflowModelValueType,
    )

    # Marshallers: model first, then history.
    keras_model_cls = keras_models.Model
    register_marshaller(
        keras_model_cls, FileFormat.tfmodel, TensorflowKerasModelMarshaller()
    )
    keras_history_cls = keras_callbacks.History
    register_marshaller(
        keras_history_cls, FileFormat.tfhistory, TensorflowKerasHistoryMarshaller()
    )

    # Value types: same model-then-history order.
    for value_type_cls in (TensorflowModelValueType, TensorflowHistoryValueType):
        register_value_type(value_type_cls())
# 1. create file extension
z_file_ext = register_file_extension("z")


class JoblibSizedMessageMarshaller(Marshaller):
    """Marshaller that reads and writes values through joblib."""

    def target_to_value(self, target, **kwargs):
        """Open ``target`` and return what joblib loads from the opened file's path."""
        with target.open() as handle:
            return joblib.load(handle.name)

    def value_to_target(self, value, target, **kwargs):
        """Open ``target`` for writing and joblib-dump ``value`` to the opened file's path."""
        with target.open("w") as handle:
            joblib.dump(value, handle.name)


# 2. register type to extension mapping
register_marshaller(SizedMessage, z_file_ext, JoblibSizedMessageMarshaller())


@task(result=output.target_config(TargetConfig(format=z_file_ext)))
def dump_as_joblib():
    # type: ()-> SizedMessage
    """Build the example ``SizedMessage``; the result target uses the ``z`` format."""
    example = SizedMessage("example message \n", 10)
    return example


@task(result=output.txt[int])
def load_as_joblib(sized_message: SizedMessage):
    """Return ``sized_message.msg`` multiplied by ``sized_message.size``."""
    msg, size = sized_message.msg, sized_message.size
    return msg * size
key="features", **kwargs) targets = super(MyDataToHdf5, self)._pd_read(*args, key="targets", **kwargs) return MyData(features=features, targets=targets) def _pd_to(self, data, file_or_path, *args, **kwargs): kwargs = combine_mappings({"format": "fixed"}, kwargs) with pd.HDFStore(file_or_path, "w") as store: kwargs.pop("mode", None) store.put("features", data.features, data_columns=True, **kwargs) store.put("targets", data.targets, data_columns=True, **kwargs) register_marshaller(MyData, FileFormat.hdf5, MyDataToHdf5()) MyDataParameter = register_custom_parameter(MyData, parameter.data.type(MyData)) class MyDataReport(PythonTask): my_data = parameter[MyData] report = output[DataFrame] def run(self): self.report = self.my_data.features.head(1) class BuildMyData(PythonTask): my_data = output.hdf5[MyData]
import pandas as pd

from dbnd import output, task
from targets.marshalling import register_marshaller
from targets.marshalling.pandas import _PandasMarshaller
from targets.target_config import register_file_extension


# 1. create file extension
excel_file_ext = register_file_extension("xlsx")


class DataFrameToExcel(_PandasMarshaller):
    """Pandas marshaller that delegates to the Excel reader/writer of pandas."""

    def _pd_read(self, *args, **kwargs):
        """Forward all arguments to ``pd.read_excel`` and return its result."""
        return pd.read_excel(*args, **kwargs)

    def _pd_to(self, value, *args, **kwargs):
        """Forward the remaining arguments to ``value.to_excel`` and return its result."""
        return value.to_excel(*args, **kwargs)


# 2. register type to extension mapping
register_marshaller(pd.DataFrame, excel_file_ext, DataFrameToExcel())


@task(result=output(output_ext=excel_file_ext))
def dump_as_excel_table():
    # type: ()-> pd.DataFrame
    """Return a two-row example frame with ``Names`` and ``Births`` columns."""
    rows = [("Bob", 968), ("Jessica", 155)]
    return pd.DataFrame(data=rows, columns=["Names", "Births"])
"""
Override the built-in implementation of Hdf5 serialization
"""
import pandas as pd

from dbnd import output, task
from targets.marshalling import DataFrameToHdf5, register_marshaller
from targets.target_config import FileFormat


class DataFrameToHdf5Table(DataFrameToHdf5):
    """``DataFrameToHdf5`` variant whose writes default to ``format="table"``."""

    def _pd_to(self, value, *args, **kwargs):
        """Write ``value`` via the parent marshaller, defaulting ``format`` to ``"table"``.

        A ``format`` keyword supplied by the caller is left untouched.
        """
        # Only fill in the format when the caller did not choose one.
        if "format" not in kwargs:
            kwargs["format"] = "table"
        parent = super(DataFrameToHdf5Table, self)
        return parent._pd_to(value, *args, **kwargs)


register_marshaller(pd.DataFrame, FileFormat.hdf5, DataFrameToHdf5Table())


@task(result=output.hdf5)
def dump_as_hdf5_table():
    # type: ()-> pd.DataFrame
    """Return a two-row example frame with ``Names`` and ``Births`` columns."""
    rows = [("Bob", 968), ("Jessica", 155)]
    return pd.DataFrame(data=rows, columns=["Names", "Births"])