Exemple #1
0
def dbnd_setup_plugin():
    """Register the TensorFlow Keras marshallers and value types.

    The Keras classes are resolved through the public ``tensorflow.keras``
    namespace. The ``tensorflow.python.*`` module path is private and is not
    guaranteed to expose the same classes that user code gets from
    ``tf.keras``, so marshallers registered against it may never match.

    All imports happen inside the function, so TensorFlow and the
    dbnd_tensorflow marshalling modules are only imported when it is called.
    """
    from tensorflow.keras import models
    from tensorflow.keras.callbacks import History

    from dbnd_tensorflow.marshalling.tensorflow_marshaller import (
        TensorflowKerasHistoryMarshaller,
        TensorflowKerasModelMarshaller,
    )
    from dbnd_tensorflow.marshalling.tensorflow_values import (
        TensorflowHistoryValueType,
        TensorflowModelValueType,
    )

    # Keras models are registered with the tfmodel file format.
    register_marshaller(
        models.Model, FileFormat.tfmodel, TensorflowKerasModelMarshaller()
    )
    # Keras History objects are registered with the tfhistory file format.
    register_marshaller(
        History, FileFormat.tfhistory, TensorflowKerasHistoryMarshaller()
    )

    register_value_type(TensorflowModelValueType())
    register_value_type(TensorflowHistoryValueType())
Exemple #2
0
def dbnd_register_spark_types():
    """Register the Spark DataFrame value type and its per-format marshallers.

    One marshaller is registered for ``pyspark.sql.DataFrame`` for each of
    the txt, csv, json and parquet file formats, in that order.
    """
    register_value_type(SparkDataFrameValueType())

    # Every marshaller is built up front; registration then follows list order.
    marshallers_by_format = [
        (FileFormat.txt, SparkMarshaller(fmt=FileFormat.txt)),
        (FileFormat.csv, SparkDataFrameToCsv()),
        (FileFormat.json, SparkMarshaller(fmt=FileFormat.json)),
        (FileFormat.parquet, SparkMarshaller(fmt=FileFormat.parquet)),
    ]
    for fmt, spark_marshaller in marshallers_by_format:
        register_marshaller(pyspark.sql.DataFrame, fmt, spark_marshaller)
Exemple #3
0
def dbnd_setup_plugin():
    """Register marshallers and value types for Keras models and histories.

    Imports are function-local, so TensorFlow and the dbnd_tensorflow
    marshalling modules are only imported when this function is called.
    """
    from tensorflow.keras import models
    from tensorflow.keras.callbacks import History

    from dbnd_tensorflow.marshalling.tensorflow_marshaller import (
        TensorflowKerasHistoryMarshaller,
        TensorflowKerasModelMarshaller,
    )
    from dbnd_tensorflow.marshalling.tensorflow_values import (
        TensorflowHistoryValueType,
        TensorflowModelValueType,
    )

    # (value class, file format, marshaller class) -- each marshaller is
    # instantiated right before its own registration call.
    marshaller_table = (
        (models.Model, FileFormat.tfmodel, TensorflowKerasModelMarshaller),
        (History, FileFormat.tfhistory, TensorflowKerasHistoryMarshaller),
    )
    for value_cls, file_format, marshaller_cls in marshaller_table:
        register_marshaller(value_cls, file_format, marshaller_cls())

    for value_type_cls in (TensorflowModelValueType, TensorflowHistoryValueType):
        register_value_type(value_type_cls())
Exemple #4
0

# 1. Create the file extension: register "z" and keep the returned value,
#    which is used below as the marshaller's format and as the task's
#    output target format.
z_file_ext = register_file_extension("z")


class JoblibSizedMessageMarshaller(Marshaller):
    """Marshaller that reads and writes values with joblib."""

    def target_to_value(self, target, **kwargs):
        """Open ``target`` and return what joblib loads from the opened file's path."""
        with target.open() as handle:
            # joblib is given the opened file's name, not the handle itself.
            return joblib.load(handle.name)

    def value_to_target(self, value, target, **kwargs):
        """Open ``target`` for writing and joblib-dump ``value`` to the opened file's path."""
        with target.open("w") as handle:
            joblib.dump(value, handle.name)


# 2. Register the type-to-extension mapping: SizedMessage with the
#    z_file_ext format is associated with a JoblibSizedMessageMarshaller
#    instance.
register_marshaller(SizedMessage, z_file_ext, JoblibSizedMessageMarshaller())


@task(result=output.target_config(TargetConfig(format=z_file_ext)))
def dump_as_joblib():
    # type: ()-> SizedMessage
    """Return a sample SizedMessage; the result target is configured with ``z_file_ext``."""
    message = SizedMessage("example message \n", 10)
    return message


@task(result=output.txt[int])
def load_as_joblib(sized_message: SizedMessage):
    """Return the message's ``msg`` multiplied by its ``size``."""
    text, repeat = sized_message.msg, sized_message.size
    return text * repeat
Exemple #5
0
                                                      key="features",
                                                      **kwargs)
        targets = super(MyDataToHdf5, self)._pd_read(*args,
                                                     key="targets",
                                                     **kwargs)
        return MyData(features=features, targets=targets)

    def _pd_to(self, data, file_or_path, *args, **kwargs):
        """Write ``data.features`` and ``data.targets`` into one HDF store.

        The store at ``file_or_path`` is opened in "w" mode and each frame is
        put under its own key ("features", "targets") with
        ``data_columns=True``. Extra positional ``args`` are accepted but
        not used.
        """
        # {"format": "fixed"} is combined with the caller's kwargs; presumably
        # caller-supplied values take precedence -- confirm in combine_mappings.
        put_options = combine_mappings({"format": "fixed"}, kwargs)
        with pd.HDFStore(file_or_path, "w") as store:
            # The store is already opened with "w"; any "mode" option is
            # discarded before the options are forwarded to put().
            put_options.pop("mode", None)
            for key in ("features", "targets"):
                store.put(key, getattr(data, key), data_columns=True, **put_options)


# Register a MyDataToHdf5 instance as the marshaller for MyData with the
# hdf5 file format.
register_marshaller(MyData, FileFormat.hdf5, MyDataToHdf5())
# Register a custom parameter for MyData and keep the returned value as
# MyDataParameter.
MyDataParameter = register_custom_parameter(MyData,
                                            parameter.data.type(MyData))


class MyDataReport(PythonTask):
    """Task that publishes ``head(1)`` of the input's features as ``report``."""

    my_data = parameter[MyData]
    report = output[DataFrame]

    def run(self):
        """Assign ``my_data.features.head(1)`` to the ``report`` output."""
        features = self.my_data.features
        self.report = features.head(1)


class BuildMyData(PythonTask):
    # NOTE(review): only the output declaration is visible here; the rest of
    # this task (e.g. a run method) may be cut off -- confirm against the
    # original source.
    # Output declared through output.hdf5 for a MyData value.
    my_data = output.hdf5[MyData]
import pandas as pd

from dbnd import output, task
from targets.marshalling import register_marshaller
from targets.marshalling.pandas import _PandasMarshaller
from targets.target_config import register_file_extension

# 1. Create the file extension: register "xlsx" and keep the returned value,
#    which is used below as the marshaller's format and as the task's
#    output extension.
excel_file_ext = register_file_extension("xlsx")


class DataFrameToExcel(_PandasMarshaller):
    """Pandas marshaller that reads and writes Excel files."""

    def _pd_read(self, *args, **kwargs):
        """Forward all arguments to ``pd.read_excel`` and return its result."""
        frame = pd.read_excel(*args, **kwargs)
        return frame

    def _pd_to(self, value, *args, **kwargs):
        """Forward all arguments to ``value.to_excel`` and return its result."""
        written = value.to_excel(*args, **kwargs)
        return written


# 2. Register the type-to-extension mapping: pd.DataFrame with the
#    excel_file_ext format is associated with a DataFrameToExcel instance.
register_marshaller(pd.DataFrame, excel_file_ext, DataFrameToExcel())


@task(result=output(output_ext=excel_file_ext))
def dump_as_excel_table():
    # type: ()-> pd.DataFrame
    """Return a two-row sample DataFrame with "Names" and "Births" columns."""
    names = ["Bob", "Jessica"]
    births = [968, 155]
    rows = list(zip(names, births))
    return pd.DataFrame(data=rows, columns=["Names", "Births"])
Exemple #7
0
"""
Override the built-in implementation of HDF5 serialization
"""
import pandas as pd

from dbnd import output, task
from targets.marshalling import DataFrameToHdf5, register_marshaller
from targets.target_config import FileFormat


class DataFrameToHdf5Table(DataFrameToHdf5):
    """DataFrameToHdf5 variant whose writes default to the "table" format."""

    def _pd_to(self, value, *args, **kwargs):
        """Delegate to the parent writer, using format="table" unless a format was passed."""
        # Change the marshaller's default format, but only when the caller
        # did not supply one.
        if "format" not in kwargs:
            kwargs["format"] = "table"
        parent = super(DataFrameToHdf5Table, self)
        return parent._pd_to(value, *args, **kwargs)


# Register a DataFrameToHdf5Table instance for pd.DataFrame with the hdf5
# format (presumably replacing the built-in hdf5 marshaller -- confirm).
register_marshaller(pd.DataFrame, FileFormat.hdf5, DataFrameToHdf5Table())


@task(result=output.hdf5)
def dump_as_hdf5_table():
    # type: ()-> pd.DataFrame
    """Return a two-row sample DataFrame with "Names" and "Births" columns."""
    names = ["Bob", "Jessica"]
    births = [968, 155]
    rows = list(zip(names, births))
    return pd.DataFrame(data=rows, columns=["Names", "Births"])