예제 #1
0
        if extension in ("xlsx", "xltx"):
            return load_workbook(f)
        raise Exception(
            f"Deserialization: file extension {extension} is not supported by openpyxl_workbook type."
        )

    def copy(self, data):
        """Duplicate ``data`` by serializing it and deserializing the result.

        Only the first item of what ``as_bytes`` returns is handed on to
        ``from_bytes``.
        """
        serialized = self.as_bytes(data)
        payload = serialized[0]
        return self.from_bytes(payload)

    def data_characteristics(self, data):
        """Describe the workbook by the number of entries in ``data.sheetnames``."""
        sheet_count = len(data.sheetnames)
        return {"description": f"Excel workbook with {sheet_count} sheets."}


# Module-level instance of the openpyxl workbook state type, registered for
# the Workbook class.
OPENPYXL_WORKBOOK_STATE_TYPE = OpenpyxlWorkbookStateType()
register_state_type(Workbook, OPENPYXL_WORKBOOK_STATE_TYPE)


@command
def workbook(data, index=True, header=True, context=None):
    """Convert bytes or a dataframe to a workbook

    Bytes are deserialized through OPENPYXL_WORKBOOK_STATE_TYPE.from_bytes.
    For a pandas DataFrame a new Workbook is created and every row produced
    by dataframe_to_rows is appended to its active sheet; ``index`` and
    ``header`` are forwarded to dataframe_to_rows.
    """
    context = get_context(context)
    if type(data) == bytes:
        context.info("Workbook from bytes")
        return OPENPYXL_WORKBOOK_STATE_TYPE.from_bytes(data)
    elif isinstance(data, pd.DataFrame):
        context.info("Workbook from pandas DataFrame")
        wb = Workbook()
        ws = wb.active
        # The rows must come from the dataframe passed in as ``data``.
        for r in dataframe_to_rows(data, index=index, header=header):
            ws.append(r)
예제 #2
0
        )

    def data_characteristics(self, data):
        """Return a description of the Keras model together with its summary.

        The ``summary`` entry is whatever ``keras_summary(data)`` returns.
        """
        return dict(description="Keras model", summary=keras_summary(data))

    def copy(self, data):
        """Clone the model with ``clone_model`` and transfer the weights to the clone."""
        print ("KERAS MODEL COPY")
        duplicate = clone_model(data)
        weights = data.get_weights()
        duplicate.set_weights(weights)
        return duplicate


# Module-level instance of the Keras model state type; the same instance is
# registered for both the Model and the Functional classes.
KERASMODEL_STATE_TYPE = KerasModelStateType()
register_state_type(Model, KERASMODEL_STATE_TYPE)
register_state_type(Functional, KERASMODEL_STATE_TYPE)


@command
def keras_plot_model(
    model,
    show_shapes: bool = False,
    show_layer_names: bool = True,
    rankdir: str = "TB",
    expand_nested: bool = False,
    dpi: int = 96,
):
    "Keras plot model as png"
    print("************* keras_plot_model")
    assert isinstance(model, Model)
예제 #3
0
            return StoredDataframeIterator.from_dict(
                json.loads(b.decode("utf-8")))
        raise Exception(
            f"Deserialization: file extension {extension} is not supported by dataframe type."
        )

    def copy(self, data):
        """Return the result of ``data.copy()``."""
        return data.copy()

    def data_characteristics(self, data):
        """Describe the iterator by the number of entries in ``data.item_keys``."""
        batch_count = len(data.item_keys)
        return {"description": f"Dataframe iterator with {batch_count} batches."}


# Module-level instance of the stored dataframe iterator state type,
# registered for the StoredDataframeIterator class.
STORED_DATAFRAME_ITERATOR_STATE_TYPE = StoredDataframeIteratorStateType()
register_state_type(StoredDataframeIterator,
                    STORED_DATAFRAME_ITERATOR_STATE_TYPE)


def _store_batches(idf, key, max_batches=None, context=None):
    """Store iterator of dataframes (batches) in a store.
    The key specifies a directory in the store where the items will be stored.
    Helper function yielding StoredDataframeIterator object and dataframes.
    """
    context = get_context(context)
    context.info(f"Store iterator")
    batch_number = 0
    if max_batches in ("0", "", None):
        max_batches = 0
    else:
        max_batches = int(max_batches)
    store = context.store()
예제 #4
0
        if extension in ("pickle", "pkl"):
            return pickle.loads(b)
        raise Exception(
            f"Deserialization: file extension {extension} is not supported by Matplotlib Figure type."
        )

    def copy(self, data):
        """Return ``data`` itself; no new figure object is created."""
        # NOTE(review): callers receive the same object they passed in, so
        # changes to the "copy" also affect the original - confirm this is intended.
        return data

    def data_characteristics(self, data):
        """Return a fixed description; ``data`` is not inspected."""
        return {"description": "Matplotlib figure"}


# Module-level instance of the Matplotlib figure state type, registered for
# plt.Figure.
MATPLOTLIB_FIGURE_STATE_TYPE = MatplotlibFigureStateType()
register_state_type(plt.Figure, MATPLOTLIB_FIGURE_STATE_TYPE)


@command
def mpl(state, *series):
    """Matplotlib chart"""
    fig = plt.figure(figsize=(8, 6), dpi=300)
    axis = fig.add_subplot(1, 1, 1)
    series = list(reversed(list(series)))
    df = state.get()
    extension = None

    while len(series):
        t = series.pop()
        if t in ["jpg", "png", "svg"]:
            extension = t
예제 #5
0
파일: lq_hxl.py 프로젝트: orest-d/liquer
        if extension == "csv":
            return hxl.data(f)
        raise Exception(
            f"Deserialization: file extension {extension} is not supported by HXL dataset type."
        )

    def copy(self, data):
        """Return ``data`` itself; no deep copy is made."""
        # NOTE(review): the same object is handed back to the caller, so it is
        # shared rather than duplicated - confirm this is intended.
        return data

    def data_characteristics(self, data):
        """Return a fixed description; ``data`` is not inspected."""
        return {"description": "HXL dataset"}


# Module-level instance of the HXL state type; the same instance is
# registered for both hxl.Dataset and hxl.io.HXLReader.
HXL_DATASET_STATE_TYPE = HxlStateType()
register_state_type(hxl.Dataset, HXL_DATASET_STATE_TYPE)
register_state_type(hxl.io.HXLReader, HXL_DATASET_STATE_TYPE)


@first_command
def hxl_from(url):
    """Load data from URL

    The URL is passed unchanged to ``hxl.data`` and its result is returned.
    """
    return hxl.data(url)


@command
def hxl2df(data):
    """Convert hxl dataset to pandas dataframe"""
    f = BytesIO()
    for line in data.gen_csv(show_headers=True, show_tags=True):
        f.write(line.encode("utf-8"))
예제 #6
0
파일: lq_pandas.py 프로젝트: orest-d/liquer
            f"Deserialization: file extension {extension} is not supported by dataframe type."
        )

    def copy(self, data):
        """Return the result of ``data.copy()``."""
        return data.copy()

    def data_characteristics(self, data):
        """Summarize the dataframe.

        The result holds a textual description, the column labels converted
        to strings, and the column and row counts.
        """
        column_count = len(data.columns)
        row_count = len(data)
        return {
            "description": f"Dataframe with {column_count} columns and {row_count} rows.",
            "columns": list(map(str, data.columns)),
            "number_of_columns": column_count,
            "number_of_rows": row_count,
        }
        

# Module-level instance of the dataframe state type, registered for
# pd.DataFrame.
DATAFRAME_STATE_TYPE = DataframeStateType()
register_state_type(pd.DataFrame, DATAFRAME_STATE_TYPE)


@command
def to_df(data):
    """Convert data to DataFrame; data should be list of dicts or dict of lists.

    ``data`` is passed directly to the ``pd.DataFrame`` constructor.
    """
    return pd.DataFrame(data)


@first_command
def df_from(url, extension=None):
    """Load data from URL"""
    if extension is None:
        extension = url.split(".")[-1]
        if extension not in "csv tsv xls xlsx msgpack".split():
            extension = "csv"
예제 #7
0
                f"Serialization: file extension {extension} is not supported by DataFusion data-frame type."
            )

    def from_bytes(self, b: bytes, extension=None):
        """Always raise an Exception; both arguments are ignored."""
        message = "Deserialization is not supported by DataFusion data-frame type."
        raise Exception(message)

    def copy(self, data):
        """Attempt to duplicate ``data`` via ``as_bytes`` followed by ``from_bytes``."""
        # NOTE(review): the from_bytes defined just above this method raises
        # unconditionally, so this round trip cannot succeed unless from_bytes
        # is overridden - confirm how copies are expected to work for this type.
        return self.from_bytes(self.as_bytes(data)[0])

    def data_characteristics(self, data):
        """Return a fixed description; ``data`` is not inspected."""
        return {"description": "DataFusion data-frame"}


# Module-level instance of the DataFusion data-frame state type, registered
# for daf.DataFrame.
DATAFUSION_DATAFRAME_STATE_TYPE = DatafusionDataframeStateType()
register_state_type(daf.DataFrame, DATAFUSION_DATAFRAME_STATE_TYPE)


class DatafusionContextStateType(StateType):
    def identifier(self):
        return "datafusion_context"

    def default_extension(self):
        return "pickle"

    def is_type_of(self, data):
        return isinstance(data, daf.ExecutionContext)

    def as_bytes(self, data, extension=None):
        raise Exception(
            f"Serialization is not supported by DataFusion ExecutionContext type."
예제 #8
0
파일: lq_polars.py 프로젝트: orest-d/liquer
            return pl.read_csv(f)
        elif extension == "parquet":
            return pl.read_parquet(f)
        raise Exception(
            f"Deserialization: file extension {extension} is not supported by polars data-frame type."
        )

    def copy(self, data):
        """Duplicate ``data`` by serializing it and deserializing the result.

        Only the first item of what ``as_bytes`` returns is handed on to
        ``from_bytes``.
        """
        serialized = self.as_bytes(data)
        payload = serialized[0]
        return self.from_bytes(payload)

    def data_characteristics(self, data):
        """Describe the data-frame by its column and row counts.

        The counts are ``len(data.columns)`` and ``len(data)``.
        """
        column_count = len(data.columns)
        row_count = len(data)
        return dict(
            description=f"Polars data-frame with {column_count} columns and {row_count} rows."
        )
        

# Module-level instance of the polars data-frame state type, registered for
# pl.DataFrame.
POLARS_DATAFRAME_STATE_TYPE = PolarsDataframeStateType()
register_state_type(pl.DataFrame, POLARS_DATAFRAME_STATE_TYPE)

@command
def polars_df(data, extension=None, context=None):
    """Convert bytes or a data-frame to a polars data-frame

    - bytes are deserialized with POLARS_DATAFRAME_STATE_TYPE.from_bytes,
      forwarding ``extension``,
    - a pandas DataFrame is passed to the ``pl.DataFrame`` constructor,
    - a polars DataFrame is returned unchanged.

    Raises an Exception for any other type of ``data``.
    """
    context = get_context(context)
    if isinstance(data, bytes):
        context.info(f"Polars data-frame from bytes. Extension:'{extension}'")
        return POLARS_DATAFRAME_STATE_TYPE.from_bytes(data, extension=extension)
    if isinstance(data, pd.DataFrame):
        context.info("Polars data-frame from Pandas data-frame")
        return pl.DataFrame(data)
    if isinstance(data, pl.DataFrame):
        context.info("Polars data-frame kept as it is")
        return data
    raise Exception(f"Unsupported polars dataframe type: {type(data)}")
예제 #9
0
                )
            else:
                raise Exception(
                    f"Deserialization: file extension {extension} is not supported by PIL Image type."
                )

    def copy(self, data):
        """Return the result of ``data.copy()``."""
        return data.copy()

    def data_characteristics(self, data):
        """Describe the image by the width and height unpacked from ``data.size``."""
        w, h = data.size
        return {"description": f"Image {w}x{h}"}


# Module-level instance of the PIL image state type, registered for
# PIL.Image.Image.
PIL_IMAGE_STATE_TYPE = PILImageStateType()
register_state_type(PIL.Image.Image, PIL_IMAGE_STATE_TYPE)


@command(ns="pil")
def resize(image, width, height, resample=None):
    """Resize image

    ``width`` and ``height`` are converted with ``int()``. ``resample`` is
    the case-insensitive name of a filter: nearest, box, bilinear, hamming,
    bicubic or lanczos. Any other value, including the default ``None``,
    results in ``resample=None`` being passed to ``image.resize``.

    Returns the image produced by ``image.resize``.
    """
    filters = dict(
        nearest=PIL.Image.NEAREST,
        box=PIL.Image.BOX,
        bilinear=PIL.Image.BILINEAR,
        hamming=PIL.Image.HAMMING,
        bicubic=PIL.Image.BICUBIC,
        lanczos=PIL.Image.LANCZOS,
    )
    # Unknown names fall through to None rather than raising.
    resample = filters.get(str(resample).lower())

    # Image.resize returns a resized copy, so the input does not need to be
    # copied first.
    return image.resize((int(width), int(height)), resample=resample)