Beispiel #1
0
def blosc_args(dt):
    """Pick bloscpack compression settings for an array of dtype ``dt``.

    Args:
        dt: A NumPy dtype, or anything ``np.dtype()`` accepts (e.g. ``"int32"``
            or ``np.float32``).

    Returns:
        ``bloscpack.BloscArgs`` with ``typesize`` set to the dtype's item size:
        ``clevel=3`` with shuffle for integer and datetime64 dtypes,
        ``clevel=1`` without shuffle for floating-point dtypes.
        ``None`` for every other dtype (bool, complex, strings, object, ...).
    """
    # Normalise so `.itemsize` below is always the integer byte width, even
    # when the caller passes a scalar type or a dtype string.
    dt = np.dtype(dt)

    # Test against the abstract scalar families. Passing the builtins `int` /
    # `float` here is interpreted by NumPy >= 1.20 as the single concrete
    # types np.dtype(int).type / np.float64, so narrower or unsigned dtypes
    # (int32, uint8, float32, ...) would silently fall through to `None`.
    # NOTE: np.timedelta64 derives from np.signedinteger, so timedelta dtypes
    # also take the integer branch.
    if np.issubdtype(dt, np.integer) or np.issubdtype(dt, np.datetime64):
        return bloscpack.BloscArgs(dt.itemsize, clevel=3, shuffle=True)
    if np.issubdtype(dt, np.floating):
        return bloscpack.BloscArgs(dt.itemsize, clevel=1, shuffle=False)
    return None
Beispiel #2
0
def save_object_to_disk(
    obj,
    file_path: Path,
    compression_method: str = None,  # "gzip" / "blosc"
    obj_name: str = None,
    element_size_estimate_mb: float = None,
) -> bool:
    """Pickle ``obj`` to ``file_path``, optionally gzip- or blosc-compressed.

    The parent directory of ``file_path`` is created if it does not exist.
    The file holds one pickle record per chunk, written back to back:

    * ``"gzip"``  - records are pickled straight into a gzip stream
      (``compresslevel=1``).
    * ``"blosc"`` - each chunk is pickled to bytes, packed with
      ``bloscpack.pack_bytes_to_bytes`` and the packed bytes are themselves
      pickled into the file.
    * anything else - records are pickled uncompressed. A value other than
      ``None`` triggers a warning, because it is most likely a typo.

    Args:
        obj: Object to store. When ``element_size_estimate_mb`` is given it is
            handed to the ``chunks`` helper (defined elsewhere in this module;
            presumably it requires a sliceable/iterable object - confirm).
        file_path: Destination file; a ``Path`` or a plain string path.
        compression_method: ``"gzip"``, ``"blosc"`` or ``None``.
        obj_name: Label used only in the debug log message.
        element_size_estimate_mb: Estimated size of one element of ``obj`` in
            MB. If given, ``obj`` is split into chunks of about 2000 MB each
            (at least one element per chunk). If ``None``, ``obj`` is written
            as a single chunk.

    Returns:
        Always ``True``. Failures are reported by the exception raised from
        the underlying pickle / compression / file operation; no size limit is
        enforced here.

    Raises:
        ZeroDivisionError: If ``element_size_estimate_mb`` is ``0``.
    """
    # Accept plain string paths as well as Path objects.
    file_path = Path(file_path)
    file_path.parent.mkdir(parents=True, exist_ok=True)

    if element_size_estimate_mb is not None:
        MAX_CHUNK_MB = 2000  # target chunk size; original author notes it is roughly tuned
        chunk_size = max(int(MAX_CHUNK_MB / element_size_estimate_mb), 1)
        # Materialised before any file is opened, so a failure while chunking
        # cannot leave an empty or partial file behind.
        chunked_obj = list(chunks(obj, chunk_size))
    else:
        # No size estimate: write the whole object as a single chunk.
        chunked_obj = [obj]

    # protocol=-1 selects the highest pickle protocol available.
    if compression_method == "gzip":
        with gzip.open(file_path, "wb", compresslevel=1) as f_gzip:
            for chunk_ in chunked_obj:
                pickle.dump(chunk_, f_gzip, protocol=-1)  # type: ignore

    elif compression_method == "blosc":
        blosc_args = bloscpack.BloscArgs(typesize=4, clevel=1, cname="zlib")
        with open(file_path, "wb") as f_blosc:
            for chunk_ in chunked_obj:
                p_chunk = pickle.dumps(chunk_, protocol=-1)
                cmprsd_chunk = bloscpack.pack_bytes_to_bytes(
                    p_chunk, blosc_args=blosc_args)
                # The packed bytes are pickled again so each record stays
                # self-delimiting within the file.
                pickle.dump(cmprsd_chunk, f_blosc, protocol=-1)

    else:
        if compression_method is not None:
            # Keep the historical fallback (save uncompressed) but make it
            # visible instead of logging a misleading "<typo>-compressed".
            logging.warning(
                "Unknown compression method %r; saving '%s' uncompressed",
                compression_method,
                file_path,
            )
            compression_method = None
        with open(file_path, "wb") as f:
            for chunk_ in chunked_obj:
                pickle.dump(chunk_, f, protocol=-1)

    # Lazy %-formatting: the message is only built if DEBUG is enabled.
    logging.debug(
        "Object '%s' of class '%s' (%s-compressed) saved as: %s",
        obj_name,
        obj.__class__.__name__,
        compression_method,
        file_path,
    )
    return True