Beispiel #1
0
def totif(images, path, prefix="image", overwrite=False, credentials=None):
    """
    Export every image as an individual TIF file.

    Each (key, image) record is encoded to TIFF in memory and handed to a
    parallel writer as a (filename, bytes) pair. Filenames have the form
    ``<prefix>-<key>.tif`` with the key zero-padded to five digits.

    Parameters
    ----------
    images : object exposing ``dims`` and ``foreach``
        The image collection to export. ``foreach`` is called with a
        function that accepts one (key, image) pair.

    path : string
        Destination passed to the parallel writer.

    prefix : str, optional, default = "image"
        Leading part of every output filename.

    overwrite : bool, optional, default = False
        Forwarded unchanged to the parallel writer.

    credentials : optional, default = None
        Forwarded unchanged to the parallel writer.

    Raises
    ------
    ValueError
        If ``images.dims`` does not have exactly 2 or 3 entries.

    See also
    --------
    thunder.data.images.totif
    """
    ndim = len(images.dims)
    if ndim not in (2, 3):
        raise ValueError("Only 2D or 3D images can be exported to tif, "
                         "images are %d-dimensional." % ndim)

    # Imports stay local so the dimensionality check above runs (and can
    # fail fast) without needing the optional dependencies.
    # NOTE(review): scipy.misc.imsave was deprecated in SciPy 1.0 and removed
    # in SciPy 1.2, so this import fails on newer SciPy installations.
    from scipy.misc import imsave
    from io import BytesIO
    from thunder.writers import get_parallel_writer

    def encode(record):
        # Turn one (key, image) record into (filename, TIFF-encoded bytes).
        index, image = record
        filename = "-".join((prefix, "%05d.tif" % int(index)))
        stream = BytesIO()
        imsave(stream, image, format='TIFF')
        return filename, stream.getvalue()

    # get_parallel_writer returns a writer factory chosen from the path.
    make_writer = get_parallel_writer(path)
    writer = make_writer(path, overwrite=overwrite, credentials=credentials)

    def emit(record):
        return writer.write(encode(record))

    images.foreach(emit)
Beispiel #2
0
def totif(images, path, prefix="image", overwrite=False, credentials=None):
    """
    Write out one TIF file per image, for 2D or 3D image data.

    Every (key, image) record is serialised to TIFF bytes in memory and
    passed to a parallel writer together with a filename of the form
    ``<prefix>-<key>.tif`` (key zero-padded to five digits).

    Parameters
    ----------
    images : object exposing ``dims`` and ``foreach``
        Source images; ``foreach`` receives a one-argument function that
        is given a (key, image) pair.

    path : string
        Target location handed to the parallel writer.

    prefix : str, optional, default = "image"
        String prefix for files.

    overwrite : bool, optional, default = False
        Passed through to the parallel writer.

    credentials : optional, default = None
        Passed through to the parallel writer.

    Raises
    ------
    ValueError
        When the number of image dimensions is neither 2 nor 3.

    See also
    --------
    thunder.data.images.totif
    """
    rank = len(images.dims)
    supported = rank == 2 or rank == 3
    if not supported:
        raise ValueError("Only 2D or 3D images can be exported to tif, "
                         "images are %d-dimensional." % rank)

    # Deferred imports: only needed once the input has been validated.
    # NOTE(review): scipy.misc.imsave no longer exists in SciPy >= 1.2;
    # this function requires an older SciPy as written.
    from scipy.misc import imsave
    from io import BytesIO
    from thunder.writers import get_parallel_writer

    def as_tif_record(pair):
        # Build the output name first, then encode the image into memory.
        name = "-".join([prefix, "%05d.tif" % int(pair[0])])
        sink = BytesIO()
        imsave(sink, pair[1], format='TIFF')
        return name, sink.getvalue()

    writer = get_parallel_writer(path)(
        path, overwrite=overwrite, credentials=credentials)
    images.foreach(lambda pair: writer.write(as_tif_record(pair)))
Beispiel #3
0
def tobinary(images, path, prefix="image", overwrite=False, credentials=None):
    """
    Write each image out as its own binary file, then record a config.

    Every (key, image) record is sent to a parallel writer as a pair of
    filename and a copy of the image; filenames have the form
    ``<prefix>-<key>.bin`` with the key zero-padded to five digits.
    Afterwards ``config`` is called with the path, the image dimensions
    (as a list) and the dtype.

    Parameters
    ----------
    images : object exposing ``dims``, ``dtype`` and ``foreach``
        The images to write.

    path : string
        Destination passed to the parallel writer and to ``config``.

    prefix : str, optional, default = "image"
        String prefix for files.

    overwrite : bool, optional, default = False
        Forwarded to both the parallel writer and ``config``.

    credentials : optional, default = None
        Forwarded to the parallel writer only.

    See also
    --------
    thunder.data.images.tobinary
    """
    from thunder.writers import get_parallel_writer

    def named_copy(record):
        # Pair the output filename with an independent copy of the image.
        index, image = record
        return "-".join((prefix, "%05d.bin" % int(index))), image.copy()

    make_writer = get_parallel_writer(path)
    writer = make_writer(path, overwrite=overwrite, credentials=credentials)

    def emit(record):
        return writer.write(named_copy(record))

    images.foreach(emit)

    # `config` is defined outside this block; presumably it writes the
    # metadata needed to read the binary files back -- confirm.
    config(path, list(images.dims), images.dtype, overwrite=overwrite)
Beispiel #4
0
def tobinary(images, path, prefix="image", overwrite=False, credentials=None):
    """
    Export images as binary files, one file per image.

    For each (key, image) record a copy of the image is written through a
    parallel writer under the name ``<prefix>-<key>.bin`` (key zero-padded
    to five digits). Once all records have been submitted, ``config`` is
    invoked with the path, ``list(images.dims)`` and ``images.dtype``.

    Parameters
    ----------
    images : object exposing ``dims``, ``dtype`` and ``foreach``
        Source images.

    path : string
        Output location, given to the parallel writer and to ``config``.

    prefix : str, optional, default = "image"
        Leading part of each output filename.

    overwrite : bool, optional, default = False
        Passed to the parallel writer and to ``config``.

    credentials : optional, default = None
        Passed to the parallel writer; not passed to ``config``.

    See also
    --------
    thunder.data.images.tobinary
    """
    from thunder.writers import get_parallel_writer

    writer_factory = get_parallel_writer(path)
    writer = writer_factory(
        path, overwrite=overwrite, credentials=credentials)

    def to_record(pair):
        # Filename is built before the image is copied.
        name = "-".join([prefix, "%05d.bin" % int(pair[0])])
        return name, pair[1].copy()

    images.foreach(lambda pair: writer.write(to_record(pair)))

    # NOTE(review): `config` comes from elsewhere in this module; it looks
    # like it records dims/dtype alongside the data -- verify.
    dims = list(images.dims)
    config(path, dims, images.dtype, overwrite=overwrite)
Beispiel #5
0
def tobinary(series, path, prefix='series', overwrite=False, credentials=None):
    """
    Writes out data to binary format.

    The raw bytes of all values in a partition (spark mode) or of the whole
    array (local mode) are concatenated into one ``.bin`` file whose name is
    built from ``prefix`` and the label of the first key; a config entry
    with the shape and dtype is written afterwards.

    Parameters
    ----------
    series : Series
        The data to write

    path : string path or URI to directory to be created
        Output files will be written underneath path.
        Directory will be created as a result of this call.

    prefix : str, optional, default = 'series'
        String prefix for files.

    overwrite : bool
        If true, path and all its contents will be deleted and
        recreated as part of this call.

    credentials : optional, default = None
        Passed through unchanged to the path check, the parallel writer
        and the config writer.
    """
    from six import BytesIO
    from thunder.utils import check_path
    from thunder.writers import get_parallel_writer

    if not overwrite:
        # Validate the target once up front (check_path presumably rejects
        # an already existing path -- confirm). After that, the writers
        # below are deliberately invoked with overwrite=True.
        check_path(path, credentials=credentials)
        overwrite = True

    def tobuffer(kv):
        # Concatenate the bytes of every value in `kv` into one record,
        # labelled by the first key seen. Returns an iterator so it can be
        # used directly with mapPartitions.
        firstkey = None
        buf = BytesIO()
        for k, v in kv:
            if firstkey is None:
                firstkey = k
            # tobytes() replaces the deprecated ndarray.tostring() alias,
            # which newer NumPy releases no longer provide (assumes values
            # are NumPy arrays, as the original tostring() call did).
            buf.write(v.tobytes())
        val = buf.getvalue()
        buf.close()
        if firstkey is None:
            # Empty partition: nothing to write.
            return iter([])
        label = prefix + '-' + getlabel(firstkey) + ".bin"
        return iter([(label, val)])

    writer = get_parallel_writer(path)(path,
                                       overwrite=overwrite,
                                       credentials=credentials)

    if series.mode == 'spark':
        # Sort by key so every partition holds records in key order
        # before they are concatenated.
        binary = series.values.tordd().sortByKey().mapPartitions(tobuffer)
        binary.foreach(writer.write)

    else:
        basedims = [series.shape[d] for d in series.baseaxes]

        def split(k):
            # Map a flat index over the base axes to (index tuple, value).
            ind = unravel_index(k, basedims)
            return ind, series.values[ind]

        # tobuffer consumes its input exactly once, so a generator is
        # enough; a plain loop is used because the writes are side effects.
        records = tobuffer(split(i) for i in range(prod(basedims)))
        for record in records:
            writer.write(record)

    shape = series.shape
    dtype = series.dtype

    write_config(path,
                 shape=shape,
                 dtype=dtype,
                 overwrite=overwrite,
                 credentials=credentials)
Beispiel #6
0
def tobinary(series, path, prefix='series', overwrite=False, credentials=None):
    """
    Writes out data to binary format.

    Values are serialised to raw bytes and concatenated, one output file
    per partition in spark mode and a single file in local mode. Each file
    is named ``<prefix>-<label>.bin`` where the label is derived from the
    first key it contains. Shape and dtype are then passed to
    ``write_config``.

    Parameters
    ----------
    series : Series
        The data to write

    path : string path or URI to directory to be created
        Output files will be written underneath path.
        Directory will be created as a result of this call.

    prefix : str, optional, default = 'series'
        String prefix for files.

    overwrite : bool
        If true, path and all its contents will be deleted and
        recreated as part of this call.

    credentials : optional, default = None
        Forwarded to ``check_path``, the parallel writer and
        ``write_config``.
    """
    from six import BytesIO
    from thunder.utils import check_path
    from thunder.writers import get_parallel_writer

    if not overwrite:
        # The path is checked here once (presumably failing if it already
        # exists -- confirm); from then on the writers are intentionally
        # called with overwrite=True.
        check_path(path, credentials=credentials)
        overwrite = True

    def tobuffer(kv):
        # Fold an iterable of (key, value) pairs into at most one
        # (filename, bytes) record; returned as an iterator for
        # compatibility with mapPartitions.
        firstkey = None
        buf = BytesIO()
        for k, v in kv:
            if firstkey is None:
                firstkey = k
            # Use tobytes(): ndarray.tostring() is a deprecated alias that
            # has been dropped from recent NumPy (assumes NumPy values, as
            # the previous tostring() call did).
            buf.write(v.tobytes())
        val = buf.getvalue()
        buf.close()
        if firstkey is None:
            # No records at all: emit nothing.
            return iter([])
        label = prefix + '-' + getlabel(firstkey) + ".bin"
        return iter([(label, val)])

    writer = get_parallel_writer(path)(path, overwrite=overwrite, credentials=credentials)

    if series.mode == 'spark':
        # Records are sorted by key before being grouped per partition.
        binary = series.values.tordd().sortByKey().mapPartitions(tobuffer)
        binary.foreach(writer.write)

    else:
        basedims = [series.shape[d] for d in series.baseaxes]

        def split(k):
            # Convert flat position k into an index over the base axes and
            # fetch the corresponding value.
            ind = unravel_index(k, basedims)
            return ind, series.values[ind]

        # Explicit loop instead of a throwaway list comprehension: the
        # writes are side effects and their return values are unused.
        for record in tobuffer(split(i) for i in range(prod(basedims))):
            writer.write(record)

    shape = series.shape
    dtype = series.dtype

    write_config(path, shape=shape, dtype=dtype, overwrite=overwrite, credentials=credentials)