def stretch_linear(self, cutoffs=(0.005, 0.005)):
    """Stretch linearly the contrast of the current image.

    Use *cutoffs* for left and right trimming.
    """
    logger.debug("Perform a linear contrast stretch.")
    logger.debug("Calculate the histogram quantiles: ")
    logger.debug("Left and right quantiles: " +
                 str(cutoffs[0]) + " " + str(cutoffs[1]))
    cutoff_type = np.float64
    # numpy percentile (which quantile calls) returns 64-bit floats
    # unless the value is a higher order float
    if np.issubdtype(self.data.dtype, np.floating) and \
            np.dtype(self.data.dtype).itemsize > 8:
        cutoff_type = self.data.dtype
    left, right = dask.delayed(self._compute_quantile, nout=2)(
        self.data.data, self.data.dims, cutoffs)
    left_data = da.from_delayed(left,
                                shape=(self.data.sizes['bands'],),
                                dtype=cutoff_type)
    left = xr.DataArray(left_data, dims=('bands',),
                        coords={'bands': self.data['bands']})
    right_data = da.from_delayed(right,
                                 shape=(self.data.sizes['bands'],),
                                 dtype=cutoff_type)
    right = xr.DataArray(right_data, dims=('bands',),
                         coords={'bands': self.data['bands']})
    self.crude_stretch(left, right)
def change_dtype(data: list, output_dtype: str, offset: np.array) -> list:
    """Lazy histogram-preserving datatype adjustment of a collection of
    array-likes.

    Signed datatypes (int8, int16) are converted to their unsigned
    counterparts (uint8, uint16) by upcasting to a signed type with higher
    precision, shifting all values by a constant, then downcasting to the
    final unsigned datatype. The resulting arrays have a global minimum of
    0, with the original min-max distance preserved.
    """
    @dask.delayed
    def adjuster(arr, upcast, offset, dtype):
        assert arr.ndim == offset.ndim
        return (arr.astype(upcast) - offset).astype(dtype)

    if output_dtype == "same":
        return data
    elif output_dtype == "uint8":
        assert (data[0].dtype == "int8") or (data[0].dtype == ">i1")
        upcast = "int16"
    elif output_dtype == "uint16":
        assert (data[0].dtype == "int16") or (data[0].dtype == ">i2")
        upcast = "int32"
    else:
        # guard against falling through with `upcast` undefined
        raise ValueError("Unsupported output_dtype: %s" % output_dtype)

    return [
        da.from_delayed(
            adjuster(d, upcast=upcast, offset=offset.reshape(-1, 1, 1),
                     dtype=output_dtype),
            dtype=output_dtype,
            shape=d.shape,
        )
        for d in data
    ]
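# Usage sketch (not from the original source): shift a lazy int8 stack into
# uint8 while preserving relative spacing. The per-channel `offset` is
# assumed to be the global minimum of each channel; change_dtype reshapes it
# to broadcast over (channel, y, x) stacks.
import dask.array as da
import numpy as np

stacks = [da.from_array(np.arange(-4, 4, dtype="int8").reshape(2, 2, 2))]
offset = np.array([-4, -4])  # hypothetical per-channel minima
out = change_dtype(stacks, "uint8", offset)
assert out[0].compute().min() == 0 and str(out[0].dtype) == "uint8"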
def transform(self, X):
    """Transform a sequence of documents to a document-term matrix.

    Transformation is done in parallel, and correctly handles dask
    collections.

    Parameters
    ----------
    X : dask.Bag of raw text documents, length = n_samples
        Samples. Each sample must be a text document (either bytes or
        unicode strings, file name or file object depending on the
        constructor argument) which will be tokenized and hashed.

    Returns
    -------
    X : dask.array.Array, shape = (n_samples, self.n_features)
        Document-term matrix. Each block of the array is a scipy sparse
        matrix.

    Notes
    -----
    The returned dask Array is composed of scipy sparse matrices. If you
    need to compute on the result immediately, you may need to convert
    the individual blocks to ndarrays or pydata/sparse matrices.

    >>> import sparse
    >>> X.map_blocks(sparse.COO.from_scipy_sparse)  # doctest: +SKIP

    See the :doc:`examples/text-vectorization` for more.
    """
    transformer = super(HashingVectorizer, self).transform
    msg = "'X' should be a 1-dimensional array with length 'num_samples'."

    if not dask.is_dask_collection(X):
        return transformer(X)

    if isinstance(X, db.Bag):
        bag2 = X.map_partitions(transformer)
        objs = bag2.to_delayed()
        arrs = [
            da.from_delayed(obj, (np.nan, self.n_features), self.dtype)
            for obj in objs
        ]
        result = da.concatenate(arrs, axis=0)
    elif isinstance(X, dd.Series):
        result = X.map_partitions(transformer)
    elif isinstance(X, da.Array):
        # dask.Array
        chunks = ((np.nan,) * X.numblocks[0], (self.n_features,))
        if X.ndim == 1:
            result = X.map_blocks(
                transformer, dtype="f8", chunks=chunks, new_axis=1
            )
        else:
            raise ValueError(msg)
    else:
        raise ValueError(msg)

    return result
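# Usage sketch (not from the original source): hashing a small dask Bag of
# raw documents. The import path assumes the dask-ml package this method
# belongs to.
import dask.bag as db
from dask_ml.feature_extraction.text import HashingVectorizer

docs = db.from_sequence(["the quick brown fox", "lazy dogs sleep"],
                        npartitions=2)
X = HashingVectorizer(n_features=2 ** 10).transform(docs)
print(X.shape)  # (nan, 1024): the row count stays unknown until computed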
def test_clock_tec_solve_dask():
    np.random.seed(1234)
    import pylab as plt
    times = np.arange(2)
    freqs = np.linspace(110e6, 170e6, 1000)
    cs = np.array([1, 1])
    tec = np.array([0.1, 0.2])
    delay = np.ones(len(times)) * 2e-9  # 2 ns
    phase = np.multiply.outer(np.ones(len(freqs)), cs) \
        + 8.44797256e-7 * TECU * np.multiply.outer(1. / freqs, tec) \
        + 2. * np.pi * np.multiply.outer(freqs, delay)
    phase += 15 * np.pi / 180. * np.random.normal(
        size=[len(freqs), len(times)])
    #plt.imshow(phase, origin='lower',
    #           extent=(times[0], times[-1], freqs[0], freqs[-1]),
    #           aspect='auto')
    #plt.colorbar()
    #plt.xlabel('times (s)')
    #plt.ylabel('freqs (Hz)')
    #plt.show()
    m, cov = least_squares_solve(phase, freqs, times, 15, Ct_ratio=0.01)
    m_exact = np.array([delay, tec, cs]).T
    import dask.array as da
    solsMH = [
        da.from_delayed(clock_tec_solve_dask(phase[:, i], freqs, m[i, :],
                                             cov[i, :, :], 15, 0.01),
                        shape=(3,), dtype=np.double)
        for i in range(len(times))
    ]
    sol_stacked = da.stack(solsMH, axis=0)
    sol = sol_stacked.compute()
    print(sol)
def arrays_from_delayed(args, shapes=None, dtypes=None):
    """
    Parameters
    ----------
    args: a collection of dask.delayed objects representing lazy-loaded
        arrays.

    shapes: a collection of tuples specifying the shape of each array in
        args, or None. If None, the first array will be loaded using local
        computation, and the shape of that array will be used for all
        subsequent arrays.

    dtypes: a collection of strings specifying the datatype of each array
        in args, or None. If None, the first array will be loaded using
        local computation and the dtype of that array will be used for all
        subsequent arrays.

    Returns
    -------
    a list of dask arrays.
    """
    if shapes is None or dtypes is None:
        sample = args[0].compute(scheduler="threads")
        if shapes is None:
            shapes = (sample.shape,) * len(args)
        if dtypes is None:
            dtypes = (sample.dtype,) * len(args)

    assert len(shapes) == len(args) and len(dtypes) == len(args)

    arrays = [
        da.from_delayed(args[ind], shape=shapes[ind], dtype=dtypes[ind])
        for ind in range(len(args))
    ]
    return arrays
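# Usage sketch (not from the original source): wrap a few delayed loaders as
# dask arrays, letting the helper infer shape/dtype from the first element.
import dask
import dask.array as da
import numpy as np

loaders = [dask.delayed(np.full)((4, 4), i) for i in range(3)]
arrays = arrays_from_delayed(loaders)
stacked = da.stack(arrays)  # lazy (3, 4, 4) stack
assert stacked.compute().shape == (3, 4, 4)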
def test_bag_array_conversion():
    import dask.bag as db
    b = db.range(10, npartitions=1)
    x, = b.map_partitions(np.asarray).to_delayed()
    x, = [da.from_delayed(a, shape=(10,), dtype=int) for a in [x]]
    z = da.concatenate([x])
    assert_eq(z, np.arange(10), check_graph=False)
def _band_hist(band_data):
    cdf = da.arange(0., 1., 1. / nwidth, chunks=nwidth)
    if approximate:
        # need a 1D array
        flat_data = band_data.ravel()
        # replace with nanpercentile in the future, if available
        # dask < 0.17 returns all NaNs for this
        bins = da.percentile(flat_data[da.notnull(flat_data)], cdf * 100.)
    else:
        bins = dask.delayed(np.nanpercentile)(band_data, cdf * 100.)
        bins = da.from_delayed(bins, shape=(nwidth,), dtype=cdf.dtype)
    res = dask.delayed(np.interp)(band_data, bins, cdf)
    res = da.from_delayed(res, shape=band_data.shape, dtype=band_data.dtype)
    return res
def _map_all(self, function, inplace=True, **kwargs):
    calc_result = dd(function)(self.data, **kwargs)
    if inplace:
        self.data = da.from_delayed(calc_result,
                                    shape=self.data.shape,
                                    dtype=self.data.dtype)
        return None
    return self._deepcopy_with_new_data(calc_result)
def file_reader(filename, **kwds):
    """Read data from any format supported by PIL.

    Parameters
    ----------
    filename: str
    """
    dc = _read_data(filename)
    lazy = kwds.pop('lazy', False)
    if lazy:
        # load the image fully to check the dtype and shape, should be
        # cheap. Then store this info for later re-loading when required.
        from dask.array import from_delayed
        from dask import delayed
        val = delayed(_read_data, pure=True)(filename)
        dc = from_delayed(val, shape=dc.shape, dtype=dc.dtype)
    return [{'data': dc,
             'metadata': {
                 'General': {
                     'original_filename': os.path.split(filename)[1]},
                 'Signal': {'signal_type': "",
                            'record_by': 'image'},
             }}]
def _get_measurement(datasources, geobox, resampling, no_data, dtype,
                     fuse_func=None):
    """Gets the measurement array of a band of data."""
    # pylint: disable=broad-except, protected-access

    def copyto_fuser(dest, src):
        """
        :type dest: numpy.ndarray
        :type src: numpy.ndarray
        """
        where_nodata = (dest == no_data) if not numpy.isnan(no_data) \
            else numpy.isnan(dest)
        numpy.copyto(dest, src, where=where_nodata)
        return dest

    fuse_func = fuse_func or copyto_fuser
    destination = _make_destination(geobox.shape, no_data, dtype)
    for source in datasources:
        buffer = delayed(_read_file)(source, geobox,
                                     band=source.get_bandnumber(),
                                     no_data=no_data,
                                     resampling=resampling)
        destination = delayed(fuse_func)(destination, buffer)
    return da.from_delayed(destination, geobox.shape, dtype)
def get_dataset(self, key, info):
    """Load a dataset."""
    if self._channel != key.name:
        return
    logger.debug('Reading %s.', key.name)
    # FIXME: get this from MTD_MSIL1C.xml
    quantification_value = 10000.
    jp2 = glymur.Jp2k(self.filename)
    bitdepth = 0
    for seg in jp2.codestream.segment:
        try:
            bitdepth = max(bitdepth, seg.bitdepth[0])
        except AttributeError:
            pass

    jp2.dtype = (np.uint8 if bitdepth <= 8 else np.uint16)

    # Initialize the jp2 reader / doesn't work in a multi-threaded context.
    # jp2[0, 0]
    # data = da.from_array(jp2, chunks=CHUNK_SIZE) / quantification_value * 100

    data = da.from_delayed(delayed(jp2.read)(), jp2.shape, jp2.dtype)
    data = data.rechunk(CHUNK_SIZE) / quantification_value * 100

    proj = DataArray(data, dims=['y', 'x'])
    proj.attrs = info.copy()
    proj.attrs['units'] = '%'
    proj.attrs['platform_name'] = self.platform_name
    return proj
def get_da_images(files, which="data", shape=ZTF_IMAGE_SHAPE, dtype="float32"):
    """Get a stacked dask array of the ziff images you want.

    = Works only with single ziff =
    """
    lazy_arrays = [dask.delayed(get_ziff_single_image)(f_, which=which)
                   for f_ in files]
    lazy_arrays = [da.from_delayed(x_, shape=shape, dtype=dtype)
                   for x_ in lazy_arrays]
    return da.stack(lazy_arrays)
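# Usage sketch (not from the original source): build a lazy image cube from a
# list of ziff files and reduce it without loading everything into memory.
# `files` is a placeholder for paths understood by get_ziff_single_image.
cube = get_da_images(files, which="data")  # shape (n_files, *ZTF_IMAGE_SHAPE)
mean_image = cube.mean(axis=0)             # still lazy
result = mean_image.compute()              # triggers the delayed reads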
def fit(self, X, y=None):
    q_min, q_max = self.quantile_range
    if not 0 <= q_min <= q_max <= 100:
        raise ValueError("Invalid quantile range: %s" %
                         str(self.quantile_range))

    if isinstance(X, dd.DataFrame):
        n_columns = len(X.columns)
        partition_lengths = X.map_partitions(len).compute()
        dtype = np.find_common_type(X.dtypes, [])
        blocks = X.to_delayed()
        X = da.vstack(
            [
                da.from_delayed(block.values, shape=(length, n_columns),
                                dtype=dtype)
                for block, length in zip(blocks, partition_lengths)
            ]
        )

    quantiles = [da.percentile(col, [q_min, 50.0, q_max]) for col in X.T]
    quantiles = da.vstack(quantiles).compute()
    self.center_ = quantiles[:, 1]
    self.scale_ = quantiles[:, 2] - quantiles[:, 0]
    self.scale_ = skdata._handle_zeros_in_scale(self.scale_, copy=False)
    return self
def get_data_lazy(image: ImageWrapper, c_index: int = 0) -> da.Array:
    """Get n-dimensional dask array, with delayed reading from OMERO image."""
    size_z = image.getSizeZ()
    size_t = image.getSizeT()
    size_x = image.getSizeX()
    size_y = image.getSizeY()
    pixels = image.getPrimaryPixels()

    @delayed
    @timer
    def get_plane(plane_name):
        z, c, t = [int(n) for n in plane_name.split(",")]
        p = pixels.getPlane(z, c, t)
        return p

    dtype = PIXEL_TYPES.get(pixels.getPixelsType().value, None)
    plane_names = [f"{z},{c_index},{t}"
                   for t in range(size_t)
                   for z in range(size_z)]
    lazy_arrays = [get_plane(pn) for pn in plane_names]
    dask_arrays = [
        da.from_delayed(delayed_reader, shape=(size_y, size_x), dtype=dtype)
        for delayed_reader in lazy_arrays
    ]
    # Stack into one large dask.array
    if size_z == 1 or size_t == 1:
        return da.stack(dask_arrays, axis=0)

    z_stacks = []
    for t in range(size_t):
        z_stacks.append(
            da.stack(dask_arrays[t * size_z:(t + 1) * size_z], axis=0))
    stack = da.stack(z_stacks, axis=0)
    return stack
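# A minimal, self-contained sketch of the same pattern (not from the original
# source): the OMERO plane fetch is replaced by a synthetic reader to show
# how per-plane delayed calls become one (t, z, y, x) dask array.
import dask
import dask.array as da
import numpy as np

size_t, size_z, size_y, size_x = 2, 3, 8, 8

@dask.delayed
def fake_get_plane(z, t):
    return np.full((size_y, size_x), t * 10 + z, dtype=np.uint16)

planes = [
    da.from_delayed(fake_get_plane(z, t), shape=(size_y, size_x),
                    dtype=np.uint16)
    for t in range(size_t) for z in range(size_z)
]
stack = da.stack([da.stack(planes[t * size_z:(t + 1) * size_z])
                  for t in range(size_t)])
assert stack.shape == (size_t, size_z, size_y, size_x)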
def _split(self, test_start, test_stop, n_samples, chunks, seeds):
    train_objs = []
    test_objs = []
    train_sizes = []
    test_sizes = []

    offset = 0
    for chunk, seed in zip(chunks, seeds):
        start, stop = offset, offset + chunk

        test_id_start = max(test_start, start)
        test_id_stop = min(test_stop, stop)
        if test_id_start < test_id_stop:
            test_objs.append(
                dask.delayed(_generate_offset_idx)(chunk, test_id_start,
                                                   test_id_stop, offset,
                                                   seed))
            test_sizes.append(test_id_stop - test_id_start)

        train_id_stop = min(test_id_start, stop)
        if train_id_stop > start:
            train_objs.append(
                dask.delayed(_generate_offset_idx)(chunk, start,
                                                   train_id_stop, offset,
                                                   seed))
            train_sizes.append(train_id_stop - start)

        train_id_start = max(test_id_stop, start)
        if train_id_start < stop:
            train_objs.append(
                dask.delayed(_generate_offset_idx)(chunk, train_id_start,
                                                   stop, offset, seed))
            train_sizes.append(stop - train_id_start)

        offset = stop

    train_idx = da.concatenate([
        da.from_delayed(obj, (train_size,), np.dtype("int"))
        for obj, train_size in zip(train_objs, train_sizes)
    ])
    test_idx = da.concatenate([
        da.from_delayed(obj, (test_size,), np.dtype("int"))
        for obj, test_size in zip(test_objs, test_sizes)
    ])

    return train_idx, test_idx
def compute(self, data, cache_id=None, fill_value=0, weight_count=10000,
            weight_min=0.01, weight_distance_max=1.0,
            weight_delta_max=1.0, weight_sum_min=-1.0,
            maximum_weight_mode=False, grid_coverage=0, **kwargs):
    """Resample the data according to the precomputed X/Y coordinates."""
    rows = self.cache["rows"]
    cols = self.cache["cols"]

    # if the data is scan based then check its metadata or the passed
    # kwargs otherwise assume the entire input swath is one large
    # "scanline"
    rows_per_scan = kwargs.get(
        'rows_per_scan', data.attrs.get("rows_per_scan", data.shape[0]))

    if data.ndim == 3 and 'bands' in data.dims:
        data_in = tuple(data.sel(bands=band).data
                        for band in data['bands'])
    elif data.ndim == 2:
        data_in = data.data
    else:
        raise ValueError("Unsupported data shape for EWA resampling.")

    res = dask.delayed(self._call_fornav)(
        cols, rows, self.target_geo_def, data_in,
        grid_coverage=grid_coverage,
        rows_per_scan=rows_per_scan, weight_count=weight_count,
        weight_min=weight_min, weight_distance_max=weight_distance_max,
        weight_delta_max=weight_delta_max, weight_sum_min=weight_sum_min,
        maximum_weight_mode=maximum_weight_mode)
    if isinstance(data_in, tuple):
        new_shape = (len(data_in),) + self.target_geo_def.shape
    else:
        new_shape = self.target_geo_def.shape
    data_arr = da.from_delayed(res, new_shape, data.dtype)
    # from_delayed creates one large chunk, break it up a bit if we can
    data_arr = data_arr.rechunk([CHUNK_SIZE] * data_arr.ndim)
    if data.ndim == 3 and data.dims[0] == 'bands':
        dims = ('bands', 'y', 'x')
    elif data.ndim == 2:
        dims = ('y', 'x')
    else:
        dims = data.dims

    res = xr.DataArray(data_arr, dims=dims, attrs=data.attrs.copy())
    return update_resampled_coords(data, res, self.target_geo_def)
def dask_from_mov(path):
    vid = imageio.get_reader(path, 'ffmpeg')
    shape = vid.get_meta_data()['size'][::-1] + (3,)
    lazy_imread = delayed(vid.get_data)
    return da.stack([
        da.from_delayed(lazy_imread(i), shape=shape, dtype=np.uint8)
        for i in range(vid.count_frames())
    ])
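# Usage sketch (not from the original source): lazily open a movie and
# compute a per-frame mean brightness without decoding all frames up front.
# 'movie.mov' is a placeholder path.
frames = dask_from_mov("movie.mov")       # (n_frames, h, w, 3) uint8
brightness = frames.mean(axis=(1, 2, 3))  # one small value per frame
print(brightness.compute())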
def make_da(delayed_list, length):
    sample = delayed_list[0].compute()
    arrays = [
        da.from_delayed(item, dtype=sample.dtype, shape=sample.shape)
        for item in delayed_list
    ]
    result = da.concatenate(arrays, axis=0)[:length]
    return result
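# Usage sketch (not from the original source): concatenate equally-shaped
# delayed chunks and trim to a known total length.
import dask
import numpy as np

chunks = [dask.delayed(np.arange)(4) for _ in range(3)]
arr = make_da(chunks, length=10)  # 3 x 4 elements available, keep first 10
assert arr.compute().shape == (10,)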
def get_lazy_arrays(glob_filenames, imread_sample):
    lazy_arrays = [dask.delayed(imread)(fn) for fn in glob_filenames]
    lazy_arrays = [
        da.from_delayed(x, shape=imread_sample.shape,
                        dtype=imread_sample.dtype)
        for x in lazy_arrays
    ]
    return lazy_arrays
def as_known(X, lengths):
    blocks = X.to_delayed().flatten()
    P = X.shape[1]
    arrays = [
        da.from_delayed(x, dtype=X.dtype, shape=(length, P))
        for x, length in zip(blocks, lengths)
    ]
    return da.concatenate(arrays, axis=0)
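# Usage sketch (not from the original source): rebuild a 2-D dask array with
# unknown chunk lengths (e.g. after boolean filtering) into one with known
# chunks, using per-block lengths computed up front.
import dask
import dask.array as da
import numpy as np

x = da.from_array(np.arange(20).reshape(10, 2), chunks=(4, 2))
filtered = x[x[:, 0] > 6]  # row-chunk sizes become unknown (nan)
blocks = filtered.to_delayed().flatten()
lengths = dask.compute(*[dask.delayed(lambda b: b.shape[0])(blk)
                         for blk in blocks])
known = as_known(filtered, lengths)
assert known.chunks[0] == tuple(lengths)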
def hdulists_keyword_to_dask_array(all_hduls, keyword, ext=0, dtype=float):
    arr = da.stack([
        da.from_delayed(_kw_to_0d_seq(hdul, ext, keyword),
                        shape=(), dtype=dtype)
        for hdul in all_hduls
    ])
    log.info(f"Header keyword {keyword} extracted to new {arr.shape} sequence")
    return arr
def image(self):
    """Returns a delayed dask call for fetching the image for a data
    point."""
    token = gbdx.gbdx_connection.access_token
    load = load_image(self.links["image"]["href"], token,
                      self.imshape, dtype=self.dtype)
    return da.from_delayed(load, shape=self.imshape, dtype=self.dtype)
def func(band_data, kernel=kernel, mode=mode, index=None):
    del index
    delay = dask.delayed(_three_d_effect_delayed)(band_data, kernel, mode)
    new_data = da.from_delayed(delay, shape=band_data.shape,
                               dtype=band_data.dtype)
    return new_data
def test_from_delayed_meta():
    def f():
        return sparse.COO.from_numpy(np.eye(3))

    d = dask.delayed(f)()
    x = da.from_delayed(d, shape=(3, 3),
                        meta=sparse.COO.from_numpy(np.eye(1)))
    assert isinstance(x._meta, sparse.COO)
    assert_eq(x, x)
def _graph_standard_degrid(vis_dataset, grid, briggs_factors, cgk_1D,
                           grid_parms):
    import dask
    import dask.array as da
    import xarray as xr
    import time
    import itertools

    # Getting data for gridding
    chan_chunk_size = \
        vis_dataset[grid_parms["imaging_weight_name"]].chunks[2][0]
    freq_chan = da.from_array(vis_dataset.coords['chan'].values,
                              chunks=(chan_chunk_size))

    n_chunks_in_each_dim = \
        vis_dataset[grid_parms["imaging_weight_name"]].data.numblocks
    chunk_indx = []

    iter_chunks_indx = itertools.product(
        np.arange(n_chunks_in_each_dim[0]),
        np.arange(n_chunks_in_each_dim[1]),
        np.arange(n_chunks_in_each_dim[2]),
        np.arange(n_chunks_in_each_dim[3]))
    #n_delayed = np.prod(n_chunks_in_each_dim)
    chunk_sizes = vis_dataset[grid_parms["imaging_weight_name"]].chunks
    n_chan_chunks_img = n_chunks_in_each_dim[2]
    list_of_degrids = []
    list_of_sum_weights = []
    list_of_degrids = ndim_list(n_chunks_in_each_dim)

    # Build graph
    for c_time, c_baseline, c_chan, c_pol in iter_chunks_indx:
        if grid_parms['chan_mode'] == 'cube':
            a_c_chan = c_chan
        else:
            a_c_chan = 0

        if grid_parms['do_imaging_weight']:
            sub_degrid = dask.delayed(
                _standard_imaging_weight_degrid_numpy_wrap)(
                grid.partitions[0, 0, a_c_chan, c_pol],
                vis_dataset[grid_parms["uvw_name"]]
                    .data.partitions[c_time, c_baseline, 0],
                vis_dataset[grid_parms["imaging_weight_name"]]
                    .data.partitions[c_time, c_baseline, c_chan, c_pol],
                briggs_factors.partitions[:, a_c_chan, c_pol],
                freq_chan.partitions[c_chan],
                dask.delayed(grid_parms))
            single_chunk_size = (chunk_sizes[0][c_time],
                                 chunk_sizes[1][c_baseline],
                                 chunk_sizes[2][c_chan],
                                 chunk_sizes[3][c_pol])
            list_of_degrids[c_time][c_baseline][c_chan][c_pol] = \
                da.from_delayed(sub_degrid, single_chunk_size,
                                dtype=np.double)
        else:
            print('Degridding of visibilities and psf still needs to be '
                  'implemented')
            #sub_grid_and_sum_weights = dask.delayed(_standard_grid_numpy_wrap)(
            #    vis_dataset[grid_parms["data"]].data.partitions[c_time, c_baseline, c_chan, c_pol],
            #    vis_dataset[grid_parms["uvw"]].data.partitions[c_time, c_baseline, 0],
            #    vis_dataset[grid_parms["imaging_weight"]].data.partitions[c_time, c_baseline, c_chan, c_pol],
            #    freq_chan.partitions[c_chan],
            #    dask.delayed(cgk_1D), dask.delayed(grid_parms))

    degrid = da.block(list_of_degrids)
    return degrid
def compute_gradient_dask(rays, g, dobs, i0, K_ne, m_tci, m_prior, CdCt,
                          sigma_m, Nkernel, size_cell, cov_obj=None):
    L_m = Nkernel * size_cell
    # #i not eq i0 mask
    # mask = np.ones(rays.shape[0], dtype=np.bool)
    # mask[i0] = False
    # rays = rays[mask, :, :, :, :]
    # g = g[mask, :, :]
    # dobs = dobs[mask, :, :]
    # CdCt = CdCt[mask, :, :]

    # residuals
    # g.shape, dobs.shape [Na, Nt, Nd]
    dd = g - dobs
    # weighted residuals
    # Cd.shape [Na, Nt, Nd] i.e. diagonal
    # CdCt^-1 = 1./CdCt
    dd /= (CdCt + 1e-15)
    # get ray info
    Na, Nt, Nd, _, Ns = rays.shape
    # if Na < Nd:
    #     # parallelize over antennas
    #     gradient = da.sum(da.stack([da.from_delayed(delayed(do_gradient)(
    #         rays[i, :, :, :, :], dd[i, :, :], K_ne, m_tci, sigma_m,
    #         Nkernel, size_cell), (m_tci.nx, m_tci.ny, m_tci.nz),
    #         dtype=np.double) for i in range(Na)], axis=-1), axis=-1)
    # else:
    #     # parallelize over directions
    #     gradient = da.sum(da.stack([da.from_delayed(delayed(do_gradient)(
    #         rays[:, :, d, :, :], dd[:, :, d], K_ne, m_tci, sigma_m,
    #         Nkernel, size_cell), (m_tci.nx, m_tci.ny, m_tci.nz),
    #         dtype=np.double) for d in range(Nd)], axis=-1), axis=-1)

    # parallelize over directions
    ne_tci = m_tci.copy()
    np.exp(ne_tci.M, out=ne_tci.M)
    ne_tci.M *= K_ne / TECU
    gradient = da.sum(da.stack([
        da.from_delayed(delayed(do_gradient)(
            rays[:, :, d, :, :], dd[:, :, d], ne_tci, sigma_m, Nkernel,
            size_cell, i0), (m_tci.nx, m_tci.ny, m_tci.nz),
            dtype=np.double)
        for d in range(Nd)
    ], axis=-1), axis=-1)
    gradient = gradient.compute(get=get)
    gradient -= gradient[i0, ...]
    if cov_obj is not None:
        dm = m_tci.M - m_prior
        # the original `gradient + cov_obj.contract(dm)` was a no-op;
        # the in-place add is clearly what was intended
        gradient += cov_obj.contract(dm)
    # gradient += m_tci.M
    # gradient -= m_prior
    return gradient
def interpret_raw_file_delayed(name, nx, ny, layers, dx, dy):
    """Use dask.delayed to lazily load a single output file.

    While this can be used as is, it is intended to be an internal
    function called by `open_mfdataset`.
    """
    d = dsa.from_delayed(delayed(interpret_raw_file)(name, nx, ny, layers),
                         (layers, ny + dy, nx + dx), float)
    return d
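# Usage sketch (not from the original source): lazily wrap several raw
# output files and stack them along a new time axis. The file names are
# hypothetical placeholders.
import dask.array as dsa

names = ["output.0000000000.data", "output.0000000100.data"]
snapshots = dsa.stack(
    [interpret_raw_file_delayed(n, nx=100, ny=100, layers=10, dx=0, dy=0)
     for n in names]
)  # lazy (2, 10, 100, 100) array; nothing is read until .compute()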
def array_images():
    custom_imread = dask.delayed(skimage.io.imread, pure=True)
    images = [
        custom_imread(
            '/Users/nivethamahalakshmibalasamy/Documents/ECI-PolarScience/'
            'dask_stuff/grayscale-xy-%d.png' % i)
        for i in range(1376, 1396)
    ]
    #print images
    # `sample` (an eagerly loaded reference image) is assumed to be defined
    # elsewhere; it supplies the shape/dtype for from_delayed.
    image_array = [
        da.from_delayed(i, sample.shape, sample.dtype) for i in images
    ]
    sizes = [j.shape for j in image_array]
    #print sizes
    stack = da.stack(image_array, axis=0)
    print(stack)
    #print stack[0]

    # Combining chunks - A chunk consists of 5 images
    stack = stack.rechunk((5, 2000, 2000))
    print("After rechunking: ")
    temp = stack
    #temp.visualize()
    print("Before distributing to workers:")
    print(stack.mean().compute())
    print(stack[1, :].compute())
    print(stack[19, :].mean().compute())
    stack.visualize()

    # Distribute array components over workers and centralized scheduler
    cluster = LocalCluster()
    client = Client(cluster)
    print(client)

    # Load the entire distributed array on the cluster (4 workers, 4 cores)
    stack = client.persist(stack)
    #print stack.shape
    #print "After distributing to workers: "
    print(stack.mean().compute())

    # map the otsu thresholding function
    #print stack[0]
    stack = da.map_blocks(otsu_thresholding, stack, chunks=(5, 2000, 2000),
                          dtype=sample.dtype)
    stack = da.map_blocks(blob_detection, stack, chunks=(5, 2000, 2000),
                          dtype=sample.dtype)
    stack = client.persist(stack)
    #th = client.persist(th)
    #thresholded.visualize()
    #th = client.persist(thresholded)
    #print thresholded.mean().compute()
    #print thresholded
    #print stack.shape
    print(stack.mean().compute())
    stack.visualize()
def read_prob_map(h5_path, array_info):
    shape, dtype = array_info
    data = delayed(read_h5)(h5_path)
    data = da.from_delayed(data, shape=shape, dtype=dtype,
                           name=os.path.basename(h5_path))
    return data
def _preprocess(self, collection, chunks=64, size=None):
    h, w = size
    images = [self.read_image(file, (h, w)) for file in collection]
    images = [da.from_delayed(image, shape=(h, w), dtype=numpy.uint8)
              for image in images]
    images = da.stack(images, axis=0)
    images = images.rechunk(chunks=(chunks, h, w))
    return images
def read_tiff(tiff_path, array_info):
    shape, dtype = array_info
    data = delayed(imageio.volread)(tiff_path)
    data = da.from_delayed(data, shape=shape, dtype=dtype,
                           name=os.path.basename(tiff_path))
    return data
def scatter_array(arr, dask_client):
    """Scatter a large numpy array into workers.

    Return the equivalent dask array.
    """
    future_arr = dask_client.scatter(arr)
    return da.from_delayed(future_arr, shape=arr.shape, dtype=arr.dtype,
                           meta=np.zeros_like(arr, shape=()))
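# Usage sketch (not from the original source): push one large array to the
# cluster once, then reuse it from many tasks without re-serializing it.
import numpy as np
import dask.array as da
from dask.distributed import Client

client = Client()  # local cluster for the demo
big = np.random.random((2000, 2000))
lazy = scatter_array(big, client)  # one future backing a dask array
print((lazy @ lazy.T).mean().compute())
client.close()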
def func(band_data, luts=luts, index=-1):
    # NaN/null values will become 0
    lut = luts[:, index] if len(luts.shape) == 2 else luts
    band_data = band_data.clip(0, lut.size - 1).astype(np.uint8)
    new_delay = dask.delayed(_lookup_delayed)(lut, band_data)
    new_data = da.from_delayed(new_delay, shape=band_data.shape,
                               dtype=luts.dtype)
    return new_data
def file_reader(filename, record_by=None, order=None, lazy=False,
                optimize=True):
    """Reads a DM3 file and loads the data into the appropriate class.

    data_id can be specified to load a given image within a DM3 file that
    contains more than one dataset.

    Parameters
    ----------
    record_by: Str
        One of: SI, Signal2D
    order : Str
        One of 'C' or 'F'
    lazy : bool, default False
        Load the signal lazily.
    %s
    """
    with open(filename, "rb") as f:
        dm = DigitalMicrographReader(f)
        dm.parse_file()
        images = [ImageObject(imdict, f, order=order, record_by=record_by)
                  for imdict in dm.get_image_dictionaries()]
        imd = []
        del dm.tags_dict['ImageList']
        dm.tags_dict['ImageList'] = {}

        for image in images:
            dm.tags_dict['ImageList'][
                'TagGroup0'] = image.imdict.as_dictionary()
            axes = image.get_axes_dict()
            mp = image.get_metadata()
            mp['General']['original_filename'] = os.path.split(filename)[1]
            post_process = []
            if image.to_spectrum is True:
                post_process.append(
                    lambda s: s.to_signal1D(optimize=optimize))
            post_process.append(lambda s: s.squeeze())
            if lazy:
                image.filename = filename
                from dask.array import from_delayed
                import dask.delayed as dd
                val = dd(image.get_data, pure=True)()
                data = from_delayed(val, shape=image.shape,
                                    dtype=image.dtype)
            else:
                data = image.get_data()
            imd.append({'data': data,
                        'axes': axes,
                        'metadata': mp,
                        'original_metadata': dm.tags_dict,
                        'post_process': post_process,
                        'mapping': image.get_mapping(),
                        })
    return imd


file_reader.__doc__ %= (OPTIMIZE_ARG.replace('False', 'True'))
def func(band_data, luts=luts):
    # NaN/null values will become 0
    band_data = band_data.clip(0, luts.size - 1).astype(np.uint8)

    def _delayed(luts, band_data):
        # can't use luts.__getitem__ for some reason
        return luts[band_data]

    new_delay = dask.delayed(_delayed)(luts, band_data)
    new_data = da.from_delayed(new_delay, shape=band_data.shape,
                               dtype=luts.dtype)
    return new_data
def func(band_data, kernel=kernel, mode=mode):
    def _delayed(band_data, kernel, mode):
        band_data = band_data.reshape(band_data.shape[1:])
        new_data = convolve2d(band_data, kernel, mode=mode)
        return new_data.reshape((1, band_data.shape[0],
                                 band_data.shape[1]))

    delay = dask.delayed(_delayed)(band_data, kernel, mode)
    new_data = da.from_delayed(delay, shape=band_data.shape,
                               dtype=band_data.dtype)
    return new_data
def compute(self, data, cache_id=None, fill_value=0, weight_count=10000,
            weight_min=0.01, weight_distance_max=1.0,
            weight_delta_max=1.0, weight_sum_min=-1.0,
            maximum_weight_mode=False, grid_coverage=0, **kwargs):
    """Resample the data according to the precomputed X/Y coordinates.

    :param grid_coverage: minimum ratio of number of output grid pixels
        covered with swath pixels
    """
    rows = self.cache["rows"]
    cols = self.cache["cols"]

    # if the data is scan based then check its metadata or the passed
    # kwargs otherwise assume the entire input swath is one large
    # "scanline"
    rows_per_scan = kwargs.get('rows_per_scan',
                               data.attrs.get("rows_per_scan",
                                              data.shape[0]))

    if data.ndim == 3 and 'bands' in data.dims:
        data_in = tuple(data.sel(bands=band).data
                        for band in data['bands'])
    elif data.ndim == 2:
        data_in = data.data
    else:
        raise ValueError("Unsupported data shape for EWA resampling.")

    res = dask.delayed(self._call_fornav)(
        cols, rows, self.target_geo_def, data_in,
        grid_coverage=grid_coverage,
        rows_per_scan=rows_per_scan, weight_count=weight_count,
        weight_min=weight_min, weight_distance_max=weight_distance_max,
        weight_delta_max=weight_delta_max, weight_sum_min=weight_sum_min,
        maximum_weight_mode=maximum_weight_mode)
    if isinstance(data_in, tuple):
        new_shape = (len(data_in),) + self.target_geo_def.shape
    else:
        new_shape = self.target_geo_def.shape
    data_arr = da.from_delayed(res, new_shape, data.dtype)
    # from_delayed creates one large chunk, break it up a bit if we can
    data_arr = data_arr.rechunk([CHUNK_SIZE] * data_arr.ndim)
    if data.ndim == 3 and data.dims[0] == 'bands':
        dims = ('bands', 'y', 'x')
    elif data.ndim == 2:
        dims = ('y', 'x')
    else:
        dims = data.dims

    return xr.DataArray(data_arr, dims=dims, attrs=data.attrs.copy())
def _map_iterate(self, function, iterating_kwargs=(), show_progressbar=None,
                 parallel=None, ragged=None, inplace=True, **kwargs):
    if ragged not in (True, False):
        raise ValueError('"ragged" kwarg has to be bool for lazy signals')
    _logger.debug("Entering '_map_iterate'")

    size = max(1, self.axes_manager.navigation_size)
    from hyperspy.misc.utils import (create_map_objects,
                                     map_result_construction)
    func, iterators = create_map_objects(function, size, iterating_kwargs,
                                         **kwargs)
    iterators = (self._iterate_signal(),) + iterators
    res_shape = self.axes_manager._navigation_shape_in_array
    # no navigation
    if not len(res_shape) and ragged:
        res_shape = (1,)

    all_delayed = [dd(func)(data) for data in zip(*iterators)]

    if ragged:
        sig_shape = ()
        sig_dtype = np.dtype('O')
    else:
        one_compute = all_delayed[0].compute()
        sig_shape = one_compute.shape
        sig_dtype = one_compute.dtype
    pixels = [
        da.from_delayed(res, shape=sig_shape, dtype=sig_dtype)
        for res in all_delayed
    ]

    for step in reversed(res_shape):
        _len = len(pixels)
        starts = range(0, _len, step)
        ends = range(step, _len + step, step)
        pixels = [da.stack(pixels[s:e], axis=0)
                  for s, e in zip(starts, ends)]
    result = pixels[0]
    res = map_result_construction(self, inplace, result, ragged,
                                  sig_shape, lazy=True)
    return res
def as_lazy_data(data, chunks=None, asarray=False):
    """
    Convert the input array `data` to a dask array.

    Args:

    * data:
        An array. This will be converted to a dask array.

    Kwargs:

    * chunks:
        Describes how the created dask array should be split up. Defaults
        to a value first defined in biggus (being `8 * 1024 * 1024 * 2`).
        For more information see
        http://dask.pydata.org/en/latest/array-creation.html#chunks.

    * asarray:
        If True, then chunks will be converted to instances of `ndarray`.
        Set to False (default) to pass chunks through unchanged.

    Returns:
        The input array converted to a dask array.
    """
    if chunks is None:
        # Default to the shape of the wrapped array-like,
        # but reduce it if larger than a default maximum size.
        chunks = _limited_shape(data.shape)
    if not is_lazy_data(data):
        if data.shape == ():
            # Workaround for https://github.com/dask/dask/issues/2823.
            # Make sure scalar dask arrays return numpy objects.
            dtype = data.dtype
            data = _getall_delayed(data)
            data = da.from_delayed(data, (), dtype)
        else:
            data = da.from_array(data, chunks=chunks, asarray=asarray)
    return data
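# A standalone sketch (not from the original source) of the 0-d workaround
# used above: wrap a scalar array in a delayed call so computing the result
# yields a numpy object instead of going through da.from_array.
import dask
import dask.array as da
import numpy as np

scalar = np.array(3.14)  # shape ()
lazy = da.from_delayed(dask.delayed(lambda: scalar)(), (), scalar.dtype)
assert lazy.shape == () and float(lazy.compute()) == 3.14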
def dask_win_func(n):
    return dsar.from_delayed(
        delayed(numpy_win_func, pure=True)(n), (n,), float)
def ser_reader(filename, objects=None, *args, **kwds):
    """Reads the information from the file and returns it in the HyperSpy
    required format.
    """
    header, data = load_ser_file(filename)
    record_by = guess_record_by(header['DataTypeID'])
    ndim = int(header['NumberDimensions'])
    date, time = None, None
    if objects is not None:
        objects_dict = convert_xml_to_dict(objects[0])
        date, time = _get_date_time(objects_dict.ObjectInfo.AcquireDate)
    if "PositionY" in data.dtype.names and len(data['PositionY']) > 1 and \
            (data['PositionY'][0] == data['PositionY'][1]):
        # The spatial dimensions are stored in F order i.e. X, Y, ...
        order = "F"
    else:
        # The spatial dimensions are stored in C order i.e. ..., Y, X
        order = "C"
    if ndim == 0 and header["ValidNumberElements"] != 0:
        # The calibration of the axes are not stored in the header.
        # We try to guess from the position coordinates.
        array_shape, axes = get_axes_from_position(header=header,
                                                   data=data)
    else:
        axes = []
        array_shape = [None, ] * int(ndim)
        spatial_axes = ["x", "y"][:ndim]
        for i in range(ndim):
            idim = 1 + i if order == "C" else ndim - i
            if (record_by == "spectrum" or
                    header['Dim-%i_DimensionSize' % (i + 1)][0] != 1):
                units = (header['Dim-%i_Units' % (idim)][0].decode('utf-8')
                         if header['Dim-%i_UnitsLength' % (idim)] > 0
                         else t.Undefined)
                if units == "meters":
                    name = (spatial_axes.pop() if order == "F"
                            else spatial_axes.pop(-1))
                else:
                    name = t.Undefined
                axes.append({
                    'offset': header['Dim-%i_CalibrationOffset' % idim][0],
                    'scale': header['Dim-%i_CalibrationDelta' % idim][0],
                    'units': units,
                    'size': header['Dim-%i_DimensionSize' % idim][0],
                    'name': name,
                })
                array_shape[i] = \
                    header['Dim-%i_DimensionSize' % idim][0]
        # Spectral dimension
        if record_by == "spectrum":
            axes.append({
                'offset': data['CalibrationOffset'][0],
                'scale': data['CalibrationDelta'][0],
                'size': data['ArrayLength'][0],
                'index_in_array': header['NumberDimensions'][0]
            })
            # FEI seems to use the international system of units (SI) for
            # the energy scale (eV).
            axes[-1]['units'] = 'eV'
            axes[-1]['name'] = 'Energy'
            array_shape.append(data['ArrayLength'][0])
        elif record_by == 'image':
            if objects is not None:
                units = _guess_units_from_mode(objects_dict, header)
            else:
                units = "meters"
            # Y axis
            axes.append({
                'name': 'y',
                'offset': data['CalibrationOffsetY'][0] -
                data['CalibrationElementY'][0] *
                data['CalibrationDeltaY'][0],
                'scale': data['CalibrationDeltaY'][0],
                'units': units,
                'size': data['ArraySizeY'][0],
            })
            array_shape.append(data['ArraySizeY'][0])
            # X axis
            axes.append({
                'name': 'x',
                'offset': data['CalibrationOffsetX'][0] -
                data['CalibrationElementX'][0] *
                data['CalibrationDeltaX'][0],
                'scale': data['CalibrationDeltaX'][0],
                'size': data['ArraySizeX'][0],
                'units': units,
            })
            array_shape.append(data['ArraySizeX'][0])
    # FEI seems to use the international system of units (SI) for the
    # spatial scale. However, we prefer to work in nm.
    for axis in axes:
        if axis['units'] == 'meters':
            axis['units'] = 'nm'
            axis['scale'] *= 10 ** 9
        elif axis['units'] == '1/meters':
            axis['units'] = '1/nm'
            axis['scale'] /= 10 ** 9
    # Remove Nones from array_shape caused by squeezing size 1 dimensions
    array_shape = [dim for dim in array_shape if dim is not None]
    lazy = kwds.pop('lazy', False)
    if lazy:
        from dask import delayed
        from dask.array import from_delayed
        val = delayed(load_only_data, pure=True)(filename, array_shape,
                                                 record_by, len(axes))
        dc = from_delayed(val, shape=array_shape,
                          dtype=data['Array'].dtype)
    else:
        dc = load_only_data(filename, array_shape, record_by, len(axes),
                            data=data)

    if ordict:
        original_metadata = OrderedDict()
    else:
        original_metadata = {}
    header_parameters = sarray2dict(header)
    sarray2dict(data, header_parameters)
    # We remove the Array key to save memory avoiding duplication
    del header_parameters['Array']
    original_metadata['ser_header_parameters'] = header_parameters
    metadata = {'General': {
        'original_filename': os.path.split(filename)[1]},
        "Signal": {
            'signal_type': "",
            'record_by': record_by,
        },
    }
    if date is not None and time is not None:
        metadata['General']['date'] = date
        metadata['General']['time'] = time
    dictionary = {
        'data': dc,
        'metadata': metadata,
        'axes': axes,
        'original_metadata': original_metadata,
        'mapping': mapping}
    return dictionary
def file_reader(filename, record_by='image', force_read_resolution=False,
                **kwds):
    """Read data from tif files using Christoph Gohlke's tifffile library.

    The units and the scale of images saved with ImageJ or Digital
    Micrograph is read. There is limited support for reading the scale of
    files created with Zeiss and FEI SEMs.

    Parameters
    ----------
    filename: str
    record_by: {'image'}
        Has no effect because this format only supports recording by
        image.
    force_read_resolution: Bool
        Default: False.
        Force reading the x_resolution, y_resolution and the
        resolution_unit of the tiff tags. See
        http://www.awaresystems.be/imaging/tiff/tifftags/resolutionunit.html
    **kwds, optional
    """
    _logger.debug('************* Loading *************')
    # For testing the use of local and skimage tifffile library
    import_local_tifffile = False
    if 'import_local_tifffile' in kwds.keys():
        import_local_tifffile = kwds.pop('import_local_tifffile')

    imsave, TiffFile = _import_tifffile_library(import_local_tifffile)
    lazy = kwds.pop('lazy', False)
    memmap = kwds.pop('memmap', False)
    with TiffFile(filename, **kwds) as tiff:

        # change in the tifffile API
        if hasattr(tiff.series[0], 'axes'):
            # in newer versions the axes is an attribute
            axes = tiff.series[0].axes
        else:
            # old version
            axes = tiff.series[0]['axes']
        is_rgb = tiff.is_rgb
        _logger.debug("Is RGB: %s" % is_rgb)
        series = tiff.series[0]
        if hasattr(series, 'shape'):
            shape = series.shape
            dtype = series.dtype
        else:
            shape = series['shape']
            dtype = series['dtype']
        if is_rgb:
            axes = axes[:-1]
            names = ['R', 'G', 'B', 'A']
            lastshape = shape[-1]
            dtype = np.dtype({'names': names[:lastshape],
                              'formats': [dtype] * lastshape})
            shape = shape[:-1]
        op = {}
        for key, tag in tiff[0].tags.items():
            op[key] = tag.value
        names = [axes_label_codes[axis] for axis in axes]
        _logger.debug('Tiff tags list: %s' % op.keys())
        _logger.debug("Photometric: %s" % op['photometric'])
        _logger.debug('is_imagej: {}'.format(tiff[0].is_imagej))

        # workaround for 'palette' photometric, keep only 'X' and 'Y' axes
        sl = None
        if op['photometric'] == 3:
            sl = [0] * len(shape)
            names = []
            for i, axis in enumerate(axes):
                if axis == 'X' or axis == 'Y':
                    sl[i] = slice(None)
                    names.append(axes_label_codes[axis])
                else:
                    axes.replace(axis, '')
            shape = tuple(_sh for _s, _sh in zip(sl, shape)
                          if isinstance(_s, slice))
        _logger.debug("names: {0}".format(names))

        scales = [1.0] * len(names)
        offsets = [0.0] * len(names)
        units = [t.Undefined] * len(names)
        try:
            scales_d, units_d, offsets_d, intensity_axis = \
                _parse_scale_unit(tiff, op, shape, force_read_resolution)
            for i, name in enumerate(names):
                if name == 'height':
                    scales[i], units[i] = scales_d['x'], units_d['x']
                    offsets[i] = offsets_d['x']
                elif name == 'width':
                    scales[i], units[i] = scales_d['y'], units_d['y']
                    offsets[i] = offsets_d['y']
                elif name in ['depth', 'image series', 'time']:
                    scales[i], units[i] = scales_d['z'], units_d['z']
                    offsets[i] = offsets_d['z']
        except:
            _logger.info("Scale and units could not be imported")

        axes = [{'size': size,
                 'name': str(name),
                 'scale': scale,
                 'offset': offset,
                 'units': unit,
                 }
                for size, name, scale, offset, unit
                in zip(shape, names, scales, offsets, units)]

        md = {'General': {'original_filename': os.path.split(filename)[1]},
              'Signal': {'signal_type': "",
                         'record_by': "image",
                         },
              }

        if 'units' in intensity_axis.keys():
            md['Signal']['quantity'] = intensity_axis['units']
        if 'scale' in intensity_axis.keys() and \
                'offset' in intensity_axis.keys():
            dic = {'gain_factor': intensity_axis['scale'],
                   'gain_offset': intensity_axis['offset']}
            md['Signal']['Noise_properties'] = {'Variance_linear_model': dic}

    data_args = TiffFile, filename, is_rgb, sl
    if lazy:
        from dask import delayed
        from dask.array import from_delayed
        memmap = True
        val = delayed(_load_data, pure=True)(*data_args, memmap=memmap,
                                             **kwds)
        dc = from_delayed(val, dtype=dtype, shape=shape)
        # TODO: maybe just pass the memmap from tifffile?
    else:
        dc = _load_data(*data_args, memmap=memmap, **kwds)

    return [{'data': dc,
             'original_metadata': op,
             'axes': axes,
             'metadata': md,
             }]
def load_from_unf(cls, filename, lazy=False):
    """Load a `.unf`-file into a :class:`~.SemperFormat` object.

    Parameters
    ----------
    filename : string
        The name of the unf-file from which to load the data. Standard
        format is '\*.unf'.

    Returns
    -------
    semper : :class:`~.SemperFormat` (N=1)
        SEMPER file format object containing the loaded information.
    """
    metadata = OrderedDict()
    with open(filename, 'rb') as f:
        # Read header:
        rec_length = np.fromfile(f, dtype='<i4',
                                 count=1)[0]  # length of header
        header = np.fromfile(f,
                             dtype=cls.HEADER_DTYPES[:rec_length // 2],
                             count=1)
        metadata.update(sarray2dict(header))
        assert np.frombuffer(f.read(4), dtype=np.int32)[0] == rec_length, \
            'Error while reading the header (length is not correct)!'
        data_format = cls.IFORM_DICT[metadata['IFORM']]
        iversn, remain = divmod(metadata['IFLAG'], 10000)
        ilabel, ntitle = divmod(remain, 1000)
        metadata.update({'IVERSN': iversn, 'ILABEL': ilabel,
                         'NTITLE': ntitle})
        # Read title:
        title = ''
        if ntitle > 0:
            assert np.fromfile(f, dtype='<i4',
                               count=1)[0] == ntitle  # length of title
            title = b''.join(np.fromfile(f, dtype='c', count=ntitle))
            title = title.decode()
            metadata['TITLE'] = title
            assert np.fromfile(f, dtype='<i4', count=1)[0] == ntitle
        if ilabel:
            try:
                metadata.update(cls._read_label(f))
            except Exception as e:
                warning = ('Could not read label, trying to proceed '
                           'without it!')
                warning += ' (Error message: {})'.format(str(e))
                warnings.warn(warning)
        # Read picture data:
        pos = f.tell()
        shape = metadata['NLAY'], metadata['NROW'], metadata['NCOL']
        if lazy:
            from dask.array import from_delayed
            from dask import delayed
            task = delayed(_read_data)(f, filename, pos, data_format,
                                       shape)
            data = from_delayed(task, shape=shape, dtype=data_format)
        else:
            data = _read_data(f, filename, pos, data_format, shape)
    offsets = (metadata.get('X0V0', 0.),
               metadata.get('Y0V2', 0.),
               metadata.get('Z0V4', 0.))
    scales = (metadata.get('DXV1', 1.),
              metadata.get('DYV3', 1.),
              metadata.get('DZV5', 1.))
    units = (metadata.get('XUNIT', Undefined),
             metadata.get('YUNIT', Undefined),
             metadata.get('ZUNIT', Undefined))
    return cls(data, title, offsets, scales, units, metadata)