Example #1
    def stretch_linear(self, cutoffs=(0.005, 0.005)):
        """Stretch linearly the contrast of the current image.

        Use *cutoffs* for left and right trimming.
        """
        logger.debug("Perform a linear contrast stretch.")

        logger.debug("Calculate the histogram quantiles: ")
        logger.debug("Left and right quantiles: " +
                     str(cutoffs[0]) + " " + str(cutoffs[1]))

        cutoff_type = np.float64
        # numpy percentile (which quantile calls) returns 64-bit floats
        # unless the value is a higher order float
        if np.issubdtype(self.data.dtype, np.floating) and \
                np.dtype(self.data.dtype).itemsize > 8:
            cutoff_type = self.data.dtype
        left, right = dask.delayed(self._compute_quantile, nout=2)(self.data.data, self.data.dims, cutoffs)
        left_data = da.from_delayed(left,
                                    shape=(self.data.sizes['bands'],),
                                    dtype=cutoff_type)
        left = xr.DataArray(left_data, dims=('bands',),
                            coords={'bands': self.data['bands']})
        right_data = da.from_delayed(right,
                                     shape=(self.data.sizes['bands'],),
                                     dtype=cutoff_type)
        right = xr.DataArray(right_data, dims=('bands',),
                             coords={'bands': self.data['bands']})

        self.crude_stretch(left, right)
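The example above wraps a two-output quantile helper with dask.delayed(..., nout=2) and lifts each output into a dask array of known shape. A minimal, self-contained sketch of the same pattern; the _compute_quantile helper below is a stand-in, not Satpy's actual implementation:

import dask
import dask.array as da
import numpy as np

def _compute_quantile(data, cutoffs):
    # stand-in helper: per-band left/right quantiles of a (bands, pixels) array
    left = np.nanquantile(data, cutoffs[0], axis=-1)
    right = np.nanquantile(data, 1.0 - cutoffs[1], axis=-1)
    return left, right

data = da.random.random((3, 1000), chunks=(1, 1000))
left, right = dask.delayed(_compute_quantile, nout=2)(data, (0.005, 0.005))
left = da.from_delayed(left, shape=(3,), dtype=np.float64)
right = da.from_delayed(right, shape=(3,), dtype=np.float64)
print(left.compute(), right.compute())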
Example #2
def change_dtype(data: list, output_dtype: str, offset: np.ndarray) -> list:
    """
    Lazy, histogram-preserving datatype adjustment of a collection of array-likes.
    Signed datatypes (int8, int16) are converted to their unsigned counterparts (uint8, uint16) by upcasting to a signed type of
    higher precision, shifting all values by a constant, then downcasting to the final unsigned datatype. The resulting arrays
    have a global minimum of 0 and preserve the original min-max range.
    """

    @dask.delayed
    def adjuster(arr, upcast, offset, dtype):
        assert arr.ndim == offset.ndim
        return (arr.astype(upcast) - offset).astype(dtype)

    if output_dtype == "same":
        return data
    elif output_dtype == "uint8":
        assert (data[0].dtype == "int8") or (data[0].dtype == ">i1")
        upcast = "int16"
    elif output_dtype == "uint16":
        assert (data[0].dtype == "int16") or (data[0].dtype == ">i2")
        upcast = "int32"

    return [
        da.from_delayed(
            adjuster(
                d, upcast=upcast, offset=offset.reshape(-1, 1, 1), dtype=output_dtype
            ),
            dtype=output_dtype,
            shape=d.shape,
        )
        for d in data
    ]
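A hypothetical usage sketch for change_dtype above, assuming two small 3-D int8 arrays and a per-channel offset equal to the signed minimum so the shifted values start at 0:

import numpy as np

# two (channel, y, x) int8 arrays; offset holds one shift per channel
data = [np.array([[[-128, -60], [0, 127]],
                  [[-128, -1], [64, 127]]], dtype="int8") for _ in range(2)]
offset = np.array([-128, -128])   # assumed per-channel minimum

shifted = change_dtype(data, output_dtype="uint8", offset=offset)
print(shifted[0].dtype, shifted[0].compute().min())   # uint8 0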
Example #3
    def transform(self, X):
        """Transform a sequence of documents to a document-term matrix.

        Transformation is done in parallel, and correctly handles dask
        collections.

        Parameters
        ----------
        X : dask.Bag of raw text documents, length = n_samples
            Samples. Each sample must be a text document (either bytes or
            unicode strings, file name or file object depending on the
            constructor argument) which will be tokenized and hashed.

        Returns
        -------
        X : dask.array.Array, shape = (n_samples, self.n_features)
            Document-term matrix. Each block of the array is a scipy sparse
            matrix.

        Notes
        -----
        The returned dask Array is composed of scipy sparse matrices. If you need
        to compute on the result immediately, you may need to convert the individual
        blocks to ndarrays or pydata/sparse matrices.

        >>> import sparse
        >>> X.map_blocks(sparse.COO.from_scipy_sparse)  # doctest: +SKIP

        See the :doc:`examples/text-vectorization` for more.
        """
        transformer = super(HashingVectorizer, self).transform

        msg = "'X' should be a 1-dimensional array with length 'num_samples'."

        if not dask.is_dask_collection(X):
            return transformer(X)

        if isinstance(X, db.Bag):
            bag2 = X.map_partitions(transformer)
            objs = bag2.to_delayed()
            arrs = [
                da.from_delayed(obj, (np.nan, self.n_features), self.dtype)
                for obj in objs
            ]
            result = da.concatenate(arrs, axis=0)
        elif isinstance(X, dd.Series):
            result = X.map_partitions(transformer)
        elif isinstance(X, da.Array):
            # dask.Array
            chunks = ((np.nan,) * X.numblocks[0], (self.n_features,))
            if X.ndim == 1:
                result = X.map_blocks(
                    transformer, dtype="f8", chunks=chunks, new_axis=1
                )
            else:
                raise ValueError(msg)
        else:
            raise ValueError(msg)

        return result
Example #4
def test_clock_tec_solve_dask():
    np.random.seed(1234)
    import pylab as plt
    times = np.arange(2)
    freqs = np.linspace(110e6, 170e6, 1000)

    cs = np.array([1, 1])
    tec = np.array([0.1, 0.2])
    delay = np.ones(len(times)) * 2e-9  # 2 ns
    phase = np.multiply.outer(np.ones(
        len(freqs)), cs) + 8.44797256e-7 * TECU * np.multiply.outer(
            1. / freqs, tec) + 2. * np.pi * np.multiply.outer(freqs, delay)
    phase += 15 * np.pi / 180. * np.random.normal(
        size=[len(freqs), len(times)])
    #plt.imshow(phase,origin='lower',extent=(times[0],times[-1],freqs[0],freqs[-1]),aspect='auto')
    #plt.colorbar()
    #plt.xlabel('times (s)')
    #plt.ylabel('freqs (Hz)')
    #plt.show()
    m, cov = least_squares_solve(phase, freqs, times, 15, Ct_ratio=0.01)
    m_exact = np.array([delay, tec, cs]).T
    import dask.array as da
    solsMH = [
        da.from_delayed(clock_tec_solve_dask(phase[:, i], freqs, m[i, :],
                                             cov[i, :, :], 15, 0.01),
                        shape=(3, ),
                        dtype=np.double) for i in range(len(times))
    ]

    sol_stacked = da.stack(solsMH, axis=0)
    sol = sol_stacked.compute()
    print(sol)
Example #5
def arrays_from_delayed(args, shapes=None, dtypes=None):
    """

    Parameters
    ----------
    args: a collection of dask.delayed objects representing lazy-loaded arrays.

    shapes: a collection of tuples specifying the shape of each array in args, or None. If None, the first array will be loaded
        using local computation, and the shape of that array will be used for all subsequent arrays.

    dtypes: a collection of strings specifying the datatype of each array in args, or None. If None, the first array will be loaded
        using local computation and the dtype of that array will be used for all subsequent arrays.

    Returns
    -------
    A list of dask arrays.

    """

    if shapes is None or dtypes is None:
        sample = args[0].compute(scheduler="threads")
        if shapes is None:
            shapes = (sample.shape, ) * len(args)
        if dtypes is None:
            dtypes = (sample.dtype, ) * len(args)

    assert len(shapes) == len(args) and len(dtypes) == len(args)

    arrays = [
        da.from_delayed(args[ind], shape=shapes[ind], dtype=dtypes[ind])
        for ind in range(len(args))
    ]
    return arrays
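A small usage sketch for arrays_from_delayed above, assuming four delayed loaders that all return blocks of the same shape; the load_block function is a hypothetical stand-in for a file read:

import numpy as np
import dask
import dask.array as da

@dask.delayed
def load_block(i):
    # stand-in for a file read; returns a (64, 64) float array
    return np.full((64, 64), float(i))

lazy = [load_block(i) for i in range(4)]
arrays = arrays_from_delayed(lazy)   # shape/dtype inferred from the first block
stacked = da.stack(arrays)           # (4, 64, 64) dask array
print(stacked.shape, stacked.dtype)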
Example #6
def test_bag_array_conversion():
    import dask.bag as db
    b = db.range(10, npartitions=1)
    x, = b.map_partitions(np.asarray).to_delayed()
    x, = [da.from_delayed(a, shape=(10,), dtype=int) for a in [x]]
    z = da.concatenate([x])
    assert_eq(z, np.arange(10), check_graph=False)
Example #7
 def _band_hist(band_data):
     cdf = da.arange(0., 1., 1. / nwidth, chunks=nwidth)
     if approximate:
         # need a 1D array
         flat_data = band_data.ravel()
         # replace with nanpercentile in the future, if available
         # dask < 0.17 returns all NaNs for this
         bins = da.percentile(flat_data[da.notnull(flat_data)],
                              cdf * 100.)
     else:
         bins = dask.delayed(np.nanpercentile)(band_data, cdf * 100.)
         bins = da.from_delayed(bins, shape=(nwidth,), dtype=cdf.dtype)
     res = dask.delayed(np.interp)(band_data, bins, cdf)
     res = da.from_delayed(res, shape=band_data.shape,
                           dtype=band_data.dtype)
     return res
Example #8
 def _map_all(self, function, inplace=True, **kwargs):
     calc_result = dd(function)(self.data, **kwargs)
     if inplace:
         self.data = da.from_delayed(calc_result, shape=self.data.shape,
                                     dtype=self.data.dtype)
         return None
     return self._deepcopy_with_new_data(calc_result)
Example #9
def file_reader(filename, **kwds):
    """Read data from any format supported by PIL.

    Parameters
    ----------
    filename: str

    """
    dc = _read_data(filename)
    lazy = kwds.pop('lazy', False)
    if lazy:
        # load the image fully to check the dtype and shape, should be cheap.
        # Then store this info for later re-loading when required
        from dask.array import from_delayed
        from dask import delayed
        val = delayed(_read_data, pure=True)(filename)
        dc = from_delayed(val, shape=dc.shape, dtype=dc.dtype)
    return [{'data': dc,
             'metadata':
             {
                 'General': {'original_filename': os.path.split(filename)[1]},
                 "Signal": {'signal_type': "",
                            'record_by': 'image', },
             }
             }]
Example #10
def _get_measurement(datasources,
                     geobox,
                     resampling,
                     no_data,
                     dtype,
                     fuse_func=None):
    """ Gets the measurement array of a band of data
    """

    # pylint: disable=broad-except, protected-access

    def copyto_fuser(dest, src):
        """
        :type dest: numpy.ndarray
        :type src: numpy.ndarray
        """
        where_nodata = (
            dest == no_data) if not numpy.isnan(no_data) else numpy.isnan(dest)
        numpy.copyto(dest, src, where=where_nodata)
        return dest

    fuse_func = fuse_func or copyto_fuser
    destination = _make_destination(geobox.shape, no_data, dtype)

    for source in datasources:
        buffer = delayed(_read_file)(source,
                                     geobox,
                                     band=source.get_bandnumber(),
                                     no_data=no_data,
                                     resampling=resampling)
        destination = delayed(fuse_func)(destination, buffer)

    return da.from_delayed(destination, geobox.shape, dtype)
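The function above builds one long chain of delayed fuse_func calls and only converts the final result with da.from_delayed. A reduced sketch of that fusing pattern with plain NumPy stand-ins; the -999 no-data value and the fill_missing helper are assumptions for illustration:

import numpy as np
import dask
import dask.array as da

def fill_missing(dest, src):
    # copy src into dest wherever dest still holds the no-data value
    np.copyto(dest, src, where=(dest == -999))
    return dest

destination = dask.delayed(np.full)((4, 4), -999, dtype="float64")
for value in (1.0, 2.0):                       # stand-ins for successive source reads
    buffer = dask.delayed(np.full)((4, 4), value, dtype="float64")
    destination = dask.delayed(fill_missing)(destination, buffer)

result = da.from_delayed(destination, (4, 4), "float64")
print(result.compute()[0, 0])                  # 1.0 -- the first source wins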
Example #11
    def get_dataset(self, key, info):
        """Load a dataset."""
        if self._channel != key.name:
            return

        logger.debug('Reading %s.', key.name)
        # FIXME: get this from MTD_MSIL1C.xml
        quantification_value = 10000.
        jp2 = glymur.Jp2k(self.filename)
        bitdepth = 0
        for seg in jp2.codestream.segment:
            try:
                bitdepth = max(bitdepth, seg.bitdepth[0])
            except AttributeError:
                pass

        jp2.dtype = (np.uint8 if bitdepth <= 8 else np.uint16)

        # Initialize the jp2 reader / doesn't work in a multi-threaded context.
        # jp2[0, 0]
        # data = da.from_array(jp2, chunks=CHUNK_SIZE) / quantification_value * 100

        data = da.from_delayed(delayed(jp2.read)(), jp2.shape, jp2.dtype)
        data = data.rechunk(CHUNK_SIZE) / quantification_value * 100

        proj = DataArray(data, dims=['y', 'x'])
        proj.attrs = info.copy()
        proj.attrs['units'] = '%'
        proj.attrs['platform_name'] = self.platform_name
        return proj
Example #12
def get_da_images(files, which="data", shape=ZTF_IMAGE_SHAPE, dtype="float32"):
    """ Get a dask.array stacked for each of the ziff image you want. 
    = Works only with single ziff = 
    """
    lazy_array = [dask.delayed(get_ziff_single_image)(f_, which=which) for f_ in files]
    lazy_arrays = [da.from_delayed(x_, shape=shape, dtype=dtype) for x_ in lazy_array]
    return da.stack(lazy_arrays)
Example #13
    def fit(self, X, y=None):
        q_min, q_max = self.quantile_range
        if not 0 <= q_min <= q_max <= 100:
            raise ValueError("Invalid quantile range: %s" % str(self.quantile_range))

        if isinstance(X, dd.DataFrame):
            n_columns = len(X.columns)
            partition_lengths = X.map_partitions(len).compute()
            dtype = np.find_common_type(X.dtypes, [])
            blocks = X.to_delayed()
            X = da.vstack(
                [
                    da.from_delayed(
                        block.values, shape=(length, n_columns), dtype=dtype
                    )
                    for block, length in zip(blocks, partition_lengths)
                ]
            )

        quantiles = [da.percentile(col, [q_min, 50.0, q_max]) for col in X.T]
        quantiles = da.vstack(quantiles).compute()
        self.center_ = quantiles[:, 1]
        self.scale_ = quantiles[:, 2] - quantiles[:, 0]
        self.scale_ = skdata._handle_zeros_in_scale(self.scale_, copy=False)
        return self
Example #14
def get_data_lazy(image: ImageWrapper, c_index: int = 0) -> da.Array:
    """Get n-dimensional dask array, with delayed reading from OMERO image."""
    size_z = image.getSizeZ()
    size_t = image.getSizeT()
    size_x = image.getSizeX()
    size_y = image.getSizeY()
    pixels = image.getPrimaryPixels()

    @delayed
    @timer
    def get_plane(plane_name):
        z, c, t = [int(n) for n in plane_name.split(",")]
        p = pixels.getPlane(z, c, t)
        return p

    dtype = PIXEL_TYPES.get(pixels.getPixelsType().value, None)

    plane_names = [
        f"{z},{c_index},{t}" for t in range(size_t) for z in range(size_z)
    ]
    lazy_arrays = [get_plane(pn) for pn in plane_names]
    dask_arrays = [
        da.from_delayed(delayed_reader, shape=(size_y, size_x), dtype=dtype)
        for delayed_reader in lazy_arrays
    ]
    # Stack into one large dask.array
    if size_z == 1 or size_t == 1:
        return da.stack(dask_arrays, axis=0)

    z_stacks = []
    for t in range(size_t):
        z_stacks.append(
            da.stack(dask_arrays[t * size_z:(t + 1) * size_z], axis=0))
    stack = da.stack(z_stacks, axis=0)
    return stack
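The OMERO reader above follows the usual lazy-plane pattern: one delayed call per 2-D plane, da.from_delayed with the known plane shape, then nested da.stack calls to rebuild the (t, z, y, x) volume. A dependency-free sketch of the same idea, with read_plane standing in for pixels.getPlane and an invented 512x512 plane size:

import numpy as np
import dask
import dask.array as da

@dask.delayed
def read_plane(z, t):
    # stand-in for pixels.getPlane(z, c, t); returns one (y, x) plane
    return np.zeros((512, 512), dtype="uint16")

size_z, size_t = 5, 3
planes = [
    da.from_delayed(read_plane(z, t), shape=(512, 512), dtype="uint16")
    for t in range(size_t) for z in range(size_z)
]
# regroup the flat plane list into a (t, z, y, x) stack
stack = da.stack(
    [da.stack(planes[t * size_z:(t + 1) * size_z]) for t in range(size_t)]
)
print(stack.shape)  # (3, 5, 512, 512)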
Example #15
    def _split(self, test_start, test_stop, n_samples, chunks, seeds):
        train_objs = []
        test_objs = []
        train_sizes = []
        test_sizes = []

        offset = 0
        for chunk, seed in zip(chunks, seeds):
            start, stop = offset, offset + chunk

            test_id_start = max(test_start, start)
            test_id_stop = min(test_stop, stop)

            if test_id_start < test_id_stop:
                test_objs.append(
                    dask.delayed(_generate_offset_idx)(chunk, test_id_start,
                                                       test_id_stop, offset,
                                                       seed))
                test_sizes.append(test_id_stop - test_id_start)

            train_id_stop = min(test_id_start, stop)
            if train_id_stop > start:
                train_objs.append(
                    dask.delayed(_generate_offset_idx)(chunk, start,
                                                       train_id_stop, offset,
                                                       seed))
                train_sizes.append(train_id_stop - start)

            train_id_start = max(test_id_stop, start)
            if train_id_start < stop:
                train_objs.append(
                    dask.delayed(_generate_offset_idx)(chunk, train_id_start,
                                                       stop, offset, seed))
                train_sizes.append(stop - train_id_start)
            offset = stop

        train_idx = da.concatenate([
            da.from_delayed(obj, (train_size, ), np.dtype("int"))
            for obj, train_size in zip(train_objs, train_sizes)
        ])

        test_idx = da.concatenate([
            da.from_delayed(obj, (test_size, ), np.dtype("int"))
            for obj, test_size in zip(test_objs, test_sizes)
        ])

        return train_idx, test_idx
Example #16
    def compute(self,
                data,
                cache_id=None,
                fill_value=0,
                weight_count=10000,
                weight_min=0.01,
                weight_distance_max=1.0,
                weight_delta_max=1.0,
                weight_sum_min=-1.0,
                maximum_weight_mode=False,
                grid_coverage=0,
                **kwargs):
        """Resample the data according to the precomputed X/Y coordinates."""
        rows = self.cache["rows"]
        cols = self.cache["cols"]

        # if the data is scan based then check its metadata or the passed
        # kwargs otherwise assume the entire input swath is one large
        # "scanline"
        rows_per_scan = kwargs.get(
            'rows_per_scan', data.attrs.get("rows_per_scan", data.shape[0]))

        if data.ndim == 3 and 'bands' in data.dims:
            data_in = tuple(
                data.sel(bands=band).data for band in data['bands'])
        elif data.ndim == 2:
            data_in = data.data
        else:
            raise ValueError("Unsupported data shape for EWA resampling.")

        res = dask.delayed(self._call_fornav)(
            cols,
            rows,
            self.target_geo_def,
            data_in,
            grid_coverage=grid_coverage,
            rows_per_scan=rows_per_scan,
            weight_count=weight_count,
            weight_min=weight_min,
            weight_distance_max=weight_distance_max,
            weight_delta_max=weight_delta_max,
            weight_sum_min=weight_sum_min,
            maximum_weight_mode=maximum_weight_mode)
        if isinstance(data_in, tuple):
            new_shape = (len(data_in), ) + self.target_geo_def.shape
        else:
            new_shape = self.target_geo_def.shape
        data_arr = da.from_delayed(res, new_shape, data.dtype)
        # from delayed creates one large chunk, break it up a bit if we can
        data_arr = data_arr.rechunk([CHUNK_SIZE] * data_arr.ndim)
        if data.ndim == 3 and data.dims[0] == 'bands':
            dims = ('bands', 'y', 'x')
        elif data.ndim == 2:
            dims = ('y', 'x')
        else:
            dims = data.dims

        res = xr.DataArray(data_arr, dims=dims, attrs=data.attrs.copy())
        return update_resampled_coords(data, res, self.target_geo_def)
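Example #16 notes that da.from_delayed always yields a single chunk, which is why it rechunks right afterwards. The effect in isolation, with an arbitrary 1024-pixel chunk size standing in for CHUNK_SIZE:

import numpy as np
import dask
import dask.array as da

@dask.delayed
def produce():
    return np.ones((4096, 4096), dtype="float32")

arr = da.from_delayed(produce(), shape=(4096, 4096), dtype="float32")
print(arr.chunks)               # ((4096,), (4096,)) -- one big chunk
arr = arr.rechunk((1024, 1024))
print(arr.chunks)               # sixteen (1024, 1024) chunks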
Example #17
def dask_from_mov(path):
    vid = imageio.get_reader(path, 'ffmpeg')
    shape = vid.get_meta_data()['size'][::-1] + (3, )
    lazy_imread = delayed(vid.get_data)
    return da.stack([
        da.from_delayed(lazy_imread(i), shape=shape, dtype=np.uint8)
        for i in range(vid.count_frames())
    ])
Example #18
def make_da(delayed_list, length):
    sample = delayed_list[0].compute()
    arrays = [
        da.from_delayed(item, dtype=sample.dtype, shape=sample.shape)
        for item in delayed_list
    ]
    result = da.concatenate(arrays, axis=0)[:length]
    return result
Example #19
def get_lazy_arrays(glob_filenames, imread_sample):
    lazy_arrays = [dask.delayed(imread)(fn) for fn in glob_filenames]
    lazy_arrays = [
        da.from_delayed(x,
                        shape=imread_sample.shape,
                        dtype=imread_sample.dtype) for x in lazy_arrays
    ]
    return lazy_arrays
Example #20
 def _band_hist(band_data):
     cdf = da.arange(0., 1., 1. / nwidth, chunks=nwidth)
     if approximate:
         # need a 1D array
         flat_data = band_data.ravel()
         # replace with nanpercentile in the future, if available
         # dask < 0.17 returns all NaNs for this
         bins = da.percentile(flat_data[da.notnull(flat_data)],
                              cdf * 100.)
     else:
         bins = dask.delayed(np.nanpercentile)(band_data, cdf * 100.)
         bins = da.from_delayed(bins, shape=(nwidth, ), dtype=cdf.dtype)
     res = dask.delayed(np.interp)(band_data, bins, cdf)
     res = da.from_delayed(res,
                           shape=band_data.shape,
                           dtype=band_data.dtype)
     return res
Example #21
def as_known(X, lengths):
    blocks = X.to_delayed().flatten()
    P = X.shape[1]
    arrays = [
        da.from_delayed(x, dtype=X.dtype, shape=(length, P))
        for x, length in zip(blocks, lengths)
    ]
    return da.concatenate(arrays, axis=0)
Example #22
def test_bag_array_conversion():
    import dask.bag as db

    b = db.range(10, npartitions=1)
    x, = b.map_partitions(np.asarray).to_delayed()
    x, = [da.from_delayed(a, shape=(10, ), dtype=int) for a in [x]]
    z = da.concatenate([x])
    assert_eq(z, np.arange(10), check_graph=False)
Example #23
def hdulists_keyword_to_dask_array(all_hduls, keyword, ext=0, dtype=float):
    arr = da.stack([
        da.from_delayed(_kw_to_0d_seq(hdul, ext, keyword),
                        shape=(),
                        dtype=dtype) for hdul in all_hduls
    ])
    log.info(f"Header keyword {keyword} extracted to new {arr.shape} sequence")
    return arr
Example #24
 def _map_all(self, function, inplace=True, **kwargs):
     calc_result = dd(function)(self.data, **kwargs)
     if inplace:
         self.data = da.from_delayed(calc_result,
                                     shape=self.data.shape,
                                     dtype=self.data.dtype)
         return None
     return self._deepcopy_with_new_data(calc_result)
Example #25
 def image(self):
     """ Returns a delayed dask call for fetching the image for a data point """
     token = gbdx.gbdx_connection.access_token
     load = load_image(self.links["image"]["href"],
                       token,
                       self.imshape,
                       dtype=self.dtype)
     return da.from_delayed(load, shape=self.imshape, dtype=self.dtype)
Example #26
    def func(band_data, kernel=kernel, mode=mode, index=None):
        del index

        delay = dask.delayed(_three_d_effect_delayed)(band_data, kernel, mode)
        new_data = da.from_delayed(delay,
                                   shape=band_data.shape,
                                   dtype=band_data.dtype)
        return new_data
Example #27
def test_from_delayed_meta():
    def f():
        return sparse.COO.from_numpy(np.eye(3))

    d = dask.delayed(f)()
    x = da.from_delayed(d, shape=(3, 3), meta=sparse.COO.from_numpy(np.eye(1)))
    assert isinstance(x._meta, sparse.COO)
    assert_eq(x, x)
Example #28
def _graph_standard_degrid(vis_dataset, grid, briggs_factors, cgk_1D, grid_parms):
   import dask
   import dask.array as da
   import xarray as xr
   import time
   import itertools
   
   # Getting data for gridding
   chan_chunk_size = vis_dataset[grid_parms["imaging_weight_name"]].chunks[2][0]

   freq_chan = da.from_array(vis_dataset.coords['chan'].values, chunks=(chan_chunk_size))

   n_chunks_in_each_dim = vis_dataset[grid_parms["imaging_weight_name"]].data.numblocks
   chunk_indx = []

   iter_chunks_indx = itertools.product(np.arange(n_chunks_in_each_dim[0]), np.arange(n_chunks_in_each_dim[1]),
                                        np.arange(n_chunks_in_each_dim[2]), np.arange(n_chunks_in_each_dim[3]))

   #n_delayed = np.prod(n_chunks_in_each_dim)
   chunk_sizes = vis_dataset[grid_parms["imaging_weight_name"]].chunks

   n_chan_chunks_img = n_chunks_in_each_dim[2]
   list_of_degrids = []
   list_of_sum_weights = []
   
   list_of_degrids = ndim_list(n_chunks_in_each_dim)
   
   
   # Build graph
   for c_time, c_baseline, c_chan, c_pol in iter_chunks_indx:
       if grid_parms['chan_mode'] == 'cube':
            a_c_chan = c_chan
       else:
            a_c_chan = 0
       
       if grid_parms['do_imaging_weight']:
           sub_degrid = dask.delayed(_standard_imaging_weight_degrid_numpy_wrap)(
                grid.partitions[0,0,a_c_chan,c_pol],
                vis_dataset[grid_parms["uvw_name"]].data.partitions[c_time, c_baseline, 0],
                vis_dataset[grid_parms["imaging_weight_name"]].data.partitions[c_time, c_baseline, c_chan, c_pol],
                briggs_factors.partitions[:,a_c_chan,c_pol],
                freq_chan.partitions[c_chan],
                dask.delayed(grid_parms))
                
           single_chunk_size = (chunk_sizes[0][c_time], chunk_sizes[1][c_baseline],chunk_sizes[2][c_chan], chunk_sizes[3][c_pol])
           list_of_degrids[c_time][c_baseline][c_chan][c_pol] = da.from_delayed(sub_degrid, single_chunk_size,dtype=np.double)
       else:
           print('Degridding of visibilities and psf still needs to be implemented')
           
           #sub_grid_and_sum_weights = dask.delayed(_standard_grid_numpy_wrap)(
           #vis_dataset[vis_dataset[grid_parms["data"]].data.partitions[c_time, c_baseline, c_chan, c_pol],
           #vis_dataset[grid_parms["uvw"]].data.partitions[c_time, c_baseline, 0],
           #vis_dataset[grid_parms["imaging_weight"]].data.partitions[c_time, c_baseline, c_chan, c_pol],
           #freq_chan.partitions[c_chan],
           #dask.delayed(cgk_1D), dask.delayed(grid_parms))
       
   degrid = da.block(list_of_degrids)
   return degrid
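Example #28 places each da.from_delayed chunk into a nested list and assembles the final array with da.block. A minimal sketch of that assembly step; the shapes and the make_chunk helper are invented for illustration:

import numpy as np
import dask
import dask.array as da

@dask.delayed
def make_chunk(i, j):
    return np.full((2, 3), 10 * i + j, dtype=float)

# nested list of lazily produced chunks, assembled into one array
blocks = [[da.from_delayed(make_chunk(i, j), shape=(2, 3), dtype=float)
           for j in range(2)] for i in range(2)]
grid = da.block(blocks)
print(grid.shape)   # (4, 6)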
Example #29
def compute_gradient_dask(rays,
                          g,
                          dobs,
                          i0,
                          K_ne,
                          m_tci,
                          m_prior,
                          CdCt,
                          sigma_m,
                          Nkernel,
                          size_cell,
                          cov_obj=None):
    L_m = Nkernel * size_cell
    #     #i not eq i0 mask
    #     mask = np.ones(rays.shape[0],dtype=np.bool)
    #     mask[i0] = False
    #     rays = rays[mask,:,:,:,:]
    #     g = g[mask,:,:]
    #     dobs = dobs[mask,:,:]
    #     CdCt = CdCt[mask,:,:]
    #residuals
    #g.shape, dobs.shape [Na,Nt,Nd]
    dd = g - dobs
    #weighted residuals
    #Cd.shape [Na,Nt,Nd] i.e. diagonal
    #CdCt^-1 = 1./CdCt
    dd /= (CdCt + 1e-15)
    #get ray info
    Na, Nt, Nd, _, Ns = rays.shape
    #     if Na < Nd:
    #         #parallelize over antennas
    #         gradient = da.sum(da.stack([da.from_delayed(delayed(do_gradient)(rays[i,:,:,:,:], dd[i,:,:], K_ne, m_tci,
    #                                          sigma_m, Nkernel, size_cell),(m_tci.nx,m_tci.ny,m_tci.nz),dtype=np.double) for i in range(Na)],axis=-1),axis=-1)
    #     else:
    #         #parallelize over directions
    #         gradient = da.sum(da.stack([da.from_delayed(delayed(do_gradient)(rays[:,:,d,:,:], dd[:,:,d], K_ne, m_tci,
    #                                           sigma_m, Nkernel, size_cell),(m_tci.nx,m_tci.ny,m_tci.nz),dtype=np.double) for d in range(Nd)],axis=-1),axis=-1)
    #parallelize over directions
    ne_tci = m_tci.copy()
    np.exp(ne_tci.M, out=ne_tci.M)
    ne_tci.M *= K_ne / TECU
    gradient = da.sum(da.stack([
        da.from_delayed(delayed(do_gradient)(
            rays[:, :, d, :, :], dd[:, :, d], ne_tci, sigma_m, Nkernel,
            size_cell, i0), (m_tci.nx, m_tci.ny, m_tci.nz),
                        dtype=np.double) for d in range(Nd)
    ],
                               axis=-1),
                      axis=-1)
    gradient = gradient.compute(get=get)
    gradient -= gradient[i0, ...]
    if cov_obj is not None:
        dm = m_tci.M - m_prior
        gradient += cov_obj.contract(dm)
    #gradient += m_tci.M
    #gradient -= m_prior

    return gradient
Example #30
def interpret_raw_file_delayed(name, nx, ny, layers, dx, dy):
    """
    Use Dask.delayed to lazily load a single output file. While this can be
    used as is, it is intended to be an internal function called by `open_mfdataset`.
    """

    d = dsa.from_delayed(delayed(interpret_raw_file)(name, nx, ny, layers),
                            (layers, ny+dy, nx+dx), float)
    return d
Example #31
def array_images():
    custom_imread = dask.delayed(skimage.io.imread, pure=True)
    images = [
        custom_imread(
            '/Users/nivethamahalakshmibalasamy/Documents/ECI-PolarScience/dask_stuff/grayscale-xy-%d.png'
            % i) for i in range(1376, 1396)
    ]
    #print images
    image_array = [
        da.from_delayed(i, sample.shape, sample.dtype) for i in images
    ]
    sizes = [j.shape for j in image_array]
    #print sizes
    stack = da.stack(image_array, axis=0)
    print(stack)
    #print stack[0]
    # Combining chunks - A chunk consists of 5 images
    stack = stack.rechunk((5, 2000, 2000))
    print("After rechunking: ")
    temp = stack
    #temp.visualize()
    print("Before distributing to workers:")
    print(stack.mean().compute())
    print(stack[1, :].compute())
    print(stack[19, :].mean().compute())
    stack.visualize()

    # Distribute array components over workers and centralized scheduler
    cluster = LocalCluster()
    client = Client(cluster)
    print(client)

    # Load the entire distributed array on the cluster (4 workers, 4 cores)
    stack = client.persist(stack)
    #print stack.shape
    #print "After distributing to workers: "
    print(stack.mean().compute())

    # map the otsu thresholding function
    #print stack[0]
    stack = da.map_blocks(otsu_thresholding,
                          stack,
                          chunks=(5, 2000, 2000),
                          dtype=sample.dtype)
    stack = da.map_blocks(blob_detection,
                          stack,
                          chunks=(5, 2000, 2000),
                          dtype=sample.dtype)
    stack = client.persist(stack)
    #th = client.persist(th)
    #thresholded.visualize()
    #th = client.persist(thresholded)
    #print thresholded.mean().compute()
    #print thresholded
    #print stack.shape
    print(stack.mean().compute())
    stack.visualize()
Example #32
def read_prob_map(h5_path, array_info):
    shape, dtype = array_info

    data = delayed(read_h5)(h5_path)
    data = da.from_delayed(data,
                           shape=shape,
                           dtype=dtype,
                           name=os.path.basename(h5_path))
    return data
Example #33
    def _preprocess(self, collection, chunks=64, size=None):

        h, w = size
        images = [self.read_image(file, (h, w)) for file in collection]
        images = [da.from_delayed(image, shape=(h, w), dtype=numpy.uint8) for image in images]
        images = da.stack(images, axis=0)
        images = images.rechunk(chunks=(chunks, h, w))

        return images
Example #34
def read_tiff(tiff_path, array_info):
    shape, dtype = array_info

    data = delayed(imageio.volread)(tiff_path)
    data = da.from_delayed(data,
                           shape=shape,
                           dtype=dtype,
                           name=os.path.basename(tiff_path))
    return data
Example #35
def scatter_array(arr, dask_client):
    """Scatter a large numpy array into workers
    Return the equivalent dask array
    """
    future_arr = dask_client.scatter(arr)
    return da.from_delayed(future_arr,
                           shape=arr.shape,
                           dtype=arr.dtype,
                           meta=np.zeros_like(arr, shape=()))
Example #36
    def func(band_data, luts=luts, index=-1):
        # NaN/null values will become 0
        lut = luts[:, index] if len(luts.shape) == 2 else luts
        band_data = band_data.clip(0, lut.size - 1).astype(np.uint8)

        new_delay = dask.delayed(_lookup_delayed)(lut, band_data)
        new_data = da.from_delayed(new_delay, shape=band_data.shape,
                                   dtype=luts.dtype)
        return new_data
Example #37
def file_reader(filename, record_by=None, order=None, lazy=False,
                optimize=True):
    """Reads a DM3 file and loads the data into the appropriate class.
    data_id can be specified to load a given image within a DM3 file that
    contains more than one dataset.

    Parameters
    ----------
    record_by: Str
        One of: SI, Signal2D
    order : Str
        One of 'C' or 'F'
    lazy : bool, default False
        Load the signal lazily.
    %s
    """

    with open(filename, "rb") as f:
        dm = DigitalMicrographReader(f)
        dm.parse_file()
        images = [ImageObject(imdict, f, order=order, record_by=record_by)
                  for imdict in dm.get_image_dictionaries()]
        imd = []
        del dm.tags_dict['ImageList']
        dm.tags_dict['ImageList'] = {}

        for image in images:
            dm.tags_dict['ImageList'][
                'TagGroup0'] = image.imdict.as_dictionary()
            axes = image.get_axes_dict()
            mp = image.get_metadata()
            mp['General']['original_filename'] = os.path.split(filename)[1]
            post_process = []
            if image.to_spectrum is True:
                post_process.append(lambda s: s.to_signal1D(optimize=optimize))
            post_process.append(lambda s: s.squeeze())
            if lazy:
                image.filename = filename
                from dask.array import from_delayed
                import dask.delayed as dd
                val = dd(image.get_data, pure=True)()
                data = from_delayed(val, shape=image.shape,
                                    dtype=image.dtype)
            else:
                data = image.get_data()
            imd.append(
                {'data': data,
                 'axes': axes,
                 'metadata': mp,
                 'original_metadata': dm.tags_dict,
                 'post_process': post_process,
                 'mapping': image.get_mapping(),
                 })

    return imd


file_reader.__doc__ %= OPTIMIZE_ARG.replace('False', 'True')
Example #38
    def func(band_data, luts=luts):
        # NaN/null values will become 0
        band_data = band_data.clip(0, luts.size - 1).astype(np.uint8)

        def _delayed(luts, band_data):
            # can't use luts.__getitem__ for some reason
            return luts[band_data]
        new_delay = dask.delayed(_delayed)(luts, band_data)
        new_data = da.from_delayed(new_delay, shape=band_data.shape,
                                   dtype=luts.dtype)
        return new_data
Example #39
    def func(band_data, kernel=kernel, mode=mode):
        def _delayed(band_data, kernel, mode):
            band_data = band_data.reshape(band_data.shape[1:])
            new_data = convolve2d(band_data, kernel, mode=mode)
            return new_data.reshape((1, band_data.shape[0],
                                     band_data.shape[1]))

        delay = dask.delayed(_delayed)(band_data, kernel, mode)
        new_data = da.from_delayed(delay, shape=band_data.shape,
                                   dtype=band_data.dtype)
        return new_data
Example #40
    def compute(self, data, cache_id=None, fill_value=0, weight_count=10000,
                weight_min=0.01, weight_distance_max=1.0,
                weight_delta_max=1.0, weight_sum_min=-1.0,
                maximum_weight_mode=False, grid_coverage=0, **kwargs):
        """Resample the data according to the precomputed X/Y coordinates.

        :param grid_coverage: minimum ratio of number of output grid pixels
                              covered with swath pixels

        """
        rows = self.cache["rows"]
        cols = self.cache["cols"]

        # if the data is scan based then check its metadata or the passed
        # kwargs otherwise assume the entire input swath is one large
        # "scanline"
        rows_per_scan = kwargs.get('rows_per_scan',
                                   data.attrs.get("rows_per_scan",
                                                  data.shape[0]))

        if data.ndim == 3 and 'bands' in data.dims:
            data_in = tuple(data.sel(bands=band).data
                            for band in data['bands'])
        elif data.ndim == 2:
            data_in = data.data
        else:
            raise ValueError("Unsupported data shape for EWA resampling.")

        res = dask.delayed(self._call_fornav)(
            cols, rows, self.target_geo_def, data_in,
            grid_coverage=grid_coverage,
            rows_per_scan=rows_per_scan, weight_count=weight_count,
            weight_min=weight_min, weight_distance_max=weight_distance_max,
            weight_delta_max=weight_delta_max, weight_sum_min=weight_sum_min,
            maximum_weight_mode=maximum_weight_mode)
        if isinstance(data_in, tuple):
            new_shape = (len(data_in),) + self.target_geo_def.shape
        else:
            new_shape = self.target_geo_def.shape
        data_arr = da.from_delayed(res, new_shape, data.dtype)
        # from delayed creates one large chunk, break it up a bit if we can
        data_arr = data_arr.rechunk([CHUNK_SIZE] * data_arr.ndim)
        if data.ndim == 3 and data.dims[0] == 'bands':
            dims = ('bands', 'y', 'x')
        elif data.ndim == 2:
            dims = ('y', 'x')
        else:
            dims = data.dims

        return xr.DataArray(data_arr, dims=dims,
                            attrs=data.attrs.copy())
Example #41
    def _map_iterate(self,
                     function,
                     iterating_kwargs=(),
                     show_progressbar=None,
                     parallel=None,
                     ragged=None,
                     inplace=True,
                     **kwargs):
        if ragged not in (True, False):
            raise ValueError('"ragged" kwarg has to be bool for lazy signals')
        _logger.debug("Entering '_map_iterate'")

        size = max(1, self.axes_manager.navigation_size)
        from hyperspy.misc.utils import (create_map_objects,
                                         map_result_construction)
        func, iterators = create_map_objects(function, size, iterating_kwargs,
                                             **kwargs)
        iterators = (self._iterate_signal(), ) + iterators
        res_shape = self.axes_manager._navigation_shape_in_array
        # no navigation
        if not len(res_shape) and ragged:
            res_shape = (1,)

        all_delayed = [dd(func)(data) for data in zip(*iterators)]

        if ragged:
            sig_shape = ()
            sig_dtype = np.dtype('O')
        else:
            one_compute = all_delayed[0].compute()
            sig_shape = one_compute.shape
            sig_dtype = one_compute.dtype
        pixels = [
            da.from_delayed(
                res, shape=sig_shape, dtype=sig_dtype) for res in all_delayed
        ]

        for step in reversed(res_shape):
            _len = len(pixels)
            starts = range(0, _len, step)
            ends = range(step, _len + step, step)
            pixels = [
                da.stack(
                    pixels[s:e], axis=0) for s, e in zip(starts, ends)
            ]
        result = pixels[0]
        res = map_result_construction(
            self, inplace, result, ragged, sig_shape, lazy=True)
        return res
Example #42
def as_lazy_data(data, chunks=None, asarray=False):
    """
    Convert the input array `data` to a dask array.

    Args:

    * data:
        An array. This will be converted to a dask array.

    Kwargs:

    * chunks:
        Describes how the created dask array should be split up. Defaults to a
        value first defined in biggus (being `8 * 1024 * 1024 * 2`).
        For more information see
        http://dask.pydata.org/en/latest/array-creation.html#chunks.

    * asarray:
        If True, then chunks will be converted to instances of `ndarray`.
        Set to False (default) to pass the chunks through unchanged.

    Returns:
        The input array converted to a dask array.

    """
    if chunks is None:
        # Default to the shape of the wrapped array-like,
        # but reduce it if larger than a default maximum size.
        chunks = _limited_shape(data.shape)

    if not is_lazy_data(data):
        if data.shape == ():
            # Workaround for https://github.com/dask/dask/issues/2823. Make
            # sure scalar dask arrays return numpy objects.
            dtype = data.dtype
            data = _getall_delayed(data)
            data = da.from_delayed(data, (), dtype)
        else:
            data = da.from_array(data, chunks=chunks, asarray=asarray)
    return data
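The scalar branch above exists because dask arrays built directly from 0-d inputs could return non-NumPy scalars (dask issue #2823); wrapping the value in a delayed call and using from_delayed with shape=() avoids that. A reduced sketch, using dask.delayed(np.asarray) as a stand-in for the _getall_delayed helper:

import numpy as np
import dask
import dask.array as da

value = np.float64(3.0)                        # a 0-d input
lazy = dask.delayed(np.asarray)(value)
arr = da.from_delayed(lazy, shape=(), dtype=value.dtype)
print(arr.shape, arr.compute())                # () 3.0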
Example #43
def test_clock_tec_solve_dask():
    np.random.seed(1234)
    import pylab as plt
    times = np.arange(2)
    freqs = np.linspace(110e6,170e6,1000)
    
    cs = np.array([1,1])
    tec = np.array([0.1,0.2])
    delay = np.ones(len(times)) * 2e-9  # 2 ns
    phase = np.multiply.outer(np.ones(len(freqs)),cs) + 8.44797256e-7*TECU*np.multiply.outer(1./freqs,tec) + 2.*np.pi*np.multiply.outer(freqs,delay)
    phase += 15*np.pi/180.*np.random.normal(size=[len(freqs),len(times)])
    #plt.imshow(phase,origin='lower',extent=(times[0],times[-1],freqs[0],freqs[-1]),aspect='auto')
    #plt.colorbar()
    #plt.xlabel('times (s)')
    #plt.ylabel('freqs (Hz)')
    #plt.show()
    m,cov = least_squares_solve(phase, freqs, times,15,Ct_ratio=0.01)
    m_exact = np.array([delay,tec,cs]).T
    import dask.array as da
    solsMH = [da.from_delayed(clock_tec_solve_dask(phase[:,i],freqs,m[i,:], cov[i,:,:],15,0.01),shape=(3,),dtype=np.double) for i in range(len(times))]
    
    sol_stacked = da.stack(solsMH, axis = 0)
    sol = sol_stacked.compute()
    print(sol)
Example #44
 def dask_win_func(n):
     return dsar.from_delayed(
         delayed(numpy_win_func, pure=True)(n),
         (n,), float)
Example #45
def ser_reader(filename, objects=None, *args, **kwds):
    """Reads the information from the file and returns it in the HyperSpy
    required format.

    """
    header, data = load_ser_file(filename)
    record_by = guess_record_by(header['DataTypeID'])
    ndim = int(header['NumberDimensions'])
    date, time = None, None
    if objects is not None:
        objects_dict = convert_xml_to_dict(objects[0])
        date, time = _get_date_time(objects_dict.ObjectInfo.AcquireDate)
    if "PositionY" in data.dtype.names and len(data['PositionY']) > 1 and \
            (data['PositionY'][0] == data['PositionY'][1]):
        # The spatial dimensions are stored in F order i.e. X, Y, ...
        order = "F"
    else:
        # The spatial dimensions are stored in C order i.e. ..., Y, X
        order = "C"
    if ndim == 0 and header["ValidNumberElements"] != 0:
        # The calibration of the axes are not stored in the header.
        # We try to guess from the position coordinates.
        array_shape, axes = get_axes_from_position(header=header,
                                                   data=data)
    else:
        axes = []
        array_shape = [None, ] * int(ndim)
        spatial_axes = ["x", "y"][:ndim]
        for i in range(ndim):
            idim = 1 + i if order == "C" else ndim - i
            if (record_by == "spectrum" or
                    header['Dim-%i_DimensionSize' % (i + 1)][0] != 1):
                units = (header['Dim-%i_Units' % (idim)][0].decode('utf-8')
                         if header['Dim-%i_UnitsLength' % (idim)] > 0
                         else t.Undefined)
                if units == "meters":
                    name = (spatial_axes.pop() if order == "F"
                            else spatial_axes.pop(-1))
                else:
                    name = t.Undefined
                axes.append({
                    'offset': header['Dim-%i_CalibrationOffset' % idim][0],
                    'scale': header['Dim-%i_CalibrationDelta' % idim][0],
                    'units': units,
                    'size': header['Dim-%i_DimensionSize' % idim][0],
                    'name': name,
                })
                array_shape[i] = \
                    header['Dim-%i_DimensionSize' % idim][0]
    # Spectral dimension
    if record_by == "spectrum":
        axes.append({
            'offset': data['CalibrationOffset'][0],
            'scale': data['CalibrationDelta'][0],
            'size': data['ArrayLength'][0],
            'index_in_array': header['NumberDimensions'][0]
        })

        # FEI seems to use the international system of units (SI) for the
        # energy scale (eV).
        axes[-1]['units'] = 'eV'
        axes[-1]['name'] = 'Energy'

        array_shape.append(data['ArrayLength'][0])

    elif record_by == 'image':
        if objects is not None:
            units = _guess_units_from_mode(objects_dict, header)
        else:
            units = "meters"
        # Y axis
        axes.append({
            'name': 'y',
            'offset': data['CalibrationOffsetY'][0] -
            data['CalibrationElementY'][0] * data['CalibrationDeltaY'][0],
            'scale': data['CalibrationDeltaY'][0],
            'units': units,
            'size': data['ArraySizeY'][0],
        })
        array_shape.append(data['ArraySizeY'][0])

        # X axis
        axes.append({
            'name': 'x',
            'offset': data['CalibrationOffsetX'][0] -
            data['CalibrationElementX'][0] * data['CalibrationDeltaX'][0],
            'scale': data['CalibrationDeltaX'][0],
            'size': data['ArraySizeX'][0],
            'units': units,
        })
        array_shape.append(data['ArraySizeX'][0])
    # FEI seems to use the international system of units (SI) for the
    # spatial scale. However, we prefer to work in nm
    for axis in axes:
        if axis['units'] == 'meters':
            axis['units'] = 'nm'
            axis['scale'] *= 10 ** 9
        elif axis['units'] == '1/meters':
            axis['units'] = '1/nm'
            axis['scale'] /= 10 ** 9
    # Remove Nones from array_shape caused by squeezing size 1 dimensions
    array_shape = [dim for dim in array_shape if dim is not None]
    lazy = kwds.pop('lazy', False)
    if lazy:
        from dask import delayed
        from dask.array import from_delayed
        val = delayed(load_only_data, pure=True)(filename, array_shape,
                                                 record_by, len(axes))
        dc = from_delayed(val, shape=array_shape,
                          dtype=data['Array'].dtype)
    else:
        dc = load_only_data(filename, array_shape, record_by, len(axes),
                            data=data)

    if ordict:
        original_metadata = OrderedDict()
    else:
        original_metadata = {}
    header_parameters = sarray2dict(header)
    sarray2dict(data, header_parameters)
    # We remove the Array key to save memory avoiding duplication
    del header_parameters['Array']
    original_metadata['ser_header_parameters'] = header_parameters
    metadata = {'General': {
        'original_filename': os.path.split(filename)[1],
    },
        "Signal": {
        'signal_type': "",
        'record_by': record_by,
    },
    }
    if date is not None and time is not None:
        metadata['General']['date'] = date
        metadata['General']['time'] = time
    dictionary = {
        'data': dc,
        'metadata': metadata,
        'axes': axes,
        'original_metadata': original_metadata,
        'mapping': mapping}
    return dictionary
Example #46
def file_reader(filename, record_by='image', force_read_resolution=False,
                **kwds):
    """
    Read data from tif files using Christoph Gohlke's tifffile library.
    The units and the scale of images saved with ImageJ or Digital
    Micrograph is read. There is limited support for reading the scale of
    files created with Zeiss and FEI SEMs.

    Parameters
    ----------
    filename: str
    record_by: {'image'}
        Has no effect because this format only supports recording by
        image.
    force_read_resolution: Bool
        Default: False.
        Force reading the x_resolution, y_resolution and the resolution_unit
        of the tiff tags.
        See http://www.awaresystems.be/imaging/tiff/tifftags/resolutionunit.html
    **kwds, optional
    """

    _logger.debug('************* Loading *************')
    # For testing the use of local and skimage tifffile library
    import_local_tifffile = False
    if 'import_local_tifffile' in kwds.keys():
        import_local_tifffile = kwds.pop('import_local_tifffile')

    imsave, TiffFile = _import_tifffile_library(import_local_tifffile)
    lazy = kwds.pop('lazy', False)
    memmap = kwds.pop('memmap', False)
    with TiffFile(filename, **kwds) as tiff:

        # change in the Tifffiles API
        if hasattr(tiff.series[0], 'axes'):
            # in newer version the axes is an attribute
            axes = tiff.series[0].axes
        else:
            # old version
            axes = tiff.series[0]['axes']
        is_rgb = tiff.is_rgb
        _logger.debug("Is RGB: %s" % is_rgb)
        series = tiff.series[0]
        if hasattr(series, 'shape'):
            shape = series.shape
            dtype = series.dtype
        else:
            shape = series['shape']
            dtype = series['dtype']
        if is_rgb:
            axes = axes[:-1]
            names = ['R', 'G', 'B', 'A']
            lastshape = shape[-1]
            dtype = np.dtype({'names': names[:lastshape],
                              'formats': [dtype] * lastshape})
            shape = shape[:-1]
        op = {}
        for key, tag in tiff[0].tags.items():
            op[key] = tag.value
        names = [axes_label_codes[axis] for axis in axes]

        _logger.debug('Tiff tags list: %s' % op.keys())
        _logger.debug("Photometric: %s" % op['photometric'])
        _logger.debug('is_imagej: {}'.format(tiff[0].is_imagej))


        # workaround for 'palette' photometric, keep only 'X' and 'Y' axes
        sl = None
        if op['photometric'] == 3:
            sl = [0] * len(shape)
            names = []
            for i, axis in enumerate(axes):
                if axis == 'X' or axis == 'Y':
                    sl[i] = slice(None)
                    names.append(axes_label_codes[axis])
                else:
                    axes.replace(axis, '')
            shape = tuple(_sh for _s, _sh in zip(sl, shape)
                          if isinstance(_s, slice))
        _logger.debug("names: {0}".format(names))

        scales = [1.0] * len(names)
        offsets = [0.0] * len(names)
        units = [t.Undefined] * len(names)
        try:
            scales_d, units_d, offsets_d, intensity_axis = \
                _parse_scale_unit(tiff, op, shape,
                                  force_read_resolution)
            for i, name in enumerate(names):
                if name == 'height':
                    scales[i], units[i] = scales_d['x'], units_d['x']
                    offsets[i] = offsets_d['x']
                elif name == 'width':
                    scales[i], units[i] = scales_d['y'], units_d['y']
                    offsets[i] = offsets_d['y']
                elif name in ['depth', 'image series', 'time']:
                    scales[i], units[i] = scales_d['z'], units_d['z']
                    offsets[i] = offsets_d['z']
        except:
            _logger.info("Scale and units could not be imported")

        axes = [{'size': size,
                 'name': str(name),
                 'scale': scale,
                 'offset': offset,
                 'units': unit,
                 }
                for size, name, scale, offset, unit in zip(shape, names,
                                                           scales, offsets,
                                                           units)]

        md = {'General': {'original_filename': os.path.split(filename)[1]},
              'Signal': {'signal_type': "",
                         'record_by': "image",
                         },
              }

        if 'units' in intensity_axis.keys():
            md['Signal']['quantity'] = intensity_axis['units']
        if 'scale' in intensity_axis.keys() and 'offset' in intensity_axis.keys():
            dic = {'gain_factor': intensity_axis['scale'],
                   'gain_offset': intensity_axis['offset']}
            md['Signal']['Noise_properties'] = {'Variance_linear_model': dic}

    data_args = TiffFile, filename, is_rgb, sl
    if lazy:
        from dask import delayed
        from dask.array import from_delayed
        memmap = True
        val = delayed(_load_data, pure=True)(*data_args, memmap=memmap, **kwds)
        dc = from_delayed(val, dtype=dtype, shape=shape)
        # TODO: maybe just pass the memmap from tiffile?
    else:
        dc = _load_data(*data_args, memmap=memmap, **kwds)

    return [{'data': dc,
             'original_metadata': op,
             'axes': axes,
             'metadata': md,
             }]
Example #47
    def load_from_unf(cls, filename, lazy=False):
        """Load a `.unf`-file into a :class:`~.SemperFormat` object.

        Parameters
        ----------
        filename : string
            The name of the unf-file from which to load the data. Standard
            format is '\*.unf'.

        Returns
        -------
        semper : :class:`~.SemperFormat` (N=1)
            SEMPER file format object containing the loaded information.

        """
        metadata = OrderedDict()
        with open(filename, 'rb') as f:
            # Read header:
            rec_length = np.fromfile(
                f,
                dtype='<i4',
                count=1)[0]  # length of header
            header = np.fromfile(
                f,
                dtype=cls.HEADER_DTYPES[
                    :rec_length //
                    2],
                count=1)
            metadata.update(sarray2dict(header))
            assert np.frombuffer(f.read(4), dtype=np.int32)[0] == rec_length, \
                'Error while reading the header (length is not correct)!'
            data_format = cls.IFORM_DICT[metadata['IFORM']]
            iversn, remain = divmod(metadata['IFLAG'], 10000)
            ilabel, ntitle = divmod(remain, 1000)
            metadata.update(
                {'IVERSN': iversn, 'ILABEL': ilabel, 'NTITLE': ntitle})
            # Read title:
            title = ''
            if ntitle > 0:
                assert np.fromfile(
                    f,
                    dtype='<i4',
                    count=1)[0] == ntitle  # length of title
                title = b''.join(np.fromfile(f, dtype='c', count=ntitle))
                title = title.decode()
                metadata['TITLE'] = title
                assert np.fromfile(f, dtype='<i4', count=1)[0] == ntitle
            if ilabel:
                try:
                    metadata.update(cls._read_label(f))
                except Exception as e:
                    warning = ('Could not read label, trying to proceed '
                               'without it!')
                    warning += ' (Error message: {})'.format(str(e))
                    warnings.warn(warning)
            # Read picture data:
            pos = f.tell()
            shape = metadata['NLAY'], metadata['NROW'], metadata['NCOL']
            if lazy:
                from dask.array import from_delayed
                from dask import delayed
                task = delayed(_read_data)(f, filename, pos, data_format,
                                           shape)
                data = from_delayed(task, shape=shape, dtype=data_format)
            else:
                data = _read_data(f, filename, pos, data_format, shape)
        offsets = (metadata.get('X0V0', 0.),
                   metadata.get('Y0V2', 0.),
                   metadata.get('Z0V4', 0.))
        scales = (metadata.get('DXV1', 1.),
                  metadata.get('DYV3', 1.),
                  metadata.get('DZV5', 1.))
        units = (metadata.get('XUNIT', Undefined),
                 metadata.get('YUNIT', Undefined),
                 metadata.get('ZUNIT', Undefined))
        return cls(data, title, offsets, scales, units, metadata)