Example No. 1
def show_images():

    x = plt.figure(1)
    plt.clf()
    plt.imshow(sky, vmin=da.min(sky), vmax=da.max(sky))
    plt.title('sky')
    plt.show(block=False)

    y = plt.figure(2)
    plt.clf()
    plt.imshow(psf, vmin=da.min(psf), vmax=da.max(psf))
    plt.title('psf')
    plt.show(block=False)

    z = plt.figure(3)
    plt.clf()
    plt.imshow(dirty, vmin=da.min(dirty), vmax=da.max(dirty))
    plt.title('dirty')
    plt.show(block=False)
    while (plt.fignum_exists(1) and plt.fignum_exists(2)
           and plt.fignum_exists(3)):
        try:
            plt.pause(10000000)
            plt.close("all")
        except:
            break
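A hedged variant of the helper above (a sketch with a synthetic array, not the original project's data): computing vmin and vmax once with dask.compute shares the underlying graph instead of evaluating da.min and da.max separately when matplotlib coerces them.

import dask
import dask.array as da
import matplotlib.pyplot as plt
import numpy as np

# Synthetic stand-in for the `sky` array used above (an assumption).
sky = da.from_array(np.random.default_rng(0).random((128, 128)), chunks=64)

# One shared evaluation for both reductions.
vmin, vmax = dask.compute(da.min(sky), da.max(sky))

plt.figure(1)
plt.clf()
plt.imshow(np.asarray(sky), vmin=vmin, vmax=vmax)
plt.title('sky')
plt.show(block=False)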
Example No. 2
def max_and_argmax(data):
    """Returns max and argmax along last two axes.

    Last two axes should correspond to the x and y dimensions.

    Parameters
    ----------
    data : dask array
        data with at least 3 dimensions

    Returns
    -------
    weights : dask array
        max of `data` along the last two axes
    argmax : dask array
        argmax of `data` along the last two axes
    """
    # Slap out a dimension to nicely apply argmax and max
    flatData = data.reshape(data.shape[:-2] + (-1, ))
    argmax = da.argmax(flatData, axis=-1)
    # We can forego calculating both max and argmax as soon as
    # we have da.take_along_axis() https://github.com/dask/dask/issues/3663
    # Would a map_blocks of np.take_along_axis() work and be faster?
    weights = da.max(flatData, axis=-1)
    return weights, argmax
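A minimal usage sketch, assuming the max_and_argmax defined above is in scope; the flat argmax can be turned back into (y, x) indices with np.unravel_index.

import numpy as np
import dask.array as da

data = da.from_array(np.arange(24).reshape(2, 3, 4), chunks=(1, 3, 4))
weights, argmax = max_and_argmax(data)
print(weights.compute())   # [11 23], max over the last two axes
print(argmax.compute())    # [11 11], flat index into each 3x4 plane
print(np.unravel_index(argmax.compute(), data.shape[-2:]))  # (array([2, 2]), array([3, 3]))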
Example No. 3
        def load_data(statistic, axis):
            import dask.array as da
            import numpy as np
            from glue.utils import view_shape
            x = da.from_zarr('/mnt/cephfs/zarr_data_full')
            f = 1500
            scale = 2

            lh = []
            for k in range(scale):
                lc = []
                for i in range(scale):
                    lr = []
                    for j in range(scale):
                        lr.append(x[f % 3500])
                        f = f + 1
                    lc.append(da.concatenate(lr))
                lh.append(da.concatenate(lc, 1))
            z = da.concatenate(lh, 2)

            if statistic == 'minimum':
                return da.min(z, axis).compute()
            elif statistic == 'maximum':
                return da.max(z, axis).compute()
            elif statistic == 'mean' or statistic == 'median':
                return da.mean(z, axis).compute()
            elif statistic == 'percentile':
                return percentile / 100
            elif statistic == 'sum':
                return da.sum(z, axis).compute()
            return 0
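The statistic dispatch above can also be written as a lookup table. This is only a sketch under the assumption that `z` is the assembled dask array and `axis` the reduction axis; the 'median' case keeps the mean approximation used above, and the 'percentile' branch is omitted here.

import dask.array as da

_REDUCTIONS = {
    'minimum': da.min,
    'maximum': da.max,
    'mean': da.mean,
    'median': da.mean,  # same approximation as the original code
    'sum': da.sum,
}

def reduce_statistic(z, statistic, axis):
    # Hypothetical helper name; returns 0 for unknown statistics, as above.
    func = _REDUCTIONS.get(statistic)
    return func(z, axis=axis).compute() if func else 0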
Example No. 4
def searchdask(a, v, how=None, atol=None):
    n_a = a.shape[0]
    searchfunc, args = presearch(a, v)

    if how == 'nearest':
        l_index = da.maximum(searchfunc(*args, side='right') - 1, 0)
        r_index = da.minimum(searchfunc(*args), n_a - 1)
        cond = 2 * v < (select(a, r_index) + select(a, l_index))
        indexer = da.maximum(da.where(cond, l_index, r_index), 0)
    elif how == 'bfill':
        indexer = searchfunc(*args)
    elif how == 'ffill':
        indexer = searchfunc(*args, side='right') - 1
        indexer = da.where(indexer == -1, n_a, indexer)
    elif how is None:
        l_index = searchfunc(*args)
        r_index = searchfunc(*args, side='right')
        indexer = da.where(l_index == r_index, n_a, l_index)
    else:
        raise NotImplementedError

    if atol is not None:
        a2 = da.concatenate([a, [atol + da.max(v) + 1]])
        indexer = da.where(
            da.absolute(select(a2, indexer) - v) > atol, n_a, indexer)
    return indexer
Example No. 5
    def extract(self):

        df_path = pd.read_csv('path_to_file.csv', sep=';')

        df_path = df_path.rename(columns={'Unnamed: 0': 'id'})
        df_path = df_path.set_index('id')

        print(df_path)

        ds_batch = xr.open_mfdataset(df_path['path'],
                                     parallel=True)  #loading ncdf files

        print(ds_batch)

        print("--- Total size (GB):")
        print(ds_batch.nbytes * (2**-30))  # get size of the dataset in GB

        #getting average albedos over whole time period (used for maps and scatter plots)
        darr = ds_batch['QFLAG']  #getting data for specific band
        print(darr)

        #res = darr.mean(['lon','lat'])
        #res = da.count_nonzero( da.bitwise_and(darr//2**5, 1), ['lon','lat'])
        #res = (darr==32).sum(['lon','lat'])
        #res = xr.ufunc.bitwise_and(darr, 0b100000).sum(['lon','lat'])
        func = lambda x: np.bitwise_and(np.right_shift(x, 5), np.uint64(1))
        func = lambda x: np.bitwise_and(x, np.uint64(1))
        res = xr.apply_ufunc(func,
                             darr,
                             input_core_dims=[['lon', 'lat']],
                             dask='parallelized',
                             vectorize=True)
        #res = np.bitwise_and(np.right_shift(darr, 5), 1).sum(['lon', 'lat'])
        #res = (darr==32).max(['lon','lat'])
        print(np.array(res))

        sys.exit()

        da_count = ((darr >> 5) & 1)  #extract the QFLAG bit of interest
        da_mean = da_count.mean('time')  #calculate mean over time
        #da_mean_lowres = da_mean.sel(lat=slice(70, 30)).sel(lon=slice(-25, 70)) # this can be used to zoom in over Europe
        da_mean_lowres = da_mean.isel(lat=slice(None, None, 10)).isel(
            lon=slice(None, None, 10))  #downsampling for faster plotting

        #getting average, min and max values for each time step (used to plot timeline)
        da_timeline_mean = darr.mean(['lon', 'lat'])
        da_timeline_max = darr.max(['lon', 'lat'])
        da_timeline_min = darr.min(['lon', 'lat'])

        #closing the dataset to free memory
        ds_batch.close()

        return da_mean_lowres, da_timeline_mean, da_timeline_max, da_timeline_min
Example No. 6
    def _subdivide(self, hdf5obj, imagepathin, imagepathout=None):
        # Use whatever chunk size that imaris has used
        # Not sure this is perfect - sometimes there are some redundant
        # slices to pad out the chunk
        chunkshape = hdf5obj[imagepathin].chunks
        imshape = hdf5obj[imagepathin].shape
        aa = (tuple([imshape[0]]), self.chunkstuff(imshape[1], chunkshape[1]),
              self.chunkstuff(imshape[2], chunkshape[2]))
        dtp = hdf5obj[imagepathin].dtype
        #print("Image shape",  imshape)
        subsamp = self._subdiv
        # imaris appears to do z,y,x - only subsample x and y...
        daskimg = da.from_array(hdf5obj[imagepathin], chunks=aa)
        #blurred = daskimg.map_overlap(mysmoother2, depth=(0, 6, 6), boundary='reflect', dtype = dtp)
        blurred = daskimg.map_overlap(self.mysmoother,
                                      depth=(0, 6, 6),
                                      boundary='reflect',
                                      dtype=dtp)
        #d2 = (np.ceil(np.array(chunkshape)/2.0)).astype(int)
        dz = tuple(np.ceil(np.array(aa[0]) / float(subsamp[0])).astype(int))
        dy = tuple(np.ceil(np.array(aa[1]) / float(subsamp[1])).astype(int))
        dx = tuple(np.ceil(np.array(aa[2]) / float(subsamp[2])).astype(int))
        downsamp = blurred.map_blocks(self.myresize,
                                      dtype=dtp,
                                      chunks=(dz, dy, dx))
        # histograms
        mx = da.max(downsamp)
        mn = da.min(downsamp)
        mx = mx.compute()
        mn = mn.compute()
        h, bins = da.histogram(downsamp, bins=256, range=(mn, mx))
        self.to_hdf5(hdf5obj, imagepathout, downsamp)
        # need to fix this - will break on windows
        grouppath = posixpath.dirname(imagepathout)

        def mkAttr(XX):
            return np.frombuffer(str(XX).encode(), dtype='|S1')

        hdf5obj[grouppath].attrs['ImageSizeX'] = mkAttr(downsamp.shape[2])
        hdf5obj[grouppath].attrs['ImageSizeY'] = mkAttr(downsamp.shape[1])
        hdf5obj[grouppath].attrs['ImageSizeZ'] = mkAttr(downsamp.shape[0])
        hdf5obj[grouppath].attrs['HistogramMin'] = mkAttr(mn)
        hdf5obj[grouppath].attrs['HistogramMax'] = mkAttr(mx)
        self.to_hdf5(hdf5obj, posixpath.join(grouppath, 'Histogram'), h)
Example No. 7
def analyze(t, c, z):
    plane = data[t, c, z, :, :]
    smoothed_image = dask_image.ndfilters.gaussian_filter(plane, sigma=[1, 1])
    threshold_value = 0.75 * da.max(smoothed_image).compute()
    threshold_image = smoothed_image > threshold_value
    label_image, num_labels = dask_image.ndmeasure.label(threshold_image)
    name = 't:%s, c: %s, z:%s' % (t, c, z)
    print("Plane coordinates: %s" % name)
    ref = 't_%s_c_%s_z_%s' % (t, c, z)
    return label_image, ref
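A self-contained sketch of the same threshold-at-75%-of-max pipeline on a synthetic plane, assuming dask-image is installed; the `data` array above normally comes from the surrounding script.

import dask.array as da
import dask_image.ndfilters
import dask_image.ndmeasure

plane = da.random.random((64, 64), chunks=(64, 64))
smoothed = dask_image.ndfilters.gaussian_filter(plane, sigma=[1, 1])
threshold_value = 0.75 * da.max(smoothed).compute()
label_image, num_labels = dask_image.ndmeasure.label(smoothed > threshold_value)
print(int(num_labels.compute()))  # number of connected components found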
Example No. 8
def plot_subfigure(X, Y, subplot, transform):
    if transform == "pca":
        X = PCA(n_components=2).fit_transform(X)
    elif transform == "cca":
        X = CCA(n_components=2).fit(X, Y).transform(X)
    else:
        raise ValueError

    min_x = da.min(X[:, 0])
    max_x = da.max(X[:, 0])

    min_y = da.min(X[:, 1])
    max_y = da.max(X[:, 1])

    classif = OneVsRestClassifier(LogisticRegression())
    classif.fit(X, Y)
    y_pred = classif.predict(X)

    print('{} + OneVsRestClassifier + LogisticRegression accuracy_score {}'.
          format(transform, accuracy_score(Y, y_pred)))

    plt.subplot(1, 2, subplot)
    plt.scatter(X[:, 0], X[:, 1], s=15, c='gray', edgecolors=(0, 0, 0))

    for i in da.unique(Y.argmax(axis=1)):
        class_ = da.where(Y[:, i])
        plt.scatter(X[class_, 0],
                    X[class_, 1],
                    s=25,
                    linewidths=2,
                    label='Class {}'.format(str(i)))

    for i in range(len(classif.estimators_)):
        plot_hyperplane(classif.estimators_[i], min_x, max_x, 'k--',
                        'Boundary\nfor class {}'.format(str(i)))

    plt.xticks(())
    plt.yticks(())

    plt.xlim(min_x - .1 * max_x, max_x + .1 * max_x)
    plt.ylim(min_y - .1 * max_y, max_y + .1 * max_y)
Example No. 9
def add_data(workspace: String, dataset: String):
    import dask.array as da
    from survos2.improc.utils import optimal_chunksize
    ws = get(workspace)
    with dataset_from_uri(dataset, mode='r') as data:
        chunk_size = optimal_chunksize(data, Config['computing.chunk_size'])
        data = da.from_array(data, chunks=chunk_size)
        data -= da.min(data)
        data /= da.max(data)
        ds = ws.add_data(data)
    logger.info(type(ds))
    return ds
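The two in-place operations above rescale the volume to [0, 1]: the division sees the already-shifted data, so dividing by its max divides by the original range. A standalone sketch with a guard against a constant volume, using made-up shapes:

import dask.array as da

vol = da.random.random((64, 64, 64), chunks=32)
lo, hi = da.min(vol), da.max(vol)
scaled = (vol - lo) / da.maximum(hi - lo, 1e-12)  # avoid division by zero for flat data
print(float(scaled.min().compute()), float(scaled.max().compute()))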
Example No. 10
def max_and_argmax(data):
    """Return the dask max and argmax of data along the last two axes,
    which correspond to the x and y dimensions
    (uncomputed)
    """
    # Slap out a dimension to nicely apply argmax and max
    flatData = data.reshape(data.shape[:-2] + (-1, ))
    argmax = da.argmax(flatData, axis=-1)
    # We can forego calculating both max and argmax as soon as
    # we have da.take_along_axis() https://github.com/dask/dask/issues/3663
    # Would a map_blocks of np.take_along_axis() work and be faster?
    weights = da.max(flatData, axis=-1)
    return weights, argmax
Example No. 11
def show_images():

    plt.figure(1)
    plt.clf()
    plt.imshow(quad, vmin=da.min(quad), vmax=da.max(quad))
    plt.title('quad')
    plt.show(block=False)
    while (plt.fignum_exists(1)):
        try:
            plt.pause(100000)
            plt.close("all")
        except:
            break
Example No. 12
def show_results():

    x = plt.figure(1)
    plt.clf()
    plt.imshow(hub, vmin=da.min(hub), vmax=da.max(hub))
    plt.title('huber')
    plt.show(block=False)

    while (plt.fignum_exists(1)):
        try:
            plt.pause(10000000)
            plt.close("all")
        except:
            break
Example No. 13
def show_images():

    for i in range(len(dirty)):
        plt.figure(i + 1)
        plt.clf()
        plt.imshow(quad[i], vmin=da.min(quad[i]), vmax=da.max(quad[i]))
        plt.title('quad' + str(i))
    plt.show(block=False)
    while plt.fignum_exists(1):
        try:
            plt.pause(100000)
            plt.close("all")
        except:
            break
Example No. 14
def maxproj2tiff(
    in_filepath: str,
    out_filepath: str,
    channel_names: typing.Any = None,
    flip: bool = False,
    overwrite: bool = False,
):
    """
    Maximum projection over channels of HDF5 and save to disk as TIFF.

    Args:
        in_filepath, out_filepath: str
            Paths of input HDF5 and output TIFF files.
        channel_names: list(str), str
            Names of the HDF5 datasets to use. If a string, it is treated as
            the path to a text file where each line is the name of a channel.
        flip: bool [optional]
            Invert intensities (dtype max minus value) before saving,
            default False.
        overwrite: bool [optional]
            Overwrite the output file if it already exists, default False.
    """
    # parse channel names
    if isinstance(channel_names, str):
        with open(channel_names, "r") as f:
            channel_names = [line.strip() for line in f]

    # load data
    f = h5py.File(in_filepath, "r")

    # allow the same API for single-channel images that do not need a
    # maximum projection but still need to be saved as TIFF
    if len(channel_names) == 1:
        arr = f[channel_names[0]]
    else:
        arr_list = [da.from_array(f[key]) for key in channel_names]
        arr = da.max(da.stack(arr_list, axis=-1), axis=-1)

    # in case flipping is needed
    if flip:
        try:
            dtype = np.iinfo(arr.dtype)
        except ValueError:
            dtype = np.finfo(arr.dtype)
        arr = dtype.max - arr

    # save to disk as TIFF
    tifffile.imsave(out_filepath, arr)
Example No. 15
def test_reductions():
    x = np.arange(5).astype('f4')
    a = da.from_array(x, chunks=(2, ))

    assert eq(da.all(a), np.all(x))
    assert eq(da.any(a), np.any(x))
    assert eq(da.argmax(a, axis=0), np.argmax(x, axis=0))
    assert eq(da.argmin(a, axis=0), np.argmin(x, axis=0))
    assert eq(da.max(a), np.max(x))
    assert eq(da.mean(a), np.mean(x))
    assert eq(da.min(a), np.min(x))
    assert eq(da.nanargmax(a, axis=0), np.nanargmax(x, axis=0))
    assert eq(da.nanargmin(a, axis=0), np.nanargmin(x, axis=0))
    assert eq(da.nanmax(a), np.nanmax(x))
    assert eq(da.nanmin(a), np.nanmin(x))
    assert eq(da.nansum(a), np.nansum(x))
    assert eq(da.nanvar(a), np.nanvar(x))
    assert eq(da.nanstd(a), np.nanstd(x))
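The eq() helper used in this test comes from dask's own test utilities and is not shown here; a rough stand-in (an assumption, not the original) could look like this.

import numpy as np

def eq(a, b):
    """Compare a dask result against a NumPy reference, computing if needed."""
    a = a.compute() if hasattr(a, 'compute') else a
    b = b.compute() if hasattr(b, 'compute') else b
    return np.allclose(a, b, equal_nan=True)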
Example No. 16
    def fix_data(self, cube):
        """Fix data.

        Unit is %, values are <= 1.

        Parameters
        ----------
        cube: iris.cube.Cube
            Cube to fix

        Returns
        -------
        iris.cube.Cube
            Fixed cube. It can be a different instance.
        """
        if cube.units == "%" and da.max(cube.core_data()).compute() <= 1.:
            cube.data = cube.core_data() * 100.
        return cube
Example No. 17
def test_reductions():
    x = np.arange(5).astype('f4')
    a = da.from_array(x, chunks=(2,))

    assert eq(da.all(a), np.all(x))
    assert eq(da.any(a), np.any(x))
    assert eq(da.argmax(a, axis=0), np.argmax(x, axis=0))
    assert eq(da.argmin(a, axis=0), np.argmin(x, axis=0))
    assert eq(da.max(a), np.max(x))
    assert eq(da.mean(a), np.mean(x))
    assert eq(da.min(a), np.min(x))
    assert eq(da.nanargmax(a, axis=0), np.nanargmax(x, axis=0))
    assert eq(da.nanargmin(a, axis=0), np.nanargmin(x, axis=0))
    assert eq(da.nanmax(a), np.nanmax(x))
    assert eq(da.nanmin(a), np.nanmin(x))
    assert eq(da.nansum(a), np.nansum(x))
    assert eq(da.nanvar(a), np.nanvar(x))
    assert eq(da.nanstd(a), np.nanstd(x))
Example No. 18
def add_data(workspace: String, data_fname: String):
    import dask.array as da

    from survos2.improc.utils import optimal_chunksize

    ws = get(workspace)
    logger.info(f"Adding data to workspace {ws}")

    with dataset_from_uri(data_fname, mode="r") as data:

        chunk_size = optimal_chunksize(data, Config["computing.chunk_size"])
        logger.debug(
            f'Calculating optimal chunk size using chunk_size {Config["computing.chunk_size"]}: {chunk_size}'
        )

        data = da.from_array(data, chunks=chunk_size)
        data -= da.min(data)
        data /= da.max(data)
        ds = ws.add_data(data)
        # ds.set_attr("chunk_size", chunk_size)
    return ds
Example No. 19
 def statistics(self, data, pca_stats=None):
     # set headers
     if pca_stats:  # for pca
         if pca_stats["eigenvals"] is not None:
             self.stats_header.setText("Eigenvalue: {} ({}%)".format(
                 round(pca_stats["eigenvals"][self.pc_id - 1], 2),
                 round(pca_stats["eigenvals_%"][self.pc_id - 1], 2)))
             self.stats_header.setToolTip(
                 "It shows how are the dispersion of the data with respect to its component"
             )
         else:
             self.stats_header.setText("Eigenvalue: --")
             self.stats_header.setToolTip(
                 "Is only available when the components are computed with the plugin"
             )
     else:  # for aoi
         self.stats_header.setText("Pixels in AOI: {}".format(
             round(data.size if data.size > 1 else 0, 2)))
         self.stats_header.setToolTip("")
     # restore or compute the statistics
     if self.QCBox_StatsLayer.currentText(
     ) == self.pc_name and self.stats_pc is not None:
         min, max, std, p25, p50, p75 = self.stats_pc
     else:
         da_data = da.from_array(data, chunks=(8000000, ))
         min = da.min(da_data).compute()
         max = da.max(da_data).compute()
         std = da.std(da_data).compute()
         p25 = da.percentile(da_data, 25).compute()[0]
         p50 = da.percentile(da_data, 50).compute()[0]
         p75 = da.percentile(da_data, 75).compute()[0]
         if self.QCBox_StatsLayer.currentText() == self.pc_name:
             self.stats_pc = (min, max, std, p25, p50, p75)
     # set in dialog
     self.stats_min.setText(str(round(min, 2)))
     self.stats_max.setText(str(round(max, 2)))
     self.stats_std.setText(str(round(std, 2)))
     self.stats_p25.setText(str(round(p25, 2)))
     self.stats_p50.setText(str(round(p50, 2)))
     self.stats_p75.setText(str(round(p75, 2)))
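The dask statistics above in isolation, as a small sketch on synthetic values; da.percentile() expects a one-dimensional dask array, which matches the flat, coarsely chunked array built above, and dask.compute evaluates all reductions in one pass.

import dask
import dask.array as da
import numpy as np

values = np.random.default_rng(0).normal(size=1_000_000)
da_data = da.from_array(values, chunks=(8_000_000, ))
mn, mx, std, (p25, p50, p75) = dask.compute(
    da.min(da_data), da.max(da_data), da.std(da_data),
    da.percentile(da_data, [25, 50, 75]))
print(round(mn, 2), round(mx, 2), round(std, 2),
      round(p25, 2), round(p50, 2), round(p75, 2))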
Example No. 20
def test_workspace():
    ws = Workspace(".")
    workspace_fpath = "./newws1"
    ws = ws.create(workspace_fpath)
    data_fname = "./tmp/testvol_4x4x4b.h5"

    with dataset_from_uri(data_fname, mode="r") as data:
        chunk_size = optimal_chunksize(data, Config["computing.chunk_size"])
        data = da.from_array(data, chunks=chunk_size)
        data -= da.min(data)
        data /= da.max(data)
        ds = ws.add_data(data)
        # ds.set_attr("chunk_size", chunk_size)

    ws.add_dataset("testds", "float32")
    assert ws.exists(workspace_fpath)
    assert ws.has_data()
    assert ws.available_datasets() == ['testds']
    ws.add_session('newsesh')
    assert ws.has_session('newsesh')

    ws.delete()
Example No. 21
def cluster_centroids(data, clusters, k=None):
    """Return centroids of clusters & clusters in data.

    data is an array of observations with shape (A, B, ...).

    clusters is an array of integers of shape (A,) giving the index
    (from 0 to k-1) of the cluster to which each observation belongs.
    The clusters must all be non-empty.

    k is the number of clusters. If omitted, it is deduced from the
    values in the clusters array.

    The result is an array of shape (k, B, ...) containing the
    centroid of each cluster.

    >>> data = np.array([[12, 10, 87],
    ...                  [ 2, 12, 33],
    ...                  [68, 31, 32],
    ...                  [88, 13, 66],
    ...                  [79, 40, 89],
    ...                  [ 1, 77, 12]])
    >>> cluster_centroids(data, np.array([1, 1, 2, 2, 0, 1]))
    array([[ 79.,  40.,  89.],
           [  5.,  33.,  44.],
           [ 78.,  22.,  49.]])

    """
    if k is None:
        k = (da.max(clusters)).compute() + 1

    result = []

    result = [
        da.mean(data[clusters.compute() == i], axis=0) for i in range(k)
    ]

    return da.reshape(da.concatenate(result, axis=0),
                      shape=(k, ) + data.shape[1:])
Example No. 22
def density_flux(population, total_population, carrying_capacity, distance,
                 csx, csy, **kwargs):
    """
    'density-based dispersion'

    Dispersal is calculated using the following sequence of methods:

    Portions of populations at each element (node, or grid cell) in the study area array (raster) are moved to
    surrounding elements (a neighbourhood) within a radius that is defined by the input distance (:math:`d`), as
    presented in the conceptual figure below.

        .. image:: images/density_flux_neighbourhood.png
            :align: center

    .. attention:: No dispersal will occur if the provided distance is less than the distance between elements (grid cells) in the model domain, as none will be included in the neighbourhood

    The mean density (:math:`\\rho`) of all elements in the neighbourhood is calculated as:

    .. math::
       \\rho=\\frac{\\sum_{i=1}^{n} \\frac{pop_T(i)}{k_T(i)}}{n}

    where,

    :math:`pop_T` is the total population (of the entire species) at each element (:math:`i`); and\n
    :math:`k_T` is the total carrying capacity for the species

    The density gradient at each element (:math:`\\Delta`) with respect to the mean is calculated as:

    .. math::
        \\Delta(i)=\\frac{pop_T(i)}{k_T(i)}-\\rho

    If the centroid element is above the mean :math:`[\\Delta(i_0) > 0]`, it is able to release a portion of its
    population to elements in the neighbourhood. The eligible population to be received by surrounding elements is equal
    to the sum of populations at elements with negative density gradients, the :math:`candidates`:

    .. math::
        candidates=\\sum_{i=1}^{n} \\Delta(i)[\\Delta(i) < 0]k_T(i)

    The minimum of either the population above the mean at the centroid element - :math:`source=\\Delta(i_0)*k_T(i_0)`,
    or the :math:`candidates` are used to determine the total population that is dispersed from the centroid element to
    the other elements in the neighbourhood:

    .. math::
        dispersal=min\\{source, candidates\\}

    The population at the centroid element becomes:

    .. math::
        pop_a(i_0)=pop_a(i_0)-\\frac{pop_a(i_0)}{pop_T(i_0)}dispersal

    where,

    :math:`pop_a` is the age (stage) group population, which is a sub-population of the total.

    The populations of the candidate elements in the neighbourhood become (a net gain due to negative gradients):

    .. math::
        pop_a(i)=pop_a(i)-\\frac{\\Delta(i)[\\Delta(i) < 0]k_T(i)}{candidates}dispersal\\frac{pop_a(i)}{pop_T(i)}

    :param da.Array population: Sub-population to redistribute (subset of the ``total_population``)
    :param da.Array total_population: Total population
    :param da.Array carrying_capacity: Total Carrying Capacity (k)
    :param float distance: Maximum dispersal distance
    :param float csx: Cell size of the domain in the x-direction
    :param float csy: Cell size of the domain in the y-direction

    .. Attention:: Ensure the cell sizes are in the same units as the specified direction

    :Keyword Arguments:
        **mask** (*array*) --
            A weighting mask that scales dispersal based on the normalized mask value (default: None)
    :return: Redistributed population
    """
    if any([
            not isinstance(a, da.Array)
            for a in [population, total_population, carrying_capacity]
    ]):
        raise DispersalError('Inputs must be dask arrays')

    if distance == 0:
        # Don't do anything
        return population

    chunks = tuple(c[0] if c else 0 for c in population.chunks)[:2]

    mask = kwargs.get('mask', None)
    if mask is None:
        mask = da.ones(shape=population.shape, dtype='float32', chunks=chunks)

    # Normalize the mask
    mask_min = da.min(mask)
    _range = da.max(mask) - mask_min
    mask = da.where(_range > 0, (mask - mask_min) / _range, 1.)

    # Calculate the kernel indices and shape
    kernel = calculate_kernel(distance, csx, csy)
    if kernel is None:
        # Not enough distance to cover a grid cell
        return population
    kernel, m, n = kernel
    m = int(m)
    n = int(n)

    a = da.pad(da.dstack(
        [population, total_population, carrying_capacity, mask]),
               ((m, m), (n, n), (0, 0)),
               'constant',
               constant_values=0)
    _m = -m
    if m == 0:
        _m = None
    _n = -n
    if n == 0:
        _n = None
    output = delayed(density_flux_task)(a, kernel, m, n)[m:_m, n:_n, 0]
    output = da.from_delayed(output, population.shape, np.float32)

    return output.rechunk(chunks)
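A toy numeric sketch of the quantities defined in the docstring (mean density, gradient, candidates, source, dispersal), using a made-up 1-D neighbourhood with the centroid at index 0; signs are flipped so that candidates is the positive population receivable by below-mean cells.

import numpy as np
import dask.array as da

pop_T = da.from_array(np.array([10.0, 2.0, 6.0, 8.0]))   # total population per element
k_T = da.from_array(np.array([10.0, 10.0, 10.0, 10.0]))  # total carrying capacity per element

density = pop_T / k_T
rho = da.mean(density)                            # mean neighbourhood density
delta = density - rho                             # density gradient at each element
candidates = da.sum(-delta * k_T * (delta < 0))   # population receivable by below-mean cells
source = delta[0] * k_T[0]                        # surplus at the centroid element
dispersal = da.minimum(source, candidates)
print(float(dispersal.compute()))                 # 3.5 for these numbers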
Example No. 23
def new_grid_mapping_from_coords(
    x_coords: xr.DataArray,
    y_coords: xr.DataArray,
    crs: Union[str, pyproj.crs.CRS],
    *,
    tile_size: Union[int, Tuple[int, int]] = None,
    tolerance: float = DEFAULT_TOLERANCE,
) -> GridMapping:
    crs = _normalize_crs(crs)
    assert_instance(x_coords, xr.DataArray, name='x_coords')
    assert_instance(y_coords, xr.DataArray, name='y_coords')
    assert_true(x_coords.ndim in (1, 2),
                'x_coords and y_coords must be either 1D or 2D arrays')
    assert_instance(tolerance, float, name='tolerance')
    assert_true(tolerance > 0.0, 'tolerance must be greater than zero')

    if x_coords.name and y_coords.name:
        xy_var_names = str(x_coords.name), str(y_coords.name)
    else:
        xy_var_names = _default_xy_var_names(crs)

    tile_size = _normalize_int_pair(tile_size, default=None)
    is_lon_360 = None  # None means "not yet known"
    if crs.is_geographic:
        is_lon_360 = bool(np.any(x_coords > 180))

    x_res = 0
    y_res = 0

    if x_coords.ndim == 1:
        # We have 1D x,y coordinates
        cls = Coords1DGridMapping

        assert_true(x_coords.size >= 2 and y_coords.size >= 2,
                    'sizes of x_coords and y_coords 1D arrays must be >= 2')

        size = x_coords.size, y_coords.size

        x_dim, y_dim = x_coords.dims[0], y_coords.dims[0]

        x_diff = _abs_no_zero(x_coords.diff(dim=x_dim).values)
        y_diff = _abs_no_zero(y_coords.diff(dim=y_dim).values)

        if not is_lon_360 and crs.is_geographic:
            is_anti_meridian_crossed = np.any(np.nanmax(x_diff) > 180)
            if is_anti_meridian_crossed:
                x_coords = to_lon_360(x_coords)
                x_diff = _abs_no_zero(x_coords.diff(dim=x_dim))
                is_lon_360 = True

        x_res, y_res = x_diff[0], y_diff[0]
        x_diff_equal = np.allclose(x_diff, x_res, atol=tolerance)
        y_diff_equal = np.allclose(y_diff, y_res, atol=tolerance)
        is_regular = x_diff_equal and y_diff_equal
        if is_regular:
            x_res = round_to_fraction(x_res, 5, 0.25)
            y_res = round_to_fraction(y_res, 5, 0.25)
        else:
            x_res = round_to_fraction(float(np.nanmedian(x_diff)), 2, 0.5)
            y_res = round_to_fraction(float(np.nanmedian(y_diff)), 2, 0.5)

        if tile_size is None \
                and x_coords.chunks is not None \
                and y_coords.chunks is not None:
            tile_size = (max(0,
                             *x_coords.chunks[0]), max(0, *y_coords.chunks[0]))

        # Guess j axis direction
        is_j_axis_up = bool(y_coords[0] < y_coords[-1])

    else:
        # We have 2D x,y coordinates
        cls = Coords2DGridMapping

        assert_true(
            x_coords.shape == y_coords.shape, 'shapes of x_coords and y_coords'
            ' 2D arrays must be equal')
        assert_true(
            x_coords.dims == y_coords.dims,
            'dimensions of x_coords and y_coords'
            ' 2D arrays must be equal')

        y_dim, x_dim = x_coords.dims

        height, width = x_coords.shape
        size = width, height

        x = da.asarray(x_coords)
        y = da.asarray(y_coords)

        x_x_diff = _abs_no_nan(da.diff(x, axis=1))
        x_y_diff = _abs_no_nan(da.diff(x, axis=0))
        y_x_diff = _abs_no_nan(da.diff(y, axis=1))
        y_y_diff = _abs_no_nan(da.diff(y, axis=0))

        if not is_lon_360 and crs.is_geographic:
            is_anti_meridian_crossed = da.any(da.max(x_x_diff) > 180) \
                                       or da.any(da.max(x_y_diff) > 180)
            if is_anti_meridian_crossed:
                x_coords = to_lon_360(x_coords)
                x = da.asarray(x_coords)
                x_x_diff = _abs_no_nan(da.diff(x, axis=1))
                x_y_diff = _abs_no_nan(da.diff(x, axis=0))
                is_lon_360 = True

        is_regular = False

        if da.all(x_y_diff == 0) and da.all(y_x_diff == 0):
            x_res = x_x_diff[0, 0]
            y_res = y_y_diff[0, 0]
            is_regular = \
                da.allclose(x_x_diff[0, :], x_res, atol=tolerance) \
                and da.allclose(x_x_diff[-1, :], x_res, atol=tolerance) \
                and da.allclose(y_y_diff[:, 0], y_res, atol=tolerance) \
                and da.allclose(y_y_diff[:, -1], y_res, atol=tolerance)

        if not is_regular:
            # Let diff arrays have same shape as original by
            # doubling last rows and columns.
            x_x_diff_c = da.concatenate([x_x_diff, x_x_diff[:, -1:]], axis=1)
            y_x_diff_c = da.concatenate([y_x_diff, y_x_diff[:, -1:]], axis=1)
            x_y_diff_c = da.concatenate([x_y_diff, x_y_diff[-1:, :]], axis=0)
            y_y_diff_c = da.concatenate([y_y_diff, y_y_diff[-1:, :]], axis=0)
            # Find resolution via area
            x_abs_diff = da.sqrt(da.square(x_x_diff_c) + da.square(x_y_diff_c))
            y_abs_diff = da.sqrt(da.square(y_x_diff_c) + da.square(y_y_diff_c))
            if crs.is_geographic:
                # Convert degrees into meters
                x_abs_diff_r = da.radians(x_abs_diff)
                y_abs_diff_r = da.radians(y_abs_diff)
                x_abs_diff = _ER * da.cos(x_abs_diff_r) * y_abs_diff_r
                y_abs_diff = _ER * y_abs_diff_r
            xy_areas = (x_abs_diff * y_abs_diff).flatten()
            xy_areas = da.where(xy_areas > 0, xy_areas, np.nan)
            # Get indices of min and max area
            xy_area_index_min = da.nanargmin(xy_areas)
            xy_area_index_max = da.nanargmax(xy_areas)
            # Convert area to edge length
            xy_res_min = math.sqrt(xy_areas[xy_area_index_min])
            xy_res_max = math.sqrt(xy_areas[xy_area_index_max])
            # Empirically weight min more than max
            xy_res = 0.7 * xy_res_min + 0.3 * xy_res_max
            if crs.is_geographic:
                # Convert meters back into degrees
                # print(f'xy_res in meters: {xy_res}')
                xy_res = math.degrees(xy_res / _ER)
                # print(f'xy_res in degrees: {xy_res}')
            # Because this is an estimation, we can round to a nice number
            xy_res = round_to_fraction(xy_res, digits=1, resolution=0.5)
            x_res, y_res = float(xy_res), float(xy_res)

        if tile_size is None and x_coords.chunks is not None:
            j_chunks, i_chunks = x_coords.chunks
            tile_size = max(0, *i_chunks), max(0, *j_chunks)

        if tile_size is not None:
            tile_width, tile_height = tile_size
            x_coords = x_coords.chunk((tile_height, tile_width))
            y_coords = y_coords.chunk((tile_height, tile_width))

        # Guess j axis direction
        is_j_axis_up = np.all(y_coords[0, :] < y_coords[-1, :]) or None

    assert_true(x_res > 0 and y_res > 0,
                'internal error: x_res and y_res could not be determined',
                exception_type=RuntimeError)

    x_res, y_res = _to_int_or_float(x_res), _to_int_or_float(y_res)
    x_res_05, y_res_05 = x_res / 2, y_res / 2
    x_min = _to_int_or_float(x_coords.min() - x_res_05)
    y_min = _to_int_or_float(y_coords.min() - y_res_05)
    x_max = _to_int_or_float(x_coords.max() + x_res_05)
    y_max = _to_int_or_float(y_coords.max() + y_res_05)

    return cls(x_coords=x_coords,
               y_coords=y_coords,
               crs=crs,
               size=size,
               tile_size=tile_size,
               xy_bbox=(x_min, y_min, x_max, y_max),
               xy_res=(x_res, y_res),
               xy_var_names=xy_var_names,
               xy_dim_names=(str(x_dim), str(y_dim)),
               is_regular=is_regular,
               is_lon_360=is_lon_360,
               is_j_axis_up=is_j_axis_up)
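A mini-sketch of the regular-spacing test applied in the 1-D coordinate branch above: the grid counts as regular when every consecutive difference matches the first one within the tolerance (made-up coordinate values).

import numpy as np

x = np.array([10.0, 10.5, 11.0, 11.5])
x_diff = np.abs(np.diff(x))
x_res = x_diff[0]
is_regular = bool(np.allclose(x_diff, x_res, atol=1e-5))
print(x_res, is_regular)  # 0.5 True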
Example No. 24
def plot_dataset(X,
                 y,
                 images=None,
                 labels=None,
                 gray=False,
                 save=None,
                 y_original=None):
    print('data size {}'.format(X.shape))
    uni_y = len(da.unique(y).compute())

    x_min, x_max = da.min(X, 0), da.max(X, 0)
    X = (X - x_min) / (x_max - x_min)
    #if save is not None:
    #plt.figure(figsize=(27,18), dpi=600)
    #else:
    fig = plt.figure(figsize=(27, 18), dpi=100)
    ax = plt.subplot(111)

    for i in tqdm(range(X.shape[0])):
        plt.text(X[i, 0],
                 X[i, 1],
                 str(y[i]),
                 color=plt.cm.Set1(y[i] / uni_y),
                 fontdict={
                     'weight': 'bold',
                     'size': 9
                 })

    if images is not None:
        if hasattr(offsetbox, 'AnnotationBbox'):
            # only print thumbnails with matplotlib > 1.0
            shown_images = da.array([[1., 1.]])  # just something big
            for i in range(X.shape[0]):
                dist = da.sum((X[i] - shown_images)**2, 1)
                if da.min(dist) < 4e-3:
                    # don't show points that are too close
                    continue

                if labels is not None:
                    if y_original is not None:
                        plt.text(X[i, 0] - 0.01,
                                 X[i, 1] - 0.033,
                                 labels[y_original[i]],
                                 fontdict={
                                     'weight': 'bold',
                                     'size': 15
                                 })
                    else:
                        plt.text(X[i, 0] - 0.01,
                                 X[i, 1] - 0.033,
                                 labels[y[i]],
                                 fontdict={
                                     'weight': 'bold',
                                     'size': 15
                                 })

                shown_images = da.concatenate([shown_images, X[i][None]], axis=0)
                if gray:
                    image_ = offsetbox.OffsetImage(
                        da.expand_dims(util.invert(images[i]), axis=0))
                else:
                    image_ = offsetbox.OffsetImage(images[i],
                                                   cmap=plt.cm.gray_r)

                imagebox = offsetbox.AnnotationBbox(image_, X[i])

                ax.add_artist(imagebox)

    plt.xticks([]), plt.yticks([])

    for item in [fig, ax]:
        item.patch.set_visible(False)

    ax.axis('off')

    if save is not None:
        print('Saving Image {} ...'.format(save))
        plt.title('epoch ' + save.split('.')[0].split()[-1],
                  fontdict={'fontsize': 20},
                  loc='left')
        plt.savefig(save)
        plt.close()
    else:
        plt.show()
    del X, y, fig, ax

    gc.collect()
Example No. 25
def two_point_stats(arr1,
                    arr2,
                    mask=None,
                    periodic_boundary=True,
                    cutoff=None):
    """Calculate the 2-points stats for two arrays

    Args:
      arr1: array used to calculate cross-correlations (n_samples,n_x,n_y)
      arr2: array used to calculate cross-correlations (n_samples,n_x,n_y)
      mask: array specifying confidence in the measurement at a pixel
        (n_samples,n_x,n_y).  In range [0,1].
      periodic_boundary: whether to assume a periodic boundary (default is true)
      cutoff: the subarray of the 2 point stats to keep

    Returns:
      the snipped 2-points stats

    >>> two_point_stats(
    ...     da.from_array(np.arange(10).reshape(2, 5), chunks=(2, 5)),
    ...     da.from_array(np.arange(10).reshape(2, 5), chunks=(2, 5)),
    ... ).shape
    (2, 5)

    Test masking

    >>> array = da.array([[[1, 0 ,0], [0, 1, 1], [1, 1, 0]]])
    >>> mask = da.array([[[1, 1, 1], [1, 1, 1], [1, 0, 0]]])
    >>> norm_mask = da.array([[[2, 4, 3], [4, 7, 4], [3, 4, 2]]])
    >>> expected = da.array([[[1, 0, 1], [1, 4, 1], [1, 0, 1]]]) / norm_mask
    >>> assert np.allclose(
    ...     two_point_stats(array, array, mask=mask, periodic_boundary=False),
    ...     expected
    ... )

    The mask must be in the range 0 to 1.

    >>> array = da.array([[[1, 0], [0, 1]]])
    >>> mask =  da.array([[[2, 0], [0, 1]]])
    >>> two_point_stats(array, array, mask)
    Traceback (most recent call last):
    ...
    RuntimeError: Mask must be in range [0,1]
    """

    cutoff_ = int((np.min(arr1.shape[1:]) - 1) / 2)
    if cutoff is None:
        cutoff = cutoff_
    cutoff = min(cutoff, cutoff_)

    nonperiodic_padder = sequence(
        dapad(
            pad_width=[(0, 0)] + [(cutoff, cutoff)] * (arr1.ndim - 1),
            mode="constant",
            constant_values=0,
        ),
        lambda x: da.rechunk(x, (x.chunks[0], ) + x.shape[1:]),
    )

    padder = identity if periodic_boundary else nonperiodic_padder

    if mask is not None:
        if da.max(mask).compute() > 1.0 or da.min(mask).compute() < 0.0:
            raise RuntimeError("Mask must be in range [0,1]")

        mask_array = lambda arr: arr * mask

        normalize = lambda x: x / auto_correlation(padder(mask))
    else:
        mask_array = identity

        if periodic_boundary:
            # The periodic normalization could always be the
            # auto_correlation of the mask. But for the sake of
            # efficiency, we specify the periodic normalization in the
            # case there is no mask.
            normalize = lambda x: x / arr1[0].size
        else:
            normalize = lambda x: x / auto_correlation(
                padder(np.ones_like(arr1)))

    return sequence(
        map_(mask_array),
        map_(padder),
        list,
        star(cross_correlation),
        normalize,
        center_slice(cutoff=cutoff),
    )([arr1, arr2])
Example No. 26
def two_point_stats(arr1,
                    arr2,
                    periodic_boundary=True,
                    cutoff=None,
                    mask=None):
    r"""Calculate the 2-points stats for two arrays

    The discretized two point statistics are given by

    .. math::

       f[r \; \vert \; l, l'] = \frac{1}{S} \sum_s m[s, l] m[s + r, l']

    where :math:`f[r \; \vert \; l, l']` is the conditional
    probability of finding the local states :math:`l` and :math:`l'` at
    a distance and orientation away from each other defined by the
    vector :math:`r`. `See this paper for more details on the
    notation. <https://doi.org/10.1007/s40192-017-0089-0>`_

    The array ``arr1[i]`` (state :math:`l`) is correlated with
    ``arr2[i]`` (state :math:`l'`) for each sample ``i``. Both arrays
    must have the same number of samples and nominal states (integer
    value) or continuous variables.

    To calculate multiple different correlations for each sample, see
    :func:`~pymks.correlations_multiple`.

    To use ``two_point_stats`` as part of a Scikit-learn pipeline, see
    :class:`~pymks.TwoPointCorrelation`.

    Args:
      arr1: array used to calculate cross-correlations, shape
        ``(n_samples,n_x,n_y)``
      arr2: array used to calculate cross-correlations, shape
        ``(n_samples,n_x,n_y)``
      periodic_boundary: whether to assume a periodic boundary
        (default is ``True``)
      cutoff: the subarray of the 2 point stats to keep
      mask: array specifying confidence in the measurement at a pixel,
        shape ``(n_samples,n_x,n_y)``. In range [0,1].

    Returns:
      the snipped 2-points stats

    If both arrays are Dask arrays then a Dask array is returned.

    >>> out = two_point_stats(
    ...     da.from_array(np.arange(10).reshape(2, 5), chunks=(2, 5)),
    ...     da.from_array(np.arange(10).reshape(2, 5), chunks=(2, 5)),
    ... )
    >>> out.chunks
    ((2,), (5,))
    >>> out.shape
    (2, 5)

    If either of the arrays are Numpy then a Numpy array is returned.

    >>> two_point_stats(
    ...     np.arange(10).reshape(2, 5),
    ...     np.arange(10).reshape(2, 5),
    ... )
    array([[ 3.,  4.,  6.,  4.,  3.],
           [48., 49., 51., 49., 48.]])

    Test masking

    >>> array = da.array([[[1, 0 ,0], [0, 1, 1], [1, 1, 0]]])
    >>> mask = da.array([[[1, 1, 1], [1, 1, 1], [1, 0, 0]]])
    >>> norm_mask = da.array([[[2, 4, 3], [4, 7, 4], [3, 4, 2]]])
    >>> expected = da.array([[[1, 0, 1], [1, 4, 1], [1, 0, 1]]]) / norm_mask
    >>> assert np.allclose(
    ...     two_point_stats(array, array, mask=mask, periodic_boundary=False)[:, 1:-1, 1:-1],
    ...     expected
    ... )

    The mask must be in the range 0 to 1.

    >>> array = da.array([[[1, 0], [0, 1]]])
    >>> mask =  da.array([[[2, 0], [0, 1]]])
    >>> two_point_stats(array, array, mask=mask)
    Traceback (most recent call last):
    ...
    RuntimeError: Mask must be in range [0,1]

    """  # noqa: #501

    n_is_even = 1 - np.array(arr1.shape[1:]) % 2
    padding = np.array(arr1.shape[1:]) // 2

    nonperiodic_padder = sequence(
        dapad(
            pad_width=[(0, 0)] + list(zip(padding, padding + n_is_even)),
            mode="constant",
            constant_values=0,
        ),
        lambda x: da.rechunk(x, (x.chunks[0], ) + x.shape[1:]),
    )

    padder = identity if periodic_boundary else nonperiodic_padder

    if mask is not None:
        if da.max(mask).compute() > 1.0 or da.min(mask).compute() < 0.0:
            raise RuntimeError("Mask must be in range [0,1]")

        mask_array = lambda arr: arr * mask

        normalize = lambda x: x / auto_correlation(padder(mask))
    else:
        mask_array = identity

        if periodic_boundary:
            # The periodic normalization could always be the
            # auto_correlation of the mask. But for the sake of
            # efficiency, we specify the periodic normalization in the
            # case there is no mask.
            normalize = sequence(
                lambda x: x / arr1[0].size,
                dapad(
                    pad_width=[(0, 0)] + list(zip(0 * n_is_even, n_is_even)),
                    mode="wrap",
                ),
                lambda x: da.rechunk(x, (x.chunks[0], ) + x.shape[1:]),
            )
        else:
            normalize = lambda x: x / auto_correlation(
                padder(np.ones_like(arr1)))

    return sequence(
        map_(mask_array),
        map_(padder),
        list,
        star(cross_correlation),
        normalize,
        center_slice(cutoff=cutoff),
    )([arr1, arr2])
Example No. 27
def predict_xr(
    model,
    input_xr,
    chunk_size=None,
    persist=True,
    proba=False,
    clean=False,
    return_input=False,
):
    """
    Using dask-ml ParallelPostFit(), runs the parallel
    predict and predict_proba methods of sklearn
    estimators. Useful for running predictions
    on larger-than-RAM datasets.

    Last modified: September 2020

    Parameters
    ----------
    model : scikit-learn model or compatible object
        Must have a .predict() method that takes numpy arrays.
    input_xr : xarray.DataArray or xarray.Dataset.
        Must have dimensions 'x' and 'y'
    chunk_size : int
        The dask chunk size to use on the flattened array. If this
        is left as None, then the chunk size is inferred from the
        .chunks() method on the `input_xr`
    persist : bool
        If True, and proba=True, then 'input_xr' data will be
        loaded into distributed memory. This will ensure data
        is not loaded twice for the prediction of probabilities,
        but this will only work if the data is not larger than RAM.
    proba : bool
        If True, predict probabilities. This only applies if the
        model has a .predict_proba() method
    clean : bool
        If True, remove Infs and NaNs from input and output arrays
    return_input : bool
        If True, then the data variables in the 'input_xr' dataset will
        be appended to the output xarray dataset.

    Returns
    ----------
    output_xr : xarray.Dataset
        An xarray.Dataset containing the prediction output from model
        with input_xr as input, if proba=True then dataset will also contain
        the prediction probabilities. Has the same spatiotemporal structure
        as input_xr.

    """
    if chunk_size is None:
        chunk_size = int(input_xr.chunks["x"][0]) * int(
            input_xr.chunks["y"][0])

    # convert model to dask predict
    model = ParallelPostFit(model)

    # with joblib.parallel_backend("dask"):
    x, y, crs = input_xr.x, input_xr.y, input_xr.geobox.crs

    input_data = []

    for var_name in input_xr.data_vars:
        input_data.append(input_xr[var_name])

    input_data_flattened = []
    # TODO: transfer to dask dataframe
    for arr in input_data:
        data = arr.data.flatten().rechunk(chunk_size)
        input_data_flattened.append(data)

    # reshape for prediction
    input_data_flattened = da.array(input_data_flattened).transpose()

    if clean:
        input_data_flattened = da.where(da.isfinite(input_data_flattened),
                                        input_data_flattened, 0)

    if proba and persist:
        # persisting data so we don't require loading all the data twice
        input_data_flattened = input_data_flattened.persist()

    # apply the classification
    print("   predicting...")
    out_class = model.predict(input_data_flattened)

    # Mask out NaN or Inf values in results
    if clean:
        out_class = da.where(da.isfinite(out_class), out_class, 0)

    # Reshape when writing out
    out_class = out_class.reshape(len(y), len(x))

    # stack back into xarray
    output_xr = xr.DataArray(out_class,
                             coords={
                                 "x": x,
                                 "y": y
                             },
                             dims=["y", "x"])

    output_xr = output_xr.to_dataset(name="Predictions")

    if proba:
        print("   probabilities...")
        out_proba = model.predict_proba(input_data_flattened)

        # convert to %
        out_proba = da.max(out_proba, axis=1) * 100.0

        if clean:
            out_proba = da.where(da.isfinite(out_proba), out_proba, 0)

        out_proba = out_proba.reshape(len(y), len(x))

        out_proba = xr.DataArray(out_proba,
                                 coords={
                                     "x": x,
                                     "y": y
                                 },
                                 dims=["y", "x"])
        output_xr["Probabilities"] = out_proba

    if return_input:
        print("   input features...")
        # unflatten the input_data_flattened array and append
        # to the output_xr containing the predictions
        arr = input_xr.to_array()
        stacked = arr.stack(z=["y", "x"])
        # handle multivariable output
        output_px_shape = ()
        if len(input_data_flattened.shape[1:]):
            output_px_shape = input_data_flattened.shape[1:]

        output_features = input_data_flattened.reshape(
            (len(stacked.z), *output_px_shape))

        # set the stacked coordinate to match the input
        output_features = xr.DataArray(
            output_features,
            coords={
                "z": stacked["z"]
            },
            dims=[
                "z",
                *[
                    "output_dim_" + str(idx)
                    for idx in range(len(output_px_shape))
                ],
            ],
        ).unstack()

        # convert to dataset and rename arrays
        output_features = output_features.to_dataset(dim="output_dim_0")
        data_vars = list(input_xr.data_vars)
        output_features = output_features.rename(
            {i: j
             for i, j in zip(output_features.data_vars, data_vars)}  # noqa pylint: disable=unnecessary-comprehension
        )

        # merge with predictions
        output_xr = xr.merge([output_xr, output_features], compat="override")

    return assign_crs(output_xr, str(crs))
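A small sketch of the probability step above in isolation: the reported probability is the row-wise maximum of the predict_proba output, scaled to a percentage (synthetic values, not model output).

import numpy as np
import dask.array as da

out_proba = da.from_array(np.array([[0.2, 0.7, 0.1],
                                    [0.5, 0.4, 0.1]]), chunks=(1, 3))
winning = da.max(out_proba, axis=1) * 100.0
print(winning.compute())  # [70. 50.]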
Example No. 28
xds = list(
    xds_from_ms(
        args.ms,
        # We only need the antenna and uvw columns
        columns=("UVW", "ANTENNA1", "ANTENNA2"),
        group_cols=[],
        index_cols=[],
        chunks={"row": 1e6}))

# Should only have one dataset
assert len(xds) == 1
# The unique baselines for one scan are the same for every scan in the Measurement Set
ds = xds[0]

# Calculate Maximum baseline
uvw = ds.UVW.data
bl_max_dist = da.sqrt(da.max(da.sum(uvw**2, axis=1)))

# bl_max_dist = da.stack(ds.UVW.data, my_ds.UVW.data for my_ds in xds, axis=1)

# Need ant1 and ant2 to be int32 for the compound int64 below
# to work
assert ds.ANTENNA1.dtype == ds.ANTENNA2.dtype == np.int32

bl = da.stack([ds.ANTENNA1.data, ds.ANTENNA2.data], axis=1)
# convert array to dtype int64 from int32
bl = bl.rechunk(-1, 2).view(np.int64)
# get the unique values
ubl = da.unique(bl)
# dask compute, convert back to int32 and reshape
ubl = da.compute(ubl)[0].view(np.int32).reshape(-1, 2)
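A toy check of the maximum-baseline reduction used above, on two synthetic UVW rows chosen so the answer is obvious.

import numpy as np
import dask.array as da

uvw = da.from_array(np.array([[3.0, 4.0, 0.0],
                              [6.0, 8.0, 0.0]]), chunks=(1, 3))
bl_max_dist = da.sqrt(da.max(da.sum(uvw ** 2, axis=1)))
print(float(bl_max_dist.compute()))  # 10.0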
Example No. 29
# Should only be one correlation
assert psf.shape[2] == 1, psf.shape

# FFT the PSF
psf_fft = da.fft.fftshift(da.fft.ifft2(da.fft.ifftshift(psf[:, :, 0])))

# Dirty image composed of the diagonal correlations
if ncorr == 1:
    dirty = dirty_fft[0].real
else:
    dirty = (dirty_fft[0].real + dirty_fft[ncorr - 1].real) * 0.5

# Normalised Amplitude
psf = da.absolute(psf_fft.real)
psf = (psf / da.max(psf))

# Scale the dirty image by the psf
# x4 because of the N**2 FFT normalization factor
# on a square image of double the size
dirty = dirty / (da.max(psf) * 4.)

# Visualise profiling if we have bokeh
try:
    import bokeh  # noqa
except ImportError:
    from dask.diagnostics import ProgressBar

    with ProgressBar():
        dirty = dirty.compute()
else: