Example 1
def multidim_lazy_stack(stack):
    """
    Recursively build a multidimensional stacked dask array.

    This is needed because dask.array.stack only accepts a 1-dimensional list.

    Args:

    * stack:
        An ndarray of dask arrays.

    Returns:
        The input array converted to a lazy dask array.

    """
    if stack.ndim == 0:
        # A 0-d array cannot be stacked.
        result = stack.item()
    elif stack.ndim == 1:
        # Another base case: a simple 1-d list stacks directly with dask.
        result = da.stack(list(stack))
    else:
        # Recurse because dask.stack does not do multi-dimensional.
        result = da.stack([multidim_lazy_stack(subarray)
                           for subarray in stack])
    return result
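A minimal usage sketch of the function above, assuming a 2 x 2 object ndarray of equally shaped dask tiles (the tile shape is illustrative):

import numpy as np
import dask.array as da

tiles = np.empty((2, 2), dtype=object)
for i in range(2):
    for j in range(2):
        tiles[i, j] = da.ones((3, 4), chunks=(3, 4))

stacked = multidim_lazy_stack(tiles)
print(stacked.shape)  # (2, 2, 3, 4) -- still lazy, nothing computed yet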
Example 2
def haversines(x1, x2, y1, y2, z1=None, z2=None):

    x1, x2 = da.deg2rad(x1), da.deg2rad(x2)
    y1, y2 = da.deg2rad(y1), da.deg2rad(y2)

    x = (x2 - x1) * da.cos((y1 + y2) * 0.5) * cst.r_earth
    y = (y2 - y1) * cst.r_earth * da.ones_like(x1) * da.ones_like(x2)

    if z1 is None or z2 is None:
        return da.stack((x, y), axis=-1)
    else:
        z1 = da.where(da.isnan(z1), 0, z1)
        z2 = da.where(da.isnan(z2), 0, z2)
        z = (z2 - z1) * da.ones_like(x)
        return da.stack((x, y, z), axis=-1)
Example 3
    def __call__(self, times=None):
        """Run the filtering process on this experiment."""

        # run over the full range of valid time indices unless specified otherwise
        tgrid = self.fieldset.gridset.grids[0].time
        if times is None:
            times = tgrid.copy()

            if self.uneven_window:
                raise NotImplementedError("uneven windows aren't supported")

        # restrict to period covered by window
        times = np.array(times)
        window_left = times - tgrid[0] >= self.window_size
        window_right = times <= tgrid[-1] - self.window_size
        times = times[window_left & window_right]

        da_out = {v: [] for v in self.sample_variables}

        # do the filtering at each timestep
        for idx, time in enumerate(times):
            # returns a dictionary of sample_variable -> dask array
            filtered = self.filter_step(idx, time)
            for v, a in filtered.items():
                da_out[v].append(a)

        # dump all to disk
        da.to_hdf5(self.name + ".h5",
                   {v: da.stack(a)
                    for v, a in da_out.items()})
Example 4
 def cutOffAtDEM(self, assign=True):
     topo = self.gridObj.topo + self.gridObj.mdata["datum"]
     #topo = da.flipud(topo)
     removalIndex = []
     #verify that there is presently a DEM
     if (not (self.gridObj.gridTopography)):
         print("no dem present in grid file, please assign one to continue")
         return
     #do it one complete layer at a time looping downward
     for i in range(self.gridObj.z.shape[2] - 1, -1, -1):
         #the greater-than is because the indexing of the rFile is flipped from what you would expect
         #the transposition is to deal with the fact that the 2d mesh grid is flipped relative to the 3d one
         removalIndex.append(self.gridObj.z[:, :, i] >= (topo))
     #join into a single dask array
     removalIndex = da.stack(removalIndex, axis=-1)
     removalIndex = removalIndex.flatten()
     #drop values
     self.gridObj.vp[removalIndex] = -999
     self.gridObj.vs[removalIndex] = -999
     self.gridObj.p[removalIndex] = -999
     self.gridObj.qp[removalIndex] = -999
     self.gridObj.qs[removalIndex] = -999
     if (assign):
         print("assiging dem cut off to gridfile")
         self.gridObj.assignNewGridProperties(self.gridObj.vp,
                                              self.gridObj.vs,
                                              self.gridObj.p,
                                              self.gridObj.qp,
                                              self.gridObj.qs)
Example 5
def hdulists_to_dask_cube(all_hduls, plane_shape, ext=0, dtype=float):
    cube = da.stack([
        da.from_delayed(hdul[ext].data, shape=plane_shape, dtype=dtype)
        for hdul in all_hduls
    ])
    log.info(f"Dask Array of shape {cube.shape} created from HDULists")
    return cube
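The function above relies on each entry of all_hduls being a dask Delayed wrapping an HDUList, so that hdul[ext].data is itself delayed. A hedged sketch of how such a list might be built with astropy (the file names are hypothetical):

import dask
from astropy.io import fits

paths = ["frame_000.fits", "frame_001.fits"]  # hypothetical input files
all_hduls = [dask.delayed(fits.open)(p) for p in paths]
cube = hdulists_to_dask_cube(all_hduls, plane_shape=(1024, 1024))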
Example 6
	def cat_arrays(self):
		no_rain = self.dask_array(self.no_rain_path)
		normal = self.dask_array(self.normal_path)
		night = self.dask_array(self.night_path)
		heavy = self.dask_array(self.heavy_path)
		datasets = da.stack([no_rain, normal, heavy, night], axis=0)
		return datasets
Example 7
def stats_for_tiff_images(filenames, use_test_data=False):
    def read_image(filename, band, use_test_data):
        filename = filename.format(band)
        if use_test_data:
            num = int(band)
            band_shape = (120, 120)
            return np.full(band_shape, num)
        band_ds = rasterio.open(filename)
        return np.array(band_ds.read(1))

    def images_for_band(band):
        delayed_read = dask.delayed(read_image, pure=True)
        lazy_images = [
            da.from_delayed(delayed_read(filename, band, use_test_data),
                            dtype=np.uint16,
                            shape=(120, 120)) for filename in filenames
        ]

        stack = da.stack(lazy_images, axis=0).rechunk('auto')
        return stack.flatten()

    all_bands = da.stack(
        [images_for_band("02"),
         images_for_band("03"),
         images_for_band("04")],
        axis=-1)

    stats = defaultdict(dict)
    for stat_name, stat_func in [('mean', da.mean), ('std', da.std),
                                 ('min', da.min), ('max', da.max)]:
        stats[stat_name] = stat_func(all_bands, axis=0).compute()

    return pd.DataFrame(stats, index=['red', 'blue', 'green'])
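With use_test_data=True nothing is read from disk, so the pipeline can be exercised without any TIFF files; the filename strings only need a {} placeholder for the band id. A quick sketch:

df = stats_for_tiff_images(["img_{}.tif", "other_{}.tif"], use_test_data=True)
print(df)  # bands are constant, so mean/min/max are 2, 3, 4 and std is 0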
Example 8
 def as_stitched_array(self, channel_index=0, channel_name=None, t_index=0, verbose=True):
     if channel_name is not None:
         channel_index = self._channel_name_to_index(channel_name)
     z_list = []
     for z in self.z_indices:
         # this doesn't work with explore acquisitions and would need to be updated
         rows, cols = self.get_num_rows_and_cols()
         empty_tile = np.zeros((self.image_height, self.image_width), self.pixel_type)
         row_list = []
         for row in range(rows):
             if verbose:
                 print('stitching row {} of {}'.format(row + 1, rows))
             col_list = []
             for col in range(cols):
                 pos_index_array = np.nonzero(np.logical_and(self.row_col_array[:, 0] == row,
                                                             self.row_col_array[:, 1] == col))[0]
                 pos_index = None if pos_index_array.size == 0 else pos_index_array[0]
                 if pos_index is not None and self.has_image(
                         channel_index=channel_index, z_index=z, t_index=t_index, pos_index=pos_index):
                     img = self.read_image(channel_index=channel_index, z_index=z, t_index=t_index, pos_index=pos_index,
                                           memmapped=True)
                 else:
                     img = empty_tile
                 # crop to center of tile
                 col_list.append(img[self.overlap[0] // 2: -self.overlap[0] // 2,
                                 self.overlap[1] // 2: -self.overlap[1] // 2])
             stitched_col = da.concatenate(col_list, axis=1)
             row_list.append(stitched_col)
         stitched = da.concatenate(row_list, axis=0)
         z_list.append(stitched)
     return da.stack(z_list)
Example 9
def get_pet(from_time, to_time, da_pet_mask, zarr_path):
    import dask.array as da
    from_time = str2datetime(from_time)
    to_time = str2datetime(to_time)
    ds_pet = xr.open_zarr(gcsfs.GCSMap('pangeo-data/cgiar_pet'))
    da_pet = ds_pet['PET']
    pet = get_ws_p(1 / 120, da_pet_mask, da_pet, tolerance=0.000001).chunk({
        'label':
        1
    }).compute()
    date_range = pd.date_range(start=from_time + timedelta(minutes=15),
                               end=to_time,
                               freq='30min')
    arrays = [
        da.from_delayed(dask.delayed(get_pet_for_label)(date_range, label,
                                                        pet),
                        dtype='float32',
                        shape=(len(date_range), ))
        for label in pet.label.values
    ]
    stack = da.stack(arrays, axis=0)
    pet_over_time = xr.DataArray(stack,
                                 coords=[pet.label.values, date_range],
                                 dims=['label', 'time'])
    return pet_over_time
Example 10
def get_layer_list(channels, nd2_func, path, frame_shape, frame_dtype,
                   n_timepoints):
    channel_dict = dict(zip(channels, [[] for _ in range(len(channels))]))
    for i, channel in enumerate(channels):
        arr = da.stack([
            da.from_delayed(delayed(nd2_func(path, i))(j),
                            shape=frame_shape,
                            dtype=frame_dtype) for j in range(n_timepoints)
        ])
        channel_dict[channel] = dask.optimize(arr)[0]

    layer_list = []
    for channel_name, channel in channel_dict.items():
        visible = channel_name in VISIBLE
        blending = 'additive' if visible else 'translucent'
        channel_color = list(CHANNEL_COLORS[channel_name])
        color = Colormap([[0, 0, 0], channel_color])
        meta = get_metadata(path)
        add_kwargs = {
            "name": channel_name,
            "visible": visible,
            "colormap": color,
            "blending": blending,
            **meta
        }
        layer_type = "image"
        layer_list.append((channel, add_kwargs, layer_type))
    return layer_list
Example 11
def position_grid(shape, blocksize):
    """
    """

    coords = da.meshgrid(*[range(x) for x in shape], indexing='ij')
    coords = da.stack(coords, axis=-1).astype(np.int16)
    return da.rechunk(coords, chunks=tuple(blocksize) + (3, ))
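A quick sketch of the result for a small 3-d shape; the trailing (3,) chunk keeps each coordinate triple inside one block:

grid = position_grid((4, 5, 6), blocksize=(2, 2, 2))
print(grid.shape)               # (4, 5, 6, 3)
print(grid[1, 2, 3].compute())  # [1 2 3]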
Example 12
    def stretch_logarithmic(self, factor=100.):
        """Move data into range [1:factor] through normalized logarithm."""
        logger.debug("Perform a logarithmic contrast stretch.")
        crange = (0., 1.0)

        b__ = float(crange[1] - crange[0]) / np.log(factor)
        c__ = float(crange[0])

        def _band_log(arr):
            slope = (factor - 1.) / float(arr.max() - arr.min())
            arr = 1. + (arr - arr.min()) * slope
            arr = c__ + b__ * da.log(arr)
            return arr

        band_results = []
        for band in self.data['bands'].values:
            if band == 'A':
                continue
            band_data = self.data.sel(bands=band)
            res = _band_log(band_data.data)
            band_results.append(res)

        if 'A' in self.data.coords['bands'].values:
            band_results.append(self.data.sel(bands='A'))
        self.data.data = da.stack(band_results,
                                  axis=self.data.dims.index('bands'))
Example 13
def classify(texts):
    batch_x_text = [clearstring(t) for t in texts]
    batch_x = str_idx(batch_x_text, dict_sentiment['dictionary'], 100)
    output_sentiment = sess_sentiment.run(logits_sentiment,
                                          feed_dict={x_sentiment: batch_x})
    labels = [sentiment_label[l] for l in np.argmax(output_sentiment, 1)]
    return da.stack(labels, axis=0)
Example 15
def da_linregress(x, y):
    """
    Refactor of the scipy linregress with numba; fewer checks for speed's
    sake, done with dask arrays.

    :param x: array for the independent variable
    :param y: array for the dependent variable
    :return: slope, r squared, and the two-sided p-value of the fit
    """
    TINY = 1.0e-20
    # x = np.asarray(x)
    # y = np.asarray(y)
    arr = da.stack([x, y], axis=1)
    n = len(x)
    # average sum of squares:
    ssxm, ssxym, ssyxm, ssym = (da.dot(arr.T, arr) / n).ravel()
    r_num = ssxym
    r_den = np.sqrt(ssxm * ssym)
    if r_den == 0.0:
        r = 0.0
    else:
        r = r_num / r_den
        # test for numerical error propagation
        if r > 1.0:
            r = 1.0
        elif r < -1.0:
            r = -1.0
    df = n - 2
    slope = r_num / ssxm
    r_t = r + TINY
    t = r * da.sqrt(df / ((1.0 - r_t) * (1.0 + r_t)))
    prob = 2 * stats.distributions.t.sf(np.abs(t), df)
    return slope, r**2, prob
Example 16
    def transform_lonlats(self, lons, lats):
        R = 6370997.0
        x_coords = R * da.cos(da.deg2rad(lats)) * da.cos(da.deg2rad(lons))
        y_coords = R * da.cos(da.deg2rad(lats)) * da.sin(da.deg2rad(lons))
        z_coords = R * da.sin(da.deg2rad(lats))

        return da.stack((x_coords, y_coords, z_coords), axis=-1)
Example 17
def load_data(path, chunks):

    raw_bag = read_text(path) \
                .str.strip() \
                .map(row_to_numpy)

    return da.stack(raw_bag, axis=0)
Example 18
def calculatePk(number):
    print("working on {0}".format(number))
    sys.stdout.flush()
    cat = CSVCatalog(path=inpath.format(number),
                     names=["x", "y", "z", "vx", "vy", "vz", "M"])
    cat['position'] = da.stack([cat['x'], cat['y'], cat['z']]).T
    mesh = cat.to_mesh(window='cic',
                       Nmesh=1024,
                       compensated=True,
                       BoxSize=1500,
                       position='position')
    print("start making power")
    sys.stdout.flush()
    # compute the power, specifying desired linear k-binning
    r = FFTPower(mesh, mode='1d', dk=0.05, kmin=0.01)
    Pk = r.power
    k = Pk['k']
    power = Pk['power'].real - Pk.attrs['shotnoise']

    print("start writing power")
    sys.stdout.flush()
    f = open(outpath + "Pk_{0}.pk".format(number), "w")
    f.write("k(h/Mpc) Pk(h/Mpc)^3\n")
    for i in range(len(Pk['k'])):
        f.write("{0} {1}\n".format(k[i], power[i]))
    f.close()
Example 19
    def set_norm_factors(self, data_group, fold_group, overwrite=False):

        # Get Zarr arrays
        train_indexes = fold_group['train'][:]
        X = da.from_zarr(data_group['X'])
        norm_shape = X.shape[1:]

        # Create normalization data Zarr arrays
        norm_group = fold_group.require_group('norm_data')
        norm_group.require_dataset('s1', shape=norm_shape, dtype='float32', chunks=None)
        norm_group.require_dataset('s2', shape=norm_shape, dtype='float32', chunks=None)
        norm_group.require_dataset('mean', shape=norm_shape, dtype='float32', chunks=None)
        norm_group.require_dataset('std', shape=norm_shape, dtype='float32', chunks=None)

        # Stop processing if already done AND we don't want to overwrite the dataset
        if (norm_group['s1'].nchunks == norm_group['s1'].nchunks_initialized) and not overwrite:
            return

        # Compute normalization factors
        fold_num = pathlib.PurePath(fold_group.name).name[-1]
        print(f'Computing the normalization factors for the cross-validation fold #{fold_num}.\nThis may take some time...')

        # Compute sum and squared sum
        s1 = X[train_indexes,].sum(axis=0)
        s2 = (X[train_indexes,] ** 2).sum(axis=0)
        S = da.stack([s1, s2], axis=0).compute()
        s1 = S[0,]
        s2 = S[1,]
        n = train_indexes.size

        # Fill Zarr arrays with the normalization factors
        norm_group['s1'][:] = s1
        norm_group['s2'][:] = s2
        norm_group['mean'][:] = s1 / n
        norm_group['std'][:] = np.sqrt((n * s2 - (s1 * s1)) / (n * (n - 1)))
Example 20
    def _get_schema(self):
        from dask.bytes import open_files
        import dask.array as da
        if self._arr is None:
            path = self._get_cache(self.path)[0]

            files = open_files(path, 'rb', compression=None,
                               **self.storage)
            if self.shape is None:
                arr = NumpyAccess(files[0])
                self.shape = arr.shape
                self.dtype = arr.dtype
                arrs = [arr] + [NumpyAccess(f, self.shape, self.dtype)
                                for f in files[1:]]
            else:
                arrs = [NumpyAccess(f, self.shape, self.dtype)
                        for f in files]
            self.chunks = (self._chunks, ) + (-1, ) * (len(self.shape) - 1)
            self._arrs = [da.from_array(arr, self.chunks) for arr in arrs]

            if len(self._arrs) > 1:
                self._arr = da.stack(self._arrs)
            else:
                self._arr = self._arrs[0]
            self.chunks = self._arr.chunks
        return Schema(dtype=str(self.dtype), shape=self.shape,
                      extra_metadata=self.metadata,
                      npartitions=self._arr.npartitions,
                      chunks=self.chunks)
Example 21
def test_clock_tec_solve_dask():
    np.random.seed(1234)
    import pylab as plt
    times = np.arange(2)
    freqs = np.linspace(110e6, 170e6, 1000)

    cs = np.array([1, 1])
    tec = np.array([0.1, 0.2])
    delay = np.ones(len(times)) * 2e-9  # 2 ns
    phase = np.multiply.outer(np.ones(
        len(freqs)), cs) + 8.44797256e-7 * TECU * np.multiply.outer(
            1. / freqs, tec) + 2. * np.pi * np.multiply.outer(freqs, delay)
    phase += 15 * np.pi / 180. * np.random.normal(
        size=[len(freqs), len(times)])
    #plt.imshow(phase,origin='lower',extent=(times[0],times[-1],freqs[0],freqs[-1]),aspect='auto')
    #plt.colorbar()
    #plt.xlabel('times (s)')
    #plt.ylabel('freqs (Hz)')
    #plt.show()
    m, cov = least_squares_solve(phase, freqs, times, 15, Ct_ratio=0.01)
    m_exact = np.array([delay, tec, cs]).T
    import dask.array as da
    solsMH = [
        da.from_delayed(clock_tec_solve_dask(phase[:, i], freqs, m[i, :],
                                             cov[i, :, :], 15, 0.01),
                        shape=(3, ),
                        dtype=np.double) for i in range(len(times))
    ]

    sol_stacked = da.stack(solsMH, axis=0)
    sol = sol_stacked.compute()
    print(sol)
Example 22
def get_layer_list(channels, nd2_func, path, frame_shape, frame_dtype,
                   n_timepoints):
    # channel_dict = dict(zip(channels, [[] for _ in range(len(channels))]))
    channel_dict = {}
    for i, channel in enumerate(channels):
        arr = da.stack([
            da.from_delayed(delayed(nd2_func(path, i))(j),
                            shape=frame_shape,
                            dtype=frame_dtype) for j in range(n_timepoints)
        ])
        channel_dict[color_maps[i % len(color_maps)]] = dask.optimize(arr)[0]

    layer_list = []
    print("channel_dict", channel_dict)
    for channel_name, channel in channel_dict.items():
        blending = 'additive'
        meta = get_metadata(path)
        add_kwargs = {
            "name": channel_name,
            "colormap": channel_name,
            "blending": blending,
            "rendering": "mip",
            **meta
        }
        layer_type = "image"
        layer_list.append((channel, add_kwargs, layer_type))
    return layer_list
Example 23
def get_layer_list(channels, nd2_func, path, frame_shape, frame_dtype,
                   n_timepoints):
    channel_dict = dict(zip(channels, [[] for _ in range(len(channels))]))
    for i, channel in enumerate(channels):
        arr = da.stack([
            da.from_delayed(delayed(nd2_func(path, i))(j),
                            shape=frame_shape,
                            dtype=frame_dtype) for j in range(n_timepoints)
        ])
        channel_dict[channel] = dask.optimize(arr)[0]

    layer_list = []
    for channel_name, channel in channel_dict.items():
        visible = True
        blending = 'additive' if visible else 'translucent'
        meta = get_metadata(path)
        channel_color = meta['channels'][channel_name]
        color = Colormap([[0, 0, 0], channel_color[:-1]])  # ignore alpha
        add_kwargs = {
            "name": channel_name,
            "visible": visible,
            "colormap": color,
            "blending": blending,
            "scale": meta['scale'],
            "translate": meta['translate'],
        }
        layer_type = "image"
        layer_list.append((channel, add_kwargs, layer_type))
    return layer_list
Example 24
 def _generate_volume(self, image_list, scale=1):
     """
     """
     image_info = self._volume_info['coordinates_list']
     # get the shape of each plane in the x y axis
     shape = self._shape
     shape = np.round(np.array(shape) * scale).astype(int)
     shape = (shape[1], shape[2])
     # find out if the image is RGB shaped
     if isinstance(image_list[0], Delayed):
         image = image_list[0].compute()
     else:
         image = image_list[0]
     # get the data type
     dtype = image.dtype
     # add the RGB dim if necessary
     if image.shape[-1] == 3:
         shape = (shape[0], shape[1], 3)
     del image
     # get a list of delayed arrays representing padded images
     arrays = [
         da.from_delayed(self._padded_image(z, image_list, image_info,
                                            shape, scale),
                         shape,
                         dtype=dtype) for z in range(self._n)
     ]
     # get dask array representing image volume
     volume = da.stack(arrays, axis=0)
     return volume
Example 25
def read_raster(path, block_size=1):
    """Read all bands from raster"""
    bands = range(1, get_band_count(path) + 1)
    return da.stack([
        read_raster_band(path, band=band, block_size=block_size)
        for band in bands
    ])
Example 26
def black_scholes(nopt, price, strike, t, rate, vol, schd=None):
    mr = -rate
    sig_sig_two = vol * vol * 2

    P = price
    S = strike
    T = t

    a = log(P / S)
    b = T * mr

    z = T * sig_sig_two
    c = 0.25 * z
    y = da.map_blocks(invsqrt, z)

    w1 = (a - b + c) * y
    w2 = (a - b - c) * y

    d1 = 0.5 + 0.5 * da.map_blocks(erf, w1)
    d2 = 0.5 + 0.5 * da.map_blocks(erf, w2)

    Se = exp(b) * S

    call = P * d1 - Se * d2
    put = call - P + Se

    return da.compute(da.stack((put, call)), get=schd)
Example 27
def get_da_background(files, shape=ZTF_IMAGE_SHAPE, dtype="float32"):
    """ Get a dask.array stacked for each of the ziff image you want. 
    = Works only with single ziff = 
    """
    lazy_array = [dask.delayed(get_ziff_single_background)(f_)   for f_ in files]
    lazy_arrays = [da.from_delayed(x_, shape=shape, dtype=dtype) for x_ in lazy_array]
    return da.stack(lazy_arrays)
Example 28
def read_raster(path, band=None, block_size=1):
    """Read all or some bands from raster

    Arguments:
        path {string} -- path to raster file

    Keyword Arguments:
        band {int, iterable(int)} -- band number or iterable of bands.
            When passing None, it reads all bands (default: {None})
        block_size {int} -- block size multiplier (default: {1})

    Returns:
        dask.array.Array -- a Dask array
    """

    if isinstance(band, int):
        return read_raster_band(path, band=band, block_size=block_size)
    else:
        if band is None:
            bands = range(1, get_band_count(path) + 1)
        else:
            bands = list(band)
        return da.stack([
            read_raster_band(path, band=band, block_size=block_size)
            for band in bands
        ])
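Neither read_raster_band nor get_band_count is shown in these raster examples. A minimal sketch of how such helpers are commonly written with rasterio and dask.delayed, offered as an assumption rather than the original implementation (a real version would chunk by raster windows scaled by block_size instead of using one chunk per band):

import dask
import dask.array as da
import rasterio

def get_band_count(path):
    with rasterio.open(path) as src:
        return src.count

def read_raster_band(path, band=1, block_size=1):
    # Lazily read one band as a single-chunk dask array
    # (block_size is ignored in this simplified sketch).
    with rasterio.open(path) as src:
        shape = (src.height, src.width)
        dtype = src.dtypes[band - 1]

    def _read():
        with rasterio.open(path) as src:
            return src.read(band)

    return da.from_delayed(dask.delayed(_read)(), shape=shape, dtype=dtype)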
Example 29
def correlations_multiple(data, correlations, periodic_boundary=True, cutoff=None):
    """Calculate 2-point stats for a multiple auto/cross correlation

    Args:
      data: the discretized data (n_samples,n_x,n_y,n_correlation)
      correlations: the correlation pairs
      periodic_boundary: whether to assume a periodic boundary (default is True)
      cutoff: the subarray of the 2 point stats to keep

    Returns:
      the 2-points stats array

    >>> data = np.arange(18).reshape(1, 3, 3, 2)
    >>> out = correlations_multiple(data, [[0, 1], [1, 1]])
    >>> out
    dask.array<stack, shape=(1, 3, 3, 2), dtype=float64, chunksize=(1, 3, 3, 1)>
    >>> answer = np.array([[[58, 62, 58], [94, 98, 94], [58, 62, 58]]]) + 1. / 3.
    >>> assert np.allclose(out.compute()[..., 0], answer)
    """

    return pipe(
        range(data.shape[-1]),
        map_(lambda x: (0, x)),
        lambda x: correlations if correlations else x,
        map_(
            lambda x: two_point_stats(
                data[..., x[0]],
                data[..., x[1]],
                periodic_boundary=periodic_boundary,
                cutoff=cutoff,
            )
        ),
        list,
        lambda x: da.stack(x, axis=-1),
    )
Example 30
def compute_adjoint_dask(rays, g, dobs, i0, K_ne, m_tci, m_prior, CdCt,
                         sigma_m, Nkernel, size_cell):
    L_m = Nkernel * size_cell
    #     #i not eq i0 mask
    #     mask = np.ones(rays.shape[0],dtype=np.bool)
    #     mask[i0] = False
    #     rays = rays[mask,:,:,:,:]
    #     g = g[mask,:,:]
    #     dobs = dobs[mask,:,:]
    #     CdCt = CdCt[mask,:,:]
    #residuals
    #g.shape, dobs.shape [Na,Nt,Nd]
    dd = g - dobs
    #weighted residuals
    #Cd.shape [Na,Nt,Nd] i.e. diagonal
    #CdCt^-1 = 1./CdCt
    dd /= (CdCt + 1e-15)
    #get ray info
    Na, Nt, Nd, _, Ns = rays.shape
    #parallelize over directions
    gradient = da.sum(
        da.stack([
            da.from_delayed(
                delayed(do_adjoint)(rays[:, :, d, :, :], dd[:, :, d], K_ne,
                                    m_tci, sigma_m, Nkernel, size_cell, i0),
                (m_tci.nx, m_tci.ny, m_tci.nz),
                dtype=np.double)
            for d in range(Nd)
        ], axis=-1),
        axis=-1)
    gradient = gradient.compute(get=get)
    gradient += m_tci.M
    gradient -= m_prior

    return gradient
Example 31
def est_sh_part(varr, max_sh, npart, local):
    if varr.shape[0] <= 1:
        return varr.squeeze(), np.array([[0, 0]])
    idx_spt = np.array_split(np.arange(varr.shape[0]), npart)
    fm_ls, sh_ls = [], []
    for idx in idx_spt:
        if len(idx) > 0:
            fm, sh = est_sh_part(varr[idx, :, :], max_sh, npart, local)
            fm_ls.append(fm)
            sh_ls.append(sh)
    mid = int(len(sh_ls) / 2)
    sh_add_ls = [np.array([0, 0])] * len(sh_ls)
    for i, fm in enumerate(fm_ls):
        if i < mid:
            temp = fm_ls[i + 1]
            sh_idx = np.arange(i + 1)
        elif i > mid:
            temp = fm_ls[i - 1]
            sh_idx = np.arange(i, len(sh_ls))
        else:
            continue
        sh_add = darr.from_delayed(
            delayed(match_temp)(fm, temp, max_sh, local), (2,), float
        )
        for j in sh_idx:
            sh_ls[j] = sh_ls[j] + sh_add.reshape((1, -1))
            sh_add_ls[j] = sh_add_ls[j] + sh_add
    for i, (fm, sh) in enumerate(zip(fm_ls, sh_add_ls)):
        fm_ls[i] = darr.nan_to_num(
            darr.from_delayed(delayed(shift_perframe)(fm, sh), fm.shape, fm.dtype)
        )
    sh_ret = darr.concatenate(sh_ls)
    fm_ret = darr.stack(fm_ls)
    return fm_ret.max(axis=0), sh_ret
Example 32
def wavg_full_t(data, flags, weights, solint, times=None, threshold=0.8):
    """Perform weighted average of data, flags and weights, over axis 0.

    This applies flags and uses specified solution interval increments.

    Parameters
    ----------
    data       : array of complex
    flags      : array of boolean
    weights    : array of floats
    solint     : index interval over which to average, integer
    times      : optional array of times to average, array of floats
    threshold  : optional float

    Returns
    -------
    av_data    : weighted average of data
    av_flags   : weighted average of flags
    av_weights : weighted average of weights
    av_times   : optional average of times
    """
    # ensure solint is an integer
    solint = int(solint)
    inc_array = range(0, data.shape[0], solint)

    av_data = []
    av_flags = []
    av_weights = []
    # TODO: might be more efficient to use reduceat?
    for ti in inc_array:
        w_out = wavg_full(data[ti:ti + solint],
                          flags[ti:ti + solint],
                          weights[ti:ti + solint],
                          threshold=threshold)
        av_data.append(w_out[0])
        av_flags.append(w_out[1])
        av_weights.append(w_out[2])
    av_data = da.stack(av_data)
    av_flags = da.stack(av_flags)
    av_weights = da.stack(av_weights)

    if times is not None:
        av_times = np.array(
            [np.average(times[ti:ti + solint], axis=0) for ti in inc_array])
        return av_data, av_flags, av_weights, av_times
    else:
        return av_data, av_flags, av_weights
Example 33
def lonlat2xyz(lons, lats):

    R = 6370997.0
    x_coords = R * da.cos(da.deg2rad(lats)) * da.cos(da.deg2rad(lons))
    y_coords = R * da.cos(da.deg2rad(lats)) * da.sin(da.deg2rad(lons))
    z_coords = R * da.sin(da.deg2rad(lats))

    return da.stack(
        (x_coords.ravel(), y_coords.ravel(), z_coords.ravel()), axis=-1)
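A quick check of the stacking, assuming the function is called with dask arrays of longitudes and latitudes in degrees: two points on the equator map to an (N, 3) array of Cartesian coordinates on a sphere of radius R.

import numpy as np
import dask.array as da

lons = da.from_array(np.array([0.0, 90.0]), chunks=2)
lats = da.from_array(np.array([0.0, 0.0]), chunks=2)
print(lonlat2xyz(lons, lats).compute())
# approximately [[6370997, 0, 0], [0, 6370997, 0]]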
Example 34
    def _map_iterate(self,
                     function,
                     iterating_kwargs=(),
                     show_progressbar=None,
                     parallel=None,
                     ragged=None,
                     inplace=True,
                     **kwargs):
        if ragged not in (True, False):
            raise ValueError('"ragged" kwarg has to be bool for lazy signals')
        _logger.debug("Entering '_map_iterate'")

        size = max(1, self.axes_manager.navigation_size)
        from hyperspy.misc.utils import (create_map_objects,
                                         map_result_construction)
        func, iterators = create_map_objects(function, size, iterating_kwargs,
                                             **kwargs)
        iterators = (self._iterate_signal(), ) + iterators
        res_shape = self.axes_manager._navigation_shape_in_array
        # no navigation
        if not len(res_shape) and ragged:
            res_shape = (1,)

        all_delayed = [dd(func)(data) for data in zip(*iterators)]

        if ragged:
            sig_shape = ()
            sig_dtype = np.dtype('O')
        else:
            one_compute = all_delayed[0].compute()
            sig_shape = one_compute.shape
            sig_dtype = one_compute.dtype
        pixels = [
            da.from_delayed(
                res, shape=sig_shape, dtype=sig_dtype) for res in all_delayed
        ]

        for step in reversed(res_shape):
            _len = len(pixels)
            starts = range(0, _len, step)
            ends = range(step, _len + step, step)
            pixels = [
                da.stack(
                    pixels[s:e], axis=0) for s, e in zip(starts, ends)
            ]
        result = pixels[0]
        res = map_result_construction(
            self, inplace, result, ragged, sig_shape, lazy=True)
        return res
Example 35
    def stretch_hist_equalize(self, approximate=False):
        """Stretch the current image's colors through histogram equalization.

        Args:
            approximate (bool): Use a faster less-accurate percentile
                                calculation. At the time of writing the dask
                                version of `percentile` is not as accurate as
                                the numpy version. This will likely change in
                                the future. Current dask version 0.17.

        """
        logger.info("Perform a histogram equalized contrast stretch.")

        nwidth = 2048.
        logger.debug("Make histogram bins having equal amount of data, " +
                     "using numpy percentile function:")

        def _band_hist(band_data):
            cdf = da.arange(0., 1., 1. / nwidth, chunks=nwidth)
            if approximate:
                # need a 1D array
                flat_data = band_data.ravel()
                # replace with nanpercentile in the future, if available
                # dask < 0.17 returns all NaNs for this
                bins = da.percentile(flat_data[da.notnull(flat_data)],
                                     cdf * 100.)
            else:
                bins = dask.delayed(np.nanpercentile)(band_data, cdf * 100.)
                bins = da.from_delayed(bins, shape=(nwidth,), dtype=cdf.dtype)
            res = dask.delayed(np.interp)(band_data, bins, cdf)
            res = da.from_delayed(res, shape=band_data.shape,
                                  dtype=band_data.dtype)
            return res

        band_results = []
        for band in self.data['bands'].values:
            if band == 'A':
                continue
            band_data = self.data.sel(bands=band)
            res = _band_hist(band_data.data)
            band_results.append(res)

        if 'A' in self.data.coords['bands'].values:
            band_results.append(self.data.sel(bands='A'))
        self.data.data = da.stack(band_results,
                                  axis=self.data.dims.index('bands'))
Example 36
    def load(s, measure, dset_name, transpose_lst, df_attr='demog_df'):
        ''' given measure, h5 dataset name, transpose list: load data '''

        df = getattr(s, df_attr)

        if measure in dir(s):
            print(measure, 'already loaded')
            if df.shape[0] != getattr(s, measure).shape[0]:
                print('shape of loaded data does not match demogs, reloading')
            else:
                return np.array([])

        dsets = [h5py.File(fn, 'r')[dset_name] for fn in df['path'].values]
        arrays = [da.from_array(dset, chunks=dset.shape) for dset in dsets]
        stack = da.stack(arrays, axis=-1)  # concatenate along last axis
        stack = stack.transpose(transpose_lst)  # do transposition

        data = np.empty(stack.shape)
        da.store(stack, data)
        print(data.shape)
        return data
Example 37
def dec10216(inbuf):
    """Decode 10 bits data into 16 bits words.

    ::

        /*
         * pack 4 10-bit words in 5 bytes into 4 16-bit words
         *
         * 0       1       2       3       4       5
         * 01234567890123456789012345678901234567890
         * 0         1         2         3         4
         */
        ip = &in_buffer[i];
        op = &out_buffer[j];
        op[0] = ip[0]*4 + ip[1]/64;
        op[1] = (ip[1] & 0x3F)*16 + ip[2]/16;
        op[2] = (ip[2] & 0x0F)*64 + ip[3]/4;
        op[3] = (ip[3] & 0x03)*256 +ip[4];

    """
    arr10 = inbuf.astype(np.uint16)
    arr16_len = int(len(arr10) * 4 / 5)
    arr10_len = int((arr16_len * 5) / 4)
    arr10 = arr10[:arr10_len]  # adjust size

    # dask is slow with indexing
    arr10_0 = arr10[::5]
    arr10_1 = arr10[1::5]
    arr10_2 = arr10[2::5]
    arr10_3 = arr10[3::5]
    arr10_4 = arr10[4::5]

    arr16_0 = (arr10_0 << 2) + (arr10_1 >> 6)
    arr16_1 = ((arr10_1 & 63) << 4) + (arr10_2 >> 4)
    arr16_2 = ((arr10_2 & 15) << 6) + (arr10_3 >> 2)
    arr16_3 = ((arr10_3 & 3) << 8) + arr10_4
    arr16 = da.stack([arr16_0, arr16_1, arr16_2, arr16_3], axis=-1).ravel()
    arr16 = da.rechunk(arr16, arr16.shape[0])

    return arr16
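A tiny worked check of the unpacking, following the C comment above: these five bytes encode the four 10-bit words 600, 300, 1000 and 3.

import numpy as np
import dask.array as da

packed = da.from_array(np.array([150, 18, 207, 160, 3], dtype=np.uint8), chunks=5)
print(dec10216(packed).compute())  # [ 600  300 1000    3]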
Example 39
def test_stack_scalars():
    d = da.arange(4, chunks=2)

    s = da.stack([d.mean(), d.sum()])

    assert s.compute().tolist() == [np.arange(4).mean(), np.arange(4).sum()]
Example 40
def test_short_stack():
    x = np.array([1])
    d = da.from_array(x, chunks=(1,))
    s = da.stack([d])
    assert s.shape == (1, 1)
    assert Array._get(s.dask, s._keys())[0][0].shape == (1, 1)
Example 41
def test_gh4043(lock, asarray, fancy):
    a1 = da.from_array(np.zeros(3,), chunks=1, asarray=asarray, lock=lock, fancy=fancy)
    a2 = da.from_array(np.ones(3,), chunks=1, asarray=asarray, lock=lock, fancy=fancy)
    al = da.stack([a1, a2])
    assert_eq(al, al)
Example 42
    def __init__(
        self,
        dirname,
        iters=None,
        deltaT=1,
        prefix=None,
        ref_date=None,
        calendar=None,
        ignore_pickup=True,
        geometry="Cartesian",
        skip_vars=[],
    ):
        """iters: list of iteration numbers
        deltaT: timestep
        prefix: list of file prefixes (if None use all)
        """
        assert geometry in _valid_geometry
        self.geometry = geometry

        # the directory where the files live
        self.dirname = dirname

        # storage dicts for variables and attributes
        self._variables = xray.core.pycompat.OrderedDict()
        self._attributes = xray.core.pycompat.OrderedDict()
        self._dimensions = []

        ### figure out the mapping between diagnostics names and variable properties

        ### read grid files
        for k in _grid_variables:
            dims, desc, units = _grid_variables[k]
            data = _read_and_shape_grid_data(k, dirname)
            if data is not None:
                self._variables[k] = Variable(dims, MemmapArrayWrapper(data), {"description": desc, "units": units})
                self._dimensions.append(k)

        ## check for layers
        Nlayers = None
        for varname, dims, desc, units, data in _get_layers_grid_variables(dirname):
            self._variables[varname] = Variable(dims, MemmapArrayWrapper(data), {"description": desc, "units": units})
            self._dimensions.append(varname)
            # if there are multiple layers coordinates, they all have the same
            # size, so this works (although it is sloppy)
            if varname[-7:] == "_bounds":
                Nlayers = len(data)

        ## load metadata for all possible diagnostics
        diag_meta = _parse_available_diagnostics(os.path.join(dirname, "available_diagnostics.log"), Nlayers=Nlayers)

        # now get variables from our iters
        if iters is not None:

            # create iteration array
            iterdata = np.asarray(iters)
            self._variables["iter"] = Variable(("time",), iterdata, {"description": "model timestep number"})

            # create time array
            timedata = np.asarray(iters) * deltaT
            time_attrs = {"description": "model time"}
            if ref_date is not None:
                time_attrs["units"] = "seconds since %s" % ref_date
            else:
                time_attrs["units"] = "seconds"
            if calendar is not None:
                time_attrs["calendar"] = calendar
            self._variables["time"] = Variable(("time",), timedata, time_attrs)
            self._dimensions.append("time")

            varnames = []
            fnames = []
            _data_vars = xray.core.pycompat.OrderedDict()
            # look at first iter to get variable metadata
            for f in glob(os.path.join(dirname, "*.%010d.meta" % iters[0])):
                if ignore_pickup and re.search("pickup", f):
                    pass
                else:
                    go = True
                    if prefix is not None:
                        bname = os.path.basename(f[:-16])
                        matches = [bname == p for p in prefix]
                        if not any(matches):
                            go = False
                    if go:
                        meta = _parse_meta(f)
                        if "fldList" in meta:
                            # we have multiple variables per file
                            flds = meta["fldList"]
                            [varnames.append(fl) for fl in flds]
                        else:
                            # just use the filename as the variable name
                            varnames.append(meta["basename"])
                        fnames.append(os.path.join(dirname, meta["basename"]))

            # strip unwanted variables
            for v in skip_vars:
                try:
                    varnames.remove(v)
                except ValueError:
                    pass

            # read data as dask arrays (should be an option)
            vardata = {}
            for k in varnames:
                vardata[k] = []
            for i in iters:
                for f in fnames:
                    try:
                        data = _read_mds(f, i, force_dict=True)
                        # this can screw up if the same variable appears in
                        # multiple diagnostic files
                        for k in data:
                            if k in varnames:
                                mwrap = MemmapArrayWrapper(data[k])
                                # for some reason, da.from_array does not
                                # necessarily give a unique name
                                # need to specify array name
                                myda = da.from_array(mwrap, mwrap.shape, name="%s_%010d" % (k, i))
                                vardata[k].append(myda)
                    except IOError:
                        # couldn't find the variable, remove it from the list
                        # print 'Removing %s from list (iter %g)' % (k, i)
                        varnames.remove(k)

            # final loop to create Variable objects
            for k in varnames:
                try:
                    dims, desc, units = _state_variables[k]
                except KeyError:
                    try:
                        dims, desc, units = _ptracers[k]
                    except KeyError:
                        dims, desc, units = diag_meta[k]

                # check for shape compatibility
                varshape = vardata[k][0].shape
                varndims = len(varshape)
                # maybe promote 2d data to 3d
                if (len(dims) == 3) and (varndims == 2):
                    if len(self._variables[dims[0]]) == 1:
                        vardata[k] = [v.reshape((1,) + varshape) for v in vardata[k]]
                        warnings.warn("Promiting 2D data to 3D data " "for variable %s" % k)
                        varndims += 1
                if len(dims) != varndims:
                    warnings.warn(
                        "Shape of variable data is not compatible "
                        "with expected number of dimensions. This "
                        "can arise if the 'levels' option is used "
                        "in data.diagnostics. Right now we have no "
                        "way to infer the level, so the variable is "
                        "skipped: " + k
                    )
                else:
                    # add time to dimension
                    dims_time = ("time",) + dims
                    # wrap variable in dask array
                    # -- why? it's already a dask array
                    # vardask = da.stack([da.from_array(d, varshape) for d in vardata[k]])
                    vardask = da.stack(vardata[k])
                    # for nkdsk in range(len(vardata[k])):
                    #    print 'Key %s, vardata[%g] sum %g, name %s' % (k, nkdsk,
                    #        vardata[k][nkdsk].sum(), vardata[k][nkdsk].name)
                    #    print 'Key %s, vardask[%g] sum %g' % (k, nkdsk,
                    #        vardask[nkdsk].sum())
                    newvar = Variable(dims_time, vardask, {"description": desc, "units": units})
                    self._variables[k] = newvar

        self._attributes = {"history": "Some made up attribute"}
Example 43
    def __init__(self, dirname, iters=None, deltaT=1,
                 prefix=None, ref_date=None, calendar=None,
                 ignore_pickup=True, geometry='Cartesian'):
        """iters: list of iteration numbers
        deltaT: timestep
        prefix: list of file prefixes (if None use all)
        """
        assert geometry in _valid_geometry
        self.geometry = geometry
        
        # the directory where the files live
        self.dirname = dirname
        
        # storage dicts for variables and attributes
        self._variables = OrderedDict()
        self._attributes = OrderedDict()
        self._dimensions = []
 
        ### figure out the mapping between diagnostics names and variable properties
        # all possible diagnostics
        diag_meta = _parse_available_diagnostics(
                os.path.join(dirname, 'available_diagnostics.log'))

        ### read grid files
        for k in _grid_variables:
            if k in _grid_special_mapping:
                fname = _grid_special_mapping[k][0]
                sl = _grid_special_mapping[k][1]
            else:
                fname = k
                sl = None
            data = None
            try:
                data = _read_mds(os.path.join(dirname, fname), force_dict=False)
            except IOError:
                try:
                    data = _read_mds(os.path.join(dirname, fname.upper()),
                                     force_dict=False)
                except IOError:
                    warnings.warn("Couldn't load grid variable " + k)
            if data is not None:
                data = data[sl] if sl is not None else data.squeeze()
                dims, desc, units = _grid_variables[k]
                self._variables[k] = Variable(
                    dims, MemmapArrayWrapper(data), {'description': desc, 'units': units})
                self._dimensions.append(k)
                
        # now get variables from our iters
        if iters is not None:
            
            # create iteration array
            iterdata = np.asarray(iters)
            self._variables['iter'] = Variable(('time',), iterdata,
                                                {'description': 'model timestep number'})
            
            # create time array
            timedata = np.asarray(iters)*deltaT
            time_attrs = {'description': 'model time'}
            if ref_date is not None:
                time_attrs['units'] = 'seconds since %s' % ref_date
            else:
                time_attrs['units'] = 'seconds'
            if calendar is not None:
                time_attrs['calendar'] = calendar
            self._variables['time'] = Variable(
                                        ('time',), timedata, time_attrs)
            self._dimensions.append('time')
            
            varnames = []
            fnames = []
            _data_vars = OrderedDict()
            # look at first iter to get variable metadata
            for f in glob(os.path.join(dirname, '*.%010d.meta' % iters[0])):
                if ignore_pickup and re.search('pickup', f):
                    pass
                else:
                    go = True
                    if prefix is not None:
                        bname = os.path.basename(f[:-16])
                        matches = [bname==p for p in prefix]
                        if not any(matches):
                            go = False
                    if go:
                        meta = _parse_meta(f)
                        if 'fldList' in meta:
                            flds = meta['fldList']
                            [varnames.append(fl) for fl in flds]
                        else:
                            varnames.append(meta['basename'])
                        fnames.append(os.path.join(dirname,meta['basename']))
            
            # read data as dask arrays (should be an option)
            vardata = {}
            for k in varnames:
                vardata[k] = []
            for i in iters:
                for f in fnames:
                    try:
                        data = _read_mds(f, i, force_dict=True)
                        for k in data.keys():
                            mwrap = MemmapArrayWrapper(data[k])
                            vardata[k].append(
                                da.from_array(mwrap, mwrap.shape))
                    except IOError:
                        # couldn't find the variable, remove it from the list
                        #print 'Removing %s from list (iter %g)' % (k, i)
                        varnames.remove(k)

            # final loop to create Variable objects
            for k in varnames:
                try:
                    dims, desc, units = _state_variables[k]
                except KeyError:
                    dims, desc, units = diag_meta[k]
                # check for shape compatibility
                varshape = vardata[k][0].shape
                varndims = len(varshape)
                if len(dims) != varndims:
                    warnings.warn("Shape of variable data is not compatible "
                                  "with expected number of dimensions. This "
                                  "can arise if the 'levels' option is used "
                                  "in data.diagnostics. Right now we have no "
                                  "way to infer the level, so the variable is "
                                  "skipped: " + k)
                else:
                    # add time to dimension
                    dims_time = ('time',) + dims
                    # wrap variable in dask array
                    vardask = da.stack([da.from_array(d, varshape) for d in vardata[k]])
                    self._variables[k] = Variable( dims_time, vardask,
                                                   {'description': desc, 'units': units})
                                        
        self._attributes = {'history': 'Some made up attribute'}
Example 44
def CartesianToEquatorial(pos, observer=[0,0,0], frame='icrs'):
    """
    Convert Cartesian position coordinates to equatorial right ascension
    and declination, using the specified observer location.

    .. note::
        RA and DEC will be returned in degrees, with RA in the range [0,360]
        and DEC in the range [-90, 90].

    Parameters
    ----------
    pos : array_like
        a N x 3 array holding the Cartesian position coordinates
    observer : array_like
        a length 3 array holding the observer location
    frame : string
        A string, 'icrs' or 'galactic'. The frame of the input position.
        Use 'icrs' if the cartesian position is already in Equatorial.

    Returns
    -------
    ra, dec : array_like
        the right ascension and declination coordinates, in degrees. RA
        will be in the range [0,360] and DEC in the range [-90, 90]
    """

    # split x, y, z to signify that we do not need to have pos
    # as a full chunk in the last dimension.
    # this is useful when we use apply_gufunc.

    x, y, z = [pos[..., i] - observer[i] for i in range(3)]

    if frame == 'icrs':
        # FIXME: Convert these to a gufunc that uses astropy?
        # might be a step backward.

        # from equatorial to equatorial
        s = da.hypot(x, y)
        lon = da.arctan2(y, x)
        lat = da.arctan2(z, s)

        # convert to degrees
        lon = da.rad2deg(lon)
        lat = da.rad2deg(lat)
        # wrap lon to [0,360]
        lon = da.mod(lon-360., 360.)
        ra, dec = lon, lat
    else:
        from astropy.coordinates import SkyCoord

        def cart_to_eq(x, y, z):
            try:
                sc = SkyCoord(x, y, z, representation_type='cartesian', frame=frame)
                scg = sc.transform_to(frame='icrs')
                scg.representation_type = 'unitspherical'
            except Exception:  # fall back for older astropy versions
                sc = SkyCoord(x, y, z, representation='cartesian', frame=frame)
                scg = sc.transform_to(frame='icrs')
                scg.representation = 'unitspherical'

            ra, dec = scg.ra.value, scg.dec.value

            return ra, dec

        dtype = pos.dtype
        ra, dec = da.apply_gufunc(cart_to_eq, '(),(),()->(),()', x, y, z, output_dtypes=[dtype, dtype])

    return da.stack((ra, dec), axis=0)
Example 45
def stack(signal_list, axis=None, new_axis_name='stack_element',
          lazy=None, **kwargs):
    """Concatenate the signals in the list over a given axis or a new axis.

    The title is set to that of the first signal in the list.

    Parameters
    ----------
    signal_list : list of BaseSignal instances
    axis : {None, int, str}
        If None, the signals are stacked over a new axis. The data must
        have the same dimensions. Otherwise the
        signals are stacked over the axis given by its integer index or
        its name. The data must have the same shape, except in the dimension
        corresponding to `axis`.
    new_axis_name : string
        The name of the new axis when `axis` is None.
        If an axis with this name already exists, it automatically appends
        '-i', where `i` are integers, until it finds a name that is not yet
        in use.
    lazy: {bool, None}
        Returns a LazySignal if True. If None, only returns a lazy result if
        at least one signal is lazy.

    Returns
    -------
    signal : BaseSignal instance (or subclass, determined by the objects in
        signal list)

    Examples
    --------
    >>> data = np.arange(20)
    >>> s = hs.stack([hs.signals.Signal1D(data[:10]),
    ...               hs.signals.Signal1D(data[10:])])
    >>> s
    <Signal1D, title: Stack of , dimensions: (2, 10)>
    >>> s.data
    array([[ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9],
           [10, 11, 12, 13, 14, 15, 16, 17, 18, 19]])

    """
    from itertools import zip_longest
    from hyperspy.signals import BaseSignal
    import dask.array as da
    from numbers import Number
    # TODO: remove next time
    deprecated = ['mmap', 'mmap_dir']
    warn_str = "'{}' argument is deprecated, please use 'lazy' instead"
    for k in deprecated:
        if k in kwargs:
            lazy=True
            warnings.warn(warn_str.format(k), VisibleDeprecationWarning)

    axis_input = copy.deepcopy(axis)
    signal_list = list(signal_list)
    # Get the real signal with the most axes to get metadata/class/etc
    # first = sorted(filter(lambda _s: isinstance(_s, BaseSignal), signal_list),
    #                key=lambda _s: _s.data.ndim)[-1]
    first = next(filter(lambda _s: isinstance(_s, BaseSignal), signal_list))

    # Cast numbers as signals. Will broadcast later

    for i, _s in enumerate(signal_list):
        if isinstance(_s, BaseSignal):
            pass
        elif isinstance(_s, Number):
            sig = BaseSignal(_s)
            signal_list[i] = sig
        else:
            raise ValueError("{} type cannot be stacked.".format(type(_s)))

    if lazy is None:
        lazy = any(_s._lazy for _s in signal_list)
    if not isinstance(lazy, bool):
        raise ValueError("'lazy' argument has to be None, True or False")

    # Cast all as lazy if required
    for i, _s in enumerate(signal_list):
        if not _s._lazy:
            signal_list[i] = _s.as_lazy()
    if len(signal_list) > 1:
        newlist = broadcast_signals(*signal_list, ignore_axis=axis_input)
        if axis is not None:
            step_sizes = [s.axes_manager[axis].size for s in newlist]
            axis = newlist[0].axes_manager[axis]
        datalist = [s.data for s in newlist]
        newdata = da.stack(datalist, axis=0) if axis is None else \
            da.concatenate(datalist, axis=axis.index_in_array)
        if axis_input is None:
            signal = first.__class__(newdata)
            signal._lazy = True
            signal._assign_subclass()
            signal.axes_manager._axes[1:] = copy.deepcopy(newlist[0].axes_manager._axes)
            axis_name = new_axis_name
            axis_names = [axis_.name for axis_ in signal.axes_manager._axes[1:]]
            j = 1
            while axis_name in axis_names:
                axis_name = new_axis_name + "_%i" % j
                j += 1
            eaxis = signal.axes_manager._axes[0]
            eaxis.name = axis_name
            eaxis.navigate = True  # This triggers _update_parameters
            signal.metadata = copy.deepcopy(first.metadata)
            # Get the title from 1st object
            signal.metadata.General.title = (
                "Stack of " + first.metadata.General.title)
            signal.original_metadata = DictionaryTreeBrowser({})
        else:
            signal = newlist[0]._deepcopy_with_new_data(newdata)
            signal._lazy = True
            signal._assign_subclass()
        signal.get_dimensions_from_data()
        signal.original_metadata.add_node('stack_elements')

        for i, obj in enumerate(signal_list):
            signal.original_metadata.stack_elements.add_node('element%i' % i)
            node = signal.original_metadata.stack_elements['element%i' % i]
            node.original_metadata = \
                obj.original_metadata.as_dictionary()
            node.metadata = \
                obj.metadata.as_dictionary()

        if axis_input is None:
            axis_input = signal.axes_manager[-1 + 1j].index_in_axes_manager
            step_sizes = 1

        signal.metadata._HyperSpy.set_item('Stacking_history.axis', axis_input)
        signal.metadata._HyperSpy.set_item('Stacking_history.step_sizes',
                                           step_sizes)
        if np.all([
                s.metadata.has_item('Signal.Noise_properties.variance')
                for s in signal_list
        ]):
            variance = stack([
                s.metadata.Signal.Noise_properties.variance for s in signal_list
            ], axis)
            signal.metadata.set_item('Signal.Noise_properties.variance', variance)
    else:
        signal = signal_list[0]

    # Leave as lazy or compute
    if lazy:
        signal = signal.as_lazy()
    else:
        signal.compute(False)

    return signal
Example 46
def CartesianToSky(pos, cosmo, velocity=None, observer=[0,0,0], zmax=100., frame='icrs'):
    r"""
    Convert Cartesian position coordinates to RA/Dec and redshift,
    using the specified cosmology to convert radial distances from
    the origin into redshift.

    If velocity is supplied, the returned redshift accounts for the
    additional peculiar velocity shift.

    Users should ensure that ``zmax`` is larger than the largest possible
    redshift being considered to avoid an interpolation exception.

    .. note::
        Cartesian coordinates should be in units of Mpc/h and velocity
        should be in units of km/s.

    Parameters
    ----------
    pos : dask array
        a N x 3 array holding the Cartesian position coordinates in Mpc/h
    cosmo : :class:`~nbodykit.cosmology.cosmology.Cosmology`
        the cosmology used to measure the comoving distance from ``redshift``
    velocity : array_like
        a N x 3 array holding velocity in km/s
    observer : array_like, optional
        a length 3 array holding the observer location
    zmax : float, optional
        the maximum possible redshift, should be set to a reasonably large
        value to avoid interpolation failure going from comoving distance
        to redshift
    frame : string ('icrs' or 'galactic')
        specifies which frame the Cartesian coordinates are in. Useful if you
        know the simulation (usually Cartesian) is in galactic units but you
        want to convert to the icrs (ra, dec) usually used in surveys.

    Returns
    -------
    ra, dec, z : dask array
        the right ascension (in degrees), declination (in degrees), and
        redshift coordinates. RA will be in the range [0,360] and DEC in the
        range [-90, 90]

    Notes
    -----
    If velocity is provided, redshift-space distortions are added to the
    real-space redshift :math:`z_\mathrm{real}`, via:

    .. math::

            z_\mathrm{redshift} = z_\mathrm{real} + ( v_\mathrm{pec} / c ) (1 + z_\mathrm{real})

    Raises
    ------
    TypeError
        If the input columns are not dask arrays
    """
    from astropy.constants import c
    from scipy.interpolate import interp1d

    if not isinstance(pos, da.Array):
        pos = da.from_array(pos, chunks=100000)

    pos = pos - observer
    # RA,dec coordinates (in degrees)
    ra, dec = CartesianToEquatorial(pos, frame=frame)

    # the distance from the origin
    r = da.linalg.norm(pos, axis=-1)

    def z_from_comoving_distance(x):
        zgrid = numpy.logspace(-8, numpy.log10(zmax), 1024)
        zgrid = numpy.concatenate([[0.], zgrid])
        rgrid = cosmo.comoving_distance(zgrid)
        return interp1d(rgrid, zgrid)(x)

    # invert distance - redshift relation
    z = r.map_blocks(z_from_comoving_distance)

    # add in velocity offsets?
    if velocity is not None:

        vpec = (pos * velocity).sum(axis=-1) / r

        z += vpec / c.to('km/s').value * (1 + z)

    return da.stack((ra, dec, z), axis=0)
Example 47
import h5py
from glob import glob
import os

filenames = sorted(glob(os.path.join('data', 'weather-big', '*.hdf5')))
dsets = [h5py.File(filename, mode='r')['/t2m'] for filename in filenames]

import dask.array as da
arrays = [da.from_array(dset, chunks=(500, 500)) for dset in dsets]

x = da.stack(arrays, axis=0)

result = x[:, ::2, ::2]

da.to_hdf5(os.path.join('data', 'myfile.hdf5'), '/output', result)