Example #1
def test_diff_append(n):
    x = cupy.arange(5) + 1
    a = da.from_array(x, chunks=2)
    assert_eq(da.diff(a, n, append=0), cupy.diff(x, n, append=0))
    assert_eq(da.diff(a, n, append=[0]), cupy.diff(x, n, append=[0]))
    assert_eq(da.diff(a, n, append=[-1, 0]), cupy.diff(x, n, append=[-1, 0]))

    x = cupy.arange(16).reshape(4, 4)
    a = da.from_array(x, chunks=2)
    assert_eq(da.diff(a, n, axis=1, append=0), cupy.diff(x,
                                                         n,
                                                         axis=1,
                                                         append=0))
    assert_eq(
        da.diff(a, n, axis=1, append=[[0], [0], [0], [0]]),
        cupy.diff(x, n, axis=1, append=[[0], [0], [0], [0]]),
    )
    assert_eq(da.diff(a, n, axis=0, append=0), cupy.diff(x,
                                                         n,
                                                         axis=0,
                                                         append=0))
    assert_eq(
        da.diff(a, n, axis=0, append=[[0, 0, 0, 0]]),
        cupy.diff(x, n, axis=0, append=[[0, 0, 0, 0]]),
    )

    if n > 0:
        with pytest.raises(ValueError):
            # When order is 0 the result is the input array, it doesn't raise
            # an error
            da.diff(a, n, append=cupy.zeros((3, 3)))
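For reference (not part of the test above), the same append semantics can be tried with a plain NumPy-backed dask array, no GPU required. A minimal sketch, assuming only numpy and dask.array are installed:

import numpy as np
import dask.array as da

x = np.arange(5) + 1                  # [1, 2, 3, 4, 5]
a = da.from_array(x, chunks=2)

# Appending 0 before differencing: diff([1, 2, 3, 4, 5, 0]) -> [1, 1, 1, 1, -5]
print(da.diff(a, 1, append=0).compute())
print(np.diff(x, 1, append=0))        # eager NumPy result, for comparison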
Example #2
def test_diff(shape, n, axis):
    x = np.random.randint(0, 10, shape)
    a = da.from_array(x, chunks=(len(shape) * (5, )))

    assert_eq(da.diff(a, n, axis), np.diff(x, n, axis))
Example #3
def test_diff(shape, n, axis):
    x = np.random.randint(0, 10, shape)
    a = da.from_array(x, chunks=(len(shape) * (5,)))

    assert_eq(da.diff(a, n, axis), np.diff(x, n, axis))
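As a companion sketch (mine, not from the test suite), diff along a chosen axis of a chunked 2D array behaves the same way; dask handles the chunk boundaries internally:

import numpy as np
import dask.array as da

x = np.arange(16).reshape(4, 4)
a = da.from_array(x, chunks=2)              # 2x2 blocks

print(da.diff(a, n=1, axis=1).compute())    # row-wise differences, shape (4, 3)
print(da.diff(a, n=1, axis=0).compute())    # column-wise differences, shape (3, 4)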
Example #4
def new_grid_mapping_from_coords(
    x_coords: xr.DataArray,
    y_coords: xr.DataArray,
    crs: Union[str, pyproj.crs.CRS],
    *,
    tile_size: Union[int, Tuple[int, int]] = None,
    tolerance: float = DEFAULT_TOLERANCE,
) -> GridMapping:
    crs = _normalize_crs(crs)
    assert_instance(x_coords, xr.DataArray, name='x_coords')
    assert_instance(y_coords, xr.DataArray, name='y_coords')
    assert_true(x_coords.ndim in (1, 2),
                'x_coords and y_coords must be either 1D or 2D arrays')
    assert_instance(tolerance, float, name='tolerance')
    assert_true(tolerance > 0.0, 'tolerance must be greater than zero')

    if x_coords.name and y_coords.name:
        xy_var_names = str(x_coords.name), str(y_coords.name)
    else:
        xy_var_names = _default_xy_var_names(crs)

    tile_size = _normalize_int_pair(tile_size, default=None)
    is_lon_360 = None  # None means "not yet known"
    if crs.is_geographic:
        is_lon_360 = bool(np.any(x_coords > 180))

    x_res = 0
    y_res = 0

    if x_coords.ndim == 1:
        # We have 1D x,y coordinates
        cls = Coords1DGridMapping

        assert_true(x_coords.size >= 2 and y_coords.size >= 2,
                    'sizes of x_coords and y_coords 1D arrays must be >= 2')

        size = x_coords.size, y_coords.size

        x_dim, y_dim = x_coords.dims[0], y_coords.dims[0]

        x_diff = _abs_no_zero(x_coords.diff(dim=x_dim).values)
        y_diff = _abs_no_zero(y_coords.diff(dim=y_dim).values)

        if not is_lon_360 and crs.is_geographic:
            is_anti_meridian_crossed = np.any(np.nanmax(x_diff) > 180)
            if is_anti_meridian_crossed:
                x_coords = to_lon_360(x_coords)
                x_diff = _abs_no_zero(x_coords.diff(dim=x_dim))
                is_lon_360 = True

        x_res, y_res = x_diff[0], y_diff[0]
        x_diff_equal = np.allclose(x_diff, x_res, atol=tolerance)
        y_diff_equal = np.allclose(y_diff, y_res, atol=tolerance)
        is_regular = x_diff_equal and y_diff_equal
        if is_regular:
            x_res = round_to_fraction(x_res, 5, 0.25)
            y_res = round_to_fraction(y_res, 5, 0.25)
        else:
            x_res = round_to_fraction(float(np.nanmedian(x_diff)), 2, 0.5)
            y_res = round_to_fraction(float(np.nanmedian(y_diff)), 2, 0.5)

        if tile_size is None \
                and x_coords.chunks is not None \
                and y_coords.chunks is not None:
            tile_size = (max(0,
                             *x_coords.chunks[0]), max(0, *y_coords.chunks[0]))

        # Guess j axis direction
        is_j_axis_up = bool(y_coords[0] < y_coords[-1])

    else:
        # We have 2D x,y coordinates
        cls = Coords2DGridMapping

        assert_true(
            x_coords.shape == y_coords.shape, 'shapes of x_coords and y_coords'
            ' 2D arrays must be equal')
        assert_true(
            x_coords.dims == y_coords.dims,
            'dimensions of x_coords and y_coords'
            ' 2D arrays must be equal')

        y_dim, x_dim = x_coords.dims

        height, width = x_coords.shape
        size = width, height

        x = da.asarray(x_coords)
        y = da.asarray(y_coords)

        x_x_diff = _abs_no_nan(da.diff(x, axis=1))
        x_y_diff = _abs_no_nan(da.diff(x, axis=0))
        y_x_diff = _abs_no_nan(da.diff(y, axis=1))
        y_y_diff = _abs_no_nan(da.diff(y, axis=0))

        if not is_lon_360 and crs.is_geographic:
            is_anti_meridian_crossed = da.any(da.max(x_x_diff) > 180) \
                                       or da.any(da.max(x_y_diff) > 180)
            if is_anti_meridian_crossed:
                x_coords = to_lon_360(x_coords)
                x = da.asarray(x_coords)
                x_x_diff = _abs_no_nan(da.diff(x, axis=1))
                x_y_diff = _abs_no_nan(da.diff(x, axis=0))
                is_lon_360 = True

        is_regular = False

        if da.all(x_y_diff == 0) and da.all(y_x_diff == 0):
            x_res = x_x_diff[0, 0]
            y_res = y_y_diff[0, 0]
            is_regular = \
                da.allclose(x_x_diff[0, :], x_res, atol=tolerance) \
                and da.allclose(x_x_diff[-1, :], x_res, atol=tolerance) \
                and da.allclose(y_y_diff[:, 0], y_res, atol=tolerance) \
                and da.allclose(y_y_diff[:, -1], y_res, atol=tolerance)

        if not is_regular:
            # Let diff arrays have same shape as original by
            # doubling last rows and columns.
            x_x_diff_c = da.concatenate([x_x_diff, x_x_diff[:, -1:]], axis=1)
            y_x_diff_c = da.concatenate([y_x_diff, y_x_diff[:, -1:]], axis=1)
            x_y_diff_c = da.concatenate([x_y_diff, x_y_diff[-1:, :]], axis=0)
            y_y_diff_c = da.concatenate([y_y_diff, y_y_diff[-1:, :]], axis=0)
            # Find resolution via area
            x_abs_diff = da.sqrt(da.square(x_x_diff_c) + da.square(x_y_diff_c))
            y_abs_diff = da.sqrt(da.square(y_x_diff_c) + da.square(y_y_diff_c))
            if crs.is_geographic:
                # Convert degrees into meters
                x_abs_diff_r = da.radians(x_abs_diff)
                y_abs_diff_r = da.radians(y_abs_diff)
                x_abs_diff = _ER * da.cos(x_abs_diff_r) * y_abs_diff_r
                y_abs_diff = _ER * y_abs_diff_r
            xy_areas = (x_abs_diff * y_abs_diff).flatten()
            xy_areas = da.where(xy_areas > 0, xy_areas, np.nan)
            # Get indices of min and max area
            xy_area_index_min = da.nanargmin(xy_areas)
            xy_area_index_max = da.nanargmax(xy_areas)
            # Convert area to edge length
            xy_res_min = math.sqrt(xy_areas[xy_area_index_min])
            xy_res_max = math.sqrt(xy_areas[xy_area_index_max])
            # Empirically weight min more than max
            xy_res = 0.7 * xy_res_min + 0.3 * xy_res_max
            if crs.is_geographic:
                # Convert meters back into degrees
                # print(f'xy_res in meters: {xy_res}')
                xy_res = math.degrees(xy_res / _ER)
                # print(f'xy_res in degrees: {xy_res}')
            # Because this is an estimation, we can round to a nice number
            xy_res = round_to_fraction(xy_res, digits=1, resolution=0.5)
            x_res, y_res = float(xy_res), float(xy_res)

        if tile_size is None and x_coords.chunks is not None:
            j_chunks, i_chunks = x_coords.chunks
            tile_size = max(0, *i_chunks), max(0, *j_chunks)

        if tile_size is not None:
            tile_width, tile_height = tile_size
            x_coords = x_coords.chunk((tile_height, tile_width))
            y_coords = y_coords.chunk((tile_height, tile_width))

        # Guess j axis direction
        is_j_axis_up = np.all(y_coords[0, :] < y_coords[-1, :]) or None

    assert_true(x_res > 0 and y_res > 0,
                'internal error: x_res and y_res could not be determined',
                exception_type=RuntimeError)

    x_res, y_res = _to_int_or_float(x_res), _to_int_or_float(y_res)
    x_res_05, y_res_05 = x_res / 2, y_res / 2
    x_min = _to_int_or_float(x_coords.min() - x_res_05)
    y_min = _to_int_or_float(y_coords.min() - y_res_05)
    x_max = _to_int_or_float(x_coords.max() + x_res_05)
    y_max = _to_int_or_float(y_coords.max() + y_res_05)

    return cls(x_coords=x_coords,
               y_coords=y_coords,
               crs=crs,
               size=size,
               tile_size=tile_size,
               xy_bbox=(x_min, y_min, x_max, y_max),
               xy_res=(x_res, y_res),
               xy_var_names=xy_var_names,
               xy_dim_names=(str(x_dim), str(y_dim)),
               is_regular=is_regular,
               is_lon_360=is_lon_360,
               is_j_axis_up=is_j_axis_up)
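The 1D branch above reduces to a simple test: the grid is regular when the coordinate spacing (the first difference) is nearly constant. A stripped-down sketch of that check in plain NumPy; the function name and default tolerance are mine, for illustration only:

import numpy as np

def is_regular_1d(coords, tolerance=1e-5):
    """Return (is_regular, resolution) for a 1D coordinate array."""
    diffs = np.abs(np.diff(coords))
    res = float(diffs[0])
    # Regular if every step matches the first step within the tolerance
    return bool(np.allclose(diffs, res, atol=tolerance)), res

print(is_regular_1d(np.array([0.0, 0.25, 0.5, 0.75])))   # (True, 0.25)
print(is_regular_1d(np.array([0.0, 0.25, 0.6, 0.75])))   # (False, 0.25)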
Example #5
# not convertable to dask easily:
fVabs_old = lambda Gxyz, kVabs: np.polyval(kVabs.flat, np.sqrt(np.tan(fInclination(Gxyz))))
rep2mean = lambda x, bOk: np.interp(np.arange(len(x)), np.flatnonzero(bOk), x[bOk], np.NaN, np.NaN)
fForce2Vabs_fitted = lambda x: da.where(x > 2, 2, da.where(x < 1, 0.25 * x, 0.25 * x + 0.3 * (x - 1) ** 4))
fIncl2Force = lambda incl: da.sqrt(da.tan(incl))
fVabs = lambda Gxyz, kVabs: fForce2Vabs_fitted(fIncl2Force(fInclination(Gxyz)))
f = lambda fun, *args: fun(*args)
positiveInd = lambda i, L: np.int32(da.where(i < 0, L - i, i))
minInterval = lambda iLims1, iLims2, L: f(
    lambda iL1, iL2: da.transpose([max(iL1[:, 0], iL2[:, 0]), min(iL1[:, -1], iL2[:, -1])]), positiveInd(iLims1, L),
    positiveInd(iLims2, L))
fStEn2bool = lambda iStEn, length: da.hstack(
    [(da.ones(iEn2iSt, dtype=np.bool8) if b else da.zeros(iEn2iSt, dtype=np.bool8))
     for iEn2iSt, b in da.vstack((
         da.diff(da.hstack((0, iStEn.flat, length))),
         da.hstack((da.repeat([(False, True)], np.size(iStEn, 0), 0).flat, False)))).T])
TimeShift_Log_sec = 60
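The fStEn2bool lambda above turns (start, end) index pairs into a boolean mask: differencing the flattened boundaries gives segment lengths, which are filled with alternating False/True runs. A plain-NumPy sketch of the same idea (the function name is mine, for illustration only):

import numpy as np

def start_end_to_bool(i_st_en, length):
    """Mark [start, end) index ranges as True in a mask of the given length."""
    # Segment boundaries: 0, s0, e0, s1, e1, ..., length
    edges = np.hstack((0, i_st_en.ravel(), length))
    seg_lengths = np.diff(edges)
    # Segments alternate outside/inside and end outside: F, T, F, T, ..., F
    flags = np.hstack((np.tile([False, True], i_st_en.shape[0]), False))
    return np.repeat(flags, seg_lengths)

print(start_end_to_bool(np.array([[2, 4], [7, 9]]), 10).astype(int))
# [0 0 1 1 0 0 0 1 1 0]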
Example #6
def dataset_chunks(datasets, time_bin_secs, max_row_chunks):
    """
    Given ``max_row_chunks`` determine a chunking strategy
    for each dataset that prevents binning unique times in
    separate chunks.
    """
    # Calculate (utime, idx, counts) tuple for each dataset
    # then transpose to get lists for each tuple entry
    if len(datasets) == 0:
        return (), ()

    utimes = []
    interval_avg = []
    counts = []
    monotonicity_checks = []

    for ds in datasets:
        # Compute unique times, their counts and interval sum
        # for each row chunk
        block_values = da.blockwise(_time_interval_sum,
                                    "r",
                                    ds.TIME.data,
                                    "r",
                                    ds.INTERVAL.data,
                                    "r",
                                    meta=np.empty((0, ), dtype=np.object),
                                    dtype=np.object)

        # Reduce each row chunk's values
        reduction = da.reduction(block_values,
                                 chunk=_chunk,
                                 combine=_time_int_combine,
                                 aggregate=_time_int_agg,
                                 concatenate=False,
                                 split_every=16,
                                 meta=np.empty((0, ), dtype=np.object),
                                 dtype=np.object)

        # Pull out the final unique times, counts and interval average
        utime = reduction.map_blocks(getitem, 0, dtype=ds.TIME.dtype)
        count = reduction.map_blocks(getitem, 1, dtype=np.int32)
        int_avg = reduction.map_blocks(getitem, 2, dtype=ds.INTERVAL.dtype)

        # Check monotonicity of TIME while we're at it
        is_monotonic = da.all(da.diff(ds.TIME.data) >= 0.0)

        utimes.append(utime)
        counts.append(count)
        interval_avg.append(int_avg)
        monotonicity_checks.append(is_monotonic)

    # Work out the unique times, average intervals for those times
    # and the frequency of those times
    (ds_utime, ds_avg_intervals, ds_counts,
     ds_monotonicity_checks) = dask.compute(utimes, interval_avg, counts,
                                            monotonicity_checks)

    if not all(ds_monotonicity_checks):
        raise ValueError("TIME is not monotonically increasing. "
                         "This is required.")

    # Produce row and time chunking strategies for each dataset
    ds_row_chunks = []
    ds_time_chunks = []
    ds_interval_secs = []

    it = zip(ds_utime, ds_avg_intervals, ds_counts)
    for di, (utime, avg_interval, counts) in enumerate(it):
        # Maintain row and time chunks for this dataset
        row_chunks = []
        time_chunks = []
        interval_secs = []

        # Start out with first entries
        bin_rows = counts[0]
        bin_times = 1
        bin_secs = avg_interval[0]

        dsit = enumerate(zip(utime[1:], avg_interval[1:], counts[1:]))
        for ti, (ut, avg_int, count) in dsit:
            if count > max_row_chunks:
                logger.warning(
                    "Unique time {:3f} occurred {:d} times "
                    "in dataset {:d} but this exceeds the "
                    "requested row chunks {:d}. "
                    "Consider increasing --row-chunks", ut, count, di,
                    max_row_chunks)

            if avg_int > time_bin_secs:
                logger.warning(
                    "The average INTERVAL associated with "
                    "unique time {:3f} in dataset {:d} "
                    "is {:3f} but this exceeds the requested "
                    "number of seconds in a time bin {:3f}s. "
                    "Consider increasing --time-bin-secs", ut, di, avg_int,
                    time_bin_secs)

            next_rows = bin_rows + count

            # If we're still within the number of rows for this bin
            # keep going
            if next_rows < max_row_chunks:
                bin_rows = next_rows
                bin_times += 1
                bin_secs += avg_int
            # Otherwise finalize this bin and
            # start a new one with the counts
            # we were trying to add
            else:
                row_chunks.append(bin_rows)
                time_chunks.append(bin_times)
                interval_secs.append(bin_secs)
                bin_rows = count
                bin_times = 1
                bin_secs = avg_int

        # Finish any remaining bins
        if bin_rows > 0:
            assert bin_times > 0
            row_chunks.append(bin_rows)
            time_chunks.append(bin_times)
            interval_secs.append(bin_secs)

        row_chunks = tuple(row_chunks)
        time_chunks = tuple(time_chunks)
        interval_secs = tuple(interval_secs)
        ds_row_chunks.append(row_chunks)
        ds_time_chunks.append(time_chunks)
        ds_interval_secs.append(interval_secs)

    logger.info("Dataset Chunking: (r)ow - (t)imes - (s)econds")

    it = zip(datasets, ds_row_chunks, ds_time_chunks, ds_interval_secs)
    for di, (ds, ds_rcs, ds_tcs, ds_int_secs) in enumerate(it):
        ds_rows = ds.dims['row']
        ds_crows = sum(ds_rcs)

        if not ds_rows == ds_crows:
            raise ValueError("Number of dataset rows %d "
                             "does not match the sum %d "
                             "of the row chunks %s" %
                             (ds_rows, ds_crows, ds_rcs))

        log_str = ", ".join("(%dr,%dt,%.1fs)" % (rc, tc, its)
                            for rc, tc, its in zip(*(ds_rcs, ds_tcs,
                                                     ds_int_secs)))

        logger.info("Dataset {d}: {s}", d=di, s=log_str)

    return ds_row_chunks, ds_time_chunks
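The inner loop above is a greedy binning pass over the per-unique-time row counts: rows accumulate into the current chunk until the next unique time would push it past max_row_chunks, at which point the chunk is closed and a new one started. A simplified sketch of just that grouping step (names are mine; the real loop also tracks time counts, interval seconds and emits warnings):

def greedy_row_chunks(counts, max_row_chunks):
    """Group per-unique-time row counts into row chunks, never splitting a time."""
    chunks = []
    bin_rows = counts[0]
    for count in counts[1:]:
        if bin_rows + count < max_row_chunks:
            bin_rows += count            # still room in the current chunk
        else:
            chunks.append(bin_rows)      # close the chunk, start a new one
            bin_rows = count
    if bin_rows > 0:
        chunks.append(bin_rows)          # flush the final chunk
    return tuple(chunks)

print(greedy_row_chunks([4, 4, 4, 4, 4], 10))   # (8, 8, 4)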
Example #7
def dataset_chunks(datasets, time_bin_secs, max_row_chunks):
    """
    Given ``max_row_chunks`` determine a chunking strategy
    for each dataset that prevents binning unique times in
    separate chunks.
    """
    # Calculate (utime, idx, counts) tuple for each dataset
    # then transpose to get lists for each tuple entry
    if len(datasets) == 0:
        return (), ()

    utimes = []
    interval_avg = []
    counts = []
    monotonicity_checks = []

    for ds in datasets:
        # Compute unique times, their counts and interval sum
        # for each row chunk
        block_values = da.blockwise(_time_interval_sum,
                                    "r",
                                    ds.TIME.data,
                                    "r",
                                    ds.INTERVAL.data,
                                    "r",
                                    meta=np.empty((0, ), dtype=np.object),
                                    dtype=np.object)

        # Reduce each row chunk's values
        reduction = da.reduction(block_values,
                                 chunk=_chunk,
                                 combine=_time_int_combine,
                                 aggregate=_time_int_agg,
                                 concatenate=False,
                                 split_every=16,
                                 meta=np.empty((0, ), dtype=np.object),
                                 dtype=np.object)

        # Pull out the final unique times, counts and interval average
        utime = reduction.map_blocks(getitem, 0, dtype=ds.TIME.dtype)
        count = reduction.map_blocks(getitem, 1, dtype=np.int32)
        int_avg = reduction.map_blocks(getitem, 2, dtype=ds.INTERVAL.dtype)

        # Check monotonicity of TIME while we're at it
        is_monotonic = da.all(da.diff(ds.TIME.data) >= 0.0)

        utimes.append(utime)
        counts.append(count)
        interval_avg.append(int_avg)
        monotonicity_checks.append(is_monotonic)

    # Work out the unique times, average intervals for those times
    # and the frequency of those times
    (ds_utime, ds_avg_intervals, ds_counts,
     ds_monotonicity_checks) = dask.compute(utimes, interval_avg, counts,
                                            monotonicity_checks)

    if not all(ds_monotonicity_checks):
        raise ValueError("TIME is not monotonically increasing. "
                         "This is required.")

    grouper = DatasetGrouper(time_bin_secs, max_row_chunks)
    res = grouper.group(ds_utime, ds_avg_intervals, ds_counts)
    ds_row_chunks, ds_time_chunks, ds_interval_secs = res

    logger.info("Dataset Chunking: (r)ow - (t)imes - (s)econds")

    it = zip(datasets, ds_row_chunks, ds_time_chunks, ds_interval_secs)
    for di, (ds, ds_rcs, ds_tcs, ds_int_secs) in enumerate(it):
        ds_rows = ds.dims['row']
        ds_crows = sum(ds_rcs)

        if not ds_rows == ds_crows:
            raise ValueError("Number of dataset rows %d "
                             "does not match the sum %d "
                             "of the row chunks %s" %
                             (ds_rows, ds_crows, ds_rcs))

        log_str = ", ".join("(%dr,%dt,%.1fs)" % (rc, tc, its)
                            for rc, tc, its in zip(*(ds_rcs, ds_tcs,
                                                     ds_int_secs)))

        logger.info("Dataset {d}: {s}", d=di, s=log_str)

    return ds_row_chunks, ds_time_chunks
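Example #7 is the same routine with the inline grouping loop factored out into a DatasetGrouper helper. Its actual implementation is not shown here; based on the loop in Example #6, a hypothetical stand-in with the same interface might look like this (illustrative only, warnings omitted):

class DatasetGrouperSketch:
    """Hypothetical stand-in mirroring the DatasetGrouper interface used above."""

    def __init__(self, time_bin_secs, max_row_chunks):
        self.time_bin_secs = time_bin_secs
        self.max_row_chunks = max_row_chunks

    def group(self, ds_utime, ds_avg_intervals, ds_counts):
        ds_row_chunks, ds_time_chunks, ds_interval_secs = [], [], []
        for _utime, avg_interval, counts in zip(ds_utime, ds_avg_intervals, ds_counts):
            # Greedy binning per dataset, as in Example #6
            rows, times, secs = [], [], []
            bin_rows, bin_times, bin_secs = counts[0], 1, avg_interval[0]
            for avg_int, count in zip(avg_interval[1:], counts[1:]):
                if bin_rows + count < self.max_row_chunks:
                    bin_rows += count
                    bin_times += 1
                    bin_secs += avg_int
                else:
                    rows.append(bin_rows)
                    times.append(bin_times)
                    secs.append(bin_secs)
                    bin_rows, bin_times, bin_secs = count, 1, avg_int
            if bin_rows > 0:
                rows.append(bin_rows)
                times.append(bin_times)
                secs.append(bin_secs)
            ds_row_chunks.append(tuple(rows))
            ds_time_chunks.append(tuple(times))
            ds_interval_secs.append(tuple(secs))
        return ds_row_chunks, ds_time_chunks, ds_interval_secs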