Example No. 1
def test_reductions():
    x = np.arange(5).astype('f4')
    a = da.from_array(x, chunks=(2,))

    assert eq(da.all(a), np.all(x))
    assert eq(da.any(a), np.any(x))
    assert eq(da.argmax(a, axis=0), np.argmax(x, axis=0))
    assert eq(da.argmin(a, axis=0), np.argmin(x, axis=0))
    assert eq(da.max(a), np.max(x))
    assert eq(da.mean(a), np.mean(x))
    assert eq(da.min(a), np.min(x))
    assert eq(da.nanargmax(a, axis=0), np.nanargmax(x, axis=0))
    assert eq(da.nanargmin(a, axis=0), np.nanargmin(x, axis=0))
    assert eq(da.nanmax(a), np.nanmax(x))
    assert eq(da.nanmin(a), np.nanmin(x))
    assert eq(da.nansum(a), np.nansum(x))
    assert eq(da.nanvar(a), np.nanvar(x))
    assert eq(da.nanstd(a), np.nanstd(x))
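
Note: the eq helper used above is defined in the surrounding dask test module and is not shown here. A minimal stand-in, assuming it simply materializes the dask result and compares it against the NumPy reference (the real helper performs additional consistency checks):

import numpy as np
import dask.array as da


def eq(a, b):
    # Hypothetical stand-in for the test module's helper: compute any dask
    # inputs and compare element-wise, treating NaNs as equal.
    a = a.compute() if isinstance(a, da.Array) else a
    b = b.compute() if isinstance(b, da.Array) else b
    return np.allclose(a, b, equal_nan=True)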
Example No. 2
def test_make_regression(n_samples, n_features, n_informative,
                         n_targets, bias, effective_rank,
                         tail_strength, noise, shuffle,
                         coef, n_parts, order,
                         use_full_low_rank, client):

    c = client
    from cuml.dask.datasets import make_regression

    result = make_regression(n_samples=n_samples, n_features=n_features,
                             n_informative=n_informative,
                             n_targets=n_targets, bias=bias,
                             effective_rank=effective_rank, noise=noise,
                             shuffle=shuffle, coef=coef,
                             n_parts=n_parts,
                             use_full_low_rank=use_full_low_rank,
                             order=order)

    if coef:
        out, values, coefs = result
    else:
        out, values = result

    assert out.shape == (n_samples, n_features), "out shape mismatch"

    if n_targets > 1:
        assert values.shape == (n_samples, n_targets), \
               "values shape mismatch"
    else:
        assert values.shape == (n_samples,), "values shape mismatch"

    assert len(out.chunks[0]) == n_parts
    assert len(out.chunks[1]) == 1

    if coef:
        if n_targets > 1:
            assert coefs.shape == (n_features, n_targets), \
                   "coefs shape mismatch"
            assert len(coefs.chunks[1]) == 1
        else:
            assert coefs.shape == (n_features,), "coefs shape mismatch"
            assert len(coefs.chunks[0]) == 1

        test1 = da.all(da.sum(coefs != 0.0, axis=0) == n_informative)

        std_test2 = da.std(values - (da.dot(out, coefs) + bias), axis=0)

        test1, std_test2 = da.compute(test1, std_test2)

        diff = cp.abs(1.0 - std_test2)
        test2 = cp.all(diff < 1.5 * 10**(-1.))

        assert test1, \
            "Unexpected number of informative features"

        assert test2, "Unexpectedly incongruent outputs"

    data_ddh = DistributedDataHandler.create(data=(out, values),
                                             client=c)
    out_part, value_part = data_ddh.gpu_futures[0][1].result()

    if coef:
        coefs_ddh = DistributedDataHandler.create(data=coefs,
                                                  client=c)
        coefs_part = coefs_ddh.gpu_futures[0][1].result()
    if order == 'F':
        assert out_part.flags['F_CONTIGUOUS']
        if n_targets > 1:
            assert value_part.flags['F_CONTIGUOUS']
            if coef:
                assert coefs_part.flags['F_CONTIGUOUS']
    elif order == 'C':
        assert out_part.flags['C_CONTIGUOUS']
        if n_targets > 1:
            assert value_part.flags['C_CONTIGUOUS']
            if coef:
                assert coefs_part.flags['C_CONTIGUOUS']
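
The assertions above verify two statistical properties of the generated data: exactly n_informative coefficients are non-zero, and the standard deviation of the residuals matches the requested noise level. A small CPU-only sketch of analogous checks using scikit-learn's make_regression (used here purely for illustration; the test above runs against cuml.dask on GPU arrays):

import numpy as np
from sklearn.datasets import make_regression

X, y, coefs = make_regression(n_samples=100, n_features=10, n_informative=5,
                              noise=0.0, coef=True, random_state=0)

assert np.sum(coefs != 0.0) == 5        # only the informative features are non-zero
resid_std = np.std(y - X.dot(coefs))    # with noise=0 the residuals are ~0
assert resid_std < 1e-6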
Example No. 3
# https://software.intel.com/en-us/blogs/2016/04/04/unleash-parallel-performance-of-python-programs
import dask, time
import dask.array as da
x = da.random.random((100000, 2000), chunks=(10000, 2000))
t0 = time.time()
q, r = da.linalg.qr(x)
test = da.all(da.isclose(x, q.dot(r)))
assert(test.compute()) # compute(get=dask.threaded.get) by default
print(time.time() - t0)
# python -m TBB intelCompilerTest.py
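
The get=dask.threaded.get keyword mentioned in the comment is an older Dask API; current releases select the scheduler with the scheduler= keyword instead. A minimal, self-contained sketch of the same correctness check with the newer spelling (smaller, hypothetical array sizes):

import dask.array as da

x = da.random.random((1000, 200), chunks=(100, 200))
q, r = da.linalg.qr(x)
check = da.all(da.isclose(x, q.dot(r)))

# Equivalent of the old compute(get=dask.threaded.get):
assert bool(check.compute(scheduler="threads"))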
Example No. 4
def dataset_chunks(datasets, time_bin_secs, max_row_chunks):
    """
    Given ``max_row_chunks`` determine a chunking strategy
    for each dataset that prevents binning unique times in
    separate chunks.
    """
    # Calculate (utime, idx, counts) tuple for each dataset
    # then transpose to get lists for each tuple entry
    if len(datasets) == 0:
        return (), ()

    utimes = []
    interval_avg = []
    counts = []
    monotonicity_checks = []

    for ds in datasets:
        # Compute unique times, their counts and interval sum
        # for each row chunk
        block_values = da.blockwise(_time_interval_sum,
                                    "r",
                                    ds.TIME.data,
                                    "r",
                                    ds.INTERVAL.data,
                                    "r",
                                    meta=np.empty((0, ), dtype=object),
                                    dtype=object)

        # Reduce each row chunk's values
        reduction = da.reduction(block_values,
                                 chunk=_chunk,
                                 combine=_time_int_combine,
                                 aggregate=_time_int_agg,
                                 concatenate=False,
                                 split_every=16,
                                 meta=np.empty((0, ), dtype=object),
                                 dtype=object)

        # Pull out the final unique times, counts and interval average
        utime = reduction.map_blocks(getitem, 0, dtype=ds.TIME.dtype)
        count = reduction.map_blocks(getitem, 1, dtype=np.int32)
        int_avg = reduction.map_blocks(getitem, 2, dtype=ds.INTERVAL.dtype)

        # Check monotonicity of TIME while we're at it
        is_monotonic = da.all(da.diff(ds.TIME.data) >= 0.0)

        utimes.append(utime)
        counts.append(count)
        interval_avg.append(int_avg)
        monotonicity_checks.append(is_monotonic)

    # Work out the unique times, average intervals for those times
    # and the frequency of those times
    (ds_utime, ds_avg_intervals, ds_counts,
     ds_monotonicity_checks) = dask.compute(utimes, interval_avg, counts,
                                            monotonicity_checks)

    if not all(ds_monotonicity_checks):
        raise ValueError("TIME is not monotonically increasing. "
                         "This is required.")

    # Produce row and time chunking strategies for each dataset
    ds_row_chunks = []
    ds_time_chunks = []
    ds_interval_secs = []

    it = zip(ds_utime, ds_avg_intervals, ds_counts)
    for di, (utime, avg_interval, counts) in enumerate(it):
        # Maintain row and time chunks for this dataset
        row_chunks = []
        time_chunks = []
        interval_secs = []

        # Start out with first entries
        bin_rows = counts[0]
        bin_times = 1
        bin_secs = avg_interval[0]

        dsit = enumerate(zip(utime[1:], avg_interval[1:], counts[1:]))
        for ti, (ut, avg_int, count) in dsit:
            if count > max_row_chunks:
                logger.warning(
                    "Unique time {:3f} occurred {:d} times "
                    "in dataset {:d} but this exceeds the "
                    "requested row chunks {:d}. "
                    "Consider increasing --row-chunks", ut, count, di,
                    max_row_chunks)

            if avg_int > time_bin_secs:
                logger.warning(
                    "The average INTERVAL associated with "
                    "unique time {:3f} in dataset {:d} "
                    "is {:3f} but this exceeds the requested "
                    "number of seconds in a time bin {:3f}s. "
                    "Consider increasing --time-bin-secs", ut, di, avg_int,
                    time_bin_secs)

            next_rows = bin_rows + count

            # If we're still within the number of rows for this bin
            # keep going
            if next_rows < max_row_chunks:
                bin_rows = next_rows
                bin_times += 1
                bin_secs += avg_int
            # Otherwise finalize this bin and
            # start a new one with the counts
            # we were trying to add
            else:
                row_chunks.append(bin_rows)
                time_chunks.append(bin_times)
                interval_secs.append(bin_secs)
                bin_rows = count
                bin_times = 1
                bin_secs = avg_int

        # Finish any remaining bins
        if bin_rows > 0:
            assert bin_times > 0
            row_chunks.append(bin_rows)
            time_chunks.append(bin_times)
            interval_secs.append(bin_secs)

        row_chunks = tuple(row_chunks)
        time_chunks = tuple(time_chunks)
        interval_secs = tuple(interval_secs)
        ds_row_chunks.append(row_chunks)
        ds_time_chunks.append(time_chunks)
        ds_interval_secs.append(interval_secs)

    logger.info("Dataset Chunking: (r)ow - (t)imes - (s)econds")

    it = zip(datasets, ds_row_chunks, ds_time_chunks, ds_interval_secs)
    for di, (ds, ds_rcs, ds_tcs, ds_int_secs) in enumerate(it):
        ds_rows = ds.dims['row']
        ds_crows = sum(ds_rcs)

        if not ds_rows == ds_crows:
            raise ValueError("Number of dataset rows %d "
                             "does not match the sum %d "
                             "of the row chunks %s" %
                             (ds_rows, ds_crows, ds_rcs))

        log_str = ", ".join("(%dr,%dt,%.1fs)" % (rc, tc, its)
                            for rc, tc, its in zip(*(ds_rcs, ds_tcs,
                                                     ds_int_secs)))

        logger.info("Dataset {d}: {s}", d=di, s=log_str)

    return ds_row_chunks, ds_time_chunks
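
The da.reduction call above relies on the chunk/combine/aggregate contract: the chunk function reduces each block, the optional combine function merges intermediate results, and aggregate produces the final value. A minimal sketch of that contract with a simple sum (toy data, not the _time_interval_sum machinery used above):

import numpy as np
import dask.array as da

x = da.arange(10, chunks=3)

# chunk= reduces each block to a partial sum; aggregate= sums the partials.
total = da.reduction(
    x,
    chunk=lambda block, axis, keepdims: block.sum(axis=axis, keepdims=keepdims),
    aggregate=lambda block, axis, keepdims: block.sum(axis=axis, keepdims=keepdims),
    dtype=x.dtype,
)

assert total.compute() == np.arange(10).sum()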
Example No. 5
def new_grid_mapping_from_coords(
    x_coords: xr.DataArray,
    y_coords: xr.DataArray,
    crs: Union[str, pyproj.crs.CRS],
    *,
    tile_size: Union[int, Tuple[int, int]] = None,
    tolerance: float = DEFAULT_TOLERANCE,
) -> GridMapping:
    crs = _normalize_crs(crs)
    assert_instance(x_coords, xr.DataArray, name='x_coords')
    assert_instance(y_coords, xr.DataArray, name='y_coords')
    assert_true(x_coords.ndim in (1, 2),
                'x_coords and y_coords must be either 1D or 2D arrays')
    assert_instance(tolerance, float, name='tolerance')
    assert_true(tolerance > 0.0, 'tolerance must be greater than zero')

    if x_coords.name and y_coords.name:
        xy_var_names = str(x_coords.name), str(y_coords.name)
    else:
        xy_var_names = _default_xy_var_names(crs)

    tile_size = _normalize_int_pair(tile_size, default=None)
    is_lon_360 = None  # None means "not yet known"
    if crs.is_geographic:
        is_lon_360 = bool(np.any(x_coords > 180))

    x_res = 0
    y_res = 0

    if x_coords.ndim == 1:
        # We have 1D x,y coordinates
        cls = Coords1DGridMapping

        assert_true(x_coords.size >= 2 and y_coords.size >= 2,
                    'sizes of x_coords and y_coords 1D arrays must be >= 2')

        size = x_coords.size, y_coords.size

        x_dim, y_dim = x_coords.dims[0], y_coords.dims[0]

        x_diff = _abs_no_zero(x_coords.diff(dim=x_dim).values)
        y_diff = _abs_no_zero(y_coords.diff(dim=y_dim).values)

        if not is_lon_360 and crs.is_geographic:
            is_anti_meridian_crossed = np.any(np.nanmax(x_diff) > 180)
            if is_anti_meridian_crossed:
                x_coords = to_lon_360(x_coords)
                x_diff = _abs_no_zero(x_coords.diff(dim=x_dim))
                is_lon_360 = True

        x_res, y_res = x_diff[0], y_diff[0]
        x_diff_equal = np.allclose(x_diff, x_res, atol=tolerance)
        y_diff_equal = np.allclose(y_diff, y_res, atol=tolerance)
        is_regular = x_diff_equal and y_diff_equal
        if is_regular:
            x_res = round_to_fraction(x_res, 5, 0.25)
            y_res = round_to_fraction(y_res, 5, 0.25)
        else:
            x_res = round_to_fraction(float(np.nanmedian(x_diff)), 2, 0.5)
            y_res = round_to_fraction(float(np.nanmedian(y_diff)), 2, 0.5)

        if tile_size is None \
                and x_coords.chunks is not None \
                and y_coords.chunks is not None:
            tile_size = (max(0,
                             *x_coords.chunks[0]), max(0, *y_coords.chunks[0]))

        # Guess j axis direction
        is_j_axis_up = bool(y_coords[0] < y_coords[-1])

    else:
        # We have 2D x,y coordinates
        cls = Coords2DGridMapping

        assert_true(
            x_coords.shape == y_coords.shape, 'shapes of x_coords and y_coords'
            ' 2D arrays must be equal')
        assert_true(
            x_coords.dims == y_coords.dims,
            'dimensions of x_coords and y_coords'
            ' 2D arrays must be equal')

        y_dim, x_dim = x_coords.dims

        height, width = x_coords.shape
        size = width, height

        x = da.asarray(x_coords)
        y = da.asarray(y_coords)

        x_x_diff = _abs_no_nan(da.diff(x, axis=1))
        x_y_diff = _abs_no_nan(da.diff(x, axis=0))
        y_x_diff = _abs_no_nan(da.diff(y, axis=1))
        y_y_diff = _abs_no_nan(da.diff(y, axis=0))

        if not is_lon_360 and crs.is_geographic:
            is_anti_meridian_crossed = da.any(da.max(x_x_diff) > 180) \
                                       or da.any(da.max(x_y_diff) > 180)
            if is_anti_meridian_crossed:
                x_coords = to_lon_360(x_coords)
                x = da.asarray(x_coords)
                x_x_diff = _abs_no_nan(da.diff(x, axis=1))
                x_y_diff = _abs_no_nan(da.diff(x, axis=0))
                is_lon_360 = True

        is_regular = False

        if da.all(x_y_diff == 0) and da.all(y_x_diff == 0):
            x_res = x_x_diff[0, 0]
            y_res = y_y_diff[0, 0]
            is_regular = \
                da.allclose(x_x_diff[0, :], x_res, atol=tolerance) \
                and da.allclose(x_x_diff[-1, :], x_res, atol=tolerance) \
                and da.allclose(y_y_diff[:, 0], y_res, atol=tolerance) \
                and da.allclose(y_y_diff[:, -1], y_res, atol=tolerance)

        if not is_regular:
            # Let diff arrays have same shape as original by
            # doubling last rows and columns.
            x_x_diff_c = da.concatenate([x_x_diff, x_x_diff[:, -1:]], axis=1)
            y_x_diff_c = da.concatenate([y_x_diff, y_x_diff[:, -1:]], axis=1)
            x_y_diff_c = da.concatenate([x_y_diff, x_y_diff[-1:, :]], axis=0)
            y_y_diff_c = da.concatenate([y_y_diff, y_y_diff[-1:, :]], axis=0)
            # Find resolution via area
            x_abs_diff = da.sqrt(da.square(x_x_diff_c) + da.square(x_y_diff_c))
            y_abs_diff = da.sqrt(da.square(y_x_diff_c) + da.square(y_y_diff_c))
            if crs.is_geographic:
                # Convert degrees into meters
                x_abs_diff_r = da.radians(x_abs_diff)
                y_abs_diff_r = da.radians(y_abs_diff)
                x_abs_diff = _ER * da.cos(x_abs_diff_r) * y_abs_diff_r
                y_abs_diff = _ER * y_abs_diff_r
            xy_areas = (x_abs_diff * y_abs_diff).flatten()
            xy_areas = da.where(xy_areas > 0, xy_areas, np.nan)
            # Get indices of min and max area
            xy_area_index_min = da.nanargmin(xy_areas)
            xy_area_index_max = da.nanargmax(xy_areas)
            # Convert area to edge length
            xy_res_min = math.sqrt(xy_areas[xy_area_index_min])
            xy_res_max = math.sqrt(xy_areas[xy_area_index_max])
            # Empirically weight min more than max
            xy_res = 0.7 * xy_res_min + 0.3 * xy_res_max
            if crs.is_geographic:
                # Convert meters back into degrees
                # print(f'xy_res in meters: {xy_res}')
                xy_res = math.degrees(xy_res / _ER)
                # print(f'xy_res in degrees: {xy_res}')
            # Because this is an estimation, we can round to a nice number
            xy_res = round_to_fraction(xy_res, digits=1, resolution=0.5)
            x_res, y_res = float(xy_res), float(xy_res)

        if tile_size is None and x_coords.chunks is not None:
            j_chunks, i_chunks = x_coords.chunks
            tile_size = max(0, *i_chunks), max(0, *j_chunks)

        if tile_size is not None:
            tile_width, tile_height = tile_size
            x_coords = x_coords.chunk((tile_height, tile_width))
            y_coords = y_coords.chunk((tile_height, tile_width))

        # Guess j axis direction
        is_j_axis_up = np.all(y_coords[0, :] < y_coords[-1, :]) or None

    assert_true(x_res > 0 and y_res > 0,
                'internal error: x_res and y_res could not be determined',
                exception_type=RuntimeError)

    x_res, y_res = _to_int_or_float(x_res), _to_int_or_float(y_res)
    x_res_05, y_res_05 = x_res / 2, y_res / 2
    x_min = _to_int_or_float(x_coords.min() - x_res_05)
    y_min = _to_int_or_float(y_coords.min() - y_res_05)
    x_max = _to_int_or_float(x_coords.max() + x_res_05)
    y_max = _to_int_or_float(y_coords.max() + y_res_05)

    return cls(x_coords=x_coords,
               y_coords=y_coords,
               crs=crs,
               size=size,
               tile_size=tile_size,
               xy_bbox=(x_min, y_min, x_max, y_max),
               xy_res=(x_res, y_res),
               xy_var_names=xy_var_names,
               xy_dim_names=(str(x_dim), str(y_dim)),
               is_regular=is_regular,
               is_lon_360=is_lon_360,
               is_j_axis_up=is_j_axis_up)
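
For the 1D branch above, the regularity test amounts to checking that every coordinate step is (almost) equal to the first step. A tiny self-contained illustration of that idea with plain NumPy (hypothetical axis, not xcube API):

import numpy as np

# Hypothetical 1D longitude axis with a constant 0.25-degree step.
lon = np.arange(-180.0, 180.0, 0.25)

diffs = np.abs(np.diff(lon))
x_res = diffs[0]
is_regular = np.allclose(diffs, x_res, atol=1e-5)
assert is_regular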
Example No. 6
def lengths_and_angles_to_box_vectors(a_length, b_length, c_length, alpha, beta, gamma):
    """Convert from the lengths/angles of the unit cell to the box
    vectors (Bravais vectors). The angles should be in degrees.

    Mimics mdtraj.core.unitcell.lengths_and_angles_to_box_vectors()

    Parameters
    ----------
    a_length : scalar or ndarray
        length of Bravais unit vector **a**
    b_length : scalar or ndarray
        length of Bravais unit vector **b**
    c_length : scalar or ndarray
        length of Bravais unit vector **c**
    alpha : scalar or ndarray
        angle between vectors **b** and **c**, in degrees.
    beta : scalar or ndarray
        angle between vectors **c** and **a**, in degrees.
    gamma : scalar or ndarray
        angle between vectors **a** and **b**, in degrees.

    Returns
    -------
    a : dask.array
        If the inputs are scalar, the vectors will be one dimensional
        (length 3). If the inputs are one dimensional with shape
        (n_frames,), then the output will be (n_frames, 3).
    b : dask.array
        If the inputs are scalar, the vectors will be one dimensional
        (length 3). If the inputs are one dimensional with shape
        (n_frames,), then the output will be (n_frames, 3).
    c : dask.array
        If the inputs are scalar, the vectors will be one dimensional
        (length 3). If the inputs are one dimensional with shape
        (n_frames,), then the output will be (n_frames, 3).

    This code is adapted from gyroid, which is licensed under the BSD
    http://pythonhosted.org/gyroid/_modules/gyroid/unitcell.html
    """
    # Fix for da that requires angles and lengths to be arrays
    lengths = [a_length, b_length, c_length]
    for i, e in enumerate(lengths):
        # Use Python short-circuit evaluation to avoid computing dask arrays
        if not isinstance(e, da.core.Array) and np.isscalar(e):
            lengths[i] = np.array([e])
    a_length, b_length, c_length = tuple(lengths)

    angles = [alpha, beta, gamma]
    for i, e in enumerate(angles):
        if not isinstance(e, da.core.Array) and np.isscalar(e):
            angles[i] = np.array([e])
    alpha, beta, gamma = tuple(angles)

    if da.all(alpha < 2 * np.pi) and (
        da.all(beta < 2 * np.pi) and da.all(gamma < 2 * np.pi)
    ):
        warnings.warn(
            "All your angles were less than 2*pi."
            " Did you accidentally give me radians?"
        )

    alpha = alpha * np.pi / 180
    beta = beta * np.pi / 180
    gamma = gamma * np.pi / 180

    a = da.stack([a_length, da.zeros_like(a_length), da.zeros_like(a_length)])
    b = da.stack(
        [b_length * da.cos(gamma), b_length * da.sin(gamma), da.zeros_like(b_length)]
    )
    cx = c_length * da.cos(beta)
    cy = c_length * (da.cos(alpha) - da.cos(beta) * da.cos(gamma)) / da.sin(gamma)
    cz = da.sqrt(c_length * c_length - cx * cx - cy * cy)
    c = da.stack([cx, cy, cz])
    if not a.shape == b.shape == c.shape:
        raise TypeError("Shape is messed up.")

    # Make sure that all vector components that are _almost_ 0 are set exactly
    # to 0
    tol = 1e-6
    a[da.logical_and(a > -tol, a < tol)] = 0.0
    b[da.logical_and(b > -tol, b < tol)] = 0.0
    c[da.logical_and(c > -tol, c < tol)] = 0.0

    return a.T, b.T, c.T
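
A minimal usage sketch, assuming the function above is in scope: an orthorhombic 10 x 12 x 14 cell with all angles at 90 degrees should yield axis-aligned box vectors.

# Hypothetical example; lengths in the trajectory's units, angles in degrees
# as required by the docstring.
a, b, c = lengths_and_angles_to_box_vectors(10.0, 12.0, 14.0, 90.0, 90.0, 90.0)
print(a.compute())  # approximately [[10.,  0.,  0.]]
print(b.compute())  # approximately [[ 0., 12.,  0.]]
print(c.compute())  # approximately [[ 0.,  0., 14.]]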
Example No. 7
    #jetM = jetM_[runMask][:nJets]
    #print(" >> %s: %s" % ('jetM', jetM.shape))
    #jetPt = jetPt_[runMask][:nJets]
    #print(" >> %s: %s" % ('jetPt', jetPt.shape))
    X_jets0 = X_jets0_[runMask][:nJets]
    print(" >> %s: %s" % ('X_jets', X_jets0.shape))
    X_jets1 = X_jets1_[runMask][:nJets]
    print(" >> %s: %s" % ('X_jets', X_jets1.shape))
    X_FC = X_FC_[runMask][:nJets]
    print(" >> %s: %s" % ('X_FC', X_FC.shape))
    #X_ECAL_stacked = X_ECAL_stacked_[runMask][:nJets]
    #print(" >> %s: %s" % ('X_ECAL_stacked', X_ECAL_stacked.shape))
    y_jets = y_jets_[runMask][:nJets]
    print(" >> %s: %s" % ('y_jets', y_jets.shape))

    assert da.all(jetEventId == jetEventId1)

    #file_out_str = "test_jets.hdf5"
    file_out_str = "%s/%s/%s_n%d_label%d_jetcombo_run%d.hdf5" % (
        eosDir, decay, decay, nJets, label, i)
    print " >> Writing to:", file_out_str
    da.to_hdf5(
        file_out_str,
        {
            #'runId': runId,
            #'lumiId': lumiId,
            #'eventId': eventId,
            #'X_ECAL_stacked': X_ECAL_stacked,
            #'y': y,
            'jetRunId': jetRunId,
            'jetEventId': jetEventId,
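
The fragment above (truncated in the original) ends in a da.to_hdf5 call that writes several dask arrays into one HDF5 file. A minimal, self-contained sketch of that call with hypothetical arrays and file name (requires h5py):

import dask.array as da

X = da.random.random((1000, 4), chunks=(250, 4))
y = da.random.randint(0, 2, size=(1000,), chunks=(250,))

# Each dictionary key becomes a dataset path inside the HDF5 file.
da.to_hdf5('example_jets.hdf5', {'/X': X, '/y': y})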
Example No. 8
def dataset_chunks(datasets, time_bin_secs, max_row_chunks):
    """
    Given ``max_row_chunks`` determine a chunking strategy
    for each dataset that prevents binning unique times in
    separate chunks.
    """
    # Calculate (utime, idx, counts) tuple for each dataset
    # then transpose to get lists for each tuple entry
    if len(datasets) == 0:
        return (), ()

    utimes = []
    interval_avg = []
    counts = []
    monotonicity_checks = []

    for ds in datasets:
        # Compute unique times, their counts and interval sum
        # for each row chunk
        block_values = da.blockwise(_time_interval_sum,
                                    "r",
                                    ds.TIME.data,
                                    "r",
                                    ds.INTERVAL.data,
                                    "r",
                                    meta=np.empty((0, ), dtype=object),
                                    dtype=object)

        # Reduce each row chunk's values
        reduction = da.reduction(block_values,
                                 chunk=_chunk,
                                 combine=_time_int_combine,
                                 aggregate=_time_int_agg,
                                 concatenate=False,
                                 split_every=16,
                                 meta=np.empty((0, ), dtype=object),
                                 dtype=object)

        # Pull out the final unique times, counts and interval average
        utime = reduction.map_blocks(getitem, 0, dtype=ds.TIME.dtype)
        count = reduction.map_blocks(getitem, 1, dtype=np.int32)
        int_avg = reduction.map_blocks(getitem, 2, dtype=ds.INTERVAL.dtype)

        # Check monotonicity of TIME while we're at it
        is_monotonic = da.all(da.diff(ds.TIME.data) >= 0.0)

        utimes.append(utime)
        counts.append(count)
        interval_avg.append(int_avg)
        monotonicity_checks.append(is_monotonic)

    # Work out the unique times, average intervals for those times
    # and the frequency of those times
    (ds_utime, ds_avg_intervals, ds_counts,
     ds_monotonicity_checks) = dask.compute(utimes, interval_avg, counts,
                                            monotonicity_checks)

    if not all(ds_monotonicity_checks):
        raise ValueError("TIME is not monotonically increasing. "
                         "This is required.")

    grouper = DatasetGrouper(time_bin_secs, max_row_chunks)
    res = grouper.group(ds_utime, ds_avg_intervals, ds_counts)
    ds_row_chunks, ds_time_chunks, ds_interval_secs = res

    logger.info("Dataset Chunking: (r)ow - (t)imes - (s)econds")

    it = zip(datasets, ds_row_chunks, ds_time_chunks, ds_interval_secs)
    for di, (ds, ds_rcs, ds_tcs, ds_int_secs) in enumerate(it):
        ds_rows = ds.dims['row']
        ds_crows = sum(ds_rcs)

        if not ds_rows == ds_crows:
            raise ValueError("Number of dataset rows %d "
                             "does not match the sum %d "
                             "of the row chunks %s" %
                             (ds_rows, ds_crows, ds_rcs))

        log_str = ", ".join("(%dr,%dt,%.1fs)" % (rc, tc, its)
                            for rc, tc, its in zip(*(ds_rcs, ds_tcs,
                                                     ds_int_secs)))

        logger.info("Dataset {d}: {s}", d=di, s=log_str)

    return ds_row_chunks, ds_time_chunks
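
A tiny sketch of the monotonicity check used in both versions of this function, on a toy TIME column:

import numpy as np
import dask.array as da

time_col = da.from_array(np.array([0.0, 1.0, 1.0, 2.5, 3.0]), chunks=2)
is_monotonic = da.all(da.diff(time_col) >= 0.0)
assert bool(is_monotonic.compute())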
Example No. 9
def _bench(self, get):
    # Note: this benchmark uses the legacy Dask get= keyword; current Dask
    # releases select the scheduler with scheduler= instead.
    q, r = da.linalg.qr(self.x)
    test = da.all(da.isclose(self.x, q.dot(r)))
    test.compute(get=get)
Example No. 10
def qr(x):
    t0 = time.time()
    q, r = da.linalg.qr(x)
    test = da.all(da.isclose(x, q.dot(r)))
    test.compute()
    print(time.time() - t0)