Example #1
def get_observer_look(sat_lon, sat_lat, sat_alt, utc_time, lon, lat, alt):
    """Calculate observers look angle to a satellite.
    http://celestrak.com/columns/v02n02/

    utc_time: Observation time (datetime object)
    lon: Longitude of observer position on ground in degrees east
    lat: Latitude of observer position on ground in degrees north
    alt: Altitude above sea-level (geoid) of observer position on ground in km

    Return: (Azimuth, Elevation) in degrees
    """
    (pos_x, pos_y, pos_z), (vel_x, vel_y, vel_z) = astronomy.observer_position(
        utc_time, sat_lon, sat_lat, sat_alt)

    (opos_x, opos_y, opos_z), (ovel_x, ovel_y, ovel_z) = \
        astronomy.observer_position(utc_time, lon, lat, alt)

    lon = np.deg2rad(lon)
    lat = np.deg2rad(lat)

    theta = (astronomy.gmst(utc_time) + lon) % (2 * np.pi)

    rx = pos_x - opos_x
    ry = pos_y - opos_y
    rz = pos_z - opos_z

    sin_lat = np.sin(lat)
    cos_lat = np.cos(lat)
    sin_theta = np.sin(theta)
    cos_theta = np.cos(theta)

    top_s = sin_lat * cos_theta * rx + \
        sin_lat * sin_theta * ry - cos_lat * rz
    top_e = -sin_theta * rx + cos_theta * ry
    top_z = cos_lat * cos_theta * rx + \
        cos_lat * sin_theta * ry + sin_lat * rz

    az_ = np.arctan(-top_e / top_s)

    if has_xarray and isinstance(az_, xr.DataArray):
        az_data = az_.data
    else:
        az_data = az_

    if has_dask and isinstance(az_data, da.Array):
        az_data = da.where(top_s > 0, az_data + np.pi, az_data)
        az_data = da.where(az_data < 0, az_data + 2 * np.pi, az_data)
    else:
        az_data[np.where(top_s > 0)] += np.pi
        az_data[np.where(az_data < 0)] += 2 * np.pi

    if has_xarray and isinstance(az_, xr.DataArray):
        az_.data = az_data
    else:
        az_ = az_data

    rg_ = np.sqrt(rx * rx + ry * ry + rz * rz)
    el_ = np.arcsin(top_z / rg_)

    return np.rad2deg(az_), np.rad2deg(el_)
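The quadrant correction above is the interesting part: NumPy arrays can be updated in place through boolean indexing, while dask arrays are immutable, so the dask branch rebuilds the array with da.where. Below is a minimal, standalone sketch of that dispatch pattern; the wrap_azimuth helper and the sample values are illustrative and not taken from pyorbital.

import numpy as np
import dask.array as da

def wrap_azimuth(az, top_s):
    # Shift the arctan output into the correct quadrant and wrap into [0, 2*pi).
    if isinstance(az, da.Array):
        az = da.where(top_s > 0, az + np.pi, az)
        az = da.where(az < 0, az + 2 * np.pi, az)
    else:
        az = np.asarray(az, dtype=float).copy()
        az[top_s > 0] += np.pi
        az[az < 0] += 2 * np.pi
    return az

az = wrap_azimuth(da.from_array(np.array([-1.0, 0.5]), chunks=1),
                  da.from_array(np.array([1.0, -1.0]), chunks=1))
print(az.compute())  # approximately [pi - 1, 0.5]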
Example #2
def test_where_incorrect_args():
    a = da.ones(5, chunks=3)

    for kwd in ["x", "y"]:
        kwargs = {kwd: a}
        try:
            da.where(a > 0, **kwargs)
        except ValueError as e:
            assert 'either both or neither of x and y should be given' in str(e)
Example #3
def _mask_coordinates_dask(lons, lats):
    """Mask invalid coordinate values"""
    # lons = da.ravel(lons)
    # lats = da.ravel(lats)
    idxs = ((lons < -180.) | (lons > 180.) |
            (lats < -90.) | (lats > 90.))
    lons = da.where(idxs, np.nan, lons)
    lats = da.where(idxs, np.nan, lats)

    return lons, lats
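A minimal, self-contained usage sketch of the same masking idea (the sample coordinates are made up): any pair where either the longitude or the latitude is out of range is replaced by NaN.

import numpy as np
import dask.array as da

lons = da.from_array(np.array([10.0, 200.0, -50.0]), chunks=2)
lats = da.from_array(np.array([45.0, 30.0, 95.0]), chunks=2)

invalid = (lons < -180.) | (lons > 180.) | (lats < -90.) | (lats > 90.)
print(da.where(invalid, np.nan, lons).compute())  # [10. nan nan]
print(da.where(invalid, np.nan, lats).compute())  # [45. nan nan]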
Example #4
def test_where_scalar_dtype():
    x = np.int32(3)
    y1 = np.array([4, 5, 6], dtype=np.int16)
    c1 = np.array([1, 0, 1])
    y2 = da.from_array(y1, chunks=2)
    c2 = da.from_array(c1, chunks=2)
    w1 = np.where(c1, x, y1)
    w2 = da.where(c2, x, y2)
    assert_eq(w1, w2)
    # Test again for the bool optimization
    w3 = np.where(True, x, y1)
    w4 = da.where(True, x, y1)
    assert_eq(w3, w4)
Example #5
def kurtosis(a, axis=0, fisher=True, bias=True, nan_policy='propagate'):
    if nan_policy != 'propagate':
        raise NotImplementedError("`nan_policy` other than 'propagate' "
                                  "have not been implemented.")
    n = a.shape[axis]  # noqa; for bias
    m2 = moment(a, 2, axis)
    m4 = moment(a, 4, axis)
    zero = (m2 == 0)
    olderr = np.seterr(all='ignore')
    try:
        vals = da.where(zero, 0, m4 / m2**2.0)
    finally:
        np.seterr(**olderr)

    if not bias:
        # need a version of np.place
        raise NotImplementedError("bias=False is not implemented.")

    if vals.ndim == 0:
        return vals  # TODO: scalar
        # vals = vals.item()  # array scalar

    if fisher:
        return vals - 3
    else:
        return vals
Example #6
def periodic_distance(a, b, periodic):
    '''Periodic distance between two arrays. Periodic is a 3
    dimensional array containing the 3 box sizes.

    '''
    delta = abs(a - b)
    delta = da.where(delta > 0.5 * periodic, periodic - delta, delta)
    return da.sqrt((delta ** 2).sum(axis=-1))
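A short usage sketch, assuming periodic_distance above is in scope and with a made-up 10-unit box: the x-separation of 8.5 exceeds half the box length, so the minimum-image convention folds it back to 1.5 before the norm is taken.

import numpy as np
import dask.array as da

periodic = np.array([10.0, 10.0, 10.0])
a = da.from_array(np.array([[1.0, 1.0, 1.0]]), chunks=1)
b = da.from_array(np.array([[9.5, 1.0, 1.0]]), chunks=1)
print(periodic_distance(a, b, periodic).compute())  # [1.5]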
Example #7
def _unequal_var_ttest_denom(v1, n1, v2, n2):
    vn1 = v1 / n1
    vn2 = v2 / n2
    with np.errstate(divide='ignore', invalid='ignore'):
        df = (vn1 + vn2)**2 / (vn1**2 / (n1 - 1) + vn2**2 / (n2 - 1))

    # If df is undefined, variances are zero (assumes n1 > 0 & n2 > 0).
    # Hence it doesn't matter what df is as long as it's not NaN.
    df = da.where(da.isnan(df), 1, df)  # XXX: np -> da
    denom = da.sqrt(vn1 + vn2)
    return df, denom
Example #8
def _solve_quadratic_dask(a__, b__, c__, min_val=0.0, max_val=1.0):
    """Solve quadratic equation and return the valid roots from interval
    [*min_val*, *max_val*]

    """

    discriminant = b__ * b__ - 4 * a__ * c__

    # Solve the quadratic polynomial
    x_1 = (-b__ + da.sqrt(discriminant)) / (2 * a__)
    x_2 = (-b__ - da.sqrt(discriminant)) / (2 * a__)

    # Find valid solutions, i.e. 0 <= t <= 1
    idxs = (x_1 < min_val) | (x_1 > max_val)
    x__ = da.where(idxs, x_2, x_1)

    idxs = (x__ < min_val) | (x__ > max_val)
    x__ = da.where(idxs, np.nan, x__)

    return x__
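A quick usage sketch with made-up coefficients, assuming _solve_quadratic_dask above is in scope: x**2 - 3x + 2 has roots 2 and 1, so only 1 survives the [0, 1] filter, while x**2 - 0.5x has roots 0 and 0.5 and the '+' root 0.5 is kept directly.

import numpy as np
import dask.array as da

a = da.from_array(np.array([1.0, 1.0]), chunks=1)
b = da.from_array(np.array([-3.0, -0.5]), chunks=1)
c = da.from_array(np.array([2.0, 0.0]), chunks=1)
print(_solve_quadratic_dask(a, b, c).compute())  # [1.  0.5]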
Example #9
def _get_ts_parallellogram_dask(pt_1, pt_2, pt_3, out_y, out_x):
    """Get parameters for the case where uprights are parallel"""

    # Pairwise longitudinal separations between reference points
    x_21 = pt_2[:, 0] - pt_1[:, 0]
    x_31 = pt_3[:, 0] - pt_1[:, 0]

    # Pairwise latitudinal separations between reference points
    y_21 = pt_2[:, 1] - pt_1[:, 1]
    y_31 = pt_3[:, 1] - pt_1[:, 1]

    t__ = (x_21 * (out_y - pt_1[:, 1]) - y_21 * (out_x - pt_1[:, 0])) / \
          (x_21 * y_31 - y_21 * x_31)
    idxs = (t__ < 0.) | (t__ > 1.)
    t__ = da.where(idxs, np.nan, t__)

    s__ = (out_x - pt_1[:, 0] + x_31 * t__) / x_21
    idxs = (s__ < 0.) | (s__ > 1.)
    s__ = da.where(idxs, np.nan, s__)

    return t__, s__
Example #10
def _get_ts_dask(pt_1, pt_2, pt_3, pt_4, out_x, out_y):
    """Calculate vertical and horizontal fractional distances t and s"""

    # General case, i.e. where the corners form an irregular rectangle
    t__, s__ = _get_ts_irregular_dask(pt_1, pt_2, pt_3, pt_4, out_y, out_x)

    # Cases where verticals are parallel
    idxs = da.isnan(t__) | da.isnan(s__)
    # Remove extra dimensions
    idxs = da.ravel(idxs)

    if da.any(idxs):
        t_new, s_new = _get_ts_uprights_parallel_dask(pt_1, pt_2,
                                                      pt_3, pt_4,
                                                      out_y, out_x)

        t__ = da.where(idxs, t_new, t__)
        s__ = da.where(idxs, s_new, s__)

    # Cases where both verticals and horizontals are parallel
    idxs = da.isnan(t__) | da.isnan(s__)
    # Remove extra dimensions
    idxs = da.ravel(idxs)
    if da.any(idxs):
        t_new, s_new = _get_ts_parallellogram_dask(pt_1, pt_2, pt_3,
                                                   out_y, out_x)
        t__ = da.where(idxs, t_new, t__)
        s__ = da.where(idxs, s_new, s__)

    idxs = (t__ < 0) | (t__ > 1) | (s__ < 0) | (s__ > 1)
    t__ = da.where(idxs, np.nan, t__)
    s__ = da.where(idxs, np.nan, s__)

    return t__, s__
Example #11
def test_where_nonzero():
    for shape, chunks in [(0, ()), ((0, 0), (0, 0)), ((15, 16), (4, 5))]:
        x = np.random.randint(10, size=shape)
        d = da.from_array(x, chunks=chunks)

        x_w = np.where(x)
        d_w = da.where(d)

        assert isinstance(d_w, type(x_w))
        assert len(d_w) == len(x_w)

        for i in range(len(x_w)):
            assert_eq(d_w[i], x_w[i])
Example #12
def _solve_another_fractional_distance_dask(f__, y_1, y_2, y_3, y_4, out_y):
    """Solve parameter t__ from s__, or vice versa.  For solving s__,
    switch order of y_2 and y_3."""
    y_21 = y_2 - y_1
    y_43 = y_4 - y_3

    g__ = ((out_y - y_1 - y_21 * f__) /
           (y_3 + y_43 * f__ - y_1 - y_21 * f__))

    # Limit values to interval [0, 1]
    idxs = (g__ < 0) | (g__ > 1)
    g__ = da.where(idxs, np.nan, g__)

    return g__
Example #13
def test_where_bool_optimization():
    x = np.random.randint(10, size=(15, 16))
    d = da.from_array(x, chunks=(4, 5))
    y = np.random.randint(10, size=(15, 16))
    e = da.from_array(y, chunks=(4, 5))

    for c in [True, False, np.True_, np.False_, 1, 0]:
        w1 = da.where(c, d, e)
        w2 = np.where(c, x, y)

        assert_eq(w1, w2)

        ex_w1 = d if c else e

        assert w1 is ex_w1
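The assert w1 is ex_w1 line is the point of this test: when the condition is a plain Python or NumPy bool (or 0/1), da.where short-circuits and hands back one of its inputs unchanged instead of building a new graph. A compressed sketch of that behaviour:

import numpy as np
import dask.array as da

d = da.from_array(np.arange(6), chunks=3)
e = da.zeros(6, chunks=3)

assert da.where(True, d, e) is d
assert da.where(np.False_, d, e) is e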
Example #14
def test_where():
    x = np.random.randint(10, size=(15, 14))
    x[5, 5] = x[4, 4] = 0 # Ensure some false elements
    d = da.from_array(x, chunks=(4, 5))
    y = np.random.randint(10, size=15).astype(np.uint8)
    e = da.from_array(y, chunks=(4,))

    for c1, c2 in [(d > 5, x > 5),
                   (d, x),
                   (1, 1),
                   (0, 0),
                   (5, 5),
                   (True, True),
                   (np.True_, np.True_),
                   (False, False),
                   (np.False_, np.False_)]:
        for b1, b2 in [(0, 0), (-e[:, None], -y[:, None]), (e[:14], y[:14])]:
            w1 = da.where(c1, d, b1)
            w2 = np.where(c2, x, b2)
            assert_eq(w1, w2)
Example #15
def skew(a, axis=0, bias=True, nan_policy='propagate'):
    if nan_policy != 'propagate':
        raise NotImplementedError("`nan_policy` other than 'propagate' "
                                  "have not been implemented.")

    n = a.shape[axis]  # noqa; for bias
    m2 = moment(a, 2, axis)
    m3 = moment(a, 3, axis)
    zero = (m2 == 0)
    vals = da.where(~zero, m3 / m2**1.5, 0.)
    # vals = da.where(~zero, (m2, m3),
    #                 lambda m2, m3: m3 / m2**1.5,
    #                 0.)
    if not bias:
        # Need a version of np.place
        raise NotImplementedError("bias=False is not implemented.")

    if vals.ndim == 0:
        return vals
        # TODO: scalar
        # return vals.item()

    return vals
Example #16
def _residual(ms, stack, **kw):
    args = OmegaConf.create(kw)
    OmegaConf.set_struct(args, True)
    pyscilog.log_to_file(args.output_filename + '.log')
    pyscilog.enable_memory_logging(level=3)

    # number of threads per worker
    if args.nthreads is None:
        if args.host_address is not None:
            raise ValueError(
                "You have to specify nthreads when using a distributed scheduler"
            )
        import multiprocessing
        nthreads = multiprocessing.cpu_count()
        args.nthreads = nthreads
    else:
        nthreads = args.nthreads

    # configure memory limit
    if args.mem_limit is None:
        if args.host_address is not None:
            raise ValueError(
                "You have to specify mem-limit when using a distributed scheduler"
            )
        import psutil
        mem_limit = int(psutil.virtual_memory()[0] /
                        1e9)  # 100% of memory by default
        args.mem_limit = mem_limit
    else:
        mem_limit = args.mem_limit

    nband = args.nband
    if args.nworkers is None:
        nworkers = nband
        args.nworkers = nworkers
    else:
        nworkers = args.nworkers

    if args.nthreads_per_worker is None:
        nthreads_per_worker = 1
        args.nthreads_per_worker = nthreads_per_worker
    else:
        nthreads_per_worker = args.nthreads_per_worker

    # the number of chunks being read in simultaneously is equal to
    # the number of dask threads
    nthreads_dask = nworkers * nthreads_per_worker

    if args.ngridder_threads is None:
        if args.host_address is not None:
            ngridder_threads = nthreads // nthreads_per_worker
        else:
            ngridder_threads = nthreads // nthreads_dask
        args.ngridder_threads = ngridder_threads
    else:
        ngridder_threads = args.ngridder_threads

    ms = list(ms)
    print('Input Options:', file=log)
    for key in kw.keys():
        print('     %25s = %s' % (key, args[key]), file=log)

    # numpy imports have to happen after this step
    from pfb import set_client
    set_client(nthreads, mem_limit, nworkers, nthreads_per_worker,
               args.host_address, stack, log)

    import numpy as np
    from pfb.utils.misc import chan_to_band_mapping
    import dask
    from dask.graph_manipulation import clone
    from dask.distributed import performance_report
    from daskms import xds_from_storage_ms as xds_from_ms
    from daskms import xds_from_storage_table as xds_from_table
    import dask.array as da
    from africanus.constants import c as lightspeed
    from africanus.gridding.wgridder.dask import residual as im2residim
    from ducc0.fft import good_size
    from pfb.utils.misc import stitch_images, plan_row_chunk
    from pfb.utils.fits import set_wcs, save_fits

    # chan <-> band mapping
    freqs, freq_bin_idx, freq_bin_counts, freq_out, band_mapping, chan_chunks = chan_to_band_mapping(
        ms, nband=nband)

    # gridder memory budget
    max_chan_chunk = 0
    max_freq = 0
    for ims in ms:
        for spw in freqs[ims]:
            counts = freq_bin_counts[ims][spw].compute()
            freq = freqs[ims][spw].compute()
            max_chan_chunk = np.maximum(max_chan_chunk, counts.max())
            max_freq = np.maximum(max_freq, freq.max())

    # assumes measurement sets have the same columns,
    # number of correlations etc.
    xds = xds_from_ms(ms[0])
    ncorr = xds[0].dims['corr']
    nrow = xds[0].dims['row']
    data_bytes = getattr(xds[0], args.data_column).data.itemsize
    bytes_per_row = max_chan_chunk * ncorr * data_bytes
    memory_per_row = bytes_per_row

    # real valued weights
    wdims = getattr(xds[0], args.weight_column).data.ndim
    if wdims == 2:  # WEIGHT
        memory_per_row += ncorr * data_bytes / 2
    else:  # WEIGHT_SPECTRUM
        memory_per_row += bytes_per_row / 2

    # flags (uint8 or bool)
    memory_per_row += np.dtype(np.uint8).itemsize * max_chan_chunk * ncorr

    # UVW
    memory_per_row += xds[0].UVW.data.itemsize * 3

    # ANTENNA1/2
    memory_per_row += xds[0].ANTENNA1.data.itemsize * 2

    columns = (args.data_column, args.weight_column, args.flag_column, 'UVW',
               'ANTENNA1', 'ANTENNA2')

    # flag row
    if 'FLAG_ROW' in xds[0]:
        columns += ('FLAG_ROW', )
        memory_per_row += xds[0].FLAG_ROW.data.itemsize

    # imaging weights
    if args.imaging_weight_column is not None:
        columns += (args.imaging_weight_column, )
        memory_per_row += bytes_per_row / 2

    # Mueller term (complex valued)
    if args.mueller_column is not None:
        columns += (args.mueller_column, )
        memory_per_row += bytes_per_row

    # get max uv coords over all fields
    uvw = []
    u_max = 0.0
    v_max = 0.0
    for ims in ms:
        xds = xds_from_ms(ims, columns=('UVW',), chunks={'row': -1})

        for ds in xds:
            uvw = ds.UVW.data
            u_max = da.maximum(u_max, abs(uvw[:, 0]).max())
            v_max = da.maximum(v_max, abs(uvw[:, 1]).max())
            uv_max = da.maximum(u_max, v_max)

    uv_max = uv_max.compute()
    del uvw

    # image size
    cell_N = 1.0 / (2 * uv_max * max_freq / lightspeed)

    if args.cell_size is not None:
        cell_size = args.cell_size
        cell_rad = cell_size * np.pi / 60 / 60 / 180
        if cell_N / cell_rad < 1:
            raise ValueError(
                "Requested cell size too small. "
                "Super resolution factor = ", cell_N / cell_rad)
        print("Super resolution factor = %f" % (cell_N / cell_rad), file=log)
    else:
        cell_rad = cell_N / args.super_resolution_factor
        cell_size = cell_rad * 60 * 60 * 180 / np.pi
        print("Cell size set to %5.5e arcseconds" % cell_size, file=log)

    if args.nx is None:
        fov = args.field_of_view * 3600
        npix = int(fov / cell_size)
        if npix % 2:
            npix += 1
        nx = good_size(npix)
        ny = good_size(npix)
    else:
        nx = args.nx
        ny = args.ny if args.ny is not None else nx

    print("Image size set to (%i, %i, %i)" % (nband, nx, ny), file=log)

    # get approx image size
    # this is not a conservative estimate when multiple SPWs map to a single
    # imaging band
    pixel_bytes = np.dtype(args.output_type).itemsize
    band_size = nx * ny * pixel_bytes

    if args.host_address is None:
        # full image on single node
        row_chunk = plan_row_chunk(mem_limit / nworkers, band_size, nrow,
                                   memory_per_row, nthreads_per_worker)

    else:
        # single band per node
        row_chunk = plan_row_chunk(mem_limit, band_size, nrow, memory_per_row,
                                   nthreads_per_worker)

    if args.row_chunks is not None:
        row_chunk = int(args.row_chunks)
        if row_chunk == -1:
            row_chunk = nrow

    print(
        "nrows = %i, row chunks set to %i for a total of %i chunks per node" %
        (nrow, row_chunk, int(np.ceil(nrow / row_chunk))),
        file=log)

    chunks = {}
    for ims in ms:
        chunks[ims] = []  # xds_from_ms expects a list per ds
        for spw in freqs[ims]:
            chunks[ims].append({
                'row': row_chunk,
                'chan': chan_chunks[ims][spw]['chan']
            })

    dirties = []
    radec = None  # assumes we are only imaging field 0 of first MS
    for ims in ms:
        xds = xds_from_ms(ims, chunks=chunks[ims], columns=columns)

        # subtables
        ddids = xds_from_table(ims + "::DATA_DESCRIPTION")
        fields = xds_from_table(ims + "::FIELD")
        spws = xds_from_table(ims + "::SPECTRAL_WINDOW")
        pols = xds_from_table(ims + "::POLARIZATION")

        # subtable data
        ddids = dask.compute(ddids)[0]
        fields = dask.compute(fields)[0]
        spws = dask.compute(spws)[0]
        pols = dask.compute(pols)[0]

        for ds in xds:
            field = fields[ds.FIELD_ID]

            # check fields match
            if radec is None:
                radec = field.PHASE_DIR.data.squeeze()

            if not np.array_equal(radec, field.PHASE_DIR.data.squeeze()):
                continue

            # this is not correct, need to use spw
            spw = ds.DATA_DESC_ID

            uvw = clone(ds.UVW.data)

            data = getattr(ds, args.data_column).data
            dataxx = data[:, :, 0]
            datayy = data[:, :, -1]

            weights = getattr(ds, args.weight_column).data
            if len(weights.shape) < 3:
                weights = da.broadcast_to(weights[:, None, :],
                                          data.shape,
                                          chunks=data.chunks)

            if args.imaging_weight_column is not None:
                imaging_weights = getattr(ds, args.imaging_weight_column).data
                if len(imaging_weights.shape) < 3:
                    imaging_weights = da.broadcast_to(imaging_weights[:,
                                                                      None, :],
                                                      data.shape,
                                                      chunks=data.chunks)

                weightsxx = imaging_weights[:, :, 0] * weights[:, :, 0]
                weightsyy = imaging_weights[:, :, -1] * weights[:, :, -1]
            else:
                weightsxx = weights[:, :, 0]
                weightsyy = weights[:, :, -1]

            # Apply the adjoint of the Mueller term:
            # phases modify the data, amplitudes modify the weights.
            if args.mueller_column is not None:
                mueller = getattr(ds, args.mueller_column).data
                dataxx *= da.exp(-1j * da.angle(mueller[:, :, 0]))
                datayy *= da.exp(-1j * da.angle(mueller[:, :, -1]))
                weightsxx *= da.absolute(mueller[:, :, 0])
                weightsyy *= da.absolute(mueller[:, :, -1])

            # weighted sum corr to Stokes I
            weights = weightsxx + weightsyy
            data = (weightsxx * dataxx + weightsyy * datayy)
            # TODO - turn off this stupid warning
            data = da.where(weights, data / weights, 0.0j)

            # MS may contain auto-correlations
            if 'FLAG_ROW' in xds[0]:
                frow = ds.FLAG_ROW.data | (ds.ANTENNA1.data
                                           == ds.ANTENNA2.data)
            else:
                frow = (ds.ANTENNA1.data == ds.ANTENNA2.data)

            # only keep data where both corrs are unflagged
            flag = getattr(ds, args.flag_column).data
            flagxx = flag[:, :, 0]
            flagyy = flag[:, :, -1]
            # ducc0 uses uint8 mask not flag
            mask = ~da.logical_or((flagxx | flagyy), frow[:, None])

            dirty = vis2im(uvw,
                           freqs[ims][spw],
                           data,
                           freq_bin_idx[ims][spw],
                           freq_bin_counts[ims][spw],
                           nx,
                           ny,
                           cell_rad,
                           weights=weights,
                           flag=mask.astype(np.uint8),
                           nthreads=ngridder_threads,
                           epsilon=args.epsilon,
                           do_wstacking=args.wstack,
                           double_accum=args.double_accum)

            dirties.append(dirty)

    # dask.visualize(dirties, filename=args.output_filename + '_graph.pdf', optimize_graph=False)

    if not args.mock:
        # result = dask.compute(dirties, wsum, optimize_graph=False)
        with performance_report(filename=args.output_filename + '_per.html'):
            result = dask.compute(dirties, optimize_graph=False)

        dirties = result[0]

        dirty = stitch_images(dirties, nband, band_mapping)

        hdr = set_wcs(cell_size / 3600, cell_size / 3600, nx, ny, radec,
                      freq_out)
        save_fits(args.output_filename + '_dirty.fits',
                  dirty,
                  hdr,
                  dtype=args.output_type)

    print("All done here.", file=log)
Example #17
def from_lon_360(lon_var: Union[np.ndarray, da.Array, xr.DataArray]):
    if isinstance(lon_var, xr.DataArray):
        return lon_var.where(lon_var <= 180.0, lon_var - 360.0)
    else:
        lon_var = da.asarray(lon_var)
        return da.where(lon_var <= 180.0, lon_var, lon_var - 360.0)
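A tiny self-contained sketch of the dask branch with assumed sample values: longitudes above 180 are shifted down by 360, mapping [0, 360) onto (-180, 180].

import numpy as np
import dask.array as da

lon = da.from_array(np.array([0.0, 90.0, 180.0, 270.0, 359.0]), chunks=2)
print(da.where(lon <= 180.0, lon, lon - 360.0).compute())
# [  0.  90. 180. -90.  -1.]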
Example #18
def execute_node_nullif_scalar_series(op, value, series, **kwargs):
    # TODO - not preserving the index
    return dd.from_array(da.where(series.eq(value).values, np.nan, value))
Example #19
    def __call__(self, signal, out=None, axes=None):
        """Slice the signal according to the ROI, and return it.

        Arguments
        ---------
        signal : Signal
            The signal to slice with the ROI.
        out : Signal, default = None
            If the 'out' argument is supplied, the sliced output will be put
            into this instead of returning a Signal. See Signal.__getitem__()
            for more details on 'out'.
        axes : specification of axes to use, default = None
            The axes argument specifies which axes the ROI will be applied on.
            The items in the collection can be either of the following:
                * a tuple of:
                    - DataAxis. These will not be checked with
                      signal.axes_manager.
                    - anything that will index signal.axes_manager
                * For any other value, it will check whether the navigation
                  space can fit the right number of axes, and use that if it
                  fits. If not, it will try the signal space.
        """

        if axes is None and signal in self.signal_map:
            axes = self.signal_map[signal][1]
        else:
            axes = self._parse_axes(axes, signal.axes_manager)

        natax = signal.axes_manager._get_axes_in_natural_order()
        # Slice original data with a circumscribed rectangle
        cx = self.cx + 0.5001 * axes[0].scale
        cy = self.cy + 0.5001 * axes[1].scale
        ranges = [[cx - self.r, cx + self.r],
                  [cy - self.r, cy + self.r]]
        slices = self._make_slices(natax, axes, ranges)
        ir = [slices[natax.index(axes[0])],
              slices[natax.index(axes[1])]]
        vx = axes[0].axis[ir[0]] - cx
        vy = axes[1].axis[ir[1]] - cy
        gx, gy = np.meshgrid(vx, vy)
        gr = gx**2 + gy**2
        mask = gr > self.r**2
        if self.r_inner != t.Undefined:
            mask |= gr < self.r_inner**2
        tiles = []
        shape = []
        chunks = []
        for i in range(len(slices)):
            if signal._lazy:
                chunks.append(signal.data.chunks[i][0])
            if i == natax.index(axes[0]):
                thisshape = mask.shape[0]
                tiles.append(thisshape)
                shape.append(thisshape)
            elif i == natax.index(axes[1]):
                thisshape = mask.shape[1]
                tiles.append(thisshape)
                shape.append(thisshape)
            else:
                tiles.append(signal.axes_manager._axes[i].size)
                shape.append(1)
        mask = mask.reshape(shape)

        nav_axes = [ax.navigate for ax in axes]
        nav_dim = signal.axes_manager.navigation_dimension
        if True in nav_axes:
            if False in nav_axes:

                slicer = signal.inav[slices[:nav_dim]].isig.__getitem__
                slices = slices[nav_dim:]
            else:
                slicer = signal.inav.__getitem__
                slices = slices[0:nav_dim]
        else:
            slicer = signal.isig.__getitem__
            slices = slices[nav_dim:]

        roi = slicer(slices, out=out)
        roi = out or roi
        if roi._lazy:
            import dask.array as da
            mask = da.from_array(mask, chunks=chunks)
            mask = da.broadcast_to(mask, tiles)
            # By default promotes dtype to float if required
            roi.data = da.where(mask, np.nan, roi.data)
        else:
            mask = np.broadcast_to(mask, tiles)
            roi.data = np.ma.masked_array(roi.data, mask, hard_mask=True)
        if out is None:
            return roi
        else:
            out.events.data_changed.trigger(out)
Example #20
    def decomposition(self,
                      normalize_poissonian_noise=False,
                      algorithm='svd',
                      output_dimension=None,
                      signal_mask=None,
                      navigation_mask=None,
                      get=threaded.get,
                      num_chunks=None,
                      reproject=True,
                      bounds=False,
                      **kwargs):
        """Perform Incremental (Batch) decomposition on the data, keeping n
        significant components.

        Parameters
        ----------
        normalize_poissonian_noise : bool
            If True, scale the SI to normalize Poissonian noise
        algorithm : str
            One of ('svd', 'PCA', 'ORPCA', 'ONMF'). By default 'svd',
            lazy SVD decomposition from dask.
        output_dimension : int
            the number of significant components to keep. If None, keep all
            (only valid for SVD)
        get : dask scheduler
            the dask scheduler to use for computations;
            default `dask.threaded.get`
        num_chunks : int
            the number of dask chunks to pass to the decomposition model.
            More chunks require more memory, but should run faster. Will be
            increased to contain at least output_dimension signals.
        navigation_mask : {BaseSignal, numpy array, dask array}
            The navigation locations marked as True are not used in the
            decomposition.
        signal_mask : {BaseSignal, numpy array, dask array}
            The signal locations marked as True are not used in the
            decomposition.
        reproject : bool
            Reproject data on the learnt components (factors) after learning.
        **kwargs
            passed to the partial_fit/fit functions.

        Notes
        -----
        Various algorithm parameters and their default values:
            ONMF:
                lambda1=1,
                kappa=1,
                robust=False,
                store_r=False
                batch_size=None
            ORPCA:
                fast=True,
                lambda1=None,
                lambda2=None,
                method=None,
                learning_rate=None,
                init=None,
                training_samples=None,
                momentum=None
            PCA:
                batch_size=None,
                copy=True,
                white=False


        """
        if bounds:
            msg = ("The `bounds` keyword is deprecated and will be removed "
                   "in v2.0. Since version > 1.3 this has no effect.")
            warnings.warn(msg, VisibleDeprecationWarning)
        explained_variance = None
        explained_variance_ratio = None
        _al_data = self._data_aligned_with_axes
        nav_chunks = _al_data.chunks[:self.axes_manager.navigation_dimension]
        sig_chunks = _al_data.chunks[self.axes_manager.navigation_dimension:]

        num_chunks = 1 if num_chunks is None else num_chunks
        blocksize = np.min([multiply(ar) for ar in product(*nav_chunks)])
        nblocks = multiply([len(c) for c in nav_chunks])
        if algorithm != "svd" and output_dimension is None:
            raise ValueError("With the %s the output_dimension "
                             "must be specified" % algorithm)
        if output_dimension and blocksize / output_dimension < num_chunks:
            num_chunks = np.ceil(blocksize / output_dimension)
        blocksize *= num_chunks
        # LEARN
        if algorithm == 'PCA':
            from sklearn.decomposition import IncrementalPCA
            obj = IncrementalPCA(n_components=output_dimension)
            method = partial(obj.partial_fit, **kwargs)
            reproject = True

        elif algorithm == 'ORPCA':
            from hyperspy.learn.rpca import ORPCA
            kwg = {'fast': True}
            kwg.update(kwargs)
            obj = ORPCA(output_dimension, **kwg)
            method = partial(obj.fit, iterating=True)

        elif algorithm == 'ONMF':
            from hyperspy.learn.onmf import ONMF
            batch_size = kwargs.pop('batch_size', None)
            obj = ONMF(output_dimension, **kwargs)
            method = partial(obj.fit, batch_size=batch_size)
        elif algorithm != "svd":
            raise ValueError('algorithm not known')

        original_data = self.data
        try:
            if normalize_poissonian_noise:
                data = self._data_aligned_with_axes
                ndim = self.axes_manager.navigation_dimension
                sdim = self.axes_manager.signal_dimension
                nm = da.logical_not(
                    da.zeros(self.axes_manager.navigation_shape[::-1],
                             chunks=nav_chunks) if navigation_mask is None else
                    to_array(navigation_mask, chunks=nav_chunks))
                sm = da.logical_not(
                    da.zeros(self.axes_manager.signal_shape[::-1],
                             chunks=sig_chunks) if signal_mask is None else
                    to_array(signal_mask, chunks=sig_chunks))
                ndim = self.axes_manager.navigation_dimension
                sdim = self.axes_manager.signal_dimension
                bH, aG = da.compute(
                    data.sum(axis=tuple(range(ndim))),
                    data.sum(axis=tuple(range(ndim, ndim + sdim))))
                bH = da.where(sm, bH, 1)
                aG = da.where(nm, aG, 1)

                raG = da.sqrt(aG)
                rbH = da.sqrt(bH)

                coeff = raG[(..., ) + (None, ) * rbH.ndim] *\
                    rbH[(None, ) * raG.ndim + (...,)]
                coeff = coeff.map_blocks(np.nan_to_num)  # map_blocks returns a new array; keep it
                coeff = da.where(coeff == 0, 1, coeff)
                data = data / coeff
                self.data = data

            # LEARN
            if algorithm == "svd":
                reproject = False
                from dask.array.linalg import svd
                try:
                    self._unfolded4decomposition = self.unfold()
                    # TODO: implement masking
                    if navigation_mask or signal_mask:
                        raise NotImplementedError(
                            "Masking is not yet implemented for lazy SVD.")
                    U, S, V = svd(self.data)
                    factors = V.T
                    explained_variance = S**2 / self.data.shape[0]
                    loadings = U * S
                finally:
                    if self._unfolded4decomposition is True:
                        self.fold()
                        self._unfolded4decomposition = False
            else:
                this_data = []
                try:
                    for chunk in progressbar(self._block_iterator(
                            flat_signal=True,
                            get=get,
                            signal_mask=signal_mask,
                            navigation_mask=navigation_mask),
                                             total=nblocks,
                                             leave=True,
                                             desc='Learn'):
                        this_data.append(chunk)
                        if len(this_data) == num_chunks:
                            thedata = np.concatenate(this_data, axis=0)
                            method(thedata)
                            this_data = []
                    if len(this_data):
                        thedata = np.concatenate(this_data, axis=0)
                        method(thedata)
                except KeyboardInterrupt:
                    pass

            # GET ALREADY CALCULATED RESULTS
            if algorithm == 'PCA':
                explained_variance = obj.explained_variance_
                explained_variance_ratio = obj.explained_variance_ratio_
                factors = obj.components_.T

            elif algorithm == 'ORPCA':
                _, _, U, S, V = obj.finish()
                factors = U * S
                loadings = V
                explained_variance = S**2 / len(factors)

            elif algorithm == 'ONMF':
                factors, loadings = obj.finish()
                loadings = loadings.T

            # REPROJECT
            if reproject:
                if algorithm == 'PCA':
                    method = obj.transform

                    def post(a):
                        return np.concatenate(a, axis=0)
                elif algorithm == 'ORPCA':
                    method = obj.project
                    obj.R = []

                    def post(a):
                        return obj.finish()[4]
                elif algorithm == 'ONMF':
                    method = obj.project

                    def post(a):
                        return np.concatenate(a, axis=1).T

                _map = map(
                    lambda thing: method(thing),
                    self._block_iterator(flat_signal=True,
                                         get=get,
                                         signal_mask=signal_mask,
                                         navigation_mask=navigation_mask))
                H = []
                try:
                    for thing in progressbar(_map,
                                             total=nblocks,
                                             desc='Project'):
                        H.append(thing)
                except KeyboardInterrupt:
                    pass
                loadings = post(H)

            if explained_variance is not None and \
                    explained_variance_ratio is None:
                explained_variance_ratio = \
                    explained_variance / explained_variance.sum()

            # RESHUFFLE "blocked" LOADINGS
            ndim = self.axes_manager.navigation_dimension
            if algorithm != "svd":  # Only needed for online algorithms
                try:
                    loadings = _reshuffle_mixed_blocks(loadings, ndim,
                                                       (output_dimension, ),
                                                       nav_chunks).reshape(
                                                           (-1,
                                                            output_dimension))
                except ValueError:
                    # In case the projection step was not finished, it's left
                    # as scrambled
                    pass
        finally:
            self.data = original_data

        target = self.learning_results
        target.decomposition_algorithm = algorithm
        target.output_dimension = output_dimension
        if algorithm != "svd":
            target._object = obj
        target.factors = factors
        target.loadings = loadings
        target.explained_variance = explained_variance
        target.explained_variance_ratio = explained_variance_ratio

        # Rescale the results if the noise was normalized
        if normalize_poissonian_noise is True:
            target.factors = target.factors * rbH.ravel()[:, np.newaxis]
            target.loadings = target.loadings * raG.ravel()[:, np.newaxis]
Example #21
    def norm_topo(self,
                  data,
                  elev,
                  solar_za,
                  solar_az,
                  slope=None,
                  aspect=None,
                  method='empirical-rotation',
                  slope_thresh=2,
                  nodata=0,
                  elev_nodata=-32768,
                  scale_factor=1,
                  angle_scale=0.01,
                  n_jobs=1,
                  robust=False,
                  min_samples=100,
                  slope_kwargs=None,
                  aspect_kwargs=None,
                  band_coeffs=None):
        """
        Applies topographic normalization

        Args:
            data (2d or 3d DataArray): The data to normalize, in the range 0-1.
            elev (2d DataArray): The elevation data.
            solar_za (2d DataArray): The solar zenith angles (degrees).
            solar_az (2d DataArray): The solar azimuth angles (degrees).
            slope (2d DataArray): The slope data. If not given, slope is calculated from ``elev``.
            aspect (2d DataArray): The aspect data. If not given, aspect is calculated from ``elev``.
            method (Optional[str]): The method to apply. Choices are ['c', 'empirical-rotation'].
            slope_thresh (Optional[float or int]): The slope threshold. Any samples with
                values < ``slope_thresh`` are not adjusted.
            nodata (Optional[int or float]): The 'no data' value for ``data``.
            elev_nodata (Optional[float or int]): The 'no data' value for ``elev``.
            scale_factor (Optional[float]): A scale factor to apply to the input data.
            angle_scale (Optional[float]): The angle scale factor.
            n_jobs (Optional[int]): The number of parallel workers for ``LinearRegression.fit``.
            robust (Optional[bool]): Whether to fit a robust regression.
            min_samples (Optional[int]): The minimum number of samples required to fit a regression.
            slope_kwargs (Optional[dict]): Keyword arguments passed to ``gdal.DEMProcessingOptions``
                to calculate the slope.
            aspect_kwargs (Optional[dict]): Keyword arguments passed to ``gdal.DEMProcessingOptions``
                to calculate the aspect.
            band_coeffs (Optional[dict]): Slope and intercept coefficients for each band.

        References:

            See :cite:`teillet_etal_1982` for the C-correction method.
            See :cite:`tan_etal_2010` for the Empirical Rotation method.

        Returns:
            ``xarray.DataArray``

        Examples:
            >>> import geowombat as gw
            >>> from geowombat.radiometry import Topo
            >>>
            >>> topo = Topo()
            >>>
            >>> # Example where pixel angles are stored in separate GeoTiff files
            >>> with gw.config.update(sensor='l7', scale_factor=0.0001, nodata=0):
            >>>
            >>>     with gw.open('landsat.tif') as src,
            >>>         gw.open('srtm') as elev,
            >>>             gw.open('solarz.tif') as solarz,
            >>>                 gw.open('solara.tif') as solara:
            >>>
            >>>         src_norm = topo.norm_topo(src, elev, solarz, solara, n_jobs=-1)
        """

        method = method.strip().lower()

        if method not in ['c', 'empirical-rotation']:

            logger.exception(
                "  Currently, the only supported methods are 'c' and 'empirical-rotation'."
            )
            raise NameError

        attrs = data.attrs.copy()

        if not nodata:
            nodata = data.gw.nodata

        if scale_factor == 1.0:
            scale_factor = data.gw.scale_factor

        # Scale the reflectance data
        if scale_factor != 1:
            data = data * scale_factor

        if not slope_kwargs:

            slope_kwargs = dict(format='MEM',
                                computeEdges=True,
                                alg='ZevenbergenThorne',
                                slopeFormat='degree')

        if not aspect_kwargs:

            aspect_kwargs = dict(format='MEM',
                                 computeEdges=True,
                                 alg='ZevenbergenThorne',
                                 trigonometric=False,
                                 zeroForFlat=True)

        slope_kwargs['format'] = 'MEM'
        slope_kwargs['slopeFormat'] = 'degree'
        aspect_kwargs['format'] = 'MEM'

        # Force to SRTM resolution
        proc_dims = (int((data.gw.ncols * data.gw.cellx) / 30.0),
                     int((data.gw.nrows * data.gw.celly) / 30.0))

        w = int((5 * 30.0) / data.gw.celly)

        if w % 2 == 0:
            w += 1

        if isinstance(slope, xr.DataArray):
            slope_deg_fd = slope.squeeze().data
        else:

            slope_deg = calc_slope_delayed(elev.squeeze().data,
                                           proc_dims=proc_dims,
                                           w=w,
                                           **slope_kwargs)
            slope_deg_fd = da.from_delayed(slope_deg,
                                           (data.gw.nrows, data.gw.ncols),
                                           dtype='float64')

        if isinstance(aspect, xr.DataArray):
            aspect_deg_fd = aspect.squeeze().data
        else:

            aspect_deg = calc_aspect_delayed(elev.squeeze().data,
                                             proc_dims=proc_dims,
                                             w=w,
                                             **aspect_kwargs)
            aspect_deg_fd = da.from_delayed(aspect_deg,
                                            (data.gw.nrows, data.gw.ncols),
                                            dtype='float64')

        nodata_samps = da.where(
            (elev.data == elev_nodata) | (data.max(dim='band').data == nodata)
            | (slope_deg_fd < slope_thresh), 1, 0)

        slope_rad = da.deg2rad(slope_deg_fd)
        aspect_rad = da.deg2rad(aspect_deg_fd)

        # Convert degrees to radians
        solar_za = da.deg2rad(solar_za.squeeze().data * angle_scale)
        solar_az = da.deg2rad(solar_az.squeeze().data * angle_scale)

        cos_z = da.cos(solar_za)

        # Calculate the illumination angle
        il = da.cos(slope_rad) * cos_z + da.sin(slope_rad) * da.sin(
            solar_za) * da.cos(solar_az - aspect_rad)

        sr_adj = list()
        for band in data.band.values.tolist():

            if method == 'c':

                sr_adj.append(
                    self._method_c(
                        data.sel(band=band).data, il, cos_z, nodata_samps,
                        min_samples, n_jobs, robust, band_coeffs, band))

            else:

                sr_adj.append(
                    self._method_empirical_rotation(
                        data.sel(band=band).data, il, cos_z, nodata_samps,
                        min_samples, n_jobs, robust, band_coeffs, band))

        adj_data = xr.DataArray(data=da.concatenate(sr_adj).reshape(
            (data.gw.nbands, data.gw.nrows, data.gw.ncols)),
                                coords={
                                    'band': data.band.values.tolist(),
                                    'y': data.y.values,
                                    'x': data.x.values
                                },
                                dims=('band', 'y', 'x'),
                                attrs=data.attrs)

        attrs['calibration'] = 'Topographic-adjusted'
        attrs['nodata'] = nodata
        attrs['drange'] = (0, 1)

        adj_data.attrs = attrs

        return adj_data
Example #22
    def decomposition(self,
                      normalize_poissonian_noise=False,
                      algorithm='svd',
                      output_dimension=None,
                      signal_mask=None,
                      navigation_mask=None,
                      get=threaded.get,
                      num_chunks=None,
                      reproject=True,
                      bounds=False,
                      **kwargs):
        """Perform Incremental (Batch) decomposition on the data, keeping n
        significant components.

        Parameters
        ----------
        normalize_poissonian_noise : bool
            If True, scale the SI to normalize Poissonian noise
        algorithm : str
            One of ('svd', 'PCA', 'ORPCA', 'ONMF'). By default 'svd',
            lazy SVD decomposition from dask.
        output_dimension : int
            the number of significant components to keep. If None, keep all
            (only valid for SVD)
        get : dask scheduler
            the dask scheduler to use for computations;
            default `dask.threaded.get`
        num_chunks : int
            the number of dask chunks to pass to the decomposition model.
            More chunks require more memory, but should run faster. Will be
            increased to contain at least output_dimension signals.
        navigation_mask : {BaseSignal, numpy array, dask array}
            The navigation locations marked as True are not used in the
            decomposition.
        signal_mask : {BaseSignal, numpy array, dask array}
            The signal locations marked as True are not used in the
            decomposition.
        reproject : bool
            Reproject data on the learnt components (factors) after learning.
        **kwargs
            passed to the partial_fit/fit functions.

        Notes
        -----
        Various algorithm parameters and their default values:
            ONMF:
                lambda1=1,
                kappa=1,
                robust=False,
                store_r=False
                batch_size=None
            ORPCA:
                fast=True,
                lambda1=None,
                lambda2=None,
                method=None,
                learning_rate=None,
                init=None,
                training_samples=None,
                momentum=None
            PCA:
                batch_size=None,
                copy=True,
                white=False


        """
        if bounds:
            msg = (
                "The `bounds` keyword is deprecated and will be removed "
                "in v2.0. Since version > 1.3 this has no effect.")
            warnings.warn(msg, VisibleDeprecationWarning)
        explained_variance = None
        explained_variance_ratio = None
        _al_data = self._data_aligned_with_axes
        nav_chunks = _al_data.chunks[:self.axes_manager.navigation_dimension]
        sig_chunks = _al_data.chunks[self.axes_manager.navigation_dimension:]

        num_chunks = 1 if num_chunks is None else num_chunks
        blocksize = np.min([multiply(ar) for ar in product(*nav_chunks)])
        nblocks = multiply([len(c) for c in nav_chunks])
        if algorithm != "svd" and output_dimension is None:
            raise ValueError("With the %s the output_dimension "
                             "must be specified" % algorithm)
        if output_dimension and blocksize / output_dimension < num_chunks:
            num_chunks = np.ceil(blocksize / output_dimension)
        blocksize *= num_chunks
        # LEARN
        if algorithm == 'PCA':
            from sklearn.decomposition import IncrementalPCA
            obj = IncrementalPCA(n_components=output_dimension)
            method = partial(obj.partial_fit, **kwargs)
            reproject = True

        elif algorithm == 'ORPCA':
            from hyperspy.learn.rpca import ORPCA
            kwg = {'fast': True}
            kwg.update(kwargs)
            obj = ORPCA(output_dimension, **kwg)
            method = partial(obj.fit, iterating=True)

        elif algorithm == 'ONMF':
            from hyperspy.learn.onmf import ONMF
            batch_size = kwargs.pop('batch_size', None)
            obj = ONMF(output_dimension, **kwargs)
            method = partial(obj.fit, batch_size=batch_size)
        elif algorithm != "svd":
            raise ValueError('algorithm not known')

        original_data = self.data
        try:
            if normalize_poissonian_noise:
                data = self._data_aligned_with_axes
                ndim = self.axes_manager.navigation_dimension
                sdim = self.axes_manager.signal_dimension
                nm = da.logical_not(
                    da.zeros(
                        self.axes_manager.navigation_shape[::-1],
                        chunks=nav_chunks)
                    if navigation_mask is None else to_array(
                        navigation_mask, chunks=nav_chunks))
                sm = da.logical_not(
                    da.zeros(
                        self.axes_manager.signal_shape[::-1],
                        chunks=sig_chunks)
                    if signal_mask is None else to_array(
                        signal_mask, chunks=sig_chunks))
                ndim = self.axes_manager.navigation_dimension
                sdim = self.axes_manager.signal_dimension
                bH, aG = da.compute(
                    data.sum(axis=tuple(range(ndim))),
                    data.sum(axis=tuple(range(ndim, ndim + sdim))))
                bH = da.where(sm, bH, 1)
                aG = da.where(nm, aG, 1)

                raG = da.sqrt(aG)
                rbH = da.sqrt(bH)

                coeff = raG[(..., ) + (None, ) * rbH.ndim] *\
                    rbH[(None, ) * raG.ndim + (...,)]
                coeff = coeff.map_blocks(np.nan_to_num)  # map_blocks returns a new array; keep it
                coeff = da.where(coeff == 0, 1, coeff)
                data = data / coeff
                self.data = data

            # LEARN
            if algorithm == "svd":
                reproject = False
                from dask.array.linalg import svd
                try:
                    self._unfolded4decomposition = self.unfold()
                    # TODO: implement masking
                    if navigation_mask or signal_mask:
                        raise NotImplementedError(
                            "Masking is not yet implemented for lazy SVD."
                        )
                    U, S, V = svd(self.data)
                    factors = V.T
                    explained_variance = S ** 2 / self.data.shape[0]
                    loadings = U * S
                finally:
                    if self._unfolded4decomposition is True:
                        self.fold()
                        self._unfolded4decomposition = False
            else:
                this_data = []
                try:
                    for chunk in progressbar(
                            self._block_iterator(
                                flat_signal=True,
                                get=get,
                                signal_mask=signal_mask,
                                navigation_mask=navigation_mask),
                            total=nblocks,
                            leave=True,
                            desc='Learn'):
                        this_data.append(chunk)
                        if len(this_data) == num_chunks:
                            thedata = np.concatenate(this_data, axis=0)
                            method(thedata)
                            this_data = []
                    if len(this_data):
                        thedata = np.concatenate(this_data, axis=0)
                        method(thedata)
                except KeyboardInterrupt:
                    pass

            # GET ALREADY CALCULATED RESULTS
            if algorithm == 'PCA':
                explained_variance = obj.explained_variance_
                explained_variance_ratio = obj.explained_variance_ratio_
                factors = obj.components_.T

            elif algorithm == 'ORPCA':
                _, _, U, S, V = obj.finish()
                factors = U * S
                loadings = V
                explained_variance = S**2 / len(factors)

            elif algorithm == 'ONMF':
                factors, loadings = obj.finish()
                loadings = loadings.T

            # REPROJECT
            if reproject:
                if algorithm == 'PCA':
                    method = obj.transform

                    def post(a): return np.concatenate(a, axis=0)
                elif algorithm == 'ORPCA':
                    method = obj.project
                    obj.R = []

                    def post(a): return obj.finish()[4]
                elif algorithm == 'ONMF':
                    method = obj.project

                    def post(a): return np.concatenate(a, axis=1).T

                _map = map(lambda thing: method(thing),
                           self._block_iterator(
                               flat_signal=True,
                               get=get,
                               signal_mask=signal_mask,
                               navigation_mask=navigation_mask))
                H = []
                try:
                    for thing in progressbar(
                            _map, total=nblocks, desc='Project'):
                        H.append(thing)
                except KeyboardInterrupt:
                    pass
                loadings = post(H)

            if explained_variance is not None and \
                    explained_variance_ratio is None:
                explained_variance_ratio = \
                    explained_variance / explained_variance.sum()

            # RESHUFFLE "blocked" LOADINGS
            ndim = self.axes_manager.navigation_dimension
            if algorithm != "svd":  # Only needed for online algorithms
                try:
                    loadings = _reshuffle_mixed_blocks(
                        loadings,
                        ndim,
                        (output_dimension,),
                        nav_chunks).reshape((-1, output_dimension))
                except ValueError:
                    # In case the projection step was not finished, it's left
                    # as scrambled
                    pass
        finally:
            self.data = original_data

        target = self.learning_results
        target.decomposition_algorithm = algorithm
        target.output_dimension = output_dimension
        if algorithm != "svd":
            target._object = obj
        target.factors = factors
        target.loadings = loadings
        target.explained_variance = explained_variance
        target.explained_variance_ratio = explained_variance_ratio

        # Rescale the results if the noise was normalized
        if normalize_poissonian_noise is True:
            target.factors = target.factors * rbH.ravel()[:, np.newaxis]
            target.loadings = target.loadings * raG.ravel()[:, np.newaxis]
Example #23
def _transform_array(image: da.Array,
                     scale: Tuple[float, ...],
                     offset: Tuple[float, ...],
                     shape: Tuple[int, ...],
                     chunks: Optional[Tuple[int, ...]],
                     spline_order: int,
                     recover_nan: bool) -> da.Array:
    """
    Apply affine transformation to ND-image.

    :param image: ND-image with shape (..., size_y, size_x)
    :param scale: Scaling factors (1, ..., 1, sy, sx)
    :param offset: Offset values (0, ..., 0, oy, ox)
    :param shape: (..., size_y, size_x)
    :param chunks: (..., chunk_size_y, chunk_size_x)
    :param spline_order: 0 ... 5
    :param recover_nan: True/False
    :return: Transformed ND-image.
    """
    assert_true(len(scale) == image.ndim, 'invalid scale')
    assert_true(len(offset) == image.ndim, 'invalid offset')
    assert_true(len(shape) == image.ndim, 'invalid shape')
    assert_true(chunks is None or len(chunks) == image.ndim,
                'invalid chunks')
    if _is_no_op(image, scale, offset, shape):
        return image
    # As of scipy 0.18, matrix = scale is no longer supported.
    # Therefore we use the diagonal matrix form here,
    # where scale is the diagonal.
    matrix = np.diag(scale)
    at_kwargs = dict(
        offset=offset,
        order=spline_order,
        output_shape=shape,
        output_chunks=chunks,
        mode='constant',
    )
    if recover_nan and spline_order > 0:
        # We can "recover" values that are neighbours to NaN values
        # that would otherwise become NaN too.
        mask = da.isnan(image)
    # First check if there are NaN values at all
        if da.any(mask):
            # Yes, then
            # 1. replace NaN by zero
            filled_im = da.where(mask, 0.0, image)
            # 2. transform the zero-filled image
            scaled_im = ndinterp.affine_transform(filled_im,
                                                  matrix,
                                                  **at_kwargs,
                                                  cval=0.0)
            # 3. transform the inverted mask
            scaled_norm = ndinterp.affine_transform(1.0 - mask,
                                                    matrix,
                                                    **at_kwargs,
                                                    cval=0.0)
            # 4. put back NaN where there was zero,
            #    otherwise decode using scaled mask
            return da.where(da.isclose(scaled_norm, 0.0),
                            np.nan, scaled_im / scaled_norm)

    # No dealing with NaN required
    return ndinterp.affine_transform(image, matrix, **at_kwargs, cval=np.nan)
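
The NaN-recovery trick in _transform_array can be illustrated standalone. A minimal NumPy/SciPy sketch, assuming scipy.ndimage.affine_transform behaves analogously to the dask-image ndinterp version used above (toy 3x3 image, 2x upscale):

import numpy as np
from scipy.ndimage import affine_transform

# Toy image with one NaN; upscale by 2 using the same fill/normalize trick.
img = np.array([[1.0, 2.0, np.nan],
                [4.0, 5.0, 6.0],
                [7.0, 8.0, 9.0]])
mask = np.isnan(img)
filled = np.where(mask, 0.0, img)
matrix = np.diag((0.5, 0.5))          # scale factors on the diagonal
kwargs = dict(output_shape=(6, 6), order=1, mode='constant', cval=0.0)
scaled = affine_transform(filled, matrix, **kwargs)
norm = affine_transform((~mask).astype(float), matrix, **kwargs)
# Put NaN back where the interpolated mask vanished, otherwise renormalize.
result = np.where(np.isclose(norm, 0.0), np.nan, scaled / norm)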
Beispiel #24
0
def test_where_has_informative_error():
    x = da.ones(5, chunks=3)
    try:
        result = da.where(x > 0)
    except Exception as e:
        assert 'dask' in str(e)
Beispiel #25
0
def update_W_da(M, H, W):
    denominator = da.dot(W, da.dot(H, H.T))
    denominator_new = da.where(
        da.fabs(denominator) < EPSILON, EPSILON, denominator)
    W_new = W * da.dot(M, H.T) / denominator_new
    return (W_new)
Beispiel #26
0
def update_H_da(M, H, W):
    denominator = da.dot(W.T, da.dot(W, H))
    denominator_new = da.where(
        da.fabs(denominator) < EPSILON, EPSILON, denominator)
    H_new = H * da.dot(W.T, M) / denominator_new
    return (H_new)
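
Together with update_W_da above, these multiplicative updates can simply be iterated. A minimal driver sketch; EPSILON and the problem sizes are assumptions:

import numpy as np
import dask.array as da

EPSILON = 1e-9  # assumed value for the guard constant used by the updates above

rng = da.random.RandomState(42)
M = da.fabs(rng.standard_normal((100, 80), chunks=(50, 40)))  # data matrix
W = da.fabs(rng.standard_normal((100, 5), chunks=(50, 5)))    # initial factors
H = da.fabs(rng.standard_normal((5, 80), chunks=(5, 40)))     # initial loadings

for _ in range(20):
    W = update_W_da(M, H, W)
    H = update_H_da(M, H, W)

# Frobenius reconstruction error of the factorization
reconstruction_error = da.sqrt(((M - da.dot(W, H)) ** 2).sum()).compute()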
Beispiel #27
0
    def read_band(self, key, info):
        """Read the data."""
        tic = datetime.now()
        header = {}
        with open(self.filename, "rb") as fp_:

            header['block1'] = np.fromfile(fp_,
                                           dtype=_BASIC_INFO_TYPE,
                                           count=1)
            header["block2"] = np.fromfile(fp_, dtype=_DATA_INFO_TYPE, count=1)
            header["block3"] = np.fromfile(fp_, dtype=_PROJ_INFO_TYPE, count=1)
            header["block4"] = np.fromfile(fp_, dtype=_NAV_INFO_TYPE, count=1)
            header["block5"] = np.fromfile(fp_, dtype=_CAL_INFO_TYPE, count=1)
            logger.debug("Band number = " +
                         str(header["block5"]['band_number'][0]))
            logger.debug('Time_interval: %s - %s', str(self.start_time),
                         str(self.end_time))
            band_number = header["block5"]['band_number'][0]
            if band_number < 7:
                cal = np.fromfile(fp_, dtype=_VISCAL_INFO_TYPE, count=1)
            else:
                cal = np.fromfile(fp_, dtype=_IRCAL_INFO_TYPE, count=1)

            header['calibration'] = cal

            header["block6"] = np.fromfile(fp_,
                                           dtype=_INTER_CALIBRATION_INFO_TYPE,
                                           count=1)
            header["block7"] = np.fromfile(fp_,
                                           dtype=_SEGMENT_INFO_TYPE,
                                           count=1)
            header["block8"] = np.fromfile(
                fp_, dtype=_NAVIGATION_CORRECTION_INFO_TYPE, count=1)
            # 8 The navigation corrections:
            ncorrs = header["block8"]['numof_correction_info_data'][0]
            dtype = np.dtype([
                ("line_number_after_rotation", "<u2"),
                ("shift_amount_for_column_direction", "f4"),
                ("shift_amount_for_line_direction", "f4"),
            ])
            corrections = []
            for i in range(ncorrs):
                corrections.append(np.fromfile(fp_, dtype=dtype, count=1))
            fp_.seek(40, 1)
            header['navigation_corrections'] = corrections
            header["block9"] = np.fromfile(fp_,
                                           dtype=_OBS_TIME_INFO_TYPE,
                                           count=1)
            numobstimes = header["block9"]['number_of_observation_times'][0]

            dtype = np.dtype([
                ("line_number", "<u2"),
                ("observation_time", "f8"),
            ])
            lines_and_times = []
            for i in range(numobstimes):
                lines_and_times.append(np.fromfile(fp_, dtype=dtype, count=1))
            header['observation_time_information'] = lines_and_times
            fp_.seek(40, 1)

            header["block10"] = np.fromfile(fp_,
                                            dtype=_ERROR_INFO_TYPE,
                                            count=1)
            dtype = np.dtype([
                ("line_number", "<u2"),
                ("numof_error_pixels_per_line", "<u2"),
            ])
            num_err_info_data = header["block10"]['number_of_error_info_data'][
                0]
            err_info_data = []
            for i in range(num_err_info_data):
                err_info_data.append(np.fromfile(fp_, dtype=dtype, count=1))
            header['error_information_data'] = err_info_data
            fp_.seek(40, 1)

            np.fromfile(fp_, dtype=_SPARE_TYPE, count=1)

            nlines = int(header["block2"]['number_of_lines'][0])
            ncols = int(header["block2"]['number_of_columns'][0])

            res = da.from_array(np.memmap(self.filename,
                                          offset=fp_.tell(),
                                          dtype='<u2',
                                          shape=(nlines, ncols),
                                          mode='r'),
                                chunks=CHUNK_SIZE)
        res = da.where(res == 65535, np.float32(np.nan), res)
        self._header = header

        logger.debug("Reading time " + str(datetime.now() - tic))
        res = self.calibrate(res, key.calibration)
        new_info = dict(
            units=info['units'],
            standard_name=info['standard_name'],
            wavelength=info['wavelength'],
            resolution='resolution',
            id=key,
            name=key.name,
            scheduled_time=self.scheduled_time,
            platform_name=self.platform_name,
            sensor=self.sensor,
            satellite_longitude=float(self.nav_info['SSP_longitude']),
            satellite_latitude=float(self.nav_info['SSP_latitude']),
            satellite_altitude=float(
                self.nav_info['distance_earth_center_to_satellite'] -
                self.proj_info['earth_equatorial_radius']) * 1000)
        res = xr.DataArray(res, attrs=new_info, dims=['y', 'x'])
        res = res.where(
            header['block5']["count_value_outside_scan_pixels"][0] != res)
        res = res.where(header['block5']["count_value_error_pixels"][0] != res)
        res = res.where(self.geo_mask())
        return res
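
The fill-value handling in read_band reduces to two masking idioms. A standalone sketch; the 65535 fill count comes from the code above, while 1023 is a made-up stand-in for the error count read from the header:

import numpy as np
import dask.array as da
import xarray as xr

counts = da.from_array(np.array([[10, 65535], [1023, 30]], dtype='<u2'), chunks=2)
# dask-level replacement of the unsigned fill value with NaN
counts = da.where(counts == 65535, np.float32(np.nan), counts)
arr = xr.DataArray(counts, dims=['y', 'x'])
# xarray-level masking of an (assumed) error count value
arr = arr.where(arr != 1023)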
Beispiel #28
0
def density_flux(population, total_population, carrying_capacity, distance,
                 csx, csy, **kwargs):
    """
    'density-based dispersion'

    Dispersal is calculated using the following sequence of methods:

    Portions of populations at each element (node, or grid cell) in the study area array (raster) are moved to
    surrounding elements (a neighbourhood) within a radius that is defined by the input distance (:math:`d`), as
    presented in the conceptual figure below.

        .. image:: images/density_flux_neighbourhood.png
            :align: center

    .. attention:: No dispersal will occur if the provided distance is less than the distance between elements (grid cells) in the model domain, as none will be included in the neighbourhood

    The mean density (:math:`\\rho`) of all elements in the neighbourhood is calculated as:

    .. math::
       \\rho=\\frac{\\sum_{i=1}^{n} \\frac{pop_T(i)}{k_T(i)}}{n}

    where,

    :math:`pop_T` is the total population (of the entire species) at each element (:math:`i`); and\n
    :math:`k_T` is the total carrying capacity for the species

    The density gradient at each element (:math:`\\Delta`) with respect to the mean is calculated as:

    .. math::
        \\Delta(i)=\\frac{pop_T(i)}{k_T(i)}-\\rho

    If the centroid element is above the mean :math:`[\\Delta(i_0) > 0]`, it is able to release a portion of its
    population to elements in the neighbourhood. The eligible population to be received by surrounding elements is equal
    to the sum of populations at elements with negative density gradients, the :math:`candidates`:

    .. math::
        candidates=\\sum_{i=1}^{n} \\Delta(i)[\\Delta(i) < 0]k_T(i)

    The minimum of either the population above the mean at the centroid element - :math:`source=\\Delta(i_0)*k_T(i_0)`,
    or the :math:`candidates` are used to determine the total population that is dispersed from the centroid element to
    the other elements in the neighbourhood:

    .. math::
        dispersal=min\\{source, candidates\\}

    The population at the centroid element becomes:

    .. math::
        pop_a(i_0)=pop_a(i_0)-\\frac{pop_a(i_0)}{pop_T(i_0)}dispersal

    where,

    :math:`pop_a` is the age (stage) group population, which is a sub-population of the total.

    The populations of the candidate elements in the neighbourhood become (a net gain due to negative gradients):

    .. math::
        pop_a(i)=pop_a(i)-\\frac{\\Delta(i)[\\Delta(i) < 0]k_T(i)}{candidates}dispersal\\frac{pop_a(i)}{pop_T(i)}

    :param da.Array population: Sub-population to redistribute (subset of the ``total_population``)
    :param da.Array total_population: Total population
    :param da.Array carrying_capacity: Total Carrying Capacity (k)
    :param float distance: Maximum dispersal distance
    :param float csx: Cell size of the domain in the x-direction
    :param float csy: Cell size of the domain in the y-direction

    .. Attention:: Ensure the cell sizes are in the same units as the specified direction

    :Keyword Arguments:
        **mask** (*array*) --
            A weighting mask that scales dispersal based on the normalized mask value (default: None)
    :return: Redistributed population
    """
    if any([
            not isinstance(a, da.Array)
            for a in [population, total_population, carrying_capacity]
    ]):
        raise DispersalError('Inputs must be dask arrays')

    if distance == 0:
        # Don't do anything
        return population

    chunks = tuple(c[0] if c else 0 for c in population.chunks)[:2]

    mask = kwargs.get('mask', None)
    if mask is None:
        mask = da.ones(shape=population.shape, dtype='float32', chunks=chunks)

    # Normalize the mask
    mask_min = da.min(mask)
    _range = da.max(mask) - mask_min
    mask = da.where(_range > 0, (mask - mask_min) / _range, 1.)

    # Calculate the kernel indices and shape
    kernel = calculate_kernel(distance, csx, csy)
    if kernel is None:
        # Not enough distance to cover a grid cell
        return population
    kernel, m, n = kernel
    m = int(m)
    n = int(n)

    a = da.pad(da.dstack(
        [population, total_population, carrying_capacity, mask]),
               ((m, m), (n, n), (0, 0)),
               'constant',
               constant_values=0)
    _m = -m
    if m == 0:
        _m = None
    _n = -n
    if n == 0:
        _n = None
    output = delayed(density_flux_task)(a, kernel, m, n)[m:_m, n:_n, 0]
    output = da.from_delayed(output, population.shape, np.float32)

    return output.rechunk(chunks)
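
A toy NumPy sketch of the neighbourhood arithmetic described in the docstring, with made-up values; element 0 plays the centroid and the candidate sum is taken as a magnitude:

import numpy as np

pop_T = np.array([8.0, 2.0, 6.0, 1.0])    # total population per element
k_T = np.array([10.0, 10.0, 10.0, 10.0])  # total carrying capacity per element

rho = np.mean(pop_T / k_T)                # mean neighbourhood density
delta = pop_T / k_T - rho                 # density gradient per element
candidates = np.sum(-delta[delta < 0] * k_T[delta < 0])
source = delta[0] * k_T[0]                # surplus at the centroid (element 0)
dispersal = min(source, candidates) if delta[0] > 0 else 0.0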
Beispiel #29
0
    def get_bil_info(self):
        """Return neighbour info.

        Returns
        -------
        t__ : numpy array
            Vertical fractional distances from corner to the new points
        s__ : numpy array
            Horizontal fractional distances from corner to the new points
        valid_input_index : numpy array
            Valid indices in the input data
        index_array : numpy array
            Mapping array from valid source points to target points

        """
        if self.source_geo_def.size < self.neighbours:
            warnings.warn('Searching for %s neighbours in %s data points' %
                          (self.neighbours, self.source_geo_def.size))

        # Create kd-tree
        valid_input_index, resample_kdtree = self._create_resample_kdtree()
        # This is a numpy array
        self.valid_input_index = valid_input_index

        if resample_kdtree.n == 0:
            # Handle if all input data is reduced away
            bilinear_t, bilinear_s, valid_input_index, index_array = \
                _create_empty_bil_info(self.source_geo_def,
                                       self.target_geo_def)
            self.bilinear_t = bilinear_t
            self.bilinear_s = bilinear_s
            self.valid_input_index = valid_input_index
            self.index_array = index_array

            return bilinear_t, bilinear_s, valid_input_index, index_array

        target_lons, target_lats = self.target_geo_def.get_lonlats()
        valid_output_idx = ((target_lons >= -180) & (target_lons <= 180) &
                            (target_lats <= 90) & (target_lats >= -90))

        index_array, distance_array = self._query_resample_kdtree(
            resample_kdtree, target_lons, target_lats, valid_output_idx)

        # Reduce index reference
        input_size = da.sum(self.valid_input_index)
        index_mask = index_array == input_size
        index_array = da.where(index_mask, 0, index_array)

        # Get output projection as pyproj object
        proj = Proj(self.target_geo_def.proj_str)

        # Get output x/y coordinates
        out_x, out_y = self.target_geo_def.get_proj_coords(chunks=CHUNK_SIZE)
        out_x = da.ravel(out_x)
        out_y = da.ravel(out_y)

        # Get input x/y coordinates
        in_x, in_y = _get_input_xy_dask(self.source_geo_def, proj,
                                        self.valid_input_index, index_array)

        # Get the four closest corner points around each output location
        pt_1, pt_2, pt_3, pt_4, index_array = \
            _get_bounding_corners_dask(in_x, in_y, out_x, out_y,
                                       self.neighbours, index_array)

        # Calculate vertical and horizontal fractional distances t and s
        t__, s__ = _get_ts_dask(pt_1, pt_2, pt_3, pt_4, out_x, out_y)
        self.bilinear_t, self.bilinear_s = t__, s__

        self.valid_output_index = valid_output_idx
        self.index_array = index_array
        self.distance_array = distance_array

        self._get_slices()

        return (self.bilinear_t, self.bilinear_s,
                self.slices, self.mask_slices,
                self.out_coords)
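
The "Reduce index reference" step above can be shown in isolation. A small sketch with made-up indices: neighbours that fell outside the valid input set are flagged with index == input_size, reset to 0 here and masked out later:

import numpy as np
import dask.array as da

valid_input_index = np.array([True, False, True, True])
input_size = int(valid_input_index.sum())              # == 3 valid inputs
index_array = da.from_array(np.array([0, 3, 2, 1, 3]), chunks=2)
index_mask = index_array == input_size                 # out-of-range markers
index_array = da.where(index_mask, 0, index_array)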
Beispiel #30
0
    def _get_valid_lonlats(self, vis):
        lons, lats = vis.attrs['area'].get_lonlats(chunks=vis.data.chunks)
        lons = da.where(lons >= 1e30, np.nan, lons)
        lats = da.where(lats >= 1e30, np.nan, lats)
        return lons, lats
Beispiel #31
0
    def decomposition(self,
                      output_dimension,
                      normalize_poissonian_noise=False,
                      algorithm='PCA',
                      signal_mask=None,
                      navigation_mask=None,
                      get=threaded.get,
                      num_chunks=None,
                      reproject=True,
                      bounds=True,
                      **kwargs):
        """Perform Incremental (Batch) decomposition on the data, keeping n
        significant components.

        Parameters
        ----------
        output_dimension : int
            the number of significant components to keep
        normalize_poissonian_noise : bool
            If True, scale the SI to normalize Poissonian noise
        algorithm : str
            One of 'PCA', 'ORPCA' or 'ONMF'. By default ('PCA'),
            IncrementalPCA from scikit-learn is run.
        get : dask scheduler
            the dask scheduler to use for computations;
            default `dask.threaded.get`
        num_chunks : int
            the number of dask chunks to pass to the decomposition model.
            More chunks require more memory, but should run faster. Will be
            increased to contain at least output_dimension signals.
        navigation_mask : {BaseSignal, numpy array, dask array}
            The navigation locations marked as True are not used in the
            decomposition.
        signal_mask : {BaseSignal, numpy array, dask array}
            The signal locations marked as True are not used in the
            decomposition.
        reproject : bool
            Reproject data on the learnt components (factors) after learning.
        bounds : {tuple, bool}
            The (min, max) values of the data to normalize before learning.
            If tuple (min, max), those values will be used for normalization.
            If True (default), the extremes will be looked up (expensive).
            If False, no normalization is done (learning may be very slow).
            If normalize_poissonian_noise is True, this cannot be True.
        **kwargs
            passed to the partial_fit/fit functions.

        Notes
        -----
        Various algorithm parameters and their default values:
            ONMF:
                lambda1=1,
                kappa=1,
                robust=False,
                store_r=False
                batch_size=None
            ORPCA:
                fast=True,
                lambda1=None,
                lambda2=None,
                method=None,
                learning_rate=None,
                init=None,
                training_samples=None,
                momentum=None
            PCA:
                batch_size=None,
                copy=True,
                white=False


        """
        explained_variance = None
        explained_variance_ratio = None
        _al_data = self._data_aligned_with_axes
        nav_chunks = _al_data.chunks[:self.axes_manager.navigation_dimension]
        sig_chunks = _al_data.chunks[self.axes_manager.navigation_dimension:]

        num_chunks = 1 if num_chunks is None else num_chunks
        blocksize = np.min([multiply(ar) for ar in product(*nav_chunks)])
        nblocks = multiply([len(c) for c in nav_chunks])
        if blocksize / output_dimension < num_chunks:
            num_chunks = np.ceil(blocksize / output_dimension)
        blocksize *= num_chunks

        # LEARN
        if algorithm == 'PCA':
            from sklearn.decomposition import IncrementalPCA
            obj = IncrementalPCA(n_components=output_dimension)
            method = partial(obj.partial_fit, **kwargs)
            reproject = True

        elif algorithm == 'ORPCA':
            from hyperspy.learn.rpca import ORPCA
            kwg = {'fast': True}
            kwg.update(kwargs)
            obj = ORPCA(output_dimension, **kwg)
            method = partial(obj.fit, iterating=True)

        elif algorithm == 'ONMF':
            from hyperspy.learn.onmf import ONMF
            batch_size = kwargs.pop('batch_size', None)
            obj = ONMF(output_dimension, **kwargs)
            method = partial(obj.fit, batch_size=batch_size)

        else:
            raise ValueError('algorithm not known')

        original_data = self.data
        try:
            if normalize_poissonian_noise:
                if bounds is True:
                    bounds = False
                    # warnings.warn?
                data = self._data_aligned_with_axes
                ndim = self.axes_manager.navigation_dimension
                sdim = self.axes_manager.signal_dimension
                nm = da.logical_not(
                    da.zeros(
                        self.axes_manager.navigation_shape[::-1],
                        chunks=nav_chunks)
                    if navigation_mask is None else to_array(
                        navigation_mask, chunks=nav_chunks))
                sm = da.logical_not(
                    da.zeros(
                        self.axes_manager.signal_shape[::-1],
                        chunks=sig_chunks)
                    if signal_mask is None else to_array(
                        signal_mask, chunks=sig_chunks))
                ndim = self.axes_manager.navigation_dimension
                sdim = self.axes_manager.signal_dimension
                bH, aG = da.compute(
                    data.sum(axis=tuple(range(ndim))),
                    data.sum(axis=tuple(range(ndim, ndim + sdim))))
                bH = da.where(sm, bH, 1)
                aG = da.where(nm, aG, 1)

                raG = da.sqrt(aG)
                rbH = da.sqrt(bH)

                coeff = raG[(..., ) + (None, )*rbH.ndim] *\
                        rbH[(None, )*raG.ndim + (...,)]
                coeff = coeff.map_blocks(np.nan_to_num)
                coeff = da.where(coeff == 0, 1, coeff)
                data = data / coeff
                self.data = data

            # normalize the data for learning algs:
            if bounds:
                if bounds is True:
                    _min, _max = da.compute(self.data.min(), self.data.max())
                else:
                    _min, _max = bounds
                self.data = (self.data - _min) / (_max - _min)

            # LEARN
            this_data = []
            try:
                for chunk in progressbar(
                        self._block_iterator(
                            flat_signal=True,
                            get=get,
                            signal_mask=signal_mask,
                            navigation_mask=navigation_mask),
                        total=nblocks,
                        leave=True,
                        desc='Learn'):
                    this_data.append(chunk)
                    if len(this_data) == num_chunks:
                        thedata = np.concatenate(this_data, axis=0)
                        method(thedata)
                        this_data = []
                if len(this_data):
                    thedata = np.concatenate(this_data, axis=0)
                    method(thedata)
            except KeyboardInterrupt:
                pass

            # GET ALREADY CALCULATED RESULTS
            if algorithm == 'PCA':
                explained_variance = obj.explained_variance_
                explained_variance_ratio = obj.explained_variance_ratio_
                factors = obj.components_.T

            elif algorithm == 'ORPCA':
                _, _, U, S, V = obj.finish()
                factors = U * S
                loadings = V
                explained_variance = S**2 / len(factors)

            elif algorithm == 'ONMF':
                factors, loadings = obj.finish()
                loadings = loadings.T

            # REPROJECT
            if reproject:
                if algorithm == 'PCA':
                    method = obj.transform
                    post = lambda a: np.concatenate(a, axis=0)
                elif algorithm == 'ORPCA':
                    method = obj.project
                    obj.R = []
                    post = lambda a: obj.finish()[4]
                elif algorithm == 'ONMF':
                    method = obj.project
                    post = lambda a: np.concatenate(a, axis=1).T

                _map = map(lambda thing: method(thing),
                           self._block_iterator(
                               flat_signal=True,
                               get=get,
                               signal_mask=signal_mask,
                               navigation_mask=navigation_mask))
                H = []
                try:
                    for thing in progressbar(
                            _map, total=nblocks, desc='Project'):
                        H.append(thing)
                except KeyboardInterrupt:
                    pass
                loadings = post(H)

            if explained_variance is not None and \
                    explained_variance_ratio is None:
                explained_variance_ratio = \
                    explained_variance / explained_variance.sum()

            # RESHUFFLE "blocked" LOADINGS
            ndim = self.axes_manager.navigation_dimension
            try:
                loadings = _reshuffle_mixed_blocks(
                    loadings,
                    ndim,
                    (output_dimension,),
                    nav_chunks).reshape((-1, output_dimension))
            except ValueError:
                # In case the projection step was not finished, it's left
                # as scrambled
                pass
        finally:
            self.data = original_data

        target = self.learning_results
        target.decomposition_algorithm = algorithm
        target.output_dimension = output_dimension
        target._object = obj
        target.factors = factors
        target.loadings = loadings
        target.explained_variance = explained_variance
        target.explained_variance_ratio = explained_variance_ratio
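
A minimal NumPy sketch of the Poissonian-noise scaling used in this decomposition (toy data, masks omitted): the data are divided by the outer product of the square roots of the navigation and signal sums.

import numpy as np

rng = np.random.default_rng(0)
data = rng.poisson(5.0, size=(4, 6)).astype(float)  # navigation x signal

aG = data.sum(axis=1)          # per-navigation-pixel totals
bH = data.sum(axis=0)          # per-signal-channel totals
coeff = np.sqrt(aG)[:, None] * np.sqrt(bH)[None, :]
coeff[coeff == 0] = 1.0        # avoid division by zero, as above
scaled = data / coeff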
Beispiel #32
0
    def get_reflectance(self,
                        sun_zenith,
                        sat_zenith,
                        azidiff,
                        bandname,
                        redband=None):
        """Get the reflectance from the three sun-sat angles"""
        # Get wavelength in nm for band:
        if isinstance(bandname, float):
            LOG.warning(
                'A wavelength is provided instead of band name - ' +
                'disregard the relative spectral responses and assume ' +
                'it is the effective wavelength: %f (micro meter)', bandname)
            wvl = bandname * 1000.0
        else:
            wvl = self.get_effective_wavelength(bandname) * 1000.0

        rayl, wvl_coord, azid_coord, satz_sec_coord, sunz_sec_coord = \
            self.get_reflectance_lut()

        # force dask arrays
        compute = False
        if HAVE_DASK and not isinstance(sun_zenith, Array):
            compute = True
            sun_zenith = from_array(sun_zenith, chunks=sun_zenith.shape)
            sat_zenith = from_array(sat_zenith, chunks=sat_zenith.shape)
            azidiff = from_array(azidiff, chunks=azidiff.shape)
            if redband is not None:
                redband = from_array(redband, chunks=redband.shape)

        clip_angle = rad2deg(arccos(1. / sunz_sec_coord.max()))
        sun_zenith = clip(sun_zenith, 0, clip_angle)
        sunzsec = 1. / cos(deg2rad(sun_zenith))
        clip_angle = rad2deg(arccos(1. / satz_sec_coord.max()))
        sat_zenith = clip(sat_zenith, 0, clip_angle)
        satzsec = 1. / cos(deg2rad(sat_zenith))
        shape = sun_zenith.shape

        if not (wvl_coord.min() < wvl < wvl_coord.max()):
            LOG.warning(
                "Effective wavelength for band %s outside 400-800 nm range!",
                str(bandname))
            LOG.info(
                "Set the rayleigh/aerosol reflectance contribution to zero!")
            if HAVE_DASK:
                chunks = sun_zenith.chunks if redband is None \
                    else redband.chunks
                res = zeros(shape, chunks=chunks)
                return res.compute() if compute else res
            else:
                return zeros(shape)

        idx = np.searchsorted(wvl_coord, wvl)
        wvl1 = wvl_coord[idx - 1]
        wvl2 = wvl_coord[idx]

        fac = (wvl2 - wvl) / (wvl2 - wvl1)
        raylwvl = fac * rayl[idx - 1, :, :, :] + (1 - fac) * rayl[idx, :, :, :]
        tic = time.time()

        smin = [sunz_sec_coord[0], azid_coord[0], satz_sec_coord[0]]
        smax = [sunz_sec_coord[-1], azid_coord[-1], satz_sec_coord[-1]]
        orders = [len(sunz_sec_coord), len(azid_coord), len(satz_sec_coord)]
        minterp = MultilinearInterpolator(smin, smax, orders)

        f_3d_grid = raylwvl
        minterp.set_values(atleast_2d(f_3d_grid.ravel()))

        def _do_interp(minterp, sunzsec, azidiff, satzsec):
            interp_points2 = np.vstack(
                (sunzsec.ravel(), 180 - azidiff.ravel(), satzsec.ravel()))
            res = minterp(interp_points2)
            return res.reshape(sunzsec.shape)

        if HAVE_DASK:
            ipn = map_blocks(_do_interp,
                             minterp,
                             sunzsec,
                             azidiff,
                             satzsec,
                             dtype=raylwvl.dtype,
                             chunks=azidiff.chunks)
        else:
            ipn = _do_interp(minterp, sunzsec, azidiff, satzsec)

        LOG.debug("Time - Interpolation: {0:f}".format(time.time() - tic))

        ipn *= 100
        res = ipn
        if redband is not None:
            res = where(redband < 20., res, (1 - (redband - 20) / 80) * res)

        res = clip(res, 0, 100)
        if compute:
            res = res.compute()
        return res
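
The final red-band weighting above tapers the Rayleigh correction linearly from full strength below 20% reflectance to zero at 100%. A toy sketch with made-up numbers:

import numpy as np
import dask.array as da

redband = da.from_array(np.array([5.0, 40.0, 90.0]), chunks=3)      # reflectance (%)
correction = da.from_array(np.array([10.0, 10.0, 10.0]), chunks=3)  # raw correction
weighted = da.where(redband < 20.0, correction,
                    (1 - (redband - 20.0) / 80.0) * correction)
weighted = weighted.clip(0, 100)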
Beispiel #33
0
    def _bt_threshold(band_data):
        # expects dask array to be passed
        return da.where(band_data >= threshold,
                        high_offset - high_factor * band_data,
                        low_offset - low_factor * band_data)
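
A standalone sketch of the two-regime brightness-temperature split above; the threshold and coefficients are invented stand-ins for the values normally taken from the calibration context:

import numpy as np
import dask.array as da

# Hypothetical calibration constants; the real ones come from the file header.
threshold, high_offset, high_factor = 1022.0, 418.0, 0.01
low_offset, low_factor = 330.0, 0.1

counts = da.from_array(np.array([100.0, 1500.0]), chunks=1)
bt = da.where(counts >= threshold,
              high_offset - high_factor * counts,
              low_offset - low_factor * counts)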
Beispiel #34
0
def _vis_calibrate(data,
                   chn,
                   calib_type,
                   pre_launch_coeffs=False,
                   calib_coeffs=None,
                   mask=True):
    """Calibrate visible channel data.

    *calib_type* in counts, reflectance, radiance.

    """
    # Calibration count to albedo, the calibration is performed separately for
    # two value ranges.
    if calib_type not in ['counts', 'radiance', 'reflectance']:
        raise ValueError('Calibration ' + calib_type + ' unknown!')

    channel = da.from_array(data["hrpt"][:, :, chn], chunks=(LINE_CHUNK, 2048))
    mask &= channel != 0

    if calib_type == 'counts':
        return channel

    channel = channel.astype(np.float64)

    if calib_type == 'radiance':
        logger.info("Radiances are not yet supported for " +
                    "the VIS/NIR channels!")

    if pre_launch_coeffs:
        coeff_idx = 2
    else:
        # check that coeffs are valid
        if np.all(data["calvis"][:, chn, 0, 4] == 0):
            logger.info(
                "No valid operational coefficients, fall back to pre-launch")
            coeff_idx = 2
        else:
            coeff_idx = 0

    intersection = da.from_array(data["calvis"][:, chn, coeff_idx, 4],
                                 chunks=LINE_CHUNK)

    if calib_coeffs is not None:
        logger.info("Updating from external calibration coefficients.")
        slope1 = da.from_array(calib_coeffs[0], chunks=LINE_CHUNK)
        intercept1 = da.from_array(calib_coeffs[1], chunks=LINE_CHUNK)
        slope2 = da.from_array(calib_coeffs[2], chunks=LINE_CHUNK)
        intercept2 = da.from_array(calib_coeffs[3], chunks=LINE_CHUNK)
    else:
        slope1 = da.from_array(data["calvis"][:, chn, coeff_idx, 0],
                               chunks=LINE_CHUNK) * 1e-10
        intercept1 = da.from_array(data["calvis"][:, chn, coeff_idx, 1],
                                   chunks=LINE_CHUNK) * 1e-7
        slope2 = da.from_array(data["calvis"][:, chn, coeff_idx, 2],
                               chunks=LINE_CHUNK) * 1e-10
        intercept2 = da.from_array(data["calvis"][:, chn, coeff_idx, 3],
                                   chunks=LINE_CHUNK) * 1e-7

        if chn == 1:
            # In the level 1b file, the visible coefficients are stored as
            # 4-byte integers. Scaling factors then convert them to real
            # numbers which are applied to the measured counts. The
            # coefficient is different depending on whether the counts are
            # less than or greater than the high-gain/low-gain transition
            # value (nominally 500). The slope for visible channels should
            # always be positive (reflectance increases with count). With
            # the pre-launch coefficients the channel 2 slope is always
            # positive but with the operational coefs the stored number in
            # the high-reflectance regime overflows the maximum 2147483647,
            # i.e. it is negative when interpreted as a signed integer.
            # So you have to modify it.
            slope2 = da.where(slope2 < 0, slope2 + 0.4294967296, slope2)

    channel = da.where(channel <= intersection[:, None],
                       channel * slope1[:, None] + intercept1[:, None],
                       channel * slope2[:, None] + intercept2[:, None])

    channel = channel.clip(min=0)

    return da.where(mask, channel, np.nan)
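
The heart of _vis_calibrate is the per-line, two-gain piecewise calibration. A sketch with invented coefficients and a 2x2 count array:

import numpy as np
import dask.array as da

counts = da.from_array(np.array([[100.0, 600.0],
                                 [400.0, 900.0]]), chunks=1)
# One intersection and one coefficient set per scan line (made-up values).
intersection = da.from_array(np.array([500.0, 500.0]), chunks=1)
slope1 = da.from_array(np.array([0.1, 0.1]), chunks=1)
intercept1 = da.from_array(np.array([-3.0, -3.0]), chunks=1)
slope2 = da.from_array(np.array([0.3, 0.3]), chunks=1)
intercept2 = da.from_array(np.array([-103.0, -103.0]), chunks=1)

albedo = da.where(counts <= intersection[:, None],
                  counts * slope1[:, None] + intercept1[:, None],
                  counts * slope2[:, None] + intercept2[:, None])
albedo = albedo.clip(min=0)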
Beispiel #35
0
def _stage_2(
    YP: Array,
    X: Array,
    Y: Array,
    alphas: Optional[ndarray] = None,
    normalize: bool = True,
    _glow_adj_alpha: bool = False,
    _glow_adj_scaling: bool = False,
) -> Tuple[Array, Array]:
    """Stage 2 - WGR Meta Regression

    This stage will train separate ridge regression models for each outcome
    using the predictions from stage 1 for that same outcome as features. These
    predictions are then evaluated based on R2 score to determine an optimal
    "meta" estimator (see `_stage_1` for the "base" estimator description). Results
    then include only predictions and coefficients from this optimal model.

    For more details, see the level 1 regression model described in step 1
    of [Mbatchou et al. 2020](https://www.biorxiv.org/content/10.1101/2020.06.19.162354v2).
    """
    assert YP.ndim == 4
    assert X.ndim == 2
    assert Y.ndim == 2
    # Check that chunking across samples is the same for all arrays
    assert YP.numblocks[2] == X.numblocks[0] == Y.numblocks[0]
    assert YP.chunks[2] == X.chunks[0] == Y.chunks[0]
    # Assert single chunks for covariates and outcomes
    assert X.numblocks[1] == Y.numblocks[1] == 1
    # Extract shape statistics
    n_variant_block, n_alpha_1 = YP.shape[:2]
    n_sample_block = Y.numblocks[0]
    n_sample, n_outcome = Y.shape
    n_covar = X.shape[1]
    n_indvar = n_covar + n_variant_block * n_alpha_1
    sample_chunks = Y.chunks[0]

    if normalize:
        assert_block_shape(YP, n_variant_block, 1, n_sample_block, 1)
        assert_chunk_shape(YP, 1, n_alpha_1, sample_chunks[0], n_outcome)
        # See: https://github.com/projectglow/glow/issues/260
        if _glow_adj_scaling:
            YP = da.map_blocks(
                lambda x: (x - x.mean(axis=2, keepdims=True))
                / x.std(axis=2, keepdims=True),
                YP,
            )
        else:
            YP = (YP - YP.mean(axis=2, keepdims=True)) / YP.std(axis=2, keepdims=True)
    # Transpose for refit on level 1 predictions
    YP = YP.transpose((3, 2, 0, 1))
    assert_array_shape(YP, n_outcome, n_sample, n_variant_block, n_alpha_1)

    if alphas is None:
        # See: https://github.com/projectglow/glow/issues/255
        if _glow_adj_alpha:
            alphas = get_alphas(n_variant_block * n_alpha_1 * n_outcome)
        else:
            alphas = get_alphas(n_variant_block * n_alpha_1)
    n_alpha_2 = alphas.size

    YR = []
    BR = []
    for i in range(n_outcome):
        # Slice and reshape to new 2D covariate matrix;
        # The order of raveling in trailing dimensions is important
        # and later reshapes will assume variants, alphas order
        XPB = YP[i].reshape((n_sample, n_variant_block * n_alpha_1))
        # Prepend covariates and chunk along first dim only
        XPB = da.concatenate((X, XPB), axis=1)
        XPB = XPB.rechunk(chunks=(None, -1))
        assert_array_shape(XPB, n_sample, n_indvar)
        assert XPB.numblocks == (n_sample_block, 1)
        # Extract outcome vector
        YB = Y[:, [i]]
        assert XPB.ndim == YB.ndim == 2
        # Fit and predict folds for each parameter
        BB, YPB = _ridge_regression_cv(XPB, YB, alphas, n_zero_reg=n_covar)[-2:]
        assert_array_shape(BB, n_alpha_2, n_sample_block * n_indvar, 1)
        assert_array_shape(YPB, n_alpha_2, n_sample, 1)
        BR.append(BB)
        YR.append(YPB)

    # Concatenate predictions along outcome dimension
    YR = da.concatenate(YR, axis=2)
    assert_block_shape(YR, 1, n_sample_block, n_outcome)
    assert_chunk_shape(YR, n_alpha_2, sample_chunks[0], 1)
    assert_array_shape(YR, n_alpha_2, n_sample, n_outcome)
    # Move samples to last dim so all others are batch
    # dims for R2 calculations
    YR = da.transpose(YR, (0, 2, 1))
    assert_array_shape(YR, n_alpha_2, n_outcome, n_sample)
    YR = YR.rechunk((-1, -1, None))
    assert_block_shape(YR, 1, 1, n_sample_block)
    assert YR.shape[1:] == Y.T.shape

    # Concatenate betas along outcome dimension
    BR = da.concatenate(BR, axis=2)
    assert_block_shape(BR, 1, n_sample_block, n_outcome)
    assert_chunk_shape(BR, n_alpha_2, n_indvar, 1)
    assert_array_shape(BR, n_alpha_2, n_sample_block * n_indvar, n_outcome)

    # Compute R2 scores within each sample block for each outcome + alpha
    R2 = da.stack(
        [
            r2_score(YR.blocks[..., i], Y.T.blocks[..., i])
            # Avoid warnings on R2 calculations for blocks with single rows
            if YR.chunks[-1][i] > 1 else da.full(YR.shape[:-1], np.nan)
            for i in range(n_sample_block)
        ]
    )
    assert_array_shape(R2, n_sample_block, n_alpha_2, n_outcome)
    # Coerce to finite or nan before nan-aware mean
    R2 = da.where(da.isfinite(R2), R2, np.nan)
    # Find highest mean alpha score for each outcome across blocks
    R2M = da.nanmean(R2, axis=0)
    assert_array_shape(R2M, n_alpha_2, n_outcome)
    # Identify index for the alpha value with the highest mean score
    R2I = da.argmax(R2M, axis=0)
    assert_array_shape(R2I, n_outcome)

    # Choose the predictions corresponding to the model with best score
    YRM = da.stack([YR[R2I[i], i, :] for i in range(n_outcome)], axis=-1)
    YRM = YRM.rechunk((None, -1))
    assert_block_shape(YRM, n_sample_block, 1)
    assert_chunk_shape(YRM, sample_chunks[0], n_outcome)
    assert_array_shape(YRM, n_sample, n_outcome)
    # Choose the betas corresponding to the model with the best score
    BRM = da.stack([BR[R2I[i], :, i] for i in range(n_outcome)], axis=-1)
    BRM = BRM.rechunk((None, -1))
    assert_block_shape(BRM, n_sample_block, 1)
    assert_chunk_shape(BRM, n_indvar, n_outcome)
    assert_array_shape(BRM, n_sample_block * n_indvar, n_outcome)
    return BRM, YRM
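
The model-selection tail of _stage_2 (pick the alpha with the best mean R2 per outcome, then gather its predictions) in isolation, with tiny made-up scores:

import numpy as np
import dask.array as da

n_alpha, n_outcome, n_sample = 3, 2, 5
R2M = da.from_array(np.array([[0.1, 0.5],
                              [0.4, 0.2],
                              [0.3, 0.1]]), chunks=-1)      # mean R2 per alpha/outcome
YR = da.from_array(np.arange(n_alpha * n_outcome * n_sample, dtype=float)
                   .reshape((n_alpha, n_outcome, n_sample)), chunks=-1)

R2I = da.argmax(R2M, axis=0)   # best alpha index per outcome
YRM = da.stack([YR[R2I[i], i, :] for i in range(n_outcome)], axis=-1)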
Beispiel #36
0
def randn(shape, chunks=None, nan=False, seed=0):
    rng = da.random.RandomState(seed)
    x = 5 + 3 * rng.standard_normal(shape, chunks=chunks)
    if nan:
        x = da.where(x < 0, np.nan, x)
    return x
Beispiel #37
0
    def read_band(self, key, info):
        """Read the data"""
        tic = datetime.now()
        header = {}
        with open(self.filename, "rb") as fp_:

            header['block1'] = np.fromfile(
                fp_, dtype=_BASIC_INFO_TYPE, count=1)
            header["block2"] = np.fromfile(fp_, dtype=_DATA_INFO_TYPE, count=1)
            header["block3"] = np.fromfile(fp_, dtype=_PROJ_INFO_TYPE, count=1)
            header["block4"] = np.fromfile(fp_, dtype=_NAV_INFO_TYPE, count=1)
            header["block5"] = np.fromfile(fp_, dtype=_CAL_INFO_TYPE, count=1)
            logger.debug("Band number = " +
                         str(header["block5"]['band_number'][0]))
            logger.debug('Time_interval: %s - %s',
                         str(self.start_time), str(self.end_time))
            band_number = header["block5"]['band_number'][0]
            if band_number < 7:
                cal = np.fromfile(fp_, dtype=_VISCAL_INFO_TYPE, count=1)
            else:
                cal = np.fromfile(fp_, dtype=_IRCAL_INFO_TYPE, count=1)

            header['calibration'] = cal

            header["block6"] = np.fromfile(
                fp_, dtype=_INTER_CALIBRATION_INFO_TYPE, count=1)
            header["block7"] = np.fromfile(
                fp_, dtype=_SEGMENT_INFO_TYPE, count=1)
            header["block8"] = np.fromfile(
                fp_, dtype=_NAVIGATION_CORRECTION_INFO_TYPE, count=1)
            # 8 The navigation corrections:
            ncorrs = header["block8"]['numof_correction_info_data'][0]
            dtype = np.dtype([
                ("line_number_after_rotation", "<u2"),
                ("shift_amount_for_column_direction", "f4"),
                ("shift_amount_for_line_direction", "f4"),
            ])
            corrections = []
            for i in range(ncorrs):
                corrections.append(np.fromfile(fp_, dtype=dtype, count=1))
            fp_.seek(40, 1)
            header['navigation_corrections'] = corrections
            header["block9"] = np.fromfile(fp_,
                                           dtype=_OBS_TIME_INFO_TYPE,
                                           count=1)
            numobstimes = header["block9"]['number_of_observation_times'][0]

            dtype = np.dtype([
                ("line_number", "<u2"),
                ("observation_time", "f8"),
            ])
            lines_and_times = []
            for i in range(numobstimes):
                lines_and_times.append(np.fromfile(fp_,
                                                   dtype=dtype,
                                                   count=1))
            header['observation_time_information'] = lines_and_times
            fp_.seek(40, 1)

            header["block10"] = np.fromfile(fp_,
                                            dtype=_ERROR_INFO_TYPE,
                                            count=1)
            dtype = np.dtype([
                ("line_number", "<u2"),
                ("numof_error_pixels_per_line", "<u2"),
            ])
            num_err_info_data = header["block10"][
                'number_of_error_info_data'][0]
            err_info_data = []
            for i in range(num_err_info_data):
                err_info_data.append(np.fromfile(fp_, dtype=dtype, count=1))
            header['error_information_data'] = err_info_data
            fp_.seek(40, 1)

            np.fromfile(fp_, dtype=_SPARE_TYPE, count=1)

            nlines = int(header["block2"]['number_of_lines'][0])
            ncols = int(header["block2"]['number_of_columns'][0])

            res = da.from_array(np.memmap(self.filename, offset=fp_.tell(),
                                          dtype='<u2',  shape=(nlines, ncols)),
                                chunks=CHUNK_SIZE)
        res = da.where(res == 65535, np.float32(np.nan), res)

        self._header = header

        logger.debug("Reading time " + str(datetime.now() - tic))

        res = self.calibrate(res, key.calibration)

        new_info = dict(units=info['units'],
                        standard_name=info['standard_name'],
                        wavelength=info['wavelength'],
                        resolution='resolution',
                        id=key,
                        name=key.name,
                        platform_name=self.platform_name,
                        sensor=self.sensor,
                        satellite_longitude=float(
                            self.nav_info['SSP_longitude']),
                        satellite_latitude=float(
                            self.nav_info['SSP_latitude']),
                        satellite_altitude=float(self.nav_info['distance_earth_center_to_satellite'] -
                                                 self.proj_info['earth_equatorial_radius']) * 1000)
        res = xr.DataArray(res, attrs=new_info, dims=['y', 'x'])
        res = res.where(header['block5']["count_value_outside_scan_pixels"][0] != res)
        res = res.where(header['block5']["count_value_error_pixels"][0] != res)
        res = res.where(self.geo_mask())
        return res
Beispiel #38
0
    def get_sample_from_neighbour_info(self, data):

        # flatten x and y in the source array

        output_shape = []
        chunks = []
        source_dims = data.dims
        for dim in source_dims:
            if dim == 'y':
                output_shape += [self.target_geo_def.y_size]
                chunks += [1000]
            elif dim == 'x':
                output_shape += [self.target_geo_def.x_size]
                chunks += [1000]
            else:
                output_shape += [data[dim].size]
                chunks += [10]

        new_dims = []
        xy_dims = []
        source_shape = [1, 1]
        chunks = [1, 1]
        for i, dim in enumerate(data.dims):
            if dim not in ['x', 'y']:
                new_dims.append(dim)
                source_shape[1] *= data.shape[i]
                chunks[1] *= 10
            else:
                xy_dims.append(dim)
                source_shape[0] *= data.shape[i]
                chunks[0] *= 1000

        new_dims = xy_dims + new_dims

        target_shape = [np.prod(self.target_geo_def.shape), source_shape[1]]
        source_data = data.transpose(*new_dims).data.reshape(source_shape)

        input_size = self.valid_input_index.sum()
        index_mask = (self.index_array == input_size)
        new_index_array = da.where(
            index_mask, 0, self.index_array).ravel().astype(int).compute()
        valid_targets = self.valid_output_index.ravel()

        target_lines = []

        for line in range(target_shape[1]):
            #target_data_line = target_data[:, line]
            new_data = source_data[:, line][self.valid_input_index.ravel()]
            # could this be a bug in dask ? we have to compute to avoid errors
            result = new_data.compute()[new_index_array]
            result[index_mask.ravel()] = np.nan
            #target_data_line = da.full(target_shape[0], np.nan, chunks=1000000)
            target_data_line = np.full(target_shape[0], np.nan)
            target_data_line[valid_targets] = result
            target_lines.append(target_data_line[:, np.newaxis])

        target_data = np.hstack(target_lines)

        new_shape = []
        for dim in new_dims:
            if dim == 'x':
                new_shape.append(self.target_geo_def.x_size)
            elif dim == 'y':
                new_shape.append(self.target_geo_def.y_size)
            else:
                new_shape.append(data[dim].size)

        output_arr = DataArray(da.from_array(target_data.reshape(new_shape), chunks=[1000] * len(new_shape)),
                               dims=new_dims)
        for dim in source_dims:
            if dim == 'x':
                output_arr['x'] = self.target_geo_def.proj_x_coords
            elif dim == 'y':
                output_arr['y'] = self.target_geo_def.proj_y_coords
            else:
                output_arr[dim] = data[dim]

        return output_arr.transpose(*source_dims)
Beispiel #39
0
    def get_sample_from_bil_info(self, data, fill_value=np.nan,
                                 output_shape=None):
        if fill_value is None:
            fill_value = np.nan
        # FIXME: can be this made into a dask construct ?
        cols, lines = np.meshgrid(np.arange(data['x'].size),
                                  np.arange(data['y'].size))
        cols = da.ravel(cols)
        lines = da.ravel(lines)
        try:
            self.valid_input_index = self.valid_input_index.compute()
        except AttributeError:
            pass
        vii = self.valid_input_index.squeeze()
        try:
            self.index_array = self.index_array.compute()
        except AttributeError:
            pass

        # ia contains reduced (valid) indices of the source array, and has the
        # shape of the destination array
        ia = self.index_array
        rlines = lines[vii][ia]
        rcols = cols[vii][ia]

        slices = []
        mask_slices = []
        mask_2d_added = False
        coords = {}
        try:
            # FIXME: Use same chunk size as input data
            coord_x, coord_y = self.target_geo_def.get_proj_vectors_dask()
        except AttributeError:
            coord_x, coord_y = None, None

        for _, dim in enumerate(data.dims):
            if dim == 'y':
                slices.append(rlines)
                if not mask_2d_added:
                    mask_slices.append(ia >= self.target_geo_def.size)
                    mask_2d_added = True
                if coord_y is not None:
                    coords[dim] = coord_y
            elif dim == 'x':
                slices.append(rcols)
                if not mask_2d_added:
                    mask_slices.append(ia >= self.target_geo_def.size)
                    mask_2d_added = True
                if coord_x is not None:
                    coords[dim] = coord_x
            else:
                slices.append(slice(None))
                mask_slices.append(slice(None))
                try:
                    coords[dim] = data.coords[dim]
                except KeyError:
                    pass

        res = data.values[slices]
        res[mask_slices] = fill_value

        try:
            p_1 = res[:, :, 0]
            p_2 = res[:, :, 1]
            p_3 = res[:, :, 2]
            p_4 = res[:, :, 3]
        except IndexError:
            p_1 = res[:, 0]
            p_2 = res[:, 1]
            p_3 = res[:, 2]
            p_4 = res[:, 3]

        s__, t__ = self.bilinear_s, self.bilinear_t

        res = (p_1 * (1 - s__) * (1 - t__) +
               p_2 * s__ * (1 - t__) +
               p_3 * (1 - s__) * t__ +
               p_4 * s__ * t__)

        epsilon = 1e-6
        data_min = da.nanmin(data) - epsilon
        data_max = da.nanmax(data) + epsilon

        idxs = (res > data_max) | (res < data_min)
        res = da.where(idxs, fill_value, res)
        shp = self.target_geo_def.shape
        if data.ndim == 3:
            res = da.reshape(res, (res.shape[0], shp[0], shp[1]))
        else:
            res = da.reshape(res, (shp[0], shp[1]))
        res = DataArray(da.from_array(res, chunks=CHUNK_SIZE),
                        dims=data.dims, coords=coords)

        return res
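
The core of get_sample_from_bil_info is the four-point bilinear blend with fractional distances s and t. A scalar sketch with toy values:

# Corner values and fractional distances (toy numbers).
p_1, p_2, p_3, p_4 = 1.0, 2.0, 3.0, 4.0
s__, t__ = 0.25, 0.75

value = (p_1 * (1 - s__) * (1 - t__) +
         p_2 * s__ * (1 - t__) +
         p_3 * (1 - s__) * t__ +
         p_4 * s__ * t__)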
Beispiel #40
0
def gradient(image):
    return da.where(
        da.fabs(image) <= huber['threshold'], 2 * image,
        2 * huber['threshold'] * da.sign(image))
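
gradient reads the Huber threshold from an enclosing huber mapping; a usage sketch assuming a module-level configuration:

import numpy as np
import dask.array as da

huber = {'threshold': 1.0}  # assumed configuration consulted by gradient()
x = da.from_array(np.array([-3.0, -0.5, 0.2, 2.0]), chunks=2)
grad = gradient(x)          # 2*x inside the threshold, clipped slope outside
print(grad.compute())       # -> [-2.  -1.   0.4  2. ]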
Beispiel #41
0
    def decomposition(self,
                      normalize_poissonian_noise=False,
                      algorithm="SVD",
                      output_dimension=None,
                      signal_mask=None,
                      navigation_mask=None,
                      get=dask.threaded.get,
                      num_chunks=None,
                      reproject=True,
                      print_info=True,
                      **kwargs):
        """Perform Incremental (Batch) decomposition on the data.

        The results are stored in ``self.learning_results``.

        Read more in the :ref:`User Guide <big_data.decomposition>`.

        Parameters
        ----------
        normalize_poissonian_noise : bool, default False
            If True, scale the signal to normalize Poissonian noise using
            the approach described in [KeenanKotula2004]_.
        algorithm : {'SVD', 'PCA', 'ORPCA', 'ORNMF'}, default 'SVD'
            The decomposition algorithm to use.
        output_dimension : int or None, default None
            Number of components to keep/calculate. If None, keep all
            (only valid for 'SVD' algorithm)
        get : dask scheduler
            the dask scheduler to use for computations;
            default `dask.threaded.get`
        num_chunks : int or None, default None
            the number of dask chunks to pass to the decomposition model.
            More chunks require more memory, but should run faster. Will be
            increased to contain at least ``output_dimension`` signals.
        navigation_mask : {BaseSignal, numpy array, dask array}
            The navigation locations marked as True are not used in the
            decomposition.
        signal_mask : {BaseSignal, numpy array, dask array}
            The signal locations marked as True are not used in the
            decomposition.
        reproject : bool, default True
            Reproject data on the learnt components (factors) after learning.
        print_info : bool, default True
            If True, print information about the decomposition being performed.
            In the case of sklearn.decomposition objects, this includes the
            values of all arguments of the chosen sklearn algorithm.
        **kwargs
            passed to the partial_fit/fit functions.

        References
        ----------
        .. [KeenanKotula2004] M. Keenan and P. Kotula, "Accounting for Poisson noise
            in the multivariate analysis of ToF-SIMS spectrum images", Surf.
            Interface Anal 36(3) (2004): 203-212.

        See Also
        --------
        * :py:meth:`~.learn.mva.MVA.decomposition` for non-lazy signals
        * :py:func:`dask.array.linalg.svd`
        * :py:class:`sklearn.decomposition.IncrementalPCA`
        * :py:class:`~.learn.rpca.ORPCA`
        * :py:class:`~.learn.ornmf.ORNMF`

        """
        if kwargs.get("bounds", False):
            warnings.warn(
                "The `bounds` keyword is deprecated and will be removed "
                "in v2.0. Since version > 1.3 this has no effect.",
                VisibleDeprecationWarning,
            )
            kwargs.pop("bounds", None)

        # Deprecate 'ONMF' for 'ORNMF'
        if algorithm == "ONMF":
            warnings.warn(
                "The argument `algorithm='ONMF'` has been deprecated and will "
                "be removed in future. Please use `algorithm='ORNMF'` instead.",
                VisibleDeprecationWarning,
            )
            algorithm = "ORNMF"

        # Check algorithms requiring output_dimension
        algorithms_require_dimension = ["PCA", "ORPCA", "ORNMF"]
        if algorithm in algorithms_require_dimension and output_dimension is None:
            raise ValueError(
                "`output_dimension` must be specified for '{}'".format(
                    algorithm))

        explained_variance = None
        explained_variance_ratio = None

        _al_data = self._data_aligned_with_axes
        nav_chunks = _al_data.chunks[:self.axes_manager.navigation_dimension]
        sig_chunks = _al_data.chunks[self.axes_manager.navigation_dimension:]

        num_chunks = 1 if num_chunks is None else num_chunks
        blocksize = np.min([multiply(ar) for ar in product(*nav_chunks)])
        nblocks = multiply([len(c) for c in nav_chunks])

        if output_dimension and blocksize / output_dimension < num_chunks:
            num_chunks = np.ceil(blocksize / output_dimension)

        blocksize *= num_chunks

        # Initialize print_info
        to_print = [
            "Decomposition info:", "  normalize_poissonian_noise={}".format(
                normalize_poissonian_noise),
            "  algorithm={}".format(algorithm),
            "  output_dimension={}".format(output_dimension)
        ]

        # LEARN
        if algorithm == "PCA":
            if not import_sklearn.sklearn_installed:
                raise ImportError("algorithm='PCA' requires scikit-learn")

            obj = import_sklearn.sklearn.decomposition.IncrementalPCA(
                n_components=output_dimension)
            method = partial(obj.partial_fit, **kwargs)
            reproject = True
            to_print.extend(["scikit-learn estimator:", obj])

        elif algorithm == "ORPCA":
            from hyperspy.learn.rpca import ORPCA

            batch_size = kwargs.pop("batch_size", None)
            obj = ORPCA(output_dimension, **kwargs)
            method = partial(obj.fit, batch_size=batch_size)

        elif algorithm == "ORNMF":
            from hyperspy.learn.ornmf import ORNMF

            batch_size = kwargs.pop("batch_size", None)
            obj = ORNMF(output_dimension, **kwargs)
            method = partial(obj.fit, batch_size=batch_size)

        elif algorithm != "SVD":
            raise ValueError("'algorithm' not recognised")

        original_data = self.data
        try:
            _logger.info("Performing decomposition analysis")

            if normalize_poissonian_noise:
                _logger.info("Scaling the data to normalize Poissonian noise")

                data = self._data_aligned_with_axes
                ndim = self.axes_manager.navigation_dimension
                sdim = self.axes_manager.signal_dimension
                nm = da.logical_not(
                    da.zeros(self.axes_manager.navigation_shape[::-1],
                             chunks=nav_chunks) if navigation_mask is None else
                    to_array(navigation_mask, chunks=nav_chunks))
                sm = da.logical_not(
                    da.zeros(self.axes_manager.signal_shape[::-1],
                             chunks=sig_chunks) if signal_mask is None else
                    to_array(signal_mask, chunks=sig_chunks))
                bH, aG = da.compute(
                    data.sum(axis=tuple(range(ndim))),
                    data.sum(axis=tuple(range(ndim, ndim + sdim))),
                )
                bH = da.where(sm, bH, 1)
                aG = da.where(nm, aG, 1)

                raG = da.sqrt(aG)
                rbH = da.sqrt(bH)

                coeff = (raG[(...,) + (None,) * rbH.ndim] *
                         rbH[(None,) * raG.ndim + (...,)])
                coeff = coeff.map_blocks(np.nan_to_num)
                coeff = da.where(coeff == 0, 1, coeff)
                data = data / coeff
                self.data = data

            # LEARN
            if algorithm == "SVD":
                reproject = False
                from dask.array.linalg import svd

                try:
                    self._unfolded4decomposition = self.unfold()
                    # TODO: implement masking
                    if navigation_mask or signal_mask:
                        raise NotImplementedError(
                            "Masking is not yet implemented for lazy SVD")

                    U, S, V = svd(self.data)

                    if output_dimension is None:
                        min_shape = min(min(U.shape), min(V.shape))
                    else:
                        min_shape = output_dimension

                    U = U[:, :min_shape]
                    S = S[:min_shape]
                    V = V[:min_shape]

                    factors = V.T
                    explained_variance = S**2 / self.data.shape[0]
                    loadings = U * S
                finally:
                    if self._unfolded4decomposition is True:
                        self.fold()
                        self._unfolded4decomposition = False
            else:
                this_data = []
                try:
                    for chunk in progressbar(
                            self._block_iterator(
                                flat_signal=True,
                                get=get,
                                signal_mask=signal_mask,
                                navigation_mask=navigation_mask,
                            ),
                            total=nblocks,
                            leave=True,
                            desc="Learn",
                    ):
                        this_data.append(chunk)
                        if len(this_data) == num_chunks:
                            thedata = np.concatenate(this_data, axis=0)
                            method(thedata)
                            this_data = []
                    if len(this_data):
                        thedata = np.concatenate(this_data, axis=0)
                        method(thedata)
                except KeyboardInterrupt:  # pragma: no cover
                    pass

            # GET ALREADY CALCULATED RESULTS
            if algorithm == "PCA":
                explained_variance = obj.explained_variance_
                explained_variance_ratio = obj.explained_variance_ratio_
                factors = obj.components_.T

            elif algorithm == "ORPCA":
                factors, loadings = obj.finish()
                loadings = loadings.T

            elif algorithm == "ORNMF":
                factors, loadings = obj.finish()
                loadings = loadings.T

            # REPROJECT
            if reproject:
                if algorithm == "PCA":
                    method = obj.transform

                    def post(a):
                        return np.concatenate(a, axis=0)

                elif algorithm == "ORPCA":
                    method = obj.project

                    def post(a):
                        return np.concatenate(a, axis=1).T

                elif algorithm == "ORNMF":
                    method = obj.project

                    def post(a):
                        return np.concatenate(a, axis=1).T

                _map = map(
                    lambda thing: method(thing),
                    self._block_iterator(
                        flat_signal=True,
                        get=get,
                        signal_mask=signal_mask,
                        navigation_mask=navigation_mask,
                    ),
                )
                H = []
                try:
                    for thing in progressbar(_map,
                                             total=nblocks,
                                             desc="Project"):
                        H.append(thing)
                except KeyboardInterrupt:  # pragma: no cover
                    pass
                loadings = post(H)

            if explained_variance is not None and explained_variance_ratio is None:
                explained_variance_ratio = explained_variance / explained_variance.sum()

            # RESHUFFLE "blocked" LOADINGS
            ndim = self.axes_manager.navigation_dimension
            if algorithm != "SVD":  # Only needed for online algorithms
                try:
                    loadings = _reshuffle_mixed_blocks(loadings, ndim,
                                                       (output_dimension, ),
                                                       nav_chunks).reshape(
                                                           (-1,
                                                            output_dimension))
                except ValueError:
                    # In case the projection step was not finished, it's left
                    # as scrambled
                    pass
        finally:
            self.data = original_data

        target = self.learning_results
        target.decomposition_algorithm = algorithm
        target.output_dimension = output_dimension
        if algorithm != "SVD":
            target._object = obj
        target.factors = factors
        target.loadings = loadings
        target.explained_variance = explained_variance
        target.explained_variance_ratio = explained_variance_ratio

        # Rescale the results if the noise was normalized
        if normalize_poissonian_noise is True:
            target.factors = target.factors * rbH.ravel()[:, np.newaxis]
            target.loadings = target.loadings * raG.ravel()[:, np.newaxis]

        # Print details about the decomposition we just performed
        if print_info:
            print("\n".join([str(pr) for pr in to_print]))
Beispiel #42
0
def _get_abs_max_from_min_max(min_, max_):
    """From array of min and array of max, get array of abs max."""
    return da.where(-min_ > max_, min_, max_)
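A quick check of the helper above: elementwise it keeps whichever of min_/max_ has the larger magnitude, sign included (hedged usage sketch, toy values):

import dask.array as da
import numpy as np

min_ = da.from_array(np.array([-5.0, -1.0, -0.5]), chunks=2)
max_ = da.from_array(np.array([2.0, 3.0, 0.2]), chunks=2)
abs_max = da.where(-min_ > max_, min_, max_)   # value with the largest absolute magnitude
print(abs_max.compute())                       # [-5.   3.  -0.5]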
Beispiel #43
0
    def _mask_invalid(self, data, header):
        """Mask invalid data"""
        invalid = da.logical_or(data == header['block5']["count_value_outside_scan_pixels"][0],
                                data == header['block5']["count_value_error_pixels"][0])
        return da.where(invalid, np.float32(np.nan), data)
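The same pattern outside the reader class, with a hypothetical header dict standing in for the real block5 metadata (the count values below are made up for illustration):

import dask.array as da
import numpy as np

header = {'block5': {'count_value_outside_scan_pixels': [65534],   # hypothetical fill values
                     'count_value_error_pixels': [65535]}}
data = da.from_array(np.array([10, 65534, 20, 65535], dtype=np.uint16), chunks=2)
invalid = da.logical_or(data == header['block5']['count_value_outside_scan_pixels'][0],
                        data == header['block5']['count_value_error_pixels'][0])
print(da.where(invalid, np.float32(np.nan), data).compute())   # [10. nan 20. nan]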
Beispiel #44
0
    def __call__(self, signal, out=None, axes=None):
        """Slice the signal according to the ROI, and return it.

        Arguments
        ---------
        signal : Signal
            The signal to slice with the ROI.
        out : Signal, default = None
            If the 'out' argument is supplied, the sliced output will be put
            into this instead of returning a Signal. See Signal.__getitem__()
            for more details on 'out'.
        axes : specification of axes to use, default = None
            The axes argument specifies which axes the ROI will be applied on.
            The items in the collection can be either of the following:
                * a tuple of:
                    - DataAxis. These will not be checked with
                      signal.axes_manager.
                    - anything that will index signal.axes_manager
                * For any other value, it will check whether the navigation
                  space can fit the right number of axis, and use that if it
                  fits. If not, it will try the signal space.
        """

        if axes is None and signal in self.signal_map:
            axes = self.signal_map[signal][1]
        else:
            axes = self._parse_axes(axes, signal.axes_manager)

        natax = signal.axes_manager._get_axes_in_natural_order()
        # Slice original data with a circumscribed rectangle
        cx = self.cx + 0.5001 * axes[0].scale
        cy = self.cy + 0.5001 * axes[1].scale
        ranges = [[cx - self.r, cx + self.r],
                  [cy - self.r, cy + self.r]]
        slices = self._make_slices(natax, axes, ranges)
        ir = [slices[natax.index(axes[0])],
              slices[natax.index(axes[1])]]
        vx = axes[0].axis[ir[0]] - cx
        vy = axes[1].axis[ir[1]] - cy
        gx, gy = np.meshgrid(vx, vy)
        gr = gx**2 + gy**2
        mask = gr > self.r**2
        if self.r_inner != t.Undefined:
            mask |= gr < self.r_inner**2
        tiles = []
        shape = []
        chunks = []
        for i in range(len(slices)):
            if signal._lazy:
                chunks.append(signal.data.chunks[i][0])
            if i == natax.index(axes[0]):
                thisshape = mask.shape[0]
                tiles.append(thisshape)
                shape.append(thisshape)
            elif i == natax.index(axes[1]):
                thisshape = mask.shape[1]
                tiles.append(thisshape)
                shape.append(thisshape)
            else:
                tiles.append(signal.axes_manager._axes[i].size)
                shape.append(1)
        mask = mask.reshape(shape)

        nav_axes = [ax.navigate for ax in axes]
        nav_dim = signal.axes_manager.navigation_dimension
        if True in nav_axes:
            if False in nav_axes:

                slicer = signal.inav[slices[:nav_dim]].isig.__getitem__
                slices = slices[nav_dim:]
            else:
                slicer = signal.inav.__getitem__
                slices = slices[0:nav_dim]
        else:
            slicer = signal.isig.__getitem__
            slices = slices[nav_dim:]

        roi = slicer(slices, out=out)
        roi = out or roi
        if roi._lazy:
            import dask.array as da
            mask = da.from_array(mask, chunks=chunks)
            mask = da.broadcast_to(mask, tiles)
            # By default promotes dtype to float if required
            roi.data = da.where(mask, np.nan, roi.data)
        else:
            mask = np.broadcast_to(mask, tiles)
            roi.data = np.ma.masked_array(roi.data, mask, hard_mask=True)
        if out is None:
            return roi
        else:
            out.events.data_changed.trigger(out)
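For lazy signals the circular ROI above boils down to building a boolean mask of everything outside the circle and blanking it with da.where, which promotes the data to float so NaN can be stored. A stripped-down sketch of just that masking step (toy grid and radius, not the HyperSpy ROI machinery):

import dask.array as da
import numpy as np

yy, xx = np.mgrid[-3:4, -3:4]                     # 7x7 coordinate grid centred on the ROI
outside = (xx ** 2 + yy ** 2) > 2.0 ** 2          # True outside a circle of radius 2
data = da.ones((7, 7), chunks=(4, 4))
masked = da.where(da.from_array(outside, chunks=(4, 4)), np.nan, data)
print(int(np.isnan(masked.compute()).sum()))      # number of blanked pixels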
Beispiel #45
0
    def get_bil_info(self):
        """Return neighbour info.

        Returns
        -------
        t__ : numpy array
            Vertical fractional distances from corner to the new points
        s__ : numpy array
            Horizontal fractional distances from corner to the new points
        input_idxs : numpy array
            Valid indices in the input data
        idx_arr : numpy array
            Mapping array from valid source points to target points

        """
        if self.source_geo_def.size < self.neighbours:
            warnings.warn('Searching for %s neighbours in %s data points' %
                          (self.neighbours, self.source_geo_def.size))

        # Create kd-tree
        valid_input_idx, resample_kdtree = self._create_resample_kdtree()
        # This is a numpy array
        self.valid_input_index = valid_input_idx

        if resample_kdtree.n == 0:
            # Handle if all input data is reduced away
            bilinear_t, bilinear_s, valid_input_index, index_array = \
                _create_empty_bil_info(self.source_geo_def,
                                       self.target_geo_def)
            self.bilinear_t = bilinear_t
            self.bilinear_s = bilinear_s
            self.valid_input_index = valid_input_idx
            self.index_array = index_array

            return bilinear_t, bilinear_s, valid_input_index, index_array

        target_lons, target_lats = self.target_geo_def.get_lonlats()
        valid_output_idx = ((target_lons >= -180) & (target_lons <= 180) &
                            (target_lats <= 90) & (target_lats >= -90))

        index_array, distance_array = self._query_resample_kdtree(
            resample_kdtree, target_lons, target_lats, valid_output_idx)

        # Reduce index reference
        input_size = da.sum(self.valid_input_index)
        index_mask = index_array == input_size
        index_array = da.where(index_mask, 0, index_array)

        # Get output projection as pyproj object
        proj = Proj(self.target_geo_def.proj_str)

        # Get output x/y coordinates
        out_x, out_y = _get_output_xy_dask(self.target_geo_def, proj)

        # Get input x/y coordinates
        in_x, in_y = _get_input_xy_dask(self.source_geo_def, proj,
                                        self.valid_input_index, index_array)

        # Get the four closest corner points around each output location
        pt_1, pt_2, pt_3, pt_4, index_array = \
            _get_bounding_corners_dask(in_x, in_y, out_x, out_y,
                                       self.neighbours, index_array)

        # Calculate vertical and horizontal fractional distances t and s
        t__, s__ = _get_ts_dask(pt_1, pt_2, pt_3, pt_4, out_x, out_y)
        self.bilinear_t, self.bilinear_s = t__, s__

        self.valid_output_index = valid_output_idx
        self.index_array = index_array
        self.distance_array = distance_array

        return (self.bilinear_t, self.bilinear_s, self.valid_input_index,
                self.index_array)
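The "Reduce index reference" step above maps the kd-tree's out-of-range marker (equal to the number of valid input points) to index 0 so the array can later be used for fancy indexing without going out of bounds. In isolation, with toy values and the same convention assumed:

import dask.array as da
import numpy as np

input_size = 5                                           # number of valid input points
index_array = da.from_array(np.array([0, 2, 5, 4, 5]), chunks=3)
index_array = da.where(index_array == input_size, 0, index_array)
print(index_array.compute())                             # [0 2 0 4 0]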
Beispiel #46
0
    def get_reflectance(self, sun_zenith, sat_zenith, azidiff, bandname, redband=None):
        """Get the reflectance from the three sun-sat angles"""
        # Get wavelength in nm for band:
        if isinstance(bandname, float):
            LOG.warning('A wavelength is provided instead of band name - ' +
                        'disregard the relative spectral responses and assume ' +
                        'it is the effective wavelength: %f (micro meter)', bandname)
            wvl = bandname * 1000.0
        else:
            wvl = self.get_effective_wavelength(bandname)
            wvl = wvl * 1000.0

        rayl, wvl_coord, azid_coord, satz_sec_coord, sunz_sec_coord = self.get_reflectance_lut()

        # force dask arrays
        compute = False
        if HAVE_DASK and not isinstance(sun_zenith, Array):
            compute = True
            sun_zenith = from_array(sun_zenith, chunks=sun_zenith.shape)
            sat_zenith = from_array(sat_zenith, chunks=sat_zenith.shape)
            azidiff = from_array(azidiff, chunks=azidiff.shape)
            if redband is not None:
                redband = from_array(redband, chunks=redband.shape)

        clip_angle = rad2deg(arccos(1. / sunz_sec_coord.max()))
        sun_zenith = clip(sun_zenith, 0, clip_angle)
        sunzsec = 1. / cos(deg2rad(sun_zenith))
        clip_angle = rad2deg(arccos(1. / satz_sec_coord.max()))
        sat_zenith = clip(sat_zenith, 0, clip_angle)
        satzsec = 1. / cos(deg2rad(sat_zenith))
        shape = sun_zenith.shape

        if not(wvl_coord.min() < wvl < wvl_coord.max()):
            LOG.warning(
                "Effective wavelength for band %s outside 400-800 nm range!",
                str(bandname))
            LOG.info(
                "Set the rayleigh/aerosol reflectance contribution to zero!")
            if HAVE_DASK:
                chunks = sun_zenith.chunks if redband is None else redband.chunks
                res = zeros(shape, chunks=chunks)
                return res.compute() if compute else res
            else:
                return zeros(shape)

        idx = np.searchsorted(wvl_coord, wvl)
        wvl1 = wvl_coord[idx - 1]
        wvl2 = wvl_coord[idx]

        fac = (wvl2 - wvl) / (wvl2 - wvl1)
        raylwvl = fac * rayl[idx - 1, :, :, :] + (1 - fac) * rayl[idx, :, :, :]
        tic = time.time()

        smin = [sunz_sec_coord[0], azid_coord[0], satz_sec_coord[0]]
        smax = [sunz_sec_coord[-1], azid_coord[-1], satz_sec_coord[-1]]
        orders = [
            len(sunz_sec_coord), len(azid_coord), len(satz_sec_coord)]
        f_3d_grid = atleast_2d(raylwvl.ravel())

        if HAVE_DASK and isinstance(smin[0], Array):
            # compute all of these at the same time before passing to the interpolator
            # otherwise they are computed separately
            smin, smax, orders, f_3d_grid = da.compute(smin, smax, orders, f_3d_grid)
        minterp = MultilinearInterpolator(smin, smax, orders)
        minterp.set_values(f_3d_grid)

        if HAVE_DASK:
            ipn = map_blocks(self._do_interp, minterp, sunzsec, azidiff,
                             satzsec, dtype=raylwvl.dtype, chunks=azidiff.chunks)
        else:
            ipn = self._do_interp(minterp, sunzsec, azidiff, satzsec)

        LOG.debug("Time - Interpolation: {0:f}".format(time.time() - tic))

        ipn *= 100
        res = ipn
        if redband is not None:
            res = where(redband < 20., res,
                        (1 - (redband - 20) / 80) * res)

        res = clip(res, 0, 100)
        if compute:
            res = res.compute()
        return res
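The final red-band correction above keeps the full Rayleigh contribution below 20 % reflectance and fades it out linearly up to 100 %. A small hedged sketch of that weighting with dask (toy numbers, not the pyspectral LUT machinery):

import dask.array as da
import numpy as np

res = da.full((4,), 10.0, chunks=2)                               # raw correction, in percent
redband = da.from_array(np.array([5.0, 20.0, 60.0, 100.0]), chunks=2)
res = da.where(redband < 20.0, res, (1 - (redband - 20) / 80) * res)
print(da.clip(res, 0, 100).compute())                             # [10. 10.  5.  0.]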
Beispiel #47
0
def segment(
    image,
    channels,
    model_type,
    diameter,
    fast_mode=False,
    use_anisotropy=True,
    iou_depth=2,
    iou_threshold=0.7,
):
    """Use cellpose to segment nuclei in fluorescence data.

    Parameters
    ----------
    image : array of shape (z, y, x, channel)
        Image used for detection of objects
    channels : array of int with size 2
        See cellpose
    model_type : str
        "cyto" or "nuclei"
    diameter : tuple of size 3
        Approximate diameter (in pixels) of a segmented region, i.e. cell width
    fast_mode : bool
        In fast mode, network averaging, tiling, and augmentation are turned off.
    use_anisotropy : bool
        If true, use anisotropy parameter of cellpose
    iou_depth: dask depth parameter
        Number of pixels of overlap to use in intersection-over-union calculation when
        linking segments across neighboring, overlapping dask chunk regions.
    iou_threshold: float
        Minimum intersection-over-union in neighboring, overlapping dask chunk regions
        to be considered the same segment.  The region for calculating IOU is given by the
        iou_depth parameter.

    Returns
    -------
    segments : array of int32 with same shape as input
        Each segmented cell is assigned a number and all its pixels contain that value (0 is background).
    """
    assert image.ndim == 4, image.ndim
    assert image.shape[-1] in {1, 2}, image.shape
    assert diameter[1] == diameter[2], diameter

    diameter_yx = diameter[1]
    anisotropy = diameter[0] / diameter[1] if use_anisotropy else None

    image = da.asarray(image)
    image = image.rechunk({-1: -1})  # color channel is chunked together

    depth = tuple(np.ceil(diameter).astype(np.int64))
    boundary = "reflect"

    # No chunking in channel direction
    image = da.overlap.overlap(image, depth + (0, ), boundary)

    block_iter = zip(
        np.ndindex(*image.numblocks),
        map(
            functools.partial(operator.getitem, image),
            da.core.slices_from_chunks(image.chunks),
        ),
    )

    labeled_blocks = np.empty(image.numblocks[:-1], dtype=object)
    total = None
    for index, input_block in block_iter:
        labeled_block, n = dask.delayed(segment_chunk, nout=2)(
            input_block,
            channels,
            model_type,
            diameter_yx,
            anisotropy,
            fast_mode,
            index,
        )

        shape = input_block.shape[:-1]
        labeled_block = da.from_delayed(labeled_block,
                                        shape=shape,
                                        dtype=np.int32)

        n = dask.delayed(np.int32)(n)
        n = da.from_delayed(n, shape=(), dtype=np.int32)

        total = n if total is None else total + n

        block_label_offset = da.where(labeled_block > 0, total, np.int32(0))
        labeled_block += block_label_offset

        labeled_blocks[index[:-1]] = labeled_block
        total += n

    # Put all the blocks together
    block_labeled = da.block(labeled_blocks.tolist())

    depth = da.overlap.coerce_depth(len(depth), depth)

    if np.prod(block_labeled.numblocks) > 1:
        iou_depth = da.overlap.coerce_depth(len(depth), iou_depth)

        if any(iou_depth[ax] > depth[ax] for ax in depth.keys()):
            raise DistSegError("iou_depth (%s) > depth (%s)" %
                               (iou_depth, depth))

        trim_depth = {k: depth[k] - iou_depth[k] for k in depth.keys()}
        block_labeled = da.overlap.trim_internal(block_labeled,
                                                 trim_depth,
                                                 boundary=boundary)
        block_labeled = link_labels(
            block_labeled,
            total,
            iou_depth,
            iou_threshold=iou_threshold,
        )

        block_labeled = da.overlap.trim_internal(block_labeled,
                                                 iou_depth,
                                                 boundary=boundary)

    else:
        block_labeled = da.overlap.trim_internal(block_labeled,
                                                 depth,
                                                 boundary=boundary)

    return block_labeled
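Inside the loop above, da.where(labeled_block > 0, total, 0) is what keeps labels from different chunks globally unique: every non-background pixel in a block is shifted by the running label count before the blocks are stitched back together and linked by IOU. The core of that offsetting in isolation (toy labels, not the cellpose pipeline):

import dask.array as da
import numpy as np

block_a = da.from_array(np.array([0, 1, 1, 2], dtype=np.int32), chunks=2)   # labels from chunk A
block_b = da.from_array(np.array([0, 1, 2, 2], dtype=np.int32), chunks=2)   # labels from chunk B
total_a = np.int32(2)                                                       # labels already used by chunk A
block_b = block_b + da.where(block_b > 0, total_a, np.int32(0))             # background stays 0
print(da.concatenate([block_a, block_b]).compute())                         # [0 1 1 2 0 3 4 4]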
Beispiel #48
0
def invalid_to_nan(t__, s__):
    idxs = (t__ < 0) | (t__ > 1) | (s__ < 0) | (s__ > 1)
    t__ = da.where(idxs, np.nan, t__)
    s__ = da.where(idxs, np.nan, s__)
    return t__, s__
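Bilinear fractional distances are only meaningful inside the unit square, so anything outside [0, 1] is turned into NaN in one pass. For example (hedged usage sketch):

import dask.array as da
import numpy as np

t__ = da.from_array(np.array([0.2, 1.5, 0.8]), chunks=2)
s__ = da.from_array(np.array([0.4, 0.3, -0.1]), chunks=2)
idxs = (t__ < 0) | (t__ > 1) | (s__ < 0) | (s__ > 1)
print(da.where(idxs, np.nan, t__).compute(),    # [0.2 nan nan]
      da.where(idxs, np.nan, s__).compute())    # [0.4 nan nan]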
Beispiel #49
0
def individual_heterozygosity(
    ds: Dataset,
    *,
    call_allele_count: Hashable = variables.call_allele_count,
    merge: bool = True,
) -> Dataset:
    """Compute per call individual heterozygosity.

    Individual heterozygosity is the probability that two alleles
    drawn at random without replacement, from an individual at a
    given site, are not identical in state. Therefore, individual
    heterozygosity is defined for diploid and polyploid calls but
    will return nan in the case of haploid calls.

    Parameters
    ----------
    ds
        Dataset containing genotype calls.
    call_allele_count
        Input variable name holding call_allele_count as defined by
        :data:`sgkit.variables.call_allele_count_spec`.
        If the variable is not present in ``ds``, it will be computed
        using :func:`count_call_alleles`.
    merge
        If True (the default), merge the input dataset and the computed
        output variables into a single dataset, otherwise return only
        the computed output variables.
        See :ref:`dataset_merge` for more details.

    Returns
    -------
    A dataset containing :data:`sgkit.variables.call_heterozygosity_spec`
    of per genotype observed heterozygosity with shape (variants, samples)
    containing values within the interval [0, 1] or nan if ploidy < 2.

    Examples
    --------

    >>> import sgkit as sg
    >>> ds = sg.simulate_genotype_call_dataset(n_variant=4, n_sample=2, seed=1)
    >>> sg.display_genotypes(ds) # doctest: +NORMALIZE_WHITESPACE
    samples    S0   S1
    variants
    0         1/0  1/0
    1         1/0  1/1
    2         0/1  1/0
    3         0/0  0/0

    >>> sg.individual_heterozygosity(ds)["call_heterozygosity"].values # doctest: +NORMALIZE_WHITESPACE
    array([[1., 1.],
           [1., 0.],
           [1., 1.],
           [0., 0.]])
    """
    ds = define_variable_if_absent(ds, variables.call_allele_count,
                                   call_allele_count, count_call_alleles)
    variables.validate(ds,
                       {call_allele_count: variables.call_allele_count_spec})

    AC = da.asarray(ds.call_allele_count)
    K = AC.sum(axis=-1)
    # use nan denominator to avoid divide by zero with K - 1
    K2 = da.where(K > 1, K, np.nan)
    AF = AC / K2[..., None]
    HI = (1 - da.sum(AF**2, axis=-1)) * (K / (K2 - 1))
    new_ds = create_dataset(
        {variables.call_heterozygosity: (("variants", "samples"), HI)})
    return conditional_merge_datasets(ds, new_ds, merge)
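The K2 = da.where(K > 1, K, np.nan) line is what makes haploid calls come out as NaN instead of raising a divide-by-zero in K / (K2 - 1). The same arithmetic on bare allele counts (a sketch, not the sgkit dataset API):

import dask.array as da
import numpy as np

# allele counts per call: two diploid calls and one haploid call
AC = da.from_array(np.array([[1, 1], [2, 0], [1, 0]]), chunks=2)
K = AC.sum(axis=-1)                        # ploidy per call: [2, 2, 1]
K2 = da.where(K > 1, K, np.nan)            # NaN denominator for haploid calls
AF = AC / K2[..., None]
HI = (1 - da.sum(AF ** 2, axis=-1)) * (K / (K2 - 1))
print(HI.compute())                        # [ 1.  0. nan]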
Beispiel #50
0
            lambda df: df.dup_strings.where(df.dup_strings != 'a'),
            tm.assert_series_equal,
            id='series_literal',
        ),
        pytest.param(
            lambda t: t.dup_strings,
            lambda t: t.dup_strings,
            lambda df: df.dup_strings.where(df.dup_strings != df.dup_strings),
            tm.assert_series_equal,
            id='series_series',
        ),
        pytest.param(
            lambda t: ibis.literal('a'),
            lambda t: t.dup_strings,
            lambda df: dd.from_array(
                da.where(df.dup_strings.eq('a').values, np.nan, 'a')
            ),
            tm.assert_series_equal,
            id='literal_series',
        ),
    ],
)
def test_nullif(t, df, left, right, expected, compare):
    expr = left(t).nullif(right(t))
    result = execute(expr)
    if isinstance(result, (dd.Series, dd.DataFrame)):
        compare(result.compute(), expected(df).compute())
    else:
        compare(result, expected(df))

Beispiel #51
0
def test_where_has_informative_error():
    x = da.ones(5, chunks=3)
    try:
        result = da.where(x > 0)
    except Exception as e:
        assert 'dask' in str(e)
Beispiel #52
0
    def _correct_slope(self, slope):
        # 0 slope is invalid. Note: slope can be a scalar or array.
        return da.where(slope == 0, 1, slope)
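Replacing a zero slope with 1 avoids invalid values (e.g. division by zero) in later calibration arithmetic; the substituted value is arbitrary, since a zero slope marks invalid data anyway. For instance (hedged sketch):

import dask.array as da
import numpy as np

slope = da.from_array(np.array([0.5, 0.0, 2.0]), chunks=2)
print(da.where(slope == 0, 1, slope).compute())   # [0.5 1.  2. ]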