Example #1
# Imports assumed for this excerpt; unique_baselines is expected to
# live in africanus.averaging.support
import numpy as np
from numpy.testing import assert_array_equal

from africanus.averaging.support import unique_baselines

def test_unique_baselines(ant1, ant2):
    # Reverse ant1, ant2 to test that sort works
    ant1 = np.flipud(ant1)
    ant2 = np.flipud(ant2)

    test_bl = np.stack([ant1, ant2], axis=1)

    bl, idx, inv, counts = unique_baselines(ant1, ant2)
    assert_array_equal(bl, [[0, 0], [0, 1], [0, 2], [1, 2], [2, 3]])
    assert_array_equal(bl[inv], test_bl)
    assert_array_equal(test_bl[idx], bl)
    assert_array_equal(counts, [2, 3, 1, 3, 1])
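
For reference, the contract exercised by this test can be sketched in plain
NumPy. This is a minimal sketch, assuming unique_baselines behaves like a
row-wise np.unique over stacked (ant1, ant2) pairs; the function name below
is ours, not the library's:

import numpy as np

def unique_baselines_sketch(ant1, ant2):
    # Stack the antenna pairs into (row, 2) baseline coordinates
    bl = np.stack([ant1, ant2], axis=1)
    # A row-wise np.unique yields the sorted unique baselines plus
    # first-occurrence indices, the inverse mapping and counts --
    # the same four values the assertions above rely on
    return np.unique(bl, axis=0, return_index=True,
                     return_inverse=True, return_counts=True)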
Example #2
# Imports assumed for this excerpt
import numpy as np

from africanus.averaging.support import unique_time, unique_baselines

def _gen_testing_lookup(time, interval, ant1, ant2, flag_row, time_bin_secs,
                        row_meta):
    """
    Generates the same lookup as row_mapper, but via an
    independent implementation, for test verification.

    Returns
    -------
    list of (float, (int, int), list, list)
        Each tuple in the list corresponds to an output row, and
        is composed of `(avg_time, (ant1, ant2), effective_rows, nominal_rows)`

    """
    utime, _, time_inv, _ = unique_time(time)
    ubl, _, bl_inv, _ = unique_baselines(ant1, ant2)
    bl_time_lookup = np.full((ubl.shape[0], utime.shape[0]),
                             -1,
                             dtype=np.int32)

    # Create the row index
    row_idx = np.arange(time.size)

    # Assign the row indices
    bl_time_lookup[bl_inv, time_inv] = row_idx

    # Create the time, baseline, row map
    time_bl_row_map = []

    # For each baseline, bin data such that it fits within time_bin_secs.
    # A new bin is started when
    # t2 + i2/2 - (t1 - i1/2) > time_bin_secs
    # where (t1, i1) are the time and interval of the first sample
    # in the bin and (t2, i2) those of the candidate sample.
    # Compute two different bins
    # 1. Effective row bin, which only includes unflagged rows
    #    unless the entire bin is flagged, in which case it includes flagged
    #    data
    # 2. Nominal row bin, which includes both flagged and unflagged rows

    for bl, (a1, a2) in enumerate(ubl):
        bl_row_idx = bl_time_lookup[bl, :]

        effective_bin_map = []
        effective_map = []

        nominal_bin_map = []
        nominal_map = []

        for ri in bl_row_idx:
            if ri == -1:
                continue

            half_int = 0.5 * interval[ri]

            # We're starting a new bin
            if len(nominal_map) == 0:
                bin_low = time[ri] - half_int
            # Reached past the endpoint of the bin, start a new one
            elif time[ri] + half_int - bin_low > time_bin_secs:
                if len(effective_map) > 0:
                    effective_bin_map.append(effective_map)
                    nominal_bin_map.append(nominal_map)
                # No effective samples, the entire bin must be flagged
                elif len(nominal_map) > 0:
                    effective_bin_map.append(nominal_map)
                    nominal_bin_map.append(nominal_map)
                else:
                    raise ValueError("Zero-filled bin")

                effective_map = []
                nominal_map = []

            # Effective only includes unflagged samples
            if flag_row[ri] == 0:
                effective_map.append(ri)

            # Nominal includes all samples
            nominal_map.append(ri)

        # Add any remaining values
        if len(effective_map) > 0:
            effective_bin_map.append(effective_map)
            nominal_bin_map.append(nominal_map)
        # No effective samples, the entire bin must be flagged
        # so we add nominal samples
        elif len(nominal_map) > 0:
            effective_bin_map.append(nominal_map)
            nominal_bin_map.append(nominal_map)

        # Produce a (avg_time, bl, effective_rows, nominal_rows) tuple
        time_bl_row_map.extend(
            (time[nrows].mean(), (a1, a2), erows, nrows)
            for erows, nrows in zip(effective_bin_map, nominal_bin_map))

    # Sort the lookup on averaged times
    return sorted(time_bl_row_map, key=lambda tup: tup[0])
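
The binning rule in the comments above is easy to lose in the surrounding
bookkeeping, so here is a minimal standalone sketch of just that rule, with
illustrative values of our own choosing and flagging elided: a sample joins
the current bin unless its upper edge t + i/2 lies more than time_bin_secs
beyond the bin's lower edge.

import numpy as np

# Illustrative inputs: one baseline, unit intervals, 2 second bins
time = np.array([1.0, 2.0, 4.0])
interval = np.ones_like(time)
time_bin_secs = 2.0

bin_map = []   # one list of sample times per output bin
current = []   # samples accumulated into the current bin

for t, i in zip(time.tolist(), interval.tolist()):
    half_int = 0.5 * i

    # A new bin adopts its first sample's lower edge as its origin
    if len(current) == 0:
        bin_low = t - half_int
    # The sample's upper edge exceeds the bin size: finalise the
    # current bin and start a new one at this sample's lower edge
    elif t + half_int - bin_low > time_bin_secs:
        bin_map.append(current)
        current = []
        bin_low = t - half_int

    current.append(t)

if current:
    bin_map.append(current)

print(bin_map)  # [[1.0, 2.0], [4.0]]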
Example #3
    def impl(time,
             interval,
             antenna1,
             antenna2,
             flag_row=None,
             time_bin_secs=1):
        ubl, _, bl_inv, _ = unique_baselines(antenna1, antenna2)
        utime, _, time_inv, _ = unique_time(time)

        nbl = ubl.shape[0]
        ntime = utime.shape[0]

        sentinel = np.finfo(time.dtype).max
        out_rows = numba.uint32(0)

        scratch = np.full(3 * nbl * ntime, -1, dtype=np.int32)
        row_lookup = scratch[:nbl * ntime].reshape(nbl, ntime)
        bin_lookup = scratch[nbl * ntime:2 * nbl * ntime].reshape(nbl, ntime)
        inv_argsort = scratch[2 * nbl * ntime:]
        time_lookup = np.zeros((nbl, ntime), dtype=time.dtype)
        interval_lookup = np.zeros((nbl, ntime), dtype=interval.dtype)

        bin_flagged = np.zeros((nbl, ntime), dtype=np.bool_)

        # Create a mapping from the full bl x time resolution back
        # to the original input rows
        for r in range(time.shape[0]):
            bl = bl_inv[r]
            t = time_inv[r]
            row_lookup[bl, t] = r

        # Average times over each baseline and construct the
        # bin_lookup and time_lookup arrays
        for bl in range(ubl.shape[0]):
            tbin = numba.int32(0)
            bin_count = numba.int32(0)
            bin_flag_count = numba.int32(0)
            bin_low = time.dtype.type(0)

            for t in range(utime.shape[0]):
                # Lookup input row
                r = row_lookup[bl, t]

                # Ignore if not present
                if r == -1:
                    continue

                # At this point, we decide whether to contribute to
                # the current bin, or create a new one. We don't add
                # the current sample to the current bin if
                # high - low > time_bin_secs
                half_int = interval[r] * 0.5

                # We're starting a new bin anyway,
                # just set the lower bin value
                if bin_count == 0:
                    bin_low = time[r] - half_int
                # If we exceed the seconds in the bin,
                # normalise the time and start a new bin
                elif time[r] + half_int - bin_low > time_bin_secs:
                    # Normalise and flag the bin
                    # if total counts match flagged counts
                    if bin_count > 0:
                        time_lookup[bl, tbin] /= bin_count
                        bin_flagged[bl, tbin] = bin_count == bin_flag_count
                    # There was nothing in the bin
                    else:
                        time_lookup[bl, tbin] = sentinel
                        bin_flagged[bl, tbin] = False

                    tbin += 1
                    bin_count = 0
                    bin_flag_count = 0

                # Record the output bin associated with the row
                bin_lookup[bl, t] = tbin

                # Time + Interval take flagged + unflagged
                # samples into account (nominal value)
                time_lookup[bl, tbin] += time[r]
                interval_lookup[bl, tbin] += interval[r]
                bin_count += 1

                # Record flags
                if is_flagged_fn(flag_row, r):
                    bin_flag_count += 1

            # Normalise the last bin if it has entries in it
            if bin_count > 0:
                time_lookup[bl, tbin] /= bin_count
                bin_flagged[bl, tbin] = bin_count == bin_flag_count
                tbin += 1

            # Add this baseline's number of bins to the output rows
            out_rows += tbin

            # Set any remaining bins to sentinel value and unflagged
            for b in range(tbin, ntime):
                time_lookup[bl, b] = sentinel
                bin_flagged[bl, b] = False

        # Flatten the time lookup and argsort it
        flat_time = time_lookup.ravel()
        flat_int = interval_lookup.ravel()
        argsort = np.argsort(flat_time, kind='mergesort')

        # Generate lookup from flattened (bl, time) to output row
        for i, a in enumerate(argsort):
            inv_argsort[a] = i

        # Construct the final row map
        row_map = np.empty((time.shape[0]), dtype=np.uint32)

        # Construct output flag row, if necessary
        out_flag_row = output_flag_row(out_rows, flag_row)

        # foreach input row
        for in_row in range(time.shape[0]):
            # Lookup baseline and time
            bl = bl_inv[in_row]
            t = time_inv[in_row]

            # lookup time bin and output row
            tbin = bin_lookup[bl, t]
            # lookup output row in inv_argsort
            out_row = inv_argsort[bl * ntime + tbin]

            if out_row >= out_rows:
                raise RowMapperError("out_row >= out_rows")

            # Handle output row flagging
            set_flag_row(flag_row, in_row, out_flag_row, out_row,
                         bin_flagged[bl, tbin])

            row_map[in_row] = out_row

        time_ret = flat_time[argsort[:out_rows]]
        int_ret = flat_int[argsort[:out_rows]]

        return RowMapOutput(row_map, time_ret, int_ret, out_flag_row)
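
The inv_argsort loop above is an inverse-permutation construction: after the
stable mergesort of the flattened (baseline, time) lookup, inv_argsort[a]
answers "which time-ordered output row did flattened slot a become?". A small
NumPy illustration of the same identity, with values of our own choosing:

import numpy as np

flat_time = np.array([3.0, 1.0, 2.0])
argsort = np.argsort(flat_time, kind='mergesort')  # [1 2 0]

# Scattering the output positions through argsort inverts the
# permutation, exactly as the enumerate loop above does
inv_argsort = np.empty_like(argsort)
inv_argsort[argsort] = np.arange(argsort.size)

print(inv_argsort)         # [2 0 1]
print(flat_time[argsort])  # [1. 2. 3.] -- time-sorted output rows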
Example #4
def test_row_mapper(time, interval, ant1, ant2, flagged_rows, time_bin_secs):
    utime, _, time_inv, _ = unique_time(time)
    ubl, _, bl_inv, _ = unique_baselines(ant1, ant2)
    mask = np.full((ubl.shape[0], utime.shape[0]), -1, dtype=np.int32)

    mask[bl_inv, time_inv] = np.arange(time.size)

    flag_row = flag_row_factory(time.size, flagged_rows)

    ret = row_mapper(time,
                     interval,
                     ant1,
                     ant2,
                     flag_row=flag_row,
                     time_bin_secs=time_bin_secs)

    # For TIME AND INTERVAL, flagged inputs can
    # contribute to unflagged outputs
    new_time = np.zeros_like(ret.time)
    new_interval = np.zeros_like(ret.interval)
    counts = np.zeros(ret.time.shape, dtype=np.uint32)
    np.add.at(new_time, ret.map, time)
    np.add.at(new_interval, ret.map, interval)
    np.add.at(counts, ret.map, 1)

    assert_array_equal(ret.time, new_time / counts)
    assert_array_equal(ret.interval, new_interval)

    # For TIME_CENTROID and EXPOSURE,
    # unflagged inputs only contribute to unflagged outputs and
    # flagged inputs only contribute to flagged outputs

    # Now recalculate time_avg using the row_map
    new_tc = np.zeros_like(ret.time)
    new_exp = np.zeros_like(ret.interval)
    counts = np.zeros(ret.time.shape, dtype=np.uint32)

    sel = flag_row == ret.flag_row[ret.map]
    np.add.at(new_tc, ret.map[sel], time[sel])
    np.add.at(new_exp, ret.map[sel], interval[sel])
    np.add.at(counts, ret.map[sel], 1)

    ant1_avg = np.empty(ret.time.shape, dtype=ant1.dtype)
    ant2_avg = np.empty(ret.time.shape, dtype=ant2.dtype)
    ant1_avg[ret.map[sel]] = ant1[sel]
    ant2_avg[ret.map[sel]] = ant2[sel]

    # Do it a different way
    new_tc2 = np.zeros_like(ret.time)
    new_exp2 = np.zeros_like(ret.interval)
    counts2 = np.zeros(ret.time.shape, dtype=np.uint32)

    for ri, ro in enumerate(ret.map):
        if flag_row[ri] == 1 and ret.flag_row[ro] == 1:
            new_tc2[ro] += time[ri]
            new_exp2[ro] += interval[ri]
            counts2[ro] += 1
        elif flag_row[ri] == 0 and ret.flag_row[ro] == 0:
            new_tc2[ro] += time[ri]
            new_exp2[ro] += interval[ri]
            counts2[ro] += 1

    assert_array_almost_equal(new_tc / counts, new_tc2 / counts2)
    assert_array_almost_equal(new_exp, new_exp2)
Example #5
    def impl(time,
             interval,
             ant1,
             ant2,
             uvw,
             chan_width,
             chan_freq,
             max_uvw_dist,
             flag_row=None,
             max_fov=3.0,
             decorrelation=0.98,
             time_bin_secs=None,
             min_nchan=1):
        # 𝞓 𝝿 𝞇 𝞍 𝝼

        if decorrelation < 0.0 or decorrelation > 1.0:
            raise ValueError("0.0 <= decorrelation <= 1.0 must hold")

        if max_fov <= 0.0 or max_fov > 90.0:
            raise ValueError("0.0 < max_fov <= 90.0 must hold")

        max_lm = np.deg2rad(max_fov)

        ubl, _, bl_inv, _ = unique_baselines(ant1, ant2)
        utime, _, time_inv, _ = unique_time(time)

        nrow = time.shape[0]
        ntime = utime.shape[0]
        nbl = ubl.shape[0]
        nchan = chan_width.shape[0]
        nchan_factors = factors(nchan)
        bandwidth = chan_width.sum()

        if min_nchan is None:
            min_nchan = 1
        else:
            s = np.searchsorted(nchan_factors, min_nchan, side='left')
            min_nchan = max(min_nchan, nchan_factors[s])

        if nchan == 0:
            raise ValueError("zero channels")

        # Create the row lookup
        row_lookup = np.full((nbl, ntime), -1, dtype=np.int32)
        bin_lookup = np.full((nbl, ntime), -1, dtype=np.int32)
        bin_chan_width = np.full((nbl, ntime), 0.0, dtype=chan_width.dtype)
        sentinel = np.finfo(time.dtype).max
        time_lookup = np.full((nbl, ntime), sentinel, dtype=time.dtype)
        interval_lookup = np.full((nbl, ntime), sentinel, dtype=interval.dtype)
        # Is the entire bin flagged?
        bin_flagged = np.zeros((nbl, ntime), dtype=np.bool_)
        bin_chan_map = np.empty((nbl, ntime, nchan), dtype=np.int32)

        out_rows = 0
        nr_of_time_bins = 0
        out_row_chans = 0

        def update_lookups(finalised, bl):
            """
            Closure which updates lookups for a baseline,
            given a binner's finalisation data
            """
            # NOTE(sjperkins) Why do scalars need this, but not arrays?
            nonlocal out_rows
            nonlocal out_row_chans
            nonlocal min_nchan

            tbin = finalised.tbin

            time_lookup[bl, tbin] = finalised.time
            interval_lookup[bl, tbin] = finalised.interval
            bin_flagged[bl, tbin] = finalised.flag
            nchan = max(finalised.nchan, min_nchan)
            bin_nchan = chan_width.shape[0] // nchan
            bin_chan_width[bl, tbin] = bandwidth / finalised.nchan

            # Construct the channel map
            for c in range(chan_width.shape[0]):
                bin_chan_map[bl, tbin, c] = c // bin_nchan

            out_rows += 1
            out_row_chans += nchan

        for r in range(nrow):
            t = time_inv[r]
            bl = bl_inv[r]

            if row_lookup[bl, t] != -1:
                raise ValueError("Duplicate (TIME, ANTENNA1, ANTENNA2)")

            row_lookup[bl, t] = r

        # If we don't have time_bin_secs
        # set it to the maximum floating point value,
        # effectively ignoring this limit
        if not have_time_bin_secs:
            time_bin_secs = np.finfo(time.dtype).max

        # This is derived from Synthesis & Imaging II (18-31).
        # It converts a decrease in amplitude into a change in phase
        dphi = np.sqrt(6. * (1. - decorrelation))

        binner = JitBinner(0, 0, max_lm, dphi, time_bin_secs, chan_freq.max())

        for bl in range(nbl):
            # Reset the binner for this baseline
            binner.reset()

            # Auto-correlated baseline
            auto_corr = ubl[bl, 0] == ubl[bl, 1]

            for t in range(ntime):
                # Lookup row, continue if non-existent
                r = row_lookup[bl, t]

                if r == -1:
                    continue

                # Start a new bin
                if binner.empty:
                    binner.start_bin(r, time, interval, flag_row)
                # Try to add the row to the bin
                # If this fails, finalise the current bin and start a new one
                elif not binner.add_row(r, auto_corr, time, interval, uvw,
                                        flag_row):
                    f = binner.finalise_bin(auto_corr, uvw, nchan_factors,
                                            chan_width, chan_freq)
                    update_lookups(f, bl)
                    # Post-finalisation, the bin is empty, start a new bin
                    binner.start_bin(r, time, interval, flag_row)

                # Record the time bin associated with this row
                bin_lookup[bl, t] = binner.tbin

            # Finalise any remaining data in the bin
            if not binner.empty:
                f = binner.finalise_bin(auto_corr, uvw, nchan_factors,
                                        chan_width, chan_freq)
                update_lookups(f, bl)

            nr_of_time_bins += binner.tbin

            # Mark remaining bins as unoccupied and unflagged
            for tbin in range(binner.tbin, ntime):
                time_lookup[bl, tbin] = sentinel
                bin_flagged[bl, tbin] = False

        assert out_rows == nr_of_time_bins

        # Flatten the time lookup and argsort it
        flat_time = time_lookup.ravel()
        argsort = np.argsort(flat_time, kind='mergesort')
        inv_argsort = np.empty_like(argsort)

        # Generate lookup from flattened (bl, time) to output row
        for i, a in enumerate(argsort):
            inv_argsort[a] = i

        # Generate row offsets
        fbin_chan_map = bin_chan_map.reshape((-1, nchan))
        offsets = np.zeros(out_rows + 1, dtype=np.uint32)
        decorr_chan_width = np.empty(out_rows, dtype=chan_width.dtype)

        # NOTE(sjperkins)
        # Testing `out_rows > 0` directly does not
        # work here, for some strange reason (numba?)
        if offsets.shape[0] > 0:
            offsets[0] = 0

            for r in range(1, out_rows + 1):
                prev_bin_chans = fbin_chan_map[argsort[r - 1]].max() + 1
                offsets[r] = offsets[r - 1] + prev_bin_chans

        # Construct the final row map
        row_chan_map = np.full((nrow, nchan), -1, dtype=np.int32)
        time_ret = np.full(out_row_chans, -1, dtype=time.dtype)
        int_ret = np.full(out_row_chans, -1, dtype=interval.dtype)
        # Zero-initialised: channel widths are accumulated per channel below
        chan_width_ret = np.zeros(out_row_chans, dtype=chan_width.dtype)

        # Construct output flag row, if necessary
        out_flag_row = (None if flag_row is None else np.empty(
            out_row_chans, dtype=flag_row.dtype))

        # foreach input row
        for in_row in range(time.shape[0]):
            # Lookup baseline and time
            bl = bl_inv[in_row]
            t = time_inv[in_row]

            # lookup time bin and output row in inv_argsort
            tbin = bin_lookup[bl, t]
            bin_time = time_lookup[bl, tbin]
            bin_interval = interval_lookup[bl, tbin]
            flagged = bin_flagged[bl, tbin]
            out_row = inv_argsort[bl * ntime + tbin]

            decorr_chan_width[out_row] = bin_chan_width[bl, tbin]

            # Should never happen, but check
            if out_row >= out_rows:
                raise RowMapperError("out_row >= out_rows")

            # Handle output row flagging
            if flag_row is not None and flag_row[in_row] == 0 and flagged:
                raise RowMapperError("Unflagged input row "
                                     "contributing to "
                                     "flagged output row. "
                                     "This should never happen!")

            # Set up the row channel map, populate
            # time, interval and chan_width
            for c in range(nchan):
                out_offset = offsets[out_row] + bin_chan_map[bl, tbin, c]

                # Should never happen, but check
                if out_offset >= out_row_chans:
                    raise RowMapperError("out_offset >= out_row_chans")

                # Set the output row for this input row and channel
                row_chan_map[in_row, c] = out_offset

                # Broadcast the time and interval to the output row
                time_ret[out_offset] = bin_time
                int_ret[out_offset] = bin_interval

                # Add channel contribution for each row
                chan_width_ret[out_offset] += chan_width[c]

                if flag_row is not None:
                    out_flag_row[out_offset] = 1 if flagged else 0

        return RowMapOutput(row_chan_map, offsets, decorr_chan_width, time_ret,
                            int_ret, chan_width_ret, out_flag_row)
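
To make the outputs concrete, here is a hedged sketch of how a row_chan_map
and its per-row offsets can be consumed. The map and data below are toy
values of our own choosing, not produced by the function above: each input
(row, channel) sample scatter-adds into a flattened output slot, and a
count-normalised division then yields the averages.

import numpy as np

# Toy map: 4 input rows x 4 channels averaged down to 2 output rows
# of 2 channel bins each (in practice this comes from the mapper above)
row_chan_map = np.array([[0, 0, 1, 1],
                         [0, 0, 1, 1],
                         [2, 2, 3, 3],
                         [2, 2, 3, 3]], dtype=np.int32)
offsets = np.array([0, 2, 4], dtype=np.uint32)  # 2 channel bins per output row

vis = np.arange(16, dtype=np.float64).reshape(4, 4)
out_row_chans = row_chan_map.max() + 1

# Scatter-add samples into their output slots, then normalise by
# the number of contributing samples to obtain the average
avg = np.zeros(out_row_chans)
counts = np.zeros(out_row_chans)
np.add.at(avg, row_chan_map.ravel(), vis.ravel())
np.add.at(counts, row_chan_map.ravel(), 1)
avg /= counts

print(avg)  # [ 2.5  4.5 10.5 12.5]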