def test_digitize():
    x = np.array([2, 4, 5, 6, 1])
    bins = np.array([1, 2, 3, 4, 5])
    for chunks in [2, 4]:
        for right in [False, True]:
            d = da.from_array(x, chunks=chunks)
            assert_eq(da.digitize(d, bins, right=right),
                      np.digitize(x, bins, right=right))

    x = np.random.random(size=(100, 100))
    bins = np.random.random(size=13)
    bins.sort()
    for chunks in [(10, 10), (10, 20), (13, 17), (87, 54)]:
        for right in [False, True]:
            d = da.from_array(x, chunks=chunks)
            assert_eq(da.digitize(d, bins, right=right),
                      np.digitize(x, bins, right=right))
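# The `right` keyword flips which edge of each bin is closed.  A quick worked
# example of the semantics the test above exercises (a standalone sketch, not
# part of the test suite):
import numpy as np

bins = np.array([1, 2, 3, 4, 5])
# right=False (the default): bins[i-1] <= x < bins[i], so a value equal to a
# bin edge lands in the bin to its right.
print(np.digitize([2, 5], bins, right=False))  # [2 5]
# right=True: bins[i-1] < x <= bins[i], so edge values land to the left.
print(np.digitize([2, 5], bins, right=True))   # [1 4]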
def make_images(data: Data, image_size: Tuple[int, int],
                bins: ArrayLike) -> da.Array:
    """
    Bin LATRD events data into images of event counts.

    Given a collection of events data, a known image shape and an array of the
    desired time bin edges, make an image for each time bin, representing the number
    of events recorded at each pixel.

    Args:
        data:        A LATRD data dictionary (a dictionary with data set names as keys
                     and Dask arrays as values).  Must contain one entry for event
                     location messages and one for event timestamps.  The two arrays are
                     assumed to have the same length.
        image_size:  The (y, x), i.e. (slow, fast) dimensions (number of pixels) of
                     the image.
        bins:        The time bin edges of the images (in clock cycles, to match the
                     event timestamps).

    Returns:
        A dask array representing the calculations required to obtain the
        resulting stack of images.
    """
    # We need to ensure that the chunk layout of the event location array matches
    # that of the event time array, so that we can perform matching blockwise
    # iterations.
    event_locations = data[event_location_key].rechunk(
        data[event_time_key].chunks)
    event_locations = pixel_index(event_locations, image_size)

    num_images = len(bins) - 1

    if num_images > 1:
        # We cannot perform a single bincount of the entire data set because that
        # would require allocating enough memory for the entire image stack.

        # Find the index of the image to which each event belongs.
        image_indices = da.digitize(data[event_time_key], bins) - 1

        # Determine which image indices appear in each block, so that blocks
        # which cannot contain a given image may be skipped below.
        (images_in_block,) = da.compute(list(map(np.unique, image_indices.blocks)))

        # Construct a stack of images using dask.array.bincount.
        images = []
        for i in range(num_images):
            # When searching for events with a given image index, we already know we
            # can exclude some blocks and thereby save some computation time.
            contains_index = [i in indices for indices in images_in_block]

            image_events = event_locations.blocks[contains_index][
                image_indices.blocks[contains_index] == i]
            images.append(da.bincount(image_events,
                                      minlength=mul(*image_size)))

        images = da.stack(images)
    else:
        images = da.bincount(event_locations, minlength=mul(*image_size))

    return images.astype(np.uint32).reshape(num_images, *image_size)
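# A minimal sketch of the digitize-then-bincount pattern that make_images
# relies on, using toy values in place of real LATRD events.  The timestamps,
# pixel indices and image size below are illustrative assumptions.
import numpy as np
import dask.array as da

image_size = (2, 3)  # (slow, fast) pixel dimensions
event_times = da.from_array(np.array([1, 3, 5, 7, 9]), chunks=2)
event_pixels = da.from_array(np.array([0, 1, 5, 1, 4]), chunks=2)
bins = np.array([0, 5, 10])  # two time bins -> two images

# Label each event with the index of the image (time bin) it falls in.
image_indices = da.digitize(event_times, bins) - 1

# One bincount per image over the flattened pixel indices, then reshape the
# flat counts into a (num_images, slow, fast) stack.
images = da.stack(
    [
        da.bincount(event_pixels[image_indices == i], minlength=6)
        for i in range(len(bins) - 1)
    ]
).reshape(-1, *image_size)

print(images.compute())
# [[[1 1 0]
#   [0 0 0]]
#
#  [[0 1 0]
#   [0 1 1]]]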
@pytest.mark.parametrize("bins_type", [np, cupy])  # parametrization implied by the bins_type argument
def test_digitize(bins_type):
    x = cupy.array([2, 4, 5, 6, 1])
    bins = bins_type.array([1, 2, 3, 4, 5])
    for chunks in [2, 4]:
        for right in [False, True]:
            d = da.from_array(x, chunks=chunks)
            bins_cupy = cupy.array(bins)
            assert_eq(
                da.digitize(d, bins, right=right),
                np.digitize(x, bins_cupy, right=right),
                check_type=False,
            )

    x = cupy.random.random(size=(100, 100))
    bins = bins_type.random.random(size=13)
    bins.sort()
    for chunks in [(10, 10), (10, 20), (13, 17), (87, 54)]:
        for right in [False, True]:
            d = da.from_array(x, chunks=chunks)
            bins_cupy = cupy.array(bins)
            assert_eq(
                da.digitize(d, bins, right=right),
                np.digitize(x, bins_cupy, right=right),
            )
def multiple_sequences_cli(args):
    """
    Utility for making multiple image sequences from a pump-probe data collection.

    The time between one pump trigger signal and the next is subdivided into a number
    of intervals of equal duration, quantising the time elapsed since the most recent
    trigger pulse.  Events are labelled according to which interval they fall in and,
    for each interval in turn, all the events so labelled are aggregated, providing a
    stack of image sequences that captures the evolution of the response of the
    measurement to a pump signal.
    """
    write_mode = "w" if args.force else "x"

    input_nexus = args.data_dir / f"{args.stem}.nxs"
    if not input_nexus.exists():
        print(
            "Could not find a NeXus file containing experiment metadata.\n"
            "Resorting to writing raw image data without accompanying metadata."
        )

    image_size = args.image_size or determine_image_size(input_nexus)

    raw_files, _ = data_files(args.data_dir, args.stem)

    trigger_type = triggers.get(args.trigger_type)

    print("Finding trigger signal times.")
    with latrd_data(raw_files, keys=cue_keys) as data:
        trigger_times = cue_times(data, trigger_type)
        progress(trigger_times.persist())
        trigger_times = trigger_times.compute().astype(int)

    print()  # Dask distributed progress bar does not end with a newline, so insert one.

    trigger_times = np.sort(trigger_times)

    if not trigger_times.any():
        sys.exit(f"Could not find a '{cues[trigger_type]}' signal.")

    intervals_end = np.diff(trigger_times).min()
    interval_time, _, num_intervals = exposure(
        0, intervals_end, args.interval, args.num_sequences
    )
    intervals = np.linspace(0, intervals_end, num_intervals + 1, dtype=np.uint64)

    output_files, out_file_pattern = check_multiple_output_files(
        num_intervals, args.output_file, args.stem, "images", args.force
    )

    with latrd_data(raw_files, keys=cue_keys) as data:
        start, end = find_start_end(data, distributed=True)

    exposure_time, exposure_cycles, num_images = exposure(
        start, end, args.exposure_time, args.num_images
    )
    bins = np.linspace(start, end, num_images + 1, dtype=np.uint64)

    print(
        f"Using '{cues[trigger_type]}' as the pump signal,\n"
        f"binning events into {num_intervals} sequences, corresponding to "
        f"successive pump-probe delay intervals of {interval_time:~.3g}.\n"
        f"Each sequence consists of {num_images} images with an exposure time of "
        f"{exposure_time:~.3g}."
    )

    out_file_stem = out_file_pattern.stem

    n_dig = len(str(num_intervals))  # digit width of the per-interval file numbering
    out_file_pattern = out_file_pattern.parent / f"{out_file_stem}_{'#' * n_dig}.h5"

    if input_nexus.exists():
        # Write output NeXus files if we have an input NeXus file.
        output_nexus_pattern = out_file_pattern.with_suffix(".nxs")
        for output_file in output_files:
            try:
                CopyTristanNexus.multiple_images_nexus(
                    output_file,
                    input_nexus,
                    nbins=num_images,
                    write_mode=write_mode,
                )
            except FileExistsError:
                sys.exit(
                    f"One or more output files already exist, matching the pattern:\n\t"
                    f"{output_nexus_pattern}\n"
                    "Use '-f' to override, "
                    "or specify a different output file path with '-o'."
                )
    else:
        output_nexus_pattern = None

    trigger_times = da.from_array(trigger_times)
    with latrd_data(raw_files, keys=(event_location_key, event_time_key)) as data:
        data = valid_events(data, start, end)

        # Find the time elapsed since the most recent trigger signal.
        pump_probe_time = data[event_time_key].astype(np.int64)
        pump_probe_time -= trigger_times[
            da.digitize(pump_probe_time, trigger_times) - 1
        ]
        sequence = da.digitize(pump_probe_time, intervals) - 1

        image_sequence_stack = []
        for i in range(num_intervals):
            interval_selection = sequence == i

            event_times = data[event_time_key][interval_selection]
            event_locs = data[event_location_key][interval_selection]
            interval = {
                event_time_key: event_times.compute_chunk_sizes(),
                event_location_key: event_locs.compute_chunk_sizes(),
            }

            size = max(data[event_time_key].itemsize, data[event_location_key].itemsize)
            chunks = aggregate_chunks(*interval[event_time_key].chunks, size)
            interval[event_time_key] = interval[event_time_key].rechunk(chunks)

            image_sequence_stack.append(make_images(interval, image_size, bins))

        save_multiple_image_sequences(
            da.stack(image_sequence_stack), out_file_stem, output_files, write_mode
        )

    print(f"Images written to\n\t{output_nexus_pattern or out_file_pattern}")
def pump_probe_cli(args):
    """
    Utility for making multiple images from a pump-probe data collection.

    The time between one pump trigger signal and the next is subdivided into a number
    of exposures of equal duration.  Data from all such pump-to-pump intervals is
    aggregated, providing a single stack of images that captures the evolution of the
    response of the measurement to a pump signal.
    """
    write_mode = "w" if args.force else "x"
    output_file = check_output_file(args.output_file, args.stem, "images", args.force)

    input_nexus = args.data_dir / f"{args.stem}.nxs"
    if input_nexus.exists():
        try:
            # Write output NeXus file if we have an input NeXus file.
            output_nexus = CopyTristanNexus.single_image_nexus(
                output_file, input_nexus, write_mode=write_mode
            )
        except FileExistsError:
            sys.exit(
                f"This output file already exists:\n\t"
                f"{output_file.with_suffix('.nxs')}\n"
                "Use '-f' to override, "
                "or specify a different output file path with '-o'."
            )
    else:
        output_nexus = None
        print(
            "Could not find a NeXus file containing experiment metadata.\n"
            "Resorting to writing raw image data without accompanying metadata."
        )

    image_size = args.image_size or determine_image_size(input_nexus)

    raw_files, _ = data_files(args.data_dir, args.stem)

    trigger_type = triggers.get(args.trigger_type)

    print("Finding trigger signal times.")
    with latrd_data(raw_files, keys=cue_keys) as data:
        trigger_times = cue_times(data, trigger_type)
        progress(trigger_times.persist())
        trigger_times = trigger_times.compute().astype(int)

    print()  # Dask distributed progress bar does not end with a newline, so insert one.

    trigger_times = np.sort(trigger_times)

    if not trigger_times.any():
        sys.exit(f"Could not find a '{cues[trigger_type]}' signal.")

    end = np.diff(trigger_times).min()
    exposure_time, _, num_images = exposure(0, end, args.exposure_time, args.num_images)
    bins = np.linspace(0, end, num_images + 1, dtype=np.uint64)

    print(
        f"Binning events into {num_images} images with an exposure time of "
        f"{exposure_time:~.3g} according to the time elapsed since the most recent "
        f"'{cues[trigger_type]}' signal."
    )

    trigger_times = da.from_array(trigger_times)
    with latrd_data(raw_files, keys=(event_location_key, event_time_key)) as data:
        # Measure the event time as time elapsed since the most recent trigger signal.
        data[event_time_key] = data[event_time_key].astype(np.int64)
        data[event_time_key] -= trigger_times[
            da.digitize(data[event_time_key], trigger_times) - 1
        ]

        images = make_images(valid_events(data, 0, end), image_size, bins)
        save_multiple_images(images, output_file, write_mode)

    print(f"Images written to\n\t{output_nexus or output_file}")