def test_remove():
    rng = np.random.default_rng(0)
    points_raw = rng.random((10, 4)) * 100
    labels = rng.integers(3, size=10)

    points = pept.PointData(
        np.c_[points_raw, labels, labels],
        columns=["t", "x", "y", "z", "label", "label2"],
    )
    points.samples_indices = [[0, 10], [5, 5], [5, 10]]

    rm = Remove("label").fit_sample(points)
    assert "label" not in rm.columns
    assert rm.points.shape[1] == 5

    rm = Remove("label*").fit_sample(points)
    assert "label" not in rm.columns
    assert "label2" not in rm.columns
    assert rm.points.shape[1] == 4

    # Testing different settings
    Remove(0).fit(points, "sequential")
    Remove(-1).fit(points, "sequential")
    Remove("label", "label2").fit(points, "sequential")
    Remove(0, "label").fit(points, "sequential")
def test_swap():
    rng = np.random.default_rng(0)
    points_raw = rng.random((10, 4)) * 100
    labels = rng.integers(3, size=10)

    points = pept.PointData(
        np.c_[points_raw, labels],
        columns=["t", "x", "y", "z", "label"],
    )
    points.samples_indices = [[0, 10], [5, 5], [5, 10]]

    # Simple, single swap
    p2 = Swap("y, z").fit_sample(points.copy())
    assert np.all(p2["y"] == points["z"]), "Swap not done"
    assert np.all(p2["z"] == points["y"]), "Swap not done"

    # Single swap with quoted column names
    p2 = Swap("'y', 'z'").fit_sample(points.copy())
    assert np.all(p2["y"] == points["z"]), "Swap not done"
    assert np.all(p2["z"] == points["y"]), "Swap not done"

    # Single swap with quoted column indices
    p2 = Swap("'2', '3'").fit_sample(points.copy())
    assert np.all(p2["y"] == points["z"]), "Swap not done"
    assert np.all(p2["z"] == points["y"]), "Swap not done"

    # Testing different settings
    Swap("y, z").fit(points)
    Swap("label, 'z'").fit(points)
    Swap("'0', '1'", "'y', 'z'", "x, z").fit(points)
def test_condition():
    rng = np.random.default_rng(0)
    points_raw = rng.random((10, 4)) * 100
    labels = rng.integers(3, size=10)

    points = pept.PointData(
        np.c_[points_raw, labels],
        columns=["t", "x", "y", "z", "label"],
    )
    points.samples_indices = [[0, 10], [5, 5], [5, 10]]

    cp = Condition("x < 50").fit_sample(points)
    assert np.all(
        cp.data == points.data[points.points[:,
                                             points.columns.index("x")] < 50])

    cp2 = Condition("'2' < 50").fit_sample(points)
    cp3 = Condition("50 > '2'").fit_sample(points)
    assert np.allclose(cp2.data, points.data[points.data[:, 2] < 50])
    assert np.allclose(cp2.data, cp3.data)

    # Testing different settings
    Condition("np.isfinite('x')").fit(points)
    Condition("'x' < 'y'").fit(points)
    Condition("x < 2, 'x' > 0, 1 > 'x'").fit(points)
    Condition(lambda arr: arr[:, 0] > 10).fit(points)
    Condition(lambda x: x[:, -1] < 50, 'x > 10').fit(points)
def test_split_all():
    rng = np.random.default_rng(0)
    points_raw = rng.random((10, 4)) * 100
    labels = rng.integers(3, size=10)
    line_index = rng.integers(10, size=10)

    points = pept.PointData(
        np.c_[points_raw, labels, line_index],
        columns=["t", "x", "y", "z", "label", "line_index"],
    )
    points.samples_indices = [[0, 10], [5, 5], [5, 10]]

    # Check each split label
    split = SplitAll("label").fit(points)
    assert np.all(split[0].points[:, :4] == points_raw[labels == 0])
    assert np.all(split[1].points[:, :4] == points_raw[labels == 1])
    assert np.all(split[2].points[:, :4] == points_raw[labels == 2])

    # Check with empty sample
    empty_split = SplitLabels().fit_sample(points[1])
    assert len(empty_split[0].data) == 0

    # Check using numeric index
    split_str = SplitAll("label").fit(points)
    split_idx = SplitAll(4).fit(points)

    assert np.all(split_str[0].points == split_idx[0].points)
    assert np.all(split_str[1].points == split_idx[1].points)
    assert np.all(split_str[2].points == split_idx[2].points)

    # Testing different settings
    SplitAll("label").fit([points])
    SplitAll("label").fit([points, points])
    SplitAll(4).fit(points.points)
def test_stack():
    rng = np.random.default_rng(0)
    points_raw = rng.random((10, 4)) * 100
    lines_raw = rng.random((10, 7)) * 500

    points = pept.PointData(points_raw, sample_size=4)
    lines = pept.LineData(lines_raw, sample_size=4)

    # Test it returns points back
    p = Stack().fit(points)
    assert p is points, "Stack did not return a single PointData back"

    # Test it returns lines back
    ls = Stack().fit(lines)
    assert ls is lines, "Stack did not return a single LineData back"

    # Test it concatenates a list of two points
    points2 = Stack().fit([points, points])
    assert np.all(points2.points[:10] == points.points[:10])

    # Test it concatenates a list of two lines
    lines2 = Stack().fit([lines, lines])
    assert np.all(lines2.lines[:10] == lines.lines[:10])

    # Test list[list] flattening
    assert Stack().fit([[1, 2, 3]]) == [1, 2, 3], "List flattening wrong"
def test_segregate():
    rng = np.random.default_rng(0)
    points_raw = rng.random((100, 4)) * 100
    points = pept.PointData(points_raw, sample_size=4)

    se = Segregate(20, cut_distance=np.inf).fit(points)
    assert np.allclose(se.points[:, -1], 0.)

    # Testing different settings
    Segregate(5, 10, 15).fit(points)
    Segregate(1, 1).fit(points)
    def fit(self, points: Iterable[pept.PointData]):
        # Stack the input points into a single PointData
        if not isinstance(points, pept.PointData):
            points = pept.PointData(points)

        if len(points.points) == 0:
            return points.copy(
                data=points.points[0:0],
                columns=points.columns + ["label"],
            )

        pts = points.points

        # Sort pts based on the time column (col 0) and create a C-ordered copy
        # to send to Cython.
        pts = np.asarray(pts[pts[:, 0].argsort()], dtype=float, order="C")

        # Calculate the sparse distance matrix between reachable points. This
        # is an optimised Cython function returning a sparse CSR matrix.
        distance_matrix = distance_matrix_reachable(pts, self.window)

        # Construct the minimum spanning tree from the sparse distance matrix.
        # Note that `mst` is also a sparse CSR matrix.
        mst = minimum_spanning_tree(distance_matrix)

        # Get the minimum spanning tree edges into the [vertex 1, vertex 2,
        # edge distance] format, then sort it based on the edge distance.
        mst = mst.tocoo()
        mst_edges = np.vstack((mst.row, mst.col, mst.data)).T
        mst_edges = mst_edges[mst_edges[:, 2].argsort()]

        # Ignore deprecation warning from HDBSCAN's use of `np.bool`
        with warnings.catch_warnings():
            warnings.simplefilter("ignore", category=DeprecationWarning)

            # Create the single linkage tree from the minimum spanning tree
            # edges using internal hdbscan methods (because they're damn fast).
            # This should be a fairly quick step.
            linkage_tree = hdbscan._hdbscan_linkage.label(mst_edges)
            linkage_tree = hdbscan.plots.SingleLinkageTree(linkage_tree)

            # Cut the single linkage tree at `trajectory_cut_distance` and get
            # the cluster labels, setting clusters smaller than
            # `min_trajectory_size` to -1 (i.e. noise).
            labels = linkage_tree.get_clusters(
                self.cut_distance,
                self.min_trajectory_size,
            )

        # Append the labels to `pts`.
        return points.copy(
            data=np.c_[pts, labels],
            columns=points.columns + ["label"],
        )
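
# A minimal, self-contained sketch (not part of the library) of the idea used
# in `fit` above: build a minimum spanning tree over the points, cut every
# edge longer than a threshold, and read the surviving connected components
# as trajectory labels. The helper name `mst_cut_labels` and the numbers are
# illustrative only.
import numpy as np
from scipy.sparse.csgraph import connected_components, minimum_spanning_tree
from scipy.spatial import distance_matrix as pairwise_distances


def mst_cut_labels(xyz, cut_distance):
    # Dense pairwise distances are fine for a small illustration; the library
    # code above uses an optimised sparse, time-windowed distance matrix
    mst = minimum_spanning_tree(pairwise_distances(xyz, xyz)).tocsr()

    # "Cut" all MST edges longer than `cut_distance`
    mst.data[mst.data > cut_distance] = 0.
    mst.eliminate_zeros()

    # Each remaining connected component is one trajectory label
    _, labels = connected_components(mst, directed=False)
    return labels


rng = np.random.default_rng(0)
xyz = np.vstack([rng.random((20, 3)), rng.random((20, 3)) + 100.])
print(mst_cut_labels(xyz, cut_distance=10.))    # two components: 0 and 1
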
def test_velocity():
    rng = np.random.default_rng(0)
    points_raw = rng.random((10, 4)) * 100
    points = pept.PointData(points_raw, sample_size=4)

    vs = Velocity(5).fit_sample(points)
    assert "vx" in vs.columns
    assert "vy" in vs.columns
    assert "vz" in vs.columns

    assert "v" in Velocity(5, absolute=True).fit_sample(points).columns

    # Testing different settings
    Velocity(3).fit(points, "sequential")
    Velocity(window=9, degree=5).fit(points, "sequential")
    def copy(self):
        '''Create a deep copy of an instance of this class, including a new
        inner numpy array `points`.

        Returns
        -------
        pept.PointData
            A new instance of the `pept.PointData` class with the same
            attributes as this instance, deep-copied.
        '''

        return pept.PointData(self._points.copy(order="C"),
                              sample_size=self._sample_size,
                              overlap=self._overlap,
                              verbose=False)
def test_dynamic_probability3d():
    # Generate tracer locations
    num_particles = 10
    positions = pept.PointData(
        np.random.uniform(0, 500, (num_particles, 5)),
        columns = ["t", "x", "y", "z", "v"]
    )

    # Test different uses
    voxels = DynamicProbability3D(1., "v").fit(positions)
    assert voxels.voxels.any(), "all voxels are zero!"

    DynamicProbability3D(0.1, "t", "yzx").fit(positions)
    DynamicProbability3D(0.1, 4).fit(positions)
    DynamicProbability3D(0.1, "v", xlim = [0, 500]).fit(positions)
    DynamicProbability3D(0.1, "v", resolution = [20, 20, 20]).fit(positions)
    DynamicProbability3D(0.1, 4, max_workers = 1).fit(positions)
def test_residence_distribution3d():
    # Generate tracer locations
    num_particles = 10
    positions = pept.PointData(
        np.random.uniform(0, 500, (num_particles, 5)),
        columns = ["t", "x", "y", "z", "v"]
    )

    # Test different uses
    voxels = ResidenceDistribution3D(1., "v").fit(positions)
    assert voxels.voxels.any(), "all voxels are zero!"

    ResidenceDistribution3D(0.1, "t", "yzx").fit(positions)
    ResidenceDistribution3D(0.1, 0).fit(positions)
    ResidenceDistribution3D(0.1, xlim = [0, 500]).fit(positions)
    ResidenceDistribution3D(0.1, resolution = [20, 20, 20]).fit(positions)
    ResidenceDistribution3D(0.1, 0, max_workers = 1).fit(positions)
def test_interpolate():
    points_raw = np.arange(60).reshape(10, 6)

    points = pept.PointData(
        points_raw,
        columns=["t", "x", "y", "z", "label", "line_index"],
    )
    points.samples_indices = [[0, 10], [5, 5], [5, 10]]

    # Interpolate at double sampling rate
    half_interpolator = Interpolate((points_raw[1, 0] - points_raw[0, 0]) / 2)
    interp = half_interpolator.fit_sample(points)

    assert interp.points[1, 2] == (points_raw[0, 2] + points_raw[1, 2]) / 2

    # Testing different settings
    Interpolate(3., kind="cubic").fit(points, "sequential")
    Interpolate(10., kind="nearest").fit(points, "sequential")
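
# Hedged sketch (not pept code) of the behaviour checked in `test_interpolate`
# above: resample a trajectory at half the original timestep using
# `scipy.interpolate.interp1d`. The helper name `resample_half_step` is
# illustrative.
import numpy as np
from scipy.interpolate import interp1d


def resample_half_step(points, kind="linear"):
    t = points[:, 0]
    new_t = np.arange(t[0], t[-1] + 1e-9, (t[1] - t[0]) / 2)
    interpolator = interp1d(t, points[:, 1:], axis=0, kind=kind)
    return np.c_[new_t, interpolator(new_t)]


pts = np.arange(60, dtype=float).reshape(10, 6)
resampled = resample_half_step(pts)

# The inserted midpoint is the average of the two neighbouring samples
assert resampled[1, 2] == (pts[0, 2] + pts[1, 2]) / 2
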
    def fit_sample(self, sample_lines):
        if not isinstance(sample_lines, pept.LineData):
            sample_lines = pept.LineData(sample_lines)

        # If cutoffs were not defined, automatically compute them
        if self.cutoffs is not None:
            cutoffs = self.cutoffs
        else:
            cutoffs = get_cutoffs(sample_lines.lines)

        # Only compute minpoints if there are at least num_lines LoRs
        if len(sample_lines.lines) >= self.num_lines:
            sample_minpoints = pept.utilities.find_minpoints(
                sample_lines.lines,
                self.num_lines,
                self.max_distance,
                cutoffs,
                append_indices=self.append_indices,
            )
        else:
            ncols = 4 + self.num_lines if self.append_indices else 4
            sample_minpoints = np.empty((0, ncols))

        # Column names
        columns = ["t", "x", "y", "z"]
        if self.append_indices:
            columns += [f"line_index{i + 1}" for i in range(self.num_lines)]

        # Encapsulate minpoints in a PointData
        points = pept.PointData(sample_minpoints, columns=columns)

        # Add optional metadata to the points; because they have an underscore,
        # they won't be propagated when new objects are constructed
        points.attrs["_num_lines"] = self.num_lines
        points.attrs["_max_distance"] = self.max_distance
        points.attrs["_cutoffs"] = cutoffs

        # If LoR indices were appended, also include the constituent LoRs
        if self.append_indices:
            points.attrs["_lines"] = sample_lines

        return points
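
# Hedged usage sketch for the minpoints computation wrapped by `fit_sample`
# above, calling `pept.utilities.find_minpoints` directly on a small synthetic
# sample of LoRs. The numeric values are illustrative only.
import numpy as np
import pept

rng = np.random.default_rng(0)
lors = rng.random((50, 7)) * 100       # [t, x1, y1, z1, x2, y2, z2]

# Cutoffs are [x_min, x_max, y_min, y_max, z_min, z_max]
cutoffs = np.array([0., 100., 0., 100., 0., 100.])

minpoints = pept.utilities.find_minpoints(
    lors,
    3,           # num_lines combined for each minpoint
    20.,         # max_distance allowed between the combined LoRs
    cutoffs,
    append_indices=True,
)

# Columns are [t, x, y, z] plus one LoR index per combined line
print(minpoints.shape)
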
def test_split_labels():
    rng = np.random.default_rng(0)
    points_raw = rng.random((10, 4)) * 100
    labels = rng.integers(3, size=10)
    line_index = rng.integers(10, size=10)

    points = pept.PointData(
        np.c_[points_raw, labels, line_index],
        columns=["t", "x", "y", "z", "label", "line_index"],
    )
    points.samples_indices = [[0, 10], [5, 5], [5, 10]]

    # Check each split label
    split = SplitLabels().fit_sample(points[0])
    assert np.all(split[0].points[:, :4] == points_raw[labels == 0])
    assert np.all(split[1].points[:, :4] == points_raw[labels == 1])
    assert np.all(split[2].points[:, :4] == points_raw[labels == 2])

    # Check with empty sample
    empty_split = SplitLabels().fit_sample(points[1])
    assert len(empty_split[0].data) == 0

    # Extracting `_lines`
    lines_raw = rng.random((10, 7)) * 500
    lines = pept.LineData(lines_raw, sample_size=4)
    points.attrs["_lines"] = lines

    splines = SplitLabels().fit_sample(points[0])
    assert "_lines" in splines[0].attrs

    splines = SplitLabels(extract_lines=True).fit_sample(points[0])
    assert isinstance(splines[0], pept.LineData)

    # Test different settings
    SplitLabels().fit(points, "sequential")
    SplitLabels(remove_labels=False).fit(points, "sequential")
    SplitLabels(noise=True).fit(points, "sequential")
    SplitLabels(extract_lines=True).fit(points, "sequential")
def connect_trajectories(
    trajectories_points,
    max_time_difference,
    max_signature_difference,
    points_to_check = 50,
    signature_col = 4,
    label_col = -1,
    as_list = False
):
    '''Connect segregated trajectories based on tracer signatures.

    A pair of trajectories in `trajectories_points` will be connected if their
    ends have a timestamp difference that is smaller than `max_time_difference`
    and the difference between the signature averages of the closest
    `points_to_check` points is smaller than `max_signature_difference`.

    The `trajectories_points` are distinguished based on the trajectory
    indices in the data column `label_col`. This can be achieved using the
    `segregate_trajectories` function, which appends the labels to the data
    points.

    Because the tracer signature (e.g. cluster size in PEPT-ML) varies with the
    tracer position in the system, an average of `points_to_check` points
    is used for connecting pairs of trajectories.

    Parameters
    ----------
    trajectories_points : (M, N>=6) numpy.ndarray or pept.PointData
        A numpy array of points that have a timestamp, spatial coordinates,
        a tracer signature (such as cluster size in PEPT-ML) and a trajectory
        index (or label). The data columns in `trajectories_points` are then
        [time, x, y, z, ..., signature, ..., label, ...]. Note that the
        timestamps and spatial coordinates must be the first 4 columns, while
        the signature and label columns may be anywhere and are pointed at
        by `signature_col` and `label_col`.
    max_time_difference : float
        Only try to connect trajectories whose ends have a timestamp difference
        smaller than `max_time_difference`.
    max_signature_difference : float
        Connect two trajectories if the difference between the signature
        averages of the closest `points_to_check` points is smaller than
        this.
    points_to_check : int, default 50
        The number of points used when computing the average tracer signature
        in one trajectory.
    signature_col : int, default 4
        The column in `trajectories_points` that contains the tracer
        signatures. The default is 4 (i.e. the signature comes right after
        the spatial coordinates).
    label_col : int, default -1
        The column in `trajectories_points` that contains the trajectory
        indices (labels). The default is -1 (i.e. the last column).
    as_list : bool, default False
        If True, return a list of arrays, where each array contains the points
        in a single trajectory. In other words, return separate, single
        trajectories in a list. If False, return a single array of all points
        (if `trajectories_points` was a `numpy.ndarray`) or a `pept.PointData`
        (if `trajectories_points` was a `pept.PointData` instance), but with
        labels changed to reflect the connected trajectories.

    Returns
    -------
    numpy.ndarray or pept.PointData or list of numpy.ndarray
        If `as_list` is True, return separate, single trajectories in a list.
        If `as_list` is False, return a single array of all points
        (if `trajectories_points` was a `numpy.ndarray`) or a `pept.PointData`
        (if `trajectories_points` was a `pept.PointData` instance), but with
        labels changed to reflect the connected trajectories.

    Raises
    ------
    ValueError
        If `trajectories_points` is a numpy array with fewer than 6 columns.

    Note
    ----
    The labels are changed in-place to reflect the connected trajectories. For
    example, if there are 3 trajectories with labels 0, 1, 2 and the first two
    are connected, then all points which previously had the label 1 will be
    changed to label 0; the last trajectory's label remains unchanged, 2.

    Examples
    --------
    [TODO] - add full tutorial page on Bham PIC GitHub page for this.

    See Also
    --------
    segregate_trajectories : Segregate the intertwined points from multiple
                             trajectories into individual paths.
    PlotlyGrapher : Easy, publication-ready plotting of PEPT-oriented data.
    '''

    # Check `trajectories_points` is a numpy array or pept.PointData
    if isinstance(trajectories_points, pept.PointData):
        trajs = trajectories_points.points
    else:
        trajs = np.asarray(trajectories_points, dtype = float, order = "C")
        if trajs.ndim != 2 or trajs.shape[1] < 6:
            raise ValueError((
                "\n[ERROR]: `trajectories_points` should have dimensions "
                f"(M, N), where N >= 6. Received {trajs.shape}.\n"
            ))

    # Type-check the input parameters
    max_time_difference = float(max_time_difference)
    max_signature_difference = float(max_signature_difference)
    points_to_check = int(points_to_check)
    signature_col = int(signature_col)
    label_col = int(label_col)
    as_list = bool(as_list)

    # Separate the trajs array into a list of individual trajectories based on
    # the `label_col`.
    trajectory_list = pept.utilities.group_by_column(trajs.copy(), label_col)

    trajectory_list = _connect_trajectories(
        trajectory_list,
        max_time_difference,
        max_signature_difference,
        points_to_check,
        signature_col,
        label_col
    )

    if as_list:
        return trajectory_list
    elif isinstance(trajectories_points, pept.PointData):
        trajectories_points_connected = pept.PointData(
            np.vstack(np.array(trajectory_list)),
            sample_size = trajectories_points.sample_size,
            overlap = trajectories_points.overlap,
            verbose = False
        )
        return trajectories_points_connected
    else:
        return np.vstack(np.array(trajectory_list))
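
# Hedged usage sketch for `connect_trajectories` above: two short segregated
# trajectories of the same tracer (similar signatures, small time gap between
# the end of one and the start of the other) carry labels 0 and 1, and the
# call below is expected to merge them under a single label. All numbers are
# illustrative.
import numpy as np

rng = np.random.default_rng(0)
t1 = np.c_[np.arange(0., 50.), rng.random((50, 3)) * 10,
           np.full(50, 30.), np.zeros(50)]
t2 = np.c_[np.arange(60., 110.), rng.random((50, 3)) * 10,
           np.full(50, 31.), np.ones(50)]
trajs = np.vstack((t1, t2))     # columns: [t, x, y, z, signature, label]

connected = connect_trajectories(
    trajs,
    max_time_difference=20.,
    max_signature_difference=5.,
    points_to_check=25,
    signature_col=4,
    label_col=-1,
)
print(np.unique(connected[:, -1]))    # expected: a single label
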
def segregate_trajectories(
    point_data,
    points_window,
    trajectory_cut_distance,
    min_trajectory_size = 5,
    as_list = False,
    return_mst = False
):
    '''Segregate the intertwined points from multiple trajectories into
    individual paths.

    The points in `point_data` (a numpy array or `pept.PointData`) are used to
    construct a minimum spanning tree in which every point can only be
    connected to `points_window` points around it - this "window" refers to the
    points in the initial data array, sorted based on the time column;
    therefore, only points within a certain timeframe can be connected. All
    edges (or "connections") in the minimum spanning tree that are larger than
    `trajectory_cut_distance` are removed (or "cut") and the remaining
    connected "clusters" are deemed individual trajectories if they contain
    more than `min_trajectory_size` points.

    The trajectory indices (or labels) are appended to `point_data`. That is,
    for each data point (i.e. row) in `point_data`, a label will be appended
    starting from 0 for the corresponding trajectory; a label of -1 represents
    noise. If `point_data` is a numpy array, a new numpy array is returned; if
    it is a `pept.PointData` instance, a new instance is returned.

    This function uses single linkage clustering with a custom metric for
    spatio-temporal data to segregate trajectory points. The single linkage
    clustering was optimised for this use-case: points are only connected if
    they are within a certain `points_window` in the time-sorted input array.
    Sparse matrices are also used for minimising the memory footprint.

    Parameters
    ----------
    point_data : (M, N>=4) numpy.ndarray or pept.PointData
        The points from multiple trajectories. Each row in `point_data` will
        have a timestamp and the 3 spatial coordinates, such that the data
        columns are [time, x_coord, y_coord, z_coord]. Note that `point_data`
        can have more data columns and they will simply be ignored.
    points_window : int
        Two points are "reachable" (i.e. they can be connected) if and only if
        they are within `points_window` in the time-sorted input `point_data`.
        As the points from different trajectories are intertwined (e.g. for two
        tracers A and B, the `point_data` array might have two entries for A,
        followed by three entries for B, then one entry for A, etc.), this
        should optimally be the largest number of points in the input array
        between two consecutive points on the same trajectory. If
        `points_window` is too small, all points in the dataset will be
        unreachable. Naturally, a larger `points_window` corresponds to more
        pairs needing to be checked (and the function will take longer to
        complete).
    trajectory_cut_distance : float
        Once all the closest points are connected (i.e. the minimum spanning
        tree is constructed), separate all trajectories that are further
        apart than `trajectory_cut_distance`.
    min_trajectory_size : int, default 5
        After the trajectories have been cut, declare all trajectories with
        fewer points than `min_trajectory_size` as noise.
    as_list : bool, default False
        If True, return a list of arrays, where each array contains the points
        in a single trajectory. In other words, return separate, single
        trajectories in a list. If False, return a single array of all points
        (if `point_data` was a `numpy.ndarray`) or a `pept.PointData`
        (if `point_data` was a `pept.PointData` instance).
    return_mst : bool, default False
        If `True`, the function will also return the minimum spanning tree
        constructed using the input `point_data`. This is a numpy array with
        columns [vertex1, vertex2, edge_length], where vertex1 and vertex2 are
        the indices in `point_data` of the connected points, and edge_length is
        the Euclidean distance between them.

    Returns
    -------
    points_labelled: numpy.ndarray or pept.PointData or list of numpy.ndarray
        If `as_list` is `False`, this is the `point_data` array or
        `pept.PointData` instance with an extra column for the trajectory index
        (i.e. label) - the return type is similar to the input type. If
        `as_list` is `True`, this is a list of arrays, in which each array
        contains the points in a single trajectory; these still include the
        trajectory label. A label value of `-1` indicates noise; the found
        trajectories are then labelled starting from 0.
    mst: numpy.ndarray, optional
        If `return_mst` is `True`, another numpy array is returned as a second
        variable containing the columns [vertex1, vertex2, edge_length], where
        vertex1 and vertex2 are the indices in `point_data` of the connected
        points, and edge_length is the Euclidean distance between them.

    Raises
    ------
    ValueError
        If `point_data` is a numpy array with fewer than 4 columns.
    ValueError
        If `points_window` is smaller than 1.

    Examples
    --------
    A typical workflow would involve transforming LoRs into points using some
    tracking algorithm. These points include all tracers moving through the
    system, being intertwined (e.g. for two tracers A and B, the `point_data`
    array might have two entries for A, followed by three entries for B, then
    one entry for A, etc.). They can be segregated based on position alone
    using this function; take for example two tracers that go downwards (below,
    'x' is the position, and in parens is the array index at which that point
    is found).

    `points`, numpy.ndarray, shape (10, 4), columns [time, x, y, z]:
        x (1)                       x (2)
         x (3)                     x (4)
           x (5)                 x (7)
           x (6)                x (9)
          x (8)                 x (10)

    >>> import pept.tracking.trajectory_separation as tsp
    >>> points_window = 10
    >>> trajectory_cut_distance = 15    # mm
    >>> segregated_trajectories = tsp.segregate_trajectories(
    >>>     points, points_window, trajectory_cut_distance
    >>> )

    `segregated_trajectories`, numpy.ndarray, shape (10, 5),
    columns [time, x, y, z, trajectory_label]:
        x (1, label = 0)            x (2, label = 1)
         x (3, label = 0)          x (4, label = 1)
           x (5, label = 0)      x (7, label = 1)
           x (6, label = 0)     x (9, label = 1)
          x (8, label = 0)      x (10, label = 1)

    See Also
    --------
    connect_trajectories : Connect segregated trajectories based on tracer
                           signatures.
    PlotlyGrapher : Easy, publication-ready plotting of PEPT-oriented data.
    '''

    # Check `point_data` is a numpy array or pept.PointData
    if isinstance(point_data, pept.PointData):
        pts = point_data.points
    else:
        pts = np.asarray(point_data)
        if pts.ndim != 2 or pts.shape[1] < 4:
            raise ValueError((
                "\n[ERROR]: `point_data` should have dimensions (M, N), where "
                f"N >= 4. Received {pts.shape}.\n"
            ))

    # Sort pts based on the time column (col 0) and create a C-ordered copy to
    # send to Cython.
    pts = np.asarray(pts[pts[:, 0].argsort()], dtype = float, order = "C")

    # Type-check the input parameters
    points_window = int(points_window)
    if points_window < 1:
        raise ValueError((
            "\n[ERROR]: `points_window` should be at least 1! Received "
            f"{points_window}.\n"
        ))

    trajectory_cut_distance = float(trajectory_cut_distance)
    min_trajectory_size = int(min_trajectory_size)
    return_mst = bool(return_mst)

    # Calculate the sparse distance matrix between reachable points. This is an
    # optimised Cython function returning a sparse CSR matrix.
    distance_matrix = distance_matrix_reachable(pts, points_window)

    # Construct the minimum spanning tree from the sparse distance matrix. Note
    # that `mst` is also a sparse CSR matrix.
    mst = minimum_spanning_tree(distance_matrix)

    # Get the minimum spanning tree edges into the [vertex 1, vertex 2,
    # edge distance] format, then sort it based on the edge distance.
    mst = mst.tocoo()
    mst_edges = np.vstack((mst.row, mst.col, mst.data)).T
    mst_edges = mst_edges[mst_edges[:, 2].argsort()]

    # Create the single linkage tree from the minimum spanning tree edges using
    # internal hdbscan methods (because they're damn fast). This should be a
    # fairly quick step.
    single_linkage_tree = hdbscan._hdbscan_linkage.label(mst_edges)
    single_linkage_tree = hdbscan.plots.SingleLinkageTree(single_linkage_tree)

    # Cut the single linkage tree at `trajectory_cut_distance` and get the
    # cluster labels, setting clusters smaller than `min_trajectory_size` to
    # -1 (i.e. noise).
    labels = single_linkage_tree.get_clusters(
        trajectory_cut_distance,
        min_trajectory_size
    )

    # Append the labels to `pts`.
    pts = np.append(pts, labels[:, np.newaxis], axis = 1)

    # Returns based on as_list, return_mst and input data type
    if as_list:
        # Get a list of arrays for each trajectory
        separate_pts = pept.utilities.group_by_column(pts, -1)
        if return_mst:
            return separate_pts, mst_edges
        return separate_pts

    # If `point_data` was a `pept.PointData` instance, return a new
    # `pept.PointData` with the new label column.
    if isinstance(point_data, pept.PointData):
        point_data_labelled = pept.PointData(
            pts,
            sample_size = point_data.sample_size,
            overlap = point_data.overlap,
            verbose = False
        )
        if return_mst:
            return point_data_labelled, mst_edges
        else:
            return point_data_labelled
    elif return_mst:
        return pts, mst_edges
    return pts
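
# Hedged, runnable counterpart of the docstring example above: two tracers on
# well-separated paths, with their points interleaved in time. The parameter
# values are illustrative.
import numpy as np

t = np.arange(100, dtype=float)
tracer_a = np.c_[t, np.zeros(100), np.zeros(100), t]         # moves along z
tracer_b = np.c_[t, np.full(100, 500.), np.zeros(100), t]    # 500 mm away

# Interleave the two trajectories to mimic real, intertwined data
points = np.empty((200, 4))
points[0::2] = tracer_a
points[1::2] = tracer_b

labelled = segregate_trajectories(
    points,
    points_window=10,
    trajectory_cut_distance=50.,
    min_trajectory_size=5,
)
print(np.unique(labelled[:, -1]))    # expected: two labels, 0 and 1
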
    def timeseries_trace(
        points,
        size=6.0,
        color=None,
        opacity=0.8,
        colorbar=True,
        colorbar_col=-1,
        colorscale="Magma",
        colorbar_title=None,
    ):
        '''Static method for creating a list of 3 Plotly traces of timeseries.
        See `PlotlyGrapher2D.add_timeseries` for the full documentation.
        '''

        if not isinstance(points, pept.PointData):
            points = pept.PointData(points)

        pts = points.points

        # No need to type-check the other parameters as Plotly will do that
        # anyway...

        # Create the dictionary of marker properties
        marker = dict(size=size, color=color, opacity=opacity)

        # Update `marker` if a colorbar is requested AND color is None.
        if colorbar and color is None:
            if isinstance(colorbar_col, str):
                color_data = points[colorbar_col]
            else:
                color_data = pts[:, colorbar_col]

            marker.update(colorscale=colorscale)
            if colorbar_title is not None:
                marker["colorbar"] = dict(title=colorbar_title)

            # Special case: if there are fewer than 10 values in the colorbar
            # column, add them as separate traces for better distinction
            # between colours.
            labels = np.unique(color_data)

            if len(labels) <= 10:
                traces = [[], [], []]
                for label in labels:
                    selected = pts[color_data == label]

                    for i in range(3):
                        traces[i].append(
                            go.Scatter(x=selected[:, 0],
                                       y=selected[:, i + 1],
                                       mode="markers",
                                       marker=marker))
                return traces

            # Otherwise just use a typical continuous colorbar for all the
            # values in colorbar_col.
            else:
                marker['color'] = color_data

        traces = []
        for i in range(3):
            traces.append(
                go.Scatter(x=pts[:, 0],
                           y=pts[:, i + 1],
                           mode="markers",
                           marker=marker))
        return traces
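
# Hedged sketch of how the three returned traces are typically shown: one row
# per coordinate (x, y and z against time) in a Plotly subplot figure. It
# assumes the static method above is `PlotlyGrapher2D.timeseries_trace` from
# `pept.plots`; the data are illustrative.
import numpy as np
import pept
from pept.plots import PlotlyGrapher2D
from plotly.subplots import make_subplots

pts = pept.PointData(np.random.default_rng(0).random((50, 4)) * 100)
traces = PlotlyGrapher2D.timeseries_trace(pts)

fig = make_subplots(rows=3, cols=1, shared_xaxes=True)
for row, trace in enumerate(traces, start=1):
    # Each entry may itself be a list of traces (one per discrete label)
    for tr in (trace if isinstance(trace, list) else [trace]):
        fig.add_trace(tr, row=row, col=1)
fig.show()
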
    def fit_sample(self, sample):
        '''Use the Birmingham method to track a tracer location from a numpy
        array (i.e. one sample) of LoRs.

        For the given `sample` of LoRs (a numpy.ndarray), this function
        minimises the distance between all of the LoRs, rejecting a fraction of
        lines that lie furthest away from the calculated distance. The process
        is repeated iteratively until a specified fraction (`fopt`) of the
        original subset of LORs remains.

        Parameters
        ----------
        sample : (N, M>=7) numpy.ndarray or pept.LineData
            The sample of LoRs that will be used. Each LoR is expressed as a
            timestamp and a line defined by two points; the data columns are
            then `[time, x1, y1, z1, x2, y2, z2, extra...]`.

        Returns
        -------
        locations : pept.PointData
            The tracked location found, with columns
            `["t", "x", "y", "z", "error"]`. If the `get_used` attribute was
            set on the tracker, the input sample - with an extra "used"
            column marking the LoRs used to compute the location (intended
            for multi-particle tracking) - is attached as the `"_lines"`
            entry of `locations.attrs`.

        Raises
        ------
        ValueError
            If `sample` is not a numpy array of shape (N, M), where M >= 7.
        '''

        if not isinstance(sample, pept.LineData):
            sample = pept.LineData(sample)

        locations, used = birmingham_method(sample.lines, self.fopt)

        # Propagate any LineData attributes besides `columns`
        attrs = sample.extra_attrs()

        locations = pept.PointData(
            [locations],
            columns=["t", "x", "y", "z", "error"],
            **attrs,
        )

        # If `get_used`, also attach a `._lines` attribute with the lines used
        if self.get_used:
            locations.attrs["_lines"] = sample.copy(
                data=np.c_[sample.lines, used],
                columns=sample.columns + ["used"],
            )

        return locations
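
# Hedged usage sketch for the `fit_sample` above, assuming it belongs to
# `pept.tracking.BirminghamMethod` constructed with the `fopt` fraction and
# `get_used=True` (so the used-LoRs mask is attached to the result). The data
# are illustrative.
import numpy as np
import pept
from pept.tracking import BirminghamMethod

rng = np.random.default_rng(0)
lors = pept.LineData(rng.random((400, 7)) * 500, sample_size=200)

location = BirminghamMethod(fopt=0.5, get_used=True).fit_sample(lors[0])
print(location.points)              # one row: [t, x, y, z, error]
print(location.attrs["_lines"])     # the sample with a "used" column appended
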
    def fit_cutpoints(self,
                      cutpoints,
                      store_labels = False,
                      noise = False,
                      verbose = True):
        '''Fit cutpoints (an instance of `PointData`) and return the cluster
        centres and (optionally) the labelled cutpoints.

        Parameters
        ----------
        cutpoints : an instance of `pept.PointData`
            The samples of points that will be clustered. In every sample,
            every point corresponds to a row and is formatted as
            `[time, x, y, z, etc]`. Only columns `[1, 2, 3]` are used for
            clustering.
        store_labels : bool, optional
            If set to True, the clustered cutpoints are returned along with
            the centres of the clusters. Setting it to False speeds up the
            clustering. The default is False.
        noise : bool, optional
            If set to True, the clustered cutpoints also include the points
            classified as noise. Only has an effect if `store_labels` is set
            to True. The default is False.
        verbose : bool, optional
            Provide extra information when computing the cutpoints: time the
            operation and show a progress bar. The default is `True`.

        Returns
        -------
        centres : pept.PointData
            The centroids of every cluster found. They are computed as the
            average of every column of `[time, x, y, z, etc]` of the
            clustered points. Another column is added to the initial data in
            `sample`, signifying the cluster size - the number of points
            included in the cluster.
        clustered_cutpoints : numpy.ndarray or pept.PointData
            The points in `sample` that fall in every cluster. A new column
            is added to the points in `sample` that signifies the label of
            the cluster that the point was associated with: all points in
            cluster number 3 will have the number 3 as the last element in
            their row. The points classified as noise have the number -1
            associated.

        Raises
        ------
        Exception
            If `cutpoints` is not an instance (or a subclass) of `pept.PointData`.

        '''

        if verbose:
            start = time.time()

        if not isinstance(cutpoints, pept.PointData):
            raise Exception(
                "[ERROR]: cutpoints should be an instance of pept.PointData "
                "(or any class inheriting from it)"
            )

        # Fit all samples in `cutpoints` in parallel using joblib
        # Collect all outputs as a list. If verbose, show progress bar with
        # tqdm
        if verbose:
            data_list = Parallel(n_jobs = -1)(
                delayed(self.fit_sample)(
                    sample,
                    store_labels = store_labels,
                    noise = noise,
                    as_array = True,
                ) for sample in tqdm(cutpoints)
            )
        else:
            data_list = Parallel(n_jobs = -1)(
                delayed(self.fit_sample)(
                    sample,
                    store_labels = store_labels,
                    noise = noise,
                    as_array = True,
                ) for sample in cutpoints
            )

        # Access joblib.Parallel output as list comprehensions
        centres = np.array([row[0] for row in data_list if len(row[0]) != 0])
        if len(centres) != 0:
            centres = pept.PointData(np.vstack(centres),
                                     sample_size = 0,
                                     overlap = 0,
                                     verbose = False)

        if store_labels:
            clustered_cutpoints = np.array(
                [row[1] for row in data_list if len(row[1]) != 0]
            )
            clustered_cutpoints = pept.PointData(
                np.vstack(np.array(clustered_cutpoints)),
                sample_size = 0,
                overlap = 0,
                verbose = False,
            )

        if verbose:
            end = time.time()
            print("\nFitting cutpoints took {} seconds\n".format(end - start))

        if store_labels:
            return [centres, clustered_cutpoints]
        else:
            return [centres, []]
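
# Minimal, self-contained sketch of the joblib pattern used in `fit_cutpoints`
# above: map a per-sample function over an iterable of samples in parallel and
# stack the non-empty results. The worker below is a stand-in, not the
# clusterer's real `fit_sample`.
import numpy as np
from joblib import Parallel, delayed


def process_sample(sample):
    # Stand-in for `fit_sample`: return the sample centroid as a single row
    return sample.mean(axis=0, keepdims=True)


samples = [np.random.default_rng(i).random((10, 4)) for i in range(8)]
results = Parallel(n_jobs=-1)(delayed(process_sample)(s) for s in samples)

centres = np.vstack([r for r in results if len(r) != 0])
print(centres.shape)    # (8, 4)
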
    def fit(self,
            line_data,
            max_error=10,
            get_used=False,
            max_workers=None,
            verbose=True):
        '''Fit lines of response (an instance of 'LineData') and return the
        tracked locations and (optionally) the LoRs that were used.

        This is a convenience function that asynchronously iterates through the
        samples in a `LineData`, finding the tracer locations. For more
        fine-grained control over the tracking, the `fit_sample` method can be
        used for individual samples.

        Parameters
        ----------
        line_data : an instance of `pept.LineData`
            The samples of lines of response (LoRs) that will be used for
            locating the tracer. Be careful to set the appropriate
            `sample_size` and `overlap` for good results. If the `sample_size`
            is too low, the tracer might not be found; if it is too high,
            temporal resolution is decreased. If the `overlap` is too small,
            the tracked points might be very "sparse".
        max_error : float, default = 10
            The maximum error allowed to return a 'valid' tracked location. All
            tracer locations with an error larger than `max_error` will be
            discarded.
        get_used : bool, default False
            If `True`, the function will also return a list of boolean masks of
            the LoRs used to compute the tracer location for each sample - that
            is, a vector of the same length as `sample`, containing 1 for the
            rows that were used, and 0 otherwise.
        max_workers : int, optional
            The maximum number of threads that will be used for asynchronously
            tracking the samples in `line_data`. If unset (`None`), the
            number of threads available on the machine (as returned by
            `os.cpu_count()`) will be used.
        verbose : bool, default True
            Provide extra information when tracking: time the operation and
            show a progress bar.

        Returns
        -------
        locations : pept.PointData
            The tracer locations found.
        used : list of numpy.ndarray
            A list of boolean masks of the LoRs used to compute the tracer
            location for each corresponding sample in `line_data` - that is, a
            vector of the same length as a sample, containing 1 for the rows
            that were used, and 0 otherwise.

        Raises
        ------
        TypeError
            If `line_data` is not an instance of `pept.LineData`.
        '''

        if verbose:
            start = time.time()

        if not isinstance(line_data, pept.LineData):
            raise TypeError(
                textwrap.fill(
                    "[ERROR]: `line_data` should be an instance of `pept.LineData`"
                    f" (or any subclass thereof). Received {type(line_data)}.")
            )

        # Users might forget to set the sample_size, leaving it to the default
        # value of 0; in that case, all lines are returned as a single sample -
        # that might not be the intended behaviour.

        if line_data.sample_size == 0:
            warnings.warn(
                textwrap.fill((
                    "\n[WARNING]: The `line_data.sample_size` was left to the "
                    "default value of 0, in which case all lines are returned "
                    "as a single sample. For a very large number of lines, "
                    "this might result in a long function execution time.\n"),
                              replace_whitespace=False), RuntimeWarning)

        get_used = bool(get_used)

        # Using ThreadPoolExecutor, asynchronously collect the locations from
        # every sample in a list of arrays. This is more efficient than using
        # ProcessPoolExecutor (or joblib) because birmingham_method is a Cython
        # function that releases the GIL for most of its computation.
        # If verbose, show progress bar using tqdm.
        if max_workers is None:
            max_workers = os.cpu_count()

        with ThreadPoolExecutor(max_workers=max_workers) as executor:
            futures = []
            for sample in line_data:
                futures.append(
                    executor.submit(birmingham_method, sample, self._fopt))

            if verbose:
                futures = tqdm(futures)

            data_list = [f.result() for f in futures]

        # Access the data_list output as list comprehensions
        # data_list is a list of tuples, in which the first element is an
        # array of the `location`, and the second element is `used`, a
        # boolean mask representing the used LoRs.
        locations = np.array([r[0] for r in data_list if len(r[0]) != 0])
        used = np.array([r[1] for r in data_list if len(r[1]) != 0])

        # Remove LoRs with error above max_error
        locations = np.vstack(np.array(locations))
        locations = np.delete(locations,
                              np.argwhere(locations[:, 4] > max_error),
                              axis=0)

        if len(locations) != 0:
            locations = pept.PointData(locations,
                                       sample_size=0,
                                       overlap=0,
                                       verbose=False)

        if verbose:
            end = time.time()
            print("\nTracking locations took {} seconds\n".format(end - start))

        if get_used:
            # `used` is a list of the `used` arrays for the corresponding sample
            # in `line_data`.
            return locations, used

        return locations
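
# Self-contained sketch of the threading pattern used in `fit` above: submit
# one worker call per sample to a ThreadPoolExecutor, optionally wrap the
# futures in tqdm for a progress bar, then collect the results in order. The
# worker is a stand-in for the Cython `birmingham_method`.
import os
import numpy as np
from concurrent.futures import ThreadPoolExecutor
from tqdm import tqdm


def worker(sample, fopt):
    # Stand-in for `birmingham_method(sample, fopt)`
    return sample.mean(axis=0), np.ones(len(sample), dtype=bool)


samples = [np.random.default_rng(i).random((50, 7)) for i in range(16)]

with ThreadPoolExecutor(max_workers=os.cpu_count()) as executor:
    futures = [executor.submit(worker, s, 0.5) for s in samples]
    results = [f.result() for f in tqdm(futures)]

locations = np.vstack([r[0] for r in results])
print(locations.shape)    # (16, 7)
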
def find_cutpoints(sample_lines,
                   max_distance,
                   cutoffs=None,
                   append_indices=False):
    '''Find the cutpoints from a sample / array of LoRs.

    A cutpoint is the point in 3D space that minimises the distance between any
    two lines. For any two non-parallel 3D lines, this point corresponds to the
    midpoint of the unique segment that is perpendicular to both lines.

    This function considers every pair of lines in `sample_lines` and returns
    all the cutpoints that satisfy the following conditions:

    1. The distance between the two lines is smaller than `max_distance`.
    2. The cutpoint is within the `cutoffs`.

    Parameters
    ----------
    sample_lines : (N, M >= 7) numpy.ndarray
        A sample of LoRs, where each row is `[time, x1, y1, z1, x2, y2, z2]`,
        such that every line is defined by the points `[x1, y1, z1]` and
        `[x2, y2, z2]`.
    max_distance : float
        The maximum distance between any two lines for their cutpoint to be
        considered. A good starting value would be 0.1 mm for small tracers
        and/or clean data, or 0.2 mm for larger tracers and/or noisy data.
    cutoffs : list, optional
        The cutoffs for each dimension, formatted as `[x_min, x_max,
        y_min, y_max, z_min, z_max]`. If it is `None`, they are computed
        automatically by calling `get_cutoffs`. The default is `None`.
    append_indices : bool, optional
        If set to `True`, the indices of the individual LoRs that were used
        to compute each cutpoint are also appended to the returned array.
        Default is `False`.

    Returns
    -------
    cutpoints : (M, 4) or (M, 6) numpy.ndarray
        A numpy array of the calculated cutpoints. If `append_indices` is
        `False`, then the columns are [time, x, y, z]. If `append_indices` is
        `True`, then the columns are [time, x, y, z, i, j], where `i` and `j`
        are the LoR indices from `sample_lines` that were used to compute the
        weighted cutpoints. The time is the average between the timestamps of
        the two LoRs that were used to compute the cutpoint. The first column
        (for time) is sorted.

    Raises
    ------
    ValueError
        If `sample_lines` is not a numpy array with shape (N, M >= 7).
    ValueError
        If `cutoffs` is not a one-dimensional array with values
        `[min_x, max_x, min_y, max_y, min_z, max_z]`

    See Also
    --------
    pept.tracking.peptml.Cutpoints : Compute cutpoints from `pept.LineData`.
    pept.utilities.read_csv : Fast CSV file reading into numpy arrays.
    '''

    if not isinstance(sample_lines, pept.LineData):
        sample_lines = pept.LineData(sample_lines)

    lines = sample_lines.lines

    lines = np.asarray(lines, order='C', dtype=float)
    max_distance = float(max_distance)

    # If cutoffs were not defined, automatically compute them
    if cutoffs is None:
        cutoffs = get_cutoffs(lines)
    else:
        cutoffs = np.asarray(cutoffs, order='C', dtype=float)
        if cutoffs.ndim != 1 or len(cutoffs) != 6:
            raise ValueError(
                ("\n[ERROR]: cutoffs should be a one-dimensional array with "
                 "values [min_x, max_x, min_y, max_y, min_z, max_z]. Received "
                 f"{cutoffs}.\n"))

    sample_cutpoints = pept.utilities.find_cutpoints(
        lines, max_distance, cutoffs, append_indices=append_indices)

    columns = ["t", "x", "y", "z"]
    if append_indices:
        columns += ["line_index1", "line_index2"]

    points = pept.PointData(sample_cutpoints, columns=columns)

    # Add optional metadata to the points; because they have an underscore,
    # they won't be propagated when new objects are constructed
    points._max_distance = max_distance
    points._cutoffs = cutoffs
    if append_indices:
        points._lines = sample_lines

    return points
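
# Hedged usage sketch for `find_cutpoints` above on a small synthetic sample
# of LoRs; the cutoffs are computed automatically. Values are illustrative.
import numpy as np

rng = np.random.default_rng(0)
sample_lines = np.c_[
    np.arange(50, dtype=float),      # timestamps
    rng.random((50, 6)) * 100,       # [x1, y1, z1, x2, y2, z2]
]

cutpoints = find_cutpoints(sample_lines, max_distance=10., append_indices=True)
print(cutpoints.columns)     # ["t", "x", "y", "z", "line_index1", "line_index2"]
print(cutpoints.points.shape)
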
    def find_trajectories(self):

        for i, current_point in enumerate(self.centres):

            if i == 0:
                # Add the first point to trajectory 0
                self.trajectory_indices[0].add(self.max_index)
                self.centres_indices[self.max_index].append(0)
                self.max_index += 1
                continue

            # Search for the closest previous pointsToCheck points
            # within a given maxDistance
            start_index = i - self.points_to_check
            end_index = i

            if start_index < 0:
                start_index = 0

            # Construct a KDTree from the x, y, z (1:4) of the
            # selected points. Get the indices for all the points within
            # maxDistance of the currentPoint
            tree = cKDTree(self.centres[start_index:end_index, 1:4])
            closest_indices = tree.query_ball_point(current_point[1:4],
                                                    self.max_distance,
                                                    n_jobs=-1)
            closest_indices = np.array(closest_indices) + start_index

            # If no point was found, it is a new trajectory. Continue
            if len(closest_indices) == 0:
                self.trajectory_indices[i].add(self.max_index)
                self.centres_indices.append([i])
                self.max_index += 1
                continue

            # For every close point found, search for all the trajectory indices
            #   - If all trajectory indices sets are equal and of a single value
            #   then currentPoint is part of the same trajectory
            #   - If all trajectory indices sets are equal, but of more values,
            #   then currentPoint diverged from an intersection of trajectories
            #   and is part of a single trajectory => separate it
            #
            #   - If every pair of trajectory indices sets is not disjoint, then
            #   currentPoint is only one of them
            #   - If there exists a pair of trajectory indices sets that is
            #   disjoint, then currentPoint is part of all of them

            # Select the trajectories of all the points that were found
            # to be the closest
            closest_trajectories = self.trajectory_indices[closest_indices]
            #print("closestTrajectories:")
            #print(closestTrajectories)

            # If all the closest points are part of the same trajectory
            # (just one!), then the currentPoint is part of it too
            if (np.all(closest_trajectories == closest_trajectories[0])
                    and len(closest_trajectories[0]) == 1):

                self.trajectory_indices[i] = closest_trajectories[0]
                self.centres_indices[next(iter(
                    closest_trajectories[0]))].append(i)
                continue

            # Otherwise, check the points based on their cluster size
            else:
                # Create a list of all the trajectories that were found to
                # intersect
                #print('\nIntersection:')
                closest_traj_indices = list(set().union(*closest_trajectories))

                #print("ClosestTrajIndices:")
                #print(closestTrajIndices)

                # For each close trajectory, calculate the mean cluster size
                # of the last points_cluster_size points

                # Keep track of the mean cluster size that is the closest to
                # the currentPoint's clusterSize
                current_cluster_size = current_point[4]
                #print("currentClusterSize = {}".format(currentClusterSize))
                closest_traj_index = -1
                cluster_size_diff = self.max_cluster_diff

                for traj_index in closest_traj_indices:
                    #print("trajIndex = {}".format(trajIndex))

                    traj_centres = self.centres[
                        self.centres_indices[traj_index]]
                    #print("trajCentres:")
                    #print(trajCentres)
                    mean_cluster_size = traj_centres[
                        -self.points_cluster_size:][:, 4].mean()
                    #print("meanClusterSize = {}".format(meanClusterSize))
                    #print("clusterSizeDiff = {}".format(clusterSizeDiff))
                    #print("abs diff = {}".format(np.abs( currentClusterSize - meanClusterSize )))
                    if np.abs(current_cluster_size -
                              mean_cluster_size) < cluster_size_diff:
                        closest_traj_index = traj_index
                        cluster_size_diff = np.abs(current_cluster_size -
                                                   mean_cluster_size)

                if closest_traj_index == -1:
                    #self.trajectoryIndices[i] = set(closestTrajIndices)
                    #for trajIndex in closestTrajIndices:
                    #    self.centresIndices[trajIndex].append(i)

                    print("\n**** -1 ****\n")
                    break
                else:
                    #print("ClosestTrajIndex found = {}".format(closestTrajIndex))
                    self.trajectory_indices[i] = set([closest_traj_index])
                    self.centres_indices[closest_traj_index].append(i)

        individual_trajectories = []
        for traj_centres in self.centres_indices:
            individual_traj = pept.PointData(self.centres[traj_centres],
                                             sample_size=0,
                                             overlap=0,
                                             verbose=False)
            individual_trajectories.append(individual_traj)

        return individual_trajectories
    def fit(self, points):
        points = pept.tracking.Stack().fit(points)
        if not isinstance(points, pept.PointData):
            points = pept.PointData(points)

        # Columns corresponding to the signatures
        sig_cols = [points.columns.index(sn) for sn in self.signatures.keys()]

        trajs = pept.tracking.SplitAll(self.column).fit(points)
        trajs.sort(key=lambda traj: traj["t"][0])

        # List of connections to do, list[tuple[int, int]]
        connections = []

        # Try to forward-connect the end of trajs[i] to the start of trajs[j]
        start_times = np.array([t["t"][0] for t in trajs])

        for i in range(len(trajs)):
            # Select all future trajectories whose start time is within tmax
            cur_traj = trajs[i]
            indices = np.argwhere(
                (start_times > cur_traj["t"][-1]) &
                (start_times - cur_traj["t"][-1] < self.tmax), ).flatten()

            # If no feasible times were found, carry on
            if len(indices) == 0:
                continue

            # Compute connection costs between trajectory ends
            costs = []
            for j in indices:
                e2 = trajs[i].points[-self.num_points:].mean(axis=0)
                e1 = trajs[j].points[:self.num_points].mean(axis=0)

                # The first cost is the distance between traj ends; the rest
                # are the signature differences
                cost = [np.linalg.norm(e2[1:4] - e1[1:4])]
                for sc in sig_cols:
                    cost.append(np.abs(e2[sc] - e1[sc]))

                costs.append(cost)

            # Keep track of trajectory indices and associated costs
            costs = np.c_[indices, np.array(costs)]

            # Remove candidate connections with costs larger than the
            # thresholds
            selection = costs[:, 1] < self.dmax
            for isig, sthresh in enumerate(self.signatures.values()):
                selection = selection & (costs[:, 2 + isig] < sthresh)

            costs = costs[selection]

            # If no feasible connection was found, carry on
            if not len(costs):
                continue

            # Otherwise, establish connection with minimum overall cost
            best = costs[:, 1:].mean(axis=1).argmin()
            connection_index = int(costs[best, 0])
            connections.append((i, connection_index))

        # Set connected labels
        if isinstance(self.column, str):
            label_col = points.columns.index(self.column)
        else:
            label_col = self.column

        for i1, i2 in connections:
            trajs[i2].points[:, label_col] = trajs[i1].points[0, label_col]

        # Stack trajectories and map labels from [0, 2, 2, 3, 0] to
        # [0, 1, 1, 2, 0]
        trajs = pept.tracking.Stack().fit(trajs)

        labels = trajs.points[:, label_col]
        _, ordered = np.unique(labels, return_inverse=True)
        trajs.points[:, label_col] = ordered

        return trajs
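
# A minimal, self-contained sketch (not part of the library code) of the
# label-remapping step performed above: `np.unique(..., return_inverse=True)`
# maps arbitrary labels such as [0, 2, 2, 3, 0] onto consecutive integers
# [0, 1, 1, 2, 0] while preserving which points share a label.
import numpy as np

labels = np.array([0., 2., 2., 3., 0.])
_, ordered = np.unique(labels, return_inverse=True)
print(ordered)  # [0 1 1 2 0]
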
def test_centroids():
    rng = np.random.default_rng(0)
    points_raw = rng.random((10, 4)) * 100
    points = pept.PointData(points_raw, sample_size=4)

    f1 = pept.tracking.Centroids()
    print(f1)

    # Test `fit_sample`
    s1 = f1.fit_sample(points_raw).points
    s2 = points_raw.mean(axis=0)
    assert (s1 == s2).all(), "Single sample geometric centroid"

    s1 = f1.fit_sample(points[0]).points
    s2 = points[0].points.mean(axis=0)
    assert (s1 == s2).all(), "Single sample geometric centroid"

    # Test `fit`
    traversed = f1.fit(points)
    manual = [p.points.mean(axis=0) for p in points]

    assert all([(t.points == m).all() for t, m in zip(traversed, manual)]), \
        "Full `fit` traversal"

    # Test `fit_sample`
    s1 = f1.fit_sample(points[0]).points
    s2 = points_raw[:4].mean(axis=0)
    assert (s2[:4] == s1[:, :4]).all(), "Single sample geometric centroid"

    # Test `fit`
    traversed = f1.fit(points)

    # Test different settings
    Centroids(error=True).fit_sample(points[0])
    Centroids(error=True, cluster_size=True).fit_sample(points[0])

    # Test weighted centroid computation
    points_raw = np.arange(50).reshape(10, 5)  # Last column is "weight"
    points_raw[:, -1] = 1.  # Start with equal weights
    points = pept.PointData(points_raw,
                            columns=["t", "x", "y", "z", "weight"],
                            sample_size=4)

    # Test `fit_sample`
    s1 = f1.fit_sample(points_raw).points
    s2 = points_raw.mean(axis=0)
    assert np.allclose(s1[:, :4], s2[:4]), "Single sample weighted centroid"

    s1 = f1.fit_sample(points[0]).points
    s2 = points[0].points.mean(axis=0)
    assert np.allclose(s1[:, :4], s2[:4]), "Single sample weighted centroid"

    # Ensure "weight" is removed
    assert "weight" not in f1.fit_sample(points).columns

    # Test `fit`
    traversed = f1.fit(points)

    # Test different settings
    Centroids(error=True).fit_sample(points[0])
    Centroids(error=True, cluster_size=True).fit_sample(points[0])
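
# Hedged sketch (separate from the test above): a weighted centroid computed
# by hand with NumPy, which is what a "weight" column is expected to feed
# into. With equal weights, as in the test, this reduces to the plain
# column-wise mean.
import numpy as np

pts = np.arange(20, dtype=float).reshape(4, 5)  # columns: t, x, y, z, weight
pts[:, -1] = [1., 1., 2., 4.]                   # unequal weights
weighted = np.average(pts[:, :4], axis=0, weights=pts[:, -1])
print(weighted)  # weighted mean of the t, x, y, z columns
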
Example 25
File: peptml.py Project: smngr/pept
    def fit_sample(self,
                   sample,
                   get_labels=False,
                   as_array=True,
                   verbose=False,
                   _set_labels=True):
        '''Fit one sample of cutpoints and return the cluster centres and
        (optionally) the labelled cutpoints.

        Parameters
        ----------
        sample : (N, M >= 4) numpy.ndarray
            The sample of points that will be clustered. The expected columns
            are `[time, x, y, z, etc]`. Only columns `[1, 2, 3]` are used for
            clustering.
        get_labels : bool, default False
            If set to True, the input `sample` is also returned with an extra
            column representing the label of the cluster that each point is
            associated with. This label is an `int`, numbering clusters
            starting from 0; noise is represented with the value -1.
        as_array : bool, default True
            If set to True, the centres of the clusters and the labelled
            cutpoints are returned as numpy arrays. If set to False, they are
            returned inside instances of `pept.PointData`.
        verbose : bool, default False
            Provide extra information when computing the cutpoints: time the
            operation and show a progress bar.
        _set_labels : bool, default True
            This is an internal setting that an end-user should not normally
            care about. If `True`, the class property `labels` will be set
            after fitting. Setting this to `False` is helpful for multithreaded
            contexts - when calling `fit_sample` in parallel, it makes sure
            no internal attributes are mutated at the same time.

        Returns
        -------
        centres : numpy.ndarray or pept.PointData
            The centroids of every cluster found with columns
            `[time, x, y, z, ..., cluster_size]`. They are computed as the
            column-wise average of the points included in each cluster (i.e.
            for each label). Another column is added to the initial data in
            `sample`, signifying the cluster size - that is, the number of
            points included in the cluster. If `as_array` is set to True, it is
            a numpy array, otherwise the centres are stored in a
            `pept.PointData` instance.
        sample_labelled : optional, numpy.ndarray or pept.PointData
            Returned if `get_labels` is `True`. It is the input `sample` with
            an appended column representing the label of the cluster that the
            point was associated with. The labels are integers starting from 0.
            The points classified as noise have the number -1 associated. If
            `as_array` is set to True, it is a numpy array, otherwise the
            labelled points are stored in a `pept.PointData` instance.

        Raises
        ------
        ValueError
            If `sample` is not a numpy array of shape (N, M), where M >= 4.

        Note
        ----
        If no clusters were found (i.e. all labels are -1), the returned values
        are empty numpy arrays.

        '''

        if verbose:
            start = time.time()

        # sample columns: [time, x, y, z, ...]
        sample = np.asarray(sample, dtype=float, order="C")
        if sample.ndim != 2 or sample.shape[1] < 4:
            raise ValueError((
                "\n[ERROR]: `sample` should have two dimensions (M, N), where "
                f"N >= 4. Received {sample.shape}.\n"))

        # Only cluster based on [x, y, z]. Make a C-contiguous copy to improve
        # cache-locality, then delete it.
        sample_xyz = np.asarray(sample[:, 1:4], dtype=float, order="C")

        labels = self.clusterer.fit_predict(sample_xyz)
        max_label = labels.max()

        # If `allow_single_cluster` is "auto", check if no clusters were found
        # and try again using the hdbscan option allow_single_cluster = True.
        if max_label == -1 and self._allow_single_cluster == "auto":
            labels = self.clusterer_single.fit_predict(sample_xyz)
            max_label = labels.max()

        del sample_xyz

        if _set_labels:
            self._labels = labels

        # the centre of a cluster is the average of the time, x, y, z columns
        # + the number of points in that cluster (i.e. cluster size)
        # centres columns: [time, x, y, z, ..etc.., cluster_size]
        centres = []
        for i in range(0, max_label + 1):
            # Average time, x, y, z of cluster of label i
            centres_row = np.mean(sample[labels == i], axis=0)

            # Append the number of points of label i => cluster_size
            centres_row = np.append(centres_row, (labels == i).sum())
            centres.append(centres_row)

        centres = np.array(centres)

        if not as_array and len(centres) != 0:
            centres = pept.PointData(centres,
                                     sample_size=0,
                                     overlap=0,
                                     verbose=False)

        if verbose:
            end = time.time()
            print("Fitting one sample took {} seconds".format(end - start))

        # If labels are requested, also return the initial sample with appended
        # labels. Labels go from 0 to max_label; -1 represents noise.
        if get_labels:
            sample_labelled = np.append(sample, labels[:, np.newaxis], axis=1)
            if not as_array and len(sample_labelled) != 0:
                sample_labelled = pept.PointData(sample_labelled,
                                                 sample_size=0,
                                                 overlap=0,
                                                 verbose=False)
            return centres, sample_labelled

        # Otherwise just return the found centres
        return centres
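
# Hedged sketch (plain NumPy, not library code) of the per-cluster averaging
# performed above: given points [t, x, y, z] and integer labels (-1 = noise),
# each centre is the column-wise mean of its cluster with the cluster size
# appended as a final column.
import numpy as np

sample = np.random.default_rng(0).random((6, 4)) * 100
labels = np.array([0, 0, 1, 1, 1, -1])

centres = np.array([
    np.append(sample[labels == i].mean(axis=0), (labels == i).sum())
    for i in range(labels.max() + 1)
])
print(centres.shape)  # (2, 5): [t, x, y, z, cluster_size] per cluster
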
Example 26
def find_minpoints(sample_lines,
                   num_lines,
                   max_distance,
                   cutoffs=None,
                   append_indices=False):
    '''Compute the minimum distance points (MDPs) from all combinations of
    `num_lines` lines given in an array of lines `sample_lines`.

    Given a sample of lines, this function computes the minimum distance
    points (MDPs) for every possible combination of `num_lines` lines. The
    returned numpy array contains all MDPs that satisfy the following:

    1. Are within the `cutoffs`.
    2. Are closer to all the constituent LoRs than `max_distance`.

    Parameters
    ----------
    sample_lines: (M, N) numpy.ndarray
        A 2D array of lines, where each line is defined by two points such that
        every row is formatted as `[t, x1, y1, z1, x2, y2, z2, etc.]`. It
        *must* have at least 2 lines and the combination size `num_lines`
        *must* be smaller or equal to the number of lines. Put differently:
        2 <= num_lines <= len(sample_lines).

    num_lines: int
        The number of lines in each combination of LoRs used to compute the
        MDP. This function considers every combination of `num_lines` from the
        input `sample_lines`. It must be smaller or equal to the number of
        input lines `sample_lines`.

    max_distance: float
        The maximum allowed distance between an MDP and its constituent lines.
        If any distance from the MDP to one of its lines is larger than
        `max_distance`, the MDP is thrown away.

    cutoffs: (6,) numpy.ndarray, optional
        An array of spatial cutoff coordinates with *exactly 6 elements* as
        [x_min, x_max, y_min, y_max, z_min, z_max]. If any MDP lies outside
        this region, it is thrown away. If it is `None`, they are computed
        automatically by calling `get_cutoffs`. The default is `None`.

    append_indices: bool, default False
        A boolean specifying whether to include the indices of the lines used
        to compute each MDP. If `False`, the output array will only contain the
        [time, x, y, z] of the MDPs. If `True`, the output array will have
        extra columns [time, x, y, z, line_idx(1), ..., line_idx(n)] where
        n = `num_lines`.

    Returns
    -------
    minpoints: (M, N) numpy.ndarray
        A 2D array of `float`s containing the time and coordinates of the MDPs
        [time, x, y, z]. The time is computed as the average of the constituent
        lines. If `append_indices` is `True`, then `num_lines` indices of the
        constituent lines are appended as extra columns:
        [time, x, y, z, line_idx1, line_idx2, ..]. The first column (for time)
        is sorted.

    Raises
    ------
    ValueError
        If `sample_lines` is not a numpy array with shape (N, M >= 7).

    ValueError
        If 2 <= num_lines <= len(sample_lines) is not satisfied.

    ValueError
        If `cutoffs` is not a one-dimensional array with values
        `[min_x, max_x, min_y, max_y, min_z, max_z]`

    See Also
    --------
    pept.tracking.peptml.Minpoints : Compute minpoints from `pept.LineData`.
    pept.utilities.read_csv : Fast CSV file reading into numpy arrays.
    '''

    if not isinstance(sample_lines, pept.LineData):
        sample_lines = pept.LineData(sample_lines)

    lines = sample_lines.lines

    lines = np.asarray(lines, order='C', dtype=float)

    num_lines = int(num_lines)
    max_distance = float(max_distance)

    if cutoffs is None:
        cutoffs = get_cutoffs(sample_lines)
    else:
        cutoffs = np.asarray(cutoffs, order='C', dtype=float)
        if cutoffs.ndim != 1 or len(cutoffs) != 6:
            raise ValueError(
                ("\n[ERROR]: cutoffs should be a one-dimensional array with "
                 "values [min_x, max_x, min_y, max_y, min_z, max_z]. Received "
                 f"{cutoffs}.\n"))

    sample_minpoints = pept.utilities.find_minpoints(
        lines, num_lines, max_distance, cutoffs, append_indices=append_indices)

    columns = ["t", "x", "y", "z"]
    if append_indices:
        columns += [f"line_index{i + 1}" for i in range(num_lines)]

    points = pept.PointData(sample_minpoints, columns=columns)

    # Add optional metadata to the points; because they have an underscore,
    # they won't be propagated when new objects are constructed
    points._max_distance = max_distance
    points._cutoffs = cutoffs
    points._num_lines = num_lines

    if append_indices:
        points._lines = sample_lines

    return points
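
# Hedged usage sketch for the wrapper defined above, assuming it is in scope
# and `pept` is installed. Three synthetic LoRs crossing at (50, 50, 50); the
# MDP of every pair of lines is returned with the constituent line indices
# appended as extra columns.
import numpy as np

p = np.array([50., 50., 50.])
dirs = np.array([[1., 0., 0.], [0., 1., 0.], [0., 0., 1.]])
lors = np.c_[np.arange(3.),      # t
             p - 60. * dirs,     # first point on each line
             p + 60. * dirs]     # second point on each line

mdps = find_minpoints(lors, num_lines=2, max_distance=10.,
                      cutoffs=[0., 100., 0., 100., 0., 100.],
                      append_indices=True)
print(mdps.columns)  # ['t', 'x', 'y', 'z', 'line_index1', 'line_index2']
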
Example 27
File: peptml.py Project: smngr/pept
    def fit(self, cutpoints, get_labels=False, max_workers=None, verbose=True):
        '''Fit cutpoints (an instance of `PointData`) and return the cluster
        centres and (optionally) the labelled cutpoints.

        This is a convenience function that clusters each sample in an instance
        of `pept.PointData` *in parallel*, using joblib. For more fine-grained
        control over the clustering, the `fit_sample` method can be used for
        each individual sample.

        Parameters
        ----------
        cutpoints : an instance of `pept.PointData`
            The samples of points that will be clustered. Be careful to set the
            appropriate `sample_size` and `overlap` for good results. If the
            `sample_size` is too low, the less radioactive tracers might not be
            found; if it is too high, temporal resolution is decreased. If the
            `overlap` is too small, the tracked points might be very "sparse".
            Note: when transforming LoRs into cutpoints using the `Cutpoints`
            class, the `sample_size` is automatically set based on the average
            number of cutpoints found per sample of LoRs.
        get_labels : bool, default False
            If set to True, the labelled cutpoints are returned along with the
            centres of the clusters. The labelled cutpoints are a list of
            `pept.PointData` for each sample of cutpoints, with an appended
            column representing the cluster labels (starting from 0; noise is
            encoded as -1).
        max_workers : int, optional
            The maximum number of threads that will be used for asynchronously
            clustering the samples in `cutpoints`. If unset (`None`), the
            number of threads available on the machine (as returned by
            `os.cpu_count()`) will be used.
        verbose : bool, default True
            Provide extra information when computing the cutpoints: time the
            operation and show a progress bar.

        Returns
        -------
        centres : pept.PointData
            The centroids of every cluster found with columns
            `[time, x, y, z, ..., cluster_size]`. They are computed as the
            column-wise average of the points included in each cluster (i.e.
            for each label). Another column is added to the initial data in
            `sample`, signifying the cluster size - that is, the number of
            points included in the cluster.
        labelled_cutpoints : optional, pept.PointData
            Returned if `get_labels` is `True`. It is a `pept.PointData`
            instance in which every sample is the corresponding sample in
            `cutpoints`, but with an appended column representing the label of
            the cluster that the point was associated with. The labels are
            integers starting from 0. The points classified as noise have the
            number -1 associated. Note that the labels are only consistent
            within the same sample; that is, for tracers A and B, if in one
            sample A gets the label 0 and B the label 1, in another sample
            their order might be reversed. The trajectory separation module
            might be used to separate them out.

        Raises
        ------
        TypeError
            If `cutpoints` is not an instance (or a subclass) of
            `pept.PointData`.

        Note
        ----
        If no clusters were found (i.e. all labels are -1), the returned values
        are empty numpy arrays.
        '''

        if verbose:
            start = time.time()

        if not isinstance(cutpoints, pept.PointData):
            raise TypeError((
                "\n[ERROR]: cutpoints should be an instance of "
                "`pept.PointData` (or any class inheriting from it). Received "
                f"{type(cutpoints)}.\n"))

        # Users might forget to set the sample_size, leaving it to the default
        # value of 0; in that case, all points are returned as a single sample;
        # that might not be the intended behaviour.
        if cutpoints.sample_size == 0:
            warnings.warn(
                textwrap.fill((
                    "\n[WARNING]: The `cutpoints.sample_size` was left to the "
                    "default value of 0, in which case all points are returned"
                    " as a single sample. For a very large number of points, "
                    "this might result in a long function execution time.\n"),
                              replace_whitespace=False), RuntimeWarning)

        get_labels = bool(get_labels)

        # Fit all samples in `cutpoints` in parallel using joblib
        # Collect all outputs as a list. If verbose, show progress bar with
        # tqdm
        if verbose:
            cutpoints = tqdm(cutpoints)

        if max_workers is None:
            max_workers = os.cpu_count()

        data_list = Parallel(n_jobs=max_workers)(
            delayed(self.fit_sample)(sample,
                                     get_labels=get_labels,
                                     as_array=True,
                                     verbose=False,
                                     _set_labels=False)
            for sample in cutpoints)

        if not get_labels:
            # data_list is a list of arrays; only keep the arrays with at
            # least one row. Keep them in a plain list to avoid building a
            # ragged numpy array.
            centres = [r for r in data_list if len(r) != 0]
        else:
            # data_list is a list of tuples, in which the first element is an
            # array of the centres, and the second element is an array of the
            # labelled cutpoints.
            centres = [r[0] for r in data_list if len(r[0]) != 0]

        if len(centres) != 0:
            centres = pept.PointData(np.vstack(centres),
                                     sample_size=0,
                                     overlap=0,
                                     verbose=False)

        if verbose:
            end = time.time()
            print("\nFitting cutpoints took {} seconds.\n".format(end - start))

        if get_labels:
            # data_list is a list of tuples, in which the first element is an
            # array of the centres, and the second element is an array of the
            # labelled cutpoints.
            labelled_cutpoints = [r[1] for r in data_list if len(r[1]) != 0]
            if len(labelled_cutpoints) != 0:
                # Encapsulate `labelled_cutpoints` in a `pept.PointData`
                # instance in which every sample is the corresponding sample in
                # `cutpoints`, but with an appended column representing the
                # labels. Therefore, the `sample_size` is the same as for
                # `cutpoints`, which is equal to the length of every array in
                # `labelled_cutpoints`
                labelled_cutpoints = pept.PointData(
                    np.vstack(labelled_cutpoints),
                    sample_size=len(labelled_cutpoints[0]),
                    overlap=0,
                    verbose=False)

            # Set the attribute `labels` to the stacked labels of all the
            # labelled cutpoints; that is, the last column in the
            # labelled_cutpoints internal data:
            self._labels = labelled_cutpoints.points[:, -1]

            return centres, labelled_cutpoints

        return centres
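
# Hedged sketch (separate from the method above) of the joblib pattern it
# relies on: each sample is handed to a worker via `delayed`, and the results
# are collected back in order.
import os
from joblib import Parallel, delayed

def process(sample):
    return sum(sample)            # stand-in for fit_sample

samples = [[1, 2], [3, 4], [5, 6]]
results = Parallel(n_jobs=os.cpu_count())(
    delayed(process)(s) for s in samples)
print(results)  # [3, 7, 11]
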
Example 28
    def fit_sample(self,
                   sample,
                   store_labels = False,
                   noise = False,
                   as_array = True,
                   verbose = False):
        '''Fit one sample of cutpoints and return the cluster centres and
        (optionally) the labelled cutpoints.

        Parameters
        ----------
        sample : (N, M >= 4) numpy.ndarray
            The sample of points that will be clustered. Every point corresponds to
            a row and is formatted as `[time, x, y, z, etc]`. Only columns `[1, 2, 3]`
            are used for clustering.
        store_labels : bool, optional
            If set to True, the clustered cutpoints are returned along with the centres
            of the clusters. Setting it to False speeds up the clustering. The default
            is False.
        noise : bool, optional
            If set to True, the clustered cutpoints also include the points classified
            as noise. Only has an effect if `store_labels` is set to True. The default
            is False.
        as_array : bool, optional
            If set to True, the centres of the clusters and the clustered cutpoints are
            returned as numpy arrays. If set to False, they are returned inside
            instances of `pept.PointData`.
        verbose : bool, optional
            Provide extra information when computing the cutpoints: time the operation
            and show a progress bar. The default is `False`.

        Returns
        -------
        centres : numpy.ndarray or pept.PointData
            The centroids of every cluster found. They are computed as the average
            of every column of `[time, x, y, z, etc]` of the clustered points. Another
            column is added to the initial data in `sample`, signifying the cluster
            size - the number of points included in the cluster. If `as_array` is
            set to True, it is a numpy array, otherwise the centres are stored
            in a pept.PointData instance.
        clustered_cutpoints : numpy.ndarray or pept.PointData
            The points in `sample` that fall in every cluster. A new column is added
            to the points in `sample` that signifies the label of cluster that the
            point was associated with: all points in cluster number 3 will have the
            number 3 as the last element in their row. The points classified as noise
            have the number -1 associated. If `as_array` is set to True, it is a numpy
            array, otherwise the clustered cutpoints are stored in a pept.PointData
            instance.

        Raises
        ------
        TypeError
            If `sample` is not a numpy array of shape (N, M), where M >= 4.

        '''

        if verbose:
            start = time.time()

        # sample row: [time, x, y, z]
        if sample.ndim != 2 or sample.shape[1] < 4:
            raise TypeError(
                '\n[ERROR]: sample should have two dimensions (M, N), where '
                'N >= 4. Received {}\n'.format(sample.shape))

        # Only cluster based on [x, y, z]
        labels = self.clusterer.fit_predict(sample[:, 1:4])
        max_label = labels.max()

        centres = []
        clustered_cutpoints = []

        # the centre of a cluster is the average of the time, x, y, z columns
        # and the number of points of that cluster
        # centres row: [time, x, y, z, ..etc.., cluster_size]
        for i in range(0, max_label + 1):
            # Average time, x, y, z of cluster of label i
            centres_row = np.mean(sample[labels == i], axis = 0)
            # Append the number of points of label i => cluster_size
            centres_row = np.append(centres_row, (labels == i).sum())
            centres.append(centres_row)

        centres = np.array(centres)

        if not as_array:
            centres = pept.PointData(centres,
                                     sample_size = 0,
                                     overlap = 0,
                                     verbose = False)

        # Return all cutpoints as a list of numpy arrays for every label
        # where the last column of an array is the label
        if store_labels:
            # Create a list of numpy arrays with rows: [t, x, y, z, ..etc.., label]
            if noise:
                cutpoints = sample[labels == -1]
                cutpoints = np.insert(cutpoints, cutpoints.shape[1], -1, axis = 1)
                clustered_cutpoints.append(cutpoints)

            for i in range(0, max_label + 1):
                cutpoints = sample[labels == i]
                cutpoints = np.insert(cutpoints, cutpoints.shape[1], i, axis = 1)
                clustered_cutpoints.append(cutpoints)

            clustered_cutpoints = np.vstack(clustered_cutpoints)

            if not as_array:
                clustered_cutpoints = pept.PointData(clustered_cutpoints,
                                                     sample_size = 0,
                                                     overlap = 0,
                                                     verbose = False)

        if verbose:
            end = time.time()
            print("Fitting one sample took {} seconds".format(end - start))

        return [centres, clustered_cutpoints]
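
# Hedged sketch of the label-appending step used above: `np.insert` at the
# last column index adds the cluster label to every row of a points array.
import numpy as np

cutpoints = np.zeros((3, 4))                         # [t, x, y, z]
labelled = np.insert(cutpoints, cutpoints.shape[1], 2, axis=1)
print(labelled.shape, labelled[0, -1])               # (3, 5) 2.0
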
Example 29
    def fit_sample(self, voxels: pept.Voxels):
        '''Use the FPI algorithm to locate a tracer from a single voxellised
        space (i.e. from one sample of LoRs).

        A sample of LoRs can be voxellised using the `pept.Voxels.from_lines`
        method before calling this function.

        Parameters
        ----------
        voxels: pept.Voxels
            A single voxellised space (i.e. from a single sample of LoRs) for
            which the tracers' locations will be found using the FPI method.


        Returns
        -------
        locations: pept.PointData
            The tracked locations found, returned in a `pept.PointData` with
            columns [t, x, y, z, error_x, error_y, error_z, error] for ease of
            visualisation.

        Raises
        ------
        TypeError
            If `voxels` is not an instance of `pept.Voxels` (or subclass
            thereof).
        '''

        positions = fpi_ext(
            np.asarray(voxels.voxels, dtype=float, order="C"),
            self.w,
            self.r,
            self.lld_counts,
        )

        # Translate the coordinates from the voxel space to the physical space
        positions[:, :3] *= voxels.voxel_size
        positions[:, :3] += [voxels.xlim[0], voxels.ylim[0], voxels.zlim[0]]

        # Convert errors to physical space too
        positions[:, 3:] *= voxels.voxel_size

        # Create points array to store [t, x, y, z, xerr, yerr, zerr, err]
        points = np.full((len(positions), 8), np.nan)
        points[:, 1:7] = positions
        points[:, 7] = np.linalg.norm(positions[:, 3:6], axis=1)

        # Set the timestamp if `_lines` exists
        if "_lines" in voxels.attrs:
            points[:, 0] = voxels.attrs["_lines"].lines[:, 0].mean()
        else:
            warnings.warn(
                ("The input `Voxels` did not have a '_lines' attribute, so no "
                 "timestamp can be inferred. The time was set to NaN."),
                RuntimeWarning)

        return pept.PointData(
            points,
            columns=[
                "t", "x", "y", "z", "error_x", "error_y", "error_z", "error"
            ],
        )
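
# Hedged sketch (synthetic values, not library output) of the voxel-to-world
# conversion performed above: voxel-index coordinates are scaled by the voxel
# size and shifted by the lower corner of the voxel grid.
import numpy as np

voxel_size = np.array([2.0, 2.0, 4.0])            # units per voxel in x, y, z
lower_corner = np.array([-100.0, -100.0, 0.0])    # xlim[0], ylim[0], zlim[0]
positions_vox = np.array([[10.5, 20.0, 5.25]])    # tracer position in voxels
positions_phys = positions_vox * voxel_size + lower_corner
print(positions_phys)  # [[-79. -60.  21.]]
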
Example 30
    def fit_sample(self, sample, get_used=False, as_array=True, verbose=False):
        '''Use the Birmingham method to track a tracer location from a numpy
        array (i.e. one sample) of LoRs.

        For the given `sample` of LoRs (a numpy.ndarray), this function
        minimises the distance between all of the LoRs, rejecting a fraction of
        lines that lie furthest away from the calculated distance. The process
        is repeated iteratively until a specified fraction (`fopt`) of the
        original subset of LORs remains.

        Parameters
        ----------
        sample : (N, M>=7) numpy.ndarray
            The sample of LORs that will be clustered. Each LoR is expressed as
            a timestamp and a line defined by two points; the data columns are
            then `[time, x1, y1, z1, x2, y2, z2, extra...]`.
        get_used : bool, default False
            If `True`, the function will also return a boolean mask of the LoRs
            used to compute the tracer location - that is, a vector of the same
            length as `sample`, containing 1 for the rows that were used, and 0
            otherwise.
        as_array : bool, default True
            If set to True, the tracked locations are returned as numpy arrays.
            If set to False, they are returned inside an instance of
            `pept.PointData` for ease of iteration and plotting.
        verbose : bool, default False
            Provide extra information when tracking a location: time the
            operation and show a progress bar.

        Returns
        -------
        locations : numpy.ndarray or pept.PointData
            The tracked locations found.
        used : numpy.ndarray, optional
            If `get_used` is true, then also return a boolean mask of the LoRs
            used to compute the tracer location - that is, a vector of the same
            length as `sample`, containing 1 for the rows that were used, and 0
            otherwise.
            (Used for multi-particle tracking; not implemented yet.)

        Raises
        ------
        ValueError
            If `sample` is not a numpy array of shape (N, M), where M >= 7.
        '''

        if verbose:
            start = time.time()

        # Type-check input parameters.
        # sample cols: [time, x1, y1, z1, x2, y2, z2, etc.]
        sample = np.asarray(sample, dtype=float, order="C")

        if sample.ndim != 2 or sample.shape[1] < 7:
            raise ValueError(
                textwrap.fill(
                    "[ERROR]: `sample` should have two dimensions (M, N), where "
                    f"N >= 7. Received {sample.shape}."))

        locations, used = birmingham_method(sample, self._fopt)

        if not as_array:
            locations = pept.PointData(locations,
                                       sample_size=0,
                                       overlap=0,
                                       verbose=False)

        if verbose:
            end = time.time()
            print(("Tracking one location with %i LORs took %.3f seconds" %
                   (sample.shape[0], end - start)))

        if get_used:
            return locations, used

        return locations
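
# Hedged sketch of how the `used` mask returned with `get_used=True` can be
# applied: keep the LoRs that contributed to the tracked location and set the
# rest aside (e.g. before searching for another tracer).
import numpy as np

sample = np.random.default_rng(0).random((5, 7))   # [t, x1, y1, z1, x2, y2, z2]
used = np.array([1, 0, 1, 1, 0])                   # 1 = used, 0 = rejected
contributing = sample[used.astype(bool)]
remaining = sample[~used.astype(bool)]
print(contributing.shape, remaining.shape)         # (3, 7) (2, 7)
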