def test_stack():
    rng = np.random.default_rng(0)
    points_raw = rng.random((10, 4)) * 100
    lines_raw = rng.random((10, 7)) * 500

    points = pept.PointData(points_raw, sample_size=4)
    lines = pept.LineData(lines_raw, sample_size=4)

    # Test it returns points back
    p = Stack().fit(points)
    assert p is points, "Stack did not return a single PointData back"

    # Test it returns lines back
    ls = Stack().fit(lines)
    assert ls is lines, "Stack did not return a single LineData back"

    # Test it concatenates a list of two points
    points2 = Stack().fit([points, points])
    assert np.all(points2.points[:10] == points.points[:10])

    # Test it concatenates a list of two lines
    lines2 = Stack().fit([lines, lines])
    assert np.all(lines2.lines[:10] == lines.lines[:10])

    # Test list[list] flattening
    assert Stack().fit([[1, 2, 3]]) == [1, 2, 3], "List flattening wrong"
    def test_good_data(self):
        samples = pept.LineData(self.good_data, sample_size = 200,
                                overlap = 10, verbose = False)
        # Test private attributes
        assert samples._index == 0, "_index was not set to 0"
        assert samples._overlap == 10, "_overlap was not set correctly"
        assert samples._sample_size == 200, \
            "_sample_size was not set correctly"
        assert np.array_equal(samples._line_data, self.good_data)
        assert samples._line_data.flags['C_CONTIGUOUS'], \
            "_line_data is not C-contiguous"
        assert samples._number_of_lines == len(samples._line_data), \
            "_number_of_lines was not set correctly"

        # Test properties
        assert np.array_equal(samples.line_data, samples._line_data)
        assert samples.sample_size == samples._sample_size
        assert samples.overlap == samples._overlap
        assert samples.number_of_samples == 2, "number of samples was not calculated correctly"
        assert samples.number_of_lines == samples._number_of_lines

        # Test property setters
        samples.sample_size = 300
        assert samples.sample_size == 300
        assert samples._index == 0
        samples.sample_size = 200

        samples.overlap = 50
        assert samples.overlap == 50
        assert samples._index == 0
        samples.overlap = 10
    def test_error_overlap(self):
        samples = pept.LineData(self.good_data,
                                sample_size=200,
                                overlap=100,
                                verbose=False)
        # Should not be able to set overlap >= sample_size
        with pytest.raises(ValueError):
            samples.overlap = 200
def test_minpoints():
    rng = np.random.default_rng(0)
    lines_raw = rng.random((10, 7)) * 100
    lines = pept.LineData(lines_raw, sample_size=4)

    max_distance = 1000
    cutoffs = np.array([0, 100, 0, 100, 0, 100], dtype=float)
    minpoints = pept.tracking.Minpoints(3, max_distance, cutoffs)
    print(minpoints)

    # Test `fit_sample`
    s1 = minpoints.fit_sample(lines[0]).points
    s2 = pept.utilities.find_minpoints(lines[0].lines, 3, max_distance,
                                       cutoffs)
    assert (s1 == s2).all(), "Cutpoints not found correctly"

    # Test `fit`
    traversed = minpoints.fit(lines)
    manual = [
        pept.utilities.find_minpoints(ln.lines, 3, max_distance, cutoffs)
        for ln in lines
    ]

    assert all([(t.points == m).all() for t, m in zip(traversed, manual)]), \
        "Traversed list of cutpoints not found correctly"
def test_fpi():
    rng = np.random.default_rng(0)
    lines_raw = rng.random((1000, 7)) * 100
    lines = pept.LineData(lines_raw, sample_size=200)

    ex = "sequential"
    voxels = Voxelize((50, 50, 50)).fit(lines, ex)
    positions = FPI().fit(voxels, ex)
    print(positions)
    @staticmethod
    def lines_trace(
        lines,
        width=2.0,
        color=None,
        opacity=0.6,
        colorbar=True,
        colorbar_col=0,
        colorscale="Magma",
        colorbar_title=None,
    ):
        '''Static method for creating a Plotly trace of lines. See
        `PlotlyGrapher.add_lines` for the full documentation.
        '''

        if not isinstance(lines, pept.LineData):
            lines = pept.LineData(lines)

        marker = dict(
            width=width,
            color=color,
        )

        if colorbar:
            if color is None:
                marker['color'] = []

            marker.update(colorscale=colorscale)
            if colorbar_title is not None:
                marker.update(colorbar=dict(title=colorbar_title))

        coords_x = np.full(3 * len(lines.lines), np.nan)
        coords_x[0::3] = lines.lines[:, 1]
        coords_x[1::3] = lines.lines[:, 4]

        coords_y = np.full(3 * len(lines.lines), np.nan)
        coords_y[0::3] = lines.lines[:, 2]
        coords_y[1::3] = lines.lines[:, 5]

        coords_z = np.full(3 * len(lines.lines), np.nan)
        coords_z[0::3] = lines.lines[:, 3]
        coords_z[1::3] = lines.lines[:, 6]

        if colorbar and color is None:
            if isinstance(colorbar_col, str):
                color_data = lines[colorbar_col]
            else:
                color_data = lines.lines[:, colorbar_col]

            marker['color'] = np.repeat(color_data, 3)

        return go.Scatter3d(x=coords_x,
                            y=coords_y,
                            z=coords_z,
                            mode='lines',
                            opacity=opacity,
                            line=marker)
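# A minimal, hedged usage sketch for `lines_trace` above: it assumes the
# method is exposed as a static method of `PlotlyGrapher` (importable from
# `pept.visualisation`) and uses random, illustrative LoRs.
import numpy as np
import plotly.graph_objs as go

import pept
from pept.visualisation import PlotlyGrapher

rng = np.random.default_rng(0)
example_lines = pept.LineData(rng.random((100, 7)) * 500)

# Colour each line by its timestamp (data column 0)
fig = go.Figure()
fig.add_trace(PlotlyGrapher.lines_trace(example_lines, colorbar_col=0))
fig.show()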
def test_birmingham_method():
    rng = np.random.default_rng(0)
    lines_raw = rng.random((5000, 7)) * 100
    lines = pept.LineData(lines_raw, sample_size=200)

    location = BirminghamMethod(0.5, get_used=True).fit_sample(lines[0])
    print(location)

    locations = BirminghamMethod(0.5).fit(lines, "sequential")
    print(locations)
def test_lines_centroids():
    rng = np.random.default_rng(0)
    lines_raw = rng.random((1000, 7)) * 100
    lines = pept.LineData(lines_raw, sample_size=200)

    LinesCentroids().fit_sample(lines)

    ex = "sequential"
    LinesCentroids().fit(lines, ex)
    LinesCentroids().fit(lines[0:0], ex)
def test_voxelizer():
    rng = np.random.default_rng(0)
    lines_raw = rng.random((1000, 7)) * 100
    lines = pept.LineData(lines_raw, sample_size=200)

    vox = Voxelize((20, 20, 20)).fit_sample(lines)
    assert "_lines" in vox.attrs

    ex = "sequential"
    Voxelize((20, 20, 20)).fit(lines, ex)
def test_hdbscan():
    rng = np.random.default_rng(0)
    lines_raw = rng.random((5000, 7)) * 100
    lines = pept.LineData(lines_raw, sample_size=200)

    ex = "sequential"

    cutpoints = Cutpoints(0.5).fit(lines, ex)
    clustered = HDBSCAN(0.15, 2).fit(cutpoints, ex)
    print(clustered)

    clustered2 = HDBSCAN(0.15, 2).fit(clustered, ex)
    print(clustered2)
def test_peptml():
    rng = np.random.default_rng(0)
    lines_raw = rng.random((5000, 7)) * 100
    lines = pept.LineData(lines_raw, sample_size=200)

    ex = "sequential"

    cutpoints = Cutpoints(0.5).fit(lines, ex)
    clustered = HDBSCAN(0.15, 2).fit(cutpoints, ex)
    centres = (SplitLabels() + Centroids() + Stack(30, 29)).fit(clustered, ex)
    clustered2 = HDBSCAN(0.6, 2).fit(centres, ex)
    centres2 = (SplitLabels() + Centroids()).fit(clustered2, ex)
    print(centres2)
    def copy(self):
        '''Create a deep copy of an instance of this class, including a new
        inner numpy array `lines`.

        Returns
        -------
        pept.LineData
            A new instance of the `pept.LineData` class with the same
            attributes as this instance, deep-copied.
        '''

        return pept.LineData(
            self._lines.copy(order = "C"),
            sample_size = self._sample_size,
            overlap = self._overlap,
            verbose = False
        )
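# A short, hedged sketch of the deep-copy contract documented above:
# mutating the copy must leave the original's inner `lines` array untouched.
import numpy as np
import pept

original = pept.LineData(np.arange(70, dtype=float).reshape(10, 7))
duplicate = original.copy()
duplicate.lines[0, 0] = -1.0

assert original.lines[0, 0] == 0.0, "Deep copy must not share memory"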
    def fit_sample(self, sample_lines):
        if not isinstance(sample_lines, pept.LineData):
            sample_lines = pept.LineData(sample_lines)

        # If cutoffs were not defined, automatically compute them
        if self.cutoffs is not None:
            cutoffs = self.cutoffs
        else:
            cutoffs = get_cutoffs(sample_lines.lines)

        # Only compute minpoints if there are at least num_lines LoRs
        if len(sample_lines.lines) >= self.num_lines:
            sample_minpoints = pept.utilities.find_minpoints(
                sample_lines.lines,
                self.num_lines,
                self.max_distance,
                cutoffs,
                append_indices=self.append_indices,
            )
        else:
            ncols = 4 + self.num_lines if self.append_indices else 4
            sample_minpoints = np.empty((0, ncols))

        # Column names
        columns = ["t", "x", "y", "z"]
        if self.append_indices:
            columns += [f"line_index{i + 1}" for i in range(self.num_lines)]

        # Encapsulate minpoints in a PointData
        points = pept.PointData(sample_minpoints, columns=columns)

        # Add optional metadata to the points; because they have an underscore,
        # they won't be propagated when new objects are constructed
        points.attrs["_num_lines"] = self.num_lines
        points.attrs["_max_distance"] = self.max_distance
        points.attrs["_cutoffs"] = cutoffs

        # If LoR indices were appended, also include the constituent LoRs
        if self.append_indices:
            points.attrs["_lines"] = sample_lines

        return points
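# A hedged usage sketch for the `fit_sample` method above, constructing
# `Minpoints` as earlier in this file (`num_lines = 3`; `cutoffs` is omitted
# so it is computed automatically from the sample):
import numpy as np
import pept
from pept.tracking import Minpoints

rng = np.random.default_rng(0)
sample = pept.LineData(rng.random((10, 7)) * 100)

points = Minpoints(3, max_distance=1000.0).fit_sample(sample)
print(points.columns)   # ["t", "x", "y", "z"] when append_indices is False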
def test_split_labels():
    rng = np.random.default_rng(0)
    points_raw = rng.random((10, 4)) * 100
    labels = rng.integers(3, size=10)
    line_index = rng.integers(10, size=10)

    points = pept.PointData(
        np.c_[points_raw, labels, line_index],
        columns=["t", "x", "y", "z", "label", "line_index"],
    )
    points.samples_indices = [[0, 10], [5, 5], [5, 10]]

    # Check each split label
    split = SplitLabels().fit_sample(points[0])
    assert np.all(split[0].points[:, :4] == points_raw[labels == 0])
    assert np.all(split[1].points[:, :4] == points_raw[labels == 1])
    assert np.all(split[2].points[:, :4] == points_raw[labels == 2])

    # Check with empty sample
    empty_split = SplitLabels().fit_sample(points[1])
    assert len(empty_split[0].data) == 0

    # Extracting `_lines`
    lines_raw = rng.random((10, 7)) * 500
    lines = pept.LineData(lines_raw, sample_size=4)
    points.attrs["_lines"] = lines

    splines = SplitLabels().fit_sample(points[0])
    assert "_lines" in splines[0].attrs

    splines = SplitLabels(extract_lines=True).fit_sample(points[0])
    assert isinstance(splines[0], pept.LineData)

    # Test different settings
    SplitLabels().fit(points, "sequential")
    SplitLabels(remove_labels=False).fit(points, "sequential")
    SplitLabels(noise=True).fit(points, "sequential")
    SplitLabels(extract_lines=True).fit(points, "sequential")
                lor = [
                    t[i], x[i], y[i], z[i], t[i + 1], x[i + 1], y[i + 1],
                    z[i + 1]
                ]

                for item in lor:
                    f.write("%s\t" % str(item))
                f.write('\n')

            else:
                continue

    f.close()


filename = 'data/lors.txt'

makeLORs(data, mask, filename)

lors = np.loadtxt(filename, usecols=(0, 1, 2, 3, 5, 6, 7))

lors = pept.LineData(lors)

# Create a PlotlyGrapher instance, then have it create a Plotly figure.
grapher = PlotlyGrapher()

# Add a Plotly trace from the LoRs
grapher.add_lines(lors)
grapher.show()
    def fit_sample(self, sample):
        '''Use the Birmingham method to track a tracer location from a numpy
        array (i.e. one sample) of LoRs.

        For the given `sample` of LoRs (a numpy.ndarray), this function
        minimises the distance between all of the LoRs, rejecting a fraction
        of lines that lie furthest away from the calculated location. The
        process is repeated iteratively until a specified fraction (`fopt`)
        of the original subset of LoRs remains.

        Parameters
        ----------
        sample : (N, M >= 7) numpy.ndarray or pept.LineData
            The sample of LoRs that will be clustered. Each LoR is expressed
            as a timestamp and a line defined by two points; the data columns
            are then `[time, x1, y1, z1, x2, y2, z2, extra...]`.

        Returns
        -------
        locations : pept.PointData
            The tracked location, with columns `[t, x, y, z, error]`. If
            `get_used` was set to `True` when constructing this instance, the
            constituent LoRs are also attached as `locations.attrs["_lines"]`,
            with an extra "used" column containing 1 for the rows that were
            used to compute the tracer location and 0 otherwise (used for
            multi-particle tracking, not implemented yet).

        Raises
        ------
        ValueError
            If `sample` is not a numpy array of shape (N, M), where M >= 7.
        '''

        if not isinstance(sample, pept.LineData):
            sample = pept.LineData(sample)

        locations, used = birmingham_method(sample.lines, self.fopt)

        # Propagate any LineData attributes besides `columns`
        attrs = sample.extra_attrs()

        locations = pept.PointData(
            [locations],
            columns=["t", "x", "y", "z", "error"],
            **attrs,
        )

        # If `get_used`, also attach a `._lines` attribute with the lines used
        if self.get_used:
            locations.attrs["_lines"] = sample.copy(
                data=np.c_[sample.lines, used],
                columns=sample.columns + ["used"],
            )

        return locations
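# A hedged sketch of calling `fit_sample` above, mirroring the usage in
# `test_birmingham_method` (fopt and the random LoRs are illustrative):
import numpy as np
import pept
from pept.tracking import BirminghamMethod

rng = np.random.default_rng(0)
sample = pept.LineData(rng.random((200, 7)) * 100)

location = BirminghamMethod(0.5, get_used=True).fit_sample(sample)
print(location.points)            # a single row: [t, x, y, z, error]
print(location.attrs["_lines"])   # the LoRs, with the extra "used" column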
    def fit(self, lines):
        return pept.LineData(lines)
def find_minpoints(sample_lines,
                   num_lines,
                   max_distance,
                   cutoffs=None,
                   append_indices=False):
    '''Compute the minimum distance points (MDPs) from all combinations of
    `num_lines` lines given in an array of lines `sample_lines`.

    Given a sample of lines, this function computes the minimum distance
    points (MDPs) for every possible combination of `num_lines` lines. The
    returned numpy array contains all MDPs that satisfy the following:

    1. Are within the `cutoffs`.
    2. Are closer to all the constituent LoRs than `max_distance`.

    Parameters
    ----------
    sample_lines: (M, N) numpy.ndarray
        A 2D array of lines, where each line is defined by two points such that
        every row is formatted as `[t, x1, y1, z1, x2, y2, z2, etc.]`. It
        *must* have at least 2 lines and the combination size `num_lines`
        *must* be smaller or equal to the number of lines. Put differently:
        2 <= num_lines <= len(sample_lines).

    num_lines: int
        The number of lines in each combination of LoRs used to compute the
        MDP. This function considers every combination of `num_lines` lines
        from the input `sample_lines`. `num_lines` must be smaller than or
        equal to the number of input lines in `sample_lines`.

    max_distance: float
        The maximum allowed distance between an MDP and its constituent lines.
        If any distance from the MDP to one of its lines is larger than
        `max_distance`, the MDP is thrown away.

    cutoffs: (6,) numpy.ndarray, optional
        An array of spatial cutoff coordinates with *exactly 6 elements* as
        [x_min, x_max, y_min, y_max, z_min, z_max]. If any MDP lies outside
        this region, it is thrown away. If it is `None`, they are computed
        automatically by calling `get_cutoffs`. The default is `None`.

    append_indices: bool, default False
        A boolean specifying whether to include the indices of the lines used
        to compute each MDP. If `False`, the output array will only contain the
        [time, x, y, z] of the MDPs. If `True`, the output array will have
        extra columns [time, x, y, z, line_idx(1), ..., line_idx(n)] where
        n = `num_lines`.

    Returns
    -------
    minpoints: (M, N) numpy.ndarray
        A 2D array of `float`s containing the time and coordinates of the MDPs
        [time, x, y, z]. The time is computed as the average of the constituent
        lines. If `append_indices` is `True`, then `num_lines` indices of the
        constituent lines are appended as extra columns:
        [time, x, y, z, line_idx1, line_idx2, ..]. The first column (for time)
        is sorted.

    Raises
    ------
    ValueError
        If `sample_lines` is not a numpy array with shape (N, M >= 7).

    ValueError
        If 2 <= num_lines <= len(sample_lines) is not satisfied.

    ValueError
        If `cutoffs` is not a one-dimensional array with values
        `[min_x, max_x, min_y, max_y, min_z, max_z]`

    See Also
    --------
    pept.tracking.peptml.Minpoints : Compute minpoints from `pept.LineData`.
    pept.utilities.read_csv : Fast CSV file reading into numpy arrays.
    '''

    if not isinstance(sample_lines, pept.LineData):
        sample_lines = pept.LineData(sample_lines)

    lines = sample_lines.lines

    lines = np.asarray(lines, order='C', dtype=float)

    num_lines = int(num_lines)
    max_distance = float(max_distance)

    if cutoffs is None:
        cutoffs = get_cutoffs(lines)
    else:
        cutoffs = np.asarray(cutoffs, order='C', dtype=float)
        if cutoffs.ndim != 1 or len(cutoffs) != 6:
            raise ValueError(
                ("\n[ERROR]: cutoffs should be a one-dimensional array with "
                 "values [min_x, max_x, min_y, max_y, min_z, max_z]. Received "
                 f"{cutoffs}.\n"))

    sample_minpoints = pept.utilities.find_minpoints(
        lines, num_lines, max_distance, cutoffs, append_indices=append_indices)

    columns = ["t", "x", "y", "z"]
    if append_indices:
        columns += [f"line_index{i + 1}" for i in range(num_lines)]

    points = pept.PointData(sample_minpoints, columns=columns)

    # Add optional metadata to the points; because they have an underscore,
    # they won't be propagated when new objects are constructed
    points._max_distance = max_distance
    points._cutoffs = cutoffs
    points._num_lines = num_lines

    if append_indices:
        points._lines = sample_lines

    return points
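# A hedged usage sketch for the wrapper above (random LoRs; the parameters
# are illustrative, and `cutoffs` is left out so it is computed
# automatically by `get_cutoffs`):
import numpy as np
import pept

rng = np.random.default_rng(0)
lors = pept.LineData(rng.random((10, 7)) * 100)

mdps = find_minpoints(lors, num_lines=3, max_distance=1000.0,
                      append_indices=True)
print(mdps.columns)   # ["t", "x", "y", "z", "line_index1", ...]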
def test_pipeline():

    class F1(pept.base.LineDataFilter):
        def fit_sample(self, sample_lines):
            sample_lines.lines[:] += 1
            sample_lines.attrs["attr1"] = "New attribute added by F1"
            return sample_lines


    class F2(pept.base.LineDataFilter):
        def fit_sample(self, sample_lines):
            sample_lines.lines[:] += 2
            sample_lines.attrs["attr2"] = "New attribute added by F2"
            return sample_lines


    class R1(pept.base.Reducer):
        def fit(self, lines):
            return tuple(lines)


    class R2(pept.base.Reducer):
        def fit(self, lines):
            return pept.LineData(lines)


    # Generate some dummy LineData
    lines_raw = np.arange(70).reshape(10, 7)
    lines = pept.LineData(lines_raw, sample_size=4)

    # Test pipeline creation
    assert isinstance(F1() + F2(), pept.base.Pipeline)
    assert isinstance(pept.base.Pipeline([F1(), F2()]), pept.base.Pipeline)
    assert isinstance(F1() + F2() + R1(), pept.base.Pipeline)

    # Test fit_sample
    pipe = F1() + F2()
    print(pipe)

    lp1 = pipe.fit_sample(lines[0]).lines
    lp2 = F2().fit_sample(F1().fit_sample(lines[0])).lines
    assert (lp1 == lp2).all(), "Apply simple pipeline steps manually"

    pipe = F1() + F2() + R1()
    print(pipe)

    lp1 = pipe.fit_sample(lines[0])
    lp2 = F1().fit_sample(lines[0])
    lp2 = F2().fit_sample(lp2)
    lp2 = R1().fit([lp2])

    assert isinstance(lp1, tuple), "Final pipeline reducer to tuple"
    assert isinstance(lp2, tuple), "Final manual reducer to tuple"
    assert (lp1[0].lines == lp2[0].lines).all(), "Apply steps manually"

    # Test the attribute is added by the first filter
    assert "attr1" in F1().fit_sample(lines[0]).attrs
    assert "attr1" in pept.base.Pipeline([F1()]).fit_sample(lines[0]).attrs

    # Test fit
    # Simple filter-only pipeline
    pipe = F1() + F2()

    lp1 = pipe.fit(lines)
    lp2 = F2().fit(F1().fit(lines))
    assert isinstance(lp1, list)
    assert isinstance(lp2, list)
    assert len(lp1) == len(lp2) == len(lines)

    assert all([(l1.lines == l2.lines).all() for l1, l2 in zip(lp1, lp2)])

    # Pipeline ending in reducer
    pipe = F1() + F2() + R1()
    print(pipe)

    lp1 = pipe.fit(lines)
    lp2 = F1().fit(lines)
    lp2 = F2().fit(lp2)
    lp2 = R1().fit(lp2)

    assert isinstance(lp1, tuple)
    assert isinstance(lp2, tuple)
    assert len(lp1) == len(lp2) == len(lines)

    assert all([(l1.lines == l2.lines).all() for l1, l2 in zip(lp1, lp2)])

    # Complex pipeline
    pipe = F1() + F2() + R2() + F1() + R1()
    print(pipe)

    lp1 = pipe.fit(lines)
    lp2 = F1().fit(lines)
    lp2 = F2().fit(lp2)
    lp2 = R2().fit(lp2)
    lp2 = F1().fit(lp2)
    lp2 = R1().fit(lp2)

    assert isinstance(lp1, tuple)
    assert isinstance(lp2, tuple)
    assert len(lp1) == len(lp2) == len(lines)

    assert all([(l1.lines == l2.lines).all() for l1, l2 in zip(lp1, lp2)])

    # Test the attribute is added by the first filter
    assert "attr1" in F1().fit(lines)[0].attrs
    assert "attr1" in pept.base.Pipeline([F1()]).fit(lines)[0].attrs
def find_cutpoints(sample_lines,
                   max_distance,
                   cutoffs=None,
                   append_indices=False):
    '''Find the cutpoints from a sample / array of LoRs.

    A cutpoint is the point in 3D space that minimises the distance between any
    two lines. For any two non-parallel 3D lines, this point corresponds to the
    midpoint of the unique segment that is perpendicular to both lines.

    This function considers every pair of lines in `sample_lines` and returns
    all the cutpoints that satisfy the following conditions:

    1. The distance between the two lines is smaller than `max_distance`.
    2. The cutpoint is within the `cutoffs`.

    Parameters
    ----------
    sample_lines : (N, M >= 7) numpy.ndarray
        A sample of LoRs, where each row is `[time, x1, y1, z1, x2, y2, z2]`,
        such that every line is defined by the points `[x1, y1, z1]` and
        `[x2, y2, z2]`.
    max_distance : float
        The maximum distance between any two lines for their cutpoint to be
        considered. A good starting value would be 0.1 mm for small tracers
        and/or clean data, or 0.2 mm for larger tracers and/or noisy data.
    cutoffs : list, optional
        The cutoffs for each dimension, formatted as `[x_min, x_max,
        y_min, y_max, z_min, z_max]`. If it is `None`, they are computed
        automatically by calling `get_cutoffs`. The default is `None`.
    append_indices : bool, optional
        If set to `True`, the indices of the individual LoRs that were used
        to compute each cutpoint are also appended to the returned array.
        Default is `False`.

    Returns
    -------
    cutpoints : (M, 4) or (M, 6) numpy.ndarray
        A numpy array of the calculated cutpoints. If `append_indices` is
        `False`, then the columns are [time, x, y, z]. If `append_indices` is
        `True`, then the columns are [time, x, y, z, i, j], where `i` and `j`
        are the LoR indices from `sample_lines` that were used to compute the
        weighted cutpoints. The time is the average between the timestamps of
        the two LoRs that were used to compute the cutpoint. The first column
        (for time) is sorted.

    Raises
    ------
    ValueError
        If `sample_lines` is not a numpy array with shape (N, M >= 7).
    ValueError
        If `cutoffs` is not a one-dimensional array with values
        `[min_x, max_x, min_y, max_y, min_z, max_z]`

    See Also
    --------
    pept.tracking.peptml.Cutpoints : Compute cutpoints from `pept.LineData`.
    pept.utilities.read_csv : Fast CSV file reading into numpy arrays.
    '''

    if not isinstance(sample_lines, pept.LineData):
        sample_lines = pept.LineData(sample_lines)

    lines = sample_lines.lines

    lines = np.asarray(lines, order='C', dtype=float)
    max_distance = float(max_distance)

    # If cutoffs were not defined, automatically compute them
    if cutoffs is None:
        cutoffs = get_cutoffs(lines)
    else:
        cutoffs = np.asarray(cutoffs, order='C', dtype=float)
        if cutoffs.ndim != 1 or len(cutoffs) != 6:
            raise ValueError(
                ("\n[ERROR]: cutoffs should be a one-dimensional array with "
                 "values [min_x, max_x, min_y, max_y, min_z, max_z]. Received "
                 f"{cutoffs}.\n"))

    sample_cutpoints = pept.utilities.find_cutpoints(
        lines, max_distance, cutoffs, append_indices=append_indices)

    columns = ["t", "x", "y", "z"]
    if append_indices:
        columns += ["line_index1", "line_index2"]

    points = pept.PointData(sample_cutpoints, columns=columns)

    # Add optional metadata to the points; because they have an underscore,
    # they won't be propagated when new objects are constructed
    points._max_distance = max_distance
    points._cutoffs = cutoffs
    if append_indices:
        points._lines = sample_lines

    return points
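# A hedged usage sketch for `find_cutpoints` above; with uniformly random
# LoRs a loose `max_distance` is needed to yield any cutpoints at all, so
# these values are illustrative rather than the 0.1-0.2 mm guidance from
# the docstring.
import numpy as np
import pept

rng = np.random.default_rng(0)
lors = pept.LineData(rng.random((10, 7)) * 100)

cutpoints = find_cutpoints(lors, max_distance=10.0, append_indices=True)
print(cutpoints.columns)  # ["t", "x", "y", "z", "line_index1", "line_index2"]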
def test_line_data():
    # Test simple sample size, no overlap
    lines_raw = np.arange(70).reshape(10, 7)
    lines = pept.LineData(lines_raw, sample_size=4)
    print(lines)

    assert (lines[0].lines == lines_raw[:4]).all(), "Incorrect first sample"
    assert (lines[1].lines == lines_raw[4:8]).all(), "Incorrect second sample"
    assert len(lines) == 2, "Incorrent number of samples"
    assert np.all(lines["t"] == lines_raw[:, 0]), "Incorrect string indexing"

    # Test copying
    assert lines.copy() is not lines, "Copy is not deep"
    assert (lines.copy().lines == lines.lines).all(), "Incorrect copying"

    # Test changing sample size and overlap (int)
    lines.sample_size = 3
    lines.overlap = 2
    assert (lines[0].lines == lines_raw[:3]).all(), "Incorrect ssize changing"
    assert (lines[1].lines == lines_raw[1:4]).all(), "Incorrect overlapping"
    assert len(lines) == 8, "Incorrect number of samples after overlap"

    # Test changing sample size to List[Int]
    lines.sample_size = [3, 4, 2, 0]
    assert lines.overlap is None, "Overlap was not set to None"
    assert len(lines) == 4, "Incorrect number of samples"
    assert (lines[0].lines == lines_raw[:3]).all(), "List sample size"
    assert (lines[1].lines == lines_raw[3:7]).all(), "List sample size"
    assert (lines[2].lines == lines_raw[7:9]).all(), "List sample size"
    assert (lines[3].lines == lines_raw[9:9]).all(), "List sample size"

    # Test copying
    assert lines.copy().lines is not lines.lines, "Copy is not deep"
    assert lines.copy(deep=False).lines is lines.lines, "Not shallow copy"
    assert (lines.copy().lines == lines.lines).all(), "Incorrect copying"

    assert np.all(lines.copy().samples_indices == lines.samples_indices)
    lines.samples_indices = [[0, 5], [5, 5], [5, 10]]
    assert np.all(lines.copy().samples_indices == lines.samples_indices)

    # Test different constructors: copy, iterable, numpy-like
    lines_raw = np.arange(80).reshape(10, 8)
    columns = ["t", "x1", "y1", "z1", "x2", "y2", "z2", "error"]
    lines = pept.LineData(lines_raw, columns = columns)

    pept.LineData(lines)
    pept.LineData([lines, lines])
    pept.LineData([range(7), range(7)])

    # Test unnamed columns
    pept.LineData([range(8), range(8)])
    pept.LineData([range(7)], columns = ["a", "b", "c", "d", "e", "f", "g",
                                         "h", "i"])

    # Test columns propagation
    assert "error" in pept.LineData(lines).columns
    assert "error" in pept.LineData([lines, lines]).columns

    # Test attrs propagation
    lines.attrs["_lines"] = 123
    lines.attrs["_attr2"] = [1, 2, 3]

    assert "_lines" in pept.LineData(lines).attrs
    assert "_attr2" in pept.LineData([lines, lines]).attrs
    assert "_lines" in lines[0].attrs
    assert "_attr2" in lines.copy().attrs

    # Test illegal changes to sample size and overlap
    with pytest.raises(ValueError):
        lines.sample_size = 3
        lines.overlap = 3

    with pytest.raises(ValueError):
        lines.sample_size = 0
        lines.overlap = 3
        lines.sample_size = 3

    with pytest.raises(ValueError):
        lines.sample_size = -1

    # Test illegal array shapes
    with pytest.raises(ValueError):
        pept.LineData(np.arange(12))

    with pytest.raises(ValueError):
        pept.LineData(np.arange(12).reshape(2, 6))

    with pytest.raises(ValueError):
        pept.LineData(np.arange(12).reshape(2, 2, 3))