Ejemplo n.º 1
0
def test_picker_nloci():
    """Check that picker reports exactly two foci for the gauss12 matrix."""
    # picker returns a pair; only the coordinates are inspected here
    foci_coords, _labelled = cud.picker(gauss12, precision=1)
    n_foci = len(foci_coords)
    assert n_foci == 2
Ejemplo n.º 2
0
def test_picker_idx(patterns, matrix):
    """Check that the first coordinate reported by picker equals `patterns`."""
    # With precision=None, compare the first detected focus to the fixture
    foci_coords, _labelled = cud.picker(matrix, precision=None)
    matches = foci_coords[0] == patterns
    assert np.all(matches)
Ejemplo n.º 3
0
def test_picker_speckles():
    """Check that picker returns None for both outputs on the speckle matrix."""
    foci_coords, labelled = cud.picker(point_mat, precision=None)
    # Coordinates first, then the labelled matrix: both must be None
    for output in (foci_coords, labelled):
        assert output is None
Ejemplo n.º 4
0
def change_detection_pipeline(
    cool_files: Iterable[str],
    conditions: Iterable[str],
    kernel: Union[np.ndarray, str] = "loops",
    bed2d_file: Optional[str] = None,
    region: Optional[Union[Iterable[str], str]] = None,
    max_dist: Optional[int] = None,
    subsample: bool = True,
    percentile_thresh: float = 95.0,
    n_cpus: int = 4,
) -> pd.DataFrame:
    """
    Run end to end pattern change detection pipeline on input cool files. A
    list of conditions of the same lengths as the sample list must be provided.
    The first condition in the list is used as the reference (control) state.

    Changes for a specific pattern are computed. A valid chromosight pattern
    name can be supplied (e.g. loops, borders, hairpins, ...) or a kernel matrix
    can be supplied directly instead.

    Positions with significant changes will be reported in a pandas
    dataframe. Optionally, a 2D bed file with positions of interest can be
    specified, in which case change value at these positions will be reported
    instead.

    Positive diff_scores mean the pattern intensity was increased relative to
    control (first condition).

    Parameters
    ----------
    cool_files :
        Cool files to compare, one per sample. Any iterable is accepted.
    conditions :
        Condition label of each sample, in the same order as `cool_files`.
    kernel :
        Name of a chromosight pattern, or a numpy array used directly as
        kernel.
    bed2d_file :
        Optional path to a 2D bed file. When given, the change value is
        reported at these positions instead of at detected foci.
    region :
        A single region string, or an iterable of region strings, to process.
        When None, every chromosome of the first cool file is processed.
    max_dist, subsample, percentile_thresh, n_cpus :
        Forwarded unchanged to `detection_matrix`, which returns the change
        matrix and the threshold handed to chromosight's picker.

    Returns
    -------
    pd.DataFrame
        One row per reported position, with columns chrom1, start1, end1,
        chrom2, start2, end2, bin1, bin2 and diff_score.

    Raises
    ------
    ValueError
        If `cool_files` and `conditions` differ in length, or if `kernel` is
        neither a string nor a numpy array.
    AttributeError
        If `kernel` is a string that is not a known chromosight pattern name.
    """
    # Materialise the inputs so that len() also works on generators
    cool_files = list(cool_files)
    conditions = list(conditions)
    # Make sure each sample has an associated condition
    if len(cool_files) != len(conditions):
        raise ValueError(
            "The lists of cool files and conditions must have the same length")

    # If a pattern name was provided, load corresponding chromosight kernel
    if isinstance(kernel, str):
        pattern_name = kernel
        try:
            kernel = getattr(ck, pattern_name)["kernels"][0]
        except AttributeError as err:
            raise AttributeError(
                f"{pattern_name} is not a valid pattern name") from err
    elif not isinstance(kernel, np.ndarray):
        raise ValueError(
            "Kernel must either be a valid chromosight pattern name, or a 2D numpy.ndarray of floats"
        )
    # Associate samples with their conditions
    samples = pd.DataFrame({
        "cond": conditions,
        "cool": pai.get_coolers(cool_files)
    })
    print(
        f"Changes will be computed relative to condition: {samples.cond.values[0]}"
    )
    # Define each chromosome as a region, if None specified
    clr = samples.cool.values[0]
    if region is None:
        regions = clr.chroms()[:]["name"].tolist()
    elif isinstance(region, str):
        regions = [region]
    else:
        regions = list(region)
    pos_cols = [
        "chrom1",
        "start1",
        "end1",
        "chrom2",
        "start2",
        "end2",
        "bin1",
        "bin2",
        "diff_score",
    ]
    if bed2d_file:
        positions = cio.load_bed2d(bed2d_file)
        # Pre-create the output columns so they exist even if no region
        # contains any position of interest
        for col in ["diff_score", "bin1", "bin2"]:
            positions[col] = np.nan
    else:
        positions = pd.DataFrame(columns=pos_cols)
    # Per-region tables of detected foci, concatenated once after the loop
    detected = []
    for reg in regions:
        if bed2d_file:
            tmp_chr = reg.split(":")[0]
            tmp_rows = (positions.chrom1 == tmp_chr) & (positions.chrom2
                                                        == tmp_chr)
            # If there are no positions of interest on this chromosome, just
            # skip it before computing anything for the region
            if not np.any(tmp_rows):
                continue
        diff, thresh = detection_matrix(
            samples,
            kernel,
            region=reg,
            subsample=subsample,
            max_dist=max_dist,
            percentile_thresh=percentile_thresh,
            n_cpus=n_cpus,
        )
        # If positions were provided, return the change value for each of them
        if bed2d_file:
            # Work on a copy: helper columns are added below and results are
            # written back to the full table explicitly
            tmp_pos = positions.loc[tmp_rows, :].copy()
            # Convert both coordinates from genomic coords to bins
            for i in [1, 2]:
                tmp_pos["chrom"] = tmp_pos[f"chrom{i}"]
                tmp_pos["pos"] = (tmp_pos[f"start{i}"] +
                                  tmp_pos[f"end{i}"]) // 2
                tmp_pos[f"bin{i}"] = coords_to_bins(clr, tmp_pos).astype(int)
                # Save bin coordinates from current chromosome to the full table
                positions.loc[tmp_rows, f"bin{i}"] = tmp_pos[f"bin{i}"]
            tmp_pos = tmp_pos.drop(columns=["pos", "chrom"])
            # Retrieve diff values for each coordinate
            positions.loc[tmp_rows, "diff_score"] = diff[
                tmp_pos.start1 // clr.binsize,
                tmp_pos.start2 // clr.binsize,
            ].A1
        # Otherwise report individual spots of change using chromosight
        else:
            # Subset bins to the range of interest
            bins = clr.bins().fetch(reg).reset_index(drop=True)
            # Pick "foci" of changed pixels and their local maxima
            tmp_pos, _ = cud.picker(abs(diff), thresh)
            # picker can return None instead of coordinates when nothing is
            # detected: there is nothing to report for this region
            if tmp_pos is None:
                continue
            # Get genomic positions from matrix coordinates
            tmp_pos = pd.DataFrame(tmp_pos, columns=["bin1", "bin2"])
            for i in [1, 2]:
                coords = (
                    bins.loc[tmp_pos[f"bin{i}"], ["chrom", "start", "end"]]
                    .reset_index(drop=True)
                    .rename(columns={
                        "chrom": f"chrom{i}",
                        "start": f"start{i}",
                        "end": f"end{i}",
                    })
                )
                # Add axis' columns to  dataframe
                tmp_pos = pd.concat([coords, tmp_pos], axis=1)
            # Retrieve diff values for each coordinate
            tmp_pos["diff_score"] = diff[tmp_pos.bin1, tmp_pos.bin2].A1
            detected.append(tmp_pos)
    # Append all regions' rows in a single concatenation
    if detected:
        positions = pd.concat([positions, *detected], axis=0)
    return positions.loc[:, pos_cols]
Ejemplo n.º 5
0
def test_picker_nloci():
    """Check that picker reports exactly two foci for the gauss12 matrix."""
    # The mean of the stored values of gauss12 is passed as `pearson`
    mean_value = gauss12.data.mean()
    foci_coords, _labelled = cud.picker(gauss12, pearson=mean_value)
    n_foci = len(foci_coords)
    assert n_foci == 2
Ejemplo n.º 6
0
def test_picker_idx(patterns, matrix):
    """Check that the first coordinate reported by picker equals `patterns`."""
    # The mean of the stored values of the matrix is passed as `pearson`
    mean_value = matrix.data.mean()
    foci_coords, _labelled = cud.picker(matrix, pearson=mean_value)
    matches = foci_coords[0] == patterns
    assert np.all(matches)