Ejemplo n.º 1
0
from __future__ import print_function
import numpy as np
from glob import glob
from iced import io
from iced import utils

# Gather the raw contact-count matrices of both datasets; sorting keeps the
# report order stable from one run to the next.
filenames = glob("data/ay2013/*10000_raw.matrix") + \
            glob("data/lemieux2013/25kb/*.matrix")
filenames.sort()

for filename in filenames:
    # The lengths are loaded from the ".bed" file named after the matrix file.
    lengths = io.load_lengths(filename.replace(".matrix", ".bed"))
    counts = io.load_counts(filename, lengths=lengths)

    # Densify and add the transpose (presumably the file stores a single
    # triangle of the contact map -- TODO confirm against io.load_counts).
    counts = counts.toarray()
    counts = counts.T + counts

    mask = utils.get_intra_mask(lengths)

    # Just making sure there is no interaction counted in the diagonal
    counts[np.diag_indices_from(counts)] = 0

    # The total is needed three times below; compute it once.
    total = counts.sum()

    print(filename)
    print("Total number of counts", total)
    # The labels are plain print() arguments, not %-format strings, so a
    # single "%" is what should appear in the output.
    print("% of intra", counts[mask].sum() / total * 100)
    print("% of inter", counts[np.invert(mask)].sum() / total * 100)
    print()
Ejemplo n.º 2
0
from matplotlib import colors

from iced import datasets
from iced.utils import get_intra_mask
from iced.utils import get_inter_mask
"""
Extracting parts of a contact map.

This example shows how to use a mask to plot only the inter or the intra
contact map.

"""

# Loading a sample dataset
counts, lengths = datasets.load_sample_yeast()
intra_mask = get_intra_mask(lengths)
inter_mask = get_inter_mask(lengths)

fig, axes = plt.subplots(ncols=2, figsize=(12, 6))

# Work on copies so the loaded counts stay untouched, and overwrite with NaN
# the entries that each panel should leave out.
inter_counts = counts.copy()
inter_counts[intra_mask] = np.nan
intra_counts = counts.copy()
intra_counts[inter_mask] = np.nan

# Left panel: intra contact map only.
# `origin` only accepts "upper" or "lower"; "lower" places the [0, 0] entry
# in the bottom-left corner, which is what "bottom" used to be treated as.
m = axes[0].matshow(intra_counts,
                    cmap="Blues",
                    norm=colors.SymLogNorm(1),
                    origin="lower",
                    extent=(0, len(counts), 0, len(counts)))
m = axes[1].matshow(inter_counts,
                    cmap="Blues",
from iced import datasets
from iced.utils import get_intra_mask
from iced.utils import get_inter_mask

"""
Extracting parts of a contact map.

This example shows how to use a mask to plot only the inter or the intra
contact map.

"""

# Loading a sample dataset
counts, lengths = datasets.load_sample_yeast()
intra_mask = get_intra_mask(lengths)
inter_mask = get_inter_mask(lengths)

fig, axes = plt.subplots(ncols=2, figsize=(12, 6))

# Work on copies so the loaded counts stay untouched, and overwrite with NaN
# the entries that each panel should leave out.
inter_counts = counts.copy()
inter_counts[intra_mask] = np.nan
intra_counts = counts.copy()
intra_counts[inter_mask] = np.nan

# `origin` only accepts "upper" or "lower"; "lower" places the [0, 0] entry
# in the bottom-left corner, which is what "bottom" used to be treated as.
# Left panel: intra contact map only.
m = axes[0].matshow(intra_counts, cmap="Blues", norm=colors.SymLogNorm(1),
                    origin="lower",
                    extent=(0, len(counts), 0, len(counts)))
# Right panel: inter contact map only.
m = axes[1].matshow(inter_counts, cmap="Blues", norm=colors.SymLogNorm(1),
                    origin="lower",
                    extent=(0, len(counts), 0, len(counts)))
Ejemplo n.º 4
0
def generate_dataset_from_distances(dis,
                                    alpha=-3,
                                    beta=1,
                                    alpha_inter=None,
                                    lengths=None,
                                    distribution="NegativeBinomial",
                                    random_state=None,
                                    dispersion=7):
    """
    Generate dataset from distance matrix

    The intensity of the process is ``beta * dis ** alpha``; contact counts
    are then drawn from the requested distribution with that intensity.

    Parameters
    ----------
    dis : (n, n) ndarray
        Distance matrix.

    alpha : float, optional, default: -3
        count-to-distance parameter

    beta : float, optional, default: 1
        coverage or scaling factor

    alpha_inter : float, optional, default: None
        count-to-distance parameter for inter-chromosomal count.
        When provided, lengths also needs to be provided

    lengths : ndarray (L, ), optional, default: None
        Vector of lengths of chromosomes. Must sum to ``dis.shape[0]``.

    distribution : string, optional, default: "NegativeBinomial"
        The distribution used to draw contact counts from. Can be "Poisson",
        "NB", "NegativeBinomial", or "Intensity".
        If "Intensity" is provided, returns the intensity of the random
        process instead of a random distribution.

    random_state : int, optional, default: None
        Determines random number generation. Use an int to make the randomness
        deterministic.

    dispersion : float or object with a ``predict`` method, optional, \
default: 7.
        Dispersion parameter for the Negative Binomial distribution. When
        the object has a ``predict`` method, the dispersion is obtained
        from ``dispersion.predict(dis ** alpha)``.
        Will be ignored for the Poisson distribution.

    Returns
    -------
    ndarray (n, n)
        The drawn counts (or the intensity when ``distribution`` is
        "Intensity") added to their own transpose.

    Raises
    ------
    ValueError
        If ``distribution`` is not one of the supported names, if
        ``lengths`` does not sum to ``dis.shape[0]``, or if ``alpha_inter``
        is given without ``lengths``.
    """
    # "Intensity" has to be accepted here, otherwise the documented option
    # (and its branch further down) can never be reached.
    if distribution not in ("Poisson", "NB", "NegativeBinomial", "Intensity"):
        # _DISTRIBUTIONS is not defined in this block; presumably a
        # module-level list of the supported names -- TODO confirm.
        raise ValueError(
            "Unknown distribution %s. Possibile distributions are %s" %
            (distribution, ", ".join(_DISTRIBUTIONS)))
    if lengths is not None and dis.shape[0] != lengths.sum():
        raise ValueError("lengths and dis are of incompatible shapes")

    if alpha_inter is not None and lengths is None:
        raise ValueError(
            "When alpha_inter is provided, lengths also needs to be provided")

    random_state = check_random_state(random_state)

    # Zero distances raised to a negative power trigger numpy warnings and
    # produce inf; the warnings are silenced and the inf entries are zeroed
    # further down.
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        intensity = beta * dis ** alpha

    if alpha_inter is not None:
        # Inter-chromosomal entries get their own count-to-distance exponent.
        inter_mask = ~get_intra_mask(lengths)
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            intensity[inter_mask] = beta * dis[inter_mask] ** alpha_inter

    intensity[np.isinf(intensity)] = 0

    if distribution in ("NB", "NegativeBinomial"):
        if hasattr(dispersion, "predict"):
            # NOTE(review): the dispersion model is evaluated on dis ** alpha
            # for every entry, including inter-chromosomal ones -- confirm
            # this is intended when alpha_inter is set.
            with warnings.catch_warnings():
                warnings.simplefilter("ignore")
                d = beta * dispersion.predict(dis ** alpha)
        else:
            d = beta * dispersion

        # p / (1 - p) = intensity / d, so the draw has mean `intensity`.
        p = intensity / (intensity + d)
        counts = random_state.negative_binomial(d, 1 - p)
    elif distribution == "Poisson":
        counts = random_state.poisson(intensity)
    else:
        # distribution == "Intensity": return the intensity itself.
        counts = intensity

    counts = (counts + counts.T)

    return counts