Exemple #1
0
def test_load_counts():
    """Check that the duan2009 sample matrix can be loaded.

    The matrix path is resolved relative to the directory containing this
    module, and the test only asserts that ``io.load_counts`` returns
    something other than ``None``.
    """
    counts_filename = os.path.join(
        dirname(__file__),
        "../../datasets/data/duan2009/duan.SC.10000.raw_sub.matrix")

    counts = io.load_counts(counts_filename)
    assert counts is not None
Exemple #2
0
def run_nmds(directory):
    """Run the NMDS inference on the dataset described in ``directory``.

    Options are obtained from ``parse``, which receives the path to
    ``config.ini`` inside ``directory`` when that file exists and ``None``
    otherwise. The inferred coordinates are written inside ``directory`` as
    ``NMDS.<output_name>`` (text, via ``np.savetxt``) and
    ``NMDS.<output_name>.pdb`` (via ``writePDB``).

    Parameters
    ----------
    directory : str
        Directory holding the optional ``config.ini``. The counts and
        lengths filenames found in the options are joined onto it.

    Returns
    -------
    bool
        Always ``True``.

    Raises
    ------
    ValueError
        If the counts filename ends with neither ``npy`` nor ``.matrix``.
    """
    config_file = os.path.join(directory, "config.ini")
    if not os.path.exists(config_file):
        config_file = None

    options = parse(config_file)

    random_state = np.random.RandomState(seed=options["seed"])

    # Lengths are only loaded from .bed files; anything else means "none".
    if options["lengths"].endswith(".bed"):
        lengths = load_lengths(os.path.join(directory, options["lengths"]))
    else:
        lengths = None

    counts_path = os.path.join(directory, options["counts"])
    if options["counts"].endswith("npy"):
        counts = np.load(counts_path)
    elif options["counts"].endswith(".matrix"):
        counts = load_counts(counts_path, lengths=lengths)
    else:
        # Fail here with a clear message instead of hitting an unbound
        # ``counts`` variable further down.
        raise ValueError("Counts file must end with .npy (for numpy array)"
                         " or .matrix (for hiclib / iced format)")

    if options["normalize"]:
        counts = iced.filter.filter_low_counts(counts,
                                               sparsity=False,
                                               percentage=0.04)
        counts = iced.normalization.ICE_normalization(counts, max_iter=300)

    # Always hand a COO matrix to the solver; for sparse input, explicit
    # zeros are dropped first.
    if not sparse.issparse(counts):
        counts = sparse.coo_matrix(counts)
    else:
        counts = counts.tocsr()
        counts.eliminate_zeros()
        counts = counts.tocoo()

    # Indices whose column sum is zero get NaN coordinates in the output.
    torm = np.array((counts.sum(axis=0) == 0)).flatten()
    nmds = NMDS(alpha=options["alpha"],
                beta=options["beta"],
                random_state=random_state,
                max_iter=options["max_iter"],
                verbose=options["verbose"])
    X = nmds.fit(counts)

    X[torm] = np.nan
    np.savetxt(os.path.join(directory, "NMDS." + options["output_name"]), X)

    # PDB file
    pdbfilename = os.path.join(directory,
                               "NMDS." + options["output_name"] + ".pdb")
    writePDB(X, pdbfilename)

    return True
Exemple #3
0
def load_sample_yeast():
    """
    Load and return a sample of S. cerevisiae contact count matrix from duan
    et al, Nature, 2009

    The counts are read with ``io.load_counts``, converted to a dense array
    and added to their own transpose before being returned.

    Returns
    -------
        counts, lengths:
            tuple of two elements, the first a contact count matrix, the
            second an ndarray containing the lengths of the chromosomes.
    """
    here = dirname(__file__)
    bed_path = os.path.join(here, "data/duan2009/duan.SC.10000.raw_sub.bed")
    matrix_path = os.path.join(
        here, "data/duan2009/duan.SC.10000.raw_sub.matrix")

    lengths = io.load_lengths(bed_path)
    dense = io.load_counts(matrix_path, lengths=lengths).toarray()
    return dense.T + dense, lengths
Exemple #4
0
def _get_counts(counts, lengths):
    """Load counts from file, or reformat counts object.

    Parameters
    ----------
    counts : ndarray, sparse matrix, str, or list of these
        Each entry is either an in-memory matrix or a filename ending with
        ``.npy`` (loaded with ``np.load``) or ``.matrix`` (loaded with
        ``load_counts``). A single non-list value is treated as a
        one-element list.
    lengths : object
        Passed through ``_get_lengths``; the result is forwarded to
        ``load_counts`` when reading ``.matrix`` files.

    Returns
    -------
    list of scipy.sparse.coo_matrix
        One COO matrix per input entry, with NaN values replaced by 0.

    Raises
    ------
    ValueError
        If a filename ends with neither ``.npy`` nor ``.matrix``.
    """

    if not isinstance(counts, list):
        counts = [counts]
    lengths = _get_lengths(lengths)
    output = []
    for f in counts:
        if isinstance(f, np.ndarray) or sparse.issparse(f):
            counts_maps = f
        elif f.endswith(".npy"):
            counts_maps = np.load(f)
        elif f.endswith(".matrix"):
            counts_maps = load_counts(f, lengths=lengths)
        else:
            raise ValueError("Counts file must end with .npy (for numpy array)"
                             " or .matrix (for hiclib / iced format)")
        if sparse.issparse(counts_maps):
            counts_maps = counts_maps.toarray()
        if counts_maps is f:
            # Still the caller's own array: copy it so that zeroing the
            # NaNs below does not modify the caller's data in place.
            counts_maps = counts_maps.copy()
        counts_maps[np.isnan(counts_maps)] = 0
        output.append(sparse.coo_matrix(counts_maps))
    return output
        sys.exit(1)

    if args.perchr is True and args.bins is None:
        print "--bins parameter is required when --perchr is specified"
        sys.exit(1)
    
    ## bin option
    if args.bins is not None:
        chr_lengths = load_lengths_perchr(args.bins)
        lengths = chr_lengths[0]
        chrnames = chr_lengths[1]
    else:
        lengths = None

    ## Load counts in sparse format
    counts = io.load_counts(args.filename, lengths=lengths)

    ## transform to integer if possible
    if counts.data[0].is_integer():
        counts.data = counts.data.astype(int)
    
    ## di/is option
    if args.di is True or args.ins is True:
        bins = load_bed(args.bins)
        if len(bins) != counts.shape[1]:
            print "Error -  number of rows in BED and matrix files are not equal"
            sys.exit(1)

    if args.ins is True:
        def myfunc( x, idx, org):
            return "bin" + str(idx) + "|" + org + "|" + x[0] + ":" + x[1] + "-" + x[2]
Exemple #6
0
from __future__ import print_function
import numpy as np
from glob import glob
from iced import io
from iced import utils

# For every raw contact map found, print the total number of counts and the
# percentage falling inside / outside the mask returned by
# utils.get_intra_mask.
filenames = glob("data/ay2013/*10000_raw.matrix") + \
            glob("data/lemieux2013/25kb/*.matrix")
filenames.sort()

for filename in filenames:
    # The lengths file sits next to the matrix, with a .bed extension.
    lengths = io.load_lengths(filename.replace(".matrix", ".bed"))
    counts = io.load_counts(filename, lengths=lengths)

    counts = counts.toarray()
    counts = counts.T + counts

    mask = utils.get_intra_mask(lengths)

    # Just making sure there is no interaction counted in the diagonal
    counts[np.diag_indices_from(counts)] = 0
    total = counts.sum()
    print(filename)
    print("Total number of counts", total)
    # Plain "%" here: these strings are never %-formatted, so "%%" would be
    # printed literally.
    print("% of intra", counts[mask].sum() / total * 100)
    print("% of inter", counts[np.invert(mask)].sum() / total * 100)
    print()
Exemple #7
0
def run_pm2(directory):
    """Run the PM2 inference on the dataset described in ``directory``.

    Options are obtained from ``parse``, which receives the path to
    ``config.ini`` inside ``directory`` when that file exists and ``None``
    otherwise. The inferred coordinates are written inside ``directory`` as
    ``PM2.<output_name>`` (text, via ``np.savetxt``) and
    ``PM2.<output_name>.pdb`` (via ``writePDB``).

    Parameters
    ----------
    directory : str
        Directory holding the optional ``config.ini``. The counts and
        lengths filenames found in the options are joined onto it.

    Returns
    -------
    bool
        Always ``True``.

    Raises
    ------
    ValueError
        If the counts filename ends with neither ``npy`` nor ``.matrix``.
    """
    config_file = os.path.join(directory, "config.ini")
    if not os.path.exists(config_file):
        config_file = None

    # Parsed once; the original parsed the same file a second time.
    options = parse(config_file)

    random_state = np.random.RandomState(seed=options["seed"])

    # Lengths are only loaded from .bed files; anything else means "none".
    if options["lengths"].endswith(".bed"):
        lengths = load_lengths(os.path.join(directory, options["lengths"]))
    else:
        lengths = None

    counts_path = os.path.join(directory, options["counts"])
    if options["counts"].endswith("npy"):
        counts = np.load(counts_path)
        # Zero the diagonal of the dense matrix.
        counts[np.arange(len(counts)), np.arange(len(counts))] = 0
    elif options["counts"].endswith(".matrix"):
        counts = load_counts(counts_path, lengths=lengths)
    else:
        # Fail here with a clear message instead of hitting an unbound
        # ``counts`` variable further down.
        raise ValueError("Counts file must end with .npy (for numpy array)"
                         " or .matrix (for hiclib / iced format)")

    if options["normalize"]:
        counts = iced.filter.filter_low_counts(counts,
                                               sparsity=False,
                                               percentage=0.04)

        # Only the bias is kept; the normalized matrix itself is discarded
        # and the filtered counts are what gets fitted.
        _, bias = iced.normalization.ICE_normalization(counts,
                                                       max_iter=300,
                                                       output_bias=True)
    else:
        bias = None

    # Always hand a COO matrix to the solver: dense input has its NaNs
    # zeroed, sparse input has explicit zeros dropped.
    if not sparse.issparse(counts):
        counts[np.isnan(counts)] = 0
        counts = sparse.coo_matrix(counts)
    else:
        counts = counts.tocsr()
        counts.eliminate_zeros()
        counts = counts.tocoo()

    pm2 = PM2(alpha=options["alpha"],
              beta=options["beta"],
              random_state=random_state,
              max_iter=options["max_iter"],
              bias=bias,
              verbose=options["verbose"])
    X = pm2.fit(counts)

    # Indices with a zero column sum in the symmetrized matrix get NaN
    # coordinates in the output.
    torm = np.array(((counts + counts.transpose()).sum(axis=0) == 0)).flatten()

    X[torm] = np.nan

    np.savetxt(os.path.join(directory, "PM2." + options["output_name"]), X)
    # PDB file
    pdbfilename = os.path.join(directory,
                               "PM2." + options["output_name"] + ".pdb")
    writePDB(X, pdbfilename)

    return True
Exemple #8
0
from __future__ import print_function
from iced import io
import numpy as np

# Sanity checks on the matrix file at /tmp/iced_matrix.matrix: it must be
# loadable through iced and must have exactly three columns.
print("Checking the normalized matrix can be re-loaded")

# The returned value is not used; the check is that loading succeeds.
counts = io.load_counts("/tmp/iced_matrix.matrix")

# Load with np.loadtxt and check that the shape makes sense
print("Checking the shape of the written matrix makes sense")
# ndmin=2 keeps the result two-dimensional even for a one-row file, so the
# column check below cannot fail with an IndexError on a 1-D array.
t = np.loadtxt("/tmp/iced_matrix.matrix", ndmin=2)
if t.shape[1] != 3:
    raise ValueError("The shape of the written matrix doesn't make sense")
import numpy as np

from iced import io
from iced import filter
from iced import normalization
from iced import utils

from matplotlib.colors import LogNorm
import matplotlib.pyplot as plt


# Load the raw counts and convert them to a dense matrix.
lengths = io.load_lengths("data/trophozoites_10000_raw.bed")
counts = utils.from_sparse_to_dense(
    io.load_counts("data/trophozoites_10000_raw.matrix", lengths=lengths))

# Filter the low counts, then apply ICE normalization.
normed = normalization.ICE_normalization(
    filter.filter_low_counts(counts, remove_all_zeros_loci=True,
                             sparsity=False))

# Keep only the sub contact map selected by [6, 7]
# (presumably chromosome indices; confirm against iced.utils).
normed, l = utils.extract_sub_contact_map(normed, lengths, [6, 7])

# Blank out rows and columns whose column sum is zero.
to_rm = normed.sum(axis=0) == 0
normed[to_rm] = np.nan
normed[:, to_rm] = np.nan

fig, ax = plt.subplots()
m = ax.matshow(np.log(normed+1), cmap="RdYlBu_r", vmax=5)
ax.set_xticks([])
ax.set_yticks([])

# Draw dashed lines at the cumulative offsets of the extracted lengths.
l = np.concatenate([[0], l])
for boundary in l.cumsum():
    ax.axhline(boundary, color="0", linestyle="--")
for boundary in l.cumsum():
    ax.axvline(boundary, color="0", linestyle="--")