コード例 #1
0
ファイル: test_io.py プロジェクト: tanya-tang/pastis
def test_load_counts():
    """Smoke test: ``io.load_counts`` returns something for the sample file.

    The matrix path is resolved relative to the directory containing this
    test module, and the only check is that the loaded object is not None.
    """
    here = dirname(__file__)
    relative_matrix = (
        "../../datasets/data/duan2009/duan.SC.10000.raw_sub.matrix")

    loaded = io.load_counts(os.path.join(here, relative_matrix))
    assert loaded is not None
コード例 #2
0
def run_nmds(directory):
    """Run NMDS on the dataset described in ``directory`` and save the result.

    Options are obtained from ``parse``; it receives the path to
    ``<directory>/config.ini`` when that file exists and ``None`` otherwise.
    The counts are loaded, optionally filtered and ICE-normalized, converted
    to a sparse COO matrix and passed to ``NMDS.fit``.  The result is written
    with ``np.savetxt`` to ``NMDS.<output_name>`` and with ``writePDB`` to
    ``NMDS.<output_name>.pdb``, both inside ``directory``.

    Parameters
    ----------
    directory : str
        Directory holding the input files named in the options (and,
        optionally, ``config.ini``).  Output files are written there too.

    Returns
    -------
    bool
        Always ``True``.

    Raises
    ------
    ValueError
        If the configured counts file name ends with neither ``npy`` nor
        ``.matrix``.
    """
    config_file = os.path.join(directory, "config.ini")
    if not os.path.exists(config_file):
        config_file = None

    options = parse(config_file)

    random_state = np.random.RandomState(seed=options["seed"])

    # Lengths are only read from a .bed file; they are forwarded to
    # load_counts when the counts come from a .matrix file.
    if options["lengths"].endswith(".bed"):
        lengths = load_lengths(os.path.join(directory, options["lengths"]))
    else:
        lengths = None

    counts_path = os.path.join(directory, options["counts"])
    if options["counts"].endswith("npy"):
        counts = np.load(counts_path)
    elif options["counts"].endswith(".matrix"):
        counts = load_counts(counts_path, lengths=lengths)
    else:
        # Fail early with a clear message instead of an UnboundLocalError
        # on the first later use of ``counts``.
        raise ValueError("Counts file must end with .npy (for numpy array)"
                         " or .matrix (for hiclib / iced format)")

    if options["normalize"]:
        counts = iced.filter.filter_low_counts(counts,
                                               sparsity=False,
                                               percentage=0.04)
        counts = iced.normalization.ICE_normalization(counts, max_iter=300)

    if not sparse.issparse(counts):
        counts = sparse.coo_matrix(counts)
    else:
        # Drop explicitly stored zeros before converting back to COO.
        counts = counts.tocsr()
        counts.eliminate_zeros()
        counts = counts.tocoo()

    # Columns whose sum is zero; the matching rows of X are blanked below.
    torm = np.array((counts.sum(axis=0) == 0)).flatten()
    nmds = NMDS(alpha=options["alpha"],
                beta=options["beta"],
                random_state=random_state,
                max_iter=options["max_iter"],
                verbose=options["verbose"])
    X = nmds.fit(counts)

    X[torm] = np.nan
    np.savetxt(os.path.join(directory, "NMDS." + options["output_name"]), X)

    # PDB file
    pdbfilename = os.path.join(directory,
                               "NMDS." + options["output_name"] + ".pdb")
    writePDB(X, pdbfilename)

    return True
コード例 #3
0
ファイル: base.py プロジェクト: NelleV/iced
def load_sample_yeast():
    """
    Load the bundled sample of the S. cerevisiae contact count matrix from
    Duan et al, Nature, 2009.

    The files are looked up under ``data/duan2009`` next to this module. The
    loaded counts are converted to a dense array and added to their own
    transpose before being returned.

    Returns
    -------
        counts, lengths:
            tuple of two elements, the first a contact count matrix, the
            second an ndarray containing the lengths of the chromosomes.
    """
    here = dirname(__file__)
    bed_path = os.path.join(
        here, "data/duan2009/duan.SC.10000.raw_sub.bed")
    matrix_path = os.path.join(
        here, "data/duan2009/duan.SC.10000.raw_sub.matrix")

    lengths = io.load_lengths(bed_path)
    dense = io.load_counts(matrix_path, lengths=lengths).toarray()
    return dense.T + dense, lengths
コード例 #4
0
def _get_counts(counts, lengths):
    """Load counts from file, or reformat counts object.

    Parameters
    ----------
    counts : ndarray, sparse matrix, str, or list of those
        A single counts object or a list of them.  Strings are treated as
        file names: ``.npy`` files are read with ``np.load`` and ``.matrix``
        files with ``load_counts``.  Anything that is not a list is wrapped
        in a one-element list.
    lengths : object
        Passed through ``_get_lengths``; the result is forwarded to
        ``load_counts`` when reading ``.matrix`` files.

    Returns
    -------
    list of scipy.sparse.coo_matrix
        One matrix per input, with NaN entries replaced by 0.

    Raises
    ------
    ValueError
        If a file name ends with neither ``.npy`` nor ``.matrix``.
    """

    if not isinstance(counts, list):
        counts = [counts]
    lengths = _get_lengths(lengths)
    output = []
    for f in counts:
        if isinstance(f, np.ndarray):
            # Work on a copy: NaNs are zeroed in place further down and that
            # must not modify the array owned by the caller.
            counts_maps = f.copy()
        elif sparse.issparse(f):
            counts_maps = f
        elif f.endswith(".npy"):
            counts_maps = np.load(f)
        elif f.endswith(".matrix"):
            counts_maps = load_counts(f, lengths=lengths)
        else:
            raise ValueError("Counts file must end with .npy (for numpy array)"
                             " or .matrix (for hiclib / iced format)")
        if sparse.issparse(counts_maps):
            # toarray() builds a new dense array, so the input is untouched.
            counts_maps = counts_maps.toarray()
        counts_maps[np.isnan(counts_maps)] = 0
        output.append(sparse.coo_matrix(counts_maps))
    return output
コード例 #5
0
        sys.exit(1)

    if args.perchr is True and args.bins is None:
        print "--bins parameter is required when --perchr is specified"
        sys.exit(1)
    
    ## bin option
    if args.bins is not None:
        chr_lengths = load_lengths_perchr(args.bins)
        lengths = chr_lengths[0]
        chrnames = chr_lengths[1]
    else:
        lengths = None

    ## Load counts in sparse format
    counts = io.load_counts(args.filename, lengths=lengths)

    ## transform to integer if possible
    if counts.data[0].is_integer():
        counts.data = counts.data.astype(int)
    
    ## di/is option
    if args.di is True or args.ins is True:
        bins = load_bed(args.bins)
        if len(bins) != counts.shape[1]:
            print "Error -  number of rows in BED and matrix files are not equal"
            sys.exit(1)

    if args.ins is True:
        def myfunc( x, idx, org):
            return "bin" + str(idx) + "|" + org + "|" + x[0] + ":" + x[1] + "-" + x[2]
コード例 #6
0
ファイル: total_counts.py プロジェクト: NelleV/takefive
from __future__ import print_function
import numpy as np
from glob import glob
from iced import io
from iced import utils

# Print, for every raw contact map found under data/, the total number of
# counts and the share selected / not selected by the intra mask.
filenames = glob("data/ay2013/*10000_raw.matrix") + \
            glob("data/lemieux2013/25kb/*.matrix")
filenames.sort()

for filename in filenames:
    # The lengths file sits next to the matrix, with a .bed extension.
    lengths = io.load_lengths(filename.replace(".matrix", ".bed"))
    counts = io.load_counts(filename, lengths=lengths)

    counts = counts.toarray()
    counts = counts.T + counts

    # NOTE(review): presumably True for intra-chromosomal bin pairs, judging
    # by the helper's name -- confirm against iced.utils.get_intra_mask.
    mask = utils.get_intra_mask(lengths)

    # Just making sure there is no interaction counted in the diag
    counts[np.diag_indices_from(counts)] = 0

    # float() keeps the ratios fractional even for integer counts on
    # Python 2, where only print_function is imported from __future__.
    total = float(counts.sum())
    print(filename)
    print("Total number of counts", counts.sum())
    # A plain "%" here: print() applies no %-formatting, so "%%" would be
    # shown literally.
    print("% of intra", counts[mask].sum() / total * 100)
    print("% of inter", counts[np.invert(mask)].sum() / total * 100)
    print()
コード例 #7
0
def run_pm2(directory):
    """Run PM2 on the dataset described in ``directory`` and save the result.

    Options are obtained from ``parse``; it receives the path to
    ``<directory>/config.ini`` when that file exists and ``None`` otherwise.
    The counts are loaded and converted to a sparse COO matrix.  When the
    ``normalize`` option is set, low counts are filtered and the bias vector
    returned by ``ICE_normalization`` is passed to ``PM2``; the normalized
    matrix itself is discarded.  The result of ``PM2.fit`` is written with
    ``np.savetxt`` to ``PM2.<output_name>`` and with ``writePDB`` to
    ``PM2.<output_name>.pdb``, both inside ``directory``.

    Parameters
    ----------
    directory : str
        Directory holding the input files named in the options (and,
        optionally, ``config.ini``).  Output files are written there too.

    Returns
    -------
    bool
        Always ``True``.

    Raises
    ------
    ValueError
        If the configured counts file name ends with neither ``npy`` nor
        ``.matrix``.
    """
    config_file = os.path.join(directory, "config.ini")
    if not os.path.exists(config_file):
        config_file = None

    # Parsed once; the original parsed the same file a second time.
    options = parse(config_file)

    random_state = np.random.RandomState(seed=options["seed"])

    if options["lengths"].endswith(".bed"):
        lengths = load_lengths(os.path.join(directory, options["lengths"]))
    else:
        lengths = None

    counts_path = os.path.join(directory, options["counts"])
    if options["counts"].endswith("npy"):
        counts = np.load(counts_path)
        # Zero the main diagonal of the dense matrix.
        diagonal = np.arange(len(counts))
        counts[diagonal, diagonal] = 0
    elif options["counts"].endswith(".matrix"):
        counts = load_counts(counts_path, lengths=lengths)
    else:
        # Fail early with a clear message instead of an UnboundLocalError
        # on the first later use of ``counts``.
        raise ValueError("Counts file must end with .npy (for numpy array)"
                         " or .matrix (for hiclib / iced format)")

    if options["normalize"]:
        counts = iced.filter.filter_low_counts(counts,
                                               sparsity=False,
                                               percentage=0.04)

        # Only the bias is kept; the filtered counts are what gets fitted.
        _, bias = iced.normalization.ICE_normalization(counts,
                                                       max_iter=300,
                                                       output_bias=True)
    else:
        bias = None

    if not sparse.issparse(counts):
        counts[np.isnan(counts)] = 0
        counts = sparse.coo_matrix(counts)
    else:
        # Drop explicitly stored zeros before converting back to COO.
        counts = counts.tocsr()
        counts.eliminate_zeros()
        counts = counts.tocoo()

    pm2 = PM2(alpha=options["alpha"],
              beta=options["beta"],
              random_state=random_state,
              max_iter=options["max_iter"],
              bias=bias,
              verbose=options["verbose"])
    X = pm2.fit(counts)

    # Columns with a zero sum in counts + counts^T; the matching rows of X
    # are blanked.
    torm = np.array(((counts + counts.transpose()).sum(axis=0) == 0)).flatten()

    X[torm] = np.nan

    np.savetxt(os.path.join(directory, "PM2." + options["output_name"]), X)
    # PDB file
    pdbfilename = os.path.join(directory,
                               "PM2." + options["output_name"] + ".pdb")
    writePDB(X, pdbfilename)

    return True
コード例 #8
0
from __future__ import print_function
from iced import io
import numpy as np

print("Checking the normalized matrix can be re-loaded")

# The loaded object is not inspected further: the check is simply that
# io.load_counts can read the file without raising.
counts = io.load_counts("/tmp/iced_matrix.matrix")

# Load with np.loadtxt and check that the shape makes sense
print("Checking the shape of the written matrix makes sense")
# ndmin=2 keeps a one-line file two-dimensional, so the column check below
# runs instead of failing with an IndexError on a 1-D array.
t = np.loadtxt("/tmp/iced_matrix.matrix", ndmin=2)
if t.shape[1] != 3:
    raise ValueError("The shape of the written matrix doesn't make sense")
コード例 #9
0
import numpy as np

from iced import io
from iced import filter
from iced import normalization
from iced import utils

from matplotlib.colors import LogNorm
import matplotlib.pyplot as plt


# Load the raw contact counts and the lengths file, then densify the counts.
lengths = io.load_lengths("data/trophozoites_10000_raw.bed")
counts = utils.from_sparse_to_dense(
    io.load_counts("data/trophozoites_10000_raw.matrix", lengths=lengths))

# Filter low counts, then apply ICE normalization to the filtered matrix.
normed = normalization.ICE_normalization(
    filter.filter_low_counts(counts, remove_all_zeros_loci=True,
                             sparsity=False))

# Restrict the map to entries [6, 7] and blank out rows/columns whose
# column sum is zero.
normed, l = utils.extract_sub_contact_map(normed, lengths, [6, 7])
to_rm = (normed.sum(axis=0) == 0)
normed[to_rm] = np.nan
normed[:, to_rm] = np.nan

# Show log(normed + 1) without tick marks.
fig, ax = plt.subplots()
m = ax.matshow(np.log(normed+1), cmap="RdYlBu_r", vmax=5)
ax.set_xticks([])
ax.set_yticks([])

# Dashed lines at the cumulative sums of the returned lengths, starting at 0.
l = np.concatenate([[0], l])
for boundary in l.cumsum():
    ax.axhline(boundary, color="0", linestyle="--")
for boundary in l.cumsum():
    ax.axvline(boundary, color="0", linestyle="--")