def test_load_counts():
    """Smoke test: the bundled duan2009 sample matrix can be loaded."""
    # The sample file is addressed relative to this test module.
    here = dirname(__file__)
    sample_matrix = os.path.join(
        here,
        "../../datasets/data/duan2009/duan.SC.10000.raw_sub.matrix")
    loaded = io.load_counts(sample_matrix)
    assert loaded is not None
def run_nmds(directory):
    """Run the NMDS inference on the dataset stored in ``directory``.

    Options are obtained from ``parse``, which is given
    ``<directory>/config.ini`` when that file exists and ``None`` otherwise.
    The counts are loaded, optionally filtered and ICE-normalized, converted
    to a sparse COO matrix and handed to ``NMDS.fit``. The resulting
    coordinates are written to ``NMDS.<output_name>`` (with ``np.savetxt``)
    and to ``NMDS.<output_name>.pdb`` (with ``writePDB``), both inside
    ``directory``.

    Parameters
    ----------
    directory : str
        Directory holding the input files (and optionally ``config.ini``);
        the outputs are written there as well.

    Returns
    -------
    bool
        Always ``True``.

    Raises
    ------
    ValueError
        If ``options["counts"]`` ends with neither ``npy`` nor ``.matrix``.
    """
    config_file = os.path.join(directory, "config.ini")
    if not os.path.exists(config_file):
        config_file = None

    options = parse(config_file)
    random_state = np.random.RandomState(seed=options["seed"])

    # Chromosome lengths are only available when a .bed file is configured.
    if options["lengths"].endswith(".bed"):
        lengths = load_lengths(os.path.join(directory, options["lengths"]))
    else:
        lengths = None

    counts_path = os.path.join(directory, options["counts"])
    if options["counts"].endswith("npy"):
        counts = np.load(counts_path)
    elif options["counts"].endswith(".matrix"):
        counts = load_counts(counts_path, lengths=lengths)
    else:
        # Fail early with a clear message instead of hitting an unbound
        # ``counts`` further down.
        raise ValueError("Counts file must end with .npy (for numpy array)"
                         " or .matrix (for hiclib / iced format)")

    if options["normalize"]:
        counts = iced.filter.filter_low_counts(counts, sparsity=False,
                                               percentage=0.04)
        counts = iced.normalization.ICE_normalization(counts, max_iter=300)

    if not sparse.issparse(counts):
        counts = sparse.coo_matrix(counts)
    else:
        # Drop explicitly stored zeros before going back to COO.
        counts = counts.tocsr()
        counts.eliminate_zeros()
        counts = counts.tocoo()

    # Beads without any contact. The matrix may store only one triangle of
    # the contact map, so sum the symmetrized matrix: otherwise a bead whose
    # contacts all sit in its row would be flagged as empty.
    torm = np.array(
        ((counts + counts.transpose()).sum(axis=0) == 0)).flatten()

    nmds = NMDS(alpha=options["alpha"],
                beta=options["beta"],
                random_state=random_state,
                max_iter=options["max_iter"],
                verbose=options["verbose"])
    X = nmds.fit(counts)

    # Coordinates of beads without data are not meaningful.
    X[torm] = np.nan

    np.savetxt(os.path.join(directory, "NMDS." + options["output_name"]), X)

    # PDB file
    pdbfilename = os.path.join(
        directory, "NMDS." + options["output_name"] + ".pdb")
    writePDB(X, pdbfilename)

    return True
def load_sample_yeast():
    """
    Load the bundled S. cerevisiae sample contact counts (duan2009 data).

    The matrix returned by ``io.load_counts`` is converted to a dense array
    and added to its own transpose before being returned.

    Returns
    -------
    counts, lengths : tuple
        ``counts`` is the dense contact count matrix, ``lengths`` is whatever
        ``io.load_lengths`` returns for the matching ``.bed`` file (the
        chromosome lengths).
    """
    here = dirname(__file__)
    bed_path = os.path.join(
        here, "data/duan2009/duan.SC.10000.raw_sub.bed")
    matrix_path = os.path.join(
        here, "data/duan2009/duan.SC.10000.raw_sub.matrix")

    lengths = io.load_lengths(bed_path)
    dense = io.load_counts(matrix_path, lengths=lengths).toarray()
    # Add the transpose to fill in the other triangle.
    symmetric = dense.T + dense
    return symmetric, lengths
def _get_counts(counts, lengths):
    """Load counts from file, or reformat counts object.

    Parameters
    ----------
    counts : object or list
        A single counts entry or a list of them. Each entry is either a
        numpy array, a scipy sparse matrix, or a filename ending in ``.npy``
        or ``.matrix``.
    lengths : object
        Passed through ``_get_lengths``; the result is forwarded to
        ``load_counts`` when a ``.matrix`` file is read.

    Returns
    -------
    list
        One ``scipy.sparse.coo_matrix`` per entry, with NaN values replaced
        by 0.

    Raises
    ------
    ValueError
        If a filename has an unsupported extension.
    """
    if not isinstance(counts, list):
        counts = [counts]
    lengths = _get_lengths(lengths)

    output = []
    for f in counts:
        if isinstance(f, np.ndarray) or sparse.issparse(f):
            counts_maps = f
        elif f.endswith(".npy"):
            counts_maps = np.load(f)
        elif f.endswith(".matrix"):
            counts_maps = load_counts(f, lengths=lengths)
        else:
            raise ValueError("Counts file must end with .npy (for numpy array)"
                             " or .matrix (for hiclib / iced format)")

        if sparse.issparse(counts_maps):
            counts_maps = counts_maps.toarray()

        nan_mask = np.isnan(counts_maps)
        if nan_mask.any():
            if counts_maps is f:
                # Never overwrite the array handed in by the caller.
                counts_maps = counts_maps.copy()
            counts_maps[nan_mask] = 0

        output.append(sparse.coo_matrix(counts_maps))
    return output
sys.exit(1) if args.perchr is True and args.bins is None: print "--bins parameter is required when --perchr is specified" sys.exit(1) ## bin option if args.bins is not None: chr_lengths = load_lengths_perchr(args.bins) lengths = chr_lengths[0] chrnames = chr_lengths[1] else: lengths = None ## Load counts in sparse format counts = io.load_counts(args.filename, lengths=lengths) ## transform to integer if possible if counts.data[0].is_integer(): counts.data = counts.data.astype(int) ## di/is option if args.di is True or args.ins is True: bins = load_bed(args.bins) if len(bins) != counts.shape[1]: print "Error - number of rows in BED and matrix files are not equal" sys.exit(1) if args.ins is True: def myfunc( x, idx, org): return "bin" + str(idx) + "|" + org + "|" + x[0] + ":" + x[1] + "-" + x[2]
from __future__ import print_function import numpy as np from glob import glob from iced import io from iced import utils filenames = glob("data/ay2013/*10000_raw.matrix") + \ glob("data/lemieux2013/25kb/*.matrix") filenames.sort() for filename in filenames: lengths = io.load_lengths(filename.replace(".matrix", ".bed")) counts = io.load_counts(filename, lengths=lengths) counts = counts.toarray() counts = counts.T + counts mask = utils.get_intra_mask(lengths) # Just making sure there is no interaction counted in teh diag counts[np.diag_indices_from(counts)] = 0 print(filename) print("Total number of counts", counts.sum()) print("%% of intra", counts[mask].sum()/counts.sum() * 100) print("%% of inter", counts[np.invert(mask)].sum()/counts.sum() * 100) print()
def run_pm2(directory):
    """Run the PM2 inference on the dataset stored in ``directory``.

    Options are obtained from ``parse``, which is given
    ``<directory>/config.ini`` when that file exists and ``None`` otherwise.
    The counts are loaded, optionally filtered, and (when normalization is
    enabled) the ICE bias vector is computed and passed to ``PM2``. The
    resulting coordinates are written to ``PM2.<output_name>`` (with
    ``np.savetxt``) and to ``PM2.<output_name>.pdb`` (with ``writePDB``),
    both inside ``directory``.

    Parameters
    ----------
    directory : str
        Directory holding the input files (and optionally ``config.ini``);
        the outputs are written there as well.

    Returns
    -------
    bool
        Always ``True``.

    Raises
    ------
    ValueError
        If ``options["counts"]`` ends with neither ``npy`` nor ``.matrix``.
    """
    config_file = os.path.join(directory, "config.ini")
    if not os.path.exists(config_file):
        config_file = None

    # The configuration only needs to be parsed once.
    options = parse(config_file)
    random_state = np.random.RandomState(seed=options["seed"])

    # Chromosome lengths are only available when a .bed file is configured.
    if options["lengths"].endswith(".bed"):
        lengths = load_lengths(os.path.join(directory, options["lengths"]))
    else:
        lengths = None

    counts_path = os.path.join(directory, options["counts"])
    if options["counts"].endswith("npy"):
        counts = np.load(counts_path)
        # Zero the diagonal of the dense matrix.
        diag = np.arange(len(counts))
        counts[diag, diag] = 0
    elif options["counts"].endswith(".matrix"):
        counts = load_counts(counts_path, lengths=lengths)
    else:
        # Fail early with a clear message instead of hitting an unbound
        # ``counts`` further down.
        raise ValueError("Counts file must end with .npy (for numpy array)"
                         " or .matrix (for hiclib / iced format)")

    if options["normalize"]:
        counts = iced.filter.filter_low_counts(counts, sparsity=False,
                                               percentage=0.04)
        # Only the bias vector is kept; the filtered counts themselves are
        # what gets passed to the model.
        _, bias = iced.normalization.ICE_normalization(
            counts, max_iter=300, output_bias=True)
    else:
        bias = None

    if not sparse.issparse(counts):
        counts[np.isnan(counts)] = 0
        counts = sparse.coo_matrix(counts)
    else:
        # Drop explicitly stored zeros before going back to COO.
        counts = counts.tocsr()
        counts.eliminate_zeros()
        counts = counts.tocoo()

    pm2 = PM2(alpha=options["alpha"],
              beta=options["beta"],
              random_state=random_state,
              max_iter=options["max_iter"],
              bias=bias,
              verbose=options["verbose"])
    X = pm2.fit(counts)

    # Beads without any contact, looking at both the row and the column of
    # each bead; their coordinates are not meaningful.
    torm = np.array(
        ((counts + counts.transpose()).sum(axis=0) == 0)).flatten()
    X[torm] = np.nan

    np.savetxt(os.path.join(directory, "PM2." + options["output_name"]), X)

    # PDB file
    pdbfilename = os.path.join(
        directory, "PM2." + options["output_name"] + ".pdb")
    writePDB(X, pdbfilename)

    return True
from __future__ import print_function from iced import io import numpy as np print("Checking the normalized matrix can be re-loaded") counts = io.load_counts("/tmp/iced_matrix.matrix") # Load with np.loadtxt and check that the shape makes sense print("Checking the shape of the written matrix makes sense") t = np.loadtxt("/tmp/iced_matrix.matrix") if t.shape[1] != 3: raise ValueError("The shape of the written matrix doesn't make sense")
import numpy as np
from iced import io
from iced import filter
from iced import normalization
from iced import utils
from matplotlib.colors import LogNorm
import matplotlib.pyplot as plt

# Load the raw contact map and make it dense.
lengths = io.load_lengths("data/trophozoites_10000_raw.bed")
counts = io.load_counts("data/trophozoites_10000_raw.matrix",
                        lengths=lengths)
counts = utils.from_sparse_to_dense(counts)

# Filter, ICE-normalize, then extract the sub-map for entries 6 and 7.
normed = filter.filter_low_counts(
    counts, remove_all_zeros_loci=True, sparsity=False)
normed = normalization.ICE_normalization(normed)
normed, l = utils.extract_sub_contact_map(normed, lengths, [6, 7])

# Blank out the rows and columns whose column sum is zero.
to_rm = normed.sum(axis=0) == 0
normed[to_rm] = np.nan
normed[:, to_rm] = np.nan

fig, ax = plt.subplots()
m = ax.matshow(np.log(normed+1), cmap="RdYlBu_r", vmax=5)
ax.set_xticks([])
ax.set_yticks([])

# Dashed lines at the cumulative lengths, starting from 0: horizontal lines
# first, then vertical ones.
l = np.concatenate([[0], l])
for boundary in l.cumsum():
    ax.axhline(boundary, color="0", linestyle="--")
for boundary in l.cumsum():
    ax.axvline(boundary, color="0", linestyle="--")