def test_read_directed_bedfile():
    """Check that six-column BED text is parsed into stranded Regions.

    The expected value maps each chromosome name to a Regions built from
    the start column, the end column and a direction of 1 for "+" and
    -1 for "-".
    """
    bed_text = "\n".join((
        "chr1\t0\t10\t.\t.\t+",
        "chr1\t10\t25\t.\t.\t-",
        "chr1\t25\t35\t.\t.\t+",
        "chr2\t0\t5\t.\t.\t-",
        "chr2\t5\t10\t.\t.\t+",
    ))
    parsed = read_bedfile(io.StringIO(bed_text))
    expected = {
        "chr1": Regions([0, 10, 25], [10, 25, 35], [1, -1, 1]),
        "chr2": Regions([0, 5], [5, 10], [-1, 1]),
    }
    assert parsed == expected
def test_read_bedfile():
    """Check that three-column BED text is parsed into Regions per chromosome.

    No strand column is present, so the expected Regions are built from
    starts and ends only.
    """
    bed_text = "\n".join((
        "chr1\t0\t10",
        "chr1\t10\t25",
        "chr1\t25\t35",
        "chr2\t0\t5",
        "chr2\t5\t10",
    ))
    parsed = read_bedfile(io.StringIO(bed_text))
    expected = {
        "chr1": Regions([0, 10, 25], [10, 25, 35]),
        "chr2": Regions([0, 5], [5, 10]),
    }
    assert parsed == expected
def to_regions(states, log_probs, lengths, chroms):
    """Split a concatenated state track into per-chromosome regions.

    ``states`` and ``log_probs`` hold the positions of all chromosomes
    back to back, in the order given by ``chroms``; ``lengths`` gives the
    number of positions belonging to each chromosome.

    Parameters
    ----------
    states : numpy array
        One state per position. Runs of value 1 become regions.
        NOTE(review): the run detection pairs up consecutive change points,
        so it assumes the track only contains the values 0 and 1 - confirm.
    log_probs : numpy array
        Two-dimensional; column 1 minus column 0 is used as the
        per-position score.
    lengths : iterable of int
        Number of positions per chromosome.
    chroms : iterable
        Chromosome names, used as keys of the returned dicts.

    Returns
    -------
    regions : dict
        Chromosome -> ``Regions(starts, ends)`` of the runs of state 1,
        in chromosome-local coordinates (end exclusive).
    scores : dict
        Chromosome -> list with the maximum score inside each region.
    peaks : dict
        Chromosome -> list with, for each region, the integer mean of the
        offsets (relative to the region start) at which the maximum score
        is attained.
    """
    # Per-position score: difference between the two log-probability columns.
    log_odds = log_probs[:, 1] - log_probs[:, 0]

    regions = {}
    scores = {}
    peaks = {}
    start = 0
    for chrom, length in zip(chroms, lengths):
        stop = start + length
        dense = states[start:stop]
        local_probs = log_odds[start:stop]
        # Move the window on to the next chromosome. Without this every
        # chromosome would be read from offset 0 of the concatenated arrays.
        start = stop

        # Indices where the state differs from the previous position.
        changes = np.flatnonzero(np.diff(dense)) + 1
        # A run touching either edge has no change point there; add it.
        # The size check keeps zero-length chromosomes from raising.
        if dense.size and dense[0] == 1:
            changes = np.insert(changes, 0, 0)
        if dense.size and dense[-1] == 1:
            changes = np.append(changes, dense.size)
        # Consecutive change points now form (start, end) pairs.
        changes = changes.reshape((-1, 2))
        region_starts = changes[:, 0]
        region_ends = changes[:, 1]

        regions[chrom] = Regions(region_starts, region_ends)
        scores[chrom] = [
            np.max(local_probs[lo:hi])
            for lo, hi in zip(region_starts, region_ends)
        ]
        # The maximum may be attained at several positions; report the
        # (truncated) mean of their offsets within the region.
        peaks[chrom] = [
            np.mean(np.flatnonzero(local_probs[lo:hi] == best)).astype(int)
            for lo, hi, best in zip(region_starts, region_ends, scores[chrom])
        ]
    return regions, scores, peaks
def test_read_genes():
    """Check that two refseq rows on chr10 are parsed into the expected Genes.

    One row is on the "+" strand and one on the "-" strand. The expected
    exon lists contain the "-" strand exons in reversed order.
    """
    refseq_rows = (
        "1366 NM_026243 chr10 + 102374436 102391468 102378157 102389363 4 102374436,102378151,102385005,102388221, 102374644,102378304,102385153,102391468, 0 Mgat4c cmpl cmpl -1,0,0,1,",
        "171 NM_172553 chr10 - 103007846 103028777 103009187 103028606 4 103007846,103022176,103025134,103028380, 103009508,103022305,103025439,103028777, 0 Alx1 cmpl cmpl 0,0,1,0,",
    )
    handle = io.StringIO("\n".join(refseq_rows))
    parsed = read_refseq(handle)["chr10"]

    forward_starts = [102374436, 102378151, 102385005, 102388221]
    forward_ends = [102374644, 102378304, 102385153, 102391468]
    reverse_starts = [103007846, 103022176, 103025134, 103028380]
    reverse_ends = [103009508, 103022305, 103025439, 103028777]

    # Reverse-strand exons are listed back to front.
    all_starts = forward_starts + reverse_starts[::-1]
    all_ends = forward_ends + reverse_ends[::-1]
    directions = [1] * 4 + [-1] * 4

    coding_starts = [
        102378157 - 102378151 + 102374644 - 102374436,
        103028777 - 103028606,
    ]
    coding_ends = [509 + 1142, 321 + 831]

    expected = Genes(
        Regions(all_starts, all_ends, directions),
        [0, 4, 8],
        coding_regions=Regions(coding_starts, coding_ends),
    )
    assert parsed == expected
def regions_10b():
    """Return three Regions intervals, each spanning 10 positions, with directions."""
    starts = [2, 13, 17]
    ends = [12, 23, 27]
    directions = [1, -1, 1]
    return Regions(starts, ends, directions)
def test_averageplot(bedgraph, regions_10b, true_signal):
    """Check AveragePlot output against the reference signal minus its end points.

    The input regions are shrunk by 3 positions on each side before being
    handed to the plotter.
    """
    shrunk = Regions(
        regions_10b.starts + 3,
        regions_10b.ends - 3,
        regions_10b.directions,
    )
    average_plot = AveragePlot(8, do_normalize=False)
    result = average_plot([("chr1", bedgraph)], {"chr1": shrunk})
    observed = result["y"].values
    expected = true_signal[1:-1]
    assert np.all(observed == expected)
def test_tssplot(bedgraph, regions_10b, true_signal):
    """Check TSSPlot output against the reference signal.

    Each input region is replaced by a one-position region at its
    integer midpoint, keeping the original direction.
    """
    midpoints = (regions_10b.starts + regions_10b.ends) // 2
    point_regions = Regions(midpoints, midpoints + 1, regions_10b.directions)
    tss_plot = TSSPlot(10, 10, do_normalize=False)
    result = tss_plot([("chr1", bedgraph)], {"chr1": point_regions})
    observed = result["y"].values
    assert np.all(observed == true_signal)
def regions():
    """Return five forward-direction Regions of widths 1 to 5, starting at 100, 200, ..., 500."""
    starts = list(range(100, 600, 100))
    # The i-th region (1-based) is i positions wide: 100-101, 200-202, ...
    ends = [start + width for width, start in enumerate(starts, 1)]
    directions = [1] * len(starts)
    return Regions(starts, ends, directions)
def regions():
    """Return a dict mapping "chr1" to three Regions intervals without directions."""
    starts = [4, 14, 24]
    ends = [10, 20, 30]
    return {"chr1": Regions(starts, ends)}