Exemple #1
0
def test_read_directed_bedfile():
    """Check that six-column BED rows are parsed into directed Regions.

    The sixth column holds the strand; the expected value maps "+" to 1
    and "-" to -1, grouped per chromosome.
    """
    bed_text = "\n".join((
        "chr1\t0\t10\t.\t.\t+",
        "chr1\t10\t25\t.\t.\t-",
        "chr1\t25\t35\t.\t.\t+",
        "chr2\t0\t5\t.\t.\t-",
        "chr2\t5\t10\t.\t.\t+",
    ))
    parsed = read_bedfile(io.StringIO(bed_text))
    expected = {
        "chr1": Regions([0, 10, 25], [10, 25, 35], [1, -1, 1]),
        "chr2": Regions([0, 5], [5, 10], [-1, 1]),
    }
    assert parsed == expected
Exemple #2
0
def test_read_bedfile():
    """Check that three-column BED rows are parsed into undirected Regions.

    Rows are grouped by chromosome name; no direction argument is
    expected on the resulting Regions.
    """
    bed_text = "\n".join((
        "chr1\t0\t10",
        "chr1\t10\t25",
        "chr1\t25\t35",
        "chr2\t0\t5",
        "chr2\t5\t10",
    ))
    parsed = read_bedfile(io.StringIO(bed_text))
    expected = {
        "chr1": Regions([0, 10, 25], [10, 25, 35]),
        "chr2": Regions([0, 5], [5, 10]),
    }
    assert parsed == expected
Exemple #3
0
def to_regions(states, log_probs, lengths, chroms):
    """Convert a concatenated per-position state track into regions.

    ``states`` and ``log_probs`` hold the positions of all chromosomes
    back to back; ``lengths`` gives how many positions belong to each
    entry of ``chroms``, in the same order.

    Args:
        states: 1-D array of per-position states. Runs where the state
            equals 1 become regions.
            NOTE(review): assumed to be a NumPy array containing only
            0/1 values -- any other value breaks the start/end pairing
            below; confirm against the caller.
        log_probs: 2-D array indexed as ``[position, state]``; only
            columns 0 and 1 are read.
        lengths: Number of positions per chromosome.
        chroms: Chromosome names, parallel to ``lengths``.

    Returns:
        A ``(regions, scores, peaks)`` tuple of dicts keyed by
        chromosome name:

        * ``regions[chrom]`` -- ``Regions`` built from the run starts
          and (exclusive) run ends, in chromosome-local coordinates.
        * ``scores[chrom]`` -- list with, per region, the maximum of
          ``log_probs[:, 1] - log_probs[:, 0]`` inside the region.
        * ``peaks[chrom]`` -- list with, per region, the integer mean
          of the positions attaining that maximum. The position is an
          offset relative to the region start, not to the chromosome.
    """
    regions = {}
    scores = {}
    peaks = {}
    # Per-position difference between the state-1 and state-0 columns.
    log_odds = log_probs[:, 1] - log_probs[:, 0]
    offset = 0
    for chrom, length in zip(chroms, lengths):
        dense = states[offset:offset + length]
        local_odds = log_odds[offset:offset + length]
        # Advance to the next chromosome's slice. The running offset was
        # previously never updated, so every chromosome was read from the
        # beginning of the concatenated arrays.
        offset += length

        # Indices at which the state differs from the previous position.
        changes = np.flatnonzero(np.diff(dense)) + 1
        # A run touching either edge has no transition there, so add the
        # missing boundary. The size guard keeps zero-length chromosomes
        # from raising IndexError.
        if dense.size and dense[0] == 1:
            changes = np.insert(changes, 0, 0)
        if dense.size and dense[-1] == 1:
            changes = np.append(changes, dense.size)
        # Boundaries now alternate start, end, start, end, ...
        bounds = changes.reshape((-1, 2))

        regions[chrom] = Regions(bounds[:, 0], bounds[:, 1])
        scores[chrom] = [
            np.max(local_odds[lo:hi]) for lo, hi in bounds
        ]
        peaks[chrom] = [
            np.mean(np.flatnonzero(local_odds[lo:hi] == top)).astype(int)
            for (lo, hi), top in zip(bounds, scores[chrom])
        ]
    return regions, scores, peaks
Exemple #4
0
def test_read_genes():
    """Check read_refseq on two tab-separated chr10 rows, one per strand.

    The expected exons list the "+" gene in file order and the "-" gene
    in reversed order; coding regions are built from the offsets
    computed below.
    """
    refseq_rows = (
        "1366	NM_026243	chr10	+	102374436	102391468	102378157	102389363	4	102374436,102378151,102385005,102388221,	102374644,102378304,102385153,102391468,	0	Mgat4c	cmpl	cmpl	-1,0,0,1,",
        "171	NM_172553	chr10	-	103007846	103028777	103009187	103028606	4	103007846,103022176,103025134,103028380,	103009508,103022305,103025439,103028777,	0	Alx1	cmpl	cmpl	0,0,1,0,",
    )
    handle = io.StringIO("\n".join(refseq_rows))
    parsed = read_refseq(handle)["chr10"]

    plus_starts = [102374436, 102378151, 102385005, 102388221]
    plus_ends = [102374644, 102378304, 102385153, 102391468]
    minus_starts = [103007846, 103022176, 103025134, 103028380]
    minus_ends = [103009508, 103022305, 103025439, 103028777]
    # Exons of the "-" gene are expected in reversed order.
    exon_starts = plus_starts + minus_starts[::-1]
    exon_ends = plus_ends + minus_ends[::-1]
    exon_directions = [1] * 4 + [-1] * 4

    cd_starts = [
        102378157 - 102378151 + 102374644 - 102374436,
        103028777 - 103028606,
    ]
    cd_ends = [509 + 1142, 321 + 831]

    expected = Genes(
        Regions(exon_starts, exon_ends, exon_directions),
        [0, 4, 8],
        coding_regions=Regions(cd_starts, cd_ends),
    )
    assert parsed == expected
Exemple #5
0
def regions_10b():
    """Return three regions, each 10 positions long, with directions 1, -1, 1."""
    starts = [2, 13, 17]
    ends = [12, 23, 27]
    directions = [1, -1, 1]
    return Regions(starts, ends, directions)
Exemple #6
0
def test_averageplot(bedgraph, regions_10b, true_signal):
    """Check AveragePlot output on regions shrunk by 3 at both ends.

    The unnormalized "y" values must equal ``true_signal`` with its
    first and last entries dropped.
    """
    average_plot = AveragePlot(8, do_normalize=False)
    shrunk_starts = regions_10b.starts + 3
    shrunk_ends = regions_10b.ends - 3
    shrunk = Regions(shrunk_starts, shrunk_ends, regions_10b.directions)
    result = average_plot([("chr1", bedgraph)], {"chr1": shrunk})
    observed = result["y"].values
    assert np.all(observed == true_signal[1:-1])
Exemple #7
0
def test_tssplot(bedgraph, regions_10b, true_signal):
    """Check TSSPlot output around the midpoints of the 10-base regions.

    Each region is replaced by a one-position region at its integer
    midpoint; the unnormalized "y" values must equal ``true_signal``.
    """
    tss_plot = TSSPlot(10, 10, do_normalize=False)
    midpoints = (regions_10b.starts + regions_10b.ends) // 2
    anchors = Regions(midpoints, midpoints + 1, regions_10b.directions)
    result = tss_plot([("chr1", bedgraph)], {"chr1": anchors})
    observed = result["y"].values
    assert np.all(observed == true_signal)
Exemple #8
0
def regions():
    """Return five regions of lengths 1 to 5, all with direction 1."""
    starts = [100, 200, 300, 400, 500]
    ends = [101, 202, 303, 404, 505]
    directions = [1] * 5
    return Regions(starts, ends, directions)
Exemple #9
0
def regions():
    """Return a mapping with three undirected regions under the key "chr1"."""
    starts = [4, 14, 24]
    ends = [10, 20, 30]
    return {"chr1": Regions(starts, ends)}