Example #1
0
def test_bw():
    """Check repr, interval iteration, values() and stats() on the test file.

    All queries target chromosome "1" of ``libBigWig/test/test.bw``; the
    expected numbers are the exact values the assertions compare against.
    """
    bw = BigWig("libBigWig/test/test.bw")

    assert repr(bw) == "BigWig('libBigWig/test/test.bw')"

    # The three data values present at positions 0, 1 and 2.
    first = 0.10000000149011612
    second = 0.20000000298023224
    third = 0.30000001192092896

    found = list(bw("1", 0, 99))
    wanted = [
        Interval(chrom='1', start=0, end=1, value=first),
        Interval(chrom='1', start=1, end=2, value=second),
        Interval(chrom='1', start=2, end=3, value=third),
    ]
    # Index explicitly so a short result still fails with IndexError.
    for idx, interval in enumerate(wanted):
        assert found[idx] == interval

    # default is to include all values: uncovered positions come back as nan
    with_gaps = bw.values("1", 0, 9)
    arr_equal(with_gaps, array('f', [first, second, third] + [nan] * 6))

    # passing False as the fourth argument drops the nan entries
    covered_only = bw.values("1", 0, 9, False)
    arr_equal(covered_only, array('f', [first, second, third]))

    # default statistic
    assert bw.stats("1", 0, 9) == 0.2000000054637591

    assert bw.stats("1", 0, 9, stat="stdev") == 0.10000000521540645

    assert bw.stats("1", 0, 4, stat="coverage") == 0.75

    # with nBins=2 the result is an array with one entry per bin
    binned = bw.stats("1", 0, 4, stat="coverage", nBins=2)
    assert binned == array('d', [1.0, 0.5])

    bw.close()
Example #2
0
def read_gerp(region, sends, path='/scratch/ucgd/lustre/u1021864/serial/hg19.gerp.bw'):
    """Read bigWig values for a region and for the exon spans in ``sends``.

    Args:
        region: String of the form ``"chrom:start-end"``. A ``"chr"`` prefix
            is added to the chromosome name when it is missing.
        sends: Container iterated by key; for each key, ``sends[key][0]``
            holds exon starts and ``sends[key][1]`` the matching exon ends.
        path: Location of the bigWig file to open.

    Returns:
        A ``(region_values, exon_values)`` pair: a float32 numpy array for
        the whole region, and a flat list of the per-exon values in the
        order the exons were visited.
    """
    bigwig = BigWig(path)
    chrom, span = region.split(":")
    region_start, region_end = (int(coord) for coord in span.split("-"))
    if not chrom.startswith("chr"):
        chrom = "chr" + chrom

    # Every query uses start - 1 -- presumably converting 1-based inclusive
    # coordinates to the 0-based starts BigWig expects; confirm with callers.
    exon_values = []
    for name in sends:
        exon_starts = sends[name][0]
        exon_ends = sends[name][1]
        for exon_start, exon_end in zip(exon_starts, exon_ends):
            raw = bigwig.values(chrom, int(exon_start) - 1, int(exon_end))
            exon_values.extend(np.frombuffer(raw, dtype='f'))

    region_raw = bigwig.values(chrom, region_start - 1, region_end)
    region_values = np.frombuffer(region_raw, dtype='f')
    return region_values, exon_values
 def build_dnase_fc_scores(self):
     """Build a regions x samples table of mean DNase fold-coverage signal.

     For every name in ``self.samples`` the file
     ``DNASE.<sample>.fc.signal.bigwig`` under ``DNASE_FOLD_COV_DIR`` is
     opened and the 'mean' statistic is requested for each region yielded
     by ``self.iter_regions()``.

     Returns:
         A ``pandas.DataFrame`` with one row per region (indexed by
         ``self.data.index``) and one column per sample. The scores are
         passed through ``np.nan_to_num`` before being returned, so NaN
         entries become 0.
     """
     path = DNASE_FOLD_COV_DIR
     scores = np.zeros((len(self), len(self.samples)), dtype=float)
     for sample_i, sample_name in enumerate(self.samples):
         fname = "DNASE.{}.fc.signal.bigwig".format(sample_name)
         b = BigWig(os.path.join(path, fname))
         # Close the handle even if a query fails part-way through.
         try:
             for region_i, region in enumerate(self.iter_regions()):
                 if region_i % 1000000 == 0:
                     # Parenthesised single-argument print gives the same
                     # output on Python 2 and is valid syntax on Python 3.
                     print("Sample %i/%i, row %i/%i" % (
                         sample_i + 1, len(self.samples), region_i, len(self)))
                 scores[region_i, sample_i] = b.stats(
                     region.contig, region.start, region.stop, 'mean')
         finally:
             b.close()
     return pd.DataFrame(
         np.nan_to_num(scores), columns=self.samples, index=self.data.index)
Example #4
0
def test_bw():
    """Check repr, interval iteration, values() and stats() on the test file.

    The handle is closed in a ``finally`` clause so a failing assertion does
    not leave the file open.
    """
    b = BigWig("libBigWig/test/test.bw")
    try:
        assert repr(b) == "BigWig('libBigWig/test/test.bw')"

        intervals = list(b("1", 0, 99))

        assert intervals[0] == Interval(chrom='1',
                                        start=0,
                                        end=1,
                                        value=0.10000000149011612)
        assert intervals[1] == Interval(chrom='1',
                                        start=1,
                                        end=2,
                                        value=0.20000000298023224)
        assert intervals[2] == Interval(chrom='1',
                                        start=2,
                                        end=3,
                                        value=0.30000001192092896)

        # default is to include all values: uncovered positions are nan
        vals = b.values("1", 0, 9)
        exp = array('f', [
            0.10000000149011612, 0.20000000298023224, 0.30000001192092896,
            nan, nan, nan, nan, nan, nan
        ])
        arr_equal(vals, exp)

        # passing False as the fourth argument drops the nan entries
        vals = b.values("1", 0, 9, False)
        exp = array(
            'f',
            [0.10000000149011612, 0.20000000298023224, 0.30000001192092896])
        arr_equal(vals, exp)

        # default statistic
        v = b.stats("1", 0, 9)
        assert v == 0.2000000054637591

        v = b.stats("1", 0, 9, stat="stdev")
        assert v == 0.10000000521540645

        v = b.stats("1", 0, 4, stat="coverage")
        assert v == 0.75

        # with nBins=2 the result is an array with one entry per bin
        v = b.stats("1", 0, 4, stat="coverage", nBins=2)
        assert v == array('d', [1.0, 0.5])
    finally:
        b.close()
Example #5
0
 def build_dnase_fc_scores(self):
     """Return a DataFrame of mean DNase fold-coverage per region and sample.

     Each sample in ``self.samples`` maps to the bigWig file
     ``DNASE.<sample>.fc.signal.bigwig`` inside ``DNASE_FOLD_COV_DIR``. The
     'mean' statistic over ``(region.contig, region.start, region.stop)`` is
     stored for every region produced by ``self.iter_regions()``.

     Returns:
         ``pandas.DataFrame`` with ``self.data.index`` as the index and
         ``self.samples`` as the columns; values go through
         ``np.nan_to_num`` first, so NaN scores are reported as 0.
     """
     path = DNASE_FOLD_COV_DIR
     n_samples = len(self.samples)
     scores = np.zeros((len(self), n_samples), dtype=float)
     for sample_i, sample_name in enumerate(self.samples):
         fname = "DNASE.{}.fc.signal.bigwig".format(sample_name)
         b = BigWig(os.path.join(path, fname))
         try:
             for region_i, region in enumerate(self.iter_regions()):
                 # Progress report once every million regions.
                 if region_i % 1000000 == 0:
                     # print(...) with a single pre-formatted string behaves
                     # identically on Python 2 and compiles on Python 3.
                     print("Sample %i/%i, row %i/%i" % (
                         sample_i + 1, n_samples, region_i, len(self)))
                 scores[region_i, sample_i] = b.stats(
                     region.contig, region.start, region.stop, 'mean')
         finally:
             # Release the file handle even when a query raises.
             b.close()
     return pd.DataFrame(np.nan_to_num(scores),
                         columns=self.samples,
                         index=self.data.index)
Example #6
0
def perchrom(pli_gerp_chrom):
    """Collect paired GERP and pLI scores for every region on one chromosome.

    Args:
        pli_gerp_chrom: A 3-item sequence ``(plipath, gerppath, chrom)``:
            the tabix-indexed pLI file, the GERP bigWig file, and the
            chromosome to query.

    Returns:
        ``(gerps, plis)``: two lists of floats in query order. ``gerps``
        holds the score returned by ``read_gerp`` for each region and
        ``plis`` the last column of that region converted to float.
    """
    plipath, gerppath, chrom = pli_gerp_chrom
    pLI = tabix.open(plipath)
    gerp = BigWig(gerppath)

    gerps = []
    plis = []
    try:
        for region in pLI.querys(chrom):
            # read_gerp also returns an overlap value, which is not used here.
            gerpscore, _overlap = read_gerp(gerp, region)
            gerps.append(float(gerpscore))
            plis.append(float(region[-1]))
    finally:
        # Only plain floats are returned, so the handle can be released now.
        gerp.close()

    return gerps, plis
Example #7
0
def perchrom(ccr_gerp_chrom):
    """Compute a length-weighted GERP score per (ranges, gene) group.

    Args:
        ccr_gerp_chrom: A 3-item sequence ``(ccrpath, gerppath, chrom)``:
            the tabix-indexed CCR file, the GERP bigWig file, and the
            chromosome to query.

    Returns:
        A dict keyed by ``(ranges, gene)`` (columns 6 and 3 of each record).
        Each value is ``(gerpscore, pctile, gene, total_length, ranges,
        chrom)``, where ``gerpscore`` is the mean of the per-record scores
        weighted by the overlap that ``read_gerp`` reports, and ``pctile``
        is taken from the first record of the group.

    Raises:
        ZeroDivisionError: If the overlaps of a group sum to zero.
    """
    ccrpath, gerppath, chrom = ccr_gerp_chrom
    ccr = tabix.open(ccrpath)
    gerp = BigWig(gerppath)

    records = []
    try:
        for region in ccr.querys(chrom):
            gene = region[3]
            ranges = region[6]
            pctile = float(region[-1])
            gerpscore, overlap = read_gerp(gerp, region)
            records.append((gerpscore, overlap, ranges, gene, pctile))
    finally:
        # All bigWig reads are finished; release the handle.
        gerp.close()

    # groupby only merges adjacent items, so sort by the same key first.
    by_ranges_and_gene = itemgetter(2, 3)
    gerpdict = {}
    ordered = sorted(records, key=by_ranges_and_gene)
    for key, grp in groupby(ordered, by_ranges_and_gene):
        grp = list(grp)
        ranges = grp[0][2]
        gene = grp[0][3]
        pctile = grp[0][-1]
        scores = [record[0] for record in grp]
        lengths = [record[1] for record in grp]
        total_length = sum(lengths)
        gerpscore = sum(a * b for a, b in zip(scores, lengths)) / total_length
        gerpdict[key] = (gerpscore, pctile, gene, total_length, ranges, chrom)

    return gerpdict
Example #8
0
            scores.append(grp[i][1])
            lengths.append(grp[i][-1])
        famscore = sum([a * b for a, b in zip(scores, lengths)]) / sum(lengths)
        gerpdict[family] = famscore

    return gerpdict


# Script entry: three positional command-line arguments are required, in the
# order pfam BED path, GERP bigWig path, CCR BED path. The trailing comments
# record the paths originally used.
#pfampath='pfam.hg19.bed' # pfam doms incl. introns
pfampath = sys.argv[
    1]  # from '/uufs/chpc.utah.edu/common/home/u1021864/analysis/pfam/pfam.genome.bed' # sorted by pfam name, in genome space
gerppath = sys.argv[2]  # '/scratch/ucgd/lustre/u1021864/serial/hg19.gerp.bw'
ccrpath = sys.argv[
    3]  # '/uufs/chpc.utah.edu/common/home/u1021864/analysis/exacresiduals/gnomad10x.5syn-ccrs.bed.gz'

# Open the GERP track and load the pfam records used for GERP scoring.
gerp = BigWig(gerppath)
pfams = read_pfam(pfampath)
#ccr = tabix.open(ccrpath)

# Two score collections, both indexed by pfam name in the loop below.
ccrs = score_average(pfampath, ccrpath)
gerps = score_gerp(pfams, gerp)

# Disabled debugging output (Python 2 print statements).
#for i in ccrs:
#    print i, ccrs[i]
#for i in gerps:
#    print i, gerps[i]

# Build three parallel lists (CCR score, GERP score, label), one entry per
# key of ccrs.
# NOTE(review): gerps[pfam] assumes every key of ccrs is also present in
# gerps -- presumably a KeyError otherwise; confirm against score_gerp.
cscores, gscores, labels = [], [], []
for pfam in ccrs:
    cscores.append(ccrs[pfam])
    gscores.append(gerps[pfam])
    labels.append(pfam)
Example #9
0
def test_bad_chr():
    """Querying the name "chr1" must yield no data rather than an error.

    ``stats`` is expected to return None and ``values`` an empty sequence.
    The handle is closed in ``finally`` so it is released even when an
    assertion fails.
    """
    b = BigWig("libBigWig/test/test.bw")
    try:
        assert b.stats("chr1", 0, 10) is None
        v = b.values("chr1", 0, 10)
        # Include the returned values in the failure message.
        assert len(v) == 0, v
    finally:
        b.close()
Example #10
0
def test_seqs():
    """``chroms`` must list the test file's (name, length) pairs in order.

    The handle is closed in ``finally`` so it is released even when the
    assertion fails.
    """
    b = BigWig("libBigWig/test/test.bw")
    try:
        # Show the actual value in the failure message.
        assert b.chroms == [('1', 195471971), ('10', 130694993)], b.chroms
    finally:
        b.close()
Example #11
0
def test_bad_chr():
    """Querying the name "chr1" gives None from stats() and no values()."""
    bw = BigWig("libBigWig/test/test.bw")
    assert bw.stats("chr1", 0, 10) is None
    # The returned values double as the assertion message on failure.
    returned = bw.values("chr1", 0, 10)
    assert len(returned) == 0, returned