def test_bw():
    """Exercise repr, interval iteration, values() and stats() on test.bw."""
    bw = BigWig("libBigWig/test/test.bw")
    assert repr(bw) == "BigWig('libBigWig/test/test.bw')"

    # The three float32 values stored at positions 0, 1 and 2 of chromosome "1".
    first = 0.10000000149011612
    second = 0.20000000298023224
    third = 0.30000001192092896

    # Each of the first three intervals is one base wide.
    found = list(bw("1", 0, 99))
    for idx, val in enumerate((first, second, third)):
        assert found[idx] == Interval(chrom='1', start=idx, end=idx + 1, value=val)

    # default is to include all values: uncovered positions come back as nan
    padded = bw.values("1", 0, 9)
    arr_equal(padded, array('f', [first, second, third] + [nan] * 6))

    # Passing False as the fourth argument leaves only the covered positions.
    compact = bw.values("1", 0, 9, False)
    arr_equal(compact, array('f', [first, second, third]))

    assert bw.stats("1", 0, 9) == 0.2000000054637591
    assert bw.stats("1", 0, 9, stat="stdev") == 0.10000000521540645
    assert bw.stats("1", 0, 4, stat="coverage") == 0.75
    # With nBins=2 the result is an array holding one value per bin.
    assert bw.stats("1", 0, 4, stat="coverage", nBins=2) == array('d', [1.0, 0.5])

    bw.close()
def read_gerp(region, sends, path='/scratch/ucgd/lustre/u1021864/serial/hg19.gerp.bw'):
    """Read per-base scores for a region and for the exon spans listed in sends.

    Args:
        region: String of the form "chrom:start-end". A "chr" prefix is added
            to the chromosome name when it is missing.
        sends: Container that is iterated for keys and indexed by them; each
            entry holds exon starts at index 0 and exon ends at index 1.
        path: BigWig file to read the scores from.

    Returns:
        A tuple ``(region_scores, exon_scores)``. ``region_scores`` is a
        float32 numpy array for ``start - 1 .. end`` of the region;
        ``exon_scores`` is a flat list with the values of every exon span
        appended in iteration order.
    """
    gerp = BigWig(path)
    try:
        chrom, span = region.split(":")
        start, end = map(int, span.split("-"))
        if not chrom.startswith("chr"):
            chrom = "chr" + chrom

        exongerp = []
        for key in sends:
            exon_starts, exon_ends = sends[key][0], sends[key][1]
            for exs, exe in zip(exon_starts, exon_ends):
                # Starts are shifted down by one before querying, as for the
                # whole region below.
                exon_vals = gerp.values(chrom, int(exs) - 1, int(exe))
                exongerp.extend(np.frombuffer(exon_vals, dtype='f'))

        # NOTE(review): assumes values() returns an object that owns its own
        # memory, so the array stays valid after the file is closed -- confirm.
        region_vals = np.frombuffer(gerp.values(chrom, start - 1, end), dtype='f')
        return region_vals, exongerp
    finally:
        # The handle is opened on every call; release it even if a query fails.
        gerp.close()
def build_dnase_fc_scores(self):
    """Build a regions-by-samples table of mean DNase fold-change signal.

    For every sample in ``self.samples`` the file
    ``DNASE.<sample>.fc.signal.bigwig`` under ``DNASE_FOLD_COV_DIR`` is opened
    and the 'mean' statistic is taken over each region yielded by
    ``self.iter_regions()``.

    Returns:
        A pandas DataFrame with one row per region (indexed by
        ``self.data.index``) and one column per sample. NaN scores are
        replaced via ``np.nan_to_num`` before the frame is built.
    """
    path = DNASE_FOLD_COV_DIR
    scores = np.zeros((len(self), len(self.samples)), dtype=float)
    for sample_i, sample_name in enumerate(self.samples):
        fname = "DNASE.{}.fc.signal.bigwig".format(sample_name)
        b = BigWig(os.path.join(path, fname))
        try:
            for region_i, region in enumerate(self.iter_regions()):
                if region_i % 1000000 == 0:
                    # Progress report. A single pre-formatted argument prints
                    # the same text under both the print statement and the
                    # print function.
                    print("Sample %i/%i, row %i/%i" % (
                        sample_i + 1, len(self.samples), region_i, len(self)))
                scores[region_i, sample_i] = b.stats(
                    region.contig, region.start, region.stop, 'mean')
        finally:
            # Release this sample's file even if a stats() call fails.
            b.close()
    return pd.DataFrame(
        np.nan_to_num(scores), columns=self.samples, index=self.data.index)
def test_bw():
    """Check repr, interval iteration, values() and stats() against test.bw."""
    b = BigWig("libBigWig/test/test.bw")
    assert repr(b) == "BigWig('libBigWig/test/test.bw')"

    expected_intervals = [
        Interval(chrom='1', start=0, end=1, value=0.10000000149011612),
        Interval(chrom='1', start=1, end=2, value=0.20000000298023224),
        Interval(chrom='1', start=2, end=3, value=0.30000001192092896),
    ]
    intervals = list(b("1", 0, 99))
    for position, wanted in enumerate(expected_intervals):
        assert intervals[position] == wanted

    covered = [0.10000000149011612, 0.20000000298023224, 0.30000001192092896]

    # default is to include all values, so the six uncovered bases are nan
    with_gaps = array('f', covered + [nan, nan, nan, nan, nan, nan])
    arr_equal(b.values("1", 0, 9), with_gaps)

    # A False fourth argument returns only the covered bases.
    arr_equal(b.values("1", 0, 9, False), array('f', covered))

    # (query end, keyword arguments for stats, expected result)
    stat_cases = (
        (9, {}, 0.2000000054637591),
        (9, {"stat": "stdev"}, 0.10000000521540645),
        (4, {"stat": "coverage"}, 0.75),
        (4, {"stat": "coverage", "nBins": 2}, array('d', [1.0, 0.5])),
    )
    for end, kwargs, expected in stat_cases:
        v = b.stats("1", 0, end, **kwargs)
        assert v == expected

    b.close()
def build_dnase_fc_scores(self):
    """Compute the mean DNase fold-change signal per region and sample.

    Opens ``DNASE.<sample>.fc.signal.bigwig`` from ``DNASE_FOLD_COV_DIR`` for
    each entry of ``self.samples`` and stores the 'mean' statistic of every
    region from ``self.iter_regions()`` in a (regions x samples) matrix.

    Returns:
        pandas DataFrame whose columns are ``self.samples`` and whose index is
        ``self.data.index``; NaN entries are converted by ``np.nan_to_num``.
    """
    path = DNASE_FOLD_COV_DIR
    n_regions = len(self)
    n_samples = len(self.samples)
    scores = np.zeros((n_regions, n_samples), dtype=float)

    for sample_i, sample_name in enumerate(self.samples):
        fname = "DNASE.{}.fc.signal.bigwig".format(sample_name)
        b = BigWig(os.path.join(path, fname))
        try:
            for region_i, region in enumerate(self.iter_regions()):
                if region_i % 1000000 == 0:
                    # Progress line once per million regions. Formatting the
                    # message first keeps the output identical whether print
                    # is a statement or a function.
                    message = "Sample %i/%i, row %i/%i" % (
                        sample_i + 1, n_samples, region_i, n_regions)
                    print(message)
                scores[region_i, sample_i] = b.stats(
                    region.contig, region.start, region.stop, 'mean')
        finally:
            # Close the sample's file on both the success and the error path.
            b.close()

    return pd.DataFrame(
        np.nan_to_num(scores), columns=self.samples, index=self.data.index)
def perchrom(pli_gerp_chrom):
    """Collect paired GERP and pLI values for every pLI region on a chromosome.

    Args:
        pli_gerp_chrom: Sequence of three items ``(plipath, gerppath, chrom)``:
            the tabix-indexed pLI file, the BigWig file handed to
            ``read_gerp``, and the chromosome to query.

    Returns:
        A tuple ``(gerps, plis)`` of two parallel lists of floats: the first
        value returned by ``read_gerp`` for each region, and the last column
        of that region's record.
    """
    plipath, gerppath, chrom = pli_gerp_chrom
    pLI = tabix.open(plipath)
    gerp = BigWig(gerppath)

    gerps = []
    plis = []
    try:
        for region in pLI.querys(chrom):
            # The second element of read_gerp's result is not used here.
            gerpscore, _ = read_gerp(gerp, region)
            gerps.append(float(gerpscore))
            plis.append(float(region[-1]))
    finally:
        # Release the BigWig file once all regions have been scored.
        gerp.close()
    return gerps, plis
def perchrom(ccr_gerp_chrom):
    """Compute an overlap-weighted score per (ranges, gene) on one chromosome.

    Args:
        ccr_gerp_chrom: Sequence of three items ``(ccrpath, gerppath, chrom)``:
            the tabix-indexed region file, the BigWig file handed to
            ``read_gerp``, and the chromosome to query.

    Returns:
        Dict keyed by ``(ranges, gene)`` (columns 6 and 3 of a record). Each
        value is the tuple ``(weighted_score, pctile, gene, total_overlap,
        ranges, chrom)``, where the score is the sum of score * overlap divided
        by the summed overlap, and ``pctile`` comes from the first record of
        the group after sorting.

    Raises:
        ZeroDivisionError: If the overlaps of a group sum to zero.
    """
    ccrpath, gerppath, chrom = ccr_gerp_chrom
    ccr = tabix.open(ccrpath)
    gerp = BigWig(gerppath)

    gerps = []
    try:
        for region in ccr.querys(chrom):
            gene = region[3]
            ranges = region[6]
            pctile = float(region[-1])
            gerpscore, overlap = read_gerp(gerp, region)
            gerps.append((gerpscore, overlap, ranges, gene, pctile))
    finally:
        # Every BigWig read happens in the loop above, so close the file here.
        gerp.close()

    # groupby only merges adjacent items, so sort by the same key first.
    by_ranges_gene = itemgetter(2, 3)
    gerpdict = {}
    for key, grp in groupby(sorted(gerps, key=by_ranges_gene), by_ranges_gene):
        grp = list(grp)
        ranges, gene, pctile = grp[0][2], grp[0][3], grp[0][-1]
        scores = [record[0] for record in grp]
        lengths = [record[1] for record in grp]
        total_length = sum(lengths)
        gerpscore = sum([a * b for a, b in zip(scores, lengths)]) / total_length
        gerpdict[key] = (gerpscore, pctile, gene, total_length, ranges, chrom)
    return gerpdict
scores.append(grp[i][1]) lengths.append(grp[i][-1]) famscore = sum([a * b for a, b in zip(scores, lengths)]) / sum(lengths) gerpdict[family] = famscore return gerpdict #pfampath='pfam.hg19.bed' # pfam doms incl. introns pfampath = sys.argv[ 1] # from '/uufs/chpc.utah.edu/common/home/u1021864/analysis/pfam/pfam.genome.bed' # sorted by pfam name, in genome space gerppath = sys.argv[2] # '/scratch/ucgd/lustre/u1021864/serial/hg19.gerp.bw' ccrpath = sys.argv[ 3] # '/uufs/chpc.utah.edu/common/home/u1021864/analysis/exacresiduals/gnomad10x.5syn-ccrs.bed.gz' gerp = BigWig(gerppath) pfams = read_pfam(pfampath) #ccr = tabix.open(ccrpath) ccrs = score_average(pfampath, ccrpath) gerps = score_gerp(pfams, gerp) #for i in ccrs: # print i, ccrs[i] #for i in gerps: # print i, gerps[i] cscores, gscores, labels = [], [], [] for pfam in ccrs: cscores.append(ccrs[pfam]) gscores.append(gerps[pfam]) labels.append(pfam)
def test_bad_chr():
    """Check the results of querying the chromosome name "chr1".

    stats() is expected to return None and values() an empty sequence.
    """
    b = BigWig("libBigWig/test/test.bw")
    try:
        assert b.stats("chr1", 0, 10) is None
        v = b.values("chr1", 0, 10)
        assert len(v) == 0, v
    finally:
        # Close the file even when an assertion fails.
        b.close()
def test_seqs():
    """Check that ``chroms`` lists each chromosome name with its length."""
    b = BigWig("libBigWig/test/test.bw")
    try:
        assert b.chroms == [('1', 195471971), ('10', 130694993)], b.chroms
    finally:
        # Close the file even when the assertion fails.
        b.close()