def test_BAM_reading_without_caching(self):
    """Check per-strand cut counts from a BAM handler created with caching=0."""
    reads = pyDNase.BAMHandler(pyDNase.example_reads(), caching=0)
    region = "chr6,170863142,170863150,+"
    expected_by_strand = (
        ("+", [1, 0, 0, 0, 1, 11, 1, 0]),
        ("-", [0, 1, 0, 0, 1, 0, 0, 1]),
    )
    # The handler is queried once per strand, i.e. two separate lookups.
    for strand, expected in expected_by_strand:
        numpy.testing.assert_array_equal(reads[region][strand], expected)
def test_BAM_reading(self):
    """Check per-strand cut counts from a BAM handler with default settings."""
    reads = pyDNase.BAMHandler(pyDNase.example_reads())
    query = "chr6,170863142,170863150,+"

    # Forward strand first ...
    forward = reads[query]["+"]
    numpy.testing.assert_array_equal(forward, [1, 0, 0, 0, 1, 11, 1, 0])

    # ... then the same region is looked up a second time for the reverse strand.
    reverse = reads[query]["-"]
    numpy.testing.assert_array_equal(reverse, [0, 1, 0, 0, 1, 0, 0, 1])
def getBamCutMean(regions, bam_file):
    """Return, for each region, the total cut count divided by the region width.

    Args:
        regions: iterable of objects exposing ``chrom``, ``start`` and ``stop``.
        bam_file: passed straight to ``pyDNase.BAMHandler``.

    Returns:
        A list with one value per region, in input order: the sum of the "+"
        and "-" cut counts divided by ``stop - start``.
    """
    handler = pyDNase.BAMHandler(bam_file)

    def mean_cuts(region):
        # Regions are always queried as "chrom,start,stop,+".
        key = ",".join(
            [str(region.chrom), str(region.start), str(region.stop), "+"])
        counts = handler[key]
        width = region.stop - region.start
        return sum(counts['+'] + counts['-']) * 1.0 / width

    return [mean_cuts(region) for region in regions]
def test_footprinting(self):
    """Test footprinting.

    Runs ``wellington`` on the first example region with the example reads and
    compares the resulting ``scores`` and ``lengths`` arrays against the stored
    reference values below.
    """
    #Load test data
    reads = pyDNase.BAMHandler(pyDNase.example_reads())
    regions = pyDNase.GenomicIntervalSet(pyDNase.example_regions())
    footprinter = wellington(regions[0], reads)
    #Note - we only check the accuracy of the footprinting to 3 decimal places to allow for differences in floating point numbers
    # Reference scores: the array starts and ends with a run of 0.0 values.
    numpy.testing.assert_array_almost_equal(footprinter.scores, [
        0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
        0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
        0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
        0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
        -0.8505197962574915, -0.7522459055434079, -0.6405956238609599, -0.35029217770692905,
        -0.19445213824845226, -0.04510918998207078, -0.013127544708030047, -0.019434755711449096,
        -0.017813062409838532, -0.4899192539679181, -0.7366170062412767, -1.160234291218491,
        -1.4932241116142613, -2.528451574312211, -2.9873463332686545, -4.0789439624702215,
        -4.608073840135845, -4.6080738401358445, -5.46591166889954, -6.317058518040485,
        -7.846849141309235, -8.70970430615968, -7.84684914298093, -10.57133857477595,
        -9.524456623200592, -8.450720744685238, -7.351088844276472, -6.227879918162327,
        -5.085807684913266, -1.412414402021511, -3.461932293846784, -3.6968244901998126,
        -3.6968244901997713, -3.9374380500569046, -3.9374380500569046, -3.502106381128687,
        -3.968687434788506, -3.968687434788506, -3.9686874347885044, -4.210084222760481,
        -4.708248147109799, -4.481083945460659, -4.614616491048433, -6.331304868565458,
        -6.7188196319447515, -7.805240790859276, -10.096125803164037, -10.096125904865069,
        -9.804317009970552, -10.942957174739428, -10.831197056706369, -9.451636014876547,
        -9.271803479479166, -10.547425524609011, -11.356756808330887, -10.173763450595242,
        -17.266997956146163, -24.135650052599853, -26.79974412054261, -24.068532700189742,
        -20.83033463447785, -17.442306072203564, -3.3271869067645095, -1.552524387513255,
        -1.2303389949451933, -1.116146321342096, -0.7241346073398854,
        -0.8217741198401821, -0.5077397193727583, -0.4619110913457732, -0.22648726483418524,
        -0.08368942693734599, -0.04662652321248819, -0.10740322088702083, -0.1600382576388667,
        -0.09849358892510252, -0.2996877100052051, -0.4956516466712493, -0.8286771565689258,
        -0.7441816651207845, -0.5312102440124086, -6.089145200199429, -54.524611990632465,
        -55.11290166247622, -53.73358776712574, -56.37380673644542, -59.597668457279916,
        -63.142121596069494, -69.8245790871056, -76.97479986221292, -83.6326531975367,
        -88.05928977864403, -87.62205344847811, -90.7846299628178, -94.85120273316905,
        -90.09506169785546, -85.09363194018195, -90.25622681870428, -80.40916250197246,
        -84.41195387381595, -96.25001089840575, -105.99203665518576, -109.60076099775432,
        -116.04973655820825, -124.40507207962382, -120.71820677125163, -121.99289957155713,
        -121.7696295849731, -128.86709184814546, -130.00197395916774, -138.7286574562139,
        -150.07398897152254, -141.58993458465335, -134.33745073269844, -134.76596995468543,
        -106.6912682602024, -96.02214212537493, -85.8950778423277, -73.04392809450209,
        -54.85091731066348, -44.010732916962205, -31.573437293391223, -23.59371038683095,
        -18.62378346291484, -3.2863459020700057, -1.8733702431391752, -0.492074167081423,
        -0.27948577530733343, -0.27948577530733343, -0.07138091975833981, -0.09972653646891905,
        -0.05418579937724513, -0.024132554170139438, -0.021842812415429565, -0.9566534364564785,
        -6.932360951667957, -11.187077720714367, -13.553355643835602, -14.21631406001477,
        -14.983929833667665, -15.422758574896921, -18.32278174888965, -18.2834926735795,
        -17.265359820713286, -16.13035610465361, -14.086076680349992, -13.521427957090859,
        -12.515293283803214, -11.480271740126698, -9.92078604101271, -8.797191973771438,
        -6.985510255611701, -5.426767915467293, -5.183152081566609, -3.7475983370968295,
        -1.9153547972282414, -0.0006083021245538324, -13.64272847695586, -10.286808471857325,
        -15.63569341874549, -20.86940117070692, -22.928591109686124, -30.496433497261098,
        -26.10052633266505, -29.221144392666716, -24.0276270737085, -21.301001754269702,
        -20.97154340860586, -15.798224427435104, -17.780912132981612, -24.823354886252613,
        -24.604927499889286, -24.955334454941635, -38.74241644973382, -43.782982787325366,
        -46.80273522972689, -46.08571305295883, -47.92277577875605, -41.4868217475951,
        -37.915322367616675, -34.16174895135005, -33.58267055798403, -32.06130865601216,
        -34.094574908150825, -39.695727106225405, -40.120719852615196, -41.05121481573844,
        -42.01796136083251, -39.75209693618059, -35.73339613779332, -34.731089314533676,
        -32.694583271242884, -29.577625993685, -28.026659577292953, -25.215089099008644,
        -25.174202473704753, -21.952113990014446, -17.028869764873075, -15.578727453806595,
        -16.1579750791396, -12.974390056172448, -8.418484753962995, -5.7847304546785905,
        -2.2267773783077134, -1.4570520375724902, -1.543691534890984, -1.575957362444019,
        -0.7176800307627448, -0.7968619556272615, -4.841045489929452, -5.248527604937139,
        -1.0472142687516643, -1.0630763089203221, -2.185755905394793, -3.8307492546267254,
        -4.993169872339857, -7.2764872801107385, -6.792829090234741, -6.452991771598523,
        -6.952945781664499, -8.215168486202954, -6.613961853070211, -22.150574756810474,
        -28.514525290020345, -27.33821547951633, -29.034538366843996, -33.82258103970177,
        -41.26481032907057, -40.912839794048644, -48.684226156049405, -49.44508720397513,
        -61.863467137712874, -70.11156862148243, -82.93974699146762, -91.62613467860213,
        -91.54466150389183, -73.5404690802315, -75.77506886003911, -78.05398228595476,
        -84.42906672420139, -93.01020782082938, -89.65901048860756, -109.20614016921928,
        -121.0826042903611, -120.2996268556599, -117.38782641714545, -128.50467987996305,
        -128.9595101418021, -133.14841986541902, -136.82233726671367, -133.94746637928725,
        -154.5649504690748, -164.11983575086742, -159.85307484109336, -151.89784688535133,
        -153.56557629402886, -146.72984757341305, -135.04501822595842, -127.92055598311715,
        -126.08111294376953,
        -120.03403862241993, -99.25696665821185, -71.19178328684012, -64.94518489350295,
        -59.98207339614661, -54.12991577221696, -43.206052468123545, -29.456860663206527,
        -6.411526985333728, -6.44709453786988, -6.215828945120546, -5.762898291384889,
        -4.3769156224166315, -3.2727915503830047, -2.616087927600661, -2.313254659995694,
        -1.8641066899878078, -1.8186414374916933, -0.8008712043775049, -0.6426129783652371,
        -0.5224073311989104, -0.2710345166975603, -0.43819657644966853, -1.2626459311104576,
        -1.9408301832235342, -3.9812039032702886, -3.9812039032702886, -2.861605777578473,
        -3.2137507785013066, -3.2137507785013066, -2.9669916392942004, -3.2617340566815645,
        -3.9686874347885044, -3.54350638697767, -3.54350638697767, -3.1070679887817896,
        -2.8384054421005627, -2.2611557931086583, -2.9566374983191013, -2.2617270920463315,
        -2.5370237970085574, -3.2091208219605813, -3.0532448758817448, -1.6966894030794892,
        -2.2744775410764126, -2.729866824495538, -3.080565957210189, -2.808261821233711,
        -3.251159821714309, -2.1636899060453407,
        0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
        0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
        0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
        0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0
    ], decimal=3)
    # Reference lengths are integers and are compared exactly.
    numpy.testing.assert_array_equal(footprinter.lengths, [
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        11, 11, 15, 13, 11, 15, 13, 25, 25, 11, 13, 15, 17, 19, 21, 23, 25, 25, 11, 13,
        15, 17, 15, 21, 19, 17, 15, 13, 11, 21, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
        25, 25, 23, 25, 15, 17, 19, 19, 17, 15, 13, 19, 25, 25, 23, 25, 11, 11, 13, 15,
        13, 11, 11, 25, 25, 15, 15, 19, 17, 15, 13, 11, 25, 25, 25, 11, 15, 17, 19, 15,
        23, 11, 25, 25, 25, 25, 25, 25, 21, 23, 25, 25, 25, 25, 25, 23, 25, 25, 25, 21,
        23, 25, 25, 23, 25, 25, 21, 19, 19, 21, 25, 25, 25, 23, 25, 23, 21, 19, 19, 15,
        15, 11, 11, 11, 23, 25, 25, 25, 25, 25, 25, 25, 25, 25, 11, 11, 13, 15, 11, 11,
        21, 17, 15, 13, 15,
        13, 25, 25, 23, 21, 19, 19, 13, 13, 13, 11, 25, 11, 13, 15, 17, 11, 13, 15, 17,
        15, 13, 11, 25, 15, 17, 19, 19, 23, 25, 25, 21, 23, 21, 19, 17, 13, 25, 25, 25,
        25, 25, 25, 25, 23, 21, 19, 25, 25, 23, 11, 11, 15, 15, 13, 15, 13, 11, 19, 15,
        13, 11, 11, 11, 11, 11, 15, 15, 19, 21, 23, 25, 25, 23, 25, 25, 15, 11, 13, 15,
        17, 19, 21, 23, 25, 25, 25, 23, 25, 25, 25, 25, 25, 25, 25, 25, 25, 21, 23, 25,
        25, 25, 25, 25, 25, 21, 23, 25, 25, 25, 25, 23, 25, 25, 25, 25, 23, 21, 19, 15,
        15, 13, 11, 13, 25, 25, 25, 25, 25, 25, 25, 23, 25, 25, 25, 25, 11, 11, 11, 13,
        11, 11, 15, 17, 17, 21, 23, 25, 25, 25, 25, 23, 17, 19, 17, 15, 13, 11, 19, 11,
        13, 15, 15, 13, 11,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
    ])
"Report cuts on the negative strand as positive numbers instead of negative (default: False)", default=False) parser.add_argument("-A", action="store_true", help="ATAC-seq mode (default: False)", default=False) parser.add_argument( "regions", help="BED file of the regions you want to write wig tracks for") parser.add_argument("reads", help="The BAM file containing the read data") parser.add_argument("fw_output", help="Path to write the forward reads wig track to") parser.add_argument("rev_output", help="Path to write the reverse reads wig track to") args = parser.parse_args() reads = pyDNase.BAMHandler(args.reads, caching=True, ATAC=args.A) regions = pyDNase.GenomicIntervalSet(args.regions) fwigout = open(args.fw_output, "w") bwigout = open(args.rev_output, "w") #Required for UCSC upload print >> fwigout, "track type=wiggle_0" print >> bwigout, "track type=wiggle_0" #Prints all the wig values but sorts by chromosome/genomic location first #TODO: port this most awesome (and hacky) code iteration code to the main API, possibly using a generator expression? puts("Writing wig tracks...") for each in progress.bar([ item for sublist in sorted(regions.intervals.values()) for item in sorted(sublist, key=lambda peak: peak.startbp) ]):
parser.add_argument("control_only_output",
                    help="File to write control specific footprint scores to")
args = parser.parse_args()

# Sanity check parameters from the user.
try:
    args.footprint_sizes = xrange_from_string(args.footprint_sizes)
except ValueError:
    raise RuntimeError("Footprint sizes must be supplied as from,to,step")
# These checks are raised explicitly instead of using `assert` statements so
# that they still run when Python is started with -O (which strips asserts).
# The exception type and messages are unchanged.
if not 0 < args.FDR_cutoff < 1:
    raise AssertionError("FDR must be between 0 and 1")
if not args.FDR_limit < 0:
    raise AssertionError("FDR limit must be less than 0")

# Treatment
reads2 = pyDNase.BAMHandler(args.treatment_bam, caching=0, ATAC=args.A)
# Control
reads1 = pyDNase.BAMHandler(args.control_bam, caching=0, ATAC=args.A)
# Regions of Interest
regions = pyDNase.GenomicIntervalSet(args.bedsites)

# Output (opened with buffering=1, i.e. line buffered)
treatment_output = open(args.treatment_only_output, "w", buffering=1)
control_output = open(args.control_only_output, "w", buffering=1)

# Determine Number of CPUs to use: the user's value if one was given
# (and is truthy), otherwise every CPU the machine reports.
CPUs = args.processes if args.processes else mp.cpu_count()

# NOTE: This roughly scales at about 450mb per 300 regions held in memory
max_regions_cached_in_memory = 50 * CPUs
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

import matplotlib.pyplot as plt
import pyDNase
from pyDNase.footprinting import wellington

# Reads and regions come from pyDNase's example_reads() / example_regions()
reads = pyDNase.BAMHandler(pyDNase.example_reads())
regions = pyDNase.GenomicIntervalSet(pyDNase.example_regions())

# Cut counts for the first region: "+" strand in red, negated "-" strand in blue
forward_cuts = reads[regions[0]]["+"]
plt.plot(forward_cuts, c="red")
reverse_cuts = reads[regions[0]]["-"]
plt.plot([-count for count in reverse_cuts], c="blue")

# Wellington scores for the same region, drawn in black
footprinter = wellington(regions[0], reads)
plt.plot(footprinter.scores, c="black")
plt.show()
help= "Size of flanking area around centre of the regions to plot (default: 50)", default=50, type=int) parser.add_argument("-y", help="ymax (default: auto)", default=0, type=int) parser.add_argument( "regions", help="BED file of the regions you want to generate the average profile for" ) parser.add_argument("reads", help="The BAM file containing the DNase-seq data") parser.add_argument("output", help="filename to write the output to (use .pdf or .png)") args = parser.parse_args() xsize = args.window_size reads = pyDNase.BAMHandler(args.reads) regions = pyDNase.GenomicIntervalSet(args.regions) #Set all strands to positive #for each in regions: # each.strand = "+" regions.resizeRegions(xsize) fw = [] rv = [] #TODO: Make this memory efficient - we don't need to store all the fw and rvs plt.figure(num=None, figsize=(4, 12)) #plt.subplot(211) plt.subplot2grid((4, 1), (0, 0)) print("Plotting cut counts...")
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

import argparse
import pyDNase
from clint.textui import progress

parser = argparse.ArgumentParser(
    description=
    'writes a BED file with the FOS for the interval specified as the score')
parser.add_argument("-A",
                    action="store_true",
                    help="ATAC-seq mode (default: False)",
                    default=False)
parser.add_argument(
    "regions",
    help="BED file of the regions you want to generate the average profile for"
)
parser.add_argument("reads",
                    help="The BAM file containing the DNase-seq data")
parser.add_argument("output", help="filename to write the output to")
args = parser.parse_args()

reads = pyDNase.BAMHandler(args.reads, ATAC=args.A)
regions = pyDNase.GenomicIntervalSet(args.regions)

# The output is opened in a `with` block so it is flushed and closed even if
# scoring a region raises part-way through.
with open(args.output, "w") as outfile:
    for i in progress.bar(regions):
        # Store the FOS as the interval's score, then write the interval's
        # string form on its own line (same output as `print >> outfile, i`,
        # but valid on both Python 2 and Python 3).
        i.score = reads.FOS(i)
        outfile.write(str(i) + "\n")
cell_type = cell_types[cell_type_id].strip() filebase = DATADIR + "/DNase/DNASE." + cell_type bams = glob.glob(filebase + ".*.bam") if cell_type == "K562": # there are so many otherwise bams = [filebase + '.biorep2.techrep%i.bam' % i for i in (3, 5)] ps = pysam.AlignmentFile(bams[0], "rb") chrs = zip(ps.references, ps.lengths) bam_handlers = [] for f in bams: try: bh = pyDNase.BAMHandler(f, caching=False) bam_handlers.append(bh) except: print("Problem with " + f) total_cuts = 0L data = {} where = {} chunk = 1000000 import gzip outfiles = { strand: gzip.open(filebase + strand + ".txt.gz", "wb") for strand in ("+", "-") }
outputFileName = sys.argv[3]

# Parameters
cutoff = -30
footprintSizes = range(6, 40, 1)
to_remove = []

# Creating new region file name with the first three columns only.
# This is done in Python rather than through `os.system("cut -f 1,2,3 ...")`
# so that file names containing spaces or shell metacharacters are handled
# safely, and so that a missing input file raises instead of being ignored.
newRegionFileName = outputFileName + "regions.bed"
with open(regionFileName) as regionFile, \
        open(newRegionFileName, "w") as newRegionFile:
    for line in regionFile:
        # Like `cut`, a line with fewer than three tab-separated fields is
        # written out unchanged.
        columns = line.rstrip("\n").split("\t")
        newRegionFile.write("\t".join(columns[:3]) + "\n")
to_remove.append(newRegionFileName)

# Execution
regions = pyDNase.GenomicIntervalSet(newRegionFileName)
reads = pyDNase.BAMHandler(bamFileName)
# `with` guarantees the output is closed even if footprinting a region fails.
with open(outputFileName, "w") as outputFile:
    for region in regions:
        footprinter = fp.wellington(region,
                                    reads,
                                    shoulder_sizes=range(35, 36),
                                    footprint_sizes=footprintSizes,
                                    FDR=0,
                                    bonferroni=0)
        footprints = footprinter.footprints(withCutoff=cutoff)
        # One tab-separated line per footprint:
        # chromosome, start, end, label, score, strand
        for e in footprints:
            outputFile.write("\t".join([
                str(k) for k in
                [e.chromosome, e.startbp, e.endbp, e.label, e.score, e.strand]
            ]) + "\n")
def get_bam5p(
        bdir,
        label_dic,
        strands=['+', '-'],
        fle_tag="TF",
        force_read=False,
        verbose=1):
    """Return per-region cut profiles averaged over a cell's BAM files.

    Results are cached in a gzip-compressed pickle named
    ``<fle_tag>_<sorted cell names joined by "_">.dnase.pkl.gz``. The cache is
    used when it can be read, unless ``force_read`` is true.

    Args:
        bdir: directory searched for BAM files matching ``*<cell>*.bam``.
        label_dic: nested mapping ``cell -> label -> chrom -> ranges``, where
            each range is indexed as ``(start, end)``. The window size is taken
            from the first range of each chromosome; chromosome 23 is queried
            as "X".
        strands: strand keys whose counts are summed into each profile.
            The default list is never mutated.
        fle_tag: prefix of the cache file name.
        force_read: if true, ignore any cache and re-read the BAM files.
        verbose: 0 is silent, >0 prints a summary, >1 also prints progress.

    Returns:
        ``dta[cell][label][chrom]``: an array with one row per range and one
        column per window position, divided by the number of BAM files found
        for the cell. If no BAM file matches a cell the divisor is zero.
    """
    pkl_fle = fle_tag + "_" + "_".join(sorted(
        label_dic.keys())) + '.dnase.pkl.gz'

    loaded = False
    if not force_read:
        try:
            # NOTE: unpickling can execute arbitrary code; the cache file
            # must come from a trusted source (normally an earlier run).
            with gzip.open(pkl_fle, 'rb') as handle:
                dta = cPickle.load(handle)
            if verbose > 1:
                print("Read " + pkl_fle)
            loaded = True
        except Exception:
            # Missing or unreadable cache: fall through and rebuild it.
            # `Exception` (not a bare except) lets KeyboardInterrupt through.
            loaded = False

    if not loaded:
        dta = {}
        for cell in label_dic:
            bam_fles = glob.glob(os.path.join(bdir, '*' + cell + '*.bam'))
            dta[cell] = {}
            for label in label_dic[cell]:
                dta[cell][label] = {}
                for chrom in label_dic[cell][label]:
                    granges = label_dic[cell][label][chrom]
                    chroms = 'X' if chrom == 23 else str(chrom)
                    window_size = granges[0][1] - granges[0][0]
                    # Built-in float replaces the removed alias np.float.
                    totals = np.zeros((len(granges), window_size),
                                      dtype=float)
                    for bam_fle in bam_fles:
                        reads = pyDNase.BAMHandler(bam_fle, caching=False)
                        for gi, grange in enumerate(granges):
                            # could not decipher what this function returns; cut counts per position?
                            temp = reads["chr%s,%i,%i,+" %
                                         (chroms, grange[0], grange[1])]
                            for strand in strands:
                                totals[gi, :] += temp[strand]
                    # average per bam_fle for a cell
                    N = 0.0 + len(bam_fles)
                    dta[cell][label][chrom] = totals / N
            if verbose > 1:
                print("Parsed " + str(bam_fles) +
                      ' for average 5p cuts for ' + cell)
        with gzip.open(pkl_fle, 'wb') as handle:
            cPickle.dump(dta, handle, -1)  # -1 is for HIGHEST_PROTOCOL
        if verbose > 1:
            print("Wrote " + pkl_fle)

    if verbose > 0:
        print("FUNCTION " + myself() + " DTA:")
        for cell in dta:
            for label in dta[cell]:
                # Total number of rows over all chromosomes of this label.
                i = sum(len(dta[cell][label][chrom])
                        for chrom in dta[cell][label])
                print("%s %s %s" % (cell, label, "ALL chroms" + str(i)))
        print("")
    return dta
# GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program. If not, see <http://www.gnu.org/licenses/>. import argparse from clint.textui import progress, puts import pyDNase parser = argparse.ArgumentParser(description='Writes WIG file with the cut information based on the regions in reads BED file and the reads in reads BAM file') parser.add_argument("regions", help="BED file of the regions you want to write wig tracks for") parser.add_argument("reads", help="The BAM file containing the read data") parser.add_argument("wig_output", help="Path to write the reads wig track to") args = parser.parse_args() reads = pyDNase.BAMHandler(args.reads,caching=True) regions = pyDNase.GenomicIntervalSet(args.regions) wigout = open(args.wig_output,"w") #Required for UCSC upload print >> wigout, "track type=wiggle_0" puts("Writing wig tracks...") for each in progress.bar([item for sublist in sorted(regions.intervals.values()) for item in sorted(sublist, key=lambda peak: peak.startbp)]): try: prevregionp=str(each.chromosome)+","+str(each.startbp-2)+","+str(each.startbp)+",+" prevcuts=reads[prevregionp] nextregionp=str(each.chromosome)+","+str(each.endbp+1)+","+str(each.endbp+3)+",+" nextcuts=reads[nextregionp] pp,pm=prevcuts["+"],prevcuts["-"]
"p-value cutoffs must be supplied as a string of numbers separated by commas" ) assert 0 < clargs.FDR_cutoff < 1, "FDR must be between 0 and 1" assert clargs.FDR_limit < 0, "FDR limit must be less than 0" assert len([f for f in os.listdir(clargs.outputdir) if f[0] != "."]) == 0, "output directory {0} is not empty!".format( clargs.outputdir) if not clargs.output_prefix: clargs.output_prefix = str(os.path.basename(clargs.reads)) + "." + str( os.path.basename(clargs.regions)) #Load reads and regions regions = pyDNase.GenomicIntervalSet(clargs.regions) reads = pyDNase.BAMHandler(clargs.reads, caching=False, ATAC=clargs.A) #Create a directory for p-values and WIG output. This /should/ be OS independent os.makedirs(os.path.join(clargs.outputdir, "p value cutoffs")) wigout = open( os.path.relpath(clargs.outputdir) + "/" + clargs.output_prefix + ".WellingtonFootprints.wig", "w") fdrout = open( os.path.relpath(clargs.outputdir) + "/" + clargs.output_prefix + ".WellingtonFootprints.FDR.{0}.bed".format(clargs.FDR_cutoff), "w") #Required for UCSC upload print >> wigout, "track type=wiggle_0" #Iterate in chromosome, basepair order orderedbychr = [
import sys ################################################################################################################################# parser = argparse.ArgumentParser() parser.add_argument('bed_file', type = str, help = 'BED file containing regions to plot') parser.add_argument('bam_file', type = str, help = 'BAM file containing reads to plot') parser.add_argument('outfile', type = str, help = 'Output file (.tsv)') parser.add_argument('-w', '--window', dest = 'w', type = int, default = 200, help = 'Window size to plot. Default = 200bp') args = parser.parse_args() ################################################################################################################################# # Read BAM file reads = pyDNase.BAMHandler(args.bam_file) # Calculate the distance to extend footprints by (window size / 2) extend = int(math.ceil(args.w / 2)) # Get regions from BED file regions = pyDNase.GenomicIntervalSet(args.bed_file) # Keep track of number of forward and reverse reads fwd_cut_tracking = dict() rev_cut_tracking = dict() sys.stderr.write('Counting cuts in regions...\n') for site in progress.bar(regions): # Get chromosome, strand, start and end positions for site
"--bias-file", help="Location of the sorted, index", default=None, type=str) parser.add_argument("-r", action="store_true", help="Randomise the ordering of the output", default=False) parser.add_argument( "regions", help="BED file of the regions you want to generate the heatmap for") parser.add_argument("reads", help="The BAM file containing the read data") parser.add_argument("output", help="filename to write the CSV output to") args = parser.parse_args() reads = pyDNase.BAMHandler(args.reads, caching=not args.c, ATAC=args.A) if args.b: if args.bias_file != None: freads = pyDNase.BAMHandlerWithBias(pyDNase.FASTAHandler( args.bias_file), args.reads, caching=not args.c, ATAC=args.A) else: raise ValueError("No FASTA file provided for bias correction!") regions = pyDNase.GenomicIntervalSet(args.regions) if args.i: for each in regions: each.strand = "+"
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

import matplotlib.pyplot as plt
import pyDNase
from pyDNase.footprinting import wellington

# Reads and regions are loaded from example.bam / example.bed
reads = pyDNase.BAMHandler("example.bam")
regions = pyDNase.GenomicIntervalSet("example.bed")

# Everything below works on the first region only
first_region = regions[0]

# Cut counts: "+" strand in red, negated "-" strand in blue
plt.plot(reads[first_region]["+"], c="red")
plt.plot(-reads[first_region]["-"], c="blue")

# Wellington scores for that region, drawn in black
footprinter = wellington(first_region, reads)
plt.plot(footprinter.scores, c="black")
plt.show()
#Call footprints
import sys
import pyDNase
import pyDNase.footprinting as fp

# Positional arguments:
#   1: regions BED file   2: reads BAM file   3: output file (appended to)
#   4: footprint score cutoff (integer)       5: mode
# Mode 'singleEnd' uses wellington1D; any other value uses wellington.
if sys.argv[5] == 'singleEnd':
    footprinter_class = fp.wellington1D
else:
    footprinter_class = fp.wellington

regions = pyDNase.GenomicIntervalSet(sys.argv[1])
reads = pyDNase.BAMHandler(sys.argv[2])
cutoff = int(sys.argv[4])

with open(sys.argv[3], "a") as bedout:
    # Every region is processed. The previous `range(len(regions) - 1)`
    # silently skipped the last region (and did nothing for a one-region BED).
    for x in range(len(regions)):
        footprinter = footprinter_class(regions[x], reads)
        footprints = footprinter.footprints(withCutoff=cutoff)
        bedout.write(str(footprints))
help="ATAC-seq mode (default: False)", default=False) parser.add_argument( "regions", help="The set of BED files you wish to annotate with dDHS scores") parser.add_argument("treat_dhs", help="The DHSs belonging to the Treatment") parser.add_argument("control_dhs", help="The DHSs belonging to the control") parser.add_argument( "reads_treat", help="The BAM file containing the Treatment DNase-seq data") parser.add_argument("reads_control", help="The BAM file containing the Control DNase-seq data") parser.add_argument("output", help="filename to write the output to") args = parser.parse_args() reads_treat = pyDNase.BAMHandler(args.reads_treat, caching=not args.l, ATAC=args.A) reads_control = pyDNase.BAMHandler(args.reads_control, caching=not args.l, ATAC=args.A) treat_dhs = pyDNase.GenomicIntervalSet(args.treat_dhs) control_dhs = pyDNase.GenomicIntervalSet(args.control_dhs) regions = pyDNase.GenomicIntervalSet(args.regions) treat_total_cuts = 0 control_total_cuts = 0 treat_base_pairs = 0 control_base_pairs = 0 puts("Calculating enrichment for Treatment") for i in progress.bar(treat_dhs):