Ejemplos de BAMHandler en Python, ejemplos de pyDNase.BAMHandler en Python

Ejemplo n.º 1

0

Mostrar archivo

Archivo: __init__.py Proyecto: tsackton/pyDNase

 def test_BAM_reading_without_caching(self):
     """Check the per-strand values read from the example BAM with caching=0."""
     region_key = "chr6,170863142,170863150,+"
     expected_by_strand = (
         ("+", [1, 0, 0, 0, 1, 11, 1, 0]),
         ("-", [0, 1, 0, 0, 1, 0, 0, 1]),
     )
     reads = pyDNase.BAMHandler(pyDNase.example_reads(), caching=0)
     # Query the handler once per strand, "+" first and then "-"
     for strand, expected in expected_by_strand:
         numpy.testing.assert_array_equal(reads[region_key][strand],
                                          expected)

Ejemplo n.º 2

0

Mostrar archivo

Archivo: __init__.py Proyecto: tsackton/pyDNase

 def test_BAM_reading(self):
     """Test BAM access using the handler's default caching setting"""
     handler = pyDNase.BAMHandler(pyDNase.example_reads())
     query = "chr6,170863142,170863150,+"

     # Expected values for the "+" strand of the queried interval
     plus_expected = [1, 0, 0, 0, 1, 11, 1, 0]
     numpy.testing.assert_array_equal(handler[query]["+"], plus_expected)

     # Expected values for the "-" strand of the same interval
     minus_expected = [0, 1, 0, 0, 1, 0, 0, 1]
     numpy.testing.assert_array_equal(handler[query]["-"], minus_expected)

Ejemplo n.º 3

0

Mostrar archivo

Archivo: __init__.py Proyecto: resurgo-genetics/BPAC

def getBamCutMean(regions, bam_file):
    """Return, for each region, the summed strand values divided by its length.

    For every region the "+" and "-" entries returned by the BAM handler are
    combined with ``+``, summed, and divided by ``region.stop - region.start``.

    Args:
        regions: iterable of objects exposing ``chrom``, ``start`` and ``stop``.
        bam_file: value passed straight to ``pyDNase.BAMHandler``.

    Returns:
        A list with one value per region, in iteration order.
    """
    handler = pyDNase.BAMHandler(bam_file)

    def mean_for(region):
        # The handler is queried with a "chrom,start,stop,+" string
        query = ",".join(
            [str(region.chrom), str(region.start), str(region.stop), "+"])
        per_strand = handler[query]
        total = sum(per_strand['+'] + per_strand['-'])
        return total * 1.0 / (region.stop - region.start)

    return [mean_for(region) for region in regions]

Ejemplo n.º 4

0

Mostrar archivo

Archivo: __init__.py Proyecto: tsackton/pyDNase

 def test_footprinting(self):
     """Test footprinting.

     Runs ``wellington`` on the first example region with the example reads
     and compares the resulting ``scores`` and ``lengths`` against the
     reference values stored below.
     """
     # Load the example reads and regions
     reads = pyDNase.BAMHandler(pyDNase.example_reads())
     regions = pyDNase.GenomicIntervalSet(pyDNase.example_regions())
     footprinter = wellington(regions[0], reads)
     # Note - scores are only compared to 3 decimal places (decimal=3 below)
     # to allow for differences in floating point numbers
     numpy.testing.assert_array_almost_equal(footprinter.scores, [
         0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
         0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
         0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
         0.0, -0.8505197962574915, -0.7522459055434079, -0.6405956238609599,
         -0.35029217770692905, -0.19445213824845226, -0.04510918998207078,
         -0.013127544708030047, -0.019434755711449096,
         -0.017813062409838532, -0.4899192539679181, -0.7366170062412767,
         -1.160234291218491, -1.4932241116142613, -2.528451574312211,
         -2.9873463332686545, -4.0789439624702215, -4.608073840135845,
         -4.6080738401358445, -5.46591166889954, -6.317058518040485,
         -7.846849141309235, -8.70970430615968, -7.84684914298093,
         -10.57133857477595, -9.524456623200592, -8.450720744685238,
         -7.351088844276472, -6.227879918162327, -5.085807684913266,
         -1.412414402021511, -3.461932293846784, -3.6968244901998126,
         -3.6968244901997713, -3.9374380500569046, -3.9374380500569046,
         -3.502106381128687, -3.968687434788506, -3.968687434788506,
         -3.9686874347885044, -4.210084222760481, -4.708248147109799,
         -4.481083945460659, -4.614616491048433, -6.331304868565458,
         -6.7188196319447515, -7.805240790859276, -10.096125803164037,
         -10.096125904865069, -9.804317009970552, -10.942957174739428,
         -10.831197056706369, -9.451636014876547, -9.271803479479166,
         -10.547425524609011, -11.356756808330887, -10.173763450595242,
         -17.266997956146163, -24.135650052599853, -26.79974412054261,
         -24.068532700189742, -20.83033463447785, -17.442306072203564,
         -3.3271869067645095, -1.552524387513255, -1.2303389949451933,
         -1.116146321342096, -0.7241346073398854, -0.8217741198401821,
         -0.5077397193727583, -0.4619110913457732, -0.22648726483418524,
         -0.08368942693734599, -0.04662652321248819, -0.10740322088702083,
         -0.1600382576388667, -0.09849358892510252, -0.2996877100052051,
         -0.4956516466712493, -0.8286771565689258, -0.7441816651207845,
         -0.5312102440124086, -6.089145200199429, -54.524611990632465,
         -55.11290166247622, -53.73358776712574, -56.37380673644542,
         -59.597668457279916, -63.142121596069494, -69.8245790871056,
         -76.97479986221292, -83.6326531975367, -88.05928977864403,
         -87.62205344847811, -90.7846299628178, -94.85120273316905,
         -90.09506169785546, -85.09363194018195, -90.25622681870428,
         -80.40916250197246, -84.41195387381595, -96.25001089840575,
         -105.99203665518576, -109.60076099775432, -116.04973655820825,
         -124.40507207962382, -120.71820677125163, -121.99289957155713,
         -121.7696295849731, -128.86709184814546, -130.00197395916774,
         -138.7286574562139, -150.07398897152254, -141.58993458465335,
         -134.33745073269844, -134.76596995468543, -106.6912682602024,
         -96.02214212537493, -85.8950778423277, -73.04392809450209,
         -54.85091731066348, -44.010732916962205, -31.573437293391223,
         -23.59371038683095, -18.62378346291484, -3.2863459020700057,
         -1.8733702431391752, -0.492074167081423, -0.27948577530733343,
         -0.27948577530733343, -0.07138091975833981, -0.09972653646891905,
         -0.05418579937724513, -0.024132554170139438, -0.021842812415429565,
         -0.9566534364564785, -6.932360951667957, -11.187077720714367,
         -13.553355643835602, -14.21631406001477, -14.983929833667665,
         -15.422758574896921, -18.32278174888965, -18.2834926735795,
         -17.265359820713286, -16.13035610465361, -14.086076680349992,
         -13.521427957090859, -12.515293283803214, -11.480271740126698,
         -9.92078604101271, -8.797191973771438, -6.985510255611701,
         -5.426767915467293, -5.183152081566609, -3.7475983370968295,
         -1.9153547972282414, -0.0006083021245538324, -13.64272847695586,
         -10.286808471857325, -15.63569341874549, -20.86940117070692,
         -22.928591109686124, -30.496433497261098, -26.10052633266505,
         -29.221144392666716, -24.0276270737085, -21.301001754269702,
         -20.97154340860586, -15.798224427435104, -17.780912132981612,
         -24.823354886252613, -24.604927499889286, -24.955334454941635,
         -38.74241644973382, -43.782982787325366, -46.80273522972689,
         -46.08571305295883, -47.92277577875605, -41.4868217475951,
         -37.915322367616675, -34.16174895135005, -33.58267055798403,
         -32.06130865601216, -34.094574908150825, -39.695727106225405,
         -40.120719852615196, -41.05121481573844, -42.01796136083251,
         -39.75209693618059, -35.73339613779332, -34.731089314533676,
         -32.694583271242884, -29.577625993685, -28.026659577292953,
         -25.215089099008644, -25.174202473704753, -21.952113990014446,
         -17.028869764873075, -15.578727453806595, -16.1579750791396,
         -12.974390056172448, -8.418484753962995, -5.7847304546785905,
         -2.2267773783077134, -1.4570520375724902, -1.543691534890984,
         -1.575957362444019, -0.7176800307627448, -0.7968619556272615,
         -4.841045489929452, -5.248527604937139, -1.0472142687516643,
         -1.0630763089203221, -2.185755905394793, -3.8307492546267254,
         -4.993169872339857, -7.2764872801107385, -6.792829090234741,
         -6.452991771598523, -6.952945781664499, -8.215168486202954,
         -6.613961853070211, -22.150574756810474, -28.514525290020345,
         -27.33821547951633, -29.034538366843996, -33.82258103970177,
         -41.26481032907057, -40.912839794048644, -48.684226156049405,
         -49.44508720397513, -61.863467137712874, -70.11156862148243,
         -82.93974699146762, -91.62613467860213, -91.54466150389183,
         -73.5404690802315, -75.77506886003911, -78.05398228595476,
         -84.42906672420139, -93.01020782082938, -89.65901048860756,
         -109.20614016921928, -121.0826042903611, -120.2996268556599,
         -117.38782641714545, -128.50467987996305, -128.9595101418021,
         -133.14841986541902, -136.82233726671367, -133.94746637928725,
         -154.5649504690748, -164.11983575086742, -159.85307484109336,
         -151.89784688535133, -153.56557629402886, -146.72984757341305,
         -135.04501822595842, -127.92055598311715, -126.08111294376953,
         -120.03403862241993, -99.25696665821185, -71.19178328684012,
         -64.94518489350295, -59.98207339614661, -54.12991577221696,
         -43.206052468123545, -29.456860663206527, -6.411526985333728,
         -6.44709453786988, -6.215828945120546, -5.762898291384889,
         -4.3769156224166315, -3.2727915503830047, -2.616087927600661,
         -2.313254659995694, -1.8641066899878078, -1.8186414374916933,
         -0.8008712043775049, -0.6426129783652371, -0.5224073311989104,
         -0.2710345166975603, -0.43819657644966853, -1.2626459311104576,
         -1.9408301832235342, -3.9812039032702886, -3.9812039032702886,
         -2.861605777578473, -3.2137507785013066, -3.2137507785013066,
         -2.9669916392942004, -3.2617340566815645, -3.9686874347885044,
         -3.54350638697767, -3.54350638697767, -3.1070679887817896,
         -2.8384054421005627, -2.2611557931086583, -2.9566374983191013,
         -2.2617270920463315, -2.5370237970085574, -3.2091208219605813,
         -3.0532448758817448, -1.6966894030794892, -2.2744775410764126,
         -2.729866824495538, -3.080565957210189, -2.808261821233711,
         -3.251159821714309, -2.1636899060453407, 0.0, 0.0, 0.0, 0.0, 0.0,
         0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
         0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
         0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0
     ],
                                             decimal=3)
     # The lengths are compared exactly (assert_array_equal, no tolerance)
     numpy.testing.assert_array_equal(footprinter.lengths, [
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 11, 11, 15,
         13, 11, 15, 13, 25, 25, 11, 13, 15, 17, 19, 21, 23, 25, 25, 11, 13,
         15, 17, 15, 21, 19, 17, 15, 13, 11, 21, 25, 25, 25, 25, 25, 25, 25,
         25, 25, 25, 25, 25, 23, 25, 15, 17, 19, 19, 17, 15, 13, 19, 25, 25,
         23, 25, 11, 11, 13, 15, 13, 11, 11, 25, 25, 15, 15, 19, 17, 15, 13,
         11, 25, 25, 25, 11, 15, 17, 19, 15, 23, 11, 25, 25, 25, 25, 25, 25,
         21, 23, 25, 25, 25, 25, 25, 23, 25, 25, 25, 21, 23, 25, 25, 23, 25,
         25, 21, 19, 19, 21, 25, 25, 25, 23, 25, 23, 21, 19, 19, 15, 15, 11,
         11, 11, 23, 25, 25, 25, 25, 25, 25, 25, 25, 25, 11, 11, 13, 15, 11,
         11, 21, 17, 15, 13, 15, 13, 25, 25, 23, 21, 19, 19, 13, 13, 13, 11,
         25, 11, 13, 15, 17, 11, 13, 15, 17, 15, 13, 11, 25, 15, 17, 19, 19,
         23, 25, 25, 21, 23, 21, 19, 17, 13, 25, 25, 25, 25, 25, 25, 25, 23,
         21, 19, 25, 25, 23, 11, 11, 15, 15, 13, 15, 13, 11, 19, 15, 13, 11,
         11, 11, 11, 11, 15, 15, 19, 21, 23, 25, 25, 23, 25, 25, 15, 11, 13,
         15, 17, 19, 21, 23, 25, 25, 25, 23, 25, 25, 25, 25, 25, 25, 25, 25,
         25, 21, 23, 25, 25, 25, 25, 25, 25, 21, 23, 25, 25, 25, 25, 23, 25,
         25, 25, 25, 23, 21, 19, 15, 15, 13, 11, 13, 25, 25, 25, 25, 25, 25,
         25, 23, 25, 25, 25, 25, 11, 11, 11, 13, 11, 11, 15, 17, 17, 21, 23,
         25, 25, 25, 25, 23, 17, 19, 17, 15, 13, 11, 19, 11, 13, 15, 15, 13,
         11, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
     ])

Ejemplo n.º 5

0

Mostrar archivo

    "Report cuts on the negative strand as positive numbers instead of negative (default: False)",
    default=False)
parser.add_argument("-A",
                    action="store_true",
                    help="ATAC-seq mode (default: False)",
                    default=False)
parser.add_argument(
    "regions", help="BED file of the regions you want to write wig tracks for")
parser.add_argument("reads", help="The BAM file containing the read data")
parser.add_argument("fw_output",
                    help="Path to write the forward reads wig track to")
parser.add_argument("rev_output",
                    help="Path to write the reverse reads wig track to")
args = parser.parse_args()

reads = pyDNase.BAMHandler(args.reads, caching=True, ATAC=args.A)
regions = pyDNase.GenomicIntervalSet(args.regions)
fwigout = open(args.fw_output, "w")
bwigout = open(args.rev_output, "w")

#Required for UCSC upload
print >> fwigout, "track type=wiggle_0"
print >> bwigout, "track type=wiggle_0"

#Prints all the wig values but sorts by chromosome/genomic location first
#TODO: port this most awesome (and hacky) iteration code to the main API, possibly using a generator expression?
puts("Writing wig tracks...")
for each in progress.bar([
        item for sublist in sorted(regions.intervals.values())
        for item in sorted(sublist, key=lambda peak: peak.startbp)
]):

Ejemplo n.º 6

0

Mostrar archivo

Archivo: wellington_bootstrap.py Proyecto: aeron15/pyDNase

parser.add_argument("control_only_output",
                    help="File to write control specific footprint scores to")
args = parser.parse_args()

# Sanity check parameters from the user

try:
    args.footprint_sizes = xrange_from_string(args.footprint_sizes)
except ValueError:
    raise RuntimeError("Footprint sizes must be supplied as from,to,step")

# Validate with explicit raises instead of assert: assert statements are
# removed when Python runs with -O, which would silently disable these checks.
# RuntimeError matches the footprint-size check above.
if not 0 < args.FDR_cutoff < 1:
    raise RuntimeError("FDR must be between 0 and 1")
if not args.FDR_limit < 0:
    raise RuntimeError("FDR limit must be less than 0")

# BAM handlers: reads2 holds the treatment sample, reads1 the control
reads2 = pyDNase.BAMHandler(args.treatment_bam, caching=0, ATAC=args.A)
reads1 = pyDNase.BAMHandler(args.control_bam, caching=0, ATAC=args.A)
# Regions of interest
regions = pyDNase.GenomicIntervalSet(args.bedsites)
# Output files, opened with buffering=1 (line buffered in text mode)
treatment_output = open(args.treatment_only_output, "w", buffering=1)
control_output = open(args.control_only_output, "w", buffering=1)

# Use the requested number of processes, otherwise every CPU reported by mp
CPUs = args.processes if args.processes else mp.cpu_count()
# NOTE: This roughly scales at about 450mb per 300 regions held in memory
max_regions_cached_in_memory = 50 * CPUs

Ejemplo n.º 7

0

Mostrar archivo

Archivo: example_footprint_scores.py Proyecto: tsackton/pyDNase

#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

import matplotlib.pyplot as plt
import pyDNase
from pyDNase.footprinting import wellington

#Load the example reads and regions shipped with pyDNase
reads = pyDNase.BAMHandler(pyDNase.example_reads())
regions = pyDNase.GenomicIntervalSet(pyDNase.example_regions())
first_region = regions[0]

#Plot cuts data: "+" strand as-is in red, "-" strand negated in blue
forward_cuts = reads[first_region]["+"]
reverse_cuts = reads[first_region]["-"]
plt.plot(forward_cuts, c="red")
plt.plot([-count for count in reverse_cuts], c="blue")

#Footprint the same region and overlay the scores in black
footprinter = wellington(first_region, reads)
plt.plot(footprinter.scores, c="black")

plt.show()

Ejemplo n.º 8

0

Mostrar archivo

    help=
    "Size of flanking area around centre of the regions to plot (default: 50)",
    default=50,
    type=int)
parser.add_argument("-y", help="ymax (default: auto)", default=0, type=int)
parser.add_argument(
    "regions",
    help="BED file of the regions you want to generate the average profile for"
)
parser.add_argument("reads", help="The BAM file containing the DNase-seq data")
parser.add_argument("output",
                    help="filename to write the output to (use .pdf or .png)")
args = parser.parse_args()

xsize = args.window_size
reads = pyDNase.BAMHandler(args.reads)
regions = pyDNase.GenomicIntervalSet(args.regions)

#Set all strands to positive
#for each in regions:
#    each.strand = "+"

regions.resizeRegions(xsize)

fw = []
rv = []
#TODO: Make this memory efficient - we don't need to store all the fw and rvs
plt.figure(num=None, figsize=(4, 12))
#plt.subplot(211)
plt.subplot2grid((4, 1), (0, 0))
print("Plotting cut counts...")

Ejemplo n.º 9

0

Mostrar archivo

#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

import argparse
import pyDNase
from clint.textui import progress

# Command line interface: optional ATAC-seq flag plus regions, reads and output
parser = argparse.ArgumentParser(
    description=
    'writes a BED file with the FOS for the interval specified as the score')
parser.add_argument("-A",
                    action="store_true",
                    help="ATAC-seq mode (default: False)",
                    default=False)
parser.add_argument(
    "regions",
    help="BED file of the regions you want to generate the average profile for"
)
parser.add_argument("reads", help="The BAM file containing the DNase-seq data")
parser.add_argument("output", help="filename to write the output to")
args = parser.parse_args()

reads = pyDNase.BAMHandler(args.reads, ATAC=args.A)
regions = pyDNase.GenomicIntervalSet(args.regions)

# Store each interval's FOS as its score and write the interval on its own
# line. The with-block closes the file even if scoring raises, and write()
# replaces the Python-2-only "print >> outfile, i" with identical output
# (str(i) followed by a newline) on both Python 2 and Python 3.
with open(args.output, "w") as outfile:
    for i in progress.bar(regions):
        i.score = reads.FOS(i)
        outfile.write(str(i) + "\n")

Ejemplo n.º 10

0

Mostrar archivo

Archivo: get_DNase_cuts.py Proyecto: rmovva/tf_net

cell_type = cell_types[cell_type_id].strip()

filebase = DATADIR + "/DNase/DNASE." + cell_type

bams = glob.glob(filebase + ".*.bam")

if cell_type == "K562":  # there are so many otherwise
    bams = [filebase + '.biorep2.techrep%i.bam' % i for i in (3, 5)]
ps = pysam.AlignmentFile(bams[0], "rb")
chrs = zip(ps.references, ps.lengths)

bam_handlers = []

# Open a handler per BAM file on a best-effort basis: a file that cannot be
# opened is reported and skipped. Catch Exception rather than using a bare
# except so that KeyboardInterrupt/SystemExit still stop the script, and
# include the error so the cause is not lost.
for f in bams:
    try:
        bh = pyDNase.BAMHandler(f, caching=False)
    except Exception as err:
        print("Problem with " + f + ": " + str(err))
    else:
        bam_handlers.append(bh)

# Plain 0 instead of the Python-2-only literal 0L; Python 2 ints promote to
# long automatically, and 0L is a syntax error on Python 3.
total_cuts = 0

data = {}
where = {}
chunk = 1000000

import gzip
outfiles = {
    strand: gzip.open(filebase + strand + ".txt.gz", "wb")
    for strand in ("+", "-")
}

Ejemplo n.º 11

0

Mostrar archivo

outputFileName = sys.argv[3]

# Parameters
cutoff = -30
footprintSizes = range(6, 40, 1)
to_remove = []

# Creating new region file name with the first three columns only
newRegionFileName = outputFileName + "regions.bed"
# Copy the columns in Python instead of os.system("cut -f 1,2,3 ..."): the
# shell command was built by concatenating file names, so names containing
# spaces or shell metacharacters broke it (or ran unintended commands), and a
# failing command went unnoticed. Like cut, lines with fewer than three
# tab-separated fields are kept whole. An unreadable input now raises here.
with open(regionFileName) as regionFile, \
        open(newRegionFileName, "w") as newRegionFile:
    for line in regionFile:
        fields = line.rstrip("\n").split("\t")
        newRegionFile.write("\t".join(fields[:3]) + "\n")
to_remove.append(newRegionFileName)

# Execution
# The with-block guarantees the output file is flushed and closed even when
# loading the inputs or footprinting a region raises.
with open(outputFileName, "w") as outputFile:
    regions = pyDNase.GenomicIntervalSet(newRegionFileName)
    reads = pyDNase.BAMHandler(bamFileName)
    for region in regions:
        footprinter = fp.wellington(region,
                                    reads,
                                    shoulder_sizes=range(35, 36),
                                    footprint_sizes=footprintSizes,
                                    FDR=0,
                                    bonferroni=0)
        footprints = footprinter.footprints(withCutoff=cutoff)
        # One tab-separated line per footprint
        for e in footprints:
            columns = [
                e.chromosome, e.startbp, e.endbp, e.label, e.score, e.strand
            ]
            outputFile.write("\t".join(str(k) for k in columns) + "\n")

Ejemplo n.º 12

0

Mostrar archivo

def get_bam5p(
        bdir,
        label_dic,
        strands=('+', '-'),
        fle_tag="TF",
        force_read=False,
        verbose=1):
    """Return per-window values averaged over each cell's BAM files.

    Results are cached in a gzip-compressed pickle named
    ``<fle_tag>_<sorted cell names joined by "_">.dnase.pkl.gz`` in the current
    working directory. The cache is used when it can be loaded and
    ``force_read`` is false; otherwise the values are recomputed from the BAM
    files and the cache is rewritten.

    Args:
        bdir: directory searched for BAM files matching ``*<cell>*.bam``.
        label_dic: nested mapping ``label_dic[cell][label][chrom]`` to a
            sequence of ranges, each indexed as ``(start, end)``. A ``chrom``
            equal to 23 is queried as chromosome "X".
        strands: strand keys whose values are added together per window.
        fle_tag: prefix of the cache file name.
        force_read: when true, ignore any existing cache and recompute.
        verbose: values above 0 print a summary; above 1 also print progress.

    Returns:
        A nested dict ``dta[cell][label][chrom]`` holding a float array of
        shape ``(number of ranges, window size)``.
    """
    pkl_fle = fle_tag + "_" + "_".join(sorted(
        label_dic.keys())) + '.dnase.pkl.gz'

    dta = None
    cache_loaded = False
    if not force_read:
        try:
            with gzip.open(pkl_fle, 'rb') as handle:
                # NOTE(review): unpickling can execute arbitrary code; only
                # load cache files this program wrote itself.
                dta = cPickle.load(handle)
            cache_loaded = True
        except Exception:
            # A missing or unreadable cache means "recompute". Exception is
            # caught instead of a bare except so Ctrl-C is not swallowed.
            cache_loaded = False
        if cache_loaded and verbose > 1:
            print("Read " + pkl_fle)

    if not cache_loaded:
        dta = {}
        for cell in label_dic:
            bam_fles = glob.glob(os.path.join(bdir, '*' + cell + '*.bam'))
            dta[cell] = {}
            for label in label_dic[cell]:
                dta[cell][label] = {}
                for chrom in label_dic[cell][label]:
                    granges = label_dic[cell][label][chrom]
                    chroms = 'X' if chrom == 23 else str(chrom)
                    Nx = len(granges)
                    # The window size is taken from the first range only;
                    # presumably every range has the same width - TODO confirm
                    window_size = granges[0][1] - granges[0][0]
                    # Builtin float replaces np.float, an alias of it that
                    # has been removed from current NumPy releases.
                    totals = np.zeros((Nx, window_size), dtype=float)
                    for bam_fle in bam_fles:
                        reads = pyDNase.BAMHandler(bam_fle, caching=False)
                        for gi, grange in enumerate(granges):
                            # NOTE(review): the lookup is indexed by strand
                            # below; it looks like per-position cut counts -
                            # confirm against pyDNase
                            temp = reads["chr%s,%i,%i,+" %
                                         (chroms, grange[0], grange[1])]
                            for strand in strands:
                                totals[gi, :] += temp[strand]
                    # average per bam_fle for a cell
                    # NOTE(review): N is 0.0 when no BAM file matched, so the
                    # division then yields nan values - confirm this is wanted
                    N = 0.0 + len(bam_fles)
                    dta[cell][label][chrom] = totals / N
            if verbose > 1:
                print("Parsed " + str(bam_fles) +
                      ' for average 5p cuts for ' + cell)
        with gzip.open(pkl_fle, 'wb') as handle:
            cPickle.dump(dta, handle, -1)  # -1 is for HIGHEST_PROTOCOL
        if verbose > 1:
            print("Wrote " + pkl_fle)

    if verbose > 0:
        print("FUNCTION " + myself() + " DTA:")
        for cell in dta:
            for label in dta[cell]:
                i = sum(len(rows) for rows in dta[cell][label].values())
                # Joined by hand so the output matches the old Python 2
                # "print cell, label, ..." statement on Python 3 as well
                print(" ".join(
                    [str(cell), str(label), "ALL chroms" + str(i)]))
        print("")

    return dta

Ejemplo n.º 13

0

Mostrar archivo

Archivo: cut_profile.py Proyecto: resurgo-genetics/BPAC

# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

import argparse
from clint.textui import progress, puts
import pyDNase

parser = argparse.ArgumentParser(description='Writes WIG file with the cut information based on the regions in reads BED file and the reads in reads BAM file')
parser.add_argument("regions", help="BED file of the regions you want to write wig tracks for")
parser.add_argument("reads", help="The BAM file containing the read data")
parser.add_argument("wig_output", help="Path to write the reads wig track to")
args = parser.parse_args()

reads = pyDNase.BAMHandler(args.reads,caching=True)
regions = pyDNase.GenomicIntervalSet(args.regions)
wigout = open(args.wig_output,"w")

#Required for UCSC upload
print >> wigout, "track type=wiggle_0"

puts("Writing wig tracks...")

for each in progress.bar([item for sublist in sorted(regions.intervals.values()) for item in sorted(sublist, key=lambda peak: peak.startbp)]):
    try:
        prevregionp=str(each.chromosome)+","+str(each.startbp-2)+","+str(each.startbp)+",+"
        prevcuts=reads[prevregionp]
        nextregionp=str(each.chromosome)+","+str(each.endbp+1)+","+str(each.endbp+3)+",+"
        nextcuts=reads[nextregionp]
        pp,pm=prevcuts["+"],prevcuts["-"]

Ejemplo n.º 14

0

Mostrar archivo

Archivo: wellington_footprints.py Proyecto: aeron15/pyDNase

        "p-value cutoffs must be supplied as a string of numbers separated by commas"
    )

assert 0 < clargs.FDR_cutoff < 1, "FDR must be between 0 and 1"
assert clargs.FDR_limit < 0, "FDR limit must be less than 0"
assert len([f for f in os.listdir(clargs.outputdir)
            if f[0] != "."]) == 0, "output directory {0} is not empty!".format(
                clargs.outputdir)

if not clargs.output_prefix:
    clargs.output_prefix = str(os.path.basename(clargs.reads)) + "." + str(
        os.path.basename(clargs.regions))

#Load reads and regions
regions = pyDNase.GenomicIntervalSet(clargs.regions)
reads = pyDNase.BAMHandler(clargs.reads, caching=False, ATAC=clargs.A)

#Create a directory for p-values and WIG output. This /should/ be OS independent
os.makedirs(os.path.join(clargs.outputdir, "p value cutoffs"))
wigout = open(
    os.path.relpath(clargs.outputdir) + "/" + clargs.output_prefix +
    ".WellingtonFootprints.wig", "w")
fdrout = open(
    os.path.relpath(clargs.outputdir) + "/" + clargs.output_prefix +
    ".WellingtonFootprints.FDR.{0}.bed".format(clargs.FDR_cutoff), "w")

#Required for UCSC upload
print >> wigout, "track type=wiggle_0"

#Iterate in chromosome, basepair order
orderedbychr = [

Ejemplo n.º 15

0

Mostrar archivo

import sys

#################################################################################################################################

parser = argparse.ArgumentParser()
parser.add_argument('bed_file', type = str, help = 'BED file containing regions to plot')
parser.add_argument('bam_file', type = str, help = 'BAM file containing reads to plot')
parser.add_argument('outfile', type = str, help = 'Output file (.tsv)')
parser.add_argument('-w', '--window', dest = 'w', type = int, default = 200, help = 'Window size to plot. Default = 200bp')

args = parser.parse_args()

#################################################################################################################################

# Read BAM file
reads = pyDNase.BAMHandler(args.bam_file)

# Calculate the distance to extend footprints by (window size / 2, rounded up).
# Divide by 2.0 so the division is never an integer division: under Python 2
# "args.w / 2" floors first, which turns math.ceil into a no-op for odd windows.
extend = int(math.ceil(args.w / 2.0))

# Get regions from BED file
regions = pyDNase.GenomicIntervalSet(args.bed_file)

# Keep track of number of forward and reverse reads
fwd_cut_tracking = dict()
rev_cut_tracking = dict()

sys.stderr.write('Counting cuts in regions...\n')

for site in progress.bar(regions):
	# Get chromosome, strand, start and end positions for site

Ejemplo n.º 16

0

Mostrar archivo

                    "--bias-file",
                    help="Location of the sorted, index",
                    default=None,
                    type=str)
parser.add_argument("-r",
                    action="store_true",
                    help="Randomise the ordering of the output",
                    default=False)
parser.add_argument(
    "regions",
    help="BED file of the regions you want to generate the heatmap for")
parser.add_argument("reads", help="The BAM file containing the read data")
parser.add_argument("output", help="filename to write the CSV output to")
args = parser.parse_args()

reads = pyDNase.BAMHandler(args.reads, caching=not args.c, ATAC=args.A)
if args.b:
    # Bias correction needs a FASTA file. None is compared by identity
    # ("is None") rather than with "!=", and the failure case is handled first.
    if args.bias_file is None:
        raise ValueError("No FASTA file provided for bias correction!")
    freads = pyDNase.BAMHandlerWithBias(pyDNase.FASTAHandler(args.bias_file),
                                        args.reads,
                                        caching=not args.c,
                                        ATAC=args.A)
regions = pyDNase.GenomicIntervalSet(args.regions)

if args.i:
    for each in regions:
        each.strand = "+"

Ejemplo n.º 17

0

Mostrar archivo

Archivo: footprinttest.py Proyecto: aeron15/pyDNase

#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

import matplotlib.pyplot as plt
import pyDNase
from pyDNase.footprinting import wellington

#Load the example reads and regions (paths are relative to the working directory)
reads = pyDNase.BAMHandler("example.bam")
regions = pyDNase.GenomicIntervalSet("example.bed")
first_region = regions[0]

#Plot cuts data: "+" strand as-is in red, "-" strand negated in blue
plus_cuts = reads[first_region]["+"]
minus_cuts = reads[first_region]["-"]
plt.plot(plus_cuts, c="red")
plt.plot(-minus_cuts, c="blue")

#Footprint the same region and overlay the scores in black
footprinter = wellington(first_region, reads)
plt.plot(footprinter.scores, c="black")

plt.show()

Ejemplo n.º 18

0

Mostrar archivo

Archivo: runWellington.py Proyecto: eugeniaeueu/htsflow

#Call footprints
import sys
import pyDNase
import pyDNase.footprinting as fp

# Usage: <regions BED> <reads BAM> <output file> <cutoff> <mode>
# Pick the footprinting class once: wellington1D when the mode argument is
# 'singleEnd', the standard wellington otherwise. Both are called the same way.
if sys.argv[5] == 'singleEnd':
    footprinter_class = fp.wellington1D
else:
    footprinter_class = fp.wellington

regions = pyDNase.GenomicIntervalSet(sys.argv[1])
reads = pyDNase.BAMHandler(sys.argv[2])
cutoff = int(sys.argv[4])

# Append the footprints of every region to the output file. The previous loop
# ran over range(len(regions) - 1) and so never processed the last region; it
# also reopened the output file on every iteration.
with open(sys.argv[3], "a") as bedout:
    for x in range(len(regions)):
        footprinter = footprinter_class(regions[x], reads)
        footprints = footprinter.footprints(withCutoff=cutoff)
        bedout.write(str(footprints))

Ejemplo n.º 19

0

Mostrar archivo

Archivo: dnase_ddhs_scorer.py Proyecto: shulp2211/pyDNase

                    help="ATAC-seq mode (default: False)",
                    default=False)
parser.add_argument(
    "regions",
    help="The set of BED files you wish to annotate with dDHS scores")
parser.add_argument("treat_dhs", help="The DHSs belonging to the Treatment")
parser.add_argument("control_dhs", help="The DHSs belonging to the control")
parser.add_argument(
    "reads_treat", help="The BAM file containing the Treatment DNase-seq data")
parser.add_argument("reads_control",
                    help="The BAM file containing the Control DNase-seq data")
parser.add_argument("output", help="filename to write the output to")
args = parser.parse_args()

reads_treat = pyDNase.BAMHandler(args.reads_treat,
                                 caching=not args.l,
                                 ATAC=args.A)
reads_control = pyDNase.BAMHandler(args.reads_control,
                                   caching=not args.l,
                                   ATAC=args.A)
treat_dhs = pyDNase.GenomicIntervalSet(args.treat_dhs)
control_dhs = pyDNase.GenomicIntervalSet(args.control_dhs)
regions = pyDNase.GenomicIntervalSet(args.regions)

treat_total_cuts = 0
control_total_cuts = 0
treat_base_pairs = 0
control_base_pairs = 0

puts("Calculating enrichment for Treatment")
for i in progress.bar(treat_dhs):