Ejemplo n.º 1
0
 def setUp(self):
     """Store a coverage object and an interval list on the instance.

     The BAM file ``test_coverage.bam`` is looked up in the directory that
     contains this source file (resolved with ``os.path.realpath``).
     """
     here = os.path.dirname(os.path.realpath(__file__))
     self.cov = pysam_coverage.single_coverage(
         os.path.join(here, 'test_coverage.bam'))
     # (start, end) pairs; every interval lies on the sequence named 'ref'.
     spans = [(0, 2), (5, 9), (9, 13), (19, 27),
              (43, 49), (48, 56), (50, 54), (64, 70)]
     self.intervals = [('ref', start, end) for start, end in spans]
 def setUp(self):
     """Store a coverage object, an interval list and interval names.

     The BAM file ``test_coverage.bam`` is looked up in the directory that
     contains this source file (resolved with ``os.path.realpath``).
     ``self.names`` holds one ``chrom:start-end`` string per interval, in
     the same order as ``self.intervals``.
     """
     here = os.path.dirname(os.path.realpath(__file__))
     self.cov = pysam_coverage.single_coverage(
         os.path.join(here, 'test_coverage.bam'))
     # (start, end) pairs; every interval lies on the sequence named 'ref'.
     spans = [(0, 2), (5, 9), (9, 13), (19, 27),
              (43, 49), (48, 56), (50, 54), (64, 70)]
     self.intervals = [('ref', start, end) for start, end in spans]
     self.names = [
         '{}:{}-{}'.format(chrom, start, end)
         for chrom, start, end in self.intervals
     ]
Ejemplo n.º 3
0
 def setUp(self):
     """Store a coverage object for ``test_coverage.bam`` on the instance.

     The BAM file is looked up in the directory that contains this source
     file (resolved with ``os.path.realpath``).
     """
     bam_path = os.path.join(
         os.path.dirname(os.path.realpath(__file__)), 'test_coverage.bam')
     self.cov = pysam_coverage.single_coverage(bam_path)
Ejemplo n.º 4
0
# Convert the output path and every BAM path to absolute paths
args['<outfile>'] = os.path.abspath(args['<outfile>'])
args['<bam>'] = [os.path.abspath(x) for x in args['<bam>']]
# Read (chrom, start, end) from the first three tab-separated columns of the
# interval file. When --onebased is set the start is shifted down by one.
startShift = 1 if args['--onebased'] else 0
intervalList = []
with open(args['<intervals>']) as intervalFile:
    for line in intervalFile:
        line = line.strip()
        # A blank line (e.g. a trailing newline at the end of the file)
        # holds no interval; skip it rather than fail on the unpack below.
        if not line:
            continue
        chrom, start, end = line.split('\t')[:3]
        intervalList.append((chrom, int(start) - startShift, int(end)))
# Create output dataframe: one row per coverage value from 0 to --maxcov and
# one column per BAM file
outDF = pd.DataFrame(index=range(args['--maxcov'] + 1), columns=args['<bam>'])
# Fill each column with the coverage histogram of the matching BAM file
for bam in args['<bam>']:
    covCalc = pysam_coverage.single_coverage(bam)
    outDF[bam] = covCalc.coverage_histogram(intervals=intervalList,
                                            max_cov=args['--maxcov'],
                                            map_quality=args['--minmap'],
                                            remove_dup=args['--rmdup'],
                                            remove_secondary=args['--rmsec'])
# Save the histogram table as tab-separated text, gzip-compressed when the
# output name ends in '.gz'
compression = 'gzip' if args['<outfile>'].endswith('.gz') else None
outDF.to_csv(args['<outfile>'],
             sep='\t',
             index_label='coverage',
             compression=compression)
Ejemplo n.º 5
0
    --rmsec  Remove secondary reads.
    --rmsup  Remove supplementary reads.

'''
# Load required modules
import os
from ngs_python.bam import pysam_coverage
from general_python import docopt
# Parse the command line, then convert the numeric arguments to integers and
# the two file paths to absolute paths
args = docopt.docopt(__doc__, version='v1')
args.update({
    '<mapq>': int(args['<mapq>']),
    '<binsize>': int(args['<binsize>']),
    '<bam>': os.path.abspath(args['<bam>']),
    '<outfile>': os.path.abspath(args['<outfile>']),
})
# Build the coverage object for the BAM file and count the bin overlaps
sc = pysam_coverage.single_coverage(args['<bam>'])
binDict = sc.count_bin_overlaps(
    binSize=args['<binsize>'],  # already an int after the update above
    binEqual=args['--equal'],
    mapQ=args['<mapq>'],
    overlap=args['<overlap>'],
    rmDup=args['--rmdup'],
    rmSec=args['--rmsec'],
    rmSup=args['--rmsup'],
)
# Open the output file and record the run parameters as '#'-prefixed header
# lines. The handle is not closed within these lines.
outfile = open(args['<outfile>'], 'w')
outfile.writelines([
    '# input file: {}\n'.format(args['<bam>']),
    '# bin size: {}\n'.format(args['<binsize>']),
    '# bin size equal: {}\n'.format(args['--equal']),
    '# ovarlap type: {}\n'.format(args['<overlap>']),
    '# minimum map quality: {}\n'.format(args['<mapq>']),
    '# remove duplicate alignments: {}\n'.format(args['--rmdup']),
])