def setUp(self):
    """Prepare the coverage object and interval fixtures for each test.

    Passes the path of ``test_coverage.bam``, resolved relative to the
    directory holding this file, to ``pysam_coverage.single_coverage`` and
    keeps the result on ``self.cov``.
    """
    here = os.path.dirname(os.path.realpath(__file__))
    self.cov = pysam_coverage.single_coverage(
        os.path.join(here, 'test_coverage.bam')
    )
    # Coordinate pairs on 'ref'; presumably (start, end) -- confirm against
    # the coverage methods that consume self.intervals.
    bounds = [
        (0, 2), (5, 9), (9, 13), (19, 27),
        (43, 49), (48, 56), (50, 54), (64, 70),
    ]
    self.intervals = [('ref', lo, hi) for lo, hi in bounds]
def setUp(self):
    """Prepare the coverage object, intervals and interval names.

    ``self.cov`` is the result of ``pysam_coverage.single_coverage`` for
    ``test_coverage.bam`` in this file's directory. ``self.names`` holds one
    ``'<first>:<second>-<third>'`` string per tuple in ``self.intervals``.
    """
    test_dir = os.path.dirname(os.path.realpath(__file__))
    bam_path = os.path.join(test_dir, 'test_coverage.bam')
    self.cov = pysam_coverage.single_coverage(bam_path)
    # Coordinate pairs on 'ref'; presumably (start, end) -- confirm against
    # the coverage methods that consume self.intervals.
    bounds = (
        (0, 2), (5, 9), (9, 13), (19, 27),
        (43, 49), (48, 56), (50, 54), (64, 70),
    )
    self.intervals = [('ref', lo, hi) for lo, hi in bounds]
    self.names = [
        '{}:{}-{}'.format(ref, lo, hi) for ref, lo, hi in self.intervals
    ]
def setUp(self):
    """Create the coverage object used by the tests.

    Passes the path of ``test_coverage.bam``, resolved relative to the
    directory holding this file, to ``pysam_coverage.single_coverage`` and
    keeps the result on ``self.cov``.
    """
    this_file = os.path.realpath(__file__)
    bam_path = os.path.join(os.path.dirname(this_file), 'test_coverage.bam')
    self.cov = pysam_coverage.single_coverage(bam_path)
# Script body: read intervals, build one coverage histogram column per BAM
# file and write the resulting table as tab-separated text.
# Convert supplied paths to absolute paths
args['<outfile>'] = os.path.abspath(args['<outfile>'])
args['<bam>'] = [os.path.abspath(x) for x in args['<bam>']]
# Open interval list file and extract data
intervalList = []
with open(args['<intervals>']) as intervalFile:
    for line in intervalFile:
        line = line.strip()
        # Skip blank lines (e.g. a trailing empty line); they hold no
        # fields and would otherwise fail the three-way unpacking below
        if not line:
            continue
        # Only the first three tab-separated fields are used
        chrom, start, end = line.split('\t')[:3]
        intervalList.append((chrom, int(start), int(end)))
# Adjust intervals if they are one based (start is shifted down by one,
# end is left unchanged)
if args['--onebased']:
    intervalList = [(x[0], x[1] - 1, x[2]) for x in intervalList]
# Create output dataframe: one row per coverage value from 0 to --maxcov
# inclusive and one column per BAM file
outDF = pd.DataFrame(index=range(args['--maxcov'] + 1),
                     columns=args['<bam>'])
# Extract coverage histogram over the intervals for each BAM file
for bam in args['<bam>']:
    covCalc = pysam_coverage.single_coverage(bam)
    outDF[bam] = covCalc.coverage_histogram(
        intervals=intervalList,
        max_cov=args['--maxcov'],
        map_quality=args['--minmap'],
        remove_dup=args['--rmdup'],
        remove_secondary=args['--rmsec'])
# Save histogram table to file, gzip-compressed if the name ends in '.gz'
compression = 'gzip' if args['<outfile>'].endswith('.gz') else None
outDF.to_csv(args['<outfile>'], sep='\t', index_label='coverage',
             compression=compression)
--rmsec Remove secondary reads. --rmsup Remove supplementray reads. ''' # Load required modules import os from ngs_python.bam import pysam_coverage from general_python import docopt # Extract and process arguments args = docopt.docopt(__doc__, version='v1') args['<mapq>'] = int(args['<mapq>']) args['<binsize>'] = int(args['<binsize>']) args['<bam>'] = os.path.abspath(args['<bam>']) args['<outfile>'] = os.path.abspath(args['<outfile>']) # Create coverage object and extract data sc = pysam_coverage.single_coverage(args['<bam>']) binDict = sc.count_bin_overlaps(binSize=int(args['<binsize>']), binEqual=args['--equal'], mapQ=args['<mapq>'], overlap=args['<overlap>'], rmDup=args['--rmdup'], rmSec=args['--rmsec'], rmSup=args['--rmsup']) # Open outfile and print parameters outfile = open(args['<outfile>'], 'w') outfile.write('# input file: {}\n'.format(args['<bam>'])) outfile.write('# bin size: {}\n'.format(args['<binsize>'])) outfile.write('# bin size equal: {}\n'.format(args['--equal'])) outfile.write('# ovarlap type: {}\n'.format(args['<overlap>'])) outfile.write('# minimum map quality: {}\n'.format(args['<mapq>'])) outfile.write('# remove duplicate alignments: {}\n'.format(args['--rmdup']))