def run_coverage(bam, ref_set, outfile, tmpdir, sort=False, mem=25, gtmp=4,
                 docker_image="apollodorus/bioinf:v1"):
    """Submit a per-base ``bedtools coverage`` job for *bam* over *ref_set*.

    A small bash script is written into *tmpdir*, made executable, wrapped
    by the ``bsub`` helper (defined elsewhere in this module) and launched
    through the shell.

    Args:
        bam: Path of the BAM file to measure.
        ref_set: Interval file passed to ``bedtools coverage -a``.
        outfile: Path the coverage output is redirected to.
        tmpdir: Directory that receives the generated script and, when
            *sort* is true, the sorted BAM.
        sort: When true, run ``samtools sort`` first and compute coverage
            on the sorted copy; otherwise coverage reads *bam* directly.
        mem, gtmp, docker_image: Forwarded unchanged to ``bsub``.

    Returns:
        The ``subprocess.Popen`` handle of the submitted command; the
        caller is responsible for waiting on it.
    """
    script_lines = ['#!/bin/bash']
    coverage_input = bam

    if sort:
        # Reserve a file name for the sorted BAM; the handle itself is not
        # needed, so close it right away instead of leaking it.
        sorted_bam = tempfile.NamedTemporaryFile(dir=tmpdir, delete=False)
        sorted_bam.close()
        coverage_input = sorted_bam.name
        script_lines.append(
            'samtools sort {} > {}'.format(bam, coverage_input))

    script_lines.append(
        'samtools view -b {} | bedtools coverage -a {} -b - -d > {}'.format(
            coverage_input, ref_set, outfile))

    # delete=False: the script must outlive this function because the
    # submitted job executes it later.
    with tempfile.NamedTemporaryFile('w', dir=tmpdir, delete=False) as script:
        script.write('\n'.join(script_lines))
    os.chmod(script.name, 0o755)

    cmd = bsub(script.name, mem, gtmp, docker_image, 'genomeCoverage')
    return subprocess.Popen(cmd, shell=True)
def run_feature_counts(bam, saf, tmpdir, mem=25, gtmp=8,
                       docker_image="genomicpariscentre/subread:1.6.2"):
    """Launch a ``featureCounts`` job for *bam* against the SAF file *saf*.

    The sample id is the part of the BAM file name before the first
    underscore; counts are written to ``<sample_id>_peaks.counts`` inside
    the absolute form of *tmpdir*.

    Args:
        bam: Path of the BAM file to count.
        saf: Annotation file passed with ``-a`` (``-F SAF``).
        tmpdir: Directory that receives the counts file.
        mem, gtmp, docker_image: Forwarded unchanged to the ``bsub`` helper
            (defined elsewhere in this module).

    Returns:
        ``((sample_id, outfile), po)`` where ``po`` is the ``Popen`` handle
        with stdout and stderr captured through pipes.
    """
    sample_id = os.path.basename(bam).split('_')[0]
    outfile = os.path.join(os.path.abspath(tmpdir), sample_id + '_peaks.counts')

    counts_cmd = ' '.join([
        'featureCounts -T 5',
        '-a', saf,
        '-F SAF',
        '-s 0',
        '-o', outfile,
        bam,
    ])
    submit_cmd = bsub(counts_cmd, mem, gtmp, docker_image, 'featureCounts')

    po = subprocess.Popen(
        submit_cmd,
        shell=True,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
    )
    return ((sample_id, outfile), po)
def run_homer(target_set, bg_set=None, mem=25, gtmp=8,
              docker_image="apollodorus/homer:mm10"):
    """Run ``findMotifsGenome.pl`` on *target_set* and wait for it to finish.

    Most options come from the module-level ``args`` namespace (``build``,
    ``output_dir``, ``region_size``, ``mismatches``, ``motifs``, ``cpus``,
    ``preparsed_dir`` and ``debug``).

    Args:
        target_set: Peak file handed to ``findMotifsGenome.pl``.
            peak_features format:
            1 - Peak ID, 2 - chromosome, 3 - start, 4 - end, 5 - strand
        bg_set: Optional background file; adds ``-bg <bg_set>`` when truthy.
        mem, gtmp, docker_image: Forwarded unchanged to the ``bsub`` helper
            (defined elsewhere in this module).

    Returns:
        ``args.output_dir``.

    Raises:
        subprocess.CalledProcessError: If the submitted command exits with
            a non-zero status.
    """
    parts = [
        'findMotifsGenome.pl', target_set,
        args.build,
        args.output_dir,
        # str() so a numeric region size works like the other numeric options.
        '-size', str(args.region_size),
        '-mis', str(args.mismatches),
        '-S', str(args.motifs),
        '-p', str(args.cpus),
        '-preparsedDir', args.preparsed_dir,
    ]
    if bg_set:
        parts += ['-bg', bg_set]

    # NOTE(review): the job name 'annovar' looks copied from the annotation
    # step; kept as-is in case something keys on it -- confirm.
    cmd = bsub(' '.join(parts), mem, gtmp, docker_image, 'annovar', args.debug)
    subprocess.check_call(cmd, shell=True)
    return args.output_dir
def peak_summit_routine(bam, refset, sample_id, tmpdir):
    """Build the summit-coverage BED file for one sample.

    First writes peak coverage to ``<tmpdir>/<sample_id>_peakCoverage.bed``
    via ``get_peak_coverage``, then runs ``peakheatmap_summit_bed.py``
    (located next to this file) on it, writing
    ``<args.output_dir>/<sample_id>.summitCoverage.bed``.

    Args:
        bam: BAM file passed to ``get_peak_coverage``.
        refset: Reference set passed to ``get_peak_coverage``.
        sample_id: Prefix used for both output file names.
        tmpdir: Directory for the intermediate coverage file.

    Raises:
        subprocess.CalledProcessError: If the summit job exits non-zero.
    """
    coverage_bed = os.path.join(tmpdir, sample_id + '_peakCoverage.bed')
    # The summit step reads this file, so coverage must be complete first.
    get_peak_coverage(bam, refset, coverage_bed, tmpdir=tmpdir)

    here = os.path.dirname(os.path.abspath(__file__))
    summit_script = os.path.join(here, 'peakheatmap_summit_bed.py')
    summit_bed = os.path.join(args.output_dir,
                              sample_id + ".summitCoverage.bed")

    summit_cmd = 'python3 {} {} -o {}'.format(
        summit_script, coverage_bed, summit_bed)
    submit_cmd = bsub(
        summit_cmd,
        mem=30,
        gtmp=4,
        docker_image='apollodorus/bioinf:pr',
        job_name='summitBed',
    )
    subprocess.check_call(submit_cmd, shell=True)

    print('Finished peak summit BED for "{}"'.format(sample_id))
def run_bamCovereage(bam, outfile, mem=25, gtmp=4,
                     docker_image="quay.io/bgruening/galaxy-deeptools"):
    """Launch ``deeptools bamCoverage`` on *bam*, writing to *outfile*.

    Args:
        bam: Input BAM path (``-b``).
        outfile: Output path (``-o``).
        mem, gtmp, docker_image: Forwarded unchanged to the ``bsub`` helper
            (defined elsewhere in this module).

    Returns:
        The ``subprocess.Popen`` handle of the submitted command; the
        caller is responsible for waiting on it.
    """
    # No scratch file is needed here: the previous version created an unused
    # temporary file from a ``tmpdir`` name that is not a parameter.
    cmd = 'deeptools bamCoverage -b {} -o {}'.format(bam, outfile)
    cmd = bsub(cmd, mem, gtmp, docker_image, 'bamCoverage')
    return subprocess.Popen(cmd, shell=True)
def run_bamCoverage(bam, outfile, mem=20, gtmp=4,
                    docker_image="apollodorus/bioinf:pr"):
    """Launch a CPM-normalised ``bamCoverage`` run with a bin size of 10.

    Args:
        bam: Input BAM path (``-b``).
        outfile: Output path (``-o``).
        mem, gtmp, docker_image: Forwarded unchanged to the ``bsub`` helper
            (defined elsewhere in this module).

    Returns:
        The ``subprocess.Popen`` handle of the submitted command; the
        caller is responsible for waiting on it.
    """
    template = 'bamCoverage --normalizeUsing CPM --binSize 10 -b {} -o {}'
    coverage_cmd = template.format(bam, outfile)
    submit_cmd = bsub(coverage_cmd, mem, gtmp, docker_image, 'genomeCoverage')
    return subprocess.Popen(submit_cmd, shell=True)
def run_bam2split(bam, mem=25, gtmp=8,
                  docker_image="apollodorus:dnase2tf:1.0.1"):
    """Launch a ``featureCounts`` job for *bam*.

    The sample id is the part of the BAM file name before the first
    underscore; output goes to ``<sample_id>b.counts`` inside the absolute
    form of ``tmpdir``.

    NOTE(review): ``saf`` and ``tmpdir`` are not parameters and must exist
    as module-level names when this is called -- confirm.
    NOTE(review): the default image reference contains two colons, unlike
    the ``owner/name:tag`` form used by the other defaults -- confirm.

    Args:
        bam: Path of the BAM file to count.
        mem, gtmp, docker_image: Forwarded unchanged to the ``bsub`` helper
            (defined elsewhere in this module).

    Returns:
        ``((sample_id, outfile), po)`` where ``po`` is the ``Popen`` handle
        with stdout and stderr captured through pipes.
    """
    sample_id = os.path.basename(bam).split('_')[0]
    outfile = os.path.join(os.path.abspath(tmpdir), sample_id + 'b.counts')

    counts_cmd = ' '.join([
        'featureCounts -T 5',
        '-a', saf,
        '-F SAF',
        '-s 0',
        '-o', outfile,
        bam,
    ])
    submit_cmd = bsub(counts_cmd, mem, gtmp, docker_image, 'featureCounts')

    po = subprocess.Popen(
        submit_cmd,
        shell=True,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
    )
    return ((sample_id, outfile), po)
def run_gene_annotation(avinput, dist, build, db, outdir, mem=25, gtmp=8,
                        docker_image="apollodorus/annovar:latest"):
    """Launch ``annotate_variation.pl`` gene annotation for *avinput*.

    The output prefix is ``<outdir>/<prefix>_annot`` where ``prefix`` is
    the part of the input file name before the first underscore.

    Args:
        avinput: Input file handed to ``annotate_variation.pl``.
        dist: Value for ``-neargene`` (converted with ``str``).
        build: Value for ``-build``.
        db: Database location, passed as the last argument.
        outdir: Directory that receives the output files.
        mem, gtmp, docker_image: Forwarded unchanged to the ``bsub`` helper
            (defined elsewhere in this module).

    Returns:
        ``(po, path)`` where ``po`` is the ``Popen`` handle (stdout piped)
        and ``path`` is the output prefix plus ``.variant_function``.
    """
    prefix = os.path.basename(avinput).split('_')[0]
    outfile = os.path.join(outdir, prefix + '_annot')

    annot_cmd = ' '.join([
        'annotate_variation.pl -out', outfile,
        '-build', build,
        avinput,
        '--geneanno',
        '-neargene', str(dist),
        '-dbtype refGene',
        db,
    ])
    submit_cmd = bsub(annot_cmd, mem, gtmp, docker_image, 'annovar')

    po = subprocess.Popen(submit_cmd, shell=True, stdout=subprocess.PIPE)
    return (po, outfile + '.variant_function')
parser.add_argument('-o', '--output_dir', default='.', help='')
args = parser.parse_args()
# Resolve to absolute paths once so the submitted commands do not depend on
# the working directory they eventually run in.
args.peak_bed = os.path.abspath(args.peak_bed)
args.output_dir = os.path.abspath(args.output_dir)


def run_find_motif_bed(find_motif_file):
    """Build and print the motif-scan submission command for one motif file.

    The command runs ``footprinting_motif_bed.py`` (expected next to this
    file) with ``args.peak_bed``, *find_motif_file* and ``args.fasta``,
    writing into ``args.output_dir``. It is wrapped by the ``bsub`` helper
    defined elsewhere in this module.

    Args:
        find_motif_file: Path of the motif file to scan.
    """
    # Point at the script itself, not merely the directory containing it.
    script = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                          'footprinting_motif_bed.py')
    cmd = 'python3 ' + script \
        + ' ' + args.peak_bed \
        + ' ' + find_motif_file \
        + ' ' + args.fasta + ' -o ' + args.output_dir
    cmd = bsub(cmd, mem=8, gtmp=4, docker_image='apollodorus/bioinf:pr',
               job_name='motif_scan')
    print(cmd)
    # Execution is currently disabled; the command is only printed.
    # subprocess.check_call(cmd, shell=True)


def main():
    """Fan ``run_find_motif_bed`` out over every entry in the motif list.

    ``args.peak_motif_list`` is read as one motif file path per line; blank
    lines are ignored.
    """
    with open(args.peak_motif_list) as fp:
        motif_files = [line for line in fp.read().splitlines() if line.strip()]

    # The context manager waits for all submitted work and releases the
    # worker processes when the block exits.
    with concurrent.futures.ProcessPoolExecutor(max_workers=60) as executor:
        futures = [executor.submit(run_find_motif_bed, mf)
                   for mf in motif_files]
        for future in futures:
            # Re-raise any exception from a worker instead of dropping it.
            future.result()