Esempio n. 1
0
def run_coverage(bam,
                 ref_set,
                 outfile,
                 tmpdir,
                 sort=False,
                 mem=25,
                 gtmp=4,
                 docker_image="apollodorus/bioinf:v1"):
    """Submit a per-base coverage job for ``bam`` over ``ref_set``.

    A small bash script is written into ``tmpdir``, made executable and
    wrapped by ``bsub``; the resulting command line is started with
    ``subprocess.Popen`` and is NOT waited on.

    Args:
        bam: Path of the input BAM file.
        ref_set: Interval file handed to ``bedtools coverage -a``.
        outfile: Path the ``bedtools coverage -d`` output is redirected to.
        tmpdir: Directory that receives the generated script and, when
            sorting, the intermediate sorted BAM.
        sort: When true, ``samtools sort`` runs first and coverage is
            computed on the sorted copy; when false, coverage is computed
            on ``bam`` directly.
        mem: Forwarded unchanged to ``bsub``.
        gtmp: Forwarded unchanged to ``bsub``.
        docker_image: Forwarded unchanged to ``bsub``.

    Returns:
        The ``subprocess.Popen`` handle of the launched command.
    """
    # The shebang is always the first line so the script is runnable whether
    # or not the sort step is included.
    script_lines = ['#!/bin/bash']
    coverage_input = bam

    if sort:
        # delete=False: the sorted BAM must outlive this function because the
        # submitted job writes and reads it later. The handle itself is
        # closed right away; only the reserved path is needed.
        with tempfile.NamedTemporaryFile(dir=tmpdir,
                                         delete=False) as sorted_bam:
            coverage_input = sorted_bam.name
        script_lines.append('samtools sort {} > {}'.format(
            bam, coverage_input))

    script_lines.append(
        'samtools view -b {} | bedtools coverage -a {} -b - -d > {}'.format(
            coverage_input, ref_set, outfile))

    with tempfile.NamedTemporaryFile(mode='w', dir=tmpdir,
                                     delete=False) as script:
        script.write('\n'.join(script_lines))
        script_path = script.name

    # The script is executed by path, so it needs the executable bit.
    os.chmod(script_path, 0o755)

    cmd = bsub(script_path, mem, gtmp, docker_image, 'genomeCoverage')
    return subprocess.Popen(cmd, shell=True)
Esempio n. 2
0
def run_feature_counts(bam,
                       saf,
                       tmpdir,
                       mem=25,
                       gtmp=8,
                       docker_image="genomicpariscentre/subread:1.6.2"):
    """Start a featureCounts job for one BAM file without waiting for it.

    The sample id is the part of the BAM file name before the first
    underscore; the counts file is ``<sample_id>_peaks.counts`` inside the
    absolute form of ``tmpdir``.

    Args:
        bam: Path of the BAM file to count.
        saf: Annotation file passed to ``featureCounts -a`` (``-F SAF``).
        tmpdir: Directory in which the counts file is placed.
        mem: Forwarded unchanged to ``bsub``.
        gtmp: Forwarded unchanged to ``bsub``.
        docker_image: Forwarded unchanged to ``bsub``.

    Returns:
        ``((sample_id, outfile), process)`` where ``process`` is the
        ``subprocess.Popen`` handle with stdout and stderr piped.
    """
    bam_name = os.path.basename(bam)
    sample_id = bam_name.split('_')[0]
    outfile = os.path.join(os.path.abspath(tmpdir),
                           sample_id + '_peaks.counts')

    pieces = [
        'featureCounts -T 5',
        '-a', saf,
        '-F SAF',
        '-s 0',
        '-o', outfile,
        bam,
    ]
    submission = bsub(' '.join(pieces), mem, gtmp, docker_image,
                      'featureCounts')

    # Both streams are piped; the caller is responsible for draining them.
    process = subprocess.Popen(submission,
                               shell=True,
                               stdout=subprocess.PIPE,
                               stderr=subprocess.PIPE)

    return ((sample_id, outfile), process)
Esempio n. 3
0
def run_homer(target_set,
              bg_set=None,
              mem=25,
              gtmp=8,
              docker_image="apollodorus/homer:mm10"):
    """Run HOMER ``findMotifsGenome.pl`` through ``bsub`` and wait for it.

    Everything except the target/background sets is read from the
    module-level ``args`` namespace (``build``, ``output_dir``,
    ``region_size``, ``mismatches``, ``motifs``, ``cpus``,
    ``preparsed_dir``, ``debug``).

    Args:
        target_set: Peak file given to ``findMotifsGenome.pl`` as the
            target set.
        bg_set: Optional background file; when truthy it is appended as
            ``-bg <bg_set>``.
        mem: Forwarded unchanged to ``bsub``.
        gtmp: Forwarded unchanged to ``bsub``.
        docker_image: Forwarded unchanged to ``bsub``.

    Returns:
        ``args.output_dir``.

    Raises:
        subprocess.CalledProcessError: If the submitted command exits with
            a non-zero status.
    """
    # peak_features format:
    #  1 - Peak ID, 2- chromosome, 3- start, 4-end, 5-strand

    parts = [
        'findMotifsGenome.pl', target_set,
        args.build,
        args.output_dir,
        # str() keeps this working whether argparse yields an int or a
        # string, matching the handling of the other numeric options.
        '-size', str(args.region_size),
        '-mis', str(args.mismatches),
        '-S', str(args.motifs),
        '-p', str(args.cpus),
        '-preparsedDir', args.preparsed_dir,
    ]
    if bg_set:
        parts.extend(['-bg', bg_set])

    # NOTE(review): the job label 'annovar' looks copied from the annotation
    # step; it is kept unchanged here - confirm before renaming.
    cmd = bsub(' '.join(parts), mem, gtmp, docker_image, 'annovar',
               args.debug)
    subprocess.check_call(cmd, shell=True)

    return args.output_dir
def peak_summit_routine(bam, refset, sample_id, tmpdir):
    """Produce the summit-coverage BED for one sample.

    First computes peak coverage into
    ``<tmpdir>/<sample_id>_peakCoverage.bed`` via ``get_peak_coverage``,
    then runs ``peakheatmap_summit_bed.py`` (located next to this file)
    through ``bsub`` to write
    ``<args.output_dir>/<sample_id>.summitCoverage.bed``, and waits for
    that command to finish.

    Args:
        bam: BAM file passed to ``get_peak_coverage``.
        refset: Reference interval set passed to ``get_peak_coverage``.
        sample_id: Prefix used for both output file names.
        tmpdir: Directory for the intermediate coverage BED.

    Raises:
        subprocess.CalledProcessError: If the submitted command exits with
            a non-zero status.
    """
    coverage_bed = os.path.join(tmpdir, sample_id + '_peakCoverage.bed')

    # Per the original author's note, this call returns only after the
    # bedtools coverage step has completed.
    get_peak_coverage(bam, refset, coverage_bed, tmpdir=tmpdir)

    here = os.path.dirname(os.path.abspath(__file__))
    summit_script = os.path.join(here, 'peakheatmap_summit_bed.py')
    summit_bed = os.path.join(args.output_dir,
                              sample_id + ".summitCoverage.bed")

    submission = bsub('python3 {} {} -o {}'.format(summit_script,
                                                   coverage_bed,
                                                   summit_bed),
                      mem=30,
                      gtmp=4,
                      docker_image='apollodorus/bioinf:pr',
                      job_name='summitBed')
    subprocess.check_call(submission, shell=True)

    print('Finished peak summit BED for "{}"'.format(sample_id))
Esempio n. 5
0
def run_bamCovereage(bam, outfile, mem=25, gtmp=4, docker_image="quay.io/bgruening/galaxy-deeptools"):
    """Start a deepTools bamCoverage job through ``bsub`` without waiting.

    The (misspelled) function name is kept as-is so existing callers keep
    working.

    Args:
        bam: Input BAM file (``-b``).
        outfile: Output coverage file (``-o``).
        mem: Forwarded unchanged to ``bsub``.
        gtmp: Forwarded unchanged to ``bsub``.
        docker_image: Forwarded unchanged to ``bsub``.

    Returns:
        The ``subprocess.Popen`` handle of the launched command.
    """
    # NOTE(review): the command is invoked as 'deeptools bamCoverage';
    # confirm the image provides that entry point rather than a bare
    # 'bamCoverage' executable.
    cmd = 'deeptools bamCoverage -b {} -o {}'.format(bam, outfile)

    cmd = bsub(cmd, mem, gtmp, docker_image, 'bamCoverage')
    return subprocess.Popen(cmd, shell=True)
Esempio n. 6
0
def run_bamCoverage(bam,
                    outfile,
                    mem=20,
                    gtmp=4,
                    docker_image="apollodorus/bioinf:pr"):
    """Start a CPM-normalized ``bamCoverage`` job (bin size 10) via ``bsub``.

    The command is launched with ``subprocess.Popen`` and not waited on.

    Args:
        bam: Input BAM file (``-b``).
        outfile: Output coverage file (``-o``).
        mem: Forwarded unchanged to ``bsub``.
        gtmp: Forwarded unchanged to ``bsub``.
        docker_image: Forwarded unchanged to ``bsub``.

    Returns:
        The ``subprocess.Popen`` handle of the launched command.
    """
    template = 'bamCoverage --normalizeUsing CPM --binSize 10 -b {} -o {}'
    tool_cmd = template.format(bam, outfile)
    submission = bsub(tool_cmd, mem, gtmp, docker_image, 'genomeCoverage')
    return subprocess.Popen(submission, shell=True)
Esempio n. 7
0
def run_bam2split(bam, mem=25, gtmp=8, docker_image="apollodorus/dnase2tf:1.0.1"):
    """Start a featureCounts job for ``bam`` via ``bsub`` without waiting.

    The sample id is the part of the BAM file name before the first
    underscore; the counts file is ``<sample_id>b.counts``.

    Args:
        bam: Path of the BAM file to count.
        mem: Forwarded unchanged to ``bsub``.
        gtmp: Forwarded unchanged to ``bsub``.
        docker_image: Forwarded unchanged to ``bsub``. The default uses the
            ``<repository>/<name>:<tag>`` form; an image reference may carry
            only one tag separator.

    Returns:
        ``((sample_id, outfile), process)`` where ``process`` is the
        ``subprocess.Popen`` handle with stdout and stderr piped.
    """
    sample_id = os.path.split(bam)[1].split('_')[0]
    fn = sample_id + 'b.counts'

    # NOTE(review): ``tmpdir`` and ``saf`` are not parameters of this
    # function; they must resolve at module scope or this raises NameError.
    # Confirm where they are meant to come from.
    outfile = os.path.join(os.path.abspath(tmpdir), fn)

    cmd = 'featureCounts -T 5' \
      + ' -a ' + saf \
      + ' -F SAF' \
      + ' -s 0' \
      + ' -o ' + outfile + ' ' \
      + bam

    cmd = bsub(cmd, mem, gtmp, docker_image, 'featureCounts')
    po = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)

    return ((sample_id, outfile), po)
Esempio n. 8
0
def run_gene_annotation(avinput,
                        dist,
                        build,
                        db,
                        outdir,
                        mem=25,
                        gtmp=8,
                        docker_image="apollodorus/annovar:latest"):
    """Start an ANNOVAR gene-annotation job via ``bsub`` without waiting.

    The output prefix is ``<outdir>/<prefix>_annot`` where ``prefix`` is
    the part of the input file name before the first underscore.

    Args:
        avinput: ANNOVAR input file.
        dist: Value passed to ``-neargene`` (converted with ``str``).
        build: Genome build passed to ``-build``.
        db: Database location, placed last on the command line.
        outdir: Directory for the annotation output.
        mem: Forwarded unchanged to ``bsub``.
        gtmp: Forwarded unchanged to ``bsub``.
        docker_image: Forwarded unchanged to ``bsub``.

    Returns:
        ``(process, path)`` where ``process`` is the ``subprocess.Popen``
        handle (stdout piped) and ``path`` is the output prefix with
        ``.variant_function`` appended.
    """
    input_name = os.path.basename(avinput)
    out_prefix = os.path.join(outdir, input_name.split('_')[0] + '_annot')

    pieces = [
        'annotate_variation.pl',
        '-out', out_prefix,
        '-build', build,
        avinput,
        '--geneanno',
        '-neargene', str(dist),
        '-dbtype refGene',
        db,
    ]
    submission = bsub(' '.join(pieces), mem, gtmp, docker_image, 'annovar')
    process = subprocess.Popen(submission, shell=True, stdout=subprocess.PIPE)

    return (process, out_prefix + '.variant_function')
Esempio n. 9
0
# Output directory option; defaults to the current directory.
parser.add_argument('-o', '--output_dir', default='.', help='')
args = parser.parse_args()

# Convert the peak BED and output directory arguments to absolute paths.
args.peak_bed = os.path.abspath(args.peak_bed)
args.output_dir = os.path.abspath(args.output_dir)

def run_find_motif_bed(find_motif_file):
    """Build the ``bsub`` command for one motif file and print it.

    The command runs ``footprinting_motif_bed.py`` (looked up next to this
    file) with ``args.peak_bed``, ``find_motif_file`` and ``args.fasta``
    as positional arguments and ``-o args.output_dir``.

    Args:
        find_motif_file: Path of the motif file to scan.
    """
    # Resolve the helper script relative to this module, not to the current
    # working directory.
    script = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                          'footprinting_motif_bed.py')
    cmd = 'python3 ' + script \
        + ' ' + args.peak_bed \
        + ' ' + find_motif_file \
        + ' ' + args.fasta \
        + ' -o ' + args.output_dir

    # NOTE(review): assumes bsub names its job-label parameter ``job_name``
    # - confirm against its definition.
    cmd = bsub(cmd,
               mem=8,
               gtmp=4,
               docker_image='apollodorus/bioinf:pr',
               job_name='motif_scan')
    print(cmd)
    # Dry run: the submission call below is left disabled, as in the
    # original; re-enable it to actually run the job.
    # subprocess.check_call(cmd, shell=True)

def main():
    """Submit one ``run_find_motif_bed`` task per entry in the motif list.

    Reads ``args.peak_motif_list`` (one entry per line) and hands each
    entry to a process pool.
    """


    # Strip surrounding whitespace, then split into one entry per line.
    with open(args.peak_motif_list) as fp:
        motif_files = fp.read().strip().split('\n')

    futures = list()
    executor = concurrent.futures.ProcessPoolExecutor(max_workers=60)
    
    # NOTE(review): nothing here waits on these futures or shuts the
    # executor down - confirm results/exceptions are collected elsewhere.
    for mf in motif_files:
        futures.append(executor.submit(run_find_motif_bed, mf))