def run_bowtie(directory, dependencies):
    """Run bowtie in directory against the paternal and maternal references.

    Two job files are created (one for ``../pat/PatRef``, one for
    ``../mat/MatRef``) and then both are submitted.

    :directory:    Directory to run in; the process changes into it while
                   the job files are created and submitted, and always
                   changes back afterwards.
    :dependencies: list of filter jobs for this directory.
    :returns:      list with the two results of sl.monitor_submit, paternal
                   first and maternal second.
    """
    olddir = os.path.abspath('.')
    os.chdir(directory)
    try:
        job_files = []
        for parent, reference in (('pat', '../pat/PatRef'),
                                  ('mat', '../mat/MatRef')):
            # NOTE(review): --chunkmbs and --maxins are each passed twice,
            # with conflicting --maxins values (2000 then 1000). Both are
            # kept exactly as before; confirm which one is intended.
            command = ('bowtie --best --strata ' +
                       '-p 16 --chunkmbs 2000 --maxins 2000 -m 1 -q ' +
                       reference + ' ' +
                       # The trailing space matters: without it the command
                       # contained '--maxins 1000-1 in.1.filtered.fastq' and
                       # the '-1' mate flag was lost.
                       '--chunkmbs 2000 --maxins 1000 ' +
                       '-1 in.1.filtered.fastq -2 in.2.filtered.fastq ' +
                       '{0}_alignment.bam 2> {0}_alignment.log'.format(parent))
            job_files.append(make_job_file(command, parent + '_bowtie',
                                           '24:00:00', 16,
                                           modules=['bowtie1']))
        # Submit only after both job files exist, as before.
        return [sl.monitor_submit(job_file, dependencies, max_count=MAX_JOBS)
                for job_file in job_files]
    finally:
        # Restore the working directory even if job creation/submission fails.
        os.chdir(olddir)
Exemple #2
0
def wasp_step_2(name, remapped, pipeline=None, dependency=None):
    """Submit the filter_remapped_reads.py job that follows the remapping.

    :name:       The name of the original mapped bam or sam; a trailing
                 '.bam' or '.sam' is removed and the remainder is used as the
                 prefix for every file name.
    :remapped:   The file created by the second mapping.
    :pipeline:   The path to the WASP pipeline. When omitted the script is
                 invoked by bare name.
    :dependency: The job number of the remapping step.
    :returns:    The job number.

    """
    script = 'filter_remapped_reads.py'
    if pipeline:
        script = os.path.join(os.path.abspath(pipeline), script)

    # Strip the alignment extension, if there is one.
    if name.endswith(('.bam', '.sam')):
        prefix = name.rsplit('.', 1)[0]
    else:
        prefix = name

    logme.log('Submitting wasp step 2 for {}'.format(prefix), level='debug')

    arguments = (script,
                 prefix + '.to.remap.bam',
                 remapped,
                 prefix + '.remap.keep.bam',
                 prefix + '.to.remap.num.gz')
    job_file = slurmy.make_job_file(
        'python2 {} {} {} {} {}'.format(*arguments),
        prefix + '_step2', '16:00:00', 8, '30000', partition=PARTITION,
        modules=['python/2.7.5'])
    return slurmy.monitor_submit(job_file, dependency, MAX_JOBS)
def clean_star(directory, dependencies):
    """Dump the STAR BAM output in directory to SAM text with samtools view.

    For each of 'pat' and 'mat' a job is created that runs ``samtools view``
    on ``<parent>_alignment_Aligned.sortedByCoord.out.bam`` and redirects the
    output to ``<parent>_alignment.sam``.

    :directory:    Directory to run in; the process changes into it while
                   the job files are created and submitted, and always
                   changes back afterwards.
    :dependencies: Passed to sl.monitor_submit for both jobs.
    :returns:      list with the two results of sl.monitor_submit, paternal
                   first and maternal second.
    """
    olddir = os.path.abspath('.')
    os.chdir(directory)
    try:
        job_files = [
            make_job_file('samtools view ' +
                          parent + '_alignment_Aligned.sortedByCoord.out.bam' +
                          ' > ' + parent + '_alignment.sam',
                          parent + '_clean', '08:00:00', 2, mem=10000,
                          modules=['samtools'])
            for parent in ('pat', 'mat')
        ]
        return [sl.monitor_submit(job_file, dependencies, max_count=MAX_JOBS)
                for job_file in job_files]
    finally:
        # Restore the working directory even if job creation/submission fails.
        os.chdir(olddir)
def clean_bowtie(directory, dependencies):
    """Sort the bowtie output in directory and convert it to SAM text.

    For each of 'pat' and 'mat' a job is created that sorts
    ``<parent>_alignment.bam``, writes the sorted records to
    ``<parent>_alignment.sam`` and then removes both BAM files.

    The three commands are chained with ``&&`` so the ``rm`` only runs when
    the sort and the conversion both succeeded; previously they were merely
    newline-separated and a failed sort still deleted the alignment.

    :directory:    Directory to run in; the process changes into it while
                   the job files are created and submitted, and always
                   changes back afterwards.
    :dependencies: Passed to sl.monitor_submit for both jobs.
    :returns:      list with the two results of sl.monitor_submit, paternal
                   first and maternal second.
    """
    olddir = os.path.abspath('.')
    os.chdir(directory)
    try:
        job_files = []
        for parent in ('pat', 'mat'):
            # NOTE(review): 'samtools sort in.bam out_prefix' is the legacy
            # (pre-1.x) calling convention; confirm it matches the samtools
            # module that gets loaded.
            command = ('samtools sort {0}_alignment.bam {0}_sorted &&\n' +
                       'samtools view {0}_sorted.bam > ' +
                       '{0}_alignment.sam &&\n' +
                       'rm {0}_alignment.bam {0}_sorted.bam').format(parent)
            # Both jobs do identical work, so both request the same resources
            # (the maternal job used to request 1 where the paternal used 2).
            job_files.append(make_job_file(command, parent + '_clean',
                                           '08:00:00', 2,
                                           modules=['samtools']))
        return [sl.monitor_submit(job_file, dependencies, max_count=MAX_JOBS)
                for job_file in job_files]
    finally:
        # Restore the working directory even if job creation/submission fails.
        os.chdir(olddir)
def filter_fastqs(directory):
    """Submit the fastq pre-processing jobs for both mates.

    Runs ``number_fastq_records.py`` on ``in.1.fastq`` and ``in.2.fastq``,
    writing ``in.1.filtered.fastq`` and ``in.2.filtered.fastq``.

    NOTE(review): this step was previously described as "Trim out all reads
    with 'N's", but the script invoked is named number_fastq_records.py --
    confirm what the script actually does.

    :directory: Directory to run in, absolute path required. The process
                changes into it while the job files are created and
                submitted, and always changes back afterwards.
    :returns:   list of job numbers

    """
    olddir = os.path.abspath('.')
    os.chdir(directory)
    try:
        job_files = [
            make_job_file(
                '/home/dacre/mike_tools/bin/number_fastq_records.py ' +
                '-i in.{0}.fastq -o in.{0}.filtered.fastq'.format(mate),
                'filter{}'.format(mate), '02:00:00', 1, 22000,
                modules='python/3.3.2')
            for mate in (1, 2)
        ]
        # These jobs are submitted without dependencies.
        return [sl.monitor_submit(job_file, max_count=MAX_JOBS)
                for job_file in job_files]
    finally:
        # Restore the working directory even if job creation/submission fails.
        os.chdir(olddir)
def merge(directory, dependencies, type):
    """Run AlleleSeq Merge Step.

    Submits one job running ``MergeBowtie.py`` on ``pat_alignment.sam`` and
    ``mat_alignment.sam``; stdout goes to ``merged_reads.sam`` and stderr to
    ``merged_reads.log``.

    :directory:    Directory to run in; the process changes into it while
                   the job file is created and submitted, and always changes
                   back afterwards.
    :dependencies: Passed to sl.monitor_submit.
    :type:         String inserted into the map file argument, which becomes
                   ``../genome/%s_<type>.map``.
    :returns:      The result of sl.monitor_submit.
    """
    olddir = os.path.abspath('.')
    os.chdir(directory)
    try:
        # The '%s' is not interpolated here: it reaches the command line
        # literally (presumably a placeholder MergeBowtie.py fills in itself
        # -- confirm against the AlleleSeq pipeline).
        map_pattern = '../genome/%s_' + type + '.map'
        command = ('python ' +
                   '../../AlleleSeq_pipeline_v1.2a/MergeBowtie.py ' +
                   'pat_alignment.sam mat_alignment.sam ' +
                   map_pattern + ' > ' +
                   'merged_reads.sam 2> merged_reads.log')
        job_file = make_job_file(command, 'merge', '06:00:00', 1, 8000,
                                 modules='python/2.7.5')
        return sl.monitor_submit(job_file, dependencies, max_count=MAX_JOBS)
    finally:
        # Restore the working directory even if job creation/submission fails.
        os.chdir(olddir)
Exemple #7
0
def merge_bams(name, dependency=None):
    """Use samtools to merge two bam files and sort the result.

    :name:       Name of the original bam or sam; a trailing '.bam' or
                 '.sam' is removed and the remainder prefixes all file names.
    :dependency: Passed to slurmy.monitor_submit.
    :returns:    The result of slurmy.monitor_submit.
    """
    if name.endswith(('.bam', '.sam')):
        prefix = name.rsplit('.', 1)[0]
    else:
        prefix = name

    kept_original = prefix + '.keep.bam'
    kept_remapped = prefix + '.remap.keep.bam'
    unsorted_bam = prefix + '_wasp_final_unsorted.bam'
    sorted_bam = prefix + '_wasp_final.bam'

    # Two-line job script: merge first, then sort the merged file.
    script_lines = [
        'samtools merge -f {} {} {}'.format(unsorted_bam, kept_original,
                                            kept_remapped),
        'samtools sort -o {} {}'.format(sorted_bam, unsorted_bam),
    ]
    job_file = slurmy.make_job_file(
        '\n'.join(script_lines),
        prefix + '_merge', '16:00:00', 4, '26000', partition=PARTITION,
        modules='samtools')
    return slurmy.monitor_submit(job_file, dependency, MAX_JOBS)
def run_star(directory, dependencies):
    """Run STAR in directory against the paternal and maternal genomes.

    Two job files are created (genome dirs ``../pat/pat_star`` and
    ``../mat/mat_star``), both reading ``in.1.filtered.fastq`` and
    ``in.2.filtered.fastq``, and then both are submitted.

    :directory:    Directory to run in; the process changes into it while
                   the job files are created and submitted, and always
                   changes back afterwards.
    :dependencies: list of filter jobs for this directory.
    :returns:      list with the two results of sl.monitor_submit, paternal
                   first and maternal second.
    """
    olddir = os.path.abspath('.')
    os.chdir(directory)
    try:
        job_files = []
        for parent in ('pat', 'mat'):
            command = ('/home/dacre/usr/bin/STAR --runThreadN 16 ' +
                       '--genomeDir ../{0}/{0}_star '.format(parent) +
                       '--readFilesIn in.1.filtered.fastq ' +
                       'in.2.filtered.fastq ' +
                       '--outFilterMultimapNmax 1 ' +
                       '--outFileNamePrefix {}_alignment_ '.format(parent) +
                       '--outSAMtype BAM SortedByCoordinate ' +
                       '--outSAMattributes MD NH ' +
                       '--clip5pNbases 6')
            job_files.append(make_job_file(command, parent + '_star',
                                           '12:00:00', 16, modules=['STAR']))
        # Submit only after both job files exist, as before.
        return [sl.monitor_submit(job_file, dependencies, max_count=MAX_JOBS)
                for job_file in job_files]
    finally:
        # Restore the working directory even if job creation/submission fails.
        os.chdir(olddir)
def count(directory, dependencies, type):
    """Run AlleleSeq Count Step.

    Submits one job running ``SnpCounts.py`` on ``../*snps.txt`` and
    ``merged_reads.sam``; the output file argument is ``<name>.cnt`` where
    ``<name>`` is the last path component of directory.

    :directory:    Directory to run in; the process changes into it while
                   the job file is created and submitted, and always changes
                   back afterwards. A trailing path separator is tolerated.
    :dependencies: Passed to sl.monitor_submit.
    :type:         String inserted into the map file argument, which becomes
                   ``../genome/%s_<type>.map``.
    :returns:      The result of sl.monitor_submit.
    """
    olddir = os.path.abspath('.')
    os.chdir(directory)
    try:
        # normpath drops a trailing separator; without it basename('dir/')
        # is '' and the output file would be named just '.cnt'.
        name = os.path.basename(os.path.normpath(directory))
        # The '%s' is not interpolated here: it reaches the command line
        # literally (presumably a placeholder SnpCounts.py fills in itself
        # -- confirm against the AlleleSeq pipeline).
        map_pattern = '../genome/%s_' + type + '.map'
        command = ('python ' +
                   '../../AlleleSeq_pipeline_v1.2a/SnpCounts.py ' +
                   '../*snps.txt merged_reads.sam ' +
                   map_pattern + ' {}.cnt '.format(name))
        job_file = make_job_file(command, 'count', '06:00:00', 16, 32000,
                                 modules='python/2.7.5')
        return sl.monitor_submit(job_file, dependencies, max_count=MAX_JOBS)
    finally:
        # Restore the working directory even if job creation/submission fails.
        os.chdir(olddir)
Exemple #10
0
def wasp_step_1(fl, snp_dir, pipeline=None, dependency=None):
    """Submit a find_intersecting_snps.py job for fl.

    :fl:         The sam or bam file to run on; also used, with '_step1'
                 appended, as the job name.
    :snp_dir:    The SNP directory required by WASP.
    :pipeline:   The path to the WASP pipeline. When omitted the script is
                 invoked by bare name.
    :dependency: Passed to slurmy.monitor_submit.
    :returns:    The job number.
    """
    script = 'find_intersecting_snps.py'
    if pipeline:
        script = os.path.join(os.path.abspath(pipeline), script)

    logme.log('Submitting wasp step 1 for {}'.format(fl), level='debug')

    job_file = slurmy.make_job_file(
        'python2 {} -m 1000000 {} {}'.format(script, fl, snp_dir),
        fl + '_step1', '16:00:00', 8, '30000', partition=PARTITION,
        modules=['python/2.7.5'])
    return slurmy.monitor_submit(job_file, dependency, MAX_JOBS)
Exemple #11
0
def run_mapping(name, infiles, genome, algorithm='STAR', gtf=None,
                dependency=None):
    """Run read mapping using either tophat or STAR.

    :name:       A name prefix to use for the output.
    :infiles:    List of fastqs, space separated for paired end, comma
                 separated for batches. Must be a string.
                 Note: if gzipped and using STAR, they will be unzipped
                 before mapping and rezipped afterwards. Only the files that
                 were gzipped to begin with are rezipped.
    :genome:     The genome or STAR genome index.
    :algorithm:  STAR or tophat. Case ignored.
    :gtf:        A GTF of genes for tophat, not required.
    :dependency: Passed to slurmy.monitor_submit.
    :returns:    Job number of mapping step and name of output bam.
    :raises ValueError: if algorithm is neither STAR nor tophat.

    """
    aligner = algorithm.lower()

    if aligner == 'star':
        cmnd = []
        new_list = []
        unzipped = []  # decompressed names, in the order they were seen
        for fl in infiles.split(' '):
            batch = []
            for i in fl.split(','):
                if i.endswith('.gz'):
                    cmnd.append('/home/dacre/usr/bin/unpigz -p 16 ' + i)
                    i = i[:-3]  # STAR reads the decompressed file
                    unzipped.append(i)
                batch.append(i)
            new_list.append(','.join(batch))
        infiles = ' '.join(new_list)

        cmnd.append('/home/dacre/usr/bin/STAR --runThreadN 16 ' +
                    '--genomeDir {} '.format(genome) +
                    '--readFilesIn {} '.format(infiles) +
                    '--outFilterMultimapNmax 1 ' +
                    '--outFileNamePrefix {} '.format(name) +
                    '--outSAMtype BAM SortedByCoordinate ' +
                    '--outSAMattributes MD NH ' +
                    '--clip5pNbases 6 ' +
                    '--limitBAMsortRAM {}'.format(STAR_MEM))

        # Re-compress only what was decompressed above, so inputs that were
        # plain fastq to begin with are left as they were.
        cmnd.extend('/home/dacre/usr/bin/pigz -p 16 {}'.format(i)
                    for i in unzipped)

        command = '\n'.join(cmnd)
        outbam  = name + 'Aligned.sortedByCoord.out.bam'
        modules = ['STAR']

    elif aligner == 'tophat':
        outdir  = name + '_tophat'
        outbam  = name + '_accepted_hits.bam'
        command = 'tophat --microexon-search -o {}'.format(outdir)
        if gtf:
            command += ' -G ' + gtf
        command += ' -p 16 {} {}\n'.format(genome, infiles)
        # Move tophat's output next to the other results under outbam.
        command += 'mv {}/accepted_hits.bam {}'.format(outdir, outbam)
        modules = ['python/2.7.5', 'tophat']

    else:
        raise ValueError('Invalid algorithm: {}'.format(algorithm))

    # NOTE(review): the job is created with STAR_CORES for both aligners
    # while the commands hard-code 16 threads -- confirm these agree.
    job_file = slurmy.make_job_file(
        command, name, '24:00:00', STAR_CORES, partition=PARTITION,
        modules=modules)
    return (slurmy.monitor_submit(job_file, dependency, MAX_JOBS), outbam)