Ejemplo n.º 1
0
def mapReads(infiles, outfile):
    '''Map reads to the genome using BWA.

    Builds a command statement with a default-configured
    ``PipelineMapping.BWA`` mapper and then calls ``P.run()``.

    infiles
        Input handed to the mapper's ``build()`` as the only element
        of a one-element tuple.
    outfile
        Output target handed to ``build()``.
    '''
    # NOTE(review): to_cluster, job_options and statement are never passed
    # on explicitly, and P.run() is called without arguments, so it
    # presumably reads them from this function's local scope by name.
    # Confirm against P.run() before renaming or removing any of them.
    to_cluster = True
    # Job option string; the number after "-pe dedicated" is taken from
    # the configured BWA thread count.
    job_options = "-pe dedicated %i -R y" % PARAMS["bwa_threads"]
    m = PipelineMapping.BWA()
    statement = m.build((infiles,), outfile)
    P.run()
Ejemplo n.º 2
0
def mapReads(infiles, outfile):
    '''Map reads to the genome using BWA.

    The original description of this task lists the steps as: BWA
    alignment (SAM output), conversion to BAM, then sorting and indexing
    of the BAM file. None of these steps is spelled out in this
    function. NOTE(review): they presumably live in the statement
    returned by ``PipelineMapping.BWA.build()`` -- confirm there.

    infiles
        Input handed to the mapper's ``build()`` as the only element
        of a one-element tuple.
    outfile
        Output target handed to ``build()``; its basename is also used
        to derive ``track``.
    '''
    # NOTE(review): to_cluster, job_options, track and statement are not
    # passed on explicitly; P.run() takes no arguments here, so it
    # presumably reads them from this function's local scope by name.
    # Confirm against P.run() before renaming or removing any of them.
    to_cluster = USECLUSTER
    job_options = "-pe dedicated 2 -R y -l mem_free=8G"
    # Derived from the output file name via P.snip with the ".bam" suffix
    # (presumably the basename with ".bam" stripped -- verify P.snip).
    track = P.snip(os.path.basename(outfile), ".bam")
    # NOTE(review): the keyword is ``remove_unique`` while the config key is
    # ``bwa_remove_non_unique`` -- confirm the keyword name against the BWA
    # constructor of the PipelineMapping version in use.
    m = PipelineMapping.BWA(remove_unique=PARAMS["bwa_remove_non_unique"])
    statement = m.build((infiles, ), outfile)
    P.run()
Ejemplo n.º 3
0
def mapReads(infiles, outfile):
    '''Map reads to the genome using BWA with alignment statistics and
    deduplication switched on.

    The mapper is created with ``align_stats=True`` and ``dedup=True``.
    The original description of this task additionally lists SAM output,
    conversion to BAM, sorting/indexing and the use of Picard for
    deduplication. NOTE(review): none of that is visible in this
    function; it presumably lives in the statement returned by
    ``PipelineMapping.BWA.build()`` -- confirm there.

    infiles
        Input handed to the mapper's ``build()`` as the only element
        of a one-element tuple.
    outfile
        Output target handed to ``build()``; its basename is also used
        to derive ``track``.
    '''

    # NOTE(review): job_options, track and statement are not passed on
    # explicitly; P.run() takes no arguments here, so it presumably reads
    # them from this function's local scope by name. Confirm against
    # P.run() before renaming or removing any of them.
    job_options = "-pe dedicated 2 -l mem_free=8G"
    # Derived from the output file name via P.snip with the ".bam" suffix
    # (presumably the basename with ".bam" stripped -- verify P.snip).
    track = P.snip(os.path.basename(outfile), ".bam")
    # NOTE(review): the keyword is ``remove_unique`` while the config key is
    # ``bwa_remove_non_unique`` -- confirm the keyword name against the BWA
    # constructor of the PipelineMapping version in use.
    m = PipelineMapping.BWA(remove_unique=PARAMS["bwa_remove_non_unique"],
                            align_stats=True,
                            dedup=True)
    statement = m.build((infiles, ), outfile)
    P.run()
Ejemplo n.º 4
0
def mapReads(infile, outfile):
    '''Map reads to the genome using BWA (``aln`` or ``mem``).

    The mapper class is selected by ``PARAMS["bwa_algorithm"]``:
    ``"aln"`` uses ``PipelineMapping.BWA`` and ``"mem"`` uses
    ``PipelineMapping.BWAMEM``. Both are created with
    ``remove_non_unique`` taken from ``PARAMS["bwa_remove_non_unique"]``
    and ``strip_sequence=False``. The statement built by the mapper is
    printed and then ``P.run()`` is called.

    NOTE(review): any post-processing (sorting, indexing, statistics,
    deduplication) is whatever the mapper's ``build()`` puts into the
    statement; this function does not request it explicitly.

    infile
        Input handed to the mapper's ``build()`` as the only element
        of a one-element tuple.
    outfile
        Output target handed to ``build()``.

    Raises ``ValueError`` if ``PARAMS["bwa_algorithm"]`` is neither
    ``"aln"`` nor ``"mem"``.
    '''

    # NOTE(review): job_threads, job_memory and statement are not passed on
    # explicitly; P.run() takes no arguments here, so it presumably reads
    # them from this function's local scope by name. Confirm against
    # P.run() before renaming or removing any of them.
    job_threads = PARAMS["bwa_threads"]
    job_memory = PARAMS["bwa_memory"]

    if PARAMS["bwa_algorithm"] == "aln":
        m = PipelineMapping.BWA(
            remove_non_unique=PARAMS["bwa_remove_non_unique"],
            strip_sequence=False)

    elif PARAMS["bwa_algorithm"] == "mem":
        m = PipelineMapping.BWAMEM(
            remove_non_unique=PARAMS["bwa_remove_non_unique"],
            strip_sequence=False)
    else:
        # Report the configured value itself; the previous code formatted
        # the message with an undefined name and so failed with NameError
        # instead of the intended ValueError.
        raise ValueError(
            "bwa algorithm '%s' not known" % PARAMS["bwa_algorithm"])

    statement = m.build((infile, ), outfile)
    # Echo the generated command before it is run.
    print(statement)
    P.run()
Ejemplo n.º 5
0
def mapBWAAgainstGenesetGSE53638(infiles, outfile):
    '''Map reads using BWA against transcriptome data.

    BWA is parameterised according to Soumillon et al. 2014:
    -l 24 = seed length - 24 bp
    -k 2 = default number of mismatches allowed in seed - 2
    -n 0.04 = default percentage of mismatches allowed across read - 4%

    Non-unique alignments will NOT be removed from the final bam
    (the mapper is created with ``remove_non_unique=0``).

    infiles
        A pair ``(infile, reference)``. ``infile`` is handed to the
        mapper's ``build()``; ``reference`` is a path whose directory
        becomes ``bwa_index_dir`` and whose basename (with the ``.sa``
        suffix handled by ``P.snip``) becomes ``genome``.
    outfile
        Output target handed to ``build()``.
    '''

    infile, reference = infiles
    # NOTE(review): none of the job_*/bwa_*/genome locals below is passed on
    # explicitly; P.run() takes no arguments here, so it (and the statement
    # built by the mapper) presumably picks them up from this function's
    # local scope by name. Confirm against P.run() before renaming or
    # removing any of them.
    job_threads = 2
    job_options = "-l mem_free=1.9G"
    bwa_aln_options = "-l 24 -k 2 -n 0.04"
    # Absolute path of the directory containing the reference file.
    bwa_index_dir = os.path.abspath(os.path.dirname(reference))
    # Reference basename run through P.snip with ".sa" (presumably the
    # index prefix with the suffix removed -- verify P.snip).
    genome = P.snip(os.path.basename(reference), ".sa")
    # Keep the aligner thread count in step with the job's thread count.
    bwa_threads = job_threads
    bwa_samse_options = ""
    m = PipelineMapping.BWA(remove_non_unique=0, strip_sequence=0, set_nh=1)

    statement = m.build((infile, ), outfile)
    P.run()
Ejemplo n.º 6
0
def mapReadsWithBwaAgainstExpectedContigs(infiles, outfile):
    '''Map reads against the expected contigs with BWA.

    The track name is derived from ``infiles[0]``: the part of the path
    matched by the pattern ``.*R[0-9]*`` gets the suffix
    ``.filtered.contigs.expected.fa`` appended and is reduced to its
    basename (``genome``); ``track`` is ``genome`` run through
    ``P.snip`` with that same suffix. The fastq file to map is the first
    entry of ``infiles[1]`` for which
    ``P.snip(entry, ".fastq.1.gz") == track``.

    infiles
        A two-element sequence: ``infiles[0]`` is a path used to derive
        the track name, ``infiles[1]`` is an iterable of fastq paths.
    outfile
        Output target handed to the mapper's ``build()``; its directory
        is used as the BWA index directory.

    Raises ``ValueError`` if no track name can be derived from
    ``infiles[0]`` or if no fastq file in ``infiles[1]`` matches the
    track.
    '''
    # NOTE(review): to_cluster, job_options, genome and the bwa_* locals are
    # not passed on explicitly; P.run() takes no arguments here, so it (and
    # the statement built by the mapper) presumably picks them up from this
    # function's local scope by name. Confirm against P.run() before
    # renaming or removing any of them.
    to_cluster = True

    index_dir = os.path.dirname(outfile)

    # Fail with a clear message instead of an AttributeError on None when
    # the input path does not contain the expected pattern.
    track_match = re.search(".*R[0-9]*", infiles[0])
    if track_match is None:
        raise ValueError(
            "cannot derive track from '%s': pattern '.*R[0-9]*' "
            "does not match" % infiles[0])
    genome = os.path.basename(
        track_match.group(0) + ".filtered.contigs.expected.fa")
    track = P.snip(genome, ".filtered.contigs.expected.fa")

    # Every candidate is still run through P.snip, as before; only the
    # handling of "nothing matched" is made explicit.
    fastq_candidates = [
        infile for infile in infiles[1]
        if P.snip(infile, ".fastq.1.gz") == track
    ]
    if not fastq_candidates:
        raise ValueError(
            "no fastq file matching track '%s' found" % track)
    fastq = fastq_candidates[0]

    job_options = " -l mem_free=%s" % (PARAMS["bwa_memory"])
    bwa_index_dir = index_dir
    bwa_aln_options = PARAMS["bwa_aln_options"]
    bwa_sampe_options = PARAMS["bwa_sampe_options"]
    bwa_threads = PARAMS["bwa_threads"]
    m = PipelineMapping.BWA(remove_non_unique=True)

    statement = m.build((fastq, ), outfile)
    P.run()
Ejemplo n.º 7
0
def mapReads(infiles, outfile):
    '''Map reads to the genome using BWA.

    Builds a command statement with a default-configured
    ``PipelineMapping.BWA`` mapper and then calls ``P.run()``.

    infiles
        Input handed to the mapper's ``build()`` as the only element
        of a one-element tuple.
    outfile
        Output target handed to ``build()``.
    '''
    # NOTE(review): job_threads and statement are not passed on explicitly;
    # P.run() takes no arguments here, so it presumably reads them from this
    # function's local scope by name. Confirm against P.run() before
    # renaming or removing either of them.
    job_threads = PARAMS["bwa_threads"]
    m = PipelineMapping.BWA()
    statement = m.build((infiles, ), outfile)
    P.run()