Ejemplo n.º 1
0
def _run_logged_command(cmd, logfile=None):
    """
    Runs a command through the shell, optionally recording the command line and its output in a logfile.
    :param cmd: Command line to run. It is handed to the shell, so shell redirections inside it are honoured.
    :param logfile: Path to a file the command line and the command's stdout/stderr get appended to. If not
    specified, nothing is logged and the command inherits the stdout/stderr of this process.
    :return: Exit code returned by subprocess.call.
    """
    if not logfile:
        return subprocess.call(cmd, shell=True)
    with open(logfile, 'a+') as f:
        f.write('Command: {}\n'.format(cmd))
        # The child process writes directly to the underlying file descriptor, bypassing Python's write buffer.
        # Flush first so the 'Command:' line ends up in the log before the command's output instead of after it.
        f.flush()
        return subprocess.call(cmd, shell=True, stderr=f, stdout=f)


def generate_bedfile(ref_fasta, kmers, output_bedfile, tmpdir='bedgentmp', threads='2', logfile=None):
    """
    Given a reference FASTA file and a fasta-formatted set of kmers, will generate a coverage bedfile for the reference
    FASTA by mapping the kmers back to the FASTA.
    :param ref_fasta: Path to reference FASTA.
    :param kmers: Path to FASTA-formatted kmer file.
    :param output_bedfile: Path to output bedfile.
    :param tmpdir: Temporary directory to store intermediate files. Will be deleted upon method completion, including
    when one of the steps raises an exception.
    :param threads: Number of threads to use for analysis. Must be a string.
    :param logfile: Path to a file that the samtools/bedtools command lines and their output get appended to.
    If not specified, the commands write to the stdout/stderr of this process.
    """
    if not os.path.isdir(tmpdir):
        os.makedirs(tmpdir)
    bamfile = os.path.join(tmpdir, 'out.bam')
    sorted_bamfile = os.path.join(tmpdir, 'out_sorted.bam')
    try:
        # First, need to generate a bam file - align the kmers to a reference fasta genome.
        bbtools.bbmap(ref_fasta, kmers, bamfile, threads=threads, ambig='best',
                      perfectmode='true')
        # Once the bam file is generated, turn it into a sorted bamfile so that bedtools can work with it.
        cmd = 'samtools sort {bamfile} -o {sorted_bamfile}'.format(bamfile=bamfile,
                                                                   sorted_bamfile=sorted_bamfile)
        _run_logged_command(cmd, logfile)
        # Use bedtools to get genome coverage, so that we know what to mask.
        # The shell redirection sends bedtools' stdout to the bedfile, so only its stderr can reach the logfile.
        cmd = 'bedtools genomecov -ibam {sorted_bamfile} -bga' \
              ' > {output_bed}'.format(sorted_bamfile=sorted_bamfile,
                                       output_bed=output_bedfile)
        _run_logged_command(cmd, logfile)
    finally:
        # Clean up the intermediate files even if mapping or one of the commands blew up part-way through.
        shutil.rmtree(tmpdir)
Ejemplo n.º 2
0
def test_bbmap_command_paired():
    """
    Checks the command reported by bbtools.bbmap when only the forward (R1) read file is supplied: the expected
    command also carries an in2= entry for the matching R2 file.
    """
    reference = 'tests/dummy_fasta/test.fasta'
    forward_reads = 'tests/dummy_fastq/test_R1.fastq'
    bam_path = 'tests/out.bam'
    expected_cmd = 'bbmap.sh ref={ref} in={r1} in2=tests/dummy_fastq/test_R2.fastq out={bam} nodisk'.format(
        ref=reference, r1=forward_reads, bam=bam_path)

    _, _, reported_cmd = bbtools.bbmap(reference=reference,
                                       forward_in=forward_reads,
                                       out_bam=bam_path,
                                       returncmd=True)

    assert reported_cmd == expected_cmd
    # Get rid of the bam file left in the tests folder.
    os.remove(bam_path)
Ejemplo n.º 3
0
def test_bbmap_command_paired_kwargs():
    """
    Checks that extra keyword arguments given to bbtools.bbmap (threads, ordered) show up as key=value entries in
    the reported command, alongside the base paired-end command.
    """
    reference = 'tests/dummy_fasta/test.fasta'
    forward_reads = 'tests/dummy_fastq/test_R1.fastq'
    bam_path = 'tests/out.bam'
    base_cmd = 'bbmap.sh ref={ref} in={r1} in2=tests/dummy_fastq/test_R2.fastq out={bam} nodisk'.format(
        ref=reference, r1=forward_reads, bam=bam_path)

    _, _, reported_cmd = bbtools.bbmap(reference=reference,
                                       forward_in=forward_reads,
                                       out_bam=bam_path,
                                       returncmd=True,
                                       threads='3',
                                       ordered='t')

    # Only membership is checked, so the position of the extra options within the command does not matter.
    expected_fragments = (base_cmd, 'ordered=t', 'threads=3')
    assert all(fragment in reported_cmd for fragment in expected_fragments)
    # Get rid of the bam file left in the tests folder.
    os.remove(bam_path)
Ejemplo n.º 4
0
def create_bam(forward_reads, reverse_reads, reference_fasta, outdir):
    """
    Indexes the reference with samtools faidx, maps the reads to it with bbmap, then sorts and indexes the
    resulting bam file with samtools. Output of every samtools command is handed to write_to_logfile, using
    log.txt inside outdir as the logfile.
    :param forward_reads: Forward reads, given to bbmap as forward_in.
    :param reverse_reads: Reverse reads, given to bbmap as reverse_in.
    :param reference_fasta: Reference FASTA that gets indexed and mapped against.
    :param outdir: Directory where aligned.bam, aligned_sorted.bam and log.txt get written.
    """
    # Developer notes kept from the original implementation: mapping with bbmap may or may not work - results were
    # odd, and bowtie2 with default settings gave really similar results, so parameters still need tuning.
    log_path = os.path.join(outdir, 'log.txt')
    raw_bam = os.path.join(outdir, 'aligned.bam')
    sorted_bam = os.path.join(outdir, 'aligned_sorted.bam')

    def run_and_log(command):
        # Run one command and pass what it produced, plus the command itself, on to the logfile writer.
        stdout, stderr = run_cmd(command)
        write_to_logfile(logfile=log_path, out=stdout, err=stderr, cmd=command)

    run_and_log('samtools faidx {reference_fasta}'.format(reference_fasta=reference_fasta))

    # Limiting to one substitution allowed per read mapped might help - TBD
    bbtools.bbmap(reference=reference_fasta,
                  forward_in=forward_reads,
                  reverse_in=reverse_reads,
                  out_bam=raw_bam,
                  subfilter=1)

    # Also sort and index the bamfile so pysam will be happy with us.
    run_and_log('samtools sort {bamfile} -o {sorted_bamfile}'.format(bamfile=raw_bam,
                                                                     sorted_bamfile=sorted_bam))
    run_and_log('samtools index {sorted_bamfile}'.format(sorted_bamfile=sorted_bam))