예제 #1
0
 def _write_counts_tsv(cls, reads1, reads2, outfile):
     """Write a tab-separated read-count summary to outfile.

     The file has a header row (Name, Is_contam, Reads) followed by two
     rows, "Remove_contam_not_run" and "Reads_kept_after_remove_contam".
     Both rows carry Is_contam "0" and a Reads value of twice the number
     returned by fqtools.count for the reads1/reads2 pair.
     """
     # Count before opening, so a counting failure does not create outfile.
     pair_count = fqtools.count([reads1, reads2])
     row_labels = ('Remove_contam_not_run', 'Reads_kept_after_remove_contam')
     with open(outfile, 'w') as tsv:
         tsv.write('\t'.join(('Name', 'Is_contam', 'Reads')) + '\n')
         for label in row_labels:
             print(label, '0', 2 * pair_count, sep='\t', file=tsv)
예제 #2
0
 def _write_counts_tsv(cls, reads1, reads2, outfile):
     """Write the read counts for a reads1/reads2 pair as a TSV file.

     Output columns are Name, Is_contam and Reads. Two data rows are
     written ("Remove_contam_not_run" and "Reads_kept_after_remove_contam"),
     each with Is_contam "0" and Reads equal to 2 * fqtools.count(...).
     """
     # fqtools.count is called first so that outfile is only created once
     # the count is known.
     pairs = fqtools.count([reads1, reads2])
     with open(outfile, "w") as out:
         print("Name", "Is_contam", "Reads", sep="\t", file=out)
         for row_name in ("Remove_contam_not_run", "Reads_kept_after_remove_contam"):
             print(row_name, "0", 2 * pairs, sep="\t", file=out)
예제 #3
0
 def test_count(self):
     """Check fqtools.count gives 3 for the count.1.fq / count.2.fq fixtures."""
     fastq_paths = [
         os.path.join(data_dir, basename)
         for basename in ("count.1.fq", "count.2.fq")
     ]
     self.assertEqual(3, fqtools.count(fastq_paths))
예제 #4
0
파일: read_map.py 프로젝트: HDRUK/clockwork
def map_reads(
    ref_fasta, reads1, reads2, outfile, rmdup=False, markdup=False, read_group=None, threads=1
):
    """Map paired reads to a reference with BWA MEM.

    By default, outputs a SAM file in input read order. Secondary alignments
    are filtered out of the BWA MEM output (the header is kept).

    Args:
        ref_fasta: reference FASTA file given to "bwa mem".
        reads1: first reads file.
        reads2: second reads file.
        outfile: path of the final output file.
        rmdup: if True, remove duplicates using samtools rmdup. Final output
            is a sorted BAM. Incompatible with markdup=True.
        markdup: if True, mark duplicates using picard MarkDuplicate. Final
            output is a sorted BAM. Incompatible with rmdup=True.
        read_group: tuple (group_id, group_name). If given, these are passed
            to "bwa mem -R" as the ID and SM fields of an @RG line.
        threads: value passed to "bwa mem -t".

    Raises:
        Error: if rmdup and markdup are both True; if counting the input
            reads fails; if utils.syscall or picard.mark_duplicates raises;
            or if the number of SAM records does not match the expected
            number of reads.
    """
    if rmdup and markdup:
        raise Error("Cannot have rmdup and markdup both True.")

    try:
        # The SAM record count is later compared against twice this value.
        expected_read_count = 2 * fqtools.count([reads1, reads2])
    except Exception as e:
        raise Error(
            "Error counting reads in input files " + reads1 + " " + reads2
        ) from e

    dedup = rmdup or markdup
    if dedup:
        # Use only the basename in the prefix: mkdtemp joins dir and prefix,
        # so a prefix holding a relative directory would point at a
        # non-existent nested path.
        tmpdir = tempfile.mkdtemp(
            prefix=os.path.basename(outfile) + ".tmp.map_reads.",
            dir=os.path.dirname(os.path.abspath(outfile)),
        )
        sam_file = os.path.join(tmpdir, "tmp.sam")
    else:
        tmpdir = None
        sam_file = outfile

    # "LB:LIB" is needed, otherwise samtools rmdup segfaults when map_reads_set() is used
    R_option = (
        ""
        if read_group is None
        else r"""-R '@RG\tLB:LIB\tID:"""
        + read_group[0]
        + r"""\tSM:"""
        + read_group[1]
        + "'"
    )

    cmd = " ".join(
        [
            "bwa mem -M",
            f"-t {threads}",
            R_option,
            ref_fasta,
            reads1,
            reads2,
            r""" | awk '/^@/ || !and($2,256)' """,  # remove secondary alignments (but keep header)
            ">",
            sam_file,
        ]
    )

    try:
        try:
            utils.syscall(cmd)
        except Exception as e:
            raise Error("Error running BWA MEM: " + cmd) from e

        number_in_sam = utils.sam_record_count(sam_file)
        if expected_read_count != number_in_sam:
            raise Error(
                "Error! Mismatch in read counts. Expected "
                + str(expected_read_count)
                + " but got "
                + str(number_in_sam)
            )

        if dedup:
            sorted_bam = os.path.join(tmpdir, "tmp.sorted.bam")

            cmd = " ".join(["samtools sort", "-o", sorted_bam, sam_file])
            try:
                utils.syscall(cmd)
            except Exception as e:
                raise Error("Error running samtools sort: " + cmd) from e

            if rmdup:
                cmd = "samtools rmdup " + sorted_bam + " " + outfile
                try:
                    utils.syscall(cmd)
                except Exception as e:
                    raise Error("Error running samtools rmdup: " + cmd) from e
            else:
                try:
                    picard.mark_duplicates(sorted_bam, outfile)
                except Exception as e:
                    # Report the picard input rather than the stale samtools
                    # sort command still held in cmd.
                    raise Error(
                        "Error running picard mark_duplicates on " + sorted_bam
                    ) from e
    finally:
        # Single cleanup point: the temporary directory is removed on success
        # and on every failure path, including unexpected exceptions.
        if tmpdir is not None:
            shutil.rmtree(tmpdir)
예제 #5
0
def map_reads(ref_fasta,
              reads1,
              reads2,
              outfile,
              rmdup=False,
              markdup=False,
              read_group=None):
    '''Map paired reads to a reference with BWA MEM.

    By default, outputs a SAM file in input read order. Secondary alignments
    are filtered out of the BWA MEM output (the header is kept).

    Args:
        ref_fasta: reference FASTA file given to "bwa mem".
        reads1: first reads file.
        reads2: second reads file.
        outfile: path of the final output file.
        rmdup: if True, remove duplicates using samtools rmdup. Final output
            is a sorted BAM. Incompatible with markdup=True.
        markdup: if True, mark duplicates using picard MarkDuplicate. Final
            output is a sorted BAM. Incompatible with rmdup=True.
        read_group: tuple (group_id, group_name). If given, these are passed
            to "bwa mem -R" as the ID and SM fields of an @RG line.

    Raises:
        Error: if rmdup and markdup are both True; if counting the input
            reads fails; if utils.syscall or picard.mark_duplicates raises;
            or if the number of SAM records does not match the expected
            number of reads.
    '''
    if rmdup and markdup:
        raise Error('Cannot have rmdup and markdup both True.')

    try:
        # The SAM record count is later compared against twice this value.
        expected_read_count = 2 * fqtools.count([reads1, reads2])
    except Exception as e:
        raise Error('Error counting reads in input files ' + reads1 + ' ' +
                    reads2) from e

    dedup = rmdup or markdup
    if dedup:
        # Use only the basename in the prefix: mkdtemp joins dir and prefix,
        # so a prefix holding a relative directory would point at a
        # non-existent nested path.
        tmpdir = tempfile.mkdtemp(
            prefix=os.path.basename(outfile) + '.tmp.map_reads.',
            dir=os.path.dirname(os.path.abspath(outfile)))
        sam_file = os.path.join(tmpdir, 'tmp.sam')
    else:
        tmpdir = None
        sam_file = outfile

    # "LB:LIB" is needed, otherwise samtools rmdup segfaults when map_reads_set() is used
    R_option = '' if read_group is None else r'''-R '@RG\tLB:LIB\tID:''' + read_group[
        0] + r'''\tSM:''' + read_group[1] + "'"

    cmd = ' '.join([
        'bwa mem -M',
        R_option,
        ref_fasta,
        reads1,
        reads2,
        r''' | awk '/^@/ || !and($2,256)' ''',  # remove secondary alignments (but keep header)
        '>',
        sam_file
    ])

    try:
        try:
            utils.syscall(cmd)
        except Exception as e:
            raise Error('Error running BWA MEM: ' + cmd) from e

        number_in_sam = utils.sam_record_count(sam_file)
        if expected_read_count != number_in_sam:
            raise Error('Error! Mismatch in read counts. Expected ' +
                        str(expected_read_count) + ' but got ' +
                        str(number_in_sam))

        if dedup:
            sorted_bam = os.path.join(tmpdir, 'tmp.sorted.bam')

            cmd = ' '.join(['samtools sort', '-o', sorted_bam, sam_file])
            try:
                utils.syscall(cmd)
            except Exception as e:
                raise Error('Error running samtools sort: ' + cmd) from e

            if rmdup:
                cmd = 'samtools rmdup ' + sorted_bam + ' ' + outfile
                try:
                    utils.syscall(cmd)
                except Exception as e:
                    raise Error('Error running samtools rmdup: ' + cmd) from e
            else:
                try:
                    picard.mark_duplicates(sorted_bam, outfile)
                except Exception as e:
                    # Report the picard input rather than the stale samtools
                    # sort command still held in cmd.
                    raise Error('Error running picard mark_duplicates on ' +
                                sorted_bam) from e
    finally:
        # Single cleanup point: the temporary directory is removed on success
        # and on every failure path, including unexpected exceptions.
        if tmpdir is not None:
            shutil.rmtree(tmpdir)
예제 #6
0
 def test_count(self):
     '''Check fqtools.count gives 3 for the count.1.fq / count.2.fq fixtures.'''
     fixture_names = ('count.1.fq', 'count.2.fq')
     fixture_paths = []
     for fixture in fixture_names:
         fixture_paths.append(os.path.join(data_dir, fixture))
     observed = fqtools.count(fixture_paths)
     self.assertEqual(3, observed)