Example #1
0
    def test_add_rg_to_bam(self):
        """Map reads with bwa mem using a read group and check the BAM output.

        The reads are mapped against an index requested in a temporary
        directory, the mapper output is written to a temporary BAM file and
        ``samtools view -h`` is used to dump it.  The test passes when both
        the ``@RG`` header line and one known read sequence are found in the
        dump.
        """
        reference_fpath = os.path.join(TEST_DATA_DIR, 'arabidopsis_genes')
        reads_fpath = os.path.join(TEST_DATA_DIR, 'arabidopsis_reads.fastq')
        directory = TemporaryDir()
        # try/finally: the temporary directory has to be released even when
        # the mapping or one of the assertions fails, otherwise it leaks.
        try:
            index_fpath = get_or_create_bwa_index(reference_fpath,
                                                  directory.name)
            bam_fhand = NamedTemporaryFile(suffix='.bam')
            lib_name = 'aa'
            log_fhand = NamedTemporaryFile()
            readgroup = {
                'ID': lib_name,
                'PL': 'illumina',
                'LB': lib_name,
                'SM': '{0}_illumina_pe'.format(lib_name),
                'PU': '0'
            }
            bwa = map_with_bwamem(index_fpath,
                                  unpaired_fpath=reads_fpath,
                                  readgroup=readgroup,
                                  log_fpath=log_fhand.name)
            map_process_to_bam(bwa, bam_fhand.name)
            # -h makes samtools include the header, where the @RG line lives.
            out = subprocess.check_output(
                [get_binary_path('samtools'), 'view', '-h', bam_fhand.name],
                stderr=log_fhand)
            assert '@RG\tID:aa' in out
            assert 'TTCTGATTCAATCTACTTCAAAGTTGGCTTTATCAATAAG' in out
        finally:
            directory.close()
Example #2
0
    def test_get_or_create_index(self):
        """Check that a bwa index is created on demand and found afterwards.

        ``get_or_create_bwa_index`` is called twice with the same reference
        and directory; both calls must return the same path inside the
        temporary directory and an index must exist at that path.
        """
        db_name = 'arabidopsis_genes'
        seq_fpath = os.path.join(TEST_DATA_DIR, db_name)
        # Precondition: there is no index next to the reference itself.
        assert not _bwa_index_exists(seq_fpath)

        directory = TemporaryDir()
        # try/finally: release the temporary directory even if an assertion
        # fails, otherwise the created index is left behind.
        try:
            index_fpath = get_or_create_bwa_index(seq_fpath, directory.name)
            expected_index = os.path.join(directory.name,
                                          os.path.basename(db_name))
            assert index_fpath == expected_index
            assert _bwa_index_exists(index_fpath)

            # already exists
            index_fpath = get_or_create_bwa_index(seq_fpath, directory.name)
            assert index_fpath == expected_index
            assert _bwa_index_exists(index_fpath)
        finally:
            directory.close()
Example #3
0
    def test_get_or_create_index(self):
        """Verify index creation and the second call on an existing index.

        The index is requested twice for the same reference and temporary
        directory.  Each call has to return the path made of the directory
        and the reference base name, and an index must exist there.
        """
        db_name = 'arabidopsis_genes'
        seq_fpath = os.path.join(TEST_DATA_DIR, db_name)
        # The reference in the test data dir is expected to be unindexed.
        assert not _bwa_index_exists(seq_fpath)

        directory = TemporaryDir()
        expected_index = os.path.join(directory.name,
                                      os.path.basename(db_name))
        # The cleanup lives in a finally clause so a failing assertion does
        # not leave the temporary directory behind.
        try:
            index_fpath = get_or_create_bwa_index(seq_fpath, directory.name)
            assert index_fpath == expected_index
            assert _bwa_index_exists(index_fpath)

            # already exists
            index_fpath = get_or_create_bwa_index(seq_fpath, directory.name)
            assert index_fpath == expected_index
            assert _bwa_index_exists(index_fpath)
        finally:
            directory.close()
Example #4
0
    def test_map_with_bwa(self):
        """Map unpaired reads with bwasw and look for a known read in the BAM.

        The BAM written by ``map_with_bwasw`` is dumped with
        ``samtools view`` and the dump must contain one known read sequence.
        """
        reference_fpath = os.path.join(TEST_DATA_DIR, 'arabidopsis_genes')
        reads_fpath = os.path.join(TEST_DATA_DIR, 'arabidopsis_reads.fastq')
        directory = TemporaryDir()
        # try/finally: the temporary directory must be released even when
        # the mapping or the assertion fails.
        try:
            index_fpath = get_or_create_bwa_index(reference_fpath,
                                                  directory.name)
            bam_fhand = NamedTemporaryFile(suffix='.bam')
            map_with_bwasw(index_fpath, bam_fhand.name,
                           unpaired_fpath=reads_fpath)
            out = subprocess.check_output([get_binary_path('samtools'), 'view',
                                           bam_fhand.name])
            assert 'TTCTGATTCAATCTACTTCAAAGTTGGCTTTATCAATAAG' in out
        finally:
            directory.close()
Example #5
0
    def test_map_with_bwa(self):
        """Run the bwasw mapping on unpaired reads and inspect the result.

        After ``map_with_bwasw`` has written the temporary BAM file, its
        content is read back through ``samtools view`` and checked for one
        known read sequence.
        """
        reference_fpath = os.path.join(TEST_DATA_DIR, 'arabidopsis_genes')
        reads_fpath = os.path.join(TEST_DATA_DIR, 'arabidopsis_reads.fastq')
        directory = TemporaryDir()
        # Cleanup is done in a finally clause so that a failure anywhere in
        # the test does not leak the temporary directory.
        try:
            index_fpath = get_or_create_bwa_index(reference_fpath,
                                                  directory.name)
            bam_fhand = NamedTemporaryFile(suffix='.bam')
            map_with_bwasw(index_fpath, bam_fhand.name,
                           unpaired_fpath=reads_fpath)
            out = subprocess.check_output(
                [get_binary_path('samtools'), 'view', bam_fhand.name])
            assert 'TTCTGATTCAATCTACTTCAAAGTTGGCTTTATCAATAAG' in out
        finally:
            directory.close()
Example #6
0
    def test_add_rg_to_bam(self):
        """Check that the read group given to bwa mem shows up in the BAM.

        Unpaired reads are mapped with ``map_with_bwamem`` passing a read
        group dict, the result is stored with ``map_process_to_bam`` and the
        BAM is dumped, header included, with ``samtools view -h``.  Both the
        ``@RG`` header line and one known read sequence must be in the dump.
        """
        reference_fpath = os.path.join(TEST_DATA_DIR, 'arabidopsis_genes')
        reads_fpath = os.path.join(TEST_DATA_DIR, 'arabidopsis_reads.fastq')
        directory = TemporaryDir()
        # The finally clause guarantees that the temporary directory is
        # released when the mapping or an assertion fails.
        try:
            index_fpath = get_or_create_bwa_index(reference_fpath,
                                                  directory.name)
            bam_fhand = NamedTemporaryFile(suffix='.bam')
            lib_name = 'aa'
            log_fhand = NamedTemporaryFile()
            readgroup = {'ID': lib_name, 'PL': 'illumina', 'LB': lib_name,
                         'SM': '{0}_illumina_pe'.format(lib_name), 'PU': '0'}
            bwa = map_with_bwamem(index_fpath, unpaired_fpath=reads_fpath,
                                  readgroup=readgroup,
                                  log_fpath=log_fhand.name)
            map_process_to_bam(bwa, bam_fhand.name)
            # The header is requested (-h) because the @RG line is part of it.
            out = subprocess.check_output([get_binary_path('samtools'), 'view',
                                           '-h', bam_fhand.name],
                                          stderr=log_fhand)
            assert '@RG\tID:aa' in out
            assert 'TTCTGATTCAATCTACTTCAAAGTTGGCTTTATCAATAAG' in out
        finally:
            directory.close()
Example #7
0
def _sorted_mapped_reads(ref_fpath, paired_fpaths=None,
                     unpaired_fpaths=None, directory=None,
                     file_format=None, min_seed_len=None):
    """Map reads with bwa mem and return them as a query-name sorted BAM.

    Parameters:
        ref_fpath: path of the reference, handed to
            ``get_or_create_bwa_index``.
        paired_fpaths: sequence of read file paths; when given, its first
            item is the file used for the format handling.
        unpaired_fpaths: read file path(s) used when ``paired_fpaths`` is
            empty or None.
        directory: directory passed to ``get_or_create_bwa_index``; it is
            also where the temporary BAM file is created.  With None the
            system default temporary directory is used for the BAM.
        file_format: if not None it is registered for the first reads file
            with ``set_format``; otherwise ``get_format`` is called on it.
        min_seed_len: if not None it is passed to the mapper as ``-k``.

    Returns:
        A ``pysam.Samfile`` opened on the sorted temporary BAM file.

    Raises:
        ValueError: if neither paired nor unpaired read files are given.
    """
    if paired_fpaths:
        first_fpath = paired_fpaths[0]
    elif unpaired_fpaths:
        first_fpath = unpaired_fpaths[0]
    else:
        # Fail with a clear message instead of the TypeError/IndexError that
        # indexing a missing or empty list would raise.
        raise ValueError('No paired or unpaired read files were given')

    # The handle is only needed for the format calls, so it is closed right
    # after them instead of being left open.
    with open(first_fpath) as fhand:
        if file_format is not None:
            set_format(fhand, file_format)
        else:
            # The returned format is not used below; the call is kept
            # because it may validate or record the format -- TODO confirm.
            get_format(fhand)

    index_fpath = get_or_create_bwa_index(ref_fpath, directory)
    extra_params = ['-a', '-M']
    if min_seed_len is not None:
        # Command line arguments have to be strings; callers may pass an int.
        extra_params.extend(['-k', str(min_seed_len)])
    # NOTE(review): the whole unpaired_fpaths value is passed as
    # unpaired_fpath, while other callers pass a single path -- confirm that
    # map_with_bwamem accepts this.
    bwa = map_with_bwamem(index_fpath, paired_fpaths=paired_fpaths,
                         unpaired_fpath=unpaired_fpaths,
                         extra_params=extra_params)
    # The BAM used to be created in a hard-coded developer directory
    # ('/home/carlos/tmp') that does not exist on other machines.
    bam_fhand = NamedTemporaryFile(dir=directory)
    sort_mapped_reads(bwa, bam_fhand.name, key='queryname')
    bamfile = pysam.Samfile(bam_fhand.name)
    return bamfile
Example #8
0
    def test_rev_compl_fragmented_reads(self):
        reference_seq = GENOME

        #with paired_reads.
        #f is reversed r is direct
        query1 = '>seq10 f\nGGGATCGCAGACCCATCTCGTCAGCATGTACCCTTGCTACATTGAACTT'
        query1 += '\n'
        query2 = '>seq10 r\nATGTAATACGGGCTAGCCGGGGATGCCGACGATTAAACACGCTGTCATA'
        query2 += 'GTAGCGTCTTCCTAGGGTTTTCCCCATGGAATCGGTTATCGTGATACGTTAAATTT\n'
        #f is direct, r is reversed
        query3 = '>seq11 f\nAAGTTCAATGTAGCAAGGGTACATGCTGACGAGATGGGTCTGCGATCCC'
        query3 += '\n'
        query4 = '>seq11 r\nAAATTTAACGTATCACGATAACCGATTCCATGGGGAAAACCCTAGGAAG'
        query4 += 'ACGCTACTATGACAGCGTGTTTAATCGTCGGCATCCCCGGCTAGCCCGTATTACAT\n'

        #f is fragmented in two reference sequences. r mapps completely
        query7 = '>seq4 f\nCAAATCATCACCAGACCATGTCCGATCCCGGGAGTCTTTTCCAAGGTGTGC'
        query7 += 'TCTTTATCCGGCCCTTGCTCAAGGGTATGTTAAAACGGCAAGAGCTGCCTGAGCGCG\n'
        query8 = '>seq4 r\nTGTTCTGCAATCGATACAACGATCGAATTTAATCTGAGTAACTGCCAATTC'
        query8 += 'TGAGTAATATTATAGAAAGT\n'

        query_f = query1 + query3 + query7
        query_r = query2 + query4 + query8

        f_fhand = NamedTemporaryFile()
        f_fhand.write(query_f)
        f_fhand.flush()
        r_fhand = NamedTemporaryFile()
        r_fhand.write(query_r)
        r_fhand.flush()
        paired_fpaths = [f_fhand.name, r_fhand.name]
        ref_fhand = NamedTemporaryFile()
        ref_fhand.write(reference_seq)
        ref_fhand.flush()

        index_fpath = get_or_create_bwa_index(ref_fhand.name)
        bam_fhand = NamedTemporaryFile(suffix='.bam')
        bwa = map_with_bwamem(index_fpath, paired_fpaths=paired_fpaths)
        map_process_to_bam(bwa, bam_fhand.name)
        samfile = pysam.Samfile(bam_fhand.name)