Ejemplo n.º 1
0
    def test_rev_compl_fragmented_reads(self):
        """Run bowtie2 on hand-written FASTA reads, unpaired and then paired.

        Each scenario writes the reads and the GENOME reference to temporary
        files, calls get_or_create_bowtie2_index on the reference, maps with
        the extra parameters '-a' and '-f', converts the output with
        map_process_to_bam and opens the resulting BAM with pysam.
        No assertions are made on the alignments.
        """
        genome = GENOME

        # Scenario 1: unpaired reads, seq1 and seq2 in a single FASTA file.
        unpaired_fasta = ''.join([
            '>seq1\nAAGTTCAATGTAGCAAGGGTACATGCTGACGAGATGGGTCTGCGATCCCTG',
            'AGGACACCCAGTCTCCCGGGAGTCTTTTCCAAGGTGTGCTCCTGATCGCCGTGTTA\n',
            '>seq2\nTAACACGGCGATCAGGAGCACACCTTGGAAAAGACTCCCGGGAGACTGGGTG',
            'TCCTCAGGGATCGCAGACCCATCTCGTCAGCATGTACCCTTGCTACATTGAACTT\n',
        ])
        reads_fhand = NamedTemporaryFile()
        reads_fhand.write(unpaired_fasta)
        reads_fhand.flush()
        genome_fhand = NamedTemporaryFile()
        genome_fhand.write(genome)
        genome_fhand.flush()

        bowtie_index = get_or_create_bowtie2_index(genome_fhand.name)
        bam_fhand = NamedTemporaryFile(suffix='.bam')
        mapper = map_with_bowtie2(bowtie_index,
                                  extra_params=['-a', '-f'],
                                  unpaired_fpaths=[reads_fhand.name])
        map_process_to_bam(mapper, bam_fhand.name)
        # Opening the BAM is the only check made on the mapping output;
        # iterate over this object to inspect the aligned reads by hand.
        alignments = pysam.Samfile(bam_fhand.name)

        # Scenario 2: paired reads, forward and reverse mates in two files.
        # seq10: the f read is reversed, the r read is direct.
        # seq11: the f read is direct, the r read is reversed.
        forward_fasta = ''.join([
            '>seq10 f\nGGGATCGCAGACCCATCTCGTCAGCATGTACCCTTGCTACATTGAACTT',
            '\n',
            '>seq11 f\nAAGTTCAATGTAGCAAGGGTACATGCTGACGAGATGGGTCTGCGATCCC',
            '\n',
        ])
        reverse_fasta = ''.join([
            '>seq10 r\nATGTAATACGGGCTAGCCGGGGATGCCGACGATTAAACACGCTGTCATA',
            'GTAGCGTCTTCCTAGGGTTTTCCCCATGGAATCGGTTATCGTGATACGTTAAATTT\n',
            '>seq11 r\nAAATTTAACGTATCACGATAACCGATTCCATGGGGAAAACCCTAGGAAG',
            'ACGCTACTATGACAGCGTGTTTAATCGTCGGCATCCCCGGCTAGCCCGTATTACAT\n',
        ])

        forward_fhand = NamedTemporaryFile()
        forward_fhand.write(forward_fasta)
        forward_fhand.flush()
        reverse_fhand = NamedTemporaryFile()
        reverse_fhand.write(reverse_fasta)
        reverse_fhand.flush()
        mate_fpaths = [[forward_fhand.name], [reverse_fhand.name]]
        # A fresh copy of the reference is written for the second run.
        genome_fhand = NamedTemporaryFile()
        genome_fhand.write(genome)
        genome_fhand.flush()

        bowtie_index = get_or_create_bowtie2_index(genome_fhand.name)
        bam_fhand = NamedTemporaryFile(suffix='.bam')
        mapper = map_with_bowtie2(bowtie_index,
                                  extra_params=['-a', '-f'],
                                  paired_fpaths=mate_fpaths)
        map_process_to_bam(mapper, bam_fhand.name)
        alignments = pysam.Samfile(bam_fhand.name)
Ejemplo n.º 2
0
    def test_map_with_bowtie2(self):
        """Smoke-test map_with_bowtie2 with unpaired arabidopsis reads.

        The temporary directory's name is passed to
        get_or_create_bowtie2_index together with the reference path, and
        the mapping is written to a temporary .bam file. No assertions are
        made on the output; the test passes if nothing raises.
        """
        reference_fpath = os.path.join(TEST_DATA_DIR, 'arabidopsis_genes')
        reads_fpath = os.path.join(TEST_DATA_DIR, 'arabidopsis_reads.fastq')
        directory = TemporaryDir()
        # try/finally so the temporary directory is released even when the
        # index creation or the mapping raises.
        try:
            index_fpath = get_or_create_bowtie2_index(reference_fpath,
                                                      directory.name)
            bam_fhand = NamedTemporaryFile(suffix='.bam')
            map_with_bowtie2(index_fpath, bam_fhand.name,
                             unpaired_fpaths=[reads_fpath])
        finally:
            directory.close()
Ejemplo n.º 3
0
    def test_map_with_bowtie2(self):
        """Check that mapping unpaired arabidopsis reads runs without error.

        get_or_create_bowtie2_index receives the reference path and the name
        of a temporary directory; map_with_bowtie2 then writes to a temporary
        .bam file. Nothing is asserted about the BAM contents.
        """
        reference_fpath = os.path.join(TEST_DATA_DIR, 'arabidopsis_genes')
        reads_fpath = os.path.join(TEST_DATA_DIR, 'arabidopsis_reads.fastq')
        directory = TemporaryDir()
        try:
            index_fpath = get_or_create_bowtie2_index(reference_fpath,
                                                      directory.name)
            bam_fhand = NamedTemporaryFile(suffix='.bam')
            map_with_bowtie2(index_fpath,
                             bam_fhand.name,
                             unpaired_fpaths=[reads_fpath])
        finally:
            # Always release the temporary directory, also on failure.
            directory.close()
Ejemplo n.º 4
0
    def test_rev_compl_fragmented_reads(self):
        """Map hand-written FASTA reads against the ref_example.fasta index.

        Two runs are made with the extra parameters '-a' and '-f': one with
        a single unpaired reads file and one with a (forward, reverse) pair
        of files. Each result is converted with map_process_to_bam and the
        BAM is opened with pysam. No assertions are made on the alignments.
        """
        bowtie_index = os.path.join(TEST_DATA_DIR, 'ref_example.fasta')
        bowtie_flags = ['-a', '-f']

        # First run: unpaired reads, seq1 and seq2 in the same file.
        unpaired_reads = (
            '>seq1\nAAGTTCAATGTAGCAAGGGTACATGCTGACGAGATGGGTCTGCGATCCCTG'
            'AGGACACCCAGTCTCCCGGGAGTCTTTTCCAAGGTGTGCTCCTGATCGCCGTGTTA\n'
            '>seq2\nTAACACGGCGATCAGGAGCACACCTTGGAAAAGACTCCCGGGAGACTGGGTG'
            'TCCTCAGGGATCGCAGACCCATCTCGTCAGCATGTACCCTTGCTACATTGAACTT\n'
        )
        unpaired_fhand = NamedTemporaryFile()
        unpaired_fhand.write(unpaired_reads)
        unpaired_fhand.flush()

        bam_fhand = NamedTemporaryFile(suffix='.bam')
        mapping = map_with_bowtie2(bowtie_index,
                                   extra_params=bowtie_flags,
                                   unpaired_fpath=unpaired_fhand.name)
        map_process_to_bam(mapping, bam_fhand.name)
        # Iterate over this object to inspect the aligned reads by hand.
        bam = pysam.Samfile(bam_fhand.name)

        # Second run: paired reads.
        # seq10: the f read is reversed, the r read is direct.
        # seq11: the f read is direct, the r read is reversed.
        forward_reads = (
            '>seq10 f\nGGGATCGCAGACCCATCTCGTCAGCATGTACCCTTGCTACATTGAACTT'
            '\n'
            '>seq11 f\nAAGTTCAATGTAGCAAGGGTACATGCTGACGAGATGGGTCTGCGATCCC'
            '\n'
        )
        reverse_reads = (
            '>seq10 r\nATGTAATACGGGCTAGCCGGGGATGCCGACGATTAAACACGCTGTCATA'
            'GTAGCGTCTTCCTAGGGTTTTCCCCATGGAATCGGTTATCGTGATACGTTAAATTT\n'
            '>seq11 r\nAAATTTAACGTATCACGATAACCGATTCCATGGGGAAAACCCTAGGAAG'
            'ACGCTACTATGACAGCGTGTTTAATCGTCGGCATCCCCGGCTAGCCCGTATTACAT\n'
        )

        forward_fhand = NamedTemporaryFile()
        forward_fhand.write(forward_reads)
        forward_fhand.flush()
        reverse_fhand = NamedTemporaryFile()
        reverse_fhand.write(reverse_reads)
        reverse_fhand.flush()

        bam_fhand = NamedTemporaryFile(suffix='.bam')
        mapping = map_with_bowtie2(
            bowtie_index,
            extra_params=['-a', '-f'],
            paired_fpaths=(forward_fhand.name, reverse_fhand.name))
        map_process_to_bam(mapping, bam_fhand.name)
        bam = pysam.Samfile(bam_fhand.name)
Ejemplo n.º 5
0
    def _setup_checks(self, filterpacket):
        """Map the passed sequences with bowtie2 and store the mapped reads.

        The sequences in filterpacket[SEQS_PASSED] are flattened into one
        list and written to a temporary reads file. That file is mapped
        against self._index_fpath into a temporary .bam, and the result of
        _get_mapped_reads(bam, self.min_mapq) is stored in self.mapped_reads.

        The reads file format and the extra bowtie2 parameters are chosen
        from the first sequence only.

        Raises:
            RuntimeError: for SeqItems whose file format is neither fasta
                nor fastq.
            NotImplementedError: for any other kind of sequence.
        """
        index_fpath = self._index_fpath
        get_or_create_bowtie2_index(index_fpath)
        seqs = [s for seqs in filterpacket[SEQS_PASSED] for s in seqs]
        seq_class = seqs[0].kind
        extra_params = []
        # Which format do we need for the bowtie2 input read file fasta or
        # fastq?
        if seq_class == SEQRECORD:
            # Membership is tested on the mapping itself; this is equivalent
            # to testing its key view and does not depend on the
            # Python-2-only viewkeys() method.
            if 'phred_quality' in seqs[0].object.letter_annotations:
                file_format = 'fastq'
            else:
                extra_params.append('-f')
                file_format = 'fasta'
        elif seq_class == SEQITEM:
            file_format = get_file_format(seqs[0])
            if 'illumina' in file_format:
                extra_params.append('--phred64')
            elif 'fasta' in file_format:
                extra_params.append('-f')
            elif 'fastq' in file_format:
                # Plain fastq needs no extra bowtie2 parameter.
                pass
            else:
                msg = 'For FilterBowtie2Match and SeqItems fastq or fasta '
                msg += 'files are required'
                raise RuntimeError(msg)
        else:
            raise NotImplementedError()

        reads_fhand = NamedTemporaryFile(suffix=file_format)
        write_seqs(seqs, reads_fhand, file_format=file_format)
        # Flush so that bowtie2 sees the whole file when it opens it by name.
        reads_fhand.flush()

        bam_fhand = NamedTemporaryFile(suffix='.bam')
        map_with_bowtie2(index_fpath,
                         bam_fhand.name,
                         unpaired_fpaths=[reads_fhand.name],
                         extra_params=extra_params)

        self.mapped_reads = _get_mapped_reads(bam_fhand.name, self.min_mapq)
Ejemplo n.º 6
0
    def _setup_checks(self, filterpacket):
        """Run bowtie2 on the passed sequences and record which reads mapped.

        All sequences found in filterpacket[SEQS_PASSED] are written to a
        temporary reads file and mapped against self._index_fpath. The value
        returned by _get_mapped_reads for the resulting BAM and
        self.min_mapq is kept in self.mapped_reads.

        Only the first sequence is inspected to decide the reads file format
        and the extra parameters handed to bowtie2.

        Raises:
            RuntimeError: if the sequences are SeqItems in a format that is
                neither fasta nor fastq.
            NotImplementedError: if the sequence kind is neither SEQRECORD
                nor SEQITEM.
        """
        index_fpath = self._index_fpath
        get_or_create_bowtie2_index(index_fpath)
        seqs = [s for seqs in filterpacket[SEQS_PASSED] for s in seqs]
        first_seq = seqs[0]
        seq_class = first_seq.kind
        extra_params = []
        # Which format do we need for the bowtie2 input read file fasta or
        # fastq?
        if seq_class == SEQRECORD:
            # Plain "in" on the annotations mapping replaces the former
            # viewkeys() call: same membership test, no Python-2-only method.
            if 'phred_quality' in first_seq.object.letter_annotations:
                file_format = 'fastq'
            else:
                extra_params.append('-f')
                file_format = 'fasta'
        elif seq_class == SEQITEM:
            file_format = get_file_format(first_seq)
            if 'illumina' in file_format:
                extra_params.append('--phred64')
            elif 'fasta' in file_format:
                extra_params.append('-f')
            elif 'fastq' not in file_format:
                msg = 'For FilterBowtie2Match and SeqItems fastq or fasta '
                msg += 'files are required'
                raise RuntimeError(msg)
        else:
            raise NotImplementedError()

        reads_fhand = NamedTemporaryFile(suffix=file_format)
        write_seqs(seqs, reads_fhand, file_format=file_format)
        # The mapper reads the file by name, so the buffer must be on disk.
        reads_fhand.flush()

        bam_fhand = NamedTemporaryFile(suffix='.bam')
        map_with_bowtie2(index_fpath, bam_fhand.name,
                         unpaired_fpaths=[reads_fhand.name],
                         extra_params=extra_params)

        self.mapped_reads = _get_mapped_reads(bam_fhand.name, self.min_mapq)
Ejemplo n.º 7
0
    def test_map_with_bowtie2(self):
        """Smoke-test map_with_bowtie2 with unpaired and with paired reads.

        Each run passes a fresh temporary directory's name to
        get_or_create_bowtie2_index, maps the reads and converts the output
        with map_process_to_bam into a temporary .bam file. No assertions
        are made on the output.
        """
        reference_fpath = os.path.join(TEST_DATA_DIR, 'arabidopsis_genes')
        reads_fpath = os.path.join(TEST_DATA_DIR, 'arabidopsis_reads.fastq')
        directory = TemporaryDir()
        # try/finally so the temporary directory is released on failure too.
        try:
            index_fpath = get_or_create_bowtie2_index(reference_fpath,
                                                      directory.name)
            bam_fhand = NamedTemporaryFile(suffix='.bam')
            bowtie2 = map_with_bowtie2(index_fpath,
                                       unpaired_fpath=reads_fpath)
            map_process_to_bam(bowtie2, bam_fhand.name)
        finally:
            directory.close()

        # With paired_fpaths option
        forward_fpath = os.path.join(TEST_DATA_DIR, 'arabidopsis_reads.fastq')
        # Keep the handle in a local: NamedTemporaryFile deletes its file as
        # soon as the object is closed or garbage-collected, so taking only
        # .name from a throwaway instance yields a path that no longer exists
        # by the time the mapper is started.
        reverse_fhand = NamedTemporaryFile()
        paired_fpaths = (forward_fpath, reverse_fhand.name)
        directory = TemporaryDir()
        try:
            index_fpath = get_or_create_bowtie2_index(reference_fpath,
                                                      directory.name)
            bam_fhand = NamedTemporaryFile(suffix='.bam')
            bowtie2 = map_with_bowtie2(index_fpath,
                                       paired_fpaths=paired_fpaths)
            map_process_to_bam(bowtie2, bam_fhand.name)
        finally:
            directory.close()
Ejemplo n.º 8
0
    def test_map_with_bowtie2(self):
        """Smoke-test map_with_bowtie2 with unpaired and with paired reads.

        The unpaired run uses arabidopsis_reads.fastq; the paired run uses
        arabreads_1.fastq and arabreads_2.fastq. Each run passes a fresh
        temporary directory's name to get_or_create_bowtie2_index and
        converts the mapper output with map_process_to_bam. No assertions
        are made on the output.
        """
        reference_fpath = os.path.join(TEST_DATA_DIR, 'arabidopsis_genes')
        reads_fpath = os.path.join(TEST_DATA_DIR, 'arabidopsis_reads.fastq')
        directory = TemporaryDir()
        # try/finally so the temporary directory is released on failure too.
        try:
            index_fpath = get_or_create_bowtie2_index(reference_fpath,
                                                      directory.name)
            bam_fhand = NamedTemporaryFile(suffix='.bam')
            bowtie2 = map_with_bowtie2(index_fpath,
                                       unpaired_fpath=reads_fpath)
            map_process_to_bam(bowtie2, bam_fhand.name)
        finally:
            directory.close()

        # With paired_fpaths option
        forward_fpath = os.path.join(TEST_DATA_DIR, 'arabreads_1.fastq')
        reverse_fpath = os.path.join(TEST_DATA_DIR, 'arabreads_2.fastq')
        paired_fpaths = (forward_fpath, reverse_fpath)
        directory = TemporaryDir()
        try:
            index_fpath = get_or_create_bowtie2_index(reference_fpath,
                                                      directory.name)
            bam_fhand = NamedTemporaryFile(suffix='.bam')
            bowtie2 = map_with_bowtie2(index_fpath,
                                       paired_fpaths=paired_fpaths)
            map_process_to_bam(bowtie2, bam_fhand.name)
        finally:
            directory.close()