Esempio n. 1
0
    def test_tophat_paired(self):
        """Map paired reads with tophat, without and with mate parameters.

        The first run omits ``mate_inner_dist``/``mate_std_dev`` and must
        raise RuntimeError.  The second run supplies both and must leave an
        ``accepted_hits.bam`` file in the output directory.
        """
        reference_fpath = os.path.join(TEST_DATA_DIR, 'arabidopsis_genes')
        reads_1_fpath = os.path.join(TEST_DATA_DIR, 'reads_1.fastq')
        reads_2_fpath = os.path.join(TEST_DATA_DIR, 'reads_2.fastq')

        # The directory is created before entering the try block so that the
        # finally clause never refers to an unbound name.
        directory = TemporaryDir()
        try:
            index_fpath = get_or_create_bowtie2_index(reference_fpath,
                                                      directory.name)
            map_with_tophat(index_fpath,
                            directory.name,
                            paired_fpaths=[reads_1_fpath, reads_2_fpath])
            # self.fail raises AssertionError, so it is not swallowed by the
            # RuntimeError handler below.
            self.fail('runtimeError expected')
        except RuntimeError:
            pass
        finally:
            directory.close()

        directory = TemporaryDir()
        try:
            index_fpath = get_or_create_bowtie2_index(reference_fpath,
                                                      directory.name)
            map_with_tophat(index_fpath,
                            directory.name,
                            paired_fpaths=[reads_1_fpath, reads_2_fpath],
                            mate_inner_dist=350,
                            mate_std_dev=50)
            # Actually check the result; the bare os.path.exists() call used
            # before discarded its return value.
            bam_fpath = os.path.join(directory.name, 'accepted_hits.bam')
            self.assertTrue(os.path.exists(bam_fpath))
        finally:
            directory.close()
Esempio n. 2
0
    def test_tophat_paired(self):
        """Check tophat paired-end mapping with and without mate parameters.

        Running without ``mate_inner_dist``/``mate_std_dev`` is expected to
        raise RuntimeError; running with them is expected to produce
        ``accepted_hits.bam`` inside the output directory.
        """
        reference_fpath = os.path.join(TEST_DATA_DIR, 'arabidopsis_genes')
        reads_1_fpath = os.path.join(TEST_DATA_DIR, 'reads_1.fastq')
        reads_2_fpath = os.path.join(TEST_DATA_DIR, 'reads_2.fastq')

        # Create the directory outside the try so that a failure in
        # TemporaryDir() does not turn into a NameError in the finally clause.
        directory = TemporaryDir()
        try:
            index_fpath = get_or_create_bowtie2_index(reference_fpath,
                                                      directory.name)
            map_with_tophat(index_fpath, directory.name,
                            paired_fpaths=[reads_1_fpath, reads_2_fpath])
            self.fail('runtimeError expected')
        except RuntimeError:
            pass
        finally:
            directory.close()

        directory = TemporaryDir()
        try:
            index_fpath = get_or_create_bowtie2_index(reference_fpath,
                                                      directory.name)
            map_with_tophat(index_fpath, directory.name,
                            paired_fpaths=[reads_1_fpath, reads_2_fpath],
                            mate_inner_dist=350, mate_std_dev=50)
            # The original call to os.path.exists() ignored its result, so
            # the test could never fail on a missing output file.
            self.assertTrue(os.path.exists(
                os.path.join(directory.name, 'accepted_hits.bam')))
        finally:
            directory.close()
Esempio n. 3
0
    def test_rev_compl_fragmented_reads(self):
        """Map forward/reverse reads against GENOME, unpaired then paired.

        Each round writes the reads and the reference to temporary files,
        builds a bowtie2 index, maps with the ``-a -f`` extra parameters,
        converts the output to BAM and opens it with pysam.  No assertion is
        made on the alignments themselves.
        """
        def _flushed_tempfile(content):
            # Write content to a new named temporary file and flush it, so
            # the data can be read back through the file name.
            fhand = NamedTemporaryFile()
            fhand.write(content)
            fhand.flush()
            return fhand

        genome = GENOME

        # with unpaired_reads
        unpaired_reads = (
            '>seq1\nAAGTTCAATGTAGCAAGGGTACATGCTGACGAGATGGGTCTGCGATCCCTG'
            'AGGACACCCAGTCTCCCGGGAGTCTTTTCCAAGGTGTGCTCCTGATCGCCGTGTTA\n'
            '>seq2\nTAACACGGCGATCAGGAGCACACCTTGGAAAAGACTCCCGGGAGACTGGGTG'
            'TCCTCAGGGATCGCAGACCCATCTCGTCAGCATGTACCCTTGCTACATTGAACTT\n'
        )
        reads_fhand = _flushed_tempfile(unpaired_reads)
        genome_fhand = _flushed_tempfile(genome)

        index = get_or_create_bowtie2_index(genome_fhand.name)
        bam_out = NamedTemporaryFile(suffix='.bam')
        mapper = map_with_bowtie2(index, extra_params=['-a', '-f'],
                                  unpaired_fpaths=[reads_fhand.name])
        map_process_to_bam(mapper, bam_out.name)
        alignments = pysam.Samfile(bam_out.name)

        # with paired_reads.
        # seq10: f is reversed, r is direct
        # seq11: f is direct, r is reversed
        forward_reads = (
            '>seq10 f\nGGGATCGCAGACCCATCTCGTCAGCATGTACCCTTGCTACATTGAACTT'
            '\n'
            '>seq11 f\nAAGTTCAATGTAGCAAGGGTACATGCTGACGAGATGGGTCTGCGATCCC'
            '\n'
        )
        reverse_reads = (
            '>seq10 r\nATGTAATACGGGCTAGCCGGGGATGCCGACGATTAAACACGCTGTCATA'
            'GTAGCGTCTTCCTAGGGTTTTCCCCATGGAATCGGTTATCGTGATACGTTAAATTT\n'
            '>seq11 r\nAAATTTAACGTATCACGATAACCGATTCCATGGGGAAAACCCTAGGAAG'
            'ACGCTACTATGACAGCGTGTTTAATCGTCGGCATCCCCGGCTAGCCCGTATTACAT\n'
        )

        forward_fhand = _flushed_tempfile(forward_reads)
        reverse_fhand = _flushed_tempfile(reverse_reads)
        mate_fpaths = [[forward_fhand.name], [reverse_fhand.name]]
        genome_fhand = _flushed_tempfile(genome)

        index = get_or_create_bowtie2_index(genome_fhand.name)
        bam_out = NamedTemporaryFile(suffix='.bam')
        mapper = map_with_bowtie2(index, extra_params=['-a', '-f'],
                                  paired_fpaths=mate_fpaths)
        map_process_to_bam(mapper, bam_out.name)
        alignments = pysam.Samfile(bam_out.name)
Esempio n. 4
0
    def test_get_or_create_index(self):
        """Check that a bowtie2 index is created in, and reused from, a dir.

        The index for the test reference is requested twice in the same
        temporary directory; both calls must return the same path and the
        index must exist after each call.
        """
        db_name = 'arabidopsis_genes'
        seq_fpath = os.path.join(TEST_DATA_DIR, db_name)
        assert _bowtie2_index_exists(seq_fpath)

        directory = TemporaryDir()
        # try/finally so the temporary directory is removed even when one of
        # the assertions below fails.
        try:
            index_fpath = get_or_create_bowtie2_index(seq_fpath,
                                                      directory.name)
            expected_index = os.path.join(directory.name,
                                          os.path.basename(db_name))
            assert index_fpath == expected_index
            assert _bowtie2_index_exists(index_fpath)

            # already exists
            index_fpath = get_or_create_bowtie2_index(seq_fpath,
                                                      directory.name)
            assert index_fpath == expected_index
            assert _bowtie2_index_exists(index_fpath)
        finally:
            directory.close()
Esempio n. 5
0
    def test_get_or_create_index(self):
        """Check index creation in a fresh directory and its later reuse.

        The test first asserts that no index exists next to the reference
        file, then requests the index twice in the same temporary directory.
        Both calls must return the same path and the index must exist after
        each call.
        """
        db_name = 'arabidopsis_genes'
        seq_fpath = os.path.join(TEST_DATA_DIR, db_name)
        assert not _bowtie2_index_exists(seq_fpath)

        directory = TemporaryDir()
        # Guarantee the temporary directory is cleaned up when an assertion
        # fails part-way through.
        try:
            index_fpath = get_or_create_bowtie2_index(seq_fpath,
                                                      directory.name)
            expected_index = os.path.join(directory.name,
                                          os.path.basename(db_name))
            assert index_fpath == expected_index
            assert _bowtie2_index_exists(index_fpath)

            # already exists
            index_fpath = get_or_create_bowtie2_index(seq_fpath,
                                                      directory.name)
            assert index_fpath == expected_index
            assert _bowtie2_index_exists(index_fpath)
        finally:
            directory.close()
Esempio n. 6
0
 def test_tophat(self):
     """Map unpaired reads with tophat and check the BAM output exists."""
     reference_fpath = os.path.join(TEST_DATA_DIR, 'arabidopsis_genes')
     reads_fpath = os.path.join(TEST_DATA_DIR, 'arabidopsis_reads.fastq')
     directory = TemporaryDir()
     # try/finally so the temporary directory is removed on failure too.
     try:
         index_fpath = get_or_create_bowtie2_index(reference_fpath,
                                                   directory.name)
         map_with_tophat(index_fpath, directory.name,
                         unpaired_fpath=reads_fpath)
         # The result of os.path.exists() used to be thrown away; assert it
         # so a missing output file fails the test.
         assert os.path.exists(os.path.join(directory.name,
                                            'accepted_hits.bam'))
     finally:
         directory.close()
Esempio n. 7
0
 def test_tophat(self):
     """Run tophat on unpaired reads; ``accepted_hits.bam`` must exist."""
     reference_fpath = os.path.join(TEST_DATA_DIR, 'arabidopsis_genes')
     reads_fpath = os.path.join(TEST_DATA_DIR, 'arabidopsis_reads.fastq')
     directory = TemporaryDir()
     # Clean up the temporary directory even if mapping or the check fails.
     try:
         index_fpath = get_or_create_bowtie2_index(reference_fpath,
                                                   directory.name)
         map_with_tophat(index_fpath,
                         directory.name,
                         unpaired_fpath=reads_fpath)
         # Previously the os.path.exists() result was ignored, so the test
         # verified nothing about the output.
         bam_fpath = os.path.join(directory.name, 'accepted_hits.bam')
         assert os.path.exists(bam_fpath)
     finally:
         directory.close()
Esempio n. 8
0
    def test_map_with_bowtie2(self):
        """Smoke-test map_with_bowtie2 on unpaired reads.

        The test only checks that indexing and mapping run without raising;
        the BAM content is not inspected.
        """
        reference_fpath = os.path.join(TEST_DATA_DIR, 'arabidopsis_genes')
        reads_fpath = os.path.join(TEST_DATA_DIR, 'arabidopsis_reads.fastq')
        directory = TemporaryDir()
        # try/finally so the temporary directory is removed when indexing or
        # mapping raises.
        try:
            index_fpath = get_or_create_bowtie2_index(reference_fpath,
                                                      directory.name)
            bam_fhand = NamedTemporaryFile(suffix='.bam')
            map_with_bowtie2(index_fpath, bam_fhand.name,
                             unpaired_fpaths=[reads_fpath])
        finally:
            directory.close()
Esempio n. 9
0
    def test_map_with_bowtie2(self):
        """Check that bowtie2 mapping of unpaired reads runs without error.

        Nothing is asserted about the resulting BAM file.
        """
        reference_fpath = os.path.join(TEST_DATA_DIR, 'arabidopsis_genes')
        reads_fpath = os.path.join(TEST_DATA_DIR, 'arabidopsis_reads.fastq')
        directory = TemporaryDir()
        # Make sure the temporary directory is cleaned up if any step raises.
        try:
            index_fpath = get_or_create_bowtie2_index(reference_fpath,
                                                      directory.name)
            bam_fhand = NamedTemporaryFile(suffix='.bam')
            map_with_bowtie2(index_fpath,
                             bam_fhand.name,
                             unpaired_fpaths=[reads_fpath])
        finally:
            directory.close()
Esempio n. 10
0
    def _setup_checks(self, filterpacket):
        """Map the packet's passed sequences and store the mapped reads.

        The sequences under SEQS_PASSED are flattened, written to a temporary
        reads file, mapped with bowtie2 against ``self._index_fpath`` and
        converted to BAM.  The result of ``_get_mapped_reads`` for that BAM
        and ``self.min_mapq`` is stored in ``self.mapped_reads``.

        Raises RuntimeError for SeqItems whose format name contains none of
        'illumina', 'fasta' or 'fastq', and NotImplementedError for any
        sequence kind other than SEQRECORD or SEQITEM.
        """
        index_fpath = self._index_fpath
        get_or_create_bowtie2_index(index_fpath)

        # Flatten the groups of sequences into a single list.
        seqs = []
        for seq_group in filterpacket[SEQS_PASSED]:
            seqs.extend(seq_group)

        # The first sequence decides the format for the whole reads file.
        first_seq = seqs[0]
        seq_class = first_seq.kind
        bowtie2_params = []

        # Does bowtie2 need a fasta or a fastq reads file?
        if seq_class == SEQRECORD:
            annotations = first_seq.object.letter_annotations
            if 'phred_quality' in annotations.viewkeys():
                file_format = 'fastq'
            else:
                file_format = 'fasta'
                bowtie2_params.append('-f')
        elif seq_class == SEQITEM:
            file_format = get_file_format(first_seq)
            if 'illumina' in file_format:
                bowtie2_params.append('--phred64')
            elif 'fasta' in file_format:
                bowtie2_params.append('-f')
            elif 'fastq' not in file_format:
                raise RuntimeError('For FilterBowtie2Match and SeqItems '
                                   'fastq or fasta files are required')
        else:
            raise NotImplementedError()

        reads_fhand = NamedTemporaryFile(suffix=file_format)
        write_seqs(seqs, reads_fhand, file_format=file_format)
        reads_fhand.flush()

        bam_fhand = NamedTemporaryFile(suffix='.bam')
        bowtie2_process = map_with_bowtie2(index_fpath,
                                           unpaired_fpaths=[reads_fhand.name],
                                           extra_params=bowtie2_params)
        map_process_to_bam(bowtie2_process, bam_fhand.name)

        self.mapped_reads = _get_mapped_reads(bam_fhand.name, self.min_mapq)
Esempio n. 11
0
    def _setup_checks(self, filterpacket):
        """Map the packet's passed sequences and record which reads mapped.

        The sequences under SEQS_PASSED are flattened and written to a
        temporary reads file, which bowtie2 maps against
        ``self._index_fpath`` into a temporary BAM file.
        ``self.mapped_reads`` is set to the result of ``_get_mapped_reads``
        for that BAM and ``self.min_mapq``.

        Raises RuntimeError when a SeqItem's format name contains none of
        'illumina', 'fasta' or 'fastq', and NotImplementedError when the
        sequence kind is neither SEQRECORD nor SEQITEM.
        """
        def _format_and_params(sample):
            # Return (file_format, extra bowtie2 params) for the sample.
            kind = sample.kind
            if kind == SEQRECORD:
                annotations = sample.object.letter_annotations
                if 'phred_quality' in annotations.viewkeys():
                    return 'fastq', []
                return 'fasta', ['-f']
            if kind == SEQITEM:
                fmt = get_file_format(sample)
                if 'illumina' in fmt:
                    return fmt, ['--phred64']
                if 'fasta' in fmt:
                    return fmt, ['-f']
                if 'fastq' in fmt:
                    return fmt, []
                msg = 'For FilterBowtie2Match and SeqItems fastq or fasta '
                msg += 'files are required'
                raise RuntimeError(msg)
            raise NotImplementedError()

        index_fpath = self._index_fpath
        get_or_create_bowtie2_index(index_fpath)

        seqs = []
        for seq_group in filterpacket[SEQS_PASSED]:
            for seq in seq_group:
                seqs.append(seq)

        # Which format do we need for the bowtie2 input read file, fasta or
        # fastq?  The first sequence decides.
        file_format, extra_params = _format_and_params(seqs[0])

        reads_fhand = NamedTemporaryFile(suffix=file_format)
        write_seqs(seqs, reads_fhand, file_format=file_format)
        reads_fhand.flush()

        bam_fhand = NamedTemporaryFile(suffix='.bam')
        map_with_bowtie2(index_fpath,
                         bam_fhand.name,
                         unpaired_fpaths=[reads_fhand.name],
                         extra_params=extra_params)

        self.mapped_reads = _get_mapped_reads(bam_fhand.name, self.min_mapq)
Esempio n. 12
0
    def test_map_with_bowtie2(self):
        """Smoke-test bowtie2 mapping with unpaired and with paired reads.

        Both rounds only check that indexing, mapping and BAM conversion run
        without raising; the BAM content is not inspected.
        """
        reference_fpath = os.path.join(TEST_DATA_DIR, 'arabidopsis_genes')
        reads_fpath = os.path.join(TEST_DATA_DIR, 'arabidopsis_reads.fastq')
        directory = TemporaryDir()
        # try/finally so the temporary directory is removed on failure too.
        try:
            index_fpath = get_or_create_bowtie2_index(reference_fpath,
                                                      directory.name)
            bam_fhand = NamedTemporaryFile(suffix='.bam')
            bowtie2 = map_with_bowtie2(index_fpath,
                                       unpaired_fpath=reads_fpath)
            map_process_to_bam(bowtie2, bam_fhand.name)
        finally:
            directory.close()

        # With paired_fpaths option
        reference_fpath = os.path.join(TEST_DATA_DIR, 'arabidopsis_genes')
        forward_fpath = os.path.join(TEST_DATA_DIR, 'arabidopsis_reads.fastq')
        # Keep a reference to the temporary file: NamedTemporaryFile().name
        # alone drops the only reference, so the file is deleted before
        # bowtie2 can open it.
        # NOTE(review): the reverse file is now an existing but empty file;
        # confirm that is the intended input for the second mate.
        reverse_fhand = NamedTemporaryFile()
        reverse_fpath = reverse_fhand.name
        paired_fpaths = (forward_fpath, reverse_fpath)
        directory = TemporaryDir()
        try:
            index_fpath = get_or_create_bowtie2_index(reference_fpath,
                                                      directory.name)
            bam_fhand = NamedTemporaryFile(suffix='.bam')
            bowtie2 = map_with_bowtie2(index_fpath,
                                       paired_fpaths=paired_fpaths)
            map_process_to_bam(bowtie2, bam_fhand.name)
        finally:
            directory.close()
Esempio n. 13
0
    def test_map_with_bowtie2(self):
        """Smoke-test bowtie2 mapping with unpaired and with paired reads.

        Each round builds the index in its own temporary directory, maps the
        reads and converts the output to BAM.  Only the absence of exceptions
        is checked.
        """
        reference_fpath = os.path.join(TEST_DATA_DIR, 'arabidopsis_genes')
        reads_fpath = os.path.join(TEST_DATA_DIR, 'arabidopsis_reads.fastq')
        directory = TemporaryDir()
        # Guarantee cleanup of the temporary directory when a step raises.
        try:
            index_fpath = get_or_create_bowtie2_index(reference_fpath,
                                                      directory.name)
            bam_fhand = NamedTemporaryFile(suffix='.bam')
            bowtie2 = map_with_bowtie2(index_fpath,
                                       unpaired_fpath=reads_fpath)
            map_process_to_bam(bowtie2, bam_fhand.name)
        finally:
            directory.close()

        # With paired_fpaths option
        reference_fpath = os.path.join(TEST_DATA_DIR, 'arabidopsis_genes')
        forward_fpath = os.path.join(TEST_DATA_DIR, 'arabreads_1.fastq')
        reverse_fpath = os.path.join(TEST_DATA_DIR, 'arabreads_2.fastq')
        paired_fpaths = (forward_fpath, reverse_fpath)
        directory = TemporaryDir()
        try:
            index_fpath = get_or_create_bowtie2_index(reference_fpath,
                                                      directory.name)
            bam_fhand = NamedTemporaryFile(suffix='.bam')
            bowtie2 = map_with_bowtie2(index_fpath,
                                       paired_fpaths=paired_fpaths)
            map_process_to_bam(bowtie2, bam_fhand.name)
        finally:
            directory.close()
Esempio n. 14
0
    def test_filter_by_bowtie2_bin():
        """Run the filter_by_bowtie2 script on fastq and fasta reads.

        For each input the script is called with an index (-i), an output
        file (-o) and a filtered-out file (-e).  The output must contain
        'no_arabi' and the filtered-out file must contain 'read1'.
        """
        filter_bin = os.path.join(BIN_DIR, 'filter_by_bowtie2')
        assert 'usage' in check_output([filter_bin, '-h'])
        directory = TemporaryDir()
        # try/finally so the temporary directory is removed when the script
        # or one of the assertions fails.
        try:
            index_fpath = get_or_create_bowtie2_index(
                os.path.join(TEST_DATA_DIR, 'arabidopsis_genes'),
                directory=directory.name)

            fastq_fpath = os.path.join(TEST_DATA_DIR,
                                       'arabidopsis_reads.fastq')
            fasta_fpath = os.path.join(TEST_DATA_DIR,
                                       'arabidopsis_reads.fasta')
            for reads_fpath in [fastq_fpath, fasta_fpath]:
                out_fhand = NamedTemporaryFile(suffix='.seqs')
                filtered_fhand = NamedTemporaryFile(suffix='.seqs')
                cmd = [filter_bin, '-i', index_fpath, '-o', out_fhand.name,
                       '-e', filtered_fhand.name, reads_fpath]
                check_output(cmd)
                # Read the results through context managers so the handles
                # are closed instead of being left to the garbage collector.
                with open(out_fhand.name) as passed_fhand:
                    assert 'no_arabi' in passed_fhand.read()
                with open(filtered_fhand.name) as rejected_fhand:
                    assert 'read1' in rejected_fhand.read()
        finally:
            directory.close()
Esempio n. 15
0
    def test_filter_by_bowtie2_bin():
        """Exercise the filter_by_bowtie2 script with fastq and fasta input.

        The script's help output must mention 'usage'.  For each reads file
        the passed output must contain 'no_arabi' and the filtered-out
        output must contain 'read1'.
        """
        filter_bin = os.path.join(BIN_DIR, 'filter_by_bowtie2')
        assert 'usage' in check_output([filter_bin, '-h'])
        directory = TemporaryDir()
        # Ensure the temporary directory is cleaned up on any failure.
        try:
            reference_fpath = os.path.join(TEST_DATA_DIR, 'arabidopsis_genes')
            index_fpath = get_or_create_bowtie2_index(
                reference_fpath, directory=directory.name)

            fastq_fpath = os.path.join(TEST_DATA_DIR,
                                       'arabidopsis_reads.fastq')
            fasta_fpath = os.path.join(TEST_DATA_DIR,
                                       'arabidopsis_reads.fasta')
            for reads_fpath in [fastq_fpath, fasta_fpath]:
                out_fhand = NamedTemporaryFile(suffix='.seqs')
                filtered_fhand = NamedTemporaryFile(suffix='.seqs')
                cmd = [
                    filter_bin, '-i', index_fpath, '-o', out_fhand.name, '-e',
                    filtered_fhand.name, reads_fpath
                ]
                check_output(cmd)
                # Use context managers so the files opened for checking are
                # closed deterministically.
                with open(out_fhand.name) as passed_fhand:
                    assert 'no_arabi' in passed_fhand.read()
                with open(filtered_fhand.name) as rejected_fhand:
                    assert 'read1' in rejected_fhand.read()
        finally:
            directory.close()
Esempio n. 16
0
    def test_filter_by_bowtie2():
        """Check FilterBowtie2Match on fastq/fasta reads for both seq classes.

        For every combination of preferred sequence class and reads file, the
        first filter packet is filtered.  Only 'no_arabi' must pass, and
        'read1', 'read2' and 'read3' must be filtered out.
        """
        directory = TemporaryDir()
        # try/finally so the temporary directory is removed when an assertion
        # fails.
        try:
            index_fpath = get_or_create_bowtie2_index(
                os.path.join(TEST_DATA_DIR, 'arabidopsis_genes'),
                directory=directory.name)
            fastq_fpath = os.path.join(TEST_DATA_DIR,
                                       'arabidopsis_reads.fastq')
            fasta_fpath = os.path.join(TEST_DATA_DIR,
                                       'arabidopsis_reads.fasta')

            passed = ['no_arabi']
            filtered_out = ['read1', 'read2', 'read3']
            for preferred_classes in [[SEQITEM], [SEQRECORD]]:
                for reads_fpath in [fastq_fpath, fasta_fpath]:
                    # The reads file stays open for the whole round and is
                    # closed afterwards instead of being leaked.
                    with open(reads_fpath) as reads_fhand:
                        seq_packets = read_seq_packets(
                            [reads_fhand],
                            prefered_seq_classes=preferred_classes)
                        filter_packets = seq_to_filterpackets(seq_packets)
                        filter_ = FilterBowtie2Match(index_fpath)
                        filter_packet = list(filter_packets)[0]
                        filter_packets = filter_(filter_packet)
                        names = _seqs_to_names(filter_packets[SEQS_PASSED])
                        assert names == passed
                        names = _seqs_to_names(
                            filter_packets[SEQS_FILTERED_OUT])
                        assert names == filtered_out
        finally:
            directory.close()
Esempio n. 17
0
    def test_filter_by_bowtie2():
        """Filter reads with FilterBowtie2Match and check both outcomes.

        Every combination of preferred sequence class (SEQITEM, SEQRECORD)
        and reads file (fastq, fasta) is tried.  'no_arabi' must be the only
        sequence that passes, and 'read1', 'read2' and 'read3' must be
        filtered out.
        """
        directory = TemporaryDir()
        # Clean up the temporary directory even when an assertion fails.
        try:
            reference_fpath = os.path.join(TEST_DATA_DIR, 'arabidopsis_genes')
            index_fpath = get_or_create_bowtie2_index(
                reference_fpath, directory=directory.name)
            fastq_fpath = os.path.join(TEST_DATA_DIR,
                                       'arabidopsis_reads.fastq')
            fasta_fpath = os.path.join(TEST_DATA_DIR,
                                       'arabidopsis_reads.fasta')

            passed = ['no_arabi']
            for preferred_classes in [[SEQITEM], [SEQRECORD]]:
                for reads_fpath in [fastq_fpath, fasta_fpath]:
                    # Open the reads in a context manager so the handle is
                    # closed once the round is over.
                    with open(reads_fpath) as reads_fhand:
                        seq_packets = read_seq_packets(
                            [reads_fhand],
                            prefered_seq_classes=preferred_classes)
                        filter_packets = seq_to_filterpackets(seq_packets)
                        filter_ = FilterBowtie2Match(index_fpath)
                        filter_packet = list(filter_packets)[0]
                        filter_packets = filter_(filter_packet)
                        assert _seqs_to_names(
                            filter_packets[SEQS_PASSED]) == passed
                        assert _seqs_to_names(
                            filter_packets[SEQS_FILTERED_OUT]) == [
                                'read1', 'read2', 'read3'
                            ]
        finally:
            directory.close()