class ReadAligner(object):
    """Facade that hides the concrete short read aligner from callers.

    Every method forwards to the wrapped ``Segemehl`` instance stored in
    ``self.segemehl``.
    """

    def __init__(self, segemehl_bin, show_progress):
        """Create the wrapped ``Segemehl`` backend.

        Args:
            segemehl_bin: Passed through as the first positional argument
                of ``Segemehl``.
            show_progress: Passed through as the ``show_progress`` keyword.
        """
        backend = Segemehl(segemehl_bin, show_progress=show_progress)
        self.segemehl = backend

    def build_index(self, ref_seq_paths, index_path):
        """Delegate index creation to the backend; returns ``None``."""
        backend = self.segemehl
        backend.build_index(ref_seq_paths, index_path)

    def run_alignment(
        self,
        read_path_or_pair,
        index_path,
        ref_seq_path,
        output_path,
        nomatch_path,
        threads,
        accuracy,
        evalue,
        split,
        paired_end=False,
    ):
        """Delegate a read alignment run to the backend; returns ``None``.

        The first four arguments are handed to ``align_reads`` positionally,
        the remaining ones as keywords. Note that ``nomatch_path`` is passed
        on under the keyword ``nonmatch_file``.
        """
        positional = (read_path_or_pair, index_path, ref_seq_path, output_path)
        options = {
            "nonmatch_file": nomatch_path,
            "threads": threads,
            "accuracy": accuracy,
            "evalue": evalue,
            "split": split,
            "paired_end": paired_end,
        }
        self.segemehl.align_reads(*positional, **options)
class ReadAligner(object):
    """Uniform entry point in front of the concrete short read aligner.

    The work itself is done by the ``Segemehl`` object kept in
    ``self.segemehl``; this class only forwards calls to it.
    """

    def __init__(self, segemehl_bin, show_progress):
        """Instantiate the ``Segemehl`` wrapper this object delegates to."""
        self.segemehl = Segemehl(
            segemehl_bin,
            show_progress=show_progress,
        )

    def build_index(self, ref_seq_paths, index_path):
        """Forward the index build request unchanged; returns ``None``."""
        make_index = self.segemehl.build_index
        make_index(ref_seq_paths, index_path)

    def run_alignment(
        self,
        read_path_or_pair,
        index_path,
        ref_seq_path,
        output_path,
        nomatch_path,
        threads,
        accuracy,
        evalue,
        split,
        paired_end=False,
    ):
        """Forward an alignment request to ``align_reads``; returns ``None``.

        Reads, index, reference and output path go positionally; everything
        else goes by keyword, with ``nomatch_path`` sent as
        ``nonmatch_file``.
        """
        align = self.segemehl.align_reads
        align(
            read_path_or_pair,
            index_path,
            ref_seq_path,
            output_path,
            nonmatch_file=nomatch_path,
            threads=threads,
            accuracy=accuracy,
            evalue=evalue,
            split=split,
            paired_end=paired_end,
        )
 def _align_paired_end_reads(self):
     """Manage the actual alignment of paired end reads.

     Builds the index when the helper reports that the index file still
     has to be created, then walks the read pairs in lockstep with their
     SAM, unaligned-reads and BAM paths. A pair is aligned only when the
     helper reports that both its SAM output and its BAM file still have
     to be created.
     """
     cli_args = self._args
     read_aligner = Segemehl(cli_args.segemehl_bin, cli_args.progress)
     is_missing = self._helpers.file_needs_to_be_created
     paths = self._paths
     if is_missing(paths.index_path):
         read_aligner.build_index(paths.ref_seq_paths, paths.index_path)
     jobs = zip(
         paths.processed_read_path_pairs,
         paths.primary_read_aligner_sam_paths,
         paths.unaligned_reads_paths,
         paths.primary_read_aligner_bam_paths,
     )
     for read_path_pair, output_path, nomatch_path, bam_path in jobs:
         # Short-circuits exactly like the original if/elif: the BAM path
         # is only checked when the SAM output still has to be created.
         if is_missing(output_path) and is_missing(bam_path):
             read_aligner.run_alignment(
                 read_path_pair,
                 paths.index_path,
                 paths.ref_seq_paths,
                 output_path,
                 int(cli_args.processes),
                 nomatch_path,
                 int(cli_args.hit_strategy),
                 int(cli_args.segemehl_accuracy),
                 float(cli_args.segemehl_evalue),
                 cli_args.split,
                 paired_end=True,
             )
Exemple #4
0
 def _align_single_end_reads(self):
     """Manage the actual alignment of single end reads.

     Builds the index when ``_file_needs_to_be_created`` says the index
     file is missing, then iterates the processed reads together with
     their SAM, unaligned-reads and BAM paths. A read file is skipped as
     soon as either its SAM output or its BAM file is reported as not
     needing creation.
     """
     cli_args = self._args
     read_aligner = Segemehl(cli_args.segemehl_bin, cli_args.progress)
     needs_creation = self._file_needs_to_be_created
     paths = self._paths
     if needs_creation(paths.index_path):
         read_aligner.build_index(paths.ref_seq_paths, paths.index_path)
     path_sets = zip(
         paths.processed_read_paths,
         paths.primary_read_aligner_sam_paths,
         paths.unaligned_reads_paths,
         paths.read_alignment_bam_paths,
     )
     for read_path, output_path, nomatch_path, bam_path in path_sets:
         # The BAM check runs only if the SAM output is still missing,
         # mirroring the original if/elif evaluation order.
         if not (needs_creation(output_path) and needs_creation(bam_path)):
             continue
         read_aligner.run_alignment(
             read_path,
             paths.index_path,
             paths.ref_seq_paths,
             output_path,
             int(cli_args.processes),
             nomatch_path,
             int(cli_args.hit_strategy),
             int(cli_args.segemehl_accuracy),
             float(cli_args.segemehl_evalue),
             cli_args.split,
             paired_end=False,
         )
Exemple #5
0
def data_segemehl():
    """Publish the segemehl test fixtures as module-level globals.

    Calling this function binds file paths, a ``Segemehl`` instance, a small
    genome in lower- and upper-case FASTA form, and the expected SAM output
    strings to module-level names.

    Returns:
        None. The fixtures are made available through the globals only.
    """
    # Python 3 rejects a ``global`` statement that appears after the name
    # has already been assigned in the same function ("name ... is assigned
    # to before global declaration"), so all declarations come first.
    global fasta_file_path
    global index_file_path
    global read_fasta_file_path
    global aligning_result_path
    global unmapped_reads_path
    global segemehl
    global maxDiff
    global genome_fasta_lower
    global genome_fasta_upper
    global sam_result_aligned_1
    global sam_result_aligned_2
    global sam_result_aligned_3
    global sam_result_no_aligned

    fasta_file_path = "/tmp/test.fa"
    index_file_path = "/tmp/test.idx"
    read_fasta_file_path = "/tmp/test_reads.fa"
    aligning_result_path = "/tmp/test_aligning_results.sam"
    unmapped_reads_path = "/tmp/test_unmapped_reads.fa"
    segemehl = Segemehl(segemehl_bin="segemehl.x")
    maxDiff = None
    genome_fasta_lower = """>SL1344 genome sequence
agagattacgtctggttgcaagagatcatgacagggggaattggttgaaaataaatatat
cgccagcagcacatgaacaagtttcggaatgtgatcaatttaaaaatttattgacttagg
cgggcagatactttaaccaatataggaatacaagacagacaaataaaaatgacagagtac
acaacatccatgaaccgcatcagcaccaccaccattaccaccatcaccattaccacaggt
aacggtgcgggctgacgcgtacaggaaacacagaaaaaagcccgcacctgaacagtgcgg
gcttttttttcgaccagagatcacgaggtaacaaccatgcgagtgttgaagttcggcggt
acatcagtggcaaatgcagaacgttttctgcgtgttgccgatattctggaaagcaatgcc
aggcaagggcaggtagcgaccgtactttccgcccccgcgaaaattaccaaccatctggtg
gcaatgattgaaaaaactatcggcggccaggatgctttgccgaatatcagcgatgcagaa
cgtattttttctgacctgctcgcaggacttgccagcgcgcagccgggattcccgcttgca
cggttgaaaatggttgtcgaacaagaattcgctcagatcaaacatgttctgcatggtatc
agcctgctgggtcagtgcccggatagcatcaacgccgcgctgatttgccgtggcgaaaaa
atgtcgatcgcgattatggcgggacttctggaggcgcgtgggcatcgcgtcacggtgatc
gatccggtagaaaaattgctggcggtgggccattaccttgaatctaccgtcgatatcgcg
gaatcgactcgccgtatcgccgccagccagatcccggccgatcacatgatcctgatggcg
ggctttaccgccggtaatgaaaagggtgaactggtggtgctgggccgtaatggttccgac
"""
    genome_fasta_upper = """>SL1344 genome sequence
AGAGATTACGTCTGGTTGCAAGAGATCATGACAGGGGGAATTGGTTGAAAATAAATATAT
CGCCAGCAGCACATGAACAAGTTTCGGAATGTGATCAATTTAAAAATTTATTGACTTAGG
CGGGCAGATACTTTAACCAATATAGGAATACAAGACAGACAAATAAAAATGACAGAGTAC
ACAACATCCATGAACCGCATCAGCACCACCACCATTACCACCATCACCATTACCACAGGT
AACGGTGCGGGCTGACGCGTACAGGAAACACAGAAAAAAGCCCGCACCTGAACAGTGCGG
GCTTTTTTTTCGACCAGAGATCACGAGGTAACAACCATGCGAGTGTTGAAGTTCGGCGGT
ACATCAGTGGCAAATGCAGAACGTTTTCTGCGTGTTGCCGATATTCTGGAAAGCAATGCC
AGGCAAGGGCAGGTAGCGACCGTACTTTCCGCCCCCGCGAAAATTACCAACCATCTGGTG
GCAATGATTGAAAAAACTATCGGCGGCCAGGATGCTTTGCCGAATATCAGCGATGCAGAA
CGTATTTTTTCTGACCTGCTCGCAGGACTTGCCAGCGCGCAGCCGGGATTCCCGCTTGCA
CGGTTGAAAATGGTTGTCGAACAAGAATTCGCTCAGATCAAACATGTTCTGCATGGTATC
AGCCTGCTGGGTCAGTGCCCGGATAGCATCAACGCCGCGCTGATTTGCCGTGGCGAAAAA
ATGTCGATCGCGATTATGGCGGGACTTCTGGAGGCGCGTGGGCATCGCGTCACGGTGATC
GATCCGGTAGAAAAATTGCTGGCGGTGGGCCATTACCTTGAATCTACCGTCGATATCGCG
GAATCGACTCGCCGTATCGCCGCCAGCCAGATCCCGGCCGATCACATGATCCTGATGGCG
GGCTTTACCGCCGGTAATGAAAAGGGTGAACTGGTGGTGCTGGGCCGTAATGGTTCCGAC
"""
    sam_result_aligned_1 = """
read_01	0	SL1344	181	255	60M	*	0	0	ACAACATCCATGAACCGCATCAGCACCACCACCATTACCACCATCACCATTACCACAGGT	*	NM:i:0	MD:Z:60	NH:i:1	XI:i:0	XA:Z:Q
"""
    sam_result_aligned_2 = """
read_03	0	SL1344	301	255	20M	*	0	0	GCTTTTTTTTCGACCAGACA	*	NM:i:1	MD:Z:18G1	NH:i:1	XI:i:0	XA:Z:Q
"""

    sam_result_aligned_3 = """
read_05	0	SL1344	301	255	20M	*	0	0	GCTTTTTTTTCGACCAGTCA	*	NM:i:2	MD:Z:17A0G1	NH:i:1	XI:i:0	XA:Z:Q
"""
    sam_result_no_aligned = """
"""
 def setUp(self):
     """Create the fixtures used by the tests.

     Stores a ``Segemehl`` wrapper pointing at ``segemehl.x`` and an
     ``ExampleData`` instance on ``self``, then sets ``maxDiff`` to
     ``None``.
     """
     aligner = Segemehl(segemehl_bin="segemehl.x")
     self.segemehl = aligner
     fixtures = ExampleData()
     self.example_data = fixtures
     # NOTE(review): presumably a unittest.TestCase, where maxDiff = None
     # removes the length limit on assertion diffs - confirm.
     self.maxDiff = None
Exemple #7
0
 def __init__(self, segemehl_bin, show_progress):
     """Create the ``Segemehl`` backend and store it on ``self.segemehl``.

     ``segemehl_bin`` is handed over positionally and ``show_progress``
     as a keyword argument.
     """
     backend = Segemehl(segemehl_bin, show_progress=show_progress)
     self.segemehl = backend