class ReadAligner(object):
    """Facade that hides the concrete short read aligner from callers.

    Every call is forwarded to the wrapped ``Segemehl`` instance stored in
    ``self.segemehl``.
    """

    def __init__(self, segemehl_bin, show_progress):
        """Create the wrapped Segemehl object from the given settings."""
        self.segemehl = Segemehl(segemehl_bin, show_progress=show_progress)

    def build_index(self, ref_seq_paths, index_path):
        """Forward the index build request to the wrapped aligner."""
        self.segemehl.build_index(ref_seq_paths, index_path)

    def run_alignment(self, read_path_or_pair, index_path, ref_seq_path,
                      output_path, nomatch_path, threads, accuracy, evalue,
                      split, paired_end=False):
        """Forward an alignment request to the wrapped aligner.

        The four path arguments are handed over positionally; all remaining
        settings are passed as keyword arguments, with ``nomatch_path``
        forwarded under the name ``nonmatch_file``. Nothing is returned.
        """
        options = {
            "nonmatch_file": nomatch_path,
            "threads": threads,
            "accuracy": accuracy,
            "evalue": evalue,
            "split": split,
            "paired_end": paired_end,
        }
        self.segemehl.align_reads(
            read_path_or_pair, index_path, ref_seq_path, output_path,
            **options)
class ReadAligner(object):
    """Thin wrapper that delegates index building and alignment to Segemehl."""

    def __init__(self, segemehl_bin, show_progress):
        """Instantiate the Segemehl backend and keep it as ``self.segemehl``."""
        backend = Segemehl(segemehl_bin, show_progress=show_progress)
        self.segemehl = backend

    def build_index(self, ref_seq_paths, index_path):
        """Delegate to ``Segemehl.build_index`` with the same two arguments."""
        backend = self.segemehl
        backend.build_index(ref_seq_paths, index_path)

    def run_alignment(
        self,
        read_path_or_pair,
        index_path,
        ref_seq_path,
        output_path,
        nomatch_path,
        threads,
        accuracy,
        evalue,
        split,
        paired_end=False,
    ):
        """Delegate to ``Segemehl.align_reads``.

        Read, index, reference and output paths are passed positionally;
        ``nomatch_path`` is passed as ``nonmatch_file`` and the remaining
        settings keep their names. The method itself returns nothing.
        """
        backend = self.segemehl
        backend.align_reads(
            read_path_or_pair,
            index_path,
            ref_seq_path,
            output_path,
            nonmatch_file=nomatch_path,
            threads=threads,
            accuracy=accuracy,
            evalue=evalue,
            split=split,
            paired_end=paired_end,
        )
def _align_paired_end_reads(self):
    """Run the aligner on every processed paired-end read path pair.

    The index is built first when the helper reports that the index path
    still needs to be created. A read pair is skipped as soon as either its
    SAM output path or its BAM path is reported as not needing creation.
    """
    aligner = Segemehl(self._args.segemehl_bin, self._args.progress)
    needs_creation = self._helpers.file_needs_to_be_created
    paths = self._paths
    if needs_creation(paths.index_path):
        aligner.build_index(paths.ref_seq_paths, paths.index_path)
    jobs = zip(
        paths.processed_read_path_pairs,
        paths.primary_read_aligner_sam_paths,
        paths.unaligned_reads_paths,
        paths.primary_read_aligner_bam_paths,
    )
    for read_pair, sam_path, unaligned_path, bam_path in jobs:
        # The SAM path is checked first; the BAM path only if the SAM path
        # is reported as needing creation.
        if not (needs_creation(sam_path) and needs_creation(bam_path)):
            continue
        # NOTE(review): everything except paired_end is passed positionally;
        # confirm the order against the signature of Segemehl.run_alignment.
        aligner.run_alignment(
            read_pair,
            paths.index_path,
            paths.ref_seq_paths,
            sam_path,
            int(self._args.processes),
            unaligned_path,
            int(self._args.hit_strategy),
            int(self._args.segemehl_accuracy),
            float(self._args.segemehl_evalue),
            self._args.split,
            paired_end=True,
        )
def _align_single_end_reads(self):
    """Run the aligner on every processed single-end read path.

    Builds the index beforehand if ``_file_needs_to_be_created`` says the
    index path needs creating, and skips any read file whose SAM output
    path or BAM path is reported as not needing creation.
    """
    aligner = Segemehl(self._args.segemehl_bin, self._args.progress)
    must_create = self._file_needs_to_be_created
    paths = self._paths
    if must_create(paths.index_path):
        aligner.build_index(paths.ref_seq_paths, paths.index_path)
    jobs = zip(
        paths.processed_read_paths,
        paths.primary_read_aligner_sam_paths,
        paths.unaligned_reads_paths,
        paths.read_alignment_bam_paths,
    )
    for reads, sam_path, unaligned_path, bam_path in jobs:
        # Same short-circuit order as before: SAM path first, then BAM path.
        if not must_create(sam_path) or not must_create(bam_path):
            continue
        # NOTE(review): everything except paired_end is passed positionally;
        # confirm the order against the signature of Segemehl.run_alignment.
        aligner.run_alignment(
            reads,
            paths.index_path,
            paths.ref_seq_paths,
            sam_path,
            int(self._args.processes),
            unaligned_path,
            int(self._args.hit_strategy),
            int(self._args.segemehl_accuracy),
            float(self._args.segemehl_evalue),
            self._args.split,
            paired_end=False,
        )
def data_segemehl():
    """Publish the segemehl test data as module-level globals.

    Assigns the file paths, a ``Segemehl`` instance, ``maxDiff``, the two
    genome FASTA strings and the expected SAM result strings to global
    names of this module. Returns nothing.

    The ``global`` declarations are placed before the assignments: Python 3
    rejects a ``global`` statement that follows an assignment to the same
    name in the same function body with a SyntaxError.
    """
    global fasta_file_path, index_file_path, read_fasta_file_path
    global aligning_result_path, unmapped_reads_path
    global segemehl, maxDiff
    global genome_fasta_lower, genome_fasta_upper
    global sam_result_aligned_1, sam_result_aligned_2, sam_result_aligned_3
    global sam_result_no_aligned

    fasta_file_path = "/tmp/test.fa"
    index_file_path = "/tmp/test.idx"
    read_fasta_file_path = "/tmp/test_reads.fa"
    aligning_result_path = "/tmp/test_aligning_results.sam"
    unmapped_reads_path = "/tmp/test_unmapped_reads.fa"
    segemehl = Segemehl(segemehl_bin="segemehl.x")
    maxDiff = None

    # NOTE(review): the string values below reproduce the original literals
    # character for character, i.e. the sequence lines and SAM fields are
    # separated by single spaces. Confirm whether newline-separated FASTA
    # and tab-separated SAM were intended.
    genome_fasta_lower = (
        ">SL1344 genome sequence "
        "agagattacgtctggttgcaagagatcatgacagggggaattggttgaaaataaatatat "
        "cgccagcagcacatgaacaagtttcggaatgtgatcaatttaaaaatttattgacttagg "
        "cgggcagatactttaaccaatataggaatacaagacagacaaataaaaatgacagagtac "
        "acaacatccatgaaccgcatcagcaccaccaccattaccaccatcaccattaccacaggt "
        "aacggtgcgggctgacgcgtacaggaaacacagaaaaaagcccgcacctgaacagtgcgg "
        "gcttttttttcgaccagagatcacgaggtaacaaccatgcgagtgttgaagttcggcggt "
        "acatcagtggcaaatgcagaacgttttctgcgtgttgccgatattctggaaagcaatgcc "
        "aggcaagggcaggtagcgaccgtactttccgcccccgcgaaaattaccaaccatctggtg "
        "gcaatgattgaaaaaactatcggcggccaggatgctttgccgaatatcagcgatgcagaa "
        "cgtattttttctgacctgctcgcaggacttgccagcgcgcagccgggattcccgcttgca "
        "cggttgaaaatggttgtcgaacaagaattcgctcagatcaaacatgttctgcatggtatc "
        "agcctgctgggtcagtgcccggatagcatcaacgccgcgctgatttgccgtggcgaaaaa "
        "atgtcgatcgcgattatggcgggacttctggaggcgcgtgggcatcgcgtcacggtgatc "
        "gatccggtagaaaaattgctggcggtgggccattaccttgaatctaccgtcgatatcgcg "
        "gaatcgactcgccgtatcgccgccagccagatcccggccgatcacatgatcctgatggcg "
        "ggctttaccgccggtaatgaaaagggtgaactggtggtgctgggccgtaatggttccgac "
    )
    genome_fasta_upper = (
        ">SL1344 genome sequence "
        "AGAGATTACGTCTGGTTGCAAGAGATCATGACAGGGGGAATTGGTTGAAAATAAATATAT "
        "CGCCAGCAGCACATGAACAAGTTTCGGAATGTGATCAATTTAAAAATTTATTGACTTAGG "
        "CGGGCAGATACTTTAACCAATATAGGAATACAAGACAGACAAATAAAAATGACAGAGTAC "
        "ACAACATCCATGAACCGCATCAGCACCACCACCATTACCACCATCACCATTACCACAGGT "
        "AACGGTGCGGGCTGACGCGTACAGGAAACACAGAAAAAAGCCCGCACCTGAACAGTGCGG "
        "GCTTTTTTTTCGACCAGAGATCACGAGGTAACAACCATGCGAGTGTTGAAGTTCGGCGGT "
        "ACATCAGTGGCAAATGCAGAACGTTTTCTGCGTGTTGCCGATATTCTGGAAAGCAATGCC "
        "AGGCAAGGGCAGGTAGCGACCGTACTTTCCGCCCCCGCGAAAATTACCAACCATCTGGTG "
        "GCAATGATTGAAAAAACTATCGGCGGCCAGGATGCTTTGCCGAATATCAGCGATGCAGAA "
        # The original literal has a space followed by a line break here.
        "CGTATTTTTTCTGACCTGCTCGCAGGACTTGCCAGCGCGCAGCCGGGATTCCCGCTTGCA \n"
        "CGGTTGAAAATGGTTGTCGAACAAGAATTCGCTCAGATCAAACATGTTCTGCATGGTATC "
        "AGCCTGCTGGGTCAGTGCCCGGATAGCATCAACGCCGCGCTGATTTGCCGTGGCGAAAAA "
        "ATGTCGATCGCGATTATGGCGGGACTTCTGGAGGCGCGTGGGCATCGCGTCACGGTGATC "
        "GATCCGGTAGAAAAATTGCTGGCGGTGGGCCATTACCTTGAATCTACCGTCGATATCGCG "
        "GAATCGACTCGCCGTATCGCCGCCAGCCAGATCCCGGCCGATCACATGATCCTGATGGCG "
        "GGCTTTACCGCCGGTAATGAAAAGGGTGAACTGGTGGTGCTGGGCCGTAATGGTTCCGAC "
    )
    sam_result_aligned_1 = (
        " read_01 0 SL1344 181 255 60M * 0 0 "
        "ACAACATCCATGAACCGCATCAGCACCACCACCATTACCACCATCACCATTACCACAGGT "
        "* NM:i:0 MD:Z:60 NH:i:1 XI:i:0 XA:Z:Q "
    )
    sam_result_aligned_2 = (
        " read_03 0 SL1344 301 255 20M * 0 0 "
        "GCTTTTTTTTCGACCAGACA "
        "* NM:i:1 MD:Z:18G1 NH:i:1 XI:i:0 XA:Z:Q "
    )
    sam_result_aligned_3 = (
        " read_05 0 SL1344 301 255 20M * 0 0 "
        "GCTTTTTTTTCGACCAGTCA "
        "* NM:i:2 MD:Z:17A0G1 NH:i:1 XI:i:0 XA:Z:Q "
    )
    sam_result_no_aligned = " "
def setUp(self):
    """Attach a Segemehl instance, example data and ``maxDiff`` to the test."""
    aligner = Segemehl(segemehl_bin="segemehl.x")
    self.segemehl = aligner
    fixtures = ExampleData()
    self.example_data = fixtures
    self.maxDiff = None
def __init__(self, segemehl_bin, show_progress):
    """Build the wrapped Segemehl object and store it as ``self.segemehl``.

    ``segemehl_bin`` is passed positionally and ``show_progress`` by
    keyword to the ``Segemehl`` constructor.
    """
    backend = Segemehl(segemehl_bin, show_progress=show_progress)
    self.segemehl = backend