def test_different_ungapped_legnths(self):
    """Build an MSA from sequences whose ungapped lengths differ.

    All three gapped strings have the same width, but the number of
    non-gap characters in each differs; column 7 is then checked.
    """
    members = [
        Sequence("s1", "------TCGCGTAGC", 100, 129),
        Sequence("s2", "---------CGCAGC", 1000, 1006),
        Sequence("s3", "ATCGCGT--------", 120, 127),
    ]
    alignment = MultipleSequenceAlignment(members)
    expected_column = {"s1": "T", "s2": "-", "s3": "T"}
    self.assertEqual(alignment.get_column(7), expected_column)
def test_empty_seq_with_diff_length(self):
    """Check a column lookup when an UnknownSequence is part of the MSA.

    The unknown sequence spans 1601 positions, which does not match the
    other two sequences; the expected column contains only s1 and s2.
    """
    known_a = Sequence("s1", "-TCGCGTAGC---CGC-TAGCTGATGCGAT-CTGA", 100, 129)
    known_b = Sequence("s2", "ATCGCGTAGCTAGCGCG-AGCTG---CGATGCT--", 1000, 1029)
    unknown = UnknownSequence("s3", 25049, 25049 + 1601, "+", 50103)
    alignment = MultipleSequenceAlignment([known_a, known_b, unknown])
    expected_column = {"s1": "-", "s2": "A"}
    self.assertEqual(alignment.get_column(1), expected_column)
def __init__(self, seq, name=None, qual=None, use_mut_str=False):
    """
    Constructor for FastqSequence class; see class level documentation for
    descriptions of parameters.

    Note that although ``qual`` defaults to None, its length is taken
    unconditionally, so a quality string must actually be supplied.

    :raise NGSReadError: if seq and qual do not have the same length.
    """
    seq_len = len(seq)
    qual_len = len(qual)
    if seq_len != qual_len:
        mismatch = ("data (" + str(seq_len) + ") does not match length of "
                    + "quality string (" + str(qual_len) + ")")
        raise NGSReadError(
            "failed to create FastqSequence object -- length of sequence "
            + mismatch + ". seq data: " + seq + " qual data: " + qual)

    Sequence.__init__(self, name, seq, use_mut_str)
    self.seq_qual = qual

    # bounds used for quality scores
    # ILLUMINA 1.3+ Phred+64
    self.LOWSET_SCORE = 64
    self.HIGHEST_SCORE = 104
    # Illumina 1.8+ Phred+33
    self.LOWSET_SCORE_ILL_18_PHRD_33 = 33
    self.HIGHEST_SCORE_ILL_18_PHRD_33 = 74
def __init__(self, seq, name=None, qual=None, use_mut_str=False):
    """
    Constructor for FastqSequence class; see class level documentation for
    descriptions of parameters.

    Note that although ``qual`` defaults to None, its length is taken
    unconditionally, so a quality string must actually be supplied.

    :raise NGSReadError: if seq and qual do not have the same length.
    """
    seq_len = len(seq)
    qual_len = len(qual)
    if seq_len != qual_len:
        mismatch = ("data (" + str(seq_len) + ") does not match length of "
                    + "quality string (" + str(qual_len) + ")")
        raise NGSReadError(
            "failed to create FastqSequence object -- length of sequence "
            + mismatch + ". seq data: " + seq + " qual data: " + qual)

    Sequence.__init__(self, name, seq, use_mut_str)
    self.seq_qual = qual

    # bounds used for quality scores
    self.LOWSET_SCORE = 64
    self.HIGHEST_SCORE = 104
def setUp(self):
    """Create the pairwise and multiple alignments shared by the tests."""
    gapped_a = "-TCGCGTAGC---CGC-TAGCTGATGCGAT-CTGA"
    gapped_b = "ATCGCGTAGCTAGCGCG-AGCTG---CGATGCT--"

    first = Sequence("s1", gapped_a, 100, 129)
    second = Sequence("s2", gapped_b, 1000, 1029)
    # s3 carries the same gapped string as s2 but on the "-" strand
    third = Sequence("s3", gapped_b, 969, 998, "-")

    self.pa1 = PairwiseAlignment(first, second)
    self.pa2 = PairwiseAlignment(first, third)
    self.msa1 = MultipleSequenceAlignment([first, second, third])
def testClipadaptor(self):
    """Check that clip_adaptor masks the adaptor match in-place with Ns.

    The read is modified by clip_adaptor itself (its return value is not
    used), so the input object is compared against the expected read.
    """
    input_seq = NGSRead("ACTGCTAGCGATCGACT", "n1", "QQQQQQQQQQQQQQQQQ")
    adaptor = Sequence("adap", "AGCGATAGACT")
    expect = NGSRead("ACTGCTNNNNNNNNNNN", "n1", "QQQQQQQQQQQQQQQQQ")
    clip_adaptor(input_seq, adaptor)
    got = input_seq
    # assertEqual performs the same == comparison as assertTrue(a == b), but
    # reports both operands when the check fails.
    self.assertEqual(expect, got)
def __eq__(self, seq):
    """
    Test two fastqSequence objects for equality.

    The comparison defined by Sequence must succeed first; only then are
    the quality strings compared.

    :param seq: the other sequence to compare against.
    :return: True if this sequence is equal to seq, else False.
    """
    base_equal = Sequence.__eq__(self, seq)
    if not base_equal:
        return base_equal
    return self.seq_qual == seq.seq_qual
def __eq__(self, seq):
    """
    Test two fastqSequence objects for equality.

    The comparison defined by Sequence must succeed first; only then are
    the quality strings compared.

    :param seq: the other sequence to compare against.
    :return: True if this sequence is equal to seq, else False.
    """
    base_equal = Sequence.__eq__(self, seq)
    if not base_equal:
        return base_equal
    return self.seq_qual == seq.seq_qual
def __ne__(self, read):
    """
    Test two fastqSequence objects for inequality.

    They are unequal if the comparison defined by Sequence reports a
    difference, or failing that, if their quality strings differ.

    :param read: the other sequence to compare against.
    :return: True if this sequence is not equal to read, else False.
    """
    base_differs = Sequence.__ne__(self, read)
    if base_differs:
        return base_differs
    return self.seq_qual != read.seq_qual
def __ne__(self, read):
    """
    Test two fastqSequence objects for inequality.

    They are unequal if the comparison defined by Sequence reports a
    difference, or failing that, if their quality strings differ.

    :param read: the other sequence to compare against.
    :return: True if this sequence is not equal to read, else False.
    """
    base_differs = Sequence.__ne__(self, read)
    if base_differs:
        return base_differs
    return self.seq_qual != read.seq_qual
def __build_sequence(parts):
    """Build a sequence object using the pre-tokenized parts from a MAF line.

    An 's' line carries six fields after the leading 's':

      1. source sequence name
      2. start coordinate, zero-based; for the negative strand it is
         relative to the start of the reverse complement
      3. ungapped length of the sequence
      4. strand
      5. src size -- the full length of the source sequence
      6. the sequence itself

    For any strand other than "+", the start is converted by subtracting
    the given start and the ungapped length from the source size.
    """
    orientation = parts[4]
    ungapped_len = int(parts[3])
    src_size = int(parts[5])

    if orientation == "+":
        begin = int(parts[2])
    else:
        begin = src_size - int(parts[2]) - ungapped_len

    finish = begin + ungapped_len
    leftover = src_size - finish
    return Sequence(parts[1], parts[6], begin, finish, orientation, leftover)
def fastaIterator(fn, useMutableString=False, verbose=False):
    """
    A generator function which yields fastaSequence objects from a
    fasta-format file or stream.

    :param fn: a file-like stream or a string; if this is a string, it's
               treated as a filename, else it's treated as a file-like
               object, which must have a readline() method.
    :param useMutableString: passed through to each Sequence that is
                             built; if True, sequences are constructed from
                             lists of chars, rather than python string
                             objects, to allow more efficient editing. Use
                             with caution.
    :param verbose: if True, output additional status messages to stderr
                    about progress. If a progress indicator cannot be built
                    for the stream, a warning is written and progress
                    reporting is switched off.
    """
    # Only a handle opened here is closed here; a caller-supplied stream is
    # left open for the caller to manage.
    opened_here = isinstance(fn, str)
    fh = open(fn) if opened_here else fn

    try:
        if verbose:
            try:
                pind = __build_progress_indicator(fh)
            except ProgressIndicatorError as e:
                sys.stderr.write("Warning: unable to show progress for stream. " +
                                 "Reason: " + str(e))
                verbose = False

        prev_line = None
        while True:
            seqHeader = __read_seq_header(fh, prev_line)
            # drop the leading marker character of the header line
            name = seqHeader[1:].strip()
            seq_data, prev_line = __read_seq_data(fh)
            if verbose:
                pind.done = fh.tell()
                pind.showProgress(to_strm=sys.stderr)
            yield Sequence(name, seq_data, useMutableString)

            # remember where we stopped for next call, or finish
            if prev_line == "":
                break
    finally:
        # runs on exhaustion, on error, and when the generator is closed
        if opened_here:
            fh.close()
def test_failure_on_different_lengths(self):
    """Constructing an MSA from ragged sequences must raise an error.

    The two gapped strings differ in width (33 vs. 35 characters).
    """
    narrow = Sequence("s1", "-TCGCGTAGC---CGC-TAGGATGCGAT-CTGA", 100, 127)
    wide = Sequence("s2", "ATCGCGTAGCTAGCGCG-AGCTG---CGATGCT--", 1000, 1029)
    ragged = [narrow, wide]
    self.assertRaises(MultipleAlignmentError,
                      MultipleSequenceAlignment, ragged)
def setUp(self):
    """Set up some MAF files to use in unit tests.

    Two alignment blocks are defined as MAF-format text (``self.b1`` and
    ``self.b2``) and joined, separated by a blank line, into ``self.maf1``.
    Alongside the text, Sequence / UnknownSequence objects are built by
    hand for some of the lines so tests can compare against them.
    """
    # ---- block 1: gapped sequence (_seq / _s) and quality (_q) strings ----
    b1_hg19_seq = "atctccaagagggcataaaacac-tgagtaaacagctcttttatatgtgtttcctgga"
    b1_panTro_s = "atctccaagagggcataaaacac-tgagtaaacagctctt--atatgtgtttcctgga"
    b1_panTro_q = "99999999999999999999999-9999999999999999--9999999999999999"
    b1_tarSyr_s = "atctccaagagggctgaaaatgc-caaatga-----------tcacacgtttcctgga"
    b1_tarSyr_q = "79295966999999999999998-9999799-----------9999999999765775"
    b1_tupBel_s = "ttcaggaagggggcccaaaacgcttgagtggtcagctctta-ttttgcgtttactgga"
    b1_tupBel_q = "79648579699867994997775679665662767577569-6998745597677632"
    # MAF text for block 1: an 'a' line, then 's', 'q' and 'i' lines per
    # species, and a final 'e' line for mm4.chr6.
    b1 = "a score=28680.000000\n" +\
         "s hg19.chr22 1711 57 + 51304566 " + b1_hg19_seq + "\n" +\
         "s panTro2.chrUn 1110 59 + 58616431 " + b1_panTro_s + "\n" +\
         "q panTro2.chrUn " + b1_panTro_q + "\n" +\
         "i panTro2.chrUn C 0 C 0 " + "\n" +\
         "s tarSyr1.scaffold_5923 2859 50 - 8928 " + b1_tarSyr_s + "\n" +\
         "q tarSyr1.scaffold_5923 " + b1_tarSyr_q + "\n" +\
         "i tarSyr1.scaffold_5923 N 0 C 0 " + "\n" +\
         "s tupBel1.scaffold_803 33686 61 + 85889 " + b1_tupBel_s + "\n" +\
         "q tupBel1.scaffold_803 " + b1_tupBel_q + "\n" +\
         "i tupBel1.scaffold_803 I 1 C 0 " + "\n" +\
         "e mm4.chr6 53310102 58 + 151104725 I"
    # Hand-built objects for block 1. The numeric arguments after the
    # sequence are start, end, strand and a final count that equals
    # (src size - end) for the "+" strand entries.
    # hg19: 1768 = 1711 + 57 and 51302798 = 51304566 - 1768
    self.b1_hg19 = Sequence("hg19.chr22", b1_hg19_seq, 1711, 1768, "+",
                            51302798)
    self.b1_panTro = Sequence(
        "panTro2.chrUn", b1_panTro_s, 1110, 1169, "+", 58616431 - 1169, {
            QUALITY_META_KEY: b1_panTro_q,
            LEFT_STATUS_KEY: "C",
            LEFT_COUNT_KEY: 0,
            RIGHT_STATUS_KEY: "C",
            RIGHT_COUNT_KEY: 0
        })
    # "-" strand entry: start and end are written as the src size (8928)
    # minus the offset from the MAF line (2859); the last count is 2859.
    self.b1_tarSyr = Sequence(
        "tarSyr1.scaffold_5923", b1_tarSyr_s, 8928 - 2859 - 50, 8928 - 2859,
        "-", 2859, {
            QUALITY_META_KEY: b1_tarSyr_q,
            LEFT_STATUS_KEY: "N",
            LEFT_COUNT_KEY: 0,
            RIGHT_STATUS_KEY: "C",
            RIGHT_COUNT_KEY: 0
        })
    # the 'e' line has no sequence text, so it is an UnknownSequence
    self.b1_mm4 = UnknownSequence("mm4.chr6", 53310102, 53310102 + 58, "+",
                                  151104725 - (53310102 + 58),
                                  {EMPTY_ALIGNMENT_STATUS_KEY: "I"})

    # ---- block 2: gapped sequence (_seq / _s) and quality (_q) strings ----
    b2_hg19_seq = "ccttcttttaattaattttgttaagg----gatttcctctagggccactgcacgtca"
    b2_panTro_s = "ccttcttttaattaattttgttatgg----gatttcgtctagggtcactgcacatca"
    b2_panTro_q = "99999999999999999999999999----999999099999999999999999999"
    b2_tarSyr_s = "tcttcttttaattaattttattgagggattgattccttattgggccactacacatta"
    b2_tarSyr_q = "999999899978999999999999999977989997998678865952859999899"
    b2_tupBel_s = "cct--gtttaaattactgtattg-gg----gatttcctatagggccgcttctcgtcc"
    b2_tupBel_q = "666--958759455555746366-68----656846556554745443677468565"
    # MAF text for block 2 (no 'e' line in this block)
    b2 = "a score=31725.000000\n" +\
         "s hg19.chr22 1772 53 + 51304566 " + b2_hg19_seq + "\n" +\
         "s panTro2.chrUn 1169 53 + 58616431 " + b2_panTro_s + "\n" +\
         "q panTro2.chrUn " + b2_panTro_q + "\n" +\
         "i panTro2.chrUn C 0 C 0 " + "\n" +\
         "s tarSyr1.scaffold_5923 2909 124 - 8928 " + b2_tarSyr_s + "\n" +\
         "q tarSyr1.scaffold_5923 " + b2_tarSyr_q + "\n" +\
         "i tarSyr1.scaffold_5923 C 0 N 0 " + "\n" +\
         "s tupBel1.scaffold_803 33747 113 + 85889 " + b2_tupBel_s + "\n" +\
         "q tupBel1.scaffold_803 " + b2_tupBel_q + "\n" +\
         "i tupBel1.scaffold_803 C 0 N 0 "

    # the full MAF 'file': both blocks separated by a blank line
    self.maf1 = b1 + "\n\n" + b2

    # Hand-built objects for block 2.
    # hg19: 1825 = 1772 + 53 and 51302741 = 51304566 - 1825
    self.b2_hg19 = Sequence("hg19.chr22", b2_hg19_seq, 1772, 1825, "+",
                            51302741)
    self.b2_panTro = Sequence(
        "panTro2.chrUn", b2_panTro_s, 1169, 1169 + 53, "+",
        58616431 - (1169 + 53), {
            QUALITY_META_KEY: b2_panTro_q,
            LEFT_STATUS_KEY: "C",
            LEFT_COUNT_KEY: 0,
            RIGHT_STATUS_KEY: "C",
            RIGHT_COUNT_KEY: 0
        })
    # "-" strand entry, written the same way as in block 1
    self.b2_tarSyr = Sequence(
        "tarSyr1.scaffold_5923", b2_tarSyr_s, 8928 - 2909 - 124, 8928 - 2909,
        "-", 2909, {
            QUALITY_META_KEY: b2_tarSyr_q,
            LEFT_STATUS_KEY: "C",
            LEFT_COUNT_KEY: 0,
            RIGHT_STATUS_KEY: "N",
            RIGHT_COUNT_KEY: 0
        })

    # keep the raw text of each block available to the tests as well
    self.b1 = b1
    self.b2 = b2
def reverse_complement(self, is_RNA=None):
    """
    Reverse complement this read in-place.

    The nucleotide data is handled by Sequence.reverseComplement; the
    quality string is then reversed so that it stays in step.

    :param is_RNA: passed through unchanged to Sequence.reverseComplement.
    """
    Sequence.reverseComplement(self, is_RNA)
    flipped_qual = self.seq_qual[::-1]
    self.seq_qual = flipped_qual
def setUp(self):
    """Create the alignment blocks and genome alignments used by the tests.

    Every block is built from four sequences and uses "s1" as the second
    argument to GenomeAlignmentBlock.
    """
    def make_block(rows):
        # each row is (name, gapped sequence, start, end)
        return GenomeAlignmentBlock([Sequence(*row) for row in rows], "s1")

    self.block1 = make_block([("s1.c1", "TCTCGC-A", 11, 18),
                              ("s2.c1", "ACTGGC--", 25, 31),
                              ("s3.c2", "ACTGCCTA", 5, 13),
                              ("s4.c1", "ACT-GCTA", 58, 65)])
    self.block2 = make_block([("s1.c2", "C------G", 21, 23),
                              ("s2.c2", "CGGTCAGG", 85, 94),
                              ("s3.c2", "-GGTC-GG", 1, 7),
                              ("s4.c3", "-GGCCAGG", 3, 11)])
    self.block3 = make_block([("s1.c1", "CA-TAGC-G", 20, 26),
                              ("s2.c1", "CAGTAGC-G", 38, 35),
                              ("s3.c2", "C-GT-GCAG", 5, 13),
                              ("s4.c1", "CACT-GC-G", 58, 65)])
    self.block4 = make_block([("s1.c1", "CG-TCGA", 51, 57),
                              ("s2.c1", "CGCT-GA", 38, 35),
                              ("s3.c2", "AGGTCGC", 5, 13),
                              ("s4.c1", "CGCT-GA", 58, 65)])

    # this block defines an ambiguous alignment of part of block1
    self.block1p = make_block([("s1.c1", "GCACGCT", 15, 22),
                               ("s2.c8", "GCAC-CT", 25, 31),
                               ("s3.c8", "GC-CGCT", 5, 13),
                               ("s4.c8", "GC--GCT", 58, 65)])

    first_two = [self.block1, self.block2]
    self.ga1 = GenomeAlignment(first_two)
    self.ga2 = GenomeAlignment([self.block1, self.block2, self.block1p])
class GATestHelper(object):
    """Helper for tests involving genome alignments in concrete MAF syntax.

    Everything is defined as class attributes: the MAF-format text of two
    alignment blocks (``b1``, ``b2``), the two joined into ``maf1``, and
    hand-built Sequence / UnknownSequence objects for some of the lines.
    """

    # this defines a single genome alignment block
    # gapped sequence (_seq / _s) and quality (_q) strings for block 1
    b1_hg19_seq = "atctccaagagggcataaaacac-tgagtaaacagctcttttatatgtgtttcctgga"
    b1_panTro_s = "atctccaagagggcataaaacac-tgagtaaacagctctt--atatgtgtttcctgga"
    b1_panTro_q = "99999999999999999999999-9999999999999999--9999999999999999"
    b1_tarSyr_s = "atctccaagagggctgaaaatgc-caaatga-----------tcacacgtttcctgga"
    b1_tarSyr_q = "79295966999999999999998-9999799-----------9999999999765775"
    b1_tupBel_s = "ttcaggaagggggcccaaaacgcttgagtggtcagctctta-ttttgcgtttactgga"
    b1_tupBel_q = "79648579699867994997775679665662767577569-6998745597677632"
    # MAF text for block 1: an 'a' line, then 's', 'q' and 'i' lines per
    # species, and a final 'e' line for mm4.chr6.
    b1 = "a score=28680.000000\n" +\
         "s hg19.chr22 1711 57 + 51304566 " + b1_hg19_seq + "\n" +\
         "s panTro2.chrUn 1110 59 + 58616431 " + b1_panTro_s + "\n" +\
         "q panTro2.chrUn " + b1_panTro_q + "\n" +\
         "i panTro2.chrUn C 0 C 0 " + "\n" +\
         "s tarSyr1.scaffold_5923 2859 50 - 8928 " + b1_tarSyr_s + "\n" +\
         "q tarSyr1.scaffold_5923 " + b1_tarSyr_q + "\n" +\
         "i tarSyr1.scaffold_5923 N 0 C 0 " + "\n" +\
         "s tupBel1.scaffold_803 33686 61 + 85889 " + b1_tupBel_s + "\n" +\
         "q tupBel1.scaffold_803 " + b1_tupBel_q + "\n" +\
         "i tupBel1.scaffold_803 I 1 C 0 " + "\n" +\
         "e mm4.chr6 53310102 58 + 151104725 I"
    # abstract repr. of some parts of block 1. The numeric arguments after
    # the sequence are start, end, strand and a final count that equals
    # (src size - end) for the "+" strand entries.
    # hg19: 1768 = 1711 + 57 and 51302798 = 51304566 - 1768
    b1_hg19 = Sequence("hg19.chr22", b1_hg19_seq, 1711, 1768, "+", 51302798)
    b1_panTro = Sequence(
        "panTro2.chrUn", b1_panTro_s, 1110, 1169, "+", 58616431 - 1169, {
            maf.QUALITY_META_KEY: b1_panTro_q,
            maf.LEFT_STATUS_KEY: "C",
            maf.LEFT_COUNT_KEY: 0,
            maf.RIGHT_STATUS_KEY: "C",
            maf.RIGHT_COUNT_KEY: 0
        })
    # "-" strand entry: start and end are written as the src size (8928)
    # minus the offset from the MAF line (2859); the last count is 2859.
    b1_tarSyr = Sequence(
        "tarSyr1.scaffold_5923", b1_tarSyr_s, 8928 - 2859 - 50, 8928 - 2859,
        "-", 2859, {
            maf.QUALITY_META_KEY: b1_tarSyr_q,
            maf.LEFT_STATUS_KEY: "N",
            maf.LEFT_COUNT_KEY: 0,
            maf.RIGHT_STATUS_KEY: "C",
            maf.RIGHT_COUNT_KEY: 0
        })
    # the 'e' line has no sequence text, so it is an UnknownSequence
    b1_mm4 = UnknownSequence("mm4.chr6", 53310102, 53310102 + 58, "+",
                             151104725 - (53310102 + 58),
                             {maf.EMPTY_ALIGNMENT_STATUS_KEY: "I"})

    # this defines a second genome alignment block
    # gapped sequence (_seq / _s) and quality (_q) strings for block 2
    b2_hg19_seq = "ccttcttttaattaattttgttaagg----gatttcctctagggccactgcacgtca"
    b2_panTro_s = "ccttcttttaattaattttgttatgg----gatttcgtctagggtcactgcacatca"
    b2_panTro_q = "99999999999999999999999999----999999099999999999999999999"
    b2_tarSyr_s = "tcttcttttaattaattttattgagggattgattccttattgggccactacacatta"
    b2_tarSyr_q = "999999899978999999999999999977989997998678865952859999899"
    b2_tupBel_s = "cct--gtttaaattactgtattg-gg----gatttcctatagggccgcttctcgtcc"
    b2_tupBel_q = "666--958759455555746366-68----656846556554745443677468565"
    # MAF text for block 2 (no 'e' line in this block)
    b2 = "a score=31725.000000\n" +\
         "s hg19.chr22 1772 53 + 51304566 " + b2_hg19_seq + "\n" +\
         "s panTro2.chrUn 1169 53 + 58616431 " + b2_panTro_s + "\n" +\
         "q panTro2.chrUn " + b2_panTro_q + "\n" +\
         "i panTro2.chrUn C 0 C 0 " + "\n" +\
         "s tarSyr1.scaffold_5923 2909 124 - 8928 " + b2_tarSyr_s + "\n" +\
         "q tarSyr1.scaffold_5923 " + b2_tarSyr_q + "\n" +\
         "i tarSyr1.scaffold_5923 C 0 N 0 " + "\n" +\
         "s tupBel1.scaffold_803 33747 113 + 85889 " + b2_tupBel_s + "\n" +\
         "q tupBel1.scaffold_803 " + b2_tupBel_q + "\n" +\
         "i tupBel1.scaffold_803 C 0 N 0 "

    # define a maf 'file' by stitching the two blocks together
    maf1 = b1 + "\n\n" + b2

    # abstract repr. of some parts of the above data.
    # hg19: 1825 = 1772 + 53 and 51302741 = 51304566 - 1825
    b2_hg19 = Sequence("hg19.chr22", b2_hg19_seq, 1772, 1825, "+", 51302741)
    b2_panTro = Sequence(
        "panTro2.chrUn", b2_panTro_s, 1169, 1169 + 53, "+",
        58616431 - (1169 + 53), {
            maf.QUALITY_META_KEY: b2_panTro_q,
            maf.LEFT_STATUS_KEY: "C",
            maf.LEFT_COUNT_KEY: 0,
            maf.RIGHT_STATUS_KEY: "C",
            maf.RIGHT_COUNT_KEY: 0
        })
    # "-" strand entry, written the same way as in block 1
    b2_tarSyr = Sequence(
        "tarSyr1.scaffold_5923", b2_tarSyr_s, 8928 - 2909 - 124, 8928 - 2909,
        "-", 2909, {
            maf.QUALITY_META_KEY: b2_tarSyr_q,
            maf.LEFT_STATUS_KEY: "C",
            maf.LEFT_COUNT_KEY: 0,
            maf.RIGHT_STATUS_KEY: "N",
            maf.RIGHT_COUNT_KEY: 0
        })
def repeat_masker_alignment_iterator(fn, index_friendly=True, verbose=False):
    """
    Iterator for repeat masker alignment files; yields multiple alignment
    objects.

    Iterate over a file/stream of full repeat alignments in the repeatmasker
    format. Briefly, this format is as follows: each record (alignment)
    begins with a header line (see _rm_parse_header_line documentation for
    details of header format), followed by the alignment itself (example
    below) and finally a set of key-value meta-data pairs.

    The actual alignment looks like this::

      chr1             11 CCCTGGAGATTCTTATT--AGTGATTTGGGCT 41
                             ii        v   -- v  i i    v
      C MER5B#DNA/hAT  10 CCCCAGAGATTCTGATTTAATTGGTCTGGGGT 42

      chr1             42 GACTG 47
                          v
      C MER5B#DNA/hAT  43 CACTG 48

    The 'C' indicates that its the reverse complement of the consensus. The
    central string gives information about matches; "-" indicates an
    insertion/deletion, "i" a transition (G<->A, C<->T) and "v" a
    transversion (all other substitutions).

    :param fn: filename or stream-like object to read from.
    :param index_friendly: if True, we will ensure the file/stream
                           position is before the start of the record when
                           we yield it; this requires the ability to seek
                           within the stream though, so if iterating over a
                           stream without that ability, you'll have to set
                           this to false. Further, this will disable
                           buffering for the file, to ensure file.tell()
                           behaves correctly, so a performance hit will be
                           incurred.
    :param verbose: if true, output progress messages to stderr.
    :return: generator yielding one PairwiseAlignment per record; the first
             sequence is always built with strand "+", the second with "-"
             when the header indicates a reverse-complement match.
    :raise IOError: if REPEATMASKER_VALIDATE_MUTATIONS is set and an
                    annotation line fails validation.
    """
    # step 1 -- build our iterator for the stream..
    # if open() rejects fn with a TypeError, fn is used as the stream itself
    try:
        fh = open(fn)
    except (TypeError):
        fh = fn
    iterable = fh
    if index_friendly:
        # pull lines one readline() call at a time, stopping at the first ''
        iterable = iter(fh.readline, '')

    # build progress indicator, if we want one and we're able to
    if verbose:
        # NOTE(review): a stream lacking a 'name' attribute raises
        # AttributeError rather than TypeError -- confirm this is intended.
        try:
            m_fn = ": " + fh.name
        except TypeError:
            m_fn = ""
        try:
            # measure the stream size by seeking to the end, then go back
            current = fh.tell()
            fh.seek(0, 2)
            total_progress = fh.tell()
            fh.seek(current)
            pind = ProgressIndicator(
                totalToDo=total_progress, messagePrefix="completed",
                messageSuffix="of processing repeat-masker "
                              "alignment file" + m_fn)
        except IOError:
            pind = None

    # stream positions before (old) and after (new) the most recent line
    # NOTE(review): tell() is called here even when index_friendly is
    # False -- confirm this is acceptable for streams that cannot tell().
    old_fh_pos = None
    new_fh_pos = fh.tell()

    # state of the record currently being accumulated
    s1 = None
    s2 = None
    s1_name = None
    s2_name = None
    s1_start = None
    s1_end = None
    s2_start = None
    s2_end = None
    meta_data = None
    # counts alignment and annotation lines seen since the last header; a
    # value with remainder 1 (mod 3) means the next line is an annotation
    alignment_line_counter = 0
    alig_l_space = 0
    prev_seq_len = 0
    rev_comp_match = None
    remaining_repeat = None
    remaining_genomic = None

    for line in iterable:
        if verbose and pind is not None:
            pind.done = fh.tell()
            pind.showProgress()

        if index_friendly:
            old_fh_pos = new_fh_pos
            new_fh_pos = fh.tell()

        line = line.rstrip()
        # blank lines are skipped, except where an annotation line is
        # expected (an annotation line may legitimately be empty)
        if line.lstrip() == "" and alignment_line_counter % 3 != 1:
            continue

        # split on whitespace but keep the whitespace runs, since the
        # leading space is needed to position annotation text
        s_pres_split = re.split(r'(\s+)', line)
        parts = [x for x in s_pres_split if not (x.isspace() or x == "")]

        n = len(parts)
        for i in REPEATMASKER_FIELDS_TO_TRIM:
            if n >= i + 1:
                parts[i] = parts[i].strip()

        # decide what to do with this line -- is it a header line, part of
        # the alignment or a meta-data key-value line
        if alignment_line_counter % 3 == 1:
            # annotation line sitting between the two sequence lines
            if (REPEATMASKER_VALIDATE_MUTATIONS and
                    not _rm_is_valid_annotation_line(line)):
                raise IOError("invalid mutation line: " + line)
            # pad on both sides so the annotation chunk is exactly as long
            # as the sequence chunk that preceded it
            l_space = _rm_compute_leading_space(s_pres_split) - alig_l_space
            pad_right = prev_seq_len - (l_space + len(line.strip()))
            meta_data[ANNOTATION_KEY] += ((' ' * l_space) + line.strip() +
                                          (' ' * pad_right))
            alignment_line_counter += 1
        elif _rm_is_header_line(parts, n):
            # a new record starts; emit the pending one first, if any
            if not (s1 is None and s2 is None and meta_data is None):
                if ANNOTATION_KEY in meta_data:
                    meta_data[ANNOTATION_KEY] = meta_data[
                        ANNOTATION_KEY].rstrip()
                # while the consumer holds the record, leave the stream at
                # the position recorded before the current line was read
                if index_friendly:
                    fh.seek(old_fh_pos)
                ss1 = Sequence(s1_name, s1, s1_start, s1_end, "+",
                               remaining_genomic)
                s2s = "-" if rev_comp_match else "+"
                ss2 = Sequence(s2_name, s2, s2_start, s2_end, s2s,
                               remaining_repeat)
                yield PairwiseAlignment(ss1, ss2, meta_data)
                # restore the read position before continuing
                if index_friendly:
                    fh.seek(new_fh_pos)
            # reset the state from the new header
            meta_data = {}
            s1 = ""
            s2 = ""
            s1_name, s2_name = _rm_get_names_from_header(parts)
            s1_start, s1_end = _rm_get_reference_coords_from_header(parts)
            s2_start, s2_end = _rm_get_repeat_coords_from_header(parts)
            rev_comp_match = _rm_is_reverse_comp_match(parts)
            remaining_repeat = _rm_get_remaining_repeat_from_header(parts)
            remaining_genomic = _rm_get_remaining_genomic_from_header(parts)

            _rm_parse_header_line(parts, meta_data)
            alignment_line_counter = 0
        elif _rm_is_alignment_line(parts, s1_name, s2_name):
            # sequence line: append its chunk to whichever sequence it names
            alignment_line_counter += 1
            name, seq = _rm_extract_sequence_and_name(parts, s1_name, s2_name)
            if name == s1_name:
                s1 += seq
            elif name == s2_name:
                s2 += seq
            # remembered so a following annotation line can be aligned to it
            alig_l_space = _rm_compute_leading_space_alig(s_pres_split, seq)
            prev_seq_len = len(seq)
        else:
            # anything else is treated as a key-value meta-data line
            k, v = _rm_parse_meta_line(parts)
            meta_data[k] = v

    # end of input: emit the record that was still being accumulated
    # NOTE(review): if no header line was ever seen, the names, sequences
    # and meta_data are all still None at this point -- confirm how empty
    # input is expected to behave.
    if index_friendly:
        fh.seek(old_fh_pos)
    ss1 = Sequence(s1_name, s1, s1_start, s1_end, "+", remaining_genomic)
    s2s = "-" if rev_comp_match else "+"
    ss2 = Sequence(s2_name, s2, s2_start, s2_end, s2s, remaining_repeat)
    yield PairwiseAlignment(ss1, ss2, meta_data)
    if index_friendly:
        fh.seek(new_fh_pos)