def _trim_all_sequences(aligned_sequences: AlignedDNAFASTAFormat,
                        trim_positions: dict) -> AlignedDNAFASTAFormat:
    """
    Apply the same start/end trim to every sequence of an alignment.

    Each record read from the input alignment is passed through
    _trim_sequence and the outcome is written to a freshly created
    AlignedDNAFASTAFormat, in the order the records are read.

    Arguments:
        aligned_sequences (AlignedDNAFASTAFormat): original, aligned sequences
        trim_positions (dict): trimming positions; the "start" and "end"
                    entries are read and handed to _trim_sequence

    Returns:
        trimmed (AlignedDNAFASTAFormat): trimmed aligned sequences
    """
    trimmed = AlignedDNAFASTAFormat()
    with trimmed.open() as fasta_handle:
        for record in aligned_sequences.view(AlignedDNAIterator):
            # The positions are looked up per record so that an empty
            # alignment never touches the trim_positions dictionary.
            _trim_sequence(
                record,
                trim_positions["start"],
                trim_positions["end"],
            ).write(fasta_handle)
    return trimmed
def _locate_primer_positions(
        alignment_with_primers: AlignedDNAFASTAFormat) -> dict:
    """
    Work out where the aligned primers sit within the alignment.

    Records whose id is "forward" or "reverse" are treated as primers.
    For each of them the index of the first non-gap character ('start')
    and the index just past the last non-gap character ('end') are
    recorded. These per-primer positions are then passed to
    _find_terminal_positions, which yields the final start/end pair.

    Arguments:
        alignment_with_primers (AlignedDNAFASTAFormat): sequence alignment
                    containing at least one aligned primer

    Returns:
        (dict): dictionary with "start" and "end" trimming positions
                    using 0-based indexing
    """
    # Keep only the primer rows, as plain strings keyed by their id.
    primers_aligned = {
        record.metadata["id"]: str(record)
        for record in alignment_with_primers.view(DNAIterator)
        if record.metadata["id"] in ["forward", "reverse"]
    }

    primer_positions = {}
    for name, gapped in primers_aligned.items():
        # Number of gap characters before the first real nucleotide.
        leading_gaps = next(
            idx for idx, base in enumerate(gapped) if base != "-")
        # Number of gap characters after the last real nucleotide.
        trailing_gaps = next(
            idx for idx, base in enumerate(reversed(gapped)) if base != "-")
        primer_positions[name] = {
            'start': leading_gaps,
            'end': len(gapped) - trailing_gaps,
        }

    pos_start, pos_end = _find_terminal_positions(primer_positions)

    # No validation (as done in _prepare_positions) is performed here:
    # given how _locate_primer_positions and _find_terminal_positions are
    # implemented, none of the conditions checked there can occur.
    return {"start": pos_start, "end": pos_end}