Exemple #1
0
def _trim_all_sequences(aligned_sequences: AlignedDNAFASTAFormat,
                        trim_positions: dict) -> AlignedDNAFASTAFormat:
    """
    Apply the same trim to every record of an alignment.

    Each record yielded by the alignment's AlignedDNAIterator view is
    passed to _trim_sequence together with the "start" and "end" entries
    of trim_positions, and whatever _trim_sequence returns is written to
    a freshly created AlignedDNAFASTAFormat.

    Arguments:
        aligned_sequences (AlignedDNAFASTAFormat): alignment to be trimmed
        trim_positions (dict): must provide the keys "start" and "end";
                        both values are forwarded unchanged to
                        _trim_sequence

    Returns:
        (AlignedDNAFASTAFormat): new alignment holding the trimmed records
    """

    trimmed_alignment = AlignedDNAFASTAFormat()
    with trimmed_alignment.open() as fasta_handle:
        for record in aligned_sequences.view(AlignedDNAIterator):
            # The positions are looked up per record (not hoisted) so an
            # empty alignment never touches trim_positions at all.
            _trim_sequence(
                record, trim_positions["start"], trim_positions["end"]
            ).write(fasta_handle)
    return trimmed_alignment
Exemple #2
0
def _locate_primer_positions(
        alignment_with_primers: AlignedDNAFASTAFormat) -> dict:
    """
    Work out where the aligned primers sit within the alignment.

    Records whose id is "forward" or "reverse" are treated as primers.
    For each of them the span of non-gap characters is determined
    (0-based 'start' of the first non-gap character, 'end' one past the
    last non-gap character). The per-primer spans are then reduced to a
    single pair of positions by _find_terminal_positions.

    Arguments:
        alignment_with_primers (AlignedDNAFASTAFormat): sequence alignment
                        containing at least one aligned primer

    Returns:
        (dict): dictionary with the keys "start" and "end" holding the
                trimming positions, using 0-based indexing
    """

    primer_ids = ("forward", "reverse")

    # Gapped primer strings, keyed by primer id.
    gapped_primers = {}
    for record in alignment_with_primers.view(DNAIterator):
        record_id = record.metadata["id"]
        if record_id in primer_ids:
            gapped_primers[record_id] = str(record)

    spans = {}
    for name, gapped in gapped_primers.items():
        # Index of the first non-gap character; a primer made up solely
        # of gaps (or an empty one) makes next() raise StopIteration here.
        first_base = next(
            idx for idx, base in enumerate(gapped) if base != "-")
        # At least one non-gap character is guaranteed at this point, so
        # dropping the trailing gaps leaves the exclusive end of the span.
        past_last_base = len(gapped.rstrip("-"))
        spans[name] = {'start': first_base, 'end': past_last_base}

    terminal_start, terminal_end = _find_terminal_positions(spans)

    # No validation (as done in _prepare_positions) is performed here:
    # given how _locate_primer_positions and _find_terminal_positions are
    # implemented, none of the conditions checked there can occur.

    return {"start": terminal_start, "end": terminal_end}