Ejemplo n.º 1
0
    def __encrypt_unmapped(alignment: pysam.AlignedSegment, secret: bytes):
        """
        Run the stream cipher over the sequence of an unmapped alignment, in place.

        The transformation is its own inverse:
        alignment + secret => encrypted_alignment
        encrypted_alignment + secret => alignment

        Alignments that are not flagged as unmapped are left untouched.

        :param alignment: record whose query sequence is replaced by the ciphered one
        :param secret: key bytes; hashed together with the query name to build the key
        :raises ValueError: when the alignment is unmapped and secret is None
        """
        if not alignment.is_unmapped:
            return

        if secret is None:
            raise ValueError(
                'Secret key must be present when unmapped alignments are iterated.'
            )

        # SHA-512 gives a 64B digest (original note: enough to encrypt 256 bases);
        # the query name is mixed in, so the digest differs from read to read
        digest = hashlib.sha512(secret + alignment.query_name.encode()).digest()
        ciphered = cmn.stream_cipher(alignment.query_sequence, digest)

        # Read the qualities before overwriting the sequence and write them back
        # afterwards, so the original quality values are kept.
        # TODO: maybe something else with the quality?
        kept_qualities = alignment.query_qualities
        alignment.query_sequence = ciphered
        alignment.query_qualities = kept_qualities
Ejemplo n.º 2
0
def test_sam_alignment_to_padded_alignment():
    """Check the padded strings produced for a read with one single-base deletion."""
    ref_bases = 'AGCTTAGCTAAGCTACCTATATCTTGGTCTTGGCCG'

    read = AlignedSegment()
    read.reference_start = 0
    read.query_sequence = 'AGCTTAGCTAGCTACCTATATCTTGGTCTTGGCCG'
    # CIGAR tuples are (operation, length); 0 is a match and 2 a deletion: 10M 1D 25M
    read.cigartuples = ((0,10), (2,1), (0,25))

    padded = sam_alignment_to_padded_alignment(read, Reference('test', ref_bases))
    pad_ref, pad_match, pad_query = padded

    # The reference comes back unchanged; the deleted base shows up as a blank in
    # the match line and as '-' in the query line.
    assert pad_ref == ref_bases
    assert pad_match == '|||||||||| |||||||||||||||||||||||||'
    assert pad_query == 'AGCTTAGCTA-GCTACCTATATCTTGGTCTTGGCCG'
Ejemplo n.º 3
0
    def _set_length_dependent_fields(
        self,
        rec: pysam.AlignedSegment,
        length: int,
        bases: Optional[str] = None,
        quals: Optional[List[int]] = None,
        cigar: Optional[str] = None,
    ) -> None:
        """Fills in bases, quals and cigar on a record.

        If any of bases, quals or cigar are defined, they must all have the same length/query
        length.  If none are defined then the length parameter is used.  Undefined values are
        synthesize at the inferred length.

        Args:
            rec: a SAM record
            length: the length to use if all of bases/quals/cigar are None
            bases: an optional string of bases for the read
            quals: an optional list of qualities for the read
            cigar: an optional cigar string for the read
        """

        # Do some validation to make sure all defined things have the same lengths
        lengths = set()
        if bases is not None:
            lengths.add(len(bases))
        if quals is not None:
            lengths.add(len(quals))
        if cigar is not None:
            cig = sam.Cigar.from_cigarstring(cigar)
            lengths.add(sum([elem.length_on_query for elem in cig.elements]))

        if not lengths:
            lengths.add(length)

        if len(lengths) != 1:
            raise ValueError(
                "Provided bases/quals/cigar are not length compatible.")

        # Fill in the record, making any parts that were not defined as params
        length = lengths.pop()
        rec.query_sequence = bases if bases else self._bases(length)
        rec.query_qualities = quals if quals else [self.base_quality] * length
        if not rec.is_unmapped:
            rec.cigarstring = cigar if cigar else f"{length}M"
Ejemplo n.º 4
0
def _make_read_unmapped(rec: AlignedSegment) -> None:
    """Removes mapping information from a read.

    A record flagged as reverse strand has its bases reverse-complemented and its
    qualities reversed, and the reverse flag is cleared.  All position, cigar,
    mapping-quality, template-length and alignment-status fields are then reset and
    the record is flagged as unmapped.

    Records that carry no sequence and/or no qualities (pysam reports these as
    ``None``) are handled: the missing part is simply left absent.

    Args:
        rec: the record to modify in place
    """
    if rec.is_reverse:
        # Read both values before writing either one: pysam documents that assigning
        # query_sequence invalidates the stored qualities, so the qualities must be
        # captured first and written back afterwards.
        quals = rec.query_qualities
        bases = rec.query_sequence

        if bases is not None:
            rec.query_sequence = dnautils.reverse_complement(bases)
        if quals is not None:
            quals.reverse()
            rec.query_qualities = quals
        rec.is_reverse = False

    rec.reference_id = sam.NO_REF_INDEX
    rec.reference_start = sam.NO_REF_POS
    rec.cigar = None
    rec.mapping_quality = 0
    rec.template_length = 0
    rec.is_duplicate = False
    rec.is_secondary = False
    rec.is_supplementary = False
    rec.is_proper_pair = False
    rec.is_unmapped = True