def __encrypt_unmapped(alignment: pysam.AlignedSegment, secret: bytes):
    """
    Apply the stream cipher to an unmapped alignment, in place.

    The same operation is used in both directions:
    alignment + secret => encrypted_alignment
    encrypted_alignment + secret => alignment

    Mapped alignments are left untouched. Nothing is returned; the record
    that was passed in is modified directly.

    :param alignment: record whose sequence is replaced when it is unmapped
    :param secret: key bytes; hashed together with the encoded query name
    :raises ValueError: if the alignment is unmapped and secret is None
    """
    if not alignment.is_unmapped:
        return

    if secret is None:
        raise ValueError(
            'Secret key must be present when unmapped alignments are iterated.'
        )

    # SHA-512 gives a 64B digest (original note: encrypts 256 bases).
    key_digest = hashlib.sha512(secret + alignment.query_name.encode()).digest()
    ciphered_sequence = cmn.stream_cipher(alignment.query_sequence, key_digest)

    # Grab the qualities before the sequence is overwritten and write them
    # back afterwards so they are carried over unchanged.
    # TODO: maybe something else with the quality?
    saved_qualities = alignment.query_qualities
    alignment.query_sequence = ciphered_sequence
    alignment.query_qualities = saved_qualities
def test_sam_alignment_to_padded_alignment():
    """A read with a single 1-base deletion pads to the expected three strings."""
    expected_ref = 'AGCTTAGCTAAGCTACCTATATCTTGGTCTTGGCCG'
    expected_match = '|||||||||| |||||||||||||||||||||||||'
    expected_query = 'AGCTTAGCTA-GCTACCTATATCTTGGTCTTGGCCG'

    # Read: 10 matches, a 1-base deletion, then 25 matches.
    read = AlignedSegment()
    read.reference_start = 0
    read.query_sequence = 'AGCTTAGCTAGCTACCTATATCTTGGTCTTGGCCG'
    read.cigartuples = ((0, 10), (2, 1), (0, 25))

    reference = Reference('test', 'AGCTTAGCTAAGCTACCTATATCTTGGTCTTGGCCG')

    padded = sam_alignment_to_padded_alignment(read, reference)
    pad_ref, pad_match, pad_query = padded

    assert pad_ref == expected_ref
    assert pad_match == expected_match
    assert pad_query == expected_query
def _set_length_dependent_fields(
    self,
    rec: pysam.AlignedSegment,
    length: int,
    bases: Optional[str] = None,
    quals: Optional[List[int]] = None,
    cigar: Optional[str] = None,
) -> None:
    """Populates the bases, qualities and cigar of a record.

    Whichever of bases, quals and cigar are supplied must agree on a single
    length (query length, in the case of the cigar). When none are supplied,
    the ``length`` parameter is used instead. Anything not supplied is
    synthesized at the resulting length.

    Args:
        rec: a SAM record
        length: the length to fall back on when bases/quals/cigar are all None
        bases: an optional string of bases for the read
        quals: an optional list of qualities for the read
        cigar: an optional cigar string for the read

    Raises:
        ValueError: if the supplied bases/quals/cigar imply different lengths
    """
    # Collect the length implied by every argument that was actually given.
    implied = set()
    if bases is not None:
        implied.add(len(bases))
    if quals is not None:
        implied.add(len(quals))
    if cigar is not None:
        parsed = sam.Cigar.from_cigarstring(cigar)
        implied.add(sum(elem.length_on_query for elem in parsed.elements))

    if len(implied) > 1:
        raise ValueError(
            "Provided bases/quals/cigar are not length compatible.")

    # Nothing supplied -> fall back to the caller-provided length.
    resolved = implied.pop() if implied else length

    # Sequence is assigned before qualities, as in the original ordering.
    if bases:
        rec.query_sequence = bases
    else:
        rec.query_sequence = self._bases(resolved)
    rec.query_qualities = quals or [self.base_quality] * resolved

    # Only mapped records receive a cigar.
    if rec.is_unmapped:
        return
    rec.cigarstring = cigar or f"{resolved}M"
def _make_read_unmapped(rec: AlignedSegment) -> None:
    """Removes mapping information from a read, in place.

    If the record is flagged as reverse-strand, its bases are
    reverse-complemented and its qualities reversed before the reverse flag
    is cleared. The reference id/position, cigar, mapping quality, template
    length and the duplicate/secondary/supplementary/proper-pair flags are
    then reset and the record is flagged as unmapped.

    Records lacking a sequence and/or qualities (``None``) are tolerated: the
    missing field is simply left alone rather than raising.

    Args:
        rec: the record to modify
    """
    if rec.is_reverse:
        # Read both fields before assigning either: they are written back as
        # a pair, sequence first and qualities second.
        quals = rec.query_qualities
        bases = rec.query_sequence

        if bases is not None:
            rec.query_sequence = dnautils.reverse_complement(bases)
        if quals is not None:
            # Guarded: previously a record without qualities crashed here
            # with AttributeError on None.reverse().
            quals.reverse()
            rec.query_qualities = quals
        rec.is_reverse = False

    rec.reference_id = sam.NO_REF_INDEX
    rec.reference_start = sam.NO_REF_POS
    rec.cigar = None
    rec.mapping_quality = 0
    rec.template_length = 0
    rec.is_duplicate = False
    rec.is_secondary = False
    rec.is_supplementary = False
    rec.is_proper_pair = False
    rec.is_unmapped = True