def test_protein_subsequence_does_not_overlap_substitution():
    """
    A slice that ends before a substitution should not report a mutation.

    Models "SIINFEKL" mutated into "SIINFEQL" (the substituted residue sits
    between "SIINFE" and "L"), then keeps only the leading "SIIN", which
    lies entirely before the mutated position.
    """
    mutation_start = len("SIINFE")
    mutation_end = len("SIINFEQ")
    full_protein = ProteinSequence(
        amino_acids="SIINFEQL",
        contains_mutation=True,
        mutation_start_idx=mutation_start,
        mutation_end_idx=mutation_end,
        ends_with_stop_codon=True,
        frameshift=False,
        translations=[])

    prefix = full_protein.subsequence(0, len("SIIN"))

    # attributes are read lazily, one at a time, in the listed order
    expected_attributes = [
        ("amino_acids", "SIIN"),
        ("contains_mutation", False),
        ("contains_deletion", False),
        ("frameshift", False),
        ("ends_with_stop_codon", False),
        ("num_mutant_amino_acids", 0),
        ("mutant_amino_acids", ""),
    ]
    for attribute_name, expected_value in expected_attributes:
        eq_(getattr(prefix, attribute_name), expected_value)
    eq_(len(prefix), 4)
def test_protein_subsequence_does_not_overlap_deletion():
    """
    A slice that stops at a deletion site should not report the deletion.

    Models "SIINFEKL" mutated into "SIINFEL" (deletion of "K", so the
    mutation interval is empty and located right after "SIINFE"), then
    keeps only the subsequence "SIINFE".
    """
    deletion_site = len("SIINFE")
    full_protein = ProteinSequence(
        amino_acids="SIINFEL",
        contains_mutation=True,
        mutation_start_idx=deletion_site,
        mutation_end_idx=deletion_site,
        ends_with_stop_codon=True,
        frameshift=False,
        translations=[])

    # start of None: slice from the beginning of the sequence
    prefix = full_protein.subsequence(None, len("SIINFE"))

    eq_(len(prefix), 6)
    # attributes are read lazily, one at a time, in the listed order
    expected_attributes = [
        ("amino_acids", "SIINFE"),
        ("contains_deletion", False),
        ("contains_mutation", False),
        ("frameshift", False),
        ("ends_with_stop_codon", False),
        ("mutant_amino_acids", ""),
        ("num_mutant_amino_acids", 0),
    ]
    for attribute_name, expected_value in expected_attributes:
        eq_(getattr(prefix, attribute_name), expected_value)
def make_dummy_protein_sequence(
        n_supporting_variant_reads,
        n_supporting_variant_sequences,
        n_supporting_reference_transcripts,
        n_total_variant_sequences=None,
        n_total_variant_reads=None,
        n_total_reference_transcripts=None,
        amino_acids="MKHW",  # ATG=M|AAA=K|CAC=H|TGG=W
        cdna_sequence="CCCATGAAACACTGGTAG",
        variant_cdna_interval_start=8,  # assuming variant was AAC>AAA
        variant_cdna_interval_end=9,
        variant_aa_interval_start=1,
        variant_aa_interval_end=2,
        num_mismatches=1):
    """
    Creates a ProteinSequence object built only from a list of identical
    dummy translations; no other ProteinSequence field is passed.

    Parameters
    ----------
    n_supporting_variant_reads : int
        Default for `n_total_variant_reads` and upper bound on
        `n_supporting_variant_sequences`.

    n_supporting_variant_sequences : int
        Default for `n_total_variant_sequences`.

    n_supporting_reference_transcripts : int
        Default for `n_total_reference_transcripts`.

    n_total_variant_sequences : int, optional
        Defaults to `n_supporting_variant_sequences`.

    n_total_variant_reads : int, optional
        Passed to the dummy translation as `n_variant_reads`; defaults to
        `n_supporting_variant_reads`.

    n_total_reference_transcripts : int, optional
        Defaults to `n_supporting_reference_transcripts`.

    amino_acids, cdna_sequence, variant_cdna_interval_start,
    variant_cdna_interval_end, variant_aa_interval_start,
    variant_aa_interval_end, num_mismatches
        Forwarded unchanged to `make_dummy_translation`.

    Returns a ProteinSequence whose `translations` list contains the same
    dummy translation repeated
    `n_total_reference_transcripts * n_total_variant_sequences` times.

    Raises AssertionError if the supporting counts exceed the totals they
    are compared against.
    """
    if n_total_variant_reads is None:
        n_total_variant_reads = n_supporting_variant_reads

    if n_total_variant_sequences is None:
        n_total_variant_sequences = n_supporting_variant_sequences

    if n_total_reference_transcripts is None:
        # Previously this assigned the variable to itself, leaving it as None
        # and breaking the comparison and multiplication below.
        n_total_reference_transcripts = n_supporting_reference_transcripts

    assert n_supporting_variant_sequences <= n_supporting_variant_reads
    assert n_supporting_variant_sequences <= n_total_variant_sequences
    assert n_supporting_reference_transcripts <= n_total_reference_transcripts

    n_translations = n_total_reference_transcripts * n_total_variant_sequences

    translation = make_dummy_translation(
        amino_acids=amino_acids,
        cdna_sequence=cdna_sequence,
        # NOTE(review): presumably matches the 3-base "CCC" prefix of the
        # default cdna_sequence; confirm if a different sequence is passed
        offset_to_first_complete_codon=3,
        variant_cdna_interval_start=variant_cdna_interval_start,
        variant_cdna_interval_end=variant_cdna_interval_end,
        variant_aa_interval_start=variant_aa_interval_start,
        variant_aa_interval_end=variant_aa_interval_end,
        num_mismatches=num_mismatches,
        n_variant_reads=n_total_variant_reads)

    # the same translation object is repeated, not copied
    return ProteinSequence(
        translations=[translation] * n_translations)
def test_protein_sequence_deletion():
    """
    A ProteinSequence with an empty mutation interval is reported as a
    deletion with no mutant amino acids.

    Models "SIINFEKL" mutated into "SIINFEL" by deletion of one amino acid,
    so the mutation start and end indices coincide right after "SIINFE".
    """
    deletion_site = len("SIINFE")
    protein = ProteinSequence(
        amino_acids="SIINFEL",
        contains_mutation=True,
        mutation_start_idx=deletion_site,
        mutation_end_idx=deletion_site,
        ends_with_stop_codon=True,
        frameshift=False,
        translations=[])

    eq_(protein.amino_acids, "SIINFEL")
    eq_(len(protein), 7)
    # attributes are read lazily, one at a time, in the listed order
    expected_attributes = [
        ("num_mutant_amino_acids", 0),
        ("mutant_amino_acids", ""),
        ("contains_mutation", True),
        ("contains_deletion", True),
        ("frameshift", False),
        ("ends_with_stop_codon", True),
    ]
    for attribute_name, expected_value in expected_attributes:
        eq_(getattr(protein, attribute_name), expected_value)