def test_protein_subsequence_does_not_overlap_substitution():
    # "SIINFEKL" was mutated into "SIINFEQL" (K>Q substitution); here we
    # slice off the leading "SIIN", which ends before the substituted
    # residue, and check the properties of that slice.
    substitution_start = len("SIINFE")
    substitution_end = len("SIINFEQ")
    mutant_protein = ProteinSequence(
        amino_acids="SIINFEQL",
        contains_mutation=True,
        mutation_start_idx=substitution_start,
        mutation_end_idx=substitution_end,
        ends_with_stop_codon=True,
        frameshift=False,
        translations=[])

    prefix_length = len("SIIN")
    prefix = mutant_protein.subsequence(0, prefix_length)

    # the slice stops before the mutation, so no mutation-related
    # property of the parent sequence is expected on it
    eq_(prefix.amino_acids, "SIIN")
    eq_(prefix.contains_mutation, False)
    eq_(prefix.contains_deletion, False)
    eq_(prefix.frameshift, False)
    eq_(prefix.ends_with_stop_codon, False)
    eq_(prefix.num_mutant_amino_acids, 0)
    eq_(prefix.mutant_amino_acids, "")
    eq_(len(prefix), 4)
def test_protein_subsequence_does_not_overlap_deletion():
    # "SIINFEKL" lost its "K" to become "SIINFEL"; the deletion is recorded
    # as an empty interval positioned right after "SIINFE". Taking the
    # subsequence "SIINFE" should give a sequence without the deletion.
    deletion_idx = len("SIINFE")
    mutant_protein = ProteinSequence(
        amino_acids="SIINFEL",
        contains_mutation=True,
        mutation_start_idx=deletion_idx,
        mutation_end_idx=deletion_idx,
        ends_with_stop_codon=True,
        frameshift=False,
        translations=[])

    # start is left as None; only the end of the slice is given
    prefix = mutant_protein.subsequence(None, len("SIINFE"))

    eq_(len(prefix), 6)
    eq_(prefix.amino_acids, "SIINFE")
    eq_(prefix.contains_deletion, False)
    eq_(prefix.contains_mutation, False)
    eq_(prefix.frameshift, False)
    eq_(prefix.ends_with_stop_codon, False)
    eq_(prefix.mutant_amino_acids, "")
    eq_(prefix.num_mutant_amino_acids, 0)
def make_dummy_protein_sequence(
        n_supporting_variant_reads,
        n_supporting_variant_sequences,
        n_supporting_reference_transcripts,
        n_total_variant_sequences=None,
        n_total_variant_reads=None,
        n_total_reference_transcripts=None,
        amino_acids="MKHW",  # ATG=M|AAA=K|CAC=H|TGG=W
        cdna_sequence="CCCATGAAACACTGGTAG",
        variant_cdna_interval_start=8,  # assuming variant was AAC>AAA
        variant_cdna_interval_end=9,
        variant_aa_interval_start=1,
        variant_aa_interval_end=2,
        num_mismatches=1):
    """
    Creates a ProteinSequence whose translations are repeated references
    to a single dummy translation.

    Any of the ``n_total_*`` arguments left as None defaults to the
    corresponding ``n_supporting_*`` value. The dummy translation is built
    by ``make_dummy_translation`` from the sequence/interval arguments and
    ``n_total_variant_reads``, and is repeated
    ``n_total_reference_transcripts * n_total_variant_sequences`` times.

    Raises AssertionError if a supporting count exceeds its total, or if
    there are more supporting variant sequences than supporting variant
    reads.
    """
    if n_total_variant_reads is None:
        n_total_variant_reads = n_supporting_variant_reads

    if n_total_variant_sequences is None:
        n_total_variant_sequences = n_supporting_variant_sequences

    if n_total_reference_transcripts is None:
        # Default to the supporting count, like the two totals above.
        # (Assigning the variable to itself would leave it as None and
        # break the comparison and multiplication below.)
        n_total_reference_transcripts = n_supporting_reference_transcripts

    assert n_supporting_variant_sequences <= n_supporting_variant_reads
    assert n_supporting_variant_sequences <= n_total_variant_sequences
    assert n_supporting_reference_transcripts <= n_total_reference_transcripts

    # one translation per (reference transcript, variant sequence) pair
    n_translations = n_total_reference_transcripts * n_total_variant_sequences

    translation = make_dummy_translation(
        amino_acids=amino_acids,
        cdna_sequence=cdna_sequence,
        offset_to_first_complete_codon=3,
        variant_cdna_interval_start=variant_cdna_interval_start,  # assuming variant was AAC>AAA
        variant_cdna_interval_end=variant_cdna_interval_end,
        variant_aa_interval_start=variant_aa_interval_start,
        variant_aa_interval_end=variant_aa_interval_end,
        num_mismatches=num_mismatches,
        n_variant_reads=n_total_variant_reads)

    # the same translation object is referenced n_translations times
    return ProteinSequence(
        translations=[translation] * n_translations)
def test_protein_sequence_deletion():
    # "SIINFEKL" became "SIINFEL" through deletion of a single amino acid;
    # the mutation interval is empty (start == end) and sits right after
    # "SIINFE". Check the properties reported for the full sequence.
    deletion_idx = len("SIINFE")
    mutant_protein = ProteinSequence(
        amino_acids="SIINFEL",
        contains_mutation=True,
        mutation_start_idx=deletion_idx,
        mutation_end_idx=deletion_idx,
        ends_with_stop_codon=True,
        frameshift=False,
        translations=[])

    eq_(mutant_protein.amino_acids, "SIINFEL")
    eq_(len(mutant_protein), 7)
    # an empty mutation interval is expected to give no mutant residues
    eq_(mutant_protein.num_mutant_amino_acids, 0)
    eq_(mutant_protein.mutant_amino_acids, "")
    eq_(mutant_protein.contains_mutation, True)
    eq_(mutant_protein.contains_deletion, True)
    eq_(mutant_protein.frameshift, False)
    eq_(mutant_protein.ends_with_stop_codon, True)