def test_degenerate_union_decode(self):
        # Round trip: build a degenerate set cover for a handful of codons,
        # decode every degenerate codon back to amino acids and check that
        # the decoded sets are pairwise disjoint and together equal the
        # amino acids of the input codons.
        forward_table = e_coli.table.forward_table
        codons = ["TTT", "TTA", "ATT", "GTT", "ACT", "AAT", "GAT", "GGG"]

        original_aminos = {forward_table[codon] for codon in codons}

        parsed_codons = [
            DegenerateTripletWithAminos.parse_from_codon_string(
                codon, forward_table) for codon in codons
        ]
        degenerate_codons = DegenerateTripletWithAminos.set_cover_with_degenerate_code(
            parsed_codons)

        decoded_aminos_per_degenerate_codon = []
        for deg_codon in degenerate_codons:
            decoded = DegenerateTriplet.degenerate_codon_to_aminos(
                str(deg_codon), forward_table)
            decoded_aminos_per_degenerate_codon.append(set(decoded))

        # Before looking at the decoded union, make sure no amino acid is
        # produced by two different degenerate codons.
        pairs = itertools.combinations(decoded_aminos_per_degenerate_codon, 2)
        for first, second in pairs:
            self.assertEqual(len(first & second), 0)

        all_decoded_aminos = set.union(*decoded_aminos_per_degenerate_codon)

        self.assertEqual(original_aminos, all_decoded_aminos)
    def test_degenerate_union_in_two_bases(self):
        # The union of "RAT" and "RAG" is expected to stringify as "RAK".
        forward_table = e_coli.table.forward_table
        first, second = [
            DegenerateTripletWithAminos.parse_from_codon_string(
                text, forward_table)
            for text in ("RAT", "RAG")
        ]

        merged = first.union(second)

        self.assertEqual("RAK", str(merged))
    def test_set_cover_with_degenerate_code_no_single_solution(self):
        # Four input codons are expected to be covered by exactly two
        # codons: the degenerate "HTT" and the untouched "ACG".
        forward_table = e_coli.table.forward_table
        parsed_codons = [
            DegenerateTripletWithAminos.parse_from_codon_string(
                codon, forward_table)
            for codon in ["TTT", "CTT", "ATT", "ACG"]
        ]

        cover = DegenerateTripletWithAminos.set_cover_with_degenerate_code(
            parsed_codons)
        cover_as_text = {str(deg) for deg in cover}

        self.assertEqual(cover_as_text, {"HTT", "ACG"})
    def test_set_cover_with_degenerate_code(self):
        # "TTT" and "CTT" are expected to collapse into the codon "YTT".
        forward_table = e_coli.table.forward_table
        parsed_pair = [
            DegenerateTripletWithAminos.parse_from_codon_string(
                text, forward_table)
            for text in ("TTT", "CTT")
        ]

        cover = DegenerateTripletWithAminos.set_cover_with_degenerate_code(
            parsed_pair)

        self.assertEqual(str(cover.pop()), "YTT")
Beispiel #5
0
def step_impl(context, triplets):
    """Store the expected triplets and check the result against them.

    ``triplets`` is a comma-separated string; each item is turned into a
    ``DegenerateTripletWithAminos`` (with an empty second argument) and the
    list is kept on ``context.expected``. Every element of ``context.result``
    must then be contained in that list.
    """
    expected = []
    for text in triplets.split(","):
        expected.append(DegenerateTripletWithAminos.create_from_string(text, ""))
    context.expected = expected

    for triplet in context.result:
        assert triplet in context.expected, f"Triplet {triplet} not in {context.expected}"
    def test_parse_from_codon_string(self):
        # Parsing "BGG" must expand the first base to {C, T, G}, keep the
        # other two positions as plain G, and yield the aminos W, R and G.
        triplet = DegenerateTripletWithAminos.parse_from_codon_string(
            "BGG", e_coli.table.forward_table)

        expected_bases = (
            ("base1", {"C", "T", "G"}),
            ("base2", {"G"}),
            ("base3", {"G"}),
        )
        for attribute, bases in expected_bases:
            self.assertEqual(bases, getattr(triplet, attribute).bases)

        self.assertEqual({"W", "R", "G"}, set(triplet.aminos))
    def test_find_two_similar(self):
        # Out of "TTT", "TTA" and "CCC", find_two_similar is expected to
        # return exactly the pair "TTT"/"TTA".
        forward_table = e_coli.table.forward_table
        parse = DegenerateTripletWithAminos.parse_from_codon_string

        amino1 = parse("TTT", forward_table)
        amino2 = parse("TTA", forward_table)
        outlier = parse("CCC", forward_table)
        bag = {amino1, amino2, outlier}

        res = DegenerateTripletWithAminos.find_two_similar(bag)

        self.assertIsNotNone(res)
        self.assertEqual(2, len(res))
        for expected_member in (amino1, amino2):
            self.assertIn(expected_member, res)
    def test_create_subsets_for_primers(self):
        # Each case pairs the per-site codon lists with the exact list of
        # codon combinations create_subsets_for_primers must return.
        build_subsets = DegenerateTripletWithAminos.create_subsets_for_primers

        one_codon_per_site = ([["AAA"], ["GGG"]], [["AAA", "GGG"]])
        two_codons_per_site = (
            [["AAA", "GAA"], ["GGG", "GGT"]],
            [
                ['AAA', 'GGG'],
                ['AAA', 'GGT'],
                ['GAA', 'GGG'],
                ['GAA', 'GGT'],
            ],
        )

        for codons_per_site, expected in (one_codon_per_site, two_codons_per_site):
            self.assertEqual(expected, build_subsets(codons_per_site))

        # Sites with a different number of codons: only membership of the
        # two combinations is checked, not the order or the full result.
        uneven_result = build_subsets([["AAA", "GGG"], ["CCC"]])

        for combination in (["AAA", "CCC"], ["GGG", "CCC"]):
            self.assertIn(combination, uneven_result)
    def test_site_separate_set_cover(self):
        # Each case is (codon sets per site, expected stringified cover per site).
        no_merge_possible = ([{"AAA"}, {"GGG"}], [{"AAA"}, {"GGG"}])
        full_merge = ([{"AAA", "GAA"}, {"GGG", "GGT"}], [{"RAA"}, {"GGK"}])
        # In this case each site has only one degenerate solution
        one_solution_per_site = (
            [{"AAA", "GAA", "CCC", "TTT"}, {"GGG", "CGG", "TTT", "AAA"}],
            [{"RAA", "CCC", "TTT"}, {"SGG", "TTT", "AAA"}],
        )
        # In this case the second site doesn't have any degenerate solutions
        second_site_unmergeable = (
            [{"AAA", "GAA", "CCC", "CCA"}, {"GGG", "CCC", "TTT", "AAA"}],
            [{"RAA", "CCM"}, {"GGG", "CCC", "TTT", "AAA"}],
        )

        test_cases = [
            no_merge_possible,
            full_merge,
            one_solution_per_site,
            second_site_unmergeable,
        ]

        for codon_sets, expected in test_cases:
            solution = DegenerateTripletWithAminos.stringified_site_separate_set_cover(
                codon_sets, e_coli.table.forward_table)
            self.assertEqual(expected, solution)
Beispiel #10
0
def step_impl(context):
    """Run the degenerate set cover on ``context.triplets``.

    The return value of ``set_cover_with_degenerate_code`` is stored on
    ``context.result``.
    """
    compute_cover = DegenerateTripletWithAminos.set_cover_with_degenerate_code
    context.result = compute_cover(context.triplets)
Beispiel #11
0
def step_impl(context, triplets):
    """Parse a comma-separated triplet string into ``context.triplets``.

    Each item is passed to ``DegenerateTripletWithAminos.create_from_string``
    together with an empty string as the second argument.
    """
    parsed = []
    for text in triplets.split(","):
        parsed.append(DegenerateTripletWithAminos.create_from_string(text, ""))
    context.triplets = parsed
Beispiel #12
0
    def solve(self, input_data: QCLMInput, mutations: List[MutationSite]) \
            -> QCLMOutput:

        """
        Find a solution to the QCLM problem.

        The method works in four stages:

        1. Choose codons for every mutation site. If
           ``self.config.use_degeneracy_codon`` is set, a degenerate set cover
           is attempted for at most one minute; if no valid cover is found
           (or degeneracy is disabled) codons are picked randomly instead.
        2. Enumerate the ways the mutation sites can be split into site
           sequences that a single primer can cover.
        3. For every set of splits to optimize, create minimal primers, grow
           them to each temperature threshold and collect one solution per
           threshold (empty solutions are skipped).
        4. Pick the solution with the lowest ``score()`` among ALL collected
           solutions, build the output from it and print diagnostics.

        :param input_data: Forwarded unchanged to ``create_new_output``.
        :param mutations: A list of requested mutations
        :return: The output created by ``create_new_output`` for the best solution.
        :raises IndexError: If not a single non-empty solution was produced.
        """

        mutations = sorted(mutations, key=lambda m: m.position)

        print("----------------------------------------START mutations:", ",".join([str(m) for m in mutations]))
        #
        # GENERATE CODONS FOR EACH MUTATION SITE
        #

        # Get a list of amino acid sets with wild types. Each set contains mutations required for one site.
        aminos_for_sites = [(set(AminoAcid(a) for a in mut.new_aminos))
                             for mut in mutations]

        print("----------------------------------------Aminos for site {}".format(",".join([str(am) for am in
                                                                                            aminos_for_sites])))


        # Compute the degenerate codon solution
        valid_set_cover = False
        codons_for_site = []
        wild_type_codons = []
        if self.config.use_degeneracy_codon:
            # One minute budget. The deadline is only checked after an attempt finishes, so at least
            # one attempt is always made. Once it has passed without a valid cover we fall through
            # to the non-degenerate case below.
            # NOTE(review): retrying only helps if solve_set_cover is non-deterministic - confirm.
            timeout = time.time() + 60 * 1
            while not valid_set_cover:
                codons_for_site = solve_set_cover(self.config, aminos_for_sites)
                wild_type_codons = get_wildtype_codons_degenerate(mutations, codons_for_site)
                valid_set_cover = check_set_cover(codons_for_site)
                if time.time() > timeout:
                    break

        if not valid_set_cover:
            # Pick codons for the aminos randomly
            codons_for_site = self.pick_random_codons(aminos_for_sites,
                                                      self.usages,
                                                      self.config.codon_usage_frequency_threshold)
            wild_type_codons, codons_for_site = get_replace_wildtype_codons(mutations, codons_for_site, self.sequence)


        #
        # FIND POSSIBLE SPLITS OF THE MUTATION SITES TO SEQUENCES SUCH THAT EACH SEQUENCE CAN
        # BE COVERED BY A SINGLE PRIMER.
        #

        mutation_subsets_combinations: List[SetOfMutationSiteSequences] = \
            self.find_mutation_coverage_options(mutations)

        all_site_splits: SiteSplits = SiteSplits.from_list_of_SetOfMutationSiteSequences(mutation_subsets_combinations)

        # If the user requested non-overlapping primers, then we optimize primers separately for each mutation site split, as we have
        # to consider borders of other primers that will be part of the same solution.
        # Otherwise, we can optimize primers for a given site set independently, so iterating through site splits is not needed.
        sets_of_splits_to_optimize: List[SiteSplits]
        if self.config.non_overlapping_primers:
            sets_of_splits_to_optimize = []
            for site_split in all_site_splits.splits:
                single_split = SiteSplits()
                single_split.add(site_split)
                sets_of_splits_to_optimize.append(single_split)
        else:
            sets_of_splits_to_optimize = [all_site_splits]

        # Build an index for mutation site offsets
        mut_site_offsets = [Offset(m.position) for m in mutations]
        index_of_site = {offset: i for (i, offset) in enumerate(mut_site_offsets)}

        mutated_dna_sequence = DNASequenceForMutagenesis(self.sequence, mut_site_offsets)

        # These are created once, before the loop over the site splits: the solutions of every
        # split have to take part in the final selection, and both names are also needed after
        # the loop even when there was nothing to iterate over.
        solutions: List[QCLMSolution] = []
        eps = 1e-6
        step = self.config.temp_threshold_step

        for site_splits in sets_of_splits_to_optimize:

            #
            # FIND CODONS DEFINING MUTATIONS IN PRIMERS, FOR EACH SITE SEQUENCE APPEARING IN ANY CONSIDERED SITE SPLIT
            #

            current_primers = QCLMPrimers(site_splits, mutated_dna_sequence, self.config, self.temp_calculator)

            # noinspection PyUnusedLocal
            seq: SiteSequence
            sorted_site_sequences = sorted(site_splits.get_site_sequences(), key=lambda s: min(s))
            for ind, seq in enumerate(sorted_site_sequences):
                print("Processing site sequence: {} ".format(",".join([str(site) for site in seq])))
                # Get a list of codon sets for the site sequence
                codons_for_sequence = [codons_for_site[index_of_site[offset]] for offset in seq]

                # Get a list of wild type codons for the site sequence
                # (collected to keep the original lookups; not used further in this method)
                wt_for_sequence = [wild_type_codons[index_of_site[offset]] for offset in seq]

                # Create primer definitions (sequences of codons) for the site sequence
                primer_codons: List[List[Codon]] = \
                    DegenerateTripletWithAminos.create_subsets_for_primers(codons_for_sequence)

                #
                # GENERATE PRIMERS OF MINIMUM PERMISSIBLE LENGTH FOR THESE PRIMER DEFINITIONS
                #

                # In case of non-overlapping solution, get the right limit (<) for primers for the previous site sequence.
                # This will be the minimum offset for primers for this site sequence.
                min_primer_start = current_primers.range(frozenset(sorted_site_sequences[ind-1]))[1] \
                                    if self.config.non_overlapping_primers and ind > 0 \
                                    else 0

                for primer in primer_codons:
                    current_primers.add_minimal_primers(frozenset(seq), primer, min_start=min_primer_start)

            #
            # GROW THE PRIMERS UNTIL THEY REACH A SELECTED TEMPERATURE THRESHOLD.
            # COLLECT A QCLM SOLUTION FOR EACH TEMPERATURE THRESHOLD.
            #

            score_fun = PrimerScoring(mutated_dna_sequence, self.config)

            # eps makes sure max_temperature itself is still visited despite float rounding.
            for temp_threshold in np.arange(self.config.min_temperature, self.config.max_temperature + eps, step):
                current_primers.grow(temp_threshold)
                temperature = temp_threshold + step / 2.

                # Select best primers for each site sequence
                best_primers: Mapping[SiteSet, Sequence[ScoredPrimer]] = \
                    current_primers.collect_best_primers(score_fun, temperature)

                # Find the site split which provides the best solution when using the selected primers
                new_solution = \
                    self.select_best_site_split(best_primers, site_splits, temperature, mutations, self.config,
                                                mutated_dna_sequence)

                if new_solution.primers: # Solution is not empty
                    solutions.append(new_solution)

        #
        # SELECT THE BEST OVERALL SOLUTIONS.
        #

        if not solutions:
            # IndexError is what indexing an empty result list would raise; raise it explicitly
            # so the failure explains itself.
            raise IndexError("No QCLM solution found for the requested mutations.")

        # Lowest score wins; on ties the earliest collected solution is taken.
        best_solution = min(solutions, key=lambda s: s.score())

        print("FOUND SOLUTIONS: ====================================================================================")

        for sol in solutions:
            print(repr(sol))

        print("FOUND SOLUTIONS: ====================================================================================")

        output = self.create_new_output(input_data, best_solution)

        #
        # CHECK WHETHER THE SOLUTION FULFILLS ALL CONSTRAINTS.
        #

        failed_primers = best_solution.get_breaking_primers(self.sequence)
        mutation_coverage = best_solution.mutation_coverage()

        print(repr(best_solution))

        # The input is dumped only when at least one defect was reported.
        print_input = False
        print("\nSOLUTION DEFECTS:")
        if mutation_coverage < 1 - eps:
            print(f"Solution coverage for requested mutations is only {100 * mutation_coverage:.1f}%.")
            print_input = True

        if failed_primers:
            for primer in failed_primers:
                pprint(primer)
            print_input = True

        if print_input:
            pprint(self.sequence)
            print(output.input_data)
        else:
            print("NONE")
        print("\n")

        return output