def test_degenerate_union_decode(self):
    """Decode a degenerate set cover and compare it with the input amino acids.

    The codons are translated through the E. coli forward table, covered
    with degenerate codons, and each degenerate codon is decoded again.
    The decoded amino-acid sets must be pairwise disjoint and their union
    must equal the amino acids of the original codons.
    """
    forward_table = e_coli.table.forward_table
    codons = ["TTT", "TTA", "ATT", "GTT", "ACT", "AAT", "GAT", "GGG"]
    original_aminos = {forward_table[codon] for codon in codons}

    parsed_codons = [
        DegenerateTripletWithAminos.parse_from_codon_string(codon, forward_table)
        for codon in codons
    ]
    degenerate_codons = DegenerateTripletWithAminos.set_cover_with_degenerate_code(
        parsed_codons)

    decoded_aminos_per_degenerate_codon = []
    for deg_codon in degenerate_codons:
        decoded = DegenerateTriplet.degenerate_codon_to_aminos(
            str(deg_codon), forward_table)
        decoded_aminos_per_degenerate_codon.append(set(decoded))

    # Before looking at the decoding itself, make sure the set cover is
    # disjoint: no amino acid may be produced by two degenerate codons.
    for left, right in itertools.combinations(decoded_aminos_per_degenerate_codon, 2):
        self.assertEqual(len(left.intersection(right)), 0)

    all_decoded_aminos = set.union(*decoded_aminos_per_degenerate_codon)
    self.assertEqual(original_aminos, all_decoded_aminos)
def test_degenerate_union_in_two_bases(self):
    """The union of "RAT" and "RAG" must stringify as "RAK"."""
    forward_table = e_coli.table.forward_table
    parse = DegenerateTripletWithAminos.parse_from_codon_string

    first = parse("RAT", forward_table)
    second = parse("RAG", forward_table)

    merged = first.union(second)
    self.assertEqual("RAK", str(merged))
def test_set_cover_with_degenerate_code_no_single_solution(self):
    """Covering TTT, CTT, ATT and ACG must yield exactly {"HTT", "ACG"}."""
    forward_table = e_coli.table.forward_table
    parsed_codons = []
    for codon in ["TTT", "CTT", "ATT", "ACG"]:
        parsed_codons.append(
            DegenerateTripletWithAminos.parse_from_codon_string(codon, forward_table))

    degenerate = DegenerateTripletWithAminos.set_cover_with_degenerate_code(parsed_codons)

    # Compare as a set of strings so the order of the cover does not matter.
    cover_as_strings = {str(deg) for deg in degenerate}
    self.assertEqual(cover_as_strings, {"HTT", "ACG"})
def test_set_cover_with_degenerate_code(self):
    """Covering "TTT" and "CTT" must give a cover whose popped element is "YTT"."""
    forward_table = e_coli.table.forward_table
    inputs = [
        DegenerateTripletWithAminos.parse_from_codon_string(codon, forward_table)
        for codon in ("TTT", "CTT")
    ]

    degenerate = DegenerateTripletWithAminos.set_cover_with_degenerate_code(inputs)

    popped = degenerate.pop()
    self.assertEqual(str(popped), "YTT")
def step_impl(context, triplets):
    """Store the expected triplets and check every result triplet against them.

    NOTE(review): presumably a behave step implementation; its decorator is
    not visible in this part of the file.

    :param context: object carrying ``result`` (read) and ``expected`` (written)
    :param triplets: comma-separated triplet strings
    """
    expected = []
    for raw_triplet in triplets.split(","):
        expected.append(DegenerateTripletWithAminos.create_from_string(raw_triplet, ""))
    context.expected = expected

    # Only checks that each result is among the expected triplets; it does
    # not check that every expected triplet was actually produced.
    for triplet in context.result:
        assert triplet in context.expected, f"Triplet {triplet} not in {context.expected}"
def test_parse_from_codon_string(self):
    """Parsing "BGG" must give bases {C,T,G}/{G}/{G} and amino acids {W,R,G}."""
    parsed = DegenerateTripletWithAminos.parse_from_codon_string(
        "BGG", e_coli.table.forward_table)

    expected_bases_by_position = (
        ("base1", {"C", "T", "G"}),
        ("base2", {"G"}),
        ("base3", {"G"}),
    )
    for attribute, expected_bases in expected_bases_by_position:
        self.assertEqual(expected_bases, getattr(parsed, attribute).bases)

    self.assertEqual({"W", "R", "G"}, set(parsed.aminos))
def test_find_two_similar(self):
    """``find_two_similar`` must return the pair TTT/TTA from a bag that also holds CCC."""
    forward_table = e_coli.table.forward_table
    parse = DegenerateTripletWithAminos.parse_from_codon_string

    amino1 = parse("TTT", forward_table)
    amino2 = parse("TTA", forward_table)
    unrelated = parse("CCC", forward_table)

    bag = set()
    for triplet in (amino1, amino2, unrelated):
        bag.add(triplet)

    res = DegenerateTripletWithAminos.find_two_similar(bag)

    self.assertIsNotNone(res)
    self.assertEqual(2, len(res))
    for expected_member in (amino1, amino2):
        self.assertIn(expected_member, res)
def test_create_subsets_for_primers(self):
    """Check ``create_subsets_for_primers`` on even and uneven per-site codon lists."""
    # Each entry is (codon lists per site, expected list of primer codon sequences).
    test_cases = [
        ([["AAA"], ["GGG"]],
         [["AAA", "GGG"]]),
        ([["AAA", "GAA"], ["GGG", "GGT"]],
         [["AAA", "GGG"], ["AAA", "GGT"], ["GAA", "GGG"], ["GAA", "GGT"]]),
    ]
    for codons_per_site, expected in test_cases:
        result = DegenerateTripletWithAminos.create_subsets_for_primers(codons_per_site)
        self.assertEqual(expected, result)

    # Sites with a different number of codons: only membership is checked here,
    # not the order or the total number of combinations.
    uneven_test_case = [["AAA", "GGG"], ["CCC"]]
    uneven_result = DegenerateTripletWithAminos.create_subsets_for_primers(
        uneven_test_case)
    for expected_combination in (["AAA", "CCC"], ["GGG", "CCC"]):
        self.assertIn(expected_combination, uneven_result)
def test_site_separate_set_cover(self):
    """Check ``stringified_site_separate_set_cover`` against a table of expected covers."""
    forward_table = e_coli.table.forward_table

    # Each entry is (codon sets per site, expected cover per site).
    test_cases = [
        ([{"AAA"}, {"GGG"}],
         [{"AAA"}, {"GGG"}]),
        ([{"AAA", "GAA"}, {"GGG", "GGT"}],
         [{"RAA"}, {"GGK"}]),
        # In this case each site has only one degenerate solution
        ([{"AAA", "GAA", "CCC", "TTT"}, {"GGG", "CGG", "TTT", "AAA"}],
         [{"RAA", "CCC", "TTT"}, {"SGG", "TTT", "AAA"}]),
        # In this case the second site doesn't have any degenerate solutions
        ([{"AAA", "GAA", "CCC", "CCA"}, {"GGG", "CCC", "TTT", "AAA"}],
         [{"RAA", "CCM"}, {"GGG", "CCC", "TTT", "AAA"}]),
    ]

    for codon_sets_per_site, expected in test_cases:
        solution = DegenerateTripletWithAminos.stringified_site_separate_set_cover(
            codon_sets_per_site, forward_table)
        self.assertEqual(expected, solution)
def step_impl(context):
    """Run the degenerate set cover on ``context.triplets`` and keep it in ``context.result``.

    NOTE(review): presumably a behave step implementation; its decorator is
    not visible in this part of the file.
    """
    cover = DegenerateTripletWithAminos.set_cover_with_degenerate_code(context.triplets)
    context.result = cover
def step_impl(context, triplets):
    """Parse a comma-separated triplet string into ``context.triplets``.

    NOTE(review): presumably a behave step implementation; its decorator is
    not visible in this part of the file.

    :param context: object that receives the parsed list as ``triplets``
    :param triplets: comma-separated triplet strings
    """
    parsed = []
    for raw_triplet in triplets.split(","):
        parsed.append(DegenerateTripletWithAminos.create_from_string(raw_triplet, ""))
    context.triplets = parsed
def solve(self, input_data: QCLMInput, mutations: List[MutationSite]) \
        -> QCLMOutput:
    """
    Find a solution to the QCLM problem.

    The method picks codons for every mutation site, enumerates the ways the
    sites can be split into site sequences, builds and grows primers for each
    considered split over a range of temperature thresholds, and returns the
    output created for the best-scoring solution. Progress and detected
    solution defects are printed to stdout.

    :param input_data: passed to ``self.create_new_output`` together with the best solution
    :param mutations: A list of requested mutations
    :return: the result of ``self.create_new_output`` for the best-scoring solution
    :raises IndexError: if no non-empty solution was found for any site split
    """
    mutations = sorted(mutations, key=lambda m: m.position)
    print("----------------------------------------START mutations:", ",".join([str(m) for m in mutations]))

    #
    # GENERATE CODONS FOR EACH MUTATION SITE
    #

    # Get a list of amino acid sets with wild types. Each set contains mutations required for one site.
    aminos_for_sites = [(set(AminoAcid(a) for a in mut.new_aminos)) for mut in mutations]
    print("----------------------------------------Aminos for site {}".format(",".join([str(am) for am in aminos_for_sites])))

    # Compute the degenerate codon solution
    valid_set_cover = False
    codons_for_site = []
    wild_type_codons = []
    if self.config.use_degeneracy_codon:
        # One-minute budget: keep asking for a set cover until one passes
        # check_set_cover. If the budget runs out first, the non-degenerate
        # fallback below is used instead.
        timeout = time.time() + 60 * 1
        while not valid_set_cover:
            codons_for_site = solve_set_cover(self.config, aminos_for_sites)
            wild_type_codons = get_wildtype_codons_degenerate(mutations, codons_for_site)
            valid_set_cover = check_set_cover(codons_for_site)
            if time.time() > timeout:
                break

    if not valid_set_cover:
        # Pick codons for the aminos randomly
        codons_for_site = self.pick_random_codons(aminos_for_sites, self.usages,
                                                  self.config.codon_usage_frequency_threshold)
        wild_type_codons, codons_for_site = get_replace_wildtype_codons(mutations, codons_for_site, self.sequence)

    #
    # FIND POSSIBLE SPLITS OF THE MUTATION SITES TO SEQUENCES SUCH THAT EACH SEQUENCE CAN
    # BE COVERED BY A SINGLE PRIMER.
    #
    mutation_subsets_combinations: List[SetOfMutationSiteSequences] = \
        self.find_mutation_coverage_options(mutations)
    all_site_splits: SiteSplits = SiteSplits.from_list_of_SetOfMutationSiteSequences(mutation_subsets_combinations)

    # If the user requested non-overlapping primers, then we optimize primers separately for each mutation site split,
    # as we have to consider borders of other primers that will be part of the same solution.
    # Otherwise, we can optimize primers for a given site set independently, so iterating through site splits
    # is not needed.
    sets_of_splits_to_optimize: List[SiteSplits]
    if self.config.non_overlapping_primers:
        sets_of_splits_to_optimize = []
        for site_split in all_site_splits.splits:
            single_split = SiteSplits()
            single_split.add(site_split)
            sets_of_splits_to_optimize.append(single_split)
    else:
        sets_of_splits_to_optimize = [all_site_splits]

    # Build an index for mutation site offsets
    mut_site_offsets = [Offset(m.position) for m in mutations]
    index_of_site = {offset: i for (i, offset) in enumerate(mut_site_offsets)}

    mutated_dna_sequence = DNASequenceForMutagenesis(self.sequence, mut_site_offsets)

    # Candidates are accumulated over ALL optimized site splits. This list
    # must live outside the per-split loop: re-creating it per split would
    # throw away the candidates of every split but the last one, so the
    # final selection would not be the best overall.
    solutions: List[QCLMSolution] = []
    eps = 1e-6
    step = self.config.temp_threshold_step

    for site_splits in sets_of_splits_to_optimize:
        #
        # FIND CODONS DEFINING MUTATIONS IN PRIMERS, FOR EACH SITE SEQUENCE APPEARING IN ANY CONSIDERED SITE SPLIT
        #
        current_primers = QCLMPrimers(site_splits, mutated_dna_sequence, self.config, self.temp_calculator)

        # noinspection PyUnusedLocal
        seq: SiteSequence
        sorted_site_sequences = sorted(site_splits.get_site_sequences(), key=lambda s: min(s))
        for ind, seq in enumerate(sorted_site_sequences):
            print("Processing site sequence: {} ".format(",".join([str(site) for site in seq])))

            # Get a list of codon sets for the site sequence
            codons_for_sequence = [codons_for_site[index_of_site[offset]] for offset in seq]

            # Get a list of wild type codons for the site sequence
            # NOTE(review): this list is built but not used further in this method.
            wt_for_sequence = [wild_type_codons[index_of_site[offset]] for offset in seq]

            # Create primer definitions (sequences of codons) for the site sequence
            primer_codons: List[List[Codon]] = \
                DegenerateTripletWithAminos.create_subsets_for_primers(codons_for_sequence)

            #
            # GENERATE PRIMERS OF MINIMUM PERMISSIBLE LENGTH FOR THESE PRIMER DEFINITIONS
            #

            # In case of non-overlapping solution, get the right limit (<) for primers for the previous site sequence.
            # This will be the minimum offset for primers for this site sequence.
            min_primer_start = current_primers.range(frozenset(sorted_site_sequences[ind - 1]))[1] \
                if self.config.non_overlapping_primers and ind > 0 \
                else 0

            for primer in primer_codons:
                current_primers.add_minimal_primers(frozenset(seq), primer, min_start=min_primer_start)

        #
        # GROW THE PRIMERS UNTIL THEY REACH A SELECTED TEMPERATURE THRESHOLD.
        # COLLECT A QCLM SOLUTION FOR EACH TEMPERATURE THRESHOLD.
        #
        score_fun = PrimerScoring(mutated_dna_sequence, self.config)

        # eps is added to the stop value so that max_temperature itself is
        # still visited despite floating point rounding.
        for temp_threshold in np.arange(self.config.min_temperature, self.config.max_temperature + eps, step):
            current_primers.grow(temp_threshold)
            temperature = temp_threshold + step / 2.

            # Select best primers for each site sequence
            best_primers: Mapping[SiteSet, Sequence[ScoredPrimer]] = \
                current_primers.collect_best_primers(score_fun, temperature)

            # Find the site split which provides the best solution when using the selected primers
            new_solution = \
                self.select_best_site_split(best_primers, site_splits, temperature, mutations,
                                            self.config, mutated_dna_sequence)

            if new_solution.primers:  # Solution is not empty
                solutions.append(new_solution)

    #
    # SELECT THE BEST OVERALL SOLUTIONS.
    #
    if not solutions:
        # Same exception type as the former bare ``sorted_solutions[0]`` lookup,
        # but with a message that says what actually went wrong.
        raise IndexError("QCLM solver found no non-empty solution for the requested mutations")

    sorted_solutions = sorted(solutions, key=lambda s: s.score())
    best_solution = sorted_solutions[0]

    print("FOUND SOLUTIONS: ====================================================================================")
    for sol in solutions:
        print(repr(sol))
    print("FOUND SOLUTIONS: ====================================================================================")

    output = self.create_new_output(input_data, best_solution)

    #
    # CHECK WHETHER THE SOLUTION FULFILLS ALL CONSTRAINTS.
    #
    failed_primers = best_solution.get_breaking_primers(self.sequence)
    mutation_coverage = best_solution.mutation_coverage()

    print(repr(best_solution))

    print_input = False
    print("\nSOLUTION DEFECTS:")
    if mutation_coverage < 1 - eps:
        print(f"Solution coverage for requested mutations is only {100 * mutation_coverage:.1f}%.")
        print_input = True
    if failed_primers:
        for primer in failed_primers:
            pprint(primer)
        print_input = True

    if print_input:
        # Something is off with the solution: dump the inputs needed to reproduce it.
        pprint(self.sequence)
        print(output.input_data)
    else:
        print("NONE")
    print("\n")

    return output