def __call__(self, dna_sequence: str) -> tuple:
    """Score a DNA sequence by codon adaptation (CAI) and GC-content deviation.

    The sequence is read three bases at a time.  Each codon is translated
    with ``self.codonUsage.table.forward_table``; its relative adaptiveness
    ``w`` is its value in ``self.get_codons(amino, self.threshold)`` divided
    by the largest value in that same mapping.  The CAI score is the
    geometric mean of all ``w`` values.

    The GC score is the absolute difference between the floor-divided
    midpoint of ``self.gc_range`` and ``GC(dna_sequence)``, divided by 100.

    Args:
        dna_sequence: DNA sequence to score.  Presumably a whole number of
            codons in reading frame -- TODO confirm against callers.

    Returns:
        The pair ``(CAI_score, GC_score)``.

    Raises:
        TypeError: if no codon could be read from the sequence (``reduce``
            is applied to an empty list).
    """
    get_aa = DegenerateTriplet()  # translates a codon into the amino acids it encodes

    # -- calculation of CAI --
    w_list = []  # relative adaptiveness of every codon that was read
    for i in range(0, len(dna_sequence) - 1, 3):
        codon = str(dna_sequence[i:i + 3])
        # a concrete codon encodes a single amino acid, hence [0]
        aa = get_aa.degenerate_codon_to_aminos(
            codon, self.codonUsage.table.forward_table)[0]
        # values of all codons offered for this amino acid
        all_codons = self.get_codons(aa, self.threshold)
        c_max = max(all_codons.values())
        w_list.append(all_codons[codon] / c_max)

    # Geometric mean over the codons that were actually scored.  Using
    # len(w_list) instead of len(dna_sequence) / 3 keeps the exponent correct
    # when the sequence length is not a multiple of three.
    CAI_score = reduce(lambda x, y: x * y, w_list) ** (1 / len(w_list))

    # -- calculation of GC content ratio --
    # middle of the desired GC content region (floor division, as before)
    gc_desired = (self.gc_range[0] + self.gc_range[1]) // 2
    # NOTE(review): the division by 100 suggests GC() returns a percentage -- confirm
    gc_sequence = GC(dna_sequence)
    GC_score = abs(float(gc_desired - gc_sequence) / 100)
    return (CAI_score, GC_score)  # final score function
Beispiel #2
0
    def __init__(self, config, is_dna_sequence, is_mutations_as_codons):
        """Store the run configuration and pick the codon-usage data for the organism.

        Args:
            config: configuration object; this constructor reads its
                ``temperature_config``, ``avoided_motifs`` and ``organism``
                attributes.
            is_dna_sequence: flag stored unchanged on the instance.
            is_mutations_as_codons: flag stored unchanged on the instance.
        """
        self.config = config
        self.is_dna_sequence = is_dna_sequence
        self.is_mutations_as_codons = is_mutations_as_codons
        self.wild_dna_sequence = ""
        self.temp_calculator = config.temperature_config.create_calculator()
        self.gene = None
        # problem 1 specific
        # for future
        self.tm_distances = []
        self.avoided_motifs = config.avoided_motifs
        self.get_aa = DegenerateTriplet()

        # Built-in usage tables exist for 'e-coli' and 'yeast'; any other
        # organism name is resolved through Organism.
        organism = config.organism
        if organism == 'e-coli':
            usage = UsageTable().ecoli_usage
        elif organism == 'yeast':
            usage = UsageTable().yeast_usage
        else:
            usage = Organism(organism).codon_table
        self.usage_table = usage
        self.codonUsage = CodonUsage(organism)
    def protein_from_dna(self, dna_sequence):
        """Translate a DNA sequence, three bases at a time, into an amino-acid string.

        Each codon is translated with ``self.e_coli.table.forward_table`` and
        the first amino acid returned for it is kept.

        Args:
            dna_sequence: DNA sequence to translate.

        Returns:
            The concatenated amino acids, one per codon read.
        """
        translator = DegenerateTriplet()
        return ''.join(
            translator.degenerate_codon_to_aminos(
                str(dna_sequence[start:start + 3]),
                self.e_coli.table.forward_table)[0]
            for start in range(0, len(dna_sequence) - 1, 3))
Beispiel #4
0
    def mutation_coverage(self) -> float:
        """ Returns a ratio (in [0,1]) of the number of aminos generated by the solution
            primers to the number of amino acids requested over all mutation sites.

            The requested set of a site is its new aminos plus its old amino; only
            generated aminos that fall inside that set are counted.

            Raises ZeroDivisionError when ``self.mutations`` is empty.
        """

        # Amino acids wanted on each site, in the order of self.mutations.
        wanted_per_site = [
            {AminoAcid(amino) for amino in mutation.new_aminos}
            | {AminoAcid(mutation.old_amino)}
            for mutation in self.mutations
        ]

        # Offset of a mutation site -> index of that site in self.mutations.
        site_index = {
            Offset(mutation.position): idx
            for idx, mutation in enumerate(self.mutations)
        }

        covered_per_site: List[Set[AminoAcid]] = [
            set() for _ in self.mutations
        ]

        for site_set, primers in self.primers.items():
            ordered_sites = sorted(site_set)
            for primer in primers:
                # the n-th codon of a primer belongs to the n-th site in sorted order
                for pos, codon in enumerate(primer.spec.codons):
                    decoded = DegenerateTriplet.degenerate_codon_to_aminos(
                        codon, self.usages.table.forward_table)
                    covered_per_site[site_index[ordered_sites[pos]]].update(
                        decoded)

        wanted_total = 0
        covered_total = 0
        for wanted, covered in zip(wanted_per_site, covered_per_site):
            wanted_total += len(wanted)
            covered_total += len(wanted & covered)

        return covered_total / wanted_total
     def test_mutations_on_sites(self):
        """ Assures that on every site, the generated mutations coincide with user's input """

        (pas_seq, config, is_mutations_as_codons, mutations, fragments,
         solution, goi_offset) = self.generate_example()
        # list of mutations is generated from the input
        mutations_list = parse_input_mutations(is_mutations_as_codons, mutations)
        # full sequence, offset value (replaces the offset returned by generate_example)
        sequence, goi_offset = pas_seq.get_full_sequence_with_offset()
        generator = OligoGenerator(config, is_mutations_as_codons, config.organism)
        codon_usage = CodonUsage("e-coli")

        for frag in solution.get_fragments():
            oligos_set = generator(frag.get_sequence(solution.gene.sequence),
                                   mutations, frag, goi_offset, 250)
            # filtering out mutations on this fragment
            mutations_on_fragment = mutations_on_fragments(
                frag.get_start(), frag.get_end(), mutations_list, goi_offset)
            # get list of mutations for every mutation site
            mutations_on_site = self.get_mutations_on_sites(mutations_on_fragment)
            for site, mutations_i in mutations_on_site.items():
                # wild codon on this mutation site
                wild_type_codon = self.get_codon_on_position(
                    site, frag.get_sequence(sequence), goi_offset,
                    frag.get_start())
                # codons found on this site in all oligos together
                mutated_codons_on_site = [
                    self.get_codon_on_position(site, oligo.sequence,
                                               goi_offset, frag.get_start())
                    for oligo in oligos_set
                ]
                # NOTE(review): list.remove drops only the first occurrence, so a
                # wild codon carried by several oligos stays in the list -- confirm
                # this is intended.
                try:
                    mutated_codons_on_site.remove(wild_type_codon)
                except ValueError:
                    # the wild codon is not present among the oligos
                    pass
                # amino acids produced on this particular mutation site
                created_mutations = []
                for codon in set(mutated_codons_on_site):
                    created_mutations.extend(
                        DegenerateTriplet.degenerate_codon_to_aminos(
                            codon, codon_usage.table.forward_table))
                with self.subTest(i=site):
                    self.assertEqual(set(created_mutations), set(mutations_i))
    def test_degenerate_union_decode(self):
        """Checks that a degenerate set cover decodes to exactly the original amino acids."""
        forward_table = e_coli.table.forward_table
        codons = ["TTT", "TTA", "ATT", "GTT", "ACT", "AAT", "GAT", "GGG"]

        expected_aminos = {forward_table[codon] for codon in codons}

        parsed_codons = [
            DegenerateTripletWithAminos.parse_from_codon_string(
                codon, forward_table) for codon in codons
        ]
        cover = DegenerateTripletWithAminos.set_cover_with_degenerate_code(
            parsed_codons)

        # amino acids decoded from each degenerate codon of the cover
        decoded_sets = []
        for deg_codon in cover:
            decoded_sets.append(
                set(
                    DegenerateTriplet.degenerate_codon_to_aminos(
                        str(deg_codon), forward_table)))

        # The cover must be disjoint: no amino acid may be produced by two
        # different degenerate codons.
        for first, second in itertools.combinations(decoded_sets, 2):
            self.assertEqual(len(first & second), 0)

        self.assertEqual(expected_aminos, set.union(*decoded_sets))
    def test_degenerate_codon_to_aminos(self):
        """Checks the amino acids decoded from one plain and two degenerate codons."""
        expected_by_codon = {
            "AAA": {"K"},
            "KAT": {"Y", "D"},
            "BGG": {"W", "R", "G"},
        }

        for degenerate_codon, expected in expected_by_codon.items():
            decoded = DegenerateTriplet.degenerate_codon_to_aminos(
                degenerate_codon, e_coli.table.forward_table)

            # order of the decoded aminos is irrelevant, compare as sets
            self.assertEqual(expected, set(decoded))
Beispiel #8
0
    def __init__(self,
                 config: PASConfig,
                 is_mutations_as_codons,
                 organism='e-coli'):
        """ Set up the usage table, translation table, scoring, motifs and degeneracy helpers.

        Args:
            config: configuration passed to ``parse_input_config`` and
                ``Degeneracy``; its ``avoided_motifs`` attribute is read too.
            is_mutations_as_codons: flag stored unchanged on the instance.
            organism: 'e-coli' and 'yeast' use the built-in usage tables with
                the "Standard" translation table; any other name is resolved
                through ``Organism``.
        """
        standard_table = CodonTable.unambiguous_dna_by_name["Standard"]
        self.dna_by_name = standard_table
        (self.threshold_usage, self.gc_range,
         self.use_degeneracy) = parse_input_config(config)
        # needed later to get the list of amino acids generated by a given codon
        self.get_aa = DegenerateTriplet()

        if organism == 'e-coli':
            self.usage_table = UsageTable().ecoli_usage
            self.fw_table = standard_table.forward_table
        elif organism == 'yeast':
            self.usage_table = UsageTable().yeast_usage
            self.fw_table = standard_table.forward_table
        else:
            org = Organism(organism)
            self.usage_table = org.codon_table
            self.fw_table = org.translation_table.forward_table

        # identical for every organism
        self.codonUsage = CodonUsage(organism)
        self.scoring = TranslationScoring(self.threshold_usage, self.gc_range,
                                          self.codonUsage, self.usage_table)

        self.get_motifs = Motifs()
        # list of avoided motifs as produced by the Motifs callable
        self.avoided_motifs = self.get_motifs(config.avoided_motifs)
        self.degeneracy = Degeneracy(config, organism)
        self.is_mutations_as_codons = is_mutations_as_codons
    def test_non_degenerate_triplets(self):
        """Checks the expansion of degenerate codons into all concrete codons."""
        expansions = {
            "AAA": {"AAA"},
            "NAA": {"AAA", "CAA", "TAA", "GAA"},
            "KAK": {"GAG", "GAT", "TAG", "TAT"},
            "WSY": {"AGC", "AGT", "TGC", "TGT", "ACC", "ACT", "TCC", "TCT"},
        }

        for degenerate_codon, expected in expansions.items():
            expanded = DegenerateTriplet.get_all_non_degenerate_codons(
                degenerate_codon)

            # order of the expansion is irrelevant, compare as sets
            self.assertEqual(set(expanded), expected)
Beispiel #10
0
    def degenerate_codon_to_aminos(codon: str, codonUsage) -> List:
        """
        Converts a degenerate codon string to a list of aminos generated by that codon.

        Args:
            codon: three-character codon; it is expanded with
                ``DegenerateTriplet.get_all_non_degenerate_codons``.
            codonUsage: either an object exposing ``table.forward_table`` or
                the forward table itself (a codon -> amino mapping).

        Returns:
            The distinct aminos encoded by the codon, in first-seen order.
            Concrete codons that are missing from the forward table are
            skipped, so the list may be empty.
        """
        assert len(codon) == 3

        # Accept both argument shapes.  Passing the bare forward table used to
        # fail on ``.table`` for every codon and silently produce an empty list.
        if hasattr(codonUsage, "table"):
            forward_table = codonUsage.table.forward_table
        else:
            forward_table = codonUsage

        coded_aminos = []
        for c in DegenerateTriplet.get_all_non_degenerate_codons(codon):
            try:
                coded_aminos.append(forward_table[c])
            except KeyError:
                # codon has no entry in the table (presumably a stop codon)
                continue

        # dict.fromkeys removes duplicates while keeping a deterministic order
        return list(dict.fromkeys(coded_aminos))
Beispiel #11
0
class OligoGenerator(object):
    """ Function object for oligos generation for the fragment. """
    threshold_usage: float  # threshold for the codon frequency
    gc_range: List[int]  # desired GC content range
    organism: str  # organism chosen to use as codon frequency reference
    dna: str  # initial dna fragment
    mutations: List  # list of mutations, including mutation sites and probabilities
    aminos_with_probabilities: Dict  # dictionary of mutations with their probabilities grouped by mutation sites
    aminos_with_codons: Dict  # dictionary of mutations with corresponding codons grouped by mutation sites

    # upper bound on the number of candidate solutions collected by __call__
    _MAX_SOLUTIONS = 100

    def __init__(self,
                 config: PASConfig,
                 is_mutations_as_codons,
                 organism='e-coli'):
        """ Set up the usage table, translation table, scoring, motifs and degeneracy helpers.

        Args:
            config: configuration passed to ``parse_input_config`` and
                ``Degeneracy``; its ``avoided_motifs`` attribute is read too.
            is_mutations_as_codons: flag stored unchanged on the instance.
            organism: 'e-coli' and 'yeast' use the built-in usage tables with
                the "Standard" translation table; any other name is resolved
                through ``Organism``.
        """
        self.dna_by_name = CodonTable.unambiguous_dna_by_name["Standard"]
        self.threshold_usage, self.gc_range, self.use_degeneracy = parse_input_config(
            config)
        # needed later to get the list of amino acids generated by a given codon
        self.get_aa = DegenerateTriplet()

        if organism == 'e-coli':
            self.usage_table = UsageTable().ecoli_usage
            self.fw_table = self.dna_by_name.forward_table
        elif organism == 'yeast':
            self.usage_table = UsageTable().yeast_usage
            self.fw_table = self.dna_by_name.forward_table
        else:
            org = Organism(organism)  # other organism, chosen by name
            self.usage_table = org.codon_table
            self.fw_table = org.translation_table.forward_table

        # identical for every organism
        self.codonUsage = CodonUsage(organism)
        self.scoring = TranslationScoring(self.threshold_usage, self.gc_range,
                                          self.codonUsage, self.usage_table)

        self.get_motifs = Motifs()
        # list of avoided motifs as produced by the Motifs callable
        self.avoided_motifs = self.get_motifs(config.avoided_motifs)
        self.degeneracy = Degeneracy(config, organism)
        self.is_mutations_as_codons = is_mutations_as_codons

    def generate_solution(self, dna: str, mutations_list: List[tuple], start,
                          goi_offset) -> List[PASOligo]:
        """ Main logic of solution's generation is implemented here:

        1. For a given mutation site pick codons (the user's codons, or random
           codons for the requested aminos).
        2. For these codons solve the set cover problem.
        3. Check if aminos from the same degenerate codon share the same
           probability.
           3.1 If yes: in the list of aminos with their probabilities keep
               only one amino from the ones sharing a probability, and
               multiply its probability by the number of covered aminos.
           3.2 If no: keep the codons from step 1.
        4. Proceed to the next mutation site and repeat steps 1-3.
        5. Generate combinations with concentrations for the different sites
           with the help of cartesian multiplication.
        6. For every combination replace aminos on mutation sites with the
           previously selected codons (degenerate or normal ones).

        Args:
            dna: sequence of the fragment.
            mutations_list: tuples read as ``(site, amino or codon, probability)``.
            start: start of the fragment, forwarded to the codon helpers.
            goi_offset: offset forwarded to the codon helpers.

        Returns:
            The oligos built by ``generate_oligos_from_combinations``.
        """
        import math  # local import: keeps this class independent of the file header

        # list of mutation sites for the given fragment
        mutations_sites = Mutations.list_of_mutation_sites(mutations_list)
        mutations_on_site_with_prob = []
        chosen_codons_on_sites = []
        for site in mutations_sites:
            aminos_with_codons = {}  # amino -> codon chosen for it on this site
            aminos_with_probabilities = {}  # amino -> probability on this site
            for item in mutations_list:
                if item[0] != site:
                    continue
                if self.is_mutations_as_codons:
                    # amino for the codon chosen by the user
                    am = self.get_aa.degenerate_codon_to_aminos(
                        item[1], self.fw_table)[0]
                    aminos_with_codons[am] = item[1]
                    aminos_with_probabilities[am] = item[2]
                else:
                    # randomly chosen codon for the requested amino
                    aminos_with_codons[item[1]] = Codons.return_random_codon(
                        self.codonUsage, self.threshold_usage,
                        self.usage_table, item[1])
                    aminos_with_probabilities[item[1]] = item[2]

            # The wild type takes whatever probability the mutations leave
            # over.  The comparison is tolerant so that float rounding
            # (e.g. 0.1 + 0.2 + 0.7) does not add a spurious wild-type entry.
            sum_of_probabilities = sum(aminos_with_probabilities.values())
            if not math.isclose(sum_of_probabilities, 1):
                wild_type_prob = 1 - sum_of_probabilities
                wild_type_codon = Codons.get_wild_type_codon(
                    site, dna, start, goi_offset)
                wild_type_amino = self.get_aa.degenerate_codon_to_aminos(
                    wild_type_codon, self.fw_table)[0]
                aminos_with_codons[wild_type_amino] = wild_type_codon
                aminos_with_probabilities[wild_type_amino] = wild_type_prob

            if self.use_degeneracy:
                candidates_for_set_cover = find_candidates_for_set_cover(
                    aminos_with_probabilities)
                for candidate in candidates_for_set_cover:
                    set_cover = Codons.solve_set_cover(candidate,
                                                       self.degeneracy)
                    # A cover with fewer codons than aminos is treated as a
                    # successful degeneracy solution: both dictionaries are
                    # modified in place to reflect it (probabilities are
                    # recalculated as well).
                    if len(aminos_with_codons) > len(set_cover):
                        modify_lists(set_cover, aminos_with_probabilities,
                                     aminos_with_codons, self.get_aa,
                                     self.fw_table)

            # final list of mutations on sites and of the corresponding codons
            mutations_on_site_with_prob.append(aminos_with_probabilities)
            chosen_codons_on_sites.append(aminos_with_codons)

        # all combinations of mutations for the fragment, with concentrations
        mutations_combinations_with_probabilitites = Mutations.generate_mutation_combinations(
            mutations_on_site_with_prob)
        # generate oligos from the combinations
        return generate_oligos_from_combinations(
            mutations_combinations_with_probabilitites, chosen_codons_on_sites,
            dna, mutations_sites, start, goi_offset)

    def __call__(self, dna: str, mutations, frag, goi_offset,
                 niter: int) -> List[PASOligo]:
        """ Generates up to niter solutions and chooses the one with minimal number of oligos.

        At most ``_MAX_SOLUTIONS`` accepted solutions are collected.

        Args:
            dna: sequence of the fragment.
            mutations: user mutations, parsed with ``parse_input_mutations``.
            frag: fragment object providing ``get_start()`` and ``get_end()``.
            goi_offset: offset forwarded to the mutation and codon helpers.
            niter: maximal number of generation attempts.

        Returns:
            The accepted solution with the fewest oligos, or a single oligo
            holding ``dna`` with ratio 1 when no mutation lies on the fragment.

        Raises:
            PASNoSolutionException: if no attempt produced an accepted solution.
        """
        start = frag.get_start()
        end = frag.get_end()
        mutations_list = parse_input_mutations(self.is_mutations_as_codons,
                                               mutations)
        mutations_list = mutations_on_fragments(start, end, mutations_list,
                                                goi_offset)
        if len(mutations_list) == 0:
            return [PASOligo(sequence=dna, ratio=1)]

        solutions = []
        attempts = 0
        while len(solutions) < self._MAX_SOLUTIONS and attempts < niter:
            attempts += 1
            solution = self.generate_solution(dna, mutations_list, start,
                                              goi_offset)
            # NOTE(review): the motifs are searched in the input `dna`, not in
            # the generated oligos, so every attempt gets the same verdict --
            # confirm whether the oligo sequences should be checked instead.
            if not any(motif.search(dna) for motif in self.avoided_motifs):
                solutions.append(solution)

        # Fail only when nothing was accepted.  Previously a run with
        # niter < 100 raised even though valid solutions had been collected.
        if not solutions:
            raise PASNoSolutionException(
                'Not possible to avoid specified combination of motifs!')

        # choose the solution with minimal number of oligos
        return min(solutions, key=len)
Beispiel #12
0
class Output:
    """Builds the per-fragment result objects (oligos, mutations, overlaps) for a solution."""

    def __init__(self, config, is_dna_sequence, is_mutations_as_codons):
        """Store the run configuration and pick the codon-usage data for the organism.

        Args:
            config: configuration object; this constructor reads its
                ``temperature_config``, ``avoided_motifs`` and ``organism``
                attributes.
            is_dna_sequence: flag stored unchanged on the instance.
            is_mutations_as_codons: flag stored unchanged on the instance.
        """
        self.config = config
        self.is_dna_sequence = is_dna_sequence
        self.is_mutations_as_codons = is_mutations_as_codons
        self.wild_dna_sequence = ""
        self.temp_calculator = config.temperature_config.create_calculator()
        self.gene = None
        # problem 1 specific
        # for future
        self.tm_distances = []
        self.avoided_motifs = config.avoided_motifs
        self.get_aa = DegenerateTriplet()

        # Built-in usage tables exist for 'e-coli' and 'yeast'; any other
        # organism name is resolved through Organism.
        if config.organism == 'e-coli':
            self.usage_table = UsageTable().ecoli_usage
        elif config.organism == 'yeast':
            self.usage_table = UsageTable().yeast_usage
        else:
            self.usage_table = Organism(config.organism).codon_table
        self.codonUsage = CodonUsage(config.organism)

    def combine_mutations_list(self,
                               fragment: PASFragment,
                               oligos_group,
                               mutation_sites_on_fragment: List,
                               mutations_on_fragment: [PASMutationSite],
                               sequence=None,
                               goi_offset=None) -> List:
        """ Combines a list of mutations on a fragment with additional details needed for frontend

        Args:
            fragment: fragment the mutations lie on; only ``get_start()`` is used.
            oligos_group: oligos generated for the fragment.
            mutation_sites_on_fragment: positions of the mutation sites on the fragment.
            mutations_on_fragment: mutation sites (with their mutations) on the fragment.
            sequence: sequence the wild-type codons are read from.
            goi_offset: offset forwarded to the codon helpers.

        Returns:
            ``PASMutationFormatted`` records, one per requested mutation plus
            one wild-type record for every site whose frequencies sum to less
            than 1, sorted with ``sort_func``.
        """
        import math  # local import: keeps this class independent of the file header

        forward_table = self.codonUsage.table.forward_table
        fragment_start = fragment.get_start()
        list_of_mutations = []

        # Create mutated oligos
        for mut_site in mutations_on_fragment:
            position = mut_site.position
            for mutt in mut_site.mutations:
                if self.is_mutations_as_codons:
                    mutation = self.get_aa.degenerate_codon_to_aminos(
                        str(mutt.mutation), forward_table)[0]
                else:
                    mutation = str(mutt.mutation)
                frequency = float(mutt.frequency)
                wild_type_codon = Codons.get_wild_type_codon(
                    position, sequence, fragment_start, goi_offset)
                wild_type_amino = self.get_aa.degenerate_codon_to_aminos(
                    wild_type_codon, forward_table)[0]

                # Extract the mutated codon from the changed oligos: the codon
                # of the last oligo that encodes this amino on the position,
                # or "" when no oligo does.
                mutated_codon = ""
                for oligo in oligos_group:
                    codon_on_position = get_codon_on_position(
                        position, oligo.sequence, fragment_start, goi_offset)
                    amino_on_position = self.get_aa.degenerate_codon_to_aminos(
                        codon_on_position, forward_table)
                    if mutation in amino_on_position:
                        mutated_codon = codon_on_position

                list_of_mutations.append(
                    PASMutationFormatted(position=position,
                                         mutated_amino=mutation,
                                         wild_type_amino=wild_type_amino,
                                         wild_type_codon=wild_type_codon,
                                         mutated_codon=mutated_codon,
                                         frequency=frequency,
                                         wild_type=False))

        # Adding wild type mutations
        for site in mutation_sites_on_fragment:
            # Sum the frequencies of ALL mutations requested on this site.
            # Counting only the first mutation of a site overstated the
            # wild-type frequency whenever a site carried several mutations.
            total_frequency = sum(
                (float(mutt.frequency)
                 for mut in mutations_on_fragment if mut.position == site
                 for mutt in mut.mutations), 0.0)
            # tolerant comparison: float rounding must not create a wild-type
            # record with a vanishing frequency
            if total_frequency >= 1 or math.isclose(total_frequency, 1):
                continue

            wild_type_codon = Codons.get_wild_type_codon(
                site, sequence, fragment_start, goi_offset)
            wild_type_amino = self.get_aa.degenerate_codon_to_aminos(
                wild_type_codon, forward_table)[0]
            list_of_mutations.append(
                PASMutationFormatted(position=site,
                                     mutated_amino=wild_type_amino,
                                     wild_type_amino=wild_type_amino,
                                     wild_type_codon=wild_type_codon,
                                     mutated_codon=wild_type_codon,
                                     frequency=1 - total_frequency,
                                     wild_type=True))

        list_of_mutations.sort(key=sort_func)
        return list_of_mutations

    def __call__(self, best_solution: PASSolution,
                 mutations: [PASMutationSite],
                 sequences: PASSequences) -> [PASResult]:
        """
        Returns list of results, one ``PASResult`` per fragment of the solution.

        Args:
            best_solution: solution whose fragments, gene sequence and
                temperature calculator are used.
            mutations: requested mutation sites.
            sequences: provides the offset and the full sequence.

        Returns:
            Results in fragment order; an empty list when the solution has no
            fragments.  The last fragment has no following fragment, so all of
            its overlap fields are None.
        """
        fragments = list(best_solution.get_fragments())
        results = []
        goi_offset = sequences.get_goi_offset()
        # sorted list of all mutations sites
        mutation_sites = sorted({mut.position for mut in mutations})

        # one generator serves every fragment
        generator = OligoGenerator(self.config, self.is_mutations_as_codons,
                                   self.config.organism)

        # creating the output values for every fragment
        for i, frag_current in enumerate(fragments):
            frag_start = frag_current.get_start()
            frag_end = frag_current.get_end()
            fragment_sequence = frag_current.get_sequence(
                best_solution.gene.sequence)

            # getting oligos for a fragment
            oligos_group = generator(fragment_sequence, mutations,
                                     frag_current, goi_offset, 250)

            # sites whose whole codon (3 bases) lies inside the fragment
            mutation_sites_on_fragment = [
                site for site in mutation_sites
                if goi_offset + (site - 1) * 3 >= frag_start
                and goi_offset + (site - 1) * 3 + 2 <= frag_end
            ]
            mutations_on_fragment = [
                mut for mut in mutations
                if mut.position in mutation_sites_on_fragment
            ]
            # list of mutations on a fragment in the desired output format
            mutations_on_fragment_formatted = self.combine_mutations_list(
                frag_current, oligos_group, mutation_sites_on_fragment,
                mutations_on_fragment, fragment_sequence, goi_offset)
            list_oligos = combine_oligos_list(oligos_group,
                                              mutations_on_fragment_formatted,
                                              mutation_sites_on_fragment,
                                              goi_offset, frag_current)

            # Overlap with the following fragment and its parameters.  Only a
            # missing next fragment yields None; errors raised while computing
            # the overlap are no longer swallowed.
            if i + 1 < len(fragments):
                overlap = frag_current.get_overlap_seq(
                    fragments[i + 1], sequences.get_full_sequence())
                overlap_Tm = best_solution.temp_calculator(overlap)
                overlap_GC = GC(overlap)
                overlap_length = len(overlap)
            else:
                overlap = overlap_Tm = overlap_GC = overlap_length = None

            # every fragment at an odd index is emitted as the reverse complement
            # of the original sub-sequence; done here because the code above
            # requires the fragment in original forward direction
            if i % 2 == 1:
                for oligo in list_oligos:
                    oligo.make_reverse_complement()
                fragment_sequence = reverse_complement(fragment_sequence)

            # combining the results together
            results.append(
                PASResult(fragment=fragment_sequence,
                          start=frag_start,
                          end=frag_end,
                          length=frag_current.get_length(),
                          overlap=overlap,
                          overlap_Tm=overlap_Tm,
                          overlap_GC=overlap_GC,
                          overlap_length=overlap_length,
                          mutations=mutations_on_fragment_formatted,
                          oligos=list_oligos))

        return results
Beispiel #13
0
    def create_new_output(self, input_data: QCLMInput, solution: QCLMSolution) \
            -> QCLMOutput:
        """
        Parse QCLM solution and create output object which can be automatically translated to json.

        One ``QCLMMutationOutput`` is produced per primer of the solution.

        Args:
            input_data: user input; its mutations are parsed with ``self.goi_offset``.
            solution: solution whose ``primers`` mapping (site set -> scored
                primers) is converted.

        Returns:
            A ``QCLMOutput`` holding the per-primer results together with
            ``self.sequence``, ``self.goi_offset`` and the input data.
        """
        import logging  # local import: keeps this method independent of the file header

        logger = logging.getLogger(__name__)

        sites_boundaries = compute_starts(solution)
        results: List[QCLMMutationOutput] = []
        parsed_mutations = input_data.parse_mutations(self.goi_offset)

        for site_set, scored_primers in solution.primers.items():
            site_sequence = sorted(site_set)
            mutated_dna_sequence_with_primer_sites = \
                DNASequenceForMutagenesis(self.sequence, site_sequence)

            # Mutations starting on one of these sites, ordered by position.
            # They depend only on the site set, so they are computed once
            # instead of once per primer.
            sorted_primer_mutations = sorted(
                (parsed_mutation for parsed_mutation in parsed_mutations
                 if parsed_mutation.get_start() in site_sequence),
                key=lambda mut: mut.position)

            for primer in scored_primers:
                spec = primer.spec
                primer_sequence = spec.get_sequence(
                    mutated_dna_sequence_with_primer_sites)

                # the n-th codon of the primer is paired with the n-th mutation
                user_mutation_strings: List[str] = [
                    mutation.user_string_with_aminos(
                        DegenerateTriplet.degenerate_codon_to_aminos(
                            codon, self.usages.table.forward_table))
                    for mutation, codon in zip(sorted_primer_mutations,
                                               spec.codons)
                ]

                # check if we have overlap with any primers.
                overlap_with_next = bool(
                    check_for_overlap(sites_boundaries, site_set, spec.offset,
                                      spec.offset + spec.length))
                if overlap_with_next:
                    # diagnostic only; goes to the logger instead of stdout
                    logger.debug("We have an overlap")

                results.append(QCLMMutationOutput(
                    result_found=True,
                    mutations=user_mutation_strings,
                    primers=[PrimerOutput(
                        sequence=primer_sequence,
                        start=spec.offset,
                        length=spec.length,
                        temperature=round(primer.tm, ndigits=2),
                        gc_content=round(GC(primer_sequence), ndigits=2),
                        degenerate_codons=list(spec.codons),
                        overlap_with_following=overlap_with_next
                    )]
                ))

        return QCLMOutput(
            results=results,
            full_sequence=self.sequence,
            goi_offset=self.goi_offset,
            input_data=input_data,
        )