Beispiel #1
0
 def get_codon(self, amino, exclude_codons=None, exclude_pattern='', force_excludes=False):
     '''Select a codon for amino, optionally excluding some codons first.

     The candidate codons and their frequencies are read from self.table[amino];
     the final choice is delegated to self.random_category. Exclusion may be
     useful for (e.g.) adding only non-NGG codons to the first portion of a CDS,
     where NGG can encourage translation abortion.

     Arguments:
       amino           -- key into self.table naming the target amino acid.
       exclude_codons  -- optional iterable of codons to remove before selecting.
                          It is copied and never modified; None means no exclusions.
       exclude_pattern -- optional IUPAC pattern; every DNA codon it expands to
                          (via sequtils.bioseq(...).all_iupac_permus("dna")) is
                          excluded as well.
       force_excludes  -- if True, raise rather than fall back to the unmodified
                          table when exclusions leave nothing selectable.

     Raises IndexError if force_excludes is set and the remaining codons have
     no (or zero total) frequency.'''
     # Copy, because exclusions pop entries and must not touch the shared table.
     codons = self.table[amino].copy()
     # Build a private exclusion list. Extending the argument itself would mutate
     # the caller's list (or a shared default) and leak exclusions between calls.
     excluded = list(exclude_codons) if exclude_codons else []
     if exclude_pattern:
         excluded.extend(sequtils.bioseq(exclude_pattern).all_iupac_permus("dna"))
     if excluded:
         for key in excluded:
             codons.pop(key, None)
         if not sum(codons.values()):
             # That is, no codons remain, or none with frequency > 0.
             if force_excludes:
                 raise IndexError("No valid codons left for amino "+amino+" after exclusions: "+str(codons))
             else:
                 # Log the problem and carry on with the unmodified table.
                 import sys
                 print("Warning: Could not exclude codons",codons,
                     ", and still select a codon for",amino+".",
                     "Have reverted to unmodified codon table in this case.",
                     file=sys.stderr)
                 codons = self.table[amino].copy()
         # Frequencies are only re-normalised when exclusions were requested.
         codons = self.normalise_codon_frequencies(codons)
     return self.random_category(codons)
Beispiel #2
0
 def build_regex_list(self, iupac_list):
     '''Return one regex per entry of iupac_list, in the same order.

     Each pattern is upper-cased, wrapped in sequtils.bioseq, and converted with
     iupac_to_regex, passing self.search_complement as include_complement and
     always stripping wildcards.'''
     return [
         sequtils.bioseq(iupac.upper()).iupac_to_regex(
             include_complement=self.search_complement,
             strip_wildcards=True)
         for iupac in iupac_list
     ]
Beispiel #3
0
 def codon_indices(self, sequence, frame=0):
     '''Wraps map_string to provide codon-indexed, and codon-spanning, data.

     If given a string and optional frame, converts map_string output to
     codon-mapped data. If given a list of codons, joins them and assumes
     frame 0. Hits whose string index lies before frame are skipped.

     Whereas map_string returns indices for matches at the first nucleotide,
     this function groups together matches starting within the same codon.
     That is, the patterns returned for a codon may have matched starting at
     any of its nucleotides, not just the first.

     Returns a dict keyed by codon index, each value of the form
     {"span": (int) codons, "patterns": (list) regexes}, for example
     {2: {"span": 3, "patterns": ["GC[AT][AT][AT]GCGG"]}}.
     When self.verbose is set, each value also carries a "debug" dict keyed by
     the nucleotide indices that contributed to it.

     Raises TypeError if sequence is neither a str (or subclass) nor a list.'''
     # Accept either str or list for sequence, but convert to str.
     if isinstance(sequence, list):
         sequence = ''.join(sequence)
         frame = 0 # If codons are given then it is assumed they are in frame
     if not isinstance(sequence, str): # Is true of str subclasses like bioseq.
         raise TypeError("codon_indices accepts either string or list-of-codons input.")
     # The zip against range(0,100) caps this debug listing at 100 codons.
     self.verbose_msg("codon_indices called on sequence",
         ' '.join([str(x[1])+'-'+x[0] for x in zip(sequtils.bioseq(sequence).codons(),range(0,100))]),
         ", frame",frame)
     # First get string indices, patterns and spans.
     stringindices = self.map_string(sequence)
     # This is int-indexed dicts containing keys "span" and "patterns".
     # Need to convert the indexes and "span" value to codons instead.
     # Do this by converting index and index+span into codon indices,
     # then converting span into codon_index(index+span)-codon_index(index).
     codonindices = {}
     for index in stringindices:
         if index < frame: continue # Ignore string indices "before" the current frame.
         # Show up to two nucleotides either side of the hit. The lower bound is
         # clamped at 0 so that hits at index 0 or 1 still display the hit itself.
         context_start = max(index-2, 0)
         context_end = max(index+3, 0)
         self.verbose_msg("Mapping hit at string index",index,
              "({0}) to codons.".format(sequence[context_start:context_end]))
         nuc_span = stringindices[index]['span']
         nuc_patterns = stringindices[index].get('patterns',[])
         codon_n = self.codon_index(index,frame)
         # NOTE(review): the +1 offsets below make the codon span generous;
         # confirm against codon_index semantics before tightening.
         span_nuc_n = index+nuc_span + 1
         span_codon_n = self.codon_index(span_nuc_n,frame)
         spans_codons = 1 + span_codon_n - codon_n
         # From here, have to be careful not to overwrite entries from prior
         # nucleotides in the same codon!
         codon_entry = codonindices.setdefault(codon_n, {})
         # codon_entry is the same dict object stored in codonindices, so edits
         # made here are visible there without reassigning.
         codon_entry.setdefault("patterns",[]).extend(nuc_patterns)
         # Keep the widest span seen for any hit starting in this codon.
         codon_entry['span'] = max(codon_entry.get("span",0),spans_codons)
         if self.verbose:
             # In verbose mode, spit out an additional dict in each codon index
             # containing subdicts for each nucleotide index processed:
             nucdebug = codon_entry.setdefault('debug',{}).setdefault(index,{})
             nucdebug['sequence'] = sequence
             nucdebug['string_vs_codon'] = (index, codon_n)
             nucdebug['original'] = stringindices[index]
             nucdebug['span_nuc_n'] = span_nuc_n
             nucdebug['span_codon_n'] = span_codon_n
     return codonindices
Beispiel #4
0
def tests():
    """Run a randomised self-check of DNAMapper and print the outcome.

    Builds a random 100-nt sequence and five random IUPAC targets, then checks
    that every hit reported by map_string matches at its reported position and
    that every codon-level hit reported by codon_indices (frames 0-2) matches
    somewhere in its reported codon window. Results are printed; nothing is
    returned.
    """
    print("Running tests on DNAMapper.")
    import re
    import random
    testsequence = sequtils.bioseq(sequtils.random_dna(100))
    maptargets = [sequtils.random_iupac_dna(random.randint(2,10)) for _ in range(5)]
    mapper = DNAMapper(maptargets,verbose=True)

    def verify_stringmap():
        # Each reported pattern must match at the start of its reported window.
        consistent = True
        hits = mapper.map_string(testsequence)
        print("\t ----- Checking Hits for Consistency ----- ")
        for position in hits:
            hit = hits[position]
            for pattern in hit['patterns']:
                window = testsequence[position:position+hit['span']]
                if re.compile(pattern).match(window):
                    print("\tMatched",pattern,"successfully at target site.")
                else:
                    print("\tFailed to match",pattern,"with substring",testsequence[position:])
                    consistent = False
        return consistent

    def verify_framemap(frame):
        # Each reported pattern must occur somewhere in its reported codon window.
        consistent = True
        codon_list = testsequence.codons(leading=False,trailing=True) # Leading?
        codon_hits = mapper.codon_indices(testsequence,frame)
        print("\t ----- Checking Hits for Consistency ----- ")
        for codon_pos in sorted(codon_hits):
            entry = codon_hits[codon_pos]
            region = ''.join(codon_list[codon_pos:codon_pos+entry['span']])
            for pattern in entry["patterns"]:
                compiled = re.compile(pattern)
                if compiled.findall(region):
                    continue
                print("\tNon-match with pattern",compiled.pattern,"in string",region)
                print("\t\tDebug dict:",codon_hits[codon_pos]['debug'])
                consistent = False
        return consistent

    allpassing = True
    if not verify_stringmap():
        print("Failed stringmap tests.\n","==============================\n")
        allpassing = False
    else:
        print("Passed stringmap tests.\n","==============================\n")
    for frame in range(3):
        if not verify_framemap(frame):
            print("Failed framemap test at frame",str(frame)+".\n","==============================\n")
            allpassing = False
        else:
            print("Passed framemap test at frame",str(frame)+".\n","==============================\n")
    if allpassing:
        print("\n >>> All Tests Passed! <<<")
    else:
        print("\n >>> Tests Failed <<<")
Beispiel #5
0
 def heat_map(self, sequence, frame=0):
     '''Return a list with one frequency value per codon of sequence.

     sequence may be a string or a list of codons; a list is joined and read
     from frame 0 regardless of the frame argument. The sequence is upper-cased
     and "U" is replaced by "T" before being split into codons (no leading or
     trailing partial codons). Each codon is translated with
     self.for_table.translate_codon and its value looked up in
     self.table[amino][codon]. This can be used to analyse the codon usage
     bias in a sequence.'''
     if isinstance(sequence, list):
         # Codons supplied as a list are taken to be in frame already.
         sequence, frame = ''.join(sequence), 0
     dna = sequtils.bioseq(sequence.upper().replace("U","T"))
     # Each entry is the codon's value within its own amino acid's table row.
     return [
         self.table[self.for_table.translate_codon(triplet)][triplet]
         for triplet in dna.codons(frame, leading=False, trailing=False)
     ]