def _handleComment(flob, c0):
    """Do the right thing with a nexus comment whose first character is c0.

    The only comments we are interested in are the tree weights, eg
    [&W 1/2], and command comments, eg [& foo].  Visible comments ([!...])
    are not returned, but if var.nexus_writeVisibleComments is turned on
    they are written to stdout.

    The behaviour is steered by these var settings:

    - nexus_writeVisibleComments: write, but do not get, all [!...]
    - nexus_getP4CommandComments: get all [&&p4 ...]
    - nexus_getWeightCommandComments: get all [&w ...]
    - nexus_getAllCommandComments: get all [&...]

    Args:
        flob: the file-like object being read; it needs read(), tell()
            and seek().
        c0: the character that was read just before this call
            (presumably '[').

    Returns:
        Whatever _getComment() returns for a comment that is wanted,
        otherwise None (or, in one branch, the return value of
        _skipComment(), which is normally None).

    Raises:
        P4Error: if the flob ends before the comment is classified.
    """
    gm = ["nexustoken._handleComment()"]
    localPieces = [c0]  # presumably c0 is '['
    c = flob.read(1)
    localPieces.append(c)
    if not c:
        # Empty string, ie at the end of the flob.
        gm.append(
            "Reached the end while still in a comment. No comment close.")
        raise P4Error(gm)
    if c == ']':
        return None  # appears to be '[]'
    if c == '!' and var.nexus_writeVisibleComments:
        print(_getComment(flob, localPieces))
        return None
    if c != '&':
        # Not a command comment (or a visible comment that is not wanted).
        _skipComment(flob)
        return None

    # From here on it is a command comment, ie it starts with '[&'.
    if var.nexus_getAllCommandComments:
        return _getComment(flob, localPieces)
    c2 = flob.read(1)
    if not c2:
        gm.append("Reached the end while still in a comment.")
        raise P4Error(gm)
    localPieces.append(c2)
    if c2 == ']':
        # The comment was '[&]' and its closing bracket has just been
        # consumed.  Handing the flob to _skipComment() now would make it
        # look for another ']' and so eat input beyond this comment.
        return None
    if c2.lower() == 'w' and var.nexus_getWeightCommandComments:
        return _getComment(flob, localPieces)
    if c2 == '&' and var.nexus_getP4CommandComments:
        # We may need to get back here, and relative seek will not work.
        startSpot = flob.tell()
        # Ask whether the next 3 characters are 'p4 ' (ie p, 4, space).
        c3 = flob.read(3)
        if len(c3) != 3 or c3.lower() != 'p4 ':
            flob.seek(startSpot)
            _skipComment(flob)
            return None
        # So it is a p4 comment.  Get it.
        localPieces.append(c3)
        return _getComment(flob, localPieces)
    return _skipComment(flob)  # None, normally
def nexusSkipPastBlockEnd(flob):
    """Read up to and including a block 'end' or 'endblock'.

    This should only ever be issued after a semi-colon.  Tokens are
    pulled with nextTok(); any command that is not the block end is
    skipped with pf.nexusSkipPastNextSemiColon().

    Args:
        flob: the file-like object being read.  If it has a 'name'
            attribute, that name is included in error messages.

    Raises:
        P4Error: if 'end'/'endblock' is not followed by ';', or if the
            tokens run out before a block end is found.
    """
    complaintHead = '\nNexus: nexusSkipPastBlockEnd()'
    if hasattr(flob, 'name'):
        complaintHead += " file: %s" % flob.name
    while 1:
        tok = nextTok(flob)
        if not tok:
            break
        # str.lower(); the Python 2 string.lower() function no longer exists.
        lowTok = tok.lower()
        if lowTok == 'end' or lowTok == 'endblock':
            tok2 = nextTok(flob)
            if not tok2 or tok2 != ';':
                gm = [complaintHead]
                gm.append(" Expecting a semicolon after %s" % tok)
                if not tok2:
                    gm.append("Got nothing.")
                else:
                    gm.append("Got '%s'" % tok2)
                raise P4Error(gm)
            return
        elif lowTok == ';':
            # For pathological cases where the last command is a ';' by
            # itself.
            continue
        else:
            pf.nexusSkipPastNextSemiColon(nt.nexusToken, flob)
    gm = [complaintHead]
    gm.append("Failed to find either 'end' or 'endblock'")
    gm.append("Premature end of file?")
    raise P4Error(gm)
def composition(self, sequenceNumberList=None):
    """Like Alignment.composition(), but for the part, only.

    Args:
        sequenceNumberList: optional list of int sequence numbers, each in
            the range 0 .. self.nTax - 1.  If it is None or empty then
            all the sequences are used.

    Returns:
        The result of pf.partComposition() for self.cPart.

    Raises:
        P4Error: if sequenceNumberList is not a list, or has an item that
            is not an int or is out of range.
    """
    gm = ['Part: composition()']
    if sequenceNumberList:
        if not isinstance(sequenceNumberList, list):
            gm.append("The sequenceNumberList should be a list, ok?")
            raise P4Error(gm)
        for seqNum in sequenceNumberList:
            if not isinstance(seqNum, int):
                gm.append("The sequenceNumberList should be integers, ok?")
                raise P4Error(gm)
            if seqNum < 0 or seqNum > self.nTax - 1:
                gm.append(
                    "Item '%i' in sequenceNumberList is out of range" % seqNum)
                raise P4Error(gm)
    else:
        sequenceNumberList = range(self.nTax)

    if not self.cPart:
        self.alignment._initParts()

    # Flag each taxon as included (1) or excluded (0) in the part's tax list.
    for taxNum in range(self.nTax):
        isWanted = 1 if taxNum in sequenceNumberList else 0
        pf.pokePartTaxListAtIndex(self.cPart, isWanted, taxNum)
    return pf.partComposition(self.cPart)
def give_degen(self, codon, pos=1):
    """Return the degenerate codon for the synonymous group of *codon*.

    The group is made of the codons synonymous to *codon* that have the
    same nucleotide at position *pos* as *codon*.  Results are cached in
    self.cod2degen[pos].

    Raises:
        P4Error: if *codon* does not code for exactly one amino-acid, or
            if no suitable degenerate codon is found.
    """
    if codon not in self.cod2degen[pos]:
        if len(codon.aas) != 1:
            msg = CAT(["%s is already a degenerate codon." % codon,
                       "More thinking will be necessary in order to decide ",
                       "how to deal with such a case.\n"])
            raise P4Error(msg)
        aa = next(iter(codon.aas))
        # Cache the first candidate that contains the codon.
        for candidate in list(self.degen_by_aa[aa][pos]):
            if codon in candidate:
                self.cod2degen[pos][codon] = candidate
                break
    try:
        return self.cod2degen[pos][codon]
    except KeyError:
        # The search above finished without finding a suitable codon.
        msg = CAT(["The degenerate codon for %s " % codon,
                   "having the same nucleotide at position %d " % pos,
                   "cannot be found.\nIt could be that %s " % codon,
                   "is already degenerate and spans several degeneracy classes.\n"])
        raise P4Error(msg)
def nexusSkipPastBlockEnd(flob):
    """Read up to and including a block 'end' or 'endblock'.

    This should only ever be issued after a semi-colon.  Tokens are
    pulled with nextTok(); any command that is not the block end is
    skipped with nexusSkipPastNextSemiColon().

    Args:
        flob: the file-like object being read.

    Raises:
        P4Error: if 'end'/'endblock' is not followed by ';', or if the
            tokens run out before a block end is found.
    """
    gm = ['nexustoken.nexusSkipPastBlockEnd()']
    while 1:
        tok = nextTok(flob)
        if not tok:
            break
        lowTok = tok.lower()
        if lowTok == 'end' or lowTok == 'endblock':
            tok2 = nextTok(flob)
            if not tok2 or tok2 != ';':
                # A stray print of an undefined name used to sit here; it
                # turned this error path into a NameError.
                gm.append("Expecting a semicolon after %s" % tok)
                if not tok2:
                    gm.append("Got nothing.")
                else:
                    gm.append("Got '%s'" % tok2)
                raise P4Error(gm)
            return
        elif lowTok == ';':
            # For pathological cases where the last command is a ';' by
            # itself.
            continue
        else:
            nexusSkipPastNextSemiColon(flob)
    gm.append("Failed to find either 'end' or 'endblock'")
    gm.append("Premature end of file?")
    raise P4Error(gm)
def _setBinSize(self, binSize):
    """Validate binSize and store it, as a float, in self._binSize.

    Raises:
        P4Error: if binSize cannot be converted to a float, or if the
            converted value is zero or negative.
    """
    try:
        asFloat = float(binSize)
    except (ValueError, TypeError):
        raise P4Error("Arg binSize, if set, should be a float.")
    else:
        if asFloat <= 0.0:
            raise P4Error("Arg binSize, if set, should be a positive float.")
    self._binSize = asFloat
def __init__(self, tree, runNum, program='p4', mbBaseName=None, directory='.', verbose=1):
    """Set up posterior samples for a tree and read the sample files.

    Args:
        tree: a Tree with a model attached and with taxNames set.
        runNum: the run number; it is stored as int(runNum).
        program: the program that made the files, 'p4' or 'mrbayes'.
            The name is matched case-insensitively.
        mbBaseName: stored as self.mbBaseName when program is 'mrbayes'.
        directory: an existing directory; stored as self.directory.
        verbose: if >= 1, the number of samples is printed.

    Raises:
        P4Error: if the tree, its model or its taxNames are unsuitable, if
            an 'empirical' comp has no val set, if program is not
            recognised, or if the number of tree samples does not match
            the number of parameter samples.
    """
    gm = ["PosteriorSamples() init"]
    if not isinstance(tree, Tree) or not tree.model:
        gm.append("Instantiate with a p4 tree with a model attached.")
        raise P4Error(gm)
    if not tree.taxNames:
        gm.append(
            "The tree should have taxNames (in proper order!) attached.")
        raise P4Error(gm)
    self.tree = tree

    # Work on a private copy of the model, in which 'empirical' comps are
    # turned into 'specified' ones.
    self.model = copy.deepcopy(self.tree.model)
    for pNum in range(self.model.nParts):
        modelPart = self.model.parts[pNum]
        for compNum in range(modelPart.nComps):
            comp = modelPart.comps[compNum]
            if comp.spec == 'empirical':
                comp.spec = 'specified'
                if not comp.val[0]:
                    gm.append(
                        "Comp %i in partition %i has no val set." % (compNum, pNum))
                    gm.append("Maybe fix by calculating a likelihood before?")
                    raise P4Error(gm)
    self.model.cModel = None

    self.runNum = int(runNum)
    self.goodPrograms = ['p4', 'mrbayes']
    lowProgram = program.lower()
    # Validate the lower-cased name, which is also what gets stored.
    # Checking the raw argument rejected eg 'P4' or 'MrBayes'.
    if lowProgram not in self.goodPrograms:
        gm.append(
            "The program generating the files should be one of %s" % self.goodPrograms)
        raise P4Error(gm)
    self.program = lowProgram
    self.verbose = verbose

    assert os.path.isdir(directory)
    self.directory = directory

    if self.program == 'p4':
        self._readP4Files()
    elif self.program == 'mrbayes':
        self.mbBaseName = mbBaseName
        self._readMrBayesFiles()

    # self.tLines and self.pLines are presumably filled in by the
    # _read*Files() calls above.
    self.nSamples = len(self.tLines)
    if self.tree.model.nFreePrams:
        nPLines = len(self.pLines)
        if self.nSamples and self.nSamples == nPLines:
            if self.verbose >= 1:
                print("Got %i samples." % self.nSamples)
        else:
            gm.append(
                "Got %i tree samples, but %i parameter samples." % (self.nSamples, nPLines))
            raise P4Error(gm)
def updateToCommonLeafSetWithTaxa(self, tfl, dict, bitkeys, taxnames):
    """Check the trees in tfl against taxnames, then collect their splits.

    Args:
        tfl: a source of trees, with an nSamples attribute and a
            getTree(i) method.
        dict: a mapping from taxon name to a bit key (the parameter name
            shadows the builtin, but is kept for existing callers).
        bitkeys: not used here.
        taxnames: the supplied taxon names.

    Returns:
        A list with one entry per tree.  Each entry is that tree's
        t.splits, a list of [rcKey, t.missingTaxa] pairs.

    Side effects:
        self.weights is replaced with one weight per tree.  Each tree gets
        missingTaxa and splits attributes, and each leaf's n.br.rc is set.

    Raises:
        P4Error: if the names found in the trees and the supplied names
            are not the same set.
    """
    gm = ['updateToCommonLeafSetWithTaxa(tfl, dict, bitkeys, taxnames)']

    # Collect every taxon name that occurs in the input trees.
    namesInTrees = set()
    for i in range(tfl.nSamples):
        t = tfl.getTree(i)
        if not t._taxNames:
            t._setTaxNamesFromLeaves()
        namesInTrees.update(t.taxNames)

    # Sets give constant-time membership tests for the two checks below.
    suppliedNames = set(taxnames)
    for name in namesInTrees:
        if name not in suppliedNames:
            gm.append(
                'Found taxa in that does not appear in the supplied taxa list: ' + name)
            raise P4Error(gm)
    for name in taxnames:
        if name not in namesInTrees:
            gm.append(
                'Found taxa in supplied taxa list that does not appear in the supertree: ' + name)
            raise P4Error(gm)

    self.weights = []
    splits = []
    for i in range(tfl.nSamples):
        t = tfl.getTree(i)
        if not t._taxNames:
            t._setTaxNamesFromLeaves()

        # OR together the bit keys of the supplied taxa this tree lacks.
        namesInThisTree = set(t.taxNames)
        t.missingTaxa = 0
        for name in taxnames:
            if name not in namesInThisTree:
                t.missingTaxa = t.missingTaxa | dict[name]

        for n in t.iterLeavesNoRoot():
            n.br.rc = dict[n.name]
        t._makeRCSplitKeys()

        # NOTE(review): recipWeight is only consulted when the tree has no
        # 'weight' attribute at all, not when weight is None -- confirm
        # that this is intended.
        weight = 1.0
        if hasattr(t, "weight"):
            if t.weight is not None:
                weight = t.weight
        elif hasattr(t, "recipWeight"):
            if t.recipWeight is not None:
                weight = 1.0 / int(t.recipWeight)
        self.weights.append(weight)

        t.splits = []
        for n in t.iterInternalsNoRoot():
            for m in n.br.rcList:
                t.splits.append([m, t.missingTaxa])
        splits.append(t.splits)
    return splits
def __init__(self, trees=None, taxNames=[]):
    """Set up a Trees object from a list of trees, or from var.trees.

    Args:
        trees: a non-empty list whose first item is a Tree.  If None,
            var.trees is used instead.
        taxNames: a list of at least one string.  If empty, the taxNames
            of the first tree are used.  (The default list is never
            mutated here.)

    Raises:
        P4Error: if no trees are available, if either argument has the
            wrong form, or if no taxNames can be found.
    """
    gm = ['Trees()']
    if trees is None:
        if not var.trees:
            gm.append(
                "Arg trees is not given or is empty, and var.trees is empty. No trees."
            )
            raise P4Error(gm)
        self.trees = var.trees
    else:
        if not isinstance(trees, list):
            gm.append("If provided, the trees arg should be a list.")
            raise P4Error(gm)
        if not len(trees):
            gm.append("The list of trees appears to be empty.")
            raise P4Error(gm)
        if not isinstance(trees[0], Tree):
            gm.append(
                "The first item in the input list is not a Tree object.")
            raise P4Error(gm)
        self.trees = trees

    if taxNames:
        looksGood = (isinstance(taxNames, list) and len(taxNames) > 0
                     and isinstance(taxNames[0], str))
        if not looksGood:
            gm.append(
                "If provided, arg taxNames should be a list of (at least one) string(s)."
            )
            # This raise was missing, so a bad taxNames got its message
            # appended and was then passed on to setTaxNames() anyway.
            raise P4Error(gm)
    else:
        # Fall back to the taxNames of the first tree.
        if trees:
            if trees[0].taxNames:
                taxNames = trees[0].taxNames
        else:
            if var.trees[0].taxNames:
                taxNames = var.trees[0].taxNames

    if not taxNames:
        gm.append("I can't find a taxNames list in the input trees. ")
        gm.append(
            "In this case you need to feed this a taxNames list when you instantiate."
        )
        gm.append(
            "Often you can get a good taxNames from yourAlignment.taxNames."
        )
        gm.append(
            "(The order often matters, and generally should be same for all your analyses.)"
        )
        raise P4Error(gm)

    self.taxNames = None
    self.setTaxNames(taxNames)
    self.data = None
    self.writeBranchLengths = True
def writeNexus(self, fName=None, writeTaxaBlock=1, append=0, digitsAfterDecimal=6):
    """Write out self in Nexus format.

    If writeTaxaBlock=1, then a Taxa block is written before the
    Distances block.

    Append, if 0, writes #NEXUS first.  If 1, does not write #NEXUS.
    If append is requested but fName does not exist, a new file is
    started with a #NEXUS line and a notice is printed.

    Args:
        fName: the file name to write to.  None or sys.stdout means write
            to stdout.
        writeTaxaBlock: passed on to self.writeNexusToOpenFile().
        append: whether to append to fName; also passed on.
        digitsAfterDecimal: passed on to self.writeNexusToOpenFile().

    Raises:
        P4Error: if the file cannot be opened or started, or if append is
            requested and fName exists but is not a regular file.
    """
    import os

    gm = ["DistanceMatrix.writeNexus()"]
    assert self.dim, "Distance Matrix.writeNexus() no dim"

    if fName is None or fName is sys.stdout:
        self.writeNexusToOpenFile(
            sys.stdout, writeTaxaBlock, append, digitsAfterDecimal)
        return

    mode = 'w'
    complaint = "Can't open %s for writing." % fName
    writeHeader = False
    if append:
        if os.path.isfile(fName):
            mode = 'a'
            complaint = "Can't open %s for appending." % fName
        else:
            # os.path.lexists() was called here with no argument, which is
            # a TypeError.  A path that exists but is not a regular file
            # is refused explicitly rather than overwritten.
            if os.path.lexists(fName):
                gm.append("'%s' exists, but is not a regular file." % fName)
                raise P4Error(gm)
            print(gm[0])
            print(" 'append' is requested,")
            print(
                " but '%s' is not a regular file (maybe it doesn't exist?)." % fName)
            print(" Writing to a new file instead.")
            writeHeader = True

    try:
        f = open(fName, mode)
    except IOError:
        gm.append(complaint)
        raise P4Error(gm)

    # The with block closes the file even if the writer raises.
    with f:
        if writeHeader:
            try:
                f.write('#NEXUS\n\n')
            except IOError:
                gm.append(complaint)
                raise P4Error(gm)
        self.writeNexusToOpenFile(
            f, writeTaxaBlock, append, digitsAfterDecimal)
def histo(self, verbose=True, binSize=None, padMin=None, padMax=None):
    """Put the data nicely into bins.

    After you do this, the bins are available in self.bins, a list of
    pairs.

    Args *padMin* and *padMax* extend the range up or down, where the
    extended bins would have zero content, so they would be placeholders.
    It would be good for making two different histos have exactly the
    same range, eg so you can plot them in the same plot.

    If *verbose* is set, a summary and the bins are printed.

    Raises:
        P4Error: if there is no binSize and the data have no range, or if
            no bins result.
    """
    gm = ['Numbers.histo()']
    havePadMin = padMin != None
    havePadMax = padMax != None
    if havePadMin:
        assert padMin <= self.min
    if havePadMax:
        assert padMax >= self.max, "padMax is %f, self.max is %f" % (
            padMax, self.max)

    if binSize:
        # A property, so it checks to make sure it is a float.
        self.binSize = binSize

    # It is possible that the data are all the same, so it is really not
    # clear how to make bins.  Unless there is a binSize defined.
    if not self.binSize and not self.range:
        self.dump()
        gm.append(
            "The data are all the same. max=min. That will not work."
        )
        raise P4Error(gm)

    self._makeBins(padMin, padMax)
    if not self.bins:
        gm.append("No bins.")
        raise P4Error(gm)

    if not verbose:
        return
    print(
        "%i data points, min=%s, max=%s, binSize=%s, nBins=%i" %
        (len(self.data), self.min, self.max, self.binSize, self.nBins))
    if havePadMin or havePadMax:
        print("padMin=%s, padMax=%s" % (padMin, padMax))
    print("%i points at min, %i points at max" %
          (self.data.count(self.min), self.data.count(self.max)))
    for oneBin in self.bins:
        print(f"{oneBin[0]:.10} {oneBin[1]}")
def _readFastaMakeSeq(self, splHeadLine, sLineList):
    """Make a Sequence from a split fasta head line and sequence lines.

    Args:
        splHeadLine: the split head line; item 0 is the name and, if there
            are exactly two items, item 1 is the comment.
        sLineList: the lines of sequence, which are joined and lower-cased.

    Returns:
        The new Sequence object.

    Raises:
        P4Error: if there is no name, or no sequence.
    """
    gm = ['SequenceList._readFastaMakeSeq()']
    haveName = bool(splHeadLine) and bool(splHeadLine[0])
    if not haveName:
        gm.append("No name for new fasta sequence. This should not happen.")
        raise P4Error(gm)
    if not sLineList:
        gm.append("No sequence for %s" % splHeadLine)
        raise P4Error(gm)

    newSeq = Sequence()
    newSeq.name = splHeadLine[0]
    if len(splHeadLine) == 2:
        newSeq.comment = splHeadLine[1]
    joined = ''.join(sLineList)
    newSeq.sequence = joined.lower()
    return newSeq
def __init__(self, taxNames, constraintTree=None, rooting=False):
    """Collect the split keys of a constraint tree as constraints.

    Args:
        taxNames: the taxon names; they are also installed on the
            constraint tree.
        constraintTree: a tree whose internal (non-root) branch split keys
            become self.constraints, in order and without duplicates.
        rooting: if set, the constraint tree must be biRoot or triRoot.

    Raises:
        P4Error: if rooting is on and the tree is neither biRoot nor
            triRoot.
        AssertionError: if there are no constraints at the end.
    """
    gm = ["Constraints init()"]
    self.taxNames = taxNames
    self.cTree = constraintTree
    self.rooting = rooting
    self.constraints = []
    # A bit key with one bit set for every taxon.
    self.allOnes = 2 ** (len(self.taxNames)) - 1

    if self.cTree:
        self.cTree.taxNames = taxNames
        self.cTree.makeSplitKeys()
        for node in self.cTree.iterInternalsNoRoot():
            key = node.br.splitKey
            if key in self.constraints:
                continue
            self.constraints.append(key)

        if self.rooting and not self.cTree.isBiRoot():
            if not self.cTree.isTriRoot():
                gm.append("Constraints cTree is neither biRoot nor triRoot")
                gm.append("When rooting is on, the tree should be one or the other")
                raise P4Error(gm)

    assert self.constraints, "No constraints?"
def _getComment(flob):
    """Read a (possibly nested) comment from flob and return its text.

    This assumes that the opening '[' has not already been read.
    Characters are read one at a time while tracking the nesting level;
    reading stops when the level is back at zero.  The flob is left
    positioned just after the last character returned.

    The characters are collected as they are read.  An earlier approach
    seeked back and re-read (end - start) characters using tell()
    offsets, but tell() on a text stream returns an opaque number, not a
    character count, so that over-read whenever the comment contained
    multi-byte characters.

    Args:
        flob: the file-like object to read from.

    Returns:
        The text that was read, as a string.

    Raises:
        P4Error: if the flob ends while still in a comment.
    """
    pieces = []
    level = 0  # ie level of nested comments
    while 1:
        c = flob.read(1)
        if not c:
            gm = ["NexusToken._getComment()"]
            gm.append("Reached the end while still in a comment.")
            raise P4Error(gm)
        pieces.append(c)
        if c == '[':
            level = level + 1
        elif c == ']':
            level = level - 1
        if level == 0:
            return ''.join(pieces)
def clustalo(self):
    """Do an alignment with clustalo.

    It is all done in memory -- no files are written.  The sequences are
    piped, as fasta, to a 'clustalo -i -' subprocess, and its output is
    read back in.

    Returns:
        An alignment object, in which the order of the sequences is made
        to be the same as the order in self.

    Raises:
        P4Error: if clustalo writes anything to stderr.
    """
    fastaFlob = io.BytesIO()  # gotta be Bytes for subprocess
    self.writeFastaToBytesFlob(fastaFlob)
    proc = Popen(["clustalo", "-i", "-"], stdin=PIPE,
                 stdout=PIPE, stderr=PIPE)
    ret = proc.communicate(input=fastaFlob.getvalue())
    if ret[1]:
        print(ret)
        raise P4Error("clustalo() Something wrong here ...")
    fastaFlob.close()

    # ret[0] is a bytes string.
    a = p4.func.readAndPop(ret[0].decode())
    a.makeSequenceForNameDict()
    a.sequences = [a.sequenceForNameDict[mySeq.name]
                   for mySeq in self.sequences]
    return a
def muscle(self):
    """Do an alignment with muscle.

    It is all done in memory -- no files are written.  The sequences are
    piped, as fasta, to a 'muscle' subprocess, and its output is read
    back in.

    Returns:
        An alignment object, in which the order of the sequences is made
        to be the same as the order in self.

    Raises:
        P4Error: if the output of muscle cannot be read.
    """
    fastaFlob = io.BytesIO()
    self.writeFastaToBytesFlob(fastaFlob)
    proc = Popen(["muscle"], stdin=PIPE, stdout=PIPE, stderr=PIPE)
    ret = proc.communicate(input=fastaFlob.getvalue())
    fastaFlob.close()

    try:
        a = p4.func.readAndPop(ret[0].decode())
    except P4Error:
        print(ret)
        raise P4Error("Something didn't work ...")

    a.makeSequenceForNameDict()
    a.sequences = [a.sequenceForNameDict[mySeq.name]
                   for mySeq in self.sequences]
    return a
def renameForPhylip(self, dictFName='p4_renameForPhylip_dict.py'):
    """Rename with strict phylip-friendly short boring names.

    The sequences are renamed s0, s1, s2, and so on.  The old names
    (together with the new) are saved in a python dictionary, in a file,
    by default named p4_renameForPhylip_dict.py.

    The file is written before any sequence is renamed, so if the file
    cannot be written the original names are left in place.

    Args:
        dictFName: the name of the file to write.  It must not exist yet.

    Raises:
        P4Error: if dictFName already exists.
    """
    gm = ['SequenceList.renameForPhylip()']
    if os.path.exists(dictFName):
        gm.append("The dictionary file '%s' already exists." % dictFName)
        raise P4Error(gm)

    if hasattr(self, 'taxNames'):
        originalNames = self.taxNames
    else:
        originalNames = [s.name for s in self.sequences]

    # Map each new name to the old one, without touching the sequences yet.
    d = {'s%i' % i: s.name for i, s in enumerate(self.sequences)}

    with open(dictFName, 'w') as f:
        f.write("p4_renameForPhylip_originalNames = %s\np4_renameForPhylip_dict = %s\n" % (
            originalNames, d))

    # The mapping is safely on disk; now do the renaming.
    for i, s in enumerate(self.sequences):
        s.name = 's%i' % i
def set_aas(self, code=None):
    """Determine the set of amino-acids coded by self, and store it.

    This method determines the set of amino-acids coded by the
    non-degenerate codons implied by self.  The result is stored in
    self.aas.

    A genetic code can be specified according to which the codon should
    be translated.  *code* is the Biopython name of the genetic code under
    which degeneracy has to be interpreted, or a dictionary that takes
    codons as keys and their translations as values.  By default, the
    genetic code of the codon (self.code) will be used.

    Raises:
        AssertionError: if *code* is neither a string nor a dictionary, or
            if its codons are not triplets.
        P4Error: if the code has no translation for a codon.
    """
    if code is None:
        # self.code should be something valid.
        code = self.code
    elif isinstance(code, str):
        code = getBiopythonCode(code)
    else:
        msg = "code must be a dictionary, or a string naming the code in Biopython."
        assert isinstance(code, dict), msg

    # We assume that the "codons" all have the same length, and we look at
    # the first codon in the dictionary to know this length.  (Dictionary
    # key views cannot be indexed in Python 3, hence iter().)
    firstCodon = next(iter(code), None)
    assert firstCodon is not None, "The genetic code is empty."
    codelength = len(firstCodon)
    assert codelength == 3, "Amino-acids should be coded by triplets of nucleotides."

    try:
        if self.degenerate:
            self.aas = {code[str(codon)] for codon in self.decomposition()}
        else:
            self.aas = {code[str(self)]}
    except KeyError:
        raise P4Error("The code does not say what the translation "
                      "of %s should be.\n" % str(self))
def generateInputTrees(self):
    """Make the output trees by removing taxa from copies of the input tree.

    Each output tree is a dupe of self.inputTree, named 'inputtree1',
    'inputtree2' and so on, from which taxa are removed according to the
    settings.  Trees that end up with the expected number of taxa are
    appended to self.outputTrees; otherwise a complaint is printed.  If
    self.writeInputTreesToFile is set, the output trees are appended to
    self.outputFile in newick format.

    Raises:
        P4Error: if no taxon distribution is used and removing
            self.noTaxaToRemove taxa would leave fewer than 3 taxa.
    """
    gm = ['SuperTreeInputTrees.generateInputTrees()']

    if self.useTaxonDistribution:
        # Prepare the distribution by normalizing it to the size of the
        # input tree.
        self._prepareDistribution(len(self.inputTree.taxNames))
    elif self.noTaxaToRemove >= len(self.inputTree.taxNames) - 2:
        # The input values are not valid.
        gm.append(
            'The number of taxa to remove would leave less than 3 taxa in the tree, quite uninformative')
        raise P4Error(gm)

    # Create the output trees and remove taxa from them.  As a precaution,
    # check that each output tree has the correct number of taxa.
    for treeNum in range(1, self._noOutputTrees() + 1):
        tree = self.inputTree.dupe()
        tree.name = 'inputtree' + str(treeNum)
        taxa2Remove = self._noTaxaToRemove()
        for _ in range(taxa2Remove):
            doomed = self._getTaxaToRemove(tree.nodes)
            tree.removeNode(doomed)
        tree._setTaxNamesFromLeaves()
        if len(tree.taxNames) != len(self.inputTree.taxNames) - taxa2Remove:
            print('Bugger, the correct number of taxa were not removed, taxa remaining: ', len(tree.taxNames))
            print('Expected: ', len(self.inputTree.taxNames) - taxa2Remove)
        else:
            self.outputTrees.append(tree)

    # Write the trees to file.
    if self.writeInputTreesToFile:
        for tree in self.outputTrees:
            tree.writeNewick(fName=self.outputFile,
                             withTranslation=0,
                             translationHash=None,
                             doMcmcCommandComments=0,
                             toString=False,
                             append=True)
def _commonCStuff(self, resetEmpiricalComps=True):
    """Allocate and set c-stuff, and setPrams.

    Args:
        resetEmpiricalComps: passed on to self._allocCStuff().

    Raises:
        P4Error: if the tree has no data attached.
    """
    # NOTE(review): this block was recovered from a flattened source line,
    # so the nesting below is a reconstruction.  In particular, confirm
    # that the two setCStuff() calls are meant to run on every call, and
    # not only when self.cTree is still unset.
    if not self.data:
        # Name the tree in the message when it has a name.
        if self.name:
            gm = ["Tree %s (_commonCStuff)" % self.name]
        else:
            gm = ["Tree (_commonCStuff)"]
        gm.append(
            "This tree has no data attached. Before doing an optimization, likelihood")
        gm.append(
            "calculation, or simulation, you need to do something like this:")
        gm.append(" theTree.data = theData")
        raise P4Error(gm)

    # print "self.cTree = %s" % self.cTree
    if not self.cTree:
        # This calls self.modelSanityCheck(), which calls
        # self.setEmpiricalComps()
        self._allocCStuff(resetEmpiricalComps=resetEmpiricalComps)
    # print "About to self.model.setCStuff()"
    self.model.setCStuff()
    # print "About to self.setCStuff()"
    self.setCStuff()
    # print "about to p4_setPrams()..."
    pf.p4_setPrams(self.cTree, -1)  # "-1" means do all parts
def _set_nexus_getLineEndingsAsTokens(self, newVal):
    """Set the nexus_getLineEndingsAsTokens setting.

    The value is converted with int() and stored in item 0 of
    self._nexus_getLineEndingsAsTokens.

    Raises:
        P4Error: if newVal cannot be converted to an int.
    """
    try:
        newVal = int(newVal)
    # Only the failures that int() itself reports; a bare except would
    # also swallow KeyboardInterrupt and SystemExit.
    except (TypeError, ValueError, OverflowError):
        gm = ['This property should be set to an int.']
        raise P4Error(gm)
    self._nexus_getLineEndingsAsTokens[0] = newVal
def _set_interactiveHelper(self, newVal):
    """Set the interactive helper to one of the accepted values.

    Raises:
        P4Error: if newVal is not one of the accepted values.
    """
    goodValues = [None, 'p3rlcompleter', 'bpython', 'ipython']
    if newVal not in goodValues:
        gm = ['This property should be set to one of %s' % goodValues]
        raise P4Error(gm)
    self._interactiveHelper = newVal
def _set_nexus_getAllCommandComments(self, newVal):
    """Set the nexus_getAllCommandComments setting.

    The value is converted with int() and stored in item 0 of
    self._nexus_getAllCommandComments.

    Raises:
        P4Error: if newVal cannot be converted to an int.
    """
    try:
        newVal = int(newVal)
    # Only the failures that int() itself reports; a bare except would
    # also swallow KeyboardInterrupt and SystemExit.
    except (TypeError, ValueError, OverflowError):
        gm = ['This property should be set to an int.']
        raise P4Error(gm)
    self._nexus_getAllCommandComments[0] = newVal
def reverseComplement(self):
    """Convert self.sequence, a DNA sequence, to its reverse complement.

    Ambigs are handled correctly.  I think.  Only lowercase symbols are
    handled, and 'x' is not handled.

    The new sequence is built separately and only installed when every
    character has been converted, so self.sequence is left untouched if
    an error is raised.

    Raises:
        AssertionError: if self.dataType is not 'dna'.
        P4Error: if the sequence has a character that cannot be
            complemented.
    """
    assert self.dataType == 'dna'

    # Ambiguity codes, for reference:
    # {'b': 'cgt', 'd': 'agt', 'h': 'act', 'k': 'gt', 'm': 'ac',
    #  'n': 'acgt', 's': 'cg', 'r': 'ag', 'w': 'at', 'v': 'acg', 'y': 'ct'}
    complement = {
        'a': 't', 't': 'a', 'c': 'g', 'g': 'c',
        '-': '-', 'n': 'n',
        'r': 'y', 'y': 'r',
        'b': 'v', 'v': 'b',
        'd': 'h', 'h': 'd',
        'k': 'm', 'm': 'k',
        's': 's', 'w': 'w',
    }

    newSequence = []
    for c in reversed(list(self.sequence)):
        partner = complement.get(c)
        if partner is None:
            gm = ["Sequence.reverseComplement()"]
            if c in string.ascii_uppercase:
                gm.append("Got uppercase '%s' How did that happen? -- can only handle lowercase." % c)
            else:
                gm.append("Sequence.reverseComplement(). Got char '%s' What is it?" % c)
            raise P4Error(gm)
        newSequence.append(partner)
    self.sequence = ''.join(newSequence)
def recode_sequence(sequence, converter, positions=None, code="Standard"):
    """Return a recoded version of *sequence*.

    The correspondence rules provided by the dictionary *converter* are
    used to recode *sequence*, motif by motif.  The size of the motifs is
    taken from the first key of *converter*.  Motifs that have no entry in
    *converter* are left as they are.

    *positions* determines which codon positions (counted from 1) are
    recoded.  By default, all positions are recoded.  The caller's
    *converter* is not modified.

    *code* is the Biopython name of a genetic code, or a dictionary.

    Raises:
        AssertionError: if *code* is neither a string nor a dictionary.
        P4Error: if *converter* is empty, or if the length of *sequence*
            is not a multiple of the motif size.
    """
    gm = ['p4.code_utils.recode_sequence()']
    if isinstance(code, str):
        code = getBiopythonCode(code)
    else:
        msg = "code must be a dictionary, or a string naming the code in Biopython."
        assert isinstance(code, dict), msg

    if not converter:
        gm.append("The converter is empty.")
        raise P4Error(gm)
    # To get the size of the motifs being substituted, we look at the
    # first one in the dictionary.  (Dictionary key views cannot be
    # indexed in Python 3, hence iter().)
    subst_size = len(next(iter(converter)))
    if len(sequence) % subst_size != 0:
        gm.append("The length of the sequence should be a multiple of %i" % subst_size)
        raise P4Error(gm)

    if positions is not None:
        # Filter the converter, into a new dictionary: replace the
        # positions to be recoded by the converted codon, but keep the
        # others.
        converter = {
            codon: CAT([(convert[i - 1] if i in positions else codon[i - 1])
                        for i in range(1, subst_size + 1)])
            for codon, convert in converter.items()}

    # Build the recoded version of the sequence, one motif at a time.
    pieces = []
    # Loop over the codons (triplets, if subst_size == 3).
    for i in range(len(sequence) // subst_size):
        start = subst_size * i
        stop = subst_size * (i + 1)
        try:
            # Make a Codon instance (to convert it afterwards).
            codon = Codon(sequence[start:stop], code)
        except CodonTranslationError as e:
            sys.stderr.write(
                "%s\nProblem at sequence slice %i:%i\n" % (e, start, stop))
            warnings.warn("We will replace the codon by indels.\n")
            try:
                codon = Codon("-" * subst_size, code)
            except CodonTranslationError:
                sys.stderr.write("We still don't know how to translate the codon. "
                                 "Bad implementation?\n")
                # NOTE(review): this exits the whole process from library
                # code; kept as it was.
                sys.exit(1)

        # Convert the codon.  If the converter has no entry for the codon,
        # we don't convert it, hence the converter.get() syntax, using a
        # default value.
        if codon.degenerate:
            # The codon is decomposed into non-degenerate codons.  These
            # codons are converted, and the resulting conversions are
            # "recomposed" into a new codon.
            motifs = [str(cod) for cod in codon.decomposition()]
            converted = [Codon(converter.get(motif, motif), code)
                         for motif in motifs]
            pieces.append(str(reduce_by_or(converted)))
        else:
            motif = str(codon)
            pieces.append(str(Codon(converter.get(motif, motif), code)))
    return "".join(pieces)
def writeEmpiricalProteinModelInPAMLFormat(self, compNum, rMatrixNum, outFileName):
    """Write a comp and an rMatrix of this model part to a file.

    The lower triangle of the 20 x 20 rMatrix is written first, one row
    per line, then an empty line, then the 20 comp values on one line.

    Args:
        compNum: index into self.comps.
        rMatrixNum: index into self.rMatrices.
        outFileName: the name of the file to write.

    Raises:
        P4Error: if self.dim is not 20, or if the rMatrix values do not
            have shape (190,).
        AssertionError: if the comp or rMatrix values are not numpy arrays
            of the expected shape.
    """
    gm = ["ModelPart.writeEmpiricalProteinModelInPAMLFormat()"]
    if self.dim != 20:
        gm.append(f"dim should be 20, got {self.dim}")
        raise P4Error(gm)

    # Get comp
    cVal = self.comps[compNum].val
    assert isinstance(cVal, numpy.ndarray)
    assert cVal.shape == (20, )

    # Get rMatrix
    r = self.rMatrices[rMatrixNum]
    if r.spec in var.rMatrixProteinSpecs:
        rVal = p4.func.getProteinEmpiricalModelRMatrix(
            r.spec, upperTriangle=False)  # full r matrix
    else:
        assert isinstance(r.val, numpy.ndarray)
        if r.val.shape not in [(190, )]:
            gm.append(f"r.val.shape is {r.val.shape}, expecting (190,)")
            raise P4Error(gm)
        # Fill both triangles of a full matrix from the 190 values, which
        # are taken in upper-triangle, row by row order.
        rVal = numpy.zeros((20, 20))
        counter = 0
        for row in range(0, 20):
            for col in range(row + 1, 20):
                rVal[row][col] = r.val[counter]
                rVal[col][row] = r.val[counter]
                counter += 1
        assert counter == 190
    assert rVal.shape == (20, 20)

    # Write it.  The with block closes the file even if a write fails.
    with open(outFileName, "w") as fout:
        for row in range(1, 20):
            for col in range(0, row):  # lower triangle
                print(rVal[row][col], end=" ", file=fout)
            print(file=fout)
        print(file=fout)
        for it in range(20):
            print(cVal[it], end=" ", file=fout)
        print(file=fout)
def __init__(self, theTree=None):
    """Set up a TreePicture, optionally for theTree.

    If theTree is given it must have at least 2 nodes.  It is stored in
    self.tree, and the xPos and yPos of each of its nodes are set to
    None.  The drawing settings all start at their default values.

    Raises:
        P4Error: if theTree has fewer than 2 nodes, or has no nodes
            attribute.
    """
    gm = ['TreePicture.__init__()']
    self.tree = None
    if theTree:
        try:
            nNodes = len(theTree.nodes)
        except AttributeError:
            gm.append("Expecting a Tree instance.")
            raise P4Error(gm)
        if nNodes < 2:
            if nNodes == 1:
                gm.append(
                    "Only 1 node in this tree, so it can't be drawn.")
            else:
                gm.append("No nodes.")
            raise P4Error(gm)
        self.tree = theTree

    if self.tree:
        for node in self.tree.nodes:
            node.xPos = None
            node.yPos = None

    # Things that are not set yet.
    self.width = None
    self.fName = None
    self.heightFactor = None
    self.pointsPerLetter = None

    # Text sizes, and the name placement that is derived from them.
    self.textSize = 11
    self.labelTextSize = 9
    self.nameOffset = self.textSize / 3.0
    self.nameDrop = self.textSize / 4.0

    # What to show.
    self.showInternalNodeNames = 1
    self.textShowNodeNums = 1
    self.putInternalNamesOnBranches = 0

    # Geometry.
    self.addToBrLen = 0.0
    self.xOrigin = 0.0
    self.yOrigin = 0.0
    self.xScale = None
    self.yScale = None

    self.partNum = -1
    self.textDrawModelThing = None
    self.svg = False
def checkTaxNames(self):
    """Check that all trees have all taxNames.

    The taxNames of each tree are compared with self.taxNames, and then
    checkTaxNames() is called on each tree.

    Raises:
        P4Error: if self.taxNames is empty, or if the taxNames of a tree
            differ from self.taxNames.
    """
    gm = ['Trees.checkTaxNames()']
    if not self.taxNames:
        gm.append("No taxNames.")
        raise P4Error(gm)

    for aTree in self.trees:
        if t_differs(aTree.taxNames, self.taxNames) if False else aTree.taxNames != self.taxNames:
            complaint = (
                "Tree %s taxNames is not the same object as self.taxNames." % aTree.name
                if aTree.name else
                "Tree taxNames is not the same object as self.taxNames.")
            gm.append(complaint)
            raise P4Error(gm)

    for aTree in self.trees:
        aTree.checkTaxNames()
def resetSequencesFromParts(self):
    """Gets the sequences from the parts, and installs them in self.

    When var.doDataPart is off, the symbols come from
    pf.symbolSequences() on each part's cPart; when it is on, they come
    from each part's sequenceString().  With several parts, the masks of
    self.nexusSets.charPartition.subsets say which alignment positions
    belong to which part.

    Raises:
        P4Error: if there are no parts.
    """
    if not self.parts:
        gm = ["Alignment.resetSequencesFromParts()"]
        gm.append("No parts.")
        raise P4Error(gm)

    if not var.doDataPart:
        if len(self.parts) == 1 and self.parts[0].name == 'all':
            # One long string, with the sequences end to end.
            allSeq = pf.symbolSequences(self.parts[0].cPart)
            for i in range(len(self.sequences)):
                self.sequences[i].sequence = allSeq[(
                    i * self.length):((i + 1) * self.length)]
        else:
            # Work on lists of characters, and join them up at the end.
            for s in self.sequences:
                s.sequence = list(s.sequence)
            for i in range(len(self.parts)):
                partSeq = pf.symbolSequences(self.parts[i].cPart)
                # spot runs on through partSeq, over all the sequences.
                spot = 0
                m = self.nexusSets.charPartition.subsets[i].mask
                for s in self.sequences:
                    for k in range(self.length):
                        if m[k] == '1':
                            s.sequence[k] = partSeq[spot]
                            spot += 1
            for s in self.sequences:
                s.sequence = ''.join(s.sequence)
    else:
        if len(self.parts) == 1:
            for i in range(len(self.sequences)):
                self.sequences[i].sequence = self.parts[0].sequenceString(i)
        else:
            # Work on lists of characters, and join them up at the end.
            for s in self.sequences:
                s.sequence = list(s.sequence)
            for pNum in range(len(self.parts)):
                m = self.nexusSets.charPartition.subsets[pNum].mask
                for sNum in range(len(self.sequences)):
                    # Here partSeq is for one sequence only, so spot starts
                    # again for each sequence.  (A leftover debugging
                    # print of partSeq was removed from here.)
                    partSeq = self.parts[pNum].sequenceString(sNum)
                    spot = 0
                    s = self.sequences[sNum]
                    for k in range(self.length):
                        if m[k] == '1':
                            s.sequence[k] = partSeq[spot]
                            spot += 1
            for s in self.sequences:
                s.sequence = ''.join(s.sequence)
def __getitem__(self, i):
    """Return the *i*-th nucleotide value of *self*.

    Positions are counted from 1, so *i* is 1, 2 or 3.

    Raises:
        P4Error: if *i* is not 1, 2 or 3.
    """
    for position, attrName in ((1, "v1"), (2, "v2"), (3, "v3")):
        if i == position:
            return getattr(self, attrName)
    raise P4Error("A codon has only 3 positions.")