def _handleComment(flob, c0):
    """Do the right thing with [a nexus comment].

    The only comments we are interested in are the tree weights, eg
    [&W 1/2] and command comments, eg [& foo].  Visible comments are
    not returned, but if var.nexus_writeVisibleComments is turned on
    they are written to stdout."""

    #   nexus_writeVisibleComments      # write, but do not get, all [!...]
    #   nexus_getP4CommandComments      # all [&&p4 ...]
    #   nexus_getWeightCommandComments  # all [&w ...]
    #   nexus_getAllCommandComments     # all [&...]

    gm = ["nexustoken._handleComment()"]
    localPieces = [c0]
    # presumably c0 is '['

    c = flob.read(1)
    localPieces.append(c)
    if not c:
        # empty string, ie at the end of the flob.  level can be assumed to be
        # more than zero.
        gm.append(
            "Reached the end while still in a comment. No comment close.")
        raise P4Error(gm)
    if c == ']':
        return None  # appears to be '[]'
    if c == '!' and var.nexus_writeVisibleComments:
        theComment = _getComment(flob, localPieces)
        print(theComment)
        return None
    elif c in ['&']:  # , '\\']:
        if var.nexus_getAllCommandComments:
            # print("_handleComment(), var.nexus_getAllCommandComments is set")
            return _getComment(flob, localPieces)
        c2 = flob.read(1)
        if not c2:
            gm.append("Reached the end while still in a comment.")
            raise P4Error(gm)
        localPieces.append(c2)
        if c == '&' and c2.lower(
        ) == 'w' and var.nexus_getWeightCommandComments:
            return _getComment(flob, localPieces)
        elif c2 == '&' and var.nexus_getP4CommandComments:
            # We may need to get back here, and relative seek will not work.
            startSpot = flob.tell()
            # ask whether the next 3 characters are 'p4 ' (ie p, 4, space)
            c3 = flob.read(3)
            if len(c3) != 3 or c3.lower() != 'p4 ':
                flob.seek(startSpot)
                _skipComment(flob)
                return
            # So it is a p4 comment.  Get it.
            localPieces.append(c3)
            return _getComment(flob, localPieces)
        else:
            return _skipComment(flob)  # None, normally
    else:
        _skipComment(flob)
        return
Beispiel #2
0
def nexusSkipPastBlockEnd(flob):
    """Read up to and including a block 'end' or 'endblock'.

    Tokens are read from *flob* with nextTok().  When an 'end' or
    'endblock' token (any case) is found, the following token must be
    a semicolon, and then this function returns None.  Any other
    command is skipped up to its semicolon.

    Raises P4Error if the 'end' is not followed by a semicolon, or if
    the tokens run out before an 'end' or 'endblock' is found.
    """
    # This should only ever be issued after a semi-colon

    complaintHead = '\nNexus: nexusSkipPastBlockEnd()'
    if hasattr(flob, 'name'):
        complaintHead += " file: %s" % flob.name

    while True:
        tok = nextTok(flob)
        if not tok:
            break
        # Use the str method; the module-level function string.lower()
        # does not exist in Python 3.
        lowTok = tok.lower()
        if lowTok in ('end', 'endblock'):
            tok2 = nextTok(flob)
            if tok2 != ';':
                gm = [complaintHead]
                gm.append("    Expecting a semicolon after %s" % tok)
                if not tok2:
                    gm.append("Got nothing.")
                else:
                    gm.append("Got '%s'" % tok2)
                raise P4Error(gm)
            return
        if lowTok == ';':
            # for pathological cases where the last command is a ';' by
            # itself.
            continue
        pf.nexusSkipPastNextSemiColon(nt.nexusToken, flob)

    gm = [complaintHead]
    gm.append("Failed to find either 'end' or 'endblock'")
    gm.append("Premature end of file?")
    raise P4Error(gm)
Beispiel #3
0
    def composition(self, sequenceNumberList=None):
        """Like Alignment.composition(), but for the part, only.

        If *sequenceNumberList* is empty or not given, all self.nTax
        sequences are used.  Otherwise it should be a list of ints,
        each from 0 to self.nTax - 1, or else a P4Error is raised.

        Returns whatever pf.partComposition() returns for self.cPart.
        """

        gm = ['Part: composition()']
        if sequenceNumberList:
            if not isinstance(sequenceNumberList, list):
                gm.append("The sequenceNumberList should be a list, ok?")
                raise P4Error(gm)
            for seqNum in sequenceNumberList:
                if not isinstance(seqNum, int):
                    gm.append("The sequenceNumberList should be integers, ok?")
                    raise P4Error(gm)
                if not 0 <= seqNum <= self.nTax - 1:
                    gm.append(
                        "Item '%i' in sequenceNumberList is out of range" % seqNum)
                    raise P4Error(gm)
        else:
            sequenceNumberList = range(self.nTax)

        if not self.cPart:
            self.alignment._initParts()

        # Poke a 1 for each listed sequence number, and a 0 for the rest.
        for taxNum in range(self.nTax):
            flag = 1 if taxNum in sequenceNumberList else 0
            pf.pokePartTaxListAtIndex(self.cPart, flag, taxNum)
        return pf.partComposition(self.cPart)
 def give_degen(self, codon, pos=1):
     """Return the degenerate codon for *codon* at position *pos*.

     That is the degenerate codon representing the group of codons
     synonymous to *codon* and having the same nucleotide at position
     *pos* as *codon*.  Results are remembered in self.cod2degen[pos].

     A P4Error is raised if *codon* codes for more than one amino-acid
     (ie it is already degenerate), or if no suitable degenerate codon
     is found in self.degen_by_aa.
     """
     known = self.cod2degen[pos]
     if codon not in known:
         if len(codon.aas) != 1:
             msg = CAT(["%s is already a degenerate codon." % codon,
                        "More thinking will be necessary in order to decide ",
                        "how to deal with such a case.\n"])
             raise P4Error(msg)
         aa = next(iter(codon.aas))
         # Remember the first candidate that contains the codon.
         for candidate in list(self.degen_by_aa[aa][pos]):
             if codon in candidate:
                 known[codon] = candidate
                 break
     try:
         return known[codon]
     except KeyError:
         # The search above finished without finding a suitable
         # degenerate codon.
         msg = CAT(["The degenerate codon for %s " % codon,
                    "having the same nucleotide at position %d " % pos,
                    "cannot be found.\nIt could be that %s " % codon,
                    "is already degenerate and spans several degeneracy classes.\n"])
         raise P4Error(msg)
def nexusSkipPastBlockEnd(flob):
    """Read up to and including a block 'end' or 'endblock'.

    Tokens are read from *flob* with nextTok().  When an 'end' or
    'endblock' token (any case) is found, the following token must be
    a semicolon, and then this function returns None.  Any other
    command is skipped with nexusSkipPastNextSemiColon().

    Raises P4Error if the 'end' is not followed by a semicolon, or if
    the tokens run out before an 'end' or 'endblock' is found.
    """
    # This should only ever be issued after a semi-colon
    gm = ['nexustoken.nexusSkipPastBlockEnd()']
    while True:
        tok = nextTok(flob)
        if not tok:
            break
        lowTok = tok.lower()
        if lowTok in ('end', 'endblock'):
            tok2 = nextTok(flob)
            if tok2 != ';':
                # The heading is already the first item of gm; there is
                # no separate complaintHead to print in this function.
                gm.append("Expecting a semicolon after %s" % tok)
                if not tok2:
                    gm.append("Got nothing.")
                else:
                    gm.append("Got '%s'" % tok2)
                raise P4Error(gm)
            return
        if lowTok == ';':
            # for pathological cases where the last command is a ';' by
            # itself.
            continue
        nexusSkipPastNextSemiColon(flob)

    gm.append("Failed to find either 'end' or 'endblock'")
    gm.append("Premature end of file?")
    raise P4Error(gm)
Beispiel #6
0
 def _setBinSize(self, binSize):
     try:
         theBinSize = float(binSize)
     except (ValueError, TypeError):
         raise P4Error("Arg binSize, if set, should be a float.")
     if theBinSize <= 0.0:
         raise P4Error("Arg binSize, if set, should be a positive float.")
     self._binSize = theBinSize
    def __init__(self, tree, runNum, program='p4', mbBaseName=None, directory='.', verbose=1):
        """Set up from a tree with a model, and read the sample files.

        Args:
            tree: a p4 Tree with a model and taxNames attached.
            runNum: the run number; it is stored as an int.
            program: the program that generated the files, 'p4' or
                'mrbayes', in any case.  The lower-case form is stored
                in self.program.
            mbBaseName: stored in self.mbBaseName when the program is
                'mrbayes'.
            directory: must be an existing directory.
            verbose: if 1 or more, the number of samples is printed.

        Raises P4Error if the tree or the program is not acceptable, if
        a comp has no val set, or if the number of tree samples differs
        from the number of parameter samples (when the model has free
        parameters).
        """

        gm = ["PosteriorSamples()  init"]
        if not isinstance(tree, Tree) or not tree.model:
            gm.append("Instantiate with a p4 tree with a model attached.")
            raise P4Error(gm)
        if not tree.taxNames:
            gm.append(
                "The tree should have taxNames (in proper order!) attached.")
            raise P4Error(gm)

        self.tree = tree

        # Work on a copy of the model, so the model of the tree is not
        # changed by the edits below.
        self.model = copy.deepcopy(self.tree.model)
        for pNum in range(self.model.nParts):
            part = self.model.parts[pNum]
            for compNum in range(part.nComps):
                comp = part.comps[compNum]
                if comp.spec == 'empirical':
                    comp.spec = 'specified'
                if not comp.val[0]:
                    gm.append(
                        "Comp %i in partition %i has no val set." % (compNum, pNum))
                    gm.append("Maybe fix by calculating a likelihood before?")
                    raise P4Error(gm)
        self.model.cModel = None
        self.runNum = int(runNum)
        self.goodPrograms = ['p4', 'mrbayes']
        lowProgram = program.lower()
        # Check the lower-cased name, as that is what is stored and used
        # below; checking the raw arg wrongly rejected eg 'P4'.
        if lowProgram not in self.goodPrograms:
            gm.append(
                "The program generating the files should be one of %s" % self.goodPrograms)
            raise P4Error(gm)
        self.program = lowProgram
        self.verbose = verbose
        assert os.path.isdir(directory)
        self.directory = directory

        if self.program == 'p4':
            self._readP4Files()
        elif self.program == 'mrbayes':
            self.mbBaseName = mbBaseName
            self._readMrBayesFiles()

        self.nSamples = len(self.tLines)
        if self.tree.model.nFreePrams:
            # With free parameters there should be one parameter line
            # for each tree line.
            nPLines = len(self.pLines)
            if self.nSamples and self.nSamples == nPLines:
                if self.verbose >= 1:
                    print("Got %i samples." % self.nSamples)
            else:
                gm.append(
                    "Got %i tree samples, but %i parameter samples." % (self.nSamples, nPLines))
                raise P4Error(gm)
Beispiel #8
0
    def updateToCommonLeafSetWithTaxa(self, tfl, dict, bitkeys, taxnames):
        """Make split keys for all trees in *tfl*, relative to *taxnames*.

        The set of taxon names found in the trees must be the same as
        *taxnames*, or a P4Error is raised.  *dict* is indexed by taxon
        name; its values are OR-ed together to make t.missingTaxa and
        are assigned to the br.rc of the leaves.  *bitkeys* is not used
        here.

        As a side effect self.weights gets one weight per tree (from
        t.weight or t.recipWeight if available, otherwise 1.0), and
        each tree gets missingTaxa and splits attributes.

        Returns a list with, for each tree, its list of
        [rcKey, missingTaxa] pairs.
        """
        gm = ['updateToCommonLeafSetWithTaxa(tfl, dict, bitkeys, taxnames)']

        # Gather all the taxon names that are found in the trees.
        namesInTrees = set()
        for treeNum in range(tfl.nSamples):
            theTree = tfl.getTree(treeNum)
            if not theTree._taxNames:
                theTree._setTaxNamesFromLeaves()
            for name in theTree.taxNames:
                namesInTrees.add(name)
        foundNames = list(namesInTrees)

        # The names in the trees and the supplied names should agree,
        # both ways.
        for name in foundNames:
            if name not in taxnames:
                gm.append(
                    'Found taxa in that does not appear in the supplied taxa list: '
                    + name)
                raise P4Error(gm)

        for name in taxnames:
            if name not in foundNames:
                gm.append(
                    'Found taxa in supplied taxa list that does not appear in the supertree: '
                    + name)
                raise P4Error(gm)

        self.weights = []
        allSplits = []
        for treeNum in range(tfl.nSamples):
            theTree = tfl.getTree(treeNum)
            if not theTree._taxNames:
                theTree._setTaxNamesFromLeaves()

            # Accumulate the dict values of the taxa absent from this tree.
            theTree.missingTaxa = 0
            for name in taxnames:
                if name not in theTree.taxNames:
                    theTree.missingTaxa |= dict[name]

            for leaf in theTree.iterLeavesNoRoot():
                leaf.br.rc = dict[leaf.name]
            theTree._makeRCSplitKeys()

            # A weight attribute, if there is one, takes precedence over
            # recipWeight, even if the weight is None.
            weight = 1.0
            if hasattr(theTree, "weight"):
                if theTree.weight != None:
                    weight = theTree.weight
            elif hasattr(theTree, "recipWeight"):
                if theTree.recipWeight != None:
                    weight = 1.0 / int(theTree.recipWeight)
            self.weights.append(weight)

            theTree.splits = []
            for node in theTree.iterInternalsNoRoot():
                for rcKey in node.br.rcList:
                    theTree.splits.append([rcKey, theTree.missingTaxa])
            allSplits.append(theTree.splits)

        return allSplits
Beispiel #9
0
 def __init__(self, trees=None, taxNames=[]):
     """Set up with a list of trees and a taxNames list.

     Args:
         trees: a non-empty list of Tree objects.  If None, var.trees
             is used, which then must not be empty.
         taxNames: a list of (at least one) strings.  If it is empty
             or not given, the taxNames of the first tree is used.
             The default list is never modified here.

     Raises P4Error if there are no trees, if either arg is not as
     described, or if no taxNames can be found.
     """
     gm = ['Trees()']
     if trees is None:
         if not var.trees:
             gm.append(
                 "Arg trees is not given or is empty, and var.trees is empty.  No trees."
             )
             raise P4Error(gm)
         self.trees = var.trees
     else:
         if not isinstance(trees, list):
             gm.append("If provided, the trees arg should be a list.")
             raise P4Error(gm)
         if not len(trees):
             gm.append("The list of trees appears to be empty.")
             raise P4Error(gm)
         if not isinstance(trees[0], Tree):
             gm.append(
                 "The first item in the input list is not a Tree object.")
             raise P4Error(gm)
         self.trees = trees

     if taxNames:
         if not (isinstance(taxNames, list) and isinstance(taxNames[0], str)):
             gm.append(
                 "If provided, arg taxNames should be a list of (at least one) string(s)."
             )
             # Previously the message was composed but never raised, so
             # a bad taxNames arg was passed on to setTaxNames().
             raise P4Error(gm)
     else:
         # self.trees is the trees arg if it was given, else var.trees.
         if self.trees[0].taxNames:
             taxNames = self.trees[0].taxNames

     if not taxNames:
         gm.append("I can't find a taxNames list in the input trees. ")
         gm.append(
             "In this case you need to feed this a taxNames list when you instantiate."
         )
         gm.append(
             "Often you can get a good taxNames from yourAlignment.taxNames."
         )
         gm.append(
             "(The order often matters, and generally should be same for all your analyses.)"
         )
         raise P4Error(gm)
     self.taxNames = None
     self.setTaxNames(taxNames)
     self.data = None
     self.writeBranchLengths = True
    def writeNexus(self,
                   fName=None,
                   writeTaxaBlock=1,
                   append=0,
                   digitsAfterDecimal=6):
        """Write out self in Nexus format.

        If *fName* is None or sys.stdout, the output goes to stdout.
        Otherwise it is the name of the file to write.

        If writeTaxaBlock=1, then a Taxa block is written before the
        Distances block. Append, if 0, writes #NEXUS first.  If 1,
        does not write #NEXUS.

        If append is requested but *fName* is not an existing regular
        file, a note is printed and a new file, starting with #NEXUS,
        is written instead.

        The writing itself is done by self.writeNexusToOpenFile().
        Raises P4Error if the file cannot be opened.
        """

        gm = ["DistanceMatrix.writeNexus()"]
        assert self.dim, "Distance Matrix.writeNexus() no dim"

        if fName is None or fName == sys.stdout:
            self.writeNexusToOpenFile(sys.stdout, writeTaxaBlock, append,
                                      digitsAfterDecimal)
            return

        import os
        mode = 'w'
        purpose = 'writing'
        needsHeader = False
        if append:
            if os.path.isfile(fName):
                mode = 'a'
                purpose = 'appending'
            else:
                # There was a broken argument-less os.path.lexists() call
                # here, which raised a TypeError, so this fallback could
                # never be reached.  If fName cannot be written to, the
                # open() below complains.
                print(gm[0])
                print("    'append' is requested,")
                print(
                    "    but '%s' is not a regular file (maybe it doesn't exist?)."
                    % fName)
                print("    Writing to a new file instead.")
                needsHeader = True

        try:
            f = open(fName, mode)
        except IOError:
            gm.append("Can't open %s for %s." % (fName, purpose))
            raise P4Error(gm)

        # Close the file even if the writing fails part way.
        try:
            if needsHeader:
                f.write('#NEXUS\n\n')
            self.writeNexusToOpenFile(f, writeTaxaBlock, append,
                                      digitsAfterDecimal)
        finally:
            f.close()
Beispiel #11
0
    def histo(self, verbose=True, binSize=None, padMin=None, padMax=None):
        """Put the data nicely into bins.

        After you do this, the bins are available in self.bins, a list
        of pairs.

        If *binSize* is given, self.binSize is set to it first.  If
        there is no bin size and the data have no range (ie max=min),
        a P4Error is raised, as is the case if no bins get made.

        Args *padMin* and *padMax* extend the range up or down, where
        the extended bins would have zero content, so they would be
        placeholders.  It would be good for making two different
        histos have exactly the same range, eg so you can plot them in
        the same plot.  It is asserted that *padMin* is not more than
        self.min, and that *padMax* is not less than self.max.

        If *verbose* is set, a summary and the bins are printed.
        """

        gm = ['Numbers.histo()']
        if padMin != None:
            assert padMin <= self.min
        if padMax != None:
            assert padMax >= self.max, "padMax is %f, self.max is %f" % (
                padMax, self.max)
        if binSize:
            # a property, so it checks to make sure it is a float
            self.binSize = binSize

        # If the data are all the same, it is not clear how to make
        # bins, unless there is a binSize defined.
        if not self.binSize and not self.range:
            self.dump()
            gm.append(
                "The data are all the same.  max=min.  That will not work."
            )
            raise P4Error(gm)

        self._makeBins(padMin, padMax)
        if not self.bins:
            gm.append("No bins.")
            raise P4Error(gm)

        if not verbose:
            return

        summary = (len(self.data), self.min, self.max, self.binSize, self.nBins)
        print("%i data points, min=%s, max=%s, binSize=%s, nBins=%i" % summary)
        if padMin != None or padMax != None:
            print("padMin=%s, padMax=%s" % (padMin, padMax))
        print("%i points at min, %i points at max" %
              (self.data.count(self.min), self.data.count(self.max)))
        for aBin in self.bins:
            print(f"{aBin[0]:.10}  {aBin[1]}")
Beispiel #12
0
    def _readFastaMakeSeq(self, splHeadLine, sLineList):
        """Make and return a Sequence from the pieces of a fasta entry.

        *splHeadLine* is the split-up head line; its first item is the
        name, and if it has exactly two items, the second is the
        comment.  *sLineList* is the list of sequence lines, which are
        joined and made lower case.

        Raises P4Error if there is no name or no sequence.
        """
        gm = ['SequenceList._readFastaMakeSeq()']

        if not (splHeadLine and splHeadLine[0]):
            gm.append("No name for new fasta sequence.  This should not happen.")
            raise P4Error(gm)
        if not sLineList:
            gm.append("No sequence for %s" % splHeadLine)
            raise P4Error(gm)

        newSeq = Sequence()
        newSeq.name = splHeadLine[0]
        hasComment = len(splHeadLine) == 2
        if hasComment:
            newSeq.comment = splHeadLine[1]
        joined = ''.join(sLineList)
        newSeq.sequence = joined.lower()
        return newSeq
    def __init__(self, taxNames, constraintTree=None, rooting=False):
        """Collect the constraints, ie split keys, from a constraint tree.

        *taxNames* is given to the *constraintTree*, and the split keys
        of its internal non-root nodes are collected, without
        duplicates, in self.constraints.  self.allOnes is the number
        with as many one-bits as there are taxNames.

        If *rooting* is on, the constraint tree must be either biRoot
        or triRoot, or a P4Error is raised.  It is asserted that there
        is at least one constraint.
        """

        gm = ["Constraints init()"]
        self.taxNames = taxNames
        self.cTree = constraintTree
        self.rooting = rooting
        self.constraints = []
        self.allOnes = (1 << len(self.taxNames)) - 1

        if self.cTree:
            self.cTree.taxNames = taxNames
            self.cTree.makeSplitKeys()
            for node in self.cTree.iterInternalsNoRoot():
                if node.br.splitKey in self.constraints:
                    continue
                self.constraints.append(node.br.splitKey)

        if self.rooting and not self.cTree.isBiRoot() and not self.cTree.isTriRoot():
            gm.append("Constraints cTree is neither biRoot nor triRoot")
            gm.append("When rooting is on, the tree should be one or the other")
            raise P4Error(gm)

        assert self.constraints, "No constraints?"
Beispiel #14
0
def _getComment(flob):
    startPos = flob.tell()
    # ie level of nested comments.  This assumes that we have not already read
    # one '['.
    level = 0
    while 1:
        c = flob.read(1)
        if 0:
            if c in string.whitespace:
                print("gc %3i  whitespace" % flob.tell())
            else:
                print("gc %3i  %s" % (flob.tell(), c))
        if not c:
            gm = ["NexusToken._getComment()"]
            gm.append("Reached the end while still in a comment.")
            raise P4Error(gm)
        if c == '[':
            level = level + 1
        elif c == ']':
            level = level - 1
        if level == 0:
            endPos = flob.tell()
            flob.seek(startPos)
            theLen = endPos - startPos
            return flob.read(theLen)
Beispiel #15
0
    def clustalo(self):
        """Do an alignment with clustalo.

        Its all done in memory -- no files are written.  The sequences
        are fed to the clustalo program as fasta on its stdin, and the
        result is read from its stdout.  If clustalo writes anything
        to stderr, a P4Error is raised.

        An alignment object is returned.

        The order of the sequences in the new alignment is made to be
        the same as the order in self.

        """
        fastaFlob = io.BytesIO()          # gotta be Bytes for subprocess
        self.writeFastaToBytesFlob(fastaFlob)
        proc = Popen(["clustalo", "-i", "-"],
                     stdin=PIPE, stdout=PIPE, stderr=PIPE)
        result = proc.communicate(input=fastaFlob.getvalue())
        if result[1]:
            print(result)
            raise P4Error("clustalo()  Something wrong here ...")
        fastaFlob.close()

        # The output is a bytes string, so decode it before reading.
        aligned = p4.func.readAndPop(result[0].decode())
        aligned.makeSequenceForNameDict()
        aligned.sequences = [
            aligned.sequenceForNameDict[s.name] for s in self.sequences]
        return aligned
Beispiel #16
0
    def muscle(self):
        """Do an alignment with muscle.

        Its all done in memory -- no files are written.  The sequences
        are fed to the muscle program as fasta on its stdin, and the
        result is read from its stdout.  If the result cannot be read,
        the output is printed and a P4Error is raised.

        An alignment object is returned.

        The order of the sequences in the new alignment is made to be
        the same as the order in self.

        """
        fastaFlob = io.BytesIO()
        self.writeFastaToBytesFlob(fastaFlob)
        proc = Popen(["muscle"], stdin=PIPE, stdout=PIPE, stderr=PIPE)
        result = proc.communicate(input=fastaFlob.getvalue())
        fastaFlob.close()

        try:
            aligned = p4.func.readAndPop(result[0].decode())
        except P4Error:
            print(result)
            raise P4Error("Something didn't work ...")

        aligned.makeSequenceForNameDict()
        aligned.sequences = [
            aligned.sequenceForNameDict[s.name] for s in self.sequences]
        return aligned
Beispiel #17
0
    def renameForPhylip(self, dictFName='p4_renameForPhylip_dict.py'):
        """Rename with strict phylip-friendly short boring names.

        The new names are s0, s1, and so on, in the order of
        self.sequences.

        It saves the old names (together with the new) in a python
        dictionary, in a file, by default named
        p4_renameForPhylip_dict.py.  The file also gets the list of
        the original names, taken from self.taxNames if self has that
        attribute, otherwise from the sequences.

        The file is written before any sequence is renamed, so that if
        the file cannot be written the original names are not lost.

        Raises P4Error if the file *dictFName* already exists.
        """

        gm = ['SequenceList.renameForPhylip()']
        if os.path.exists(dictFName):
            gm.append("The dictionary file '%s' already exists." % dictFName)
            raise P4Error(gm)
        if hasattr(self, 'taxNames'):
            originalNames = self.taxNames
        else:
            originalNames = [s.name for s in self.sequences]

        newNames = ['s%i' % i for i in range(len(self.sequences))]
        d = {newName: s.name for newName, s in zip(newNames, self.sequences)}

        # The context manager closes the file even if the write fails.
        with open(dictFName, 'w') as f:
            f.write("p4_renameForPhylip_originalNames = %s\np4_renameForPhylip_dict = %s\n" % (
                originalNames, d))

        for newName, s in zip(newNames, self.sequences):
            s.name = newName
Beispiel #18
0
 def set_aas(self, code=None):
     """This method determines the set of amino-acids coded by the non-degenerate codons
     implied by self, and stores it in self.aas.
     A genetic code can be specified according to which the codon should be translated.
     *code* is the Biopython name of the genetic code under which degeneracy has to be
     interpreted, or a dictionary. It should take codons as keys and their
     translation as values. By default, the genetic code of the codon (self.code) will be used.

     If self.degenerate is set, the codons given by self.decomposition() are translated,
     otherwise self itself is.
     Raises P4Error if the code has no translation for one of the codons.
     """
     if code is None:
         # self.code should be something valid.
         code = self.code
     elif isinstance(code, str):
         code = getBiopythonCode(code)
     else:
         msg = "code must be a dictionary, or a string naming the code in Biopython."
         assert isinstance(code, dict), msg
     # We assume that the "codons" have all the same length,
     # and we look at the first codon in the dictionary to know this length.
     # (In Python 3 the keys() view cannot be indexed, so iterate instead.
     # An empty code gives a length of zero, which fails the assert below.)
     codelength = len(next(iter(code.keys()), ""))
     assert codelength == 3, "Amino-acids should be coded by triplets of nucleotides."
     try:
         if self.degenerate:
             self.aas = {code[str(codon)] for codon in self.decomposition()}
         else:
             self.aas = {code[str(self)]}
     except KeyError:
         raise P4Error("The code does not say what the translation "
                                     "of %s should be.\n" % str(self))
    def generateInputTrees(self):
        """Make the output trees, by removing taxa from copies of self.inputTree.

        Each copy gets a name, 'inputtree1' and so on, has some taxa
        removed, and is appended to self.outputTrees if it ends up
        with the expected number of taxa.  If it does not, a complaint
        is printed instead.

        If self.writeInputTreesToFile is set, the output trees are
        appended to the file self.outputFile.

        Raises P4Error if, when not using the taxon distribution,
        self.noTaxaToRemove would leave less than 3 taxa in the tree.
        """
        gm = ['SuperTreeInputTrees.generateInputTrees()']

        if self.useTaxonDistribution:
            # Prepare the distribution by normalizing it to the size of
            # the input tree
            self._prepareDistribution(len(self.inputTree.taxNames))
        elif self.noTaxaToRemove >= len(self.inputTree.taxNames) - 2:
            # Check if input values are valid
            gm.append(
                'The number of taxa to remove would leave less than 3 taxa in the tree, quite uninformative')
            raise P4Error(gm)

        # Create the output trees and remove taxa from them according to
        # the settings.  Check that each output tree has the correct
        # number of taxa, as a precaution.
        for treeNum in range(self._noOutputTrees()):
            newTree = self.inputTree.dupe()
            newTree.name = 'inputtree' + str(treeNum + 1)
            taxa2Remove = self._noTaxaToRemove()
            for _ in range(taxa2Remove):
                newTree.removeNode(self._getTaxaToRemove(newTree.nodes))

            newTree._setTaxNamesFromLeaves()
            if len(newTree.taxNames) != len(self.inputTree.taxNames) - taxa2Remove:
                print('Bugger, the correct number of taxa were not removed, taxa remaining: ', len(newTree.taxNames))
                print('Expected: ', len(self.inputTree.taxNames) - taxa2Remove)
            else:
                self.outputTrees.append(newTree)

        # Write the trees to file
        if self.writeInputTreesToFile:
            for outTree in self.outputTrees:
                outTree.writeNewick(
                    fName=self.outputFile,
                    withTranslation=0,
                    translationHash=None,
                    doMcmcCommandComments=0,
                    toString=False,
                    append=True)
    def _commonCStuff(self, resetEmpiricalComps=True):
        """Allocate and set c-stuff, and setPrams.

        If there is no self.cTree yet, it is allocated first, passing
        on *resetEmpiricalComps*.  Raises P4Error if the tree has no
        data attached.
        """
        if not self.data:
            if self.name:
                heading = "Tree %s  (_commonCStuff)" % self.name
            else:
                heading = "Tree (_commonCStuff)"
            gm = [
                heading,
                "This tree has no data attached.  Before doing an optimization, likelihood",
                "calculation, or simulation, you need to do something like this:",
                "    theTree.data = theData",
            ]
            raise P4Error(gm)

        if not self.cTree:
            # This calls self.modelSanityCheck(), which calls
            # self.setEmpiricalComps()
            self._allocCStuff(resetEmpiricalComps=resetEmpiricalComps)

        self.model.setCStuff()
        self.setCStuff()
        allParts = -1  # "-1" means do all parts
        pf.p4_setPrams(self.cTree, allParts)
Beispiel #21
0
 def _set_nexus_getLineEndingsAsTokens(self, newVal):
     try:
         newVal = int(newVal)
     except:
         gm = ['This property should be set to an int.']
         raise P4Error(gm)
     self._nexus_getLineEndingsAsTokens[0] = newVal
Beispiel #22
0
 def _set_interactiveHelper(self, newVal):
     goodValues = [None, 'p3rlcompleter', 'bpython', 'ipython']
     if newVal in goodValues:
         self._interactiveHelper = newVal
     else:
         gm = ['This property should be set to one of %s' % goodValues]
         raise P4Error(gm)
Beispiel #23
0
 def _set_nexus_getAllCommandComments(self, newVal):
     try:
         newVal = int(newVal)
     except:
         gm = ['This property should be set to an int.']
         raise P4Error(gm)
     self._nexus_getAllCommandComments[0] = newVal
Beispiel #24
0
    def reverseComplement(self):
        """Convert self.sequence, a DNA sequence, to its reverse complement.

        It is asserted that self.dataType is 'dna'.  Only lowercase
        characters are handled.  The ambiguity codes are complemented
        as well; '-', 'n', 's', and 'w' stay as they are.  Any other
        character, including 'x' and uppercase, raises a P4Error.
        """

        assert self.dataType == 'dna'

        # Ambiguity codes, for reference:
        # {'b': 'cgt', 'd': 'agt', 'h': 'act', 'k': 'gt', 'm': 'ac',
        #  'n': 'acgt', 's': 'cg', 'r': 'ag', 'w': 'at', 'v': 'acg', 'y': 'ct'}
        complementOf = {
            'a': 't', 't': 'a', 'c': 'g', 'g': 'c',
            '-': '-', 'n': 'n',
            'r': 'y', 'y': 'r',
            'b': 'v', 'v': 'b',
            'd': 'h', 'h': 'd',
            'k': 'm', 'm': 'k',
            's': 's', 'w': 'w',
        }

        # Work on a list, which is what self.sequence is until the end.
        bases = list(self.sequence)
        bases.reverse()
        self.sequence = bases
        for position, base in enumerate(bases):
            if base not in complementOf:
                gm = ["Sequence.reverseComplement()"]
                if base in string.ascii_uppercase:
                    gm.append("Got uppercase '%s' How did that happen? -- can only handle lowercase." % base)
                else:
                    gm.append("Sequence.reverseComplement().  Got char '%s' What is it?" % base)
                raise P4Error(gm)
            bases[position] = complementOf[base]

        self.sequence = ''.join(bases)
Beispiel #25
0
def recode_sequence(sequence, converter, positions=None, code="Standard"):
    """Return a recoded copy of *sequence*, using the rules in *converter*.

    *sequence* is cut into consecutive motifs (codons) whose size is the
    length of the keys of *converter*.  Each motif is wrapped in a Codon,
    and is replaced by ``converter[motif]`` when the converter has an entry
    for it; motifs without an entry are kept as they are.  A degenerate
    codon is decomposed into its non-degenerate codons, each of those is
    converted, and the results are recombined with reduce_by_or().

    *converter* is a dictionary mapping motifs to their replacements.  All
    its keys are assumed to have the same length as the first one.  It is
    not modified by this function.

    *positions*, if given, is a collection of 1-based positions within the
    motif.  Only those positions are taken from the replacement; the other
    positions keep the character of the original motif.  By default, all
    positions are recoded.

    *code* is either a dictionary, or a string that is handed to
    getBiopythonCode() to obtain the code.

    A P4Error is raised if *converter* is empty, or if the length of
    *sequence* is not a multiple of the motif size.  A motif that raises
    CodonTranslationError is reported on stderr and replaced by indels;
    if even the indel motif cannot be made into a Codon, the program exits
    with status 1.
    """
    gm = ['p4.code_utils.recode_sequence()']
    if isinstance(code, str):
        code = getBiopythonCode(code)
    else:
        msg = "code must be a dictionary, or a string naming the code in Biopython."
        assert isinstance(code, dict), msg
    if not converter:
        gm.append("The converter is empty, so the size of the motifs is unknown.")
        raise P4Error(gm)
    # To get the size of the motifs being substituted, we look at the first
    # key of the dictionary.  (dict.keys() cannot be indexed in Python 3.)
    subst_size = len(next(iter(converter)))
    if len(sequence) % subst_size != 0:
        gm.append("The length of the sequence should be a multiple of %i" % subst_size)
        raise P4Error(gm)
    if positions is not None:
        # Filter the converter: take the positions to be recoded from the
        # converted codon, but keep the others.  This builds a new
        # dictionary, so the caller's converter is left untouched.
        converter = {
            codon: CAT(
                [(convert[i - 1] if i in positions else codon[i - 1])
                 for i in range(1, subst_size + 1)])
            for codon, convert in converter.items()}
    # Build the recoded version of the sequence, one piece per motif.
    pieces = []
    # Loop over the codons (triplets, if subst_size == 3).
    for start in range(0, len(sequence), subst_size):
        stop = start + subst_size
        try:
            # Make a Codon instance (to convert it afterwards).
            codon = Codon(sequence[start:stop], code)
        except CodonTranslationError as e:
            sys.stderr.write(
                "%s\nProblem at sequence slice %i:%i\n" % (e, start, stop))
            warnings.warn("We will replace the codon by indels.\n")
            try:
                codon = Codon("-" * subst_size, code)
            except CodonTranslationError:
                sys.stderr.write("We still don't know how to translate the codon. "
                                 "Bad implementation?\n")
                sys.exit(1)
        # Convert the codon.
        # If the converter has no entry for the codon, we don't convert it,
        # hence the converter.get() syntax, using a default value.
        if codon.degenerate:
            # The codon is decomposed into non-degenerate codons.
            # These codons are converted, and the resulting conversions
            # are "recomposed" into a new codon.
            motifs = [str(cod) for cod in codon.decomposition()]
            pieces.append(str(reduce_by_or(
                [Codon(converter.get(motif, motif), code) for motif in motifs])))
        else:
            motif = str(codon)
            pieces.append(str(Codon(converter.get(motif, motif), code)))
    return "".join(pieces)
Beispiel #26
0
    def writeEmpiricalProteinModelInPAMLFormat(self, compNum, rMatrixNum,
                                               outFileName):
        """Write one composition and one rate matrix of self to a text file.

        The composition is ``self.comps[compNum].val`` and the rate matrix
        is ``self.rMatrices[rMatrixNum]``.  If the rate matrix spec is one
        of var.rMatrixProteinSpecs, the full 20 x 20 matrix is obtained from
        p4.func.getProteinEmpiricalModelRMatrix(); otherwise the 190 values
        of its ``val`` are taken as the upper triangle, row by row, and
        mirrored into a symmetric 20 x 20 matrix.

        The file *outFileName* is overwritten.  It gets the lower triangle
        of the matrix (rows 1 to 19, one row per line, without the
        diagonal), a blank line, and then the 20 composition values on one
        line.  Values are separated by single spaces.

        A P4Error is raised if self.dim is not 20, or if the ``val`` of the
        rate matrix does not have shape (190,).
        """
        gm = ["ModelPart.writeEmpiricalProteinModelInPAMLFormat()"]

        if self.dim != 20:
            gm.append(f"dim should be 20, got {self.dim}")
            raise P4Error(gm)

        # Get comp
        cVal = self.comps[compNum].val
        assert isinstance(cVal, numpy.ndarray)
        assert cVal.shape == (20, )

        # Get rMatrix
        r = self.rMatrices[rMatrixNum]
        if r.spec in var.rMatrixProteinSpecs:
            rVal = p4.func.getProteinEmpiricalModelRMatrix(
                r.spec, upperTriangle=False)  # full r matrix
        else:
            assert isinstance(r.val, numpy.ndarray)
            if r.val.shape != (190, ):
                gm.append(f"r.val.shape is {r.val.shape}, expecting (190,)")
                raise P4Error(gm)
            # Unpack the 190 values, upper triangle row by row, and mirror
            # each one across the diagonal.
            rVal = numpy.zeros((20, 20))
            counter = 0
            for row in range(20):
                for col in range(row + 1, 20):
                    rVal[row][col] = rVal[col][row] = r.val[counter]
                    counter += 1
            assert counter == 190
        assert rVal.shape == (20, 20)

        # Write it.  The with-block closes the file even if a write fails.
        with open(outFileName, "w") as fout:
            for row in range(1, 20):
                for col in range(0, row):  # lower triangle
                    print(rVal[row][col], end=" ", file=fout)
                print(file=fout)

            print(file=fout)
            for it in range(20):
                print(cVal[it], end=" ", file=fout)
            print(file=fout)
    def __init__(self, theTree=None):
        """Set up the picture, optionally for the tree *theTree*.

        If *theTree* is given it needs a ``nodes`` attribute with at least
        two nodes, otherwise a P4Error is raised.  The xPos and yPos of
        each of its nodes are reset to None.  All the drawing settings are
        then given their default values.
        """
        gm = ['TreePicture.__init__()']
        self.tree = None
        if theTree:
            try:
                nNodes = len(theTree.nodes)
                if nNodes < 2:
                    gm.append(
                        "Only 1 node in this tree, so it can't be drawn."
                        if nNodes == 1 else "No nodes.")
                    raise P4Error(gm)
            except AttributeError:
                # No 'nodes' attribute, so it is not tree-like.
                gm.append("Expecting a Tree instance.")
                raise P4Error(gm)

            self.tree = theTree
            # Forget any node positions left over from earlier drawing.
            for aNode in theTree.nodes:
                aNode.xPos = None
                aNode.yPos = None

        # Not yet decided
        self.width = self.fName = None
        self.heightFactor = self.pointsPerLetter = None

        # Text sizes, and the name placement that is derived from them
        self.textSize = 11
        self.labelTextSize = 9
        self.nameOffset = self.textSize / 3.0
        self.nameDrop = self.textSize / 4.0

        # Flags
        self.showInternalNodeNames = self.textShowNodeNums = 1
        self.putInternalNamesOnBranches = 0
        self.addToBrLen = 0.0

        # Geometry
        self.xOrigin = self.yOrigin = 0.0
        self.xScale = self.yScale = None

        self.partNum = -1
        self.textDrawModelThing = None

        self.svg = False
Beispiel #28
0
    def checkTaxNames(self):
        """Check the taxNames of every tree against self.taxNames.

        A P4Error is raised if self.taxNames is empty or unset, or as soon
        as a tree is found whose taxNames compares unequal to
        self.taxNames.  Only when all the trees have passed is the
        checkTaxNames() method of each tree called, in order.
        """

        gm = ['Trees.checkTaxNames()']
        if not self.taxNames:
            gm.append("No taxNames.")
            raise P4Error(gm)

        # Find the first tree, if any, that disagrees with self.taxNames.
        # The comparison is '!=', that is by value.
        oddTree = next(
            (aTree for aTree in self.trees
             if aTree.taxNames != self.taxNames), None)
        if oddTree is not None:
            if oddTree.name:
                complaint = "Tree %s taxNames is not the same object as self.taxNames." % oddTree.name
            else:
                complaint = "Tree taxNames is not the same object as self.taxNames."
            gm.append(complaint)
            raise P4Error(gm)

        # All agree, so let each tree do its own check.
        for aTree in self.trees:
            aTree.checkTaxNames()
    def resetSequencesFromParts(self):
        """Get the sequences back from self.parts, and install them in self.

        Where the characters come from depends on var.doDataPart.  If it is
        off, they come from pf.symbolSequences(part.cPart), which is read
        here as one long string holding the characters of all the sequences
        of that part, one sequence after another.  If it is on, they come
        from part.sequenceString(sequenceNumber), one sequence at a time.

        If there is a single part that covers the whole alignment (with
        var.doDataPart off it must also be named 'all') the sequences are
        simply replaced.  Otherwise the characters of each part are written
        back to the sites where the mask of the corresponding subset of
        self.nexusSets.charPartition is '1'.

        A P4Error is raised if there are no parts.
        """

        if not self.parts:
            gm = ["Alignment.resetSequencesFromParts()"]
            gm.append("No parts.")
            raise P4Error(gm)

        usingDataParts = var.doDataPart

        # A single part for the whole alignment: replace each sequence.
        if not usingDataParts:
            if len(self.parts) == 1 and self.parts[0].name == 'all':
                allSeq = pf.symbolSequences(self.parts[0].cPart)
                for i, s in enumerate(self.sequences):
                    s.sequence = allSeq[(i * self.length):((i + 1) * self.length)]
                return
        elif len(self.parts) == 1:
            for i, s in enumerate(self.sequences):
                s.sequence = self.parts[0].sequenceString(i)
            return

        def scatter(target, mask, source, spot):
            """Copy from source, starting at spot, to the masked sites of target.

            Returns the position in source after the last character used."""
            for k in range(self.length):
                if mask[k] == '1':
                    target[k] = source[spot]
                    spot += 1
            return spot

        # Several parts.  Strings are immutable, so work on lists of
        # characters, and join them again at the end.
        for s in self.sequences:
            s.sequence = list(s.sequence)
        subsets = self.nexusSets.charPartition.subsets
        for pNum, part in enumerate(self.parts):
            mask = subsets[pNum].mask
            if usingDataParts:
                # One string per sequence, each read from its start.
                for sNum, s in enumerate(self.sequences):
                    scatter(s.sequence, mask, part.sequenceString(sNum), 0)
            else:
                # One string for the whole part, so the read position
                # carries on from one sequence to the next.
                partSeq = pf.symbolSequences(part.cPart)
                spot = 0
                for s in self.sequences:
                    spot = scatter(s.sequence, mask, partSeq, spot)
        for s in self.sequences:
            s.sequence = ''.join(s.sequence)
Beispiel #30
0
 def __getitem__(self, i):
     """Return the nucleotide value at position *i* of *self*.

     Positions are numbered 1, 2 and 3.  Any other *i* raises a P4Error.
     """
     # Attribute names in position order.  Only the attribute for the
     # matching position is looked up.
     for position, attrName in enumerate(("v1", "v2", "v3"), start=1):
         if i == position:
             return getattr(self, attrName)
     raise P4Error("A codon has only 3 positions.")