Beispiel #1
0
    def fastme(self):
        """Use fastme to make a minimum-evolution tree, which is returned.

        The distance matrix is written in phylip format to a uniquely-named
        file, the external ``fastme`` program is run on it through the
        shell, and the tree file that it writes is read in by p4.  On
        success both files are removed again.

        We interact with fastme by writing files, but care is taken that
        existing files are not overwritten, because new file names are
        made to be unique.

        If the branch lengths are less than zero, they are made to be zero.
        Names on internal (non-leaf) nodes are removed.

        Returns:
            The tree read from the fastme output.

        Raises:
            Glitch: if reading the fastme output did not add exactly one
                tree to ``var.trees``.
        """

        gm = ["DistanceMatrix.fastme()"]

        flob_dm, dmFName_fq = func.uniqueFile('tmp.dm')
        # The tree file object is closed straight away; only its unique
        # name is needed, as fastme writes the file itself.
        flob_tf, treeFName_fq = func.uniqueFile('tmp.tree')
        flob_tf.close()

        # Only the bare file names are used from here on.
        dmFName = os.path.basename(dmFName_fq)
        treeFName = os.path.basename(treeFName_fq)

        # Write the files, do the analysis
        self.writePhylipToOpenFile(flob_dm, 6)
        flob_dm.close()

        os.system("fastme -i %s -o %s" % (dmFName, treeFName))

        # This is the result.  The tree, if it exists, is read in by p4.
        oldLen = len(var.trees)
        func.read(treeFName)
        newLen = len(var.trees)
        if newLen != oldLen + 1:
            # Report the number of trees actually read (new minus old).
            gm.append("I was expecting exactly one tree.  Got %i" %
                      (newLen - oldLen))
            # Call syntax is valid in both Python 2 and Python 3.
            raise Glitch(gm)
        t = var.trees.pop()

        # Tidy up.
        os.remove(treeFName)
        os.remove(dmFName)

        for n in t.iterNodesNoRoot():
            if n.br.len < 0.0:
                n.br.len = 0.0
            if not n.isLeaf:
                n.name = None

        return t
    def fastme(self):
        """Use fastme to make a minimum-evolution tree, which is returned.

        The distance matrix is written in phylip format to a uniquely-named
        file, the external ``fastme`` program is run on it through the
        shell, and the tree file that it writes is read in by p4.  On
        success both files are removed again.

        We interact with fastme by writing files, but care is taken that
        existing files are not overwritten, because new file names are
        made to be unique.

        If the branch lengths are less than zero, they are made to be zero.
        Names on internal (non-leaf) nodes are removed.

        Returns:
            The tree read from the fastme output.

        Raises:
            P4Error: if reading the fastme output did not add exactly one
                tree to ``var.trees``.
        """

        gm = ["DistanceMatrix.fastme()"]

        flob_dm, dmFName_fq = func.uniqueFile('tmp.dm')
        # The tree file object is closed straight away; only its unique
        # name is needed, as fastme writes the file itself.
        flob_tf, treeFName_fq = func.uniqueFile('tmp.tree')
        flob_tf.close()

        # Only the bare file names are used from here on.
        dmFName = os.path.basename(dmFName_fq)
        treeFName = os.path.basename(treeFName_fq)

        # Write the files, do the analysis
        self.writePhylipToOpenFile(flob_dm, 6)
        flob_dm.close()

        os.system("fastme -i %s -o %s" % (dmFName, treeFName))

        # This is the result.  The tree, if it exists, is read in by p4.
        oldLen = len(var.trees)
        func.read(treeFName)
        newLen = len(var.trees)
        if newLen != oldLen + 1:
            # Report the number of trees actually read (new minus old).
            gm.append("I was expecting exactly one tree.  Got %i" %
                      (newLen - oldLen))
            raise P4Error(gm)
        t = var.trees.pop()

        # Tidy up.
        os.remove(treeFName)
        os.remove(dmFName)

        for n in t.iterNodesNoRoot():
            if n.br.len < 0.0:
                n.br.len = 0.0
            if not n.isLeaf:
                n.name = None

        return t
Beispiel #3
0
def func2():
    """Build and store the user and item time structures for the training data.

    Reads ``./data/train.csv``, keeps the five columns used downstream,
    converts them to a numpy array, and feeds that array together with the
    stored ``user_id`` / ``item_id`` objects to the builders in ``func``
    and ``itemmodel``.  The user-item-time and item-time results are
    written out via ``func.write``.

    Returns:
        A tuple ``(user_item_time, item_time, item_factor)``, where
        ``item_factor`` is whatever ``itemmodel.cal_item_time_factor``
        computes from the item-time structure.
    """
    wanted_columns = ['user_id','brand_id','type','month','day']
    records = np.array(pd.read_csv('./data/train.csv')[wanted_columns])

    users = func.read('user_id')
    items = func.read('item_id')

    by_user = func.build_user_item_time(records, users)
    by_item = itemmodel.build_item_time(records, items)
    factor = itemmodel.cal_item_time_factor(by_item)

    # Store the two structures under the same names the original used.
    for payload, label in ((by_user, 'user_item_time'), (by_item, 'item_time')):
        func.write(payload, label)

    return (by_user, by_item, factor)
Beispiel #4
0
def func3():
    """Build and store the train/test structures from a train/test split.

    The split comes from ``func.divide_data()``.  From the training part a
    user-item-time structure and an item-time structure are built; from the
    test part only a user-item structure is built.  All three are written
    out via ``func.write``.

    Returns:
        A tuple ``(train_user_item_time, test_user_item, train_item_time,
        item_factor)``.
    """
    train_part, test_part = func.divide_data()
    users = func.read('user_id')
    items = func.read('item_id')

    train_by_user = func.build_user_item_time(train_part, users)
    # User-item dictionary for the test set; no time series is needed here.
    test_by_user = func.build_user_item(test_part, users)
    # Item-time dictionary for the training set.
    train_by_item = itemmodel.build_item_time(train_part, items)
    # Time-decay factor of the items.
    decay_factor = itemmodel.cal_item_time_factor(train_by_item)

    outputs = (
        (train_by_user, 'train_user_item_time'),
        (test_by_user, 'test_user_item'),
        (train_by_item, 'train_item_time'),
    )
    for payload, label in outputs:
        func.write(payload, label)

    return (train_by_user, test_by_user, train_by_item, decay_factor)
Beispiel #5
0
def fun1():
    """Build the user-time-item structure from the training CSV and store it.

    Reads ``./data/train.csv``, keeps the five columns used downstream,
    converts them to a numpy array, builds the structure with
    ``func.build_user_time_item`` and writes it out via ``func.write``.
    Nothing is returned.
    """
    wanted_columns = ['user_id','brand_id','type','month','day']
    records = np.array(pd.read_csv('./data/train.csv')[wanted_columns])
    users = func.read('user_id')
    func.write(func.build_user_time_item(records, users), 'user_time_item')
Beispiel #6
0
def index():
    """Show the guest messages and handle a newly posted one.

    Guest messages are stored in the json file ``data.json``.  The loaded
    data is passed to ``read()``, which per the original notes turns it
    into a list of rows in the (<name>, <date>, <message>) format, and the
    rows are handed to the ``index.html`` template.

    When the submitted form validates, the new name and message are merged
    into the rows by ``write()``, the result is dumped back to
    ``data.json`` and the browser is redirected to this view again so the
    change becomes visible.
    """
    with open('data.json', 'r') as stored:
        rows = read(json.load(stored))
    # We also want to know how many messages are in the data.
    num_messages_to_show = checkLen(rows)

    form = InputForm()

    if not form.validate_on_submit():
        # Nothing (valid) was posted: just display the page.  The keyword
        # arguments are the variables made available to the template.
        return render_template('index.html', rows=rows, num_messages_to_show=num_messages_to_show, form=form)

    # Valid post: add the received message and persist the updated data.
    author = form.name.data
    text = form.message.data
    updated = write(rows, author, text)
    with open('data.json', 'w') as target:
        json.dump(updated, target)

    # Refresh the index page to show the change.
    return redirect(url_for('index'))
Beispiel #7
0
def getdata(peer):
    """Return the stored preference data for ``peer``.

    The data lives as JSON text in ``userdata/user_<peer>.pref``.  When
    ``read()`` yields nothing for that path, the module-level defaults
    ``_basic`` are serialised, written to the file, and used instead.

    The result is always freshly parsed from JSON text, so the shared
    ``_basic`` object itself is never handed out.  Callers can therefore
    modify the returned data without changing the defaults, and the first
    call returns the same kind of object as every later call.
    """
    path = "userdata/user_%s.pref" % (peer)
    raw = read(path)
    if not raw:
        raw = json.dumps(_basic)
        write(path, raw)
    return json.loads(raw)
def getdata(peer):
    """Return the stored preset data for ``peer``.

    The data lives as JSON text in ``custom/preset_<peer>.pref``.  When
    ``read()`` yields nothing for that path, the module-level defaults
    ``_basic`` are serialised, written to the file, and used instead.

    The result is always freshly parsed from JSON text, so the shared
    ``_basic`` object itself is never handed out.  Callers can therefore
    modify the returned data without changing the defaults, and the first
    call returns the same kind of object as every later call.
    """
    path = "custom/preset_%s.pref"%(peer)
    raw = read(path)
    if not raw:
        raw = json.dumps(_basic)
        write(path, raw)
    return json.loads(raw)
    def __init__(self, supertree, inputTrees):
        """Set up the support calculation for a supertree and its input trees.

        Args:
            supertree: a p4 Tree object, or the name of a file from which
                exactly one tree is read.
            inputTrees: a list of p4 Tree objects, or the name of a file
                from which at least one tree is read.

        Raises:
            P4Error: if either argument has an unsupported type, if the
                input tree file yields no tree, or if the supertree file
                does not yield exactly one tree.

        Note that reading from a file resets ``var.trees``, and that
        ``var.warnReadNoFile`` is set to False.
        """

        # There are two ways of decorating the supertree with the support
        # values.  Standard conforms to the consensus tree tradition, i.e.
        # values are presented between 0 to 100 percent.  Non standard
        # adheres to the few supertree papers regarding support values,
        # i.e. -1 to 1.
        self.doStandardDecoration = True

        # The decorated supertree can be saved to file
        self.doSaveDecoratedTree = False
        self.decoratedFilename = "superTreeSupport.nex"

        # There is an option to save a supertree decorated with index values
        # instead of support values.  This can then be used with a csv file
        # containing the support values for each index.  Further analysis of
        # the support values can be performed and then matched to the
        # indices in the decorated supertree.
        self.doSaveIndexTree = False
        self.indexFilename = "supertreeIndex.nex"
        self.csvFilename = "supertreeIndex.csv"

        # Draws the decorated supertree to screen
        self.doDrawTree = False

        # Produces output to screen
        self.verbose = 1

        # Placeholders that allow access to the data after completing
        # calculations
        self.decoratedSuperTree = None
        self.indexSuperTree = None
        self.csvList = None

        # Keeps track of splits for producing output
        self.indexIntersections = []
        self.csvValues = []
        self.intersections = []

        # Let t be the number of input trees,
        # s the number of input trees supporting a supertree clade,
        # r the number of input trees that are irrelevant to the supertree clade,
        # q the number of input trees that conflict with the supertree clade,
        # p the number of input trees that permit the supertree clade,
        # so that t = p + q + r + s.

        self.T = 0  # no. of input trees;
        self.L = 0  # no. of leaves;
        # coverage (average proportion of leaves in the input tree);
        self.C = 0.0
        self.SC = 0  # number of supertree clades;
        self.U = 0  # no. of unsupported supertree clades;
        # no. of unsupported supertree clades that conflict with at least one
        # input tree;
        self.UC = 0
        # no. of unsupported clades conflicting with all relevant input trees;
        self.UCC = 0
        # average qualitative support for supertree clades. Figures in
        # parentheses are ranges.
        self.QS = 0.0
        self.S = 0.0  # average support
        self.P = 0.0  # average permitted
        self.Q = 0.0  # average conflict
        self.R = 0.0  # average relevance
        self.wS = 0.0  # average weighted support
        self.wP = 0.0  # average weighted permittance
        self.V = 0.0  # average V for supertree clades, V = (s minus q)/(s + q)
        self.VV = 0.0  # V+ = (s minus q +p)/(s + q + p)
        self.Vv = 0.0  # V minus = (s minus q minus p)/(s + q + p)
        self.wV = 0.0  # wV = (ws minus q)/(ws + q)
        self.wVV = 0.0  # wVV = (ws minus q +wp)/(ws + q + wp)
        self.wVv = 0.0  # wVv = (ws minus q minus wp)/(ws + q + wp)

        gm = ["SuperTreeSupport()"]

        var.warnReadNoFile = False

        if isinstance(inputTrees, list):
            for t in inputTrees:
                if not isinstance(t, Tree):
                    gm.append("Input trees should be a list of p4 Tree objects. Got %s" % (t,))
                    raise P4Error(gm)
            self.inputTrees = inputTrees
        elif isinstance(inputTrees, str):
            var.trees = []
            read(inputTrees)
            if len(var.trees) < 1:
                gm.append("Sorry, at least one tree must be supplied as input tree")
                raise P4Error(gm)
            self.inputTrees = var.trees
        else:
            gm.append("Input trees are neither a list of p4 Tree objects nor a valid filename.")
            raise P4Error(gm)

        if isinstance(supertree, Tree):
            self.supertree = supertree  # not a list.
        elif isinstance(supertree, str):
            var.trees = []
            read(supertree)
            if len(var.trees) > 1:
                gm.append("Sorry, supply only one tree as supertree")
                raise P4Error(gm)
            if not var.trees:
                # Without this check the pop() below would fail with a bare
                # IndexError that does not say what went wrong.
                gm.append("No tree could be read from the supertree file %s" % supertree)
                raise P4Error(gm)
            # this was originally a list, ie [var.trees.pop()]
            self.supertree = var.trees.pop()
        else:
            gm.append("Supertree was neither a p4 Tree nor a valid filename")
            gm.append("Got %s" % (supertree,))
            raise P4Error(gm)

        for tree in self.inputTrees:
            if not tree._taxNames:
                tree._setTaxNamesFromLeaves()

        # Mean and median overlap (number of shared taxon names) over all
        # pairs of input trees.  The name sets are built once per tree.
        taxNameSets = [set(tree.taxNames) for tree in self.inputTrees]
        overlapList = []
        for i in range(len(taxNameSets) - 1):
            for j in range(i + 1, len(taxNameSets)):
                overlapList.append(len(taxNameSets[i] & taxNameSets[j]))

        if not overlapList:
            # Fewer than two input trees: there are no pairs to compare.
            self.mean = 0
            self.median = 0
        else:
            self.mean = float(sum(overlapList)) / len(overlapList)
            overlapList.sort()
            # Floor division keeps the index an int on Python 3 as well.
            self.median = overlapList[len(overlapList) // 2]

        commonLeafSet = CommonLeafSet()
        self.splits = commonLeafSet.updateTreesToCommonLeafSet([self.inputTrees, [self.supertree]])
        self.bitkeys = commonLeafSet.getCommonBitkeys()
        self.taxnames = commonLeafSet.getCommonTaxNames()
        self.taxa2Bitkey = commonLeafSet.getCommonTaxa2Bitkey()
    def __init__(self, inputTree, distributionTrees=None):
        """
        SuperTreeInputTrees is a utility to create sets of input trees.
        The input trees are primarily to be used to evaluate super tree
        construction methods.

        Invocation removing a fixed number of taxa from each prospective input tree::

            stit = SuperTreeInputTrees(inputTree)
            stit.writeInputTreesToFile = True
            stit.outputFile = 'myInputtrees.tre'
            stit.noTaxaToRemove = 32
            stit.noOutputTrees = 10
            stit.generateInputTrees()


        Invocation using built in distribution gathered from real world super tree cases::

            stit = SuperTreeInputTrees(inputTree)
            stit.writeInputTreesToFile = True
            stit.outputFile = 'myInputtrees.tre'
            stit.useTaxonDistribution = True
            stit.generateInputTrees()

        The user can generate a distribution of their own by supplying a list of p4 trees or a tree file.
        The order of the trees is important, supertree and then all other trees. This goes for both list and
        file. Like so::

            stit = SuperTreeInputTrees(inputTree, distributionTrees='myTreefile.nex')
            stit.writeInputTreesToFile = True
            stit.outputFile = 'myInputtrees.tre'
            stit.useTaxonDistribution = True
            stit.generateInputTrees()

        Placeholders which allow access to data after completed computations::

            stit.outputTrees
            stit.dist

        Args:
            inputTree: a p4 Tree object, or the name of a file from which
                exactly one tree is read.
            distributionTrees: optional; a list of p4 Tree objects or the
                name of a tree file, supertree first.  A list passed in is
                not modified.

        Raises:
            P4Error: if an argument has an unsupported type, if the input
                tree file does not yield exactly one tree, or if the
                distribution tree file yields no tree.
        """

        self.writeInputTreesToFile = False
        self.outputFile = "inputtrees.tre"

        # Set to False if you want to have a set number of taxa in the output
        # trees
        self.useTaxonDistribution = False
        # Only meaningful if setting useTaxonDistribution = False
        self.noTaxaToRemove = 32
        self.noOutputTrees = 10

        gm = ["SuperTreeInputTrees()"]

        if isinstance(inputTree, Tree):
            self.inputTree = inputTree  # not a list.
        elif isinstance(inputTree, str):
            var.trees = []
            read(inputTree)
            if len(var.trees) > 1:
                gm.append("Sorry, supply only one tree as supertree")
                raise P4Error(gm)
            if not var.trees:
                # Without this check the pop() below would fail with a bare
                # IndexError that does not say what went wrong.
                gm.append("No tree could be read from the input tree file %s" % inputTree)
                raise P4Error(gm)
            # this was originally a list, ie [var.trees.pop()]
            self.inputTree = var.trees.pop()
        else:
            gm.append("Input tree was neither a p4 Tree nor a valid filename")
            gm.append("Got %s" % (inputTree,))
            raise P4Error(gm)

        if not self.inputTree._taxNames:
            self.inputTree._setTaxNamesFromLeaves()

        self.outputTrees = []

        self.normalizedDist = []

        # Distributions gathered from real world supertree input
        # The dists are first a list of input tree taxon set sizes and the supertree taxon set size
        # Using this data we can normalize the dists to fit the size of trees
        # we want

        # BunnyRSVNormal set from Wilkinson et al 2005, Syst Biol 54:823
        #        self.dist = [[3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 8, 8, 8, 9, 9, 9, 9, 10, 10, 10, 10, 10, 10, 10, 11, 11, 11, 11, 12, 12, 13, 13, 13, 13, 13, 14, 14, 15, 17, 17, 18, 18, 18, 18, 18, 19, 19, 19, 20, 20, 20, 21, 22, 22, 23, 24, 25, 25, 25, 25, 25, 25, 26, 27, 28, 29, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 37, 38, 38, 40, 40, 41, 47, 51, 51, 52, 52, 52, 68, 70, 78, 78, 79, 80, 80], 80]

        # CanidaeRVS set from Wilkinson et al 2005, Syst Biol 54:823
        # self.dist = [[3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 5, 6, 7, 8, 8, 9, 10, 11, 11, 11, 12, 16, 16, 20, 23, 24, 30, 30, 33, 34, 34, 34, 34, 34], 34]

        # CarnivoraRVS set from Wilkinson et al 2005, Syst Biol 54:823
        # self.dist = [[3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 8, 8, 8, 8, 9, 9, 9, 10, 10, 10, 10, 11, 11, 11, 11, 11, 11, 12, 12, 12, 12, 12], 12]

        # DavideDinoMRP set from Wilkinson et al 2005, Syst Biol 54:823
        # self.dist = [[4, 4, 4, 5, 6, 6, 6, 7, 8, 8, 9, 9, 9, 10, 10, 10, 10, 10, 11, 11, 11, 12, 12, 12, 13, 14, 14, 14, 14, 15, 15, 15, 15, 16, 16, 17, 18, 18, 18, 18, 19, 19, 19, 19, 19, 19, 20, 20, 20, 22, 23, 23, 24, 24, 25, 26, 27, 27, 28, 28, 29, 29, 29, 29, 29, 30, 30, 30, 31, 31, 31, 31, 33, 33, 33, 33, 36, 37, 37, 38, 38, 39, 42, 45, 47, 48, 50, 53, 53, 66, 70, 71, 74, 74, 75, 75, 76, 78, 78, 80, 86, 86, 92, 94, 96, 100, 101, 102, 102, 103, 105, 110, 111, 111, 139, 148, 149, 153, 173, 199, 204, 217, 240, 269, 270, 271, 272, 272, 273, 273, 273, 273, 274, 275], 277]

        # FelidaeRVS set from Wilkinson et al 2005, Syst Biol 54:823
        self.dist = [
            [
                3, 3, 3, 3, 3, 4, 4, 4, 5, 6,
                6, 6, 7, 7, 7, 7, 9, 9, 10, 10,
                14, 16, 17, 24, 25, 28, 29, 29, 30, 30,
                32, 34, 36, 36, 36, 36, 36, 36, 36, 36,
            ],
            36,
        ]

        # KennedyPageData set from Wilkinson et al 2005, Syst Biol 54:823
        # self.dist = [[14, 16, 17, 20, 30, 30, 90], 122]

        # ViverridaeRVS set from Wilkinson et al 2005, Syst Biol 54:823
        # self.dist = [[4, 5, 10, 16, 19, 33, 34, 34, 34], 34]

        if distributionTrees:
            self.useTaxonDistribution = True
            if isinstance(distributionTrees, list):
                for t in distributionTrees:
                    if not isinstance(t, Tree):
                        gm.append("Input trees should be a list of p4 Tree objects. Got %s" % (t,))
                        raise P4Error(gm)
                # First tree is the supertree, the rest are the input trees.
                # Slicing (rather than pop) leaves the caller's list intact.
                superTree = distributionTrees[0]
                inputTrees = distributionTrees[1:]
            elif isinstance(distributionTrees, str):
                var.trees = []
                read(distributionTrees)
                if len(var.trees) < 1:
                    gm.append("Sorry, at least one tree must be supplied as input tree")
                    raise P4Error(gm)
                superTree = var.trees.pop(0)
                inputTrees = var.trees
            else:
                # Previously this case fell through to a NameError below.
                gm.append("Distribution trees are neither a list of p4 Tree objects nor a valid filename.")
                raise P4Error(gm)
            self._generateDistribution(superTree, inputTrees)
    def njUsingPaup(self, paupPath='paup'):
        """Use paup to make a neighbor-joining tree, which is returned.

        The distance matrix is written in nexus format to a uniquely-named
        file, a small paup command file is written next to it, and paup is
        run on the command file through the shell with its output sent to
        /dev/null.  The tree file saved by paup is read in by p4.  On
        success all three files are removed again.

        We interact with paup by writing files, but care is taken that
        existing files are not overwritten, because new file names are
        made to be unique.

        If this does not work well, try setting the paupPath arg.

        Args:
            paupPath: the command used to start paup.

        Returns:
            The tree read from the paup output, with negative branch
            lengths set to zero.

        Raises:
            Glitch: if reading the paup output did not add exactly one
                tree to ``var.trees``.
        """

        gm = ["DistanceMatrix.njUsingPaup()"]

        flob_dm, dmFName_fq = func.uniqueFile('tmp.dm')
        # The tree file object is closed straight away; only its unique
        # name is needed, as paup writes the file itself.
        flob_tf, treeFName_fq = func.uniqueFile('tmp.tree')
        flob_tf.close()
        flob_pf, pFName = func.uniqueFile('tmp.cmds')

        # Only the bare file names are used for the matrix and tree files.
        dmFName = os.path.basename(dmFName_fq)
        treeFName = os.path.basename(treeFName_fq)

        # Make the paup commands
        paupCommandString = """#nexus
        begin paup;
          execute %s;
          set crit=dist;
          dset negbrlen=setzero;
          nj;
          savetrees file=%s format=altnex brlens=yes taxablk=yes replace=yes;
          quit;
        end;
        
        """ % (dmFName, treeFName)

        # Write the files, do the analysis.  The arguments after the file
        # object are writeTaxaBlock, append, digitsAfterDecimal.
        self.writeNexusToOpenFile(flob_dm, True, False, 6)
        flob_dm.close()
        flob_pf.write(paupCommandString)
        flob_pf.close()

        os.system("%s -n %s > /dev/null" % (paupPath, pFName))

        # This is the result.  The tree, if it exists, is read in by p4.
        oldLen = len(var.trees)
        func.read(treeFName)
        newLen = len(var.trees)
        if newLen != oldLen + 1:
            # Report the number of trees actually read (new minus old).
            gm.append("I was expecting exactly one tree.  Got %i" % (newLen - oldLen))
            # Call syntax is valid in both Python 2 and Python 3.
            raise Glitch(gm)
        t = var.trees.pop()

        # Tidy up.
        os.remove(treeFName)
        os.remove(pFName)
        os.remove(dmFName)

        for n in t.iterNodesNoRoot():
            if n.br.len < 0.0:
                n.br.len = 0.0

        return t
Beispiel #12
0
    def relist(self):
        """Refill the card table for the card set currently selected.

        If no library has been chosen yet, ``self.open()`` is called
        first; when there is still no library afterwards, nothing is done.
        Otherwise the table is emptied and filled from the file that
        belongs to the selected set: the library itself for 'HT', or
        'TK.xml' / 'StandardCards.xml' from the library's directory.  The
        name completer is rebuilt from the same card names.  For any
        other selection an error box is shown.  The column header labels
        are (re)applied in every case.
        """
        try:
            print(self.library)
        except AttributeError:
            # No library has been chosen yet.
            self.open()

        library = getattr(self, 'library', None)
        if library is None:
            return
        print(library)

        self.list_cards.clear()
        self.list_cards.setRowCount(0)
        self.row = 0

        selected_set = self.set.currentText()

        if selected_set == 'HT':
            self.esc = func.read(library)
            self.cards = self._card_names(self.esc)
            self._fill_card_rows(self.cards)

        elif selected_set == 'TK':
            self.esc_TK = func.read(library[:library.rfind('/')] + '/TK.xml')
            self.TK = self._card_names(self.esc_TK)
            self._fill_card_rows(self.TK)

        elif selected_set == 'Standart':
            self.esc_std = func.read(library[:library.rfind('/')] +
                                     '/StandardCards.xml')
            self.std = self._card_names(self.esc_std)
            self._fill_card_rows(self.std)

        else:
            # "Error" / "Failed to load the cards"
            QMessageBox.about(self, "Ошибка", "Не удалось подгрузить карты")

        # Column headers: name, type, class, rarity, cost.
        self.list_cards.setHorizontalHeaderLabels(
            ["Имя", "Тип", "Класс", "Редкость", "Стоимость"])

    @staticmethod
    def _card_names(entries):
        """Return the keys of ``entries`` cut off at their last '.png'."""
        # NOTE(review): a key without '.png' loses its last character
        # (rfind returns -1); kept as in the original -- confirm that all
        # keys end in '.png'.
        return [key[:key.rfind('.png')] for key in entries.keys()]

    def _fill_card_rows(self, names):
        """Add one table row per card name and rebuild the name completer."""
        for card_name in names:
            self.row += 1
            self.list_cards.setRowCount(self.row + 1)
            row_index = self.row - 1
            self.list_cards.setItem(row_index, 0, QTableWidgetItem(card_name))
            # The remaining four columns are filled with the placeholder '1'.
            for column in range(1, 5):
                self.list_cards.setItem(row_index, column,
                                        QTableWidgetItem('1'))
        # NOTE(review): this leaves one row fewer than the number of names
        # just added; kept as in the original -- confirm it is intended.
        self.list_cards.setRowCount(self.row - 1)

        self.completer = QCompleter(names)
        self.name.setCompleter(self.completer)
Beispiel #13
0
    def njUsingPaup(self, paupPath='paup'):
        """Use paup to make a neighbor-joining tree, which is returned.

        The distance matrix is written in nexus format to a uniquely-named
        file, a small paup command file is written next to it, and paup is
        run on the command file through the shell with its output sent to
        /dev/null.  The tree file saved by paup is read in by p4.  On
        success all three files are removed again.

        We interact with paup by writing files, but care is taken that
        existing files are not overwritten, because new file names are
        made to be unique.

        If this does not work well, try setting the paupPath arg.

        Args:
            paupPath: the command used to start paup.

        Returns:
            The tree read from the paup output, with negative branch
            lengths set to zero.

        Raises:
            Glitch: if reading the paup output did not add exactly one
                tree to ``var.trees``.
        """

        gm = ["DistanceMatrix.njUsingPaup()"]

        flob_dm, dmFName_fq = func.uniqueFile('tmp.dm')
        # The tree file object is closed straight away; only its unique
        # name is needed, as paup writes the file itself.
        flob_tf, treeFName_fq = func.uniqueFile('tmp.tree')
        flob_tf.close()
        flob_pf, pFName = func.uniqueFile('tmp.cmds')

        # Only the bare file names are used for the matrix and tree files.
        dmFName = os.path.basename(dmFName_fq)
        treeFName = os.path.basename(treeFName_fq)

        # Make the paup commands
        paupCommandString = """#nexus
        begin paup;
          execute %s;
          set crit=dist;
          dset negbrlen=setzero;
          nj;
          savetrees file=%s format=altnex brlens=yes taxablk=yes replace=yes;
          quit;
        end;
        
        """ % (dmFName, treeFName)

        # Write the files, do the analysis.  The arguments after the file
        # object are writeTaxaBlock, append, digitsAfterDecimal.
        self.writeNexusToOpenFile(flob_dm, True, False, 6)
        flob_dm.close()
        flob_pf.write(paupCommandString)
        flob_pf.close()

        os.system("%s -n %s > /dev/null" % (paupPath, pFName))

        # This is the result.  The tree, if it exists, is read in by p4.
        oldLen = len(var.trees)
        func.read(treeFName)
        newLen = len(var.trees)
        if newLen != oldLen + 1:
            # Report the number of trees actually read (new minus old).
            gm.append("I was expecting exactly one tree.  Got %i" %
                      (newLen - oldLen))
            # Call syntax is valid in both Python 2 and Python 3.
            raise Glitch(gm)
        t = var.trees.pop()

        # Tidy up.
        os.remove(treeFName)
        os.remove(pFName)
        os.remove(dmFName)

        for n in t.iterNodesNoRoot():
            if n.br.len < 0.0:
                n.br.len = 0.0

        return t
Beispiel #14
0
    def __init__(self, supertree, inputTrees):

        #        There are two ways of decorating the supertree with the support values.
        #        Standard conforms to the consensus tree tradition, i.e. values are presented between
        #        0 to 100 percent. Non standard adhears to the few supertree papers regarding support values
        #        i.e -1 to 1.
        self.doStandardDecoration = True

#        The decorated supertree can be saved to file
        self.doSaveDecoratedTree = False
        self.decoratedFilename = 'superTreeSupport.nex'

#        There is a option to save a supertree decorated with index values instead of support values.
#        This can then be used with a csv file containing the support values for each index.
#        Further analysis of the support values can be performed and then matched to the indecies in the
#        decorated supertree
        self.doSaveIndexTree = False
        self.indexFilename = 'supertreeIndex.nex'
        self.csvFilename = 'supertreeIndex.csv'

#        Draws the decorated supertree to screen
        self.doDrawTree = False

#        Produces output to screen
        self.verbose = 1

# Placeholders that allows access to the data after completing
# calculations
        self.decoratedSuperTree = None
        self.indexSuperTree = None
        self.csvList = None

#       Keeps track of splits for producing output
        self.indexIntersections = []
        self.csvValues = []
        self.intersections = []

#        Let t be the number of input trees,
#        s the number of input trees supporting a supertree clade,
#        r the number of input trees that are irrelevant to the supertree clade,
#        q the number of input trees that conflict with the supertree clade,
#        p the number of input trees that permit the supertree clade,
#        so that t = p + q + r + s.

        self.T = 0  # no. of input trees;
        self.L = 0  # no. of leaves;
        # coverage (average proportion of leaves in the input tree);
        self.C = 0.0
        self.SC = 0  # number of supertree clades;
        self.U = 0  # no. of unsupported supertree clades;
        # no. of unsupported supertree clades that conflict with at least one
        # input tree;
        self.UC = 0
        # no. of unsupported clades conflicting with all relevant input trees;
        self.UCC = 0
        # average qualitative support for supertree clades. Figures in
        # parentheses are ranges.
        self.QS = 0.0
        self.S = 0.0  # average support
        self.P = 0.0  # average permitted
        self.Q = 0.0  # average conflict
        self.R = 0.0  # average relevance
        self.wS = 0.0  # average weighted support
        self.wP = 0.0  # average weighted permitance
        self.V = 0.0  # average V for supertree cladesV = (s minus q)/(s + q)
        self.VV = 0.0  # V+ = (s minus q +p)/(s + q + p)
        self.Vv = 0.0  # V minus = (s minus q minus p)/(s + q + p)
        self.wV = 0.0  # wV = (ws minus q)/(ws + q)
        self.wVV = 0.0  # wVV = (ws minus q +wp)/(ws + q + wp)
        self.wVv = 0.0  # wVv = (ws minus q minus wp)/(ws + q + wp)

        gm = ['SuperTreeSupport()']

        var.warnReadNoFile = False

        if type(inputTrees) == type([]):
            for t in inputTrees:
                if not isinstance(t, Tree):
                    gm.append(
                        "Input trees should be a list of p4 Tree objects. Got %s" % t)
                    raise P4Error(gm)
            self.inputTrees = inputTrees
        elif type(inputTrees) == type(""):
            var.trees = []
            read(inputTrees)
            if len(var.trees) < 1:
                gm.append(
                    'Sorry, at least one tree must be supplied as input tree')
                raise P4Error(gm)
            self.inputTrees = var.trees
        else:
            gm.append(
                "Input trees are neither a list of p4 Tree objects nor a valid filename.")
            raise P4Error(gm)

        if isinstance(supertree, Tree):
            self.supertree = supertree            # not a list.
        elif type(supertree) == type(""):
            var.trees = []
            read(supertree)
            if len(var.trees) > 1:
                gm.append('Sorry, supply only one tree as supertree')
                raise P4Error(gm)
            # this was originally a list, ie [var.trees.pop()]
            self.supertree = var.trees.pop()
        else:
            gm.append("Supertree was neither a p4 Tree nor a valid filename")
            gm.append("Got %s" % supertree)
            raise P4Error(gm)

        for tree in self.inputTrees:
            if not tree._taxNames:
                tree._setTaxNamesFromLeaves()

        # Mean and median overlap of the input trees
        overlapList = []
        meanOverlap = 0.0
        index = 0
        for i in range(0, len(self.inputTrees) - 1):
            for j in range(i + 1, len(self.inputTrees)):
                overlap = len(set(self.inputTrees[i].taxNames).intersection(
                    set(self.inputTrees[j].taxNames)))
                overlapList.append(overlap)
                meanOverlap += overlap
                index += 1

        if index == 0:
            self.mean = 0
            self.median = 0
        else:
            self.mean = meanOverlap / index
            overlapList.sort()
            self.median = overlapList[len(overlapList) / 2]

        commonLeafSet = CommonLeafSet()
        self.splits = commonLeafSet.updateTreesToCommonLeafSet(
            [self.inputTrees, [self.supertree]])
        self.bitkeys = commonLeafSet.getCommonBitkeys()
        self.taxnames = commonLeafSet.getCommonTaxNames()
        self.taxa2Bitkey = commonLeafSet.getCommonTaxa2Bitkey()
Beispiel #15
0
    def __init__(self, inputTree, distributionTrees=None):
        """
        SuperTreeInputTrees is a utility to create sets of input trees.
        The input trees are primarily to be used to evaluate super tree
        construction methods.

        Invocation removing a fixed number of taxa from each prospective input tree::

            stit = SuperTreeInputTrees(inputTree)
            stit.writeInputTreesToFile = True
            stit.outputFile = 'myInputtrees.tre'
            stit.noTaxaToRemove = 32
            stit.noOutputTrees = 10
            stit.generateInputTrees()


        Invocation using built in distribution gathered from real world super tree cases::

            stit = SuperTreeInputTrees(inputTree)
            stit.writeInputTreesToFile = True
            stit.outputFile = 'myInputtrees.tre'
            stit.useTaxonDistribution = True
            stit.generateInputTrees()

        The user can generate a distribution of their own by supplying a list of p4 trees or a tree file.
        The order of the trees is important, supertree and then all other trees. This goes for both list and
        file. Like so::

            stit = SuperTreeInputTrees(inputTree, distributionTrees='myTreefile.nex')
            stit.writeInputTreesToFile = True
            stit.outputFile = 'myInputtrees.tre'
            stit.useTaxonDistribution = True
            stit.generateInputTrees()

        Placeholders which allow access to data after completed computations::

            stit.outputTrees
            stit.dist

        Args:
            inputTree: either a p4 Tree object, or a string that is handed
                to read(); the string must yield exactly one tree.
            distributionTrees: optional; either a list of p4 Tree objects
                or a string that is handed to read().  In both cases the
                first tree is taken as the supertree and the remaining
                trees as its input trees.  When given (and non-empty),
                useTaxonDistribution is switched on and the trees are
                passed to _generateDistribution().  A list passed in
                here is not modified.

        Raises:
            P4Error: if inputTree is neither a Tree nor a string, if
                reading it does not give exactly one tree, or if
                distributionTrees is not a list of Tree objects or a
                string that reads to at least one tree.

        Note that whenever a string argument is read, var.trees is reset
        to an empty list first, so any trees previously held there are
        dropped.
        """

        self.writeInputTreesToFile = False
        self.outputFile = 'inputtrees.tre'

        # Set to False if you want to have a set number of taxa in the output
        # trees
        self.useTaxonDistribution = False
        # Only meaningful if setting useTaxonDistribution = False
        self.noTaxaToRemove = 32
        self.noOutputTrees = 10

        gm = ['SuperTreeInputTrees()']

        if isinstance(inputTree, Tree):
            self.inputTree = inputTree            # not a list.
        elif isinstance(inputTree, str):
            var.trees = []
            read(inputTree)
            if len(var.trees) > 1:
                gm.append('Sorry, supply only one tree as input tree')
                raise P4Error(gm)
            if not var.trees:
                # Without this check the pop() below would raise a bare
                # IndexError with no hint about what went wrong.
                gm.append("No tree could be read from the input tree argument")
                gm.append("Got %s" % inputTree)
                raise P4Error(gm)
            # this was originally a list, ie [var.trees.pop()]
            self.inputTree = var.trees.pop()
        else:
            gm.append("Input tree was neither a p4 Tree nor a valid filename")
            gm.append("Got %s" % inputTree)
            raise P4Error(gm)

        if not self.inputTree._taxNames:
            self.inputTree._setTaxNamesFromLeaves()

        self.outputTrees = []

        self.normalizedDist = []

        # Distributions gathered from real world supertree input
        # The dists are first a list of input tree taxon set sizes and the supertree taxon set size
        # Using this data we can normalize the dists to fit the size of trees
        # we want

        # BunnyRSVNormal set from Wilkinson et al 2005, Syst Biol 54:823
        #self.dist = [[3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 8, 8, 8, 9, 9, 9, 9, 10, 10, 10, 10, 10, 10, 10, 11, 11, 11, 11, 12, 12, 13, 13, 13, 13, 13, 14, 14, 15, 17, 17, 18, 18, 18, 18, 18, 19, 19, 19, 20, 20, 20, 21, 22, 22, 23, 24, 25, 25, 25, 25, 25, 25, 26, 27, 28, 29, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 37, 38, 38, 40, 40, 41, 47, 51, 51, 52, 52, 52, 68, 70, 78, 78, 79, 80, 80], 80]

        # CanidaeRVS set from Wilkinson et al 2005, Syst Biol 54:823
        #self.dist = [[3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 5, 6, 7, 8, 8, 9, 10, 11, 11, 11, 12, 16, 16, 20, 23, 24, 30, 30, 33, 34, 34, 34, 34, 34], 34]

        # CarnivoraRVS set from Wilkinson et al 2005, Syst Biol 54:823
        #self.dist = [[3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 8, 8, 8, 8, 9, 9, 9, 10, 10, 10, 10, 11, 11, 11, 11, 11, 11, 12, 12, 12, 12, 12], 12]

        # DavideDinoMRP set from Wilkinson et al 2005, Syst Biol 54:823
        #self.dist = [[4, 4, 4, 5, 6, 6, 6, 7, 8, 8, 9, 9, 9, 10, 10, 10, 10, 10, 11, 11, 11, 12, 12, 12, 13, 14, 14, 14, 14, 15, 15, 15, 15, 16, 16, 17, 18, 18, 18, 18, 19, 19, 19, 19, 19, 19, 20, 20, 20, 22, 23, 23, 24, 24, 25, 26, 27, 27, 28, 28, 29, 29, 29, 29, 29, 30, 30, 30, 31, 31, 31, 31, 33, 33, 33, 33, 36, 37, 37, 38, 38, 39, 42, 45, 47, 48, 50, 53, 53, 66, 70, 71, 74, 74, 75, 75, 76, 78, 78, 80, 86, 86, 92, 94, 96, 100, 101, 102, 102, 103, 105, 110, 111, 111, 139, 148, 149, 153, 173, 199, 204, 217, 240, 269, 270, 271, 272, 272, 273, 273, 273, 273, 274, 275], 277]

        # FelidaeRVS set from Wilkinson et al 2005, Syst Biol 54:823
        # This is the default distribution; it is replaced further down if
        # distributionTrees are supplied.
        self.dist = [[3, 3, 3, 3, 3, 4, 4, 4, 5, 6, 6, 6, 7, 7, 7, 7, 9, 9, 10, 10, 14,
                      16, 17, 24, 25, 28, 29, 29, 30, 30, 32, 34, 36, 36, 36, 36, 36, 36, 36, 36], 36]

        # KennedyPageData set from Wilkinson et al 2005, Syst Biol 54:823
        #self.dist = [[14, 16, 17, 20, 30, 30, 90], 122]

        # ViverridaeRVS set from Wilkinson et al 2005, Syst Biol 54:823
        #self.dist = [[4, 5, 10, 16, 19, 33, 34, 34, 34], 34]

        if distributionTrees:
            self.useTaxonDistribution = True
            if isinstance(distributionTrees, list):
                for t in distributionTrees:
                    if not isinstance(t, Tree):
                        gm.append(
                            "Input trees should be a list of p4 Tree objects. Got %s" % t)
                        raise P4Error(gm)
                # Index and slice rather than pop(0), so that the caller's
                # list is left untouched and can be reused.
                superTree = distributionTrees[0]
                inputTrees = distributionTrees[1:]
            elif isinstance(distributionTrees, str):
                var.trees = []
                read(distributionTrees)
                if len(var.trees) < 1:
                    gm.append(
                        'Sorry, at least one tree must be supplied as input tree')
                    raise P4Error(gm)
                # var.trees was freshly created above, so popping from it
                # does not disturb anything belonging to the caller.
                superTree = var.trees.pop(0)
                inputTrees = var.trees
            else:
                # Previously this case fell through with superTree and
                # inputTrees unbound, giving an UnboundLocalError below.
                gm.append(
                    "Distribution trees are neither a list of p4 Tree objects nor a valid filename.")
                gm.append("Got %s" % (distributionTrees,))
                raise P4Error(gm)
            self._generateDistribution(superTree, inputTrees)