Esempi in Python per TreeTools.Newick2Nexus

Linguaggio di programmazione: Python

Spazio dei nomi/nome del pacchetto: CGAT

Classe/tipologia: TreeTools

Metodo/funzione: Newick2Nexus

Esempi su hotexamples.com: 30

TreeTools.Newick2Nexus in Python: 30 esempi trovati. Questi sono i migliori esempi reali in Python per CGAT.TreeTools.Newick2Nexus, estratti da progetti open source. Li puoi valutare, per aiutarci a migliorare la qualità dei nostri esempi.

Metodi utilizzati di frequente

Mostra Nascondi

Newick2Nexus(30)

Tree2Newick(9)

GetTaxa(8)

PruneTree(5)

GetSize(5)

Nexus2Newick(5)

MapTaxa(4)

TreeDFS(3)

GetSubsets(3)

Newick2Tree(3)

calculatePatternsFromTree(2)

GetLeaves(2)

IsCompatible(2)

GetDistanceToRoot(1)

IsMonophyleticForTaxa(1)

GetNodeMap(1)

Tree2Graph(1)

GetMaxIndex(1)

Unroot(1)

GetAllNodes(1)

Esempio n. 1

Mostra file

File: evaluate_trees.py Progetto: santayana/cgat

def main(argv=None):
    """Compare sample trees read from stdin against a reference tree.

    The reference tree is the first tree in the file given with
    ``-r``/``--reference``.  Every tree on stdin is checked with
    ``TreeTools.IsCompatible`` and a one-line summary (total, compatible,
    failed, and failures broken down by reason) is printed to stdout.

    NOTE(review): *argv* defaults to ``sys.argv`` but is not forwarded to
    ``E.Start`` in this function -- confirm whether that is intended.

    Raises:
        ValueError: if no reference tree file was supplied.
    """

    if argv is None:
        argv = sys.argv

    parser = E.OptionParser(
        version=
        "%prog version: $Id: optic/evaluate_trees.py 2781 2009-09-10 11:33:14Z andreas $"
    )

    # The long option used to be spelled "--reference=" (getopt style); with
    # optparse that only matched "--reference" via abbreviation handling.
    parser.add_option("-r",
                      "--reference",
                      dest="filename_reference_tree",
                      help="filename with reference tree.",
                      type="string")

    parser.set_defaults(filename_reference_tree=None)

    (options, args) = E.Start(parser)

    if not options.filename_reference_tree:
        # Fail early with a clear message instead of crashing in open(None).
        raise ValueError("please supply reference tree.")

    if options.loglevel >= 1:
        print("# reading reference tree.")

    # Take the tree while the file is still open, then close the handle.
    with open(options.filename_reference_tree, "r") as infile:
        reference_tree = TreeTools.Newick2Nexus(infile).trees[0]

    if options.loglevel >= 1:
        print("# reading sample trees.")

    nexus2 = TreeTools.Newick2Nexus(sys.stdin)

    ntotal, nok, nfailed = 0, 0, 0
    ntopology, ntaxa, nleaves = 0, 0, 0
    for t in nexus2.trees:
        ntotal += 1
        is_ok, reason = TreeTools.IsCompatible(reference_tree, t)
        if is_ok:
            nok += 1
            continue

        nfailed += 1
        # Failures with any other reason are only counted in nfailed.
        if reason == "topology":
            ntopology += 1
        elif reason == "taxa":
            ntaxa += 1
        elif reason == "leaves":
            nleaves += 1

    print("# total=%i, compatible=%i, failed=%i, topology=%i, taxa=%i, leaves=%i" %
          (ntotal, nok, nfailed, ntopology, ntaxa, nleaves))

    E.Stop()

Esempio n. 2

Mostra file

File: tree2plot.py Progetto: yangjl/cgat

def main(argv=None):
    """Read a tree from stdin and print what ``TreeGraph.Run`` returns for it.

    Lines starting with ``#`` are dropped from the input; the remaining
    lines are parsed with ``TreeTools.Newick2Nexus`` and only the first
    tree is used.
    """

    if argv is None:
        argv = sys.argv

    parser = E.OptionParser(
        version="%prog version: $Id: tree2plot.py 2782 2009-09-10 11:40:29Z andreas $")
    parser.set_defaults()

    options, args = E.Start(parser, add_pipe_options=True)

    # skip comment lines
    tree_lines = [line for line in sys.stdin.readlines() if line[0] != "#"]
    first_tree = TreeTools.Newick2Nexus(tree_lines).trees[0]

    renderer = TreeGraph(support=None, loglevel=options.loglevel)
    print(renderer.Run(first_tree))

    E.Stop()

Esempio n. 3

Mostra file

File: simgram.py Progetto: logust79/cgat-apps

    def run(self, grammar, tree=None, dump=0, test=False, options={}):
        """Run the external program on *grammar* and return its parsed output.

        A fresh temporary directory is created and the grammar (and, if
        given, the tree) is written into it.  The program is then started
        in that directory with a random seed, and its stdout is handed to
        ``self.parseOutput`` as a list of lines.

        Arguments:
            grammar: object providing ``getGrammar()``; its return value is
                written to the grammar file.
            tree: optional tree.  A string that starts with "(" and ends
                with ")" or ";" is written verbatim; any other string is
                taken as the name of a file whose first tree is written in
                Newick format.  For non-string values the tree file is
                created but left empty.
            dump: if true, echo the program's stdout.
            test: if true, report the temporary directory and keep it.
            options: not used by this method (default kept for callers).

        Returns:
            whatever ``self.parseOutput`` returns.

        Raises:
            UsageError: if the program exits with a non-zero status.  The
                temporary directory is left in place in that case, as it
                is named in the error message.
        """

        self.mTempdir = tempfile.mkdtemp()
        self.mFilenameGrammar = "grammar.eg"
        self.mFilenameTree = "tree.nh"
        self.mFilenameOutput = None
        self.mWarnings = []

        if test:
            print("# temporary directory is %s" % self.mTempdir)

        with open(self.mTempdir + "/" + self.mFilenameGrammar, "w") as outfile:
            outfile.write(grammar.getGrammar())

        if tree:
            with open(self.mTempdir + "/" + self.mFilenameTree, "w") as outfile:
                # check what kind of tree is given.
                if isinstance(tree, StringType):
                    t = tree.strip()
                    # startswith/endswith are safe on a whitespace-only string,
                    # where indexing t[0] would raise IndexError.
                    if t.startswith("(") and t.endswith((")", ";")):
                        outfile.write("%s\n" % t)
                    else:
                        with open(tree, "r") as infile:
                            t = TreeTools.Newick2Nexus(infile).trees[0]
                        outfile.write("%s\n" % TreeTools.Tree2Newick(t))

        # Use an explicit random seed: a time-based seed won't do if simgram
        # is called in quick succession.
        # Open question from the original author: are there any restrictions
        # on seeds?
        statement = "%s -rndseed %i -g %s -t %s" % (
            self.mExecutable, random.randint(
                0, 4294967296), self.mFilenameGrammar, self.mFilenameTree)

        s = subprocess.Popen(statement,
                             shell=True,
                             stdout=subprocess.PIPE,
                             stderr=subprocess.PIPE,
                             cwd=self.mTempdir,
                             close_fds=True)

        (out, err) = s.communicate()

        if s.returncode != 0:
            raise UsageError(
                "Error in running %s \n%s\n%s\nTemporary directory in %s" % (
                    self.mExecutable, err, out, self.mTempdir))

        if dump:
            print("# stdout output of %s:\n%s\n######################################" % (
                self.mExecutable, out))

        if not test:
            shutil.rmtree(self.mTempdir)

        return self.parseOutput(out.split("\n"))

Esempio n. 4

Mostra file

File: xrate_blocks.py Progetto: logust79/cgat-apps

def trainMali( mali, options ):
    """train a grammar on a multiple alignment.

    Prepares *mali* in place: optionally removes gap/masked columns,
    renames identifiers containing ``options.separator`` to the part in
    front of the separator, and maps the identifiers.  If
    ``options.input_filename_tree`` is set, the first tree of that file is
    read and relabelled with the inverted identifier map.

    NOTE(review): the visible part of this function ends after the tree has
    been relabelled; ``length``, ``input_model``, ``ids`` and ``tree`` are
    kept because code beyond this excerpt presumably uses them.

    Raises:
        KeyError: if relabelling the tree fails because names in the
            alignment and the tree do not match.
    """

    # remove empty columns and masked columns
    if options.clean_mali:
        mali.mGapChars = mali.mGapChars + ("n", "N")
        mali.removeGaps( minimum_gaps = 1, frame=1 )

    length = mali.getNumColumns()

    input_model = prepareGrammar( options )

    for identifier in mali.getIdentifiers():
        if options.separator in identifier:
            species = identifier.split(options.separator)[0]
            mali.rename( identifier, species )

    map_new2old = mali.mapIdentifiers()
    map_old2new = IOTools.getInvertedDictionary( map_new2old, make_unique = True )

    ids = mali.getIdentifiers()

    if options.input_filename_tree:
        # Take the tree while the file is still open, then close the handle.
        with open(options.input_filename_tree, "r") as infile:
            nexus = TreeTools.Newick2Nexus( infile )
            tree = nexus.trees[0]
        try:
            tree.relabel( map_old2new, warn = True )
        except KeyError as msg:
            raise KeyError( "names in mali and tree are not congruent: %s" % msg )

Esempio n. 5

Mostra file

    def processChunk(lines, map_strain2species, options):
        """Apply species mergers to every tree parsed from *lines*.

        Each tree is counted in ``ninput``.  Trees for which mergers were
        found are counted in ``nmerged``.  After the mergers are applied,
        trees with at most one terminal are counted in ``nskipped`` and not
        written; all others are written to ``options.stdout`` and counted in
        ``noutput``.
        """
        global ninput, noutput, nskipped, nmerged

        verbose = options.loglevel >= 3

        for tree in TreeTools.Newick2Nexus(lines).trees:
            ninput += 1

            if verbose:
                tree.display()

            mergers = getSpeciesTreeMergers(tree, map_strain2species, options)

            if verbose:
                options.stdlog.write(
                    "# found %i nodes in the tree that will be merged.\n" % (len(mergers)))

            if mergers:
                nmerged += 1

            applySpeciesTreeMergers(tree, mergers, map_strain2species, options)

            if len(tree.get_terminals()) > 1:
                tree.writeToFile(options.stdout, format=options.output_format)
                noutput += 1
            else:
                nskipped += 1

Esempio n. 6

Mostra file

File: tree2taxa.py Progetto: santayana/cgat

def main(argv=None):
    """List the taxa of every tree read from stdin.

    With a single input tree only the taxon is written per line.  With
    several trees each line also carries the 1-based tree number and,
    unless ``--skip-trees`` is given, the tree name.
    """

    if argv is None:
        argv = sys.argv

    parser = E.OptionParser(
        version="%prog version: $Id: tree2taxa.py 2782 2009-09-10 11:40:29Z andreas $",
        usage=globals()["__doc__"])

    parser.add_option(
        "--skip-trees",
        dest="skip_trees",
        action="store_true",
        help="do not output tree names in third field [default=%default].")

    parser.set_defaults(skip_trees=False)

    options, args = E.Start(parser, add_pipe_options=True)

    nexus = TreeTools.Newick2Nexus(sys.stdin)
    verbose = options.loglevel >= 1
    if verbose:
        options.stdlog.write("# read %i trees from stdin.\n" %
                             len(nexus.trees))

    ntotal = len(nexus.trees)
    single_tree = ntotal == 1

    # header line
    if single_tree:
        header = "taxon\n"
    elif options.skip_trees:
        header = "taxon\ttree\n"
    else:
        header = "taxon\ttree\tname\n"
    options.stdout.write(header)

    # one line per taxon, in the layout announced by the header
    for ntree, tree in enumerate(nexus.trees, 1):
        for t in TreeTools.GetTaxa(tree):
            if single_tree:
                row = "%s\n" % (t)
            elif options.skip_trees:
                row = "%s\t%i\n" % (t, ntree)
            else:
                row = "%s\t%i\t%s\n" % (t, ntree, tree.name)
            options.stdout.write(row)

    if verbose:
        options.stdlog.write("# ntotal=%i\n" % (ntotal))

    E.Stop()

Esempio n. 7

Mostra file

File: tree2patterns.py Progetto: santayana/cgat

def main(argv=None):
    """Print the patterns computed from a reference tree.

    The value of ``-t``/``--reference-tree`` is handed to
    ``TreeTools.Newick2Nexus`` and the first resulting tree is used.  The
    OTU order is taken from ``-s``/``--sort-order`` (comma-separated); if
    that option is not given, the taxa of the tree's terminal nodes are
    used in the order returned by ``get_terminals()``.

    NOTE(review): *argv* is not forwarded to ``E.Start`` in this function.

    Raises:
        ValueError: if no reference tree was supplied.
    """

    if argv is None:
        argv = sys.argv

    parser = E.OptionParser(
        version=
        "%prog version: $Id: tree2patterns.py 2781 2009-09-10 11:33:14Z andreas $",
        usage=globals()["__doc__"])

    parser.add_option("-t",
                      "--reference-tree",
                      dest="reference_tree",
                      type="string",
                      help="reference tree to read.")

    parser.add_option("-s",
                      "--sort-order",
                      dest="sort_order",
                      type="string",
                      help="output order of OTU.")

    parser.set_defaults(
        reference_tree=None,
        sort_order=[],
    )

    (options, args) = E.Start(parser)

    if not options.reference_tree:
        raise ValueError("no reference tree defined.")

    # The tree has to be parsed before the default sort order can be
    # derived from it.
    nexus = TreeTools.Newick2Nexus(options.reference_tree)
    reference_tree = nexus.trees[0]

    if options.sort_order:
        options.sort_order = options.sort_order.split(",")
    else:
        options.sort_order = [
            reference_tree.node(nx).get_data().taxon
            for nx in reference_tree.get_terminals()
        ]

    if options.loglevel >= 3:
        print("# reference tree:")
        # display() is called for its output, as elsewhere in this code base.
        reference_tree.display()

    patterns = TreeTools.calculatePatternsFromTree(reference_tree,
                                                   options.sort_order)

    for p in patterns:
        print(p)

    E.Stop()

Esempio n. 8

Mostra file

File: WrapperNJTree.py Progetto: santayana/cgat

    def WriteTree(self, tree):
        """Write *tree* to the tree file in the temporary directory.

        *tree* is parsed with ``TreeTools.Newick2Nexus``; the first tree is
        relabelled through ``self.mMapOld2New`` and written in Newick
        format, preceded by a line holding the number of sequences and
        the constant 1.
        """

        nexus = TreeTools.Newick2Nexus(tree)
        t = nexus.trees[0]
        TreeTools.MapTaxa(t, self.mMapOld2New)

        # "with" makes sure the file is closed even if a write or the
        # Newick conversion fails.
        with open(self.mTempdir + "/" + self.mFilenameTree, "w") as outfile:
            outfile.write("%i 1\n" % self.mNumSequences)
            outfile.write("%s\n" % TreeTools.Tree2Newick(t))

Esempio n. 9

Mostra file

File: evaluate_bootstrap.py Progetto: santayana/cgat

def ParseTree(reference_tree, rx_species):
    """Parse the reference tree and number its taxa.

    Returns a tuple ``(tree, map_taxon2id)``: the first tree parsed from
    *reference_tree* and a dictionary assigning consecutive ids to the taxa
    of its terminal nodes, plus a final entry for ``"unknown"``.
    *rx_species* is not used here.
    """
    tree = TreeTools.Newick2Nexus(reference_tree).trees[0]

    if param_loglevel >= 3:
        print("# reference tree:")
        tree.display()

    taxon_ids = {}
    for terminal in tree.get_terminals():
        taxon = tree.node(terminal).get_data().taxon
        # the next free id is the current size of the map
        next_id = len(taxon_ids)
        taxon_ids[taxon] = next_id
        if param_loglevel >= 2:
            print("# %s\t%i" % (taxon, next_id))

    taxon_ids["unknown"] = len(taxon_ids)

    return tree, taxon_ids

Esempio n. 10

Mostra file

    def testGetMergers(self):
        """
        Check that every merger found is listed in the reference.

        A merger matches if its two (strain, gene) pairs appear in the
        reference in either order.

        TODO: add testing for transcripts
        """
        print("testGetMergers()")

        for lines, reference, map_strain2species, options in self.mTestData:
            first_tree = TreeTools.Newick2Nexus(lines).trees[0]
            found = tree_strain2species.getMergers(
                first_tree, map_strain2species, options)

            for node_id, species, strain_x, gene_x, strain_y, gene_y in found:
                left = (strain_x, gene_x)
                right = (strain_y, gene_y)
                forward = (left, right)
                if forward in reference or (right, left) in reference:
                    continue
                self.fail("%s not in reference %s" %
                          (str(forward), str(reference)))

Esempio n. 11

Mostra file

File: WrapperNJTree.py Progetto: santayana/cgat

    def parseOutput(self, lines, out, err):
        """Build a ``Result`` from the program output.

        *lines* are joined, stripped of all whitespace and of bracketed
        ``[...]`` sections, and parsed as a tree.  The first tree is mapped
        through ``self.mMapNew2Old`` and stored in the result together with
        *out* (as log) and *err*.
        """
        newick = "".join(lines)
        newick = re.sub(r"\s", "", newick)
        # drop bracketed annotations
        newick = re.sub(r"\[[^\]]+\]", "", newick)

        nexus = TreeTools.Newick2Nexus(newick)

        result = Result()
        tree = nexus.trees[0]

        TreeTools.MapTaxa(tree, self.mMapNew2Old)

        result.mTree = tree
        result.mLog = out
        result.mErr = err

        return result

Esempio n. 12

Mostra file

File: tree_strain2species.py Progetto: santayana/cgat

    def processChunk(lines, map_strain2species, options):
        """Merge genes in every tree parsed from *lines* and write the trees.

        Each tree is counted in ``ninput``; trees for which mergers were
        found are counted in ``nmerged``.  After the mergers are applied,
        trees with at most one terminal are counted in ``nskipped`` and
        dropped.  For the remaining trees every merged (strain, gene) pair
        is written to ``output_genes`` together with its new name, and the
        tree is written to ``options.stdout`` and counted in ``noutput``.
        A (strain, gene) pair that was already seen is reported on
        ``options.stdlog`` and counted in ``nwarnings``.

        ``counters``, ``merged`` and ``output_genes`` are taken from the
        enclosing scope.
        """

        # nwarnings is rebound below, so it must be declared like the other
        # counters; without it the "+=" raises UnboundLocalError.
        global ninput, noutput, nskipped, nmerged, nwarnings

        nexus = TreeTools.Newick2Nexus(lines)

        for tree in nexus.trees:
            ninput += 1

            if options.loglevel >= 3:
                tree.display()

            mergers = getMergers(tree, map_strain2species, options)

            if options.loglevel >= 3:
                options.stdlog.write(
                    "# found %i pairs of genes that will be merged.\n" %
                    (len(mergers)))

            if len(mergers) > 0:
                nmerged += 1

            n = applyMergers(tree, mergers, counters, map_strain2species,
                             options)

            if len(tree.get_terminals()) <= 1:
                nskipped += 1
                continue

            for new_name, values in n.items():
                for strain, gene in values:
                    if (strain, gene) in merged:
                        # The format has three placeholders, so all three
                        # values have to be supplied.
                        # NOTE(review): the stored value is always None
                        # here; presumably a tree identifier was intended.
                        options.stdlog.write(
                            "# warning: strain %s and gene %s already appeared in tree %s\n"
                            % (strain, gene, merged[(strain, gene)]))
                        nwarnings += 1
                    merged[(strain, gene)] = None
                    output_genes.write("%s\t%s\n" % (options.separator.join(
                        (strain, gene)), new_name))

            tree.writeToFile(options.stdout, format=options.output_format)
            noutput += 1

Esempio n. 13

Mostra file

def GetPrunedReferenceTree(mask, present_orgs, reference_tree):
    """Return a freshly parsed reference tree restricted to *present_orgs*.

    Terminal nodes whose taxon is not in *present_orgs* are pruned.
    *mask* is not used here.
    """
    # The species tree is re-read on every call because it is modified
    # below (the original author notes that no copy mechanism was looked
    # for).
    pruned = TreeTools.Newick2Nexus(reference_tree).trees[0]

    # keep only those taxa which are present in the cluster
    for terminal in pruned.get_terminals():
        taxon = pruned.node(terminal).get_data().taxon
        if taxon in present_orgs:
            continue
        Prune(pruned, taxon)

    if param_loglevel >= 3:
        organisms = ",".join(present_orgs.keys())
        print("# pruned reference tree for %s:" % organisms)
        pruned.display()

    return pruned

Esempio n. 14

Mostra file

def processMali(mali, options):
    """Train a grammar on *mali* and write one line of rates per block.

    The alignment is optionally annotated with a "STATE" track that splits
    it into an "N" part and a "C" part (``options.block_size``).
    Identifiers are renamed to the part in front of ``options.separator``,
    and gap/masked columns are optionally removed.  A grammar is then
    trained via ``prepareGrammar`` and, for every block, dN, dS, omega,
    kappa and related values are derived from the trained rate parameters
    and written tab-separated to ``options.stdout``.  Values that cannot be
    computed because of a division by zero are reported as "na".

    NOTE(review): ``options.shared_frequencies`` has no effect on the
    parameters read back here.

    Raises:
        ValueError: if the alignment has no columns.
    """

    ncols = mali.getNumColumns()

    if ncols == 0:
        raise ValueError("refusing to process empty alignment.")

    # add annotation of states
    if options.block_size is not None:
        if options.block_size < 1:
            # a value below 1 is a fraction of the alignment, in whole codons
            size = int(float(ncols) / 3.0 * options.block_size) * 3
        else:
            # otherwise it is a number of codons
            size = int(options.block_size) * 3

        size = min(size, ncols)
        mali.addAnnotation("STATE", "N" * size + "C" * (ncols - size))

    # remove gene ids
    for identifier in mali.getIdentifiers():
        if options.separator in identifier:
            species = identifier.split(options.separator)[0]
            mali.rename(identifier, species)

    map_new2old = mali.mapIdentifiers()
    map_old2new = IOTools.getInvertedDictionary(map_new2old, make_unique=True)

    xgram = XGram.XGram()

    if options.xrate_min_increment:
        xgram.setMinIncrement(options.xrate_min_increment)

    # remove empty columns and masked columns
    if options.clean_mali:
        mali.mGapChars = mali.mGapChars + ("n", "N")
        mali.removeGaps(minimum_gaps=1, frame=3)

    if options.input_filename_tree:
        # Take the tree while the file is still open, then close the handle.
        with open(options.input_filename_tree, "r") as infile:
            tree = TreeTools.Newick2Nexus(infile).trees[0]
        tree.relabel(map_old2new)
    else:
        tree = None

    annotation = mali.getAnnotation("STATE")
    chars = set(annotation)
    for c in chars:
        assert c in ("N", "C"), \
            "unknown annotation %s: only 'N' and 'C' are recognized" % c
    if len(chars) == 1:
        if options.loglevel >= 1:
            options.stdlog.write("# WARNING: only a single block\n")
        # a set cannot be indexed; take its only element
        blocks = (("B0_", next(iter(chars))), )
    else:
        blocks = (("B0_", "N"), ("B1_", "C"))

    result, mali, ids = prepareGrammar(xgram, mali, tree, map_old2new, blocks,
                                       options)

    trained_model = result.getModel()

    pis, matrices = RateEstimation.getRateMatrix(trained_model)

    annotation = mali.getAnnotation("STATE")

    # For each sharing mode: which of (Rs, Rn, Ri, Rv) are block-specific.
    # Block-specific parameters carry the block prefix, the others carry
    # none.  Unknown modes treat all four as block-specific.
    block_specific = {
        "all": (False, False, False, False),
        "kappa": (True, True, False, False),
        "kappa-ds": (False, True, False, False),
        "omega": (False, False, True, True),
        "omega-ds": (False, False, True, False),
        "ds": (False, True, True, True),
    }.get(options.shared_rates, (True, True, True, True))

    for block, code in blocks:

        terminals = ("%sCOD0" % block, "%sCOD1" % block, "%sCOD2" % block)

        pi = pis[terminals]

        rate_prefix_rs, rate_prefix_rn, rate_prefix_ri, rate_prefix_rv = [
            block if specific else "" for specific in block_specific
        ]

        rs = trained_model.mGrammar.getParameter('%sRs' % rate_prefix_rs)
        rn = trained_model.mGrammar.getParameter('%sRn' % rate_prefix_rn)
        ri = trained_model.mGrammar.getParameter('%sRi' % rate_prefix_ri)
        rv = trained_model.mGrammar.getParameter('%sRv' % rate_prefix_rv)

        nchars = annotation.count(code)

        msg = "iter=%i Rs=%6.4f Rn=%6.4f Ri=%6.4f Rv=%6.4f" % (
            result.getNumIterations(), rs, rn, ri, rv)

        try:
            # matrix with the trained rates
            Q, t = RateEstimation.getQMatrix(pi,
                                             Rsi=rs * ri,
                                             Rsv=rs * rv,
                                             Rni=rn * ri,
                                             Rnv=rn * rv)
            # matrix with Rs and Rn replaced by their average
            avg_omega = (rs + rn) / 2.0
            Q0, t0 = RateEstimation.getQMatrix(pi,
                                               Rsi=ri * avg_omega,
                                               Rsv=rv * avg_omega,
                                               Rni=ri * avg_omega,
                                               Rnv=rv * avg_omega)

            # matrix with Ri and Rv replaced by their average
            avg_kappa = (ri + rv) / 2.0
            Q1, t1 = RateEstimation.getQMatrix(pi,
                                               Rsi=rs * avg_kappa,
                                               Rsv=rs * avg_kappa,
                                               Rni=rn * avg_kappa,
                                               Rnv=rn * avg_kappa)

            rI, rV, rS, rN = RateEstimation.countSubstitutions(pi, Q)
            rI0, rV0, rS0, rN0 = RateEstimation.countSubstitutions(pi, Q0)
            # The Q1 results are not reported; the call is kept so that the
            # same errors surface as before.
            rI1, rV1, rS1, rN1 = RateEstimation.countSubstitutions(pi, Q1)

            dS = rS / (3 * rS0) * t
            dN = rN / (3 * rN0) * t

            o_kappa = options.value_format % (rI / rI0 * rV0 / rV)
            o_omega = options.value_format % (dN / dS)

            o_dn = options.value_format % dN
            o_ds = options.value_format % dS
            o_rn = options.value_format % rN
            o_rs = options.value_format % rS
            o_rn0 = options.value_format % rN0
            o_rs0 = options.value_format % rS0
            o_t = options.value_format % t
            o_t0 = options.value_format % t0

        except ZeroDivisionError:

            o_kappa = "na"
            o_omega = "na"
            o_dn = "na"
            o_ds = "na"
            o_rn = "na"
            o_rs = "na"
            o_rn0 = "na"
            o_rs0 = "na"
            o_t = "na"
            o_t0 = "na"
            Q = None
            msg = "insufficient data to estimate rate matrix."

        options.stdout.write("\t".join(
            map(str, (code, block, o_dn, o_ds, o_omega, "na", "na", "na", "na",
                      o_kappa, result.getLogLikelihood(), "na", nchars))))

        if options.with_rho:
            options.stdout.write(
                "\t" +
                "\t".join(map(str, (o_rn, o_rs, o_t, o_rn0, o_rs0, o_t0))))

        options.stdout.write("\t%s\n" % msg)

Esempio n. 15

Mostra file

File: SVGTree.py Progetto: santayana/cgat

        colour_by_species=None,
        tree=None,
        branch_scale=0,
        height_scale=0,
    )

    (options, args) = Experiment.Start(parser, add_pipe_options=True)

    if options.filename_tree:
        tree_lines = open(options.filename_tree, "r").readlines()
    elif options.tree:
        tree_lines = options.tree
    else:
        raise "please supply a species tree."

    nexus = TreeTools.Newick2Nexus(tree_lines)
    Tree.updateNexus(nexus)
    tree = nexus.trees[0]

    if options.loglevel >= 2:
        tree.display()

    plot = SVGTree(tree)

    plot.setBranchScale(options.branch_scale)
    plot.setHeightScale(options.height_scale)

    if options.colour_by_species:
        rx = re.compile(options.species_regex)
        extract_species = lambda x: rx.search(x).groups()[0]
        plot.setDecoratorExternalNodes(

Esempio n. 16

Mostra file

def _getSequencePairs(nids, iteration):
    """Return the list of (x, y) index pairs to compare for *iteration*."""
    if iteration == "all-vs-all":
        return [(x, y) for x in range(nids) for y in range(0, x)]
    if iteration == "first-vs-all":
        return [(0, y) for y in range(1, nids)]
    if iteration == "pairwise":
        if nids % 2 != 0:
            raise ValueError(
                "uneven number of sequences (%i) not compatible with --iteration=pairwise"
                % nids)
        return [(x, x + 1) for x in range(0, nids, 2)]
    if iteration == "tree":
        return []
    raise ValueError("unknown iteration mode: %s" % (iteration))


def processMali(mali, options):
    """Set up the sequence pairs of *mali* and run the selected method.

    The pairs to compare follow from ``options.iteration`` ("all-vs-all",
    "first-vs-all", "pairwise" or "tree"; the latter yields no pairs).
    If ``options.remove_stops`` is set, stop codons are replaced by "NNN"
    in every sequence.  The pairs are then scanned until one is found
    with fewer than ``options.min_overlap`` columns in which neither
    sequence has a gap or mask character; whether such a pair exists is
    passed on to the method.  Finally ``runCodeML`` or ``runXrate`` is
    called depending on ``options.method``; any other method does nothing.

    Raises:
        ValueError: for an unknown iteration mode, or for an odd number
            of sequences with the "pairwise" mode.
    """

    map_new2old = mali.mapIdentifiers()
    ids = mali.getIdentifiers()

    invalid_chars = options.gap_chars + options.mask_chars

    pairs = _getSequencePairs(len(ids), options.iteration)

    if options.remove_stops:
        for identifier, entry in mali.items():
            s = entry.mString.upper()
            fragments = []
            for x in range(0, len(s), 3):
                codon = s[x:x + 3]
                if Genomics.IsStopCodon(codon):
                    codon = "NNN"

                fragments.append(codon)

            entry.mString = "".join(fragments)

    has_non_overlaps = False
    for x, y in pairs:
        noverlap = 0
        for a, b in zip(mali[ids[x]], mali[ids[y]]):
            if a not in invalid_chars and b not in invalid_chars:
                noverlap += 1
                if noverlap >= options.min_overlap:
                    break
        else:
            # the inner loop ended without reaching min_overlap
            has_non_overlaps = True
            break

    if options.tree:
        tree = TreeTools.Newick2Nexus(options.tree).trees[0]
        map_old2new = IOTools.getInvertedDictionary(map_new2old,
                                                    make_unique=True)
        tree.relabel(map_old2new)
    else:
        tree = None

    if options.method == "paml":
        runCodeML(mali, tree, has_non_overlaps, pairs, map_new2old, options)

    elif options.method == "xrate":
        runXrate(mali, has_non_overlaps, pairs, map_new2old, options)

Esempio n. 17

Mostra file

File: data2phylocontrasts.py Progetto: lesheng/cgat

def main(argv=None):
    """script main.

    parses command line options in sys.argv, unless *argv* is given.

    Reads a tab-separated table from stdin (first non-comment line is the
    header, first column of every row is the taxon), hands it together
    with each tree from --filename-tree to the phylip "contrast" program
    and writes the output of every requested --method to options.stdout.

    NOTE(review): *argv* is not forwarded to E.Start in this function.

    Raises a ValueError if no trees could be read.
    """

    if argv is None:
        argv = sys.argv

    parser = E.OptionParser(
        version=
        "%prog version: $Id: data2phylocontrasts.py 2782 2009-09-10 11:40:29Z andreas $",
        usage=globals()["__doc__"])

    parser.add_option("-c",
                      "--columns",
                      dest="columns",
                      type="string",
                      help="columns to take for calculating histograms.")
    parser.add_option("-t",
                      "--filename-tree",
                      dest="filename_tree",
                      type="string",
                      help="filename with tree(s).")
    parser.add_option("--skip-header",
                      dest="add_header",
                      action="store_false",
                      help="do not add header to flat format.")
    parser.add_option("--write-header",
                      dest="write_header",
                      action="store_true",
                      help="write header and exit.")
    parser.add_option("--debug",
                      dest="debug",
                      action="store_true",
                      help="debug mode")
    parser.add_option("--display-tree",
                      dest="display_tree",
                      action="store_true",
                      help="display the tree")

    parser.add_option("-m",
                      "--method",
                      dest="methods",
                      type="choice",
                      action="append",
                      choices=("contrasts", "spearman", "pearson", "compute"),
                      help="methods to perform on contrasts.")

    parser.set_defaults(
        columns="all",
        filename_tree=None,
        add_header=True,
        write_header=False,
        debug=False,
        methods=[],
        value_format="%6.4f",
        pvalue_format="%e",
        display_tree=False,
    )

    (options, args) = E.Start(parser, quiet=True)

    # an explicit column list is given 1-based and converted to 0-based
    # NOTE(review): options.columns is not used further down in this function.
    if options.columns not in ("all", "all-but-first"):
        options.columns = map(lambda x: int(x) - 1, options.columns.split(","))

    phylip = WrapperPhylip.Phylip()

    if options.debug:
        phylip.setLogLevel(options.loglevel)

    phylip.setProgram("contrast")

    ##########################################################
    ##########################################################
    ##########################################################
    # retrieve data and give to phylip
    data = []
    headers = []
    first = True
    for line in sys.stdin:
        # skip comment lines
        if line[0] == "#":
            continue
        d = line[:-1].strip().split("\t")
        if first:
            # header line: the first field labels the taxon column and is dropped
            first = False
            headers = d[1:]
            continue
        data.append(d)

    phylip.setData(data)
    ncolumns = len(headers)
    nrows = len(data)

    ##########################################################
    ##########################################################
    ##########################################################
    # read trees
    nexus = None
    if options.filename_tree:
        # NOTE(review): the file handle is never closed explicitly.
        nexus = TreeTools.Newick2Nexus(open(options.filename_tree, "r"))

    if not nexus:
        raise ValueError("please provide trees with branchlenghts")

    ##########################################################
    ##########################################################
    ##########################################################
    # set up phylip
    phylip_options = []
    # print out contrasts
    phylip_options.append("C")
    phylip_options.append("Y")
    phylip.setOptions(phylip_options)

    ##########################################################
    ##########################################################
    ##########################################################
    # main loop
    ##########################################################
    for tree in nexus.trees:

        if options.display_tree:
            tree.display()

        # compute this before giving the tree to the phylip module,
        # as it remaps taxon names.
        # maps the node found for a row's taxon to the index of that row
        map_node2data = {}
        for x in range(nrows):
            taxon = data[x][0]
            map_node2data[tree.search_taxon(taxon)] = x

        phylip.setTree(tree)

        result = phylip.run()

        for method in options.methods:

            # NOTE(review): "pearson" and "spearman" take the same code path.
            if method in ("pearson", "spearman"):

                options.stdout.write("header1\theader2\tr\tp\tcode\n")

                n = len(result.mContrasts)
                # transpose the contrasts: one list of values per column
                columns = []
                for c in range(ncolumns):
                    columns.append(map(lambda x: x[c], result.mContrasts))

                # all pairs of columns with x < y
                for x in range(0, ncolumns - 1):
                    for y in range(x + 1, ncolumns):

                        # phylip value
                        phy_r = result.mCorrelations[x][y]

                        # NOTE(review): imported inside the innermost loop.
                        import rpy
                        from rpy import r as R

                        # Various ways to calculate r. It is not possible to use
                        # cor.test or lsfit directly, as you have to perform a
                        # regression through the origin.

                        # uncomment to check pearson r against phylip's value
                        ## r = calculateCorrelationCoefficient( columns[x], columns[y] )

                        # for significance, use linear regression models in R
                        # the model is built with conversion switched off and
                        # the default mode is switched back right afterwards
                        rpy.set_default_mode(rpy.NO_CONVERSION)
                        linear_model = R.lm(R("y ~ x - 1"),
                                            data=R.data_frame(x=columns[x],
                                                              y=columns[y]))
                        rpy.set_default_mode(rpy.BASIC_CONVERSION)

                        ss = R.summary(linear_model)

                        # extract the p-value
                        p = ss['coefficients'][-1][-1]

                        # significance code for the p-value
                        if p < 0.001:
                            code = "***"
                        elif p < 0.01:
                            code = "**"
                        elif p < 0.05:
                            code = "*"
                        else:
                            code = ""

                        options.stdout.write("\t".join(
                            (headers[x], headers[y], options.value_format %
                             phy_r, options.pvalue_format % p, code)) + "\n")

            elif method == "contrasts":

                options.stdout.write("\t".join(headers) + "\n")
                for d in result.mContrasts:
                    # NOTE(review): the line terminator is "\n " (newline plus
                    # a space), so following lines start with a space --
                    # confirm whether that is intended.
                    options.stdout.write(
                        "\t".join(map(lambda x: options.value_format % x, d)) +
                        "\n ")

            elif method == "compute":

                # make room for all internal nodes and one dummy node
                # for unrooted trees.
                max_index = TreeTools.GetMaxIndex(tree) + 2
                variances = [None] * max_index
                # NOTE(review): the inner lists have nrows entries but are
                # indexed by column (0..ncolumns-1) below -- confirm sizes.
                values = [[None] * nrows for x in range(max_index)]
                contrasts = []
                for x in range(max_index):
                    contrasts.append([None] * ncolumns)
                branchlengths = [None] * max_index

                # Fill in branch length, variance, values and contrasts of
                # node_id from its two children c1 and c2; bl is the branch
                # length of node_id itself.
                def update_data(
                    node_id,
                    bl,
                    c1,
                    c2,
                ):

                    b1, b2 = branchlengths[c1], branchlengths[c2]
                    rb1 = 1.0 / b1
                    rb2 = 1.0 / b2
                    # compute variance
                    # NOTE(review): what is stored is sqrt(b1 + b2).
                    variance = math.sqrt(b1 + b2)

                    # extend branch length of this node to create correct
                    # variance for parent
                    branchlengths[node_id] = bl + (b1 * b2) / (b1 + b2)
                    variances[node_id] = variance

                    for c in range(ncolumns):
                        v1, v2 = values[c1][c], values[c2][c]
                        # save ancestral value as weighted mean
                        values[node_id][c] = (
                            (rb1 * v1 + rb2 * v2)) / (rb1 + rb2)
                        # compute normalized contrast
                        contrasts[node_id][c] = (v1 - v2) / variance

                def update_contrasts(node_id):
                    """update contrasts for a node."""
                    node = tree.node(node_id)
                    if node.succ:
                        if len(node.succ) == 2:
                            c1, c2 = node.succ
                            update_data(node_id, node.data.branchlength, c1,
                                        c2)
                        else:
                            # only the root may have three children; the
                            # first two are combined in the root itself, and
                            # the result is then combined with the third
                            # child in the dummy node (max_index - 1).
                            assert (node_id == tree.root)
                            assert (len(node.succ) == 3)
                            update_data(node_id, node.data.branchlength,
                                        node.succ[0], node.succ[1])
                            update_data(max_index - 1, node.data.branchlength,
                                        node_id, node.succ[2])
                    else:
                        # leaf: take the values from the input row of this
                        # node (column 0 of a row is the taxon, hence c + 1)
                        for c in range(ncolumns):
                            values[node_id][c] = float(
                                data[map_node2data[node_id]][c + 1])

                        branchlengths[node_id] = node.data.branchlength

                # update_contrasts is passed as post_function, as each node
                # reads the values already stored for its children
                tree.dfs(tree.root, post_function=update_contrasts)

                options.stdout.write("node_id\tvariance\t%s\n" %
                                     "\t".join(headers))
                for node_id in range(max_index):
                    # nodes without a variance (leaves, unused indices) are
                    # not reported
                    if variances[node_id] is None:
                        continue
                    options.stdout.write("%s\t%s\t%s\n" % (
                        node_id,
                        options.value_format % variances[node_id],
                        "\t".join(
                            map(lambda x: options.value_format % x,
                                contrasts[node_id])),
                    ))

    E.Stop()

Esempio n. 18

Mostra file

def main(argv=None):
    """script main.

    parses command line options in sys.argv, unless *argv* is given.

    Reads newick trees from stdin and aggregates them according to
    ``--method``:

    * ``non-redundant``: write one representative for each group of trees
      that TreeTools.IsCompatible reports as compatible.
    * ``select-largest``: group trees by name (optionally reduced with
      ``--regex-id``) and write the tree returned by getBestTree per group.
    * ``consensus``: run phylip ``consense`` on all trees.
    * ``min``, ``max``, ``sum``, ``mean``, ``counts``, ``stddev``,
      ``median``: combine branch lengths node by node into the first tree
      and write that tree.

    Branch lengths contained in ``filtered_branch_lengths`` are treated as
    missing values.

    Raises ValueError for an unknown aggregation method, or if a node has
    no usable branch length and ``--error-branchlength`` was not given.
    """

    # NOTE(review): argv is normalised here but not forwarded to E.Start.
    if argv is None:
        argv = sys.argv

    parser = E.OptionParser(version="%prog version: $Id: trees2tree.py 2782 2009-09-10 11:40:29Z andreas $",
                            usage=globals()["__doc__"])

    parser.add_option("-m", "--method", dest="method", type="choice",
                      choices=("counts", "min", "max", "sum", "mean", "median", "stddev", "non-redundant", "consensus",
                               "select-largest"),
                      help="aggregation function.")

    parser.add_option("-r", "--regex-id", dest="regex_id", type="string",
                      help="regex pattern to extract identifier from tree name for the selection functions.")

    parser.add_option("-w", "--write-values", dest="write_values", type="string",
                      help="if processing multiple trees, write values to file.")

    parser.add_option("-e", "--error-branchlength", dest="error_branchlength", type="float",
                      help="set branch length without counts to this value.")

    parser.set_defaults(
        method="mean",
        regex_id=None,
        filtered_branch_lengths=(-999.0, 999.0),
        write_values=None,
        error_branchlength=None,
        separator=":",
    )

    (options, args) = E.Start(parser, add_pipe_options=True)

    if options.loglevel >= 2:
        options.stdlog.write("# reading trees from stdin.\n")
        options.stdlog.flush()

    nexus = TreeTools.Newick2Nexus(sys.stdin)
    if options.loglevel >= 1:
        options.stdlog.write(
            "# read %i trees from stdin.\n" % len(nexus.trees))

    nskipped = 0
    ninput = len(nexus.trees)
    noutput = 0
    nerrors = 0

    filtered = options.filtered_branch_lengths

    if options.method == "non-redundant":
        # compute non-redundant trees: every tree is compared against the
        # templates collected so far and becomes a new template itself if
        # it is compatible with none of them.
        template_trees = []
        template_counts = []
        for ntree, tree in enumerate(nexus.trees):

            for x, template in enumerate(template_trees):
                is_compatible, reason = TreeTools.IsCompatible(
                    tree, template)
                if is_compatible:
                    template_counts[x] += 1
                    break
            else:
                template_counts.append(1)
                template_trees.append(tree)

            if options.loglevel >= 2:
                options.stdlog.write(
                    "# tree=%i, ntemplates=%i\n" % (ntree, len(template_trees)))

        for x, template in enumerate(template_trees):
            if options.loglevel >= 1:
                # percentages are relative to the number of input trees
                # (the original referenced an undefined name here).
                options.stdlog.write("# tree: %i, counts: %i, percent=%5.2f\n" %
                                     (x, template_counts[x], template_counts[x] * 100.0 / ninput))
            options.stdout.write(
                TreeTools.Tree2Newick(template) + "\n")

    elif options.method in ("select-largest",):
        # select one of the trees with the same name.
        clusters = {}
        for x in range(0, len(nexus.trees)):
            n = nexus.trees[x].name

            if options.regex_id:
                n = re.search(options.regex_id, n).groups()[0]

            if n not in clusters:
                clusters[n] = []
            clusters[n].append(x)

        new_trees = []

        for cluster in clusters.values():
            new_trees.append(
                getBestTree([nexus.trees[x] for x in cluster], options.method))

        for new_tree in new_trees:
            options.stdout.write(">%s\n" % new_tree.name)
            options.stdout.write(TreeTools.Tree2Newick(new_tree,) + "\n")
            noutput += 1

        # every input tree that was not selected counts as skipped
        nskipped = ninput - noutput

    elif options.method == "consensus":

        phylip = WrapperPhylip.Phylip()
        phylip.setLogLevel(options.loglevel - 2)
        phylip.setProgram("consense")
        phylip_options = []
        phylip_options.append("Y")

        phylip.setOptions(phylip_options)
        phylip.setTrees(nexus.trees)

        result = phylip.run()

        options.stdout.write(
            "# consensus tree built from %i trees\n" % (phylip.mNInputTrees))
        options.stdout.write(
            TreeTools.Tree2Newick(result.mNexus.trees[0]) + "\n")
        noutput = 1

    else:
        if options.method in ("min", "max", "sum", "mean", "counts"):

            # the first tree serves as accumulator for all others
            xtree = nexus.trees[0]
            for n in xtree.chain.keys():
                if xtree.node(n).data.branchlength in filtered:
                    xtree.node(n).data.branchlength = 0

            # number of values contributing to each node, indexed by node id.
            # NOTE(review): every node starts at 1 even if its branch length
            # in the first tree was filtered, so the "no counts" branch below
            # can never trigger and means of such nodes are biased - confirm
            # whether filtered nodes should start at 0.
            ntotals = [1] * len(xtree.chain.keys())

            if options.method == "min":
                f = min
            elif options.method == "max":
                f = max
            elif options.method == "sum":
                f = lambda x, y: x + y
            elif options.method == "mean":
                # accumulate the sum first, divide by the counts afterwards
                f = lambda x, y: x + y
            elif options.method == "counts":
                f = lambda x, y: x + 1
                for n in xtree.chain.keys():
                    if xtree.node(n).data.branchlength not in filtered:
                        xtree.node(n).data.branchlength = 1
                    else:
                        xtree.node(n).data.branchlength = 0
            else:
                raise ValueError("unknown option %s" % options.method)

            for tree in nexus.trees[1:]:

                for n in tree.chain.keys():
                    if tree.node(n).data.branchlength not in filtered:
                        xtree.node(n).data.branchlength = f(
                            xtree.node(n).data.branchlength, tree.node(n).data.branchlength)
                        ntotals[n] += 1

            if options.method == "mean":
                for n in xtree.chain.keys():
                    if ntotals[n] > 0:
                        xtree.node(n).data.branchlength = float(
                            xtree.node(n).data.branchlength) / ntotals[n]
                    else:
                        if options.error_branchlength is not None:
                            xtree.node(
                                n).data.branchlength = options.error_branchlength
                            if options.loglevel >= 1:
                                options.stdlog.write(
                                    "# no counts for node %i - set to %f\n" % (n, options.error_branchlength))
                            nerrors += 1
                        else:
                            raise ValueError("no counts for node %i" % n)

        else:
            # stddev / median: collect all values per node over all trees
            values = [[] for x in range(TreeTools.GetSize(nexus.trees[0]))]

            for tree in nexus.trees:
                for n, node in tree.chain.items():
                    if node.data.branchlength not in filtered:
                        values[n].append(node.data.branchlength)

            tree = nexus.trees[0]
            for n, node in tree.chain.items():
                if len(values[n]) > 0:
                    if options.method == "stddev":
                        node.data.branchlength = scipy.std(values[n])
                    elif options.method == "median":
                        node.data.branchlength = scipy.median(values[n])
                else:
                    if options.error_branchlength is not None:
                        node.data.branchlength = options.error_branchlength
                        if options.loglevel >= 1:
                            options.stdlog.write(
                                "# no counts for node %i - set to %f\n" % (n, options.error_branchlength))
                        nerrors += 1
                    else:
                        raise ValueError("no counts for node %i" % n)

            if options.write_values:
                # one line per node: joined leaf names, then the sorted values
                with open(options.write_values, "w") as outfile:
                    for n, node in tree.chain.items():
                        values[n].sort()
                        leaf_id = options.separator.join(
                            sorted(TreeTools.GetLeaves(tree, n)))
                        outfile.write("%s\t%s\n" %
                                      (leaf_id, ";".join(map(str, values[n]))))

        # only the first tree (holding the aggregated values) is written
        del nexus.trees[1:]
        options.stdout.write(TreeTools.Nexus2Newick(nexus) + "\n")
        noutput = 1

    if options.loglevel >= 1:
        options.stdlog.write("# ntotal=%i, nskipped=%i, noutput=%i, nerrors=%i\n" % (
            ninput, nskipped, noutput, nerrors))

    E.Stop()

Esempio n. 19

Mostra file

def main(argv=None):
    """script main.

    parses command line options in sys.argv, unless *argv* is given.

    Reads newick trees from stdin and, for each requested method, writes
    per-tree statistics to stdout. The only method is ``branchlengths``,
    which writes one line per tree with the summary produced by
    Stats.DistributionalParameters over the tree's branch lengths.
    """

    # NOTE(review): argv is normalised here but not forwarded to E.Start.
    if argv is None:
        argv = sys.argv

    parser = E.OptionParser(
        version=
        "%prog version: $Id: tree2stats.py 2782 2009-09-10 11:40:29Z andreas $",
        usage=globals()["__doc__"])

    parser.add_option("-m",
                      "--method",
                      dest="methods",
                      type="choice",
                      action="append",
                      choices=("branchlengths", ),
                      help="methods to apply.")

    parser.set_defaults(
        methods=[],
        filtered_branch_length=-999,
    )

    (options, args) = E.Start(parser, add_pipe_options=True)

    nexus = TreeTools.Newick2Nexus(sys.stdin)
    if options.loglevel >= 1:
        options.stdlog.write("# read %i trees from stdin.\n" %
                             len(nexus.trees))

    ninput = len(nexus.trees)

    # NOTE(review): never incremented below, so the summary always reports 0.
    nskipped = 0

    for method in options.methods:

        outfile = options.stdout

        if method == "branchlengths":

            outfile.write(
                "tree\t%s\n" %
                "\t".join(Stats.DistributionalParameters().getHeaders()))

            for tree in nexus.trees:
                branchlengths = []
                for node in tree.chain.values():
                    length = node.data.branchlength

                    # ignore branch length of root if it is zero
                    # NOTE(review): "not node.prev" is also true for a parent
                    # id of 0 - confirm that only the root is meant.
                    if not node.prev and length == 0:
                        continue

                    # skip the marker value used for missing branch lengths
                    if length == options.filtered_branch_length:
                        continue

                    branchlengths.append(length)

                s = Stats.DistributionalParameters(branchlengths)
                outfile.write("%s\t%s\n" % (tree.name, str(s)))

    if options.loglevel >= 1:
        options.stdlog.write("# ninput=%i, nskipped=%i\n" % (ninput, nskipped))

    E.Stop()

Esempio n. 20

Mostra file

def main(argv=None):
    """script main.

    parses command line options in sys.argv, unless *argv* is given.

    Reads one or more distance matrices from stdin (several matrices are
    separated by lines starting with ``>``; lines starting with ``#`` are
    ignored), runs the selected phylip program on each of them and writes
    the resulting tree(s) in newick format to stdout.
    """

    # NOTE(review): argv is normalised here but not forwarded to E.Start.
    if argv is None:
        argv = sys.argv

    parser = E.OptionParser(
        version=
        "%prog version: $Id: matrix2tree.py 2782 2009-09-10 11:40:29Z andreas $"
    )

    parser.add_option("-i",
                      "--invert-map",
                      dest="invert_map",
                      action="store_true",
                      help="""invert map.""")

    parser.add_option("--input-format",
                      dest="input_format",
                      type="choice",
                      choices=("phylip", "full"),
                      help="""input format.""")

    parser.add_option("-t",
                      "--filename-tree",
                      dest="filename_tree",
                      type="string",
                      help="""filename with tree to fit.""")

    parser.add_option("-m",
                      "--method",
                      dest="method",
                      type="choice",
                      choices=("nj", "kitsch", "fitch"),
                      help="""algorithm to run.""")

    parser.add_option("-e",
                      "--replicates",
                      dest="replicates",
                      action="store_true",
                      help="replicates.")

    parser.add_option("-r",
                      "--root",
                      dest="root",
                      action="store_true",
                      help="midpoint root (if it is not rooted).")

    parser.add_option("-u",
                      "--unroot",
                      dest="unroot",
                      action="store_true",
                      help="unroot tree (if it is rooted).")

    parser.add_option("--skip-separators",
                      dest="write_separators",
                      action="store_false",
                      help="do not echo separators (starting with >)")

    #    parser.add_option("-i", "--iterations", dest="iterations", type="int",
    #                      help="number of iterations." )

    parser.add_option("-p",
                      "--power",
                      dest="power",
                      type="float",
                      help="power.")

    parser.add_option(
        "--prune-tree",
        dest="prune_tree",
        action="store_true",
        help=
        "prune tree such to include only taxa which are part of the input matrix."
    )

    parser.add_option(
        "--add-random",
        dest="add_random",
        action="store_true",
        help="add small random value to off-diagonal zero elements in matrix.")

    parser.add_option(
        "--pseudo-replicates",
        dest="pseudo_replicates",
        action="store_true",
        help=
        "add small random value to off-diagonal zero elements in matrix, even if they have no replicates."
    )

    parser.add_option("--debug",
                      dest="debug",
                      action="store_true",
                      help="dump debug information.")

    parser.set_defaults(
        value=0,
        method="nj",
        input_format="phylip",
        filename_tree=None,
        outgroup=None,
        replicates=False,
        root=False,
        unroot=False,
        power=0,
        write_separators=True,
        prune_tree=False,
        add_random=False,
        debug=False,
    )

    (options, args) = E.Start(parser, add_pipe_options=True)

    def parse_matrix(matrix_lines):
        """split the rows after the size line into ids and value lists."""
        ids, rows = [], []
        for line in matrix_lines[1:]:
            fields = re.split(r"\s+", line.rstrip("\n"))
            ids.append(fields[0])
            rows.append([field.strip() for field in fields[1:]])
        return ids, rows

    def format_matrix(ids, rows):
        """rebuild phylip matrix lines (size line first) from ids and rows."""
        formatted = ["%i\n" % len(rows)]
        for identifier, row in zip(ids, rows):
            formatted.append(identifier + "    " + "    ".join(row) + "\n")
        return formatted

    def randomize_zero_distances(rows):
        """replace off-diagonal "0" entries symmetrically by a small random value."""
        size = len(rows)
        for row in range(size - 1):
            for col in range(row + 1, size):
                if rows[row][col] == "0":
                    v = str(random.random() / 10000.0)
                    rows[row][col] = v
                    rows[col][row] = v

    phylip = WrapperPhylip.Phylip()

    if options.debug:
        phylip.setLogLevel(options.loglevel)

    phylip.setPruneTree(options.prune_tree)

    # real lists are needed below (indexing, append), so do not rely on
    # filter() returning a list.
    lines = [line for line in sys.stdin.readlines()
             if not line.startswith("#")]

    # indices of the separator lines that start a new matrix
    chunks = [index for index, line in enumerate(lines)
              if line.startswith(">")]

    if not chunks:
        # a single matrix without separator: pretend one sits at index -1
        options.write_separators = False
        chunks = [-1]

    chunks.append(len(lines))

    for chunk_index in range(len(chunks) - 1):

        matrix = lines[chunks[chunk_index] + 1:chunks[chunk_index + 1]]

        # parse phylip matrix
        if options.add_random:
            ids, mm = parse_matrix(matrix)

            d = len(mm)
            if options.replicates:
                # with replicates each entry is a (distance, count) column pair
                for row in range(d - 1):
                    for col in range(row + 1, d):
                        cc = col * 2
                        rr = row * 2
                        if mm[row][cc] == "0" and mm[row][cc + 1] != "0":
                            mm[row][cc + 1] = "1"
                            mm[col][rr + 1] = "1"
                            v = str(random.random() / 10000.0)
                            mm[row][cc] = v
                            mm[col][rr] = v

            else:
                randomize_zero_distances(mm)

            matrix = format_matrix(ids, mm)

        # parse phylip matrix
        if options.pseudo_replicates:
            ids, mm = parse_matrix(matrix)

            d = len(mm)
            if options.replicates:
                for row in range(d - 1):
                    for col in range(row + 1, d):
                        cc = col * 2
                        rr = row * 2
                        if mm[row][cc + 1] == "0":
                            mm[row][cc + 1] = "1"
                            mm[col][rr + 1] = "1"
                            v = str(random.random() / 10000.0)
                            mm[row][cc] = v
                            mm[col][rr] = v
                        else:
                            mm[row][cc + 1] = "100"
                            mm[col][rr + 1] = "100"
            else:
                randomize_zero_distances(mm)

            matrix = format_matrix(ids, mm)

        phylip.setMatrix(matrix)

        phylip_options = []

        if options.filename_tree:
            # the reference tree is re-read for every matrix so that each run
            # starts from a fresh tree object; the file is closed right away.
            with open(options.filename_tree, "r") as infile:
                nexus = TreeTools.Newick2Nexus(infile)
            ref_tree = nexus.trees[0]
            phylip.setTree(ref_tree)
            phylip_options.append("U")
        else:
            ref_tree = None

        if options.method == "nj":
            phylip.setProgram("neighbor")

        elif options.method == "fitch":
            phylip.setProgram("fitch")

        elif options.method == "kitsch":
            phylip.setProgram("kitsch")

        if options.replicates:
            phylip_options.append("S")

        if options.power > 0:
            phylip_options.append("P")
            phylip_options.append("%f" % options.power)

        phylip_options.append("Y")

        phylip.setOptions(phylip_options)

        result = phylip.run()

        # result.mNexus is treated as possibly empty further down, so guard
        # every access to its trees.
        result_trees = result.mNexus.trees if result.mNexus else []

        # root with outgroup
        if options.root:
            if options.outgroup:
                pass
            # midpoint root
            else:
                for tree in result_trees:
                    tree.root_midpoint()

        # explicitely unroot
        elif options.unroot:
            phylip.setOptions(("Y", "W", "U", "Q"))
            phylip.setProgram("retree")
            # a dedicated index is used here: reusing the chunk index would
            # make the separator lookup below pick the wrong line.
            for tree_index in range(len(result_trees)):
                phylip.setTree(result_trees[tree_index])
                xresult = phylip.run()
                result_trees[tree_index] = xresult.mNexus.trees[0]

        if options.write_separators:
            options.stdout.write(lines[chunks[chunk_index]])

        if result.mNexus:
            options.stdout.write(TreeTools.Nexus2Newick(result.mNexus) + "\n")

        if options.loglevel >= 1:
            if ref_tree:
                nref = len(ref_tree.get_terminals())
            else:
                nref = 0
            for tree in result_trees:
                options.stdlog.write(
                    "# ninput=%i, nreference=%i, noutput=%i\n" %
                    (len(matrix) - 1, nref, len(tree.get_terminals())))

    E.Stop()

Esempio n. 21

Mostra file

File: tree2matrix.py Progetto: yangjl/cgat

def main(argv=None):
    """script main.

    parses command line options in sys.argv, unless *argv* is given.

    Reads newick trees from stdin and writes distances between pairs of
    nodes, either as a list of node pairs (default), as the links returned
    by TreeTools.Tree2Graph (``--graph``) or as one table over all trees
    (``--table``). Which node pairs are reported is chosen with ``--pairs``.

    Raises NotImplementedError for ``--pairs=lineage``.
    """

    # NOTE(review): argv is normalised here but not forwarded to E.Start.
    if argv is None:
        argv = sys.argv

    parser = E.OptionParser(
        version=
        "%prog version: $Id: tree2matrix.py 2782 2009-09-10 11:40:29Z andreas $"
    )

    parser.add_option("-f",
                      "--format",
                      dest="format",
                      type="string",
                      help="number format to use.")
    parser.add_option("-g",
                      "--graph",
                      dest="to_graph",
                      action="store_true",
                      help="convert tree(s) to graph(s).")
    parser.add_option("-a",
                      "--table",
                      dest="to_table",
                      action="store_true",
                      help="convert tree(s) to table.")
    parser.add_option("-t",
                      "--translate",
                      dest="do_translate",
                      action="store_true",
                      help="translate internal nodes to clades.")
    parser.add_option(
        "--output-pattern",
        dest="output_filename_pattern",
        type="string",
        help="pattern for output file if there are multiple trees in the file."
        "")
    parser.add_option("--pairs",
                      dest="pairs",
                      type="choice",
                      choices=("all", "leaves", "branches", "terminals",
                               "lineage", "between-species"),
                      help="choose pairs of nodes to output."
                      "")
    parser.add_option(
        "--species",
        dest="species",
        type="string",
        help=
        "comma separated list of species that are considered. All others are ignored."
    )

    parser.set_defaults(
        format="%6.4f",
        to_graph=False,
        to_table=False,
        # the --translate option stores into "do_translate"; the historic
        # "do_translation" key is kept in case other code reads it.
        do_translate=False,
        do_translation=False,
        separator=":",
        do_all_on_all=False,
        do_branches=False,
        do_terminals=False,
        output_filename_pattern=None,
        pairs="branches",
        species=None,
        regex_species=("^([^|]+)"),
    )

    (options, args) = E.Start(parser, add_pipe_options=True)

    if options.species:
        options.species = options.species.split(",")
    nexus = TreeTools.Newick2Nexus(sys.stdin)

    if options.loglevel >= 1:
        options.stdlog.write("# read %i trees from stdin.\n" %
                             len(nexus.trees))

    ntree = 0
    outfile = None
    # file opened by this function for --output-pattern (None otherwise)
    pattern_file = None

    ## The table is a hash of lists
    table = {}

    def extract_species(taxon):
        """return the species part of a taxon name (first regex group)."""
        return re.search(options.regex_species, taxon).groups()[0]

    for tree in nexus.trees:

        if len(nexus.trees) == 1:
            outfile = options.stdout
        elif options.output_filename_pattern:
            # one output file per tree, numbered from 1
            ntree += 1
            if pattern_file is not None:
                pattern_file.close()
            pattern_file = open(options.output_filename_pattern % ntree, "w")
            outfile = pattern_file
        else:
            outfile = options.stdout

        ## prune tree, if an explicit species list is given
        if options.species:
            species = set(options.species)
            terminals = tree.get_terminals()
            for x in terminals:
                taxon = tree.node(x).data.taxon
                if extract_species(taxon) not in species:
                    tree.prune(taxon)

        ## define node list
        terminals = tree.get_terminals()
        set_terminals = set(terminals)
        node_list = []

        if options.pairs == "all":
            nodes = TreeTools.GetAllNodes(tree)
            for x in range(len(nodes)):
                for y in range(0, x):
                    node_list.append((nodes[x], nodes[y]))
        elif options.pairs == "terminals":
            for x in terminals:
                node_list.append((x, tree.node(x).prev))
        elif options.pairs == "leaves":
            nodes = terminals
            for x in range(len(nodes)):
                for y in range(0, x):
                    node_list.append((nodes[x], nodes[y]))
        elif options.pairs == "branches":
            nodes = TreeTools.GetAllNodes(tree)
            # NOTE(review): this uses the position x as node id instead of
            # nodes[x], and the truth test also skips nodes whose parent has
            # id 0 - confirm both are intended.
            for x in range(len(nodes)):
                if tree.node(x).prev:
                    node_list.append((x, tree.node(x).prev))
        elif options.pairs == "between-species":
            nodes = terminals
            for x in range(len(nodes)):
                for y in range(0, x):
                    s1 = extract_species(tree.node(nodes[x]).data.taxon)
                    s2 = extract_species(tree.node(nodes[y]).data.taxon)
                    if s1 != s2:
                        node_list.append((nodes[x], nodes[y]))

        elif options.pairs == "lineage":
            raise NotImplementedError("not implemented.")

        if options.to_graph:
            outfile.write("node1\tnode2\tdistance\n")

            links = TreeTools.Tree2Graph(tree)
            for n1, n2, weight in links:

                node1 = TranslateNode(n1, tree, set_terminals, options)
                node2 = TranslateNode(n2, tree, set_terminals, options)

                # report each pair in a canonical order
                if node1 > node2:
                    node1, node2 = node2, node1
                outfile.write("%s\t%s\t%s\n" %
                              (node1, node2, options.format % weight))

        elif options.to_table:

            # NOTE(review): "nodes" is assigned but not used in this branch.
            if options.do_all_on_all:
                nodes = TreeTools.GetAllNodes(tree)
            else:
                nodes = terminals

            for n1, n2 in node_list:

                node1 = TranslateNode(n1, tree, set_terminals, options)
                node2 = TranslateNode(n2, tree, set_terminals, options)

                if node1 > node2:
                    node1, node2 = node2, node1

                if options.do_terminals:
                    key = "%s" % node2
                else:
                    key = "%s-%s" % (node1, node2)

                # distances are collected per key; one entry per tree having it
                if key not in table:
                    table[key] = []

                table[key].append(options.format % tree.distance(n1, n2))
        else:
            outfile.write("node1\tnode2\tdistance\n")

            for n1, n2 in node_list:
                node1 = TranslateNode(n1, tree, set_terminals, options)
                node2 = TranslateNode(n2, tree, set_terminals, options)

                if node1 > node2:
                    node1, node2 = node2, node1

                outfile.write("%s\t%s\t%s\n" %
                              (node1, node2,
                               options.format % tree.distance(n1, n2)))

    if options.to_table:
        # close a per-tree pattern file before the handle is rebound below,
        # otherwise it would never be closed.
        if pattern_file is not None:
            pattern_file.close()
            pattern_file = None

        # NOTE(review): the table goes to sys.stdout, not options.stdout.
        outfile = sys.stdout
        outfile.write("branch\t%s\n" %
                      ("\t".join(map(str, range(0, len(nexus.trees))))))

        for key, values in table.items():
            outfile.write("%s\t%s\n" % (key, "\t".join(values)))

    # outfile is still None if no tree was read
    if outfile is not None and outfile != sys.stdout:
        outfile.close()

    E.Stop()

Esempio n. 22

Mostra file

File: tree2svg.py Progetto: yangjl/cgat

def main():
    """Plot a tree read in Newick format and write the plot to stdout.

    The tree is taken from ``--filename-tree``, from the ``--tree`` option
    or, failing both, from stdin.  The first tree read is the master tree
    that is plotted; every ``--annotations`` entry consumes one further
    tree from the same input, in order.

    Raises:
        ValueError: if more annotations are requested than trees were
            read, or if more than two branch length annotations were
            collected (no layout exists for that case).
    """

    parser = E.OptionParser(
        version=
        "%prog version: $Id: plot_tree.py 2782 2009-09-10 11:40:29Z andreas $")

    parser.add_option("-i",
                      "--title",
                      dest="title",
                      type="string",
                      help="page title.")
    parser.add_option("-f",
                      "--footer",
                      dest="footer",
                      type="string",
                      help="page footer.")
    parser.add_option("-s",
                      "--filename-tree",
                      dest="filename_tree",
                      type="string",
                      help="filename with tree.")
    parser.add_option("-t", "--tree", dest="tree", type="string", help="tree.")
    parser.add_option(
        "-r",
        "--species-regex",
        dest="species_regex",
        type="string",
        help="regular expression to extract species from identifier.")
    parser.add_option("--colour-by-species",
                      dest="colour_by_species",
                      action="store_true",
                      help="colour by species.")
    parser.add_option("--support-style",
                      dest="support_style",
                      type="choice",
                      choices=("pie", "number"),
                      help="style for support information.")
    parser.add_option("--error-style",
                      dest="error_style",
                      type="choice",
                      choices=("pie", "number"),
                      help="style for error information.")
    parser.add_option("--branch-scale",
                      dest="branch_scale",
                      type="float",
                      help="branch length scale factor.")
    parser.add_option("--height-scale",
                      dest="height_scale",
                      type="float",
                      help="height scale factor.")
    parser.add_option("-a",
                      "--annotations",
                      dest="annotations",
                      type="choice",
                      action="append",
                      choices=("support", "error", "kaks", "master", "value",
                               "tables"),
                      help="annotations given by further trees.")
    parser.add_option(
        "--filename-tables",
        dest="filename_tables",
        type="string",
        help="add tables from file (need also set options -a tables) [%default]"
    )
    parser.add_option("--show-branchlengths",
                      dest="show_branchlengths",
                      action="store_true",
                      help="show branch lengths.")
    parser.add_option("--leaf-symbol",
                      dest="plot_leaf_symbol",
                      type="choice",
                      choices=("square", "circle"),
                      help="Symbol for leaves.")
    parser.add_option("--font-size-branches",
                      dest="font_size_branches",
                      type="int",
                      help="set font size for branches.")
    parser.add_option("--font-size-tips",
                      dest="font_size_tips",
                      type="int",
                      help="set font size for tips.")
    parser.add_option("--font-style-tips",
                      dest="font_style_tips",
                      type="choice",
                      choices=(
                          "normal",
                          "italic",
                      ),
                      help="set font style for tips.")
    parser.add_option("--filename-map",
                      dest="filename_map",
                      type="string",
                      help="filename with a name translation table.")
    parser.add_option("--filename-map-species2colour",
                      dest="filename_colour_map",
                      type="string",
                      help="filename with a map of species to colour.")
    parser.add_option("--no-leaf-labels",
                      dest="plot_leaf_labels",
                      action="store_false",
                      help="do not show labels at leafs.")
    parser.add_option("--no-ruler",
                      dest="plot_ruler",
                      action="store_false",
                      help="do not plot ruler.")

    parser.set_defaults(
        titles="",
        title="",
        footer="",
        filename_tree=None,
        # raw string: same pattern, without an invalid "\|" string escape
        species_regex=r"^([^|]+)\|",
        colour_by_species=None,
        tree=None,
        branch_scale=0,
        height_scale=0,
        support_style=None,
        error_style="number",
        kaks_style="number",
        annotations=None,
        show_branchlengths=False,
        branch_length_format="%5.2f",
        font_size_tips=None,
        font_size_branches=None,
        font_style_tips=None,
        filename_map=None,
        filename_colour_map=None,
        plot_leaf_labels=True,
        plot_leaf_symbol=None,
        plot_ruler=True,
        filename_tables=None,
    )

    (options, args) = E.Start(parser, add_pipe_options=True)

    # Obtain the tree description: file, command line string or stdin.
    if options.filename_tree:
        with open(options.filename_tree, "r") as infile:
            tree_lines = infile.readlines()
    elif options.tree:
        tree_lines = options.tree
    else:
        tree_lines = sys.stdin.readlines()

    nexus = TreeTools.Newick2Nexus(tree_lines)
    master_tree = nexus.trees[0]

    # Optionally rename taxa according to a translation table.
    if options.filename_map:
        # presumably ReadMap consumes the whole file before returning
        with open(options.filename_map, "r") as infile:
            map_names = IOTools.ReadMap(infile)

        for _node_id, node in master_tree.chain.items():
            if node.data.taxon in map_names:
                node.data.taxon = map_names[node.data.taxon]

    if options.loglevel >= 2:
        master_tree.display()

    plot = SVGTree.SVGTree(master_tree)

    if options.branch_scale:
        plot.setBranchScale(options.branch_scale)

    # NOTE: the default height_scale is 0 (not None), so the height scale
    # is always set explicitly, exactly as before.
    if options.height_scale is not None:
        plot.setHeightScale(options.height_scale)

    if options.font_size_tips is not None:
        plot.setFontSize(options.font_size_tips)

    if not options.plot_ruler:
        plot.setRulerElements([])

    if options.show_branchlengths:
        b = SVGTree.BranchDecoratorHorizontalBranchLength(master_tree)
        if options.font_size_branches:
            b.setFontSize(options.font_size_branches)
        plot.setDecoratorHorizontalBranches(b)

    if options.colour_by_species:
        if options.filename_colour_map:
            with open(options.filename_colour_map, "r") as infile:
                map_species2colour = IOTools.ReadMap(infile)
        else:
            map_species2colour = None

        rx = re.compile(options.species_regex)
        extract_species = lambda x: rx.search(x).groups()[0]
        plot.setDecoratorExternalNodes(
            SVGTree.NodeDecoratorBySpecies(
                master_tree,
                plot_symbol=options.plot_leaf_symbol,
                plot_label=options.plot_leaf_labels,
                map_species2colour=map_species2colour,
                extract_species=extract_species))

    if options.font_style_tips:
        plot.getDecoratorExternalNodes().setFontStyle(options.font_style_tips)

    plot.getDecoratorExternalNodes().setPlotLabel(options.plot_leaf_labels)

    # Index of the next tree to be consumed by an annotation; tree 0 is
    # the master tree.
    current_tree = 1

    # Decorators that annotate branch lengths; they are combined after
    # all annotations have been processed.
    branch_length_annotations = []

    current_reference_tree = master_tree

    if options.annotations:
        for annotation in options.annotations:

            if current_tree >= len(nexus.trees):
                raise ValueError(
                    "annotation '%s' needs tree number %i, "
                    "but only %i tree(s) were read" %
                    (annotation, current_tree + 1, len(nexus.trees)))

            tree = nexus.trees[current_tree]

            if annotation == "support":

                tree.branchlength2support()
                for _node_id, node in tree.chain.items():
                    node.data.branchlength = 1.0

                if options.support_style == "pie":
                    plot.setDecoratorInternalNodes(
                        NodeDecoratorSupportPieChart(tree))

            elif annotation == "error":

                if options.error_style == "number":
                    b = SVGTree.BranchDecoratorHorizontalBranchLengthError(
                        current_reference_tree, tree)
                    if options.font_size_branches:
                        b.setFontSize(options.font_size_branches)
                    branch_length_annotations.append(b)

            elif annotation == "kaks":

                if options.kaks_style == "number":
                    b = SVGTree.BranchDecoratorHorizontalBranchLengthWithKaks(
                        current_reference_tree, tree)
                    if options.font_size_branches:
                        b.setFontSize(options.font_size_branches)
                    branch_length_annotations.append(b)

            elif annotation == "value":

                b = SVGTree.BranchDecoratorHorizontalBranchLength(tree)
                if options.font_size_branches:
                    b.setFontSize(options.font_size_branches)
                branch_length_annotations.append(b)

            elif annotation == "master":
                # subsequent error/kaks annotations refer to this tree
                current_reference_tree = tree

            elif annotation == "tables":
                b = BranchDecoratorTable(tree,
                                         filename=options.filename_tables)
                plot.setDecoratorHorizontalBranches(b)

            current_tree += 1

        if len(branch_length_annotations) > 2:
            raise ValueError(
                "obtained more than two branch length annotations. "
                "Layout not implemented")

        # Only install a combined decorator if there is one; otherwise
        # keep whatever decorator has been set above.
        if len(branch_length_annotations) == 1:
            plot.setDecoratorHorizontalBranches(branch_length_annotations[0])
        elif len(branch_length_annotations) == 2:
            b1, b2 = branch_length_annotations
            b1.setFontColour(SVGTree.BLUE)
            b2.setFontColour(SVGTree.RED)
            plot.setDecoratorHorizontalBranches(
                SVGTree.BranchDecoratorHorizontalAboveBelow(
                    master_tree, b1, b2))

    plot.initializePlot()

    plot.writeToFile(sys.stdout)

    E.Stop()

Esempio n. 23

Mostra file

File: tree_map_leaves.py Progetto: santayana/cgat

    print E.GetHeader()
    print E.GetParams()

    keys = {}
    if param_apply:
        infile = open(param_apply, "r")
        for line in infile:
            if line[0] == "#":
                continue
            a, b = line[:-1].split("\t")[:2]
            if param_invert:
                a, b = b, a
            keys[a] = b

    nexus = TreeTools.Newick2Nexus(sys.stdin)

    notu = 0

    for tree in nexus.trees:
        if param_loglevel >= 2:
            tree.display()

        for nx in tree.get_terminals():
            t1 = tree.node(nx).get_data().taxon

            if param_create:
                if t1 not in keys:
                    keys[t1] = "otu%i" % notu
                    notu += 1

Esempio n. 24

Mostra file

File: trees2trees.py Progetto: yangjl/cgat

def main( argv = None ):
    """script main.

    Reads trees in Newick format from stdin, applies the filters selected
    on the command line and either writes each remaining tree into its own
    file (method ``split``) or all of them to stdout (method ``filter``).

    Filters either prune taxa, mask branches by setting their length to
    ``options.filtered_branch_length``, or discard whole trees.

    NOTE(review): *argv* is defaulted from sys.argv but is not forwarded
    to E.Start in this function.

    Raises:
        ValueError: if no taxa are given for the monophyly test.
    """

    if argv is None:
        argv = sys.argv

    parser = E.OptionParser( version = "%prog version: $Id: trees2trees.py 2782 2009-09-10 11:40:29Z andreas $", usage = globals()["__doc__"])

    parser.add_option("-c", "--output-filename-map", dest="output_filename_map", type="string",
                      help="filename of map to output."  )

    parser.add_option("-m", "--method", dest="method", type="choice",
                      choices=("filter", "split"),
                      help="method to use: filter removed trees, while split writes them to individual files. DEFAULT=%default"  )

    parser.add_option("-d", "--output-pattern", dest="output_pattern", type="string",
                      help="filename pattern for output multiple alignment files."  )

    # Each of the four length filters has its own destination; they are
    # read below as options.filter_terminal_{max,min}_length and
    # options.filter_{max,min}_length.
    parser.add_option("--filter-terminal-max-length", dest="filter_terminal_max_length", type="float",
                      help="remove terminal branches with a branch length larger than this."  )

    parser.add_option("--filter-terminal-min-length", dest="filter_terminal_min_length", type="float",
                      help="remove terminal branches with a branch length smaller than this."  )

    parser.add_option("--filter-min-length", dest="filter_min_length", type="float",
                      help="remove any branches with a branch length smaller than this."  )

    parser.add_option("--filter-max-length", dest="filter_max_length", type="float",
                      help="remove any branches with a branch length larger than this."  )

    parser.add_option("--filter-by-trees", dest="filter_by_trees", type="string", action="append",
                      help="mask branches according to trees. Give filenames with mask trees. These trees need to have the same names and structure as the input trees, but can be in any order."  )

    parser.add_option("--filter-by-monophyly", dest="filter_by_monophyly", type="string",
                      help="only retain trees where the given taxa are monphyletic. Supply taxa as a comma-separated list."  )

    parser.add_option("--min-support", dest="min_support", type="float",
                      help="for monophyly filtering, only accept trees with minimum support."  )

    parser.add_option("--filter-ntaxa", dest="filter_ntaxa", type="int", 
                      help="filter by number of taxa."  )

    parser.add_option("--filter-simple-orthologs", dest="filter_simple_orthologs", action="store_true", 
                      help="filter for trees for simple orhtologs. This works by counting the number of taxa."  )

    parser.add_option("--filter", dest="filter", type="choice",
                      choices=("taxa", "trees"),
                      help="filter removes taxa or whole trees." )

    parser.set_defaults(
        output_pattern="%s.tree",
        output_filename_map = None,
        filter_terminal_max_length = None,
        filter_terminal_min_length = None,
        filter_max_length = None,
        filter_min_length = None,
        method ="split",
        filter = "taxa",
        filtered_branch_length = -999,
        filter_by_trees = [],
        filter_by_monophyly = None,
        filter_ntaxa = None,
        filter_simple_orthologs = None,
        min_support = 0.0,
        regex_species = ("^([^|]+)" ),
        )

    (options, args) = E.Start( parser )

    nexus = TreeTools.Newick2Nexus( sys.stdin )

    if options.loglevel >= 1:
        options.stdlog.write("# read %i trees from stdin.\n" % len(nexus.trees))

    ninput, noutput, nskipped = 0, 0, 0
    ndiscarded = 0
    ndiscarded_taxa = 0
    ndiscarded_branches = 0

    # compile once instead of searching with the pattern string per taxon
    rx_species = re.compile( options.regex_species )

    def extract_species( taxon ):
        return rx_species.search( taxon ).groups()[0]

    if options.filter_by_trees:
        # nexus_filter[i] holds the trees of the i-th mask file and
        # nexus_maps[i] maps a tree name to its index within that file.
        nexus_filter = []
        nexus_maps = []
        for filename in options.filter_by_trees:
            # presumably Newick2Nexus reads the whole file before returning
            with open( filename, "r" ) as infile:
                nexus_filter.append( TreeTools.Newick2Nexus( infile ) )
            trees = nexus_filter[-1].trees
            if options.loglevel >= 1:
                options.stdlog.write("# read %i trees for filtering from %s\n" % (len(trees), filename))

            nexus_maps.append( dict( (t.name, x) for x, t in enumerate(trees) ) )

    if options.filter_by_monophyly:
        monophyly_taxa = options.filter_by_monophyly.split(",")
        # str.split always returns at least one item, so this is purely
        # defensive; a real exception replaces the former string raise.
        if len(monophyly_taxa) == 0:
            raise ValueError( "please supply at least two taxa for the monophyly test." )

    if options.output_filename_map:
        outfile_map = open(options.output_filename_map, "a" )
    else:
        outfile_map = None

    for tree in nexus.trees:

        ninput += 1
        tree_id = tree.name
        has_discarded = False

        if options.filter_ntaxa is not None:

            ntaxa = len(tree.get_terminals())
            if ntaxa != options.filter_ntaxa:
                if options.loglevel >= 2:
                    options.stdlog.write("# tree %s: removed because number of taxa (%i) different\n" % \
                                         (tree_id, ntaxa ) )
                has_discarded = True

        if options.filter_simple_orthologs:
            ntaxa = len(tree.get_terminals())
            nspecies = len(set( extract_species(tree.node(x).data.taxon) for x in tree.get_terminals() ))
            if nspecies != ntaxa:
                if options.loglevel >= 2:
                    options.stdlog.write("# tree %s: removed because not a simple ortholog cluster: ntaxa!=nspecies (%i!=%i)\n" % \
                                             (tree_id, ntaxa, nspecies ) )

                has_discarded = True

        if options.filter_terminal_max_length is not None:
            for x in tree.get_terminals():
                node = tree.node(x)
                if node.data.branchlength >= options.filter_terminal_max_length:
                    has_discarded = True
                    ndiscarded_taxa += 1
                    tree.prune( node.data.taxon )
                    if options.loglevel >= 2:
                        options.stdlog.write("# tree %s: removed taxon %s because terminal branchlength to large: %s\n" % \
                                             (tree_id, node.data.taxon, str(node.data.branchlength)) )

        if options.filter_terminal_min_length is not None:
            for x in tree.get_terminals():
                node = tree.node(x)
                if node.data.branchlength <= options.filter_terminal_min_length:
                    has_discarded = True
                    ndiscarded_taxa += 1
                    tree.prune( node.data.taxon )
                    if options.loglevel >= 2:
                        options.stdlog.write("# tree %s: removed taxon %s because terminal branchlength to small: %s\n" % \
                                             (tree_id, node.data.taxon, str(node.data.branchlength)) )

        if options.filter_max_length is not None:
            for x in tree.get_nodes(tree.root):
                if x == tree.root: continue
                node = tree.node(x)
                if node.data.branchlength >= options.filter_max_length:
                    has_discarded = True
                    ndiscarded_branches += 1
                    if options.loglevel >= 2:
                        # three placeholders, three arguments
                        options.stdlog.write("# tree %s: removed branch %i because branchlength to large: %s\n" % \
                                             (tree_id, x, str(node.data.branchlength)) )
                    node.data.branchlength = options.filtered_branch_length

        if options.filter_min_length is not None:
            for x in tree.get_nodes(tree.root):
                if x == tree.root: continue
                node = tree.node(x)
                if node.data.branchlength <= options.filter_min_length:
                    has_discarded = True
                    ndiscarded_branches += 1
                    if options.loglevel >= 2:
                        options.stdlog.write("# tree %s: removed branch %i because internal branchlength too small: %s\n" % \
                                             (tree_id, x, str(node.data.branchlength)) )
                    node.data.branchlength = options.filtered_branch_length

        if options.filter_by_trees:
            # collect the mask trees carrying the same name as this tree
            found = []
            for y in range(len(nexus_maps)):
                if tree_id in nexus_maps[y]:
                    found.append( (y, nexus_filter[y].trees[nexus_maps[y][tree_id]]) )

            if not found:
                ndiscarded += 1
                continue

            for x in tree.get_nodes(tree.root):
                if x == tree.root: continue
                for y, other_tree in found:
                    other_node = other_tree.node( x )
                    if other_node.data.branchlength == options.filtered_branch_length:
                        node = tree.node(x)
                        if options.loglevel >= 2:
                            options.stdlog.write("# tree %s: removed branch %i because internal branchlength masked by tree %i:%s.\n" % \
                                                 (tree_id, x, y, other_tree.name) )

                        node.data.branchlength = options.filtered_branch_length
                        has_discarded = True
                        ndiscarded_branches += 1
                        break

        if options.filter_by_monophyly:

            terminals = set( tree.node(x).data.taxon for x in tree.get_terminals() )

            for t in monophyly_taxa:
                if t not in terminals:
                    if options.loglevel >= 2:
                        options.stdlog.write( "taxon %s not in tree %s\n" % (t, tree.name))
                    nskipped += 1
            succ = tree.node(tree.root).succ
            ## use minimum support at root, if it is not the same (if trees
            ## are rooted)
            if len(succ) == 2:
                m = min( tree.node(x).data.support for x in succ )
                for x in succ:
                    tree.node(x).data.support = m

            if not TreeTools.IsMonophyleticForTaxa( tree, monophyly_taxa, support=options.min_support ):
                ndiscarded += 1
                continue

        if has_discarded:
            ndiscarded += 1
            if options.filter=="trees" or options.filter_ntaxa:
                continue

        if options.method == "split":

            # plain substitution: the tree name must not be interpreted as
            # a regular expression replacement template
            output_filename = options.output_pattern.replace( "%s", tree_id )

            dirname = os.path.dirname(output_filename)

            if dirname and not os.path.exists( dirname ):
                os.makedirs( dirname )

            if os.path.exists( output_filename ):
                if options.loglevel >= 1:
                    options.stdlog.write("# skipping because output for tree %s already exists: %s\n" % (tree_id, output_filename))
                nskipped += 1
                continue

            with open(output_filename, "w" ) as outfile:
                outfile.write( TreeTools.Tree2Newick( tree ) + "\n" )
            noutput += 1

        elif options.method == "filter":
            options.stdout.write( ">%s\n%s\n" % (tree.name, TreeTools.Tree2Newick( tree )) )
            noutput += 1

        if outfile_map:
            for t in TreeTools.GetTaxa( tree ):
                outfile_map.write( "%s\t%s\n" % (t, tree_id) )

    if outfile_map:
        outfile_map.close()

    if options.loglevel >= 1:
        options.stdlog.write("# ninput=%i, noutput=%i, nskipped=%i, with_discarded=%i, discarded_taxa=%i, discarded_branches=%i.\n" %\
                             (ninput, noutput, nskipped,
                              ndiscarded, ndiscarded_taxa, ndiscarded_branches))

    E.Stop()

Esempio n. 25

Mostra file

def main(argv=None):
    """script main.

    Reads a reference tree (``--reference-tree``, given either inline as a
    Newick string starting with "(" or as a filename) and a collection of
    trees from stdin, prunes the reference tree to the species found in
    the input trees and writes ortholog sets for every selected method.

    NOTE(review): *argv* is defaulted from sys.argv but is not forwarded
    to E.Start in this function.

    Raises:
        ValueError: if method 'outgroup' is chosen without --outgroups, if
            no reference tree is supplied, or if species cannot be
            extracted from the taxa of an input tree.
    """

    if argv is None:
        argv = sys.argv

    parser = E.OptionParser(
        version=
        "%prog version: $Id: trees2sets.py 2782 2009-09-10 11:40:29Z andreas $",
        usage=globals()["__doc__"])

    parser.add_option("-t",
                      "--reference-tree",
                      dest="reference_tree",
                      type="string",
                      help="reference tree to read.")

    parser.add_option("-e",
                      "--enumeration",
                      dest="enumeration",
                      type="choice",
                      choices=("monophyletic", "full", "pairwise",
                               "exhaustive", "explicit", "lineage"),
                      help="enumeration of ortholog groups.")

    parser.add_option("-o",
                      "--organisms",
                      dest="column2org",
                      type="string",
                      help="sorted list of organisms.")

    parser.add_option("-p",
                      "--filename-patterns",
                      dest="filename_patterns",
                      type="string",
                      help="filename with patterns to output.")

    parser.add_option("-u",
                      "--filename-summary",
                      dest="filename_summary",
                      type="string",
                      help="filename with summary to output.")

    parser.add_option("-m",
                      "--method",
                      dest="methods",
                      type="choice",
                      action="append",
                      choices=("strict", "degenerate", "any", "outgroup",
                               "lineage"),
                      help="sets to extract.")

    parser.add_option("-s",
                      "--species-set",
                      dest="species_set",
                      type="string",
                      help="comma separated list of species.")

    parser.add_option("-g",
                      "--outgroups",
                      dest="outgroups",
                      type="string",
                      help="comma separated list of outgroup species.")

    parser.add_option(
        "--species-regex",
        dest="species_regex",
        type="string",
        help="regular expression to extract species from identifier.")

    parser.add_option(
        "--gene-regex",
        dest="gene_regex",
        type="string",
        help="regular expression to extract gene from identifier.")

    parser.add_option("--reroot",
                      dest="reroot",
                      type="choice",
                      choices=("outgroup", "midpoint"),
                      help="reroot trees before computing sets.")

    parser.set_defaults(
        reference_tree=None,
        enumeration="full",
        column2org=None,
        separator="|",
        # raw strings: same patterns, without invalid "\|" string escapes
        species_regex=r"^([^|]+)\|",
        gene_regex=r"^[^|]+\|[^|]+\|([^|]+)\|",
        filename_summary=None,
        methods=[],
        species_set=None,
        outgroups=None,
        reroot=None,
    )

    (options, args) = E.Start(parser)

    if len(options.methods) == 0:
        options.methods.append("strict")

    # an explicit species set implies explicit enumeration
    if options.species_set:
        options.species_set = options.species_set.split(",")
        options.enumeration = "explicit"

    #######################################################################
    # warning: outgroup method is useless, as it requires
    # only a single outgroup per tree and the tree rooted
    # with the outgroup.
    if "outgroup" in options.methods and not options.outgroups:
        raise ValueError(
            "please supply --outgroups if method 'outgroup' is chosen.")

    if options.outgroups:
        options.outgroups = options.outgroups.split(",")

    ########################################################################
    # read the reference tree
    ########################################################################
    if not options.reference_tree:
        raise ValueError("please supply a reference tree")

    if options.reference_tree[0] == "(":
        # the option holds the tree itself
        nexus = TreeTools.Newick2Nexus(options.reference_tree)
    else:
        # the option holds a filename
        # presumably Newick2Nexus reads the whole file before returning
        with open(options.reference_tree, "r") as infile:
            nexus = TreeTools.Newick2Nexus(infile)
    reference_tree = nexus.trees[0]

    if options.loglevel >= 3:
        # NOTE(review): this logs the return value of display(); confirm
        # that display() returns a string rather than printing it.
        options.stdlog.write("# reference tree:\n%s\n" %
                             reference_tree.display())

    ########################################################################
    # read all trees
    ########################################################################
    nexus = TreeTools.Newick2Nexus(sys.stdin)

    ########################################################################
    # sort out reference tree
    ########################################################################
    extract_species = lambda x: parseIdentifier(x, options)[0]
    extract_gene = lambda x: parseIdentifier(x, options)[2]

    # prune reference tree to species present
    species_set = set()
    for tree in nexus.trees:
        try:
            species_set.update(map(extract_species, tree.get_taxa()))
        except AttributeError:
            raise ValueError(
                "parsing error while extracting species from %s" % str(
                    tree.get_taxa()))

    TreeTools.PruneTree(reference_tree, species_set)

    if options.loglevel >= 1:
        options.stdlog.write("# reference tree after pruning has %i taxa.\n" %
                             len(reference_tree.get_taxa()))

    # column order: given explicitly, else the order of the terminals of
    # the reference tree
    if options.column2org:
        options.column2org = options.column2org.split(",")
    elif reference_tree:
        options.column2org = []
        for nx in reference_tree.get_terminals():
            options.column2org.append(reference_tree.node(nx).get_data().taxon)

    # reverse lookup: organism -> column index
    options.org2column = {}
    for column, org in enumerate(options.column2org):
        options.org2column[org] = column

    for method in options.methods:

        ###################################################################
        # print out a list of ortholog clusters
        ###################################################################
        writeOrthologSets(options.stdout,
                          nexus,
                          extract_species,
                          extract_gene,
                          options=options,
                          reference_tree=reference_tree,
                          method=method,
                          outgroups=options.outgroups)

    E.Stop()

Esempio n. 26

Mostra file

def main(argv=None):
    """Script entry point: filter or transform newick trees read from stdin.

    Parses command line options in sys.argv, unless *argv* is given.

    Two modes are supported:

    * with ``--filter``, every tree on stdin is tested against the filter
      and only the trees that pass are written to stdout;
    * otherwise stdin is split into chunks at lines starting with ``>``.
      Each header line is echoed and the lines below it are handed to
      ``Process``, which applies the methods given by ``--method`` and
      writes the resulting trees.

    A summary (``ntotal``, ``nskipped``) is written to the log if the
    log level is at least 1.
    """

    if argv is None:
        # NOTE(review): argv is not forwarded to E.Start below - confirm that
        # E.Start falls back to sys.argv on its own.
        argv = sys.argv

    parser = E.OptionParser(
        version=
        "%prog version: $Id: tree2tree.py 2782 2009-09-10 11:40:29Z andreas $",
        usage=globals()["__doc__"])

    parser.add_option("-d",
                      "--value",
                      dest="value",
                      type="float",
                      help="normalizing value.")
    # The method names listed here are the ones Process() dispatches on.
    parser.add_option(
        "-m",
        "--method",
        dest="methods",
        type="string",
        help=
        """methods to apply [normalize|divide-by-tree|rename|set-uniform-branchlength|extract-with-pattern|build-map|remove-pattern|unroot|midpoint-root|balanced-root|add-node-names]"""
    )
    parser.add_option("-2",
                      "--filename-tree2",
                      dest="filename_tree2",
                      type="string",
                      help="filename with second tree.")
    parser.add_option("-o",
                      "--outgroup",
                      dest="outgroup",
                      type="string",
                      help="reroot with outgroup before processing.")
    parser.add_option("-p",
                      "--parameters",
                      dest="parameters",
                      type="string",
                      help="parameters for methods.")
    parser.add_option(
        "-e",
        "--template-identifier",
        dest="template_identifier",
        type="string",
        help="""template identifier [%default]. A %i is replaced by the position
                      of the sequence in the file.""")
    parser.add_option("-i",
                      "--invert-map",
                      dest="invert_map",
                      action="store_true",
                      help="""invert map.""")
    parser.add_option("-f",
                      "--filter",
                      dest="filter",
                      type="choice",
                      choices=("max-branch-length", ),
                      help="filter trees")
    parser.add_option("--output-format",
                      dest="output_format",
                      type="choice",
                      choices=("nh", "nhx"),
                      help=("output format for trees."))
    parser.add_option(
        "-b",
        "--no-branch-lengths",
        dest="with_branchlengths",
        action="store_false",
        help=
        """do not write branchlengths. Per default, 0 branch lengths are added."""
    )

    parser.set_defaults(
        value=0,
        methods="",
        filename_tree2=None,
        outgroup=None,
        parameters="",
        template_identifier="ID%06i",
        write_map=False,
        invert_map=False,
        filter=None,
        output_format="nh",
        with_branchlengths=True,
    )

    (options, args) = E.Start(parser, add_pipe_options=True)

    options.methods = options.methods.split(",")
    options.parameters = options.parameters.split(",")

    # Read the optional second set of trees (used by "divide-by-tree").
    other_trees = []
    if options.filename_tree2:
        # Read the lines ourselves so that the file handle is closed
        # deterministically; Newick2Nexus accepts a list of lines (it is
        # called that way on the stdin lines below).
        with open(options.filename_tree2, "r") as infile:
            other_lines = infile.readlines()
        other_trees = TreeTools.Newick2Nexus(other_lines).trees
        if not other_trees:
            raise ValueError("no trees found in %s" % options.filename_tree2)

    lines = sys.stdin.readlines()

    ntotal, nskipped, ntree = 0, 0, 0

    if options.filter:

        nexus = TreeTools.Newick2Nexus(lines)

        # The first parameter is the filter threshold; it is consumed here.
        threshold = float(options.parameters[0])
        del options.parameters[0]

        # Decision function: returns True if a branch length disqualifies
        # the tree it belongs to.
        if options.filter == "max-branch-length":

            def is_rejected(branchlength):
                return branchlength >= threshold

        kept_trees = []
        for tree in nexus.trees:
            ntotal += 1
            # A single offending branch is enough to skip the whole tree.
            if any(
                    is_rejected(node.data.branchlength)
                    for node in tree.chain.values()):
                nskipped += 1
            else:
                kept_trees.append(tree)
                ntree += 1

        nexus.trees = kept_trees

        options.stdout.write(
            TreeTools.Nexus2Newick(nexus, with_names=True) + "\n")

    else:

        # Indices of the header lines that start a new chunk.
        chunks = [i for i, line in enumerate(lines) if line.startswith(">")]

        # Shared across all Process() calls so that identifiers stay
        # consistent between chunks.
        map_old2new = {}

        if chunks:
            # Pair every header with the start of the next one; the last
            # chunk extends to the end of the input.
            bounds = chunks + [len(lines)]
            for start, end in zip(bounds[:-1], bounds[1:]):
                options.stdout.write(lines[start])
                # Accumulate the counters of *every* chunk and carry ntree
                # forward so that tree pairing stays in step.
                t, s, ntree = Process(lines[start + 1:end], other_trees,
                                      options, map_old2new, ntree)
                ntotal += t
                nskipped += s
        else:
            ntotal, nskipped, ntree = Process(lines, other_trees, options,
                                              map_old2new, ntree)

        if options.write_map:
            # Parameters are consumed by the methods in Process(), so there
            # may be none left; fall back to stdout in that case.
            remaining = getattr(options, "parameters", None) or []
            p = remaining[0] if remaining else None
            if p:
                outfile = open(p, "w")
            else:
                outfile = options.stdout

            try:
                outfile.write("old\tnew\n")
                for old_id, new_id in map_old2new.items():
                    outfile.write("%s\t%s\n" % (old_id, new_id))
            finally:
                # Only close what was opened here, never options.stdout.
                if p:
                    outfile.close()

    if options.loglevel >= 1:
        options.stdlog.write("# ntotal=%i, nskipped=%i\n" % (ntotal, nskipped))

    E.Stop()

Esempio n. 27

Mostra file

def main(argv=None):
    """Script entry point: collect leaf heights of gene trees read from stdin.

    Parses command line options in sys.argv, unless *argv* is given.

    For every gene tree the script

    1. applies the quality/location filters,
    2. optionally re-roots the tree with the outgroup species,
    3. computes the distance to the root for every node,
    4. records absolute and relative (divided by a reference height)
       heights per species and per tree.

    Per-tree counts are printed with ``--print-subtotals``, the counts
    accumulated over all trees with ``--print-totals``.

    Raises
    ------
    ValueError
        If a location based filter is requested without
        ``--filename-locations``.
    """

    if argv is None:
        # NOTE(review): argv is not forwarded to E.Start below - confirm that
        # E.Start falls back to sys.argv on its own.
        argv = sys.argv

    parser = E.OptionParser(
        version=
        "%prog version: $Id: optic/analyze_genetrees.py 2781 2009-09-10 11:33:14Z andreas $"
    )

    parser.add_option(
        "-r",
        "--species-regex",
        dest="species_regex",
        type="string",
        help="regular expression to extractspecies from identifier.")

    parser.add_option(
        "--gene-regex",
        dest="gene_regex",
        type="string",
        help="regular expression to extract gene from identifier.")

    parser.add_option("--filename-filter-positives",
                      dest="filename_filter_positives",
                      type="string",
                      help="filename with positive list of trees to analyze.")

    parser.add_option("-s",
                      "--filename-species-tree",
                      dest="filename_species_tree",
                      type="string",
                      help="filename with species tree.")

    parser.add_option(
        "--filename-species2colour",
        dest="filename_species2colour",
        type="string",
        help=
        "filename with map of species to colours. If not given, random colours are assigned to species."
    )

    parser.add_option("-t",
                      "--species-tree",
                      dest="species_tree",
                      type="string",
                      help="species tree.")

    parser.add_option(
        "-e",
        "--filename-locations",
        dest="filename_locations",
        type="string",
        help=
        "filename with map of transcript information to location information.")

    parser.add_option("--no-create",
                      dest="create",
                      action="store_false",
                      help="do not create files, but append to them.")

    parser.add_option(
        "--max-separation",
        dest="max_separation",
        type="int",
        help=
        "maximum allowable separation between syntenic segments for border plot (set to 0, if syntey is enough)."
    )

    parser.add_option(
        "--filename-species2url",
        dest="filename_species2url",
        type="string",
        help="filename with mapping information of species to URL.")

    parser.add_option("--prefix",
                      dest="prefix",
                      type="string",
                      help="prefix to add as first column.")

    parser.add_option(
        "--outgroup-species",
        dest="outgroup_species",
        type="string",
        help="species to used as outgroups. Separate multiple species by ','.")

    parser.add_option("--subtrees-trees",
                      dest="subtrees_trees",
                      action="store_true",
                      help="write trees for subtrees.")

    parser.add_option("--subtrees-identifiers",
                      dest="subtrees_identifiers",
                      action="store_true",
                      help="write identifiers of subtrees.")

    parser.add_option("--svg-add-ids",
                      dest="svg_add_ids",
                      action="store_true",
                      help="add node ids to svg plot.")

    parser.add_option("--svg-otus",
                      dest="svg_otus",
                      type="string",
                      help="otus to output in svg species tree.")

    # The option name is misspelled ("lenghts"); it is kept because it is
    # part of the command line interface.
    parser.add_option("--svg-branch-lenghts",
                      dest="svg_branch_lengths",
                      type="choice",
                      choices=("contemporary", "uniform", "median"),
                      help="branch lengths in species tree.")

    parser.add_option("--print-totals",
                      dest="print_totals",
                      action="store_true",
                      help="output totals sections.")

    parser.add_option("--print-subtotals",
                      dest="print_subtotals",
                      action="store_true",
                      help="output subtotals sections.")

    parser.add_option(
        "--print-best",
        dest="print_best",
        action="store_true",
        help="output best node assignment for each node in gene tree.")

    parser.add_option("--print-svg",
                      dest="print_svg",
                      action="store_true",
                      help="output svg files.")

    parser.add_option("--print-species-svg",
                      dest="print_species_svg",
                      action="store_true",
                      help="output species svg files.")

    parser.add_option(
        "--output-pattern",
        dest="output_pattern",
        type="string",
        help=
        """output pattern for separate output of sections [default: %default].
                       Set to None, if output to stdout. Can contain one %s to be substituted with section."""
    )

    parser.add_option(
        "--output-pattern-svg",
        dest="output_pattern_svg",
        type="string",
        help=
        "filename for svg output. If it contains %s, this is replaced by gene_tree name."
    )

    parser.add_option(
        "--filename-node-types",
        dest="filename_node_types",
        type="string",
        help="filename with node type information from a previous run.")

    parser.add_option("--analyze-resolution-data",
                      dest="analyze_resolution_data",
                      type="choice",
                      action="append",
                      choices=("stats", "histograms"),
                      help="stdin is resolution data.")

    parser.add_option("--filter-quality",
                      dest="filter_quality",
                      type="choice",
                      choices=("all", "genes", "pseudogenes"),
                      help="filter predictions by gene type.")

    parser.add_option("--filter-location",
                      dest="filter_location",
                      type="choice",
                      choices=("all", "local", "non-local", "cis", "unplaced"),
                      help="filter predictions by location.")

    parser.add_option("--remove-unplaced",
                      dest="remove_unplaced",
                      action="store_true",
                      help="remove predictions on unplaced contigs.")

    parser.add_option("--skip-without-outgroups",
                      dest="skip_without_outgroups",
                      action="store_true",
                      help="skip clusters without outgroups.")

    parser.set_defaults(
        filter_quality="all",
        filter_location="all",
        remove_unplaced=False,
        # Raw strings: "\|" is not a valid string escape. The pattern values
        # are unchanged.
        species_regex=r"^([^|]+)\|",
        gene_regex=r"^[^|]+\|[^|]+\|([^|]+)\|",
        filename_species_tree=None,
        priority={
            "Speciation": 0,
            "SpeciationDeletion": 1,
            "Transcripts": 2,
            "DuplicationLineage": 3,
            "Duplication": 4,
            "DuplicationDeletion": 5,
            "DuplicationInconsistency": 6,
            "Outparalogs": 7,
            "InconsistentTranscripts": 8,
            "Inconsistency": 9,
            "Masked": 10
        },
        species_tree=None,
        filename_species2colour=None,
        filename_locations=None,
        max_separation=0,
        filename_species2url=None,
        separator="|",
        prefix=None,
        output_pattern=None,
        output_pattern_svg=None,
        outgroup_species=None,
        svg_add_ids=False,
        svg_branch_lengths="median",
        svg_otus=None,
        subtrees=False,
        print_svg=False,
        print_subtotals=False,
        print_totals=False,
        print_best=False,
        subtrees_identifiers=False,
        create=True,
        min_branch_length=0.00,
        filename_node_types=None,
        format_branch_length="%6.4f",
        nodetypes_inconsistency=("InconsistentTranscripts", "Inconsistency"),
        analyze_resolution_data=None,
        warning_small_branch_length=0.01,
        filename_filter_positives=None,
        skip_without_outgroups=False,
    )

    (options, args) = E.Start(parser,
                              add_psql_options=True,
                              add_csv_options=True)

    # Comma-separated lists become sets for fast membership tests.
    if options.outgroup_species:
        options.outgroup_species = set(options.outgroup_species.split(","))

    if options.svg_otus:
        options.svg_otus = set(options.svg_otus.split(","))

    # Helpers that pull species / gene / quality out of a taxon identifier.
    rx_species = re.compile(options.species_regex)
    extract_species = lambda x: rx_species.match(x).groups()[0]
    if options.gene_regex:
        rx_gene = re.compile(options.gene_regex)
        extract_gene = lambda x: rx_gene.match(x).groups()[0]
    else:
        extract_gene = None

    # The quality code is the fourth field of the identifier.
    extract_quality = lambda x: x.split(options.separator)[3]

    #########################################################################
    # read positive list of malis
    #########################################################################
    if options.filename_filter_positives:
        with open(options.filename_filter_positives, "r") as infile:
            filter_positives, nerrors = IOTools.ReadList(infile)
        filter_positives = set(filter_positives)
    else:
        filter_positives = None

    #########################################################################
    # read location info
    #########################################################################
    if options.filename_locations:
        with open(options.filename_locations, "r") as infile:
            map_id2location = TreeReconciliation.readLocations(
                infile, extract_species)
    else:
        map_id2location = {}

    if (options.remove_unplaced or options.filter_location != "all"
        ) and not options.filename_locations:
        # A bare string is not a valid exception object; raise a real one.
        raise ValueError("please supply a file with location information.")

    #########################################################################
    # delete output files
    #########################################################################
    if options.create and options.output_pattern:
        for section in ("details", "subtrees", "subids", "trees", "nodes",
                        "categories"):
            fn = options.output_pattern % section
            if os.path.exists(fn):
                if options.loglevel >= 1:
                    options.stdlog.write("# deleting file %s.\n" % fn)
                os.remove(fn)

    if options.loglevel >= 1:
        options.stdlog.write("# reading gene trees.\n")
        options.stdlog.flush()

    gene_nexus = TreeTools.Newick2Nexus(sys.stdin)

    Tree.updateNexus(gene_nexus)

    if options.loglevel >= 1:
        options.stdlog.write("# read %i gene trees from stdin.\n" %
                             len(gene_nexus.trees))
        options.stdlog.flush()

    #########################################################################
    # main loop over gene trees
    #########################################################################
    ninput, nfiltered, nskipped, noutput = 0, 0, 0, 0
    nskipped_filter, nskipped_outgroups = 0, 0

    # counts accumulated over all trees
    total_heights_per_species = {}
    total_relheights_per_species = {}
    total_heights_per_tree = []
    total_relheights_per_tree = []

    for gene_tree in gene_nexus.trees:

        ninput += 1

        # Reduce the tree name to the identifier used in the positive list.
        xname = re.sub("_tree.*", "", gene_tree.name)
        xname = re.sub("subtree_", "", xname)

        if filter_positives and xname not in filter_positives:
            nskipped_filter += 1
            continue

        if options.loglevel >= 6:
            gene_tree.display()

        #######################################################################
        # get identifier for this tree and update prefixes accordingly
        #######################################################################
        # NOTE(review): inside this loop gene_nexus.trees is never empty, so
        # the "else" branches below cannot be reached. Possibly "> 1" was
        # intended; the conditions are kept as they were to preserve output.
        if options.prefix:
            if len(gene_nexus.trees) > 0:
                prefix_header = "prefix1\tprefix2\t"
                prefix_row = options.prefix + "\t" + gene_tree.name + "\t"
            else:
                prefix_header = "prefix\t"
                prefix_row = options.prefix + "\t"
        else:
            if len(gene_nexus.trees) > 0:
                prefix_header = "prefix\t"
                prefix_row = gene_tree.name + "\t"
            else:
                prefix_header, prefix_row = "", ""

        #######################################################################
        # apply filters to gene tree
        #######################################################################
        TreeReconciliation.filterTree(gene_tree, options, map_id2location)

        otus = TreeTools.GetTaxa(gene_tree)

        if len(otus) <= 1:
            nfiltered += 1
            if options.loglevel >= 1:
                options.stdlog.write(
                    "# tree %s: empty after filtering - skipped.\n" %
                    gene_tree.name)
            continue

        # Build the species set once; it is consulted twice below and a
        # lazy map() result could only be consumed once.
        this_species = set(map(extract_species, otus))

        if options.outgroup_species:
            # check, if only outgroups
            if not this_species.difference(options.outgroup_species):
                nfiltered += 1
                if options.loglevel >= 1:
                    options.stdlog.write(
                        "# tree %s: only outgroups after filtering - skipped.\n"
                        % gene_tree.name)
                continue

            if options.skip_without_outgroups and not this_species.intersection(
                    options.outgroup_species):
                nskipped_outgroups += 1
                if options.loglevel >= 1:
                    options.stdlog.write(
                        "# tree %s: no outgroups - skipped.\n" %
                        gene_tree.name)
                continue

        #######################################################################
        # reroot gene tree, if outgroups have been given.
        #######################################################################
        if options.outgroup_species:
            TreeReconciliation.rerootTree(gene_tree, extract_species, options)

        #######################################################################
        # compute distance to root for each node
        #######################################################################
        distance_to_root = TreeTools.GetDistanceToRoot(gene_tree)

        #######################################################################
        # compute counts
        #######################################################################
        # heights per tree
        heights_per_tree = []
        # relative heights per tree
        relheights_per_tree = []
        # distance to root
        heights_per_species = {}
        # distance to root (relative to maximum distance to root)
        relheights_per_species = {}

        analysis_set, gene_set, pseudogene_set, other_set = TreeReconciliation.getAnalysisSets(
            gene_tree, extract_quality, options)

        if len(analysis_set) == 0:
            if options.loglevel >= 1:
                options.stdlog.write(
                    "# tree %s: empty analysis set - skipped.\n" %
                    gene_tree.name)
            nskipped += 1
            continue

        reference_height = TreeReconciliation.getReferenceHeight(
            distance_to_root,
            gene_tree,
            gene_set,
            options,
            extract_species,
            method="median")

        if reference_height is None:
            if options.loglevel >= 1:
                options.stdlog.write(
                    "# tree %s: reference height not computable or 0 - skipped.\n"
                    % gene_tree.name)
            nskipped += 1
            continue

        for node_id in analysis_set:

            node = gene_tree.node(node_id)
            species = extract_species(node.data.taxon)
            height = distance_to_root[node_id]

            if height < options.warning_small_branch_length:
                options.stdlog.write(
                    "# tree %s: small distance %s to root at node %i: %s\n" %
                    (gene_tree.name, options.format_branch_length % height,
                     node_id, node.data.taxon))

            relheight = height / reference_height

            # Per-species lists are created on first use.
            heights_per_species.setdefault(species, []).append(height)
            relheights_per_species.setdefault(species, []).append(relheight)

            # do not use outgroup species for the per-tree counts
            if options.outgroup_species and species in options.outgroup_species:
                continue

            heights_per_tree.append(height)
            relheights_per_tree.append(relheight)

        if options.loglevel >= 1:
            options.stdlog.write(
                "# tree %s: reference_height=%s\n" %
                (gene_tree.name,
                 options.format_branch_length % reference_height))
            options.stdlog.flush()

        if options.print_subtotals:
            printCounts(heights_per_species, relheights_per_species,
                        heights_per_tree, relheights_per_tree, options,
                        prefix_header, prefix_row)

        #######################################################################
        # update total counts
        #######################################################################
        TreeReconciliation.appendCounts(total_heights_per_species,
                                        heights_per_species)
        TreeReconciliation.appendCounts(total_relheights_per_species,
                                        relheights_per_species)

        TreeReconciliation.appendCounts(total_heights_per_tree,
                                        heights_per_tree)
        TreeReconciliation.appendCounts(total_relheights_per_tree,
                                        relheights_per_tree)

        noutput += 1

    if options.print_totals:

        if options.prefix:
            prefix_header = "prefix1\tprefix2\t"
            prefix_row = options.prefix + "\t" + "total" + "\t"
        else:
            prefix_header = "prefix\t"
            prefix_row = "total" + "\t"

        printCounts(total_heights_per_species, total_relheights_per_species,
                    total_heights_per_tree, total_relheights_per_tree, options,
                    prefix_header, prefix_row)

    if options.loglevel >= 1:
        options.stdlog.write(
            "# ninput=%i, nfiltered=%i, nskipped=%i, nskipped_filter=%i, nskipped_outgroups=%i, noutput=%i\n"
            % (ninput, nfiltered, nskipped, nskipped_filter,
               nskipped_outgroups, noutput))

    E.Stop()

Esempio n. 28

Mostra file

def Process(lines, other_trees, options, map_old2new, ntree):
    """Apply the requested methods to the trees in *lines* and write them.

    Arguments
    ---------
    lines : list of str
        Newick formatted input lines (one chunk of the input).
    other_trees : list
        Trees used as divisor by the ``divide-by-tree`` method. If more
        than one tree is given, the tree at position *ntree* is used.
    options : object
        Parsed command line options. Methods that need a parameter
        consume it from the front of ``options.parameters``.
    map_old2new : dict
        Map of old to new taxon names. It is filled *in place* by the
        ``rename`` and ``build-map`` methods so that the caller can
        re-use it for the next chunk.
    ntree : int
        Number of trees processed before this call.

    Returns
    -------
    tuple
        ``(ntotal, nskipped, ntree)``: number of trees in this chunk,
        number of times ``divide-by-tree`` was skipped because the trees
        differ, and the updated running tree counter.

    Raises
    ------
    ValueError
        If ``divide-by-tree`` is requested but *other_trees* is empty.
    """

    # Strip only the line terminator: slicing off the last character would
    # truncate a final line that has no trailing newline.
    nexus = TreeTools.Newick2Nexus([line.rstrip("\n") for line in lines])

    if options.loglevel >= 1:
        options.stdlog.write("# read %i trees.\n" % len(nexus.trees))

    nskipped = 0
    ntotal = len(nexus.trees)

    # Helpers that are set up on first use; setting them up consumes
    # entries of options.parameters.
    extract_pattern = None
    species2remove = None
    phylip = None

    for index, tree in enumerate(nexus.trees):

        if options.outgroup:
            tree.root_with_outgroup(options.outgroup)

        for method in options.methods:

            if options.loglevel >= 3:
                options.stdlog.write("# applying method %s to tree %i.\n" %
                                     (method, index))

            if method == "midpoint-root":
                tree.root_midpoint()

            elif method == "balanced-root":
                tree.root_balanced()

            elif method == "unroot":
                TreeTools.Unroot(tree)

            elif method == "phylip":
                if phylip is None:
                    # parameters: executable, then "@"-separated options
                    phylip_executable = options.parameters[0]
                    del options.parameters[0]
                    phylip_options = re.split("@", options.parameters[0])
                    del options.parameters[0]

                    phylip = WrapperPhylip.Phylip()
                    phylip.setProgram(phylip_executable)
                    phylip.setOptions(phylip_options)

                phylip.setTree(tree)

                result = phylip.run()

                # NOTE(review): later methods in this loop still operate on
                # the original ``tree`` object, not on the phylip result -
                # confirm that this is intended.
                nexus.trees[index] = result.mNexus.trees[0]

            elif method == "normalize":
                # Without an explicit value, normalize by the longest branch.
                if options.value == 0:
                    v = 0
                    for n in tree.chain.keys():
                        v = max(v, tree.node(n).data.branchlength)
                else:
                    v = options.value

                if v == 0:
                    # All branches have length 0: nothing to scale.
                    options.stdlog.write(
                        "# Warning: tree %i: normalizing value is 0 - tree not normalized\n"
                        % index)
                else:
                    scale = float(v)
                    for n in tree.chain.keys():
                        tree.node(n).data.branchlength /= scale

            elif method == "divide-by-tree":

                if not other_trees:
                    raise ValueError(
                        "method divide-by-tree requires a second tree")

                if len(other_trees) > 1:
                    other_tree = other_trees[ntree]
                else:
                    other_tree = other_trees[0]

                # the trees have to be exactly the same!!
                if options.loglevel >= 2:
                    print(tree.display())
                    print(other_tree.display())

                if not tree.is_identical(other_tree):
                    nskipped += 1
                    continue

                # even if the trees are the same (in topology), the node numbering might not be
                # the same. Thus build a map of node ids.
                map_a2b = TreeTools.GetNodeMap(tree, other_tree)

                for n in tree.chain.keys():
                    try:
                        tree.node(n).data.branchlength /= float(
                            other_tree.node(map_a2b[n]).data.branchlength)
                    except ZeroDivisionError:
                        options.stdlog.write(
                            "# Warning: branch for nodes %i and %i in tree-pair %i: divide by zero\n"
                            % (n, map_a2b[n], ntree))
                        continue

            elif method == "rename":
                if not map_old2new:
                    infile = open(options.parameters[0], "r")
                    try:
                        loaded = IOTools.ReadMap(infile, columns=(0, 1))
                    finally:
                        infile.close()

                    if options.invert_map:
                        loaded = IOTools.getInvertedDictionary(
                            loaded, make_unique=True)

                    del options.parameters[0]

                    # Fill the caller's dictionary in place: rebinding the
                    # local name would lose the map after this call and the
                    # (already consumed) parameter would be needed again.
                    map_old2new.update(loaded)

                unknown = []
                for node in tree.chain.values():
                    if node.data.taxon:
                        try:
                            node.data.taxon = map_old2new[node.data.taxon]
                        except KeyError:
                            unknown.append(node.data.taxon)

                # taxa without a new name are removed from the tree
                for taxon in unknown:
                    tree.prune(taxon)

            # reformat terminals
            elif method == "extract-with-pattern":

                if not extract_pattern:
                    extract_pattern = re.compile(options.parameters[0])
                    del options.parameters[0]

                for n in tree.get_terminals():
                    node = tree.node(n)
                    node.data.taxon = extract_pattern.search(
                        node.data.taxon).groups()[0]

            # Both spellings are accepted: the command line help historically
            # advertised the hyphenated one.
            elif method in ("set-uniform-branchlength",
                            "set-uniform-branch-length"):
                for n in tree.chain.keys():
                    tree.node(n).data.branchlength = options.value

            elif method == "build-map":
                # build a map of identifiers
                options.write_map = True
                for n in tree.get_terminals():
                    node = tree.node(n)
                    if node.data.taxon not in map_old2new:
                        new = options.template_identifier % (len(map_old2new) +
                                                             1)
                        map_old2new[node.data.taxon] = new
                    node.data.taxon = map_old2new[node.data.taxon]

            elif method == "remove-pattern":
                if species2remove is None:
                    species2remove = re.compile(options.parameters[0])
                    # consume only the used parameter, not the whole list
                    del options.parameters[0]

                # keep the terminals whose name does not match the pattern
                taxa = []
                for n in tree.get_terminals():
                    t = tree.node(n).data.taxon
                    if not species2remove.search(t):
                        taxa.append(t)
                TreeTools.PruneTree(tree, taxa)

            elif method == "add-node-names":
                # give every unnamed node a running "inode<N>" name
                inode = 0
                for node in tree.chain.values():
                    if not node.data.taxon:
                        node.data.taxon = "inode%i" % inode
                        inode += 1

            elif method == "newick2nhx":
                # convert names to species names
                for n in tree.get_terminals():
                    t = tree.node(n).data.taxon
                    d = t.split("|")
                    if len(d) >= 2:
                        tree.node(n).data.species = d[0]

        ntree += 1

    if options.output_format == "nh":
        # NOTE(review): write_all_taxa is always True here, independent of
        # whether "add-node-names" was applied - confirm this is intended.
        options.stdout.write(
            TreeTools.Nexus2Newick(
                nexus,
                write_all_taxa=True,
                with_branchlengths=options.with_branchlengths) + "\n")
    else:
        for tree in nexus.trees:
            tree.writeToFile(options.stdout, format=options.output_format)

    return ntotal, nskipped, ntree

Esempio n. 29

Mostra file

def main(argv=None):
    """script main.

    parses command line options in sys.argv, unless *argv* is given.
    """

    if argv == None: argv = sys.argv

    parser = E.OptionParser(
        version=
        "%prog version: $Id: optic/count_orgs.py 1706 2007-12-11 16:46:11Z andreas $",
        usage=globals()["__doc__"])

    parser.add_option("-t",
                      "--reference-tree",
                      dest="reference_tree",
                      type="string",
                      help="reference tree to read.")

    parser.add_option("-p",
                      "--filename-patterns",
                      dest="filename_patterns",
                      type="string",
                      help="filename with patterns to output.")

    parser.add_option("-u",
                      "--filename-summary",
                      dest="filename_summary",
                      type="string",
                      help="filename with summary to output.")

    parser.add_option("-f",
                      "--format",
                      dest="format",
                      type="choice",
                      choices=("map", "links", "trees"),
                      help="output format.")

    parser.add_option("-o",
                      "--organisms",
                      dest="column2org",
                      type="string",
                      help="sorted list of organisms.")

    parser.add_option(
        "-s",
        "--species-regex",
        dest="species_regex",
        type="string",
        help="regular expression to extract species from identifier.")

    parser.add_option(
        "-g",
        "--gene-regex",
        dest="gene_regex",
        type="string",
        help="regular expression to extract gene from identifier.")

    parser.set_defaults(
        reference_tree=None,
        format="map",
        filename_patterns=None,
        column2org=None,
        species_regex="^([^|]+)\|",
        gene_regex="^[^|]+\|[^|]+\|([^|]+)\|",
        separator="|",
        filename_summary=None,
    )

    (options, args) = E.Start(parser)

    if options.reference_tree:
        if options.reference_tree[0] == "(":
            nexus = TreeTools.Newick2Nexus(options.reference_tree)
        else:
            nexus = TreeTools.Newick2Nexus(open(options.reference_tree, "r"))
        reference_tree = nexus.trees[0]

        if options.loglevel >= 3:
            print "# reference tree:"
            print reference_tree.display()

    else:
        reference_tree = None

    clusters = {}
    if options.format == "map":

        for line in sys.stdin:
            if line[0] == "#": continue
            id, r = line[:-1].split("\t")
            if r not in clusters: clusters[r] = []
            clusters[r].append(id)

    elif options.format == "trees":

        nexus = TreeTools.Newick2Nexus(sys.stdin)

        for tree in nexus.trees:
            clusters[tree.name] = tree.get_taxa()

    elif options.format == "links":
        members = set()
        id = None
        for line in sys.stdin:
            if line[0] == "#": continue

            if line[0] == ">":
                if id: clusters[id] = members
                x = re.match(">cluster #(\d+)", line[:-1])
                if x:
                    id = x.groups()[0]
                else:
                    id = line[1:-1]
                members = set()
                continue

            data = line[:-1].split("\t")[:2]
            members.add(data[0])
            members.add(data[1])

        if id: clusters[id] = members

    if len(clusters) == 0:
        raise "empty input."

    ########################################################################
    ########################################################################
    ########################################################################
    ## sort out reference tree
    ########################################################################
    rs = re.compile(options.species_regex)
    rg = re.compile(options.gene_regex)
    extract_species = lambda x: rs.search(x).groups()[0]

    ## prune tree to species present
    species_set = set()
    for cluster, members in clusters.items():
        species_set = species_set.union(set(map(extract_species, members)))

    if reference_tree:

        TreeTools.PruneTree(reference_tree, species_set)

        if options.loglevel >= 1:
            options.stdlog.write("# Tree after pruning: %i taxa.\n" %
                                 len(reference_tree.get_taxa()))

    if options.column2org:
        options.column2org = options.column2org.split(",")
    elif reference_tree:
        options.column2org = []
        for nx in reference_tree.get_terminals():
            options.column2org.append(reference_tree.node(nx).get_data().taxon)
    else:
        options.column2org = []
        for x in species_set:
            options.column2org.append(x)

    options.org2column = {}
    for x in range(len(options.column2org)):
        options.org2column[options.column2org[x]] = x

    if reference_tree:
        reference_patterns = TreeTools.calculatePatternsFromTree(
            reference_tree, options.column2org)

        if options.loglevel >= 3:
            print "# reference patterns:"
            print reference_patterns

    ##############################################################################
    notus = len(options.column2org)
    patterns = {}
    species_counts = [SpeciesCounts() for x in options.column2org]

    ## first genes, then transcripts
    options.stdout.write(
        "mali\tpattern\tpresent\tngenes\t%s\tntranscripts\t%s\n" %
        ("\t".join(options.column2org), "\t".join(options.column2org)))

    keys = clusters.keys()
    keys.sort()
    for cluster in keys:
        members = clusters[cluster]

        count_genes = [{} for x in range(len(options.org2column))]
        count_transcripts = [0] * len(options.org2column)

        for m in members:
            data = m.split(options.separator)

            if len(data) == 4:
                s, t, g, q = data
            elif len(data) == 2:
                s, g = data
                t = g

            if s not in options.org2column:
                raise "unknown species %s" % s

            col = options.org2column[s]

            count_transcripts[col] += 1
            if g not in count_genes[col]:
                count_genes[col][g] = 0

            count_genes[col][g] += 1

            species_counts[col].mGenes.add(g)
            species_counts[col].mTranscripts.add(t)
            species_counts[col].mTrees.add(cluster)

        ntotal_transcripts = reduce(lambda x, y: x + y, count_transcripts)
        npresent_transcripts = len(filter(lambda x: x > 0, count_transcripts))
        ntotal_genes = reduce(lambda x, y: x + y, map(len, count_genes))
        npresent_genes = len(filter(lambda x: x > 0, map(len, count_genes)))

        pattern = GetPattern(count_transcripts, notus)
        if pattern not in patterns: patterns[pattern] = 0
        patterns[pattern] += 1
        options.stdout.write(
            string.join(
                (cluster, pattern, str(npresent_genes), str(ntotal_genes),
                 string.join(map(str, map(len, count_genes)), "\t"),
                 str(ntotal_transcripts),
                 string.join(map(str, count_transcripts), "\t")), "\t") + "\n")

    #######################################################################################
    #######################################################################################
    #######################################################################################
    ## write pattern summary
    #######################################################################################
    xx = patterns.keys()
    xx.sort()
    if options.filename_patterns:
        outfile = open(options.filename_patterns, "w")
    else:
        outfile = sys.stdout

    for x in range(len(options.column2org)):
        outfile.write("# %i = %s\n" % (x, options.column2org[x]))

    if reference_tree:
        outfile.write("pattern\tcounts\tisok\n")
    else:
        outfile.write("pattern\tcounts\n")

    for x in xx:
        if reference_tree:
            if x in reference_patterns:
                is_ok = "1"
            else:
                is_ok = "0"
            outfile.write("%s\t%s\t%s\n" % (x, patterns[x], is_ok))
        else:
            outfile.write("%s\t%s\n" % (x, patterns[x]))

    if outfile != sys.stdout: outfile.close()

    #######################################################################################
    #######################################################################################
    #######################################################################################
    ## write summary counts per species
    #######################################################################################
    if options.filename_summary:
        outfile = open(options.filename_summary, "w")
    else:
        outfile = sys.stdout

    outfile.write("species\tntranscripts\tngenes\tntrees\n")

    for species, col in options.org2column.items():
        outfile.write(
            "%s\t%i\t%i\t%i\n" %
            (species, len(species_counts[col].mTranscripts),
             len(species_counts[col].mGenes), len(species_counts[col].mTrees)))

    if outfile != sys.stdout: outfile.close()

    E.Stop()

Esempio n. 30

Mostra file

File: evaluate_bootstrap.py Progetto: santayana/cgat

def AnalysePatterns(patterns,
                    map_id2org,
                    min_cluster_support=100,
                    min_report_support=90):
    """analyse partitions by comparing to reference tree.

    Prints out for each partition, whether left/right is consistent
    with reference tree or not.

    If there are full complements on either side, print suggested split.

    Prints summary statistics:
    for each consistent partition:
        print counts
    """

    # reread and process species tree
    # has to be done for every new pass, because
    # the tree is modified later on (and I haven't found
    # a copy mechanism (because I did not look)).
    nexus = TreeTools.Newick2Nexus(param_reference_tree)
    reference_tree = nexus.trees[0]

    norgs = len(reference_tree.get_terminals())
    notus = len(patterns[0][1])

    # complement patterns with single species patterns:
    patterns.reverse()
    for x in range(notus):
        pattern = ["."] * notus
        pattern[x] = "*"
        patterns.append((100, string.join(pattern, "")))
    patterns.reverse()

    ##########################################################################
    # first pass: separate well supported full species trees
    masks = []
    present_orgs = {}
    mask_id = 0

    for support, pattern in patterns:

        t1, t2, i1, i2 = {}, {}, [], []

        for x in range(len(pattern)):
            org, name, nid = map_id2org[x]
            if org == "unknown":
                continue
            present_orgs[org] = 1
            if pattern[x] == "*":
                t1[org] = 1
                i1.append(name)
            else:
                t2[org] = 1
                i2.append(name)

        t1 = t1.keys()
        t2 = t2.keys()
        t1.sort()
        t2.sort()

        if param_loglevel >= 4:
            print "# ", pattern, len(t1), len(t2), i1, i2
            sys.stdout.flush()

        if len(t1) == len(t2) and \
                len(t1) == norgs and \
                support >= min_cluster_support:

            mask1, notus1 = [], 0
            mask2, notus2 = [], 0

            for x in range(len(pattern)):
                if pattern[x] == "*":
                    notus1 += 1
                    mask1.append(1)
                    mask2.append(0)
                else:
                    notus2 += 1
                    mask1.append(0)
                    mask2.append(1)

            mask_id += 1
            masks.append(Results(mask1, notus1, len(t1), mask_id=mask_id))
            mask_id += 1
            masks.append(Results(mask2, notus2, len(t2), mask_id=mask_id))

            if param_loglevel >= 2:
                print "# split\tfull\t%i\t%s\t%i\t%i\t%s" % (
                    support, string.join(map(str, mask1), ""), notus1, len(t1),
                    string.join(i1, ";"))
                print "# split\tfull\t%i\t%s\t%i\t%i\t%s" % (
                    support, string.join(map(str, mask2), ""), notus2, len(t2),
                    string.join(i2, ";"))

    # add full mask
    if len(masks) == 0:
        masks.append(Results([1] * notus, notus, len(present_orgs), mask_id=1))

    ##########################################################################
    # second pass: check subtrees for each mask
    # external: edges leading to external nodes (i.e., leaves): total number = norgs
    # internal: all other edges: maximum number = 2 * (2 * norgs - 3 - norgs) = 2 * (norgs - 3)
    # 1st factor 2: two directions
    # 2nd factor: 2n-3 is number of edges in unrooted tree.
    # 3rd factor: -n = number of external edges
    for mask in masks:
        reference_tree = GetPrunedReferenceTree(mask, GetOrgs(map_id2org),
                                                param_reference_tree)
        AnalyseMask(mask, patterns, norgs, reference_tree, map_id2org,
                    min_report_support)

    if param_loglevel >= 1:
        print "# partitions after evaluation:"
        print "#", Results().printHeader()
        for m in masks:
            print "#", str(m)

    reference_tree = GetPrunedReferenceTree(mask, GetOrgs(map_id2org),
                                            param_reference_tree)
    new_masks = SelectMasks(masks, patterns, norgs, map_id2org,
                            min_report_support)

    if param_loglevel >= 1:
        print "# partitions after selection:"
        print "#", Results().printHeader()
        for m in new_masks:
            print "#", str(m)

    return new_masks