Python DataSet Examples, dendropy.DataSet Python Examples

Example #1

0

Show file

File: test_phylogenetic_tree.py Project: faircloth-lab/sate-core

 def phylogeneticTreeFromFile(self, treefile, file_format):
     """Load the first tree stored in ``treefile`` as a ``PhylogeneticTree``.

     ``treefile`` is a filesystem path; ``file_format`` is forwarded to
     ``Dataset.read`` as its ``schema`` argument.  The first tree of the
     first tree list is wrapped in a ``PhylogeneticTree``, ``calc_splits()``
     is called on the wrapper and ``delete_outdegree_one`` is applied to
     ``tree._tree`` before the wrapper is returned.

     Raises ``IndexError`` if the parsed data set holds no tree.
     """
     dataset = Dataset()
     # Close the handle as soon as parsing is done instead of leaving it
     # open until garbage collection.
     # NOTE(review): the 'rU' mode was removed in Python 3.11; plain 'r' is
     # the Python 3 equivalent if this code is ever ported.
     with open(treefile, 'rU') as tree_stream:
         dataset.read(tree_stream, schema=file_format)
     dendropy_tree = dataset.tree_lists[0][0]
     tree = PhylogeneticTree(dendropy_tree)
     tree.calc_splits()
     delete_outdegree_one(tree._tree)
     return tree

Example #2

0

Show file

 def phylogeneticTreeFromFile(self, treefile, file_format):
     """Load the first tree stored in ``treefile`` as a ``PhylogeneticTree``.

     ``treefile`` is a filesystem path; ``file_format`` is forwarded to
     ``Dataset.read`` as its ``schema`` argument.  The first tree of the
     first tree list is wrapped in a ``PhylogeneticTree``, ``calc_splits()``
     is called on the wrapper and ``delete_outdegree_one`` is applied to
     ``tree._tree`` before the wrapper is returned.

     Raises ``IndexError`` if the parsed data set holds no tree.
     """
     dataset = Dataset()
     # Close the handle as soon as parsing is done instead of leaving it
     # open until garbage collection.
     # NOTE(review): the 'rU' mode was removed in Python 3.11; plain 'r' is
     # the Python 3 equivalent if this code is ever ported.
     with open(treefile, 'rU') as tree_stream:
         dataset.read(tree_stream, schema=file_format)
     dendropy_tree = dataset.tree_lists[0][0]
     tree = PhylogeneticTree(dendropy_tree)
     tree.calc_splits()
     delete_outdegree_one(tree._tree)
     return tree

Example #3

0

Show file

File: __init__.py Project: snacktavish/physcraper

def get_dataset_from_treebase(study_id,
                                phylesystem_loc='api'):
    """Download the TreeBASE NeXML record linked from a study's nexson.

    The nexson is fetched with ``get_nexson(study_id, phylesystem_loc)`` and
    its ``nexml -> ^ot:dataDeposit -> @href`` value is inspected.  When that
    URL does not contain ``'treebase'`` a message is written to stderr and
    the process exits with status 1.  Otherwise the text after ``':S'`` in
    the URL is used as the TreeBASE study id and the matching NeXML download
    is read with ``DataSet.get``.

    Returns whatever ``DataSet.get(..., schema="nexml")`` returns.
    """
    nexson = get_nexson(study_id, phylesystem_loc)
    treebase_url = nexson['nexml'][u'^ot:dataDeposit'][u'@href']
    if 'treebase' not in treebase_url:
        # sys.stderr is a stream, not a callable: the original
        # ``sys.stderr(...)`` raised TypeError instead of reporting the
        # problem.  Exit non-zero so callers can detect the failure.
        sys.stderr.write(
            "No treebase record associated with study {}\n".format(study_id))
        sys.exit(1)
    tb_id = treebase_url.split(':S')[1]
    url = "https://treebase.org/treebase-web/search/downloadAStudy.html?id={}&format=nexml".format(tb_id)
    return DataSet.get(url=url, schema="nexml")

Example #4

0

Show file

File: opentree_helpers.py Project: cnyuanh/physcraper

def get_dataset_from_treebase(study_id, phylesystem_loc="api"):
    """Function is used to get the aln from treebase, for a tree that OpenTree has the mapped tree.

    The nexson is fetched with ``get_nexson(study_id, phylesystem_loc)``.
    If that raises ``HTTPError`` the error and a hint are written to stderr
    and the exception is re-raised.  If the study's
    ``nexml -> ^ot:dataDeposit -> @href`` URL does not contain
    ``'treebase'`` a message is written to stderr and the process exits with
    status -2.  Otherwise the text after ``':S'`` in that URL is used as the
    TreeBASE study id and the NeXML download is read with ``DataSet.get``.

    Returns whatever ``DataSet.get(url=..., schema="nexml")`` returns.
    """
    try:
        nexson = get_nexson(study_id, phylesystem_loc)
    except HTTPError as err:
        # write() needs text, not the exception object itself.
        sys.stderr.write("{}\n".format(err))
        sys.stderr.write(
            "couldn't find study id {} in phylesystem location {}\n".format(
                study_id, phylesystem_loc))
        # Without a nexson there is nothing to continue with; falling
        # through used to fail with NameError on ``nexson`` and hide the
        # real cause.
        raise
    treebase_url = nexson['nexml'][u'^ot:dataDeposit'][u'@href']
    if 'treebase' not in treebase_url:
        sys.stderr.write("No treebase record associated with study ")
        sys.exit(-2)
    tb_id = treebase_url.split(':S')[1]
    url = "https://treebase.org/treebase-web/search/downloadAStudy.html?id={}&format=nexml".format(
        tb_id)
    if _DEBUG:
        sys.stderr.write(url + "\n")
    dna = DataSet.get(url=url, schema="nexml")
    return dna

Example #5

0

Show file

def parse_nexus(nexus_loc):
    """ Load a nexus file and summarise it as a dictionary for the template engine
    Parameters
    ----------
    nexus_loc: str
        path of the nexus file to read

    Returns
    -------
    dict
        ``taxa`` maps each taxon label to its id, date and sequence string;
        ``tree`` is the first tree list as a newick string; ``site_count``,
        ``taxon_count``, ``cutoff`` and ``grid_points`` are derived totals.
    """
    taxa = {}
    summary = {"taxa": taxa}

    print("Parsing nexus file... ", end="")
    dataset = DataSet.get(path=nexus_loc, schema="nexus")

    summary["tree"] = dataset.tree_lists[0].as_string("newick")

    matrix = dataset.char_matrices[0]

    # Latest date seen so far; 2020.1 is the floor used when no taxon is later.
    latest = 2020.1
    loaded = 0
    for taxon, sequence in matrix.items():
        loaded += 1
        label = taxon.label

        when = parse_date(label)
        latest = when if when > latest else latest

        taxa[label] = {"id": label, "date": when, "sequence": str(sequence)}
    print("Done. {} taxa loaded".format(loaded))

    summary["site_count"] = matrix.max_sequence_size
    summary["taxon_count"] = len(taxa)

    # The cutoff is measured from 2019.75 and gridded at 52 points per unit.
    cutoff = latest - 2019.75
    summary["cutoff"] = cutoff
    summary["grid_points"] = round(cutoff * 52)

    return summary

Example #6

0

Show file

File: scale_trees.py Project: BioinformaticsArchive/DendroPy

if __name__ == '__main__':
    # Command-line entry point: read trees from a file argument (or from
    # stdin when no argument is given), pass every tree to scale_edges with
    # the chosen multiplier, and write the data set to stdout in the same
    # schema it was read in.

    from optparse import OptionParser
    parser = OptionParser()
    parser.add_option("-m", "--multiplier", dest="multip", default=1.0,
        type="float",
        help="The multiplier used for every branch length.")
    parser.add_option("-n", "--nexus", dest="schema", action="store_const", const="NEXUS", default="NEWICK",
        help="Tree is in NEXUS schema.")
    (options, args) = parser.parse_args()


    if len(args) > 1:
        sys.exit("At most one argument (a newick tree string with branch lengths) can be specified")
    if len(args) == 1:
        # The single positional argument is opened as a file path.
        s = open(args[0], 'rU')
    else:
        newick = sys.stdin.read()
        s = StringIO.StringIO(newick)

    multip = options.multip
    d = DataSet()

    # Release the input stream once parsing is finished (or has failed);
    # the original never closed it.
    try:
        d.read(s, schema=options.schema, rooted=True)
    finally:
        s.close()
    if len(d.tree_lists) == 0:
        sys.exit("No trees found in file.")
    for tb in d.tree_lists:
        for tree in tb:
            scale_edges(tree, multip)
    d.write_to_stream(sys.stdout, schema=options.schema)

Example #7

0

Show file

import json
from dendropy.treesplit import encode_splits, split_to_list
from cStringIO import StringIO
from dendropy import DataSet
from dendropy.utility.error import DataParseError
from dendropy.utility.textutils import escape_nexus_token

if __name__ == '__main__':
    '''
    This method converts Newick to Nexon by splitting up the mod_name made in
    taxonomyToNewick.py to 'name' and 'ottolid'. Nexon is later used for TAG's input
    trees. 
    '''
    output = sys.stdout
    fo = open(sys.argv[1], "rU")
    dataset = DataSet()
    try:
        dataset.read(stream=fo, schema="Newick")
    except DataParseError as dfe:
        raise ValueError(str(dfe))
    if len(dataset.taxon_sets) != 1:
        raise ValueError("Expecting one set of taxa in %s" % f)
    if len(dataset.tree_lists) != 1:
        raise ValueError("Expecting one tree in %s" % f)
    taxon_set = dataset.taxon_sets[0]
    tree_list = dataset.tree_lists[0]
    number_of_taxon = len(taxon_set)
    branch_counter = 0
    code_list = [StringIO() for i in taxon_set]
    otu_counter = 0
    node_counter = 0

Example #8

0

Show file

File: treedepth.py Project: nuin/DendroPy

    parser.add_option(
        "--generate-test-tree",
        action="store_true",
        dest="gen_test_tree",
        help="Generate ultrametric tree to test function.",
    )
    (options, args) = parser.parse_args()

    if options.gen_test_tree:
        simple_test_tree()
        exit(0)

    if len(args) > 1:
        sys.exit("At most one argument (a newick tree string with branch lengths) can be specified")
    if len(args) == 1:
        s = args[0]
    else:
        newick = sys.stdin.read()
        s = StringIO.StringIO(newick)

    prec = options.prec
    d = DataSet()

    d.read(s, schema=options.schema, rooted=True)
    if len(d.trees_blocks) == 0:
        sys.exit("No trees found in file.")
    tree = d.trees_blocks[0][0]
    tree.calc_node_ages(attr_name="age", check_prec=options.prec)

    sys.stdout.write("%f\n" % tree.seed_node.age)

Example #9

0

Show file

File: long_branch_symmdiff.py Project: BioinformaticsArchive/DendroPy

                        action="store_true", default=False, help="Produce an output in the same format as PAUP's TreeDist command.")
    (options, args) = parser.parse_args()
    if len(args) == 0:
        sys.exit("Expecting a filename as an argument")
    schema = options.schema.upper()
    try:
        cutoff = int(options.cutoff)
    except ValueError:
        try:
            cutoff = float(options.cutoff)
        except ValueError:
            sys.exit('Expecting the cutoff to be a number found "%s"' % options.cutoff)

    trees = []
    taxon_set = TaxonSet()
    dataset = DataSet(taxon_set=taxon_set)
    if schema == "PHYLIP":
        schema = "NEWICK"
    for f in args:
        fo = open(f, "rU")
        dataset.read(stream=fo, schema=schema)
    for tl in dataset.tree_lists:
        trees.extend(tl)

    sd_mat = long_branch_symmdiff(trees, cutoff)
    o = sys.stdout
    if options.paup:
        o.write("%s\n" % "\t".join(["tree"] + [str(1+i) for i in xrange(len(sd_mat))]))
        for n, row in enumerate(sd_mat):
            o.write("%d\t%s\n" % ((n + 1), "\t".join([str(i) for i in row[:1 + n]])))
    else:

Example #10

0

Show file

                      '--schema',
                      dest='schema',
                      type='str',
                      default="newick",
                      help='The format/schema of the input data')
    parser.add_option('-g',
                      '--gordon',
                      dest='gordons',
                      action="store_true",
                      default=False,
                      help="Specify to use the Gordon's strict consensus")
    (options, args) = parser.parse_args()
    if len(args) == 0:
        sys.exit("Expecting a filename as an argument")
    schema = options.schema.upper()

    trees = []
    taxon_set = TaxonSet()
    dataset = DataSet(taxon_set=taxon_set)
    if schema == "PHYLIP":
        schema = "NEWICK"
    for f in args:
        fo = open(f, "rU")
        dataset.read(stream=fo, schema=schema)
    for tl in dataset.tree_lists:
        trees.extend(tl)

    o = inplace_strict_consensus_merge(trees,
                                       gordons_supertree=options.gordons)
    sys.stdout.write("%s;\n" % str(o))

Example #11

0

Show file

File: newick_to_nexon.py Project: mtholder/supertree-study

import json
from dendropy.treesplit import encode_splits,split_to_list
from cStringIO import StringIO
from dendropy import DataSet
from dendropy.utility.error import DataParseError
from dendropy.utility.textutils import escape_nexus_token

if __name__ == '__main__':
    '''
    This method converts Newick to Nexon by splitting up the mod_name made in
    taxonomyToNewick.py to 'name' and 'ottolid'. Nexon is later used for TAG's input
    trees. 
    '''
    output = sys.stdout
    fo = open(sys.argv[1], "rU")
    dataset = DataSet()
    try:
        dataset.read(stream=fo, schema="Newick")
    except DataParseError as dfe:
        raise ValueError(str(dfe))
    if len(dataset.taxon_sets) != 1:
        raise ValueError("Expecting one set of taxa in %s" % f)
    if len(dataset.tree_lists) != 1:
        raise ValueError("Expecting one tree in %s" % f)
    taxon_set = dataset.taxon_sets[0]
    tree_list = dataset.tree_lists[0]
    number_of_taxon = len(taxon_set)
    branch_counter = 0
    code_list = [StringIO() for i in taxon_set]
    otu_counter = 0
    node_counter= 0

Example #12

0

Show file

File: pars.py Project: mza0150/eebprogramming

                      default=False,
                      help="Verbose execution mode")

    (options, args) = parser.parse_args()
    if len(args) == 0:
        sys.exit("Expecting a filename as an argument")

    if options.verbose:
        _LOG.setLevel(logging.DEBUG)

    tree_index = 0

    try:
        for f in args:
            fo = open(f, "rU")
            dataset = DataSet()
            dataset.read(stream=fo, schema="NEXUS")

            if len(dataset.taxon_sets) != 1:
                raise ValueError("Expecting one set of taxa in %s" % f)
            taxon_set = dataset.taxon_sets[0]

            if len(dataset.tree_lists) != 1:
                raise ValueError("Expecting one tree block in %s" % f)
            tree_list = dataset.tree_lists[0]

            if len(dataset.char_matrices) != 1:
                raise ValueError("Expecting one character matrix in %s" % f)
            char_mat = dataset.char_matrices[0]

            num_char = len(char_mat[0])

Example #13

0

Show file

File: scale_trees.py Project: wrightaprilm/DendroPy

                      help="The multiplier used for every branch length.")
    parser.add_option("-n",
                      "--nexus",
                      dest="schema",
                      action="store_const",
                      const="NEXUS",
                      default="NEWICK",
                      help="Tree is in NEXUS schema.")
    (options, args) = parser.parse_args()

    if len(args) > 1:
        sys.exit(
            "At most one argument (a newick tree string with branch lengths) can be specified"
        )
    if len(args) == 1:
        s = open(args[0], 'rU')
    else:
        newick = sys.stdin.read()
        s = StringIO.StringIO(newick)

    multip = options.multip
    d = DataSet()

    d.read(s, schema=options.schema, rooted=True)
    if len(d.tree_lists) == 0:
        sys.exit("No trees found in file.")
    for tb in d.tree_lists:
        for tree in tb:
            scale_edges(tree, multip)
    d.write_to_stream(sys.stdout, schema=options.schema)

Example #14

0

Show file

        sys.exit('Data file not found: "%s"' % opts.data)
    if not os.path.exists(opts.tree):
        sys.exit('Tree file not found: "%s"' % opts.tree)

    tree_file_objs = [open(f, "rU") for f in tree_filepaths]

    if opts.output_filepath is None:
        output_dest = sys.stdout
    else:
        output_fpath = os.path.expanduser(
            os.path.expandvars(opts.output_filepath))
        if not confirm_overwrite(output_fpath, opts.replace):
            sys.exit(1)
        output_dest = open(output_fpath, "w")

    dataset = DataSet()
    ts = dendropy.TaxonSet()
    dataset.read(stream=open(opts.data, 'rU'), schema='NEXUS', taxon_set=ts)
    if len(dataset.char_matrices) != 1:
        sys.exit(
            "Currently the script only supports data files with a single character matrix"
        )
    if len(dataset.tree_lists) > 0:
        sys.exit(
            "Currently the script does not support trees in the data file")
    dataset.read(stream=open(opts.tree, 'rU'), schema='NEXUS', taxon_set=ts)
    if (len(dataset.tree_lists) != 1) or len(dataset.tree_lists[0]) != 1:
        sys.exit("Currently the script only not tree files with a single tree")

    if ts.get_taxon(label=BOGUS_TAXON_LABELS[0]) or ts.get_taxon(
            label=BOGUS_TAXON_LABELS[1]):

Example #15

0

Show file

File: tree.py Project: BioinformaticsArchive/sepp

 def read_tree_from_file(self, treefile, file_format):
     """Parse ``treefile`` and store its first tree on this object.

     ``treefile`` is a filesystem path; ``file_format`` is forwarded to
     ``Dataset.read`` as its ``schema`` argument.  The first tree of the
     first trees block is assigned to ``self._tree`` and ``self.n_leaves``
     is set from ``self.count_leaves()``.

     Raises ``IndexError`` if the parsed data set holds no tree.
     """
     dataset = Dataset()
     # Close the handle as soon as parsing is done instead of leaving it
     # open until garbage collection.
     # NOTE(review): the 'rU' mode was removed in Python 3.11; plain 'r' is
     # the Python 3 equivalent if this code is ever ported.
     with open(treefile, 'rU') as tree_stream:
         dataset.read(tree_stream, schema=file_format)
     dendropy_tree = dataset.trees_blocks[0][0]
     self._tree = dendropy_tree
     self.n_leaves = self.count_leaves()

Example #16

0

Show file

File: sankoff.py Project: kmiddleton/eebprogramming

    node_list = [i for i in tree.postorder_node_iter()]
    return sankoff(node_list, step_matrix=step_matrix, taxa_to_state_set_map=taxa_to_states)

if __name__ == '__main__':
    from optparse import OptionParser
    parser = OptionParser()
    (options, args) = parser.parse_args()
    if len(args) == 0:
        sys.exit("Expecting a filename as an argument")

    tree_index = 0

    try:
        for f in args:
            fo = open(f, "rU")
            dataset = DataSet()
            dataset.read(stream=fo, schema="NEXUS")

            if len(dataset.taxon_sets) != 1:
                raise ValueError("Expecting one set of taxa in %s" % f)
            taxon_set = dataset.taxon_sets[0]

            if len(dataset.tree_lists) != 1:
                raise ValueError("Expecting one tree block in %s" % f)
            tree_list = dataset.tree_lists[0]

            if len(dataset.char_matrices) != 1:
                raise ValueError("Expecting one character matrix in %s" % f)
            char_mat = dataset.char_matrices[0]

            num_char = len(char_mat[0])

Example #17

0

Show file

File: prob_synapo.py Project: BioinformaticsArchive/DendroPy

        sys.exit('Data file not found: "%s"' % opts.data)
    if not os.path.exists(opts.tree):
        sys.exit('Tree file not found: "%s"' % opts.tree)

    tree_file_objs = [open(f, "rU") for f in tree_filepaths]

    if opts.output_filepath is None:
        output_dest = sys.stdout
    else:
        output_fpath = os.path.expanduser(os.path.expandvars(opts.output_filepath))
        if not confirm_overwrite(output_fpath, opts.replace):
            sys.exit(1)
        output_dest = open(output_fpath, "w")


    dataset = DataSet()
    ts = dendropy.TaxonSet()
    dataset.read(stream=open(opts.data, 'rU'), schema='NEXUS', taxon_set=ts)
    if len(dataset.char_matrices) != 1:
        sys.exit("Currently the script only supports data files with a single character matrix")
    if len(dataset.tree_lists) > 0:
        sys.exit("Currently the script does not support trees in the data file")
    dataset.read(stream=open(opts.tree, 'rU'), schema='NEXUS', taxon_set=ts)
    if (len(dataset.tree_lists) != 1) or len(dataset.tree_lists[0]) != 1:
        sys.exit("Currently the script only not tree files with a single tree")

    if ts.get_taxon(label=BOGUS_TAXON_LABELS[0]) or ts.get_taxon(label=BOGUS_TAXON_LABELS[1]):
        sys.exit('Give me a break. You really have a taxon named "%s" or "%s" in your data!?\nI refuse to deal with this file.\n' % (BOGUS_TAXON_LABELS[0], BOGUS_TAXON_LABELS[1]))
    tree = dataset.tree_lists[0][0]

    matrix = dataset.char_matrices[0]

Example #18

0

Show file

File: tree.py Project: sfeng1030/Sepp

 def read_tree_from_file(self, treefile, file_format):
     """Parse ``treefile`` and store its first tree on this object.

     ``treefile`` is a filesystem path; ``file_format`` is forwarded to
     ``Dataset.read`` as its ``schema`` argument.  The first tree of the
     first trees block is assigned to ``self._tree`` and ``self.n_leaves``
     is set from ``self.count_leaves()``.

     Raises ``IndexError`` if the parsed data set holds no tree.
     """
     dataset = Dataset()
     # Close the handle as soon as parsing is done instead of leaving it
     # open until garbage collection.
     # NOTE(review): the 'rU' mode was removed in Python 3.11; plain 'r' is
     # the Python 3 equivalent if this code is ever ported.
     with open(treefile, 'rU') as tree_stream:
         dataset.read(tree_stream, schema=file_format)
     dendropy_tree = dataset.trees_blocks[0][0]
     self._tree = dendropy_tree
     self.n_leaves = self.count_leaves()

Example #19

0

Show file

File: treedepth.py Project: wrightaprilm/DendroPy

    parser.add_option("--generate-test-tree",
                      action="store_true",
                      dest="gen_test_tree",
                      help="Generate ultrametric tree to test function.")
    (options, args) = parser.parse_args()

    if options.gen_test_tree:
        simple_test_tree()
        exit(0)

    if len(args) > 1:
        sys.exit(
            "At most one argument (a newick tree string with branch lengths) can be specified"
        )
    if len(args) == 1:
        s = args[0]
    else:
        newick = sys.stdin.read()
        s = StringIO.StringIO(newick)

    prec = options.prec
    d = DataSet()

    d.read(s, schema=options.schema, rooted=True)
    if len(d.trees_blocks) == 0:
        sys.exit("No trees found in file.")
    tree = d.trees_blocks[0][0]
    tree.calc_node_ages(attr_name='age', check_prec=options.prec)

    sys.stdout.write("%f\n" % tree.seed_node.age)