def phylogeneticTreeFromFile(self, treefile, file_format):
    """Read the first tree stored in ``treefile`` as a PhylogeneticTree.

    Args:
        treefile: Path of the tree file to open.
        file_format: Value passed to ``Dataset.read`` as ``schema``.

    Returns:
        The ``PhylogeneticTree`` wrapping the first tree of the first tree
        list, after ``calc_splits()`` has been called on it and
        ``delete_outdegree_one`` has been applied to its ``_tree``.

    Raises:
        IndexError: from the ``[0][0]`` lookup when no tree was read
            (assumes ``tree_lists`` behaves like a list -- TODO confirm).
    """
    dataset = Dataset()
    # The context manager closes the handle even if parsing fails; the
    # original left the file open.
    # NOTE: the 'U' flag is the Python 2 universal-newline mode; it is
    # rejected by Python 3.11+, so switch to 'r' when porting.
    with open(treefile, 'rU') as tree_stream:
        dataset.read(tree_stream, schema=file_format)
    dendropy_tree = dataset.tree_lists[0][0]
    tree = PhylogeneticTree(dendropy_tree)
    tree.calc_splits()
    delete_outdegree_one(tree._tree)
    return tree
def get_dataset_from_treebase(study_id, phylesystem_loc='api'):
    """Download the TreeBASE NeXML dataset linked from a study's NexSON.

    The study's NexSON is fetched with ``get_nexson``; its
    ``^ot:dataDeposit`` link is expected to point at TreeBASE, and the text
    after ``':S'`` in that link is used as the TreeBASE study id.

    Args:
        study_id: Study identifier passed to ``get_nexson``.
        phylesystem_loc: Phylesystem location passed to ``get_nexson``.

    Returns:
        Whatever ``DataSet.get`` returns for the TreeBASE NeXML download URL.

    Raises:
        SystemExit: with status -2 when the data-deposit link does not
            mention ``treebase``.
        KeyError: when the NexSON lacks the data-deposit entry (assumes the
            NexSON is a plain nested dict -- TODO confirm).
        IndexError: when the link contains no ``':S'`` marker.
    """
    nexson = get_nexson(study_id, phylesystem_loc)
    treebase_url = nexson['nexml'][u'^ot:dataDeposit'][u'@href']
    if 'treebase' not in treebase_url:
        # The original called ``sys.stderr(...)`` (a TypeError) and would
        # have exited with status 0; report properly and exit non-zero.
        sys.stderr.write(
            "No treebase record associated with study {}\n".format(study_id))
        sys.exit(-2)
    tb_id = treebase_url.split(':S')[1]
    url = "https://treebase.org/treebase-web/search/downloadAStudy.html?id={}&format=nexml".format(tb_id)
    return DataSet.get(url=url, schema="nexml")
def get_dataset_from_treebase(study_id, phylesystem_loc="api"):
    """Download the TreeBASE NeXML dataset linked from a study's NexSON.

    The study's NexSON is fetched with ``get_nexson``; its
    ``^ot:dataDeposit`` link is expected to point at TreeBASE, and the text
    after ``':S'`` in that link is used as the TreeBASE study id.

    Args:
        study_id: Study identifier passed to ``get_nexson``.
        phylesystem_loc: Phylesystem location passed to ``get_nexson``.

    Returns:
        Whatever ``DataSet.get`` returns for the TreeBASE NeXML download URL.

    Raises:
        HTTPError: re-raised, after a message on stderr, when ``get_nexson``
            fails with it.
        SystemExit: with status -2 when the data-deposit link does not
            mention ``treebase``.
        KeyError: when the NexSON lacks the data-deposit entry (assumes the
            NexSON is a plain nested dict -- TODO confirm).
        IndexError: when the link contains no ``':S'`` marker.
    """
    try:
        nexson = get_nexson(study_id, phylesystem_loc)
    except HTTPError as err:
        # ``write`` needs text, not the exception object itself.
        sys.stderr.write("{}\n".format(err))
        sys.stderr.write(
            "couldn't find study id {} in phylesystem location {}\n".format(
                study_id, phylesystem_loc))
        # Without a NexSON there is nothing to continue with; the original
        # fell through to an unbound ``nexson``.
        raise
    treebase_url = nexson['nexml'][u'^ot:dataDeposit'][u'@href']
    if 'treebase' not in treebase_url:
        sys.stderr.write(
            "No treebase record associated with study {}\n".format(study_id))
        sys.exit(-2)
    tb_id = treebase_url.split(':S')[1]
    url = "https://treebase.org/treebase-web/search/downloadAStudy.html?id={}&format=nexml".format(
        tb_id)
    if _DEBUG:
        sys.stderr.write(url + "\n")
    dna = DataSet.get(url=url, schema="nexml")
    return dna
def parse_nexus(nexus_loc):
    """
    Load a nexus file and summarise it as a dictionary for the template engine

    Parameters
    ----------
    nexus_loc: str
        location of nexus file

    Returns
    -------
    dict
        ``taxa`` maps each taxon label to its id, date and sequence string;
        ``tree`` is the first tree list rendered as newick; ``site_count``,
        ``taxon_count``, ``cutoff`` and ``grid_points`` are derived from the
        first character matrix and the latest date seen.
    """
    print("Parsing nexus file... ", end="")
    dataset = DataSet.get(path=nexus_loc, schema="nexus")

    summary = {"taxa": {}}
    summary["tree"] = dataset.tree_lists[0].as_string("newick")

    matrix = dataset.char_matrices[0]
    taxa = summary["taxa"]
    latest_date = 2020.1  # lower bound for the latest date
    loaded = 0
    for taxon, sequence in matrix.items():
        loaded += 1
        label = taxon.label
        sampled = parse_date(label)
        latest_date = max(latest_date, sampled)
        taxa[label] = {"id": label, "date": sampled, "sequence": str(sequence)}
    print("Done. {} taxa loaded".format(loaded))

    summary["site_count"] = matrix.max_sequence_size
    summary["taxon_count"] = len(taxa)
    summary["cutoff"] = latest_date - 2019.75
    # 52 grid points per unit of cutoff.
    summary["grid_points"] = round(summary["cutoff"] * 52)
    return summary
if __name__ == '__main__':
    # Script entry point: read trees from the file named on the command line
    # (or from stdin), call scale_edges on every tree with the requested
    # multiplier, and write the dataset to stdout in the schema it was read in.
    from optparse import OptionParser

    parser = OptionParser()
    parser.add_option(
        "-m", "--multiplier",
        dest="multip",
        default=1.0,
        type="float",
        help="The multiplier used for every branch length.",
    )
    parser.add_option(
        "-n", "--nexus",
        dest="schema",
        action="store_const",
        const="NEXUS",
        default="NEWICK",
        help="Tree is in NEXUS schema.",
    )
    (options, args) = parser.parse_args()

    if len(args) > 1:
        sys.exit("At most one argument (a newick tree string with branch lengths) can be specified")

    # Past the guard above, args holds either one path or nothing.
    if args:
        s = open(args[0], 'rU')
    else:
        newick = sys.stdin.read()
        s = StringIO.StringIO(newick)

    d = DataSet()
    multip = options.multip
    d.read(s, schema=options.schema, rooted=True)
    if len(d.tree_lists) == 0:
        sys.exit("No trees found in file.")

    # Every tree of every tree list is handed to scale_edges.
    for tb in d.tree_lists:
        for tree in tb:
            scale_edges(tree, multip)

    d.write_to_stream(sys.stdout, schema=options.schema)
import sys
import json
from dendropy.treesplit import encode_splits, split_to_list
from cStringIO import StringIO
from dendropy import DataSet
from dendropy.utility.error import DataParseError
from dendropy.utility.textutils import escape_nexus_token

if __name__ == '__main__':
    # This script converts Newick to Nexon by splitting up the mod_name made
    # in taxonomyToNewick.py to 'name' and 'ottolid'. Nexon is later used for
    # TAG's input trees.
    output = sys.stdout

    # Path of the Newick file; also used in the error messages below, which
    # previously referenced an undefined name and so raised NameError.
    f = sys.argv[1]
    fo = open(f, "rU")
    dataset = DataSet()
    try:
        dataset.read(stream=fo, schema="Newick")
    except DataParseError as dfe:
        # Surface parse failures as ValueError, like the checks below.
        raise ValueError(str(dfe))
    finally:
        # Reading is finished (or failed); release the file handle either way.
        fo.close()

    # Exactly one taxon set and one tree list are expected.
    if len(dataset.taxon_sets) != 1:
        raise ValueError("Expecting one set of taxa in %s" % f)
    if len(dataset.tree_lists) != 1:
        raise ValueError("Expecting one tree in %s" % f)

    taxon_set = dataset.taxon_sets[0]
    tree_list = dataset.tree_lists[0]
    number_of_taxon = len(taxon_set)
    branch_counter = 0
    # One string buffer per taxon.
    code_list = [StringIO() for i in taxon_set]
    otu_counter = 0
    node_counter = 0
parser.add_option(
    "--generate-test-tree",
    action="store_true",
    dest="gen_test_tree",
    help="Generate ultrametric tree to test function.",
)
(options, args) = parser.parse_args()

# Self-test mode: run simple_test_tree and stop.
if options.gen_test_tree:
    simple_test_tree()
    exit(0)

if len(args) > 1:
    sys.exit("At most one argument (a newick tree string with branch lengths) can be specified")

# Past the guard above, args holds either one entry or nothing.
if args:
    # NOTE(review): the argument is handed to DataSet.read unwrapped, while
    # the stdin branch wraps its text in a StringIO -- confirm read accepts it.
    s = args[0]
else:
    newick = sys.stdin.read()
    s = StringIO.StringIO(newick)

prec = options.prec
d = DataSet()
d.read(s, schema=options.schema, rooted=True)
if len(d.trees_blocks) == 0:
    sys.exit("No trees found in file.")

# Only the first tree of the first block is measured; the age computed for
# its seed node is printed.
tree = d.trees_blocks[0][0]
tree.calc_node_ages(attr_name="age", check_prec=options.prec)
sys.stdout.write("%f\n" % tree.seed_node.age)
action="store_true", default=False, help="Produce an output in the same format as PAUP's TreeDist command.") (options, args) = parser.parse_args() if len(args) == 0: sys.exit("Expecting a filename as an argument") schema = options.schema.upper() try: cutoff = int(options.cutoff) except ValueError: try: cutoff = float(options.cutoff) except ValueError: sys.exit('Expecting the cutoff to be a number found "%s"' % options.cutoff) trees = [] taxon_set = TaxonSet() dataset = DataSet(taxon_set=taxon_set) if schema == "PHYLIP": schema = "NEWICK" for f in args: fo = open(f, "rU") dataset.read(stream=fo, schema=schema) for tl in dataset.tree_lists: trees.extend(tl) sd_mat = long_branch_symmdiff(trees, cutoff) o = sys.stdout if options.paup: o.write("%s\n" % "\t".join(["tree"] + [str(1+i) for i in xrange(len(sd_mat))])) for n, row in enumerate(sd_mat): o.write("%d\t%s\n" % ((n + 1), "\t".join([str(i) for i in row[:1 + n]]))) else:
'--schema', dest='schema', type='str', default="newick", help='The format/schema of the input data') parser.add_option('-g', '--gordon', dest='gordons', action="store_true", default=False, help="Specify to use the Gordon's strict consensus") (options, args) = parser.parse_args() if len(args) == 0: sys.exit("Expecting a filename as an argument") schema = options.schema.upper() trees = [] taxon_set = TaxonSet() dataset = DataSet(taxon_set=taxon_set) if schema == "PHYLIP": schema = "NEWICK" for f in args: fo = open(f, "rU") dataset.read(stream=fo, schema=schema) for tl in dataset.tree_lists: trees.extend(tl) o = inplace_strict_consensus_merge(trees, gordons_supertree=options.gordons) sys.stdout.write("%s;\n" % str(o))
import sys
import json
from dendropy.treesplit import encode_splits, split_to_list
from cStringIO import StringIO
from dendropy import DataSet
from dendropy.utility.error import DataParseError
from dendropy.utility.textutils import escape_nexus_token

if __name__ == '__main__':
    # This script converts Newick to Nexon by splitting up the mod_name made
    # in taxonomyToNewick.py to 'name' and 'ottolid'. Nexon is later used for
    # TAG's input trees.
    output = sys.stdout

    # Path of the Newick file; the error messages below used an undefined
    # name here, which raised NameError instead of the intended ValueError.
    f = sys.argv[1]
    fo = open(f, "rU")
    dataset = DataSet()
    try:
        dataset.read(stream=fo, schema="Newick")
    except DataParseError as dfe:
        # Surface parse failures as ValueError, like the checks below.
        raise ValueError(str(dfe))
    finally:
        # The handle is no longer needed once reading has ended.
        fo.close()

    # Exactly one taxon set and one tree list are expected.
    if len(dataset.taxon_sets) != 1:
        raise ValueError("Expecting one set of taxa in %s" % f)
    if len(dataset.tree_lists) != 1:
        raise ValueError("Expecting one tree in %s" % f)

    taxon_set = dataset.taxon_sets[0]
    tree_list = dataset.tree_lists[0]
    number_of_taxon = len(taxon_set)
    branch_counter = 0
    # One string buffer per taxon.
    code_list = [StringIO() for i in taxon_set]
    otu_counter = 0
    node_counter = 0
default=False, help="Verbose execution mode") (options, args) = parser.parse_args() if len(args) == 0: sys.exit("Expecting a filename as an argument") if options.verbose: _LOG.setLevel(logging.DEBUG) tree_index = 0 try: for f in args: fo = open(f, "rU") dataset = DataSet() dataset.read(stream=fo, schema="NEXUS") if len(dataset.taxon_sets) != 1: raise ValueError("Expecting one set of taxa in %s" % f) taxon_set = dataset.taxon_sets[0] if len(dataset.tree_lists) != 1: raise ValueError("Expecting one tree block in %s" % f) tree_list = dataset.tree_lists[0] if len(dataset.char_matrices) != 1: raise ValueError("Expecting one character matrix in %s" % f) char_mat = dataset.char_matrices[0] num_char = len(char_mat[0])
help="The multiplier used for every branch length.") parser.add_option("-n", "--nexus", dest="schema", action="store_const", const="NEXUS", default="NEWICK", help="Tree is in NEXUS schema.") (options, args) = parser.parse_args() if len(args) > 1: sys.exit( "At most one argument (a newick tree string with branch lengths) can be specified" ) if len(args) == 1: s = open(args[0], 'rU') else: newick = sys.stdin.read() s = StringIO.StringIO(newick) multip = options.multip d = DataSet() d.read(s, schema=options.schema, rooted=True) if len(d.tree_lists) == 0: sys.exit("No trees found in file.") for tb in d.tree_lists: for tree in tb: scale_edges(tree, multip) d.write_to_stream(sys.stdout, schema=options.schema)
sys.exit('Data file not found: "%s"' % opts.data) if not os.path.exists(opts.tree): sys.exit('Tree file not found: "%s"' % opts.tree) tree_file_objs = [open(f, "rU") for f in tree_filepaths] if opts.output_filepath is None: output_dest = sys.stdout else: output_fpath = os.path.expanduser( os.path.expandvars(opts.output_filepath)) if not confirm_overwrite(output_fpath, opts.replace): sys.exit(1) output_dest = open(output_fpath, "w") dataset = DataSet() ts = dendropy.TaxonSet() dataset.read(stream=open(opts.data, 'rU'), schema='NEXUS', taxon_set=ts) if len(dataset.char_matrices) != 1: sys.exit( "Currently the script only supports data files with a single character matrix" ) if len(dataset.tree_lists) > 0: sys.exit( "Currently the script does not support trees in the data file") dataset.read(stream=open(opts.tree, 'rU'), schema='NEXUS', taxon_set=ts) if (len(dataset.tree_lists) != 1) or len(dataset.tree_lists[0]) != 1: sys.exit("Currently the script only not tree files with a single tree") if ts.get_taxon(label=BOGUS_TAXON_LABELS[0]) or ts.get_taxon( label=BOGUS_TAXON_LABELS[1]):
def read_tree_from_file(self, treefile, file_format):
    """Load the first tree of ``treefile`` into this object.

    Sets ``self._tree`` to the first tree of the first trees block and
    ``self.n_leaves`` to the result of ``self.count_leaves()``.

    Args:
        treefile: Path of the tree file to open.
        file_format: Value passed to ``Dataset.read`` as ``schema``.

    Raises:
        IndexError: from the ``[0][0]`` lookup when no tree was read
            (assumes ``trees_blocks`` behaves like a list -- TODO confirm).
    """
    dataset = Dataset()
    # The context manager closes the handle even if parsing fails; the
    # original left the file open.
    # NOTE: the 'U' flag is the Python 2 universal-newline mode; it is
    # rejected by Python 3.11+, so switch to 'r' when porting.
    with open(treefile, 'rU') as tree_stream:
        dataset.read(tree_stream, schema=file_format)
    dendropy_tree = dataset.trees_blocks[0][0]
    self._tree = dendropy_tree
    self.n_leaves = self.count_leaves()
node_list = [i for i in tree.postorder_node_iter()] return sankoff(node_list, step_matrix=step_matrix, taxa_to_state_set_map=taxa_to_states) if __name__ == '__main__': from optparse import OptionParser parser = OptionParser() (options, args) = parser.parse_args() if len(args) == 0: sys.exit("Expecting a filename as an argument") tree_index = 0 try: for f in args: fo = open(f, "rU") dataset = DataSet() dataset.read(stream=fo, schema="NEXUS") if len(dataset.taxon_sets) != 1: raise ValueError("Expecting one set of taxa in %s" % f) taxon_set = dataset.taxon_sets[0] if len(dataset.tree_lists) != 1: raise ValueError("Expecting one tree block in %s" % f) tree_list = dataset.tree_lists[0] if len(dataset.char_matrices) != 1: raise ValueError("Expecting one character matrix in %s" % f) char_mat = dataset.char_matrices[0] num_char = len(char_mat[0])
sys.exit('Data file not found: "%s"' % opts.data) if not os.path.exists(opts.tree): sys.exit('Tree file not found: "%s"' % opts.tree) tree_file_objs = [open(f, "rU") for f in tree_filepaths] if opts.output_filepath is None: output_dest = sys.stdout else: output_fpath = os.path.expanduser(os.path.expandvars(opts.output_filepath)) if not confirm_overwrite(output_fpath, opts.replace): sys.exit(1) output_dest = open(output_fpath, "w") dataset = DataSet() ts = dendropy.TaxonSet() dataset.read(stream=open(opts.data, 'rU'), schema='NEXUS', taxon_set=ts) if len(dataset.char_matrices) != 1: sys.exit("Currently the script only supports data files with a single character matrix") if len(dataset.tree_lists) > 0: sys.exit("Currently the script does not support trees in the data file") dataset.read(stream=open(opts.tree, 'rU'), schema='NEXUS', taxon_set=ts) if (len(dataset.tree_lists) != 1) or len(dataset.tree_lists[0]) != 1: sys.exit("Currently the script only not tree files with a single tree") if ts.get_taxon(label=BOGUS_TAXON_LABELS[0]) or ts.get_taxon(label=BOGUS_TAXON_LABELS[1]): sys.exit('Give me a break. You really have a taxon named "%s" or "%s" in your data!?\nI refuse to deal with this file.\n' % (BOGUS_TAXON_LABELS[0], BOGUS_TAXON_LABELS[1])) tree = dataset.tree_lists[0][0] matrix = dataset.char_matrices[0]
parser.add_option(
    "--generate-test-tree",
    action="store_true",
    dest="gen_test_tree",
    help="Generate ultrametric tree to test function.",
)
(options, args) = parser.parse_args()

# Self-test mode: run simple_test_tree and stop.
if options.gen_test_tree:
    simple_test_tree()
    exit(0)

if len(args) > 1:
    sys.exit("At most one argument (a newick tree string with branch lengths) can be specified")

# Past the guard above, args holds either one entry or nothing.
if args:
    # NOTE(review): the argument is handed to DataSet.read unwrapped, while
    # the stdin branch wraps its text in a StringIO -- confirm read accepts it.
    s = args[0]
else:
    newick = sys.stdin.read()
    s = StringIO.StringIO(newick)

prec = options.prec
d = DataSet()
d.read(s, schema=options.schema, rooted=True)
if len(d.trees_blocks) == 0:
    sys.exit("No trees found in file.")

# Only the first tree of the first block is measured; the age computed for
# its seed node is printed.
tree = d.trees_blocks[0][0]
tree.calc_node_ages(attr_name='age', check_prec=options.prec)
sys.stdout.write("%f\n" % tree.seed_node.age)