def convert_to_nexml(obj_dict,
                     addindent='',
                     newl='',
                     use_default_root_atts=True,
                     otu_label='ot:originalLabel'):
    """Serialize `obj_dict` as NeXML into an in-memory buffer and return it.

    This is a convenience wrapper around `write_obj_as_nexml` for callers
    that want the serialized document back as a value rather than written
    to a file object they supply.

    Args:
        obj_dict: the object to serialize; handed to `write_obj_as_nexml`
            unchanged.
        addindent: forwarded to `write_obj_as_nexml` (default '').
        newl: forwarded to `write_obj_as_nexml` (default '').
        use_default_root_atts: forwarded to `write_obj_as_nexml`.
        otu_label: forwarded to `write_obj_as_nexml`
            (default 'ot:originalLabel').

    Returns:
        Whatever `getvalue()` of the buffer created by
        `get_utf_8_string_io_writer` yields after the writer is flushed.
    """
    buffer, utf8_writer = get_utf_8_string_io_writer()
    writer_options = {
        'file_obj': utf8_writer,
        'addindent': addindent,
        'newl': newl,
        'use_default_root_atts': use_default_root_atts,
        'otu_label': otu_label,
    }
    write_obj_as_nexml(obj_dict, **writer_options)
    # Flush before reading the buffer so nothing is left in the writer.
    flush_utf_8_writer(utf8_writer)
    return buffer.getvalue()
def _write_nexus_format(quoted_leaf_labels, tree_name_newick_list):
    """Build a NEXUS document with a TAXA block and a TREES block.

    Args:
        quoted_leaf_labels: sequence of taxon labels. Its length is written
            as NTax and the labels are joined with single spaces into the
            TaxLabels statement. Nothing is quoted or escaped here, so the
            labels must already be in the form they should appear in.
        tree_name_newick_list: iterable of (name, newick) pairs; each pair
            is written as a `Tree name = newick` entry in the TREES block.

    Returns:
        Whatever `getvalue()` of the buffer created by
        `get_utf_8_string_io_writer` yields after the writer is flushed.
    """
    buffer, out = get_utf_8_string_io_writer()
    # NOTE(review): the template below and the ' Tree ' prefix are reproduced
    # exactly as they appear in the reviewed text; confirm their internal
    # whitespace/line breaks against the checked-in file.
    header_template = '''#NEXUS BEGIN TAXA; Dimensions NTax = {s}; TaxLabels {l} ; END; BEGIN TREES; '''
    taxon_count = len(quoted_leaf_labels)
    joined_labels = ' '.join(quoted_leaf_labels)
    out.write(header_template.format(s=taxon_count, l=joined_labels))
    for tree_name, newick_str in tree_name_newick_list:
        # Same four writes, in the same order, as one statement per piece.
        for piece in (' Tree ', tree_name, ' = ', newick_str):
            out.write(piece)
    # The TREES block is closed once, after every tree has been written.
    out.write('\nEND;\n')
    flush_utf_8_writer(out)
    return buffer.getvalue()
def convert_tree_to_newick(tree,
                           otu_group,
                           label_key,
                           leaf_labels,
                           needs_quotes_pattern,
                           subtree_id=None,
                           bracket_ingroup=False):
    """Serialize a tree (or one of its subtrees) as a newick string.

    The tree is walked iteratively with an explicit stack (no recursion),
    and the newick text is written to an in-memory writer as the walk
    proceeds.

    Args:
        tree: mapping that is read through the keys '^ot:inGroupClade'
            (optional), '^ot:rootNodeId', 'edgeBySourceId' and 'nodeById'.
            'edgeBySourceId' maps a node ID to a dict of its outgoing edges
            keyed by edge ID; each edge has an '@target' node ID.
        otu_group: passed through to the `_write_newick_*_label` helpers.
        label_key: passed through to the label helpers; must be a member of
            `PhyloSchema._NEWICK_PROP_VALS` (checked with an assert).
        leaf_labels: passed through to `_write_newick_leaf_label`.
        needs_quotes_pattern: passed through to the label helpers.
        subtree_id: if falsy, the walk starts at tree['^ot:rootNodeId'];
            if 'ingroup', it starts at the node named by
            '^ot:inGroupClade'; otherwise it is used as the start node ID.
        bracket_ingroup: if true, '[pre-ingroup-marker]' is written before
            the opening parenthesis of the ingroup node and
            '[post-ingroup-marker]' after its closing label/edge length.

    Returns:
        The buffer contents after the terminating ';' has been written, or
        None when the start node has no entry in 'edgeBySourceId'.
    """
    assert label_key in PhyloSchema._NEWICK_PROP_VALS #pylint: disable=W0212
    # Threaded through _write_newick_leaf_label, which returns the updated value.
    unlabeled_counter = 0
    ingroup_node_id = tree.get('^ot:inGroupClade')
    if subtree_id:
        if subtree_id == 'ingroup':
            root_id = ingroup_node_id
            ingroup_node_id = None # turns off the ingroup markers: the ingroup itself is the root being written
        else:
            root_id = subtree_id
    else:
        root_id = tree['^ot:rootNodeId']
    edges = tree['edgeBySourceId']
    # No outgoing edges recorded for the start node (e.g. it is a tip, or
    # the ID is unknown / None): there is nothing to write.
    if root_id not in edges:
        return None
    nodes = tree['nodeById']
    curr_node_id = root_id
    curr_edge = None  # edge leading into the current node; None for the start node
    curr_sib_list = []  # (edge_id, edge) pairs for siblings not yet visited
    curr_stack = []  # one (edge, node_id, sibling_list) frame per open '('
    sio, out = get_utf_8_string_io_writer()
    going_tipward = True
    while True:
        if going_tipward:
            outgoing_edges = edges.get(curr_node_id)
            if outgoing_edges is None:
                # Tip: write its label and edge length, then turn around.
                curr_node = nodes[curr_node_id]
                unlabeled_counter = _write_newick_leaf_label(out,
                                                             curr_node,
                                                             otu_group,
                                                             label_key,
                                                             leaf_labels,
                                                             unlabeled_counter,
                                                             needs_quotes_pattern)
                _write_newick_edge_len(out, curr_edge)
                going_tipward = False
            else:
                # Internal node: order children by edge ID, open a clade and
                # descend into the first child, remembering the rest.
                te = [(i, e) for i, e in outgoing_edges.items()]
                te.sort() # produce a consistent rotation... Necessary?
                if bracket_ingroup and (ingroup_node_id == curr_node_id):
                    out.write('[pre-ingroup-marker]')
                out.write('(')
                next_p = te.pop(0)
                # Save the state of this node so it can be restored when the
                # clade is closed on the way back up.
                curr_stack.append((curr_edge, curr_node_id, curr_sib_list))
                curr_edge, curr_sib_list = next_p[1], te
                curr_node_id = curr_edge['@target']
        if not going_tipward:
            # Move rootward until an unvisited sibling is found or the stack
            # is exhausted.
            next_up_edge_id = None
            while True:
                if curr_sib_list:
                    # Another child of the same parent remains: visit it next.
                    out.write(',')
                    next_up_edge_id, next_up_edge = curr_sib_list.pop(0)
                    break
                if curr_stack:
                    # All children written: close the clade and emit the
                    # parent's own label and edge length.
                    curr_edge, curr_node_id, curr_sib_list = curr_stack.pop(-1)
                    curr_node = nodes[curr_node_id]
                    out.write(')')
                    _write_newick_internal_label(out, curr_node, otu_group, label_key, needs_quotes_pattern)
                    # curr_edge is None when the start node's frame is popped.
                    _write_newick_edge_len(out, curr_edge)
                    if bracket_ingroup and (ingroup_node_id == curr_node_id):
                        out.write('[post-ingroup-marker]')
                else:
                    # Stack empty: the start node has been closed.
                    break
            if next_up_edge_id is None:
                # No sibling was found anywhere up the stack: traversal done.
                break
            curr_edge = next_up_edge
            curr_node_id = curr_edge['@target']
            going_tipward = True
    out.write(';')
    flush_utf_8_writer(out)
    return sio.getvalue()
def convert(self, src, serialize=None, output_dest=None, src_schema=None):
    """Convert `src` to the content and format described by this schema.

    Only NexSON input is supported. The target is chosen by
    `self.format_code` (NexSON, NeXML, NEXUS or newick) and, for NexSON
    output, by `self.content`.

    Args:
        src: the NexSON object to convert.
            NOTE(review): for content 'meta', `strip_to_meta_only` is called
            on `src` itself and its return value is ignored, so it appears
            to modify `src` in place -- confirm.
        serialize: for NexSON output, a truthy value requests JSON
            serialization and a falsy value returns the converted object.
            For every other format, output is always serialized and an
            explicit falsy value (other than None) is rejected.
        output_dest: optional destination. For non-NexSON formats a string
            is treated as a file path, opened for UTF-8 writing and closed
            again before returning; any other truthy value is used as a
            writable object and left open. For NexSON output it is handed
            to `write_as_json` as-is.
        src_schema: schema describing `src`; when None the source is taken
            to be NexSON of unspecified version.

    Returns:
        NexSON target: None if the requested content is absent; otherwise
        the converted object (serialize falsy), the serialized buffer
        contents (serialize truthy, no output_dest), or None after writing
        to output_dest.
        NeXML target: None after writing to output_dest, else the result of
        `convert_to_nexml`.
        NEXUS/newick target: the value returned by `extract_tree`; it is
        also written to output_dest, followed by a newline, when given.

    Raises:
        NotImplementedError: if this schema cannot be converted to, or the
            source format is not NexSON.
        ValueError: if `serialize` is explicitly falsy for a non-NexSON
            target.
        AssertionError: if `self.format_code` is not a recognized format.
    """
    if src_schema is None:
        src_format = PhyloSchema.NEXSON
        current_format = None
    else:
        src_format = src_schema.format_code
        current_format = src_schema.version
    if not self.can_convert_from():
        m = 'Conversion of {c} to {d} is not supported'.format(c=self.content, d=self.description)
        raise NotImplementedError(m)
    if src_format != PhyloSchema.NEXSON:
        raise NotImplementedError('Only conversion from NexSON is currently supported')

    if self.format_code == PhyloSchema.NEXSON:
        d = src
        if self.content == 'study':
            d = convert_nexson_format(src,
                                      out_nexson_format=self.version,
                                      current_format=current_format,
                                      remove_old_structs=True,
                                      pristine_if_invalid=False,
                                      sort_arbitrary=False)
        elif self.content in ('tree', 'subtree'):
            if self.content == 'tree' and self.cull_nonmatching:
                # Keep the study wrapper but drop every non-matching tree.
                d = cull_nonmatching_trees(d, self.content_id, current_format)
                d = convert_nexson_format(d,
                                          out_nexson_format=self.version,
                                          current_format=current_format,
                                          remove_old_structs=True,
                                          pristine_if_invalid=False,
                                          sort_arbitrary=False)
            else:
                # Return just the matching trees, keyed by the first element
                # of each extracted tuple with the second element as value.
                i_t_o_list = extract_tree_nexson(d, self.content_id, current_format)
                d = {ito_tup[0]: ito_tup[1] for ito_tup in i_t_o_list}
        elif self.content == 'meta':
            strip_to_meta_only(d, current_format)
        elif self.content == 'otus':
            d = extract_otus_nexson(d, self.content_id, current_format)
        elif self.content == 'otu':
            d = extract_otu_nexson(d, self.content_id, current_format)
        elif self.content == 'otumap':
            if self.content_id is None:
                r = extract_otu_nexson(d, None, current_format)
            else:
                # content_id may name an otus group or a single otu: try the
                # group first and fall back to the single-otu lookup.
                p = extract_otus_nexson(d, self.content_id, current_format)
                if p is None:
                    r = extract_otu_nexson(d, self.content_id, current_format)
                else:
                    r = {}
                    for v in p.values():
                        r.update(v.get('otuById', {}))
            if not r:
                return None
            d = _otu_dict_to_otumap(r)
        elif self.content == 'treelist':
            i_t_o_list = extract_tree_nexson(d, self.content_id, current_format)
            d = [i[0] for i in i_t_o_list]
        if d is None:
            return None
        if not serialize:
            return d
        if output_dest:
            write_as_json(d, output_dest)
            return None
        f, wrapper = get_utf_8_string_io_writer()
        write_as_json(d, wrapper)
        flush_utf_8_writer(wrapper)
        return f.getvalue()

    # Non-NexSON types go here...
    if (serialize is not None) and (not serialize):
        raise ValueError('Conversion without serialization is only supported for the NexSON format')
    # Remember whether the file handle is ours: only a handle opened here is
    # closed here; caller-supplied streams are left open for the caller.
    opened_here = False
    if output_dest and is_str_type(output_dest):
        output_dest = codecs.open(output_dest, 'w', encoding='utf-8')
        opened_here = True
    try:
        if self.format_code == PhyloSchema.NEXML:
            if output_dest:
                write_obj_as_nexml(src,
                                   output_dest,
                                   addindent=' ',
                                   newl='\n',
                                   otu_label=self.otu_label_prop)
                return None
            return convert_to_nexml(src,
                                    addindent=' ',
                                    newl='\n',
                                    otu_label=self.otu_label_prop)
        if self.format_code in (PhyloSchema.NEXUS, PhyloSchema.NEWICK):
            if self.content in ('tree', 'subtree'):
                # content_id is either a bare tree ID or a
                # (tree ID, subtree ID) pair.
                if isinstance(self.content_id, (list, tuple)):
                    ci, subtree_id = self.content_id
                else:
                    ci, subtree_id = self.content_id, None
            else:
                ci, subtree_id = None, None
            response = extract_tree(src, ci, self, subtree_id=subtree_id)
            # these formats are always serialized...
            if output_dest:
                output_dest.write(response)
                output_dest.write('\n')
            return response
        # Explicit raise rather than `assert False` so the guard is not
        # stripped when Python runs with -O.
        raise AssertionError('Unexpected format code: {f}'.format(f=self.format_code))
    finally:
        if opened_here:
            output_dest.close()