Exemple #1
0
def convert_to_nexml(obj_dict, addindent='', newl='', use_default_root_atts=True, otu_label='ot:originalLabel'):
    """Serialize `obj_dict` as NeXML into an in-memory buffer and return its content.

    A (buffer, writer) pair is obtained from `get_utf_8_string_io_writer`,
    `write_obj_as_nexml` writes into the writer, the writer is flushed, and
    whatever the buffer's `getvalue()` yields is returned.

    `addindent`, `newl`, `use_default_root_atts` and `otu_label` are forwarded
    unchanged (as keyword arguments) to `write_obj_as_nexml`.
    """
    sink, writer = get_utf_8_string_io_writer()
    # Collect the pass-through options in one place; they are handed to the
    # NeXML writer by keyword exactly as received.
    nexml_options = {
        'addindent': addindent,
        'newl': newl,
        'use_default_root_atts': use_default_root_atts,
        'otu_label': otu_label,
    }
    write_obj_as_nexml(obj_dict, file_obj=writer, **nexml_options)
    # Flush before reading the buffer so that nothing is left in the writer.
    flush_utf_8_writer(writer)
    return sink.getvalue()
Exemple #2
0
def convert_to_nexml(obj_dict, addindent='', newl='', use_default_root_atts=True, otu_label='ot:originalLabel'):
    """Serialize `obj_dict` as NeXML into an in-memory buffer and return its content.

    A (buffer, writer) pair is obtained from `get_utf_8_string_io_writer`,
    `write_obj_as_nexml` writes into the writer, the writer is flushed, and
    whatever the buffer's `getvalue()` yields is returned.

    `addindent`, `newl`, `use_default_root_atts` and `otu_label` are forwarded
    unchanged (as keyword arguments) to `write_obj_as_nexml`.
    """
    sink, writer = get_utf_8_string_io_writer()
    # Collect the pass-through options in one place; they are handed to the
    # NeXML writer by keyword exactly as received.
    nexml_options = {
        'addindent': addindent,
        'newl': newl,
        'use_default_root_atts': use_default_root_atts,
        'otu_label': otu_label,
    }
    write_obj_as_nexml(obj_dict, file_obj=writer, **nexml_options)
    # Flush before reading the buffer so that nothing is left in the writer.
    flush_utf_8_writer(writer)
    return sink.getvalue()
Exemple #3
0
def _write_nexus_format(quoted_leaf_labels, tree_name_newick_list):
    """Build a NEXUS document containing a TAXA block and a TREES block.

    `quoted_leaf_labels` is a sequence of label strings; its length becomes
    NTax and its space-joined content becomes the TaxLabels statement.
    `tree_name_newick_list` is an iterable of (name, newick) pairs; each pair
    is emitted as `    Tree <name> = <newick>` with nothing appended after the
    newick string.

    Returns the content of the in-memory buffer after the writer is flushed.
    """
    sink, writer = get_utf_8_string_io_writer()
    header_template = '''#NEXUS
BEGIN TAXA;
    Dimensions NTax = {s};
    TaxLabels {l} ;
END;
BEGIN TREES;
'''
    taxa_count = len(quoted_leaf_labels)
    tax_labels = ' '.join(quoted_leaf_labels)
    writer.write(header_template.format(s=taxa_count, l=tax_labels))
    for tree_name, newick_str in tree_name_newick_list:
        # Each piece is written separately, in order, for every tree.
        for piece in ('    Tree ', tree_name, ' = ', newick_str):
            writer.write(piece)
    writer.write('\nEND;\n')
    flush_utf_8_writer(writer)
    return sink.getvalue()
Exemple #4
0
def _write_nexus_format(quoted_leaf_labels, tree_name_newick_list):
    """Build a NEXUS document containing a TAXA block and a TREES block.

    `quoted_leaf_labels` is a sequence of label strings; its length becomes
    NTax and its space-joined content becomes the TaxLabels statement.
    `tree_name_newick_list` is an iterable of (name, newick) pairs; each pair
    is emitted as `    Tree <name> = <newick>` with nothing appended after the
    newick string.

    Returns the content of the in-memory buffer after the writer is flushed.
    """
    sink, writer = get_utf_8_string_io_writer()
    header_template = '''#NEXUS
BEGIN TAXA;
    Dimensions NTax = {s};
    TaxLabels {l} ;
END;
BEGIN TREES;
'''
    taxa_count = len(quoted_leaf_labels)
    tax_labels = ' '.join(quoted_leaf_labels)
    writer.write(header_template.format(s=taxa_count, l=tax_labels))
    for tree_name, newick_str in tree_name_newick_list:
        # Each piece is written separately, in order, for every tree.
        for piece in ('    Tree ', tree_name, ' = ', newick_str):
            writer.write(piece)
    writer.write('\nEND;\n')
    flush_utf_8_writer(writer)
    return sink.getvalue()
Exemple #5
0
def convert_tree_to_newick(tree,
                           otu_group,
                           label_key,
                           leaf_labels,
                           needs_quotes_pattern,
                           subtree_id=None,
                           bracket_ingroup=False):
    """Write `tree` (or one of its subtrees) as a newick string.

    The tree is read through its 'edgeBySourceId' and 'nodeById' mappings and
    walked iteratively (no recursion) with an explicit stack.

    Arguments:
        tree: mapping that provides 'edgeBySourceId', 'nodeById',
            '^ot:rootNodeId' (read when no `subtree_id` is given) and,
            optionally, '^ot:inGroupClade'.
        otu_group, label_key, leaf_labels, needs_quotes_pattern: forwarded to
            the label-writing helpers; `label_key` must be one of
            `PhyloSchema._NEWICK_PROP_VALS`.
        subtree_id: falsy to start at the tree's root, the string 'ingroup' to
            start at the ingroup clade, or a node ID to start at that node.
        bracket_ingroup: if true, '[pre-ingroup-marker]' and
            '[post-ingroup-marker]' comments are written around the ingroup
            clade (not when the ingroup itself is the requested subtree).

    Returns the buffer content (terminated by ';'), or None when the starting
    node is not a key of 'edgeBySourceId'.
    """
    assert label_key in PhyloSchema._NEWICK_PROP_VALS #pylint: disable=W0212
    n_unlabeled = 0
    ingroup_id = tree.get('^ot:inGroupClade')
    if not subtree_id:
        start_id = tree['^ot:rootNodeId']
    elif subtree_id == 'ingroup':
        start_id = ingroup_id
        ingroup_id = None # turns off the ingroup marker comments
    else:
        start_id = subtree_id
    edge_by_source = tree['edgeBySourceId']
    if start_id not in edge_by_source:
        return None
    node_by_id = tree['nodeById']
    node_id = start_id
    parent_edge = None      # edge leading to `node_id`; None for the start node
    waiting_sibs = []       # (edge_id, edge) pairs still to visit at this level
    path_stack = []         # saved (parent_edge, node_id, waiting_sibs) per open '('
    sio, out = get_utf_8_string_io_writer()
    descending = True
    while True:
        if descending:
            child_edges = edge_by_source.get(node_id)
            if child_edges is None:
                # No outgoing edges recorded: treat as a leaf.
                leaf = node_by_id[node_id]
                n_unlabeled = _write_newick_leaf_label(out,
                                                       leaf,
                                                       otu_group,
                                                       label_key,
                                                       leaf_labels,
                                                       n_unlabeled,
                                                       needs_quotes_pattern)
                _write_newick_edge_len(out, parent_edge)
                descending = False
            else:
                # Sort by edge ID so that the child order is reproducible.
                ordered_children = sorted(child_edges.items())
                if bracket_ingroup and (ingroup_id == node_id):
                    out.write('[pre-ingroup-marker]')
                out.write('(')
                first_child = ordered_children.pop(0)
                path_stack.append((parent_edge, node_id, waiting_sibs))
                parent_edge = first_child[1]
                waiting_sibs = ordered_children
                node_id = parent_edge['@target']
        if not descending:
            sib_edge_id = None
            while True:
                if waiting_sibs:
                    # Another child remains at this level: move sideways.
                    out.write(',')
                    sib_edge_id, sib_edge = waiting_sibs.pop(0)
                    break
                if not path_stack:
                    break
                # Level exhausted: close the clade and climb to its parent.
                parent_edge, node_id, waiting_sibs = path_stack.pop()
                internal = node_by_id[node_id]
                out.write(')')
                _write_newick_internal_label(out,
                                             internal,
                                             otu_group,
                                             label_key,
                                             needs_quotes_pattern)
                _write_newick_edge_len(out, parent_edge)
                if bracket_ingroup and (ingroup_id == node_id):
                    out.write('[post-ingroup-marker]')
            if sib_edge_id is None:
                # Stack emptied without finding a sibling: traversal finished.
                break
            parent_edge = sib_edge
            node_id = parent_edge['@target']
            descending = True
    out.write(';')
    flush_utf_8_writer(out)
    return sio.getvalue()
Exemple #6
0
    def convert(self, src, serialize=None, output_dest=None, src_schema=None):
        """Convert `src` (which must be NexSON) to the output described by this schema.

        Args:
            src: the source object. Only NexSON sources are supported.
            serialize: for NexSON output, a truthy value requests JSON
                serialization and a falsy value returns the converted object.
                For the other output formats the result is always serialized;
                passing an explicit falsy value (other than None) is an error.
            output_dest: optional destination. For NexSON output it is handed
                to `write_as_json` as-is. For the other formats a string is
                treated as a filepath, opened for writing as UTF-8 and closed
                again before returning; any other truthy value is used as a
                writable object and is left open for the caller.
            src_schema: schema describing `src`. None means NexSON with no
                declared version.

        Returns:
            NexSON output: the converted object, its JSON serialization, or
            None (when written to `output_dest` or when nothing was extracted).
            NeXML output: the serialized document, or None when written to
            `output_dest`.
            NEXUS/newick output: the result of `extract_tree` (also written to
            `output_dest`, followed by a newline, when one is given).

        Raises:
            NotImplementedError: if this conversion is unsupported or `src` is
                not NexSON.
            ValueError: if an unserialized result is requested for a
                non-NexSON output format.
            AssertionError: if the schema's format code is not handled here.
        """
        if src_schema is None:
            src_format = PhyloSchema.NEXSON
            current_format = None
        else:
            src_format = src_schema.format_code
            current_format = src_schema.version
        if not self.can_convert_from():
            m = 'Conversion of {c} to {d} is not supported'.format(c=self.content, d=self.description)
            raise NotImplementedError(m)
        if src_format != PhyloSchema.NEXSON:
            raise NotImplementedError('Only conversion from NexSON is currently supported')
        if self.format_code == PhyloSchema.NEXSON:
            d = src
            if self.content == 'study':
                d = convert_nexson_format(src,
                                          out_nexson_format=self.version,
                                          current_format=current_format,
                                          remove_old_structs=True,
                                          pristine_if_invalid=False,
                                          sort_arbitrary=False)
            elif self.content in ('tree', 'subtree'):
                if self.content == 'tree' and self.cull_nonmatching:
                    d = cull_nonmatching_trees(d, self.content_id, current_format)
                    d = convert_nexson_format(d,
                                              out_nexson_format=self.version,
                                              current_format=current_format,
                                              remove_old_structs=True,
                                              pristine_if_invalid=False,
                                              sort_arbitrary=False)
                else:
                    # Map the first element of each extracted tuple to the second.
                    i_t_o_list = extract_tree_nexson(d, self.content_id, current_format)
                    d = {}
                    for ito_tup in i_t_o_list:
                        d[ito_tup[0]] = ito_tup[1]
            elif self.content == 'meta':
                # The return value is ignored, so `d` (which is `src`) is what gets
                # returned -- presumably modified in place; confirm against the helper.
                strip_to_meta_only(d, current_format)
            elif self.content == 'otus':
                d = extract_otus_nexson(d, self.content_id, current_format)
            elif self.content == 'otu':
                d = extract_otu_nexson(d, self.content_id, current_format)
            elif self.content == 'otumap':
                if self.content_id is None:
                    r = extract_otu_nexson(d, None, current_format)
                else:
                    # Try the ID as an otus-group ID first, then as a single otu ID.
                    p = extract_otus_nexson(d, self.content_id, current_format)
                    if p is None:
                        r = extract_otu_nexson(d, self.content_id, current_format)
                    else:
                        r = {}
                        for v in p.values():
                            r.update(v.get('otuById', {}))
                if not r:
                    return None
                d = _otu_dict_to_otumap(r)
            elif self.content == 'treelist':
                i_t_o_list = extract_tree_nexson(d,
                                                 self.content_id,
                                                 current_format)
                d = [i[0] for i in i_t_o_list]
            if d is None:
                return None
            if serialize:
                if output_dest:
                    write_as_json(d, output_dest)
                    return None
                f, wrapper = get_utf_8_string_io_writer()
                write_as_json(d, wrapper)
                flush_utf_8_writer(wrapper)
                return f.getvalue()
            return d
        # Non-NexSON types go here...
        if (serialize is not None) and (not serialize):
            raise ValueError('Conversion without serialization is only supported for the NexSON format')
        # Only a file that this method opens itself is closed below; a stream
        # supplied by the caller stays open and remains the caller's to manage.
        opened_here = False
        if output_dest and is_str_type(output_dest):
            output_dest = codecs.open(output_dest, 'w', encoding='utf-8')
            opened_here = True
        try:
            if self.format_code == PhyloSchema.NEXML:
                if output_dest:
                    write_obj_as_nexml(src, output_dest, addindent=' ', newl='\n', otu_label=self.otu_label_prop)
                    return None
                return convert_to_nexml(src, addindent=' ', newl='\n', otu_label=self.otu_label_prop)
            if self.format_code in [PhyloSchema.NEXUS, PhyloSchema.NEWICK]:
                if self.content in ('tree', 'subtree'):
                    # content_id may be a (tree_id, subtree_id) pair or a bare tree ID.
                    if isinstance(self.content_id, (list, tuple)):
                        ci, subtree_id = self.content_id
                    else:
                        ci, subtree_id = self.content_id, None
                else:
                    ci, subtree_id = None, None
                response = extract_tree(src, ci, self, subtree_id=subtree_id)
                # these formats are always serialized...
                if output_dest:
                    output_dest.write(response)
                    output_dest.write('\n')
                return response
        finally:
            # Runs on every exit path (return or exception) so the handle
            # opened above is flushed and released.
            if opened_here:
                output_dest.close()
        # Raised explicitly (rather than `assert False`) so that the check is not
        # stripped when Python runs with -O.
        raise AssertionError('Unhandled output format code: {f}'.format(f=self.format_code))
Exemple #7
0
def convert_tree_to_newick(tree,
                           otu_group,
                           label_key,
                           leaf_labels,
                           needs_quotes_pattern,
                           subtree_id=None,
                           bracket_ingroup=False):
    """Write `tree` (or one of its subtrees) as a newick string.

    The tree is read through its 'edgeBySourceId' and 'nodeById' mappings and
    walked iteratively (no recursion) with an explicit stack.

    Arguments:
        tree: mapping that provides 'edgeBySourceId', 'nodeById',
            '^ot:rootNodeId' (read when no `subtree_id` is given) and,
            optionally, '^ot:inGroupClade'.
        otu_group, label_key, leaf_labels, needs_quotes_pattern: forwarded to
            the label-writing helpers; `label_key` must be one of
            `PhyloSchema._NEWICK_PROP_VALS`.
        subtree_id: falsy to start at the tree's root, the string 'ingroup' to
            start at the ingroup clade, or a node ID to start at that node.
        bracket_ingroup: if true, '[pre-ingroup-marker]' and
            '[post-ingroup-marker]' comments are written around the ingroup
            clade (not when the ingroup itself is the requested subtree).

    Returns the buffer content (terminated by ';'), or None when the starting
    node is not a key of 'edgeBySourceId'.
    """
    assert label_key in PhyloSchema._NEWICK_PROP_VALS #pylint: disable=W0212
    n_unlabeled = 0
    ingroup_id = tree.get('^ot:inGroupClade')
    if not subtree_id:
        start_id = tree['^ot:rootNodeId']
    elif subtree_id == 'ingroup':
        start_id = ingroup_id
        ingroup_id = None # turns off the ingroup marker comments
    else:
        start_id = subtree_id
    edge_by_source = tree['edgeBySourceId']
    if start_id not in edge_by_source:
        return None
    node_by_id = tree['nodeById']
    node_id = start_id
    parent_edge = None      # edge leading to `node_id`; None for the start node
    waiting_sibs = []       # (edge_id, edge) pairs still to visit at this level
    path_stack = []         # saved (parent_edge, node_id, waiting_sibs) per open '('
    sio, out = get_utf_8_string_io_writer()
    descending = True
    while True:
        if descending:
            child_edges = edge_by_source.get(node_id)
            if child_edges is None:
                # No outgoing edges recorded: treat as a leaf.
                leaf = node_by_id[node_id]
                n_unlabeled = _write_newick_leaf_label(out,
                                                       leaf,
                                                       otu_group,
                                                       label_key,
                                                       leaf_labels,
                                                       n_unlabeled,
                                                       needs_quotes_pattern)
                _write_newick_edge_len(out, parent_edge)
                descending = False
            else:
                # Sort by edge ID so that the child order is reproducible.
                ordered_children = sorted(child_edges.items())
                if bracket_ingroup and (ingroup_id == node_id):
                    out.write('[pre-ingroup-marker]')
                out.write('(')
                first_child = ordered_children.pop(0)
                path_stack.append((parent_edge, node_id, waiting_sibs))
                parent_edge = first_child[1]
                waiting_sibs = ordered_children
                node_id = parent_edge['@target']
        if not descending:
            sib_edge_id = None
            while True:
                if waiting_sibs:
                    # Another child remains at this level: move sideways.
                    out.write(',')
                    sib_edge_id, sib_edge = waiting_sibs.pop(0)
                    break
                if not path_stack:
                    break
                # Level exhausted: close the clade and climb to its parent.
                parent_edge, node_id, waiting_sibs = path_stack.pop()
                internal = node_by_id[node_id]
                out.write(')')
                _write_newick_internal_label(out,
                                             internal,
                                             otu_group,
                                             label_key,
                                             needs_quotes_pattern)
                _write_newick_edge_len(out, parent_edge)
                if bracket_ingroup and (ingroup_id == node_id):
                    out.write('[post-ingroup-marker]')
            if sib_edge_id is None:
                # Stack emptied without finding a sibling: traversal finished.
                break
            parent_edge = sib_edge
            node_id = parent_edge['@target']
            descending = True
    out.write(';')
    flush_utf_8_writer(out)
    return sio.getvalue()
Exemple #8
0
    def convert(self, src, serialize=None, output_dest=None, src_schema=None):
        """Convert `src` (which must be NexSON) to the output described by this schema.

        Args:
            src: the source object. Only NexSON sources are supported.
            serialize: for NexSON output, a truthy value requests JSON
                serialization and a falsy value returns the converted object.
                For the other output formats the result is always serialized;
                passing an explicit falsy value (other than None) is an error.
            output_dest: optional destination. For NexSON output it is handed
                to `write_as_json` as-is. For the other formats a string is
                treated as a filepath, opened for writing as UTF-8 and closed
                again before returning; any other truthy value is used as a
                writable object and is left open for the caller.
            src_schema: schema describing `src`. None means NexSON with no
                declared version.

        Returns:
            NexSON output: the converted object, its JSON serialization, or
            None (when written to `output_dest` or when nothing was extracted).
            NeXML output: the serialized document, or None when written to
            `output_dest`.
            NEXUS/newick output: the result of `extract_tree` (also written to
            `output_dest`, followed by a newline, when one is given).

        Raises:
            NotImplementedError: if this conversion is unsupported or `src` is
                not NexSON.
            ValueError: if an unserialized result is requested for a
                non-NexSON output format.
            AssertionError: if the schema's format code is not handled here.
        """
        if src_schema is None:
            src_format = PhyloSchema.NEXSON
            current_format = None
        else:
            src_format = src_schema.format_code
            current_format = src_schema.version
        if not self.can_convert_from():
            m = 'Conversion of {c} to {d} is not supported'.format(c=self.content, d=self.description)
            raise NotImplementedError(m)
        if src_format != PhyloSchema.NEXSON:
            raise NotImplementedError('Only conversion from NexSON is currently supported')
        if self.format_code == PhyloSchema.NEXSON:
            d = src
            if self.content == 'study':
                d = convert_nexson_format(src,
                                          out_nexson_format=self.version,
                                          current_format=current_format,
                                          remove_old_structs=True,
                                          pristine_if_invalid=False,
                                          sort_arbitrary=False)
            elif self.content in ('tree', 'subtree'):
                if self.content == 'tree' and self.cull_nonmatching:
                    d = cull_nonmatching_trees(d, self.content_id, current_format)
                    d = convert_nexson_format(d,
                                              out_nexson_format=self.version,
                                              current_format=current_format,
                                              remove_old_structs=True,
                                              pristine_if_invalid=False,
                                              sort_arbitrary=False)
                else:
                    # Map the first element of each extracted tuple to the second.
                    i_t_o_list = extract_tree_nexson(d, self.content_id, current_format)
                    d = {}
                    for ito_tup in i_t_o_list:
                        d[ito_tup[0]] = ito_tup[1]
            elif self.content == 'meta':
                # The return value is ignored, so `d` (which is `src`) is what gets
                # returned -- presumably modified in place; confirm against the helper.
                strip_to_meta_only(d, current_format)
            elif self.content == 'otus':
                d = extract_otus_nexson(d, self.content_id, current_format)
            elif self.content == 'otu':
                d = extract_otu_nexson(d, self.content_id, current_format)
            elif self.content == 'otumap':
                if self.content_id is None:
                    r = extract_otu_nexson(d, None, current_format)
                else:
                    # Try the ID as an otus-group ID first, then as a single otu ID.
                    p = extract_otus_nexson(d, self.content_id, current_format)
                    if p is None:
                        r = extract_otu_nexson(d, self.content_id, current_format)
                    else:
                        r = {}
                        for v in p.values():
                            r.update(v.get('otuById', {}))
                if not r:
                    return None
                d = _otu_dict_to_otumap(r)
            elif self.content == 'treelist':
                i_t_o_list = extract_tree_nexson(d,
                                                 self.content_id,
                                                 current_format)
                d = [i[0] for i in i_t_o_list]
            if d is None:
                return None
            if serialize:
                if output_dest:
                    write_as_json(d, output_dest)
                    return None
                f, wrapper = get_utf_8_string_io_writer()
                write_as_json(d, wrapper)
                flush_utf_8_writer(wrapper)
                return f.getvalue()
            return d
        # Non-NexSON types go here...
        if (serialize is not None) and (not serialize):
            raise ValueError('Conversion without serialization is only supported for the NexSON format')
        # Only a file that this method opens itself is closed below; a stream
        # supplied by the caller stays open and remains the caller's to manage.
        opened_here = False
        if output_dest and is_str_type(output_dest):
            output_dest = codecs.open(output_dest, 'w', encoding='utf-8')
            opened_here = True
        try:
            if self.format_code == PhyloSchema.NEXML:
                if output_dest:
                    write_obj_as_nexml(src, output_dest, addindent=' ', newl='\n', otu_label=self.otu_label_prop)
                    return None
                return convert_to_nexml(src, addindent=' ', newl='\n', otu_label=self.otu_label_prop)
            if self.format_code in [PhyloSchema.NEXUS, PhyloSchema.NEWICK]:
                if self.content in ('tree', 'subtree'):
                    # content_id may be a (tree_id, subtree_id) pair or a bare tree ID.
                    if isinstance(self.content_id, (list, tuple)):
                        ci, subtree_id = self.content_id
                    else:
                        ci, subtree_id = self.content_id, None
                else:
                    ci, subtree_id = None, None
                response = extract_tree(src, ci, self, subtree_id=subtree_id)
                # these formats are always serialized...
                if output_dest:
                    output_dest.write(response)
                    output_dest.write('\n')
                return response
        finally:
            # Runs on every exit path (return or exception) so the handle
            # opened above is flushed and released.
            if opened_here:
                output_dest.close()
        # Raised explicitly (rather than `assert False`) so that the check is not
        # stripped when Python runs with -O.
        raise AssertionError('Unhandled output format code: {f}'.format(f=self.format_code))