Esempio n. 1
0
def parse(data, ttable=None, treename=None):
    """
    Parse a newick string.

    *data* is any file-like object that can be coerced into shlex, or
    a string (converted to StringIO)

    *ttable* is a dictionary mapping node labels in the newick string
     to other values.  Leaf labels are looked up first as integers
     (when the label is numeric) and then as strings; internal node
     labels are looked up as strings only.  Labels without an entry
     are kept as they appear in the input.

    *treename* is stored as the ``treename`` attribute of every node
    created by the parser.

    Returns: the root node.

    Raises:
      ValueError - a branch length is missing or not a number, or the
                   input contains no tree at all
      AssertionError - the numbers of '(' and ')' differ
    """
    from ivy.tree import Node

    # Plain strings are wrapped in a file-like object for the tokenizer.
    # ``types.StringTypes`` only exists on Python 2; fall back to ``str``.
    if isinstance(data, getattr(types, "StringTypes", (str,))):
        data = StringIO(data)

    tokens = Tokenizer(data)

    node = None  # node the parser is currently positioned on
    lp = 0  # number of '(' seen so far
    rp = 0  # number of ')' seen so far

    previous = None  # value of `token` at the end of the last iteration

    ni = 0  # node id counter (preorder) - zero-based indexing
    li = 0  # leaf index counter
    ii = 0  # internal node index counter
    pi = 0  # postorder sequence
    while True:
        token = tokens.get_token()
        if token == ';' or token == tokens.eof:
            # Raised explicitly rather than with ``assert`` so the check
            # is not stripped under ``python -O``; the exception type
            # seen by callers is unchanged.
            if lp != rp:
                raise AssertionError(
                    "unbalanced parentheses in tree description: (%s, %s)"
                    % (lp, rp))
            break

        # internal node
        elif token == '(':
            lp += 1
            newnode = Node()
            newnode.ni = ni
            ni += 1
            newnode.isleaf = False
            newnode.ii = ii
            ii += 1
            newnode.treename = treename
            if node is not None:
                # `left` continues from the previous sibling's `right`,
                # or from the parent's `left` for a first child.
                if node.children:
                    newnode.left = node.children[-1].right + 1
                else:
                    newnode.left = node.left + 1
                node.add_child(newnode)
            else:
                # first '(' of the description: this is the root
                newnode.left = 1
            newnode.right = newnode.left + 1
            node = newnode

        elif token == ')':
            rp += 1
            # all children of the parent are done: step up and close it
            node = node.parent
            node.pi = pi
            pi += 1
            if node.children:
                node.right = node.children[-1].right + 1

        elif token == ',':
            # step back up to the parent so the next sibling attaches there
            node = node.parent
            if node.children:
                node.right = node.children[-1].right + 1

        # branch length
        elif token == ':':
            token = tokens.get_token()
            if token == '[':
                # a comment placed between ':' and the length itself
                node.length_comment = tokens.parse_embedded_comment()
                token = tokens.get_token()

            if token == '' or token == tokens.eof:
                # The original raised a string here, which is not a valid
                # exception; ValueError matches the other branch-length
                # error below.
                raise ValueError(
                    'unexpected end-of-file (expecting branch length)')
            try:
                brlen = float(token)
            except ValueError:
                raise ValueError("invalid literal for branch length, "
                                 "'%s'" % token)

            node.length = brlen
        # comment
        elif token == '[':
            node.comment = tokens.parse_embedded_comment()
            # Slice instead of indexing so an empty comment ("[]") does
            # not raise IndexError.
            if node.comment[:1] == '&':
                # metadata
                meta = META.findall(node.comment[1:])
                if meta:
                    node.meta = {}
                    for k, v in meta:
                        # SECURITY NOTE(review): eval() executes whatever
                        # expression the input file supplies.  Do not feed
                        # untrusted newick data through this path; consider
                        # a restricted literal parser instead.
                        v = eval(v.replace('{', '(').replace('}', ')'))
                        node.meta[k] = v

        # leaf node or internal node label
        else:
            if previous != ')':  # leaf node
                if ttable:
                    try:
                        ttoken = (ttable.get(int(token)) or ttable.get(token))
                    except ValueError:
                        # label is not numeric: string lookup only
                        ttoken = ttable.get(token)
                    if ttoken:
                        token = ttoken
                newnode = Node()
                newnode.ni = ni
                ni += 1
                newnode.pi = pi
                pi += 1
                # collapse whitespace runs to '_' and drop single quotes
                newnode.label = "_".join(token.split()).replace("'", "")
                newnode.isleaf = True
                newnode.li = li
                li += 1
                if node.children:
                    newnode.left = node.children[-1].right + 1
                else:
                    newnode.left = node.left + 1
                newnode.right = newnode.left + 1
                newnode.treename = treename
                node.add_child(newnode)
                node = newnode
            else:  # label
                if ttable:
                    node.label = ttable.get(token, token)
                else:
                    node.label = token

        previous = token

    if node is None:
        # Nothing was parsed (empty input or a bare ';'); fail with a
        # clear message instead of an AttributeError on None.
        raise ValueError("no tree found in input")
    node.isroot = True
    return node
Esempio n. 2
0
def parse(data, ttable=None, treename=None):
    """
    Parse a newick string.

    *data* is any file-like object that can be coerced into shlex, or
    a string (converted to StringIO)

    *ttable* is a dictionary mapping node labels in the newick string
     to other values.  Leaf labels are looked up first as integers
     (when the label is numeric) and then as strings; internal node
     labels are looked up as strings only.  Labels without an entry
     are kept as they appear in the input.

    *treename* is stored as the ``treename`` attribute of every node
    created by the parser.

    Returns: the root node.

    Raises:
      ValueError - a branch length is missing or not a number, or the
                   input contains no tree at all
      AssertionError - the numbers of '(' and ')' differ
    """
    from ivy.tree import Node

    # Plain strings are wrapped in a file-like object for the tokenizer.
    # ``types.StringTypes`` only exists on Python 2; fall back to ``str``.
    if isinstance(data, getattr(types, "StringTypes", (str,))):
        data = StringIO(data)

    tokens = Tokenizer(data)

    node = None  # node the parser is currently positioned on
    lp = 0  # number of '(' seen so far
    rp = 0  # number of ')' seen so far

    previous = None  # value of `token` at the end of the last iteration

    ni = 0  # node id counter (preorder) - zero-based indexing
    li = 0  # leaf index counter
    ii = 0  # internal node index counter
    pi = 0  # postorder sequence
    while True:
        token = tokens.get_token()
        if token == ';' or token == tokens.eof:
            # Explicit raise instead of ``assert`` so the balance check
            # still runs under ``python -O``; callers see the same
            # AssertionError as before.
            if lp != rp:
                raise AssertionError(
                    "unbalanced parentheses in tree description: (%s, %s)"
                    % (lp, rp))
            break

        # internal node
        elif token == '(':
            lp += 1
            newnode = Node()
            newnode.ni = ni
            ni += 1
            newnode.isleaf = False
            newnode.ii = ii
            ii += 1
            newnode.treename = treename
            if node is not None:
                # `left` continues from the previous sibling's `right`,
                # or from the parent's `left` for a first child.
                if node.children:
                    newnode.left = node.children[-1].right + 1
                else:
                    newnode.left = node.left + 1
                node.add_child(newnode)
            else:
                # first '(' of the description: this is the root
                newnode.left = 1
            newnode.right = newnode.left + 1
            node = newnode

        elif token == ')':
            rp += 1
            # all children of the parent are done: step up and close it
            node = node.parent
            node.pi = pi
            pi += 1
            if node.children:
                node.right = node.children[-1].right + 1

        elif token == ',':
            # step back up to the parent so the next sibling attaches there
            node = node.parent
            if node.children:
                node.right = node.children[-1].right + 1

        # branch length
        elif token == ':':
            token = tokens.get_token()
            if token == '[':
                # a comment placed between ':' and the length itself
                node.length_comment = tokens.parse_embedded_comment()
                token = tokens.get_token()

            if token == '' or token == tokens.eof:
                # A string was raised here originally, which is not a
                # valid exception; ValueError matches the other
                # branch-length error below.
                raise ValueError(
                    'unexpected end-of-file (expecting branch length)')
            try:
                brlen = float(token)
            except ValueError:
                raise ValueError("invalid literal for branch length, "
                                 "'%s'" % token)

            node.length = brlen
        # comment
        elif token == '[':
            node.comment = tokens.parse_embedded_comment()
            # Slicing (not indexing) keeps an empty comment ("[]") from
            # raising IndexError.
            if node.comment[:1] == '&':
                # metadata
                meta = META.findall(node.comment[1:])
                if meta:
                    node.meta = {}
                    for k, v in meta:
                        # SECURITY NOTE(review): eval() executes whatever
                        # expression the input file supplies.  Do not feed
                        # untrusted newick data through this path; consider
                        # a restricted literal parser instead.
                        v = eval(v.replace('{', '(').replace('}', ')'))
                        node.meta[k] = v

        # leaf node or internal node label
        else:
            if previous != ')':  # leaf node
                if ttable:
                    try:
                        ttoken = (ttable.get(int(token)) or
                                  ttable.get(token))
                    except ValueError:
                        # label is not numeric: string lookup only
                        ttoken = ttable.get(token)
                    if ttoken:
                        token = ttoken
                newnode = Node()
                newnode.ni = ni
                ni += 1
                newnode.pi = pi
                pi += 1
                # collapse whitespace runs to '_' and drop single quotes
                newnode.label = "_".join(token.split()).replace("'", "")
                newnode.isleaf = True
                newnode.li = li
                li += 1
                if node.children:
                    newnode.left = node.children[-1].right + 1
                else:
                    newnode.left = node.left + 1
                newnode.right = newnode.left + 1
                newnode.treename = treename
                node.add_child(newnode)
                node = newnode
            else:  # label
                if ttable:
                    node.label = ttable.get(token, token)
                else:
                    node.label = token

        previous = token

    if node is None:
        # Nothing was parsed (empty input or a bare ';'); fail with a
        # clear message instead of an AttributeError on None.
        raise ValueError("no tree found in input")
    node.isroot = True
    return node