def _parse_entry(s):
    """
    Parse one entry, yielding a ``(fieldname, value)`` pair for each
    backslash-prefixed field line in the given string.

    Fields with no value (no space after the field name) are yielded
    as ``(fieldname, None)``.

    @param s: The entry text, one field per line.
    @rtype: generator of C{tuple}
    """
    # NOTE: the original parameter was named ``str``, shadowing the
    # builtin; renamed to ``s`` (it is only ever passed positionally).
    for field in tokenize.line(s):
        # Drop the leading backslash, then surrounding whitespace.
        field = field[1:].strip()
        # Split off the field name from its (optional) value.  The old
        # code called the deprecated free function ``split(...)``; the
        # str method is the portable equivalent.
        field = tuple(field.split(" ", 1))
        if len(field) == 1:
            # Field name with no value.
            field = (field[0], None)
        yield field
def conll_chunk(s, chunk_types=("NP",), top_node="S"):
    """
    @return: A chunk structure for a single sentence
        encoded in the given CONLL 2000 style string.
    @rtype: L{Tree}
    """

    # The stack never grows past two entries: the root tree and,
    # optionally, the chunk currently being built.
    stack = [Tree(top_node, [])]

    for lineno, line in enumerate(tokenize.line(s)):

        # Split the line into its word/tag/state/chunk-type fields.
        fields = _LINE_RE.match(line)
        if fields is None:
            raise ValueError('Error on line %d' % lineno)
        (word, tag, state, chunk_type) = fields.groups()

        # Chunk types outside the requested set are treated as "Outside".
        if chunk_types is not None and chunk_type not in chunk_types:
            state = 'O'

        # An "Inside" tag whose type disagrees with the currently open
        # chunk implicitly ends that chunk and begins a new one.
        type_clash = (state == 'I' and chunk_type != stack[-1].node)

        # "Begin"/"Outside" (or a type clash) closes any open chunk.
        if (state in 'BO' or type_clash) and len(stack) == 2:
            stack.pop()

        # "Begin" (or a type clash) opens a fresh chunk.
        if state == 'B' or type_clash:
            subtree = Tree(chunk_type, [])
            stack[-1].append(subtree)
            stack.append(subtree)

        # Attach the token to whichever tree is currently open.
        stack[-1].append((word, tag))

    return stack[0]
def _list_sent(sent):
    """
    Split the given sentence string into a list with one entry per
    line, each entry being that line's whitespace-separated tokens.
    """
    return list(map(tokenize.whitespace, tokenize.line(sent)))