def merge_escaped(stream): """Merge tokens whose escaped attribute is True together. Must be run after detect_escaped. Parameters ---------- stream: iterator """ stream = Peeker(stream, EOF) queue = [] t = six.advance_iterator(stream) while t: if t.escaped: queue.append(t) else: if t.type == "WORD": if queue: queue.append(t) n = stream.peek() if not n.escaped: t.value = "".join([c.value for c in queue]) yield t queue = [] else: n = stream.peek() if n.escaped: queue.append(t) else: yield t else: if queue: queue[-1].value = "".join([c.value for c in queue]) queue[-1].type = "WORD" yield queue[-1] queue = [] yield t try: t = six.advance_iterator(stream) except StopIteration: if queue: t.value = "".join([c.value for c in queue]) t.type = "WORD" yield t return
def post_process(stream, lexdata):
    # XXX: this is awfully complicated...
    # Holds mutable state shared between the tokenizer helpers.
    class _Internal(object):
        def __init__(self):
            self.stack = []
            self.words_stack = []
            self.stack_level = None
    internal = _Internal()

    # Simple state machine: dispatch each token to the tokenizer for the current
    # scanning state; tokenizers return the tokens to emit, the next token to
    # process and (for the field tokenizers) the next state.
    state = "SCANNING_FIELD_ID"
    stream = Peeker(stream)
    i = six.advance_iterator(stream)
    while i:
        if state == "SCANNING_FIELD_ID":
            if i.value in CONDITIONAL_ID.keys():
                queue, i = tokenize_conditional(stream, i)
                for q in queue:
                    yield q
            elif i.value in META_FIELDS_ID.keys():
                queue, i, state = scan_field_id(i, state, stream, lexdata)
                for q in queue:
                    yield q
            else:
                queue, i = find_next(i, stream, internal)
                for q in queue:
                    yield q
        elif state == "SCANNING_SINGLELINE_FIELD":
            queue, i, state = singleline_tokenizer(i, state, stream)
            for q in queue:
                yield q
        elif state == "SCANNING_MULTILINE_FIELD":
            queue, i, state = multiline_tokenizer(i, state, stream, internal)
            while len(queue) > 0:
                yield queue.pop()
        elif state == "SCANNING_WORD_FIELD":
            queue, i, state = word_tokenizer(i, state, stream)
            for t in queue:
                yield t
        elif state == "SCANNING_WORDS_FIELD":
            queue, i, state = words_tokenizer(i, state, stream, internal)
            for q in queue:
                yield q
        elif state == "SCANNING_COMMA_LIST_FIELD":
            queue, i, state = comma_list_tokenizer(i, state, stream, internal)
            for q in queue:
                yield q
        else:
            raise ValueError("Unknown state: %s" % state)