def run(self, stream, pos=0, search=False):
    '''
    Execute a search.

    Builds the initial `State` for `stream` (passing `pos` through to it),
    resets the per-run bookkeeping, opens group 0 and delegates to the
    private `__run` method, forwarding `search`.

    Returns the groups of the final state when `__run` reports a match
    (after closing group 0), otherwise an empty `Groups()`.
    '''
    self.__stream = stream
    self.__pos = pos

    initial_groups = Groups(group_state=self._parser_state.groups,
                            stream=stream)
    state = State(self._parser_state, stream, initial_groups, pos=pos)

    # for testing optimizations
    self.ticks = 0
    self.max_depth = 0

    self.__stack = None
    self.__state = None
    self.__stacks = []
    # map from node to set of known ok states
    self.__lookaheads = {}

    state.start_group(0)
    matched, state = self.__run(0, state, search=search)
    if not matched:
        return Groups()

    state.end_group(0)
    return state.groups
def _run_from(self, start_index, stream, delta, search):
    '''
    Run the compiled program over `stream`, keeping a list of parallel
    states, and return the groups for the match that is found.

    Each state is a tuple `(index, start, skip)`:

    - `index` selects the entry of `self._program` to call;
    - `start` is the offset recorded when the state was created (it is
      used as the start of group 0 on a match);
    - `skip` is 0 for a state that runs on the current character, a
      positive count for a state that is waiting for that many more
      advances, and -1 for a state that has already matched.

    `start_index` is the program index of the initial state (and of every
    state added while searching); `stream` is handed to `self._reset`;
    `delta` is handed to `self._advance` before the first state is
    created; `search`, when true, adds a fresh state at each new offset
    (unless `start_index` is already queued) until a match has been seen
    while earlier-starting states are still pending.

    Returns a `Groups` with group 0 running from `self._start` to
    `self._offset`, or an empty `Groups()` if nothing matched.
    '''
    self._reset(0, stream, None)
    self._advance(delta)
    self._search = search
    self._checkpoints = {}
    self._lookaheads = (self._offset, {})
    search = self._search  # read only, dereference optimisation

    # states are ordered by group start, which explains a lot of
    # the otherwise rather opaque logic below.
    self._states = [(start_index, self._offset, 0)]

    try:
        # NOTE(review): `_excess` is maintained outside this method;
        # presumably it counts advances past the end of input - confirm.
        while self._states and self._excess < 2:
            seen = set()
            pending = []

            while self._states:
                # unpack state
                index, self._start, skip = self._states.pop()
                try:
                    if skip == -1:
                        # this state was marked as matched on an earlier pass
                        raise Match

                    if skip:
                        skip -= 1
                        if not (search or pending or self._states):
                            # no other states, and none will be added via
                            # search, so we can jump directly
                            self._advance(skip)
                            pending.append((index, self._start, 0))
                        else:
                            # other states exist, so count down in step
                            key = (index, skip)
                            if key not in seen:
                                pending.append((index, self._start, skip))
                                seen.add(key)
                    else:
                        # process the current character
                        index = self._program[index]()
                        if index not in seen:
                            pending.append((index, self._start, 0))
                            seen.add(index)

                except Fail:
                    # this state is dead; simply drop it
                    pass

                except Match:
                    # no groups starting earlier?
                    if not pending:
                        raise
                    # some other, pending, earlier starting, state may
                    # still give a match
                    pending.append((index, self._start, -1))
                    seen.add(index)
                    # but we can discard anything that starts later
                    self._states = []
                    search = False

            # move to next character
            self._advance()
            self._states = pending

            # add current position as search if necessary
            if search and start_index not in seen:
                self._states.append((start_index, self._offset, 0))

            self._states.reverse()

        # pick first matched state, if any
        while self._states:
            _index, self._start, skip = self._states.pop()
            if skip == -1:
                raise Match

        # exhausted states with no match
        return Groups()

    except Match:
        result = Groups(group_state=self._parser_state.groups,
                        stream=self._initial_stream)
        result.start_group(0, self._start)
        result.end_group(0, self._offset)
        return result
def _run_from(self, start_state, stream, delta, search):
    '''
    Run the compiled program over `stream` using state objects, and return
    the groups of the state that matched.

    States are objects exposing `index`, `skip`, `advance()`, `clone()`,
    `start_group()` and `groups()`.  `skip` is 0 for a state that runs on
    the current character, a positive count for a state waiting for that
    many more advances, and -1 for a state that has already matched.

    `start_state` has group 0 started at `delta` (this mutates the object
    passed in) and is cloned to make the initial state; `stream` is handed
    to `self._reset`; `delta` is handed to `self._advance`; `search`, when
    true, adds a clone of `start_state` (with group 0 started at the
    current offset) after each advance, unless an equal state is already
    queued.

    Returns `self._state.groups(...)` for the matching state, or an empty
    `Groups()` if nothing matched.
    '''
    start_state.start_group(0, delta)
    self._reset(0, stream, None)
    self._advance(delta)
    self._search = search
    self._lookaheads = (self._offset, {})
    self._states = [start_state.clone()]

    try:
        # NOTE(review): `_excess` is maintained outside this method;
        # presumably it counts advances past the end of input - confirm.
        while self._states and self._excess < 2:
            seen = set()
            pending = []

            while self._states:
                self._state = self._states.pop()
                current = self._state
                skip = current.skip

                if skip == -1:
                    # already matched: it wins only if nothing is queued
                    # ahead of it, otherwise it keeps waiting
                    if not pending:
                        raise Match
                    pending.append(current)

                elif skip:
                    skip -= 1
                    if not (search or pending or self._states):
                        # no other states, and none will be added via
                        # search, so we can jump directly
                        self._advance(skip)
                        current.skip = 0
                        pending.append(current)
                    else:
                        # other states exist, so count down in step
                        current.skip = skip
                        pending.append(current)
                        seen.add(current)

                else:
                    # advance a character (compiled actions re-call on stack
                    # until a character is consumed)
                    try:
                        current.advance(self._program[current.index]())
                        if current not in seen:
                            pending.append(current)
                            seen.add(current)
                    except Fail:
                        # this state is dead; simply drop it
                        pass
                    except Match:
                        current.skip = -1
                        if not pending:
                            raise
                        pending.append(current)
                        seen.add(current)

            # move to next character
            self._advance()
            self._states = pending

            # add current position as search if necessary
            # NOTE(review): relies on start_group() returning the state
            # it was called on - confirm.
            if search and start_state not in seen:
                fresh = start_state.clone().start_group(0, self._offset)
                self._states.append(fresh)

            self._states.reverse()

        # pick first matched state, if any
        while self._states:
            self._state = self._states.pop()
            if self._state.skip == -1:
                raise Match

        # exhausted states with no match
        return Groups()

    except Match:
        return self._state.groups(self._parser_state.groups)
def groups(self, group_state):
    '''
    Build a `Groups` instance for this object.

    The result is constructed from `group_state` together with this
    object's stream, group data and last group number.  The fourth
    positional argument is always passed as `None`; its meaning is
    defined by `Groups` and is not visible here.
    '''
    stream = self.__stream
    group_data = self.__groups
    last_number = self.__last_number
    return Groups(group_state, stream, group_data, None, last_number)
def _run_from(self, start_index, stream, delta, search):
    '''
    Run the compiled program over `stream`, keeping a list of parallel
    states, and return the groups for the match that is found.

    Each state is a tuple `(index, start, skip)`:

    - `index` selects the entry of `self._program` to call;
    - `start` is the offset recorded when the state was created (it is
      used as the start of group 0 on a match);
    - `skip` is 0 for a state that runs on the current character, a
      positive count for a state that is waiting for that many more
      advances, and -1 for a state that has already matched.

    `start_index` is the program index of the initial state (and of every
    state added while searching); `stream` is handed to `self._reset`;
    `delta` is handed to `self._advance` before the first state is
    created; `search`, when true, adds a fresh state at each new offset
    (unless `start_index` is already queued) until a match has been seen
    while earlier-starting states are still pending.

    Returns a `Groups` with group 0 running from `self._start` to
    `self._offset`, or an empty `Groups()` if nothing matched.
    '''
    self._reset(0, stream, None)
    self._advance(delta)
    self._search = search
    self._checkpoints = {}
    self._lookaheads = (self._offset, {})
    search = self._search  # read only, dereference optimisation

    # states are ordered by group start, which explains a lot of
    # the otherwise rather opaque logic below.
    self._states = [(start_index, self._offset, 0)]

    try:
        # NOTE(review): `_excess` is maintained outside this method;
        # presumably it counts advances past the end of input - confirm.
        while self._states and self._excess < 2:
            seen = set()
            pending = []

            while self._states:
                # unpack state
                index, self._start, skip = self._states.pop()
                try:
                    if skip == -1:
                        # this state was marked as matched on an earlier pass
                        raise Match

                    if skip:
                        skip -= 1
                        if not (search or pending or self._states):
                            # no other states, and none will be added via
                            # search, so we can jump directly
                            self._advance(skip)
                            pending.append((index, self._start, 0))
                        else:
                            # other states exist, so count down in step
                            key = (index, skip)
                            if key not in seen:
                                pending.append((index, self._start, skip))
                                seen.add(key)
                    else:
                        # process the current character
                        index = self._program[index]()
                        if index not in seen:
                            pending.append((index, self._start, 0))
                            seen.add(index)

                except Fail:
                    # this state is dead; simply drop it
                    pass

                except Match:
                    # no groups starting earlier?
                    if not pending:
                        raise
                    # some other, pending, earlier starting, state may
                    # still give a match
                    pending.append((index, self._start, -1))
                    seen.add(index)
                    # but we can discard anything that starts later
                    self._states = []
                    search = False

            # move to next character
            self._advance()
            self._states = pending

            # add current position as search if necessary
            if search and start_index not in seen:
                self._states.append((start_index, self._offset, 0))

            self._states.reverse()

        # pick first matched state, if any
        while self._states:
            _index, self._start, skip = self._states.pop()
            if skip == -1:
                raise Match

        # exhausted states with no match
        return Groups()

    except Match:
        result = Groups(group_state=self._parser_state.groups,
                        stream=self._initial_stream)
        result.start_group(0, self._start)
        result.end_group(0, self._offset)
        return result
def run(self, stream, pos=0, search=False):
    '''
    Match `stream` from its start and return the resulting groups.

    Neither a non-zero `pos` nor `search` is supported; either raises
    `UnsupportedOperation`.

    Each state is a tuple `(index, skip)`: `index` selects the entry of
    `self._program` to call; `skip` is 0 for a state that runs on the
    current character, a positive count for a state waiting for that many
    more advances, and negative for a state that has already matched.
    The negated `skip` of the matching state is used as a group number in
    the result.

    Returns a `Groups` in which both group 0 and group `-skip` run from 0
    to `self._offset`, or an empty `Groups()` if nothing matched.
    '''
    if pos or search:
        raise UnsupportedOperation('Search')

    self._initial_stream = stream
    self._reset(0, stream, None)
    self._checkpoints = {}
    self._last_group = 0  # default for no group

    self._states = [(0, 0)]

    try:
        # NOTE(review): `_excess` is maintained outside this method;
        # presumably it counts advances past the end of input - confirm.
        while self._states and self._excess < 2:
            seen = set()
            pending = []

            while self._states:
                # unpack state
                index, skip = self._states.pop()
                try:
                    if skip < 0:
                        # this state was marked as matched on an earlier pass
                        raise Match

                    if skip:
                        skip -= 1
                        if not (pending or self._states):
                            # no other states, so we can jump directly
                            self._advance(skip)
                            pending.append((index, 0))
                        else:
                            # other states exist, so count down in step
                            key = (index, skip)
                            if key not in seen:
                                pending.append(key)
                                seen.add(key)
                    else:
                        # process the current character
                        index = self._program[index]()
                        if index not in seen:
                            pending.append((index, 0))
                            seen.add(index)

                except Fail:
                    # this state is dead; simply drop it
                    pass

                except Match:
                    # a fresh match takes its marker from _last_group
                    # NOTE(review): presumably program actions set
                    # _last_group to a negative group marker - confirm.
                    if skip >= 0:
                        skip = self._last_group
                    # no groups starting earlier?
                    if not pending:
                        raise
                    # some other, pending, earlier starting, state may
                    # still give a match
                    if index not in seen:
                        pending.append((index, skip))
                        seen.add(index)
                    # but we can discard anything that starts later
                    self._states = []

            # move to next character
            self._advance()
            self._states = pending
            self._states.reverse()

        # pick first matched state, if any
        while self._states:
            _index, skip = self._states.pop()
            if skip < 0:
                raise Match

        # exhausted states with no match
        return Groups()

    except Match:
        group_number = -skip
        result = Groups(group_state=self._parser_state.groups,
                        stream=self._initial_stream)
        result.start_group(0, 0)
        result.end_group(0, self._offset)
        result.start_group(group_number, 0)
        result.end_group(group_number, self._offset)
        return result
def run(self, stream, pos=0, search=False):
    '''
    Match `stream` from its start and return the resulting groups.

    Neither a non-zero `pos` nor `search` is supported; either raises
    `UnsupportedOperation`.

    Each state is a tuple `(index, skip)`: `index` selects the entry of
    `self._program` to call; `skip` is 0 for a state that runs on the
    current character, a positive count for a state waiting for that many
    more advances, and negative for a state that has already matched.
    The negated `skip` of the matching state is used as a group number in
    the result.

    Returns a `Groups` in which both group 0 and group `-skip` run from 0
    to `self._offset`, or an empty `Groups()` if nothing matched.
    '''
    if pos or search:
        raise UnsupportedOperation("Search")

    self._initial_stream = stream
    self._reset(0, stream, None)
    self._checkpoints = {}
    self._last_group = 0  # default for no group

    self._states = [(0, 0)]

    try:
        # NOTE(review): `_excess` is maintained outside this method;
        # presumably it counts advances past the end of input - confirm.
        while self._states and self._excess < 2:
            seen = set()
            pending = []

            while self._states:
                # unpack state
                index, skip = self._states.pop()
                try:
                    if skip < 0:
                        # this state was marked as matched on an earlier pass
                        raise Match

                    if skip:
                        skip -= 1
                        if not (pending or self._states):
                            # no other states, so we can jump directly
                            self._advance(skip)
                            pending.append((index, 0))
                        else:
                            # other states exist, so count down in step
                            key = (index, skip)
                            if key not in seen:
                                pending.append(key)
                                seen.add(key)
                    else:
                        # process the current character
                        index = self._program[index]()
                        if index not in seen:
                            pending.append((index, 0))
                            seen.add(index)

                except Fail:
                    # this state is dead; simply drop it
                    pass

                except Match:
                    # a fresh match takes its marker from _last_group
                    # NOTE(review): presumably program actions set
                    # _last_group to a negative group marker - confirm.
                    if skip >= 0:
                        skip = self._last_group
                    # no groups starting earlier?
                    if not pending:
                        raise
                    # some other, pending, earlier starting, state may
                    # still give a match
                    if index not in seen:
                        pending.append((index, skip))
                        seen.add(index)
                    # but we can discard anything that starts later
                    self._states = []

            # move to next character
            self._advance()
            self._states = pending
            self._states.reverse()

        # pick first matched state, if any
        while self._states:
            _index, skip = self._states.pop()
            if skip < 0:
                raise Match

        # exhausted states with no match
        return Groups()

    except Match:
        group_number = -skip
        result = Groups(group_state=self._parser_state.groups,
                        stream=self._initial_stream)
        result.start_group(0, 0)
        result.end_group(0, self._offset)
        result.start_group(group_number, 0)
        result.end_group(group_number, self._offset)
        return result