def _parse_chunk(self, buf, start=0):
    """Tokenize BUF from offset START and queue the resulting tokens.

    BUF is scanned with a texttools tag table.  Complete tokens (each
    @-string collapsed into a single entry) are reversed and placed in
    front of whatever is already in self.tokens.  A trailing token that
    might continue in the next chunk is not queued; it is stored in
    self.partial as a (kind, [text]) pair instead.

    Raises common.RCSIllegalCharacter if the tag table rejects BUF.
    """
    buf_len = len(buf)
    assert start < buf_len

    # The tag table refers to the buffer being parsed (BUF is used as a
    # tag object), so it is constructed anew for every chunk.
    #
    # NOTE: the jump offsets below are relative to each entry; entries
    # must not be added, removed or reordered without adjusting them.
    table = (
        #1: skip whitespace.  With or without whitespace, move on to
        #   the next rule.
        (None, _tt.AllInSet, _tt.whitespace_set, +1),

        #2: at EOF we are done; otherwise move on.
        (_E_COMPLETE, _tt.EOF + _tt.AppendTagobj, _tt.Here, +1, _SUCCESS),

        #3: accumulate token text, or skip ahead to #5.
        (_UNUSED, _tt.AllInSet + _tt.AppendMatch, _idchar_set, +2),

        #4: EOF right after a token exits; otherwise back to #1.
        (_E_TOKEN, _tt.EOF + _tt.AppendTagobj, _tt.Here, -3, _SUCCESS),

        #5: single-character token, or skip ahead to #7.
        (_UNUSED, _tt.IsInSet + _tt.AppendMatch, _onechar_token_set, +2),

        #6: EOF exits; otherwise back to #1.
        (_E_COMPLETE, _tt.EOF + _tt.AppendTagobj, _tt.Here, -5, _SUCCESS),

        #7: anything other than an '@' here is a syntax error (no jump
        #   targets are given, which signals that condition).  An '@'
        #   is consumed and we move on to the next rule.
        (_T_STRING_START, _tt.Is + _tt.AppendTagobj, '@'),

        #8: another '@' leads to #9; anything else leads to #12.
        (None, _tt.Is, '@', +4, +1),

        #9: a second '@' is an escaped '@': record it and return to #8.
        (buf, _tt.Is, '@', +1, -1),

        #10: log the end of the string.
        (_T_STRING_END, _tt.Skip + _tt.AppendTagobj, 0, 0, +1),

        #11: EOF exits; otherwise back to #1.
        (_E_STRING_END, _tt.EOF + _tt.AppendTagobj, _tt.Here, -10, _SUCCESS),

        #12: EOF inside a string exits; otherwise move on.
        (_E_STRING_SPAN, _tt.EOF + _tt.AppendTagobj, _tt.Here, +1, _SUCCESS),

        #13: take everything that isn't an '@', then go to the next
        #    rule to look for EOF.
        (buf, _tt.AllInSet, _not_at_set, 0, +1),

        #14: EOF exits; otherwise go back to #8 to look for a doubled '@'.
        (_E_STRING_SPAN, _tt.EOF + _tt.AppendTagobj, _tt.Here, -6, _SUCCESS),
    )

    # Fast, texttools may be, but it's somewhat lacking in clarity.
    # Here's an attempt to document the logic encoded in the table above.
    #
    # Transitions between rules:
    #
    #   #1   always                        -> #2
    #   #2   EOF: exit _E_COMPLETE         else -> #3
    #   #3   idchars matched -> #4         else -> #5
    #   #4   EOF: exit _E_TOKEN            else -> #1
    #   #5   one-char token matched -> #6  else -> #7
    #   #6   EOF: exit _E_COMPLETE         else -> #1
    #   #7   '@' -> #8                     else error
    #   #8   '@' -> #9                     else -> #12
    #   #9   '@' -> #8                     else -> #10
    #   #10  always                        -> #11
    #   #11  EOF: exit _E_STRING_END       else -> #1
    #   #12  EOF: exit _E_STRING_SPAN      else -> #13
    #   #13  text matched                  -> #14
    #   #14  EOF: exit _E_STRING_SPAN      else -> #8
    #
    # #1: Skip over any whitespace.
    # #2: If now EOF, exit with code _E_COMPLETE.
    # #3: If we have a series of characters in _idchar_set, then:
    #     #4: Output them as a token, and go back to #1
    #         (or exit with code _E_TOKEN if now EOF).
    # #5: If we have a character in _onechar_token_set, then:
    #     #6: Output it as a token, and go back to #1
    #         (or exit with code _E_COMPLETE if now EOF).
    # #7: If we do not have an '@', then error.
    #     If we do, then log a _T_STRING_START and continue.
    # #8: If we have another '@', continue on to #9.  Otherwise:
    #     #12: If now EOF, exit with code _E_STRING_SPAN.
    #     #13: Record the slice up to the next '@' (or EOF).
    #     #14: If now EOF, exit with code _E_STRING_SPAN.
    #          Otherwise, go back to #8.
    # #9: If we have another '@', then we've just seen an escaped
    #     (by doubling) '@' within an @-string.  Record a slice
    #     including just one '@' character, and jump back to #8.
    #     Otherwise, we've *either* seen the terminating '@' of an
    #     @-string, *or* we've seen one half of an escaped @@ sequence
    #     that just happened to be split over a chunk boundary - in
    #     either case, we continue on to #10.
    # #10: Log a _T_STRING_END.
    # #11: If now EOF, exit with _E_STRING_END.  Otherwise, go back to #1.

    ok, tags, stop = _tt.tag(buf, table, start)
    if not ok:
        ### need a better way to report this error
        raise common.RCSIllegalCharacter()
    assert stop == buf_len

    # The last entry is the exit code appended by whichever EOF rule
    # ended the scan.
    exit_code = tags.pop()

    # Collapse every _T_STRING_START ... _T_STRING_END run into a single
    # joined entry.
    pos = 0
    while pos < len(tags):
        if tags[pos] == _T_STRING_START:
            for end in range(pos + 1, len(tags)):
                if tags[end] == _T_STRING_END:
                    # The joined text replaces the whole run, both
                    # markers included.
                    tags[pos:end + 1] = [_tt.join(tags, '', pos + 1, end)]
                    break
            else:
                # No end marker: the string runs off the end of BUF.
                # Whatever was collected becomes the partial token.
                assert exit_code == _E_STRING_SPAN
                text = _tt.join(tags, '', pos + 1)
                del tags[pos:]
                self.partial = (_T_STRING_SPAN, [text])
                break
        pos += 1

    # Decide whether the last token has to be held back as a partial.
    if exit_code == _E_TOKEN:
        self.partial = (_T_TOKEN, [tags.pop()])
    elif exit_code == _E_COMPLETE:
        # The chunk ended on a token boundary; nothing is held back.
        pass
    elif exit_code == _E_STRING_SPAN:
        # Already recorded by the loop above.
        assert self.partial
    else:
        # The closing '@' may be the first half of an '@@' split across
        # chunks, so the string is held back as well.
        assert exit_code == _E_STRING_END
        self.partial = (_T_STRING_END, [tags.pop()])

    # self.tokens is stored reversed: the new tokens go in front of the
    # ones that are already queued.
    tags.reverse()
    tags.extend(self.tokens)
    self.tokens = tags
def _parse_chunk(self, buf, start=0):
    """Tokenize BUF from offset START and queue the resulting tokens.

    BUF is scanned by _tt.tag() using the tag table built below.
    Complete tokens (each @-string collapsed into a single entry) are
    reversed and placed in front of whatever is already in self.tokens.
    A partial last token is not queued; it is stored in self.partial as
    a (kind, [text]) pair instead.

    Raises common.RCSIllegalCharacter if _tt.tag() reports failure.
    """
    buf_len = len(buf)
    assert start < buf_len

    # The tag table refers to the buffer being parsed (BUF is used as a
    # tag object), so it is constructed anew for every chunk.
    #
    # NOTE(review): the numeric jump targets appear to be offsets
    # relative to each entry, so entries must stay in this exact order.
    table = (
        # Skip whitespace.  With or without whitespace, move on to the
        # next rule.
        (None, _tt.AllInSet, _tt.whitespace_set, +1),
        (_E_COMPLETE, _tt.EOF + _tt.AppendTagobj, _tt.Here, +1, _SUCCESS),

        # Accumulate token text and exit, or move on to the next rule.
        (_UNUSED, _tt.AllInSet + _tt.AppendMatch, _idchar_set, +2),
        (_E_TOKEN, _tt.EOF + _tt.AppendTagobj, _tt.Here, -3, _SUCCESS),

        # Single-character tokens exit immediately, or move on to the
        # next rule.
        (_UNUSED, _tt.IsInSet + _tt.AppendMatch, _onechar_token_set, +2),
        (_E_COMPLETE, _tt.EOF + _tt.AppendTagobj, _tt.Here, -5, _SUCCESS),

        # Anything other than an '@' here is a syntax error (no jump
        # targets are given, which signals that condition).  An '@' is
        # consumed and we move on to the next rule.
        (_T_STRING_START, _tt.Is + _tt.AppendTagobj, '@'),

        # Inside an @-string: look for '@', then for a doubled '@', and
        # otherwise mark the end of the string.
        (None, _tt.Is, '@', +4, +1),
        (buf, _tt.Is, '@', +1, -1),
        (_T_STRING_END, _tt.Skip + _tt.AppendTagobj, 0, 0, +1),
        (_E_STRING_END, _tt.EOF + _tt.AppendTagobj, _tt.Here, -10, _SUCCESS),

        (_E_STRING_SPAN, _tt.EOF + _tt.AppendTagobj, _tt.Here, +1, _SUCCESS),

        # Take everything that isn't an '@', then go to the next rule
        # to look for EOF.
        (buf, _tt.AllInSet, _not_at_set, 0, +1),

        # Go back to look for a doubled '@' if we aren't at the end of
        # the buffer.
        (_E_STRING_SPAN, _tt.EOF + _tt.AppendTagobj, _tt.Here, -6, _SUCCESS),
    )

    ok, tags, stop = _tt.tag(buf, table, start)
    if not ok:
        ### need a better way to report this error
        raise common.RCSIllegalCharacter()
    assert stop == buf_len

    # The last entry is the _E_* code of the EOF rule that ended the
    # scan; take it off before looking at the tokens.
    exit_code = tags.pop()

    # Collapse every _T_STRING_START ... _T_STRING_END run into a single
    # joined entry.
    pos = 0
    while pos < len(tags):
        if tags[pos] == _T_STRING_START:
            for end in range(pos + 1, len(tags)):
                if tags[end] == _T_STRING_END:
                    # The joined text replaces the whole run, both
                    # markers included.
                    tags[pos:end + 1] = [_tt.join(tags, '', pos + 1, end)]
                    break
            else:
                # No end marker was found, so the string is still open
                # at the end of BUF.  Whatever was collected becomes
                # the partial token.
                assert exit_code == _E_STRING_SPAN
                text = _tt.join(tags, '', pos + 1)
                del tags[pos:]
                self.partial = (_T_STRING_SPAN, [text])
                break
        pos += 1

    # Figure out whether we have a partial last token.
    if exit_code == _E_TOKEN:
        self.partial = (_T_TOKEN, [tags.pop()])
    elif exit_code == _E_COMPLETE:
        # Nothing is held back.
        pass
    elif exit_code == _E_STRING_SPAN:
        # Already recorded by the loop above.
        assert self.partial
    else:
        assert exit_code == _E_STRING_END
        self.partial = (_T_STRING_END, [tags.pop()])

    # self.tokens is stored reversed: the new tokens go in front of the
    # ones that are already queued.
    tags.reverse()
    tags.extend(self.tokens)
    self.tokens = tags