Example #1
0
    def _parse_chunk(self, buf, start=0):
        """Tokenize one buffer of RCS text and queue the resulting tokens.

        `buf` is scanned from offset `start` through to its end by `_tt.tag`,
        driven by the tag table built below.  The pieces of each @-string
        are then merged into one string, an item cut short by the end of
        the buffer is remembered in `self.partial`, and the completed
        tokens are stored, in reverse order, ahead of whatever was already
        in `self.tokens`.

        Raises common.RCSIllegalCharacter when the tagging engine reports
        failure.
        """
        buflen = len(buf)
        assert start < buflen

        # Every "are we at the end of the buffer?" rule uses the same command.
        eof_and_tag = _tt.EOF + _tt.AppendTagobj

        # The table refers to buf itself, so it has to be rebuilt per call.
        # Jump targets are relative offsets between rules, which is why the
        # order of the entries must not change.  Rules are numbered from 1.
        table = (
            # 1: skip any whitespace; continue with rule 2 whether or not
            #    there was some.
            (None, _tt.AllInSet, _tt.whitespace_set, +1),

            # 2: at EOF, exit with code _E_COMPLETE; otherwise try rule 3.
            (_E_COMPLETE, eof_and_tag, _tt.Here, +1, _SUCCESS),

            # 3: a run of _idchar_set characters is recorded as token text
            #    (on to rule 4); with no such run, try rule 5.
            (_UNUSED, _tt.AllInSet + _tt.AppendMatch, _idchar_set, +2),

            # 4: at EOF, exit with code _E_TOKEN; otherwise back to rule 1.
            (_E_TOKEN, eof_and_tag, _tt.Here, -3, _SUCCESS),

            # 5: one character from _onechar_token_set is recorded as a
            #    token (on to rule 6); otherwise try rule 7.
            (_UNUSED, _tt.IsInSet + _tt.AppendMatch, _onechar_token_set, +2),

            # 6: at EOF, exit with code _E_COMPLETE; otherwise back to rule 1.
            (_E_COMPLETE, eof_and_tag, _tt.Here, -5, _SUCCESS),

            # 7: anything other than '@' at this point is a syntax error.
            #    An '@' logs _T_STRING_START and moves on to rule 8.
            (_T_STRING_START, _tt.Is + _tt.AppendTagobj, '@'),

            # 8: another '@' leads to rule 9; anything else to rule 12.
            (None, _tt.Is, '@', +4, +1),

            # 9: a second '@' means an escaped (doubled) '@' inside the
            #    string: record a slice holding a single '@' and return to
            #    rule 8.  Otherwise the '@' seen by rule 8 either closed the
            #    string or was the first half of an '@@' pair split across a
            #    chunk boundary; in both cases go on to rule 10.
            (buf, _tt.Is, '@', +1, -1),

            # 10: log _T_STRING_END without consuming anything.
            (_T_STRING_END, _tt.Skip + _tt.AppendTagobj, 0, 0, +1),

            # 11: at EOF, exit with code _E_STRING_END; otherwise back to
            #     rule 1.
            (_E_STRING_END, eof_and_tag, _tt.Here, -10, _SUCCESS),

            # 12: at EOF, exit with code _E_STRING_SPAN; otherwise rule 13.
            (_E_STRING_SPAN, eof_and_tag, _tt.Here, +1, _SUCCESS),

            # 13: record the slice running up to the next '@' (or EOF).
            (buf, _tt.AllInSet, _not_at_set, 0, +1),

            # 14: at EOF, exit with code _E_STRING_SPAN; otherwise return to
            #     rule 8 to examine the '@' that stopped rule 13.
            (_E_STRING_SPAN, eof_and_tag, _tt.Here, -6, _SUCCESS),
        )

        # Fast, texttools may be, but it's somewhat lacking in clarity.
        # The transitions encoded in the table, drawn as a flowchart:
        #
        #                                   _____
        #                                  /    /\
        # 1 -> 2 ->  3 ->  5 ->  7 ->     8  ->  9 -> 10 -> 11
        # |         \/    \/           \/  /\               \/
        #  \         4     6          12    14              /
        #   \_______/_____/            \    /              /
        #    \                           13               /
        #     \__________________________________________/

        matched, taglist, stop = _tt.tag(buf, table, start)
        if not matched:
            ### need a better way to report this error
            raise common.RCSIllegalCharacter()
        assert stop == buflen

        # The final entry is the exit code appended by the EOF rule that
        # ended the scan; it is not a token.
        exit_code = taglist.pop()

        # Replace each START ... END run with the single string it spells.
        pos = 0
        while pos < len(taglist):
            if taglist[pos] == _T_STRING_START:
                count = len(taglist)
                end = pos + 1
                while end < count:
                    if taglist[end] == _T_STRING_END:
                        break
                    end += 1

                if end < count:
                    # The joined text takes the place of START, the slices
                    # and END together.
                    taglist[pos:end + 1] = [
                        _tt.join(taglist, '', pos + 1, end)
                    ]
                else:
                    # No terminator in this buffer: the string carries on
                    # into the next chunk, so keep what we have as a partial.
                    assert exit_code == _E_STRING_SPAN
                    tail = _tt.join(taglist, '', pos + 1)
                    del taglist[pos:]
                    self.partial = (_T_STRING_SPAN, [tail])
                    break
            pos += 1

        # Decide whether the last item may continue in the next buffer.
        if exit_code == _E_TOKEN:
            unfinished = taglist.pop()
            self.partial = (_T_TOKEN, [unfinished])
        elif exit_code == _E_COMPLETE:
            # The buffer ended cleanly between tokens.
            pass
        elif exit_code == _E_STRING_SPAN:
            # Already recorded by the coalescing loop above.
            assert self.partial
        else:
            assert exit_code == _E_STRING_END
            unfinished = taglist.pop()
            self.partial = (_T_STRING_END, [unfinished])

        # New tokens are stored reversed, in front of the existing ones.
        taglist.reverse()
        taglist.extend(self.tokens)
        self.tokens = taglist
Example #2
0
  def _parse_chunk(self, buf, start=0):
    """Split the RCS text in `buf` (from offset `start`) into tokens.

    The scan itself is done by `_tt.tag` with the tag table defined below.
    Afterwards the slices of every @-string are joined into one string, a
    trailing item that the end of the buffer may have cut short is saved in
    `self.partial`, and the remaining tokens are reversed and placed in
    front of the current contents of `self.tokens`.

    Raises common.RCSIllegalCharacter if the tagging engine reports failure.
    """
    buflen = len(buf)
    assert start < buflen

    # The table embeds buf, hence it is built afresh on every call.  Jumps
    # are relative rule offsets, so the entries must stay in this order.
    table = (
      # 1: optional whitespace; rule 2 follows whether or not any was found.
      (None, _tt.AllInSet, _tt.whitespace_set, +1),

      # 2: end of buffer -> exit with _E_COMPLETE, else rule 3.
      (_E_COMPLETE, _tt.EOF + _tt.AppendTagobj, _tt.Here, +1, _SUCCESS),

      # 3: collect a run of identifier characters as token text (then
      #    rule 4), or skip ahead to rule 5.
      (_UNUSED, _tt.AllInSet + _tt.AppendMatch, _idchar_set, +2),

      # 4: end of buffer -> exit with _E_TOKEN, else restart at rule 1.
      (_E_TOKEN, _tt.EOF + _tt.AppendTagobj, _tt.Here, -3, _SUCCESS),

      # 5: collect a one-character token (then rule 6), or skip ahead to
      #    rule 7.
      (_UNUSED, _tt.IsInSet + _tt.AppendMatch, _onechar_token_set, +2),

      # 6: end of buffer -> exit with _E_COMPLETE, else restart at rule 1.
      (_E_COMPLETE, _tt.EOF + _tt.AppendTagobj, _tt.Here, -5, _SUCCESS),

      # 7: only an '@' is acceptable here; anything else is a syntax error.
      #    The '@' is consumed and _T_STRING_START is logged.
      (_T_STRING_START, _tt.Is + _tt.AppendTagobj, '@'),

      # 8: an '@' inside the string goes to rule 9, other text to rule 12.
      (None, _tt.Is, '@', +4, +1),

      # 9: a second '@' is recorded as a slice of buf and we return to
      #    rule 8; without one, fall through to rule 10.
      (buf, _tt.Is, '@', +1, -1),

      # 10: log _T_STRING_END without consuming any input.
      (_T_STRING_END, _tt.Skip + _tt.AppendTagobj, 0, 0, +1),

      # 11: end of buffer -> exit with _E_STRING_END, else restart at rule 1.
      (_E_STRING_END, _tt.EOF + _tt.AppendTagobj, _tt.Here, -10, _SUCCESS),

      # 12: end of buffer -> exit with _E_STRING_SPAN, else rule 13.
      (_E_STRING_SPAN, _tt.EOF + _tt.AppendTagobj, _tt.Here, +1, _SUCCESS),

      # 13: record everything that is not an '@' as a slice of buf.
      (buf, _tt.AllInSet, _not_at_set, 0, +1),

      # 14: end of buffer -> exit with _E_STRING_SPAN, else go back to
      #     rule 8 to look at the '@' that ended the slice.
      (_E_STRING_SPAN, _tt.EOF + _tt.AppendTagobj, _tt.Here, -6, _SUCCESS),
      )

    ok, taglist, consumed = _tt.tag(buf, table, start)
    if not ok:
      ### need a better way to report this error
      raise common.RCSIllegalCharacter()
    assert consumed == buflen

    # The last entry is the exit code left by the end-of-buffer rule that
    # stopped the scan.
    last_which = taglist.pop()

    # Fold each _T_STRING_START ... _T_STRING_END run into a single string.
    cursor = 0
    while cursor < len(taglist):
      if taglist[cursor] == _T_STRING_START:
        try:
          closer = taglist.index(_T_STRING_END, cursor + 1)
        except ValueError:
          closer = -1

        if closer < 0:
          # The string is still open when the buffer ends: stash the text
          # seen so far and drop it from the token list.
          assert last_which == _E_STRING_SPAN
          text = _tt.join(taglist, '', cursor + 1)
          del taglist[cursor:]
          self.partial = (_T_STRING_SPAN, [ text ])
          break

        # One joined string replaces the start marker, the slices and the
        # end marker.
        text = _tt.join(taglist, '', cursor + 1, closer)
        taglist[cursor:closer + 1] = [ text ]
      cursor += 1

    # Work out whether the final token might be continued by the next chunk.
    if last_which == _E_TOKEN:
      trailing = taglist.pop()
      self.partial = (_T_TOKEN, [ trailing ])
    elif last_which == _E_COMPLETE:
      # Nothing is left hanging.
      pass
    elif last_which == _E_STRING_SPAN:
      # The loop above has already stored the open string.
      assert self.partial
    else:
      assert last_which == _E_STRING_END
      trailing = taglist.pop()
      self.partial = (_T_STRING_END, [ trailing ])

    # Queue the new tokens, reversed, ahead of those already waiting.
    taglist.reverse()
    taglist.extend(self.tokens)
    self.tokens = taglist