Python RCSIllegalCharacter Examples

Programming Language: Python

Namespace/Package Name: common

Method/Function: RCSIllegalCharacter

Examples at hotexamples.com: 2

Python RCSIllegalCharacter - 2 examples found. These are the top-rated real-world Python examples of common.RCSIllegalCharacter, extracted from open source projects. You can rate the examples to help us improve their quality.

Example #1

Show file

    def _parse_chunk(self, buf, start=0):
        """Tokenize one chunk of RCS text and queue the resulting tokens.

        Runs the tag table below over ``buf`` starting at offset ``start``,
        glues the pieces of every completed @-string into one string, and
        places the tokens, in reversed order, in front of whatever is
        already in ``self.tokens``.  When the chunk ends inside a token or
        an @-string (or right after a closing '@'), the unfinished piece is
        stored in ``self.partial`` instead of being queued.

        Raises ``common.RCSIllegalCharacter`` when the tagging call reports
        failure.
        """

        total = len(buf)
        assert start < total

        # The rules refer to each other through relative jumps, so their
        # order inside the tuple must stay exactly as it is.  The table is
        # rebuilt per call because rules #9 and #13 carry `buf` itself.
        rules = (
            # Rule 1: swallow optional whitespace; rule 2 follows either way.
            (None, _tt.AllInSet, _tt.whitespace_set, +1),

            # Rule 2: at end of input, report _E_COMPLETE and stop.
            (_E_COMPLETE, _tt.EOF + _tt.AppendTagobj, _tt.Here, +1, _SUCCESS),

            # Rule 3: collect a run of identifier characters as one token;
            # with no such run, skip ahead to rule 5.
            (_UNUSED, _tt.AllInSet + _tt.AppendMatch, _idchar_set, +2),

            # Rule 4: a token touching end of input reports _E_TOKEN;
            # otherwise start over at rule 1.
            (_E_TOKEN, _tt.EOF + _tt.AppendTagobj, _tt.Here, -3, _SUCCESS),

            # Rule 5: a single-character token is emitted on its own; with
            # no such character, skip ahead to rule 7.
            (_UNUSED, _tt.IsInSet + _tt.AppendMatch, _onechar_token_set, +2),

            # Rule 6: end of input after it reports _E_COMPLETE; otherwise
            # start over at rule 1.
            (_E_COMPLETE, _tt.EOF + _tt.AppendTagobj, _tt.Here, -5, _SUCCESS),

            # Rule 7: the only thing still allowed here is the '@' opening a
            # string.  Anything else is a syntax error; no failure jump is
            # given for this rule.
            (_T_STRING_START, _tt.Is + _tt.AppendTagobj, '@'),

            # Rule 8: an '@' inside the string leads to rule 9; any other
            # character leads to rule 12.
            (None, _tt.Is, '@', +4, +1),
            # Rule 9: a second '@' is an escaped '@'; keep one and return to
            # rule 8.  Otherwise fall through to rule 10.
            (buf, _tt.Is, '@', +1, -1),
            # Rule 10: mark the end of the string.
            (_T_STRING_END, _tt.Skip + _tt.AppendTagobj, 0, 0, +1),
            # Rule 11: end of input right after the closing '@' reports
            # _E_STRING_END; otherwise start over at rule 1.
            (_E_STRING_END, _tt.EOF + _tt.AppendTagobj, _tt.Here, -10, _SUCCESS),

            # Rule 12: end of input inside the string reports _E_STRING_SPAN.
            (_E_STRING_SPAN, _tt.EOF + _tt.AppendTagobj, _tt.Here, +1, _SUCCESS),

            # Rule 13: take everything up to the next '@', then check for end
            # of input in rule 14.
            (buf, _tt.AllInSet, _not_at_set, 0, +1),

            # Rule 14: end of input reports _E_STRING_SPAN; otherwise return
            # to rule 8 to examine the '@' that stopped rule 13.
            (_E_STRING_SPAN, _tt.EOF + _tt.AppendTagobj, _tt.Here, -6, _SUCCESS),
        )

        # The table is fast but hard to read, so here is the same logic as a
        # picture and as a walk-through.
        #
        # Flowchart:
        #                                   _____
        #                                  /    /\
        # 1 -> 2 ->  3 ->  5 ->  7 ->     8  ->  9 -> 10 -> 11
        # |         \/    \/           \/  /\               \/
        #  \         4     6          12    14              /
        #   \_______/_____/            \    /              /
        #    \                           13               /
        #     \__________________________________________/
        #
        # #1:  Skip any whitespace.
        # #2:  At EOF, exit with _E_COMPLETE.
        # #3:  A run of _idchar_set characters is a token:
        #      #4: emit it and return to #1 (at EOF, exit with _E_TOKEN).
        # #5:  A character from _onechar_token_set is a token:
        #      #6: emit it and return to #1 (at EOF, exit with _E_COMPLETE).
        # #7:  No '@' here is an error; an '@' logs _T_STRING_START.
        # #8:  Another '@' leads on to #9.  Anything else:
        #      #12: at EOF, exit with _E_STRING_SPAN.
        #      #13: record the slice up to the next '@' (or EOF).
        #      #14: at EOF, exit with _E_STRING_SPAN; otherwise back to #8.
        # #9:  A further '@' means an '@' escaped by doubling inside the
        #      string: record a slice holding a single '@' and go back to
        #      #8.  Without it we have seen either the '@' that closes the
        #      string or the first half of an '@@' pair split across a chunk
        #      boundary; both cases continue at #10.
        # #10: Log _T_STRING_END.
        # #11: At EOF, exit with _E_STRING_END; otherwise back to #1.

        ok, pieces, stop_at = _tt.tag(buf, rules, start)
        if not ok:
            ### need a better way to report this error
            raise common.RCSIllegalCharacter()
        assert stop_at == total

        # The last entry records which exit rule fired; it is not a token.
        ending = pieces.pop()

        # Copy plain tokens across and fold each START ... END run into the
        # single string it stands for.
        tokens = []
        count = len(pieces)
        pos = 0
        while pos < count:
            item = pieces[pos]
            if not (item == _T_STRING_START):
                tokens.append(item)
                pos += 1
                continue

            # Look for the marker that closes this string.
            close = pos + 1
            while close < count and not (pieces[close] == _T_STRING_END):
                close += 1

            if close == count:
                # The string runs off the end of the chunk: keep what we
                # have as a partial span and stop scanning.
                assert ending == _E_STRING_SPAN
                self.partial = (_T_STRING_SPAN,
                                [_tt.join(pieces, '', pos + 1)])
                break

            tokens.append(_tt.join(pieces, '', pos + 1, close))
            pos = close + 1

        # Decide whether the final token must be held back as a partial.
        if ending == _E_TOKEN:
            self.partial = (_T_TOKEN, [tokens.pop()])
        elif not (ending == _E_COMPLETE):
            if ending == _E_STRING_SPAN:
                # Already recorded by the scan above.
                assert self.partial
            else:
                assert ending == _E_STRING_END
                self.partial = (_T_STRING_END, [tokens.pop()])

        # Tokens are kept in reverse order, ahead of those already queued.
        tokens.reverse()
        tokens.extend(self.tokens)
        self.tokens = tokens

Example #2

Show file

  def _parse_chunk(self, buf, start=0):
    """Split one chunk of RCS text into tokens and queue them.

    The tag table is applied to ``buf`` from offset ``start``.  Completed
    @-strings are collapsed into single strings, the token list is reversed
    and put in front of the current ``self.tokens``.  A token or @-string
    that reaches the end of the chunk is kept in ``self.partial`` rather
    than queued.

    Raises ``common.RCSIllegalCharacter`` if the tagging call does not
    succeed.
    """

    size = len(buf)
    assert start < size

    # Rules jump to each other by relative offsets, so their order is
    # significant.  Two of the rules embed `buf`, hence the per-call table.
    state_table = (
      # Optional whitespace; the next rule runs whether or not any matched.
      (None, _tt.AllInSet, _tt.whitespace_set, +1),

      # End of input here: report _E_COMPLETE.
      (_E_COMPLETE, _tt.EOF + _tt.AppendTagobj, _tt.Here, +1, _SUCCESS),

      # A run of identifier characters becomes a token; no run skips two
      # rules ahead.
      (_UNUSED, _tt.AllInSet + _tt.AppendMatch, _idchar_set, +2),

      # Token ended at end of input: report _E_TOKEN; else back to the top.
      (_E_TOKEN, _tt.EOF + _tt.AppendTagobj, _tt.Here, -3, _SUCCESS),

      # A one-character token is emitted immediately; no match skips two
      # rules ahead.
      (_UNUSED, _tt.IsInSet + _tt.AppendMatch, _onechar_token_set, +2),

      # End of input after it: report _E_COMPLETE; else back to the top.
      (_E_COMPLETE, _tt.EOF + _tt.AppendTagobj, _tt.Here, -5, _SUCCESS),

      # Only the '@' that opens a string is acceptable here; anything else
      # is a syntax error (no failure jump is given for this rule).
      (_T_STRING_START, _tt.Is + _tt.AppendTagobj, '@'),

      # Inside the string: an '@' goes to the next rule, any other
      # character goes four rules ahead.
      (None, _tt.Is, '@', +4, +1),
      # A second '@' is an escaped '@': keep it and go back one rule.
      (buf, _tt.Is, '@', +1, -1),
      # Otherwise the string is closed.
      (_T_STRING_END, _tt.Skip + _tt.AppendTagobj, 0, 0, +1),
      # End of input right after the closing '@': report _E_STRING_END;
      # else back to the top.
      (_E_STRING_END, _tt.EOF + _tt.AppendTagobj, _tt.Here, -10, _SUCCESS),

      # End of input inside the string: report _E_STRING_SPAN.
      (_E_STRING_SPAN, _tt.EOF + _tt.AppendTagobj, _tt.Here, +1, _SUCCESS),

      # Take everything that is not an '@', then check for end of input.
      (buf, _tt.AllInSet, _not_at_set, 0, +1),

      # Not at end of input: go back and look for a doubled '@'.
      (_E_STRING_SPAN, _tt.EOF + _tt.AppendTagobj, _tt.Here, -6, _SUCCESS),
      )

    matched, items, end_pos = _tt.tag(buf, state_table, start)
    if not matched:
      ### need a better way to report this error
      raise common.RCSIllegalCharacter()
    assert end_pos == size

    # The trailing entry is the exit code, not a token.
    exit_code = items.pop()

    # Replace every START ... END run with the joined string, in place.
    cursor = 0
    while cursor < len(items):
      if items[cursor] == _T_STRING_START:
        closer = cursor + 1
        while closer < len(items) and not (items[closer] == _T_STRING_END):
          closer += 1

        if closer == len(items):
          # No end marker: the string continues past this chunk.
          assert exit_code == _E_STRING_SPAN
          tail = _tt.join(items, '', cursor + 1)
          del items[cursor:]
          self.partial = (_T_STRING_SPAN, [tail])
          break

        items[cursor:closer + 1] = [_tt.join(items, '', cursor + 1, closer)]
      cursor += 1

    # Hold back the last token when the chunk may have cut it short.
    if exit_code == _E_TOKEN:
      self.partial = (_T_TOKEN, [items.pop()])
    elif exit_code == _E_COMPLETE:
      # Nothing is left unfinished.
      pass
    elif exit_code == _E_STRING_SPAN:
      # The scan above has already stored the partial string.
      assert self.partial
    else:
      assert exit_code == _E_STRING_END
      self.partial = (_T_STRING_END, [items.pop()])

    # Tokens are stored in reverse order, ahead of those already queued.
    items.reverse()
    items.extend(self.tokens)
    self.tokens = items