Beispiel #1
0
 def match_pattern(self):
     """Try to match ``_re`` against ``self._text`` at ``self._pos``.

     Returns:
         ``None`` when nothing matches (``self._pos`` is left untouched).
         Otherwise a ``Tok`` whose ``type`` is ``_toktype[lastindex]`` of the
         match, whose ``text`` is the matched substring and whose ``col`` is
         the match start plus one; ``self._pos`` is moved to the match end.
     """
     # presumably ``_re`` is a compiled pattern, so the second argument is
     # the offset at which matching starts -- confirm against its definition.
     hit = _re.match(self._text, self._pos)
     if hit:
         begin, finish = hit.span(0)
         # Advance the cursor before building the token.
         self._pos = finish
         return Tok(type=_toktype[hit.lastindex],
                    text=hit.group(0),
                    col=begin + 1)
     return None
Beispiel #2
0
 def nextToken(self):
     """Return the next token from the input.

     Whitespace is skipped first.  At end of input a ``Tok`` of type ``EOF``
     is returned.  Otherwise ``match_pattern()`` is tried, and when it yields
     nothing the current character is looked up in ``single_char_lookup``.

     Raises:
         TokenStreamException: the current character is not a key of
             ``single_char_lookup``.
     """
     # Consume whitespace until a non-blank character or end of input.
     while True:
         if self.isEOF() or not self.is_whitespace():
             break
         self.next_ch()
     if self.isEOF():
         return Tok(type=EOF)

     # Tokens recognised by match_pattern() take priority.
     tok = self.match_pattern()
     if tok:
         return tok

     # Fall back to the single-character table.
     ch = self.curr_ch()
     try:
         kind = single_char_lookup[ch]
     except KeyError:
         # NOTE(review): this message reports self._pos *before* next_ch(),
         # whereas the token below reports self._pos *after* next_ch() --
         # confirm both are meant to follow the same column convention.
         raise TokenStreamException(
             "Unexpected char %r in column %u." % (self.curr_ch(), self._pos))
     else:
         self.next_ch()
         return Tok(type=kind, text=ch, col=self._pos)
Beispiel #3
0
 def nextToken(self):
     """Return the next token from the input.

     Whitespace is skipped first.  At end of input a ``Tok`` of type ``EOF``
     is returned.  Otherwise every entry of ``pattern_type_tuples`` is tried
     in order through ``match_pattern``; if none matches, the current
     character is compared against each text in ``type_text_tuples``.

     Raises:
         TokenStreamException: neither table produced a token.
     """
     # Consume whitespace until a non-blank character or end of input.
     while True:
         if self.isEOF() or not self.is_whitespace():
             break
         self.next_ch()
     if self.isEOF():
         return Tok(type=EOF)

     # Pattern-based tokens are attempted first, in table order.
     for entry in pattern_type_tuples:
         tok = self.match_pattern(*entry)
         if tok:
             return tok

     # Then the fixed-text tokens, compared against the current character.
     for kind, literal in type_text_tuples:
         if self.curr_ch() != literal:
             continue
         self.next_ch()
         return Tok(type=kind, text=literal, col=self._pos)

     # Nothing recognised the input at this position.
     # NOTE(review): the column reported here is self._pos without the
     # next_ch() advance that precedes the token's col above -- confirm both
     # are meant to follow the same column convention.
     raise TokenStreamException("Unknown char %s at %u col." %
                                (self.curr_ch(), self._pos))
Beispiel #4
0
 def match_pattern(self, pattern, toktype):
     """Try to match ``pattern`` at the current position of ``self._text``.

     Args:
         pattern: object with a ``match`` method returning a match object
             (or a falsy value when there is no match).
         toktype: value stored as the ``type`` of the produced token.

     Returns:
         ``None`` when the pattern does not match (``self._pos`` is left
         untouched).  Otherwise a ``Tok`` carrying ``toktype``, the matched
         text and a ``col`` equal to the absolute match start plus one;
         ``self._pos`` is moved to the absolute match end.
     """
     # The pattern sees only the tail of the text (a fresh slice), so the
     # offsets in the match are relative to self._pos.
     hit = pattern.match(self._text[self._pos:])
     if not hit:
         return None

     rel_begin, rel_finish = hit.span(0)
     first = self._pos + rel_begin
     last = self._pos + rel_finish
     lexeme = self._text[first:last]
     self._pos = last
     return Tok(type=toktype, text=lexeme, col=first + 1)