def process_meta_comment(self, token):
    """Record a meta comment token.

    Appends the token's text (``token[1]``) to ``self._meta_comments`` so it
    can be used later by the parser, and traces the event at one level below
    DEBUG.
    """
    text = token[1]
    # logging.DEBUG - 1: quieter than DEBUG, only visible at trace verbosity.
    logger.log(logging.DEBUG - 1, 'Parser : Processing meta comment: %s', text)
    self._meta_comments.append(text)
def process_comment(self, token):
    """Trace a plain comment token.

    The comment text (``token[1]``) is logged at one level below DEBUG;
    no other action is taken here.
    """
    # logging.DEBUG - 1: quieter than DEBUG, only visible at trace verbosity.
    logger.log(logging.DEBUG - 1, 'Parser : Processing comment: %s', token[1])
def next_token(self):
    '''Find and return a tuple with the next token details.

    Returns a ``(type, value)`` pair of strings. The token types are:

    - number:       such as 1234, 123.45, -123, +123.4
    - comment:      single line // or multi-line (* ... *), { ... }
    - meta comment: starts with /// ...
    - id:           identifier starting with alpha, then alphanumeric and _
    - boolean:      the identifiers true/false (case-insensitive)
    - attribute:    name starting with @ (follows the id name rules)
    - string:       'quoted text', with '' read as an embedded quote
    - operator:     one of + - / * ** := < > <= >= <> =
    - symbol:       one of ( ) , : ; [ ] . ^
    - error:        any unrecognised character

    NOTE(review): this method is defined twice in this file; the later
    definition overrides this one at class-creation time — confirm which
    copy is intended and remove the other.
    '''
    def num_match(cha, tmp):
        '''Checks for a number in format ##, ##.#. Returns False when at
        the end of a number.'''
        if cha in '1234567890':
            return True
        elif cha == '.' and '.' not in tmp:
            # Accept a single '.' only when a digit follows, so "1." does
            # not swallow the '.' as part of a float.
            return self._peek(1) in '1234567890'
        else:
            return False

    while (True):
        t = self._next_char()
        self._token_start = self._char_no

        # Ignore white space characters
        if t == ' ' or t == '\t':  # ignore white space
            pass
        # Move to next line (if at end of line)
        elif t == '\n':
            self._advance_line()
        # Numbers (int or float style format); a leading +/- is part of the
        # number only when a digit follows, otherwise it is an operator.
        elif t in '1234567890' or (t in '-+' and self._peek(1) in '1234567890'):
            result = ('number', self._read_matching(t, num_match))
            logger.debug('Tokenisier: read %s - %s', result[0], result[1])
            return result
        # Comment, single line // or meta comment line ///
        elif t == '/' and self._peek(1) == '/':  # start of comment
            if self._match_and_read('/'):
                if self._match_and_read('/'):
                    # Three slashes: a /// meta comment. Record where it
                    # starts so later processing can locate it.
                    kind = 'meta comment'
                    self._meta_comment_start = self._char_no
                    self._meta_comment_line = self._line_no
                    comment = self.read_to_end_of_comment()
                else:
                    kind = 'comment'
                    comment = self.read_to_eol()
                result = (kind, comment)
            else:
                # _peek saw '/' but it could not be consumed; report an error.
                result = ('error', t)
            logger.debug('Tokenisier: read %s', result[0])
            return result
        # Attribute identified by an @ symbol then a name
        elif t == '@':
            name = self._read_matching(
                '', lambda cha, tmp: cha.isalnum() or cha == '_')
            result = ('attribute', name)
            logger.debug('Tokenisier: read %s - %s',
                result[0], result[1])
            return result
        # Identifier (id) of alphanumeric characters including _
        elif t.isalpha():
            name = self._read_matching(
                t, lambda cha, tmp: cha.isalnum() or cha == '_')
            # true/false are reported as boolean tokens, not ids.
            if name.lower() in ['true', 'false']:
                result = ('boolean', name)
            else:
                result = ('id', name)
            logger.debug('Tokenisier: read %s - %s', result[0], result[1])
            return result
        # Bound Comment: { ... } or (* ... *)
        elif t == '{' or (t == '(' and self._peek(1) == '*'):
            if t == '(' and self._match_and_read('*'):
                comment = self._read_until('', lambda temp: temp[-2:] == '*)')
                result = ('comment', comment[:-2])  # strip trailing *)
            elif t == '{':
                comment = self._read_until('', lambda temp: temp[-1:] == '}')
                result = ('comment', comment[:-1])  # strip trailing }
            logger.log(logging.DEBUG, 'Tokenisier: read %s', result[0])
            return result
        # Operator; two-character operators (:= ** <> <= >=) are assembled
        # by consuming the second character when it matches.
        elif (t == ':' and self._peek(1) == '=') or t in '=+-*/><':
            if t == ':' and self._match_and_read('='):
                result = ('operator', ':=')
            elif t == '*' and self._match_and_read('*'):
                result = ('operator', '**')
            elif t == '<' and self._match_and_read('>'):
                result = ('operator', '<>')
            elif t in '<>' and self._match_and_read('='):
                result = ('operator', t + '=')
            else:
                result = ('operator', t)
            return result
        # Symbol (':' only reaches here when not followed by '=')
        elif t in '(),:;[].^':
            result = ('symbol', t)
            logger.debug('Tokenisier: read %s - %s', result[0], result[1])
            return result
        # String literal; a doubled quote ('') inside is consumed as an
        # escaped quote and reading continues past it.
        elif t == "'":
            string = self._read_until(
                '', lambda temp: (temp[-1:] == "'") and (not self._match_and_read("'")))
            # Only the terminating quote is stripped here.
            # NOTE(review): embedded '' pairs remain doubled in the token
            # value — confirm callers expect this.
            result = ('string', string[:-1])
            logger.debug('Tokenisier: read %s - %s', result[0], result[1])
            return result
        # Hmm.. unknown token. What did we forget?
        else:
            logger.error("Unknown token type: " + t)
            return ('error', t)
def next_token(self):
    '''Find and return a tuple with the next token details.

    Returns a ``(type, value)`` pair of strings. The token types are:

    - number:       such as 1234, 123.45, -123, +123.4
    - comment:      single line // or multi-line (* ... *), { ... }
    - meta comment: starts with /// ...
    - id:           identifier starting with alpha, then alphanumeric and _
    - boolean:      the identifiers true/false (case-insensitive)
    - attribute:    name starting with @ (follows the id name rules)
    - string:       'quoted text', with '' read as an embedded quote
    - operator:     one of + - / * ** := < > <= >= <> =
    - symbol:       one of ( ) , : ; [ ] . ^
    - error:        any unrecognised character

    NOTE(review): this is the second, near-identical definition of
    next_token in this file; it overrides the earlier one at
    class-creation time — confirm which copy is intended and remove the
    other.
    '''
    def num_match(cha, tmp):
        '''Checks for a number in format ##, ##.#. Returns False when at
        the end of a number.'''
        if cha in '1234567890':
            return True
        elif cha == '.' and '.' not in tmp:
            # Accept a single '.' only when a digit follows, so "1." does
            # not swallow the '.' as part of a float.
            return self._peek(1) in '1234567890'
        else:
            return False

    while (True):
        t = self._next_char(); self._token_start = self._char_no

        # Ignore white space characters
        if t == ' ' or t == '\t':  # ignore white space
            pass
        # Move to next line (if at end of line)
        elif t == '\n':
            self._advance_line()
        # Numbers (int or float style format); a leading +/- is part of the
        # number only when a digit follows, otherwise it is an operator.
        elif t in '1234567890' or (t in '-+' and self._peek(1) in '1234567890'):
            result = ('number', self._read_matching(t, num_match))
            logger.debug('Tokenisier: read %s - %s', result[0], result[1])
            return result
        # Comment, single line // or meta comment line ///
        elif t == '/' and self._peek(1) == '/':  # start of comment
            if self._match_and_read('/'):
                if self._match_and_read('/'):
                    # Three slashes: a /// meta comment. Record where it
                    # starts so later processing can locate it.
                    kind = 'meta comment'
                    self._meta_comment_start = self._char_no
                    self._meta_comment_line = self._line_no
                    comment = self.read_to_end_of_comment()
                else:
                    kind = 'comment'
                    comment = self.read_to_eol()
                result = (kind, comment)
            else:
                # _peek saw '/' but it could not be consumed; report an error.
                result = ('error', t)
            logger.debug('Tokenisier: read %s', result[0])
            return result
        # Attribute identified by an @ symbol then a name
        elif t == '@':
            name = self._read_matching('', lambda cha, tmp: cha.isalnum() or cha == '_')
            result = ('attribute', name)
            logger.debug('Tokenisier: read %s - %s',
                result[0], result[1])
            return result
        # Identifier (id) of alphanumeric characters including _
        elif t.isalpha():
            name = self._read_matching(t, lambda cha, tmp: cha.isalnum() or cha == '_')
            # true/false are reported as boolean tokens, not ids.
            if name.lower() in ['true','false']:
                result = ('boolean', name)
            else:
                result = ('id', name)
            logger.debug('Tokenisier: read %s - %s', result[0], result[1])
            return result
        # Bound Comment: { ... } or (* ... *)
        elif t == '{' or (t == '(' and self._peek(1) == '*'):
            if t == '(' and self._match_and_read('*'):
                comment = self._read_until('', lambda temp: temp[-2:] == '*)')
                result = ('comment', comment[:-2])  # strip trailing *)
            elif t == '{':
                comment = self._read_until('', lambda temp: temp[-1:] == '}')
                result = ('comment', comment[:-1])  # strip trailing }
            logger.log(logging.DEBUG, 'Tokenisier: read %s', result[0])
            return result
        # Operator; two-character operators (:= ** <> <= >=) are assembled
        # by consuming the second character when it matches.
        elif (t == ':' and self._peek(1) == '=') or t in '=+-*/><':
            if t == ':' and self._match_and_read('='):
                result = ('operator', ':=')
            elif t == '*' and self._match_and_read('*'):
                result = ('operator', '**')
            elif t == '<' and self._match_and_read('>'):
                result = ('operator', '<>')
            elif t in '<>' and self._match_and_read('='):
                result = ('operator', t + '=')
            else:
                result = ('operator', t)
            return result
        # Symbol (':' only reaches here when not followed by '=')
        elif t in '(),:;[].^':
            result = ('symbol', t)
            logger.debug('Tokenisier: read %s - %s', result[0], result[1])
            return result
        # String literal; a doubled quote ('') inside is consumed as an
        # escaped quote and reading continues past it.
        elif t == "'":
            string = self._read_until('', lambda temp: (temp[-1:] == "'") and (not self._match_and_read("'")))
            # Only the terminating quote is stripped here.
            # NOTE(review): embedded '' pairs remain doubled in the token
            # value — confirm callers expect this.
            result = ('string', string[:-1])
            logger.debug('Tokenisier: read %s - %s', result[0], result[1])
            return result
        # Hmm.. unknown token. What did we forget?
        else:
            logger.error("Unknown token type: "+t)
            return ('error', t)