Example #1
 def process_meta_comment(self, token):
     # Log one level below DEBUG so this very chatty message stays hidden
     # unless a handler is configured for sub-DEBUG output.
     logger.log(logging.DEBUG - 1,
                'Parser    : Processing meta comment: %s', token[1])
     self._meta_comments.append(token[1])
Example #2
 def process_comment(self, token):
     # Comments are logged (below DEBUG) but not stored by this method.
     logger.log(logging.DEBUG - 1, 'Parser    : Processing comment: %s',
                token[1])
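Both examples log at logging.DEBUG - 1, a numeric level one notch below DEBUG (9 rather than 10), so these very chatty parser messages stay hidden even when ordinary debug output is enabled. Below is a minimal sketch of how such messages could be surfaced; the TRACE name and this setup are illustrative assumptions, not part of the original source:

 import logging

 TRACE = logging.DEBUG - 1             # the level the parser logs at (9)
 logging.addLevelName(TRACE, 'TRACE')  # optional: readable name in output

 logging.basicConfig(level=TRACE)      # handler must sit at or below TRACE
 logger = logging.getLogger(__name__)

 # Visible at level=TRACE; silently dropped at level=DEBUG or higher.
 logger.log(TRACE, 'Parser    : Processing comment: %s', '// example')

With the level left at DEBUG or above, the process_* methods shown here are effectively silent, which appears to be the point of choosing a sub-DEBUG level.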
Example #3
    def next_token(self):
        '''Find and return a tuple with the next token's details. The tuple
        contains the token's (type, value) as strings. The token types and
        details are:

            number,       # such as 1234, 123.45, -123, +123.4
            comment,      # single-line // or multi-line (* ... *), { ... }
            meta comment, # starts with /// ...
            id,           # identifier name starting with alpha, including
                          # alpha-numeric characters and the _ character
            attribute,    # name starting with @ inside a meta comment block;
                          # follows the id name character rules
            operator,     # one of + - / * ** := = < > <= >= <>
            symbol,       # one of ( ) , : ; [ ] . ^
        '''
        def num_match(cha, tmp):
            '''Checks for a number in format ##, ##.#. Returns False when at the 
            end of a number.'''
            if cha in '1234567890':
                return True
            elif cha == '.' and '.' not in tmp:
                return self._peek(1) in '1234567890'
            else:
                return False

        while True:
            t = self._next_char()
            self._token_start = self._char_no

            # Ignore white space characters
            if t in ' \t':
                pass
            # Move to next line (if at end of line)
            elif t == '\n':
                self._advance_line()
            # Numbers (int or float style format)
            elif t in '1234567890' or (t in '-+' and self._peek(1)
                                       in '1234567890'):
                result = ('number', self._read_matching(t, num_match))
                logger.debug('Tokeniser: read %s - %s', result[0], result[1])
                return result
            # Comment, single line // or meta comment line ///
            elif t == '/' and self._peek(1) == '/':
                if self._match_and_read('/'):
                    if self._match_and_read('/'):
                        kind = 'meta comment'
                        self._meta_comment_start = self._char_no
                        self._meta_comment_line = self._line_no
                        comment = self.read_to_end_of_comment()
                    else:
                        kind = 'comment'
                        comment = self.read_to_eol()
                    result = (kind, comment)
                else:
                    result = ('error', t)
                logger.debug('Tokeniser: read %s', result[0])
                return result
            # Attribute identified by an @ symbol then a name
            elif t == '@':
                name = self._read_matching(
                    '', lambda cha, tmp: cha.isalnum() or cha == '_')
                result = ('attribute', name)
                logger.debug('Tokeniser: read %s - %s', result[0], result[1])
                return result
            # Identifier (id) of alphanumeric characters, including _
            elif t.isalpha():
                name = self._read_matching(
                    t, lambda cha, tmp: cha.isalnum() or cha == '_')
                if name.lower() in ['true', 'false']:
                    result = ('boolean', name)
                else:
                    result = ('id', name)
                logger.debug('Tokeniser: read %s - %s', result[0], result[1])
                return result
            # Bounded comment: (* ... *) or { ... }
            elif t == '{' or (t == '(' and self._peek(1) == '*'):
                if t == '(' and self._match_and_read('*'):
                    comment = self._read_until('',
                                               lambda temp: temp[-2:] == '*)')
                    result = ('comment', comment[:-2])
                elif t == '{':
                    comment = self._read_until('',
                                               lambda temp: temp[-1:] == '}')
                    result = ('comment', comment[:-1])
                logger.debug('Tokeniser: read %s', result[0])
                return result
            # Operator
            elif (t == ':' and self._peek(1) == '=') or t in '=+-*/><':
                if t == ':' and self._match_and_read('='):
                    result = ('operator', ':=')
                elif t == '*' and self._match_and_read('*'):
                    result = ('operator', '**')
                elif t == '<' and self._match_and_read('>'):
                    result = ('operator', '<>')
                elif t in '<>' and self._match_and_read('='):
                    result = ('operator', t + '=')
                else:
                    result = ('operator', t)
                return result
            # Symbol
            elif t in '(),:;[].^':
                result = ('symbol', t)
                logger.debug('Tokeniser: read %s - %s', result[0], result[1])
                return result
            # String literal; a doubled '' escapes a quote inside the value.
            elif t == "'":
                string = self._read_until(
                    '', lambda temp:
                    (temp[-1:] == "'") and (not self._match_and_read("'")))
                result = ('string', string[:-1])
                logger.debug('Tokeniser: read %s - %s', result[0], result[1])
                return result
            # Hmm... unknown token. What did we forget?
            else:
                logger.error('Unknown token type: %s', t)
                return ('error', t)
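next_token leans on several helper methods (_next_char, _peek, _match_and_read, _read_matching, _read_until) whose definitions are not shown in these examples. The sketch below is a guessed reconstruction of the contract the code above appears to assume; the CharScanner name and every implementation detail are illustrative assumptions, not the original class:

 class CharScanner:
     '''Assumed helper API behind next_token -- a sketch, not the source.'''

     def __init__(self, text):
         self._text = text
         self._pos = 0
         self._char_no = 0

     def _next_char(self):
         # Consume and return the next character.
         cha = self._text[self._pos]
         self._pos += 1
         self._char_no += 1
         return cha

     def _peek(self, n):
         # Look ahead n characters without consuming; '' past the end.
         end = self._pos + n - 1
         return self._text[end] if end < len(self._text) else ''

     def _match_and_read(self, cha):
         # Consume the next character only when it matches cha.
         if self._peek(1) == cha:
             self._next_char()
             return True
         return False

     def _read_matching(self, start, match_fn):
         # Consume characters while match_fn(cha, text_so_far) holds;
         # the first rejected character is pushed back for the caller.
         tmp = start
         while self._pos < len(self._text):
             cha = self._next_char()
             if not match_fn(cha, tmp):
                 self._pos -= 1       # un-read the rejected character
                 self._char_no -= 1
                 break
             tmp += cha
         return tmp

     def _read_until(self, start, end_fn):
         # Consume characters until end_fn(text_so_far) reports the end.
         tmp = start
         while not end_fn(tmp) and self._pos < len(self._text):
             tmp += self._next_char()
         return tmp

Under these assumptions the bounded-comment branch works out: for the input (* hi *), next_token consumes '(' and '*', _read_until gathers ' hi *)', and the [:-2] slice trims the closing marker, yielding ('comment', ' hi '). The doubled-quote rule in the string branch also falls out, because _match_and_read consumes the second quote of a '' pair without adding it to the accumulated text.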