Example #1
0
def lex(input, token_definitions):
    """Split ``input`` into a list of ``(tag, lexeme)`` tokens.

    At each position the token definitions are tried in order and the first
    pattern that matches wins. Definitions whose tag is ``None`` (e.g.
    whitespace or comments) are consumed but produce no token.

    :param input: the string to tokenize.
    :param token_definitions: iterable of ``(tag, regex_pattern)`` pairs,
        tried in the order given.
    :return: list of ``(tag, lexeme)`` tuples. Lexing stops early, returning
        the tokens collected so far, when no definition matches at the
        current position or when the winning match is zero-width.
    """
    # Compile every pattern once up front rather than once per input position.
    compiled_definitions = [(token_tag, re.compile(regex_pattern))
                            for token_tag, regex_pattern in token_definitions]
    tokens = []
    i = 0
    while i < len(input):
        regex_match = None
        for token_tag, regex_obj in compiled_definitions:
            # Anchor the match at position i of the input.
            regex_match = regex_obj.match(input, i)
            if regex_match:
                if token_tag is not None:  # untagged matches are skipped
                    tokens.append((token_tag, regex_match.group(0)))
                break  # first matching definition wins

        if not regex_match:
            log.error("Lexer Error: Invalid symbol: " + input[i])
            # NOTE(review): raw_input is the Python 2 builtin; it pauses until
            # the user presses enter. It does not exist on Python 3.
            raw_input('...')
            break

        j = regex_match.end(0)
        # Compare by value: an identity test (`is`) only happens to work for
        # small cached ints and would loop forever on a zero-width match
        # further into the input.
        if j == i:
            break  # zero-width match, no progress possible
        i = j
    return tokens
Example #2
0
 def construct(self):
     """Fill this table from ``self.grammer``.

     For every grammar symbol an empty row ``self[symbol]`` is created. Then,
     for each member of the symbol's first set and each of the symbol's rules
     whose leading element has that member in its own first set:

     * if the row already has an entry under that member, a duplicate-rule
       error is logged and the existing entry is kept;
     * if the member is ``self.EPSILON``, ``[EPSILON]`` is stored under every
       member of the symbol's follow set;
     * otherwise the rule itself is stored under that member.
     """
     for nonterminal in self.grammer.keys():
         self[nonterminal] = {}  # fresh row for this symbol
         first_symbols = self.first_set(nonterminal)
         follow_symbols = self.follow_set(nonterminal)
         for lookahead in first_symbols:
             for production in self.grammer[nonterminal]:
                 # Only rules that can start with this lookahead are relevant.
                 if lookahead not in self.first_set(production[0]):
                     continue
                 if lookahead in self[nonterminal]:
                     log.error('TABLE: Duplicate Rules for ' + nonterminal +
                               ',' + lookahead)
                     continue
                 if lookahead == self.EPSILON:
                     # Epsilon is keyed by the follow set, as a one-item rule.
                     for follow_symbol in follow_symbols:
                         self[nonterminal][follow_symbol] = [lookahead]
                 else:
                     self[nonterminal][lookahead] = production
Example #3
0
 def parse(self, input):
     """Lex ``input`` and parse the tokens starting from the start symbol.

     Returns ``None`` (after logging an error) when the lexer yields no
     tokens; otherwise returns whatever ``parse_token`` returns for the
     ``[START, None]`` root.
     """
     tokens = lexer.lex(input, self.lexemes)
     if len(tokens) == 0:
         log.error('No TOKENS')
         return None

     # Terminate the token queue with an end-of-input marker.
     end_marker = (self.table.EOI, None)
     tokens.append(end_marker)

     # NOTE(review): the result of validate() is not inspected here; parsing
     # proceeds regardless of the outcome.
     self.validate(tokens)

     # The parse begins from the start symbol, whose value is filled in later.
     start_node = [self.table.START, None]
     return self.parse_token(start_node, tokens)
Example #4
0
 def parse_token(self, root, tokens):
     """Recursively expand ``root`` against the front of ``tokens``.

     :param root: a mutable ``[tag, value]`` pair (or ``None``).
     :param tokens: list of ``(tag, value)`` tokens; matched tokens are
         removed from the front of this list in place.
     :return:
         * ``root`` unchanged when ``tokens`` is empty, ``root`` is ``None``,
           or the table has no entry for ``root``'s tag and the next tag;
         * the matched token when ``root``'s tag equals the next token's tag
           (that token is popped from ``tokens``);
         * ``root`` with ``root[1]`` set to the list of parsed children when
           a table rule was expanded (``None`` stands in for epsilon);
         * ``None`` when the table entry is ``None`` or a child of the rule
           could not be parsed (an error is logged).
     """
     if len(tokens) <= 0 or root is None:
         return root

     root_tag = root[0]
     next_token = tokens[0]
     next_tag = next_token[0]

     if root_tag == next_tag:
         # Root's tag equals the next token's tag: consume the token.
         tokens.pop(0)
         return next_token

     if root_tag not in self.table or next_tag not in self.table[root_tag]:
         # No table entry to expand with; hand the root back untouched.
         return root

     rule = self.table[root_tag][next_tag]
     if rule is None:
         log.error('ERROR: No Rule for ROOT:' + str(root_tag))
         return None

     value = []
     for rule_tag in rule:
         if rule_tag == self.table.EPSILON:
             value.append(None)  # epsilon consumes nothing
             continue
         # Parse the remaining tokens for this symbol of the rule.
         rule_token = self.parse_token([rule_tag, None], tokens)
         if rule_token is None or len(rule_token) <= 1:
             # The original referenced an undefined name here, which turned
             # this error report into a NameError.
             log.error('Could not parse RULE:' +
                       str(rule_tag) + '\n\tTOKENS: ' +
                       str(tokens))
             return None
         value.append(rule_token)

     root[1] = value  # attach the parsed children to the root
     return root
Example #5
0
    def validate(self, tokens):
        """Check ``tokens`` against the parse table without building a tree.

        A stack seeded with the start symbol is matched against the tokens:
        when the top of the stack has the same tag as the current token both
        are consumed; when the table has a rule for (top tag, token tag) the
        top is replaced by the rule's symbols (nothing is pushed for an
        epsilon rule); anything else rejects the input. The loop ends when
        the stack is empty; leftover tokens are not checked.

        :param tokens: sequence of ``(tag, value)`` pairs; only tags are read.
        :return: ``True`` if the stack was emptied, ``False`` otherwise. The
            outcome is also logged.
        """
        # Stack entries are (tag, None) pairs; only the tag is ever compared.
        token_stack = [(self.table.START, None)]
        index = 0  # position of the current token
        valid = True
        top_token = None
        while token_stack and valid:
            top_token = token_stack[-1]
            top_tag = top_token[0]
            if index >= len(tokens):
                # Symbols are still expected but the input is exhausted.
                valid = False
            elif top_tag == tokens[index][0]:
                # Stack top matches the current token: consume both.
                token_stack.pop()
                index += 1
            elif (top_tag in self.table
                  and tokens[index][0] in self.table[top_tag]):
                entry = self.table[top_tag][tokens[index][0]]
                if entry is None:
                    valid = False  # table slot exists but holds no rule
                else:
                    # Copy so the table's own rule is never modified.
                    rule = list(entry)
                    token_stack.pop()
                    if rule and rule[0] != self.table.EPSILON:
                        # Push in reverse so the rule's first symbol is on top.
                        token_stack.extend(
                            (symbol, None) for symbol in reversed(rule))
            else:
                # Mismatched terminal, or no rule for this symbol and token.
                valid = False

        if valid:
            log.debug('VALIDATION: SUCCESS')
        else:
            # The input may have run out, so there may be no next token.
            next_token = tokens[index] if index < len(tokens) else None
            log.error(
                'VALIDATION: FAILED, No RULE in TABLE[top_token][next_token]')
            log.error('TOP TOKEN: ' + str(top_token))
            log.error('NEXT TOKEN: ' + str(next_token) + '\n')

        return valid