Example #1
    def tokenize_number(self, line_index, column_index, line):
        number = line[column_index]
        end_column_index = column_index
        dot_found = False
        decimal_inserted = False

        while end_column_index + 1 < len(line) and (
                line[end_column_index + 1] in lexicon.DIGITS or
                (not dot_found and line[end_column_index + 1] == '.')):
            number += line[end_column_index + 1]

            if dot_found:
                decimal_inserted = True
            elif line[end_column_index + 1] == '.':
                dot_found = True

            end_column_index += 1

        # A dot must be followed by at least one decimal digit; otherwise the number is malformed.
        if dot_found and not decimal_inserted:
            return Token(number, Code.MF_NUMBER, line_index, line_index,
                         column_index, end_column_index)
        else:
            return Token(number, Code.NUMBER, line_index, line_index,
                         column_index, end_column_index)
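
A minimal standalone sketch of the same maximal-munch rule (the scan_number helper and DIGITS constant below are stand-ins for the class method and lexicon.DIGITS, not part of the original code):

import string

DIGITS = string.digits

def scan_number(line, start):
    # Greedy scan: digits plus at most one dot; returns (lexeme, malformed?).
    end = start
    dot_found = decimal_inserted = False
    while end + 1 < len(line) and (
            line[end + 1] in DIGITS or
            (not dot_found and line[end + 1] == '.')):
        if dot_found:
            decimal_inserted = True
        elif line[end + 1] == '.':
            dot_found = True
        end += 1
    return line[start:end + 1], dot_found and not decimal_inserted

assert scan_number('123 + 4', 0) == ('123', False)
assert scan_number('3.14', 0) == ('3.14', False)
assert scan_number('3. + 1', 0) == ('3.', True)  # dot without decimals -> malformed
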
Example #2
    def tokenize_string(self, line_index, column_index, line):
        string = line[column_index]
        end_column_index = column_index
        end_found = False
        invalid_symbol_found = False

        while end_column_index + 1 < len(line) and not end_found:
            char = line[end_column_index + 1]

            # A quote closes the string unless the preceding char is an escaping backslash.
            if char in lexicon.STRING_DELIMITER:
                if line[end_column_index] != '\\':
                    end_found = True
            elif not letter_digit_symbol.match(char):
                invalid_symbol_found = True
            if char != '\n':
                string += char
            end_column_index += 1

        if end_found and not invalid_symbol_found:
            return Token(string, Code.STRING, line_index, line_index,
                         column_index, end_column_index)

        return Token(string, Code.MF_STRING, line_index, line_index,
                     column_index, end_column_index)
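
The escape rule can be exercised in isolation. A sketch assuming '"' is the only entry in lexicon.STRING_DELIMITER and omitting the invalid-symbol check (scan_string is a hypothetical stand-in):

def scan_string(line, start):
    # Scan from an opening quote; a quote preceded by a backslash does not close.
    end = start
    closed = False
    while end + 1 < len(line) and not closed:
        if line[end + 1] == '"' and line[end] != '\\':
            closed = True
        end += 1
    return line[start:end + 1], closed

assert scan_string('"abc" rest', 0) == ('"abc"', True)
assert scan_string('"a\\"b" x', 0) == ('"a\\"b"', True)   # escaped quote stays inside
assert scan_string('"unterminated', 0) == ('"unterminated', False)
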
Example #3
    def tokenize_logical_op(self, line_index, column_index, line):
        logical_op = line[column_index]
        end_column_index = column_index

        if (end_column_index + 1 < len(line) and
                logical_op + line[end_column_index + 1]
                in lexicon.LOGICAL_OPERATORS_EXTENDED):
            logical_op += line[end_column_index + 1]
            end_column_index += 1
        elif logical_op not in lexicon.COMMOM_RELATIONAL_LOGICAL:
            return Token(logical_op, Code.MF_OPERATOR, line_index, line_index,
                         column_index, end_column_index)

        return Token(logical_op, Code.OP_LOGICAL, line_index, line_index,
                     column_index, end_column_index)
Example #4
    def tokenize_id_or_keyword(self, line_index, column_index, line):
        id_or_keyword = line[column_index]
        end_column_index = column_index

        while (end_column_index + 1 < len(line) and
               letter_digit_underscore.match(line[end_column_index + 1])):
            id_or_keyword += line[end_column_index + 1]
            end_column_index += 1

        if id_or_keyword in lexicon.KEYWORDS:
            return Token(id_or_keyword, Code.KEYWORD, line_index, line_index,
                         column_index, end_column_index)
        else:
            return Token(id_or_keyword, Code.IDENTIFIER, line_index,
                         line_index, column_index, end_column_index)
Example #5
    def execute(self):
        line_index = 0
        column_index = 0
        line_index_changed = False

        while line_index < len(self.input_lines):
            line = self.input_lines[line_index]
            line_index_changed = False

            while column_index < len(line):
                char = line[column_index]
                token = Token(char, Code.INVALID_SYMBOL, line_index,
                              line_index, column_index, column_index)

                if char in lexicon.DELIMITERS:
                    token = self.tokenize_delimiter(line_index, column_index,
                                                    line[column_index])
                elif char in lexicon.DIGITS:
                    token = self.tokenize_number(line_index, column_index,
                                                 line)
                elif char in lexicon.ARITHMETIC_OPERATORS_BEGINNING:
                    token = self.tokenize_arithmetic_op(
                        line_index, column_index, line)
                elif char in lexicon.RELATIONAL_OPERATORS_BEGINNING:
                    token = self.tokenize_relational_op(
                        line_index, column_index, line)
                elif char in lexicon.LOGICAL_OPERATORS_BEGGINING:
                    token = self.tokenize_logical_op(line_index, column_index,
                                                     line)
                elif char in lexicon.STRING_DELIMITER:
                    token = self.tokenize_string(line_index, column_index,
                                                 line)
                elif char in lexicon.COMMOM_RELATIONAL_LOGICAL:
                    token = self.tokenize_relational_or_logical_op(
                        line_index, column_index, line)
                elif char in lexicon.COMMOM_ARITHMETIC_COMMENT:
                    token = self.tokenize_arithmetic_or_comment(
                        line_index, column_index, line)
                elif letter.match(line[column_index]):
                    token = self.tokenize_id_or_keyword(
                        line_index, column_index, line)

                # Add the token to the list only if it is not whitespace or a comment.
                if not token.lexeme.isspace() and token.code != Code.COMMENT:
                    self.lexical_tokens.append(token)

                column_index = token.column_end_index + 1

                # If the token ended on a later line, break out of the column loop.
                if line_index != token.line_end_index:
                    line_index = token.line_end_index
                    line_index_changed = True
                    break

            # Advance to the next line only if a multi-line token did not already move line_index.
            if not line_index_changed:
                line_index += 1
                column_index = 0

        return self.lexical_tokens
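
The driver's key invariant is that every tokenizer reports end indices and the loop re-synchronizes from them, which is what lets a multi-line token (a block comment) advance line_index. A stripped-down sketch of that pattern, with tokens reduced to (lexeme, line_end, col_end) tuples instead of Token objects:

def run(lines, scan):
    line_index = column_index = 0
    lexemes = []
    while line_index < len(lines):
        line = lines[line_index]
        line_jumped = False
        while column_index < len(line):
            lexeme, line_end, col_end = scan(lines, line_index, column_index)
            if not lexeme.isspace():
                lexemes.append(lexeme)
            column_index = col_end + 1
            if line_end != line_index:  # multi-line token: resume on its last line
                line_index = line_end
                line_jumped = True
                break
        if not line_jumped:
            line_index += 1
            column_index = 0
    return lexemes

single_char = lambda lines, li, ci: (lines[li][ci], li, ci)
assert run(['ab c\n', 'd\n'], single_char) == ['a', 'b', 'c', 'd']
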
Example #6
    def tokenize_relational_op(self, line_index, column_index, line):
        relational_op = line[column_index]
        end_column_index = column_index

        if (end_column_index + 1 < len(line) and
                relational_op + line[end_column_index + 1]
                in lexicon.RELATIONAL_OPERATORS_EXTENDED):
            relational_op += line[end_column_index + 1]
            end_column_index += 1

        return Token(relational_op, Code.OP_RELATIONAL, line_index, line_index,
                     column_index, end_column_index)
Example #7
    def tokenize_arithmetic_op(self, line_index, column_index, line):
        arithmetic_op = line[column_index]
        end_column_index = column_index

        if (end_column_index + 1 < len(line) and
                arithmetic_op + line[end_column_index + 1]
                in lexicon.ARITHMETIC_OPERATORS_EXTENDED):
            arithmetic_op += line[end_column_index + 1]
            end_column_index += 1

        return Token(arithmetic_op, Code.OP_ARITHMETIC, line_index, line_index,
                     column_index, end_column_index)
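
Examples #3, #6 and #7 all apply the same two-character extension rule; a standalone sketch of it (EXTENDED is an assumed stand-in for the lexicon's *_EXTENDED sets):

EXTENDED = {'<=', '>=', '==', '!=', '&&', '||'}

def scan_operator(line, start):
    # Prefer the two-character operator when it exists, else keep one character.
    if start + 1 < len(line) and line[start:start + 2] in EXTENDED:
        return line[start:start + 2], start + 1
    return line[start], start

assert scan_operator('<= 1', 0) == ('<=', 1)
assert scan_operator('< 1', 0) == ('<', 0)
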
Example #8
    def ignore_comment(self, line_index, column_index, init):
        comment = init
        end_line_index = line_index
        end_column_index = column_index + 2  # skip the opening '//' or '/*'

        # A '//' comment runs to the end of the current line.
        if init == '//':
            comment = self.input_lines[end_line_index][column_index:]
            return Token(comment, Code.COMMENT, line_index, end_line_index,
                         column_index,
                         len(self.input_lines[line_index]) - 1)

        end_reached = False

        # Scan forward, possibly across several lines, until '*/' is found.
        while end_line_index < len(self.input_lines) and not end_reached:
            line = self.input_lines[end_line_index]

            while end_column_index < len(line) and not end_reached:
                char = line[end_column_index]
                comment += char

                if char == '*' and end_column_index + 1 < len(line):
                    next_char = line[end_column_index + 1]
                    if next_char == '/':
                        comment += next_char
                        end_reached = True

                end_column_index += 1

            if not end_reached:
                end_line_index += 1
                end_column_index = 0

        if end_reached:
            return Token(comment, Code.COMMENT, line_index, end_line_index,
                         column_index, end_column_index)

        return Token(comment, Code.MF_COMMENT, line_index, end_line_index,
                     column_index, end_column_index)
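
The '*/' search is the only scan that crosses lines. A compact sketch of the same search using str.find (assuming non-empty input and the same start-after-'/*' convention):

def find_comment_end(lines, line_index, column_index):
    # Returns (end_line, end_column, closed?); end_column points at the closing '/'.
    li, ci = line_index, column_index + 2
    while li < len(lines):
        pos = lines[li].find('*/', ci)
        if pos != -1:
            return li, pos + 1, True
        li += 1
        ci = 0
    return len(lines) - 1, len(lines[-1]) - 1, False

lines = ['x = 1 /* a\n', 'comment */ y = 2\n']
assert find_comment_end(lines, 0, 6) == (1, 9, True)
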
Example #9
    def tokenize_delimiter(self, line_index, column_index, delimiter):
        return Token(delimiter, Code.DELIMITER, line_index, line_index,
                     column_index, column_index)
Example #10
    def open_function(self, identifier, init_index_array, array):
        end_index_array = init_index_array + 1
        open_amount = 0
        first_open = True
        func_array = [array[init_index_array]]
        func_len = len(array)
        error = False
        child_error = False
        func_str = array[init_index_array].lexeme
        function_name = array[init_index_array].lexeme
        
        # Collect the tokens of the whole call, tracking nested parentheses.
        while end_index_array < func_len:
            token = array[end_index_array]
            if not first_open and open_amount == 0:
                break
            elif token.lexeme == '(':
                if first_open or open_amount > 0:
                    open_amount += 1
                elif open_amount == 0:
                    break
            elif token.lexeme == ')':
                if open_amount == 0:
                    break
                else:
                    open_amount -= 1

            func_array.append(token)
            end_index_array += 1
            first_open = False

        params_index = 3
        open_amount = 0
        func_str += '('
        cur_param = func_array[2:3]  # seed with the first param token; [] if absent
        func_array_w_types = [func_array[0], func_array[1]]
        no_params = False

        if params_index >= len(func_array):
            no_params = True
            func_str += ')'

        while params_index < len(func_array):
            token = func_array[params_index]
            
            if token.lexeme == '(':
                open_amount += 1
                cur_param.append(token)
            elif token.lexeme == ')' and open_amount > 0:
                open_amount -= 1
                cur_param.append(token)
            elif open_amount == 0 and (token.lexeme == ',' or token.lexeme == ')'):
                param_type = self.get_type_expression(identifier, cur_param)
                func_str += param_type
                func_str += token.lexeme

                # Substitute a default literal of the inferred type (or a generic
                # identifier) so later passes can check the call uniformly.
                default_lexemes = {'int': ('0', Code.NUMBER),
                                   'real': ('0.0', Code.NUMBER),
                                   'string': ('" "', Code.KEYWORD),
                                   'boolean': ('true', Code.KEYWORD)}
                lexeme, code = default_lexemes.get(param_type,
                                                   ('object', Code.IDENTIFIER))
                tk = Token(lexeme, code, identifier.line_begin_index,
                           identifier.line_end_index,
                           identifier.column_begin_index,
                           identifier.column_end_index)
                
                func_array_w_types.append(tk)
                func_array_w_types.append(token)
                cur_param = []
            else:
                cur_param.append(token)

            params_index += 1

        # Verify that the function has been declared.
        func_declared = False
        found_key = ''
        return_type = func_str
        
        if no_params:
            if func_str in self.symbols:
                func_declared = True
                found_key = func_str
        else:
            for key in self.symbols:
                try:
                    proc_key = key[:key.index('(')]

                    if function_name in proc_key:
                        params = key[key.index('(') + 1:key.index(')')]
                        splitted_params = params.split(',')
                        params_str = ''

                        # Keep only the type of each declared parameter.
                        for i, param in enumerate(splitted_params):
                            params_str += param.split()[0]
                            if i + 1 < len(splitted_params):
                                params_str += ','

                        if function_name + '(' + params_str + ')' == func_str:
                            func_declared = True
                            found_key = key
                            break
                except (ValueError, IndexError):  # key without '()' or empty param list
                    func_declared = False

        if func_declared:
            if '@return' in self.symbols[found_key]:
                return_type = self.symbols[found_key]['@return']
                if return_type == 'int':
                    return_type = '0'
                elif return_type == 'real':
                    return_type = '0.0'
                elif return_type == 'string':
                    return_type = '" "'
                elif return_type == 'boolean':
                    return_type = 'true'
            else:
                self.add_error(array[init_index_array], 'This procedure does not return a type: `' + func_str + '`.')
        elif not child_error:
            self.add_error(array[init_index_array], 'This function does not exist: `' + func_str + '`.')
            error = True

        if not error:
            error = child_error
            
        return [end_index_array, return_type, error]
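
The declaration lookup above matches a call signature like 'f(int,real)' against symbol-table keys of the form 'f(int a,real b)' by stripping parameter names. A sketch of just that matching step, assuming that key layout:

def signature_matches(call_sig, key):
    try:
        name = key[:key.index('(')]
        params = key[key.index('(') + 1:key.index(')')]
    except ValueError:  # key is not a function signature
        return False
    types = ','.join(p.split()[0] for p in params.split(',') if p.strip())
    return call_sig == name + '(' + types + ')'

assert signature_matches('f(int,real)', 'f(int a,real b)')
assert not signature_matches('f(int)', 'f(int a,real b)')
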
Example #11
    def get_type_expression(self, identifier, expr_array):
        if not hasattr(identifier, 'lexeme'):
            return

        expr = ''
        custom_type = ''
        
        for x in expr_array:
            if hasattr(x, 'lexeme'):
                expr += x.lexeme

        x = re.findall(LETTER, expr)
        
        if len(x) == 0:
            if any(l_and_r in expr for l_and_r in LOGICAL_AND_RELATIONAL):
                return 'boolean'
            else:
                try:
                    if isinstance(eval(expr), int):
                        return 'int'
                    else:
                        return 'real'
                except Exception:
                    return 'invalid'
            """ if self.symbols[scope][identifier.lexeme]['type'] == 'int':
                if isinstance(eval(expr), int):
                    return 'int'
                else:
                    self.add_error(identifier, 'You cannot assign `' + str(eval(expr)) +'` to `int`')
            elif self.symbols[scope][identifier.lexeme]['type'] == 'real':
                return 'real'
            else:
                self.add_error(identifier, 'You cannot assign `' + str(eval(expr)) +'` to ' + '`' + self.symbols[scope][identifier.lexeme]['type'] + '`') """
        else:
            is_int = 0
            is_real = 0
            is_boolean = 0
            is_string = 0
            last_scope = ''
            index = 0
            array_len = len(expr_array)

            if any(l_and_r in expr for l_and_r in LOGICAL_AND_RELATIONAL):
                is_boolean = 1
            
            while index < array_len:
                token = expr_array[index]
                if token.lexeme == 'global':
                    last_scope = 'global'
                elif token.lexeme == 'local':
                    last_scope = 'local'
                elif token.code == Code.IDENTIFIER:
                    proc_declared = False

                    if index + 1 < array_len and expr_array[index + 1].lexeme == '(':
                        for key in self.symbols:
                            try:
                                proc_key = key[:(key.index('(') + 1)]
                                if token.lexeme + '(' == proc_key:
                                    proc_declared = True
                                    break
                            except ValueError:  # key has no '('
                                proc_declared = False
                            
                    if proc_declared:
                        initial_index = index
                        resp = self.open_function(identifier, index, expr_array)
                        last_index = resp[0]

                        part1 = expr_array[0:initial_index]
                        part2 = expr_array[last_index:array_len]
                        new_expr_array = []

                        for part in part1:
                            new_expr_array.append(part)
                            
                        tk_str = resp[1]

                        # Same default-literal substitution as in open_function.
                        lexeme_to_code = {'0': Code.NUMBER, '0.0': Code.NUMBER,
                                          '" "': Code.KEYWORD, 'true': Code.KEYWORD}
                        tk = Token(tk_str, lexeme_to_code.get(tk_str, Code.IDENTIFIER),
                                   identifier.line_begin_index,
                                   identifier.line_end_index,
                                   identifier.column_begin_index,
                                   identifier.column_end_index)
                        new_expr_array.append(tk)

                        for part in part2:
                            new_expr_array.append(part)
                            
                        expr_array = new_expr_array
                        array_len = len(new_expr_array)
                        index = len(part1) - 1

                    else:
                        new_index = index
                        access_list = last_scope

                        if last_scope == 'global' or last_scope == 'local':
                            access_list += '.'
                        
                        array_found = 0

                        while new_index < array_len:
                            if array_found == 0 and expr_array[new_index].code == Code.IDENTIFIER:
                                access_list += expr_array[new_index].lexeme
                            elif array_found == 0 and expr_array[new_index].lexeme == '.':
                                access_list += '.'
                            elif expr_array[new_index].lexeme == '[':
                                if array_found == 0:
                                    access_list += expr_array[new_index].lexeme
                                array_found += 1
                            elif expr_array[new_index].lexeme == ']':
                                array_found -= 1
                                if array_found == 0:
                                    access_list += expr_array[new_index].lexeme
                            elif array_found == 0:
                                break

                            new_index += 1
                            
                        found_type = self.get_access_type(access_list, token, True)

                        if found_type == 'int':
                            is_int = 1
                        elif found_type == 'real':
                            is_real = 1
                        elif found_type == 'boolean':
                            is_boolean = 1
                        elif found_type == 'string':
                            is_string = 1
                        else:
                            custom_type = found_type
                        """ else:
                            if not ('(' in token.lexeme and ')' in token.lexeme):
                                self.add_error(identifier, 'Identifier not declared: `' + token.lexeme + '`') """
                        
                        index = new_index
                        last_scope = ''
                elif token.code == Code.NUMBER:
                    try:
                        if isinstance(eval(token.lexeme), int):
                            is_int = 1
                        else:
                            is_real = 1
                    except Exception:
                        pass
                elif token.lexeme == 'true' or token.lexeme == 'false':
                    is_boolean = 1  
                elif token.code == Code.STRING:
                    is_string = 1
                
                index += 1

            expr = ''

            for x in expr_array:
                expr += x.lexeme

            if is_int + is_real + is_string > 1:
                self.add_error(identifier, 'There is more than one type in a single expression `' + expr + '`. Conversions are not allowed here.')
            elif is_boolean == 1:
                return 'boolean'
            elif is_int == 1:
                return 'int'
            elif is_real == 1:
                return 'real'
            elif is_string == 1:
                return 'string'
            
            if custom_type != '':
                return custom_type
            return 'invalid'
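
For expressions with no identifiers, the method classifies pure literals through eval. A standalone sketch of that fallback with the same int/real/boolean/invalid outcomes (the LETTER pattern and the LOGICAL_AND_RELATIONAL contents are assumptions, since the module-level constants are not shown here):

import re

LETTER = r'[a-zA-Z]'
LOGICAL_AND_RELATIONAL = ['&&', '||', '==', '!=', '<=', '>=', '<', '>']

def literal_type(expr):
    if re.findall(LETTER, expr):
        raise ValueError('not a pure literal expression')
    if any(op in expr for op in LOGICAL_AND_RELATIONAL):
        return 'boolean'
    try:
        return 'int' if isinstance(eval(expr), int) else 'real'
    except Exception:
        return 'invalid'

assert literal_type('1 + 2 * 3') == 'int'
assert literal_type('1 / 2.0') == 'real'
assert literal_type('1 < 2') == 'boolean'
assert literal_type('1 +') == 'invalid'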