Example #1
def read_file(arguments):
    """ Read the file(s) in arguments and return the file contents """
    try:
        with open(arguments.filename) as c_file:
            return c_file.read(), arguments.filename
    except IOError:
        descr = "could not read file: '{}'"
        error_collector.add(CompilerError(descr.format(arguments.filename)))
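A minimal usage sketch: the snippet omits its imports, so here argparse.Namespace stands in for whatever command-line arguments object the real driver builds; only a filename attribute is required.

import argparse

# Hypothetical driver call; read_file returns (contents, filename) on success
# and returns None after recording a CompilerError if the file cannot be read.
arguments = argparse.Namespace(filename="main.c")
result = read_file(arguments)
if result:
    code, filename = result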
Example #2
def write_asm(asm_source, asm_filename):
    """Save the given assembly source to disk at asm_filename.
        asm_source (str) - Full assembly source code.
        asm_filename (str) - Filename to which to save the generated assembly.
    """
    try:
        with open(asm_filename, "w") as s_file:
            s_file.write(asm_source)
    except IOError:
        descr = "could not write output file '{}'"
        error_collector.add(CompilerError(descr.format(asm_filename)))
Example #3
def parse(tokens_to_parse):
    """Parse the given tokens into an AST. Also, as the entry point for the myparser, responsible for setting the tokens
    global variable.
    """
    p.best_error = None
    p.tokens = tokens_to_parse

    with log_error():
        return parse_root(0)[0]

    error_collector.add(p.best_error)
    return None
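The early return inside the with block only runs when parsing succeeds; when parse_root raises, log_error is expected to swallow the exception so control falls through to the error-reporting lines below. The snippet does not show log_error itself, so the following is only a plausible sketch of such a context manager, assuming ParserError and the p module behave as the surrounding code suggests.

from contextlib import contextmanager

@contextmanager
def log_error():
    # Plausible sketch only: suppress a ParserError raised in the managed
    # block. The real parser presumably keeps whichever error corresponds to
    # the furthest parsing progress; here only the first error is recorded.
    try:
        yield
    except ParserError as e:
        if p.best_error is None:
            p.best_error = e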
Example #4
def parse_abstract_declarator(index):
    """Parse an abstract declarator into a decl_nodes.Node. This function saves a CompilerError if the parsed entity is
    a declarator, rather than an abstract declarator.
    """
    root, index = parse_declarator(index)
    node = root
    while not isinstance(node, decl_nodes.Identifier):
        node = node.child

    if node.identifier:
        # Add to the error_collector rather than raising, because this is more of a semantic error than a parsing error.
        err = "expected abstract declarator, but identifier name was provided"
        error_collector.add(CompilerError(err, node.identifier.r))

    return root, index
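A minimal, self-contained sketch of the chain that the while loop above walks. The classes below are hypothetical stand-ins for decl_nodes; the assumption, implied by the loop, is that declarator nodes link to their child and the chain always bottoms out at an Identifier whose identifier attribute is None for a true abstract declarator.

class Identifier:
    def __init__(self, identifier=None):
        self.identifier = identifier

class Pointer:
    def __init__(self, child):
        self.child = child

# Corresponds to an abstract declarator like "int *": no name is given,
# so parse_abstract_declarator would not report an error for this chain.
root = Pointer(Identifier(None))
node = root
while not isinstance(node, Identifier):
    node = node.child
assert node.identifier is None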
Example #5
def tokenize(code, filename):
    """Convert given code into a flat list of Tokens.
        lines - List of list of Tagged objects, where each embedded list is a separate line in the input program.
        return - List of Token objects.
    """
    # Store tokens as they are generated
    tokens = []

    lines = split_to_tagged_lines(code, filename)
    join_extended_lines(lines)

    in_comment = False
    for line in lines:
        try:
            line_tokens, in_comment = tokenize_line(line, in_comment)
            tokens += line_tokens
        except CompilerError as e:
            error_collector.add(e)

    return tokens
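A brief usage sketch; the Token attributes are not shown in the snippets, so only the call itself is illustrated, and any lexing failures are assumed to end up in error_collector rather than being raised.

source = "int main() {\n    return 0; /* done */\n}\n"
tokens = tokenize(source, "main.c")
# tokens is a flat list of Token objects covering every line of the input;
# errors hit on individual lines were added to error_collector instead.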
Example #6
def parse_decl_specifiers(index, spec_qual=False):
    """ Parse a declaration specifier.
        Examples: int/const char
    The returned `specs` list may contain two types of elements: tokens and Node objects.
    A Node object will be included for a struct declaration, and a token for all other declaration specifiers.
    """
    type_specs = set(ctypes.simple_types.keys())
    type_specs |= {token_kinds.signed_kw, token_kinds.unsigned_kw}

    type_quals = {token_kinds.const_kw}

    storage_specs = {
        token_kinds.auto_kw, token_kinds.static_kw, token_kinds.extern_kw,
        token_kinds.typedef_kw
    }

    specs = []

    # The type specifier class, either SIMPLE, STRUCT, or TYPEDEF, represents the allowed kinds of type specifiers.
    # Once the first specifier is parsed, the type specifier class is set. If the type specifier class is set to STRUCT
    # or TYPEDEF, no further type specifiers are permitted in the type specifier list. If it is set to SIMPLE, more
    # simple type specifiers are permitted. This is important for typedef parsing.

    SIMPLE = 1
    STRUCT = 2
    TYPEDEF = 3
    type_spec_class = None

    while True:
        # Parse a struct specifier if there is one.
        if not type_spec_class and token_is(index, token_kinds.struct_kw):
            node, index = parse_struct(index + 1)
            specs.append(node)
            type_spec_class = STRUCT

        # Match a typedef name
        elif not type_spec_class and token_is(
                index, token_kinds.identifier) and p.symbols.is_typedef(
                    p.tokens[index]):
            specs.append(p.tokens[index])
            index += 1
            type_spec_class = TYPEDEF

        elif type_spec_class in {None, SIMPLE} and token_in(index, type_specs):
            specs.append(p.tokens[index])
            index += 1
            type_spec_class = SIMPLE

        elif token_in(index, type_quals):
            specs.append(p.tokens[index])
            index += 1

        elif token_in(index, storage_specs):
            if not spec_qual:
                specs.append(p.tokens[index])
            else:
                err = "storage specifier not permitted here"
                error_collector.add(CompilerError(err, p.tokens[index].r))
            index += 1

        else:
            break

    if specs:
        return specs, index
    else:
        raise_error("expected declaration specifier", index, ParserError.AT)
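The specifier-class rule described in the comment above can be illustrated with a stripped-down, self-contained sketch. Plain strings stand in for token_kinds and struct handling is reduced to a flag, so this shows the rule itself rather than the parser's actual logic.

SIMPLE, STRUCT, TYPEDEF = 1, 2, 3

def classify(spec_words):
    # Accept specifiers left to right until one is not permitted by the
    # current specifier class, mirroring the loop in parse_decl_specifiers.
    spec_class = None
    accepted = []
    for word in spec_words:
        if spec_class is None and word == "struct":
            spec_class = STRUCT
        elif spec_class in (None, SIMPLE) and word in {"char", "int", "long", "unsigned"}:
            spec_class = SIMPLE
        elif word == "const":
            pass  # qualifiers are always allowed
        else:
            break
        accepted.append(word)
    return accepted

print(classify(["const", "unsigned", "long", "long"]))  # all four accepted
print(classify(["struct", "int"]))                      # "int" rejected after struct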
Example #7
def tokenize_line(line, in_comment):
    """Tokenize the given single line.
        line - List of Tagged objects.
        in_comment - Whether the first character in this line is part of a C-style comment body.
        return - List of Token objects, and boolean indicating whether the next character is part of a comment body.
    """
    tokens = []

    # line[block_start:block_end] is the section of the line currently being considered for conversion into a token;
    # this slice is called the 'block'. Everything before the block has already been tokenized, and everything after
    # it has not yet been examined.
    block_start = 0
    block_end = 0

    while block_end < len(line):
        symbol_kind = match_symbol_kind_at(line, block_end)
        next_ = match_symbol_kind_at(line, block_end + 1)

        if in_comment:
            # If next characters end the comment...
            if symbol_kind == token_kinds.star and next_ == token_kinds.slash:
                in_comment = False
                block_start = block_end + 2
                block_end = block_start
            # Otherwise, just skip one character.
            else:
                block_start = block_end + 1
                block_end = block_start

        # If next characters start a comment, process previous block and set in_comment to true.
        elif symbol_kind == token_kinds.slash and next_ == token_kinds.star:
            add_block(line[block_start:block_end], tokens)
            in_comment = True

        # If next two characters are //, we skip the rest of this line.
        elif symbol_kind == token_kinds.slash and next_ == token_kinds.slash:
            break

        # Skip spaces and process previous block.
        elif line[block_end].c.isspace():
            add_block(line[block_start:block_end], tokens)
            block_start = block_end + 1
            block_end = block_start

        # If next character is a quote, we read the whole string as a token.
        elif symbol_kind in {token_kinds.dquote, token_kinds.squote}:
            if symbol_kind == token_kinds.dquote:
                global STR_EX
                STR_EX.append(1)
                quote_str = '"'
                kind = token_kinds.string
                add_null = True
            else:
                quote_str = "'"
                kind = token_kinds.char_string
                add_null = False

            chars, end = read_string(line, block_end + 1, quote_str, add_null)
            rep = block_to_str(line[block_end:end + 1])
            r = Range(line[block_end].p, line[end].p)

            if kind == token_kinds.char_string and len(chars) == 0:
                err = "empty character constant"
                error_collector.add(CompilerError(err, r))
            elif kind == token_kinds.char_string and len(chars) > 1:
                err = "multiple characters in character constant"
                error_collector.add(CompilerError(err, r))

            tokens.append(Token(kind, chars, rep, r=r))
            block_start = end + 1
            block_end = block_start

        # If next character is another symbol, add previous block and then add the symbol.
        elif symbol_kind:
            symbol_start_index = block_end
            symbol_end_index = block_end + len(symbol_kind.text_repr) - 1

            r = Range(line[symbol_start_index].p, line[symbol_end_index].p)
            symbol_token = Token(symbol_kind, r=r)

            add_block(line[block_start:block_end], tokens)
            tokens.append(symbol_token)

            block_start = block_end + len(symbol_kind.text_repr)
            block_end = block_start

        # Include another character in the block.
        else:
            block_end += 1

    # Flush out anything that is left in the block to the output
    add_block(line[block_start:block_end], tokens)

    return tokens, in_comment
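The block_start/block_end bookkeeping described near the top of the function can be seen in isolation in this simplified, self-contained sketch, which scans a plain string and only splits on whitespace (no symbols, string literals, or comments).

def scan_words(line):
    tokens, block_start, block_end = [], 0, 0
    while block_end < len(line):
        if line[block_end].isspace():
            # Flush the current block, if any, and start a new one after
            # the whitespace character, just as tokenize_line does.
            if block_start != block_end:
                tokens.append(line[block_start:block_end])
            block_start = block_end = block_end + 1
        else:
            # Widen the block to include one more character.
            block_end += 1
    if block_start != block_end:
        tokens.append(line[block_start:block_end])
    return tokens

print(scan_words("int x = 5"))  # ['int', 'x', '=', '5']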
Example #8
from contextlib import contextmanager

@contextmanager
def report_err():
    """Catch any CompilerError raised in the managed block and add it to the error collector."""
    try:
        yield
    except CompilerError as e:
        error_collector.add(e)
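With the contextmanager decorator in place, report_err is used the same way log_error is used in Example #3. A hypothetical call site (parse_declaration is a stand-in name, not necessarily a real function in this project) might look like this.

# Hypothetical usage: any CompilerError raised while parsing is recorded in
# error_collector instead of propagating to the caller.
with report_err():
    node, index = parse_declaration(index)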