Example #1
def process(tokens, this_file):
    """Process the given tokens and return the preprocessed token list."""

    processed = []
    i = 0
    while i < len(tokens) - 2:
        if (tokens[i].kind == token_kinds.pound
                and tokens[i + 1].kind == token_kinds.identifier
                and tokens[i + 1].content == "include"
                and tokens[i + 2].kind == token_kinds.include_file):

            # Replace tokens[i] -> tokens[i+2] with preprocessed contents of
            # the included file.
            file, filename = read_file(tokens[i + 2].content, this_file)
            if not file:
                error_collector.add(
                    CompilerError("unable to read included file",
                                  tokens[i + 2].r))
            else:
                new_tokens = process(lexer.tokenize(file, filename), filename)
                processed += new_tokens

            i += 3

        else:
            processed.append(tokens[i])
            i += 1

    return processed + tokens[i:]
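For context, a top-level driver would typically lex the main source file and then hand the token list to process(). The sketch below is illustrative only; the module names lexer and preproc, and the shape of the driver, are assumptions rather than part of the example above.

# Illustrative driver (module names are assumptions): lex the top-level file,
# then let process() expand its #include directives.
import lexer
import preproc

with open("main.c") as f:
    code = f.read()

tokens = lexer.tokenize(code, "main.c")      # flat list of Token objects
tokens = preproc.process(tokens, "main.c")   # #include directives expanded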
Example #2
def tokenize(code, filename):
    """Convert given code into a flat list of Tokens.

    code - String containing the source code to tokenize.
    filename - Name of the file the code was read from.
    return - List of Token objects and the dict of #define macros.
    """
    # Store tokens as they are generated
    tokens = []

    lines = split_to_tagged_lines(code, filename)
    join_extended_lines(lines)

    in_comment = False
    for line in lines:
        try:
            line_tokens, in_comment = tokenize_line(line, in_comment)
            tokens += line_tokens
        except CompilerError as e:
            error_collector.add(e)

    # defineDict is presumably a module-level dictionary of macro
    # definitions maintained elsewhere in this variant of the lexer.
    return tokens, defineDict
Example #3
def read_file(file):
    """Return the contents of the given file."""
    try:
        with open(file) as c_file:
            return c_file.read()
    except IOError:
        descrip = f"could not read file: '{file}'"
        error_collector.add(CompilerError(descrip))
Example #4
def assemble(asm_name, obj_name):
    """Assemble the given assembly file into an object file."""
    try:
        subprocess.check_call(["as", "-64", "-o", obj_name, asm_name])
        return True
    except subprocess.CalledProcessError:
        err = "assembler returned non-zero status"
        error_collector.add(CompilerError(err))
        return False
Example #5
def process_file(file, args):
    """Process single file into assembly code and return the code as string."""
    #print("processing file: ", file)
    if file[-2:] == ".c":
        return process_c_file(file, args)
    else:
        err = f"unknown file type: '{file}'"
        error_collector.add(CompilerError(err))
        return None
Example #6
def process_file(file, args):
    """Process single file into object file and return the object file name."""
    if file[-2:] == ".c":
        return process_c_file(file, args)
    elif file[-2:] == ".o":
        return file
    else:
        err = f"unknown file type: '{file}'"
        error_collector.add(CompilerError(err))
        return None
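A caller would typically loop over the input files and keep only the names that process_file() actually returns, since a None return means the error has already been recorded in error_collector. A minimal sketch, assuming an args object with a files attribute (not shown in these examples):

# Hedged sketch: gather object files for the link step, skipping inputs
# that process_file() rejected (it returns None after adding a CompilerError).
obj_files = []
for file in args.files:            # `args.files` is an assumed CLI attribute
    obj = process_file(file, args)
    if obj:
        obj_files.append(obj)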
Example #7
def process(tokens, this_file, defineDict={}, includeList=[]):
    """Process the given tokens and return the preprocessed token list."""
    #for token in tokens:
    #    print(token)
    processed = []
    i = 0
    while i < len(tokens) - 2:
        if (tokens[i].kind == token_kinds.pound
                and tokens[i + 1].kind == token_kinds.identifier
                and tokens[i + 1].content == "include"
                and tokens[i + 2].kind == token_kinds.include_file):

            # Replace tokens[i] -> tokens[i+2] with preprocessed contents of
            # the included file.
            try:
                file, filename = read_file(tokens[i + 2].content, this_file)
                if filename not in includeList:
                    includeList.append(filename)
                    lexTokens, _ = lexer.tokenize(file, filename)
                    new_tokens = process(lexTokens, filename, defineDict,
                                         includeList)
                    processed += new_tokens

            except IOError:
                error_collector.add(
                    CompilerError("unable to read included file",
                                  tokens[i + 2].r))

            i += 3

        # Ignore #define directives. Currently the defined value is not
        # present in the token list.
        elif (tokens[i].kind == token_kinds.pound
              and tokens[i + 1].kind == token_kinds.identifier
              and tokens[i + 1].content == "define"):

            i += 3

        else:
            # Apply the #define dictionary to the current token.
            if str(tokens[i]) in defineDict:
                if defineDict[str(tokens[i])].isdigit():
                    tokens[i].kind = token_kinds.number
                else:
                    error_collector.add(
                        CompilerError("Define value is not a number",
                                      tokens[i].r))
                tokens[i].content = defineDict[str(tokens[i])]

            processed.append(tokens[i])
            i += 1

    return processed + tokens[i:]
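One caveat in the signature above: defineDict={} and includeList=[] are mutable default arguments, which Python evaluates only once, so macro definitions and the list of already-included files persist across separate top-level calls to process(). A common, safer idiom (a sketch, not the author's code) defaults to None and builds fresh containers per call:

# Safer variant of the signature above; behavior is otherwise unchanged.
def process(tokens, this_file, defineDict=None, includeList=None):
    if defineDict is None:
        defineDict = {}
    if includeList is None:
        includeList = []
    # ... same body as in the example above ...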
Example #8
def find_library_or_err(file):
    """Search the given library file and return path if found.

    If not found, add an error to the error collector and return None.
    """
    path = find_library(file)
    if not path:
        err = f"could not find {file}"
        error_collector.add(CompilerError(err))
        return None
    else:
        return path
Example #9
def write_asm(asm_source, asm_filename):
    """Save the given assembly source to disk at asm_filename.

    asm_source (str) - Full assembly source code.
    asm_filename (str) - Filename to which to save the generated assembly.

    """
    try:
        with open(asm_filename, "w") as s_file:
            s_file.write(asm_source)
    except IOError:
        descrip = f"could not write output file '{asm_filename}'"
        error_collector.add(CompilerError(descrip))
Example #10
def find_crtnum():
    """Search for the crt0, crt1, or crt2.o files on the system.

    If one is found, return its path. Else, add an error to the
    error_collector and return None.
    """
    for file in ["crt2.o", "crt1.o", "crt0.o"]:
        crt = find_library(file)
        if crt:
            return crt

    err = "could not find crt0.o, crt1.o, or crt2.o for linking"
    error_collector.add(CompilerError(err))
    return None
Example #11
def parse(tokens_to_parse):
    """Parse the given tokens into an AST.

    Also, as the entry point for the parser, responsible for setting the
    tokens global variable.
    """
    p.best_error = None
    p.tokens = tokens_to_parse

    with log_error():
        return parse_root(0)[0]

    error_collector.add(p.best_error)
    return None
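This control flow relies on log_error being a context manager that suppresses ParserError: if parse_root() succeeds, the return inside the with block exits immediately; if it raises, the suppressed exception lets execution fall through to the error-reporting lines after the block. The project's actual log_error is not shown in these examples; the following is only a minimal sketch of that pattern, with the error-recording detail assumed.

from contextlib import contextmanager

@contextmanager
def log_error():
    """Swallow ParserError so the caller falls through to its error path."""
    try:
        yield
    except ParserError as e:
        # The real implementation presumably keeps only the "best" error seen.
        p.best_error = e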
Example #12
def parse_abstract_declarator(index):
    """Parse an abstract declarator into a decl_nodes.Node.

    This function saves a CompilerError if the parsed entity is a declarator,
    rather than an abstract declarator.
    """
    root, index = parse_declarator(index)
    node = root
    while not isinstance(node, decl_nodes.Identifier):
        node = node.child

    if node.identifier:
        # Add the error to the error_collector because this is more of a
        # semantic error than a parsing error.
        err = "expected abstract declarator, but identifier name was provided"
        error_collector.add(CompilerError(err, node.identifier.r))

    return root, index
Example #13
def tokenize(code, filename):
    """Convert given code into a flat list of Tokens.

    code - String containing the source code to tokenize.
    filename - Name of the file the code was read from.
    return - List of Token objects.
    """
    # Store tokens as they are generated
    tokens = []

    lines = split_to_tagged_lines(code, filename)
    join_extended_lines(lines)

    in_comment = False
    for line in lines:
        try:
            line_tokens, in_comment = tokenize_line(line, in_comment)
            tokens += line_tokens
        except CompilerError as e:
            error_collector.add(e)

    return tokens
Example #14
def parse_decl_specifiers(index, _spec_qual=False):
    """Parse a declaration specifier list.

    Examples:
        int
        const char
        typedef int

    If _spec_qual=True, produces a CompilerError if given any specifiers
    that are neither type specifier nor type qualifier.

    The returned `specs` list may contain two types of elements: tokens and
    Node objects. A Node object will be included for a struct or union
    declaration, and a token for all other declaration specifiers.
    """
    type_specs = set(ctypes.simple_types.keys())
    type_specs |= {token_kinds.signed_kw, token_kinds.unsigned_kw}

    type_quals = {token_kinds.const_kw}

    storage_specs = {token_kinds.auto_kw, token_kinds.static_kw,
                     token_kinds.extern_kw, token_kinds.typedef_kw}

    specs = []

    # The type specifier class, either SIMPLE, STRUCT, or TYPEDEF,
    # represents the allowed kinds of type specifiers. Once the first
    # specifier is parsed, the type specifier class is set. If the type
    # specifier class is set to STRUCT or TYPEDEF, no further type
    # specifiers are permitted in the type specifier list. If it is set to
    # SIMPLE, more simple type specifiers are permitted. This is important
    # for typedef parsing.

    SIMPLE = 1
    STRUCT = 2
    TYPEDEF = 3
    type_spec_class = None

    while True:
        # Parse a struct specifier if there is one.
        if not type_spec_class and token_is(index, token_kinds.struct_kw):
            node, index = parse_struct_spec(index + 1)
            specs.append(node)
            type_spec_class = STRUCT

        # Parse a union specifier if there is one.
        elif not type_spec_class and token_is(index, token_kinds.union_kw):
            node, index = parse_union_spec(index + 1)
            specs.append(node)
            type_spec_class = STRUCT

        # Match a typedef name
        elif (not type_spec_class
              and token_is(index, token_kinds.identifier)
              and p.symbols.is_typedef(p.tokens[index])):
            specs.append(p.tokens[index])
            index += 1
            type_spec_class = TYPEDEF

        elif type_spec_class in {None, SIMPLE} and token_in(index, type_specs):
            specs.append(p.tokens[index])
            index += 1
            type_spec_class = SIMPLE

        elif token_in(index, type_quals):
            specs.append(p.tokens[index])
            index += 1

        elif token_in(index, storage_specs):
            if not _spec_qual:
                specs.append(p.tokens[index])
            else:
                err = "storage specifier not permitted here"
                error_collector.add(CompilerError(err, p.tokens[index].r))
            index += 1

        else:
            break

    if specs:
        return specs, index
    else:
        raise_error("expected declaration specifier", index, ParserError.AT)
Example #15
def tokenize_line(line, in_comment):
    """Tokenize the given single line.

    line - List of Tagged objects.
    in_comment - Whether the first character in this line is part of a
    C-style comment body.
    return - List of Token objects, and boolean indicating whether the next
    character is part of a comment body.
    """
    tokens = []

    # line[chunk_start:chunk_end] is the section of the line currently
    # being considered for conversion into a token; this string will be
    # called the 'chunk'. Everything before the chunk has already been
    # tokenized, and everything after has not yet been examined
    chunk_start = 0
    chunk_end = 0

    # Flag that is set True if the line begins with `#` and `include`,
    # perhaps with comments and whitespace in between.
    include_line = False
    # Flag that is set True if the line is an include directive and the
    # filename has been seen and successfully parsed.
    seen_filename = False

    while chunk_end < len(line):
        symbol_kind = match_symbol_kind_at(line, chunk_end)
        next = match_symbol_kind_at(line, chunk_end + 1)

        # Set include_line flag True as soon as a `#include` is detected.
        if match_include_command(tokens):
            include_line = True

        if in_comment:
            # If next characters end the comment...
            if symbol_kind == token_kinds.star and next == token_kinds.slash:
                in_comment = False
                chunk_start = chunk_end + 2
                chunk_end = chunk_start
            # Otherwise, just skip one character.
            else:
                chunk_start = chunk_end + 1
                chunk_end = chunk_start

        # If next characters start a comment, process previous chunk and set
        # in_comment to true.
        elif symbol_kind == token_kinds.slash and next == token_kinds.star:
            add_chunk(line[chunk_start:chunk_end], tokens)
            in_comment = True

        # If next two characters are //, we skip the rest of this line.
        elif symbol_kind == token_kinds.slash and next == token_kinds.slash:
            break

        # Skip spaces and process previous chunk.
        elif line[chunk_end].c.isspace():
            add_chunk(line[chunk_start:chunk_end], tokens)
            chunk_start = chunk_end + 1
            chunk_end = chunk_start

        # If this is an include line, and not a comment or whitespace,
        # expect the line to match an include filename.
        elif include_line:

            # If the filename has already been seen, there should be no more
            # tokens.
            if seen_filename:
                descrip = "extra tokens at end of include directive"
                raise CompilerError(descrip, line[chunk_end].r)

            filename, end = read_include_filename(line, chunk_end)
            tokens.append(Token(token_kinds.include_file, filename,
                                r=Range(line[chunk_end].p, line[end].p)))

            chunk_start = end + 1
            chunk_end = chunk_start
            seen_filename = True

        # If next character is a quote, we read the whole string as a token.
        elif symbol_kind in {token_kinds.dquote, token_kinds.squote}:
            if symbol_kind == token_kinds.dquote:
                quote_str = '"'
                kind = token_kinds.string
                add_null = True
            else:
                quote_str = "'"
                kind = token_kinds.char_string
                add_null = False

            chars, end = read_string(line, chunk_end + 1, quote_str, add_null)
            rep = chunk_to_str(line[chunk_end:end + 1])
            r = Range(line[chunk_end].p, line[end].p)

            if kind == token_kinds.char_string and len(chars) == 0:
                err = "empty character constant"
                error_collector.add(CompilerError(err, r))
            elif kind == token_kinds.char_string and len(chars) > 1:
                err = "multiple characters in character constant"
                error_collector.add(CompilerError(err, r))

            tokens.append(Token(kind, chars, rep, r=r))

            chunk_start = end + 1
            chunk_end = chunk_start

        # If next character is another symbol, add previous chunk and then
        # add the symbol.
        elif symbol_kind:
            symbol_start_index = chunk_end
            symbol_end_index = chunk_end + len(symbol_kind.text_repr) - 1

            r = Range(line[symbol_start_index].p, line[symbol_end_index].p)
            symbol_token = Token(symbol_kind, r=r)

            add_chunk(line[chunk_start:chunk_end], tokens)
            tokens.append(symbol_token)

            chunk_start = chunk_end + len(symbol_kind.text_repr)
            chunk_end = chunk_start

        # Include another character in the chunk.
        else:
            chunk_end += 1

    # Flush out anything that is left in the chunk to the output
    add_chunk(line[chunk_start:chunk_end], tokens)

    # Catch a `#include` on a line by itself.
    if (include_line or match_include_command(tokens)) and not seen_filename:
        read_include_filename(line, chunk_end)

    return tokens, in_comment
Example #16
@contextmanager  # from contextlib; presumably present in the original source
def report_err():
    """Catch and add any errors to error collector."""
    try:
        yield
    except CompilerError as e:
        error_collector.add(e)
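Because of the yield, report_err() is used as a context manager: any CompilerError raised inside the with block is added to the error collector instead of propagating. A usage sketch (the parsing call named here is hypothetical):

with report_err():
    node, index = parse_declaration(index)   # hypothetical parsing step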