Exemple #1
0
def second_pass():
    """
    Translate every command of the source file into one output line.

    The source is read through ``Parser(sys.argv[1])`` and the result is
    written to the module-level ``output_file``.  C-commands become
    ``'111' + comp + dest + jump``.  A-commands are resolved in this order:
    integer literal, symbol already known to ``SymbolTable``, otherwise a
    new entry whose value is ``binary_word`` of the next free slot (slots
    start at 16).  L-commands produce no text.
    """
    source = Parser(sys.argv[1])
    next_free_slot = 16
    with open(output_file, 'w') as out:
        while source.has_more_commands():
            source.advance()
            kind = source.command_type()
            line = ''
            if kind == 'C_COMMAND':
                comp_bits = code_trans.comp(source.comp())
                dest_bits = code_trans.dest(source.dest())
                jump_bits = code_trans.jump(source.jump())
                line = '111' + comp_bits + dest_bits + jump_bits + '\n'
            elif kind == 'A_COMMAND':
                name = source.symbol()
                if is_integer(name):
                    line = binary_word(name) + '\n'
                elif SymbolTable.contains(name):
                    # The table stores ready-made words, so no conversion.
                    line = SymbolTable.get_address(name) + '\n'
                else:
                    slot_word = binary_word(next_free_slot)
                    SymbolTable.add_entry(name, slot_word)
                    line = slot_word + '\n'
                    next_free_slot += 1
            # For L_COMMAND (or anything else) ``line`` is still empty.
            out.write(line)
Exemple #2
0
def main():
    """Assemble the ``.asm`` file named on the command line.

    The output path is the input path with its extension replaced by
    ``.hack``.  The source is read twice through ``HackParser``:

    1. Every L_COMMAND symbol is stored in the symbol table together with
       the number of A/C commands that precede it.
    2. One line is written per A_COMMAND / C_COMMAND; symbols that are not
       yet in the table are registered with ``add_variable``.

    Raises:
        ValueError: if an A_COMMAND operand is not matched by
            ``symbol_pattern`` or the parser reports an unknown command
            type (previously this re-wrote a stale line or failed with an
            unrelated NameError/AttributeError).
    """
    parser = argparse.ArgumentParser(description='Process some integers.')
    parser.add_argument('asm_file', type=str, help='asm file')

    args = parser.parse_args()
    asm_file = args.asm_file

    save_file = os.path.splitext(asm_file)[0] + ".hack"

    st = SymbolTable()

    # First pass: labels only.
    with HackParser(asm_file) as hp:
        op_address = 0
        while hp.advance() is not None:
            cmd_type = hp.command_type()
            if cmd_type in (A_COMMAND, C_COMMAND):
                op_address += 1
            elif cmd_type == L_COMMAND:
                st.add_entry(hp.symbol(), op_address)

    # Second pass: code generation.
    with HackParser(asm_file) as hp, open(save_file, 'w') as wf:
        while hp.advance() is not None:
            cmd_type = hp.command_type()

            if cmd_type == A_COMMAND:
                symbol = hp.symbol()
                m = symbol_pattern.match(symbol)
                if m is None:
                    raise ValueError(
                        'invalid A-command operand: %r' % (symbol,))

                if m.group(1):  # @value
                    address = int(m.group(1))
                elif m.group(2):  # @symbol
                    symbol = m.group(2)
                    if not st.contains(symbol):
                        st.add_variable(symbol)
                    address = st.get_address(symbol)
                else:
                    raise ValueError(
                        'invalid A-command operand: %r' % (symbol,))
                bincode = "0" + int2bin(address, 15)

            elif cmd_type == C_COMMAND:
                bincode = ('111' + code_writer.comp(hp.comp())
                           + code_writer.dest(hp.dest())
                           + code_writer.jump(hp.jump()))

            elif cmd_type == L_COMMAND:
                # Labels were consumed by the first pass; nothing to emit.
                continue

            else:
                raise ValueError('unknown command type: %r' % (cmd_type,))

            wf.write(bincode + '\n')
Exemple #3
0
def first_pass():
    """Record every label of the source file in ``SymbolTable``.

    Walks the commands of ``Parser(sys.argv[1])`` while counting A- and
    C-commands.  Each L-command symbol that is not yet in the table is
    stored with ``binary_word`` of the current count, i.e. the index of
    the instruction that follows the label.
    """
    source = Parser(sys.argv[1])
    next_instruction = 0
    while source.has_more_commands():
        source.advance()
        kind = source.command_type()
        if kind in ('C_COMMAND', 'A_COMMAND'):
            next_instruction += 1
        elif kind == 'L_COMMAND':
            label = source.symbol()
            if not SymbolTable.contains(label):
                SymbolTable.add_entry(label, binary_word(next_instruction))
Exemple #4
0
def generate_symbols(file):
    """Build a symbol table holding the labels declared in *file*.

    Args:
        file: path of the assembly source; it is opened for reading and the
            handle is passed to ``Parser``.

    Returns:
        A new ``SymbolTable`` in which every L_COMMAND symbol maps to the
        number of A/C commands seen before it.  A symbol that is already in
        the table keeps its first value.
    """
    symbol_table = SymbolTable()
    instruction_number = 0
    with open(file) as f:
        parser = Parser(f)
        while parser.has_more_commands():
            # Compare by value: ``is`` on str literals tests object identity
            # and only works by accident of interning.
            command_type = parser.command_type()
            if command_type == 'L_COMMAND':
                symbol = parser.symbol()
                if not symbol_table.contains(symbol):
                    symbol_table.add_entry(symbol, instruction_number)
            elif command_type in ('A_COMMAND', 'C_COMMAND'):
                instruction_number += 1
            parser.advance()
    return symbol_table
Exemple #5
0
def main():
    """Assemble the ``.asm`` file named on the command line.

    The output path is the input path with its extension replaced by
    ``.hack``.  The source is read twice through ``HackParser``:

    1. Every L_COMMAND symbol is stored in the symbol table together with
       the number of A/C commands that precede it.
    2. One line is written per A_COMMAND / C_COMMAND; symbols that are not
       yet in the table are registered with ``add_variable``.

    Raises:
        ValueError: if an A_COMMAND operand is not matched by
            ``symbol_pattern`` or the parser reports an unknown command
            type (previously this re-wrote a stale line or failed with an
            unrelated NameError/AttributeError).
    """
    parser = argparse.ArgumentParser(description='Process some integers.')
    parser.add_argument('asm_file', type=str, help='asm file')

    args = parser.parse_args()
    asm_file = args.asm_file

    save_file = os.path.splitext(asm_file)[0] + ".hack"

    st = SymbolTable()

    # First pass: labels only.
    with HackParser(asm_file) as hp:
        op_address = 0
        while hp.advance() is not None:
            cmd_type = hp.command_type()
            if cmd_type in (A_COMMAND, C_COMMAND):
                op_address += 1
            elif cmd_type == L_COMMAND:
                st.add_entry(hp.symbol(), op_address)

    # Second pass: code generation.
    with HackParser(asm_file) as hp, open(save_file, 'w') as wf:
        while hp.advance() is not None:
            cmd_type = hp.command_type()

            if cmd_type == A_COMMAND:
                symbol = hp.symbol()
                m = symbol_pattern.match(symbol)
                if m is None:
                    raise ValueError(
                        'invalid A-command operand: %r' % (symbol,))

                if m.group(1):  # @value
                    address = int(m.group(1))
                elif m.group(2):  # @symbol
                    symbol = m.group(2)
                    if not st.contains(symbol):
                        st.add_variable(symbol)
                    address = st.get_address(symbol)
                else:
                    raise ValueError(
                        'invalid A-command operand: %r' % (symbol,))
                bincode = "0" + int2bin(address, 15)

            elif cmd_type == C_COMMAND:
                bincode = ('111' + code_writer.comp(hp.comp())
                           + code_writer.dest(hp.dest())
                           + code_writer.jump(hp.jump()))

            elif cmd_type == L_COMMAND:
                # Labels were consumed by the first pass; nothing to emit.
                continue

            else:
                raise ValueError('unknown command type: %r' % (cmd_type,))

            wf.write(bincode + '\n')
Exemple #6
0
def main():
    """Translate the program named by ``sys.argv[1]`` into ``<name>.hack``.

    ``<name>`` is the argument with its final extension removed; it is the
    value handed to ``Parser`` and the stem of the output file.
    ``first_iter`` fills the symbol table before translation starts.

    C-commands are written as ``'111' + comp + dest + jump``.  Any other
    command except a label is treated as an A-command: an integer operand
    is used directly, a symbol is looked up and, when unknown, registered
    at the next free address (starting at 16).  Label declarations
    (``'L_COMMAND'``) produce no output.
    """
    import os  # local import: only this function needs it

    # splitext keeps directory parts such as "./" or "../" intact, which
    # ``split('.')[0]`` did not.
    filename = os.path.splitext(sys.argv[1])[0]
    symbol_table = SymbolTable()
    first_iter(symbol_table)
    parser = Parser(filename)
    code = Code()
    output = []
    address = 16
    try:
        while parser.has_more_commands():
            parser.advance()
            command_type = parser.command_type()

            if command_type == 'C_COMMAND':
                dest = parser.dest()
                comp = parser.comp()
                jump = parser.jump()
                output.append("111" + code.comp(comp) + code.dest(dest) +
                              code.jump(jump))
                continue

            if command_type == 'L_COMMAND':
                # A label only names an address; emitting a word for it
                # would shift every following instruction.
                continue

            symbol = parser.symbol()
            try:
                symbol_address = int(symbol)
            except ValueError:
                # Not a literal, so it is a symbol.
                if not symbol_table.contains(symbol):
                    symbol_table.add_entry(symbol, address)
                    address += 1
                symbol_address = symbol_table.get_address(symbol)
            output.append(bin(symbol_address)[2:].zfill(16))
    finally:
        parser.close()

    with open(filename + '.hack', 'w') as hack_file:
        hack_file.writelines(line + '\n' for line in output)
Exemple #7
0
    def two_pass_assembly(self):
        """Assemble the parser's commands in two passes.

        Pass 1 stores every L_COMMAND symbol in a fresh ``SymbolTable``
        together with the number of non-label commands that precede it.
        Pass 2 writes one line per A_COMMAND / C_COMMAND to
        ``self.parser.output``.  A symbol that is still unknown in pass 2
        is registered at the next free RAM address (starting at 16) and
        that same address is emitted.
        """
        symbol_table = SymbolTable()
        rom_address = 0
        print("starting first pass")
        while self.parser.has_more_commands():
            # Return value is not needed here.
            # NOTE(review): call kept in case the parser relies on it -
            # confirm it is a pure getter before removing.
            self.parser.get_current_command()
            if self.parser.commands.L_COMMAND == self.parser.command_type():
                print("L: " + self.parser.symbol())
                symbol_table.add_entry(self.parser.symbol(), rom_address)
            else:
                rom_address += 1
            self.parser.advance()

        self.parser.reset()

        # Variables live in RAM and get their own counter; reusing the ROM
        # counter made their addresses depend on the program length.
        ram_address = 16

        print("starting second pass")
        while self.parser.has_more_commands():
            self.parser.get_current_command()
            if self.parser.commands.A_COMMAND == self.parser.command_type():
                sym = self.parser.symbol()
                if sym.isdigit():
                    val = sym
                elif symbol_table.contains(sym):
                    val = symbol_table.get_address(sym)
                else:
                    # Emit the address that was just stored, then advance.
                    symbol_table.add_entry(sym, ram_address)
                    val = ram_address
                    ram_address += 1

                self.parser.output.write(
                    "0" + '{0:015b}'.format(int(val)) + "\n")

            elif self.parser.commands.C_COMMAND == self.parser.command_type():
                self.parser.output.write("111"
                                         + self.code.comp(self.parser.comp())
                                         + self.code.dest(self.parser.dest())
                                         + self.code.jump(self.parser.jump())
                                         + "\n")
            self.parser.advance()
    # NOTE(review): the header of this routine is not visible here and the
    # C_COMMAND branch below is cut off; comments describe visible lines only.
    symbol_table = SymbolTable()
    binary_code = []
    ram_address = 16
    flag = 0

    register_symble(parser, symbol_table)
    parser.reset_idx()

    while parser.hasMoreCommands():

        parser.advance()
        cmdtype = parser.commandType()
        if cmdtype == "A_COMMAND":
            symbol = parser.symbol()
            # Operands containing a letter are treated as symbols.
            if re.search("[A-Za-z]", symbol) != None:
                if symbol_table.contains(symbol):
                    address = symbol_table.getAddress(symbol)  # hexadecimal
                else:
                    address = format(ram_address, '04x')  # hexadecimal
                    symbol_table.addEntry(symbol, address)
                    ram_address += 1

                # Convert the hexadecimal string back to an integer.
                symbol = int(address, 16)

            # Left-pad the binary digits with zeros to 16 characters.
            binary = bin(int(symbol))[2:]
            bit16 = ("0" * (16 - len(binary))) + binary

        elif cmdtype == "C_COMMAND":
            dest_bin = code.dest2bin(parser.dest())
            comp_bin = code.comp2bin(parser.comp())
            jump_bin = code.jump2bin(parser.jump())
class Assembler:
    """Drive the translation of one ``.asm`` file.

    ``assemble`` lexes the file, resolves its symbols and writes two files
    next to the input: ``<name>L.asm`` (symbols replaced by addresses) and
    ``<name>.hack`` (one 16-character binary line per instruction).
    """

    def __init__(self):
        self._code = Code()
        self._lexer = Lexer()
        self._parser = Parser()
        self._symbol_table = SymbolTable()

    def assemble(self, path):
        """Run the whole pipeline for the source file at *path*."""
        # Read file content
        content = self._read_file_contents(path)
        # Lex the file contents
        self._lexer.lex(content)
        # Pass the tokens to the parser
        self._parser.set_tokens(self._lexer.tokens())
        # Build symbols
        self._build_symbols()
        # Translate to symbolless asm
        self._write_symbolless_asm(path)
        # Translate binary
        self._write_binary(path)

    @staticmethod
    def _output_path(path, suffix):
        """Return the sibling of *path* named ``<name><suffix>``.

        ``<name>`` is the basename of *path* up to its last ``.asm``.
        ``os.path.join`` is used so that a bare filename (empty dirname)
        stays relative instead of becoming ``/<name><suffix>``.
        """
        basename = os.path.basename(path)
        name = basename.rsplit('.asm', 1)[0]
        return os.path.join(os.path.dirname(path), name + suffix)

    def _write_symbolless_asm(self, path):
        """Write ``<name>L.asm``: the program with symbols resolved.

        Labels are dropped, ``@symbol`` becomes ``@<address>`` and every
        other command is copied as the parser reports it.  Exits with
        status 1 if a symbol has no address in the symbol table.
        """
        with open(self._output_path(path, 'L.asm'), 'w') as out:
            self._parser.reset_pos()
            while self._parser.has_commands():
                command_type = self._parser.command_type()
                if command_type is Command.L_COMMAND:
                    self._parser.advance()
                    continue
                if command_type is Command.A_COMMAND:
                    symbol = self._parser.symbol()
                    if symbol is not None:
                        value = self._symbol_table.get_address(symbol)
                        if value is None:
                            sys.stderr.write(
                                '[Assembler]: Symbol not defined %s' %
                                symbol)
                            # SystemExit unwinds through ``with``, which
                            # closes the file.
                            sys.exit(1)
                    else:
                        value = self._parser.value()
                    out.write('@%s' % value)
                elif command_type in [
                        Command.C_COMMAND_JMP, Command.C_COMMAND_COMP
                ]:
                    out.write(self._parser.text())
                out.write('\n')
                self._parser.advance()

    def _write_binary(self, path):
        """Write the result of ``_translate`` to ``<name>.hack``."""
        binary = self._translate()
        with open(self._output_path(path, '.hack'), 'w') as out:
            out.write(binary)

    def _build_symbols(self):
        """Fill the symbol table with predefined, label and variable symbols.

        Exits with status 1 if a label is declared twice or collides with
        a predefined symbol.
        """
        symbols_count = 0
        self._init_symbol_table()
        # First pass - get label symbols
        while self._parser.has_commands():
            if self._parser.command_type() is Command.L_COMMAND:
                symbol = self._parser.symbol()
                if self._symbol_table.contains(symbol):
                    sys.stderr.write(
                        '[Assembler]: Symbol %s is used more than once' %
                        symbol)
                    sys.exit(1)
                # Labels occupy a parser position but no instruction slot,
                # so subtract the labels seen so far.
                address = self._parser.pos() - symbols_count
                self._symbol_table.add_entry(symbol, address)
                symbols_count += 1
            self._parser.advance()
        # Second pass - get variable symbols
        variable_address = 16
        self._parser.reset_pos()
        while self._parser.has_commands():
            if self._parser.command_type() is Command.A_COMMAND:
                symbol = self._parser.symbol()
                if symbol is not None and \
                        not self._symbol_table.contains(symbol):
                    self._symbol_table.add_entry(symbol, variable_address)
                    variable_address += 1
            self._parser.advance()

    def _translate(self):
        """Return the binary program as text, one line per instruction.

        Exits with status 1 if an A-command names a symbol that is not in
        the symbol table.
        """
        lines = []
        self._parser.reset_pos()
        while self._parser.has_commands():
            command_type = self._parser.command_type()
            if command_type is Command.A_COMMAND:
                symbol = self._parser.symbol()
                if symbol is not None:
                    if not self._symbol_table.contains(symbol):
                        sys.stderr.write('[Assembler]: Unknown symbol: %s' %
                                         symbol)
                        sys.exit(1)
                    address = self._symbol_table.get_address(symbol)
                    lines.append(self._to_binary(address))
                else:
                    value = self._parser.value()
                    lines.append(self._to_binary(int(value)))
            elif command_type is Command.C_COMMAND_COMP:
                comp = self._code.comp(self._parser.comp())
                dest = self._code.dest(self._parser.dest())
                lines.append('111' + comp + dest + '000')
            elif command_type is Command.C_COMMAND_JMP:
                comp = self._code.comp(self._parser.comp())
                jump = self._code.jump(self._parser.jump())
                lines.append('111' + comp + '000' + jump)
            self._parser.advance()
        # Join once instead of growing a string inside the loop.
        return ''.join(line + '\n' for line in lines)

    def _init_symbol_table(self):
        """Reset the symbol table to the predefined symbols only."""
        self._symbol_table.clear()
        entries = {
            'SP': 0,
            'LCL': 1,
            'ARG': 2,
            'THIS': 3,
            'THAT': 4,
            'SCREEN': 16384,
            'KBD': 24576,
        }
        # R0 to R15
        for address in range(16):
            entries['R%d' % address] = address
        # Initialize symbol table with predefined symbols
        for key, value in entries.items():
            self._symbol_table.add_entry(key, value)

    def _read_file_contents(self, path):
        """Return the full text of the file at *path*."""
        with open(path, 'r') as source:
            return source.read()

    def _to_binary(self, value):
        """Return the integer *value* as a zero-padded 16-digit binary string."""
        return format(value, '016b')
Exemple #10
0
class Parser(object):
    """Line-oriented parser for Hack assembly.

    On construction the file is read into ``self.lines`` (comments and
    whitespace removed), labels are stored in ``self.symbol_table`` and
    every ``@symbol`` line is rewritten to ``@<address>``.  Afterwards the
    commands can be walked with ``hasMoreCommands`` / ``advance``.
    """

    A_CMD = 'A_COMMAND'
    C_CMD = 'C_COMMAND'
    L_CMD = 'L_COMMAND'

    def __init__(self, file_name):
        self.lines = []
        self.current_position = 0
        # Defined up front so commandType() reports "empty" instead of
        # failing with AttributeError before the first advance().
        self.current_command = None
        self.symbol_table = SymbolTable()
        self.readInFile(file_name)
        self.addLabelsToSymbolTable()
        self.substituteVars()

    def readInFile(self, file_name):
        """Append the commands found in *file_name* to ``self.lines``.

        Everything from ``//`` to the end of a line is dropped, as is all
        whitespace; lines that end up empty are skipped.
        """
        with open(file_name, "r") as fp:
            for line in fp:
                command = "".join(line.split("//", 1)[0].split())
                if command:
                    self.lines.append(command)

    def addLabelsToSymbolTable(self):
        """Store each label with the index of the instruction following it."""
        instruction_count = 0
        while self.hasMoreCommands():
            self.advance()
            if self.commandType() == Parser.L_CMD:
                self.symbol_table.addEntry(self.symbol(), instruction_count)
            else:
                instruction_count += 1
        # reset current_position after reading
        self.current_position = 0

    def substituteVars(self):
        """Rewrite every ``@symbol`` line to ``@<address>``.

        Symbols missing from the table are added first.
        NOTE(review): ``addEntry`` is called with the name only here, so
        the table is presumably choosing the address - confirm.
        """
        while self.hasMoreCommands():
            self.advance()
            if self.commandType() != Parser.A_CMD:
                continue
            var = self.symbol()
            if var.isdigit():
                continue
            if not self.symbol_table.contains(var):
                self.symbol_table.addEntry(var)
            # advance() already moved on, so the current line is at -1.
            self.lines[self.current_position - 1] = "@{}".format(
                self.symbol_table.getAddress(var))
        # reset current_position after reading
        self.current_position = 0

    def commandType(self):
        """Return ``A_CMD``, ``L_CMD`` or ``C_CMD`` for the current command.

        Raises:
            Exception: if there is no current command.
        """
        if not self.current_command:
            raise Exception("current_command is empty")

        if self.current_command.startswith("@"):
            return self.A_CMD

        if self.current_command.startswith("("):
            return self.L_CMD

        return self.C_CMD

    def hasMoreCommands(self):
        """Return True while ``advance`` can still be called."""
        return self.current_position < len(self.lines)

    def advance(self):
        """Make the next line the current command."""
        self.current_command = self.lines[self.current_position]
        self.current_position += 1

    def symbol(self):
        """Return the current command without leading/trailing ``@()``."""
        return self.current_command.strip("@()")

    def dest(self):
        """Return the text before ``=``, or None when there is no dest."""
        parts = self.current_command.split("=")
        if len(parts) == 2:
            return parts[0]

    def comp(self):
        """Return the comp field of the current C-command.

        The field is what remains after removing an optional ``dest=``
        prefix and an optional ``;jump`` suffix, so ``D=M;JGT`` yields
        ``M`` (it used to yield ``D=M``).
        """
        command = self.current_command
        if "=" in command:
            command = command.split("=", 1)[1]
        return command.split(";", 1)[0]

    def jump(self):
        """Return the text after ``;``, or None when there is no jump."""
        parts = self.current_command.split(";")
        if len(parts) == 2:
            return parts[1]
Exemple #11
0
class Assembler:
    """Translate one ``.asm`` file into a ``.hack`` file of the same name."""

    def __init__(self, filename):
        import os  # local import: only needed to derive the output name

        self.asm_filename = filename
        # Replace the real extension instead of assuming a 3-letter suffix.
        self.hack_filename = os.path.splitext(self.asm_filename)[0] + '.hack'

        # Build the parser first so that an unreadable source does not
        # leave a freshly truncated .hack file and an open handle behind.
        self.parser = Parser(self.asm_filename)
        self.code = Code()
        self.symbol_table = SymbolTable()
        self.next_available_address = 16
        self.outfile = open(self.hack_filename, 'w')

    def translate(self):
        """Translates all the commands in the .asm file, writing the resulting
        binary code to the .hack file.

        The output file is closed even if one of the passes raises.
        """
        try:
            self.first_pass()
            self.parser.reset()
            self.second_pass()
        finally:
            self.outfile.close()

    def first_pass(self):
        """Goes through the input file, adding all the labels to the symbol
        table.
        """
        label = self.parser.first_label()
        while not self.parser.is_done():
            self.symbol_table.add_entry(label, self.parser.label_address())
            label = self.parser.next_label()

    def second_pass(self):
        """Goes through the input file for a second time, translating the
        commands into binary, and adding symbols to the symbol table as
        required.
        """
        while not self.parser.is_done():
            self.process_command()
            self.parser.next_command()

    def process_command(self):
        """Translates the current command of the parser, writing the
        corresponding binary command to the output file (including a newline
        character).  Labels produce no output.
        """
        command_type = self.parser.command_type()
        if command_type == 'C_COMMAND':
            self.process_c_command()
        elif command_type == 'A_COMMAND':
            self.process_a_command()

    def process_c_command(self):
        """Translates the current C-command of the parser, writing the
        corresponding binary command to the output file (including a newline
        character).
        """
        dest_binary = self.code.dest(self.parser.dest)
        comp_binary = self.code.comp(self.parser.comp)
        jump_binary = self.code.jump(self.parser.jump)
        binary_command = '111' + comp_binary + dest_binary + jump_binary
        self.outfile.write(binary_command + '\n')

    def process_a_command(self):
        """Translates the current A-command of the parser, writing the
        corresponding binary command to the output file (including a newline
        character).

        A numeric operand is used as the address.  A symbol is looked up in
        the symbol table and, when missing, stored at the next available
        address (starting at 16).
        """
        symbol = self.parser.symbol

        if symbol.isdigit():
            address = int(symbol)
        elif self.symbol_table.contains(symbol):
            address = self.symbol_table.get_address(symbol)
        else:
            address = self.next_available_address
            self.symbol_table.add_entry(symbol, address)
            self.next_available_address += 1

        binary_command = self.binary_number(address)
        self.outfile.write(binary_command + '\n')

    def binary_number(self, symbol):
        """Takes a decimal number (int or numeric string) and converts it to
        a 16-bit binary number in the form of a string.
        """
        return format(int(symbol), '016b')
Exemple #12
0
class Parser():
    """Parser: Encapsulates access to the input code. Reads an assembly language command, parses it, and provides convenient access to the command's components (fields and symbols). In addition, removes all white space and comments."""
    def __init__(self, in_file):
        """Get the input file and gets ready to parse it. Instantiate a Code2bin for binary translation"""
        self.in_file = in_file
        self.code2bin = Code2Bin()
        self.symb_table = SymbolTable()

    def read_in_file(self):
        """First pass: buffer every command in ``self.code_contents`` and
        record each label with the ROM address of the instruction after it."""
        self.code_contents = []
        ROM_address = 0
        with open(self.in_file, 'r', encoding='utf_8') as inf:
            for line in inf:
                command = self.process(line)
                # If returned command is an empty line after processed, skip it
                if not command:
                    continue
                self.code_contents.append(command)
                if self.command_type(command) == 'L_COMMAND':
                    self.symb_table.add_entry(
                        self.get_symbol(command), ROM_address)
                else:
                    # Only real instructions occupy a ROM slot.
                    ROM_address += 1

    def process(self, line):
        """Removes all white space (including inside the command) and comments"""
        return ''.join(line.split('//', 1)[0].split())

    def command_type(self, command):
        """
        Returns the type of the current command:
        A_COMMAND for @Xxx where Xxx is either a symbol or a decimal number
        C_COMMAND for dest=comp;jump (Either the dest or jump fields may be empty. If dest is empty, the "=" is omitted; If jump is empty, the ";" is omitted.)
        L_COMMAND (pseudo-command) for (Xxx) where Xxx is a symbol

        A command with neither "=" nor ";" that is not a label is a
        C_COMMAND consisting of a comp field only.
        """
        if '@' in command:
            return 'A_COMMAND'
        if '=' in command or ';' in command:
            return 'C_COMMAND'
        if '(' in command and ')' in command:
            return 'L_COMMAND'
        # read_in_file already counts such a line as an instruction, so it
        # must not be dropped silently during translation.
        return 'C_COMMAND'

    def get_symbol(self, command):
        """Returns the symbol or decimal Xxx of the current command @Xxx or (Xxx). Should be called only when command_type() is A_COMMAND or L_COMMAND."""
        return command.replace('@', '').replace('(', '').replace(')', '')

    def get_dest_comp_jump(self, command):
        """Returns the dest, comp, jump mnemonic in the current C-command. Should be called only when command_type()is C_COMMAND

        A missing dest or jump is reported as the string 'null'.
        """
        dest, equals, rest = command.partition('=')
        if not equals:
            dest, rest = 'null', command
        comp, semicolon, jump = rest.partition(';')
        if not semicolon:
            jump = 'null'
        return dest, comp, jump

    def translate(self):
        """Second pass, translate code_contents to binary contents"""
        self.out_binarys = []
        available_RAM_address = 16
        for command in self.code_contents:
            cmd_type = self.command_type(command)
            if cmd_type == 'A_COMMAND':
                symbol = self.get_symbol(command)
                if symbol.isdigit():
                    address = int(symbol)
                elif self.symb_table.contains(symbol):
                    address = int(self.symb_table.get_address(symbol))
                else:
                    # First use of a variable: give it the next RAM slot.
                    address = available_RAM_address
                    self.symb_table.add_entry(symbol, address)
                    available_RAM_address += 1
                self.out_binarys.append('0{:015b}'.format(address))
            elif cmd_type == 'C_COMMAND':
                dest, comp, jump = self.get_dest_comp_jump(command)
                dest_binary = self.code2bin.dest2bin(dest)
                comp_binary = self.code2bin.comp2bin(comp)
                jump_binary = self.code2bin.jump2bin(jump)
                self.out_binarys.append(
                    '111' + comp_binary + dest_binary + jump_binary)

    def write_out_binarys(self):
        """Write ``self.out_binarys`` to the input path with a ``.hack`` extension."""
        import os  # local import: only needed to derive the output name

        # Swap the extension only; str.replace('asm', 'hack') also rewrote
        # any 'asm' inside directory or file names.
        out_file = os.path.splitext(self.in_file)[0] + '.hack'
        with open(out_file, 'w', encoding='utf_8') as outf:
            outf.writelines(binary + '\n' for binary in self.out_binarys)
        print(out_file, 'finished assembling.')

    def parse(self):
        """Run both passes and write the result."""
        self.read_in_file()
        self.translate()
        self.write_out_binarys()
Exemple #13
0
class AssmParser(AssmCommandType, Parser):
    """Parser that also encodes the current command as a 16-character word.

    The field accessors do not return anything; they store their result on
    the instance (``current_symbol``, ``current_dest``, ``current_comp``,
    ``current_a``, ``current_jump``) and the ``binarize_*`` methods store
    the finished word in ``bin_current``.
    """

    def __init__(self, file_name):
        """ Open the input file/stream and gets ready to parse it """
        super(AssmParser, self).__init__(file_name)
        # Next address handed to a symbol first seen in binarize_a_symbol.
        self.RAM = 16

        self.symbol_table = SymbolTable(self.buff)
        self.symbol_table.find_symbols()

    def symbol(self):
        """
        Store the current command without its first character in
        ``self.current_symbol``.

        Should be called only when commandType() is A_COMMAND or L_COMMAND.
        NOTE(review): for ``(Xxx)`` the trailing ``)`` is kept - confirm
        whether callers expect that.
        """
        self.current_symbol = self.current_command[1:]

    def dest(self):
        """
        Store the encoded dest field of the current C_COMMAND in
        ``self.current_dest``.

        The mnemonic is the text before ``=``, or ``'null'`` when the
        command has no ``=``.
        Should be called only when AssmCommandType() is C_COMMAND.
        """
        if '=' in self.current_command:
            d = self.current_command.split('=')[0]
        else:
            d = 'null'

        code = Code()
        self.current_dest = code.dest(d)

    def comp(self):
        """
        Store the encoded comp field of the current C_COMMAND in
        ``self.current_comp`` and the accompanying value returned by
        ``Code.comp`` in ``self.current_a``.

        The mnemonic is what remains after removing an optional ``dest=``
        prefix and an optional ``;jump`` suffix, so ``D=M;JGT`` yields
        ``M`` and a bare ``D+1`` yields ``D+1`` (the latter used to raise
        UnboundLocalError, the former kept ``;JGT`` attached).
        Should be called only when commandType() is C_COMMAND.
        """
        c = self.current_command
        if '=' in c:
            c = c.split('=', 1)[1]
        if ';' in c:
            c = c.split(';', 1)[0]

        code = Code()
        self.current_comp, self.current_a = code.comp(c)

    def jump(self):
        """
        Store the encoded jump field of the current C_COMMAND in
        ``self.current_jump``.

        The mnemonic is the text after ``;``; without a ``;`` it is
        ``'null'`` when the command has an ``=`` and ``None`` otherwise.
        Should be called only when commandType() is C_COMMAND.
        """
        if ';' in self.current_command:
            j = self.current_command.split(';')[1]
        elif '=' in self.current_command:
            j = 'null'
        else:
            j = None
        code = Code()
        self.current_jump = code.jump(j)

    def __repr__(self):
        return self.asmfile + '\n'.join(self.buff)

    def binarize_c_command(self):
        """Encode the current C_COMMAND into ``self.bin_current``."""
        self.comp()
        self.dest()
        self.jump()
        self.bin_current = ('111' + self.current_a + self.current_comp
                            + self.current_dest + self.current_jump)

    def binarize_a_symbol(self):
        """Encode ``@symbol`` into ``self.bin_current``.

        A symbol missing from the table is added at ``self.RAM``, which is
        then incremented.
        NOTE(review): the table is keyed by the whole command, including
        the leading ``@`` - confirm this matches ``find_symbols``.
        """
        if not self.symbol_table.contains(self.current_command):
            self.symbol_table.addEntry(self.current_command, self.RAM)
            self.RAM = self.RAM + 1

        address = self.symbol_table.get_address(self.current_command)
        bin_address = bin(address)[2:]
        self.bin_current = '0' * (16 - len(bin_address)) + bin_address

    def binarize_a_address(self):
        """Encode ``@<number>`` into ``self.bin_current``."""
        address = int(self.current_command[1:])
        bin_address = bin(address)[2:]
        self.bin_current = '0' * (16 - len(bin_address)) + bin_address

    def binarize_a_command(self):
        """Encode the current A_COMMAND, numeric or symbolic."""
        # An operand starting with a digit is taken to be a number.
        if re.match(r'^@[0-9].*$', self.current_command):
            self.binarize_a_address()
        else:
            self.binarize_a_symbol()
class AssemblerSymb:
    """Three-step assembler: collect labels, collect variables, write output.

    The output file is ``<input path without extension>2.hack`` and is
    opened when the object is created; ``createOutput`` closes it.
    """

    def __init__(self, path):
        self.parser = Parser(path)
        self.code = Code()
        self.symb_table = SymbolTable()
        self.file = open(self._output_path(path), 'w')

    @staticmethod
    def _output_path(path):
        """Return *path* with its extension replaced by ``2.hack``.

        Uses ``os.path.splitext`` so that bare filenames and directories
        containing dots are handled; the previous ``find('/')`` /
        ``find('.')`` slicing was only right for ``dir/name.ext``.
        """
        import os  # local import: only needed to derive the output name

        return os.path.splitext(path)[0] + '2.hack'

    def binary(self, s):
        """Return ``int(s)`` as binary digits without padding."""
        return "{0:b}".format(int(s))

    def firstPass(self):
        """Store every label with the number of A/C commands before it.

        Raises:
            ValueError: if the parser reports an unknown command type.
        """
        counter = 0
        while self.parser.hasMoreCommands():
            self.parser.advance()
            command_type = self.parser.commandType()
            if command_type in ['A_COMMAND', 'C_COMMAND']:
                counter += 1
            elif command_type == 'L_COMMAND':
                symbol = self.parser.symbol()
                self.symb_table.addEntry(symbol, counter)
            else:
                raise ValueError("Unexpected command type encountered")

    def secondPass(self):
        """Give every still-unknown A-command symbol a RAM address from 16 up."""
        ram_address = 16
        # Rewind the parser to just before its first command.
        self.parser.i = -1
        while self.parser.hasMoreCommands():
            self.parser.advance()
            if self.parser.commandType() != 'A_COMMAND':
                continue
            symbol = self.parser.symbol()
            if not symbol.isdigit() and not self.symb_table.contains(symbol):
                self.symb_table.addEntry(symbol, ram_address)
                ram_address += 1

    def createOutput(self):
        """Write one 16-character line per A/C command, then close the file.

        The file is closed even if translation fails part-way.
        """
        self.parser.i = -1
        try:
            while self.parser.hasMoreCommands():
                self.parser.advance()
                command_type = self.parser.commandType()
                if command_type == 'A_COMMAND':
                    symbol = self.parser.symbol()
                    if symbol.isdigit():
                        bin_symbol = self.binary(symbol)
                    else:
                        symb_add = self.symb_table.getAddress(symbol)
                        bin_symbol = self.binary(symb_add)
                    # Left-pad with zeros to the 16-character word size.
                    a_command = '0' * (16 - len(bin_symbol)) + bin_symbol
                    self.file.write(a_command + '\n')
                elif command_type == 'C_COMMAND':
                    dest = self.code.dest(self.parser.dest())
                    comp = self.code.comp(self.parser.comp())
                    jump = self.code.jump(self.parser.jump())
                    self.file.write('111' + comp + dest + jump + '\n')
                # Labels produce no output.
        finally:
            self.file.close()
Exemple #15
0
class EmbindGenerator(object):
    def __init__(self, symbol_dir, classes):
        """Load the symbol database and set up the generator state.

        symbol_dir -- directory handed to SymbolTable.LoadSymbolsFromDirectory.
        classes    -- iterable of class names to generate bindings for.
        """
        self.symbol_table = SymbolTable()
        self.symbol_table.LoadSymbolsFromDirectory(symbol_dir)

        self.classes = classes

        # Type names that may appear in an exposed signature. Only ever used
        # for membership tests, so each name is listed exactly once (the
        # original list carried 'unsigned int' twice).
        self.known_symbols = [
            '',  # Typeless 'types', e.g. return value of ctor is parsed to an empty string.
            'void',
            'bool',
            'char',
            'signed char',
            'unsigned char',
            'short',
            'signed short',
            'unsigned short',
            'int',
            'signed int',
            'unsigned int',
            'long',
            'signed long',
            'unsigned long',
            'float',
            'double',
            'std::string',
            'emscripten::val',
        ]

        # "Parent::name" keys of the selectors already written to the JS file.
        self.generated_function_selectors = []

        # Lists the types for which we need to generate type identifier functions for.
        self.require_type_function = []

    def output(self, output):
        self.cpp_out = open('{}.cpp'.format(output), 'w')
        self.js_out = open('{}.js'.format(output), 'w')

        self.cpp_out.write('''#ifdef EMSCRIPTEN

#include <emscripten/bind.h>
using namespace emscripten

#include "embind_prologue.h" # Implement this file and all required code and header files to compile this file here."
''')

        self.js_out.write('function RegisterFunctionSelectors() {\n')

        for c in self.classes:
            self.GenerateCtorFunctions(c)

        # TODO:: if we create a separate bindings file for each, generate a unique name here.
        self.cpp_out.write('EMSCRIPTEN_BINDINGS(bindings) {\n\n')

        for c in self.classes:
            self.WriteForwardDeclaration(c)

        for c in self.classes:
            self.GenerateEmbindFile(c, self.known_symbols)

        self.cpp_out.write("\n}\n\n")
        self.cpp_out.write("#endif\n")
        self.cpp_out.close()

        print("Writing embind_symbols.cpp done.")

        self.GenerateTypeIdAssignments()

        self.js_out.write("}\n")
        self.js_out.write(
            "window['RegisterFunctionSelectors'] = RegisterFunctionSelectors;\n"
        )

        self.GenerateIsOfTypeFunctions()

        self.js_out.close()

        print("Writing embind_symbols.js done.")

    def WriteForwardDeclaration(self, class_name):
        """Placeholder hook: currently writes nothing for *class_name*."""
        return None


#            string t = "auto " + class_name + "_class = class_<" + class_name + ">(\"" + class_name + "\");\n\n"
#            self.cpp_out.Write(t)

    def FetchFunctionOverloads(self, function):
        functionOverloads = [
            s for s in function.parent.children
            if s.name == function.name and self.IsGoodSymbol(s)
        ]
        functionOverloads.sort(
            lambda left, right: len(right.parameters) - len(left.parameters))
        return functionOverloads

    def GenerateTypeIdAssignments(self):
        typeIdCounter = 1
        for n, _type in enumerate(self.require_type_function):
            if _type in ["float", "int"]:
                continue
            self.js_out.write(
                '    Module.{0}.prototype.TypeId = {1}; /* Magic automatically generated TypeId number for {0} */\n'
                .format(_type, n + 1))

    def GenerateIsOfTypeFunctions(self):
        self.js_out.write('''function isNumber(value) {
    if ((undefined === value) || (null === value)) {
        return false;
    }
    if (typeof value == 'number') {
        return true;
    }
    return !isNaN(value - 0);
}
''')
        for n, _type in enumerate(self.require_type_function):
            if _type in ['float', 'int']:
                self.js_out.write(
                    'function IsOfType_{}(obj) {{ return isNumber(obj); }}\n'.
                    format(_type))
            else:
                self.js_out.write(
                    '''/* Magic automatically generated TypeId number for {0} */
function IsOfType_{0}(obj) {{ return obj != undefined && obj != null && obj.TypeId == {1}; }}\n'''
                    .format(_type, n))

    def GenerateFunctionSelector(self, functionOverloads):
        functionOverloads.sort(
            lambda left, right: len(right.parameters) - len(left.parameters))
        function = functionOverloads[0]
        if (function.parent.name + "::" +
                function.name) in self.generated_function_selectors:
            return
        self.generated_function_selectors.append(function.parent.name + "::" +
                                                 function.name)
        if len(function.parameters) == 0:
            return

        # TODO: Add support for REAL ctors.
        isCtor = (function.name == function.parent.name)
        if isCtor:
            prototype = ['{}_ = function('.format(function.name)]
        else:
            prototype = [
                '{}.prototype.{} = function('.format(function.parent.name,
                                                     function.name)
            ]
        prototype.append(', '.join(
            ['arg{}'.format(i + 1) for i in len(function.parameters)]))
        prototype.append(') {\n')
        self.js_out.write(''.join(prototype))

        thisFunc = functionOverloads[0]
        for i in xrange(len(functionOverloads)):
            nextFunc = functionOverloads[i +
                                         1] if i < (len(functionOverloads) -
                                                    1) else None
            self.js_out.write("    ")
            if i: self.js_out.write("else ")
            if nextFunc:
                if len(thisFunc.parameters) != len(nextFunc.parameters):
                    self.js_out.write("if (arg{} != undefined)".format(
                        len(thisFunc.parameters)))
                else:
                    self.js_out.write("if (")
                    for j in xrange(len(thisFunc.parameters)):
                        _type = thisFunc.parameters[j].BasicType()
                        if j: self.js_out.write(" && ")
                        self.js_out.write("IsOfType_{}(arg{})".format(
                            _type, j + 1))
                        if _type not in self.require_type_function:
                            self.require_type_function.append(_type)
                    self.js_out.write(")")
            self.js_out.write("\n")
            self.js_out.Write(
                '    {}'.format('return ' if thisFunc.type != 'void' else ''))
            self.js_out.Write('{}.{}'.format('Module' if isCtor else 'this',
                                             function.name))
            for p in thisFunc.parameters:
                self.js_out.write("_" + p.BasicType().replace(
                    ':', '_').replace('<', '_').replace('>', '_'))
            paramList = [
                'arg{}'.format(j + 1) for j in xrange(len(thisFunc.parameters))
            ]
            self.js_out.write('({});\n'.format(', '.join(paramList)))
            thisFunc = nextFunc

        self.js_out.write('}\n')

    def IsGoodSymbol(s):
        if ("noembind" in s.attributes or s.type not in self.known_symbols):
            return False
        for p in s.parameters:
            if p.BasicType() not in self.known_symbols:
                return False
        return True

    def GenerateEmbindFile(self, class_name, known_symbols=None):
        """Write the embind ``class_<...>`` registration for *class_name*.

        class_name    -- name of the class to look up in the symbol table.
        known_symbols -- type names accepted in exposed signatures; defaults
                         to ``self.known_symbols``.

        Every public function (operators excluded) and public variable of
        the class becomes one line of the registration chain written to
        ``self.cpp_out``. A member that cannot be exposed is still emitted,
        but behind a ``//`` comment that states the reason, so it does not
        take part in the compiled chain. Prints an error and writes nothing
        when the symbol table does not contain the class.
        """
        if known_symbols is None:
            known_symbols = self.known_symbols

        if not self.symbol_table.contains(class_name):
            print(
                "Error: Cannot generate bindings for class '{}', XML for that class doesn't exist!"
                .format(class_name))
            return

        def mangle(type_name):
            # Turn a C++ type name into something usable inside an identifier.
            return type_name.replace(':', '_').replace('<', '_').replace(
                '>', '_')

        # The leading lines are disabled output kept for reference. They must
        # be C++ '//' comments ('#' would start a preprocessor directive),
        # and the literal '{' has to be doubled for str.format.
        code = [
            '//#include <emscripten/bind.h>',
            '//using namespace emscripten;',
            '',
            '//#include "embind_prologue.h" // Implement this file and all required code and header files to compile this file here.',
            '',
            '//EMSCRIPTEN_BINDINGS({0}) {{'.format(class_name),
            'class_<{0}>("{0}")'.format(class_name),
        ]

        hasCtorExposed = False  # Embind only supports exposing one ctor, so pick the first one.

        s = self.symbol_table.symbol(class_name)
        for f in s.children:
            if f.visibilityLevel != VisibilityLevel.Public:
                continue  # Only public functions and members are exported.

            functionOverloads = self.FetchFunctionOverloads(f)

            # If True, this symbol is exposed. If False, it is commented out.
            good_symbol = "noembind" not in f.attributes
            reason = []
            if not good_symbol:
                reason.append('(ignoring since [noembind] specified)')

            if f.kind == "function" and not f.name.startswith("operator"):
                isCtor = (f.name == class_name)
                if good_symbol and f.type not in known_symbols:
                    good_symbol = False
                    reason.append('({} is not known to embind)'.format(f.type))

                hasOverloads = len(functionOverloads) > 1
                # The JS side name with which this function will be exposed;
                # overloads get one mangled type suffix per parameter.
                targetFunctionName = [f.name]
                paramList = []
                for p in f.parameters:
                    paramList.append(p.type)
                    if good_symbol and p.BasicType() not in known_symbols:
                        good_symbol = False
                        reason.append('({} is not known to embind)'.format(
                            p.BasicType()))
                    if hasOverloads:
                        targetFunctionName.append('_' + mangle(p.BasicType()))

                # Cast that selects this exact overload, e.g.
                # "int(Class::*)(int,float) const".
                funcPtrType = '{}({}*)({}){}'.format(
                    f.type,
                    '' if f.isStatic else '{}::'.format(class_name),
                    ','.join(paramList),
                    ' const' if f.isConst else '')

                # TODO: Remove this line once multiple ctors is supported!
                if good_symbol and isCtor and hasCtorExposed:
                    good_symbol = False
                    reason.append(
                        "(Multiple constructors not yet supported by embind!)")

                # Comment marker and registration share one line so that the
                # marker really disables the member.
                prefix = ''
                if not good_symbol:
                    prefix = '// /*{}*/'.format(''.join(reason))

                if isCtor:
                    code.append('{}    .constructor<{}>()'.format(
                        prefix, ','.join(paramList)))
                    if good_symbol:
                        hasCtorExposed = True
                else:
                    registrar = 'class_function' if f.isStatic else 'function'
                    code.append('{}    .{}("{}", ({})&{}::{})'.format(
                        prefix, registrar, ''.join(targetFunctionName),
                        funcPtrType, class_name, f.name))

                if hasOverloads and good_symbol:
                    self.GenerateFunctionSelector(functionOverloads)

            elif f.kind == "variable":
                if f.type not in known_symbols:
                    prefix = '// /* {} is not known to embind. */'.format(
                        f.type)
                elif f.IsArray():
                    prefix = '// /* Exposing array types as fields are not supported by embind. */'
                elif f.isStatic:
                    prefix = '// /* Exposing static class variables not yet implemented (are they supported?) */'
                else:
                    prefix = ''
                code.append(prefix + '    .property("{0}", &{1}::{0})'.format(
                    f.name, class_name))

        code.append("    ;")
        code.append("//}")

        self.RegisterCtorFunctions(class_name)

        # Terminate the last line; otherwise whatever is written next would
        # be appended to the '//}' comment.
        self.cpp_out.write('\n'.join(code) + '\n')

    def GenerateCtorFunctions(self, class_name):
        s = self.symbol_table.symbol(class_name)
        ctors = []
        for f in s.children:
            if f.name == s.name and len(
                    f.parameters
            ):  # 0-parameter ctors are created with 'new type();'
                good_ctor = True
                for p in f.parameters:
                    if p.BasicType() not in self.known_symbols:
                        good_ctor = False
                        break
                if good_ctor:
                    ctors.append(f)
                    self.cpp_out.write('{} {}'.format(
                        class_name,
                        '_'.join(class_name +
                                 [p.BasicType() for p in f.parameters])))
                    self.cpp_out.write('{} {{ return {}{}; }}'.format(
                        f.ArgStringWithTypes(), class_name,
                        f.ArgStringWithoutTypes()))
        self.cpp_out.write('\n')
        #            self.js_out.WriteLine(class_name + " = Module." + class_name +";")
        self.js_out.write('window["{0}"] = Module.{0};\n'.format(class_name))
        if len(ctors) > 1:
            self.GenerateFunctionSelector(ctors)
            self.js_out.write(
                'window["{0}_"] = Module.{0}_;\n'.format(class_name))

    def RegisterCtorFunctions(self, class_name):
        s = self.symbol_table.symbol(class_name)
        for f in s.children:
            if f.name == s.name and len(
                    f.parameters
            ):  # 0-parameter ctors are created with 'new type();'
                good_ctor = True
                for p in f.parameters:
                    if p.BasicType() not in self.known_symbols:
                        good_ctor = False
                        break
                if good_ctor:
                    t = '_'.join(class_name +
                                 [p.BasicType() for p in f.parameters])
                    self.cpp_out.write('function("{0}", &{0});\n'.format(t))
Exemple #16
0
    def main():
        """Assemble the .asm file named by the first command-line argument.

        Two files are written next to the input: ``<name>.hack`` with one
        16-bit word per A/C command and ``<name>.ann`` with an annotated
        listing (source text, symbol-free form, machine word in nibbles)
        followed by a dump of the symbol table.
        """
        filename = os.path.join(os.getcwd(), Util.getCommandLineArg(1))
        first_parser = Parser(filename)
        second_parser = Parser(filename)
        symbol_table = SymbolTable()

        # Swap the extension only: str.replace('asm', ...) would also rewrite
        # an 'asm' that occurs in a directory or base name.
        base_name = os.path.splitext(filename)[0]
        hack_filename = base_name + '.hack'
        ann_filename = base_name + '.ann'

        rom_address = 0
        ram_address = 16

        # First pass: bind each label to the address of the next instruction.
        # Command types are compared with ==; `is` tests object identity and
        # only works for strings by accident of interning.
        while first_parser.has_more_commands():
            first_parser.advance()
            command_type = first_parser.command_type()

            if command_type == 'A_COMMAND' or command_type == 'C_COMMAND':
                rom_address += 1
            elif command_type == 'L_COMMAND':
                symbol_table.add_entry(first_parser.symbol(), rom_address,
                                       'LAB')

        # Second pass: translate and annotate. The context manager closes
        # both outputs even when translation raises.
        with open(hack_filename, 'w') as hack_file, \
                open(ann_filename, 'w') as ann_file:
            while second_parser.has_more_commands():
                second_parser.advance()
                command_type = second_parser.command_type()
                machine_command = ''

                if command_type == 'A_COMMAND':
                    symbol = second_parser.symbol()
                    if symbol[0].isdigit():
                        binary = symbol
                    elif symbol_table.contains(symbol):
                        binary = symbol_table.get_address(symbol)
                    else:
                        # First use of a variable: give it the next RAM slot.
                        binary = ram_address
                        symbol_table.add_entry(symbol, ram_address, 'VAR')
                        ram_address += 1

                    machine_command = '{0:016b}\n'.format(int(binary))
                    hack_file.write(machine_command)
                elif command_type == 'C_COMMAND':
                    dest = Code.dest(second_parser.dest())
                    comp = Code.comp(second_parser.comp())
                    jump = Code.jump(second_parser.jump())

                    machine_command = '111{0}{1}{2}\n'.format(comp, dest, jump)
                    hack_file.write(machine_command)

                assembly = second_parser.original_command().strip()
                mc = machine_command.strip()

                # Labels produce no word, so all four groups are empty then.
                annotated_machine = '{} {} {} {}'.format(
                    mc[0:4], mc[4:8], mc[8:12], mc[12:16])

                if command_type == 'L_COMMAND':
                    symbolless_command = symbol_table.get_address(
                        second_parser.symbol())
                elif (command_type == 'A_COMMAND'
                      and not second_parser.symbol().isdigit()):
                    symbolless_command = '@{}'.format(
                        symbol_table.get_address(second_parser.symbol()))
                else:
                    symbolless_command = second_parser.command

                annotated_command = '{:<39} {} {:<11} {}\n'.format(
                    assembly, '//' if command_type else '',
                    symbolless_command, annotated_machine)

                ann_file.write(annotated_command)

            ann_file.write('\n// Symbol Table:\n')

            # Each table value is indexed as (address, kind) below.
            for symbol, address in symbol_table.symbol_table.items():
                ann_file.write('// {}: {:<30} -> {}\n'.format(
                    address[1], symbol, address[0]))
def main():
    """Translate the .asm file given on the command line into a .hack file.

    Expects exactly one argument ending in ".asm". Labels are collected in a
    first pass, A- and C-commands are translated in a second pass, and the
    resulting words are written, one per line, to a file with the same name
    and the ".hack" extension.

    Exits with status 1 on bad arguments or an invalid label/symbol and with
    status 0 on success.
    """
    # If there is an invalid number of arguments the program stops.
    if len(sys.argv) != 2:
        print("ERROR: Invalid number of arguments. Expected: file_name.asm ")
        sys.exit(1)
    # The assembler only accepts asm files to be translated into hack files
    elif sys.argv[1][-4:] != ".asm":
        print("ERROR: Invalid file type. Expected: asm file")
        sys.exit(1)

    input_file = sys.argv[1]

    # Initialize the symbol table with the predefined symbols.
    symbol_table = SymbolTable()
    translator_c_command = Code()

    # Counters to keep track of the ROM and RAM memory address.
    count_ROM = 0
    count_variable = 16

    # List containing all the translated commands from the file.
    commands_translation = []

    # First pass: record the ROM address of every label.
    parser = Parser(input_file)
    while parser.has_more_commands():
        parser.advance()
        if parser.command_type() == "L_COMMAND":
            label = parser.symbol()
            # A label must be non-empty and must not start with a digit.
            if label and not label[0].isdigit():
                symbol_table.add_entry(label, count_ROM)
            else:
                print("ERROR: invalid label indentifier")
                sys.exit(1)
        else:
            # Every other command occupies one ROM word.
            count_ROM += 1

    # Reset the parser pointer to read the file again.
    parser.file.seek(0)

    # Second pass: translate the commands.
    while parser.has_more_commands():
        parser.advance()
        command_type = parser.command_type()
        if command_type == "A_COMMAND":
            variable = parser.symbol()
            if variable.isdigit():
                # Numeric literal.
                address = int(variable)
            elif variable and not variable[0].isdigit():
                if symbol_table.contains(variable):
                    address = symbol_table.get_address(variable)
                else:
                    # New variable: allocate the next free RAM address.
                    address = count_variable
                    symbol_table.add_entry(variable, count_variable)
                    count_variable += 1
            else:
                # Empty, or starts with a digit without being a number.
                print("ERROR: The symbol " + variable + " is invalid")
                sys.exit(1)
            commands_translation.append("{:016b}".format(address))
        elif command_type == "C_COMMAND":
            # Translate each mnemonic into its binary code.
            binary_dest = translator_c_command.dest(parser.dest())
            binary_comp = translator_c_command.comp(parser.comp())
            binary_jump = translator_c_command.jump(parser.jump())
            commands_translation.append(
                "111" + binary_comp + binary_dest + binary_jump)

    # The extension was validated above, so dropping the last four characters
    # is safe; searching for the first "." would break on paths such as
    # "./prog.asm" or "../dir/prog.asm".
    hack_file = input_file[:-4] + ".hack"
    with open(hack_file, "w") as out:
        for command in commands_translation:
            out.write(command)
            out.write("\n")
    sys.exit(0)
Exemple #18
0
class HackAssembler:
    """Assemble one .asm file by scanning it three times.

    The scans, in order: collect labels, collect variables, translate each
    line. The accumulated output is then written to
    ``<base>_Brayden.hack``.
    """

    def __init__(self, asm_filename):
        """Store the input path and create the collaborating objects."""
        self.output_string = ''
        self.labels_parsed = 0
        self.next_open_memory_address = 16
        self.asm_filename = asm_filename
        self.symbol_table = SymbolTable()
        self.parser = Parser(self.symbol_table)
        self.binary_translator = BinaryTraslator(self.parser)

    def compile(self):
        """Run the three scans and write the result file."""
        stages = (
            self.__scan_for_labels,
            self.__scan_for_variables,
            self.__translate_to_binary,
            self.__write_to_out_file,
        )
        for stage in stages:
            stage()

    def __scan_for_labels(self):
        """First scan: register every label."""
        self.__read_file_by_line(self.__add_value_to_symbol_table_if_label)

    def __scan_for_variables(self):
        """Second scan: register every variable not seen before."""
        self.__read_file_by_line(self.__add_value_to_symbol_table_if_variable)

    def __translate_to_binary(self):
        """Third scan: append the translation of each line to the output."""
        self.__read_file_by_line(self.__translate_line_to_binary)

    def __write_to_out_file(self):
        """Write the accumulated output next to the input file."""
        stem = self.asm_filename.partition('.asm')[0]
        with open(stem + "_Brayden.hack", "w") as out:
            out.write(self.output_string)

    def __read_file_by_line(self, block):
        """Call ``block(line, index)`` for each non-empty preprocessed line.

        ``index`` counts only the lines that survive preprocessing.
        """
        with open(self.asm_filename) as fp:
            cleaned = (self.__preprocess_line(raw) for raw in fp)
            for index, text in enumerate(t for t in cleaned if t):
                block(text, index)

    def __preprocess_line(self, line):
        """Drop the ``//`` comment and all spaces from *line*."""
        code_part = line.partition('//')[0]
        return code_part.strip().replace(' ', '')

    def __add_value_to_symbol_table_if_label(self, line, cnt):
        """Register a label line at the address of the following instruction."""
        if not self.parser.is_label(line):
            return
        # Labels already seen occupy no instruction slot, so they are
        # subtracted from the running line count.
        address = cnt - self.labels_parsed
        self.symbol_table.add(self.parser.label_value(line), address)
        self.labels_parsed += 1

    def __add_value_to_symbol_table_if_variable(self, line, cnt):
        """Register the variable on *line* at the next open memory address."""
        if not self.parser.is_variable(line):
            return
        if self.symbol_table.contains(self.parser.variable_value(line)):
            return
        self.symbol_table.add(self.parser.variable_value(line),
                              self.next_open_memory_address)
        self.__increment_next_open_memory_address()

    def __translate_line_to_binary(self, line, _):
        """Append the translation of *line*, if any, to the output string."""
        translated = self.binary_translator.translate(line)
        if not translated:
            return
        self.output_string += '{}\n'.format(translated)

    def __increment_next_open_memory_address(self):
        """Advance the next open memory address by one."""
        self.next_open_memory_address += 1
Exemple #19
0
        else:
            line_address += 1

    parser.seek_head()
    var_address = 16
    while parser.has_more_commands():
        parser.advance()

        if parser.command_type() == 'L_COMMAND':
            continue

        elif parser.command_type() == 'A_COMMAND':
            symbol = parser.symbol()
            if symbol.isdigit():
                address = int(symbol)
            elif symbol_table.contains(symbol):
                address = symbol_table.get_address(symbol)
            else:
                address = var_address
                symbol_table.add_entry(symbol, address)
                var_address += 1
            machine_code = address

        elif parser.command_type() == 'C_COMMAND':
            comp = Code.comp(parser.comp())
            dest = Code.dest(parser.dest())
            jump = Code.jump(parser.jump())
            machine_code = 0b111 << 13 | comp << 6 | dest << 3 | jump

        machine_code_str = "{0:016b}".format(machine_code)
        hack_file.write(machine_code_str + '\n')
Exemple #20
0
class Parser:
    """Holds the command lines of an assembly file plus a cursor into them.

    ``self.assembly`` is the list of raw lines (comment-only and blank lines
    removed) and ``self.idx`` the index of the current line. Note that
    ``symbol()`` rewrites that list while resolving symbols.
    """

    def __init__(self, assembly_path: str) -> None:
        """Read *assembly_path*, keeping every line that is not blank or a comment."""
        self.coder = Code()
        self.symbol_table = SymbolTable()
        self.assembly = []
        self.idx = 0

        # The context manager closes the file even if reading fails.
        with open(assembly_path, 'r') as f:
            for line in f:
                stripped = line.strip()
                # strip() also covers indented comments and lines made of
                # tabs or a bare "\r\n"; keeping those would shift the
                # indices that symbol() stores as label addresses.
                if not stripped or stripped.startswith("//"):
                    continue
                # The raw line is stored unchanged.
                self.assembly.append(line)

    def reset_idx(self):
        """Move the cursor back to the first line."""
        self.idx = 0

    def hasMoreCommands(self) -> bool:
        """Return True while the cursor points at an existing line."""
        return self.idx < len(self.assembly)

    def advance(self) -> None:
        """Move the cursor one line forward, never past the end."""
        if self.hasMoreCommands():
            self.idx += 1

    def commandType(self) -> str:
        """Classify the current line as A, C or L.

        Returns None for a line that matches none of them and when the
        cursor is past the last line. A and C lines are also echoed to
        stdout.
        """
        if self.hasMoreCommands():
            curr = self.assembly[self.idx]

            if "@" in curr:
                print("A", curr)
                return A
            elif "=" in curr or ";" in curr:
                print("C", curr)
                return C
            elif "(" in curr:
                return L
            else:
                return None

    def symbol(self) -> str:
        """Resolve the symbol of the current A or L line and return its address.

        Side effects: an A line is replaced in ``self.assembly`` by
        ``"@" + address``; an L line is removed from ``self.assembly``
        after its symbol has been recorded with the current index.
        Returns "" for any other command type.
        """
        symbol = "".join([s for s in self.assembly[self.idx] if s != " "])
        symbol = symbol.split("\n")[0]
        if self.commandType() == A:
            symbol = symbol[1:]
            if not self.symbol_table.contains(symbol):
                try:
                    address = int(symbol)
                except ValueError:
                    # Not a number: allocate the table's next free slot.
                    self.symbol_table.addEntry(symbol,
                                               str(self.symbol_table.idx))
                    self.symbol_table.advance()
                else:
                    # Numeric literal: stored under its own text.
                    self.symbol_table.addEntry(symbol, str(address))
            self.assembly[
                self.idx] = "@" + self.symbol_table.getAddress(symbol)
        elif self.commandType() == L:
            symbol = symbol[1:-1]
            if not self.symbol_table.contains(symbol):
                self.symbol_table.addEntry(symbol, str(self.idx))
            # Drop the label line so later indices count instructions only.
            self.assembly = self.assembly[:self.idx] + self.assembly[self.idx +
                                                                     1:]
        else:
            return ""

        return self.symbol_table.getAddress(symbol)

    def dest(self) -> str:
        """Return ``self.coder.dest`` of the current line; C-commands only.

        Raises NotImplementedError for any other command type.
        """
        if self.commandType() == C:
            return self.coder.dest(self.assembly[self.idx])
        else:
            raise NotImplementedError

    def comp(self) -> str:
        """Return ``self.coder.comp`` of the current line; C-commands only.

        Raises NotImplementedError for any other command type.
        """
        if self.commandType() == C:
            return self.coder.comp(self.assembly[self.idx])
        else:
            raise NotImplementedError

    def jump(self) -> str:
        """Return ``self.coder.jump`` of the current line; C-commands only.

        Raises NotImplementedError for any other command type.
        """
        if self.commandType() == C:
            return self.coder.jump(self.assembly[self.idx])
        else:
            raise NotImplementedError

    def address(self) -> str:
        """Return the operand of the current line as a 16-character bit string.

        The first character of the line is skipped and spaces are ignored;
        the remainder is looked up in the symbol table when present there
        and otherwise read as a decimal number.
        """
        instruction = self.assembly[self.idx]
        instruction = "".join([i for i in instruction if i != " "])
        instruction = instruction[1:].split("\n")[0]
        if self.symbol_table.contains(instruction):
            address = self.symbol_table.getAddress(instruction)
        else:
            address = instruction

        b = bin(int(address))[2:]
        address = "0" * (16 - len(b)) + b
        return address
Exemple #21
0
  def main():
    """Assemble the .asm file named by the first command-line argument.

    Two files are written next to the input: ``<name>.hack`` with one
    16-bit word per A/C command and ``<name>.ann`` with an annotated
    listing (source text, symbol-free form, machine word in nibbles)
    followed by a dump of the symbol table.
    """
    filename      = os.path.join(os.getcwd(), Util.getCommandLineArg(1))
    first_parser  = Parser(filename)
    second_parser = Parser(filename)
    symbol_table  = SymbolTable()

    # Swap the extension only: str.replace('asm', ...) would also rewrite
    # an 'asm' that occurs in a directory or base name.
    base_name     = os.path.splitext(filename)[0]
    hack_filename = base_name + '.hack'
    ann_filename  = base_name + '.ann'

    rom_address = 0
    ram_address = 16

    # First pass: bind each label to the address of the next instruction.
    # Command types are compared with ==; `is` tests object identity and
    # only works for strings by accident of interning.
    while first_parser.has_more_commands():
      first_parser.advance()
      command_type = first_parser.command_type()

      if command_type == 'A_COMMAND' or command_type == 'C_COMMAND':
        rom_address += 1
      elif command_type == 'L_COMMAND':
        symbol_table.add_entry(first_parser.symbol(), rom_address, 'LAB')

    # Second pass: translate and annotate. The context manager closes both
    # outputs even when translation raises.
    with open(hack_filename, 'w') as hack_file, \
        open(ann_filename, 'w') as ann_file:
      while second_parser.has_more_commands():
        second_parser.advance()
        command_type = second_parser.command_type()
        machine_command = ''

        if command_type == 'A_COMMAND':
          symbol = second_parser.symbol()
          if symbol[0].isdigit():
            binary = symbol
          elif symbol_table.contains(symbol):
            binary = symbol_table.get_address(symbol)
          else:
            # First use of a variable: give it the next RAM slot.
            binary = ram_address
            symbol_table.add_entry(symbol, ram_address, 'VAR')
            ram_address += 1

          machine_command = '{0:016b}\n'.format(int(binary))
          hack_file.write(machine_command)
        elif command_type == 'C_COMMAND':
          dest = Code.dest(second_parser.dest())
          comp = Code.comp(second_parser.comp())
          jump = Code.jump(second_parser.jump())

          machine_command = '111{0}{1}{2}\n'.format(comp, dest, jump)
          hack_file.write(machine_command)

        assembly = second_parser.original_command().strip()
        mc = machine_command.strip()

        # Labels produce no word, so all four groups are empty then.
        annotated_machine = '{} {} {} {}'.format(mc[0:4], mc[4:8], mc[8:12], mc[12:16])

        if command_type == 'L_COMMAND':
          symbolless_command = symbol_table.get_address(second_parser.symbol())
        elif command_type == 'A_COMMAND' and not second_parser.symbol().isdigit():
          symbolless_command = '@{}'.format(symbol_table.get_address(second_parser.symbol()))
        else:
          symbolless_command = second_parser.command

        annotated_command = '{:<39} {} {:<11} {}\n'.format(assembly, '//' if command_type else '', symbolless_command, annotated_machine)

        ann_file.write(annotated_command)

      ann_file.write('\n// Symbol Table:\n')

      # Each table value is indexed as (address, kind) below.
      for symbol, address in symbol_table.symbol_table.items():
        ann_file.write('// {}: {:<30} -> {}\n'.format(address[1], symbol, address[0]))
Exemple #22
0
class Assembler(object):
    """
    Two-pass assembler that translates an .asm file into a .hack file.

    The first pass records, for every label command, the number of A- and
    C-commands seen before it.  The second pass writes one 16-bit binary
    word per A- or C-command and assigns addresses from 16 upwards to
    symbols that are not yet in the symbol table.
    """

    def __init__(self, asm_file):
        """Store the input path, derive the output path, build the helpers."""
        self.asm_file = asm_file
        self.hack_file = self._hack_path(asm_file)
        self.parser = Parser(asm_file)
        self.code = Code()
        self.symbol_table = SymbolTable()

    @staticmethod
    def _hack_path(asm_file):
        """Return asm_file with its extension replaced by '.hack'.

        Only the final extension is replaced, so dots elsewhere in the path
        (e.g. './prog.asm' or 'dir.v2/prog.asm') are preserved.
        """
        # Imported locally so the class does not depend on a file-level import.
        import os
        return os.path.splitext(asm_file)[0] + '.hack'

    def assembly(self):
        """Run both passes and write the resulting words to self.hack_file."""
        print('Starting to assembly {} file...'.format(self.asm_file))

        # The first pass runs before the output file is opened, so a failure
        # there does not leave a truncated .hack file behind.
        self._first_pass()

        # NOTE(review): the first pass drives the parser until advance() is
        # falsy.  A fresh Parser is created so the second pass starts from
        # the first command even if Parser does not rewind itself - confirm
        # against the Parser implementation.
        self.parser = Parser(self.asm_file)

        with open(self.hack_file, 'w') as hack_f:
            self._second_pass(hack_f)
        print('Successfully finished the assembly process :)')

    def _first_pass(self):
        """Add every label to the symbol table with its instruction address.

        Raises:
            ValueError: if the parser reports an unknown command type.
        """
        line_number = 0
        while self.parser.advance():
            command_type = self.parser.command_type()
            if (command_type == self.parser.c_command
                    or command_type == self.parser.a_command):
                line_number += 1
            elif command_type == self.parser.l_command:
                # A label refers to the next A-/C-command, whose address is
                # the number of such commands seen so far.
                self.symbol_table.add_entry(self.parser.symbol(), line_number)
            else:
                raise ValueError('Ups!')

    def _second_pass(self, hack_f):
        """Write one binary word per A-/C-command to the open file hack_f.

        Raises:
            ValueError: if the parser reports an unknown command type.
        """
        next_var_address = 16
        while self.parser.advance():
            command_type = self.parser.command_type()
            if command_type == self.parser.c_command:
                dest_bin = self.code.dest_mnemonic[self.parser.dest()]
                comp_bin = self.code.comp_mnemonic[self.parser.comp()]
                jump_bin = self.code.jump_mnemonic[self.parser.jump()]
                word_16 = ''.join(['111', comp_bin, dest_bin, jump_bin, '\n'])

            elif command_type == self.parser.a_command:
                symbol = self.parser.symbol()
                if symbol.isdigit():
                    address_int = int(symbol)
                else:
                    if not self.symbol_table.contains(symbol):
                        # First use of an unknown symbol: allocate the next
                        # free variable address.
                        self.symbol_table.add_entry(symbol, next_var_address)
                        next_var_address += 1
                    address_int = self.symbol_table.get_address(symbol)
                word_16 = ''.join(['0', format(address_int, '015b'), '\n'])

            elif command_type == self.parser.l_command:
                # Labels produce no machine word.
                continue
            else:
                raise ValueError('Ups!')
            hack_f.write(word_16)
Exemple #23
0
class CompilationEngine(object):
   # the destination file for writing
   destination_file = None

   # the tokenizer for the input file
   tokenizer = None

   # symbol table
   symbol_table = None

   # vm writer
   vm_writer = None

   # the class name
   class_name = ""

   # indices for if and while loops
   # start at -1 because we increment before use
   while_index = -1
   if_index = -1

   # the constructor for compiling a single class
   # the next method to be called after construction must be compile_class
   # source_filename must be a single file, not a directory
   def __init__(self, source_filename):
      """Open the .vm output for source_filename and build the helpers.

      A trailing ".jack" (any letter case) is replaced by ".vm"; for any
      other name ".vm" is appended.
      """
      has_jack_suffix = source_filename.lower().endswith(".jack")
      stem = source_filename[:-5] if has_jack_suffix else source_filename

      # the output file stays open until compile_class closes it
      self.destination_file = open(stem + ".vm", 'w')

      # helpers: tokenizer for the input, symbol table, writer for the output
      self.tokenizer = JackTokenizer(source_filename)
      self.symbol_table = SymbolTable()
      self.vm_writer = VMWriter(self.destination_file)

   # compiles a complete class and closes the output file
   def compile_class(self):
      # class keyword
      tt, t = self._token_next(True, "KEYWORD", "class")

      # name of class
      tt, t = self._token_next(True, "IDENTIFIER")
      self.class_name = t

      # open brace
      tt, t = self._token_next(True, "SYMBOL", "{")

      # one or more variable declarations
      self.tokenizer.advance()
      while True:
         tt, t = self._token_next(False)
         if tt == "KEYWORD" and t in ["field", "static"]:
            self.compile_class_var_dec()
         else:
            # stop trying to process variable declarations
            break

      # one or more subroutine declarations
      while True:
         tt, t = self._token_next(False)
         if tt == "KEYWORD" and t in ["constructor", "function", "method"]:
            self.compile_subroutine()
         else:
            # stop trying to process functions
            break

      # close brace
      # do not advance because we already advanced upon exiting the last loop
      tt, t = self._token_next(False, "SYMBOL", "}")

      # done with compilation; close the output file
      self.destination_file.close()

   # compiles a static declaration or field declaration
   def compile_class_var_dec(self):
      # compile the variable declaration
      # False means this is a class (not a subroutine)
      self.compile_var_dec(False)

   # compiles a complete method, function, or constructor
   def compile_subroutine(self):
      # start of subroutine
      self.symbol_table.start_subroutine()

      # constructor, function, or method keyword
      tt, type = self._token_next(False, "KEYWORD")

      # type of the return value
      # can be either keyword (void) or an identifier (any type)
      tt, t = self._token_next(True)

      # name of the method/function/constructor
      tt, name = self._token_next(True)
      name = self.class_name + "." + name

      # if the type is a method, "define" this as an argument, so the other
      # argument indexes work correctly
      if type == "method":
         self.symbol_table.define("this", self.class_name, SymbolTable.ARG)

      # opening parenthesis
      tt, t = self._token_next(True, "SYMBOL", "(")

      # arguments
      self.tokenizer.advance()
      self.compile_parameter_list()

      # closing parenthesis
      tt, t = self._token_next(False, "SYMBOL", ")")

      # opening brace
      tt, t = self._token_next(True, "SYMBOL", "{")

      # variable declarations
      self.tokenizer.advance()
      while True:
         tt, t = self._token_next(False)
         if tt == "KEYWORD" and t == "var":
            self.compile_var_dec()
         else:
            # stop trying to process variable declarations
            break

      # write the function
      num_locals = self.symbol_table.var_count(self.symbol_table.VAR)
      self.vm_writer.write_function(name, num_locals)

      # write any special code at the top of the function
      if type == "constructor":
         # code to allocate memory and set "this"
         size = self.symbol_table.var_count(self.symbol_table.FIELD)
         self.vm_writer.write_push(self.vm_writer.CONST, size)
         self.vm_writer.write_call("Memory.alloc", 1)
         self.vm_writer.write_pop(self.vm_writer.POINTER, 0)
      elif type == "function":
         # nothing special
         pass
      elif type == "method":
         # put argument 0 into pointer 0 (this)
         self.vm_writer.write_push(self.vm_writer.ARG, 0)
         self.vm_writer.write_pop(self.vm_writer.POINTER, 0)
      else:
         print "WARNING: Expected constructor, function, or name; got", type

      # statements
      self.compile_statements()

      # closing brace
      tt, t = self._token_next(False, "SYMBOL", "}")

      self.tokenizer.advance()

   # compiles a (possibly empty) parameter list, not including the enclosing
   # parentheses
   def compile_parameter_list(self):
      # check for empty list
      tt, t = self._token_next(False)
      if tt == "SYMBOL" and t == ")":
         # the parameter list was empty; do not process any more
         pass
      else:
         # there are things in the parameter list
         while True:
            # keyword (variable type)
            tt, type = self._token_next(False)

            # identifier (variable name)
            tt, name = self._token_next(True)

            # the kind is always an arg, since these are all parameters to the
            # function
            kind = SymbolTable.ARG

            # define the variable in the symbol table
            self.symbol_table.define(name, type, kind)

            # possible comma
            tt, t = self._token_next(True)
            if tt != "SYMBOL" or t != ",":
               # not a comma; stop processing parameters
               break

            self.tokenizer.advance()

   # compiles a var declaration
   # if subroutine is true, only the var keyword can be used
   # if subroutine is false, only the static and field keywords can be used
   def compile_var_dec(self, subroutine=True):
      # the keyword to start the declaration
      tt, kind = self._token_next(False, "KEYWORD")

      # check for required types
      if subroutine:
         if kind == "var":
            kind = SymbolTable.VAR
         else:
            print "WARNING: expecting var, but received %s" % (str(kind))
      else:
         if kind == "static":
            kind = SymbolTable.STATIC
         elif kind == "field":
            kind = SymbolTable.FIELD
         else:
            print "WARNING: expecting static or field, but received %s" % (str(kind))

      # type of the declaration
      # could be an identifier or a keyword (int, etc)
      tt, type = self._token_next(True)

      # name of the declaration
      tt, name = self._token_next(True, "IDENTIFIER")

      # define the variable in the symbol table
      self.symbol_table.define(name, type, kind)

      # can support more than one identifier name, to declare more than one
      # variable, separated by commas; process the 2nd-infinite variables
      self.tokenizer.advance()
      while True:
         tt, t = self._token_next(False)
         if tt == "SYMBOL" and t == ",":
            # another variable name follows
            tt, name = self._token_next(True, "IDENTIFIER")

            # define the variable in the symbol table
            self.symbol_table.define(name, type, kind)

            self.tokenizer.advance()
         else:
            # no more variable names
            break

      # should be on the semicolon at the end of the line
      tt, t = self._token_next(False, "SYMBOL", ";")

      self.tokenizer.advance()

   # compiles a sequence of statements, not including the enclosing {}
   def compile_statements(self):
      while True:
         tt, t = self._token_next(False)
         if tt == "KEYWORD" and t in ["do", "let", "while", "return", "if"]:
            # call compile_t, where t is the type of compilation we want
            token = getattr(self, "compile_" + t)()
         else:
            # not a statement; stop processing statements
            break

   # compiles a do statement
   def compile_do(self):
      # do keyword
      tt, t = self._token_next(False, "KEYWORD", "do")

      # subroutine call
      self.tokenizer.advance()
      self.compile_subroutine_call()

      # do statements do not have a return value, so eliminate the return
      # off of the stack
      self.vm_writer.write_pop(self.vm_writer.TEMP, 0)

      # semicolon
      tt, t = self._token_next(False, "SYMBOL", ";")

      self.tokenizer.advance()

   # compiles a let statement
   def compile_let(self):
      # let keyword
      tt, t = self._token_next(False, "KEYWORD", "let")

      # variable name
      tt, name = self._token_next(True, "IDENTIFIER")

      # possible brackets for array
      tt, t = self._token_next(True)
      if tt == "SYMBOL" and t == "[":
         # array - write operation
         array = True

         # compile the offset expression
         self.tokenizer.advance()
         self.compile_expression()

         # write the base address onto the stack
         segment, index = self._resolve_symbol(name)
         self.vm_writer.write_push(segment, index)

         # add base and offset
         self.vm_writer.write_arithmetic("add")

         # we cannot yet put the result into pointer 1, since the read
         # operation (which hasn't been parsed/computed yet) may use pointer 1
         # to read from an arrya value

         # closing bracket
         tt, t = self._token_next(False, "SYMBOL", "]")

         # advance to the next token, since we are expected to be on the = for
         # the next line
         self.tokenizer.advance()
      else:
         array = False

      # equals sign
      tt, t = self._token_next(False, "SYMBOL", "=")

      # expression
      self.tokenizer.advance()
      self.compile_expression()

      if array:
         # our stack now looks like this:
         #    TOP OF STACK
         #    computed result to store
         #    address in which value should be stored
         #    ... previous stuff ...

         # pop the computed value to temp 0
         self.vm_writer.write_pop(self.vm_writer.TEMP, 0)

         # pop the array address to pointer 1 (that)
         self.vm_writer.write_pop(self.vm_writer.POINTER, 1)

         # put the computed value back onto the stack
         self.vm_writer.write_push(self.vm_writer.TEMP, 0)

         # pop to the variable name or the array reference
         self.vm_writer.write_pop(self.vm_writer.THAT, 0)
      else:
         # not an array - pop the expression to the variable
         segment, index = self._resolve_symbol(name)
         self.vm_writer.write_pop(segment, index)

      # semicolon
      tt, t = self._token_next(False, "SYMBOL", ";")

      self.tokenizer.advance()

   # compiles a while statement
   def compile_while(self):
      # labels for this while loop
      self.while_index += 1
      while_start = "WHILE_START_%d" % (self.while_index)
      while_end = "WHILE_END_%d" % (self.while_index)

      # while keyword
      tt, t = self._token_next(False, "KEYWORD", "while")

      # opening parenthesis
      tt, t = self._token_next(True, "SYMBOL", "(")

      # label for the start of the while statement
      self.vm_writer.write_label(while_start)

      # the expression that is the condition of the while statement
      self.tokenizer.advance()
      self.compile_expression()

      # the closing parenthesis
      tt, t = self._token_next(False, "SYMBOL", ")")

      # the result of the evaluation is now on the stack
      # if false, then goto to the end of the loop
      # to do this, negate and then call if-goto
      self.vm_writer.write_arithmetic("not")
      self.vm_writer.write_if(while_end)

      # the opening brace
      tt, t = self._token_next(True, "SYMBOL", "{")

      # the statments that is the body of the while loop
      self.tokenizer.advance()
      self.compile_statements()

      # the closing brace
      tt, t = self._token_next(False, "SYMBOL", "}")

      # after the last statement of the while loop
      # need to jump back up to the top of the loop to evaluate again
      self.vm_writer.write_goto(while_start)

      # label at the end of the loop
      self.vm_writer.write_label(while_end)

      self.tokenizer.advance()

   # compiles a return statement
   def compile_return(self):
      # return keyword
      tt, t = self._token_next(False, "KEYWORD", "return")

      # possible expression to return
      tt, t = self._token_next(True)
      if tt != "SYMBOL" and t != ";":
         self.compile_expression()
      else:
         # no return expression; return 0
         self.vm_writer.write_push(self.vm_writer.CONST, 0)

      # ending semicolon
      tt, t = self._token_next(False, "SYMBOL", ";")

      self.vm_writer.write_return()

      self.tokenizer.advance()

   # compiles a if statement, including a possible trailing else clause
   def compile_if(self):
      # it is more efficient in an if-else case to have the else portion first
      # in the code when testing, but we use the less-efficient but
      # easier-to-write true-false pattern here

      # labels for this if statement
      self.if_index += 1
      if_false = "IF_FALSE_%d" % (self.if_index)
      if_end = "IF_END_%d" % (self.if_index)

      # if keyword
      tt, t = self._token_next(False, "KEYWORD", "if")

      # opening parenthesis
      tt, t = self._token_next(True, "SYMBOL", "(")

      # expression of if statement
      self.tokenizer.advance()
      self.compile_expression()

      # closing parenthesis
      tt, t = self._token_next(False, "SYMBOL", ")")

      # the result of the evaluation is now on the stack
      # if false, then goto the false label
      # if true, fall through to executing code
      # if there is no else, then false and end are the same, but having two
      # labels does not increase code size
      self.vm_writer.write_arithmetic("not")
      self.vm_writer.write_if(if_false)

      # opening brace
      tt, t = self._token_next(True, "SYMBOL", "{")

      # statements for true portion
      self.tokenizer.advance()
      self.compile_statements()

      # closing brace
      tt, t = self._token_next(False, "SYMBOL", "}")

      tt, t = self._token_next(True)
      if tt == "KEYWORD" and t == "else":
         # else statement exists

         # goto the end of the if statement at the end of the true portion
         self.vm_writer.write_goto(if_end)

         # label for the start of the false portion
         self.vm_writer.write_label(if_false)

         # opening brace
         tt, t = self._token_next(True, "SYMBOL", "{")

         # statements
         self.tokenizer.advance()
         self.compile_statements()

         # closing brace
         tt, t = self._token_next(False, "SYMBOL", "}")

         # end label
         self.vm_writer.write_label(if_end)

         # advance tokenizer only if we are in the else, since otherwise the
         # token was advanced by the else check
         self.tokenizer.advance()
      else:
         # no else portion; only put in a label for false, since end is not
         # used
         self.vm_writer.write_label(if_false)

   # compiles an expression (one or more terms connected by operators)
   def compile_expression(self):
      # the first term
      self.compile_term()

      # finish any number of operators followed by terms
      while True:
         tt, t = self._token_next(False)
         if tt == "SYMBOL" and t in "+-*/&|<>=":
            # found an operator
            # postfix order - add the next term and then do the operator

            # the next term
            self.tokenizer.advance()
            self.compile_term()

            # the operator
            if t == "+":
               self.vm_writer.write_arithmetic("add")
            if t == "-":
               self.vm_writer.write_arithmetic("sub")
            if t == "=":
               self.vm_writer.write_arithmetic("eq")
            if t == ">":
               self.vm_writer.write_arithmetic("gt")
            if t == "<":
               self.vm_writer.write_arithmetic("lt")
            if t == "&":
               self.vm_writer.write_arithmetic("and")
            if t == "|":
               self.vm_writer.write_arithmetic("or")
            if t == "*":
               self.vm_writer.write_call("Math.multiply", 2)
            if t == "/":
               self.vm_writer.write_call("Math.divide", 2)
         else:
            # no term found; done parsing the expression
            break

   # compiles a term
   # this routine is faced with a slight difficulty when trying to decide
   # between some of the alternative parsing rules. specifically, if the
   # current token is an identifier, the routine must distinguish between a
   # variable, an array entry, and a subroutine call. a single lookahead token,
   # which may be one of [, (, or ., suffices to distinguish between the three
   # possibilities. any other token is not part of this term and should not
   # be advanced over.
   def compile_term(self):
      # a term: integer_constant | string_constant | keyword_constant |
      # varname | varname[expression] | subroutine_call | (expression) |
      # unary_op term
      tt, t = self._token_next(False)
      if tt == "INT_CONST":
         self.vm_writer.write_push(self.vm_writer.CONST, t)

         # advance for the next statement
         self.tokenizer.advance()
      elif tt == "STRING_CONST":
         # after this portion is run, a pointer to a string should be on the
         # stack
         # we create a new string of a certain size and then append characters
         # one by one; each append operation returns the pointer to the same
         # string

         # create the string
         # string is a len, data tuple; not null-terminated
         size = len(t)
         self.vm_writer.write_push(self.vm_writer.CONST, size)
         self.vm_writer.write_call("String.new", 1)

         # append each character
         for char in t:
            self.vm_writer.write_push(self.vm_writer.CONST, ord(char))
            self.vm_writer.write_call("String.appendChar", 2)

         # advance for the next statement
         self.tokenizer.advance()
      elif tt == "KEYWORD":
         if t == "true":
            # true is -1, which is 0 negated
            self.vm_writer.write_push(self.vm_writer.CONST, 0)
            self.vm_writer.write_arithmetic("not")
         elif t == "false" or t == "null":
            self.vm_writer.write_push(self.vm_writer.CONST, 0)
         elif t == "this":
            self.vm_writer.write_push(self.vm_writer.POINTER, 0)

         # advance for the next statement
         self.tokenizer.advance()
      elif tt == "SYMBOL" and t == "(":
         # ( expression )

         # parse the expression
         self.tokenizer.advance()
         self.compile_expression()

         # closing parenthesis
         tt, t = self._token_next(False, "SYMBOL", ")")

         # advance for the next statement
         self.tokenizer.advance()

      elif tt == "SYMBOL" and t in "-~":
         # unary_op term
         # postfix order - add the next term and then do the operator

         # parse the rest of the term
         self.tokenizer.advance()
         self.compile_term()

         # write the unary operation
         if t == "-":
            self.vm_writer.write_arithmetic("neg")
         elif t == "~":
            self.vm_writer.write_arithmetic("not")

      elif tt == "IDENTIFIER":
         # varname, varname[expression], subroutine_call

         # do not write the identifer yet

         # get the next bit of the expression
         # if it is a [, then array; if it is a ( or ., then subroutine call
         # if none of above, then pass over
         tt2, t2 = self._token_next(True)

         if tt2 == "SYMBOL" and t2 in "(.":
            # subroutine call
            # back up and then compile the subroutine call
            self.tokenizer.retreat()

            self.compile_subroutine_call()
         elif tt2 == "SYMBOL" and t2 == "[":
            # array - read operation

            # write the base address onto the stack
            segment, index = self._resolve_symbol(t)
            self.vm_writer.write_push(segment, index)

            # compile the offset expression
            self.tokenizer.advance()
            self.compile_expression()

            # add base and offset
            self.vm_writer.write_arithmetic("add")

            # put the resulting address into pointer 1 (that)
            self.vm_writer.write_pop(self.vm_writer.POINTER, 1)

            # read from that 0 onto the stack
            self.vm_writer.write_push(self.vm_writer.THAT, 0)

            # closing bracket
            tt, t = self._token_next(False, "SYMBOL", "]")

            # advance for the next statement
            self.tokenizer.advance()
         else:
            # none of above - just a single identifier
            segment, index = self._resolve_symbol(t)
            self.vm_writer.write_push(segment, index)

      else:
         # unknown
         print "WARNING: Unknown term expression object:", tt, t

   # compiles a (possible empty) comma-separated list of expressions
   def compile_expression_list(self):
      num_args = 0

      # check for empty list
      tt, t = self._token_next(False)
      if tt == "SYMBOL" and t == ")":
         # the parameter list was empty; do not process any more
         pass
      else:
         # there are things in the parameter list
         while True:
            # expression to pass
            self.compile_expression()
            num_args += 1

            # possible comma
            tt, t = self._token_next(False)
            if tt == "SYMBOL" and t == ",":
               self.tokenizer.advance()
            else:
               # not a comma; stop processing parameters
               break

      return num_args

   # compiles a subroutine call
   # two cases:
   # - subroutineName(expressionList)
   # - (class|var).subroutineName(expressionList)
   def compile_subroutine_call(self):
      # first part of name
      tt, name1 = self._token_next(False, "IDENTIFIER")

      # a dot and another name may exist, or it could be a parenthesis
      name2 = None
      tt, t = self._token_next(True)
      if tt == "SYMBOL" and t == ".":
         # the name after the dot
         tt, name2 = self._token_next(True, "IDENTIFIER")

         # advance so that we are on the parenthesis
         self.tokenizer.advance()

      # determine if this is a method call
      # three possibilities
      # - class.func() - function call
      # - var.func()   - method call
      # - func()       - method call on current object
      if self.symbol_table.contains(name1):
         method_call = True
         local_call = False
      elif name2 == None:
         method_call = True
         local_call = True
      else:
         method_call = False

      # if a method call, push variable name1
      # this a method call if the symbol table contains name1 and name2 exists
      # OR name1 is a method in the current object
      if method_call and local_call:
         # push the current object onto the stack as a hidden argument
         self.vm_writer.write_push(self.vm_writer.POINTER, 0)
      elif method_call and not local_call:
         # push the variable onto the stack as a hidden argument
         segment, index = self._resolve_symbol(name1)
         self.vm_writer.write_push(segment, index)

      # opening parenthesis
      tt, t = self._token_next(False, "SYMBOL", "(")

      # expression list
      self.tokenizer.advance()
      num_args = self.compile_expression_list()

      # closing parenthesis
      tt, t = self._token_next(False, "SYMBOL", ")")

      # write the call
      if method_call and local_call:
         # methd + <blank>

         # get the name of the vm function to call
         classname = self.class_name
         vm_function_name = classname + "." + name1

         # increase arguments by 1, since there is the hidden "this"
         num_args += 1

         # make the call
         self.vm_writer.write_call(vm_function_name, num_args)

      elif method_call and not local_call:
         # variable name + method

         # get the name of the vm function to call
         classname = self.symbol_table.get(name1)[1]
         vm_function_name = classname + "." + name2

         # increase arguments by 1, since there is the hidden "this"
         num_args += 1

         # make the call
         self.vm_writer.write_call(vm_function_name, num_args)
      else:
         # get the name of the vm function to call
         vm_function_name = name1 + "." + name2

         # make the call
         self.vm_writer.write_call(vm_function_name, num_args)

      self.tokenizer.advance()

   # optionally advances the tokenizer (when advance is True), then returns
   # the token_type and token of the current token
   def _token_next(self, advance=False, expected_type=None, expected_value=None):
      # advance the tokenizer, if requested
      if advance:
         self.tokenizer.advance()

      # get the token type and the token itself
      token_type = self.tokenizer.token_type()
      token = str(getattr(self.tokenizer, token_type.lower())())

      if expected_type and token_type != expected_type:
         print "WARNING: Type", token_type, "found; expected", expected_type
         import traceback, sys
         traceback.print_stack()
         sys.exit(1)
      if expected_value and token != expected_value:
         print "WARNING: Value", token, "found; expected", expected_value
         import traceback, sys
         traceback.print_stack()
         sys.exit(1)

      return token_type, token

   # converts a symbol table type into a segment type
   def _type_to_segment(self, type):
      """Map a symbol table kind constant to the matching VM writer segment.

      STATIC -> STATIC, FIELD -> THIS, ARG -> ARG, VAR -> LOCAL.  For any
      other value an error is printed and None is returned implicitly.
      """
      # (symbol table attribute, vm writer attribute), checked in order;
      # attributes are looked up lazily, only as far as the first match
      pairs = (
         ("STATIC", "STATIC"),
         ("FIELD", "THIS"),
         ("ARG", "ARG"),
         ("VAR", "LOCAL"),
      )
      for kind_name, segment_name in pairs:
         if type == getattr(self.symbol_table, kind_name):
            return getattr(self.vm_writer, segment_name)

      print "ERROR: Bad type %s" % (str(type))
 
   # resolves the symbol from the symbol table
   # the segment and index is returned as a 2-tuple
   def _resolve_symbol(self, name):
      kind, type, index = self.symbol_table.get(name)
      return self._type_to_segment(kind), index
l_command_address = 0

#a_command_set = set()
a_command_list = []

while asm.hasMoreCommands():

    asm.advance()
    current_command = asm.currentCommand
    current_command_type = asm.commandType()

    if current_command_type == 'L_COMMAND':
        #get the symbol and
        symbol = asm.symbol()
        #add the symbol into the table only if it does not already exist
        if not symbol_table.contains(symbol):
            #add the symbol
            symbol_table.addEntry(symbol, l_command_address)

        continue

    if current_command_type == 'A_COMMAND':

        #get the symbol
        symbol = asm.symbol()
        #check to see if it really is a symbol or just a number
        if not symbol.isdigit():
            if symbol not in a_command_list:
                a_command_list.append(symbol)

        l_command_address += 1