def parse_pass_one(fin, args):
    '''
    First pass: build the table mapping each label to its address.

    Every parsed line whose type is 'non_label' advances the address by 4;
    any other parsed line is treated as a label declaration and recorded at
    the current address. Instruction warnings (cp.warn / cp.warn32) are
    switched off while the pass runs and restored afterwards, even when the
    pass terminates early.

    fin: iterable of source lines
    args: not used by this pass
    returns: dict of label -> address
    Exits with status 1 when a label is declared more than once.
    '''
    address = 0
    symbols_table = {}

    # Suppress instruction warnings
    prev_warn = cp.warn
    prev_warn32 = cp.warn32
    cp.warn = False
    cp.warn32 = False

    try:
        for line in fin:
            result = parser.parse(line)
            if result is None:
                # Cause unknown: the parser produced nothing for this line
                continue

            tokens = result["tokens"]
            if tokens is None:
                continue

            # Compare by value; identity comparison against a string
            # literal only works when both strings happen to be interned.
            if result['type'] == 'non_label':
                address += 4
                continue

            label = tokens['label']
            if label in symbols_table:
                cp.cprint_fail("Error: " + str(tokens['lineno']) +
                               " : Redeclaration of label '" + str(label) +
                               "'.")
                exit(1)
            symbols_table[label] = address
    finally:
        # Restore warning state
        cp.warn = prev_warn
        cp.warn32 = prev_warn32

    return symbols_table
def parse_pass_two(fin, fout, symbols_table, args):
    '''
    Second pass: translate every instruction to binary and write it out.

    The input is rewound and the line counter reset, then each line is
    parsed again. Label declarations are skipped. Instructions that refer
    to a label get the label replaced by an encoded offset before being
    converted. One binary string per line is written to fout and the
    address advances by 4 for every instruction actually written.

    fin: seekable file of source lines
    fout: writable file receiving one binary string per line
    symbols_table: dict of label -> address, as built by the first pass
    args: not used by this pass
    Exits with status 1 when a label is used but was never defined.
    '''
    fin.seek(0, 0)
    # Reset line number state
    reset_lineno()

    address = 0
    for line in fin:
        # NOTE(review): echoes every source line to stdout; looks like
        # leftover debug output - confirm before removing.
        print(line)

        result = parser.parse(line)
        if result is None:
            continue
        if result["tokens"] is None:
            continue
        # Compare by value, not identity.
        if result['type'] == 'label':
            continue

        result = result['tokens']
        if 'label' in result:
            if result['label'] not in symbols_table:
                cp.cprint_fail("Error: " + str(result['lineno']) +
                               " : Label used but never defined '" +
                               str(result['label']) + "'.")
                exit(1)
            result = encode_offset(result, address,
                                   symbols_table[result['label']])

        if not result:
            continue

        # convert_to_binary returns None on failure, so do not unpack
        # its result before checking it.
        converted = mcg.convert_to_binary(result)
        if not converted:
            continue
        instr = converted[0]
        if not instr:
            continue

        fout.write(instr + '\n')
        address += 4
def op_jal(self, tokens):
    '''
    Encode a JAL instruction.

    Layout: imm[20] imm[10:1] imm[11] imm[19:12] rd opcode
    The immediate in tokens is expected to be already shuffled into
    that order.

    tokens: dict with 'rd', 'imm' and 'lineno'
    returns: (32 character binary string, dict of the encoded fields)
    Exits with status 1 when the tokens cannot be encoded.
    '''
    try:
        bin_opcode = self.CONST.BOP_JAL
        bin_rd = self.get_bin_register(tokens['rd'])
        imm = tokens['imm']
    except Exception:
        # Any failure here means the parser handed over malformed tokens.
        cp.cprint_fail("Internal Error: JAL: could not parse " +
                       "tokens in " + str(tokens['lineno']))
        exit(1)

    bin_str = imm + bin_rd + bin_opcode
    assert (len(bin_str) == 32)

    tok_dict = {'opcode': bin_opcode, 'rd': bin_rd, 'imm': imm}
    return bin_str, tok_dict
def p_error(p):
    # Parser error hook: report the offending token, or, when no token is
    # supplied, hint that the input may lack a trailing newline.
    if not p:
        cp.cprint_fail("Error: Invalid or incomplete token found " +
                       "Did you end with a newline?")
        return

    where = str(p.lineno)
    cp.cprint_fail("Error:" + where + ": Invalid or incomplete token" +
                   " found '" + str(p.value) + "'")
def p_register(p):
    'register : REGISTER'
    # Everything after the first character of the token is the register
    # index; only indices 0..31 are accepted.
    name = p[1]
    index = int(name[1:])
    if not (0 <= index <= 31):
        cp.cprint_fail("Error:" + str(p.lineno(1)) +
                       ":Invalid register index.")
        raise SyntaxError
    # The register is passed on in its textual form.
    p[0] = name
def op_arithi(self, tokens):
    '''
    Encode an arithmetic instruction with an immediate operand.

    Layout: imm[11:0] rs1 funct3 rd opcode

    The immediate for SLLI and SRLI needs to have the upper 7 bits set
    to 0 and for SRAI, it needs to be set to 0100000; a warning is
    printed otherwise, the instruction is still encoded.

    tokens: dict with 'opcode', 'rd', 'rs1', 'imm' and 'lineno'
    returns: (32 character binary string, dict of the encoded fields)
    Exits with status 1 when the tokens cannot be encoded.
    '''
    opcode = tokens['opcode']

    try:
        funct3 = self.CONST.FUNCT3_ARITHI[opcode]
        bin_opcode = self.CONST.BOP_ARITHI
        bin_rs1 = self.get_bin_register(tokens['rs1'])
        bin_rd = self.get_bin_register(tokens['rd'])
        imm = tokens['imm']
    except Exception:
        # Any failure here means the parser handed over malformed tokens.
        cp.cprint_fail("Internal Error: ARITHI: could not parse " +
                       "tokens in " + str(tokens['lineno']))
        exit(1)

    bin_str = imm + bin_rs1 + funct3 + bin_rd + bin_opcode
    assert (len(bin_str) == 32)

    upper_bits = imm[0:7]
    if opcode in (self.CONST.INSTR_SLLI, self.CONST.INSTR_SRLI):
        if upper_bits != '0000000':
            cp.cprint_warn("Warning:" + str(tokens['lineno']) +
                           ": Upper 7 bits of immediate should be 0")

    # Equality test: "opcode in (X)" without a trailing comma is a
    # substring/containment test on X itself, not tuple membership.
    if opcode == self.CONST.INSTR_SRAI:
        if upper_bits != '0100000':
            cp.cprint_warn("Warning:" + str(tokens['lineno']) +
                           ": Upper 7 bits of immediate should be " +
                           "0100000")

    tok_dict = {
        'opcode': bin_opcode,
        'funct3': funct3,
        'rs1': bin_rs1,
        'rd': bin_rd,
        'imm': imm
    }
    return bin_str, tok_dict
def p_statement_R(p):
    'statement : OPCODE register COMMA register COMMA register NEWLINE'
    # Three-register form: only opcodes listed in mcc.INSTR_TYPE_R are
    # accepted; operands map to rd, rs1, rs2 in source order.
    opcode = p[1]
    if opcode in mcc.INSTR_TYPE_R:
        p[0] = {
            'opcode': opcode,
            'rd': p[2],
            'rs1': p[4],
            'rs2': p[6],
            'lineno': p.lineno(1)
        }
        return

    cp.cprint_fail("Error:" + str(p.lineno(1)) +
                   ": Incorrect opcode or arguments")
    raise SyntaxError
def encode_offset(ltokens, address, target):
    '''
    For an instruction that refers to a label, compute the offset that
    has to be encoded in place of the label. The offset is the difference
    between the target address and the address of the instruction itself,
    converted to the binary immediate of the instruction's format.

    ltokens: token dict of the instruction ('opcode', 'lineno' and the
             register fields of its format)
    address: address of the instruction
    target: address the label resolves to
    returns: a new token dict with 'imm' in place of the label, or None
             when the opcode does not support labels (an error is printed)
    Exits with status 1 when a JAL or branch offset cannot be encoded;
    raises SyntaxError when a JALR offset cannot be encoded.
    '''
    # Offset address, should be divisible by 2 (2-byte aligned)
    offset = target - address
    assert (offset % 2 == 0)

    lineno = ltokens['lineno']
    opcode = ltokens['opcode']

    if opcode == mcc.INSTR_JAL:
        ret, imm, msg = get_imm_UJ(offset, lineno)
        if not ret:
            # Label translation should not raise errors,
            # Warnings make sense.
            cp.cprint_fail("Internal error:" + str(lineno) + ":" + msg)
            exit(1)
        return {
            'opcode': opcode,
            'rd': ltokens['rd'],
            'imm': imm,
            'lineno': lineno
        }

    if opcode in mcc.INSTR_TYPE_SB:
        ret, imm, msg = get_imm_SB(offset, lineno)
        if not ret:
            cp.cprint_fail("Internal error:" + str(lineno) + ":" + msg)
            exit(1)
        return {
            'opcode': opcode,
            'rs1': ltokens['rs1'],
            'rs2': ltokens['rs2'],
            'imm': imm,
            'lineno': lineno
        }

    if opcode == mcc.INSTR_JALR:
        ret, imm, msg = get_imm_I(offset, lineno)
        if not ret:
            cp.cprint_fail("Error:" + str(lineno) + ":" + msg)
            raise SyntaxError
        return {
            'opcode': opcode,
            'rd': ltokens['rd'],
            'rs1': ltokens['rs1'],
            'imm': imm,
            'lineno': lineno
        }

    cp.cprint_fail("Error: " + str(lineno) + " : " +
                   "Label not supported in '" + str(opcode) + "'")
    # Nothing to encode; return an explicit None instead of an unbound
    # local so the caller can skip the instruction.
    return None
def p_statement_UJ_LABEL(p):
    'statement : OPCODE register COMMA LABEL NEWLINE'
    # Register + label form: only opcodes in mcc.INSTR_TYPE_UJ qualify.
    # The label is kept symbolic here.
    opcode = p[1]
    if opcode in mcc.INSTR_TYPE_UJ:
        # UJ Type
        p[0] = {
            'opcode': opcode,
            'rd': p[2],
            'label': p[4],
            'lineno': p.lineno(1)
        }
        return

    cp.cprint_fail("Error:" + str(p.lineno(1)) +
                   ": Incorrect opcode or arguments")
    raise SyntaxError
def main():
    '''
    Parse the file named by the first command line argument line by line
    and print every non-empty parse result.

    returns: 1 when the file cannot be opened, None otherwise
    Exits with an error message when no file argument is given.
    '''
    if len(sys.argv) <= 1:
        exit("Error: No file specified")

    try:
        fin = open(sys.argv[1], 'r')
    except IOError:
        cp.cprint_fail("File does not seem to exist or" +
                       " you do not have the required permissions.")
        return 1

    # Close the input even if parsing raises.
    with fin:
        for line in fin:
            result = parser.parse(line)
            if result:
                print(result)
def get_bin_register(self, r):
    '''
    Convert a register name to its 5 bit binary index.

    The first character of r is dropped and the remainder is read as a
    decimal register index.

    r: register name, one prefix character followed by the index
    returns: the index as a 5 character binary string
    raises: ValueError (or TypeError for a non-string r) when the index
            cannot be parsed, ValueError when it is outside 0..31
    '''
    try:
        index = int(r[1:])
    except (TypeError, ValueError):
        cp.cprint_fail("Internal Error: get_bin_register:" +
                       " Register could not be parsed")
        # Do not carry on with an unparsed value.
        raise

    # Explicit check rather than assert, so it also runs under -O.
    if not 0 <= index < 32:
        raise ValueError("Register index out of range: " + str(index))

    return format(index, '05b')
def op_store(self, tokens):
    '''
    Encode a store instruction.

    Layout: imm[11:5] rs2 rs1 funct3 imm[4:0] opcode
    The immediate arrives in tokens as a tuple (imm_11_5, imm_4_0).
    A 32_Warning is printed when the two lowest immediate bits are not
    zero; the instruction is still encoded.

    tokens: dict with 'opcode', 'rs1', 'rs2', 'imm' and 'lineno'
    returns: (32 character binary string, dict of the encoded fields)
    Exits with status 1 when the tokens cannot be encoded.
    '''
    opcode = tokens['opcode']

    try:
        funct3 = self.CONST.FUNCT3_STORE[opcode]
        bin_opcode = self.CONST.BOP_STORE
        bin_rs1 = self.get_bin_register(tokens['rs1'])
        bin_rs2 = self.get_bin_register(tokens['rs2'])
        imm_11_5, imm_4_0 = tokens['imm']
    except Exception:
        # Any failure here means the parser handed over malformed tokens.
        cp.cprint_fail("Internal Error: STORE: could not parse" +
                       " tokens in " + str(tokens['lineno']))
        exit(1)

    bin_str = imm_11_5 + bin_rs2 + bin_rs1 + funct3 + imm_4_0 + bin_opcode
    assert (len(bin_str) == 32)

    if imm_4_0[-2:] != '00':
        cp.cprint_warn_32("32_Warning:" + str(tokens['lineno']) +
                          ": Missaligned address." +
                          " Address should be 4 bytes aligned.")

    tok_dict = {
        'opcode': bin_opcode,
        'funct': funct3,
        'rs1': bin_rs1,
        'rs2': bin_rs2,
        'imm_11_5': imm_11_5,
        'imm_4_0': imm_4_0
    }
    return bin_str, tok_dict
def op_load(self, tokens):
    '''
    Encode a load instruction.

    Layout: imm[11:0] rs1 funct3 rd opcode
    A 32_Warning is printed when the two lowest immediate bits are not
    zero; the instruction is still encoded.

    tokens: dict with 'opcode', 'rd', 'rs1', 'imm' and 'lineno'
    returns: (32 character binary string, dict of the encoded fields)
    Exits with status 1 when the tokens cannot be encoded.
    '''
    opcode = tokens['opcode']

    try:
        funct3 = self.CONST.FUNCT3_LOAD[opcode]
        bin_opcode = self.CONST.BOP_LOAD
        bin_rs1 = self.get_bin_register(tokens['rs1'])
        bin_rd = self.get_bin_register(tokens['rd'])
        imm = tokens['imm']
    except Exception:
        # Any failure here means the parser handed over malformed tokens.
        cp.cprint_fail("Internal Error: LOAD: could not parse " +
                       "tokens in " + str(tokens['lineno']))
        exit(1)

    bin_str = imm + bin_rs1 + funct3 + bin_rd + bin_opcode
    assert (len(bin_str) == 32)

    if imm[-2:] != '00':
        cp.cprint_warn_32("32_Warning:" + str(tokens['lineno']) +
                          ": Missaligned address." +
                          " Address should be 4 bytes aligned.")

    tok_dict = {
        'opcode': bin_opcode,
        'funct': funct3,
        'rs1': bin_rs1,
        'rd': bin_rd,
        'imm': imm
    }
    return bin_str, tok_dict
def op_branch(self, tokens):
    '''
    Encode a conditional branch instruction.

    Layout: imm[12|10:5] rs2 rs1 funct3 imm[4:1|11] opcode
    The immediate arrives in tokens as a tuple (imm_12_10_5, imm_4_1_11).
    A 32_Warning is printed when the second to last character of
    imm_4_1_11 is not '0'; the instruction is still encoded.

    tokens: dict with 'opcode', 'rs1', 'rs2', 'imm' and 'lineno'
    returns: (32 character binary string, dict of the encoded fields)
    Exits with status 1 when the tokens cannot be encoded.
    '''
    opcode = tokens['opcode']

    try:
        funct3 = self.CONST.FUNCT3_BRANCH[opcode]
        bin_opcode = self.CONST.BOP_BRANCH
        bin_rs1 = self.get_bin_register(tokens['rs1'])
        bin_rs2 = self.get_bin_register(tokens['rs2'])
        imm_12_10_5, imm_4_1_11 = tokens['imm']
    except Exception:
        # Any failure here means the parser handed over malformed tokens.
        cp.cprint_fail("Internal Error: BRANCH: could not parse" +
                       " tokens in " + str(tokens['lineno']))
        exit(1)

    bin_str = imm_12_10_5 + bin_rs2 + bin_rs1 + funct3
    bin_str += imm_4_1_11 + bin_opcode

    # imm_4_1_11 ends with bit 1 followed by bit 11, so [-2] is bit 1.
    if imm_4_1_11[-2] != '0':
        cp.cprint_warn_32("32_Warning:" + str(tokens['lineno']) +
                          ": Missaligned address." +
                          " Address should be 4 bytes aligned.")

    assert (len(bin_str) == 32)

    tok_dict = {
        'opcode': bin_opcode,
        'funct': funct3,
        'rs1': bin_rs1,
        'rs2': bin_rs2,
        'imm_12_10_5': imm_12_10_5,
        'imm_4_1_11': imm_4_1_11
    }
    return bin_str, tok_dict
def op_arith(self, tokens):
    '''
    Encode a register-register arithmetic instruction.

    Layout: funct7 rs2 rs1 funct3 rd opcode

    tokens: dict with 'opcode', 'rd', 'rs1', 'rs2' and 'lineno'
    returns: (32 character binary string, dict of the encoded fields)
    Exits with status 1 when the tokens cannot be encoded.
    '''
    opcode = tokens['opcode']

    try:
        funct3 = self.CONST.FUNCT3_ARITH[opcode]
        funct7 = self.CONST.FUNCT7_ARITH[opcode]
        bin_opcode = self.CONST.BOP_ARITH
        rs1 = tokens['rs1']
        rs2 = tokens['rs2']
        rd = tokens['rd']
        bin_rs1 = self.get_bin_register(rs1)
        bin_rs2 = self.get_bin_register(rs2)
        bin_rd = self.get_bin_register(rd)
    except Exception:
        # Any failure here means the parser handed over malformed tokens.
        cp.cprint_fail("Internal Error: ARITH: could not parse " +
                       "tokens in " + str(tokens['lineno']))
        exit(1)

    bin_str = funct7 + bin_rs2 + bin_rs1 + funct3 + bin_rd + bin_opcode
    assert (len(bin_str) == 32)

    tok_dict = {
        'opcode': bin_opcode,
        'funct3': funct3,
        'funct7': funct7,
        'rs1': bin_rs1,
        'rd': bin_rd,
        'rs2': bin_rs2
    }
    return bin_str, tok_dict
def convert_to_binary(self, tokens):
    '''
    The driver function for converting tokens to machine code.

    Looks up which group of opcodes the instruction belongs to and hands
    the tokens to the matching op_* encoder.

    tokens: token dict of one instruction, must contain 'opcode'
    returns: whatever the selected encoder returns (the encoders visible
             in this file return a tuple (instr, dict), where instr is
             the binary string and dict holds the individually encoded
             fields), or None when the opcode is missing or not
             implemented
    '''
    try:
        opcode = tokens['opcode']
    except KeyError:
        print("Internal Error: Key not found (opcode)")
        return None

    # (opcode group on self.CONST, encoder method on self), tried in
    # order. Both are looked up by name only when reached.
    dispatch = (
        ('INSTR_BOP_LUI', 'op_lui'),
        ('INSTR_BOP_AUIPC', 'op_auipc'),
        ('INSTR_BOP_JAL', 'op_jal'),
        ('INSTR_BOP_JALR', 'op_jalr'),
        ('INSTR_BOP_BRANCH', 'op_branch'),
        ('INSTR_BOP_LOAD', 'op_load'),
        ('INSTR_BOP_STORE', 'op_store'),
        ('INSTR_BOP_ARITHI', 'op_arithi'),
        ('INSTR_BOP_ARITH', 'op_arith'),
    )
    for group_name, encoder_name in dispatch:
        if opcode in getattr(self.CONST, group_name):
            return getattr(self, encoder_name)(tokens)

    cp.cprint_fail("Error:" + str(tokens['lineno']) +
                   ": Opcode: '%s' not implemented" % opcode)
    return None
def p_statement_SB__JALR_LABEL(p):
    'statement : OPCODE register COMMA register COMMA LABEL NEWLINE'
    # Two registers followed by a label: accepted for branch opcodes
    # (mcc.INSTR_TYPE_SB) and for JALR. The label is kept symbolic here.
    opcode = p[1]

    if opcode in mcc.INSTR_TYPE_SB:
        # Branch: the registers are the two compared sources
        p[0] = {
            'opcode': opcode,
            'rs1': p[2],
            'rs2': p[4],
            'label': p[6],
            'lineno': p.lineno(1)
        }
        return

    if opcode == mcc.INSTR_JALR:
        # JALR: destination register first, then the base register
        p[0] = {
            'opcode': opcode,
            'rd': p[2],
            'rs1': p[4],
            'label': p[6],
            'lineno': p.lineno(1)
        }
        return

    cp.cprint_fail("Error:" + str(p.lineno(1)) +
                   ": Incorrect opcode or arguments")
    raise SyntaxError
def parse_input(infile, **kwargs):
    '''
    Assemble infile into the file named by kwargs['outfile'].

    Runs the two passes over the input: the first resolves label
    addresses, the second writes the binary encoding of every
    instruction. Both files are closed on every way out of the function.

    infile: path of the source file
    kwargs: must contain 'outfile'; the whole dict is handed to both
            passes
    returns: 1 when either file cannot be opened, None otherwise
    '''
    try:
        fin = open(infile, 'r')
    except IOError:
        cp.cprint_fail("Error: File does not seem to exist or" +
                       " you do not have the required permissions.")
        return 1

    with fin:
        outfile = kwargs['outfile']
        try:
            fout = open(outfile, 'w')
        except IOError:
            cp.cprint_fail("Error: Could not create '" + outfile +
                           "' for output")
            return 1

        with fout:
            # Pass 1: Address resolution of labels
            symbols_table = parse_pass_one(fin, kwargs)

            # Pass 2: Mapping instructions to binary coding
            parse_pass_two(fin, fout, symbols_table, kwargs)
def op_auipc(self, tokens):
    '''
    Encode an AUIPC instruction.

    Layout: imm[31:12] rd opcode

    tokens: dict with 'rd', 'imm' and 'lineno'
    returns: (32 character binary string, dict of the encoded fields)
    Exits with status 1 when the tokens cannot be encoded.
    '''
    try:
        bin_opcode = self.CONST.BOP_AUIPC
        bin_rd = self.get_bin_register(tokens['rd'])
        imm = tokens['imm']
    except Exception:
        # Any failure here means the parser handed over malformed tokens.
        cp.cprint_fail("Internal Error: AUIPC: could not parse " +
                       "tokens in " + str(tokens['lineno']))
        exit(1)

    bin_str = imm + bin_rd + bin_opcode
    assert (len(bin_str) == 32)

    tok_dict = {'opcode': bin_opcode, 'rd': bin_rd, 'imm': imm}
    return bin_str, tok_dict
def p_statement_I_S_SB(p):
    'statement : OPCODE register COMMA register COMMA IMMEDIATE NEWLINE'
    # Two registers followed by an immediate. The opcode decides how the
    # immediate is encoded and what the two registers mean:
    #   mcc.INSTR_TYPE_I  -> rd, rs1
    #   mcc.INSTR_TYPE_S  -> rs1, rs2
    #   mcc.INSTR_TYPE_SB -> rs1, rs2
    opcode = p[1]
    first_reg = p[2]
    second_reg = p[4]

    if opcode in mcc.INSTR_TYPE_I:
        ret, imm, msg = get_imm_I(p[6], p.lineno(6))
        if not ret:
            cp.cprint_fail("Error:" + str(p.lineno(6)) + ":" + msg)
            raise SyntaxError
        p[0] = {
            'opcode': opcode,
            'rd': first_reg,
            'rs1': second_reg,
            'imm': imm,
            'lineno': p.lineno(1)
        }
        return

    if opcode in mcc.INSTR_TYPE_S:
        ret, imm, msg = get_imm_S(p[6], p.lineno(6))
        if not ret:
            cp.cprint_fail("Error:" + str(p.lineno(1)) + ":" + msg)
            raise SyntaxError
        p[0] = {
            'opcode': opcode,
            'rs1': first_reg,
            'rs2': second_reg,
            'imm': imm,
            'lineno': p.lineno(1)
        }
        return

    if opcode in mcc.INSTR_TYPE_SB:
        # SB (BRANCH)
        ret, imm, msg = get_imm_SB(p[6], p.lineno(6))
        if not ret:
            cp.cprint_fail("Error:" + str(p.lineno(1)) + ":" + msg)
            raise SyntaxError
        p[0] = {
            'opcode': opcode,
            'rs1': first_reg,
            'rs2': second_reg,
            'imm': imm,
            'lineno': p.lineno(1)
        }
        return

    # Opcode belongs to none of the three groups
    cp.cprint_fail("Error:" + str(p.lineno(1)) +
                   ": Incorrect opcode or arguments")
    raise SyntaxError
def p_statement_U_UJ(p):
    'statement : OPCODE register COMMA IMMEDIATE NEWLINE'
    # One register followed by an immediate. Opcodes in mcc.INSTR_TYPE_U
    # and mcc.INSTR_TYPE_UJ are accepted; they differ only in how the
    # immediate is encoded.
    opcode = p[1]

    if opcode in mcc.INSTR_TYPE_U:
        ret, imm, msg = get_imm_U(p[4], p.lineno(4))
    elif opcode in mcc.INSTR_TYPE_UJ:
        # UJ Type
        ret, imm, msg = get_imm_UJ(p[4], p.lineno(4))
    else:
        cp.cprint_fail("Error:" + str(p.lineno(1)) +
                       ": Incorrect opcode or arguments")
        raise SyntaxError

    if not ret:
        cp.cprint_fail("Error:" + str(p.lineno(1)) + ":" + msg)
        raise SyntaxError

    p[0] = {'opcode': opcode, 'rd': p[2], 'imm': imm, 'lineno': p.lineno(1)}