def setUpClass(cls):
    """Build the fixtures shared by the tests in this class.

    Reads the x86 and AArch64 sample assembly files, parses them, reduces
    each to its ``reduce_to_section`` result, creates the machine models and
    semantics objects, and finally annotates every kernel entry with
    source/destination and throughput/latency information.
    """
    # parsers and raw assembly text
    cls.parser_x86 = ParserX86ATT()
    cls.parser_AArch64 = ParserAArch64()
    with open(cls._find_file('kernel_x86.s')) as x86_file:
        cls.code_x86 = x86_file.read()
    with open(cls._find_file('kernel_aarch64.s')) as aarch64_file:
        cls.code_AArch64 = aarch64_file.read()

    # kernels reduced to the analysed section
    parsed_x86 = cls.parser_x86.parse_file(cls.code_x86)
    cls.kernel_x86 = reduce_to_section(parsed_x86, 'x86')
    parsed_aarch64 = cls.parser_AArch64.parse_file(cls.code_AArch64)
    cls.kernel_AArch64 = reduce_to_section(parsed_aarch64, 'aarch64')

    # machine models and the semantics built on top of them
    csx_yaml = os.path.join(cls.MODULE_DATA_DIR, 'csx.yml')
    cls.machine_model_csx = MachineModel(path_to_yaml=csx_yaml)
    tx2_yaml = os.path.join(cls.MODULE_DATA_DIR, 'tx2.yml')
    cls.machine_model_tx2 = MachineModel(path_to_yaml=tx2_yaml)
    x86_isa_yaml = os.path.join(cls.MODULE_DATA_DIR, 'isa/x86.yml')
    cls.semantics_csx = ArchSemantics(cls.machine_model_csx, path_to_yaml=x86_isa_yaml)
    aarch64_isa_yaml = os.path.join(cls.MODULE_DATA_DIR, 'isa/aarch64.yml')
    cls.semantics_tx2 = ArchSemantics(cls.machine_model_tx2, path_to_yaml=aarch64_isa_yaml)
    cls.machine_model_zen = MachineModel(arch='zen1')

    # annotate each kernel entry with the semantics of its architecture
    annotation_plan = (
        (cls.semantics_csx, cls.kernel_x86),
        (cls.semantics_tx2, cls.kernel_AArch64),
    )
    for semantics, kernel in annotation_plan:
        for instruction in kernel:
            semantics.assign_src_dst(instruction)
            semantics.assign_tp_lt(instruction)
def test_marker_matching_AArch64(self):
    """Check AArch64 marker detection for every marker spelling combination.

    Each start/end ``mov`` variant is combined with each way of spreading
    the ``.byte`` values over one to four lines. For every combination the
    extracted section must have the kernel's length and must equal the
    kernel parsed on its own with the matching ``start_line`` offset.
    """
    # building blocks of the sample code
    bytes_variations = [
        ".byte 213,3,32,31\n",
        ".byte 213,3,32\n" ".byte 31\n",
        ".byte 213,3\n" ".byte 32,31\n",
        ".byte 213\n" ".byte 3,32,31\n",
        ".byte 213,3\n" ".byte 32\n" ".byte 31\n",
        ".byte 213\n" ".byte 3,32\n" ".byte 31\n",
        ".byte 213\n" ".byte 3\n" ".byte 32,31\n",
        ".byte 213\n" ".byte 3\n" ".byte 32\n" ".byte 31\n",
    ]
    mov_start_variations = [
        "mov x1, #111\n",
        "mov x1, 111 // should work as well\n",
    ]
    mov_end_variations = [
        "mov x1, #222 // preferred way\n",
        "mov x1, 222\n",
    ]
    prologue = (
        "mov x12, xzr\n"
        "\tldp x9, x10, [sp, #16] // 8-byte Folded Reload\n"
        " .p2align 6\n"
    )
    kernel = (
        ".LBB0_28:\n"
        "fmul v7.2d, v7.2d, v19.2d\n"
        "stp q0, q1, [x10, #-32]\n"
        "b.ne .LBB0_28\n"
    )
    epilogue = ".LBB0_29: // Parent Loop BB0_20 Depth=1\n" "bl dummy\n"
    # number of non-empty lines in the kernel
    kernel_length = sum(1 for line in kernel.split("\n") if line)

    # same iteration order as four nested loops: start mov, start bytes,
    # end mov, end bytes
    combinations = (
        (start_mov, start_bytes, end_mov, end_bytes)
        for start_mov in mov_start_variations
        for start_bytes in bytes_variations
        for end_mov in mov_end_variations
        for end_bytes in bytes_variations
    )
    for start_mov, start_bytes, end_mov, end_bytes in combinations:
        leading_code = prologue + start_mov + start_bytes
        sample_code = leading_code + kernel + end_mov + end_bytes + epilogue
        with self.subTest(
            mov_start=start_mov,
            bytes_start=start_bytes,
            mov_end=end_mov,
            bytes_end=end_bytes,
        ):
            sample_kernel = reduce_to_section(
                self.parser_AArch.parse_file(sample_code), "AArch64"
            )
            self.assertEqual(len(sample_kernel), kernel_length)
            # the kernel starts after all non-empty lines that precede it
            kernel_start = sum(1 for line in leading_code.split("\n") if line)
            parsed_kernel = self.parser_AArch.parse_file(kernel, start_line=kernel_start)
            self.assertEqual(sample_kernel, parsed_kernel)
def test_marker_special_cases_x86(self):
    """Check x86 section extraction for missing, edge-placed and empty markers.

    Every sample is a ``(name, ignored prologue, expected section, ignored
    epilogue)`` tuple. For each one the extracted section must have the
    expected number of lines and must equal the expected section parsed on
    its own with the matching ``start_line`` offset.

    Each sample runs in its own ``subTest`` so that one failing sample does
    not hide the results of the remaining ones.
    """
    bytes_line = (
        '.byte 100\n'
        '.byte 103\n'
        '.byte 144\n')
    start_marker = 'movl $111, %ebx\n' + bytes_line
    end_marker = 'movl $222, %ebx\n' + bytes_line
    prologue = (
        'movl -88(%rbp), %r10d\n'
        'xorl %r11d, %r11d\n'
        '.p2align 4,,10\n')
    kernel = (
        '.L3: #L3\n'
        'vmovsd .LC1(%rip), %xmm0\n'
        'vmovsd %xmm0, (%r15,%rcx,8)\n'
        'cmpl %ecx, %ebx\n'
        'jle .L3\n')
    epilogue = (
        'leaq -56(%rbp), %rsi\n'
        'movl %r10d, -88(%rbp)\n'
        'call timing\n')
    samples = [
        # (test name,
        #  ignored prologue, section to be extracted, ignored epilogue)
        ("markers",
         prologue + start_marker, kernel, end_marker + epilogue),
        ("marker at file start",
         start_marker, kernel, end_marker + epilogue),
        ("no start marker",
         '', prologue + kernel, end_marker + epilogue),
        ("marker at file end",
         prologue + start_marker, kernel, end_marker),
        ("no end marker",
         prologue + start_marker, kernel + epilogue, ''),
        ("empty kernel",
         prologue + start_marker, '', end_marker + epilogue),
    ]
    # ``section`` (not ``kernel``) so the fixture string above is not shadowed
    for test_name, pro, section, epi in samples:
        with self.subTest(sample=test_name):
            code = pro + section + epi
            parsed = self.parser_x86.parse_file(code)
            test_kernel = reduce_to_section(parsed, 'x86')
            # an empty section would otherwise count as one line after split
            kernel_length = len(section.strip().split('\n')) if section else 0
            self.assertEqual(
                len(test_kernel), kernel_length,
                msg="Invalid exctracted kernel length on {!r} sample".format(test_name))
            # the section starts right after the ignored prologue lines
            kernel_start = len(pro.strip().split('\n')) if pro else 0
            parsed_kernel = self.parser_x86.parse_file(section, start_line=kernel_start)
            self.assertEqual(
                test_kernel, parsed_kernel,
                msg="Invalid exctracted kernel on {!r}".format(test_name))
def test_marker_special_cases_AArch(self):
    """Check AArch64 section extraction for missing, edge-placed and empty markers.

    Every sample is a ``(name, ignored prologue, expected section, ignored
    epilogue)`` tuple. For each one the extracted section must have the
    expected number of lines and must equal the expected section parsed on
    its own with the matching ``start_line`` offset.

    Each sample runs in its own ``subTest`` so that one failing sample does
    not hide the results of the remaining ones.
    """
    bytes_line = '.byte 213,3,32,31\n'
    start_marker = 'mov x1, #111\n' + bytes_line
    end_marker = 'mov x1, #222\n' + bytes_line
    prologue = (
        'dup v0.2d, x14\n'
        'neg x9, x9\n'
        '.p2align 6\n')
    kernel = (
        '.LBB0_28:\n'
        + 'fmul v7.2d, v7.2d, v19.2d\n'
        + 'stp q0, q1, [x10, #-32]\n'
        + 'b.ne .LBB0_28\n')
    epilogue = (
        '.LBB0_29: // Parent Loop BB0_20 Depth=1\n'
        'bl dummy\n')
    samples = [
        # (test name,
        #  ignored prologue, section to be extracted, ignored epilogue)
        ("markers",
         prologue + start_marker, kernel, end_marker + epilogue),
        ("marker at file start",
         start_marker, kernel, end_marker + epilogue),
        ("no start marker",
         '', prologue + kernel, end_marker + epilogue),
        ("marker at file end",
         prologue + start_marker, kernel, end_marker),
        ("no end marker",
         prologue + start_marker, kernel + epilogue, ''),
        ("empty kernel",
         prologue + start_marker, '', end_marker + epilogue),
    ]
    # ``section`` (not ``kernel``) so the fixture string above is not shadowed
    for test_name, pro, section, epi in samples:
        with self.subTest(sample=test_name):
            code = pro + section + epi
            parsed = self.parser_AArch.parse_file(code)
            test_kernel = reduce_to_section(parsed, 'AArch64')
            # an empty section would otherwise count as one line after split
            kernel_length = len(section.strip().split('\n')) if section else 0
            self.assertEqual(
                len(test_kernel), kernel_length,
                msg="Invalid exctracted kernel length on {!r} sample".format(test_name))
            # the section starts right after the ignored prologue lines
            kernel_start = len(pro.strip().split('\n')) if pro else 0
            parsed_kernel = self.parser_AArch.parse_file(section, start_line=kernel_start)
            self.assertEqual(
                test_kernel, parsed_kernel,
                msg="Invalid exctracted kernel on {!r}".format(test_name))
def __init__(self, arch, code):
    """Parse ``code`` for ``arch`` and attach semantics to the reduced kernel.

    Builds the machine model and its semantics for ``arch``, picks the
    parser that matches the model's ISA, parses ``code``, reduces it with
    ``reduce_to_section`` and stores the annotated result in ``self.kernel``.

    :param arch: architecture identifier passed to ``MachineModel(arch=...)``
    :param code: assembly source text handed to the parser's ``parse_file``
    :raises ValueError: if the machine model reports an ISA other than
        ``aarch64`` or ``x86`` (compared case-insensitively)
    """
    self.machine_model = MachineModel(arch=arch)
    self.semantics = ArchSemantics(self.machine_model)
    isa = self.machine_model.get_ISA().lower()
    if isa == 'aarch64':
        self.parser = ParserAArch64()
    elif isa == 'x86':
        self.parser = ParserX86ATT()
    else:
        # Fail here with a clear message; otherwise ``self.parser`` stays
        # unset and the next line raises an opaque AttributeError.
        raise ValueError(
            "Unsupported ISA {!r} for architecture {!r}".format(isa, arch))
    parsed_code = self.parser.parse_file(code)
    self.kernel = reduce_to_section(parsed_code, self.machine_model.get_ISA())
    self.semantics.add_semantics(self.kernel)
def test_marker_matching_x86(self):
    """Check x86 marker detection for every marker spelling combination.

    Each start/end ``mov``/``movl`` variant is combined with each way of
    spreading the ``.byte`` values over one to three lines. For every
    combination the extracted section must have the kernel's length and
    must equal the kernel parsed on its own with the matching
    ``start_line`` offset.
    """
    # building blocks of the sample code
    bytes_variations = [
        ".byte 100,103,144\n",
        ".byte 100,103\n" ".byte 144\n",
        ".byte 100\n" ".byte 103,144\n",
        (
            ".byte 100 # IACA MARKER UTILITY\n"
            ".byte 103 # IACA MARKER UTILITY\n"
            ".byte 144 # IACA MARKER UTILITY\n"
        ),
    ]
    mov_start_variations = [
        "movl $111, %ebx # IACA START\n",
        "mov $111, %ebx # IACA START\n",
    ]
    mov_end_variations = [
        "movl $222, %ebx # IACA END\n",
        "mov $222, %ebx # IACA END\n",
    ]
    prologue = "movl -92(%rbp), %r11d\n" "movl $111, %ebx\n"
    kernel = (
        "vfmadd132sd (%r15,%rcx,8), %xmm5, %xmm0\n"
        "vmovsd %xmm0, (%r14,%rcx,8)\n"
        "cmpl %ebx, %ecx\n"
        "jge .L8\n"
    )
    epilogue = ".LE9:\t\t#12.2\n" "call dummy\n"
    # number of non-empty lines in the kernel
    kernel_length = sum(1 for line in kernel.split("\n") if line)

    # same iteration order as four nested loops: start mov, start bytes,
    # end mov, end bytes
    combinations = (
        (start_mov, start_bytes, end_mov, end_bytes)
        for start_mov in mov_start_variations
        for start_bytes in bytes_variations
        for end_mov in mov_end_variations
        for end_bytes in bytes_variations
    )
    for start_mov, start_bytes, end_mov, end_bytes in combinations:
        leading_code = prologue + start_mov + start_bytes
        sample_code = leading_code + kernel + end_mov + end_bytes + epilogue
        with self.subTest(
            mov_start=start_mov,
            bytes_start=start_bytes,
            mov_end=end_mov,
            bytes_end=end_bytes,
        ):
            sample_kernel = reduce_to_section(
                self.parser_x86.parse_file(sample_code), "x86"
            )
            self.assertEqual(len(sample_kernel), kernel_length)
            # the kernel starts after all non-empty lines that precede it
            kernel_start = sum(1 for line in leading_code.split("\n") if line)
            parsed_kernel = self.parser_x86.parse_file(kernel, start_line=kernel_start)
            self.assertEqual(sample_kernel, parsed_kernel)
def inspect(args, output_file=sys.stdout):
    """
    Does the actual throughput and critical path analysis of OSACA and prints it to the
    terminal.

    If no architecture was given and parsing with the detected ISA fails, the other ISA's
    default architecture is tried once. If an architecture was given explicitly and parsing
    fails, the traceback is printed to stderr and the process exits with status 1.

    :param args: arguments given from :class:`~argparse.ArgumentParser` after parsing
    :param output_file: Define the stream for output, defaults to :class:`sys.stdout`
    :type output_file: stream, optional
    """
    # Read file
    code = args.file.read()
    # Detect ISA if necessary
    arch = args.arch if args.arch is not None else DEFAULT_ARCHS[BaseParser.detect_ISA(code)]
    print_arch_warning = not args.arch
    isa = MachineModel.get_isa_for_arch(arch)
    verbose = args.verbose
    ignore_unknown = args.ignore_unknown

    # Parse file
    parser = get_asm_parser(arch)
    try:
        parsed_code = parser.parse_file(code)
    except Exception:
        # Only real errors trigger the fallback: a bare ``except`` would also
        # intercept KeyboardInterrupt and SystemExit.
        # probably the wrong parser based on heuristic
        if args.arch is None:
            # change ISA and try again
            arch = (
                DEFAULT_ARCHS['x86']
                if BaseParser.detect_ISA(code) == 'aarch64'
                else DEFAULT_ARCHS['aarch64']
            )
            isa = MachineModel.get_isa_for_arch(arch)
            parser = get_asm_parser(arch)
            parsed_code = parser.parse_file(code)
        else:
            traceback.print_exc(file=sys.stderr)
            sys.exit(1)

    # Reduce to marked kernel or chosen section and add semantics
    if args.lines:
        line_range = get_line_range(args.lines)
        kernel = [line for line in parsed_code if line['line_number'] in line_range]
        print_length_warning = False
    else:
        kernel = reduce_to_section(parsed_code, isa)
        # Print warning if kernel has no markers and is larger than threshold (100)
        print_length_warning = len(kernel) == len(parsed_code) and len(kernel) > 100
    machine_model = MachineModel(arch=arch)
    semantics = ArchSemantics(machine_model)
    semantics.add_semantics(kernel)
    # Do optimal schedule for kernel throughput if wished
    if not args.fixed:
        semantics.assign_optimal_throughput(kernel)

    # Create DiGraphs
    kernel_graph = KernelDG(kernel, parser, machine_model)
    if args.dotpath is not None:
        # a dotpath of '.' is forwarded as None
        kernel_graph.export_graph(args.dotpath if args.dotpath != '.' else None)
    # Print analysis
    frontend = Frontend(args.file.name, arch=arch)
    print(
        frontend.full_analysis(
            kernel,
            kernel_graph,
            ignore_unknown=ignore_unknown,
            arch_warning=print_arch_warning,
            length_warning=print_length_warning,
            verbose=verbose
        ),
        file=output_file,
    )
def test_marker_detection_x86(self):
    """The section reduced from the parsed x86 file has 9 entries, lines 146 to 154."""
    marked_section = reduce_to_section(self.parsed_x86, "x86")
    self.assertEqual(len(marked_section), 9)
    # check the first and last entry of the section, in that order
    for position, expected_line in ((0, 146), (-1, 154)):
        self.assertEqual(marked_section[position].line_number, expected_line)
def test_marker_detection_AArch64(self):
    """The section reduced from the parsed AArch64 file has 138 entries, lines 307 to 444."""
    marked_section = reduce_to_section(self.parsed_AArch, "AArch64")
    self.assertEqual(len(marked_section), 138)
    # check the first and last entry of the section, in that order
    for position, expected_line in ((0, 307), (-1, 444)):
        self.assertEqual(marked_section[position].line_number, expected_line)
def setUpClass(cls):
    """Build the fixtures shared by the tests in this class.

    Reads six sample assembly files, parses them and reduces each with
    ``reduce_to_section``, creates the machine models and the ISA/arch
    semantics objects, and finally annotates every kernel entry with
    source/destination and throughput/latency information.
    """

    def read_sample(file_name):
        # return the full text of one sample file located via ``_find_file``
        with open(cls._find_file(file_name)) as handle:
            return handle.read()

    def yaml_path(relative_name):
        # path of a data file below ``MODULE_DATA_DIR``
        return os.path.join(cls.MODULE_DATA_DIR, relative_name)

    # parsers and raw assembly text
    cls.parser_x86 = ParserX86ATT()
    cls.parser_AArch64 = ParserAArch64()
    cls.code_x86 = read_sample("kernel_x86.s")
    cls.code_x86_memdep = read_sample("kernel_x86_memdep.s")
    cls.code_x86_long_LCD = read_sample("kernel_x86_long_LCD.s")
    cls.code_aarch64_memdep = read_sample("kernel_aarch64_memdep.s")
    cls.code_AArch64 = read_sample("kernel_aarch64.s")
    cls.code_AArch64_SVE = read_sample("kernel_aarch64_sve.s")

    # kernels reduced to the analysed section
    parse_x86 = cls.parser_x86.parse_file
    parse_aarch64 = cls.parser_AArch64.parse_file
    cls.kernel_x86 = reduce_to_section(parse_x86(cls.code_x86), "x86")
    cls.kernel_x86_memdep = reduce_to_section(parse_x86(cls.code_x86_memdep), "x86")
    cls.kernel_x86_long_LCD = reduce_to_section(parse_x86(cls.code_x86_long_LCD), "x86")
    cls.kernel_AArch64 = reduce_to_section(parse_aarch64(cls.code_AArch64), "aarch64")
    cls.kernel_aarch64_memdep = reduce_to_section(
        parse_aarch64(cls.code_aarch64_memdep), "aarch64"
    )
    cls.kernel_aarch64_SVE = reduce_to_section(
        parse_aarch64(cls.code_AArch64_SVE), "aarch64"
    )

    # machine models and the semantics built on top of them
    cls.machine_model_csx = MachineModel(path_to_yaml=yaml_path("csx.yml"))
    cls.machine_model_tx2 = MachineModel(path_to_yaml=yaml_path("tx2.yml"))
    cls.machine_model_a64fx = MachineModel(path_to_yaml=yaml_path("a64fx.yml"))
    cls.semantics_x86 = ISASemantics("x86")
    cls.semantics_csx = ArchSemantics(
        cls.machine_model_csx, path_to_yaml=yaml_path("isa/x86.yml")
    )
    cls.semantics_aarch64 = ISASemantics("aarch64")
    cls.semantics_tx2 = ArchSemantics(
        cls.machine_model_tx2, path_to_yaml=yaml_path("isa/aarch64.yml")
    )
    cls.semantics_a64fx = ArchSemantics(
        cls.machine_model_a64fx, path_to_yaml=yaml_path("isa/aarch64.yml")
    )
    cls.machine_model_zen = MachineModel(arch="zen1")

    # annotate each kernel entry with the semantics of its architecture
    annotation_plan = (
        (cls.semantics_csx, cls.kernel_x86),
        (cls.semantics_csx, cls.kernel_x86_memdep),
        (cls.semantics_csx, cls.kernel_x86_long_LCD),
        (cls.semantics_tx2, cls.kernel_AArch64),
        (cls.semantics_tx2, cls.kernel_aarch64_memdep),
        (cls.semantics_a64fx, cls.kernel_aarch64_SVE),
    )
    for semantics, kernel in annotation_plan:
        for instruction in kernel:
            semantics.assign_src_dst(instruction)
            semantics.assign_tp_lt(instruction)