def test_loop_carried_dependency_x86(self):
    # Check the loop-carried dependencies (LCDs) reported for the x86 kernel:
    # exactly two entries are expected, and each entry must be rooted at the
    # instruction form of its own graph node with a dependency list of length 1.
    dg = KernelDG(
        self.kernel_x86,
        self.parser_x86,
        self.machine_model_csx,
        self.semantics_csx,
    )
    lc_deps = dg.get_loopcarried_dependencies()
    self.assertEqual(len(lc_deps), 2)

    def node_form(node_id):
        # Instruction form attached to the graph node with the given ID.
        return dg.dg.nodes(data=True)[node_id]["instruction_form"]

    # First entry: ID 8
    # Second entry:
    #   w/ flag dependencies: ID 9 w/ len=2
    #   w/o flag dependencies: ID 5 w/ len=1
    #   TODO discuss
    for node_id in (8, 5):
        entry = lc_deps[node_id]
        self.assertEqual(entry["root"], node_form(node_id))
        self.assertEqual(len(entry["dependencies"]), 1)
        self.assertEqual(entry["dependencies"][0][0], node_form(node_id))
def full_analysis(
    self,
    kernel,
    kernel_dg: KernelDG,
    ignore_unknown=False,
    arch_warning=False,
    length_warning=False,
    verbose=False,
):
    """
    Build the full analysis report including header, user warnings, the symbol map,
    the combined TP/CP/LCD view and the list based LCD view.

    :param kernel: kernel to report on
    :type kernel: list
    :param kernel_dg: directed graph containing CP and LCD
    :type kernel_dg: :class:`~osaca.semantics.KernelDG`
    :param ignore_unknown: flag for ignore warning if performance data is missing, defaults to
        `False`
    :type ignore_unknown: boolean, optional
    :param arch_warning: flag for additional user warning to specify micro-arch, defaults to
        `False`
    :type arch_warning: boolean, optional
    :param length_warning: flag for additional user warning to specify kernel length with
        --lines, defaults to `False`
    :type length_warning: boolean, optional
    :param verbose: flag for verbosity level, defaults to `False`; accepted for interface
        compatibility but not evaluated by this method
    :type verbose: boolean, optional
    :returns: the report sections concatenated in the order header, user warnings, symbol map,
        combined view, LCD list view
    """
    header = self._header_report()
    warnings = self._user_warnings(arch_warning, length_warning)
    symbol_map = self._symbol_map()
    critical_path = kernel_dg.get_critical_path()
    # Query the loop-carried dependencies once and feed the same result to both views
    # instead of asking the graph twice.
    lcd = kernel_dg.get_loopcarried_dependencies()
    combined = self.combined_view(kernel, critical_path, lcd, ignore_unknown)
    lcd_listing = self.loopcarried_dependencies(lcd)
    return header + warnings + symbol_map + combined + lcd_listing
def get_lcd(self):
    """
    Return the largest summed ``latency_lcd`` over all loop-carried dependencies.

    A :class:`KernelDG` is built from ``self.kernel``, ``self.parser`` and
    ``self.machine_model``; for every reported dependency the ``latency_lcd`` values of
    its ``dependencies`` entries are summed up. ``0.0`` is returned when no sum is
    strictly greater than it (including the case of no dependencies at all).
    """
    graph = KernelDG(self.kernel, self.parser, self.machine_model)
    chains = graph.get_loopcarried_dependencies()
    chain_latencies = [
        sum(step['latency_lcd'] for step in chains[root]['dependencies'])
        for root in chains
    ]
    # 0.0 goes first so that it wins ties: max() only replaces its candidate
    # when a later item is strictly greater.
    return max([0.0] + chain_latencies)
def test_loop_carried_dependency_aarch64(self):
    # Check the loop-carried dependencies of the AArch64 memory-dependency kernel:
    # two entries overall, and the one keyed by 6 must carry the expected total
    # latency and the expected (line number, latency) sequence.
    graph_inputs = (
        self.kernel_aarch64_memdep,
        self.parser_AArch64,
        self.machine_model_tx2,
        self.semantics_tx2,
    )
    dg = KernelDG(*graph_inputs)
    lc_deps = dg.get_loopcarried_dependencies()
    self.assertEqual(len(lc_deps), 2)

    # based on line 6
    self.assertEqual(lc_deps[6]["latency"], 28.0)
    observed_chain = []
    for iform, lat in lc_deps[6]['dependencies']:
        observed_chain.append((iform.line_number, lat))
    expected_chain = [(6, 4.0), (10, 6.0), (11, 6.0), (12, 6.0), (13, 6.0), (14, 0)]
    self.assertEqual(observed_chain, expected_chain)
def test_kernelDG_SVE(self):
    # Smoke test: constructing the dependency graph for the SVE kernel must not raise.
    graph_inputs = (
        self.kernel_aarch64_SVE,
        self.parser_AArch64,
        self.machine_model_a64fx,
        self.semantics_a64fx,
    )
    KernelDG(*graph_inputs)
def test_memdependency_x86(self):
    # Check the dependents reported for the x86 memory-dependency kernel and that
    # the graph can be exported.
    dg = KernelDG(
        self.kernel_x86_memdep,
        self.parser_x86,
        self.machine_model_csx,
        self.semantics_csx,
    )
    self.assertTrue(nx.algorithms.dag.is_directed_acyclic_graph(dg.dg))

    # line number -> set of expected dependent line numbers
    for line, dependents in ((3, {6, 8}), (5, {10, 12})):
        found = set(dg.get_dependent_instruction_forms(line_number=line))
        self.assertEqual(found, dependents)

    # calling without any argument has to be rejected
    with self.assertRaises(ValueError):
        dg.get_dependent_instruction_forms()

    # test dot creation
    dg.export_graph(filepath="/dev/null")
def test_timeout_during_loop_carried_dependency(self):
    # Build the graph for the long-LCD kernel twice with different timeouts and
    # compare wall-clock durations. The assertions expect each construction to
    # take longer than its timeout value (in seconds) and the short-timeout run
    # to be more than 7 seconds faster than the long-timeout run.
    def build_seconds(limit):
        # Wall-clock time needed to construct the graph with the given timeout.
        began = time.perf_counter()
        KernelDG(
            self.kernel_x86_long_LCD,
            self.parser_x86,
            self.machine_model_csx,
            self.semantics_x86,
            timeout=limit,
        )
        finished = time.perf_counter()
        return finished - began

    time_10 = build_seconds(10)
    time_2 = build_seconds(2)

    self.assertTrue(time_10 > 10)
    self.assertTrue(2 < time_2)
    self.assertTrue(time_2 < (time_10 - 7))
def test_cyclic_dag(self):
    # Inject a three-node cycle into the graph and expect both path queries to
    # refuse to work on it.
    dg = KernelDG(self.kernel_x86, self.parser_x86, self.machine_model_csx)

    # (source, target, latency) edges forming the cycle 100 -> 101 -> 102 -> 100
    cycle_edges = ((100, 101, 1.0), (101, 102, 2.0), (102, 100, 3.0))
    for source, target, latency in cycle_edges:
        dg.dg.add_edge(source, target, latency=latency)

    with self.assertRaises(NotImplementedError):
        dg.get_critical_path()
    with self.assertRaises(NotImplementedError):
        dg.get_loopcarried_dependencies()
def test_kernelDG_AArch64(self):
    # Check, line by line, which instruction forms depend on each line of the
    # AArch64 kernel, then verify argument validation and graph export.
    dg = KernelDG(self.kernel_AArch64, self.parser_AArch64, self.machine_model_tx2)
    self.assertTrue(nx.algorithms.dag.is_directed_acyclic_graph(dg.dg))

    def dependents(line):
        # Fresh iterator over the dependents of the given line number.
        return dg.get_dependent_instruction_forms(line_number=line)

    def expect_all(expectations):
        # The full set of dependents must match.
        for line, expected in expectations:
            self.assertEqual(set(dependents(line)), expected)

    def expect_first(expectations):
        # The first reported dependent must match.
        for line, expected in expectations:
            self.assertEqual(next(dependents(line)), expected)

    def expect_none(lines):
        # No dependents may be reported.
        for line in lines:
            self.assertEqual(len(list(dependents(line))), 0)

    expect_all(((3, {7, 8}), (4, {9, 10}), (5, {6, 7, 8}), (6, {9, 10})))
    expect_first(((7, 13), (8, 14), (9, 16), (10, 17)))
    expect_all(((11, {13, 14}), (12, {16, 17})))
    expect_first(((13, 15), (14, 15)))
    expect_none((15,))
    expect_first(((16, 18), (17, 18)))
    expect_none((18, 19, 20))

    # calling without any argument has to be rejected
    with self.assertRaises(ValueError):
        dg.get_dependent_instruction_forms()

    # test dot creation
    dg.export_graph(filepath='/dev/null')
def test_kernelDG_x86(self):
    # Expected dependency structure (by line number):
    #
    #   4
    #    \___>6__>7
    #    /
    #   3
    #      5_______>9
    #
    dg = KernelDG(self.kernel_x86, self.parser_x86, self.machine_model_csx)
    self.assertTrue(nx.algorithms.dag.is_directed_acyclic_graph(dg.dg))

    def dependents(line):
        # Fresh iterator over the dependents of the given line number.
        return dg.get_dependent_instruction_forms(line_number=line)

    # lines with exactly one dependent: (line number, expected dependent)
    for line, successor in ((3, 6), (4, 6), (5, 9), (6, 7)):
        self.assertEqual(len(list(dependents(line))), 1)
        self.assertEqual(next(dependents(line)), successor)

    # lines without any dependent
    for line in (7, 8):
        self.assertEqual(len(list(dependents(line))), 0)

    # calling without any argument has to be rejected
    with self.assertRaises(ValueError):
        dg.get_dependent_instruction_forms()

    # test dot creation
    dg.export_graph(filepath='/dev/null')
def inspect(args, output_file=sys.stdout):
    """
    Does the actual throughput and critical path analysis of OSACA and prints it to the
    terminal.

    If no architecture was given and parsing fails, the parser of the other ISA is tried
    once. If an architecture was given explicitly and parsing fails, the traceback is
    written to stderr and the process exits with status 1.

    :param args: arguments given from :class:`~argparse.ArgumentParser` after parsing
    :param output_file: Define the stream for output, defaults to :class:`sys.stdout`
    :type output_file: stream, optional
    """
    # Read file
    code = args.file.read()
    # Detect ISA if necessary
    arch = args.arch if args.arch is not None else DEFAULT_ARCHS[BaseParser.detect_ISA(code)]
    print_arch_warning = not args.arch
    isa = MachineModel.get_isa_for_arch(arch)
    verbose = args.verbose
    ignore_unknown = args.ignore_unknown

    # Parse file
    parser = get_asm_parser(arch)
    try:
        parsed_code = parser.parse_file(code)
    except Exception:
        # Only real parsing errors may trigger the fallback; SystemExit and
        # KeyboardInterrupt must propagate instead of starting a second parse.
        # probably the wrong parser based on heuristic
        if args.arch is None:
            # change ISA and try again
            if BaseParser.detect_ISA(code) == 'aarch64':
                arch = DEFAULT_ARCHS['x86']
            else:
                arch = DEFAULT_ARCHS['aarch64']
            isa = MachineModel.get_isa_for_arch(arch)
            parser = get_asm_parser(arch)
            parsed_code = parser.parse_file(code)
        else:
            traceback.print_exc(file=sys.stderr)
            sys.exit(1)

    # Reduce to marked kernel or chosen section and add semantics
    if args.lines:
        line_range = get_line_range(args.lines)
        kernel = [line for line in parsed_code if line['line_number'] in line_range]
        print_length_warning = False
    else:
        kernel = reduce_to_section(parsed_code, isa)
        # Print warning if kernel has no markers and is larger than threshold (100)
        print_length_warning = len(kernel) == len(parsed_code) and len(kernel) > 100
    machine_model = MachineModel(arch=arch)
    semantics = ArchSemantics(machine_model)
    semantics.add_semantics(kernel)
    # Do optimal schedule for kernel throughput if wished
    if not args.fixed:
        semantics.assign_optimal_throughput(kernel)

    # Create DiGraphs
    kernel_graph = KernelDG(kernel, parser, machine_model)
    if args.dotpath is not None:
        # a dotpath of '.' is forwarded as None
        kernel_graph.export_graph(args.dotpath if args.dotpath != '.' else None)
    # Print analysis
    frontend = Frontend(args.file.name, arch=arch)
    print(
        frontend.full_analysis(
            kernel,
            kernel_graph,
            ignore_unknown=ignore_unknown,
            arch_warning=print_arch_warning,
            length_warning=print_length_warning,
            verbose=verbose,
        ),
        file=output_file,
    )
def test_is_read_is_written_AArch64(self):
    # independent from HW model
    dag = KernelDG(self.kernel_AArch64, self.parser_AArch64, None, None)
    reg_x1 = AttrDict({"prefix": "x", "name": "1"})
    reg_w1 = AttrDict({"prefix": "w", "name": "1"})
    reg_d1 = AttrDict({"prefix": "d", "name": "1"})
    reg_q1 = AttrDict({"prefix": "q", "name": "1"})
    reg_v1 = AttrDict({"prefix": "v", "name": "1", "lanes": "2", "shape": "d"})
    regs = [reg_d1, reg_q1, reg_v1]
    regs_gp = [reg_w1, reg_x1]

    def analyzed(asm_line):
        # Parse one assembly line and annotate its source/destination operands.
        form = self.parser_AArch64.parse_line(asm_line)
        self.semantics_tx2.assign_src_dst(form)
        return form

    instr_form_r_1 = analyzed("stp q1, q3, [x12, #192]")
    instr_form_r_2 = analyzed("fadd v2.2d, v1.2d, v0.2d")
    instr_form_w_1 = analyzed("ldr d1, [x1, #:got_lo12:q2c]")
    instr_form_non_w_1 = analyzed("ldr x1, [x1, #:got_lo12:q2c]")
    instr_form_rw_1 = analyzed("fmul v1.2d, v1.2d, v0.2d")
    instr_form_rw_2 = analyzed("ldp q2, q4, [x1, #64]!")
    instr_form_rw_3 = analyzed("str x4, [x1], #64")
    instr_form_non_rw_1 = analyzed("adds x1, x11")

    # (predicate, instruction form, expected result for d1/q1/v1).
    # For w1/x1 every expectation is exactly the opposite.
    expectations = (
        (dag.is_read, instr_form_r_1, True),
        (dag.is_read, instr_form_r_2, True),
        (dag.is_read, instr_form_rw_1, True),
        (dag.is_read, instr_form_rw_2, False),
        (dag.is_read, instr_form_rw_3, False),
        (dag.is_read, instr_form_w_1, False),
        (dag.is_written, instr_form_w_1, True),
        (dag.is_written, instr_form_rw_1, True),
        (dag.is_written, instr_form_non_w_1, False),
        (dag.is_written, instr_form_rw_2, False),
        (dag.is_written, instr_form_rw_3, False),
        (dag.is_written, instr_form_non_rw_1, False),
        (dag.is_written, instr_form_non_rw_1, False),
    )

    def verify(reg, invert):
        # Run every expectation for one register, optionally negated.
        for predicate, form, expected in expectations:
            check = self.assertTrue if expected != invert else self.assertFalse
            check(predicate(reg, form))

    for reg in regs:
        with self.subTest(reg=reg):
            verify(reg, invert=False)
    for reg in regs_gp:
        with self.subTest(reg=reg):
            verify(reg, invert=True)
def get_cp(self):
    """
    Return the sum of the ``latency_cp`` values along the kernel's critical path.

    The critical path is taken from a :class:`KernelDG` built from ``self.kernel``,
    ``self.parser`` and ``self.machine_model``.
    """
    graph = KernelDG(self.kernel, self.parser, self.machine_model)
    critical_path = graph.get_critical_path()
    return sum(node['latency_cp'] for node in critical_path)
def test_frontend_AArch64(self):
    # Smoke test: the full report for the AArch64 kernel must be generated
    # without raising; the returned report is not inspected.
    dg = KernelDG(self.kernel_AArch64, self.parser_AArch64, self.machine_model_tx2)
    yaml_path = os.path.join(self.MODULE_DATA_DIR, 'tx2.yml')
    fe = Frontend(path_to_yaml=yaml_path)
    fe.full_analysis(self.kernel_AArch64, dg, verbose=True)
def test_frontend_x86(self):
    # Smoke test: throughput and latency views for the x86 kernel must be
    # generated without raising; the returned output is not inspected.
    dg = KernelDG(self.kernel_x86, self.parser_x86, self.machine_model_csx)
    yaml_path = os.path.join(self.MODULE_DATA_DIR, 'csx.yml')
    fe = Frontend(path_to_yaml=yaml_path)
    fe.throughput_analysis(self.kernel_x86, show_cmnts=False)
    critical_path = dg.get_critical_path()
    fe.latency_analysis(critical_path)
def test_is_read_is_written_x86(self):
    # independent from HW model
    dag = KernelDG(self.kernel_x86, self.parser_x86, None)
    reg_rcx = AttrDict({'name': 'rcx'})
    reg_ymm1 = AttrDict({'name': 'ymm1'})

    def analyzed(asm_line):
        # Parse one assembly line and annotate its source/destination operands.
        form = self.parser_x86.parse_line(asm_line)
        self.semantics_csx.assign_src_dst(form)
        return form

    instr_form_r_c = analyzed('vmovsd %xmm0, (%r15,%rcx,8)')
    instr_form_non_r_c = analyzed('movl %xmm0, (%r15,%rax,8)')
    instr_form_w_c = analyzed('movi $0x05ACA, %rcx')
    instr_form_rw_ymm_1 = analyzed('vinsertf128 $0x1, %xmm1, %ymm0, %ymm1')
    instr_form_rw_ymm_2 = analyzed('vinsertf128 $0x1, %xmm0, %ymm1, %ymm1')
    instr_form_r_ymm = analyzed('vmovapd %ymm1, %ymm0')

    # (expected result, predicate, register, instruction form)
    expectations = (
        (True, dag.is_read, reg_rcx, instr_form_r_c),
        (False, dag.is_read, reg_rcx, instr_form_non_r_c),
        (False, dag.is_read, reg_rcx, instr_form_w_c),
        (True, dag.is_written, reg_rcx, instr_form_w_c),
        (False, dag.is_written, reg_rcx, instr_form_r_c),
        (True, dag.is_read, reg_ymm1, instr_form_rw_ymm_1),
        (True, dag.is_read, reg_ymm1, instr_form_rw_ymm_2),
        (True, dag.is_read, reg_ymm1, instr_form_r_ymm),
        (True, dag.is_written, reg_ymm1, instr_form_rw_ymm_1),
        (True, dag.is_written, reg_ymm1, instr_form_rw_ymm_2),
        (False, dag.is_written, reg_ymm1, instr_form_r_ymm),
    )
    for expected, predicate, reg, form in expectations:
        check = self.assertTrue if expected else self.assertFalse
        check(predicate(reg, form))
def test_is_read_is_written_AArch64(self):
    # independent from HW model
    dag = KernelDG(self.kernel_AArch64, self.parser_AArch64, None)
    reg_x1 = AttrDict({'prefix': 'x', 'name': '1'})
    reg_w1 = AttrDict({'prefix': 'w', 'name': '1'})
    reg_d1 = AttrDict({'prefix': 'd', 'name': '1'})
    reg_q1 = AttrDict({'prefix': 'q', 'name': '1'})
    reg_v1 = AttrDict({'prefix': 'v', 'name': '1', 'lanes': '2', 'shape': 'd'})
    regs = [reg_d1, reg_q1, reg_v1]
    regs_gp = [reg_w1, reg_x1]

    def analyzed(asm_line):
        # Parse one assembly line and annotate its source/destination operands.
        form = self.parser_AArch64.parse_line(asm_line)
        self.semantics_tx2.assign_src_dst(form)
        return form

    instr_form_r_1 = analyzed('stp q1, q3, [x12, #192]')
    instr_form_r_2 = analyzed('fadd v2.2d, v1.2d, v0.2d')
    instr_form_w_1 = analyzed('ldr d1, [x1, #:got_lo12:q2c]')
    instr_form_non_w_1 = analyzed('ldr x1, [x1, #:got_lo12:q2c]')
    instr_form_rw_1 = analyzed('fmul v1.2d, v1.2d, v0.2d')
    instr_form_rw_2 = analyzed('ldp q2, q4, [x1, #64]!')
    instr_form_rw_3 = analyzed('str x4, [x1], #64')
    instr_form_non_rw_1 = analyzed('adds x1, x11')

    # (predicate, instruction form, expected result for d1/q1/v1).
    # For w1/x1 every expectation is exactly the opposite.
    expectations = (
        (dag.is_read, instr_form_r_1, True),
        (dag.is_read, instr_form_r_2, True),
        (dag.is_read, instr_form_rw_1, True),
        (dag.is_read, instr_form_rw_2, False),
        (dag.is_read, instr_form_rw_3, False),
        (dag.is_read, instr_form_w_1, False),
        (dag.is_written, instr_form_w_1, True),
        (dag.is_written, instr_form_rw_1, True),
        (dag.is_written, instr_form_non_w_1, False),
        (dag.is_written, instr_form_rw_2, False),
        (dag.is_written, instr_form_rw_3, False),
        (dag.is_written, instr_form_non_rw_1, False),
        (dag.is_written, instr_form_non_rw_1, False),
    )

    # (register group, whether the expectations above are negated)
    for group, negate in ((regs, False), (regs_gp, True)):
        for reg in group:
            with self.subTest(reg=reg):
                for predicate, form, expected in expectations:
                    if expected != negate:
                        self.assertTrue(predicate(reg, form))
                    else:
                        self.assertFalse(predicate(reg, form))
def create_output(self, verbose=False):
    """
    Return the full analysis report for ``self.kernel``.

    A :class:`KernelDG` is built from ``self.kernel``, ``self.parser`` and
    ``self.machine_model``, and a :class:`Frontend` is created for the machine
    model's architecture; the frontend's ``full_analysis`` result is returned.

    :param verbose: forwarded unchanged to ``Frontend.full_analysis``, defaults to `False`
    :type verbose: boolean, optional
    """
    graph = KernelDG(self.kernel, self.parser, self.machine_model)
    arch = self.machine_model.get_arch()
    report_builder = Frontend(arch=arch)
    return report_builder.full_analysis(self.kernel, graph, verbose=verbose)