Ejemplo n.º 1
0
 def test_loop_carried_dependency_x86(self):
     # Two loop-carried dependencies are expected. For each of them the
     # entry's root and the first element of its single dependency must both
     # be the instruction form stored on the graph node with the same ID.
     dg = KernelDG(
         self.kernel_x86,
         self.parser_x86,
         self.machine_model_csx,
         self.semantics_csx,
     )
     lc_deps = dg.get_loopcarried_dependencies()
     self.assertEqual(len(lc_deps), 2)
     # ID 8 is checked first, then the second entry:
     # w/  flag dependencies: ID 9 w/ len=2
     # w/o flag dependencies: ID 5 w/ len=1
     # TODO discuss
     for node_id in (8, 5):
         self.assertEqual(
             lc_deps[node_id]["root"],
             dg.dg.nodes(data=True)[node_id]["instruction_form"],
         )
         self.assertEqual(len(lc_deps[node_id]["dependencies"]), 1)
         first_dependency = lc_deps[node_id]["dependencies"][0]
         self.assertEqual(
             first_dependency[0],
             dg.dg.nodes(data=True)[node_id]["instruction_form"],
         )
Ejemplo n.º 2
0
    def full_analysis(self,
                      kernel,
                      kernel_dg: KernelDG,
                      ignore_unknown=False,
                      arch_warning=False,
                      length_warning=False,
                      verbose=False):
        """
        Build the full analysis report including header, user warnings, the symbol map, the
        combined TP/CP/LCD view and the list based LCD view.

        :param kernel: kernel to report on
        :type kernel: list
        :param kernel_dg: directed graph containing CP and LCD
        :type kernel_dg: :class:`~osaca.semantics.KernelDG`
        :param ignore_unknown: flag for ignore warning if performance data is missing, defaults to
            `False`
        :type ignore_unknown: boolean, optional
        :param arch_warning: flag for additional user warning to specify micro-arch, defaults to
            `False`
        :type arch_warning: boolean, optional
        :param length_warning: flag for additional user warning to specify kernel length with
            --lines, defaults to `False`
        :type length_warning: boolean, optional
        :param verbose: flag for verbosity level, defaults to `False`; it is accepted for
            interface compatibility but not read by this method
        :type verbose: boolean, optional
        :returns: concatenation of all report sections in the order listed above
        """
        report = self._header_report()
        report += self._user_warnings(arch_warning, length_warning)
        report += self._symbol_map()
        critical_path = kernel_dg.get_critical_path()
        # Query the loop-carried dependencies once and share the result between both views
        # instead of recomputing them for the list based view.
        loopcarried_deps = kernel_dg.get_loopcarried_dependencies()
        report += self.combined_view(
            kernel,
            critical_path,
            loopcarried_deps,
            ignore_unknown,
        )
        report += self.loopcarried_dependencies(loopcarried_deps)
        return report
Ejemplo n.º 3
0
 def get_lcd(self):
     """
     Return the largest summed ``latency_lcd`` over all loop-carried dependencies.

     Starts from ``0.0``, so that value is returned when no dependency sum exceeds it.
     """
     dependencies = KernelDG(
         self.kernel, self.parser, self.machine_model
     ).get_loopcarried_dependencies()
     longest = 0.0
     for entry in dependencies.values():
         total = sum(item['latency_lcd'] for item in entry['dependencies'])
         # strict comparison: on a tie the earlier value is kept
         if total > longest:
             longest = total
     return longest
Ejemplo n.º 4
0
 def test_loop_carried_dependency_aarch64(self):
     # Checks the number of loop-carried dependencies and the latency and
     # (line number, latency) chain of the entry keyed by 6.
     dg = KernelDG(self.kernel_aarch64_memdep, self.parser_AArch64,
                   self.machine_model_tx2, self.semantics_tx2)
     lc_deps = dg.get_loopcarried_dependencies()
     self.assertEqual(len(lc_deps), 2)
     # based on line 6
     self.assertEqual(lc_deps[6]["latency"], 28.0)
     expected_chain = [
         (6, 4.0),
         (10, 6.0),
         (11, 6.0),
         (12, 6.0),
         (13, 6.0),
         (14, 0),
     ]
     observed_chain = []
     for iform, lat in lc_deps[6]['dependencies']:
         observed_chain.append((iform.line_number, lat))
     self.assertEqual(observed_chain, expected_chain)
Ejemplo n.º 5
0
 def test_kernelDG_SVE(self):
     # Smoke test: constructing the graph from the SVE fixtures must not raise.
     sve_setup = (
         self.kernel_aarch64_SVE,
         self.parser_AArch64,
         self.machine_model_a64fx,
         self.semantics_a64fx,
     )
     KernelDG(*sve_setup)
Ejemplo n.º 6
0
 def test_memdependency_x86(self):
     # The graph must be acyclic and report the expected dependents for
     # lines 3 and 5; calling the lookup without arguments must fail.
     dg = KernelDG(
         self.kernel_x86_memdep,
         self.parser_x86,
         self.machine_model_csx,
         self.semantics_csx,
     )
     self.assertTrue(nx.algorithms.dag.is_directed_acyclic_graph(dg.dg))
     for line_number, expected in ((3, {6, 8}), (5, {10, 12})):
         dependents = dg.get_dependent_instruction_forms(
             line_number=line_number)
         self.assertEqual(set(dependents), expected)
     with self.assertRaises(ValueError):
         dg.get_dependent_instruction_forms()
     # test dot creation
     dg.export_graph(filepath="/dev/null")
Ejemplo n.º 7
0
 def test_timeout_during_loop_carried_dependency(self):
     def build_with_timeout(seconds):
         # Wall-clock time spent constructing the graph with the given timeout.
         began = time.perf_counter()
         KernelDG(
             self.kernel_x86_long_LCD,
             self.parser_x86,
             self.machine_model_csx,
             self.semantics_x86,
             timeout=seconds,
         )
         finished = time.perf_counter()
         return finished - began

     time_10 = build_with_timeout(10)
     time_2 = build_with_timeout(2)
     # Both runs must take longer than their timeout, and the short run must
     # finish more than 7 seconds faster than the long one.
     self.assertTrue(time_10 > 10)
     self.assertTrue(2 < time_2)
     self.assertTrue(time_2 < (time_10 - 7))
Ejemplo n.º 8
0
 def test_cyclic_dag(self):
     dg = KernelDG(self.kernel_x86, self.parser_x86, self.machine_model_csx)
     # Insert the cycle 100 -> 101 -> 102 -> 100 into the graph.
     cycle_edges = ((100, 101, 1.0), (101, 102, 2.0), (102, 100, 3.0))
     for source, target, latency in cycle_edges:
         dg.dg.add_edge(source, target, latency=latency)
     # Both analyses are expected to reject the modified graph.
     for analysis in (dg.get_critical_path,
                      dg.get_loopcarried_dependencies):
         with self.assertRaises(NotImplementedError):
             analysis()
Ejemplo n.º 9
0
 def test_kernelDG_AArch64(self):
     dg = KernelDG(self.kernel_AArch64, self.parser_AArch64,
                   self.machine_model_tx2)
     self.assertTrue(nx.algorithms.dag.is_directed_acyclic_graph(dg.dg))

     def count(forms):
         # Number of dependents yielded for a line.
         return len(list(forms))

     # (line number, reduction applied to the returned iterator, expected)
     expectations = (
         (3, set, {7, 8}),
         (4, set, {9, 10}),
         (5, set, {6, 7, 8}),
         (6, set, {9, 10}),
         (7, next, 13),
         (8, next, 14),
         (9, next, 16),
         (10, next, 17),
         (11, set, {13, 14}),
         (12, set, {16, 17}),
         (13, next, 15),
         (14, next, 15),
         (15, count, 0),
         (16, next, 18),
         (17, next, 18),
         (18, count, 0),
         (19, count, 0),
         (20, count, 0),
     )
     for line_number, reduce_forms, expected in expectations:
         dependents = dg.get_dependent_instruction_forms(
             line_number=line_number)
         self.assertEqual(reduce_forms(dependents), expected)
     # a lookup without any argument is rejected
     with self.assertRaises(ValueError):
         dg.get_dependent_instruction_forms()
     # test dot creation
     dg.export_graph(filepath='/dev/null')
Ejemplo n.º 10
0
 def test_kernelDG_x86(self):
     #
     #  4
     #   \___>6__>7
     #   /
     #  3
     #     5_______>9
     #
     dg = KernelDG(self.kernel_x86, self.parser_x86, self.machine_model_csx)
     self.assertTrue(nx.algorithms.dag.is_directed_acyclic_graph(dg.dg))
     # Lines with exactly one dependent: (line number, expected dependent).
     single_successor = ((3, 6), (4, 6), (5, 9), (6, 7))
     for line_number, successor in single_successor:
         dependents = list(
             dg.get_dependent_instruction_forms(line_number=line_number))
         self.assertEqual(len(dependents), 1)
         first = next(
             dg.get_dependent_instruction_forms(line_number=line_number))
         self.assertEqual(first, successor)
     # Lines without any dependent.
     for line_number in (7, 8):
         dependents = list(
             dg.get_dependent_instruction_forms(line_number=line_number))
         self.assertEqual(len(dependents), 0)
     # a lookup without any argument is rejected
     with self.assertRaises(ValueError):
         dg.get_dependent_instruction_forms()
     # test dot creation
     dg.export_graph(filepath='/dev/null')
Ejemplo n.º 11
0
def inspect(args, output_file=sys.stdout):
    """
    Does the actual throughput and critical path analysis of OSACA and prints it to the
    terminal.

    Reads ``args.file``, ``args.arch``, ``args.verbose``, ``args.ignore_unknown``,
    ``args.lines``, ``args.fixed`` and ``args.dotpath``. If no architecture is given, the ISA
    is detected from the code and, should parsing fail, the other ISA's default architecture
    is tried once. If an architecture is given and parsing fails, the traceback is printed to
    stderr and the process exits with status 1.

    :param args: arguments given from :class:`~argparse.ArgumentParser` after parsing
    :param output_file: Define the stream for output, defaults to :class:`sys.stdout`
    :type output_file: stream, optional
    """
    # Read file
    code = args.file.read()

    # Detect ISA if necessary
    arch = args.arch if args.arch is not None else DEFAULT_ARCHS[BaseParser.detect_ISA(code)]
    print_arch_warning = not args.arch
    isa = MachineModel.get_isa_for_arch(arch)
    verbose = args.verbose
    ignore_unknown = args.ignore_unknown

    # Parse file
    parser = get_asm_parser(arch)
    try:
        parsed_code = parser.parse_file(code)
    except Exception:
        # Only regular errors are handled here; KeyboardInterrupt and SystemExit must not
        # trigger a second parsing attempt.
        # probably the wrong parser based on heuristic
        if args.arch is None:
            # change ISA and try again
            arch = DEFAULT_ARCHS['x86'] if BaseParser.detect_ISA(code) == 'aarch64' else DEFAULT_ARCHS['aarch64']
            isa = MachineModel.get_isa_for_arch(arch)
            parser = get_asm_parser(arch)
            parsed_code = parser.parse_file(code)
        else:
            traceback.print_exc(file=sys.stderr)
            sys.exit(1)

    # Reduce to marked kernel or chosen section and add semantics
    if args.lines:
        line_range = get_line_range(args.lines)
        kernel = [line for line in parsed_code if line['line_number'] in line_range]
        print_length_warning = False
    else:
        kernel = reduce_to_section(parsed_code, isa)
        # Print warning if kernel has no markers and is larger than threshold (100)
        print_length_warning = len(kernel) == len(parsed_code) and len(kernel) > 100
    machine_model = MachineModel(arch=arch)
    semantics = ArchSemantics(machine_model)
    semantics.add_semantics(kernel)
    # Do optimal schedule for kernel throughput if wished
    if not args.fixed:
        semantics.assign_optimal_throughput(kernel)

    # Create DiGraphs
    kernel_graph = KernelDG(kernel, parser, machine_model)
    if args.dotpath is not None:
        # a dotpath of '.' is forwarded as None
        kernel_graph.export_graph(args.dotpath if args.dotpath != '.' else None)
    # Print analysis
    frontend = Frontend(args.file.name, arch=arch)
    print(
        frontend.full_analysis(
            kernel,
            kernel_graph,
            ignore_unknown=ignore_unknown,
            arch_warning=print_arch_warning,
            length_warning=print_length_warning,
            verbose=verbose
        ),
        file=output_file,
    )
Ejemplo n.º 12
0
    def test_is_read_is_written_AArch64(self):
        # independent form HW model
        dag = KernelDG(self.kernel_AArch64, self.parser_AArch64, None, None)
        reg_x1 = AttrDict({"prefix": "x", "name": "1"})
        reg_w1 = AttrDict({"prefix": "w", "name": "1"})
        reg_d1 = AttrDict({"prefix": "d", "name": "1"})
        reg_q1 = AttrDict({"prefix": "q", "name": "1"})
        reg_v1 = AttrDict({"prefix": "v", "name": "1", "lanes": "2", "shape": "d"})
        regs = [reg_d1, reg_q1, reg_v1]
        regs_gp = [reg_w1, reg_x1]

        def parse(asm_line):
            # Parse one line and attach its source/destination operands.
            form = self.parser_AArch64.parse_line(asm_line)
            self.semantics_tx2.assign_src_dst(form)
            return form

        instr_form_r_1 = parse("stp q1, q3, [x12, #192]")
        instr_form_r_2 = parse("fadd v2.2d, v1.2d, v0.2d")
        instr_form_w_1 = parse("ldr d1, [x1, #:got_lo12:q2c]")
        instr_form_non_w_1 = parse("ldr x1, [x1, #:got_lo12:q2c]")
        instr_form_rw_1 = parse("fmul v1.2d, v1.2d, v0.2d")
        instr_form_rw_2 = parse("ldp q2, q4, [x1, #64]!")
        instr_form_rw_3 = parse("str x4, [x1], #64")
        instr_form_non_rw_1 = parse("adds x1, x11")

        # (predicate, instruction form, expected result for the d1/q1/v1
        # registers); the w1/x1 registers expect the opposite for every row.
        checks = (
            (dag.is_read, instr_form_r_1, True),
            (dag.is_read, instr_form_r_2, True),
            (dag.is_read, instr_form_rw_1, True),
            (dag.is_read, instr_form_rw_2, False),
            (dag.is_read, instr_form_rw_3, False),
            (dag.is_read, instr_form_w_1, False),
            (dag.is_written, instr_form_w_1, True),
            (dag.is_written, instr_form_rw_1, True),
            (dag.is_written, instr_form_non_w_1, False),
            (dag.is_written, instr_form_rw_2, False),
            (dag.is_written, instr_form_rw_3, False),
            (dag.is_written, instr_form_non_rw_1, False),
            (dag.is_written, instr_form_non_rw_1, False),
        )
        for group, matches_table in ((regs, True), (regs_gp, False)):
            for reg in group:
                with self.subTest(reg=reg):
                    for predicate, form, expected in checks:
                        if expected == matches_table:
                            self.assertTrue(predicate(reg, form))
                        else:
                            self.assertFalse(predicate(reg, form))
Ejemplo n.º 13
0
 def get_cp(self):
     """Return the sum of ``latency_cp`` over all entries of the kernel's critical path."""
     critical_path = KernelDG(
         self.kernel, self.parser, self.machine_model
     ).get_critical_path()
     total = 0
     for instruction_form in critical_path:
         total = total + instruction_form['latency_cp']
     return total
Ejemplo n.º 14
0
 def test_frontend_AArch64(self):
     # Smoke test: the verbose full analysis must run without raising.
     dg = KernelDG(
         self.kernel_AArch64,
         self.parser_AArch64,
         self.machine_model_tx2,
     )
     yaml_path = os.path.join(self.MODULE_DATA_DIR, 'tx2.yml')
     fe = Frontend(path_to_yaml=yaml_path)
     fe.full_analysis(self.kernel_AArch64, dg, verbose=True)
Ejemplo n.º 15
0
 def test_frontend_x86(self):
     # Smoke test: throughput and latency analysis must run without raising.
     dg = KernelDG(
         self.kernel_x86,
         self.parser_x86,
         self.machine_model_csx,
     )
     yaml_path = os.path.join(self.MODULE_DATA_DIR, 'csx.yml')
     fe = Frontend(path_to_yaml=yaml_path)
     fe.throughput_analysis(self.kernel_x86, show_cmnts=False)
     critical_path = dg.get_critical_path()
     fe.latency_analysis(critical_path)
Ejemplo n.º 16
0
    def test_is_read_is_written_x86(self):
        # independent form HW model
        dag = KernelDG(self.kernel_x86, self.parser_x86, None)
        reg_rcx = AttrDict({'name': 'rcx'})
        reg_ymm1 = AttrDict({'name': 'ymm1'})

        def parse(asm_line):
            # Parse one line and attach its source/destination operands.
            form = self.parser_x86.parse_line(asm_line)
            self.semantics_csx.assign_src_dst(form)
            return form

        instr_form_r_c = parse('vmovsd  %xmm0, (%r15,%rcx,8)')
        instr_form_non_r_c = parse('movl  %xmm0, (%r15,%rax,8)')
        instr_form_w_c = parse('movi $0x05ACA, %rcx')

        instr_form_rw_ymm_1 = parse('vinsertf128 $0x1, %xmm1, %ymm0, %ymm1')
        instr_form_rw_ymm_2 = parse('vinsertf128 $0x1, %xmm0, %ymm1, %ymm1')
        instr_form_r_ymm = parse('vmovapd %ymm1, %ymm0')

        # (predicate, register, instruction form, expected result)
        checks = (
            (dag.is_read, reg_rcx, instr_form_r_c, True),
            (dag.is_read, reg_rcx, instr_form_non_r_c, False),
            (dag.is_read, reg_rcx, instr_form_w_c, False),
            (dag.is_written, reg_rcx, instr_form_w_c, True),
            (dag.is_written, reg_rcx, instr_form_r_c, False),
            (dag.is_read, reg_ymm1, instr_form_rw_ymm_1, True),
            (dag.is_read, reg_ymm1, instr_form_rw_ymm_2, True),
            (dag.is_read, reg_ymm1, instr_form_r_ymm, True),
            (dag.is_written, reg_ymm1, instr_form_rw_ymm_1, True),
            (dag.is_written, reg_ymm1, instr_form_rw_ymm_2, True),
            (dag.is_written, reg_ymm1, instr_form_r_ymm, False),
        )
        for predicate, reg, form, expected in checks:
            if expected:
                self.assertTrue(predicate(reg, form))
            else:
                self.assertFalse(predicate(reg, form))
Ejemplo n.º 17
0
    def test_is_read_is_written_AArch64(self):
        # independent form HW model
        dag = KernelDG(self.kernel_AArch64, self.parser_AArch64, None)
        reg_x1 = AttrDict({'prefix': 'x', 'name': '1'})
        reg_w1 = AttrDict({'prefix': 'w', 'name': '1'})
        reg_d1 = AttrDict({'prefix': 'd', 'name': '1'})
        reg_q1 = AttrDict({'prefix': 'q', 'name': '1'})
        reg_v1 = AttrDict({'prefix': 'v', 'name': '1', 'lanes': '2', 'shape': 'd'})
        regs = [reg_d1, reg_q1, reg_v1]
        regs_gp = [reg_w1, reg_x1]

        def parse(asm_line):
            # Parse one line and attach its source/destination operands.
            form = self.parser_AArch64.parse_line(asm_line)
            self.semantics_tx2.assign_src_dst(form)
            return form

        instr_form_r_1 = parse('stp q1, q3, [x12, #192]')
        instr_form_r_2 = parse('fadd v2.2d, v1.2d, v0.2d')
        instr_form_w_1 = parse('ldr d1, [x1, #:got_lo12:q2c]')
        instr_form_non_w_1 = parse('ldr x1, [x1, #:got_lo12:q2c]')
        instr_form_rw_1 = parse('fmul v1.2d, v1.2d, v0.2d')
        instr_form_rw_2 = parse('ldp q2, q4, [x1, #64]!')
        instr_form_rw_3 = parse('str x4, [x1], #64')
        instr_form_non_rw_1 = parse('adds x1, x11')

        # (predicate, instruction form, expected result for the d1/q1/v1
        # registers); the w1/x1 registers expect the opposite for every row.
        checks = (
            (dag.is_read, instr_form_r_1, True),
            (dag.is_read, instr_form_r_2, True),
            (dag.is_read, instr_form_rw_1, True),
            (dag.is_read, instr_form_rw_2, False),
            (dag.is_read, instr_form_rw_3, False),
            (dag.is_read, instr_form_w_1, False),
            (dag.is_written, instr_form_w_1, True),
            (dag.is_written, instr_form_rw_1, True),
            (dag.is_written, instr_form_non_w_1, False),
            (dag.is_written, instr_form_rw_2, False),
            (dag.is_written, instr_form_rw_3, False),
            (dag.is_written, instr_form_non_rw_1, False),
            (dag.is_written, instr_form_non_rw_1, False),
        )
        for group, matches_table in ((regs, True), (regs_gp, False)):
            for reg in group:
                with self.subTest(reg=reg):
                    for predicate, form, expected in checks:
                        if expected == matches_table:
                            self.assertTrue(predicate(reg, form))
                        else:
                            self.assertFalse(predicate(reg, form))
Ejemplo n.º 18
0
 def create_output(self, verbose=False):
     """
     Return the full analysis of the kernel from a frontend built for the machine model's arch.

     :param verbose: forwarded to the frontend's ``full_analysis``, defaults to `False`
     """
     kernel_graph = KernelDG(self.kernel, self.parser, self.machine_model)
     arch = self.machine_model.get_arch()
     frontend = Frontend(arch=arch)
     report = frontend.full_analysis(
         self.kernel, kernel_graph, verbose=verbose)
     return report