Python MachineModel Examples, osaca.semantics.MachineModel Python Examples

Example #1

0

Show file

File: test_semantics.py Project: jdomke/OSACA

    def setUpClass(cls):
        """
        Build the fixtures shared by all tests of this class.

        Reads and parses the x86 and AArch64 test kernels, reduces them to their marked
        section, loads the CSX, TX2 and Zen1 machine models and finally assigns
        source/destination and throughput/latency information to every instruction form
        of both kernels.
        """
        # parsers and raw assembly of both test kernels
        cls.parser_x86 = ParserX86ATT()
        cls.parser_AArch64 = ParserAArch64()
        with open(cls._find_file('kernel_x86.s')) as x86_file:
            cls.code_x86 = x86_file.read()
        with open(cls._find_file('kernel_aarch64.s')) as aarch64_file:
            cls.code_AArch64 = aarch64_file.read()
        parsed_x86 = cls.parser_x86.parse_file(cls.code_x86)
        cls.kernel_x86 = reduce_to_section(parsed_x86, 'x86')
        parsed_aarch64 = cls.parser_AArch64.parse_file(cls.code_AArch64)
        cls.kernel_AArch64 = reduce_to_section(parsed_aarch64, 'aarch64')

        # machine models and ISA semantics, loaded from the module data directory
        data_dir = cls.MODULE_DATA_DIR
        cls.machine_model_csx = MachineModel(
            path_to_yaml=os.path.join(data_dir, 'csx.yml'))
        cls.machine_model_tx2 = MachineModel(
            path_to_yaml=os.path.join(data_dir, 'tx2.yml'))
        x86_isa_yaml = os.path.join(data_dir, 'isa/x86.yml')
        cls.semantics_csx = ArchSemantics(cls.machine_model_csx,
                                          path_to_yaml=x86_isa_yaml)
        aarch64_isa_yaml = os.path.join(data_dir, 'isa/aarch64.yml')
        cls.semantics_tx2 = ArchSemantics(cls.machine_model_tx2,
                                          path_to_yaml=aarch64_isa_yaml)
        cls.machine_model_zen = MachineModel(arch='zen1')

        # annotate every instruction form of both kernels
        for instruction_form in cls.kernel_x86:
            cls.semantics_csx.assign_src_dst(instruction_form)
            cls.semantics_csx.assign_tp_lt(instruction_form)
        for instruction_form in cls.kernel_AArch64:
            cls.semantics_tx2.assign_src_dst(instruction_form)
            cls.semantics_tx2.assign_tp_lt(instruction_form)

Example #2

0

Show file

File: test_frontend.py Project: jdomke/OSACA

    def setUpClass(self):
        """
        Build the fixtures shared by all frontend tests.

        Parses the complete x86 and AArch64 test kernels, loads the CSX machine model
        from its YAML file and the TX2 model by arch key, and assigns source/destination
        and throughput/latency information to every instruction form of both kernels.
        """
        # parsers and parsed kernels (the raw assembly is not kept)
        self.parser_x86 = ParserX86ATT()
        self.parser_AArch64 = ParserAArch64()
        with open(self._find_file('kernel_x86.s')) as x86_file:
            x86_assembly = x86_file.read()
        with open(self._find_file('kernel_aarch64.s')) as aarch64_file:
            aarch64_assembly = aarch64_file.read()
        self.kernel_x86 = self.parser_x86.parse_file(x86_assembly)
        self.kernel_AArch64 = self.parser_AArch64.parse_file(aarch64_assembly)

        # machine models and ISA semantics
        csx_yaml = os.path.join(self.MODULE_DATA_DIR, 'csx.yml')
        self.machine_model_csx = MachineModel(path_to_yaml=csx_yaml)
        self.machine_model_tx2 = MachineModel(arch='tx2')
        x86_isa_yaml = os.path.join(self.MODULE_DATA_DIR, 'isa/x86.yml')
        self.semantics_csx = ArchSemantics(self.machine_model_csx,
                                           path_to_yaml=x86_isa_yaml)
        aarch64_isa_yaml = os.path.join(self.MODULE_DATA_DIR, 'isa/aarch64.yml')
        self.semantics_tx2 = ArchSemantics(self.machine_model_tx2,
                                           path_to_yaml=aarch64_isa_yaml)

        # annotate every instruction form of both kernels
        for instruction_form in self.kernel_x86:
            self.semantics_csx.assign_src_dst(instruction_form)
            self.semantics_csx.assign_tp_lt(instruction_form)
        for instruction_form in self.kernel_AArch64:
            self.semantics_tx2.assign_src_dst(instruction_form)
            self.semantics_tx2.assign_tp_lt(instruction_form)

Example #3

0

Show file

File: db_interface.py Project: RRZE-HPC/OSACA

def sanity_check(arch: str,
                 verbose=False,
                 internet_check=False,
                 output_file=sys.stdout):
    """
    Check the database of a micro-architecture and print a report of the findings.

    The check covers missing TP/LT values, instructions that might be missing in the ISA DB
    and duplicate instructions.

    :param arch: micro-arch key to define DB to check
    :type arch: str
    :param verbose: verbose output flag, defaults to `False`
    :type verbose: bool, optional
    :param internet_check: indicates if OSACA should try to look up the src/dst distribution
                           on the internet, defaults to `False`
    :type internet_check: boolean, optional
    :param output_file: output stream the report is written to,
                        defaults to :class:`sys.stdout`
    :type output_file: stream, optional

    :return: `True` if neither missing port pressure nor bad operands were found
    """
    # machine model of the micro-architecture and of its ISA
    arch_mm = MachineModel(arch=arch)
    instruction_forms = arch_mm["instruction_forms"]
    isa_name = arch_mm.get_ISA()
    isa_mm = MachineModel(arch="isa/{}".format(isa_name))
    instruction_count = len(instruction_forms)

    # findings for the arch DB
    arch_findings = _check_sanity_arch_db(arch_mm, isa_mm, internet_check=internet_check)
    (
        no_throughput,
        no_latency,
        no_port_pressure,
        suspicious,
        arch_duplicates,
        bad_operands,
    ) = arch_findings
    # findings for the ISA DB
    isa_duplicates, isa_only = _check_sanity_isa_db(arch_mm, isa_mm)

    # colored output is only used when writing to standard output
    use_colors = bool(output_file == sys.stdout)
    report = _get_sanity_report(
        instruction_count,
        no_throughput,
        no_latency,
        no_port_pressure,
        suspicious,
        arch_duplicates,
        isa_duplicates,
        isa_only,
        bad_operands,
        verbose=verbose,
        colors=use_colors,
    )
    print(report, file=output_file)

    return not (no_port_pressure or bad_operands)

Example #4

0

Show file

File: kerncraft_interface.py Project: jdomke/OSACA

class KerncraftAPI(object):
    """
    Convenience wrapper that parses a kernel for one micro-architecture and exposes its
    throughput, critical path and loop-carried dependency results.
    """

    def __init__(self, arch, code):
        """
        Parse ``code`` for ``arch`` and attach semantic information to the kernel.

        :param arch: micro-arch key used to load the machine model
        :type arch: str
        :param code: assembly code that is handed to the parser
        :raises ValueError: if the ISA of the machine model is neither AArch64 nor x86
        """
        self.machine_model = MachineModel(arch=arch)
        self.semantics = ArchSemantics(self.machine_model)
        isa = self.machine_model.get_ISA().lower()
        if isa == 'aarch64':
            self.parser = ParserAArch64()
        elif isa == 'x86':
            self.parser = ParserX86ATT()
        else:
            # fail here instead of with an AttributeError on the missing parser below
            raise ValueError('Unsupported ISA: {}'.format(isa))

        parsed_code = self.parser.parse_file(code)
        self.kernel = reduce_to_section(parsed_code,
                                        self.machine_model.get_ISA())
        self.semantics.add_semantics(self.kernel)

    def create_output(self, verbose=False):
        """
        Build the full analysis report of the kernel.

        :param verbose: verbosity flag passed on to the frontend, defaults to `False`
        :type verbose: bool, optional
        :returns: result of :meth:`Frontend.full_analysis` for the kernel
        """
        kernel_graph = KernelDG(self.kernel, self.parser, self.machine_model)
        frontend = Frontend(arch=self.machine_model.get_arch())
        return frontend.full_analysis(self.kernel,
                                      kernel_graph,
                                      verbose=verbose)

    def get_unmatched_instruction_ratio(self):
        """
        Return the share of kernel entries flagged with both unknown throughput and latency.

        :returns: ratio in the range [0, 1]; `0.0` for an empty kernel
        """
        if not self.kernel:
            # nothing to match, avoid dividing by zero
            return 0.0
        unmatched_counter = sum(
            1 for instruction in self.kernel
            if (INSTR_FLAGS.TP_UNKWN in instruction['flags']
                and INSTR_FLAGS.LT_UNKWN in instruction['flags']))
        return unmatched_counter / len(self.kernel)

    def get_port_occupation_cycles(self):
        """
        Map every port of the machine model to its summed throughput value.

        :returns: :class:`collections.OrderedDict` in the order of the model's ports
        """
        throughput_values = self.semantics.get_throughput_sum(self.kernel)
        port_names = self.machine_model['ports']
        return collections.OrderedDict(zip(port_names, throughput_values))

    def get_total_throughput(self):
        """Return the largest per-port throughput sum of the kernel."""
        return max(self.semantics.get_throughput_sum(self.kernel))

    def get_latency(self):
        """Return the tuple ``(loop-carried dependency latency, critical path latency)``."""
        return (self.get_lcd(), self.get_cp())

    def get_cp(self):
        """Return the summed ``latency_cp`` of all instruction forms on the critical path."""
        kernel_graph = KernelDG(self.kernel, self.parser, self.machine_model)
        kernel_cp = kernel_graph.get_critical_path()
        return sum(x['latency_cp'] for x in kernel_cp)

    def get_lcd(self):
        """
        Return the largest summed ``latency_lcd`` over all loop-carried dependencies.

        :returns: the longest dependency latency, `0.0` if there is no dependency
        """
        kernel_graph = KernelDG(self.kernel, self.parser, self.machine_model)
        lcd_dict = kernel_graph.get_loopcarried_dependencies()
        lcd = 0.0
        for dep in lcd_dict:
            lcd_tmp = sum(
                x['latency_lcd'] for x in lcd_dict[dep]['dependencies'])
            lcd = max(lcd, lcd_tmp)
        return lcd

Example #5

0

Show file

File: db_interface.py Project: RRZE-HPC/OSACA

def import_benchmark_output(arch, bench_type, filepath, output=sys.stdout):
    """
    Import the results of a micro-benchmark run into the machine model of ``arch``.

    The updated model is dumped to ``output``; if ``output`` is `None`, the dump is printed.

    :param arch: target architecture key
    :type arch: str
    :param bench_type: key for defining type of benchmark output ("ibench" or "asmbench")
    :type bench_type: str
    :param filepath: filepath to the output file
    :type filepath: str
    :param output: output stream to dump, defaults to sys.stdout
    :type output: stream
    :raises ValueError: if ``bench_type`` is not one of the supported types
    """
    assert os.path.exists(filepath)
    if bench_type not in ("ibench", "asmbench"):
        raise ValueError("Benchmark type is not supported.")
    with open(filepath, "r") as bench_file:
        input_data = bench_file.readlines()

    mm = MachineModel(arch)
    # the check above leaves exactly these two benchmark types
    if bench_type == "ibench":
        db_entries = _get_ibench_output(input_data, mm.get_ISA())
    else:
        db_entries = _get_asmbench_output(input_data, mm.get_ISA())

    # write entries to DB
    for key in db_entries:
        mm.set_instruction_entry(db_entries[key])

    if output is not None:
        mm.dump(stream=output)
    else:
        print(mm.dump())

Example #6

0

Show file

File: test_semantics.py Project: jdomke/OSACA

 def test_invalid_MachineModel(self):
     """Check which exception each invalid MachineModel constructor call raises."""
     csx_yaml = os.path.join(self.MODULE_DATA_DIR, 'csx.yml')
     missing_yaml = os.path.join(self.MODULE_DATA_DIR, 'THE_MACHINE.yml')
     invalid_calls = (
         # neither arch nor YAML path given
         (ValueError, {}),
         # both arch and YAML path given
         (ValueError, {'arch': 'CSX', 'path_to_yaml': csx_yaml}),
         # unknown arch key
         (FileNotFoundError, {'arch': 'THE_MACHINE'}),
         # YAML file that does not exist
         (FileNotFoundError, {'path_to_yaml': missing_yaml}),
     )
     for expected_error, kwargs in invalid_calls:
         with self.assertRaises(expected_error):
             MachineModel(**kwargs)

Example #7

0

Show file

File: kerncraft_interface.py Project: jdomke/OSACA

    def __init__(self, arch, code):
        """
        Parse ``code`` for ``arch`` and attach semantic information to the kernel.

        :param arch: micro-arch key used to load the machine model
        :type arch: str
        :param code: assembly code that is handed to the parser
        """
        self.machine_model = MachineModel(arch=arch)
        self.semantics = ArchSemantics(self.machine_model)

        # pick the parser matching the ISA; no parser is set for any other ISA
        isa = self.machine_model.get_ISA().lower()
        parser_class = {'aarch64': ParserAArch64, 'x86': ParserX86ATT}.get(isa)
        if parser_class is not None:
            self.parser = parser_class()

        full_code = self.parser.parse_file(code)
        section_isa = self.machine_model.get_ISA()
        self.kernel = reduce_to_section(full_code, section_isa)
        self.semantics.add_semantics(self.kernel)

Example #8

0

Show file

File: osaca.py Project: jdomke/OSACA

def insert_byte_marker(args):
    """
    Insert byte markers into an assembly file with the help of kerncraft.

    The file given in ``args.file`` is read, instrumented and then overwritten with the
    marked assembly. If kerncraft cannot be imported, a hint is printed to stderr and the
    program exits with status 1.

    :param args: arguments given from :class:`~argparse.ArgumentParser` after parsing
    """
    try:
        from kerncraft.incore_model import asm_instrumentation
    except ImportError:
        print(
            "Module kerncraft not installed. Use 'pip install --user kerncraft' for "
            "installation.\nFor more information see https://github.com/RRZE-HPC/kerncraft",
            file=sys.stderr,
        )
        sys.exit(1)

    # kerncraft works on file-like objects, so route the assembly through string buffers
    source_buffer = io.StringIO(args.file.read())
    marked_buffer = io.StringIO()
    asm_instrumentation(
        input_file=source_buffer,
        output_file=marked_buffer,
        block_selection='manual',
        pointer_increment='auto_with_manual_fallback',
        isa=MachineModel.get_isa_for_arch(args.arch),
    )

    # fetch the result before the target file is opened (and thereby truncated)
    marked_code = marked_buffer.getvalue()
    with open(args.file.name, 'w') as target:
        target.write(marked_code)

Example #9

0

Show file

File: test_cli.py Project: jdomke/OSACA

 def test_architectures(self):
     """Run the ISA-specific test kernel through OSACA for every supported micro-arch."""
     arg_parser = osaca.create_parser()
     for micro_arch in osaca.SUPPORTED_ARCHS:
         with self.subTest(micro_arch=micro_arch):
             # the kernel file is chosen by the ISA of the micro-architecture
             isa_name = MachineModel.get_isa_for_arch(micro_arch)
             kernel_file = 'kernel_{}.s'.format(isa_name)
             cli_arguments = ['--arch', micro_arch,
                              self._find_test_file(kernel_file)]
             parsed_args = arg_parser.parse_args(cli_arguments)
             # the report is captured in a buffer and not inspected any further
             report_buffer = StringIO()
             osaca.run(parsed_args, output_file=report_buffer)

Example #10

0

Show file

File: osaca.py Project: jdomke/OSACA

def get_asm_parser(arch) -> BaseParser:
    """
    Create the assembly parser that belongs to the ISA of an architecture.

    :param arch: architecture code
    :type arch: str
    :returns: :class:`~osaca.parser.BaseParser` object, `None` if the ISA is neither
        'x86' nor 'aarch64'
    """
    parser_by_isa = {'x86': ParserX86ATT, 'aarch64': ParserAArch64}
    parser_class = parser_by_isa.get(MachineModel.get_isa_for_arch(arch))
    if parser_class is None:
        return None
    return parser_class()

Example #11

0

Show file

File: test_semantics.py Project: jdomke/OSACA

    def test_hidden_load(self):
        """
        Check the number of instruction forms flagged as hidden load.

        The full x86 kernel, its last three entries and the entries 5 to 7 are annotated
        with a machine model that has hidden loads; the expected counts are 1, 0 and 1.
        """
        machine_model_hld = MachineModel(
            path_to_yaml=self._find_file('hidden_load_machine_model.yml'))
        self.assertTrue(machine_model_hld.has_hidden_loads())
        semantics_hld = ArchSemantics(machine_model_hld)

        # every kernel comes from its own parse run (a redundant extra parse whose result
        # was overwritten immediately has been dropped)
        kernel_hld = self.parser_x86.parse_file(self.code_x86)
        kernel_hld_2 = self.parser_x86.parse_file(self.code_x86)[-3:]
        kernel_hld_3 = self.parser_x86.parse_file(self.code_x86)[5:8]
        semantics_hld.add_semantics(kernel_hld)
        semantics_hld.add_semantics(kernel_hld_2)
        semantics_hld.add_semantics(kernel_hld_3)

        def count_hidden_loads(kernel):
            """Count the instruction forms of ``kernel`` carrying the hidden-load flag."""
            return sum(1 for x in kernel if INSTR_FLAGS.HIDDEN_LD in x['flags'])

        self.assertEqual(count_hidden_loads(kernel_hld), 1)
        self.assertEqual(count_hidden_loads(kernel_hld_2), 0)
        self.assertEqual(count_hidden_loads(kernel_hld_3), 1)

Example #12

0

Show file

File: test_semantics.py Project: jdomke/OSACA

    def test_MachineModel_getter(self):
        """Exercise the getter methods of the CSX and TX2 machine models."""
        memory_operand = {
            'memory': {
                'offset': None,
                'base': {'name': 'r12'},
                'index': {'name': 'rcx'},
                'scale': 8,
            }
        }
        operands = [memory_operand]
        csx, tx2 = self.machine_model_csx, self.machine_model_tx2

        # the mnemonic 'GETRESULT' is expected to have no entry in either model
        for model in (csx, tx2):
            self.assertIsNone(model.get_instruction('GETRESULT', operands))

        for model, expected_arch in ((csx, 'csx'), (tx2, 'tx2')):
            self.assertEqual(model.get_arch(), expected_arch)
        for model, expected_isa in ((csx, 'x86'), (tx2, 'aarch64')):
            self.assertEqual(model.get_ISA(), expected_isa)

        expected_ports = [
            '0', '0DV', '1', '2', '2D', '3', '3D', '4', '5', '6', '7'
        ]
        expected_data_ports = ['2D', '3D']
        self.assertEqual(csx.get_ports(), expected_ports)
        self.assertEqual(csx.get_data_ports(), expected_data_ports)

        self.assertFalse(tx2.has_hidden_loads())

        # arch keys are passed in upper and mixed case
        for arch_key, expected_isa in (('CSX', 'x86'), ('tX2', 'aarch64')):
            self.assertEqual(MachineModel.get_isa_for_arch(arch_key),
                             expected_isa)
        with self.assertRaises(ValueError):
            self.assertIsNone(MachineModel.get_isa_for_arch('THE_MACHINE'))

Example #13

0

Show file

File: test_semantics.py Project: RRZE-HPC/OSACA

    def test_MachineModel_getter(self):
        """Verify the values returned by the getters of the CSX and TX2 machine models."""
        base_register = {"name": "r12"}
        index_register = {"name": "rcx"}
        memory_reference = {
            "offset": None,
            "base": base_register,
            "index": index_register,
            "scale": 8,
        }
        operand_list = [{"memory": memory_reference}]
        model_csx = self.machine_model_csx
        model_tx2 = self.machine_model_tx2

        # no entry is expected for the mnemonic "GETRESULT"
        unknown_csx = model_csx.get_instruction("GETRESULT", operand_list)
        self.assertIsNone(unknown_csx)
        unknown_tx2 = model_tx2.get_instruction("GETRESULT", operand_list)
        self.assertIsNone(unknown_tx2)

        # architecture key and ISA of both models
        self.assertEqual(model_csx.get_arch(), "csx")
        self.assertEqual(model_tx2.get_arch(), "tx2")
        self.assertEqual(model_csx.get_ISA(), "x86")
        self.assertEqual(model_tx2.get_ISA(), "aarch64")

        # ports and data ports of CSX
        all_ports = ["0", "0DV", "1", "2", "2D", "3", "3D", "4", "5", "6", "7"]
        self.assertEqual(model_csx.get_ports(), all_ports)
        self.assertEqual(model_csx.get_data_ports(), ["2D", "3D"])

        self.assertFalse(model_tx2.has_hidden_loads())

        # ISA lookup by arch key, given in upper and mixed case
        isa_of_csx = MachineModel.get_isa_for_arch("CSX")
        self.assertEqual(isa_of_csx, "x86")
        isa_of_tx2 = MachineModel.get_isa_for_arch("tX2")
        self.assertEqual(isa_of_tx2, "aarch64")
        with self.assertRaises(ValueError):
            self.assertIsNone(MachineModel.get_isa_for_arch("THE_MACHINE"))

Example #14

0

Show file

    def __init__(self, filename='', arch=None, path_to_yaml=None):
        """
        Create a frontend for exactly one machine model source.

        :param filename: path to the analyzed kernel file for documentation, defaults to ''
        :type filename: str, optional
        :param arch: micro-arch code for getting the machine model, defaults to None
        :type arch: str, optional
        :param path_to_yaml: path to the YAML file for getting the machine model, defaults to None
        :type path_to_yaml: str, optional
        :raises ValueError: if neither or both of ``arch`` and ``path_to_yaml`` are given
        """
        self._filename = filename
        has_arch = bool(arch)
        has_yaml = bool(path_to_yaml)
        if not (has_arch or has_yaml):
            raise ValueError('Either arch or path_to_yaml required.')
        if has_arch and has_yaml:
            raise ValueError('Only one of arch and path_to_yaml is allowed.')

        self._arch = arch
        if has_arch:
            self._arch = arch.lower()
            self._machine_model = MachineModel(arch=arch, lazy=True)
        else:
            # only the YAML path is set here; the arch key is taken from the loaded model
            self._machine_model = MachineModel(path_to_yaml=path_to_yaml, lazy=True)
            self._arch = self._machine_model.get_arch()

Example #15

0

Show file

def extract_model(tree, arch, skip_mem=True):
    """
    Build a machine model for one micro-architecture from an XML tree of instruction data.

    Every ``instruction`` element of ``tree`` with port usage data for ``arch`` becomes an
    entry of a new :class:`MachineModel`. An instruction is skipped if its operands cannot
    be extracted, if it has no ``architecture`` element for ``arch``, if none of its
    measurements lists ports, if ``skip_mem`` is set and one operand is of type ``mem``,
    or if no port pressure could be collected.

    :param tree: XML tree that is searched for ``instruction`` elements
    :param arch: micro-architecture key; its upper-case form selects the ``architecture``
        element of every instruction
    :type arch: str
    :param skip_mem: skip instructions with a memory operand, defaults to `True`
    :type skip_mem: bool, optional
    :returns: the populated machine model, or `None` if no ISA could be determined for
        ``arch``
    """
    try:
        isa = MachineModel.get_isa_for_arch(arch)
    except Exception:
        print("Skipping...", file=sys.stderr)
        return None
    mm = MachineModel(isa=isa)
    parser = get_parser(isa)
    arch_key = arch.upper()

    for instruction_tag in tree.findall(".//instruction"):
        mnemonic = instruction_tag.attrib["asm"]
        iform = instruction_tag.attrib["iform"]
        # reduce to second part if mnemonic contains a space (e.g., "REX CRC32")
        if " " in mnemonic:
            mnemonic = mnemonic.split(" ", 1)[1]

        # Extract parameter components
        try:
            parameters = extract_paramters(instruction_tag, parser, isa)
            if isa == "x86":
                parameters.reverse()
        except ValueError as e:
            print(e, file=sys.stderr)
            # Without operands the entry cannot be built; carrying on would reuse the
            # operands of the previous instruction (or fail on the very first one).
            continue

        # Extract port occupation, throughput and latency
        port_pressure, throughput, latency, uops = [], None, None, None
        arch_tag = instruction_tag.find('architecture[@name="' + arch_key + '"]')
        if arch_tag is None:
            continue
        # skip any instructions without port utilization
        if not any(["ports" in x.attrib for x in arch_tag.findall("measurement")]):
            print("Couldn't find port utilization, skip: ", iform, file=sys.stderr)
            continue
        # warn (the instruction is NOT skipped) if the TP derived from the ports of the
        # first measurement is greater than its measured TP
        if [float(x.attrib["TP_ports"]) > min(float(x.attrib["TP_loop"]),
                                              float(x.attrib["TP_unrolled"]))
                for x in arch_tag.findall("measurement")][0]:
            print(
                "Calculated TP is greater than measured TP.",
                iform,
                file=sys.stderr,
            )
        # skip if instruction contains memory operand
        if skip_mem and any(
            [x.attrib["type"] == "mem" for x in instruction_tag.findall("operand")]
        ):
            print("Contains memory operand, skip: ", iform, file=sys.stderr)
            continue
        # We collect all measurement and IACA information and compare them later
        for measurement_tag in arch_tag.iter("measurement"):
            if "TP_ports" in measurement_tag.attrib:
                throughput = float(measurement_tag.attrib["TP_ports"])
            else:
                # XML attribute values are strings; convert them before comparing so that
                # min() neither compares lexicographically nor mixes str with float.
                # NOTE(review): "TP_unroll" differs from the "TP_unrolled" key read
                # above - confirm which attribute name is intended.
                measured_tps = [
                    float(measurement_tag.attrib[key])
                    for key in ("TP_loop", "TP_unroll", "TP")
                    if key in measurement_tag.attrib
                ]
                throughput = min(measured_tps) if measured_tps else None
            uops = int(measurement_tag.attrib["uops"]) if "uops" in measurement_tag.attrib else None
            if "ports" in measurement_tag.attrib:
                port_pressure.append(port_pressure_from_tag_attributes(measurement_tag.attrib))
            latencies = [
                int(l_tag.attrib["cycles"])
                for l_tag in measurement_tag.iter("latency")
                if "cycles" in l_tag.attrib
            ]
            if len(latencies) == 0:
                # fall back to the upper bounds if no exact cycle counts are given
                latencies = [
                    int(l_tag.attrib["max_cycles"])
                    for l_tag in measurement_tag.iter("latency")
                    if "max_cycles" in l_tag.attrib
                ]
            # shifted comparison: true as soon as two neighbouring values differ
            if latencies[1:] != latencies[:-1]:
                print(
                    "Contradicting latencies found, using smallest:",
                    iform,
                    latencies,
                    file=sys.stderr,
                )
            if latencies:
                latency = min(latencies)

        # Ordered by IACA version (newest last)
        for iaca_tag in sorted(
            arch_tag.iter("IACA"), key=lambda i: StrictVersion(i.attrib["version"])
        ):
            if "ports" in iaca_tag.attrib:
                port_pressure.append(port_pressure_from_tag_attributes(iaca_tag.attrib))

        # Check if all are equal
        if port_pressure:
            if port_pressure[1:] != port_pressure[:-1]:
                print("Contradicting port occupancies, using latest IACA:", iform, file=sys.stderr)
            port_pressure = port_pressure[-1]
        else:
            # print("No data available for this architecture:", mnemonic, file=sys.stderr)
            continue

        # Adding Intel's 2D and 3D pipelines on Intel µarchs, without Ice Lake:
        if arch_key in intel_archs and arch_key not in ["ICL"]:
            if any([p["class"] == "memory" for p in parameters]):
                # We have a memory parameter, if ports 2 & 3 are present, also add 2D & 3D
                # TODO remove port7 on 'hsw' onward and split entries depending on addressing mode
                port_23 = False
                port_4 = False
                for pp in port_pressure:
                    if "2" in pp[1] and "3" in pp[1]:
                        port_23 = True
                    if "4" in pp[1]:
                        port_4 = True
                # Add (x, ['2D', '3D']) if load ports (2 & 3) are used, but not the store port (4)
                if port_23 and not port_4:
                    if arch_key in ["SNB", "IVB"] and any(
                            [p.get('name', '') == 'ymm' for p in parameters]) and \
                            '128' not in mnemonic:
                        # x = 2 if SNB or IVB and ymm register in any operand and not '128' in
                        # instruction name
                        port2D3D_pressure = 2
                    else:
                        # otherwise x = 1
                        port2D3D_pressure = 1
                    port_pressure.append((port2D3D_pressure, ["2D", "3D"]))

        # Add missing ports:
        for ports in [pp[1] for pp in port_pressure]:
            for p in ports:
                mm.add_port(p)

        # the stored throughput is always derived from the final port pressure
        throughput = max(mm.average_port_pressure(port_pressure))
        mm.set_instruction(mnemonic, parameters, latency, port_pressure, throughput, uops)
    # TODO eliminate entries which could be covered by automatic load / store expansion
    return mm

Example #16

0

Show file

class Frontend(object):
    def __init__(self, filename='', arch=None, path_to_yaml=None):
        """
        Constructor method.

        :param filename: path to the analyzed kernel file for documentation, defaults to ''
        :type filename: str, optional
        :param arch: micro-arch code for getting the machine model, defaults to None
        :type arch: str, optional
        :param path_to_yaml: path to the YAML file for getting the machine model, defaults to None
        :type path_to_yaml: str, optional
        """
        self._filename = filename
        if not arch and not path_to_yaml:
            raise ValueError('Either arch or path_to_yaml required.')
        if arch and path_to_yaml:
            raise ValueError('Only one of arch and path_to_yaml is allowed.')
        self._arch = arch
        if arch:
            self._arch = arch.lower()
            self._machine_model = MachineModel(arch=arch, lazy=True)
        elif path_to_yaml:
            self._machine_model = MachineModel(path_to_yaml=path_to_yaml,
                                               lazy=True)
            self._arch = self._machine_model.get_arch()

    def _is_comment(self, instruction_form):
        """
        Checks if instruction form is a comment-only line.

        :param instruction_form: instruction form to check
        :type instruction_form: `dict`
        :returns: `True` if comment line, `False` otherwise
        """
        return instruction_form[
            'comment'] is not None and instruction_form['instruction'] is None

    def throughput_analysis(self, kernel, show_lineno=False, show_cmnts=True):
        """
        Create the throughput-only report of a kernel.

        :param kernel: Kernel to build throughput analysis for.
        :type kernel: list
        :param show_lineno: flag for showing the line number of instructions, defaults to `False`
        :type show_lineno: bool, optional
        :param show_cmnts: flag for showing comment-only lines in kernel, defaults to `True`
        :type show_cmnts: bool, optional
        :returns: the report as a string
        """
        col_sep = '|'
        lineno_filler = ''
        if show_lineno:
            lineno_filler = '     '
        port_len = self._get_max_port_len(kernel)
        separator = '-' * sum([width + 3 for width in port_len]) + '-'
        if show_lineno:
            # widen the rule by the width of the line number column
            separator += '--' + len(str(kernel[-1]['line_number'])) * '-'
        sep_list = self._get_separator_list(col_sep)
        headline_template = '{{:^{}}}'.format(len(separator))

        report = ['\n\nThroughput Analysis Report\n--------------------------\n']
        report.append(headline_template.format('Port pressure in cycles') + '\n')
        report.append(lineno_filler + self._get_port_number_line(port_len) + '\n')
        report.append(separator + '\n')
        for instruction_form in kernel:
            lineno = instruction_form['line_number']
            pressure = self._get_port_pressure(instruction_form['port_pressure'],
                                               port_len,
                                               separator=sep_list)
            if instruction_form['instruction'] is not None:
                flag_symbols = self._get_flag_symbols(instruction_form['flags'])
            else:
                flag_symbols = ' '
            code = instruction_form['line'].strip().replace('\t', ' ')
            line = '{:4d} {} {} {}'.format(lineno, pressure, flag_symbols, code)
            if not show_lineno:
                # cut everything in front of the first column separator
                line = col_sep + col_sep.join(line.split(col_sep)[1:])
            if show_cmnts is False and self._is_comment(instruction_form):
                continue
            report.append(line + '\n')
        report.append('\n')
        tp_sum = ArchSemantics.get_throughput_sum(kernel)
        report.append(lineno_filler + self._get_port_pressure(
            tp_sum, port_len, separator=' ') + '\n')
        return ''.join(report)

    def latency_analysis(self, cp_kernel, separator='|'):
        """
        Create the list-based critical path (CP) report.

        One row is written per instruction form, followed by a line with the summed
        ``latency_cp`` of all forms.

        :param cp_kernel: loop kernel containing the CP information for each instruction form
        :type cp_kernel: list
        :separator: separator symbol for the columns, defaults to '|'
        :type separator: str, optional
        :returns: the report as a string
        """
        row_template = '{:4d} {} {:4.1f} {}{}{} {}'
        rows = ['\n\nLatency Analysis Report\n-----------------------\n']
        for instruction_form in cp_kernel:
            rows.append(row_template.format(
                instruction_form['line_number'],
                separator,
                instruction_form['latency_cp'],
                separator,
                # mark rows whose latency is flagged as unknown
                'X' if INSTR_FLAGS.LT_UNKWN in instruction_form['flags'] else ' ',
                separator,
                instruction_form['line'],
            ) + '\n')

        # summary line, indented by the width of the widest line number
        lineno_width = max(
            len(str(instr_form['line_number'])) for instr_form in cp_kernel)
        total_latency = sum(instr_form['latency_cp'] for instr_form in cp_kernel)
        rows.append('\n{:4} {} {:4.1f}'.format(
            ' ' * lineno_width, ' ' * len(separator), total_latency) + '\n')
        return ''.join(rows)

    def loopcarried_dependencies(self, dep_dict, separator='|'):
        """
        Create the list-based loop-carried dependency (LCD) report.

        Each row shows the key of the dependency, the summed ``latency_lcd`` of its
        instruction forms, the stripped code line of its root and the line numbers of all
        instruction forms involved.

        :param dep_dict: dictionary with first instruction in LCD as key and the deps as value
        :type dep_dict: dict
        :separator: separator symbol for the columns, defaults to '|'
        :type separator: str, optional
        :returns: the report as a string
        """
        row_template = '{:4d} {} {:4.1f} {} {:36}{} {}\n'
        report = ('\n\nLoop-Carried Dependencies Analysis Report\n'
                  '-----------------------------------------\n')
        # TODO find a way to overcome padding for different tab-lengths
        for root_key, entry in dep_dict.items():
            total_latency = sum(
                instr_form['latency_lcd'] for instr_form in entry['dependencies'])
            root_code = entry['root']['line'].strip()
            involved_lines = [node['line_number'] for node in entry['dependencies']]
            report += row_template.format(
                root_key,
                separator,
                total_latency,
                separator,
                root_code,
                separator,
                involved_lines,
            )
        return report

    def full_analysis(self,
                      kernel,
                      kernel_dg: KernelDG,
                      ignore_unknown=False,
                      arch_warning=False,
                      length_warning=False,
                      verbose=False):
        """
        Create the complete report: header, user warnings, symbol map, the combined
        TP/CP/LCD view and the list-based LCD view.

        :param kernel: kernel to report on
        :type kernel: list
        :param kernel_dg: directed graph containing CP and LCD
        :type kernel_dg: :class:`~osaca.semantics.KernelDG`
        :param ignore_unknown: flag for ignore warning if performance data is missing, defaults to
            `False`
        :type ignore_unknown: boolean, optional
        :param arch_warning: flag for additional user warning to specify micro-arch
        :type arch_warning: boolean, optional
        :param length_warning: flag for additional user warning to specify kernel length with
            --lines
        :type length_warning: boolean, optional
        :param verbose: flag for verbosity level, defaults to False (not used by this method)
        :type verbose: boolean, optional
        :returns: the concatenated report sections
        """
        header = self._header_report()
        warnings_section = self._user_warnings(arch_warning, length_warning)
        symbol_map = self._symbol_map()
        combined = self.combined_view(
            kernel,
            kernel_dg.get_critical_path(),
            kernel_dg.get_loopcarried_dependencies(),
            ignore_unknown,
        )
        # the dependencies are requested from the graph again for the list-based view
        lcd_list = self.loopcarried_dependencies(
            kernel_dg.get_loopcarried_dependencies())
        return header + warnings_section + symbol_map + combined + lcd_list

    def combined_view(self,
                      kernel,
                      cp_kernel: KernelDG,
                      dep_dict,
                      ignore_unknown=False,
                      show_cmnts=True):
        """
        Build combined view of kernel including port pressure (TP), a CP column and a
        LCD column.

        :param kernel: kernel to report on
        :type kernel: list
        :param cp_kernel: instruction forms on the critical path; their `line_number` and
            `latency_cp` entries are read here
            (NOTE(review): annotated as KernelDG but iterated like a list of instruction
            forms -- confirm against the caller)
        :param dep_dict: dictionary with first instruction in LCD as key and the deps as value
        :type dep_dict: dict
        :param ignore_unknown: flag for showing result despite of missing instructions, defaults to
            `False`
        :type ignore_unknown: bool, optional
        :param show_cmnts: flag for showing comment-only lines in kernel, defaults to `True`
        :type show_cmnts: bool, optional
        :returns: the combined report as one string
        """
        s = '\n\nCombined Analysis Report\n------------------------\n'
        lineno_filler = '     '
        col_sep = '|'
        port_len = self._get_max_port_len(kernel)
        # Separator rule: port columns, then the line-number column, then the CP/LCD columns
        separator = '-' * sum([x + 3 for x in port_len]) + '-'
        separator += '--' + len(str(kernel[-1]['line_number'])) * '-'
        separator += '-' * (2 * 6 + len(col_sep)) + '-' * len(col_sep)
        sep_list = self._get_separator_list(col_sep)
        headline = 'Port pressure in cycles'
        headline_str = '{{:^{}}}'.format(len(separator))
        # Prepare CP/LCD variables; sets keep the per-line membership tests cheap
        cp_lines = {x['line_number'] for x in cp_kernel}
        sums = {
            dep: sum(instr_form['latency_lcd']
                     for instr_form in dep_dict[dep]['dependencies'])
            for dep in dep_dict
        }
        lcd_sum = 0.0
        longest_dep = None
        lcd_lines = set()
        if sums:
            # max() yields the first key carrying the largest latency sum
            longest_lcd = max(sums, key=sums.get)
            lcd_sum = sums[longest_lcd]
            longest_dep = dep_dict[longest_lcd]
            lcd_lines = {d['line_number'] for d in longest_dep['dependencies']}

        s += headline_str.format(headline) + '\n'
        s += (lineno_filler +
              self._get_port_number_line(port_len, separator=col_sep) +
              '{}{:^6}{}{:^6}{}'.format(col_sep, 'CP', col_sep, 'LCD', col_sep) +
              '\n' + separator + '\n')
        for instruction_form in kernel:
            if show_cmnts is False and self._is_comment(instruction_form):
                continue
            line_number = instruction_form['line_number']
            # Collect every entry of uops[1] (presumably port names -- confirm); these ports
            # are handed to _get_port_pressure as `used_ports`.
            used_ports = list(
                {p for uops in instruction_form['port_uops'] for p in uops[1]})
            s += '{:4d} {}{} {} {}\n'.format(
                line_number,
                self._get_port_pressure(instruction_form['port_pressure'],
                                        port_len, used_ports, sep_list),
                self._get_lcd_cp_ports(
                    line_number,
                    cp_kernel if line_number in cp_lines else None,
                    longest_dep if line_number in lcd_lines else None,
                ),
                self._get_flag_symbols(instruction_form['flags'])
                if instruction_form['instruction'] is not None else ' ',
                instruction_form['line'].strip().replace('\t', ' '),
            )
        s += '\n'
        # check for unknown instructions and throw warning if called without --ignore-unknown
        num_missing = 0
        if not ignore_unknown:
            num_missing = sum(
                1 for instr in kernel if INSTR_FLAGS.TP_UNKWN in instr['flags'])
        if num_missing:
            s += self._missing_instruction_error(num_missing)
        else:
            # lcd_sum already calculated before
            tp_sum = ArchSemantics.get_throughput_sum(kernel)
            cp_sum = sum([x['latency_cp'] for x in cp_kernel])
            s += (lineno_filler +
                  self._get_port_pressure(tp_sum, port_len, separator=' ') +
                  ' {:^6} {:^6}\n'.format(cp_sum, lcd_sum))
        return s

    ####################
    # HELPER FUNCTIONS
    ####################

    def _missing_instruction_error(self, amount):
        """Returns the warning for if any instruction form in the analysis is missing."""
        s = (
            '------------------ WARNING: The performance data for {} instructions is missing.'
            '------------------\n'
            '                     No final analysis is given. If you want to ignore this\n'
            '                     warning and run the analysis anyway, start osaca with\n'
            '                                       --ignore-unknown flag.\n'
            '--------------------------------------------------------------------------------'
            '----------------{}\n').format(amount, '-' * len(str(amount)))
        return s

    def _user_warnings(self, arch_warning, length_warning):
        """Returns warning texts for giving the user more insight in what he is doing."""
        arch_text = (
            'WARNING: No micro-architecture was specified and a default uarch was used.\n'
            '         Specify the uarch with --arch. See --help for more information.\n'
        )
        length_text = (
            'WARNING: You are analyzing a large amount of instruction forms. Analysis '
            'across loops/block boundaries often do not make much sense.\n'
            '         Specify the kernel length with --length. See --help for more '
            'information.\n'
            '         If this is intentional, you can safely ignore this message.\n'
        )

        warnings = ''
        warnings += arch_text if arch_warning else ''
        warnings += length_text if length_warning else ''
        warnings += '\n'
        return warnings

    def _get_separator_list(self, separator, separator_2=' '):
        """Creates column view for seperators in the TP/combined view."""
        separator_list = []
        for i in range(len(self._machine_model.get_ports()) - 1):
            match_1 = re.search(r'\d+', self._machine_model.get_ports()[i])
            match_2 = re.search(r'\d+', self._machine_model.get_ports()[i + 1])
            if match_1 is not None and match_2 is not None and match_1.group(
            ) == match_2.group():
                separator_list.append(separator_2)
            else:
                separator_list.append(separator)
        separator_list.append(separator)
        return separator_list

    def _get_flag_symbols(self, flag_obj):
        """
        Translate the flags of an instruction form into their report symbols.

        :param flag_obj: container of flags, tested with `in`
        :returns: concatenated symbols, or a single blank if no known flag is contained
        """
        symbol_table = (
            (INSTR_FLAGS.NOT_BOUND, '*'),
            (INSTR_FLAGS.TP_UNKWN, 'X'),
            (INSTR_FLAGS.HIDDEN_LD, 'P'),
        )
        # TODO add other flags
        string_result = ''.join(
            symbol for flag, symbol in symbol_table if flag in flag_obj)
        return string_result if string_result else ' '

    def _get_port_pressure(self,
                           ports,
                           port_len,
                           used_ports=[],
                           separator='|'):
        """Returns line of port pressure for an instruction."""
        if not isinstance(separator, list):
            separator = [separator for x in ports]
        string_result = '{} '.format(separator[-1])
        for i in range(len(ports)):
            if float(ports[i]) == 0.0 and self._machine_model.get_ports(
            )[i] not in used_ports:
                string_result += port_len[i] * ' ' + ' {} '.format(
                    separator[i])
                continue
            left_len = len(str(float(ports[i])).split('.')[0])
            substr = '{:' + str(left_len) + '.' + str(
                max(port_len[i] - left_len - 1, 0)) + 'f}'
            substr = substr.format(ports[i])
            string_result += (substr +
                              ' {} '.format(separator[i]) if '.' in substr else
                              '{:.1f}{} '.format(ports[i], separator[i]))
        return string_result[:-1]

    def _get_node_by_lineno(self, lineno, kernel):
        """Returns instruction form from kernel by its line number."""
        nodes = [instr for instr in kernel if instr['line_number'] == lineno]
        return nodes[0] if len(nodes) > 0 else None

    def _get_lcd_cp_ports(self, line_number, cp_dg, dependency, separator='|'):
        """Returns the CP and LCD line for one instruction."""
        lat_cp = lat_lcd = ''
        if cp_dg:
            lat_cp = float(
                self._get_node_by_lineno(line_number, cp_dg)['latency_cp'])
        if dependency:
            lat_lcd = float(
                self._get_node_by_lineno(
                    line_number, dependency['dependencies'])['latency_lcd'])
        return '{} {:>4} {} {:>4} {}'.format(separator, lat_cp, separator,
                                             lat_lcd, separator)

    def _get_max_port_len(self, kernel):
        """Returns the maximal length needed to print all throughputs of the kernel."""
        port_len = [4 for x in self._machine_model.get_ports()]
        for instruction_form in kernel:
            for i, port in enumerate(instruction_form['port_pressure']):
                if len('{:.2f}'.format(port)) > port_len[i]:
                    port_len[i] = len('{:.2f}'.format(port))
        return port_len

    def _get_port_number_line(self, port_len, separator='|'):
        """Returns column view of port identificators of machine_model."""
        string_result = separator
        separator_list = self._get_separator_list(separator, '-')
        for i, length in enumerate(port_len):
            substr = '{:^' + str(length + 2) + 's}'
            string_result += substr.format(
                self._machine_model.get_ports()[i]) + separator_list[i]
        return string_result

    def _header_report(self):
        """Prints header information"""
        version = 'v0.3'
        adjust = 20
        header = ''
        header += 'Open Source Architecture Code Analyzer (OSACA) - {}\n'.format(
            version)
        header += 'Analyzed file:'.ljust(adjust) + '{}\n'.format(
            self._filename)
        header += 'Architecture:'.ljust(adjust) + '{}\n'.format(self._arch)
        header += 'Timestamp:'.ljust(adjust) + '{}\n'.format(
            dt.utcnow().strftime('%Y-%m-%d %H:%M:%S'))
        return header + '\n'

    def _symbol_map(self):
        """
        Build the legend explaining the instruction flag symbols.

        :returns: one line per flag in sorted flag order, each showing the symbol and
            its description
        """
        descriptions = {
            INSTR_FLAGS.NOT_BOUND: 'Instruction micro-ops not bound to a port',
            INSTR_FLAGS.TP_UNKWN: ('No throughput/latency information for this instruction in '
                                   'data file'),
            INSTR_FLAGS.HIDDEN_LD: ('Throughput of LOAD operation can be hidden behind a past '
                                    'or future STORE instruction'),
        }
        return ''.join(
            ' {} - {}\n'.format(self._get_flag_symbols([flag]), descriptions[flag])
            for flag in sorted(descriptions))

    def _port_binding_summary(self):
        raise NotImplementedError

Example #17

0

Show file

File: test_db_interface.py Project: jdomke/OSACA

 def test_invalid_add(self):
     """An empty entry raises KeyError; set_instruction() without arguments raises TypeError."""
     failing_calls = (
         (KeyError, lambda model: model.set_instruction_entry({})),
         (TypeError, lambda model: model.set_instruction()),
     )
     for expected_error, call in failing_calls:
         with self.assertRaises(expected_error):
             call(MachineModel('csx'))

Example #18

0

Show file

File: test_db_interface.py Project: RRZE-HPC/OSACA

    def test_add_single_entry(self):
        """Adding one entry grows the instruction form list of each model by exactly one."""
        models = [MachineModel(arch) for arch in ("csx", "tx2", "zen1")]
        counts_before = [len(model["instruction_forms"]) for model in models]

        new_entries = [self.entry_csx, self.entry_tx2, {"name": "empty_operation"}]
        for model, entry in zip(models, new_entries):
            model.set_instruction_entry(entry)

        added = [
            len(model["instruction_forms"]) - before
            for model, before in zip(models, counts_before)
        ]
        for num_added in added:
            self.assertEqual(num_added, 1)

Example #19

0

Show file

class Frontend(object):
    def __init__(self, filename="", arch=None, path_to_yaml=None):
        """
        Constructor method.

        :param filename: path to the analyzed kernel file for documentation, defaults to ''
        :type filename: str, optional
        :param arch: micro-arch code for getting the machine model, defaults to None
        :type arch: str, optional
        :param path_to_yaml: path to the YAML file for getting the machine model, defaults to None
        :type path_to_yaml: str, optional
        """
        self._filename = filename
        if not arch and not path_to_yaml:
            raise ValueError("Either arch or path_to_yaml required.")
        if arch and path_to_yaml:
            raise ValueError("Only one of arch and path_to_yaml is allowed.")
        self._arch = arch
        if arch:
            self._arch = arch.lower()
            self._machine_model = MachineModel(arch=arch, lazy=True)
        elif path_to_yaml:
            self._machine_model = MachineModel(path_to_yaml=path_to_yaml,
                                               lazy=True)
            self._arch = self._machine_model.get_arch()

    def _is_comment(self, instruction_form):
        """
        Checks if instruction form is a comment-only line.

        :param instruction_form: instruction form to check
        :type instruction_form: `dict`
        :returns: `True` if comment line, `False` otherwise
        """
        return instruction_form[
            "comment"] is not None and instruction_form["instruction"] is None

    def throughput_analysis(self, kernel, show_lineno=False, show_cmnts=True):
        """
        Build the throughput-only report.

        :param kernel: Kernel to build throughput analysis for.
        :type kernel: list
        :param show_lineno: flag for showing the line number of instructions, defaults to `False`
        :type show_lineno: bool, optional
        :param show_cmnts: flag for showing comment-only lines in kernel, defaults to `True`
        :type show_cmnts: bool, optional
        :returns: the report as one string
        """
        col_sep = "|"
        port_len = self._get_max_port_len(kernel)
        separator = "-" * sum([x + 3 for x in port_len]) + "-"
        if show_lineno:
            lineno_filler = "     "
            separator += "--" + len(str(kernel[-1]["line_number"])) * "-"
        else:
            lineno_filler = ""
        sep_list = self._get_separator_list(col_sep)
        headline = "{{:^{}}}".format(len(separator)).format("Port pressure in cycles")

        report = [
            "\n\nThroughput Analysis Report\n--------------------------\n",
            headline + "\n",
            lineno_filler + self._get_port_number_line(port_len) + "\n",
            separator + "\n",
        ]
        for instruction_form in kernel:
            number = instruction_form["line_number"]
            pressure = self._get_port_pressure(instruction_form["port_pressure"],
                                               port_len,
                                               separator=sep_list)
            if instruction_form["instruction"] is not None:
                flags = self._get_flag_symbols(instruction_form["flags"])
            else:
                flags = " "
            code = instruction_form["line"].strip().replace("\t", " ")
            line = "{:4d} {} {} {}".format(number, pressure, flags, code)
            if not show_lineno:
                # cut everything in front of the first column separator (the line number)
                line = col_sep + col_sep.join(line.split(col_sep)[1:])
            if show_cmnts is False and self._is_comment(instruction_form):
                continue
            report.append(line + "\n")
        report.append("\n")
        tp_sum = ArchSemantics.get_throughput_sum(kernel)
        report.append(
            lineno_filler + self._get_port_pressure(tp_sum, port_len, separator=" ") + "\n")
        return "".join(report)

    def latency_analysis(self, cp_kernel, separator="|"):
        """
        Build a list-based CP analysis report.

        :param cp_kernel: loop kernel containing the CP information for each instruction form
        :type cp_kernel: list
        :param separator: separator symbol for the columns, defaults to '|'
        :type separator: str, optional
        :returns: one line per instruction form followed by the summed CP latency; for an
            empty `cp_kernel` only the headline and a sum of 0.0
        """
        s = "\n\nLatency Analysis Report\n-----------------------\n"
        for instruction_form in cp_kernel:
            unknown_marker = (
                "X" if INSTR_FLAGS.LT_UNKWN in instruction_form["flags"] else " ")
            s += "{:4d} {} {:4.1f} {}{}{} {}\n".format(
                instruction_form["line_number"],
                separator,
                instruction_form["latency_cp"],
                separator,
                unknown_marker,
                separator,
                instruction_form["line"],
            )
        # default=0 keeps max() from raising ValueError on an empty critical path
        lineno_width = max(
            (len(str(instr_form["line_number"])) for instr_form in cp_kernel),
            default=0)
        s += "\n{:4} {} {:4.1f}\n".format(
            " " * lineno_width,
            " " * len(separator),
            sum(instr_form["latency_cp"] for instr_form in cp_kernel),
        )
        return s

    def loopcarried_dependencies(self, dep_dict, separator="|"):
        """
        Build a list-based LCD analysis report.

        :param dep_dict: dictionary with first instruction in LCD as key and the deps as value
        :type dep_dict: dict
        :param separator: separator symbol for the columns, defaults to '|'
        :type separator: str, optional
        :returns: headline plus one line per LCD showing its key, latency, root line and
            the line numbers of its dependencies
        """
        headline = ("\n\nLoop-Carried Dependencies Analysis Report\n" +
                    "-----------------------------------------\n")
        # TODO find a way to overcome padding for different tab-lengths
        rows = []
        for dep, lcd in dep_dict.items():
            latency = lcd["latency"]
            root_line = lcd["root"]["line"].strip()
            involved = [node["line_number"] for node, _ in lcd["dependencies"]]
            rows.append("{:4d} {} {:4.1f} {} {:36}{} {}\n".format(
                dep, separator, latency, separator, root_line, separator, involved))
        return headline + "".join(rows)

    def full_analysis(
        self,
        kernel,
        kernel_dg: KernelDG,
        ignore_unknown=False,
        arch_warning=False,
        length_warning=False,
        lcd_warning=False,
        verbose=False,
    ):
        """
        Assemble the full report: header, header warnings, symbol map, combined TP/CP/LCD
        view, footer warnings and the list based LCD view.

        :param kernel: kernel to report on
        :type kernel: list
        :param kernel_dg: directed graph containing CP and LCD
        :type kernel_dg: :class:`~osaca.semantics.KernelDG`
        :param ignore_unknown: flag for ignore warning if performance data is missing, defaults to
            `False`
        :type ignore_unknown: boolean, optional
        :param arch_warning: flag for additional user warning to specify micro-arch
        :type arch_warning: boolean, optional
        :param length_warning: flag for additional user warning to specify kernel length with
                                     --lines
        :type length_warning: boolean, optional
        :param lcd_warning: flag for additional user warning due to LCD analysis timed out
        :type lcd_warning: boolean, optional
        :param verbose: flag for verbosity level, defaults to False
        :type verbose: boolean, optional
        :returns: all report sections concatenated into one string
        """
        sections = [
            self._header_report(),
            self._user_warnings_header(arch_warning, length_warning),
            self._symbol_map(),
            self.combined_view(
                kernel,
                kernel_dg.get_critical_path(),
                kernel_dg.get_loopcarried_dependencies(),
                ignore_unknown,
            ),
            self._user_warnings_footer(lcd_warning),
            self.loopcarried_dependencies(
                kernel_dg.get_loopcarried_dependencies()),
        ]
        return "".join(sections)

    def combined_view(self,
                      kernel,
                      cp_kernel: KernelDG,
                      dep_dict,
                      ignore_unknown=False,
                      show_cmnts=True):
        """
        Build combined view of kernel including port pressure (TP), a CP column and a
        LCD column.

        :param kernel: kernel to report on
        :type kernel: list
        :param cp_kernel: instruction forms on the critical path; their `line_number` and
            `latency_cp` entries are read here
            (NOTE(review): annotated as KernelDG but iterated like a list of instruction
            forms -- confirm against the caller)
        :param dep_dict: dictionary with first instruction in LCD as key and the deps as value
        :type dep_dict: dict
        :param ignore_unknown: flag for showing result despite of missing instructions, defaults to
            `False`
        :type ignore_unknown: bool, optional
        :param show_cmnts: flag for showing comment-only lines in kernel, defaults to `True`
        :type show_cmnts: bool, optional
        :returns: the combined report as one string
        """
        s = "\n\nCombined Analysis Report\n------------------------\n"
        lineno_filler = "     "
        col_sep = "|"
        port_len = self._get_max_port_len(kernel)
        # Separator rule: port columns, then the line-number column, then the CP/LCD columns
        separator = "-" * sum([x + 3 for x in port_len]) + "-"
        separator += "--" + len(str(kernel[-1]["line_number"])) * "-"
        separator += "-" * (2 * 6 + len(col_sep)) + "-" * len(col_sep)
        sep_list = self._get_separator_list(col_sep)
        headline = "Port pressure in cycles"
        headline_str = "{{:^{}}}".format(len(separator))
        # Prepare CP/LCD variables; a set keeps the per-line CP membership test cheap
        cp_lines = {x["line_number"] for x in cp_kernel}
        lcd_sum = 0.0
        lcd_lines = {}
        if dep_dict:
            # only the LCD with the largest latency is shown in the LCD column
            longest_lcd = max(dep_dict, key=lambda ln: dep_dict[ln]["latency"])
            lcd_sum = dep_dict[longest_lcd]["latency"]
            lcd_lines = {
                instr["line_number"]: lat
                for instr, lat in dep_dict[longest_lcd]["dependencies"]
            }

        s += headline_str.format(headline) + "\n"
        s += (lineno_filler +
              self._get_port_number_line(port_len, separator=col_sep) +
              "{}{:^6}{}{:^6}{}".format(col_sep, "CP", col_sep, "LCD", col_sep) +
              "\n" + separator + "\n")
        for instruction_form in kernel:
            if show_cmnts is False and self._is_comment(instruction_form):
                continue
            line_number = instruction_form["line_number"]
            # Collect every entry of uops[1] (presumably port names -- confirm); these ports
            # are handed to _get_port_pressure as `used_ports`.
            used_ports = list(
                {p for uops in instruction_form["port_uops"] for p in uops[1]})
            s += "{:4d} {}{} {} {}\n".format(
                line_number,
                self._get_port_pressure(instruction_form["port_pressure"],
                                        port_len, used_ports, sep_list),
                self._get_lcd_cp_ports(
                    line_number,
                    cp_kernel if line_number in cp_lines else None,
                    lcd_lines.get(line_number),
                ),
                self._get_flag_symbols(instruction_form["flags"])
                if instruction_form["instruction"] is not None else " ",
                instruction_form["line"].strip().replace("\t", " "),
            )
        s += "\n"
        # check for unknown instructions and throw warning if called without --ignore-unknown
        num_missing = 0
        if not ignore_unknown:
            num_missing = sum(
                1 for instr in kernel if INSTR_FLAGS.TP_UNKWN in instr["flags"])
        if num_missing:
            s += self._missing_instruction_error(num_missing)
        else:
            # lcd_sum already calculated before
            tp_sum = ArchSemantics.get_throughput_sum(kernel)
            # if ALL instructions are unknown, take a line of 0s
            if not tp_sum:
                tp_sum = kernel[0]["port_pressure"]
            cp_sum = sum([x["latency_cp"] for x in cp_kernel])
            s += (lineno_filler +
                  self._get_port_pressure(tp_sum, port_len, separator=" ") +
                  " {:^6} {:^6}\n".format(cp_sum, lcd_sum))
        return s

    ####################
    # HELPER FUNCTIONS
    ####################

    def _missing_instruction_error(self, amount):
        """Returns the warning for if any instruction form in the analysis is missing."""
        s = (
            "------------------ WARNING: The performance data for {} instructions is missing."
            "------------------\n"
            "                     No final analysis is given. If you want to ignore this\n"
            "                     warning and run the analysis anyway, start osaca with\n"
            "                                       --ignore-unknown flag.\n"
            "--------------------------------------------------------------------------------"
            "----------------{}\n").format(amount, "-" * len(str(amount)))
        return s

    def _user_warnings_header(self, arch_warning, length_warning):
        """Returns warning texts for giving the user more insight in what he is doing."""
        dashed_line = (
            "-------------------------------------------------------------------------"
            "------------------------\n")
        arch_text = (
            "-------------------------- WARNING: No micro-architecture was specified "
            "-------------------------\n"
            "         A default uarch for this particular ISA was used. Specify "
            "the uarch with --arch.\n         See --help for more information.\n"
            + dashed_line)
        length_text = (
            "----------------- WARNING: You are analyzing a large amount of instruction forms "
            "----------------\n         Analysis across loops/block boundaries often do not make"
            " much sense.\n         Specify the kernel length with --length. See --help for more "
            "information.\n         If this is intentional, you can safely ignore this message.\n"
            + dashed_line)

        warnings = ""
        warnings += arch_text if arch_warning else ""
        warnings += length_text if length_warning else ""
        warnings += "\n"
        return warnings

    def _user_warnings_footer(self, lcd_warning):
        """Returns warning texts for giving the user more insight in what he is doing."""
        dashed_line = (
            "-------------------------------------------------------------------------"
            "------------------------\n")
        lcd_text = (
            "-------------------------------- WARNING: LCD analysis timed out "
            "-------------------------------\n         While searching for all dependency chains"
            " the analysis timed out and might be\n         incomplete. Decrease the number of "
            "instructions or set the timeout threshold\n         with --lcd-timeout. See --help"
            " for more information.\n" + dashed_line)
        warnings = "\n"
        warnings += lcd_text if lcd_warning else ""
        warnings += "\n"
        return warnings

    def _get_separator_list(self, separator, separator_2=" "):
        """Creates column view for seperators in the TP/combined view."""
        separator_list = []
        for i in range(len(self._machine_model.get_ports()) - 1):
            match_1 = re.search(r"\d+", self._machine_model.get_ports()[i])
            match_2 = re.search(r"\d+", self._machine_model.get_ports()[i + 1])
            if match_1 is not None and match_2 is not None and match_1.group(
            ) == match_2.group():
                separator_list.append(separator_2)
            else:
                separator_list.append(separator)
        separator_list.append(separator)
        return separator_list

    def _get_flag_symbols(self, flag_obj):
        """
        Translate the flags of an instruction form into their report symbols.

        :param flag_obj: container of flags, tested with `in`
        :returns: concatenated symbols, or a single blank if no known flag is contained
        """
        symbol_table = (
            (INSTR_FLAGS.NOT_BOUND, "*"),
            (INSTR_FLAGS.TP_UNKWN, "X"),
            (INSTR_FLAGS.HIDDEN_LD, "P"),
        )
        # TODO add other flags
        string_result = "".join(
            symbol for flag, symbol in symbol_table if flag in flag_obj)
        return string_result if string_result else " "

    def _get_port_pressure(self,
                           ports,
                           port_len,
                           used_ports=[],
                           separator="|"):
        """Returns line of port pressure for an instruction."""
        if not isinstance(separator, list):
            separator = [separator for x in ports]
        string_result = "{} ".format(separator[-1])
        for i in range(len(ports)):
            if float(ports[i]) == 0.0 and self._machine_model.get_ports(
            )[i] not in used_ports:
                string_result += port_len[i] * " " + " {} ".format(
                    separator[i])
                continue
            left_len = len(str(float(ports[i])).split(".")[0])
            substr = "{:" + str(left_len) + "." + str(
                max(port_len[i] - left_len - 1, 0)) + "f}"
            substr = substr.format(ports[i])
            string_result += (substr +
                              " {} ".format(separator[i]) if "." in substr else
                              "{:.1f}{} ".format(ports[i], separator[i]))
        return string_result[:-1]

    def _get_node_by_lineno(self, lineno, kernel):
        """Returns instruction form from kernel by its line number."""
        nodes = [instr for instr in kernel if instr["line_number"] == lineno]
        return nodes[0] if len(nodes) > 0 else None

    def _get_lcd_cp_ports(self, line_number, cp_dg, dep_lat, separator="|"):
        """Returns the CP and LCD line for one instruction."""
        lat_cp = lat_lcd = ""
        if cp_dg:
            lat_cp = float(
                self._get_node_by_lineno(line_number, cp_dg)["latency_cp"])
        if dep_lat is not None:
            lat_lcd = float(dep_lat)
        return "{} {:>4} {} {:>4} {}".format(separator, lat_cp, separator,
                                             lat_lcd, separator)

    def _get_max_port_len(self, kernel):
        """Returns the maximal length needed to print all throughputs of the kernel."""
        port_len = [4 for x in self._machine_model.get_ports()]
        for instruction_form in kernel:
            for i, port in enumerate(instruction_form["port_pressure"]):
                if len("{:.2f}".format(port)) > port_len[i]:
                    port_len[i] = len("{:.2f}".format(port))
        return port_len

    def _get_port_number_line(self, port_len, separator="|"):
        """Returns column view of port identificators of machine_model."""
        string_result = separator
        separator_list = self._get_separator_list(separator, "-")
        for i, length in enumerate(port_len):
            substr = "{:^" + str(length + 2) + "s}"
            string_result += substr.format(
                self._machine_model.get_ports()[i]) + separator_list[i]
        return string_result

    def _header_report(self):
        """
        Build the report header.

        :returns: tool version, analyzed file, upper-cased architecture and the current UTC
            timestamp, one per line, followed by an empty line
        """
        version = _get_version("__init__.py")
        adjust = 20
        rows = [
            "Open Source Architecture Code Analyzer (OSACA) - {}".format(version),
            "Analyzed file:".ljust(adjust) + "{}".format(self._filename),
            "Architecture:".ljust(adjust) + "{}".format(self._arch.upper()),
            "Timestamp:".ljust(adjust) + "{}".format(
                dt.utcnow().strftime("%Y-%m-%d %H:%M:%S")),
        ]
        return "\n".join(rows) + "\n\n"

    def _symbol_map(self):
        """
        Build the legend explaining the instruction flag symbols.

        :returns: one line per flag in sorted flag order, each showing the symbol and
            its description
        """
        descriptions = {
            INSTR_FLAGS.NOT_BOUND: "Instruction micro-ops not bound to a port",
            INSTR_FLAGS.TP_UNKWN: ("No throughput/latency information for this instruction in "
                                   "data file"),
            INSTR_FLAGS.HIDDEN_LD: ("Throughput of LOAD operation can be hidden behind a past "
                                    "or future STORE instruction"),
        }
        return "".join(
            " {} - {}\n".format(self._get_flag_symbols([flag]), descriptions[flag])
            for flag in sorted(descriptions))

    def _port_binding_summary(self):
        raise NotImplementedError

Example #20

0

Show file

File: model_importer.py Project: jdomke/OSACA

def extract_model(tree, arch, skip_mem=True):
    """
    Build a MachineModel for ``arch`` from an XML tree of instruction data.

    Every ``instruction`` element found in ``tree`` is examined. An instruction
    is skipped when

    * its mnemonic contains a space,
    * its parameters cannot be extracted (``ValueError``),
    * it has no ``architecture`` element for ``arch``,
    * none of its measurements carries a ``ports`` attribute,
    * the first measurement's ``TP_ports`` and ``TP`` attributes differ,
    * ``skip_mem`` is set and one of its operands has type ``mem``, or
    * no port pressure could be collected from measurements or IACA data.

    :param tree: XML tree that is searched with ``findall('.//instruction')``
    :param arch: architecture name; it is upper-cased to match the ``name``
        attribute of the ``architecture`` elements
    :param skip_mem: skip instructions that have a memory operand
    :returns: the populated machine model, or ``None`` if no ISA could be
        determined for ``arch``
    """
    try:
        isa = MachineModel.get_isa_for_arch(arch)
    except Exception:
        print("Skipping...", file=sys.stderr)
        return None
    mm = MachineModel(isa=isa)
    parser = get_parser(isa)

    for instruction_tag in tree.findall('.//instruction'):
        mnemonic = instruction_tag.attrib['asm']
        iform = instruction_tag.attrib['iform']
        # skip any mnemonic which contain spaces (e.g., "REX CRC32")
        if ' ' in mnemonic:
            continue

        # Extract parameter components
        try:
            parameters = extract_paramters(instruction_tag, parser, isa)
        except ValueError as e:
            # Without parameters there is nothing valid to register; carrying
            # on would reuse the previous instruction's parameters (or fail
            # with a NameError on the first instruction), so skip this one.
            print(e, file=sys.stderr)
            continue
        if isa == 'x86':
            parameters.reverse()

        # Extract port occupation, latency and uops
        # (throughput is derived from the final port pressure further below)
        port_pressure, latency, uops = [], None, None
        arch_tag = instruction_tag.find('architecture[@name="' + arch.upper() + '"]')
        if arch_tag is None:
            continue
        # skip any instructions without port utilization
        if not any('ports' in x.attrib for x in arch_tag.findall('measurement')):
            print("Couldn't find port utilization, skip: ", iform, file=sys.stderr)
            continue
        # skip if computed and measured TP don't match
        # (only the result for the first measurement is taken into account)
        if not [x.attrib['TP_ports'] == x.attrib['TP'] for x in arch_tag.findall('measurement')][
            0
        ]:
            print(
                "Calculated TP from port utilization doesn't match TP, skip: ",
                iform,
                file=sys.stderr,
            )
            continue
        # skip if instruction contains memory operand
        if skip_mem and any(
            x.attrib['type'] == 'mem' for x in instruction_tag.findall('operand')
        ):
            print("Contains memory operand, skip: ", iform, file=sys.stderr)
            continue
        # We collect all measurement and IACA information and compare them later
        for measurement_tag in arch_tag.iter('measurement'):
            uops = (
                int(measurement_tag.attrib['uops']) if 'uops' in measurement_tag.attrib else None
            )
            if 'ports' in measurement_tag.attrib:
                port_pressure.append(port_pressure_from_tag_attributes(measurement_tag.attrib))
            latencies = [
                int(l_tag.attrib['cycles'])
                for l_tag in measurement_tag.iter('latency')
                if 'cycles' in l_tag.attrib
            ]
            if not latencies:
                # fall back to the upper bounds if no exact cycle counts exist
                latencies = [
                    int(l_tag.attrib['max_cycles'])
                    for l_tag in measurement_tag.iter('latency')
                    if 'max_cycles' in l_tag.attrib
                ]
            # shifted-slice comparison is True iff not all entries are equal
            if latencies[1:] != latencies[:-1]:
                print(
                    "Contradicting latencies found, using smallest:",
                    iform,
                    latencies,
                    file=sys.stderr,
                )
            if latencies:
                latency = min(latencies)

        # Ordered by IACA version (newest last)
        for iaca_tag in sorted(
            arch_tag.iter('IACA'), key=lambda i: StrictVersion(i.attrib['version'])
        ):
            if 'ports' in iaca_tag.attrib:
                port_pressure.append(port_pressure_from_tag_attributes(iaca_tag.attrib))

        # Check if all are equal
        if port_pressure:
            if port_pressure[1:] != port_pressure[:-1]:
                print("Contradicting port occupancies, using latest IACA:", iform, file=sys.stderr)
            port_pressure = port_pressure[-1]
        else:
            # print("No data available for this architecture:", mnemonic, file=sys.stderr)
            continue

        # Adding Intel's 2D and 3D pipelines on Intel µarchs, without Ice Lake:
        if arch.upper() in intel_archs and arch.upper() not in ['ICL']:
            if any(p['class'] == 'memory' for p in parameters):
                # We have a memory parameter, if ports 2 & 3 are present, also add 2D & 3D
                # TODO remove port7 on 'hsw' onward and split entries depending on addressing mode
                port_23 = False
                port_4 = False
                for pp in port_pressure:
                    if '2' in pp[1] and '3' in pp[1]:
                        port_23 = True
                    if '4' in pp[1]:
                        port_4 = True
                # Add (X, ['2D', '3D']) if load ports (2 & 3) are used, but not the store port (4)
                # X = 2 on SNB and IVB IFF used in combination with ymm register, otherwise X = 1
                if arch.upper() in ['SNB', 'IVB'] and any(
                    p['class'] == 'register' and p['name'] == 'ymm' for p in parameters
                ):
                    data_port_throughput = 2
                else:
                    data_port_throughput = 1
                if port_23 and not port_4:
                    port_pressure.append((data_port_throughput, ['2D', '3D']))

        # Add missing ports:
        for ports in [pp[1] for pp in port_pressure]:
            for p in ports:
                mm.add_port(p)

        throughput = max(mm.average_port_pressure(port_pressure))

        mm.set_instruction(mnemonic, parameters, latency, port_pressure, throughput, uops)
    # TODO eliminate entries which could be covered by automatic load / store expansion
    return mm

Example #21

0

Show file

File: test_semantics.py Project: RRZE-HPC/OSACA

    def test_machine_model_various_functions(self):
        """
        Exercise assorted MachineModel helpers on the x86 and AArch64 test DBs.

        Covers dummy model creation, instruction lookup, full instruction
        names, store throughput and latency, the hidden-load query, default
        load throughput, adding a port and dumping the model.
        """
        # function-scope import: only the portable null device path is needed
        import os

        # check dummy MachineModel creation
        try:
            MachineModel(isa="x86")
            MachineModel(isa="aarch64")
        except ValueError:
            self.fail()
        test_mm_x86 = MachineModel(
            path_to_yaml=self._find_file("test_db_x86.yml"))
        test_mm_arm = MachineModel(
            path_to_yaml=self._find_file("test_db_aarch64.yml"))

        # test get_instruction without mnemonic
        self.assertIsNone(test_mm_x86.get_instruction(None, []))
        self.assertIsNone(test_mm_arm.get_instruction(None, []))

        # test get_instruction from DB
        self.assertIsNone(test_mm_x86.get_instruction(None, []))
        self.assertIsNone(test_mm_arm.get_instruction(None, []))
        self.assertIsNone(test_mm_x86.get_instruction("NOT_IN_DB", []))
        self.assertIsNone(test_mm_arm.get_instruction("NOT_IN_DB", []))
        name_x86_1 = "vaddpd"
        operands_x86_1 = [
            {"class": "register", "name": "xmm"},
            {"class": "register", "name": "xmm"},
            {"class": "register", "name": "xmm"},
        ]
        instr_form_x86_1 = test_mm_x86.get_instruction(name_x86_1,
                                                       operands_x86_1)
        # repeated lookups must yield equal results
        self.assertEqual(
            instr_form_x86_1,
            test_mm_x86.get_instruction(name_x86_1, operands_x86_1))
        self.assertEqual(
            test_mm_x86.get_instruction("jg", [{"class": "identifier"}]),
            test_mm_x86.get_instruction("jg", [{"class": "identifier"}]),
        )
        name_arm_1 = "fadd"
        operands_arm_1 = [
            {"class": "register", "prefix": "v", "shape": "s"},
            {"class": "register", "prefix": "v", "shape": "s"},
            {"class": "register", "prefix": "v", "shape": "s"},
        ]
        instr_form_arm_1 = test_mm_arm.get_instruction(name_arm_1,
                                                       operands_arm_1)
        self.assertEqual(
            instr_form_arm_1,
            test_mm_arm.get_instruction(name_arm_1, operands_arm_1))
        self.assertEqual(
            test_mm_arm.get_instruction("b.ne", [{"class": "identifier"}]),
            test_mm_arm.get_instruction("b.ne", [{"class": "identifier"}]),
        )

        # test full instruction name
        self.assertEqual(
            MachineModel.get_full_instruction_name(instr_form_x86_1),
            "vaddpd  register(name:xmm),register(name:xmm),register(name:xmm)",
        )
        self.assertEqual(
            MachineModel.get_full_instruction_name(instr_form_arm_1),
            "fadd  register(prefix:v,shape:s),register(prefix:v,shape:s)," +
            "register(prefix:v,shape:s)",
        )

        # test get_store_tp
        self.assertEqual(
            test_mm_x86.get_store_throughput({
                "base": {"name": "x"},
                "offset": None,
                "index": None,
                "scale": 1
            }),
            [[2, "237"], [2, "4"]],
        )
        self.assertEqual(
            test_mm_x86.get_store_throughput({
                "base": {"prefix": "NOT_IN_DB"},
                "offset": None,
                "index": "NOT_NONE",
                "scale": 1
            }),
            [[1, "23"], [1, "4"]],
        )
        self.assertEqual(
            test_mm_arm.get_store_throughput({
                "base": {"prefix": "x"},
                "offset": None,
                "index": None,
                "scale": 1
            }),
            [[2, "34"], [2, "5"]],
        )
        self.assertEqual(
            test_mm_arm.get_store_throughput({
                "base": {"prefix": "NOT_IN_DB"},
                "offset": None,
                "index": None,
                "scale": 1
            }),
            [[1, "34"], [1, "5"]],
        )

        # test get_store_lt
        self.assertEqual(
            test_mm_x86.get_store_latency({
                "base": {"name": "x"},
                "offset": None,
                "index": None,
                "scale": "1"
            }),
            0,
        )
        self.assertEqual(
            test_mm_arm.get_store_latency({
                "base": {"prefix": "x"},
                "offset": None,
                "index": None,
                "scale": "1"
            }),
            0,
        )

        # test has_hidden_load
        self.assertFalse(test_mm_x86.has_hidden_loads())

        # test default load tp
        self.assertEqual(
            test_mm_x86.get_load_throughput({
                "base": {"name": "x"},
                "offset": None,
                "index": None,
                "scale": 1
            }),
            [[1, "23"], [1, ["2D", "3D"]]],
        )

        # test adding port
        test_mm_x86.add_port("dummyPort")
        test_mm_arm.add_port("dummyPort")

        # test dump of DB
        # os.devnull is the platform's null device ("/dev/null" on POSIX,
        # "nul" on Windows), so the test does not depend on a POSIX path.
        with open(os.devnull, "w") as dev_null:
            test_mm_x86.dump(stream=dev_null)
            test_mm_arm.dump(stream=dev_null)

Example #22

0

Show file

File: test_semantics.py Project: RRZE-HPC/OSACA

    def setUpClass(cls):
        """
        Prepare the shared fixtures for the semantics tests.

        Reads and parses the x86 and AArch64 kernel files, loads the CSX, TX2
        and A64FX machine models with their semantics objects, and annotates
        every instruction form of every kernel with source/destination and
        throughput/latency information.
        """

        def read_asm(file_name):
            # return the whole content of a test file located via _find_file
            with open(cls._find_file(file_name)) as handle:
                return handle.read()

        def to_kernel(asm_parser, asm_code, isa):
            # parse the assembly and cut it down to the kernel section
            return reduce_to_section(asm_parser.parse_file(asm_code), isa)

        # set up parser and kernels
        cls.parser_x86 = ParserX86ATT()
        cls.parser_AArch64 = ParserAArch64()
        cls.code_x86 = read_asm("kernel_x86.s")
        cls.code_x86_memdep = read_asm("kernel_x86_memdep.s")
        cls.code_x86_long_LCD = read_asm("kernel_x86_long_LCD.s")
        cls.code_aarch64_memdep = read_asm("kernel_aarch64_memdep.s")
        cls.code_AArch64 = read_asm("kernel_aarch64.s")
        cls.code_AArch64_SVE = read_asm("kernel_aarch64_sve.s")
        cls.kernel_x86 = to_kernel(cls.parser_x86, cls.code_x86, "x86")
        cls.kernel_x86_memdep = to_kernel(
            cls.parser_x86, cls.code_x86_memdep, "x86")
        cls.kernel_x86_long_LCD = to_kernel(
            cls.parser_x86, cls.code_x86_long_LCD, "x86")
        cls.kernel_AArch64 = to_kernel(
            cls.parser_AArch64, cls.code_AArch64, "aarch64")
        cls.kernel_aarch64_memdep = to_kernel(
            cls.parser_AArch64, cls.code_aarch64_memdep, "aarch64")
        cls.kernel_aarch64_SVE = to_kernel(
            cls.parser_AArch64, cls.code_AArch64_SVE, "aarch64")

        # set up machine models
        data_dir = cls.MODULE_DATA_DIR
        cls.machine_model_csx = MachineModel(
            path_to_yaml=os.path.join(data_dir, "csx.yml"))
        cls.machine_model_tx2 = MachineModel(
            path_to_yaml=os.path.join(data_dir, "tx2.yml"))
        cls.machine_model_a64fx = MachineModel(
            path_to_yaml=os.path.join(data_dir, "a64fx.yml"))
        cls.semantics_x86 = ISASemantics("x86")
        cls.semantics_csx = ArchSemantics(
            cls.machine_model_csx,
            path_to_yaml=os.path.join(data_dir, "isa/x86.yml"),
        )
        cls.semantics_aarch64 = ISASemantics("aarch64")
        cls.semantics_tx2 = ArchSemantics(
            cls.machine_model_tx2,
            path_to_yaml=os.path.join(data_dir, "isa/aarch64.yml"),
        )
        cls.semantics_a64fx = ArchSemantics(
            cls.machine_model_a64fx,
            path_to_yaml=os.path.join(data_dir, "isa/aarch64.yml"),
        )
        cls.machine_model_zen = MachineModel(arch="zen1")

        # annotate each kernel with the semantics of its architecture
        annotation_plan = (
            (cls.semantics_csx, cls.kernel_x86),
            (cls.semantics_csx, cls.kernel_x86_memdep),
            (cls.semantics_csx, cls.kernel_x86_long_LCD),
            (cls.semantics_tx2, cls.kernel_AArch64),
            (cls.semantics_tx2, cls.kernel_aarch64_memdep),
            (cls.semantics_a64fx, cls.kernel_aarch64_SVE),
        )
        for semantics, kernel in annotation_plan:
            for instruction_form in kernel:
                semantics.assign_src_dst(instruction_form)
                semantics.assign_tp_lt(instruction_form)

Example #23

0

Show file

File: test_semantics.py Project: jdomke/OSACA

 def test_creation_by_name(self):
     """Creating the 'CSX' model by name and wrapping it in ArchSemantics must not raise ValueError."""
     try:
         ArchSemantics(MachineModel(arch='CSX'))
     except ValueError:
         self.fail()

Example #24

0

Show file

File: osaca.py Project: jdomke/OSACA

def inspect(args, output_file=sys.stdout):
    """
    Does the actual throughput and critical path analysis of OSACA and prints it to the
    terminal.

    If no architecture was given and parsing with the detected ISA's parser fails, the
    file is parsed once more with the default architecture of the other ISA. If an
    architecture was given and parsing fails, the traceback is printed to stderr and
    the process exits with status 1.

    :param args: arguments given from :class:`~argparse.ArgumentParser` after parsing
    :param output_file: Define the stream for output, defaults to :class:`sys.stdout`
    :type output_file: stream, optional
    """
    # Read file
    code = args.file.read()

    # Detect ISA if necessary
    arch = args.arch if args.arch is not None else DEFAULT_ARCHS[BaseParser.detect_ISA(code)]
    # Warn about the chosen architecture only if the user did not pick one
    print_arch_warning = not args.arch
    isa = MachineModel.get_isa_for_arch(arch)
    verbose = args.verbose
    ignore_unknown = args.ignore_unknown

    # Parse file
    parser = get_asm_parser(arch)
    try:
        parsed_code = parser.parse_file(code)
    except Exception:
        # ``Exception`` rather than a bare ``except`` so that KeyboardInterrupt and
        # SystemExit are not mistaken for a parser failure.
        # probably the wrong parser based on heuristic
        if args.arch is None:
            # change ISA and try again
            arch = (
                DEFAULT_ARCHS['x86']
                if BaseParser.detect_ISA(code) == 'aarch64'
                else DEFAULT_ARCHS['aarch64']
            )
            isa = MachineModel.get_isa_for_arch(arch)
            parser = get_asm_parser(arch)
            parsed_code = parser.parse_file(code)
        else:
            traceback.print_exc(file=sys.stderr)
            sys.exit(1)

    # Reduce to marked kernel or chosen section and add semantics
    if args.lines:
        line_range = get_line_range(args.lines)
        kernel = [line for line in parsed_code if line['line_number'] in line_range]
        print_length_warning = False
    else:
        kernel = reduce_to_section(parsed_code, isa)
        # Print warning if kernel has no markers and is larger than threshold (100)
        print_length_warning = len(kernel) == len(parsed_code) and len(kernel) > 100
    machine_model = MachineModel(arch=arch)
    semantics = ArchSemantics(machine_model)
    semantics.add_semantics(kernel)
    # Do optimal schedule for kernel throughput if wished
    if not args.fixed:
        semantics.assign_optimal_throughput(kernel)

    # Create DiGraphs
    kernel_graph = KernelDG(kernel, parser, machine_model)
    if args.dotpath is not None:
        kernel_graph.export_graph(args.dotpath if args.dotpath != '.' else None)
    # Print analysis
    frontend = Frontend(args.file.name, arch=arch)
    print(
        frontend.full_analysis(
            kernel,
            kernel_graph,
            ignore_unknown=ignore_unknown,
            arch_warning=print_arch_warning,
            length_warning=print_length_warning,
            verbose=verbose
        ),
        file=output_file,
    )

Example #25

0

Show file

File: test_semantics.py Project: jdomke/OSACA

    def test_machine_model_various_functions(self):
        """
        Exercise assorted MachineModel helpers on the x86 and AArch64 test DBs.

        Covers dummy model creation, instruction lookup, full instruction
        names, store throughput and latency, the hidden-load query, default
        load throughput, adding a port and dumping the model.
        """
        # function-scope import: only the portable null device path is needed
        import os

        # check dummy MachineModel creation
        try:
            MachineModel(isa='x86')
            MachineModel(isa='aarch64')
        except ValueError:
            self.fail()
        test_mm_x86 = MachineModel(
            path_to_yaml=self._find_file('test_db_x86.yml'))
        test_mm_arm = MachineModel(
            path_to_yaml=self._find_file('test_db_aarch64.yml'))

        # test get_instruction without mnemonic
        self.assertIsNone(test_mm_x86.get_instruction(None, []))
        self.assertIsNone(test_mm_arm.get_instruction(None, []))

        # test get_instruction from DB
        self.assertIsNone(test_mm_x86.get_instruction(None, []))
        self.assertIsNone(test_mm_arm.get_instruction(None, []))
        self.assertIsNone(test_mm_x86.get_instruction('NOT_IN_DB', []))
        self.assertIsNone(test_mm_arm.get_instruction('NOT_IN_DB', []))
        name_x86_1 = 'vaddpd'
        operands_x86_1 = [
            {'class': 'register', 'name': 'xmm'},
            {'class': 'register', 'name': 'xmm'},
            {'class': 'register', 'name': 'xmm'},
        ]
        instr_form_x86_1 = test_mm_x86.get_instruction(name_x86_1,
                                                       operands_x86_1)
        # repeated lookups must yield equal results
        self.assertEqual(
            instr_form_x86_1,
            test_mm_x86.get_instruction(name_x86_1, operands_x86_1))
        self.assertEqual(
            test_mm_x86.get_instruction('jg', [{'class': 'identifier'}]),
            test_mm_x86.get_instruction('jg', [{'class': 'identifier'}]),
        )
        name_arm_1 = 'fadd'
        operands_arm_1 = [
            {'class': 'register', 'prefix': 'v', 'shape': 's'},
            {'class': 'register', 'prefix': 'v', 'shape': 's'},
            {'class': 'register', 'prefix': 'v', 'shape': 's'},
        ]
        instr_form_arm_1 = test_mm_arm.get_instruction(name_arm_1,
                                                       operands_arm_1)
        self.assertEqual(
            instr_form_arm_1,
            test_mm_arm.get_instruction(name_arm_1, operands_arm_1))
        self.assertEqual(
            test_mm_arm.get_instruction('b.ne', [{'class': 'identifier'}]),
            test_mm_arm.get_instruction('b.ne', [{'class': 'identifier'}]),
        )

        # test full instruction name
        self.assertEqual(
            MachineModel.get_full_instruction_name(instr_form_x86_1),
            'vaddpd  register(name:xmm),register(name:xmm),register(name:xmm)',
        )
        self.assertEqual(
            MachineModel.get_full_instruction_name(instr_form_arm_1),
            'fadd  register(prefix:v,shape:s),register(prefix:v,shape:s),' +
            'register(prefix:v,shape:s)',
        )

        # test get_store_tp
        self.assertEqual(
            test_mm_x86.get_store_throughput({
                'base': {'name': 'x'},
                'offset': None,
                'index': None,
                'scale': 1
            }),
            [[2, '237'], [2, '4']],
        )
        self.assertEqual(
            test_mm_x86.get_store_throughput({
                'base': {'prefix': 'NOT_IN_DB'},
                'offset': None,
                'index': 'NOT_NONE',
                'scale': 1
            }),
            [[1, '23'], [1, '4']],
        )
        self.assertEqual(
            test_mm_arm.get_store_throughput({
                'base': {'prefix': 'x'},
                'offset': None,
                'index': None,
                'scale': 1
            }),
            [[2, '34'], [2, '5']],
        )
        self.assertEqual(
            test_mm_arm.get_store_throughput({
                'base': {'prefix': 'NOT_IN_DB'},
                'offset': None,
                'index': None,
                'scale': 1
            }),
            [[1, '34'], [1, '5']],
        )

        # test get_store_lt
        self.assertEqual(
            test_mm_x86.get_store_latency({
                'base': {'name': 'x'},
                'offset': None,
                'index': None,
                'scale': '1'
            }),
            0,
        )
        self.assertEqual(
            test_mm_arm.get_store_latency({
                'base': {'prefix': 'x'},
                'offset': None,
                'index': None,
                'scale': '1'
            }),
            0,
        )

        # test has_hidden_load
        self.assertFalse(test_mm_x86.has_hidden_loads())

        # test default load tp
        self.assertEqual(
            test_mm_x86.get_load_throughput({
                'base': {'name': 'x'},
                'offset': None,
                'index': None,
                'scale': 1
            }),
            [[1, '23'], [1, ['2D', '3D']]],
        )

        # test adding port
        test_mm_x86.add_port('dummyPort')
        test_mm_arm.add_port('dummyPort')

        # test dump of DB
        # os.devnull is the platform's null device ('/dev/null' on POSIX,
        # 'nul' on Windows), so the test does not depend on a POSIX path.
        with open(os.devnull, 'w') as dev_null:
            test_mm_x86.dump(stream=dev_null)
            test_mm_arm.dump(stream=dev_null)

Example #26

0

Show file

File: test_db_interface.py Project: jdomke/OSACA

    def test_add_single_entry(self):
        """Setting one instruction entry must grow each model's 'instruction_forms' by exactly one."""
        mm_csx = MachineModel('csx')
        mm_tx2 = MachineModel('tx2')
        mm_zen1 = MachineModel('zen1')
        models = (mm_csx, mm_tx2, mm_zen1)

        # number of instruction forms before the insertion
        counts_before = [len(model['instruction_forms']) for model in models]

        mm_csx.set_instruction_entry(self.entry_csx)
        mm_tx2.set_instruction_entry(self.entry_tx2)
        mm_zen1.set_instruction_entry({'name': 'empty_operation'})

        # number of instruction forms after the insertion
        counts_after = [len(model['instruction_forms']) for model in models]

        for before, after in zip(counts_before, counts_after):
            self.assertEqual(after - before, 1)