def test_machine_model_various_functions(self):
    """Exercise assorted MachineModel helpers on the x86 and AArch64 test DBs.

    Covers dummy model creation, instruction lookup, full instruction names,
    store throughput/latency, default load throughput, port addition and
    dumping the model to a stream.
    """
    # Local import keeps this block self-contained; only needed for os.devnull.
    import os

    # check dummy MachineModel creation
    try:
        MachineModel(isa='x86')
        MachineModel(isa='aarch64')
    except ValueError:
        self.fail()
    test_mm_x86 = MachineModel(path_to_yaml=self._find_file('test_db_x86.yml'))
    test_mm_arm = MachineModel(path_to_yaml=self._find_file('test_db_aarch64.yml'))

    # test get_instruction without mnemonic
    self.assertIsNone(test_mm_x86.get_instruction(None, []))
    self.assertIsNone(test_mm_arm.get_instruction(None, []))

    # test get_instruction from DB
    self.assertIsNone(test_mm_x86.get_instruction('NOT_IN_DB', []))
    self.assertIsNone(test_mm_arm.get_instruction('NOT_IN_DB', []))
    name_x86_1 = 'vaddpd'
    operands_x86_1 = [
        {'class': 'register', 'name': 'xmm'},
        {'class': 'register', 'name': 'xmm'},
        {'class': 'register', 'name': 'xmm'},
    ]
    instr_form_x86_1 = test_mm_x86.get_instruction(name_x86_1, operands_x86_1)
    # Repeated lookups of the same form must yield equal results.
    self.assertEqual(
        instr_form_x86_1, test_mm_x86.get_instruction(name_x86_1, operands_x86_1)
    )
    self.assertEqual(
        test_mm_x86.get_instruction('jg', [{'class': 'identifier'}]),
        test_mm_x86.get_instruction('jg', [{'class': 'identifier'}]),
    )
    name_arm_1 = 'fadd'
    operands_arm_1 = [
        {'class': 'register', 'prefix': 'v', 'shape': 's'},
        {'class': 'register', 'prefix': 'v', 'shape': 's'},
        {'class': 'register', 'prefix': 'v', 'shape': 's'},
    ]
    instr_form_arm_1 = test_mm_arm.get_instruction(name_arm_1, operands_arm_1)
    self.assertEqual(
        instr_form_arm_1, test_mm_arm.get_instruction(name_arm_1, operands_arm_1)
    )
    self.assertEqual(
        test_mm_arm.get_instruction('b.ne', [{'class': 'identifier'}]),
        test_mm_arm.get_instruction('b.ne', [{'class': 'identifier'}]),
    )

    # test full instruction name
    self.assertEqual(
        MachineModel.get_full_instruction_name(instr_form_x86_1),
        'vaddpd register(name:xmm),register(name:xmm),register(name:xmm)',
    )
    self.assertEqual(
        MachineModel.get_full_instruction_name(instr_form_arm_1),
        'fadd register(prefix:v,shape:s),register(prefix:v,shape:s),'
        + 'register(prefix:v,shape:s)',
    )

    # test get_store_tp
    self.assertEqual(
        test_mm_x86.get_store_throughput(
            {'base': {'name': 'x'}, 'offset': None, 'index': None, 'scale': 1}
        ),
        [[2, '237'], [2, '4']],
    )
    self.assertEqual(
        test_mm_x86.get_store_throughput(
            {
                'base': {'prefix': 'NOT_IN_DB'},
                'offset': None,
                'index': 'NOT_NONE',
                'scale': 1,
            }
        ),
        [[1, '23'], [1, '4']],
    )
    self.assertEqual(
        test_mm_arm.get_store_throughput(
            {'base': {'prefix': 'x'}, 'offset': None, 'index': None, 'scale': 1}
        ),
        [[2, '34'], [2, '5']],
    )
    self.assertEqual(
        test_mm_arm.get_store_throughput(
            {
                'base': {'prefix': 'NOT_IN_DB'},
                'offset': None,
                'index': None,
                'scale': 1,
            }
        ),
        [[1, '34'], [1, '5']],
    )

    # test get_store_lt
    self.assertEqual(
        test_mm_x86.get_store_latency(
            {'base': {'name': 'x'}, 'offset': None, 'index': None, 'scale': '1'}
        ),
        0,
    )
    self.assertEqual(
        test_mm_arm.get_store_latency(
            {'base': {'prefix': 'x'}, 'offset': None, 'index': None, 'scale': '1'}
        ),
        0,
    )

    # test has_hidden_load
    self.assertFalse(test_mm_x86.has_hidden_loads())

    # test default load tp
    self.assertEqual(
        test_mm_x86.get_load_throughput(
            {'base': {'name': 'x'}, 'offset': None, 'index': None, 'scale': 1}
        ),
        [[1, '23'], [1, ['2D', '3D']]],
    )

    # test adding port
    test_mm_x86.add_port('dummyPort')
    test_mm_arm.add_port('dummyPort')

    # test dump of DB
    # os.devnull is the platform's null device, so this also works where
    # '/dev/null' does not exist.
    with open(os.devnull, 'w') as dev_null:
        test_mm_x86.dump(stream=dev_null)
        test_mm_arm.dump(stream=dev_null)
def test_machine_model_various_functions(self):
    """Exercise assorted MachineModel helpers on the x86 and AArch64 test DBs.

    Covers dummy model creation, instruction lookup, full instruction names,
    store throughput/latency, default load throughput, port addition and
    dumping the model to a stream.
    """
    # Local import keeps this block self-contained; only needed for os.devnull.
    import os

    # check dummy MachineModel creation
    try:
        MachineModel(isa="x86")
        MachineModel(isa="aarch64")
    except ValueError:
        self.fail()
    test_mm_x86 = MachineModel(path_to_yaml=self._find_file("test_db_x86.yml"))
    test_mm_arm = MachineModel(path_to_yaml=self._find_file("test_db_aarch64.yml"))

    # test get_instruction without mnemonic
    self.assertIsNone(test_mm_x86.get_instruction(None, []))
    self.assertIsNone(test_mm_arm.get_instruction(None, []))

    # test get_instruction from DB
    self.assertIsNone(test_mm_x86.get_instruction("NOT_IN_DB", []))
    self.assertIsNone(test_mm_arm.get_instruction("NOT_IN_DB", []))
    name_x86_1 = "vaddpd"
    operands_x86_1 = [
        {"class": "register", "name": "xmm"},
        {"class": "register", "name": "xmm"},
        {"class": "register", "name": "xmm"},
    ]
    instr_form_x86_1 = test_mm_x86.get_instruction(name_x86_1, operands_x86_1)
    # Repeated lookups of the same form must yield equal results.
    self.assertEqual(
        instr_form_x86_1, test_mm_x86.get_instruction(name_x86_1, operands_x86_1)
    )
    self.assertEqual(
        test_mm_x86.get_instruction("jg", [{"class": "identifier"}]),
        test_mm_x86.get_instruction("jg", [{"class": "identifier"}]),
    )
    name_arm_1 = "fadd"
    operands_arm_1 = [
        {"class": "register", "prefix": "v", "shape": "s"},
        {"class": "register", "prefix": "v", "shape": "s"},
        {"class": "register", "prefix": "v", "shape": "s"},
    ]
    instr_form_arm_1 = test_mm_arm.get_instruction(name_arm_1, operands_arm_1)
    self.assertEqual(
        instr_form_arm_1, test_mm_arm.get_instruction(name_arm_1, operands_arm_1)
    )
    self.assertEqual(
        test_mm_arm.get_instruction("b.ne", [{"class": "identifier"}]),
        test_mm_arm.get_instruction("b.ne", [{"class": "identifier"}]),
    )

    # test full instruction name
    self.assertEqual(
        MachineModel.get_full_instruction_name(instr_form_x86_1),
        "vaddpd register(name:xmm),register(name:xmm),register(name:xmm)",
    )
    self.assertEqual(
        MachineModel.get_full_instruction_name(instr_form_arm_1),
        "fadd register(prefix:v,shape:s),register(prefix:v,shape:s),"
        + "register(prefix:v,shape:s)",
    )

    # test get_store_tp
    self.assertEqual(
        test_mm_x86.get_store_throughput(
            {"base": {"name": "x"}, "offset": None, "index": None, "scale": 1}
        ),
        [[2, "237"], [2, "4"]],
    )
    self.assertEqual(
        test_mm_x86.get_store_throughput(
            {
                "base": {"prefix": "NOT_IN_DB"},
                "offset": None,
                "index": "NOT_NONE",
                "scale": 1,
            }
        ),
        [[1, "23"], [1, "4"]],
    )
    self.assertEqual(
        test_mm_arm.get_store_throughput(
            {"base": {"prefix": "x"}, "offset": None, "index": None, "scale": 1}
        ),
        [[2, "34"], [2, "5"]],
    )
    self.assertEqual(
        test_mm_arm.get_store_throughput(
            {
                "base": {"prefix": "NOT_IN_DB"},
                "offset": None,
                "index": None,
                "scale": 1,
            }
        ),
        [[1, "34"], [1, "5"]],
    )

    # test get_store_lt
    self.assertEqual(
        test_mm_x86.get_store_latency(
            {"base": {"name": "x"}, "offset": None, "index": None, "scale": "1"}
        ),
        0,
    )
    self.assertEqual(
        test_mm_arm.get_store_latency(
            {"base": {"prefix": "x"}, "offset": None, "index": None, "scale": "1"}
        ),
        0,
    )

    # test has_hidden_load
    self.assertFalse(test_mm_x86.has_hidden_loads())

    # test default load tp
    self.assertEqual(
        test_mm_x86.get_load_throughput(
            {"base": {"name": "x"}, "offset": None, "index": None, "scale": 1}
        ),
        [[1, "23"], [1, ["2D", "3D"]]],
    )

    # test adding port
    test_mm_x86.add_port("dummyPort")
    test_mm_arm.add_port("dummyPort")

    # test dump of DB
    # os.devnull is the platform's null device, so this also works where
    # "/dev/null" does not exist.
    with open(os.devnull, "w") as dev_null:
        test_mm_x86.dump(stream=dev_null)
        test_mm_arm.dump(stream=dev_null)
def extract_model(tree, arch, skip_mem=True):
    """Build a MachineModel for *arch* from an instruction XML tree.

    Every ``instruction`` element found in *tree* is inspected; those that
    carry usable data for the requested architecture are registered in the
    model via ``set_instruction``.

    :param tree: object offering ``findall('.//instruction')`` whose results
        expose ``attrib``, ``find``, ``findall`` and ``iter``
        (presumably an ``xml.etree`` tree/element -- confirm against caller)
    :param arch: architecture name; compared upper-cased against the
        ``name`` attribute of ``architecture`` elements
    :param skip_mem: if True, instructions with an operand of type ``mem``
        are skipped
    :returns: the populated MachineModel, or None if no ISA could be
        determined for *arch*
    """
    try:
        isa = MachineModel.get_isa_for_arch(arch)
    except Exception:
        print("Skipping...", file=sys.stderr)
        return None
    mm = MachineModel(isa=isa)
    parser = get_parser(isa)
    arch_name = arch.upper()

    for instruction_tag in tree.findall('.//instruction'):
        mnemonic = instruction_tag.attrib['asm']
        iform = instruction_tag.attrib['iform']
        # skip any mnemonic which contain spaces (e.g., "REX CRC32")
        if ' ' in mnemonic:
            continue

        # Extract parameter components
        try:
            parameters = extract_paramters(instruction_tag, parser, isa)
        except ValueError as e:
            print(e, file=sys.stderr)
            # Without parameters the entry cannot be registered; carrying on
            # would reuse the previous instruction's operands (or hit an
            # unbound name on the first iteration).
            continue
        if isa == 'x86':
            parameters.reverse()

        # Extract port occupation, throughput and latency
        port_pressure, throughput, latency, uops = [], None, None, None
        arch_tag = instruction_tag.find('architecture[@name="' + arch_name + '"]')
        if arch_tag is None:
            continue
        measurements = arch_tag.findall('measurement')
        # skip any instructions without port utilization
        if not any('ports' in x.attrib for x in measurements):
            print("Couldn't find port utilization, skip: ", iform, file=sys.stderr)
            continue
        # skip if computed and measured TP don't match
        # (only the first measurement is consulted; the check above
        # guarantees there is at least one)
        first_attrib = measurements[0].attrib
        if first_attrib['TP_ports'] != first_attrib['TP']:
            print(
                "Calculated TP from port utilization doesn't match TP, skip: ",
                iform,
                file=sys.stderr,
            )
            continue
        # skip if instruction contains memory operand
        if skip_mem and any(
            x.attrib['type'] == 'mem' for x in instruction_tag.findall('operand')
        ):
            print("Contains memory operand, skip: ", iform, file=sys.stderr)
            continue

        # We collect all measurement and IACA information and compare them later
        for measurement_tag in arch_tag.iter('measurement'):
            attrib = measurement_tag.attrib
            # NOTE: this value is superseded further down by the throughput
            # derived from the port pressure.
            if 'TP_ports' in attrib:
                throughput = attrib['TP_ports']
            else:
                throughput = attrib['TP'] if 'TP' in attrib else None
            uops = int(attrib['uops']) if 'uops' in attrib else None
            if 'ports' in attrib:
                port_pressure.append(port_pressure_from_tag_attributes(attrib))
            latencies = [
                int(l_tag.attrib['cycles'])
                for l_tag in measurement_tag.iter('latency')
                if 'cycles' in l_tag.attrib
            ]
            if not latencies:
                # fall back to the upper bounds if no exact cycles are given
                latencies = [
                    int(l_tag.attrib['max_cycles'])
                    for l_tag in measurement_tag.iter('latency')
                    if 'max_cycles' in l_tag.attrib
                ]
            # shifted-slice comparison is True iff not all entries are equal
            if latencies[1:] != latencies[:-1]:
                print(
                    "Contradicting latencies found, using smallest:",
                    iform,
                    latencies,
                    file=sys.stderr,
                )
            if latencies:
                latency = min(latencies)

        # Ordered by IACA version (newest last)
        for iaca_tag in sorted(
            arch_tag.iter('IACA'), key=lambda i: StrictVersion(i.attrib['version'])
        ):
            if 'ports' in iaca_tag.attrib:
                port_pressure.append(port_pressure_from_tag_attributes(iaca_tag.attrib))

        # Check if all are equal
        if not port_pressure:
            # print("No data available for this architecture:", mnemonic, file=sys.stderr)
            continue
        if port_pressure[1:] != port_pressure[:-1]:
            print("Contradicting port occupancies, using latest IACA:", iform, file=sys.stderr)
        port_pressure = port_pressure[-1]

        # Adding Intel's 2D and 3D pipelines on Intel µarchs, without Ice Lake:
        if arch_name in intel_archs and arch_name != 'ICL':
            if any(p['class'] == 'memory' for p in parameters):
                # We have a memory parameter, if ports 2 & 3 are present, also add 2D & 3D
                # TODO remove port7 on 'hsw' onward and split entries depending on addressing mode
                port_23 = any('2' in pp[1] and '3' in pp[1] for pp in port_pressure)
                port_4 = any('4' in pp[1] for pp in port_pressure)
                # Add (X, ['2D', '3D']) if load ports (2 & 3) are used, but not the store port (4)
                # X = 2 on SNB and IVB IFF used in combination with ymm register, otherwise X = 1
                if port_23 and not port_4:
                    if arch_name in ['SNB', 'IVB'] and any(
                        p['class'] == 'register' and p['name'] == 'ymm' for p in parameters
                    ):
                        data_port_throughput = 2
                    else:
                        data_port_throughput = 1
                    port_pressure.append((data_port_throughput, ['2D', '3D']))

        # Add missing ports:
        for pp in port_pressure:
            for p in pp[1]:
                mm.add_port(p)

        throughput = max(mm.average_port_pressure(port_pressure))
        mm.set_instruction(mnemonic, parameters, latency, port_pressure, throughput, uops)
    # TODO eliminate entries which could be covered by automatic load / store expansion
    return mm
def extract_model(tree, arch, skip_mem=True):
    """Build a MachineModel for *arch* from an instruction XML tree.

    Every ``instruction`` element found in *tree* is inspected; those that
    carry usable data for the requested architecture are registered in the
    model via ``set_instruction``.

    :param tree: object offering ``findall(".//instruction")`` whose results
        expose ``attrib``, ``find``, ``findall`` and ``iter``
        (presumably an ``xml.etree`` tree/element -- confirm against caller)
    :param arch: architecture name; compared upper-cased against the
        ``name`` attribute of ``architecture`` elements
    :param skip_mem: if True, instructions with an operand of type ``mem``
        are skipped
    :returns: the populated MachineModel, or None if no ISA could be
        determined for *arch*
    """
    try:
        isa = MachineModel.get_isa_for_arch(arch)
    except Exception:
        print("Skipping...", file=sys.stderr)
        return None
    mm = MachineModel(isa=isa)
    parser = get_parser(isa)
    arch_name = arch.upper()

    for instruction_tag in tree.findall(".//instruction"):
        mnemonic = instruction_tag.attrib["asm"]
        iform = instruction_tag.attrib["iform"]
        # reduce to second part if mnemonic contain space (e.g., "REX CRC32")
        if " " in mnemonic:
            mnemonic = mnemonic.split(" ", 1)[1]

        # Extract parameter components
        try:
            parameters = extract_paramters(instruction_tag, parser, isa)
        except ValueError as e:
            print(e, file=sys.stderr)
            # Without parameters the entry cannot be registered; carrying on
            # would reuse the previous instruction's operands (or hit an
            # unbound name on the first iteration).
            continue
        if isa == "x86":
            parameters.reverse()

        # Extract port occupation, throughput and latency
        port_pressure, throughput, latency, uops = [], None, None, None
        arch_tag = instruction_tag.find('architecture[@name="' + arch_name + '"]')
        if arch_tag is None:
            continue
        measurements = arch_tag.findall("measurement")
        # skip any instructions without port utilization
        if not any("ports" in x.attrib for x in measurements):
            print("Couldn't find port utilization, skip: ", iform, file=sys.stderr)
            continue
        # warn (but do not skip) if the TP computed from ports exceeds the
        # measured TP; only the first measurement is consulted, and the check
        # above guarantees there is at least one
        first_attrib = measurements[0].attrib
        if float(first_attrib["TP_ports"]) > min(
            float(first_attrib["TP_loop"]), float(first_attrib["TP_unrolled"])
        ):
            print(
                "Calculated TP is greater than measured TP.",
                iform,
                file=sys.stderr,
            )
        # skip if instruction contains memory operand
        if skip_mem and any(
            x.attrib["type"] == "mem" for x in instruction_tag.findall("operand")
        ):
            print("Contains memory operand, skip: ", iform, file=sys.stderr)
            continue

        # We collect all measurement and IACA information and compare them later
        for measurement_tag in arch_tag.iter("measurement"):
            attrib = measurement_tag.attrib
            # NOTE: this value is superseded further down by the throughput
            # derived from the port pressure.
            if "TP_ports" in attrib:
                throughput = float(attrib["TP_ports"])
            else:
                # Attribute values are strings, so convert before comparing;
                # min() over a str/float mix raises TypeError.
                # "TP_unroll" is kept alongside "TP_unrolled" because this
                # fallback originally looked up the shorter spelling.
                measured = [
                    float(attrib[key])
                    for key in ("TP_loop", "TP_unrolled", "TP_unroll", "TP")
                    if key in attrib
                ]
                throughput = min(measured) if measured else None
            uops = int(attrib["uops"]) if "uops" in attrib else None
            if "ports" in attrib:
                port_pressure.append(port_pressure_from_tag_attributes(attrib))
            latencies = [
                int(l_tag.attrib["cycles"])
                for l_tag in measurement_tag.iter("latency")
                if "cycles" in l_tag.attrib
            ]
            if not latencies:
                # fall back to the upper bounds if no exact cycles are given
                latencies = [
                    int(l_tag.attrib["max_cycles"])
                    for l_tag in measurement_tag.iter("latency")
                    if "max_cycles" in l_tag.attrib
                ]
            # shifted-slice comparison is True iff not all entries are equal
            if latencies[1:] != latencies[:-1]:
                print(
                    "Contradicting latencies found, using smallest:",
                    iform,
                    latencies,
                    file=sys.stderr,
                )
            if latencies:
                latency = min(latencies)

        # Ordered by IACA version (newest last)
        for iaca_tag in sorted(
            arch_tag.iter("IACA"), key=lambda i: StrictVersion(i.attrib["version"])
        ):
            if "ports" in iaca_tag.attrib:
                port_pressure.append(port_pressure_from_tag_attributes(iaca_tag.attrib))

        # Check if all are equal
        if not port_pressure:
            # print("No data available for this architecture:", mnemonic, file=sys.stderr)
            continue
        if port_pressure[1:] != port_pressure[:-1]:
            print("Contradicting port occupancies, using latest IACA:", iform, file=sys.stderr)
        port_pressure = port_pressure[-1]

        # Adding Intel's 2D and 3D pipelines on Intel µarchs, without Ice Lake:
        if arch_name in intel_archs and arch_name != "ICL":
            if any(p["class"] == "memory" for p in parameters):
                # We have a memory parameter, if ports 2 & 3 are present, also add 2D & 3D
                # TODO remove port7 on 'hsw' onward and split entries depending on addressing mode
                port_23 = any("2" in pp[1] and "3" in pp[1] for pp in port_pressure)
                port_4 = any("4" in pp[1] for pp in port_pressure)
                # Add (x, ['2D', '3D']) if load ports (2 & 3) are used, but not the store port (4)
                if port_23 and not port_4:
                    if (
                        arch_name in ["SNB", "IVB"]
                        and any(p.get('name', '') == 'ymm' for p in parameters)
                        and '128' not in mnemonic
                    ):
                        # x = 2 if SNB or IVB and ymm register in any operand
                        # and not '128' in instruction name
                        port2D3D_pressure = 2
                    else:
                        # otherwise x = 1
                        port2D3D_pressure = 1
                    port_pressure.append((port2D3D_pressure, ["2D", "3D"]))

        # Add missing ports:
        for pp in port_pressure:
            for p in pp[1]:
                mm.add_port(p)

        throughput = max(mm.average_port_pressure(port_pressure))
        mm.set_instruction(mnemonic, parameters, latency, port_pressure, throughput, uops)
    # TODO eliminate entries which could be covered by automatic load / store expansion
    return mm