def build_graphs(df: pd.DataFrame, ir_dir: Path, graph_dir: Path):
    """Create one feature-annotated ProgramGraph file per row of ``df``.

    For every row, the text at ``ir_dir/<name>.ll`` is read and passed to
    ``pg.from_llvm_ir``. Values from the row are then stored as graph-level
    features and the graph is written to
    ``graph_dir/<name>.ProgramGraph.pb`` with ``exist_ok=False``.

    Args:
        df: Table providing the ``name``, ``label``, ``wgsize``,
            ``transfer_bytes``, ``wgsize_log1p`` and ``transfer_bytes_log1p``
            columns read below.
        ir_dir: Directory holding the ``<name>.ll`` input files.
        graph_dir: Directory that receives the serialized graphs.
    """
    # (feature key, dataframe column) pairs stored in int64_list features.
    int64_features = (
        ("devmap_label", "label"),
        ("wgsize", "wgsize"),
        ("transfer_bytes", "transfer_bytes"),
    )
    # Columns stored in float_list features under a key equal to the column.
    float_features = ("wgsize_log1p", "transfer_bytes_log1p")

    for _, record in df.iterrows():
        ir_path = ir_dir / f"{record['name']}.ll"
        with open(ir_path) as ir_file:
            llvm_ir = ir_file.read()

        program_graph = pg.from_llvm_ir(llvm_ir)

        # Each feature holds exactly one value: slice-assign a 1-element list.
        for key, column in int64_features:
            program_graph.features.feature[key].int64_list.value[:] = [
                record[column]
            ]
        for key in float_features:
            program_graph.features.feature[key].float_list.value[:] = [
                record[key]
            ]

        pbutil.ToFile(
            program_graph,
            graph_dir / f"{record['name']}.ProgramGraph.pb",
            exist_ok=False,
        )
def test_from_llvm_ir_multiple_inputs():
    """Passing a list of IR strings yields one ProgramGraph per input."""
    input_count = 10
    results = list(pg.from_llvm_ir([SIMPLE_IR] * input_count))

    assert len(results) == input_count
    for result in results:
        assert isinstance(result, pg.ProgramGraph)
def simple_ir_graph(request) -> pg.ProgramGraph:
    """Build a graph from SIMPLE_IR, using ``request.param`` as the version."""
    llvm_version = request.param
    return pg.from_llvm_ir(SIMPLE_IR, version=llvm_version)
def test_invalid_version():
    """An unrecognised ``version`` value raises ``pg.UnsupportedCompiler``."""
    expected_error = pg.UnsupportedCompiler
    with pytest.raises(expected_error):
        pg.from_llvm_ir("", version="invalid")
def test_from_ir_string_smoke_test(llvm_ir: str, version: str):
    """Smoke test on real IRs: conversion returns a ``pg.ProgramGraph``."""
    result = pg.from_llvm_ir(llvm_ir, version=version)
    assert isinstance(result, pg.ProgramGraph)
def test_invalid_ir(version: str):
    """Malformed IR text raises ``pg.GraphCreationError``.

    The error message must match "expected top-level entity".
    """
    malformed_ir = "foo bar"
    expected_message = "expected top-level entity"
    with pytest.raises(pg.GraphCreationError, match=expected_message):
        pg.from_llvm_ir(malformed_ir, version=version)
def _ProcessRow(output_directory, row, file_id) -> None:
    """Export one database row as text IR, an Ir message and a ProgramGraph.

    Writes ``ir/<name>.ll`` and ``ir/<name>.Ir.pb`` under ``output_directory``,
    then attempts to build a ProgramGraph from the IR. On success the graph is
    written to ``graphs/<name>.ProgramGraph.pb`` and symlinked into one of the
    ``train`` / ``val`` / ``test`` directories, chosen at random with a
    60% / 20% / 20% split. If graph construction, graph serialization or the
    symlink fails, the failure is logged and the row is skipped; the IR files
    already written are kept.

    Args:
        output_directory: Root export directory, combined with sub-paths using
            the ``/`` operator. NOTE(review): the ``ir``, ``graphs``,
            ``train``, ``val`` and ``test`` subdirectories are not created
            here, so they presumably must already exist - confirm with caller.
        row: A ``(source, src_lang, ir_type, binary_ir)`` tuple. The first
            three items are UTF-8 encoded bytes; ``binary_ir`` is a
            zlib-compressed pickle of the IR text.
        file_id: Identifier embedded in the generated file names.

    Raises:
        KeyError: If ``src_lang`` or ``ir_type`` is not a known value.
        AssertionError: If either IR output file already exists.
    """
    # Function-scope import: only the graph-failure path below needs it.
    import logging

    source, src_lang, ir_type, binary_ir = row

    # Decode database row.
    source = source.decode("utf-8")
    src_lang = {
        "C": "c",
        "CPP": "cc",
        "OPENCL": "cl",
        "SWIFT": "swift",
        "HASKELL": "hs",
        "FORTRAN": "f90",
    }[src_lang.decode("utf-8")]
    ir_type = ir_type.decode("utf-8")
    # Resolve the compiler version before any file is written, so that an
    # unknown ir_type fails without leaving a stray .ll file behind (which
    # would otherwise trip the "already exists" check on a re-run).
    compiler_version = {
        "LLVM_6_0": 600,
        "LLVM_3_5": 350,
    }[ir_type]
    if source.startswith("sqlite:///"):
        source = "github"
    else:
        # Known sources get a filename-friendly alias; others pass through.
        source = {
            "github.com/av-maramzin/SNU_NPB:NPB3.3-SER-C": "npb-3_3-ser-c",
            "pact17_opencl_devmap": "opencl",
            "linux-4.19": "linux-4_19",
            "opencv-3.4.0": "opencv-3_4_0",
        }.get(source, source)

    # Output file paths.
    name = f"{source}.{file_id}.{src_lang}"
    ir_path = output_directory / f"ir/{name}.ll"
    ir_message_path = output_directory / f"ir/{name}.Ir.pb"

    # Check that the files to be generated do not already exist.
    # This is a defensive measure against accidentally overwriting files during
    # an export. A side effect of this is that partial exports are not supported.
    assert not ir_path.is_file(), f"File already exists: {ir_path}"
    assert not ir_message_path.is_file(), f"File already exists: {ir_message_path}"

    # NOTE(security): pickle.loads can execute arbitrary code while
    # deserializing. This is only safe if the database contents are trusted.
    ir = pickle.loads(codecs.decode(binary_ir, "zlib"))

    # Write the text IR to file.
    with open(ir_path, "w") as f:
        f.write(ir)

    ir_message = Ir(type=Ir.LLVM, compiler_version=compiler_version, text=ir)
    pbutil.ToFile(ir_message, ir_message_path)

    # Convert to ProgramGraph. This step is best-effort: a failure skips the
    # graph for this row but must not abort the export.
    try:
        graph = pg.from_llvm_ir(ir)
        pbutil.ToFile(graph, output_directory / f"graphs/{name}.ProgramGraph.pb")

        # Put into train/val/test bin.
        r = random.random()
        if r < 0.6:
            dst = "train"
        elif r < 0.8:
            dst = "val"
        else:
            dst = "test"

        # Relative link target, so the export directory can be relocated.
        os.symlink(
            f"../graphs/{name}.ProgramGraph.pb",
            output_directory / dst / f"{name}.ProgramGraph.pb",
        )
    except (ValueError, OSError, TimeoutError, AssertionError) as e:
        # Record the failure instead of discarding it silently.
        logging.getLogger(__name__).warning(
            "Skipping ProgramGraph for %s: %s: %s", name, type(e).__name__, e
        )