def Encode(encoder, graph, graph_path, ir_path):
    """Run the encoder over a graph and write the graph to disk.

    If `ir_path` names an existing file, its contents are parsed as an
    `ir_pb2.Ir` message and the message's `text` field is handed to the
    encoder. If the file is absent, or parsing raises `pbutil.DecodeError`,
    the encoder is called with `ir=None` instead.

    Args:
        encoder: Object exposing `Encode(graph, ir=...)`.
        graph: The graph passed to the encoder and then written out.
            Presumably the encoder updates it in place -- TODO confirm.
        graph_path: Destination passed to `pbutil.ToFile` for the graph.
        ir_path: Path-like object (must provide `is_file()`) of the Ir
            message that accompanies the graph.
    """
    # Default to "no IR available"; only a successful read replaces it.
    ir_text = None
    if ir_path.is_file():
        try:
            ir_text = pbutil.FromFile(ir_path, ir_pb2.Ir()).text
        except pbutil.DecodeError:
            # An unreadable IR file is treated the same as a missing one.
            ir_text = None

    encoder.Encode(graph, ir=ir_text)
    pbutil.ToFile(graph, graph_path)
def _ProcessRow(output_directory, row, file_id) -> None:
    """Export one database row as IR text, an Ir message and a program graph.

    Writes `ir/<name>.ll` and `ir/<name>.Ir.pb` below `output_directory`,
    then attempts to build a program graph, store it under `graphs/` and
    symlink it into one of the `train`, `val` or `test` directories. The graph
    step is best-effort: failures there are logged and the row is skipped.

    Args:
        output_directory: Root of the export. It is combined with `/`, so it
            is presumably a `pathlib.Path`. The sub-directories written to
            are not created here and must already exist.
        row: A 4-tuple `(source, src_lang, ir_type, binary_ir)`. The first
            three items are UTF-8 encoded bytes; `binary_ir` is a
            zlib-compressed pickle of the IR text.
        file_id: Identifier embedded in the generated file names.

    Raises:
        KeyError: If the source language or IR type is not one of the known
            values. Raised before any file is written.
        AssertionError: If either IR output file already exists.
    """
    import logging

    source, src_lang, ir_type, binary_ir = row

    # Decode database row.
    source = source.decode("utf-8")
    src_lang = {
        "C": "c",
        "CPP": "cc",
        "OPENCL": "cl",
        "SWIFT": "swift",
        "HASKELL": "hs",
        "FORTRAN": "f90",
    }[src_lang.decode("utf-8")]
    ir_type = ir_type.decode("utf-8")

    if source.startswith("sqlite:///"):
        source = "github"
    else:
        # Map known sources to file-name friendly names; unknown sources are
        # used unchanged.
        source = {
            "github.com/av-maramzin/SNU_NPB:NPB3.3-SER-C": "npb-3_3-ser-c",
            "pact17_opencl_devmap": "opencl",
            "linux-4.19": "linux-4_19",
            "opencv-3.4.0": "opencv-3_4_0",
        }.get(source, source)

    # Resolve the compiler version before touching the file system, so that
    # an unknown IR type cannot leave a partially exported row behind.
    compiler_version = {
        "LLVM_6_0": 600,
        "LLVM_3_5": 350,
    }[ir_type]

    # Output file paths.
    name = f"{source}.{file_id}.{src_lang}"
    ir_path = output_directory / f"ir/{name}.ll"
    ir_message_path = output_directory / f"ir/{name}.Ir.pb"

    # Check that the files to be generated do not already exist.
    # This is a defensive measure against accidentally overwriting files during
    # an export. A side effect of this is that partial exports are not
    # supported. The check raises explicitly rather than using `assert` so
    # that it is still enforced when Python runs with -O.
    for path in (ir_path, ir_message_path):
        if path.is_file():
            raise AssertionError(f"Refusing to overwrite existing file: {path}")

    # NOTE(review): pickle.loads can execute arbitrary code while loading.
    # This is only safe if the database contents are trusted -- confirm.
    ir = pickle.loads(codecs.decode(binary_ir, "zlib"))

    # Write the text IR to file. The encoding is pinned so that the output
    # does not depend on the locale of the machine running the export.
    with open(ir_path, "w", encoding="utf-8") as f:
        f.write(ir)

    ir_message = ir_pb2.Ir(
        type=ir_pb2.Ir.LLVM,
        compiler_version=compiler_version,
        text=ir,
    )
    pbutil.ToFile(ir_message, ir_message_path)

    # Convert to ProgramGraph.
    try:
        graph = llvm.BuildProgramGraph(ir)
        pbutil.ToFile(graph, output_directory / f"graphs/{name}.ProgramGraph.pb")

        # Put into train/val/test bin (roughly a 60/20/20 random split).
        r = random.random()
        if r < 0.6:
            dst = "train"
        elif r < 0.8:
            dst = "val"
        else:
            dst = "test"
        os.symlink(
            f"../graphs/{name}.ProgramGraph.pb",
            output_directory / dst / f"{name}.ProgramGraph.pb",
        )
    except (ValueError, OSError, TimeoutError, AssertionError) as e:
        # Graph construction is best-effort: record why the row was skipped
        # instead of discarding the error silently.
        logging.getLogger(__name__).warning(
            "Skipping program graph for %s: %s: %s", name, type(e).__name__, e
        )