コード例 #1
0
def test_ProgramGraphToNetworkX_empty_graph():
    """Converting an empty proto yields an empty nx.MultiDiGraph."""
    empty_proto = program_graph_pb2.ProgramGraph()
    converted = nx_format.ProgramGraphToNetworkX(empty_proto)
    assert isinstance(converted, nx.MultiDiGraph)
    assert converted.number_of_nodes() == 0
    assert converted.number_of_edges() == 0
コード例 #2
0
def test_SerializeInstructionsInProgramGraph_root_node_only():
    """A graph holding one instruction node and no edges serializes to []."""
    root = node_pb2.Node(type=node_pb2.Node.INSTRUCTION)
    graph = program_graph_pb2.ProgramGraph(node=[root])
    serialized = graph_serializer.SerializeInstructionsInProgramGraph(
        graph, max_nodes=1000)
    assert serialized == []
コード例 #3
0
def FromProgramGraphFile(path) -> Optional[program_graph_pb2.ProgramGraph]:
    """Convert a binary ProgramGraph message file to a CDFG.

    The file contents are piped to the GRAPH2CDFG command on stdin and the
    command's stdout is parsed as a ProgramGraph message.

    Args:
      path: The path of a ProgramGraph protocol buffer.

    Returns:
      A ProgramGraph instance, or None if the command exited with a non-zero
      status or its output could not be decoded.
    """
    with open(path, "rb") as infile:
        serialized_input = infile.read()

    command = [str(GRAPH2CDFG), "--stdin_fmt", "pb", "--stdout_fmt", "pb"]
    converter = subprocess.Popen(
        command,
        stdin=subprocess.PIPE,
        stdout=subprocess.PIPE,
    )
    converted, _ = converter.communicate(serialized_input)

    # A non-zero exit status is reported to the caller as "no graph".
    if converter.returncode:
        return None

    result = program_graph_pb2.ProgramGraph()
    try:
        result.ParseFromString(converted)
    except google.protobuf.message.DecodeError:
        return None
    return result
コード例 #4
0
ファイル: collect_stats.py プロジェクト: sailfish009/ProGraML
  def Run(self):
    """Write one CSV row of size statistics for every file in self.files.

    When self.write_header is set, a header row naming the columns is
    written first. Each data row holds self.split, the graph name (the file
    name with the last len(".ProgramGraph.pb") characters removed), and the
    lengths of the graph's node, edge, function and module fields.
    """
    if self.write_header:
      header = (
        "split",
        "graph_name",
        "node_count",
        "edge_count",
        "function_count",
        "module_count",
      )
      self.writer.writerow(header)

    suffix_length = len(".ProgramGraph.pb")
    # The loop index is stored on self.ctx.i on every iteration.
    for self.ctx.i, path in enumerate(self.files):
      graph_name = path.name[:-suffix_length]
      graph = pbutil.FromFile(path, program_graph_pb2.ProgramGraph())
      row = (
        self.split,
        graph_name,
        len(graph.node),
        len(graph.edge),
        len(graph.function),
        len(graph.module),
      )
      self.writer.writerow(row)
コード例 #5
0
def Main():
    """Print the data flow step count of every labelled test graph.

    For each graph under <FLAGS.path>/test that has a matching features list
    under <FLAGS.path>/labels/<FLAGS.analysis>, one line is printed per
    features entry: the features file name, the entry index, and the step
    count (0 when the feature is absent).
    """
    dataset_root = pathlib.Path(FLAGS.path)
    labels_dir = dataset_root / "labels" / FLAGS.analysis
    graph_suffix_length = len("ProgramGraph.pb")

    for graph_path in (dataset_root / "test").iterdir():
        stem = graph_path.name[:-graph_suffix_length]
        features_path = labels_dir / f"{stem}ProgramGraphFeaturesList.pb"
        # Features are not guaranteed to exist for every program graph, so
        # check for them. Very defensively, also re-check the graph file
        # itself: the dataset may be modified while this runs during
        # development. That second check can be removed later.
        if not (graph_path.is_file() and features_path.is_file()):
            continue

        graph = pbutil.FromFile(graph_path, program_graph_pb2.ProgramGraph())
        node_count = len(graph.node)
        # Ignore empty graphs and graphs above the configured size limit.
        if node_count == 0 or node_count > FLAGS.max_graph_node_count:
            continue

        features_list = pbutil.FromFile(
            features_path,
            program_graph_features_pb2.ProgramGraphFeaturesList(),
        )

        for index, features in enumerate(features_list.graph):
            step_counts = features.features.feature[
                "data_flow_step_count"
            ].int64_list.value
            step_count = step_counts[0] if len(step_counts) else 0
            print(features_path.name, index, step_count)
コード例 #6
0
def TestOne(
    features_list_path: pathlib.Path,
    features_list_index: int,
    checkpoint_path: pathlib.Path,
) -> BatchResults:
    """Run a restored Ggnn model on a single graph/features pair.

    Args:
      features_list_path: Path of a ProgramGraphFeaturesList file. Its name,
        minus the ".ProgramGraphFeaturesList.pb" suffix, selects the graph
        file under <dataset path>/graphs.
      features_list_index: Index of the features entry to use from the list.
      checkpoint_path: Path of the Checkpoint message to restore the model
        from.

    Returns:
      The result of AnnotateGraphWithBatchResults() for the graph, the
      selected features, and the results of running one TEST batch.
    """
    dataset_root = pathlib.Path(pathflag.path())

    # Load the requested features entry.
    all_features = pbutil.FromFile(
        features_list_path,
        program_graph_features_pb2.ProgramGraphFeaturesList(),
    )
    features = all_features.graph[features_list_index]

    # Load the graph that the features list was derived from.
    features_suffix = ".ProgramGraphFeaturesList.pb"
    graph_name = features_list_path.name[:-len(features_suffix)]
    graph_path = dataset_root / "graphs" / f"{graph_name}.ProgramGraph.pb"
    graph = pbutil.FromFile(graph_path, program_graph_pb2.ProgramGraph())

    # Instantiate and restore the model.
    vocab_model_name = "cdfg" if FLAGS.cdfg else "programl"
    vocab = vocabulary.LoadVocabulary(
        dataset_root,
        model_name=vocab_model_name,
        max_items=FLAGS.max_vocab_size,
        target_cumfreq=FLAGS.target_vocab_cumfreq,
    )

    # Position embeddings are switched off for CDFG runs. This must happen
    # before the model is constructed.
    if FLAGS.cdfg:
        FLAGS.use_position_embeddings = False

    model = Ggnn(
        vocabulary=vocab,
        test_only=True,
        node_y_dimensionality=2,
        graph_y_dimensionality=0,
        graph_x_dimensionality=0,
        use_selector_embeddings=True,
    )
    model.RestoreCheckpoint(
        pbutil.FromFile(checkpoint_path, checkpoint_pb2.Checkpoint()))

    # Build the batches for this one graph and keep the first.
    batch_builder = DataflowGgnnBatchBuilder(
        graph_loader=SingleGraphLoader(graph=graph, features=features),
        vocabulary=vocab,
        max_node_size=int(1e9),
        use_cdfg=FLAGS.cdfg,
        max_batch_count=1,
    )
    batch = list(batch_builder)[0]

    results = model.RunBatch(epoch_pb2.TEST, batch)
    return AnnotateGraphWithBatchResults(graph, features, results)
コード例 #7
0
def test_SerializeInstructionsInProgramGraph_single_function():
    """Three instructions chained by a call and a control edge give [1, 2]."""
    instructions = [
        node_pb2.Node(type=node_pb2.Node.INSTRUCTION) for _ in range(3)
    ]
    edges = [
        edge_pb2.Edge(flow=edge_pb2.Edge.CALL, source=0, target=1),
        edge_pb2.Edge(flow=edge_pb2.Edge.CONTROL, source=1, target=2),
    ]
    graph = program_graph_pb2.ProgramGraph(node=instructions, edge=edges)
    serialized = graph_serializer.SerializeInstructionsInProgramGraph(
        graph, max_nodes=1000)
    assert serialized == [1, 2]
コード例 #8
0
ファイル: inst2vec.py プロジェクト: yibit/ProGraML
def Main():
  """Run the inst2vec encoder in the mode selected by the flags.

  FLAGS.dataset takes precedence, then FLAGS.directory. With neither set, a
  ProgramGraph obtained from ParseStdinOrDie() is encoded, using the contents
  of FLAGS.ir as the IR when that flag is set, and passed to WriteStdout().
  """
  encoder = inst2vec_encoder.Inst2vecEncoder()

  if FLAGS.dataset:
    encoder.RunOnDataset(FLAGS.dataset)
  elif FLAGS.directory:
    encoder.RunOnDirectory(FLAGS.directory)
  else:
    proto = ParseStdinOrDie(program_graph_pb2.ProgramGraph())
    if FLAGS.ir:
      ir = fs.Read(FLAGS.ir)
    else:
      ir = None
    encoder.Encode(proto, ir)
    WriteStdout(proto)
コード例 #9
0
def BuildProgramGraph(
    ir: str,
    options: program_graph_options_pb2.ProgramGraphOptions = DefaultOptions,
    timeout: int = 60,
) -> program_graph_pb2.ProgramGraph:
    """Construct a program graph from an LLVM-IR.

    Args:
      ir: The text of an LLVM-IR Module.
      options: The graph construction options. The message is not modified;
        a private copy carrying the temporary IR path is sent to the worker.
      timeout: The number of seconds to permit before timing out.

    Returns:
      A ProgramGraph instance.

    Raises:
      ValueError: In case graph construction fails (worker exit status 2).
      TimeoutError: If timeout is reached (worker exit status 9 or -9).
      OSError: In case of other error (any other non-zero exit status).
    """
    # Work on a copy so that neither the caller's message nor the shared
    # DefaultOptions default is left pointing at a temporary file that no
    # longer exists once this function returns.
    worker_options = program_graph_options_pb2.ProgramGraphOptions()
    worker_options.CopyFrom(options)

    # Write the IR to a temporary file, record its path in the options, and
    # pass the serialized options on stdin to a worker subprocess which
    # generates the graph and produces a ProgramGraph message on stdout.
    with tempfile.NamedTemporaryFile("w") as f:
        f.write(ir)
        # Make sure the IR is on disk before the worker reads it.
        f.flush()
        worker_options.ir_path = f.name
        process = subprocess.Popen(
            ["timeout", "-s9", str(timeout), str(GRAPH_BUILDER_BIN)],
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            stdin=subprocess.PIPE,
        )
        stdout, stderr = process.communicate(
            worker_options.SerializeToString())

    # NOTE(review): some `timeout` implementations report a timed-out command
    # with a different exit status (e.g. 124 or 128+9); confirm that 9 / -9
    # is what the deployed `timeout` actually returns.
    if process.returncode == 2:
        raise ValueError(stderr.decode("utf-8").rstrip())
    if process.returncode in (9, -9):
        raise TimeoutError(
            f"Program graph construction exceeded {timeout} seconds")
    if process.returncode:
        raise OSError(stderr.decode("utf-8").rstrip())

    proto = program_graph_pb2.ProgramGraph()
    proto.ParseFromString(stdout)
    return proto
コード例 #10
0
    def Run(self):
        """Count vocabulary hits over self.graphs and write coverage CSVs.

        Every node of every graph is counted. A node adds to the inst2vec
        tally when its first "inst2vec_preprocessed" bytes value, decoded as
        UTF-8, is in self.inst2vec; it adds to the cdfg / programl tallies
        when its text is in self.cdfg / self.programl. The three tallies are
        passed to ToCsv() together with the total node count.
        """
        inst2vec_hits = defaultdict(int)
        cdfg_hits = defaultdict(int)
        programl_hits = defaultdict(int)
        total_nodes = 0

        # The loop index (starting at 1) is stored on self.ctx.i.
        for self.ctx.i, path in enumerate(self.graphs, start=1):
            graph = pbutil.FromFile(path, program_graph_pb2.ProgramGraph())

            for node in graph.node:
                total_nodes += 1

                preprocessed = node.features.feature[
                    "inst2vec_preprocessed"
                ].bytes_list.value
                try:
                    first_value = preprocessed[0]
                except IndexError:
                    # No preprocessed text recorded for this node.
                    pass
                else:
                    token = first_value.decode("utf-8")
                    if token in self.inst2vec:
                        inst2vec_hits[token] += 1

                text = node.text
                if text in self.cdfg:
                    cdfg_hits[text] += 1
                if text in self.programl:
                    programl_hits[text] += 1

        reports = (
            ("inst2vec_test_coverage.csv", inst2vec_hits),
            ("cdfg_test_coverage.csv", cdfg_hits),
            ("programl_test_coverage.csv", programl_hits),
        )
        for filename, hits in reports:
            ToCsv(self.path / "vocab" / filename, hits, total_nodes)
コード例 #11
0
def test_Encode_llvm_program_graph(
    llvm_program_graph: program_graph_pb2.ProgramGraph,
    encoder: inst2vec_encoder.Inst2vecEncoder,
):
    """Black-box test encoding LLVM program graphs."""
    # Encode a copy so the fixture graph itself is left untouched.
    graph = program_graph_pb2.ProgramGraph()
    graph.CopyFrom(llvm_program_graph)
    encoder.Encode(graph)

    instruction = node_pb2.Node.INSTRUCTION

    # This assumes that all of the test graphs have at least one instruction.
    instruction_count = sum(node.type == instruction for node in graph.node)
    assert instruction_count >= 1

    # Check for the presence of expected node attributes.
    for node in graph.node:
        attributes = node.features.feature
        assert "inst2vec_embedding" in attributes
        if node.type == instruction:
            assert "inst2vec_preprocessed" in attributes
コード例 #12
0
def main(argv):
    """Run the inst2vec encoder in the mode selected by the flags.

    FLAGS.dataset takes precedence, then FLAGS.directory. With neither set,
    a ProgramGraph obtained from ParseStdinOrDie() is encoded, using the
    contents of the file named by FLAGS.ir as the IR when that flag is set,
    and passed to WriteStdout().
    """
    init_app(argv)
    encoder = inst2vec_encoder.Inst2vecEncoder()

    if FLAGS.dataset:
        encoder.RunOnDataset(Path(FLAGS.dataset))
    elif FLAGS.directory:
        encoder.RunOnDirectory(Path(FLAGS.directory))
    else:
        proto = ParseStdinOrDie(program_graph_pb2.ProgramGraph())
        ir = None
        if FLAGS.ir:
            with open(FLAGS.ir) as ir_file:
                ir = ir_file.read()
        encoder.Encode(proto, ir)
        WriteStdout(proto)
コード例 #13
0
def _ProcessRows(job) -> Tuple[int, int, float]:
    """Encode every not-yet-annotated graph in a job.

    Args:
      job: A sequence whose first item is the encoder and whose second item
        is a list of (graph path, IR path) pairs.

    Returns:
      A tuple of the number of paths in the job, the number of graphs for
      which encoding was attempted, and the elapsed wall time in seconds.
    """
    started_at = time.time()

    encoder: Inst2vecEncoder = job[0]
    paths: List[Tuple[pathlib.Path, pathlib.Path]] = job[1]

    encoded_count = 0
    for graph_path, ir_path in paths:
        graph = pbutil.FromFile(graph_path, program_graph_pb2.ProgramGraph())
        # Check to see if we have already processed this file.
        annotated = graph.features.feature["inst2vec_annotated"]
        if len(annotated.int64_list.value):
            continue

        # The attempt is counted even if encoding fails below.
        encoded_count += 1
        try:
            Encode(encoder, graph, graph_path, ir_path)
        except (AssertionError, TimeoutError):
            # NCC codebase uses assertions to check for errors; those and
            # timeouts are deliberately ignored.
            pass

    return len(paths), encoded_count, time.time() - started_at
コード例 #14
0
ファイル: xla.py プロジェクト: wanyao1992/ProGraML
def BuildProgramGraphProto(
    hlo_proto: xla_pb2.HloProto, ) -> program_graph_pb2.ProgramGraph:
    """Construct a program graph from an HloProto message.

    Args:
      hlo_proto: The HloProto message to build the graph from.

    Returns:
      A ProgramGraph message instance.

    Raises:
      ValueError: If graph construction fails (presumably raised by the
        xla_pybind binding, which is not visible here; confirm).
    """
    # Messages cross the pybind11 boundary as serialized strings, since
    # there are no auto-generated pybind11 bindings for protocol buffers.
    serialized_hlo = hlo_proto.SerializeToString()
    serialized_graph = xla_pybind.BuildProgramGraphProto(serialized_hlo)

    program_graph = program_graph_pb2.ProgramGraph()
    program_graph.ParseFromString(serialized_graph)
    return program_graph
コード例 #15
0
def test_GraphTuple_one_graph():
  """A single graph's edges are grouped by flow in the built graph tuple."""
  control = edge_pb2.Edge.CONTROL
  data = edge_pb2.Edge.DATA
  call = edge_pb2.Edge.CALL

  edges = [
    edge_pb2.Edge(source=0, target=1),
    edge_pb2.Edge(source=0, target=2, position=1),
    edge_pb2.Edge(source=1, target=0, position=10, flow=call),
  ]
  graph = program_graph_pb2.ProgramGraph(node=[node_pb2.Node()], edge=edges)

  builder = GraphTupleBuilder()
  builder.AddProgramGraph(graph)
  gt = builder.Build()

  # Adjacency lists, one per edge flow.
  assert np.array_equal(gt.adjacencies[control], [(0, 1), (0, 2)])
  empty_adjacency = np.zeros((0, 2), dtype=np.int32)
  assert np.array_equal(gt.adjacencies[data], empty_adjacency)
  assert np.array_equal(gt.adjacencies[call], [(1, 0)])

  # Edge positions, one list per edge flow.
  assert np.array_equal(gt.edge_positions[control], [0, 1])
  assert np.array_equal(gt.edge_positions[data], [])
  assert np.array_equal(gt.edge_positions[call], [10])
コード例 #16
0
def EnumerateLlvmProgramGraphs(
) -> Iterable[Tuple[str, program_graph_pb2.ProgramGraph]]:
    """Yield (file name, ProgramGraph) for every entry in LLVM_IR_GRAPHS."""
    for graph_file in LLVM_IR_GRAPHS.iterdir():
        graph = pbutil.FromFile(graph_file, program_graph_pb2.ProgramGraph())
        yield graph_file.name, graph
コード例 #17
0
  def _Worker(self):
    """Threaded graph reader.

    Enumerates the files in self.graph_path once, then repeatedly makes
    passes over them, putting (graph, features) pairs on self._outq. A pass
    starts while self.min_graph_count is unset or not yet reached. The method
    calls self._Done(graph_count) and returns when no candidate files remain,
    when self.max_graph_count is reached, or when the pass loop ends.

    Files that fail to load, fall outside the allowed node count range, or
    yield no usable features are added to self._excluded_graph_files and are
    filtered out at the start of the next pass.
    """
    graph_files = list(self.graph_path.iterdir())
    app.Log(
      2, "Enumerated %s graph files to load", humanize.Commas(len(graph_files))
    )

    # Number of <graph, features> pairs put on the output queue so far.
    graph_count = 0
    while not self.min_graph_count or graph_count < self.min_graph_count:
      # Strip any graph files that we have earmarked for ignoring.
      graph_files = [
        f for f in graph_files if f not in self._excluded_graph_files
      ]

      # We may have run out of files.
      if not graph_files:
        self._Done(graph_count)
        return

      if self.seed:
        # If we are setting a reproducible seed, first sort the list of files
        # since iterdir() order is undefined, then seed the RNG for the
        # shuffle.
        graph_files = sorted(graph_files, key=lambda x: x.name)
        # Change the seed so that on the next execution of this loop we will
        # chose a different random ordering.
        self.seed += 1
      # When self.seed is falsy it is passed to random.Random() unchanged.
      random.Random(self.seed).shuffle(graph_files)

      for graph_path in graph_files:
        # NOTE(review): this only leaves the inner for-loop; the enclosing
        # while-loop condition is evaluated again afterwards. Confirm this is
        # the intended stop behavior.
        if self._stopped:
          break

        # Derive the features file name by swapping the "ProgramGraph.pb"
        # suffix of the graph file name for "ProgramGraphFeaturesList.pb".
        stem = graph_path.name[: -len("ProgramGraph.pb")]
        name = f"{stem}ProgramGraphFeaturesList.pb"
        features_path = self.labels_path / name
        # There is no guarantee that we have generated features for this
        # program graph, so we check for its existence. As a *very* defensive
        # measure, we also check for the existence of the graph file that we
        # enumerated at the start of this function. This check can be removed
        # later, it is only useful during development when you might be
        # modifying the dataset at the same time as having test jobs running.
        # NOTE(review): the file is not added to self._excluded_graph_files
        # here, so it is checked again on every later pass. Confirm intended.
        if not graph_path.is_file() or not features_path.is_file():
          self.skip_count += 1
          continue

        # Read the graph from disk, maybe performing a cheeky wee conversion
        # to CDFG format.
        app.Log(3, "Read %s", features_path)
        if self.use_cdfg:
          graph = cdfg.FromProgramGraphFile(graph_path)
        else:
          graph = pbutil.FromFile(graph_path, program_graph_pb2.ProgramGraph())

        # A falsy graph is treated as a load failure and excluded from later
        # passes.
        if not graph:
          app.Log(2, "Failed to load graph %s", graph_path)
          self._excluded_graph_files.add(graph_path)
          continue

        # Skip empty graphs.
        # Graphs with more than FLAGS.max_graph_node_count nodes are skipped
        # and excluded from later passes as well.
        if not len(graph.node) or len(graph.node) > FLAGS.max_graph_node_count:
          app.Log(
            2,
            "Graph node count %s is not in range (1,%s]",
            len(graph.node),
            FLAGS.max_graph_node_count,
          )
          self._excluded_graph_files.add(graph_path)
          continue

        # Skip a graph without inst2vec
        # NOTE(review): unlike the rejection paths above, the file is not
        # added to self._excluded_graph_files, so it is re-read on every later
        # pass. Confirm intended.
        if self.require_inst2vec and not len(
          graph.features.feature["inst2vec_annotated"].int64_list.value
        ):
          app.Log(2, "Skipping graph without inst2vec annotations")
          continue

        features_list = pbutil.FromFile(
          features_path, program_graph_features_pb2.ProgramGraphFeaturesList()
        )

        # Iterate over the features list to yield <graph, features> pairs.
        skipped_all_features = True
        for j, features in enumerate(features_list.graph):
          # A missing "data_flow_step_count" feature is treated as 0 steps.
          step_count_feature = features.features.feature[
            "data_flow_step_count"
          ].int64_list.value
          step_count = step_count_feature[0] if len(step_count_feature) else 0
          if self.data_flow_step_max and step_count > self.data_flow_step_max:
            # skip_count is incremented before the log call, so the
            # denominator in the percentage below is at least 1.
            self.skip_count += 1
            app.Log(
              3,
              "Skipped graph with data_flow_step_count %d > %d "
              "(skipped %d / %d, %.2f%%)",
              step_count,
              self.data_flow_step_max,
              self.skip_count,
              (graph_count + self.skip_count),
              (self.skip_count / (graph_count + self.skip_count)) * 100,
            )
            continue
          graph_count += 1
          if self.logfile:
            self.logfile.write(f"{features_path} {j}\n")
          # Blocking put of the pair on the output queue.
          self._outq.put((graph, features), block=True)
          skipped_all_features = False
          if self.max_graph_count and graph_count >= self.max_graph_count:
            app.Log(2, "Stopping after reading %d graphs", graph_count)
            self._Done(graph_count)
            return

        # No features entry of this graph was emitted, so there is no point
        # in visiting the file again on a later pass.
        if skipped_all_features:
          self._excluded_graph_files.add(graph_path)

    self._Done(graph_count)
コード例 #18
0
 def Build(self) -> program_graph_pb2.ProgramGraph:
   """Parse the bytes returned by self._Build() into a ProgramGraph."""
   serialized = self._Build()
   graph = program_graph_pb2.ProgramGraph()
   graph.ParseFromString(serialized)
   return graph
コード例 #19
0
ファイル: graph2nx.py プロジェクト: WilliamWangPeng/ProGraML
def Main():
    """Pickle the NetworkX form of a parsed ProgramGraph to stdout.

    The ProgramGraph is obtained from ParseStdinOrDie() and the pickle is
    written to the binary stdout buffer.
    """
    program_graph = ParseStdinOrDie(program_graph_pb2.ProgramGraph())
    nx_graph = nx_format.ProgramGraphToNetworkX(program_graph)
    pickle.dump(nx_graph, sys.stdout.buffer)
コード例 #20
0
def main(argv):
    """Pickle the NetworkX form of a parsed ProgramGraph to stdout.

    After init_app(argv), the ProgramGraph is obtained from
    ParseStdinOrDie() and the pickle is written to the binary stdout buffer.
    """
    init_app(argv)
    program_graph = ParseStdinOrDie(program_graph_pb2.ProgramGraph())
    nx_graph = nx_format.ProgramGraphToNetworkX(program_graph)
    pickle.dump(nx_graph, sys.stdout.buffer)
コード例 #21
0
def test_SerializeInstructionsInProgramGraph_empty_graph():
    """An empty graph serializes to an empty list."""
    empty_graph = program_graph_pb2.ProgramGraph()
    serialized = graph_serializer.SerializeInstructionsInProgramGraph(
        empty_graph, max_nodes=1000)
    assert serialized == []