Example #1
0
  def __init__(self, *args, **kwargs):
    """Build the GGNN model and attach it to this instance as `self.model`.

    All positional and keyword arguments are forwarded unchanged to the
    parent class constructor.
    """
    super(Ggnn, self).__init__(*args, **kwargs)

    # Derive the model configuration from the label dimensionality and from
    # whether the graph database reports graph-level labels / features.
    num_classes = self.y_dimensionality
    has_graph_labels = self.graph_db.graph_y_dimensionality > 0
    has_aux_input = self.graph_db.graph_x_dimensionality > 0
    config = GGNNConfig(
      num_classes=num_classes,
      has_graph_labels=has_graph_labels,
      has_aux_input=has_aux_input,
    )

    # The first embeddings table of the node encoder is converted to a
    # float32 tensor and handed to the model as its pretrained embeddings.
    embeddings_table = node_encoder.GraphNodeEncoder().embeddings_tables[0]
    embeddings_array = np.array(embeddings_table, dtype=np.float32)
    pretrained = torch.from_numpy(embeddings_array)

    self.model = GGNNModel(
      config, pretrained_embeddings=pretrained, test_only=FLAGS.test_only,
    )

    default_dtype = torch.get_default_dtype()
    app.Log(1, "Using device %s with dtype %s", self.model.dev, default_dtype)

    if not DEBUG:
      return

    # Debug mode only: attach nan_hook as a forward hook on every submodule.
    for module in self.model.modules():
      module.register_forward_hook(nan_hook)
Example #2
0
def Main():
    """Time each stage of the graph construction pipeline and print a summary.

    Reads every path yielded by `LLVM_IR.iterdir()`, then runs four profiled
    stages: building unlabelled graphs, encoding their nodes, producing
    labelled graphs through reachability annotation, and batching the
    resulting graph tuples. Each stage reports its mean time per item, and a
    final summary of graph, node and edge counts is printed to stdout.
    """
    irs = [fs.Read(path) for path in LLVM_IR.iterdir()]
    ir_count = len(irs)

    def PerItemDuration(elapsed, count):
        """Format the mean time per item, tolerating a zero item count."""
        # An empty input directory (or a run producing no labelled graphs)
        # would otherwise raise ZeroDivisionError inside the profiler
        # callback; with no items the total elapsed time is reported instead.
        return humanize.Duration(elapsed / max(count, 1))

    with prof.ProfileToStdout(lambda t: (
            f"STAGE 1: Construct unlabelled graphs (llvm2graph)         "
            f"({PerItemDuration(t, ir_count)} / IR)")):
        graphs = [llvm2graph.BuildProgramGraphNetworkX(ir) for ir in irs]

    encoder = node_encoder.GraphNodeEncoder()
    with prof.ProfileToStdout(lambda t: (
            f"STAGE 2: Encode graphs (inst2vec)                         "
            f"({PerItemDuration(t, ir_count)} / IR)")):
        for graph, ir in zip(graphs, irs):
            encoder.EncodeNodes(graph, ir)

    features_count = 0
    features_lists = []
    # The lambda reads features_count when it is invoked, not when it is
    # created, so it sees the total accumulated by the loop below (presumably
    # the profiler calls it on exit -- confirm against prof.ProfileToStdout).
    with prof.ProfileToStdout(lambda t: (
            f"STAGE 3: Produce labelled graphs (reachability analysis)  "
            f"({PerItemDuration(t, features_count)} / graph)")):
        for graph in graphs:
            analysis = reachability.ReachabilityAnnotator(
                programl.NetworkXToProgramGraph(graph))
            features_list = analysis.MakeAnnotated(n=10).graphs
            features_count += len(features_list)
            features_lists.append(features_list)

    def IterGraphTuples():
        """Lazily convert every labelled graph into a graph tuple."""
        # Named explicitly so that the builtin iter() is not shadowed.
        for features_list in features_lists:
            for graph in features_list:
                yield graph_tuple.GraphTuple.CreateFromNetworkX(graph)

    with prof.ProfileToStdout(lambda t: (
            f"STAGE 4: Construct graph tuples                           "
            f"({PerItemDuration(t, features_count)} / graph)")):
        batcher = graph_batcher.GraphBatcher(
            IterGraphTuples(), max_node_count=10000)
        graph_tuples = list(batcher)

    print("=================================")
    print(f"Unlabelled graphs count: {ir_count}")
    print(f"  Labelled graphs count: {features_count}")
    print(f"     Graph tuples count: {len(graph_tuples)}")
    print(
        f"       Total node count: {sum(gt.node_count for gt in graph_tuples)}"
    )
    print(
        f"       Total edge count: {sum(gt.edge_count for gt in graph_tuples)}"
    )
Example #3
0
def test_EncodeNodes_llvm_program_graph(llvm_program_graph_nx: nx.MultiDiGraph):
  """Black-box test: encode an LLVM program graph, then check node attributes."""
  graph_encoder = node_encoder.GraphNodeEncoder()
  # Encode a copy so that the fixture graph itself is left untouched.
  graph = llvm_program_graph_nx.copy()
  graph_encoder.EncodeNodes(graph)

  # Every test graph is assumed to contain at least one statement node.
  statement_nodes = [
    node
    for node, attrs in graph.nodes(data=True)
    if attrs["type"] == programl_pb2.Node.STATEMENT
  ]
  assert len(statement_nodes) >= 1

  # After encoding, each node must carry the expected attributes.
  for _, attrs in graph.nodes(data=True):
    assert len(attrs["x"]) == 1
    assert len(attrs["y"]) == 0
    assert "preprocessed_text" in attrs
Example #4
0
    def __init__(
        self,
        dataflow: bool = True,
        preprocess_text: bool = True,
        opt=None,
    ):
        """Create a Control and Data Flow Graph (CDFG) builder.

        Args:
          dataflow: Selects the data flow information added to control flow
            graphs. When False, only control flow is used. When True, nodes
            for identifiers and immediates are inserted, together with data
            flow edges between the identifier nodes and statements.
          preprocess_text: When true, statement text is pre-processed to
            discard literals, normalise identifiers, etc.
          opt: Path of the LLVM `opt` binary used to construct control-flow
            and call graphs. The default uses the opt binary packaged with
            //compilers/llvm:opt.
        """
        # Store the options as given; a new node encoder is created for each
        # builder instance.
        self.dataflow, self.preprocess_text = dataflow, preprocess_text
        self.node_encoder = node_encoder.GraphNodeEncoder()
        self.opt = opt
Example #5
0
def encoder() -> node_encoder.GraphNodeEncoder:
  """Provide a graph node encoder for tests to share.

  Intended as a session-level fixture so that one encoder instance is
  re-used (the fixture decorator is not visible here -- confirm the scope).
  """
  shared_encoder = node_encoder.GraphNodeEncoder()
  return shared_encoder