Esempio n. 1
0
 def proto(
   self, proto: programl_pb2.ProgramGraph = None
 ) -> programl_pb2.ProgramGraph:
   """Deserialize the stored graph into a protocol buffer.

   Args:
     proto: An optional message instance to parse into. When omitted, a new
       programl_pb2.ProgramGraph is created.

   Returns:
     The message parsed from self.data.serialized_proto. This is the same
     object as `proto` when one was supplied.
   """
   # Compare against None explicitly so that a caller-supplied message is
   # always the one populated and returned, regardless of its truthiness.
   if proto is None:
     proto = programl_pb2.ProgramGraph()
   proto.ParseFromString(self.data.serialized_proto)
   return proto
Esempio n. 2
0
def test_pb_conversion(
  llvm_program_graph: programl_pb2.ProgramGraph, stdout_fmt: str
):
  """Test format conversion from a binary-serialized protocol buffer.

  The graph is serialized with SerializeToString() and piped to BINARY on
  stdin with --stdin_fmt=pb. The test passes when the command succeeds and
  writes a non-empty result to stdout for the requested --stdout_fmt.
  """
  command = [str(BINARY), "--stdin_fmt=pb", f"--stdout_fmt={stdout_fmt}"]
  serialized_graph = llvm_program_graph.SerializeToString()
  # check_output() raises if the command exits with a non-zero status.
  converted = subprocess.check_output(command, input=serialized_graph)
  assert converted
Esempio n. 3
0
def ToBytes(program_graph: programl_pb2.ProgramGraph,
            fmt: InputOutputFormat) -> bytes:
    """Serialize a program graph to bytes in the requested format.

    Args:
        program_graph: The program graph to serialize.
        fmt: The output format, either InputOutputFormat.PB or
            InputOutputFormat.PBTXT.

    Returns:
        The result of SerializeToString() for PB, or the UTF-8 encoding of
        str(program_graph) for PBTXT.

    Raises:
        ValueError: If fmt is neither of the supported formats.
    """
    if fmt == InputOutputFormat.PB:
        return program_graph.SerializeToString()

    if fmt == InputOutputFormat.PBTXT:
        as_text = str(program_graph)
        return as_text.encode("utf-8")

    raise ValueError(f"Unknown program graph format: {fmt}")
Esempio n. 4
0
def ToBytes(program_graph: programl_pb2.ProgramGraph,
            fmt: StdoutGraphFormat) -> bytes:
    """Serialize a program graph to bytes in the requested output format.

    Args:
        program_graph: The program graph to serialize.
        fmt: One of StdoutGraphFormat.PB, PBTXT, NX, or DOT.

    Returns:
        The serialized graph:
          * PB: the result of SerializeToString().
          * PBTXT: the UTF-8 encoding of str(program_graph).
          * NX: the pickled result of ProgramGraphToNetworkX().
          * DOT: the UTF-8 encoding of the ProgramGraphToGraphviz() result.

    Raises:
        ValueError: If fmt is not one of the supported formats.
    """
    if fmt == StdoutGraphFormat.PB:
        return program_graph.SerializeToString()

    if fmt == StdoutGraphFormat.PBTXT:
        as_text = str(program_graph)
        return as_text.encode("utf-8")

    if fmt == StdoutGraphFormat.NX:
        nx_graph = ProgramGraphToNetworkX(program_graph)
        return pickle.dumps(nx_graph)

    if fmt == StdoutGraphFormat.DOT:
        dot_source = ProgramGraphToGraphviz(program_graph)
        return dot_source.encode("utf-8")

    raise ValueError(f"Unknown program graph format: {fmt}")
Esempio n. 5
0
  def Create(
    cls,
    proto: programl_pb2.ProgramGraph,
    ir_id: int,
    split: Optional[int] = None,
  ) -> "ProgramGraph":
    """Create a ProgramGraph from the given protocol buffer.

    This is the preferred method of populating databases of program graphs, as
    it contains the boilerplate to extract and set the metadata columns, and
    handles the join between the two proto/metadata invisibly.

    Args:
      proto: The protocol buffer to instantiate a program graph from.
      ir_id: The ID of the intermediate representation for this program graph.
      split: The split of the proto buf.

    Returns:
      A ProgramGraph instance.

    Raises:
      ValueError: If the nodes of the graph do not all share exactly one x
        dimensionality and exactly one y dimensionality. This includes a graph
        with no nodes.
    """
    # Summarize the edge attributes. A graph without edges has no positions to
    # take the maximum of, so fall back to 0 rather than letting max() raise.
    edge_flows = {edge.flow for edge in proto.edge}
    edge_position_max = max((edge.position for edge in proto.edge), default=0)

    # Gather the node attributes in a single pass.
    node_types = set()
    node_texts = set()
    node_preprocessed_texts = set()
    node_x_dimensionalities = set()
    node_y_dimensionalities = set()

    for node in proto.node:
      node_types.add(node.type)
      node_texts.add(node.text)
      node_preprocessed_texts.add(node.preprocessed_text)
      node_x_dimensionalities.add(len(node.x))
      node_y_dimensionalities.add(len(node.y))

    if len(node_x_dimensionalities) != 1:
      raise ValueError(
        "Graph contains multiple node-level x dimensionalities: "
        f"{node_x_dimensionalities}"
      )
    if len(node_y_dimensionalities) != 1:
      raise ValueError(
        "Graph contains multiple node-level y dimensionalities: "
        f"{node_y_dimensionalities}"
      )

    # Serialize once so that the stored size, hash, and payload all agree.
    serialized_proto = proto.SerializeToString()

    return ProgramGraph(
      ir_id=ir_id,
      split=split,
      node_count=len(proto.node),
      edge_count=len(proto.edge),
      node_type_count=len(node_types),
      edge_flow_count=len(edge_flows),
      node_unique_text_count=len(node_texts),
      node_unique_preprocessed_text_count=len(node_preprocessed_texts),
      graph_x_dimensionality=len(proto.x),
      graph_y_dimensionality=len(proto.y),
      # Each set holds exactly one element, as checked above.
      node_x_dimensionality=next(iter(node_x_dimensionalities)),
      node_y_dimensionality=next(iter(node_y_dimensionalities)),
      edge_position_max=edge_position_max,
      serialized_proto_size=len(serialized_proto),
      data=ProgramGraphData(
        sha1=crypto.sha1(serialized_proto), serialized_proto=serialized_proto,
      ),
    )