def test_FromGraphTuples_two_tuples(
    node_x_dimensionality: int,
    node_y_dimensionality: int,
    graph_x_dimensionality: int,
    graph_y_dimensionality: int,
):
    """Check that two random graph tuples merge into one disjoint tuple.

    If any assertion fails, the input tuples and the merged tuple are pickled
    to /tmp for offline inspection and the failure is re-raised.
    """
    generator_options = dict(
        node_x_dimensionality=node_x_dimensionality,
        node_y_dimensionality=node_y_dimensionality,
        graph_x_dimensionality=graph_x_dimensionality,
        graph_y_dimensionality=graph_y_dimensionality,
    )
    tuples_in = [
        random_graph_tuple_generator.CreateRandomGraphTuple(**generator_options)
        for _ in range(2)
    ]
    first, second = tuples_in

    # Merge the pair into a single disjoint GraphTuple.
    disjoint_tuple = graph_tuple.GraphTuple.FromGraphTuples(tuples_in)

    try:
        assert disjoint_tuple.disjoint_graph_count == 2

        # Counts are additive across the inputs; the edge position bound is
        # the largest bound seen in either input.
        assert disjoint_tuple.node_count == sum(
            t.node_count for t in tuples_in)
        assert disjoint_tuple.edge_count == sum(
            t.edge_count for t in tuples_in)
        assert disjoint_tuple.edge_position_max == max(
            t.edge_position_max for t in tuples_in)

        # Nodes of the first graph are labelled 0 and nodes of the second
        # graph are labelled 1.
        expected_graph_ids = np.concatenate([
            np.zeros(first.node_count, dtype=np.int32),
            np.ones(second.node_count, dtype=np.int32),
        ])
        assert np.array_equal(
            disjoint_tuple.disjoint_nodes_list, expected_graph_ids)

        # Dimensionalities.
        assert (disjoint_tuple.node_x_dimensionality ==
                first.node_x_dimensionality)
        assert disjoint_tuple.node_y_dimensionality == node_y_dimensionality
        assert disjoint_tuple.graph_x_dimensionality == graph_x_dimensionality
        assert disjoint_tuple.graph_y_dimensionality == graph_y_dimensionality
    except AssertionError:
        # Preserve the failing inputs and output before propagating.
        fs.Write("/tmp/graph_tuples_in.pickle", pickle.dumps(tuples_in))
        fs.Write("/tmp/graph_tuple_out.pickle", pickle.dumps(disjoint_tuple))
        app.Error(
            "Assertion failed! Wrote graphs to /tmp/graph_tuples_in.pickle "
            "and /tmp/graph_tuple_out.pickle")
        raise
def test_SetFeaturesAndLabels(dimensionalities: Tuple[int, int], copy: bool):
    """Check that SetFeaturesAndLabels() returns new labels without mutating.

    Args:
      dimensionalities: A (node_y_dimensionality, graph_y_dimensionality) pair
        used to generate the input tuple.
      copy: Forwarded as the `copy` argument of SetFeaturesAndLabels().
    """
    node_y_dimensionality, graph_y_dimensionality = dimensionalities

    in_tuple = random_graph_tuple_generator.CreateRandomGraphTuple(
        node_y_dimensionality=node_y_dimensionality,
        graph_y_dimensionality=graph_y_dimensionality,
    )

    # Snapshot the labels so that mutation of the input can be detected.
    node_y_before = np.copy(in_tuple.node_y)
    graph_y_before = np.copy(in_tuple.graph_y)

    # Fresh random labels with the same shapes as the existing ones.
    node_y_after = np.random.rand(
        in_tuple.node_count, in_tuple.node_y_dimensionality)
    graph_y_after = np.random.rand(in_tuple.graph_y_dimensionality)

    # Pass copies so that the expected values cannot alias the tuple's arrays.
    out_tuple = in_tuple.SetFeaturesAndLabels(
        node_y=np.copy(node_y_after),
        graph_y=np.copy(graph_y_after),
        copy=copy,
    )

    # The input tuple keeps its original labels.
    assert np.array_equal(in_tuple.node_y, node_y_before)
    assert np.array_equal(in_tuple.graph_y, graph_y_before)

    # The output tuple carries the new labels.
    assert np.array_equal(out_tuple.node_y, node_y_after)
    assert np.array_equal(out_tuple.graph_y, graph_y_after)
def CreateRandomGraphTuple(
    node_x_dimensionality: int = 1,
    node_y_dimensionality: int = 0,
    graph_x_dimensionality: int = 0,
    graph_y_dimensionality: int = 0,
    with_data_flow: bool = False,
    split_count: int = 0,
) -> graph_tuple_database.GraphTuple:
    """Create a random database graph tuple.

    A random in-memory graph tuple is generated and wrapped with
    graph_tuple_database.GraphTuple.CreateFromGraphTuple() using a random
    ir_id in the range [0, 4e6].

    Args:
      node_x_dimensionality: Forwarded to the random graph tuple generator.
      node_y_dimensionality: Forwarded to the random graph tuple generator.
      graph_x_dimensionality: Forwarded to the random graph tuple generator.
      graph_y_dimensionality: Forwarded to the random graph tuple generator.
      with_data_flow: If true, populate the data_flow_steps,
        data_flow_root_node and data_flow_positive_node_count columns with
        random values.
      split_count: If nonzero, assign a random split in the range
        [0, split_count - 1]. Otherwise the split is None.

    Returns:
      The newly created graph_tuple_database.GraphTuple.
    """
    mapped = graph_tuple_database.GraphTuple.CreateFromGraphTuple(
        graph_tuple=random_graph_tuple_generator.CreateRandomGraphTuple(
            node_x_dimensionality=node_x_dimensionality,
            node_y_dimensionality=node_y_dimensionality,
            graph_x_dimensionality=graph_x_dimensionality,
            graph_y_dimensionality=graph_y_dimensionality,
        ),
        ir_id=random.randint(0, int(4e6)),
        split=random.randint(0, split_count - 1) if split_count else None,
    )

    if with_data_flow:
        mapped.data_flow_steps = random.randint(1, 50)
        mapped.data_flow_root_node = random.randint(0, mapped.node_count - 1)
        # Clamp the upper bound: for a single-node graph, randint(1, 0) would
        # raise ValueError because the range is empty. Graphs with two or
        # more nodes draw from exactly the same range as before.
        mapped.data_flow_positive_node_count = random.randint(
            1, max(1, mapped.node_count - 1))

    return mapped
def test_CreateFromGraphTuple_graph_y_dimensionality(
    db_session: graph_tuple_database.Database.SessionType,
):
    """Check that the graph label dimensionality is carried over.

    Database tuples are created for graph label dimensionalities 0 and 2, and
    both are then committed to the database session.
    """
    rows = []
    for dimensionality in (0, 2):
        random_tuple = random_graph_tuple_generator.CreateRandomGraphTuple(
            graph_y_dimensionality=dimensionality)
        row = graph_tuple_database.GraphTuple.CreateFromGraphTuple(
            random_tuple, ir_id=1)
        assert row.graph_y_dimensionality == dimensionality
        rows.append(row)

    db_session.add_all(rows)
    db_session.commit()
def test_fuzz_disjoint_graph_tuples():
    """Fuzz the disjoint graph round trip with randomly generated graphs.

    A random number of random graph tuples is merged with FromGraphTuples()
    and then split again with ToGraphTuples(). The merged tuple's aggregate
    properties and the round-tripped tuples' contents are compared against
    the inputs. On assertion failure the offending tuples are pickled to /tmp
    and the failure is re-raised.
    """
    disjoint_graph_count = random.randint(2, 10)
    # Dict literals evaluate in order, so the random draws happen in the
    # order: node_x, node_y, graph_x, graph_y.
    dimensionalities = {
        "node_x_dimensionality": random.randint(1, 3),
        "node_y_dimensionality": random.randint(0, 3),
        "graph_x_dimensionality": random.randint(0, 3),
        "graph_y_dimensionality": random.randint(0, 3),
    }

    graph_tuples_in = [
        random_graph_tuple_generator.CreateRandomGraphTuple(**dimensionalities)
        for _ in range(disjoint_graph_count)
    ]

    t = graph_tuple.GraphTuple.FromGraphTuples(graph_tuples_in)

    # Stage one: aggregate properties of the merged tuple.
    try:
        assert t.disjoint_graph_count == disjoint_graph_count
        assert t.node_count == sum(d.node_count for d in graph_tuples_in)
        assert t.edge_count == sum(d.edge_count for d in graph_tuples_in)
        assert t.edge_position_max == max(
            d.edge_position_max for d in graph_tuples_in)
        for name, expected in dimensionalities.items():
            assert getattr(t, name) == expected
    except AssertionError:
        fs.Write("/tmp/graph_tuples_in.pickle", pickle.dumps(graph_tuples_in))
        fs.Write("/tmp/graph_tuple_out.pickle", pickle.dumps(t))
        raise

    graph_tuples_out = list(t.ToGraphTuples())

    # Stage two: every split-out tuple must match its corresponding input.
    try:
        assert len(graph_tuples_in) == len(graph_tuples_out)
        for tuple_in, tuple_out in zip(graph_tuples_in, graph_tuples_out):
            for name, expected in dimensionalities.items():
                assert getattr(tuple_out, name) == expected

            # Adjacency lists first, then edge positions, compared pairwise.
            for a, b in zip(tuple_in.adjacencies, tuple_out.adjacencies):
                assert a.shape == b.shape
                assert np.array_equal(a, b)
            for a, b in zip(tuple_in.edge_positions,
                            tuple_out.edge_positions):
                assert a.shape == b.shape
                assert np.array_equal(a, b)

            # Feature and label arrays.
            assert np.array_equal(tuple_in.node_x, tuple_out.node_x)
            assert np.array_equal(tuple_in.node_y, tuple_out.node_y)
            assert np.array_equal(tuple_in.graph_x, tuple_out.graph_x)
            assert np.array_equal(tuple_in.graph_y, tuple_out.graph_y)
    except AssertionError:
        fs.Write("/tmp/graph_tuples_in.pickle", pickle.dumps(graph_tuples_in))
        fs.Write("/tmp/graph_tuples_out.pickle",
                 pickle.dumps(graph_tuples_out))
        raise
def test_CreateFromGraphTuple_node_x_dimensionality(
    db_session: graph_tuple_database.Database.SessionType,
):
    """Check the node feature dimensionality of a default random tuple.

    The database tuple built from a generator-default graph tuple is expected
    to report a node feature dimensionality of 1, and must commit cleanly.
    """
    random_tuple = random_graph_tuple_generator.CreateRandomGraphTuple()
    row = graph_tuple_database.GraphTuple.CreateFromGraphTuple(
        random_tuple, ir_id=1)

    assert row.node_x_dimensionality == 1

    db_session.add(row)
    db_session.commit()
def test_GraphBatcher_divisible_node_count():
    """Check the batch count when node counts divide the limit evenly.

    Four 5-node graphs with a 10-node batch limit are expected to produce two
    disjoint batches of two graphs each.
    """
    five_node_graphs = [
        random_graph_tuple_generator.CreateRandomGraphTuple(node_count=5)
        for _ in range(4)
    ]
    batcher = graph_batcher.GraphBatcher(
        MockIterator(five_node_graphs),
        max_node_count=10,
    )

    batches = list(batcher)

    assert len(batches) == 2
    for index in (0, 1):
        assert batches[index].is_disjoint_graph
        assert batches[index].disjoint_graph_count == 2
def test_GraphBatcher_collect_all_inputs(graph_count: int):
    """Check that a batcher with no limits yields one batch of all graphs.

    Args:
      graph_count: The number of random graph tuples fed to the batcher.
    """
    inputs = [
        random_graph_tuple_generator.CreateRandomGraphTuple()
        for _ in range(graph_count)
    ]
    batcher = graph_batcher.GraphBatcher(MockIterator(inputs))

    batches = list(batcher)

    assert len(batches) == 1
    only_batch = batches[0]
    assert only_batch.is_disjoint_graph
    assert only_batch.disjoint_graph_count == graph_count
def test_GraphBatcher_max_node_count_limit_handler_include():
    """Test that graph is included when larger than max node count.

    This test exercises the "include" limit handler. It is named accordingly
    so that it does not share a name with the "skip" handler test; two
    module-level functions with the same name would leave only the later
    definition bound, and this test would never run.
    """
    big_graph = random_graph_tuple_generator.CreateRandomGraphTuple(
        node_count=10)

    batcher = graph_batcher.GraphBatcher(
        MockIterator([big_graph]),
        max_node_count=5,
        max_node_count_limit_handler="include",
    )

    # The 10-node graph exceeds the 5-node limit but must still be batched.
    assert next(batcher)
def test_GraphBatcher_max_node_count_limit_handler_error():
    """Check that the "error" limit handler raises on an oversized graph.

    A 10-node graph is fed to a batcher with a 5-node limit; requesting the
    first batch is expected to raise ValueError.
    """
    oversized = random_graph_tuple_generator.CreateRandomGraphTuple(
        node_count=10)
    source = MockIterator([oversized])

    batcher = graph_batcher.GraphBatcher(
        source,
        max_node_count=5,
        max_node_count_limit_handler="error",
    )

    with test.Raises(ValueError):
        next(batcher)
def test_fuzz_GraphBatcher(graph_count: int, max_graph_count: int,
                           max_node_count: int):
    """Fuzz the graph batcher over parameter choices and input sizes.

    Whatever the limits, the disjoint graph counts of all batches are
    expected to sum to the number of input graphs.

    Args:
      graph_count: The number of random graph tuples to feed to the batcher.
      max_graph_count: Forwarded to GraphBatcher.
      max_node_count: Forwarded to GraphBatcher.
    """
    random_tuples = [
        random_graph_tuple_generator.CreateRandomGraphTuple()
        for _ in range(graph_count)
    ]
    graphs = MockIterator(random_tuples)

    batcher = graph_batcher.GraphBatcher(
        graphs,
        max_node_count=max_node_count,
        max_graph_count=max_graph_count,
    )

    batched_graph_total = sum(
        batch.disjoint_graph_count for batch in list(batcher))
    assert batched_graph_total == graph_count
def test_CreateFromGraphTuple_attributes():
    """Check that CreateFromGraphTuple() copies counts from the input tuple.

    The ir_id is taken from the call argument; the node count, the edge
    counts and the edge position maximum must match the source graph tuple.
    """
    ir_id = random.randint(0, int(1e4))
    source = random_graph_tuple_generator.CreateRandomGraphTuple()

    row = graph_tuple_database.GraphTuple.CreateFromGraphTuple(
        source, ir_id=ir_id)

    assert row.ir_id == ir_id

    copied_attributes = (
        "node_count",
        "edge_count",
        "control_edge_count",
        "data_edge_count",
        "call_edge_count",
        "edge_position_max",
    )
    for attribute in copied_attributes:
        assert getattr(row, attribute) == getattr(source, attribute)
def test_GraphBatcher_exact_graph_count():
    """Check the batch count when an exact graph count is required.

    Seven input graphs with exact_graph_count=3 are expected to give two
    batches of three graphs each.
    """
    inputs = [
        random_graph_tuple_generator.CreateRandomGraphTuple()
        for _ in range(7)
    ]
    batcher = graph_batcher.GraphBatcher(
        MockIterator(inputs),
        exact_graph_count=3,
    )

    batches = list(batcher)

    assert len(batches) == 2
    first_batch, second_batch = batches[0], batches[1]
    assert first_batch.disjoint_graph_count == 3
    assert second_batch.disjoint_graph_count == 3
def test_GraphBatcher_max_node_count_limit_handler_skip():
    """Test that graph is skipped when larger than max node count.

    The only input is a 10-node graph and the batcher limit is 5 nodes, so
    with the "skip" handler the batcher is expected to be exhausted without
    yielding any batch.
    """
    big_graph = random_graph_tuple_generator.CreateRandomGraphTuple(
        node_count=10)

    batcher = graph_batcher.GraphBatcher(
        MockIterator([big_graph]),
        max_node_count=5,
        max_node_count_limit_handler="skip",
    )

    # Use a sentinel default so that the test fails if a batch is produced.
    # Merely catching StopIteration would pass whether or not the graph was
    # skipped, and so would verify nothing.
    exhausted = object()
    assert next(batcher, exhausted) is exhausted
def two_graph_db_session(request) -> graph_tuple_database.Database.SessionType:
    """Yield a session on a database populated with two graph tuples.

    The database is created from `request.param` inside a
    testing_databases.DatabaseContext, and two random graph tuples with
    ir_ids 1 and 2 are committed before the session is yielded.
    """
    with testing_databases.DatabaseContext(
            graph_tuple_database.Database, request.param) as db:
        rows = [
            graph_tuple_database.GraphTuple.CreateFromGraphTuple(
                graph_tuple=random_graph_tuple_generator.CreateRandomGraphTuple(),
                ir_id=ir_id)
            for ir_id in (1, 2)
        ]

        with db.Session() as session:
            session.add_all(rows)
            session.commit()

            # Sanity check: both the tuple rows and their data rows exist.
            counted_columns = (
                graph_tuple_database.GraphTuple.ir_id,
                graph_tuple_database.GraphTupleData.id,
            )
            for column in counted_columns:
                row_count = session.query(sql.func.count(column)).scalar()
                assert row_count == 2

            yield session
def test_FromGraphTuples_single_tuple(
    node_x_dimensionality: int,
    node_y_dimensionality: int,
    graph_x_dimensionality: int,
    graph_y_dimensionality: int,
) -> graph_tuple.GraphTuple:
    """Check disjoint graph creation from a list holding a single graph.

    The merged tuple must mirror the counts, dimensionalities, features and
    labels of its only input. On assertion failure both tuples are pickled
    to /tmp and the failure is re-raised.
    """
    source = random_graph_tuple_generator.CreateRandomGraphTuple(
        node_x_dimensionality=node_x_dimensionality,
        node_y_dimensionality=node_y_dimensionality,
        graph_x_dimensionality=graph_x_dimensionality,
        graph_y_dimensionality=graph_y_dimensionality,
    )

    # Wrap the lone graph in a disjoint GraphTuple.
    merged = graph_tuple.GraphTuple.FromGraphTuples([source])

    try:
        assert merged.disjoint_graph_count == 1
        for attribute in ("node_count", "edge_count", "edge_position_max"):
            assert getattr(merged, attribute) == getattr(source, attribute)

        # With one graph, every node belongs to graph 0.
        all_zeros = np.zeros(merged.node_count, dtype=np.int32)
        assert np.array_equal(merged.disjoint_nodes_list, all_zeros)

        # Dimensionalities.
        for attribute in (
                "node_x_dimensionality",
                "node_y_dimensionality",
                "graph_x_dimensionality",
                "graph_y_dimensionality",
        ):
            assert getattr(merged, attribute) == getattr(source, attribute)

        # Node-level feature and label vectors.
        assert np.array_equal(merged.node_x, source.node_x)
        assert np.array_equal(merged.node_y, source.node_y)

        # Graph-level vectors are compared against the first entry of the
        # merged tuple's vector when the source has them, and directly
        # otherwise.
        if source.has_graph_x:
            assert np.array_equal(merged.graph_x[0], source.graph_x)
        else:
            assert merged.graph_x == source.graph_x

        if source.has_graph_y:
            assert np.array_equal(merged.graph_y[0], source.graph_y)
        else:
            assert merged.graph_y == source.graph_y
    except AssertionError:
        fs.Write("/tmp/graph_tuple_in.pickle", pickle.dumps(source))
        fs.Write("/tmp/graph_tuple_out.pickle", pickle.dumps(merged))
        raise
def test_CreateRandomGraphTuple(
    disjoint_graph_count: int,
    node_x_dimensionality: int,
    node_y_dimensionality: int,
    graph_x_dimensionality: int,
    graph_y_dimensionality: int,
):
    """Black-box check that the generator honours its requested properties.

    Each requested property must be reported unchanged by the generated
    graph tuple.
    """
    requested = {
        "disjoint_graph_count": disjoint_graph_count,
        "node_x_dimensionality": node_x_dimensionality,
        "node_y_dimensionality": node_y_dimensionality,
        "graph_x_dimensionality": graph_x_dimensionality,
        "graph_y_dimensionality": graph_y_dimensionality,
    }

    generated = random_graph_tuple_generator.CreateRandomGraphTuple(
        **requested)

    for attribute, expected in requested.items():
        assert getattr(generated, attribute) == expected
def test_fuzz_GraphTuple_CreateFromGraphTuple(
    db_session: graph_tuple_database.Database.SessionType,
):
    """Fuzz database tuple creation from randomly generated graph tuples.

    The derived properties of the database tuple are checked against the
    source graph tuple, and the row is committed to catch SQL integrity
    errors.
    """
    source = random_graph_tuple_generator.CreateRandomGraphTuple()
    t = graph_tuple_database.GraphTuple.CreateFromGraphTuple(
        graph_tuple=source, ir_id=random.randint(0, int(4e6)))

    # The total edge count is the sum of the per-flow edge counts.
    per_flow_total = (
        t.control_edge_count + t.data_edge_count + t.call_edge_count)
    assert t.edge_count == per_flow_total

    # The checksum is 40 characters long.
    assert len(t.sha1) == 40

    # Counts on the row match the source tuple.
    assert t.node_count == source.node_count
    assert t.edge_count == source.edge_count

    # The tuple stored on the row matches the source tuple.
    stored = t.tuple
    assert stored.node_count == source.node_count
    stored = t.tuple
    assert stored.edge_count == source.edge_count
    assert len(t.tuple.adjacencies) == 3
    assert len(t.tuple.edge_positions) == 3

    # Add it to the database to catch SQL integrity errors.
    db_session.add(t)
    db_session.commit()
def test_CreateRandomGraphTuple_node_count(node_count: int):
    """Check that the generator honours an explicitly requested node count.

    Args:
      node_count: The node count to request from the generator.
    """
    generated = random_graph_tuple_generator.CreateRandomGraphTuple(
        node_count=node_count)

    assert generated.node_count == node_count