def generate_buckets(vertices, bits, max_edges, num_ops, verify_isomorphism):
    """Hash every labeling of the single graph encoded by ``bits``.

    The adjacency matrix is built from ``bits`` with
    ``graph_util.gen_is_edge_fn``. Each assignment of op labels to the
    ``vertices - 2`` interior vertices is fingerprinted with
    ``graph_util.hash_module``; the first graph seen for a fingerprint becomes
    the canonical entry of that bucket.

    Args:
      vertices: number of vertices; the adjacency matrix has shape
        ``(vertices, vertices)``.
      bits: integer handed to ``graph_util.gen_is_edge_fn`` to decide which
        edges exist.
      max_edges: graphs with more edges than this are rejected.
      num_ops: number of distinct labels tried for each interior vertex.
      verify_isomorphism: when truthy, every fingerprint collision is checked
        with ``graph_util.is_isomorphic`` against the canonical graph.

    Returns:
      ``None`` when the graph is rejected (``graph_util.is_full_dag`` is false
      or the edge count exceeds ``max_edges``); otherwise a dict mapping
      fingerprint -> ``(matrix_as_nested_list, labeling)``.

    Raises:
      SystemExit: with status 1 when ``verify_isomorphism`` is set and two
        non-isomorphic graphs share a fingerprint.
    """
    matrix = np.fromfunction(graph_util.gen_is_edge_fn(bits),
                             (vertices, vertices),
                             dtype=np.int8)

    # Discard any graph which can be pruned or exceeds the edge constraint.
    if (not graph_util.is_full_dag(matrix) or
            graph_util.num_edges(matrix) > max_edges):
        return None

    buckets = {}

    # Iterate through all possible labelings of the interior vertices.
    for labeling in itertools.product(
            *[range(num_ops) for _ in range(vertices - 2)]):
        # The first and last vertices always carry the fixed labels -1 and -2.
        labeling = [-1] + list(labeling) + [-2]
        fingerprint = graph_util.hash_module(matrix, labeling)

        if fingerprint not in buckets:
            # A fresh tolist() per entry keeps stored matrices independent.
            buckets[fingerprint] = (matrix.tolist(), labeling)
        # This catches the "false positive" case of two models which are not
        # isomorphic hashing to the same bucket.
        elif verify_isomorphism:
            canonical_graph = buckets[fingerprint]
            if not graph_util.is_isomorphic(
                    (matrix.tolist(), labeling), canonical_graph):
                logging.fatal(
                    'Matrix:\n%s\nLabel: %s\nis not isomorphic to'
                    ' canonical matrix:\n%s\nLabel: %s',
                    str(matrix), str(labeling),
                    str(canonical_graph[0]),
                    str(canonical_graph[1]))
                # logging.fatal may only log (stdlib logging), so exit
                # explicitly, and with a non-zero status: a bare sys.exit()
                # would report success.
                sys.exit(1)

    return buckets
def generate_graph(max_vertices, max_edges, num_ops, verify_isomorphism,
                   output_file):
    """Enumerate all labeled graphs up to ``max_vertices`` and dump them as JSON.

    For every vertex count from 2 to ``max_vertices`` and every bit string over
    the strict upper triangle of the adjacency matrix, the graph is kept when
    ``graph_util.is_full_dag`` accepts it and it has at most ``max_edges``
    edges. Each labeling of the interior vertices is fingerprinted with
    ``graph_util.hash_module``; the first graph seen per fingerprint is stored.

    Args:
      max_vertices: largest number of vertices to enumerate (inclusive).
      max_edges: graphs with more edges than this are skipped.
      num_ops: number of distinct labels tried for each interior vertex.
      verify_isomorphism: when truthy, every fingerprint collision is checked
        with ``graph_util.is_isomorphic`` against the canonical graph.
      output_file: path the JSON mapping fingerprint -> (matrix, labeling) is
        written to.

    Raises:
      SystemExit: with status 1 when ``verify_isomorphism`` is set and two
        non-isomorphic graphs share a fingerprint.
    """
    total_graphs = 0  # Total number of graphs (including isomorphisms)
    # hash --> (matrix, label) for the canonical graph associated with each hash
    buckets = {}

    logging.info('Using %d vertices, %d op labels, max %d edges',
                 max_vertices, num_ops, max_edges)

    for vertices in range(2, max_vertices + 1):
        # One bit per entry of the strict upper triangle.
        for bits in range(2 ** (vertices * (vertices - 1) // 2)):
            # Construct adj matrix from bit string
            matrix = np.fromfunction(graph_util.gen_is_edge_fn(bits),
                                     (vertices, vertices),
                                     dtype=np.int8)

            # Discard any graphs which can be pruned or exceed constraints
            if (not graph_util.is_full_dag(matrix) or
                    graph_util.num_edges(matrix) > max_edges):
                continue

            # Iterate through all possible labelings
            for labeling in itertools.product(
                    *[range(num_ops) for _ in range(vertices - 2)]):
                total_graphs += 1
                # The first and last vertices always carry labels -1 and -2.
                labeling = [-1] + list(labeling) + [-2]
                fingerprint = graph_util.hash_module(matrix, labeling)

                if fingerprint not in buckets:
                    buckets[fingerprint] = (matrix.tolist(), labeling)
                # This catches the "false positive" case of two models which
                # are not isomorphic hashing to the same bucket.
                elif verify_isomorphism:
                    canonical_graph = buckets[fingerprint]
                    if not graph_util.is_isomorphic(
                            (matrix.tolist(), labeling), canonical_graph):
                        logging.fatal(
                            'Matrix:\n%s\nLabel: %s\nis not isomorphic to'
                            ' canonical matrix:\n%s\nLabel: %s',
                            str(matrix), str(labeling),
                            str(canonical_graph[0]),
                            str(canonical_graph[1]))
                        # Exit with a failure status; a bare sys.exit() would
                        # report success if logging.fatal only logs.
                        sys.exit(1)

        logging.info('Up to %d vertices: %d graphs (%d without hashing)',
                     vertices, len(buckets), total_graphs)

    with open(output_file, 'w') as f:
        json.dump(buckets, f, sort_keys=True)
def test_is_full_dag(self):
    """Checks which adjacency matrices is_full_dag accepts and rejects."""
    accepted = [
        [[0, 1, 0],
         [0, 0, 1],
         [0, 0, 0]],
        [[0, 1, 1],
         [0, 0, 1],
         [0, 0, 0]],
        [[0, 1, 1, 0],
         [0, 0, 0, 1],
         [0, 0, 0, 1],
         [0, 0, 0, 0]],
    ]
    for adjacency in accepted:
        self.assertTrue(graph_util.is_full_dag(np.array(adjacency)))

    rejected = [
        # vertex 1 not connected to input
        [[0, 0, 1],
         [0, 0, 1],
         [0, 0, 0]],
        # vertex 1 not connected to output
        [[0, 1, 1],
         [0, 0, 0],
         [0, 0, 0]],
        # 1, 3 are connected to each other but disconnected from main path
        [[0, 0, 1, 0, 0],
         [0, 0, 0, 1, 0],
         [0, 0, 0, 0, 1],
         [0, 0, 0, 0, 0],
         [0, 0, 0, 0, 0]],
        # no path from input to output
        [[0, 0, 1, 0],
         [0, 0, 0, 1],
         [0, 0, 0, 0],
         [0, 0, 0, 0]],
        # completely disconnected vertex
        [[0, 1, 0, 0],
         [0, 0, 0, 1],
         [0, 0, 0, 0],
         [0, 0, 0, 0]],
    ]
    for adjacency in rejected:
        self.assertFalse(graph_util.is_full_dag(np.array(adjacency)))
def make_graphs(vertices, bits):
    """Return one record per labeling of the graph encoded by ``bits``.

    ``max_edges`` and ``num_ops`` are read from the enclosing scope.

    Args:
      vertices: number of vertices; the adjacency matrix has shape
        ``(vertices, vertices)``.
      bits: integer handed to ``graph_util.gen_is_edge_fn`` to decide which
        edges exist.

    Returns:
      An empty list when the graph has more than ``max_edges`` edges or is
      rejected by ``graph_util.is_full_dag``; otherwise a list of dicts with
      keys ``"hash"``, ``"adj"`` and ``"labeling"``, one per labeling.
    """
    adjacency = np.fromfunction(graph_util.gen_is_edge_fn(bits),
                                (vertices, vertices),
                                dtype=np.int8)

    # The edge-count test runs first; is_full_dag is only consulted when the
    # graph is within the edge budget.
    over_budget = graph_util.num_edges(adjacency) > max_edges
    if over_budget or not graph_util.is_full_dag(adjacency):
        return []

    interior_labelings = itertools.product(
        *(range(num_ops) for _ in range(vertices - 2)))
    # The first and last vertices always carry the fixed labels -1 and -2.
    full_labelings = ([-1, *interior, -2] for interior in interior_labelings)

    return [
        {
            "hash": graph_util.hash_module(adjacency, labels),
            "adj": adjacency.tolist(),
            "labeling": labels,
        }
        for labels in full_labelings
    ]
def generate_graphs(nasbench):
    """Enumerate candidate cell architectures as (x, y) pairs.

    Every full DAG with 2 to 7 vertices and at most 9 edges is combined with
    every labelling of its interior vertices, encoded as a fixed-length token
    sequence, and paired with its mean final test accuracy looked up in
    ``nasbench``.

    Arguments:

    nasbench: NASBench
        an instantiation of the NASBench class provided in the official
        release of nas bench source code

    Returns:

    generator: Iterator
        a generator that yields tuples ``(x, y)``: ``x`` is the token
        sequence ``START, ops, SEPARATOR, adjacency bits, STOP, PAD...`` and
        ``y`` is a float32 array of shape ``[1]`` holding the mean
        ``final_test_accuracy`` over the runs recorded at 108 epochs.
        NOTE(review): no fallback for architectures missing from
        ``nasbench`` is visible in this function -- confirm how
        ``get_metrics_from_spec`` behaves for those.
    """

    # these settings were used in the NASBench-101 paper
    max_vertices = 7
    max_edges = 9
    max_epochs = 108
    max_adjacency_size = max_vertices * (max_vertices - 1) // 2

    # a helper function that maps a model architecture to a metric
    def model_to_metric(_ops, _matrix):
        spec = api.ModelSpec(matrix=_matrix,
                             ops=[ID_TO_NODE[t] for t in _ops])
        # element 1 of the lookup result holds the metrics keyed by epoch
        per_epoch = nasbench.get_metrics_from_spec(spec)[1]
        accuracies = [run["final_test_accuracy"]
                      for run in per_epoch[max_epochs]]
        return np.mean(accuracies).astype(np.float32).reshape([1])

    interior_ops = [CONV1X1, CONV3X3, MAXPOOL3X3]

    # generate all possible graphs and labellings
    for vertices in range(2, max_vertices + 1):
        # one bit per entry of the strict upper triangle
        num_bit_strings = 2 ** (vertices * (vertices - 1) // 2)
        for bits in range(num_bit_strings):

            # generate an adjacency matrix for the graph
            matrix = np.fromfunction(graph_util.gen_is_edge_fn(bits),
                                     (vertices, vertices),
                                     dtype=np.int8)

            # discard graphs which can be pruned or exceed constraints
            if not graph_util.is_full_dag(matrix):
                continue
            if graph_util.num_edges(matrix) > max_edges:
                continue

            # flatten the strict upper triangle of the adjacency matrix
            vector = matrix[np.triu_indices(matrix.shape[0], k=1)]

            # iterate through all possible labellings of interior vertices
            for labelling in itertools.product(interior_ops,
                                               repeat=vertices - 2):

                # convert the labelling to an array of token ids
                node_names = [INPUT, *labelling, OUTPUT]
                ops = np.array(
                    [NODE_TO_ID[t] for t in node_names]).astype(np.int32)

                # pad so that every sequence has the same total length
                pad_count = (max_vertices - ops.size +
                             max_adjacency_size - vector.size)
                padding = [NODE_TO_ID[PAD]] * pad_count

                # assemble the sample in a standard sequence format
                tokens = np.concatenate(
                    [[NODE_TO_ID[START]],
                     ops,
                     [NODE_TO_ID[SEPARATOR]],
                     vector + NODE_TO_ID[ADJACENCY_ZERO],
                     [NODE_TO_ID[STOP]],
                     padding],
                    axis=0)

                yield tokens, model_to_metric(ops, matrix)
def main(_):
    """Enumerate labeled graphs within the FLAGS limits and write them as JSON.

    For every vertex count in ``[FLAGS.min_vertices, FLAGS.max_vertices]`` and
    every bit string over the strict upper triangle of the adjacency matrix,
    the graph is kept when ``graph_util.is_full_dag`` accepts it, its edge
    count lies in ``[FLAGS.min_edges, FLAGS.max_edges]`` and
    ``graph_util.hanging_edge`` is false. Each labeling of the interior
    vertices is fingerprinted with ``graph_util.hash_module``; the first graph
    seen per fingerprint is stored and the mapping is dumped to
    ``FLAGS.output_file``.

    Args:
      _: unused positional argument (presumably argv from the app runner --
        confirm against the entry point).

    Raises:
      SystemExit: with status 1 when ``FLAGS.verify_isomorphism`` is set and
        two non-isomorphic graphs share a fingerprint.
    """
    total_graphs = 0  # Total number of graphs (including isomorphisms)
    total_unlabeled_graphs = 0  # Total number of unlabeled graphs
    # hash --> (matrix, label) for the canonical graph associated with each hash
    buckets = {}

    logging.info('Using %d vertices, %d op labels, min %d max %d edges',
                 FLAGS.max_vertices, FLAGS.num_ops,
                 FLAGS.min_edges, FLAGS.max_edges)

    for vertices in range(FLAGS.min_vertices, FLAGS.max_vertices + 1):
        # One bit per entry of the strict upper triangle.
        for bits in range(2 ** (vertices * (vertices - 1) // 2)):
            # Coarse progress indicator for the long enumeration.
            if bits % 100000 == 0:
                print('bits:', bits)

            # Construct adj matrix from bit string
            matrix = np.fromfunction(graph_util.gen_is_edge_fn(bits),
                                     (vertices, vertices),
                                     dtype=np.int8)

            # Discard any graphs which can be pruned or exceed constraints.
            # The edge count is computed once and reused for both bounds.
            if not graph_util.is_full_dag(matrix):
                continue
            edge_count = graph_util.num_edges(matrix)
            if edge_count > FLAGS.max_edges or edge_count < FLAGS.min_edges:
                continue

            # this step should be redundant with is_full_dag()
            if graph_util.hanging_edge(matrix):
                print(np.array(matrix))
                continue

            print('found valid ulabeled graph')
            print(matrix)
            total_unlabeled_graphs += 1

            # Iterate through all possible labelings
            for labeling in itertools.product(
                    *[range(FLAGS.num_ops) for _ in range(vertices - 2)]):
                total_graphs += 1
                # The first and last vertices always carry labels -1 and -2.
                labeling = [-1] + list(labeling) + [-2]
                fingerprint = graph_util.hash_module(matrix, labeling)

                # todo: check if hash is in nasbench
                if fingerprint not in buckets:
                    buckets[fingerprint] = (matrix.tolist(), labeling)
                # This catches the "false positive" case of two models which
                # are not isomorphic hashing to the same bucket.
                elif FLAGS.verify_isomorphism:
                    canonical_graph = buckets[fingerprint]
                    if not graph_util.is_isomorphic(
                            (matrix.tolist(), labeling), canonical_graph):
                        logging.fatal(
                            'Matrix:\n%s\nLabel: %s\nis not isomorphic to'
                            ' canonical matrix:\n%s\nLabel: %s',
                            str(matrix), str(labeling),
                            str(canonical_graph[0]),
                            str(canonical_graph[1]))
                        # Exit with a failure status; a bare sys.exit() would
                        # report success if logging.fatal only logs.
                        sys.exit(1)

        logging.info('Up to %d vertices: %d graphs (%d without hashing)',
                     vertices, len(buckets), total_graphs)
        logging.info('%d unlabeled graphs', total_unlabeled_graphs)

    print('finished')

    with tf.io.gfile.GFile(FLAGS.output_file, 'w') as f:
        print('outputting now to ', FLAGS.output_file)
        json.dump(buckets, f, sort_keys=True)