Esempio n. 1
0
def generate_buckets(vertices, bits, max_edges, num_ops, verify_isomorphism):
    """Hash every op labeling of the single graph encoded by ``bits``.

    The adjacency matrix is built from ``graph_util.gen_is_edge_fn(bits)``.
    Every interior vertex is assigned each label in ``range(num_ops)``; the
    first and last vertices always carry the fixed labels -1 and -2
    (presumably the input and output markers -- confirm in graph_util).

    Args:
        vertices: side length of the square adjacency matrix.
        bits: integer handed to ``graph_util.gen_is_edge_fn``.
        max_edges: matrices with more edges than this are rejected.
        num_ops: number of distinct labels available to interior vertices.
        verify_isomorphism: when true, every hash collision is checked with
            ``graph_util.is_isomorphic`` and a mismatch aborts the process.

    Returns:
        A dict mapping each fingerprint to the first
        ``(matrix_as_list, labeling)`` pair that produced it, or ``None``
        when the matrix fails ``graph_util.is_full_dag`` or has more than
        ``max_edges`` edges.

    Raises:
        SystemExit: with status 1 when ``verify_isomorphism`` is set and two
            graphs that are not isomorphic share a fingerprint.
    """
    matrix = np.fromfunction(graph_util.gen_is_edge_fn(bits),
                             (vertices, vertices),
                             dtype=np.int8)
    if (not graph_util.is_full_dag(matrix)
            or graph_util.num_edges(matrix) > max_edges):
        # Rejected graphs yield None rather than an empty dict; callers may
        # rely on that, so it is kept (now explicit).
        return None

    buckets = {}

    # Iterate through all possible labelings
    for labeling in itertools.product(
            *[range(num_ops) for _ in range(vertices - 2)]):
        labeling = [-1] + list(labeling) + [-2]
        fingerprint = graph_util.hash_module(matrix, labeling)

        if fingerprint not in buckets:
            buckets[fingerprint] = (matrix.tolist(), labeling)

        # This catches the "false positive" case of two models which are not
        # isomorphic hashing to the same bucket.
        elif verify_isomorphism:
            canonical_graph = buckets[fingerprint]
            if not graph_util.is_isomorphic(
                (matrix.tolist(), labeling), canonical_graph):
                logging.fatal(
                    'Matrix:\n%s\nLabel: %s\nis not isomorphic to'
                    ' canonical matrix:\n%s\nLabel: %s', str(matrix),
                    str(labeling), str(canonical_graph[0]),
                    str(canonical_graph[1]))
                # If the logging backend does not terminate on fatal, make
                # sure the process reports failure instead of status 0.
                sys.exit(1)
    return buckets
Esempio n. 2
0
def generate_graph(max_vertices, max_edges, num_ops, verify_isomorphism,
                   output_file):
    """Enumerate labeled graphs, bucket them by hash, and dump them as JSON.

    For every vertex count from 2 to ``max_vertices`` and every bit string
    over the ``vertices * (vertices - 1) // 2`` candidate edges, the
    adjacency matrix is built with ``graph_util.gen_is_edge_fn``. Matrices
    that fail ``graph_util.is_full_dag`` or exceed ``max_edges`` are
    skipped. Each remaining matrix is combined with every labeling of its
    interior vertices and hashed with ``graph_util.hash_module``.

    Args:
        max_vertices: largest vertex count to enumerate (inclusive).
        max_edges: matrices with more edges than this are skipped.
        num_ops: number of distinct labels available to interior vertices.
        verify_isomorphism: when true, every hash collision is checked with
            ``graph_util.is_isomorphic`` and a mismatch aborts the process.
        output_file: path of the JSON file that receives the buckets.

    Raises:
        SystemExit: with status 1 when ``verify_isomorphism`` is set and two
            graphs that are not isomorphic share a fingerprint.
    """
    total_graphs = 0  # Total number of graphs (including isomorphisms)
    # hash --> (matrix, label) for the canonical graph associated with each hash
    buckets = {}

    logging.info('Using %d vertices, %d op labels, max %d edges',
                 max_vertices, num_ops, max_edges)
    for vertices in range(2, max_vertices + 1):
        for bits in range(2**(vertices * (vertices - 1) // 2)):
            # Construct adj matrix from bit string
            matrix = np.fromfunction(graph_util.gen_is_edge_fn(bits),
                                     (vertices, vertices),
                                     dtype=np.int8)

            # Discard any graphs which can be pruned or exceed constraints
            if (not graph_util.is_full_dag(matrix)
                    or graph_util.num_edges(matrix) > max_edges):
                continue

            # Iterate through all possible labelings
            for labeling in itertools.product(
                    *[range(num_ops) for _ in range(vertices - 2)]):
                total_graphs += 1
                labeling = [-1] + list(labeling) + [-2]
                fingerprint = graph_util.hash_module(matrix, labeling)

                if fingerprint not in buckets:
                    buckets[fingerprint] = (matrix.tolist(), labeling)

                # This catches the "false positive" case of two models which
                # are not isomorphic hashing to the same bucket.
                elif verify_isomorphism:
                    canonical_graph = buckets[fingerprint]
                    if not graph_util.is_isomorphic(
                        (matrix.tolist(), labeling), canonical_graph):
                        logging.fatal(
                            'Matrix:\n%s\nLabel: %s\nis not isomorphic to'
                            ' canonical matrix:\n%s\nLabel: %s', str(matrix),
                            str(labeling), str(canonical_graph[0]),
                            str(canonical_graph[1]))
                        # If the logging backend does not terminate on
                        # fatal, report failure instead of status 0.
                        sys.exit(1)

        # Running totals are cumulative across vertex counts.
        logging.info('Up to %d vertices: %d graphs (%d without hashing)',
                     vertices, len(buckets), total_graphs)

    with open(output_file, 'w') as f:
        json.dump(buckets, f, sort_keys=True)
Esempio n. 3
0
  def test_is_full_dag(self):
    """Checks which adjacency matrices is_full_dag accepts and rejects."""
    accepted = [
        # single chain through every vertex
        [[0, 1, 0],
         [0, 0, 1],
         [0, 0, 0]],
        # chain plus a direct edge from the first to the last vertex
        [[0, 1, 1],
         [0, 0, 1],
         [0, 0, 0]],
        # two parallel branches that rejoin at the last vertex
        [[0, 1, 1, 0],
         [0, 0, 0, 1],
         [0, 0, 0, 1],
         [0, 0, 0, 0]],
    ]
    for adjacency in accepted:
      self.assertTrue(graph_util.is_full_dag(np.array(adjacency)))

    rejected = [
        # vertex 1 not connected to input
        [[0, 0, 1],
         [0, 0, 1],
         [0, 0, 0]],
        # vertex 1 not connected to output
        [[0, 1, 1],
         [0, 0, 0],
         [0, 0, 0]],
        # 1, 3 are connected to each other but disconnected from main path
        [[0, 0, 1, 0, 0],
         [0, 0, 0, 1, 0],
         [0, 0, 0, 0, 1],
         [0, 0, 0, 0, 0],
         [0, 0, 0, 0, 0]],
        # no path from input to output
        [[0, 0, 1, 0],
         [0, 0, 0, 1],
         [0, 0, 0, 0],
         [0, 0, 0, 0]],
        # completely disconnected vertex
        [[0, 1, 0, 0],
         [0, 0, 0, 1],
         [0, 0, 0, 0],
         [0, 0, 0, 0]],
    ]
    for adjacency in rejected:
      self.assertFalse(graph_util.is_full_dag(np.array(adjacency)))
Esempio n. 4
0
def make_graphs(vertices, bits):
    """Return one record per op labeling of the graph encoded by ``bits``.

    The adjacency matrix comes from ``graph_util.gen_is_edge_fn(bits)``.
    An empty list is returned when the matrix has more than ``max_edges``
    edges or fails ``graph_util.is_full_dag``. ``max_edges`` and
    ``num_ops`` are read from the enclosing scope (presumably module-level
    settings -- they are not defined in this function).

    Args:
        vertices: side length of the square adjacency matrix.
        bits: integer handed to ``graph_util.gen_is_edge_fn``.

    Returns:
        A list of dicts with keys ``"hash"``, ``"adj"`` and ``"labeling"``,
        one per labeling of the interior vertices.
    """
    adjacency = np.fromfunction(graph_util.gen_is_edge_fn(bits),
                                (vertices, vertices),
                                dtype=np.int8)

    # The edge budget is tested first; is_full_dag only runs when it passes.
    over_budget = graph_util.num_edges(adjacency) > max_edges
    if over_budget or not graph_util.is_full_dag(adjacency):
        return []

    # One range of op ids per interior vertex (none when vertices <= 2).
    interior_choices = [range(num_ops) for _ in range(vertices - 2)]

    records = []
    for interior in itertools.product(*interior_choices):
        # First and last vertices always carry the fixed labels -1 and -2.
        full_labeling = [-1, *interior, -2]
        record = {
            "hash": graph_util.hash_module(adjacency, full_labeling),
            "adj": adjacency.tolist(),
            "labeling": full_labeling,
        }
        records.append(record)

    return records
Esempio n. 5
0
def generate_graphs(nasbench):
    """Yield (token sequence, accuracy) pairs for every enumerated graph.

    Graphs with 2 to 7 vertices are enumerated from bit strings; those that
    fail ``graph_util.is_full_dag`` or have more than 9 edges are skipped.
    Each remaining graph is paired with every assignment of CONV1X1,
    CONV3X3 or MAXPOOL3X3 to its interior vertices.

    NOTE(review): an earlier docstring stated that y is zero for
    architectures missing from NASBench-101; no such fallback is visible in
    this function -- whatever ``nasbench.get_metrics_from_spec`` does for
    such a spec applies.

    Arguments:

    nasbench: NASBench
        object providing ``get_metrics_from_spec`` (presumably the NASBench
        class from the official nas bench release)

    Returns:

    generator: Iterator
        a generator that yields tuples ``(x, y)``: ``x`` is the
        concatenation of START, the op ids, SEPARATOR, the shifted
        upper-triangular adjacency bits, STOP and PAD tokens; ``y`` is a
        float32 array of shape [1] holding the mean final test accuracy

    """

    # these settings were used in the NASBench-101 paper
    vertex_limit = 7
    edge_limit = 9
    epoch_budget = 108
    triu_capacity = vertex_limit * (vertex_limit - 1) // 2
    # combined length of the op section and adjacency section at full size
    unpadded_capacity = vertex_limit + triu_capacity

    def mean_test_accuracy(op_ids, adjacency):
        """Look up the architecture and average its final test accuracy."""
        spec = api.ModelSpec(matrix=adjacency,
                             ops=[ID_TO_NODE[op_id] for op_id in op_ids])
        stats = nasbench.get_metrics_from_spec(spec)
        runs = stats[1][epoch_budget]
        accuracies = [run["final_test_accuracy"] for run in runs]
        return np.mean(accuracies).astype(np.float32).reshape([1])

    for vertices in range(2, vertex_limit + 1):
        n_bitstrings = 2 ** (vertices * (vertices - 1) // 2)
        for bits in range(n_bitstrings):

            # adjacency matrix described by this bit string
            adjacency = np.fromfunction(graph_util.gen_is_edge_fn(bits),
                                        (vertices, vertices),
                                        dtype=np.int8)

            # skip graphs that can be pruned or break the edge budget
            if not graph_util.is_full_dag(adjacency):
                continue
            if graph_util.num_edges(adjacency) > edge_limit:
                continue

            # strictly-upper-triangular entries flattened into a vector
            upper = adjacency[np.triu_indices(adjacency.shape[0], k=1)]

            # one list of candidate ops per interior vertex
            interior_choices = [[CONV1X1, CONV3X3, MAXPOOL3X3]
                                for _ in range(vertices - 2)]

            for interior in itertools.product(*interior_choices):

                # op names -> integer ids, bracketed by INPUT and OUTPUT
                op_names = [INPUT, *interior, OUTPUT]
                op_ids = np.array(
                    [NODE_TO_ID[name] for name in op_names]).astype(np.int32)

                # pad so every emitted sequence has the same length
                n_pad = unpadded_capacity - op_ids.size - upper.size

                tokens = np.concatenate(
                    [[NODE_TO_ID[START]],
                     op_ids,
                     [NODE_TO_ID[SEPARATOR]],
                     upper + NODE_TO_ID[ADJACENCY_ZERO],
                     [NODE_TO_ID[STOP]],
                     [NODE_TO_ID[PAD]] * n_pad],
                    axis=0)

                yield tokens, mean_test_accuracy(op_ids, adjacency)
Esempio n. 6
0
def main(_):
    """Enumerate graphs within the FLAGS limits and write hash buckets as JSON.

    For every vertex count from ``FLAGS.min_vertices`` to
    ``FLAGS.max_vertices`` and every edge bit string, the adjacency matrix is
    built with ``graph_util.gen_is_edge_fn``. Matrices are skipped when they
    fail ``graph_util.is_full_dag``, fall outside
    ``[FLAGS.min_edges, FLAGS.max_edges]``, or are flagged by
    ``graph_util.hanging_edge``. Each surviving matrix is combined with every
    labeling of its interior vertices and hashed with
    ``graph_util.hash_module``; the first graph seen for each hash is kept.
    The buckets are written to ``FLAGS.output_file``.

    Args:
        _: unused positional argument.

    Raises:
        SystemExit: with status 1 when ``FLAGS.verify_isomorphism`` is set
            and two graphs that are not isomorphic share a fingerprint.
    """
    total_graphs = 0  # Total number of graphs (including isomorphisms)
    total_unlabeled_graphs = 0  # Total number of unlabeled graphs
    # hash --> (matrix, label) for the canonical graph associated with each hash
    buckets = {}

    logging.info('Using %d vertices, %d op labels, min %d max %d edges',
                 FLAGS.max_vertices, FLAGS.num_ops, FLAGS.min_edges,
                 FLAGS.max_edges)
    for vertices in range(FLAGS.min_vertices, FLAGS.max_vertices + 1):
        for bits in range(2**(vertices * (vertices - 1) // 2)):
            # Progress indicator for the (potentially very long) bit sweep.
            if bits % 100000 == 0:
                print('bits:', bits)

            # Construct adj matrix from bit string
            matrix = np.fromfunction(graph_util.gen_is_edge_fn(bits),
                                     (vertices, vertices),
                                     dtype=np.int8)

            # Discard any graphs which can be pruned or exceed constraints.
            # The edge count is computed once, and only for full DAGs.
            if not graph_util.is_full_dag(matrix):
                continue
            edge_count = graph_util.num_edges(matrix)
            if edge_count > FLAGS.max_edges or edge_count < FLAGS.min_edges:
                continue

            # this step should be redundant with is_full_dag()
            if graph_util.hanging_edge(matrix):
                print(np.array(matrix))
                continue

            print('found valid ulabeled graph')
            print(matrix)
            total_unlabeled_graphs += 1

            # Iterate through all possible labelings
            for labeling in itertools.product(
                    *[range(FLAGS.num_ops) for _ in range(vertices - 2)]):
                total_graphs += 1
                labeling = [-1] + list(labeling) + [-2]
                fingerprint = graph_util.hash_module(matrix, labeling)

                # todo: check if hash is in nasbench
                if fingerprint not in buckets:
                    buckets[fingerprint] = (matrix.tolist(), labeling)

                # This catches the "false positive" case of two models which
                # are not isomorphic hashing to the same bucket.
                elif FLAGS.verify_isomorphism:
                    canonical_graph = buckets[fingerprint]
                    if not graph_util.is_isomorphic(
                        (matrix.tolist(), labeling), canonical_graph):
                        logging.fatal(
                            'Matrix:\n%s\nLabel: %s\nis not isomorphic to'
                            ' canonical matrix:\n%s\nLabel: %s', str(matrix),
                            str(labeling), str(canonical_graph[0]),
                            str(canonical_graph[1]))
                        # If the logging backend does not terminate on
                        # fatal, report failure instead of status 0.
                        sys.exit(1)

        # Running totals are cumulative across vertex counts.
        logging.info('Up to %d vertices: %d graphs (%d without hashing)',
                     vertices, len(buckets), total_graphs)
        logging.info('%d unlabeled graphs', total_unlabeled_graphs)

    print('finished')

    with tf.io.gfile.GFile(FLAGS.output_file, 'w') as f:
        print('outputting now to ', FLAGS.output_file)
        json.dump(buckets, f, sort_keys=True)