def objective_function(self, config, budget=108):
    edge_prob = []
    for i in range(VERTICES * (VERTICES - 1) // 2):
        edge_prob.append(config["edge_%d" % i])

    idx = np.argsort(edge_prob)[::-1][:config["num_edges"]]
    binary_encoding = np.zeros(len(edge_prob))
    binary_encoding[idx] = 1

    matrix = np.zeros([VERTICES, VERTICES], dtype=np.int8)
    idx = np.triu_indices(matrix.shape[0], k=1)
    for i in range(VERTICES * (VERTICES - 1) // 2):
        row = idx[0][i]
        col = idx[1][i]
        matrix[row, col] = binary_encoding[i]

    if graph_util.num_edges(matrix) > MAX_EDGES:
        self.record_invalid(config, 1, 1, 0)
        return 1, 0

    labeling = [config["op_node_%d" % i] for i in range(5)]
    labeling = ['input'] + list(labeling) + ['output']
    model_spec = api.ModelSpec(matrix, labeling)

    try:
        data = self.dataset.query(model_spec, epochs=budget)
    except api.OutOfDomainError:
        self.record_invalid(config, 1, 1, 0)
        return 1, 0

    self.record_valid(config, model_spec, budget)
    return 1 - data["validation_accuracy"], data["training_time"]

def objective_function(self, config, budget=108):
    # bit = 0
    # for i in range(VERTICES * (VERTICES - 1) // 2):
    #     bit += config["edge_%d" % i] * 2 ** i
    # matrix = np.fromfunction(graph_util.gen_is_edge_fn(bit),
    #                          (VERTICES, VERTICES),
    #                          dtype=np.int8)
    matrix = np.zeros([VERTICES, VERTICES], dtype=np.int8)
    idx = np.triu_indices(matrix.shape[0], k=1)
    for i in range(VERTICES * (VERTICES - 1) // 2):
        row = idx[0][i]
        col = idx[1][i]
        matrix[row, col] = config["edge_%d" % i]

    # if not graph_util.is_full_dag(matrix) or graph_util.num_edges(matrix) > MAX_EDGES:
    if graph_util.num_edges(matrix) > MAX_EDGES:
        self.record_invalid(config, 1, 1, 0)
        return 1, 0

    labeling = [config["op_node_%d" % i] for i in range(5)]
    labeling = ['input'] + list(labeling) + ['output']
    model_spec = api.ModelSpec(matrix, labeling)

    try:
        data = self.dataset.query(model_spec, epochs=budget)
    except api.OutOfDomainError:
        self.record_invalid(config, 1, 1, 0)
        return 1, 0

    self.record_valid(config, model_spec, budget)
    return 1 - data["validation_accuracy"], data["training_time"]

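# Standalone sketch (not part of the snippets above) of the upper-triangle encoding
# both objective_function variants rely on: the 21 "edge_i" entries of a config fill
# the strict upper triangle of a 7x7 adjacency matrix in row-major order. The config
# values below are a hypothetical example; VERTICES = 7 follows NASBench-101.
import numpy as np

VERTICES = 7
NUM_EDGE_SLOTS = VERTICES * (VERTICES - 1) // 2  # 21 possible edges

config = {"edge_%d" % i: int(i % 4 == 0) for i in range(NUM_EDGE_SLOTS)}

matrix = np.zeros([VERTICES, VERTICES], dtype=np.int8)
rows, cols = np.triu_indices(VERTICES, k=1)
for i in range(NUM_EDGE_SLOTS):
    matrix[rows[i], cols[i]] = config["edge_%d" % i]

print(matrix)        # adjacency matrix of the candidate cell
print(matrix.sum())  # number of edges; must not exceed MAX_EDGES = 9 to be valid
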
def config2data_A(config, b):
    VERTICES = 7
    MAX_EDGES = 9
    budget = 108

    matrix = np.zeros([VERTICES, VERTICES], dtype=np.int8)
    idx = np.triu_indices(matrix.shape[0], k=1)
    for i in range(VERTICES * (VERTICES - 1) // 2):
        row = idx[0][i]
        col = idx[1][i]
        matrix[row, col] = config["edge_%d" % i]

    # if not graph_util.is_full_dag(matrix) or graph_util.num_edges(matrix) > MAX_EDGES:
    if graph_util.num_edges(matrix) > MAX_EDGES:
        return None, None

    labeling = [config["op_node_%d" % i] for i in range(5)]
    labeling = ['input'] + list(labeling) + ['output']
    model_spec = api.ModelSpec(matrix, labeling)

    try:
        data = b.dataset.query(model_spec, epochs=budget)
        msp = b.dataset.get_metrics_from_spec(model_spec)
    except api.OutOfDomainError:
        return None, None

    test_acc = [msp[1][108][k]['final_test_accuracy'] for k in range(3)]
    return data, np.mean(test_acc)

def objective_function(self, config, budget=108):
    bitlist = [0] * (VERTICES * (VERTICES - 1) // 2)
    for i in range(MAX_EDGES):
        bitlist[config["edge_%d" % i]] = 1
    out = 0
    for bit in bitlist:
        out = (out << 1) | bit

    matrix = np.fromfunction(graph_util.gen_is_edge_fn(out),
                             (VERTICES, VERTICES),
                             dtype=np.int8)

    # if not graph_util.is_full_dag(matrix) or graph_util.num_edges(matrix) > MAX_EDGES:
    if graph_util.num_edges(matrix) > MAX_EDGES:
        self.record_invalid(config, 1, 1, 0)
        return 1, 0

    labeling = [config["op_node_%d" % i] for i in range(5)]
    labeling = ['input'] + list(labeling) + ['output']
    model_spec = api.ModelSpec(matrix, labeling)

    try:
        data = self.dataset.query(model_spec, epochs=budget)
    except api.OutOfDomainError:
        self.record_invalid(config, 1, 1, 0)
        return 1, 0

    self.record_valid(config, model_spec, budget)
    return 1 - data["validation_accuracy"], data["training_time"]

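# Standalone sketch of the alternative encoding used above: instead of one binary
# variable per possible edge, the config stores MAX_EDGES indices into the list of
# 21 edge slots, which are then packed into a single integer bit string that
# graph_util.gen_is_edge_fn() consumes. The config values here are hypothetical.
VERTICES = 7
MAX_EDGES = 9

config = {"edge_%d" % i: i for i in range(MAX_EDGES)}  # pick the first 9 edge slots

bitlist = [0] * (VERTICES * (VERTICES - 1) // 2)
for i in range(MAX_EDGES):
    bitlist[config["edge_%d" % i]] = 1   # duplicate indices collapse to one edge

out = 0
for bit in bitlist:
    out = (out << 1) | bit               # most significant bit first

print(bin(out))  # integer bit string representing the chosen edges
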
def generate_buckets(vertices, bits, max_edges, num_ops, verify_isomorphism):
    buckets = {}
    matrix = np.fromfunction(graph_util.gen_is_edge_fn(bits),
                             (vertices, vertices),
                             dtype=np.int8)

    if (not graph_util.is_full_dag(matrix) or
            graph_util.num_edges(matrix) > max_edges):
        return

    # Iterate through all possible labelings
    for labeling in itertools.product(
            *[range(num_ops) for _ in range(vertices - 2)]):
        labeling = [-1] + list(labeling) + [-2]
        fingerprint = graph_util.hash_module(matrix, labeling)

        if fingerprint not in buckets:
            buckets[fingerprint] = (matrix.tolist(), labeling)

        # This catches the "false positive" case of two models which are not
        # isomorphic hashing to the same bucket.
        elif verify_isomorphism:
            canonical_graph = buckets[fingerprint]
            if not graph_util.is_isomorphic(
                    (matrix.tolist(), labeling), canonical_graph):
                logging.fatal(
                    'Matrix:\n%s\nLabel: %s\nis not isomorphic to'
                    ' canonical matrix:\n%s\nLabel: %s',
                    str(matrix), str(labeling),
                    str(canonical_graph[0]), str(canonical_graph[1]))
                sys.exit()

    return buckets

def config2data_B(config, b):
    VERTICES = 7
    MAX_EDGES = 9
    budget = 108

    bitlist = [0] * (VERTICES * (VERTICES - 1) // 2)
    for i in range(MAX_EDGES):
        bitlist[config["edge_%d" % i]] = 1
    out = 0
    for bit in bitlist:
        out = (out << 1) | bit

    matrix = np.fromfunction(graph_util.gen_is_edge_fn(out),
                             (VERTICES, VERTICES),
                             dtype=np.int8)

    # if not graph_util.is_full_dag(matrix) or graph_util.num_edges(matrix) > MAX_EDGES:
    if graph_util.num_edges(matrix) > MAX_EDGES:
        return None, None

    labeling = [config["op_node_%d" % i] for i in range(5)]
    labeling = ['input'] + list(labeling) + ['output']
    model_spec = api.ModelSpec(matrix, labeling)

    try:
        data = b.dataset.query(model_spec, epochs=budget)
        msp = b.dataset.get_metrics_from_spec(model_spec)
    except api.OutOfDomainError:
        return None, None

    test_acc = [msp[1][108][k]['final_test_accuracy'] for k in range(3)]
    return data, test_acc

def _query_benchmark(self, config: Dict, budget: int = 108) -> Dict:
    """
    Copy of the 'objective_function' from nas_cifar10.py.
    We adapted the file so that the complete result is returned. The original
    implementation returns only the validation error. Now it can also return
    the test loss for a given configuration.

    Parameters
    ----------
    config : Dict
    budget : int
        The number of epochs. Must be one of: 4, 12, 36, 108. Otherwise an
        accuracy of 0 is returned.

    Returns
    -------
    Dict
    """
    # Unify the return value to a dictionary.
    failure = {"test_accuracy": 0,
               "validation_accuracy": 0,
               "training_time": 0,
               "info": "failure"}

    if self.benchmark.multi_fidelity is False:
        assert budget == 108

    edge_prob = []
    for i in range(VERTICES * (VERTICES - 1) // 2):
        edge_prob.append(config["edge_%d" % i])

    idx = np.argsort(edge_prob)[::-1][:config["num_edges"]]
    binary_encoding = np.zeros(len(edge_prob))
    binary_encoding[idx] = 1

    matrix = np.zeros([VERTICES, VERTICES], dtype=np.int8)
    idx = np.triu_indices(matrix.shape[0], k=1)
    for i in range(VERTICES * (VERTICES - 1) // 2):
        row = idx[0][i]
        col = idx[1][i]
        matrix[row, col] = binary_encoding[i]

    if graph_util.num_edges(matrix) > MAX_EDGES:
        self.benchmark.record_invalid(config, 1, 1, 0)
        return failure

    labeling = [config["op_node_%d" % i] for i in range(5)]
    labeling = ['input'] + list(labeling) + ['output']
    model_spec = api.ModelSpec(matrix, labeling)

    try:
        data = self.benchmark.dataset.query(model_spec, epochs=budget)
    except api.OutOfDomainError:
        self.benchmark.record_invalid(config, 1, 1, 0)
        return failure

    self.benchmark.record_valid(config, data, model_spec)

    # We don't need this field.
    data.pop('module_adjacency')

    return data

def _query_benchmark(self, config: Dict, budget: int = 108) -> Dict:
    """
    Copy of the 'objective_function' from nas_cifar10.py.
    We adapted the file so that the complete result is returned. The original
    implementation returns only the validation error. Now it can also return
    the test loss for a given configuration.

    Parameters
    ----------
    config : Dict
    budget : int
        The number of epochs. Must be one of: 4, 12, 36, 108. Otherwise an
        accuracy of 0 is returned.

    Returns
    -------
    Dict
    """
    failure = {"test_accuracy": 0,
               "validation_accuracy": 0,
               "training_time": 0,
               "info": "failure"}

    if self.benchmark.multi_fidelity is False:
        assert budget == 108

    bitlist = [0] * (VERTICES * (VERTICES - 1) // 2)
    for i in range(MAX_EDGES):
        bitlist[config["edge_%d" % i]] = 1
    out = 0
    for bit in bitlist:
        out = (out << 1) | bit

    matrix = np.fromfunction(graph_util.gen_is_edge_fn(out),
                             (VERTICES, VERTICES),
                             dtype=np.int8)

    # if not graph_util.is_full_dag(matrix) or graph_util.num_edges(matrix) > MAX_EDGES:
    if graph_util.num_edges(matrix) > MAX_EDGES:
        self.benchmark.record_invalid(config, 1, 1, 0)
        return failure

    labeling = [config["op_node_%d" % i] for i in range(5)]
    labeling = ['input'] + list(labeling) + ['output']
    model_spec = api.ModelSpec(matrix, labeling)

    try:
        data = self.benchmark.dataset.query(model_spec, epochs=budget)
    except api.OutOfDomainError:
        self.benchmark.record_invalid(config, 1, 1, 0)
        return failure

    self.benchmark.record_valid(config, data, model_spec)

    # We don't need this field.
    data.pop('module_adjacency')

    return data

def generate_graph(max_vertices, max_edges, num_ops, verify_isomorphism, output_file):
    FLAGS = Namespace(max_vertices=max_vertices, num_ops=num_ops,
                      max_edges=max_edges, verify_isomorphism=verify_isomorphism,
                      output_file=output_file)

    total_graphs = 0  # Total number of graphs (including isomorphisms)
    # hash --> (matrix, label) for the canonical graph associated with each hash
    buckets = {}

    logging.info('Using %d vertices, %d op labels, max %d edges',
                 FLAGS.max_vertices, FLAGS.num_ops, FLAGS.max_edges)

    for vertices in range(2, FLAGS.max_vertices + 1):
        for bits in range(2 ** (vertices * (vertices - 1) // 2)):
            # Construct adj matrix from bit string
            matrix = np.fromfunction(graph_util.gen_is_edge_fn(bits),
                                     (vertices, vertices),
                                     dtype=np.int8)

            # Discard any graphs which can be pruned or exceed constraints
            if (not graph_util.is_full_dag(matrix) or
                    graph_util.num_edges(matrix) > FLAGS.max_edges):
                continue

            # Iterate through all possible labelings
            for labeling in itertools.product(
                    *[range(FLAGS.num_ops) for _ in range(vertices - 2)]):
                total_graphs += 1
                labeling = [-1] + list(labeling) + [-2]
                fingerprint = graph_util.hash_module(matrix, labeling)

                if fingerprint not in buckets:
                    buckets[fingerprint] = (matrix.tolist(), labeling)

                # This catches the "false positive" case of two models which are not
                # isomorphic hashing to the same bucket.
                elif FLAGS.verify_isomorphism:
                    canonical_graph = buckets[fingerprint]
                    if not graph_util.is_isomorphic(
                            (matrix.tolist(), labeling), canonical_graph):
                        logging.fatal(
                            'Matrix:\n%s\nLabel: %s\nis not isomorphic to'
                            ' canonical matrix:\n%s\nLabel: %s',
                            str(matrix), str(labeling),
                            str(canonical_graph[0]), str(canonical_graph[1]))
                        sys.exit()

        logging.info('Up to %d vertices: %d graphs (%d without hashing)',
                     vertices, len(buckets), total_graphs)

    with open(FLAGS.output_file, 'w') as f:
        json.dump(buckets, f, sort_keys=True)

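# Hypothetical invocation of generate_graph() above, using the NASBench-101
# search-space constraints (at most 7 vertices, 9 edges, 3 op labels);
# the output path is an assumed example.
generate_graph(max_vertices=7, max_edges=9, num_ops=3,
               verify_isomorphism=True,
               output_file='nasbench101_graph_buckets.json')
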
def _query_benchmark(self, config: Dict, run_index: int, budget: int = 108) -> Dict:
    """
    Copied from the 'objective_function' from nas_cifar10.py.
    We adapted the file so that the complete result is returned. The original
    implementation returns only the validation error. Now it can also return
    the test loss for a given configuration.

    Parameters
    ----------
    config : Dict
    run_index : int
        Specifies the seed to use. Can be one of 0, 1, 2.
    budget : int
        The number of epochs. Must be one of: 4, 12, 36, 108. Otherwise an
        accuracy of 0 is returned.

    Returns
    -------
    Dict
    """
    failure = {"test_accuracy": 0,
               "train_accuracy": 0,
               "validation_accuracy": 0,
               "training_time": 0,
               "info": "failure",
               "trainable_parameters": 0,
               "module_operations": 0}

    if self.benchmark.multi_fidelity is False:
        assert budget == 108

    matrix = np.zeros([VERTICES, VERTICES], dtype=np.int8)
    idx = np.triu_indices(matrix.shape[0], k=1)
    for i in range(VERTICES * (VERTICES - 1) // 2):
        row = idx[0][i]
        col = idx[1][i]
        matrix[row, col] = config["edge_%d" % i]

    # if not graph_util.is_full_dag(matrix) or graph_util.num_edges(matrix) > MAX_EDGES:
    if graph_util.num_edges(matrix) > MAX_EDGES:
        self.benchmark.record_invalid(config, 1, 1, 0)
        return failure

    labeling = [config["op_node_%d" % i] for i in range(5)]
    labeling = ['input'] + list(labeling) + ['output']
    model_spec = api.ModelSpec(matrix, labeling)

    try:
        data = modified_query(self.benchmark, run_index=run_index,
                              model_spec=model_spec, epochs=budget)
    except api.OutOfDomainError:
        self.benchmark.record_invalid(config, 1, 1, 0)
        return failure

    self.benchmark.record_valid(config, data, model_spec)

    # We don't need this field.
    data.pop('module_adjacency')

    return data

def sample(self, n, batch_size=None):
    if self.mode == "eval":
        # return the current rollout
        return [NasBench101Rollout(*self.cur_solution,
                                   search_space=self.search_space)] * n

    assert batch_size is None
    rollouts = []
    cur_matrix, cur_ops = self.cur_solution
    ss = self.search_space
    for n_r in range(n):
        if np.random.rand() < self.mutation_edges_prob:
            while 1:
                edge_ind = np.random.randint(0, ss.num_possible_edges, size=1)
                while graph_util.num_edges(cur_matrix) == ss.max_edges and \
                        cur_matrix[ss.idx[0][edge_ind], ss.idx[1][edge_ind]] == 0:
                    edge_ind = np.random.randint(0, ss.num_possible_edges, size=1)
                new_matrix = cur_matrix.copy()
                new_matrix[ss.idx[0][edge_ind], ss.idx[1][edge_ind]] \
                    = 1 - cur_matrix[ss.idx[0][edge_ind], ss.idx[1][edge_ind]]
                new_rollout = NasBench101Rollout(new_matrix, cur_ops,
                                                 search_space=self.search_space)
                try:
                    ss.nasbench._check_spec(new_rollout.genotype)
                except api.OutOfDomainError:
                    # ignore out-of-domain archs (disconnected)
                    continue
                else:
                    cur_matrix = new_matrix
                    break
        else:
            ops_ind = np.random.randint(0, ss.num_ops, size=1)[0]
            new_ops = np.random.randint(0, ss.num_op_choices - 1, size=1)[0]
            while new_ops == cur_ops[ops_ind]:
                new_ops = np.random.randint(0, ss.num_op_choices - 1, size=1)[0]
            cur_ops[ops_ind] = new_ops
        rollouts.append(NasBench101Rollout(cur_matrix, cur_ops,
                                           search_space=self.search_space))
    return rollouts

def objective_function_from_matrix(self, matrix, budget=108):
    if self.multi_fidelity is False:
        assert budget == 108

    # if not graph_util.is_full_dag(matrix) or graph_util.num_edges(matrix) > MAX_EDGES:
    if graph_util.num_edges(matrix) > MAX_EDGES:
        # self.record_invalid(config, 1, 1, 0)
        return 1, 0

    labeling = [
        'input', 'conv1x1-bn-relu', 'conv3x3-bn-relu', 'conv3x3-bn-relu',
        'conv3x3-bn-relu', 'maxpool3x3', 'output'
    ]
    model_spec = api.ModelSpec(matrix, labeling)

    try:
        data = self.dataset.query(model_spec, epochs=budget)
    except api.OutOfDomainError:
        # self.record_invalid(config, 1, 1, 0)
        return 1, 0

    self.record_valid(matrix, data, model_spec)
    return 1 - data["validation_accuracy"], data["training_time"]

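# Hedged usage sketch for objective_function_from_matrix(): a plain 7-node chain
# (input -> op1 -> ... -> op5 -> output) gives a valid adjacency matrix with 6 edges,
# well under MAX_EDGES = 9. `benchmark` stands in for an instance of the class above
# and is an assumption, so the actual call is left commented out.
import numpy as np

chain = np.zeros([7, 7], dtype=np.int8)
for v in range(6):
    chain[v, v + 1] = 1  # connect consecutive nodes

# regret, cost = benchmark.objective_function_from_matrix(chain, budget=108)
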
def make_graphs(vertices, bits):
    matrix = np.fromfunction(graph_util.gen_is_edge_fn(bits),
                             (vertices, vertices),
                             dtype=np.int8)
    if graph_util.num_edges(matrix) > max_edges:
        return []
    if not graph_util.is_full_dag(matrix):
        return []
    out = []
    for labeling in itertools.product(
            *[range(num_ops) for _ in range(vertices - 2)]):
        labeling = [-1] + list(labeling) + [-2]
        out.append({
            "hash": graph_util.hash_module(matrix, labeling),
            "adj": matrix.tolist(),
            "labeling": labeling,
        })
    return out

def evaluate(self, c):
    c = self._check_shape(c)
    evals = []
    test_error = []
    training_time = []
    matrix = np.zeros([VERTICES, VERTICES], dtype=np.int8)
    idx = np.triu_indices(matrix.shape[0], k=1)
    for _c in c:
        for i in range(MATRIX_ELEMENTS):
            row = idx[0][i]
            col = idx[1][i]
            matrix[row, col] = _c[i]
        if graph_util.num_edges(matrix) > MAX_EDGES:
            evals.append(1)
            test_error.append(1)
            training_time.append(0)
        else:
            ops = [OPS[i] for i in _c[MATRIX_ELEMENTS:]]
            ops = ["input"] + list(ops) + ["output"]
            model_spec = api.ModelSpec(matrix=matrix, ops=ops)
            try:
                data = self.nasbench.query(model_spec, epochs=108)
                evals.append(1 - data["validation_accuracy"])
                test_error.append(1 - data["test_accuracy"])
                training_time.append(data["training_time"])
                self.estimated_wall_clock_time += data["training_time"]
            except api.OutOfDomainError:
                evals.append(1)
                test_error.append(1)
                training_time.append(0)
    evals = np.array(evals)
    evals = evals if self.minimize else -evals
    test_error = np.array(test_error)
    training_time = np.array(training_time)
    info = {"training_time": training_time, "test_error": test_error}
    return evals, info

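# Standalone sketch of the flat candidate layout evaluate() expects: the first
# MATRIX_ELEMENTS entries are the binary upper-triangle adjacency values, followed
# by one op index per intermediate node. MATRIX_ELEMENTS = 21 and the OPS list are
# assumptions inferred from the snippet above, not definitions taken from its module.
import numpy as np

MATRIX_ELEMENTS = 7 * 6 // 2  # 21 upper-triangle entries for 7 vertices
OPS = ["conv1x1-bn-relu", "conv3x3-bn-relu", "maxpool3x3"]

adjacency_part = np.random.randint(0, 2, size=MATRIX_ELEMENTS)
ops_part = np.random.randint(0, len(OPS), size=5)  # 5 intermediate nodes
candidate = np.concatenate([adjacency_part, ops_part])

print(candidate.shape)  # (26,) -> one flat vector per architecture
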
def generate_graphs(nasbench):
    """A function that generates all possible graphs that could have been
    processed via NAS Bench, and yields tuples of x values and y values,
    where y is zero when x is not contained in NASBench-101.

    Arguments:

    nasbench: NASBench
        an instantiation of the NASBench class provided in the official
        release of the NAS Bench source code

    Returns:

    generator: Iterator
        a generator that yields tuples of x values and y values, where y is
        zero when x is not contained in NASBench-101
    """

    # these settings were used in the NASBench-101 paper
    max_vertices = 7
    max_edges = 9
    max_epochs = 108
    max_adjacency_size = max_vertices * (max_vertices - 1) // 2

    # a helper function that maps a model architecture to a metric
    def model_to_metric(_ops, _matrix):
        model_spec = api.ModelSpec(matrix=_matrix,
                                   ops=[ID_TO_NODE[t] for t in _ops])
        computed_metrics = nasbench.get_metrics_from_spec(model_spec)[1]
        return np.mean([d["final_test_accuracy"]
                        for d in computed_metrics[max_epochs]]
                       ).astype(np.float32).reshape([1])

    # generate all possible graphs and labellings
    for vertices in range(2, max_vertices + 1):
        for bits in range(2 ** (vertices * (vertices - 1) // 2)):

            # generate an adjacency matrix for the graph
            matrix = np.fromfunction(graph_util.gen_is_edge_fn(bits),
                                     (vertices, vertices),
                                     dtype=np.int8)

            # discard graphs which can be pruned or exceed constraints
            if (not graph_util.is_full_dag(matrix) or
                    graph_util.num_edges(matrix) > max_edges):
                continue

            # convert the binary adjacency matrix to a vector
            vector = matrix[np.triu_indices(matrix.shape[0], k=1)]

            # Iterate through all possible labellings
            for labelling in itertools.product(
                    *[[CONV1X1, CONV3X3, MAXPOOL3X3]
                      for _ in range(vertices - 2)]):

                # convert the graph and labelling to numpy arrays
                ops = [INPUT] + list(labelling) + [OUTPUT]
                ops = np.array([NODE_TO_ID[t] for t in ops]).astype(np.int32)

                # yield samples encoded in a standard sequence format
                yield np.concatenate(
                    [[NODE_TO_ID[START]],
                     ops,
                     [NODE_TO_ID[SEPARATOR]],
                     vector + NODE_TO_ID[ADJACENCY_ZERO],
                     [NODE_TO_ID[STOP]],
                     [NODE_TO_ID[PAD]] * (max_vertices - ops.size +
                                          max_adjacency_size - vector.size)],
                    axis=0), model_to_metric(ops, matrix)

def main(_):
    total_graphs = 0            # Total number of graphs (including isomorphisms)
    total_unlabeled_graphs = 0  # Total number of unlabeled graphs
    # hash --> (matrix, label) for the canonical graph associated with each hash
    buckets = {}

    logging.info('Using %d vertices, %d op labels, min %d max %d edges',
                 FLAGS.max_vertices, FLAGS.num_ops, FLAGS.min_edges, FLAGS.max_edges)

    for vertices in range(FLAGS.min_vertices, FLAGS.max_vertices + 1):
        for bits in range(2 ** (vertices * (vertices - 1) // 2)):
            if bits % 100000 == 0:
                print('bits:', bits)
            # Construct adj matrix from bit string
            matrix = np.fromfunction(graph_util.gen_is_edge_fn(bits),
                                     (vertices, vertices),
                                     dtype=np.int8)

            # Discard any graphs which can be pruned or exceed constraints
            if (not graph_util.is_full_dag(matrix) or
                    graph_util.num_edges(matrix) > FLAGS.max_edges or
                    graph_util.num_edges(matrix) < FLAGS.min_edges):
                continue

            # this step should be redundant with is_full_dag()
            if graph_util.hanging_edge(matrix):
                print(np.array(matrix))
                continue

            print('found valid unlabeled graph')
            print(matrix)
            total_unlabeled_graphs += 1

            # Iterate through all possible labelings
            for labeling in itertools.product(
                    *[range(FLAGS.num_ops) for _ in range(vertices - 2)]):
                total_graphs += 1
                labeling = [-1] + list(labeling) + [-2]
                fingerprint = graph_util.hash_module(matrix, labeling)
                # todo: check if hash is in nasbench

                if fingerprint not in buckets:
                    buckets[fingerprint] = (matrix.tolist(), labeling)

                # This catches the "false positive" case of two models which are not
                # isomorphic hashing to the same bucket.
                elif FLAGS.verify_isomorphism:
                    canonical_graph = buckets[fingerprint]
                    if not graph_util.is_isomorphic(
                            (matrix.tolist(), labeling), canonical_graph):
                        logging.fatal(
                            'Matrix:\n%s\nLabel: %s\nis not isomorphic to'
                            ' canonical matrix:\n%s\nLabel: %s',
                            str(matrix), str(labeling),
                            str(canonical_graph[0]), str(canonical_graph[1]))
                        sys.exit()

        logging.info('Up to %d vertices: %d graphs (%d without hashing)',
                     vertices, len(buckets), total_graphs)
        logging.info('%d unlabeled graphs', total_unlabeled_graphs)

    print('finished')
    with tf.io.gfile.GFile(FLAGS.output_file, 'w') as f:
        print('outputting now to ', FLAGS.output_file)
        json.dump(buckets, f, sort_keys=True)