Code Example #1
    def test_normal(self):
        graphs = helper.get_complete_graphs(num_graphs=NUM_GRAPHS, as_tuples=False)
        gram_matrix = spgk.transform(graphs, depth=DEPTH)

        # Every pairwise similarity between the complete graphs should be the
        # maximal, normalized value of exactly 1.
        for row in gram_matrix:
            for similarity in row:
                self.assertEqual(similarity, 1)
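This test method and the one in the next example are excerpts from a test class. A minimal scaffold they might live in could look like the following; the import paths and the NUM_GRAPHS/DEPTH values are hypothetical stand-ins, not the repository's actual settings.

import unittest

import networkx as nx

import spgk               # hypothetical import path for the repo's spgk module
from tests import helper  # hypothetical import path for the test helper

# Hypothetical module-level constants used by the tests.
NUM_GRAPHS = 5
DEPTH = 2


class SpgkTest(unittest.TestCase):
    # test_normal and test_no_similarity (shown in these examples) go here.
    ...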
Code Example #2
    def test_no_similarity(self):
        graph_1 = nx.Graph()
        graph_1.add_edge('1', '2')

        graph_2 = nx.Graph()
        graph_2.add_edge('3', '4')

        graphs = [graph_1, graph_2]

        gram_matrix = spgk.transform(graphs, depth=DEPTH)

        for graph_idx, row in enumerate(gram_matrix):
            row_max = max(row)
            for other_graph_idx, similarity in enumerate(row):
                if graph_idx == other_graph_idx:
                    # A graph compared with itself should score above 0, and no
                    # other graph should be more similar than the graph itself,
                    # so the diagonal entry must also be the row maximum.
                    self.assertGreater(similarity, 0)
                    self.assertEqual(similarity, row_max)
                else:
                    # The two graphs share no node labels, so their similarity
                    # should be exactly 0.
                    self.assertEqual(similarity, 0)
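For intuition, here is a naive sketch of the contract these tests imply for spgk.transform: a normalized gram matrix of shortest-path-kernel similarities in which node identifiers act as labels. This is an illustrative reading under those assumptions, not the repository's implementation.

import networkx as nx
import numpy as np


def naive_spgk_similarity(g1, g2, depth):
    # Shortest-path length maps, truncated at `depth` hops.
    sp1 = dict(nx.all_pairs_shortest_path_length(g1, cutoff=depth))
    sp2 = dict(nx.all_pairs_shortest_path_length(g2, cutoff=depth))
    # Count (source, target) pairs that occur in both graphs with the same
    # distance, treating node identifiers as labels (as the tests above do).
    return sum(1
               for u, lengths in sp1.items()
               for v, d in lengths.items()
               if sp2.get(u, {}).get(v) == d)


def naive_transform(graphs, depth):
    n = len(graphs)
    K = np.zeros((n, n))
    for i in range(n):
        for j in range(n):
            K[i, j] = naive_spgk_similarity(graphs[i], graphs[j], depth)
    # Normalize so self-similarity is exactly 1; graphs with disjoint labels
    # keep a similarity of 0, as test_no_similarity expects.
    diag = np.sqrt(np.diag(K))
    return K / np.outer(diag, diag)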
Code Example #3
import os
import pickle

import numpy as np
import sklearn.base
import sklearn.model_selection

# spgk, dataset_helper, graph_helper, filename_utils, filter_utils, sort,
# FastWLGraphKernelTransformer and LOGGER are assumed to be imported from the
# surrounding repository.


def process_graph_cache_file(graph_cache_file, args):
    graph_cache_filename = os.path.basename(graph_cache_file).split('.')[0]
    dataset = filename_utils.get_dataset_from_filename(graph_cache_file)

    # Skip phi caches (already-transformed files) and anything excluded by the
    # include/exclude/dataset filters. The '.phi.' check has to run against the
    # full basename: the extensions were already stripped from the stem above,
    # so it could never match there.
    if '.phi.' in os.path.basename(graph_cache_file) or not filter_utils.file_should_be_processed(
            graph_cache_filename, args.include_filter, args.exclude_filter,
            args.limit_dataset):
        return

    LOGGER.info('{:15} starting ({})'.format(dataset, graph_cache_filename))

    # Fast Weisfeiler-Lehman (WL) feature transformer; h controls the number
    # of WL relabeling iterations.
    fast_wl_trans = FastWLGraphKernelTransformer(
        h=args.wl_h, use_early_stopping=False, truncate_to_highest_label=False)

    try:
        phi_graph_cache_file = graph_cache_file.replace('.npy', '.phi.npy')
        X_graphs, Y = dataset_helper.get_dataset_cached(graph_cache_file)
        X_graphs = graph_helper.get_graphs_only(X_graphs)

        # Kernel: WL
        if args.use_wl:
            used_phi_graph_cache_file = phi_graph_cache_file
            splitted_phi_graph_cache_file = phi_graph_cache_file.replace(
                '.phi', '.splitted.phi')
            phi_same_label_graph_cache_file = phi_graph_cache_file.replace(
                dataset, '{}_same-label'.format(dataset)).replace(
                    '.phi', '.splitted.phi')

            # Stop here if all files have already been created
            if not args.force and all(os.path.exists(f) for f in [
                    splitted_phi_graph_cache_file, used_phi_graph_cache_file,
                    phi_same_label_graph_cache_file]):
                return

            # Work on copies so the cached graphs and labels are not mutated.
            X_, Y_ = np.array(X_graphs), np.array(Y)
            if args.wl_sort_classes:
                X_, Y_ = sort(X_, Y_, by=Y_)

            num_vertices = len(graph_helper.get_all_node_labels(X_))
            fast_wl_trans.set_params(phi_dim=num_vertices)

            X_train, X_test, Y_train, Y_test = sklearn.model_selection.train_test_split(
                np.copy(X_),
                np.copy(Y_),
                stratify=Y_,
                test_size=args.wl_test_size)

            X_train, Y_train = sort(X_train, Y_train, by=Y_train)
            X_test, Y_test = sort(X_test, Y_test, by=Y_test)

            # Split (train/test) version
            if args.force or not os.path.exists(splitted_phi_graph_cache_file):
                t = sklearn.base.clone(fast_wl_trans)
                phi_train = t.fit_transform(np.copy(X_train))
                phi_test = t.transform(np.copy(X_test))

                with open(splitted_phi_graph_cache_file, 'wb') as f:
                    pickle.dump((phi_train, phi_test, X_train, X_test, Y_train,
                                 Y_test), f)

            # Split (train/test) version with all nodes forced to the same label,
            # matching the '_same-label' cache filename
            if args.force or not os.path.exists(
                    phi_same_label_graph_cache_file):
                t = sklearn.base.clone(fast_wl_trans).set_params(same_label=True)
                phi_train = t.fit_transform(X_train)
                phi_test = t.transform(X_test)

                with open(phi_same_label_graph_cache_file, 'wb') as f:
                    pickle.dump((phi_train, phi_test, X_train, X_test, Y_train,
                                 Y_test), f)

            # Whole dataset
            if args.force or not os.path.exists(used_phi_graph_cache_file):
                t = sklearn.base.clone(fast_wl_trans)
                with open(used_phi_graph_cache_file, 'wb') as f:
                    pickle.dump((t.fit_transform(X_), Y_), f)

        # Kernel: SPGK (shortest-path graph kernel)
        if args.use_spgk:
            for depth in args.spgk_depth:
                spgk_graph_cache_file = graph_cache_file.replace(
                    '.npy', '.spgk-{}.gram.npy'.format(depth))

                if args.force or not os.path.exists(spgk_graph_cache_file):
                    K = spgk.transform(X_graphs, depth=depth)

                    with open(spgk_graph_cache_file, 'wb') as f:
                        pickle.dump((K, Y), f)
    except Exception as e:
        LOGGER.exception(e)

    LOGGER.info('{:15} finished ({})'.format(dataset, graph_cache_filename))
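A hypothetical driver for this function might iterate over the cached graph files and pass the parsed CLI arguments through. The glob pattern and entry point below are illustrative assumptions, not the repository's actual main script.

import glob


def main(args):
    # Process every cached graph file sequentially; each call logs its own
    # progress and catches per-file exceptions itself.
    for graph_cache_file in sorted(glob.glob('data/CACHE/*.npy')):
        process_graph_cache_file(graph_cache_file, args)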