Example #1
def process() -> tuple:
    # Load the cached graph dataset and keep only the graph objects
    X, Y = dataset_helper.get_dataset_cached(graph_cache_file)
    X = graph_helper.get_graphs_only(X)
    # Convert the networkx graphs to (adjacency, labels) tuples in place
    graph_helper.convert_graphs_to_adjs_tuples(X)
    # Replace all node labels with 0
    X = [(x, [0] * len(y)) for x, y in X]
    estimator, params = task_helper.get_graph_estimator_and_params(X, Y)
    return ClassificationData(X, Y, estimator, params)
Example #2
def transform(self, X, y=None, **fit_params):
    if self.from_nx_to_tuple:
        # networkx graphs -> (adjacency, labels) tuples, converted in place
        X = graph_helper.get_graphs_only(X)
        graph_helper.convert_graphs_to_adjs_tuples(X)
    else:
        # (adjacency, labels) tuples -> networkx graphs, converted in place
        graph_helper.convert_adjs_tuples_to_graphs(X)
    return X
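
A note on the calling convention visible in these examples: without copy=True, convert_graphs_to_adjs_tuples and convert_adjs_tuples_to_graphs mutate the passed list in place; with copy=True (see the later examples) they return converted copies. A minimal round-trip sketch under that assumption (utils.graph_helper is this project's own module, not a published library):

import networkx as nx

from utils import graph_helper

g = nx.Graph()
g.add_edge('A', 'B')

X = [g]
# In place: X now holds (sparse adjacency matrix, node-label list) tuples
graph_helper.convert_graphs_to_adjs_tuples(X)
adj, labels = X[0]

# In place: X holds networkx graphs again
graph_helper.convert_adjs_tuples_to_graphs(X)
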
Example #3
def test_convert_graph_datasets(self):
    for graph_dataset, dataset_name in self.iterate_graph_cache_datasets():
        X, Y = dataset_helper.get_dataset_cached(graph_dataset)
        self.assertTrue(len(X))
        self.assertTrue(len(Y))

        # Convert in place, then spot-check only the first element
        graph_helper.convert_graphs_to_adjs_tuples(X)

        for x in X:
            self.assertTrue(isinstance(x, tuple))
            self.assertTrue(isinstance(x[0], scipy.sparse.spmatrix))
            self.assertTrue(isinstance(x[1], list))
            break
Example #4
def get_graphs(as_adj=False):
    # Two minimal test graphs sharing node 'A'
    g_train = nx.Graph()
    g_train.add_edge('A', 'B')

    g_test = nx.Graph()
    g_test.add_edge('A', 'C')

    if as_adj:
        # copy=True returns converted copies instead of mutating in place
        g_test = graph_helper.convert_graphs_to_adjs_tuples([g_test],
                                                            copy=True)[0]
        g_train = graph_helper.convert_graphs_to_adjs_tuples([g_train],
                                                             copy=True)[0]

    return g_train, g_test
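
For illustration, a hypothetical call site for this fixture (the variable names on the left are assumptions):

g_train, g_test = get_graphs()                   # two small nx.Graph instances
adj_train, adj_test = get_graphs(as_adj=True)    # the same graphs as (adjacency, labels) tuples
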
Example #5
def get_graph_estimator_and_params(X,
                                   Y=None,
                                   reduced: bool = False,
                                   with_node_weights: bool = False,
                                   add_phi_dim=False):
    assert len(X)
    X_ = X

    estimator, params = graph_pipeline.get_params(
        reduced=reduced, with_node_weights=with_node_weights)

    if add_phi_dim:
        # Work on (adjacency, labels) tuples; convert copies if nx graphs were given
        if isinstance(X[0], nx.Graph):
            X_ = graph_helper.convert_graphs_to_adjs_tuples(X, copy=True)
        assert isinstance(X_[0], tuple) and isinstance(X_[0][1], list)
        # Size the fast_wl feature dimension with 5x headroom over the observed vertex count
        num_vertices = get_num_vertices(X_) * 5
        graph_pipeline.add_num_vertices_to_fast_wl_params(params, num_vertices)

    return estimator, params
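
A hedged usage sketch for the helper above (X is assumed to be a list of networkx graphs; the helper handles the tuple conversion itself when add_phi_dim is set):

estimator, params = get_graph_estimator_and_params(X, Y, add_phi_dim=True)
# params now carries a fast_wl vertex count sized at 5x the observed number of vertices
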
Example #6
def _retrieve_node_weights_and_convert_graphs(X,
                                              node_weight_function=None,
                                              same_label=False,
                                              use_directed=True,
                                              use_nx=True):
    X = graph_helper.get_graphs_only(X)
    if not use_directed:
        if use_nx:
            # Collapse directed graphs to undirected networkx graphs
            X = [nx.Graph(x) for x in X]
            assert not np.any([x.is_directed() for x in X])
        else:
            raise NotImplementedError(
                '!use_directed and !use_nx not implemented')
    # Compute per-node weights while X still holds graph objects
    node_weight_factors = get_node_weight_factors(X,
                                                  metric=node_weight_function)
    X = graph_helper.convert_graphs_to_adjs_tuples(X, copy=True)
    if same_label:
        # Discard node identity: every node gets the same placeholder label
        X = [(adj, ['dummy'] * len(labels)) for adj, labels in X]

    return X, node_weight_factors
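
And a sketch of how this helper might be called (the 'degree' metric name is an assumption; get_node_weight_factors is defined elsewhere in this project):

X_tuples, node_weights = _retrieve_node_weights_and_convert_graphs(
    X,
    node_weight_function='degree',  # assumed metric name
    same_label=True)
# X_tuples: (adjacency, ['dummy', ...]) pairs; node_weights: per-graph node weight factors
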
Example #7
from time import time

import scipy.sparse
import flamegraph  # python-flamegraph: samples the running threads into a perf-style log

from utils import dataset_helper, graph_helper, primes, time_utils
import fast_wl  # project-local fast Weisfeiler-Lehman transform (import path assumed)

DATASET = 'ng20'
H = 10
USED_SPARSE_MATRIX = scipy.sparse.lil_matrix
PERF_LOG = './perf.log'

# Warm up primes cache
primes.get_highest_prime_range()

# Get dataset
X, Y = dataset_helper.get_concept_map_for_dataset(DATASET, graphs_only=True)

# Convert to adj
X = graph_helper.convert_graphs_to_adjs_tuples(X, copy=True)

# Same label for all nodes
adj, labels = zip(*X)
labels = [[1] * len(x) for x in labels]
X = list(zip(adj, labels))

# Start profiling
thread = flamegraph.start_profile_thread(fd=open(PERF_LOG, "w"))
start = time()

# Calculate fast_wl
phi_lists, new_label_lookups, new_label_counters = fast_wl.transform(
    X, h=H, used_matrix_type=USED_SPARSE_MATRIX)

# Convert to sparse lil_matrix for "fancy" indexing (needed for other pipeline elements)