def test_weighted(self):
        """Weighted sampling on a directed tree: the checker validates that
        sampled node ids follow the edge weights."""
        graph, verify = weighted_tree(is_directed=True)

        generator = DirectedGraphSAGENodeGenerator(
            graph, 7, [5, 3], [5, 3], weighted=True
        )
        batches = generator.flow([0] * 10)

        # Flatten every sampled id from the first batch's input arrays and
        # hand the stream to the weight-distribution checker.
        sampled_ids = (
            node_id for arr in batches[0][0] for node_id in arr.ravel()
        )
        verify(sampled_ids)
# ---- Example #2 ----
    def sample_one_hop(self, num_in_samples, num_out_samples):
        """Check one-hop sampled features on the simple chain graph.

        The simple graph is the directed chain 1 -> 2 -> 3 where each node's
        feature is -node_id; 0.0 is the padding value where a node has no
        neighbour in the sampled direction.
        """
        graph = create_simple_graph()
        node_list = list(graph.nodes())

        in_sizes = [num_in_samples]
        out_sizes = [num_out_samples]
        generator = DirectedGraphSAGENodeGenerator(
            graph, graph.number_of_nodes(), in_sizes, out_sizes
        )

        # Obtain the tree of sampled features rooted at every node.
        ilocs = graph.node_ids_to_ilocs(node_list)
        feature_tree = generator.sample_features(ilocs, 0)

        # A directed sampler produces a full binary tree of feature arrays.
        hops = len(in_sizes)
        assert len(feature_tree) == 2 ** (hops + 1) - 1

        # Head-node features: each node's own feature is -node_id.
        head = feature_tree[0]
        assert len(head) == len(node_list)
        assert head.shape == (len(node_list), 1, 1)
        for row, node in enumerate(node_list):
            assert head[row, 0, 0] == -1.0 * node

        # In-neighbour features: the chain predecessor (0.0 = no predecessor).
        expected_in = {1: 0.0, 2: -1.0, 3: -2.0}
        in_feats = feature_tree[1]
        assert in_feats.shape == (len(node_list), in_sizes[0], 1)
        for sample in range(in_sizes[0]):
            for row, node in enumerate(node_list):
                assert node in expected_in
                assert in_feats[row, sample, 0] == expected_in[node]

        # Out-neighbour features: the chain successor (0.0 = no successor).
        expected_out = {1: -2.0, 2: -3.0, 3: 0.0}
        out_feats = feature_tree[2]
        assert out_feats.shape == (len(node_list), out_sizes[0], 1)
        for sample in range(out_sizes[0]):
            for row, node in enumerate(node_list):
                assert node in expected_out
                assert out_feats[row, sample, 0] == expected_out[node]
# ---- Example #3 ----
    def test_two_hop(self):
        """Check the two-hop sampled feature tree on the simple chain graph.

        The simple graph is the directed chain 1 -> 2 -> 3 where each node's
        feature is -node_id; 0.0 is the padding value where no neighbour
        exists in the sampled direction.
        """
        graph = create_simple_graph()
        node_list = list(graph.nodes())

        generator = DirectedGraphSAGENodeGenerator(
            graph,
            batch_size=graph.number_of_nodes(),
            in_samples=[1, 1],
            out_samples=[1, 1],
        )
        # Smoke-test flow creation; sampled features are checked directly below.
        batch_flow = generator.flow(node_ids=node_list, shuffle=False)

        ilocs = graph.node_ids_to_ilocs(node_list)
        feature_tree = generator.sample_features(ilocs, 0)

        # Two directed hops give a full binary tree: 2**(2+1) - 1 = 7 slots.
        assert len(feature_tree) == 2 ** (2 + 1) - 1

        # Head-node features: each node's own feature is -node_id.
        head = feature_tree[0]
        assert len(head) == len(node_list)
        assert head.shape == (len(node_list), 1, 1)
        for row, node in enumerate(node_list):
            assert head[row, 0, 0] == -1.0 * node

        # Expected feature per tree slot, keyed by node id. Tree slot order:
        #   1: in          (e.g. *1 -> 2)
        #   2: out         (e.g. 2 -> *3)
        #   3: in-in       (e.g. *1 -> 2 -> 3)
        #   4: in-out      (e.g. *2 <- 1 -> 2)
        #   5: out-in      (e.g. 2 -> 3 <- *2)
        #   6: out-out     (e.g. 1 -> 2 -> *3)
        # 0.0 marks a padded sample where the chain has no such neighbour.
        expected_by_slot = {
            1: {1: 0.0, 2: -1.0, 3: -2.0},
            2: {1: -2.0, 2: -3.0, 3: 0.0},
            3: {1: 0.0, 2: 0.0, 3: -1.0},
            4: {1: 0.0, 2: -2.0, 3: -3.0},
            5: {1: -1.0, 2: -2.0, 3: 0.0},
            6: {1: -3.0, 2: 0.0, 3: 0.0},
        }
        for slot, expected in expected_by_slot.items():
            feats = feature_tree[slot]
            assert feats.shape == (len(node_list), 1, 1)
            for row, node in enumerate(node_list):
                assert node in expected
                assert feats[row, 0, 0] == expected[node]
# ---- Example #4 ----
                target_encoding, targets[train_utterance_indeces])
            print(train_multi_hot.shape)

        # -----------------------------------------------------
        # -- 4. Build GraphSAGE model and generator for train -
        # -----------------------------------------------------
        batch_size = args.batch_size
        # in_samples = [1] # <-- settings for A1
        # out_samples = [1]
        # layer_sizes = [32]
        # class_layer_size = 128
        # Fan-outs and layer sizes are looked up from a named model-size preset.
        in_samples, out_samples, layer_sizes, class_layer_size = model_sizes(
            args.model_size)

        # Directed sampler over the (sub)sampled training graph, with separate
        # fan-outs for in-edges and out-edges at each hop.
        generator = DirectedGraphSAGENodeGenerator(graph_train_sampled,
                                                   batch_size, in_samples,
                                                   out_samples)

        # NOTE(review): presumably SwDA/MRDA are single-label datasets (one-hot
        # targets) while the others are multi-label (multi-hot) — confirm.
        if args.dataset in ['SwDA', 'MRDA']:
            assert (len(train_utterance_indeces) == len(train_one_hot))
            train_gen = generator.flow(train_utterance_indeces,
                                       train_one_hot,
                                       shuffle=True)
        else:
            assert (len(train_utterance_indeces) == len(train_multi_hot))
            train_gen = generator.flow(train_utterance_indeces,
                                       train_multi_hot,
                                       shuffle=True)

        # -----------------------------------------------------
        # -- 5. Specify machine learning model ----------------
    # visualization of the GCN embeddings (image written to disk)
    visi(in_layer=gcn_inp,
         out_layer=gcn_out,
         gen_nodes=gcn_generator.flow(node_classes.index),
         nodes=node_classes,
         img_path="../data/cora/img/gcn.png",
         title="GCN embs",
         is_sage=False)

    ###############################################################

    # creating SAGE model
    # [10, 5] fan-outs are reused for both in-edge and out-edge sampling.
    batch_size = 50
    num_samples = [10, 5]
    sage_generator = DirectedGraphSAGENodeGenerator(stellar_g, batch_size,
                                                    num_samples, num_samples)
    # NOTE(review): shuffle=False on a training flow is unusual — confirm
    # this is intentional (e.g. for reproducible embedding comparison).
    train_sage_gen = sage_generator.flow(train_dataset.index,
                                         train_targets,
                                         shuffle=False)
    sage = DirectedGraphSAGE(layer_sizes=[32, 32],
                             generator=sage_generator,
                             bias=False,
                             dropout=0.5)
    sage_inp, sage_out = sage.in_out_tensors()

    # creating KERAS model with the SAGE model layers
    sage_dense_layer = layers.Dense(units=train_targets.shape[1],
                                    activation="softmax")(sage_out)
    keras_sage = Model(inputs=sage_inp, outputs=sage_dense_layer)
    keras_sage.compile(optimizer="adam",
                       loss=losses.categorical_crossentropy,
# ---- Example #6 ----
print(G.info())

# %% [markdown]
# ## Data Generators
#
# Now we create the data generators using `CorruptedGenerator`. `CorruptedGenerator` returns shuffled node features along with the regular node features and we train our model to discriminate between the two.
#
# Note that:
#
# - We typically pass all nodes to `corrupted_generator.flow` because this is an unsupervised task
# - We don't pass `targets` to `corrupted_generator.flow` because these are binary labels (true nodes, false nodes) that are created by `CorruptedGenerator`

# %%
# DirectedGraphSAGE model (the original comment said "HinSAGE", but the code
# below builds a DirectedGraphSAGE). seed=0 makes the neighbour sampling
# reproducible.
graphsage_generator = DirectedGraphSAGENodeGenerator(
    G, batch_size=50, in_samples=[30, 5], out_samples=[30, 5], seed=0
)

graphsage_model = DirectedGraphSAGE(
    layer_sizes=[128, 16], activations=["relu", "relu"], generator=graphsage_generator, aggregator=MeanPoolingAggregator
)


# CorruptedGenerator wraps the base generator so each batch also carries
# shuffled (corrupted) node features for the discrimination task.
corrupted_generator = CorruptedGenerator(graphsage_generator)
gen = corrupted_generator.flow(G.nodes())

# %% [markdown]
# ## Model Creation and Training
# 
# We create and train our `DeepGraphInfomax` model. Note that the loss used here must always be `tf.nn.sigmoid_cross_entropy_with_logits`.
# ---- Example #7 ----
# Split the data, using labels.
# stratify=node_subjects keeps the class proportions equal across the splits;
# train_size=0.1 means only 10% of the nodes carry training labels.
train_subjects, test_subjects = model_selection.train_test_split(
    node_subjects, train_size=0.1, test_size=None, stratify=node_subjects)

# Use a label binarizer to convert results into one hot
# (fit on the training labels, reuse the same encoding for test).
target_encoding = preprocessing.LabelBinarizer()
train_targets = target_encoding.fit_transform(train_subjects)
test_targets = target_encoding.transform(test_subjects)

# For directed graph, we keep track of sampling from nodes coming
# in and nodes coming out (for our random walk)
# [5, 2]: 5 neighbours at hop 1, 2 at hop 2 — separately per direction.
batch_size = 50
in_samples = [5, 2]
out_samples = [5, 2]
generator = DirectedGraphSAGENodeGenerator(G, batch_size, in_samples,
                                           out_samples)

# make training iterator
train_gen = generator.flow(train_subjects.index, train_targets, shuffle=True)
graphsage_model = DirectedGraphSAGE(
    layer_sizes=[32, 32],
    generator=generator,
    bias=False,
    dropout=0.5,
)

# Symbolic input/output tensors used to assemble the surrounding Keras model.
x_inp, x_out = graphsage_model.in_out_tensors()
prediction = layers.Dense(units=train_targets.shape[1],
                          activation="softmax")(x_out)
model = Model(inputs=x_inp, outputs=prediction)
model.compile(
# ---- Example #8 ----
# Class distribution of the training split (displayed as a notebook cell result).
Counter(train_data["subject"])

# One-hot encode the "subject" labels: fit on train, reuse the encoding on test.
target_encoding = feature_extraction.DictVectorizer(sparse=False)

train_targets = target_encoding.fit_transform(train_data[["subject"]].to_dict("records"))
test_targets = target_encoding.transform(test_data[["subject"]].to_dict("records"))

node_features = node_data[feature_names]

# Directed citation graph: "paper" nodes carrying features, "cites" edges.
G = sg.StellarDiGraph(nodes={"paper": node_features}, edges={"cites": edgelist})


# Directed sampling: separate per-hop fan-outs for in-edges and out-edges.
batch_size = 50
in_samples = [5, 2]
out_samples = [5, 2]

generator = DirectedGraphSAGENodeGenerator(G, batch_size, in_samples, out_samples)
train_gen = generator.flow(train_data.index, train_targets, shuffle=True)
graphsage_model = DirectedGraphSAGE(
    layer_sizes=[32, 32], generator=generator, bias=False, dropout=0.5,
)

# in_out_tensors() replaces the deprecated build() method (consistent with the
# other examples in this file).
x_inp, x_out = graphsage_model.in_out_tensors()
prediction = layers.Dense(units=train_targets.shape[1], activation="softmax")(x_out)

model = Model(inputs=x_inp, outputs=prediction)
model.compile(
    # learning_rate replaces the deprecated lr argument of tf.keras optimizers.
    optimizer=optimizers.Adam(learning_rate=0.005),
    loss=losses.categorical_crossentropy,
    metrics=["acc"],
)