def test_weighted(self):
    g, checker = weighted_tree(is_directed=True)
    gen = DirectedGraphSAGENodeGenerator(g, 7, [5, 3], [5, 3], weighted=True)
    samples = gen.flow([0] * 10)
    checker(node_id for array in samples[0][0] for node_id in array.ravel())
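# A minimal sketch (an assumed fixture, not the weighted_tree() helper above) of
# a weighted StellarDiGraph that weighted=True sampling can run against: with
# weighted sampling, neighbours are drawn in proportion to the "weight" column.
import pandas as pd
from stellargraph import StellarDiGraph

nodes = pd.DataFrame(index=[0, 1, 2])
edges = pd.DataFrame({"source": [0, 0], "target": [1, 2], "weight": [0.9, 0.1]})
weighted_g = StellarDiGraph(nodes, edges)  # node 1 sampled ~9x as often as node 2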
def sample_one_hop(self, num_in_samples, num_out_samples):
    g = create_simple_graph()
    nodes = list(g.nodes())
    in_samples = [num_in_samples]
    out_samples = [num_out_samples]
    gen = DirectedGraphSAGENodeGenerator(
        g, g.number_of_nodes(), in_samples, out_samples
    )

    # Obtain tree of sampled features
    node_ilocs = g.node_ids_to_ilocs(nodes)
    features = gen.sample_features(node_ilocs, 0)
    num_hops = len(in_samples)
    tree_len = 2 ** (num_hops + 1) - 1
    assert len(features) == tree_len

    # Check node features
    node_features = features[0]
    assert len(node_features) == len(nodes)
    assert node_features.shape == (len(nodes), 1, 1)
    for idx, node in enumerate(nodes):
        assert node_features[idx, 0, 0] == -1.0 * node

    # Check in-node features
    in_features = features[1]
    assert in_features.shape == (len(nodes), in_samples[0], 1)
    for n_idx in range(in_samples[0]):
        for idx, node in enumerate(nodes):
            if node == 1:  # None -> 1
                assert in_features[idx, n_idx, 0] == 0.0
            elif node == 2:  # 1 -> 2
                assert in_features[idx, n_idx, 0] == -1.0
            elif node == 3:  # 2 -> 3
                assert in_features[idx, n_idx, 0] == -2.0
            else:
                assert False

    # Check out-node features
    out_features = features[2]
    assert out_features.shape == (len(nodes), out_samples[0], 1)
    for n_idx in range(out_samples[0]):
        for idx, node in enumerate(nodes):
            if node == 1:  # 1 -> 2
                assert out_features[idx, n_idx, 0] == -2.0
            elif node == 2:  # 2 -> 3
                assert out_features[idx, n_idx, 0] == -3.0
            elif node == 3:  # 3 -> None
                assert out_features[idx, n_idx, 0] == 0.0
            else:
                assert False
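# The assertions above pin down what create_simple_graph() must return: a
# directed chain 1 -> 2 -> 3 where node i carries the single feature -1.0 * i,
# and missing neighbours are padded with a zero feature. A hypothetical
# reconstruction (the real fixture lives elsewhere in the test suite):
import pandas as pd
from stellargraph import StellarDiGraph

def create_simple_graph():
    nodes = pd.DataFrame({"f": [-1.0, -2.0, -3.0]}, index=[1, 2, 3])
    edges = pd.DataFrame({"source": [1, 2], "target": [2, 3]})
    return StellarDiGraph(nodes, edges)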
def test_two_hop(self):
    g = create_simple_graph()
    nodes = list(g.nodes())
    gen = DirectedGraphSAGENodeGenerator(
        g, batch_size=g.number_of_nodes(), in_samples=[1, 1], out_samples=[1, 1]
    )
    flow = gen.flow(node_ids=nodes, shuffle=False)

    node_ilocs = g.node_ids_to_ilocs(nodes)
    features = gen.sample_features(node_ilocs, 0)
    num_hops = 2
    tree_len = 2 ** (num_hops + 1) - 1
    assert len(features) == tree_len

    # Check node features
    node_features = features[0]
    assert len(node_features) == len(nodes)
    assert node_features.shape == (len(nodes), 1, 1)
    for idx, node in enumerate(nodes):
        assert node_features[idx, 0, 0] == -1.0 * node

    # Check in-node features
    in_features = features[1]
    assert in_features.shape == (len(nodes), 1, 1)
    for idx, node in enumerate(nodes):
        if node == 1:  # *None -> 1
            assert in_features[idx, 0, 0] == 0.0
        elif node == 2:  # *1 -> 2
            assert in_features[idx, 0, 0] == -1.0
        elif node == 3:  # *2 -> 3
            assert in_features[idx, 0, 0] == -2.0
        else:
            assert False

    # Check out-node features
    out_features = features[2]
    assert out_features.shape == (len(nodes), 1, 1)
    for idx, node in enumerate(nodes):
        if node == 1:  # 1 -> *2
            assert out_features[idx, 0, 0] == -2.0
        elif node == 2:  # 2 -> *3
            assert out_features[idx, 0, 0] == -3.0
        elif node == 3:  # 3 -> *None
            assert out_features[idx, 0, 0] == 0.0
        else:
            assert False

    # Check in-in-node features
    in_features = features[3]
    assert in_features.shape == (len(nodes), 1, 1)
    for idx, node in enumerate(nodes):
        if node == 1:  # *None -> None -> 1
            assert in_features[idx, 0, 0] == 0.0
        elif node == 2:  # *None -> 1 -> 2
            assert in_features[idx, 0, 0] == 0.0
        elif node == 3:  # *1 -> 2 -> 3
            assert in_features[idx, 0, 0] == -1.0
        else:
            assert False

    # Check in-out-node features
    in_features = features[4]
    assert in_features.shape == (len(nodes), 1, 1)
    for idx, node in enumerate(nodes):
        if node == 1:  # *None <- None -> 1
            assert in_features[idx, 0, 0] == 0.0
        elif node == 2:  # *2 <- 1 -> 2
            assert in_features[idx, 0, 0] == -2.0
        elif node == 3:  # *3 <- 2 -> 3
            assert in_features[idx, 0, 0] == -3.0
        else:
            assert False

    # Check out-in-node features
    out_features = features[5]
    assert out_features.shape == (len(nodes), 1, 1)
    for idx, node in enumerate(nodes):
        if node == 1:  # 1 -> 2 <- *1
            assert out_features[idx, 0, 0] == -1.0
        elif node == 2:  # 2 -> 3 <- *2
            assert out_features[idx, 0, 0] == -2.0
        elif node == 3:  # 3 -> None <- *None
            assert out_features[idx, 0, 0] == 0.0
        else:
            assert False

    # Check out-out-node features
    out_features = features[6]
    assert out_features.shape == (len(nodes), 1, 1)
    for idx, node in enumerate(nodes):
        if node == 1:  # 1 -> 2 -> *3
            assert out_features[idx, 0, 0] == -3.0
        elif node == 2:  # 2 -> 3 -> *None
            assert out_features[idx, 0, 0] == 0.0
        elif node == 3:  # 3 -> None -> *None
            assert out_features[idx, 0, 0] == 0.0
        else:
            assert False
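# The feature list returned by sample_features() is a full binary tree over the
# (in, out) directions, which is what the indices asserted above encode:
#   0: targets, 1: in, 2: out, 3: in-in, 4: in-out, 5: out-in, 6: out-out.
# A quick check of the tree_len formula used in both tests:
def expected_tree_len(num_hops):
    return 2 ** (num_hops + 1) - 1

assert [expected_tree_len(h) for h in (1, 2, 3)] == [3, 7, 15]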
    target_encoding, targets[train_utterance_indeces])
print(train_multi_hot.shape)

# -----------------------------------------------------
# -- 4. Build GraphSAGE model and generator for train -
# -----------------------------------------------------
batch_size = args.batch_size
# in_samples = [1]        # <-- settings for A1
# out_samples = [1]
# layer_sizes = [32]
# class_layer_size = 128
in_samples, out_samples, layer_sizes, class_layer_size = model_sizes(
    args.model_size)
generator = DirectedGraphSAGENodeGenerator(graph_train_sampled, batch_size,
                                           in_samples, out_samples)
if args.dataset in ['SwDA', 'MRDA']:
    assert len(train_utterance_indeces) == len(train_one_hot)
    train_gen = generator.flow(train_utterance_indeces, train_one_hot,
                               shuffle=True)
else:
    assert len(train_utterance_indeces) == len(train_multi_hot)
    train_gen = generator.flow(train_utterance_indeces, train_multi_hot,
                               shuffle=True)

# -----------------------------------------------------
# -- 5. Specify machine learning model ----------------
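# model_sizes() is defined elsewhere in this script; a hypothetical sketch of
# its contract, using the commented-out "A1" settings above as one preset (the
# "A1" key and the second preset are illustrative assumptions):
def model_sizes(model_size):
    presets = {
        "A1": ([1], [1], [32], 128),               # from the comments above
        "large": ([5, 2], [5, 2], [32, 32], 256),  # assumed
    }
    # returns (in_samples, out_samples, layer_sizes, class_layer_size)
    return presets[model_size]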
# visualization of the embeddings
visi(in_layer=gcn_inp, out_layer=gcn_out,
     gen_nodes=gcn_generator.flow(node_classes.index),
     nodes=node_classes, img_path="../data/cora/img/gcn.png",
     title="GCN embs", is_sage=False)

###############################################################
# creating SAGE model
batch_size = 50
num_samples = [10, 5]
sage_generator = DirectedGraphSAGENodeGenerator(stellar_g, batch_size,
                                                num_samples, num_samples)
train_sage_gen = sage_generator.flow(train_dataset.index, train_targets,
                                     shuffle=False)
sage = DirectedGraphSAGE(layer_sizes=[32, 32], generator=sage_generator,
                         bias=False, dropout=0.5)
sage_inp, sage_out = sage.in_out_tensors()

# creating KERAS model with the SAGE model layers
sage_dense_layer = layers.Dense(units=train_targets.shape[1],
                                activation="softmax")(sage_out)
keras_sage = Model(inputs=sage_inp, outputs=sage_dense_layer)
keras_sage.compile(optimizer="adam", loss=losses.categorical_crossentropy,
                   metrics=["acc"])  # metrics assumed
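# The flow() object above is a Keras Sequence, so it plugs straight into fit().
# A sketch of the training step that would follow (epoch count assumed):
history = keras_sage.fit(train_sage_gen, epochs=20, verbose=2)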
print(G.info())

# %% [markdown]
# ## Data Generators
#
# Now we create the data generators using `CorruptedGenerator`. `CorruptedGenerator` returns shuffled node features along with the regular node features, and we train our model to discriminate between the two.
#
# Note that:
#
# - We typically pass all nodes to `corrupted_generator.flow` because this is an unsupervised task
# - We don't pass `targets` to `corrupted_generator.flow` because the binary labels (true nodes, false nodes) are created by `CorruptedGenerator` itself

# %%
# DirectedGraphSAGE model
graphsage_generator = DirectedGraphSAGENodeGenerator(
    G, batch_size=50, in_samples=[30, 5], out_samples=[30, 5], seed=0
)
graphsage_model = DirectedGraphSAGE(
    layer_sizes=[128, 16],
    activations=["relu", "relu"],
    generator=graphsage_generator,
    aggregator=MeanPoolingAggregator,
)

corrupted_generator = CorruptedGenerator(graphsage_generator)
gen = corrupted_generator.flow(G.nodes())

# %% [markdown]
# ## Model Creation and Training
#
# We create and train our `DeepGraphInfomax` model. Note that the loss used here must always be `tf.nn.sigmoid_cross_entropy_with_logits`.
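# %% [markdown]
# A sketch of that step, following the standard StellarGraph Deep Graph Infomax
# pattern (the optimizer and epoch count here are assumptions, not values from
# this notebook):

# %%
import tensorflow as tf
from tensorflow.keras import Model, optimizers
from stellargraph.layer import DeepGraphInfomax

infomax = DeepGraphInfomax(graphsage_model, corrupted_generator)
x_in, x_out = infomax.in_out_tensors()

dgi_model = Model(inputs=x_in, outputs=x_out)
dgi_model.compile(
    loss=tf.nn.sigmoid_cross_entropy_with_logits,  # required loss, per the note above
    optimizer=optimizers.Adam(lr=1e-3),  # assumed
)
dgi_model.fit(gen, epochs=100)  # epoch count assumed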
# Split the data, using labels
train_subjects, test_subjects = model_selection.train_test_split(
    node_subjects, train_size=0.1, test_size=None, stratify=node_subjects)

# Use a label binarizer to convert the labels into one-hot vectors
target_encoding = preprocessing.LabelBinarizer()
train_targets = target_encoding.fit_transform(train_subjects)
test_targets = target_encoding.transform(test_subjects)

# For a directed graph, we sample each node's in-neighbours and out-neighbours
# separately during neighbourhood sampling
batch_size = 50
in_samples = [5, 2]
out_samples = [5, 2]
generator = DirectedGraphSAGENodeGenerator(G, batch_size, in_samples, out_samples)

# make training iterator
train_gen = generator.flow(train_subjects.index, train_targets, shuffle=True)

graphsage_model = DirectedGraphSAGE(
    layer_sizes=[32, 32], generator=generator, bias=False, dropout=0.5,
)

x_inp, x_out = graphsage_model.in_out_tensors()
prediction = layers.Dense(units=train_targets.shape[1], activation="softmax")(x_out)

model = Model(inputs=x_inp, outputs=prediction)
model.compile(
    optimizer=optimizers.Adam(lr=0.005),  # optimizer/loss assumed
    loss=losses.categorical_crossentropy,
    metrics=["acc"],
)
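# A sketch of the training step that would normally follow, reusing the test
# split held out above (epoch count assumed):
test_gen = generator.flow(test_subjects.index, test_targets)
history = model.fit(train_gen, epochs=20, validation_data=test_gen, verbose=2)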
Counter(train_data["subject"])

target_encoding = feature_extraction.DictVectorizer(sparse=False)
train_targets = target_encoding.fit_transform(train_data[["subject"]].to_dict("records"))
test_targets = target_encoding.transform(test_data[["subject"]].to_dict("records"))

node_features = node_data[feature_names]
G = sg.StellarDiGraph(nodes={"paper": node_features}, edges={"cites": edgelist})

batch_size = 50
in_samples = [5, 2]
out_samples = [5, 2]
generator = DirectedGraphSAGENodeGenerator(G, batch_size, in_samples, out_samples)
train_gen = generator.flow(train_data.index, train_targets, shuffle=True)

graphsage_model = DirectedGraphSAGE(
    layer_sizes=[32, 32], generator=generator, bias=False, dropout=0.5,
)

x_inp, x_out = graphsage_model.in_out_tensors()
prediction = layers.Dense(units=train_targets.shape[1], activation="softmax")(x_out)

model = Model(inputs=x_inp, outputs=prediction)
model.compile(
    optimizer=optimizers.Adam(lr=0.005),
    loss=losses.categorical_crossentropy,
    metrics=["acc"],
)
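# A sketch of training and held-out evaluation that would naturally follow (all
# names reuse the objects defined above; the epoch count is assumed):
model.fit(train_gen, epochs=20, verbose=2)

test_gen = generator.flow(test_data.index, test_targets)
test_metrics = model.evaluate(test_gen)
for name, value in zip(model.metrics_names, test_metrics):
    print(f"{name}: {value:0.4f}")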