def test_hetero_multigraph_split(self): G = generate_dense_hete_multigraph() hete = HeteroGraph(G) # node hete_node = hete.split(task='node') for node_type in hete.node_label_index: num_nodes = len(hete.node_label_index[node_type]) num_nodes_reduced = num_nodes - 3 node_0 = 1 + int(num_nodes_reduced * 0.8) node_1 = 1 + int(num_nodes_reduced * 0.1) node_2 = num_nodes - node_0 - node_1 self.assertEqual(len(hete_node[0].node_label_index[node_type]), node_0) self.assertEqual(len(hete_node[1].node_label_index[node_type]), node_1) self.assertEqual(len(hete_node[2].node_label_index[node_type]), node_2) # edge hete_edge = hete.split(task='edge') for edge_type in hete.edge_label_index: num_edges = int(hete.edge_label_index[edge_type].shape[1]) num_edges_reduced = num_edges - 3 edge_0 = 1 + int(num_edges_reduced * 0.8) edge_1 = 1 + int(num_edges_reduced * 0.1) edge_2 = num_edges - edge_0 - edge_1 self.assertEqual(hete_edge[0].edge_label_index[edge_type].shape[1], edge_0) self.assertEqual(hete_edge[1].edge_label_index[edge_type].shape[1], edge_1) self.assertEqual(hete_edge[2].edge_label_index[edge_type].shape[1], edge_2) # link prediction hete_link = hete.split(task='link_pred', split_ratio=[0.5, 0.3, 0.2]) # calculate the expected edge num for each splitted subgraph hete_link_train_edge_num, hete_link_val_edge_num, hete_link_test_edge_num = 0, 0, 0 for key, val in hete.edge_label_index.items(): val_length = val.shape[1] val_length_reduced = val_length - 3 hete_link_train_edge_num += 1 + int(0.5 * val_length_reduced) hete_link_val_edge_num += 1 + int(0.3 * val_length_reduced) hete_link_test_edge_num += \ val_length - 2 - int(0.5 * val_length_reduced) - int(0.3 * val_length_reduced) self.assertEqual(len(hete_link[0].edge_label), hete_link_train_edge_num) self.assertEqual(len(hete_link[1].edge_label), hete_link_val_edge_num) self.assertEqual(len(hete_link[2].edge_label), hete_link_test_edge_num)
def test_hetero_multigraph_split(self): G = generate_dense_hete_multigraph() hete = HeteroGraph(G) hete = HeteroGraph(node_feature=hete.node_feature, node_label=hete.node_label, edge_feature=hete.edge_feature, edge_label=hete.edge_label, edge_index=hete.edge_index, directed=True) # node hete_node = hete.split(task='node') for node_type in hete.node_label_index: num_nodes = len(hete.node_label_index[node_type]) node_0 = int(num_nodes * 0.8) node_1 = int(num_nodes * 0.1) node_2 = num_nodes - node_0 - node_1 self.assertEqual( len(hete_node[0].node_label_index[node_type]), node_0, ) self.assertEqual( len(hete_node[1].node_label_index[node_type]), node_1, ) self.assertEqual( len(hete_node[2].node_label_index[node_type]), node_2, ) # edge hete_edge = hete.split(task='edge') for edge_type in hete.edge_label_index: num_edges = int(hete.edge_label_index[edge_type].shape[1]) edge_0 = int(num_edges * 0.8) edge_1 = int(num_edges * 0.1) edge_2 = num_edges - edge_0 - edge_1 self.assertEqual( hete_edge[0].edge_label_index[edge_type].shape[1], edge_0, ) self.assertEqual( hete_edge[1].edge_label_index[edge_type].shape[1], edge_1, ) self.assertEqual( hete_edge[2].edge_label_index[edge_type].shape[1], edge_2, ) # link prediction hete_link = hete.split(task='link_pred', split_ratio=[0.5, 0.3, 0.2]) # calculate the expected edge num for each splitted subgraph edge_0, edge_1, edge_2 = 0, 0, 0 for _, val in hete.edge_label_index.items(): num_edges = val.shape[1] edge_0 += int(0.5 * num_edges) edge_1 += int(0.3 * num_edges) edge_2 += num_edges - int(0.5 * num_edges) - int(0.3 * num_edges) train_edge_num = sum([ hete_link[0].edge_label[message_type].shape[0] for message_type in hete_link[0].edge_label ]) val_edge_num = sum([ hete_link[1].edge_label[message_type].shape[0] for message_type in hete_link[1].edge_label ]) test_edge_num = sum([ hete_link[2].edge_label[message_type].shape[0] for message_type in hete_link[2].edge_label ]) self.assertEqual(train_edge_num, edge_0) self.assertEqual(val_edge_num, edge_1) self.assertEqual(test_edge_num, edge_2)
def test_hetero_multigraph_split(self): G = generate_dense_hete_multigraph() hete = HeteroGraph(G) hete = HeteroGraph( node_feature=hete.node_feature, node_label=hete.node_label, edge_feature=hete.edge_feature, edge_label=hete.edge_label, edge_index=hete.edge_index, directed=True ) # node hete_node = hete.split(task='node') for node_type in hete.node_label_index: num_nodes = len(hete.node_label_index[node_type]) num_nodes_reduced = num_nodes - 3 node_0 = 1 + int(num_nodes_reduced * 0.8) node_1 = 1 + int(num_nodes_reduced * 0.1) node_2 = num_nodes - node_0 - node_1 self.assertEqual( len(hete_node[0].node_label_index[node_type]), node_0, ) self.assertEqual( len(hete_node[1].node_label_index[node_type]), node_1, ) self.assertEqual( len(hete_node[2].node_label_index[node_type]), node_2, ) # edge hete_edge = hete.split(task='edge') for edge_type in hete.edge_label_index: num_edges = int(hete.edge_label_index[edge_type].shape[1]) num_edges_reduced = num_edges - 3 edge_0 = 1 + int(num_edges_reduced * 0.8) edge_1 = 1 + int(num_edges_reduced * 0.1) edge_2 = num_edges - edge_0 - edge_1 self.assertEqual( hete_edge[0].edge_label_index[edge_type].shape[1], edge_0, ) self.assertEqual( hete_edge[1].edge_label_index[edge_type].shape[1], edge_1, ) self.assertEqual( hete_edge[2].edge_label_index[edge_type].shape[1], edge_2, ) # link prediction hete_link = hete.split(task='link_pred', split_ratio=[0.5, 0.3, 0.2]) # calculate the expected edge num for each splitted subgraph hete_link_train_edge_num = 0 hete_link_val_edge_num = 0 hete_link_test_edge_num = 0 for _, val in hete.edge_label_index.items(): val_length = val.shape[1] val_length_reduced = val_length - 3 hete_link_train_edge_num += 1 + int(0.5 * val_length_reduced) hete_link_val_edge_num += 1 + int(0.3 * val_length_reduced) hete_link_test_edge_num += ( val_length - 2 - int(0.5 * val_length_reduced) - int(0.3 * val_length_reduced) ) train_edge_num = sum([ hete_link[0].edge_label[message_type].shape[0] for message_type in hete_link[0].edge_label ]) val_edge_num = sum([ hete_link[1].edge_label[message_type].shape[0] for message_type in hete_link[1].edge_label ]) test_edge_num = sum([ hete_link[2].edge_label[message_type].shape[0] for message_type in hete_link[2].edge_label ]) self.assertEqual( train_edge_num, hete_link_train_edge_num ) self.assertEqual( val_edge_num, hete_link_val_edge_num ) self.assertEqual( test_edge_num, hete_link_test_edge_num, )