def test_edges_are_duplicated_as_expected(self):
    """Check that duplicate_edges_in_reverse adds a reversed copy of every edge.

    A parentship relation node is linked to two person nodes by a 'parent'
    and a 'child' edge. After the call, the graph is expected to hold the
    original edges plus one edge per original pointing the opposite way and
    carrying the same attributes.
    """
    first_person = Thing('V123', 'person', 'entity')
    second_person = Thing('V456', 'person', 'entity')
    parentship = Thing('V789', 'parentship', 'relation')

    # (role player, role label) for each edge leaving the parentship node.
    role_edges = ((first_person, 'parent'), (second_person, 'child'))

    def build_base_graph():
        # The graph shared by the input and the expectation: two people,
        # one parentship, and the relation -> role-player edges.
        base = nx.MultiDiGraph(name=0)
        for person in (first_person, second_person):
            base.add_node(person, type='person', solution=1)
        base.add_node(parentship, type='parentship', solution=1)
        for player, role in role_edges:
            base.add_edge(parentship, player, type=role, solution=1)
        return base

    graph = build_base_graph()
    # The return value is ignored: the assertion below inspects `graph`
    # itself, so the test relies on the edges being added in place.
    duplicate_edges_in_reverse(graph)

    expected_graph = build_base_graph()
    # Duplicates: same attributes, direction flipped.
    for player, role in role_edges:
        expected_graph.add_edge(player, parentship, type=role, solution=1)

    self.assertGraphsEqual(expected_graph, graph)
def __call__(self, graph):
    """Turn a graph into one whose node/edge data hold only features and targets.

    Steps, in order:
      1. If ``self.obfuscate`` is truthy, call ``obfuscate_labels`` on the
         graph (its return value is not used).
      2. Encode attribute values as numbers via ``encode_values``.
      3. Relabel nodes as integers, storing the previous label under
         ``self.label_attribute``.
      4. If ``self.duplicate`` is truthy, duplicate edges in reverse.
      5. Encode node and edge types as ints via ``encode_types``.
      6. Replace every node data dict with ``{"x", "y"}`` and every edge
         data dict with ``{"edge_attr", "y_edge"}``.

    :param graph: the graph to transform
    :return: the transformed graph
    :raises KeyError: if a node or edge data dict lacks ``self.target_name``
    """
    if self.obfuscate:
        obfuscate_labels(graph, self.obfuscate)

    # Encode attribute values as number
    graph = encode_values(graph, self.categorical, self.continuous)
    graph = nx.convert_node_labels_to_integers(
        graph, label_attribute=self.label_attribute
    )

    if self.duplicate:
        graph = duplicate_edges_in_reverse(graph)

    # Node or Edge Type as int
    graph = encode_types(graph, multidigraph_node_data_iterator, self.node_types)
    graph = encode_types(graph, multidigraph_edge_data_iterator, self.edge_types)

    def repack(iterate, features_key, target_key):
        # Reduce each data dict to exactly two entries. The feature vector
        # and the target are read before the dict is cleared, since both
        # come from the entries that clear() removes.
        for attrs in iterate(graph):
            vector = create_feature_vector(attrs)
            label = attrs[self.target_name]
            attrs.clear()
            attrs[features_key] = vector
            attrs[target_key] = label

    repack(multidigraph_node_data_iterator, "x", "y")
    repack(multidigraph_edge_data_iterator, "edge_attr", "y_edge")

    return graph
def pipeline(graphs,
             tr_ge_split,
             node_types,
             edge_types,
             num_processing_steps_tr=10,
             num_processing_steps_ge=10,
             num_training_iterations=10000,
             continuous_attributes=None,
             categorical_attributes=None,
             type_embedding_dim=5,
             attr_embedding_dim=6,
             edge_output_size=3,
             node_output_size=3,
             output_dir=None):
    """Encode the graphs, train a KGCN on them and return the predictions.

    :param graphs: list of graphs to learn from
    :param tr_ge_split: index at which the list is split; graphs before it
        are used as the "tr" set, the rest as the "ge" set
    :param node_types: node types, passed to ``encode_types`` and
        ``ThingEmbedder``
    :param edge_types: edge types, passed to ``encode_types``; their count
        sizes the ``RoleEmbedder``
    :param num_processing_steps_tr: passed to ``KGCNLearner``
    :param num_processing_steps_ge: passed to ``KGCNLearner`` and
        ``plot_predictions``
    :param num_training_iterations: passed to the learner call
    :param continuous_attributes: passed to ``encode_values`` and
        ``ThingEmbedder``
    :param categorical_attributes: passed to ``encode_values`` and
        ``ThingEmbedder``
    :param type_embedding_dim: passed to both embedders
    :param attr_embedding_dim: passed to ``ThingEmbedder``
    :param edge_output_size: passed to ``KGCN``
    :param node_output_size: passed to ``KGCN``
    :param output_dir: string prefix for the plot files ``learning.png`` and
        ``graph.png``, also passed unchanged as ``log_dir`` to the learner.
        It is concatenated as-is, so a directory needs its trailing
        separator. ``None`` means "no prefix": the plots are written as
        ``learning.png`` / ``graph.png`` relative to the working directory.
    :return: tuple ``(ge_graphs, solveds_tr, solveds_ge)`` where
        ``ge_graphs`` are the integer-indexed "ge" graphs with logits
        applied and ``'probabilities'`` / ``'prediction'`` added to each
        data dict, and the other two are the last two items of the
        learner's training info
    """
    ############################################################
    # Manipulate the graph data
    ############################################################

    # Encode attribute values
    graphs = [encode_values(graph, categorical_attributes, continuous_attributes)
              for graph in graphs]

    # Kept under its own name: the "ge" slice is reused after training to
    # receive the predicted logits.
    indexed_graphs = [nx.convert_node_labels_to_integers(graph, label_attribute='concept')
                      for graph in graphs]
    graphs = [duplicate_edges_in_reverse(graph) for graph in indexed_graphs]

    graphs = [encode_types(graph, multidigraph_node_data_iterator, node_types)
              for graph in graphs]
    graphs = [encode_types(graph, multidigraph_edge_data_iterator, edge_types)
              for graph in graphs]

    input_graphs = [create_input_graph(graph) for graph in graphs]
    target_graphs = [create_target_graph(graph) for graph in graphs]

    tr_input_graphs = input_graphs[:tr_ge_split]
    tr_target_graphs = target_graphs[:tr_ge_split]
    ge_input_graphs = input_graphs[tr_ge_split:]
    ge_target_graphs = target_graphs[tr_ge_split:]

    ############################################################
    # Build and run the KGCN
    ############################################################

    thing_embedder = ThingEmbedder(node_types, type_embedding_dim, attr_embedding_dim,
                                   categorical_attributes, continuous_attributes)
    role_embedder = RoleEmbedder(len(edge_types), type_embedding_dim)

    kgcn = KGCN(thing_embedder,
                role_embedder,
                edge_output_size=edge_output_size,
                node_output_size=node_output_size)

    learner = KGCNLearner(kgcn,
                          num_processing_steps_tr=num_processing_steps_tr,
                          num_processing_steps_ge=num_processing_steps_ge)

    train_values, test_values, tr_info = learner(tr_input_graphs,
                                                 tr_target_graphs,
                                                 ge_input_graphs,
                                                 ge_target_graphs,
                                                 num_training_iterations=num_training_iterations,
                                                 log_dir=output_dir)

    # Formatting None into an f-string yields the text "None", which used to
    # produce files named "Nonelearning.png" / "Nonegraph.png" when the
    # default output_dir was used. Treat None as an empty prefix instead;
    # any explicit prefix is concatenated exactly as before.
    output_prefix = '' if output_dir is None else output_dir

    plot_across_training(*tr_info, output_file=f'{output_prefix}learning.png')
    plot_predictions(graphs[tr_ge_split:], test_values, num_processing_steps_ge,
                     output_file=f'{output_prefix}graph.png')

    # Only the outputs of the final entry in test_values["outputs"] are used.
    logit_graphs = graphs_tuple_to_networkxs(test_values["outputs"][-1])

    indexed_ge_graphs = indexed_graphs[tr_ge_split:]
    ge_graphs = [apply_logits_to_graphs(graph, logit_graph)
                 for graph, logit_graph in zip(indexed_ge_graphs, logit_graphs)]

    for ge_graph in ge_graphs:
        for data in multidigraph_data_iterator(ge_graph):
            data['probabilities'] = softmax(data['logits'])
            data['prediction'] = int(np.argmax(data['probabilities']))

    # Only the last two entries of the training info are returned.
    _, _, _, _, _, solveds_tr, solveds_ge = tr_info
    return ge_graphs, solveds_tr, solveds_ge