def pipeline(graphs,
             tr_ge_split,
             node_types,
             edge_types,
             num_processing_steps_tr=10,
             num_processing_steps_ge=10,
             num_training_iterations=10000,
             continuous_attributes=None,
             categorical_attributes=None,
             type_embedding_dim=5,
             attr_embedding_dim=6,
             edge_output_size=3,
             node_output_size=3,
             output_dir=None):
    """Encode the graphs, train a KGCN on them and return the predictions.

    Args:
        graphs: Re-iterable collection of graphs. Every node data dict must
            carry a 'type' key, and a 'value' key when its type is listed in
            one of the attribute mappings. The data dicts are mutated in
            place (an 'encoded_value' key is written).
        tr_ge_split: Slice index; graphs before it are used for training,
            graphs from it onwards for generalisation.
        node_types: Node types, passed to `encode_types` and
            `configure_embedders`; its length sizes the KGCN.
        edge_types: Edge types, passed to `encode_types`; its length sizes
            the KGCN.
        num_processing_steps_tr: Forwarded to `KGCNLearner`.
        num_processing_steps_ge: Forwarded to `KGCNLearner` and
            `plot_predictions`.
        num_training_iterations: Forwarded to the learner call.
        continuous_attributes: Optional mapping of node type to a
            `(min_val, max_val)` pair used to rescale the node's value.
        categorical_attributes: Optional mapping of node type to a sequence
            of category values; a node is encoded as the position of its
            value in that sequence.
        type_embedding_dim: Forwarded to `KGCN`.
        attr_embedding_dim: Forwarded to `configure_embedders` and `KGCN`.
        edge_output_size: Forwarded to `KGCN`.
        node_output_size: Forwarded to `KGCN`.
        output_dir: Used as the learner's `log_dir` and as a plain string
            prefix for the plot file names. No path separator is inserted,
            so it needs to end with one.

    Returns:
        A tuple `(ge_graphs, solveds_tr, solveds_ge)`: the generalisation
        graphs with 'probabilities' and 'prediction' set on each data dict,
        and the last two of the seven entries of the learner's training info.
    """

    def encoded_node_value(node_data):
        # Categorical attributes take precedence over continuous ones.
        typ = node_data['type']
        if categorical_attributes is not None and typ in categorical_attributes:
            # Integer position of the value within the declared categories.
            return categorical_attributes[typ].index(node_data['value'])
        if continuous_attributes is not None and typ in continuous_attributes:
            min_val, max_val = continuous_attributes[typ]
            # NOTE(review): max_val == min_val divides by zero - confirm that
            # callers never declare a zero-width range.
            return (node_data['value'] - min_val) / (max_val - min_val)
        return 0

    ############################################################
    # Manipulate the graph data
    ############################################################

    # Encode attribute values; edges always get a placeholder of 0.
    for raw_graph in graphs:
        for node_data in multidigraph_node_data_iterator(raw_graph):
            node_data['encoded_value'] = encoded_node_value(node_data)
        for edge_data in multidigraph_edge_data_iterator(raw_graph):
            edge_data['encoded_value'] = 0

    # Relabel nodes as integers, keeping the old label under 'concept'.
    indexed_graphs = [
        nx.convert_node_labels_to_integers(g, label_attribute='concept')
        for g in graphs
    ]
    reversed_graphs = [duplicate_edges_in_reverse(g) for g in indexed_graphs]
    typed_graphs = [
        encode_types(g, node_types, edge_types) for g in reversed_graphs
    ]

    input_graphs = [create_input_graph(g) for g in typed_graphs]
    target_graphs = [create_target_graph(g) for g in typed_graphs]

    tr_input_graphs = input_graphs[:tr_ge_split]
    tr_target_graphs = target_graphs[:tr_ge_split]
    ge_input_graphs = input_graphs[tr_ge_split:]
    ge_target_graphs = target_graphs[tr_ge_split:]

    ############################################################
    # Build and run the KGCN
    ############################################################

    attr_embedders = configure_embedders(node_types,
                                         attr_embedding_dim,
                                         categorical_attributes,
                                         continuous_attributes)

    kgcn = KGCN(len(node_types),
                len(edge_types),
                type_embedding_dim,
                attr_embedding_dim,
                attr_embedders,
                edge_output_size=edge_output_size,
                node_output_size=node_output_size)

    learner = KGCNLearner(kgcn,
                          num_processing_steps_tr=num_processing_steps_tr,
                          num_processing_steps_ge=num_processing_steps_ge)

    _, test_values, tr_info = learner(
        tr_input_graphs,
        tr_target_graphs,
        ge_input_graphs,
        ge_target_graphs,
        num_training_iterations=num_training_iterations,
        log_dir=output_dir)

    # NOTE(review): with the default output_dir=None these names become
    # 'Nonelearning.png' / 'Nonegraph.png' - confirm whether that is intended.
    learning_plot_file = f'{output_dir}learning.png'
    plot_across_training(*tr_info, output_file=learning_plot_file)
    predictions_plot_file = f'{output_dir}graph.png'
    plot_predictions(ge_input_graphs,
                     test_values,
                     num_processing_steps_ge,
                     output_file=predictions_plot_file)

    # Only the last entry of the test outputs is turned into predictions.
    logit_graphs = graphs_tuple_to_networkxs(test_values["outputs"][-1])

    ge_graphs = []
    for indexed_graph, logit_graph in zip(indexed_graphs[tr_ge_split:],
                                          logit_graphs):
        ge_graphs.append(apply_logits_to_graphs(indexed_graph, logit_graph))

    for ge_graph in ge_graphs:
        for data in multidigraph_data_iterator(ge_graph):
            probabilities = softmax(data['logits'])
            data['probabilities'] = probabilities
            data['prediction'] = int(np.argmax(probabilities))

    # tr_info must hold exactly seven entries; only the last two are returned.
    (_, _, _, _, _, solveds_tr, solveds_ge) = tr_info
    return ge_graphs, solveds_tr, solveds_ge
def pipeline(graphs,
             tr_ge_split,
             node_types,
             edge_types,
             num_processing_steps_tr=10,
             num_processing_steps_ge=10,
             num_training_iterations=10000,
             categorical_attributes=None,
             type_embedding_dim=5,
             attr_embedding_dim=6,
             edge_output_size=3,
             node_output_size=3):
    """Encode the graphs, train a KGCN on them and return the predictions.

    Args:
        graphs: Re-iterable collection of graphs. Every node data dict must
            carry a 'type' key, and a 'value' key when its type is a
            categorical attribute. The data dicts are mutated in place (an
            'encoded_value' key is written).
        tr_ge_split: Slice index; graphs before it are used for training,
            graphs from it onwards for generalisation.
        node_types: Sequence of node types. Each key of
            `categorical_attributes` must be present in it.
        edge_types: Edge types, passed to `encode_types`; its length sizes
            the KGCN.
        num_processing_steps_tr: Forwarded to `KGCNLearner`.
        num_processing_steps_ge: Forwarded to `KGCNLearner` and
            `plot_predictions`.
        num_training_iterations: Forwarded to the learner call.
        categorical_attributes: Optional mapping of attribute type (a string)
            to the sequence of its category values. `None` is treated as
            "no categorical attributes".
        type_embedding_dim: Forwarded to `KGCN`.
        attr_embedding_dim: Output size of every attribute embedder.
        edge_output_size: Forwarded to `KGCN`.
        node_output_size: Forwarded to `KGCN`.

    Returns:
        A tuple `(ge_graphs, solveds_tr, solveds_ge)`: the generalisation
        graphs with 'probabilities' and 'prediction' set on each data dict,
        and the last two of the seven entries of the learner's training info.

    Raises:
        ValueError: If a categorical node's value is not among its declared
            categories, or a categorical attribute type is missing from
            `node_types`.
    """
    # The signature defaults to None, so normalise it before calling .items().
    if categorical_attributes is None:
        categorical_attributes = {}

    ############################################################
    # Manipulate the graph data
    ############################################################

    # Encode attribute values
    for graph in graphs:
        for data in multidigraph_data_iterator(graph):
            data['encoded_value'] = 0

        for node_data in multidigraph_node_data_iterator(graph):
            typ = node_data['type']
            if typ in categorical_attributes:
                # Integer position of the value within the declared categories
                node_data['encoded_value'] = categorical_attributes[typ].index(
                    node_data['value'])

    indexed_graphs = [
        nx.convert_node_labels_to_integers(graph, label_attribute='concept')
        for graph in graphs
    ]
    graphs = [duplicate_edges_in_reverse(graph) for graph in indexed_graphs]
    graphs = [encode_types(graph, node_types, edge_types) for graph in graphs]

    input_graphs = [create_input_graph(graph) for graph in graphs]
    target_graphs = [create_target_graph(graph) for graph in graphs]

    tr_input_graphs = input_graphs[:tr_ge_split]
    tr_target_graphs = target_graphs[:tr_ge_split]
    ge_input_graphs = input_graphs[tr_ge_split:]
    ge_target_graphs = target_graphs[tr_ge_split:]

    ############################################################
    # Build and run the KGCN
    ############################################################

    def categorical_embedder_factory(num_categories, name):
        # Binding the values as arguments gives each factory its own copy.
        # A closure defined directly in the loop below would read the loop
        # variables when called, i.e. after the loop has finished, so every
        # factory would build the embedder of the last attribute.
        def make_embedder():
            return CategoricalAttribute(num_categories,
                                        attr_embedding_dim,
                                        name=name)

        return make_embedder

    # Indices of all node types; attribute indices are removed as they get
    # their own embedder, leaving the indices of entities and relations.
    non_attribute_nodes = list(range(len(node_types)))

    attr_embedders = dict()

    # Construct categorical attribute embedders
    for attr_typ, category_values in categorical_attributes.items():
        make_embedder = categorical_embedder_factory(
            len(category_values), attr_typ + '_cat_embedder')

        attr_typ_index = node_types.index(attr_typ)

        # Record the embedder, and the index of the type that it should encode
        attr_embedders[make_embedder] = [attr_typ_index]

        # Remove by value: the list shrinks on every iteration, so its
        # positions no longer match the type indices it holds.
        non_attribute_nodes.remove(attr_typ_index)

    # All entities and relations (non-attributes) also need an embedder with
    # matching output dimension, which does nothing. This is provided as a
    # list of their indices
    def make_blank_embedder():
        return BlankAttribute(attr_embedding_dim)

    attr_embedders[make_blank_embedder] = non_attribute_nodes

    kgcn = KGCN(len(node_types),
                len(edge_types),
                type_embedding_dim,
                attr_embedding_dim,
                attr_embedders,
                edge_output_size=edge_output_size,
                node_output_size=node_output_size)

    learner = KGCNLearner(kgcn,
                          num_processing_steps_tr=num_processing_steps_tr,
                          num_processing_steps_ge=num_processing_steps_ge)

    # The training values (first element) are not used by this pipeline.
    _, test_values, tr_info = learner(
        tr_input_graphs,
        tr_target_graphs,
        ge_input_graphs,
        ge_target_graphs,
        num_training_iterations=num_training_iterations)

    plot_across_training(*tr_info)
    plot_predictions(ge_input_graphs, test_values, num_processing_steps_ge)

    # Only the last entry of the test outputs is turned into predictions.
    logit_graphs = graphs_tuple_to_networkxs(test_values["outputs"][-1])

    indexed_ge_graphs = indexed_graphs[tr_ge_split:]
    ge_graphs = [
        apply_logits_to_graphs(graph, logit_graph)
        for graph, logit_graph in zip(indexed_ge_graphs, logit_graphs)
    ]

    for ge_graph in ge_graphs:
        for data in multidigraph_data_iterator(ge_graph):
            data['probabilities'] = softmax(data['logits'])
            data['prediction'] = int(np.argmax(data['probabilities']))

    # tr_info must hold exactly seven entries; only the last two are returned.
    _, _, _, _, _, solveds_tr, solveds_ge = tr_info
    return ge_graphs, solveds_tr, solveds_ge
def pipeline(graphs,
             tr_ge_split,
             node_types,
             edge_types,
             num_processing_steps_tr=10,
             num_processing_steps_ge=10,
             num_training_iterations=10000,
             continuous_attributes=None,
             categorical_attributes=None,
             type_embedding_dim=5,
             attr_embedding_dim=6,
             edge_output_size=3,
             node_output_size=3,
             output_dir=None):
    """Encode the graphs, train a KGCN on them and return the predictions.

    Args:
        graphs: Iterable of graphs to learn from.
        tr_ge_split: Slice index; graphs before it are used for training,
            graphs from it onwards for generalisation.
        node_types: Node types, used to encode node data and passed to
            `ThingEmbedder`.
        edge_types: Edge types, used to encode edge data; its length is
            passed to `RoleEmbedder`.
        num_processing_steps_tr: Forwarded to `KGCNLearner`.
        num_processing_steps_ge: Forwarded to `KGCNLearner` and
            `plot_predictions`.
        num_training_iterations: Forwarded to the learner call.
        continuous_attributes: Forwarded to `encode_values` and
            `ThingEmbedder`.
        categorical_attributes: Forwarded to `encode_values` and
            `ThingEmbedder`.
        type_embedding_dim: Forwarded to both embedders.
        attr_embedding_dim: Forwarded to `ThingEmbedder`.
        edge_output_size: Forwarded to `KGCN`.
        node_output_size: Forwarded to `KGCN`.
        output_dir: Used as the learner's `log_dir` and as a plain string
            prefix for the plot file names. No path separator is inserted,
            so it needs to end with one.

    Returns:
        A tuple `(ge_graphs, solveds_tr, solveds_ge)`: the generalisation
        graphs with 'probabilities' and 'prediction' set on each data dict,
        and the last two of the seven entries of the learner's training info.
    """
    ############################################################
    # Manipulate the graph data
    ############################################################

    # Encode attribute values
    value_encoded_graphs = [
        encode_values(g, categorical_attributes, continuous_attributes)
        for g in graphs
    ]

    # Relabel nodes as integers, keeping the old label under 'concept'.
    indexed_graphs = [
        nx.convert_node_labels_to_integers(g, label_attribute='concept')
        for g in value_encoded_graphs
    ]

    encoded_graphs = [duplicate_edges_in_reverse(g) for g in indexed_graphs]

    # Encode the node types first, then the edge types.
    type_encoding_passes = (
        (multidigraph_node_data_iterator, node_types),
        (multidigraph_edge_data_iterator, edge_types),
    )
    for data_iterator, types in type_encoding_passes:
        encoded_graphs = [
            encode_types(g, data_iterator, types) for g in encoded_graphs
        ]

    input_graphs = [create_input_graph(g) for g in encoded_graphs]
    target_graphs = [create_target_graph(g) for g in encoded_graphs]

    tr_input_graphs = input_graphs[:tr_ge_split]
    tr_target_graphs = target_graphs[:tr_ge_split]
    ge_input_graphs = input_graphs[tr_ge_split:]
    ge_target_graphs = target_graphs[tr_ge_split:]

    ############################################################
    # Build and run the KGCN
    ############################################################

    thing_embedder = ThingEmbedder(node_types,
                                   type_embedding_dim,
                                   attr_embedding_dim,
                                   categorical_attributes,
                                   continuous_attributes)
    role_embedder = RoleEmbedder(len(edge_types), type_embedding_dim)

    kgcn = KGCN(thing_embedder,
                role_embedder,
                edge_output_size=edge_output_size,
                node_output_size=node_output_size)

    learner = KGCNLearner(kgcn,
                          num_processing_steps_tr=num_processing_steps_tr,
                          num_processing_steps_ge=num_processing_steps_ge)

    _, test_values, tr_info = learner(
        tr_input_graphs,
        tr_target_graphs,
        ge_input_graphs,
        ge_target_graphs,
        num_training_iterations=num_training_iterations,
        log_dir=output_dir)

    # NOTE(review): with the default output_dir=None these names become
    # 'Nonelearning.png' / 'Nonegraph.png' - confirm whether that is intended.
    learning_plot_file = f'{output_dir}learning.png'
    plot_across_training(*tr_info, output_file=learning_plot_file)
    predictions_plot_file = f'{output_dir}graph.png'
    # The encoded generalisation graphs are plotted, not the input graphs.
    plot_predictions(encoded_graphs[tr_ge_split:],
                     test_values,
                     num_processing_steps_ge,
                     output_file=predictions_plot_file)

    # Only the last entry of the test outputs is turned into predictions.
    logit_graphs = graphs_tuple_to_networkxs(test_values["outputs"][-1])

    ge_graphs = []
    for indexed_graph, logit_graph in zip(indexed_graphs[tr_ge_split:],
                                          logit_graphs):
        ge_graphs.append(apply_logits_to_graphs(indexed_graph, logit_graph))

    for ge_graph in ge_graphs:
        for data in multidigraph_data_iterator(ge_graph):
            probabilities = softmax(data['logits'])
            data['probabilities'] = probabilities
            data['prediction'] = int(np.argmax(probabilities))

    # tr_info must hold exactly seven entries; only the last two are returned.
    (_, _, _, _, _, solveds_tr, solveds_ge) = tr_info
    return ge_graphs, solveds_tr, solveds_ge
def pipeline(graphs,
             tr_ge_split,
             node_types,
             edge_types,
             num_processing_steps_tr=10,
             num_processing_steps_ge=10,
             num_training_iterations=10000,
             continuous_attributes=None,
             categorical_attributes=None,
             type_embedding_dim=5,
             attr_embedding_dim=6,
             edge_output_size=3,
             node_output_size=3,
             output_dir=None,
             do_test=False,
             save_fle="test_model.ckpt",
             reload_fle=""):
    """Encode the graphs, then train or only evaluate a KGCN on them.

    Args:
        graphs: Iterable of graphs to learn from.
        tr_ge_split: Slice index; graphs before it are used for training,
            graphs from it onwards for generalisation.
        node_types: Node types, used to encode node data and passed to
            `ThingEmbedder`.
        edge_types: Edge types, used to encode edge data; its length is
            passed to `RoleEmbedder`.
        num_processing_steps_tr: Forwarded to `KGCNLearner`.
        num_processing_steps_ge: Forwarded to `KGCNLearner` and
            `plot_predictions`.
        num_training_iterations: Forwarded to `learner.train`.
        continuous_attributes: Forwarded to `encode_values` and
            `ThingEmbedder`.
        categorical_attributes: Forwarded to `encode_values` and
            `ThingEmbedder`.
        type_embedding_dim: Forwarded to both embedders.
        attr_embedding_dim: Forwarded to `ThingEmbedder`.
        edge_output_size: Forwarded to `KGCN`.
        node_output_size: Forwarded to `KGCN`.
        output_dir: Directory used as the learner's `log_dir` and as the
            parent of the checkpoint and plot files.
        do_test: Inference-only mode is selected only when this is exactly
            `True` and `output_dir / reload_fle` is not a directory.
        save_fle: File name, joined to `output_dir` with '/', passed to the
            learner as `save_fle`.
        reload_fle: File name, joined to `output_dir` with '/', passed to
            the learner as `reload_fle`.

    Returns:
        A tuple `(ge_graphs, solveds_tr, solveds_ge)`: the generalisation
        graphs with 'probabilities' and 'prediction' set on each data dict,
        and the last two of the seven entries of the learner's info.
    """
    ############################################################
    # Manipulate the graph data
    ############################################################

    # Encode attribute values
    value_encoded_graphs = [
        encode_values(g, categorical_attributes, continuous_attributes)
        for g in graphs
    ]

    # Relabel nodes as integers, keeping the old label under 'concept'.
    indexed_graphs = [
        nx.convert_node_labels_to_integers(g, label_attribute='concept')
        for g in value_encoded_graphs
    ]

    encoded_graphs = [duplicate_edges_in_reverse(g) for g in indexed_graphs]

    # Encode the node types first, then the edge types.
    type_encoding_passes = (
        (multidigraph_node_data_iterator, node_types),
        (multidigraph_edge_data_iterator, edge_types),
    )
    for data_iterator, types in type_encoding_passes:
        encoded_graphs = [
            encode_types(g, data_iterator, types) for g in encoded_graphs
        ]

    input_graphs = [create_input_graph(g) for g in encoded_graphs]
    target_graphs = [create_target_graph(g) for g in encoded_graphs]

    tr_input_graphs = input_graphs[:tr_ge_split]
    tr_target_graphs = target_graphs[:tr_ge_split]
    ge_input_graphs = input_graphs[tr_ge_split:]
    ge_target_graphs = target_graphs[tr_ge_split:]

    ############################################################
    # Build and run the KGCN
    ############################################################

    thing_embedder = ThingEmbedder(node_types,
                                   type_embedding_dim,
                                   attr_embedding_dim,
                                   categorical_attributes,
                                   continuous_attributes)
    role_embedder = RoleEmbedder(len(edge_types), type_embedding_dim)

    kgcn = KGCN(thing_embedder,
                role_embedder,
                edge_output_size=edge_output_size,
                node_output_size=node_output_size)

    # The processing steps indicate how many message-passing iterations to do
    # for every training / testing step.
    learner_options = dict(
        num_processing_steps_tr=num_processing_steps_tr,
        num_processing_steps_ge=num_processing_steps_ge,
        log_dir=output_dir,
        save_fle=f'{output_dir}/{save_fle}',
        reload_fle=f'{output_dir}/{reload_fle}',
    )
    learner = KGCNLearner(kgcn, **learner_options)

    # NOTE(review): Path(None) raises TypeError, so the default
    # output_dir=None fails here - confirm that callers always pass one.
    reload_target = Path(output_dir) / reload_fle

    # Training is the fallback: it runs when the reload target is a
    # directory (always the case for the default reload_fle="" if output_dir
    # exists) or when do_test is anything other than the literal True.
    if reload_target.is_dir() or do_test is not True:
        print("\n\nTRAINING\n\n")
        _, test_values, tr_info = learner.train(
            tr_input_graphs,
            tr_target_graphs,
            ge_input_graphs,
            ge_target_graphs,
            num_training_iterations=num_training_iterations)
    else:
        # Only test
        print("\n\nVALIDATION ONLY\n\n")
        test_values, tr_info = learner.infer(ge_input_graphs,
                                             ge_target_graphs)

    learning_plot_file = f'{output_dir}/learning.png'
    plot_across_training(*tr_info, output_file=learning_plot_file)
    predictions_plot_file = f'{output_dir}/graph.png'
    # The encoded generalisation graphs are plotted, not the input graphs.
    plot_predictions(encoded_graphs[tr_ge_split:],
                     test_values,
                     num_processing_steps_ge,
                     output_file=predictions_plot_file)

    # Only the last entry of the test outputs is turned into predictions.
    logit_graphs = graphs_tuple_to_networkxs(test_values["outputs"][-1])

    ge_graphs = []
    for indexed_graph, logit_graph in zip(indexed_graphs[tr_ge_split:],
                                          logit_graphs):
        ge_graphs.append(apply_logits_to_graphs(indexed_graph, logit_graph))

    for ge_graph in ge_graphs:
        for data in multidigraph_data_iterator(ge_graph):
            probabilities = softmax(data['logits'])
            data['probabilities'] = probabilities
            # The prediction is the index of the largest probability.
            # TODO: use a threshold instead of a plain argmax.
            data['prediction'] = int(np.argmax(probabilities))

    # tr_info must hold exactly seven entries; only the last two are returned.
    (_, _, _, _, _, solveds_tr, solveds_ge) = tr_info
    return ge_graphs, solveds_tr, solveds_ge