예제 #1
0
    def test_edges_are_duplicated_as_expected(self):
        """Each existing edge should gain a reversed copy carrying the same attributes."""
        parent = Thing('V123', 'person', 'entity')
        child = Thing('V456', 'person', 'entity')
        parentship = Thing('V789', 'parentship', 'relation')

        def build_base_graph():
            # Two people linked through one parentship relation; shared by the
            # graph under test and the expected graph.
            base = nx.MultiDiGraph(name=0)
            for person in (parent, child):
                base.add_node(person, type='person', solution=1)
            base.add_node(parentship, type='parentship', solution=1)
            base.add_edge(parentship, parent, type='parent', solution=1)
            base.add_edge(parentship, child, type='child', solution=1)
            return base

        actual = build_base_graph()
        # Return value deliberately ignored: the assertion below inspects the
        # graph object that was passed in.
        duplicate_edges_in_reverse(actual)

        expected = build_base_graph()
        # The reversed duplicates point from each person back to the relation.
        expected.add_edge(parent, parentship, type='parent', solution=1)
        expected.add_edge(child, parentship, type='child', solution=1)

        self.assertGraphsEqual(expected, actual)
예제 #2
0
    def __call__(self, graph):
        """Run the configured encoding steps on ``graph`` and return the result.

        Steps, in order: optional label obfuscation, attribute value encoding,
        relabelling of nodes to integers (the previous label is kept under
        ``self.label_attribute``), optional reverse-edge duplication, and
        node/edge type encoding. Finally every node and edge data dict is
        reduced to a feature entry and a target entry.

        Args:
            graph: the graph to transform. NOTE(review): ``obfuscate_labels`` is
                called without using a return value, so it presumably mutates
                ``graph`` in place -- confirm.

        Returns:
            The transformed graph, whose node data holds only ``"x"`` and
            ``"y"`` and whose edge data holds only ``"edge_attr"`` and
            ``"y_edge"``.

        Raises:
            KeyError: if a node or edge data dict lacks ``self.target_name``.
        """
        if self.obfuscate:
            obfuscate_labels(graph, self.obfuscate)

        # Attribute values -> numbers, then node labels -> integers.
        encoded = encode_values(graph, self.categorical, self.continuous)
        encoded = nx.convert_node_labels_to_integers(
            encoded, label_attribute=self.label_attribute
        )

        if self.duplicate:
            encoded = duplicate_edges_in_reverse(encoded)

        # Node and edge types -> ints.
        encoded = encode_types(encoded, multidigraph_node_data_iterator, self.node_types)
        encoded = encode_types(encoded, multidigraph_edge_data_iterator, self.edge_types)

        # (iterator, feature key, target key): nodes are processed before edges.
        layouts = (
            (multidigraph_node_data_iterator, "x", "y"),
            (multidigraph_edge_data_iterator, "edge_attr", "y_edge"),
        )
        for iterate, feature_key, target_key in layouts:
            for attrs in iterate(encoded):
                # Read everything needed before wiping the dict.
                vector = create_feature_vector(attrs)
                label = attrs[self.target_name]
                attrs.clear()
                attrs[feature_key] = vector
                attrs[target_key] = label

        return encoded
예제 #3
0
파일: pipeline.py 프로젝트: hkuich/kglib
def pipeline(graphs,
             tr_ge_split,
             node_types,
             edge_types,
             num_processing_steps_tr=10,
             num_processing_steps_ge=10,
             num_training_iterations=10000,
             continuous_attributes=None,
             categorical_attributes=None,
             type_embedding_dim=5,
             attr_embedding_dim=6,
             edge_output_size=3,
             node_output_size=3,
             output_dir=None):
    """Encode the graphs, train a KGCN on them and return predictions for the held-out graphs.

    Args:
        graphs: list of graphs. The first ``tr_ge_split`` are used as the
            ``tr`` (training) set and the remainder as the ``ge`` set
            (presumably "generalisation" -- confirm).
        tr_ge_split: index at which ``graphs`` is split into the two sets.
        node_types: node types, passed to ``encode_types`` and ``ThingEmbedder``.
        edge_types: edge types, passed to ``encode_types``; its length sizes
            the ``RoleEmbedder``.
        num_processing_steps_tr: forwarded to ``KGCNLearner``.
        num_processing_steps_ge: forwarded to ``KGCNLearner`` and to
            ``plot_predictions``.
        num_training_iterations: forwarded to the learner call.
        continuous_attributes: forwarded to ``encode_values`` and
            ``ThingEmbedder``.
        categorical_attributes: forwarded to ``encode_values`` and
            ``ThingEmbedder``.
        type_embedding_dim: forwarded to both embedders.
        attr_embedding_dim: forwarded to ``ThingEmbedder``.
        edge_output_size: forwarded to ``KGCN``.
        node_output_size: forwarded to ``KGCN``.
        output_dir: passed unchanged to the learner as ``log_dir`` and used as
            a raw string prefix for the two plot files (``learning.png`` and
            ``graph.png``), so it should include any trailing path separator.
            When ``None`` the plots are written with no prefix.

    Returns:
        A tuple ``(ge_graphs, solveds_tr, solveds_ge)``. ``ge_graphs`` are the
        integer-indexed held-out graphs after ``apply_logits_to_graphs``, where
        each data dict yielded by ``multidigraph_data_iterator`` has
        ``'probabilities'`` and ``'prediction'`` entries added. ``solveds_tr``
        and ``solveds_ge`` are the last two elements of the learner's training
        info.
    """

    ############################################################
    # Manipulate the graph data
    ############################################################

    # Encode attribute values
    graphs = [encode_values(graph, categorical_attributes, continuous_attributes) for graph in graphs]

    # Keep a handle on the integer-indexed graphs: the logits are applied to
    # these at the end, with the original label preserved under 'concept'.
    indexed_graphs = [nx.convert_node_labels_to_integers(graph, label_attribute='concept') for graph in graphs]
    graphs = [duplicate_edges_in_reverse(graph) for graph in indexed_graphs]

    graphs = [encode_types(graph, multidigraph_node_data_iterator, node_types) for graph in graphs]
    graphs = [encode_types(graph, multidigraph_edge_data_iterator, edge_types) for graph in graphs]

    input_graphs = [create_input_graph(graph) for graph in graphs]
    target_graphs = [create_target_graph(graph) for graph in graphs]

    tr_input_graphs = input_graphs[:tr_ge_split]
    tr_target_graphs = target_graphs[:tr_ge_split]
    ge_input_graphs = input_graphs[tr_ge_split:]
    ge_target_graphs = target_graphs[tr_ge_split:]

    ############################################################
    # Build and run the KGCN
    ############################################################

    thing_embedder = ThingEmbedder(node_types, type_embedding_dim, attr_embedding_dim, categorical_attributes,
                                   continuous_attributes)

    role_embedder = RoleEmbedder(len(edge_types), type_embedding_dim)

    kgcn = KGCN(thing_embedder,
                role_embedder,
                edge_output_size=edge_output_size,
                node_output_size=node_output_size)

    learner = KGCNLearner(kgcn,
                          num_processing_steps_tr=num_processing_steps_tr,
                          num_processing_steps_ge=num_processing_steps_ge)

    train_values, test_values, tr_info = learner(tr_input_graphs,
                                                 tr_target_graphs,
                                                 ge_input_graphs,
                                                 ge_target_graphs,
                                                 num_training_iterations=num_training_iterations,
                                                 log_dir=output_dir)

    # Formatting None into an f-string yields the text 'None', which would name
    # the plots 'Nonelearning.png' / 'Nonegraph.png' under the default
    # arguments. Use an empty prefix instead; non-None values are used as-is.
    output_prefix = '' if output_dir is None else output_dir

    plot_across_training(*tr_info, output_file=f'{output_prefix}learning.png')
    plot_predictions(graphs[tr_ge_split:], test_values, num_processing_steps_ge,
                     output_file=f'{output_prefix}graph.png')

    # Only the outputs of the last entry are turned back into networkx graphs.
    logit_graphs = graphs_tuple_to_networkxs(test_values["outputs"][-1])

    indexed_ge_graphs = indexed_graphs[tr_ge_split:]
    ge_graphs = [apply_logits_to_graphs(graph, logit_graph) for graph, logit_graph in
                 zip(indexed_ge_graphs, logit_graphs)]

    for ge_graph in ge_graphs:
        for data in multidigraph_data_iterator(ge_graph):
            data['probabilities'] = softmax(data['logits'])
            data['prediction'] = int(np.argmax(data['probabilities']))

    # The learner's info is a 7-tuple; only its last two items are returned.
    _, _, _, _, _, solveds_tr, solveds_ge = tr_info
    return ge_graphs, solveds_tr, solveds_ge