Beispiel #1
0
    def test_learner_runs(self):
        """Smoke test: train a KGCNLearner on one tiny hand-built graph pair.

        The same input/target pair is used for both the training and the
        generalisation sets. Nothing is asserted; the test succeeds if the
        learner call completes without raising.
        """
        node_kinds = ('person', 'employment', 'company')
        # (source, destination, type) - both edges leave the 'employment' node.
        edge_specs = ((1, 0, 'employee'), (1, 2, 'employer'))

        def build_graph(node_features, edge_features):
            # Input and target graphs share a topology and differ only in features.
            graph = nx.MultiDiGraph()
            for node_id, (kind, feats) in enumerate(zip(node_kinds, node_features)):
                graph.add_node(node_id, type=kind, features=np.array(feats, dtype=np.float32))
            for (src, dst, kind), feats in zip(edge_specs, edge_features):
                graph.add_edge(src, dst, type=kind, features=np.array(feats, dtype=np.float32))
            graph.graph['features'] = np.zeros(5, dtype=np.float32)
            return graph

        input_graph = build_graph(node_features=[[0, 1, 2]] * 3,
                                  edge_features=[[0, 1, 2]] * 2)
        target_graph = build_graph(node_features=[[0, 1, 0], [0, 0, 1], [0, 1, 0]],
                                   edge_features=[[0, 0, 1], [0, 0, 1]])

        attr_embedding_dim = 6

        def make_blank_embedder():
            return BlankAttribute(attr_embedding_dim)

        # A single embedder factory is registered for all three node type indices.
        attr_embedders = {make_blank_embedder: [0, 1, 2]}

        kgcn = KGCN(3, 2, 5, attr_embedding_dim, attr_embedders,
                    edge_output_size=3, node_output_size=3)
        learner = KGCNLearner(kgcn, num_processing_steps_tr=2, num_processing_steps_ge=2)

        learner([input_graph], [target_graph],
                [input_graph], [target_graph],
                num_training_iterations=50)
Beispiel #2
0
def pipeline(graphs,
             tr_ge_split,
             node_types,
             edge_types,
             num_processing_steps_tr=10,
             num_processing_steps_ge=10,
             num_training_iterations=10000,
             continuous_attributes=None,
             categorical_attributes=None,
             type_embedding_dim=5,
             attr_embedding_dim=6,
             edge_output_size=3,
             node_output_size=3,
             output_dir=None):
    """Encode the graphs, train a KGCN on them, and return the predictions.

    Args:
        graphs: Graphs to learn from. Iterated more than once, so pass a
            re-iterable collection. Node and edge data dicts gain an
            'encoded_value' entry (presumably in place - this depends on the
            data iterators, TODO confirm).
        tr_ge_split: Slice index; graphs before it are used for training,
            graphs from it onwards for generalisation.
        node_types: Sequence of node types, passed to `encode_types` and
            `configure_embedders`; its length is given to `KGCN`.
        edge_types: Sequence of edge types, passed to `encode_types`; its
            length is given to `KGCN`.
        num_processing_steps_tr: Forwarded to `KGCNLearner`.
        num_processing_steps_ge: Forwarded to `KGCNLearner` and
            `plot_predictions`.
        num_training_iterations: Forwarded to the learner call.
        continuous_attributes: Optional mapping from node type to a
            `(min_val, max_val)` pair used to min-max scale the node's 'value'.
        categorical_attributes: Optional mapping from node type to a sequence
            of category values; the node's 'value' is encoded as its position
            in that sequence.
        type_embedding_dim: Forwarded to `KGCN`.
        attr_embedding_dim: Forwarded to `configure_embedders` and `KGCN`.
        edge_output_size: Forwarded to `KGCN`.
        node_output_size: Forwarded to `KGCN`.
        output_dir: Passed to the learner as `log_dir` and used as a raw string
            prefix for the plot file names, so it should end with a path
            separator. When None the plots are written as 'learning.png' and
            'graph.png' relative to the working directory.

    Returns:
        A tuple `(ge_graphs, solveds_tr, solveds_ge)`: the generalisation
        graphs annotated with 'probabilities' and 'prediction', plus the last
        two entries of the learner's training info.

    Raises:
        ValueError: If a categorical node's 'value' is not among the category
            values listed for its type.
    """

    ############################################################
    # Manipulate the graph data
    ############################################################

    # Treat "no attributes given" as an empty mapping for the lookups below.
    # The original (possibly None) arguments are still what gets handed to
    # configure_embedders further down.
    categorical_lookup = {} if categorical_attributes is None else categorical_attributes
    continuous_lookup = {} if continuous_attributes is None else continuous_attributes

    # Encode attribute values
    for graph in graphs:
        for node_data in multidigraph_node_data_iterator(graph):
            typ = node_data['type']

            if typ in categorical_lookup:
                # Add the integer value of the category for each categorical attribute instance
                category_values = categorical_lookup[typ]
                node_data['encoded_value'] = category_values.index(
                    node_data['value'])

            elif typ in continuous_lookup:
                # Min-max scale the value.
                # NOTE(review): max_val == min_val divides by zero here.
                min_val, max_val = continuous_lookup[typ]
                node_data['encoded_value'] = (node_data['value'] -
                                              min_val) / (max_val - min_val)

            else:
                node_data['encoded_value'] = 0

        for edge_data in multidigraph_edge_data_iterator(graph):
            edge_data['encoded_value'] = 0

    # Keep the integer-indexed graphs: the predictions are applied back onto
    # them at the end. The original node labels are stored under 'concept'.
    indexed_graphs = [
        nx.convert_node_labels_to_integers(graph, label_attribute='concept')
        for graph in graphs
    ]
    graphs = [duplicate_edges_in_reverse(graph) for graph in indexed_graphs]

    graphs = [encode_types(graph, node_types, edge_types) for graph in graphs]

    input_graphs = [create_input_graph(graph) for graph in graphs]
    target_graphs = [create_target_graph(graph) for graph in graphs]

    tr_input_graphs = input_graphs[:tr_ge_split]
    tr_target_graphs = target_graphs[:tr_ge_split]
    ge_input_graphs = input_graphs[tr_ge_split:]
    ge_target_graphs = target_graphs[tr_ge_split:]

    ############################################################
    # Build and run the KGCN
    ############################################################

    attr_embedders = configure_embedders(node_types, attr_embedding_dim,
                                         categorical_attributes,
                                         continuous_attributes)

    kgcn = KGCN(len(node_types),
                len(edge_types),
                type_embedding_dim,
                attr_embedding_dim,
                attr_embedders,
                edge_output_size=edge_output_size,
                node_output_size=node_output_size)

    learner = KGCNLearner(kgcn,
                          num_processing_steps_tr=num_processing_steps_tr,
                          num_processing_steps_ge=num_processing_steps_ge)

    train_values, test_values, tr_info = learner(
        tr_input_graphs,
        tr_target_graphs,
        ge_input_graphs,
        ge_target_graphs,
        num_training_iterations=num_training_iterations,
        log_dir=output_dir)

    # Interpolating None into the file name would yield 'Nonelearning.png';
    # fall back to an empty prefix instead.
    output_prefix = '' if output_dir is None else output_dir

    plot_across_training(*tr_info, output_file=f'{output_prefix}learning.png')
    plot_predictions(ge_input_graphs,
                     test_values,
                     num_processing_steps_ge,
                     output_file=f'{output_prefix}graph.png')

    # Only the outputs of the final entry in test_values["outputs"] are used.
    logit_graphs = graphs_tuple_to_networkxs(test_values["outputs"][-1])

    indexed_ge_graphs = indexed_graphs[tr_ge_split:]
    ge_graphs = [
        apply_logits_to_graphs(graph, logit_graph)
        for graph, logit_graph in zip(indexed_ge_graphs, logit_graphs)
    ]

    for ge_graph in ge_graphs:
        for data in multidigraph_data_iterator(ge_graph):
            data['probabilities'] = softmax(data['logits'])
            data['prediction'] = int(np.argmax(data['probabilities']))

    _, _, _, _, _, solveds_tr, solveds_ge = tr_info
    return ge_graphs, solveds_tr, solveds_ge
Beispiel #3
0
def pipeline(graphs,
             tr_ge_split,
             node_types,
             edge_types,
             num_processing_steps_tr=10,
             num_processing_steps_ge=10,
             num_training_iterations=10000,
             continuous_attributes=None,
             categorical_attributes=None,
             type_embedding_dim=5,
             attr_embedding_dim=6,
             edge_output_size=3,
             node_output_size=3,
             output_dir=None):
    """Encode the graphs, train a KGCN on them, and return the predictions.

    Args:
        graphs: Graphs to learn from; each is passed through `encode_values`.
        tr_ge_split: Slice index; graphs before it are used for training,
            graphs from it onwards for generalisation.
        node_types: Node types, passed to `encode_types` and `ThingEmbedder`.
        edge_types: Edge types, passed to `encode_types`; its length is given
            to `RoleEmbedder`.
        num_processing_steps_tr: Forwarded to `KGCNLearner`.
        num_processing_steps_ge: Forwarded to `KGCNLearner` and
            `plot_predictions`.
        num_training_iterations: Forwarded to the learner call.
        continuous_attributes: Forwarded to `encode_values` and
            `ThingEmbedder`.
        categorical_attributes: Forwarded to `encode_values` and
            `ThingEmbedder`.
        type_embedding_dim: Forwarded to both embedders.
        attr_embedding_dim: Forwarded to `ThingEmbedder`.
        edge_output_size: Forwarded to `KGCN`.
        node_output_size: Forwarded to `KGCN`.
        output_dir: Passed to the learner as `log_dir` and used as a raw string
            prefix for the plot file names, so it should end with a path
            separator. When None the plots are written as 'learning.png' and
            'graph.png' relative to the working directory.

    Returns:
        A tuple `(ge_graphs, solveds_tr, solveds_ge)`: the generalisation
        graphs annotated with 'probabilities' and 'prediction', plus the last
        two entries of the learner's training info.
    """

    ############################################################
    # Manipulate the graph data
    ############################################################

    # Encode attribute values
    graphs = [
        encode_values(graph, categorical_attributes, continuous_attributes)
        for graph in graphs
    ]

    # Keep the integer-indexed graphs: the predictions are applied back onto
    # them at the end. The original node labels are stored under 'concept'.
    indexed_graphs = [
        nx.convert_node_labels_to_integers(graph, label_attribute='concept')
        for graph in graphs
    ]
    graphs = [duplicate_edges_in_reverse(graph) for graph in indexed_graphs]

    # Types are encoded in two passes: first over node data, then over edge data.
    graphs = [
        encode_types(graph, multidigraph_node_data_iterator, node_types)
        for graph in graphs
    ]
    graphs = [
        encode_types(graph, multidigraph_edge_data_iterator, edge_types)
        for graph in graphs
    ]

    input_graphs = [create_input_graph(graph) for graph in graphs]
    target_graphs = [create_target_graph(graph) for graph in graphs]

    tr_input_graphs = input_graphs[:tr_ge_split]
    tr_target_graphs = target_graphs[:tr_ge_split]
    ge_input_graphs = input_graphs[tr_ge_split:]
    ge_target_graphs = target_graphs[tr_ge_split:]

    ############################################################
    # Build and run the KGCN
    ############################################################

    thing_embedder = ThingEmbedder(node_types, type_embedding_dim,
                                   attr_embedding_dim, categorical_attributes,
                                   continuous_attributes)

    role_embedder = RoleEmbedder(len(edge_types), type_embedding_dim)

    kgcn = KGCN(thing_embedder,
                role_embedder,
                edge_output_size=edge_output_size,
                node_output_size=node_output_size)

    learner = KGCNLearner(kgcn,
                          num_processing_steps_tr=num_processing_steps_tr,
                          num_processing_steps_ge=num_processing_steps_ge)

    train_values, test_values, tr_info = learner(
        tr_input_graphs,
        tr_target_graphs,
        ge_input_graphs,
        ge_target_graphs,
        num_training_iterations=num_training_iterations,
        log_dir=output_dir)

    # Interpolating None into the file name would yield 'Nonelearning.png';
    # fall back to an empty prefix instead.
    output_prefix = '' if output_dir is None else output_dir

    plot_across_training(*tr_info, output_file=f'{output_prefix}learning.png')
    # The type-encoded generalisation graphs (not the input graphs) are plotted.
    plot_predictions(graphs[tr_ge_split:],
                     test_values,
                     num_processing_steps_ge,
                     output_file=f'{output_prefix}graph.png')

    # Only the outputs of the final entry in test_values["outputs"] are used.
    logit_graphs = graphs_tuple_to_networkxs(test_values["outputs"][-1])

    indexed_ge_graphs = indexed_graphs[tr_ge_split:]
    ge_graphs = [
        apply_logits_to_graphs(graph, logit_graph)
        for graph, logit_graph in zip(indexed_ge_graphs, logit_graphs)
    ]

    for ge_graph in ge_graphs:
        for data in multidigraph_data_iterator(ge_graph):
            data['probabilities'] = softmax(data['logits'])
            data['prediction'] = int(np.argmax(data['probabilities']))

    _, _, _, _, _, solveds_tr, solveds_ge = tr_info
    return ge_graphs, solveds_tr, solveds_ge
Beispiel #4
0
def pipeline(graphs,
             tr_ge_split,
             node_types,
             edge_types,
             num_processing_steps_tr=10,
             num_processing_steps_ge=10,
             num_training_iterations=10000,
             categorical_attributes=None,
             type_embedding_dim=5,
             attr_embedding_dim=6,
             edge_output_size=3,
             node_output_size=3):
    """Encode the graphs, train a KGCN on them, and return the predictions.

    Args:
        graphs: Graphs to learn from. Iterated more than once, so pass a
            re-iterable collection. Node and edge data dicts gain an
            'encoded_value' entry (presumably in place - this depends on the
            data iterators, TODO confirm).
        tr_ge_split: Slice index; graphs before it are used for training,
            graphs from it onwards for generalisation.
        node_types: List of node types. Every key of `categorical_attributes`
            must appear in it.
        edge_types: Sequence of edge types, passed to `encode_types`; its
            length is given to `KGCN`.
        num_processing_steps_tr: Forwarded to `KGCNLearner`.
        num_processing_steps_ge: Forwarded to `KGCNLearner` and
            `plot_predictions`.
        num_training_iterations: Forwarded to the learner call.
        categorical_attributes: Optional mapping from attribute type name (a
            string) to the list of its category values. None is treated as
            "no categorical attributes".
        type_embedding_dim: Forwarded to `KGCN`.
        attr_embedding_dim: Output dimension used for every attribute
            embedder; also forwarded to `KGCN`.
        edge_output_size: Forwarded to `KGCN`.
        node_output_size: Forwarded to `KGCN`.

    Returns:
        A tuple `(ge_graphs, solveds_tr, solveds_ge)`: the generalisation
        graphs annotated with 'probabilities' and 'prediction', plus the last
        two entries of the learner's training info.

    Raises:
        ValueError: If a categorical node's 'value' is not among its type's
            category values, or a categorical attribute type is missing from
            `node_types`.
    """

    # The default is None, but the mapping is iterated below.
    if categorical_attributes is None:
        categorical_attributes = {}

    ############################################################
    # Manipulate the graph data
    ############################################################

    # Encode attribute values
    for graph in graphs:

        # Everything starts at 0; categorical nodes are overwritten below.
        for data in multidigraph_data_iterator(graph):
            data['encoded_value'] = 0

        for node_data in multidigraph_node_data_iterator(graph):
            # Add the integer value of the category for each categorical attribute instance
            category_values = categorical_attributes.get(node_data['type'])
            if category_values is not None:
                node_data['encoded_value'] = category_values.index(
                    node_data['value'])

    # Keep the integer-indexed graphs: the predictions are applied back onto
    # them at the end. The original node labels are stored under 'concept'.
    indexed_graphs = [
        nx.convert_node_labels_to_integers(graph, label_attribute='concept')
        for graph in graphs
    ]
    graphs = [duplicate_edges_in_reverse(graph) for graph in indexed_graphs]

    graphs = [encode_types(graph, node_types, edge_types) for graph in graphs]

    input_graphs = [create_input_graph(graph) for graph in graphs]
    target_graphs = [create_target_graph(graph) for graph in graphs]

    tr_input_graphs = input_graphs[:tr_ge_split]
    tr_target_graphs = target_graphs[:tr_ge_split]
    ge_input_graphs = input_graphs[tr_ge_split:]
    ge_target_graphs = target_graphs[tr_ge_split:]

    ############################################################
    # Build and run the KGCN
    ############################################################

    # Indices of all node types; attribute types are removed as their embedders
    # are registered, leaving the indices that need a blank embedder.
    non_attribute_nodes = list(range(len(node_types)))

    attr_embedders = dict()

    # Construct categorical attribute embedders
    for attr_typ, category_values in categorical_attributes.items():
        num_categories = len(category_values)

        # Bind the current loop values as defaults: closures look variables up
        # when called, so without this every factory would use the values from
        # the final loop iteration.
        def make_embedder(num_categories=num_categories, attr_typ=attr_typ):
            return CategoricalAttribute(num_categories,
                                        attr_embedding_dim,
                                        name=attr_typ + '_cat_embedder')

        attr_typ_index = node_types.index(attr_typ)

        # Record the embedder, and the index of the type that it should encode
        attr_embedders[make_embedder] = [attr_typ_index]

        # Remove by value, not by position: positions shift after each removal.
        non_attribute_nodes.remove(attr_typ_index)

    # All entities and relations (non-attributes) also need an embedder with matching output dimension, which does
    # nothing. This is provided as a list of their indices
    def make_blank_embedder():
        return BlankAttribute(attr_embedding_dim)

    attr_embedders[make_blank_embedder] = non_attribute_nodes

    kgcn = KGCN(len(node_types),
                len(edge_types),
                type_embedding_dim,
                attr_embedding_dim,
                attr_embedders,
                edge_output_size=edge_output_size,
                node_output_size=node_output_size)

    learner = KGCNLearner(kgcn,
                          num_processing_steps_tr=num_processing_steps_tr,
                          num_processing_steps_ge=num_processing_steps_ge)

    train_values, test_values, tr_info = learner(
        tr_input_graphs,
        tr_target_graphs,
        ge_input_graphs,
        ge_target_graphs,
        num_training_iterations=num_training_iterations)

    plot_across_training(*tr_info)
    plot_predictions(ge_input_graphs, test_values, num_processing_steps_ge)

    # Only the outputs of the final entry in test_values["outputs"] are used.
    logit_graphs = graphs_tuple_to_networkxs(test_values["outputs"][-1])

    indexed_ge_graphs = indexed_graphs[tr_ge_split:]
    ge_graphs = [
        apply_logits_to_graphs(graph, logit_graph)
        for graph, logit_graph in zip(indexed_ge_graphs, logit_graphs)
    ]

    for ge_graph in ge_graphs:
        for data in multidigraph_data_iterator(ge_graph):
            data['probabilities'] = softmax(data['logits'])
            data['prediction'] = int(np.argmax(data['probabilities']))

    _, _, _, _, _, solveds_tr, solveds_ge = tr_info
    return ge_graphs, solveds_tr, solveds_ge