예제 #1
0
    def test_reconstruct_graph(self):
        """
        Check the layer-1 structure that is reconstructed for a single node.

        Every node of a directed barbell graph gets its id ('label1') and a
        seeded random value ('label2') as labels. The structure returned by
        ``get_l1_structure`` for node 15 is then checked on the node count of
        the returned graph and on two sums over the returned array.
        """
        graph = gb.create_directed_barbell(10, 10)
        random.seed(2)
        for node_id, attrs in graph.nodes(data=True):
            attrs['label1'] = int(node_id)
            attrs['label2'] = random.uniform(0.0, 1.0)

        model_args = {
            'learning_rate': 0.01,
            'support_size': [5, 5],
            'dims': [3, 5, 7, 6, 2],
            'batch_size': 12,
            'max_total_steps': 100,
            'verbose': True,
        }
        gae = GraphAutoEncoder(graph, **model_args)
        l1_struct, graph2 = gae.get_l1_structure(15, show_graph=False)

        # the reconstructed graph is expected to hold exactly 5 nodes
        node_count_msg = "Number of nodes in reconstructed graph does not match with expectations"
        self.assertEqual(graph2.number_of_nodes(), 5, node_count_msg)

        # validate the returned values through their sums along axis 1
        sum_values = np.sum(l1_struct, axis=1)
        id_sum_msg = "sum of nodes ids in reconstructed graph does not match with expectations"
        self.assertAlmostEqual(sum_values[0, 1], 120, 4, id_sum_msg)
        edge_sum_msg = "sum of edges in reconstructed graph does not match with expectations"
        self.assertAlmostEqual(sum_values[0, 0], 2.399999, 4, edge_sum_msg)
예제 #2
0
def train_model(params):
    """
    Create and train a GraphAutoEncoder for one set of hyper-parameters.

    Meant to be used as the objective function of hyperopt.

    Args:
        params: mapping holding the layer sizes under the keys 'dim0',
            'dim1', 'dim2' and 'dim3'; each value is cast to int. A final
            layer of size 2 is always appended.

    Returns:
        dict with the keys 'loss' (mean of the last three validation losses
        of the all-layer training run), 'status' (STATUS_OK) and 'hist' (the
        results of every training run plus the loss).
    """
    dims = [int(params[f'dim{idx}']) for idx in range(4)]
    dims.append(2)

    gae = GraphAutoEncoder(
        G,
        learning_rate=0.01,
        support_size=[5, 5],
        dims=dims,
        batch_size=12,
        max_total_steps=250,
    )

    # first train every layer on its own, then all layers together
    train_res = {}
    for layer in range(1, len(gae.dims) + 1):
        train_res[f"l{layer}"] = gae.train_layer(layer, act=tf.nn.relu)
    train_res['all'] = gae.train_layer(
        len(gae.dims), all_layers=True, act=tf.nn.relu)

    # the optimisation metric is the mean of the final validation losses
    loss_val = train_res['all']['val_l'][-3:]
    print(f"loss val {loss_val}")
    loss = sum(loss_val) / len(loss_val)
    train_res['loss'] = loss

    return {'loss': loss, 'status': STATUS_OK, 'hist': train_res}
예제 #3
0
    def test_train_layer3(self):
        """
        Train a model with three support sizes and check the first loss of
        every layer, and of the all-layer run, against fixed reference values.
        """
        graph = gb.create_directed_barbell(4, 4)
        gae = GraphAutoEncoder(
            graph,
            support_size=[3, 4, 5],
            dims=[2, 3, 3, 3, 3, 2],
            batch_size=3,
            max_total_steps=1,
            verbose=False,
            seed=2,
            act=tf.nn.relu,
        )

        # reference loss per layer, in training order (layer 1 first)
        expected_losses = (
            153.83647, 309.56152, 311.00153, 459.34726, 484.33817, 504.59387,
        )
        for layer, expected in enumerate(expected_losses, start=1):
            res = gae.train_layer(layer)
            self.assertAlmostEqual(
                res['l'][0], expected, 4,
                f"loss of layer {layer} does not match with expectations")

        res = gae.train_layer(6, all_layers=True)
        all_layers_msg = "loss of the layer 6 all traning does not match with expectations"
        self.assertAlmostEqual(res['l'][0], 504.55478, 4, all_layers_msg)
예제 #4
0
    def test_fit(self):
        """
        Test that ``fit`` yields the same embeddings as training the layers
        one by one followed by an all-layer training run.

        Two identically configured models are built on the same graph. The
        first is trained manually, the second through ``fit``. The embeddings
        are compared on the sum of their absolute differences, so deviations
        with opposite sign cannot cancel each other out.
        """
        graph = gb.create_directed_barbell(4, 4)
        gae = GraphAutoEncoder(graph,
                               learning_rate=0.01,
                               support_size=[5, 5],
                               dims=[3, 5, 7, 6, 2],
                               batch_size=12,
                               max_total_steps=50,
                               verbose=True)

        # manual schedule: every layer separately, then all layers together
        for layer in range(1, len(gae.dims) + 1):
            gae.train_layer(layer)
        gae.train_layer(len(gae.dims), all_layers=True, dropout=None)
        embed = gae.calculate_embeddings()

        gae2 = GraphAutoEncoder(graph,
                                learning_rate=0.01,
                                support_size=[5, 5],
                                dims=[3, 5, 7, 6, 2],
                                batch_size=12,
                                max_total_steps=50,
                                verbose=True)
        gae2.fit(graph)
        embed2 = gae2.calculate_embeddings()

        # A signed sum of the differences can be ~0 for different embeddings;
        # use the absolute differences instead.
        abs_diff = np.abs(np.subtract(embed, embed2))
        self.assertAlmostEqual(
            np.sum(abs_diff), 0, 4,
            "fit method results in a different model when trained separately")
예제 #5
0
    def test_train_layer5(self):
        """
        Test a model that uses a final combination layer.

        Every edge gets a custom label 'edge_lbl1' which is used as the weight
        label. The test checks the first loss of the last layer-wise training
        run and one value of the embeddings calculated after the all-layer run.
        """
        graph = gb.create_directed_barbell(4, 4)
        for src, dst, attrs in graph.edges(data=True):
            attrs['edge_lbl1'] = src / (dst + 0.011) + 0.22

        gae = GraphAutoEncoder(
            graph,
            support_size=[3, 3],
            dims=[2, 3, 3, 2, 2],
            batch_size=3,
            max_total_steps=10,
            verbose=False,
            seed=2,
            weight_label='edge_lbl1',
            act=tf.nn.relu,
        )

        # only the result of the last layer is checked after the loop
        for layer in range(1, len(gae.dims) + 1):
            res = gae.train_layer(layer, act=tf.nn.relu)

        self.assertAlmostEqual(
            res['l'][0], 134.9637, 4,
            "loss of the last layer does not match with expectations using a \
                               final combination layer")

        res = gae.train_layer(len(gae.dims), all_layers=True, act=tf.nn.relu)
        embed = gae.calculate_embeddings()
        embed_msg = "embedding of the first batch node differs from expected value"
        self.assertAlmostEqual(embed[0][2], 38.221458435058594, 4, embed_msg)
예제 #6
0
    def create_embedding(self, mdl, date_range=range(1, 25)):
        """
        Calculate node embeddings per period with a stored model and save the
        node features combined with these embeddings.

        The model is created once, on the graph of the first period, and its
        weights are loaded from ``self.out_dir + 'mdl_' + mdl``. For every
        period the embeddings are inner-joined on the index with the node
        table of that period. All periods are concatenated and written to a
        parquet file.

        Args:
            mdl: model id of which the underscore separated tokens are read by
                position: token 1 is the dimension size, token 5 the dropout
                and token 7 the activation ('sigm' selects sigmoid, anything
                else tanh).
            date_range: iterable with the periods handed to ``proces_month``.

        Returns:
            tuple (feat_file, feat_cols): the path of the parquet file and the
            column names of the combined features without 'is_sar', 'dag',
            'orig_id' and 'id'.

        Raises:
            ValueError: if ``date_range`` is empty, as there is nothing to save.
        """
        # Parse the model id once, before the loop, so that `dims` is always
        # bound when the output file name is built.
        mdl_tokens = mdl.split("_")
        dims = self.get_dims(int(mdl_tokens[1]))

        gae = None
        feats = []
        for dag in date_range:
            print(f"processing dag {dag}")
            node, edge = self.proces_month(dag)
            cnt = node.shape[0]
            G = self.create_graph(node, edge)
            if gae is None:
                act = tf.nn.sigmoid if mdl_tokens[7] == 'sigm' else tf.nn.tanh
                # NOTE(review): the dropout token is passed on as a string,
                # exactly as before - confirm that GraphAutoEncoder accepts it.
                do = mdl_tokens[5]
                gae = GraphAutoEncoder(
                    G,
                    support_size=AmlSimPreprocessor.support_size,
                    dims=dims,
                    batch_size=AmlSimPreprocessor.batch_size,
                    hub0_feature_with_neighb_dim=AmlSimPreprocessor.
                    hub0_feature_with_neighb_dim,
                    useBN=AmlSimPreprocessor.useBN,
                    verbose=False,
                    seed=1,
                    learning_rate=0.01,
                    act=act,
                    dropout=do)
                gae.load_weights(self.out_dir + 'mdl_' + mdl)
            embed = gae.calculate_embeddings(G)

            # combine with nodes; column 0 of the embedding is used as index.
            # Twice as many embedding columns are named when no hub0 feature
            # dimension is configured.
            if AmlSimPreprocessor.hub0_feature_with_neighb_dim is None:
                embed_width = dims[-1] * 2
            else:
                embed_width = dims[-1]
            pd_embed = pd.DataFrame(
                data=embed[:cnt, 1:],
                index=embed[:cnt, 0],
                columns=[f'embed_{i}' for i in range(embed_width)])
            feats.append(
                pd.merge(node,
                         pd_embed,
                         left_index=True,
                         right_index=True,
                         how='inner'))

        if not feats:
            raise ValueError("date_range is empty, no features were created")

        # one concat at the end avoids copying the growing frame every period
        combined_feat = pd.concat(feats)

        feat_file = self.out_dir + "features_" + str(dims[-1])
        combined_feat.to_parquet(feat_file)

        # return column list
        excl_cols = ['is_sar', 'dag', 'orig_id', 'id']
        feat_cols = [c for c in combined_feat.columns if c not in excl_cols]
        return feat_file, feat_cols
예제 #7
0
    def gs_graphcase(self, G, dim_size):
        """
        Grid search over learning rate, dropout and activation function for
        one dimension size.

        For every combination of ``AmlSimPreprocessor.learning_rates``,
        ``dropout_rates`` and ``act_functions`` a GraphAutoEncoder is fitted
        on ``G``. The training history and the weights of every run are
        written to ``self.out_dir``, as is the dict with the validation loss
        per run.

        Args:
            G: graph handed to GraphAutoEncoder.
            dim_size: size passed to ``self.get_dims`` to obtain the layer
                dimensions; it is also part of the run id.

        Returns:
            The run id of the run with the lowest validation loss.

        Raises:
            ValueError: raised by ``min`` when the grid is empty and no run
                was executed.
        """
        gs_res = {}
        dims = self.get_dims(dim_size)

        for lr in AmlSimPreprocessor.learning_rates:
            for do in AmlSimPreprocessor.dropout_rates:
                for act in AmlSimPreprocessor.act_functions:
                    gae = GraphAutoEncoder(
                        G,
                        support_size=AmlSimPreprocessor.support_size,
                        dims=dims,
                        batch_size=AmlSimPreprocessor.batch_size,
                        hub0_feature_with_neighb_dim=AmlSimPreprocessor.
                        hub0_feature_with_neighb_dim,
                        useBN=AmlSimPreprocessor.useBN,
                        verbose=True,
                        seed=1,
                        learning_rate=lr,
                        act=act,
                        dropout=do)
                    train_res = gae.fit(epochs=AmlSimPreprocessor.epochs,
                                        layer_wise=False)
                    history = train_res[None].history

                    # save results; the files are closed deterministically
                    act_str = 'tanh' if act == tf.nn.tanh else 'sigm'
                    run_id = f'dim_{dim_size}_lr_{lr}_do_{do}_act_{act_str}_layers_{self.layers}'
                    with open(self.out_dir + 'res_' + run_id, "wb") as res_file:
                        pickle.dump(history, res_file)
                    gae.save_weights(self.out_dir + 'mdl_' + run_id)

                    # print and store result: mean validation loss of the last
                    # (at most) two epochs. Dividing by the real number of
                    # values keeps a single-epoch run from being halved.
                    last_losses = history['val_loss'][-2:]
                    val_los = sum(last_losses) / max(len(last_losses), 1)
                    gs_res[run_id] = val_los
                    print(
                        f'dims:{dim_size}, lr:{lr}, dropout lvl:{do}, act func:{act_str} resultsing val loss {val_los}'
                    )

        # print all results, save and return best model
        for k, v in gs_res.items():
            print(f'run: {k} with result {v}')
        gs_file = self.out_dir + f'graphcase_gs_results_dim_{dim_size}'
        with open(gs_file, "wb") as out_file:
            pickle.dump(gs_res, out_file)
        # the best model is the one with the LOWEST validation loss
        return min(gs_res, key=gs_res.get)
예제 #8
0
    def test_train_layer2(self):
        """
        Train the four layers one after the other and check for each of them
        that the last validation loss is lower than the first one.
        """
        graph = gb.create_directed_barbell(4, 4)
        gae = GraphAutoEncoder(
            graph,
            support_size=[3, 3],
            dims=[2, 3, 3, 2],
            batch_size=3,
            max_total_steps=10,
            verbose=False,
            seed=2,
            act=tf.nn.relu,
        )

        for layer in (1, 2, 3, 4):
            res = gae.train_layer(layer, learning_rate=0.0001)
            first_loss, last_loss = res['val_l'][0], res['val_l'][-1]
            self.assertTrue(
                first_loss > last_loss,
                f"loss has not decreased while training layer {layer}")
예제 #9
0
 def test_consistency_checks(self):
     """
     Check that creating a GraphAutoEncoder with this configuration is
     rejected with an AssertionError during initialisation.
     """
     graph = gb.create_directed_barbell(10, 10)
     init_args = {
         'support_size': [5, 5],
         'dims': [2, 6, 6],
         'batch_size': 1024,
         'max_total_steps': 10,
         'verbose': True,
         'seed': 2,
     }
     # the constructor itself is expected to raise, nothing needs to be kept
     with self.assertRaises(AssertionError):
         GraphAutoEncoder(graph, **init_args)
예제 #10
0
    def test_train_layer4(self):
        """
        Test a graph with an additional edge label that is used as custom
        weight label, by checking the first loss of the first layer.
        """
        graph = gb.create_directed_barbell(4, 4)
        for src, dst, attrs in graph.edges(data=True):
            attrs['edge_lbl1'] = src / (dst + 0.011) + 0.22

        gae = GraphAutoEncoder(
            graph,
            support_size=[3, 3],
            dims=[2, 3, 3, 2],
            batch_size=3,
            max_total_steps=10,
            verbose=False,
            seed=2,
            weight_label='edge_lbl1',
            act=tf.nn.relu,
        )

        first_loss = gae.train_layer(1, learning_rate=0.0001)['l'][0]
        self.assertAlmostEqual(
            first_loss, 49.392754, 4,
            "loss of the layer 1 does not match with expectations using a \
                               custom edge label")
예제 #11
0
    def test_train_layer(self):
        """
        Check the first loss of every layer-wise training run, and of the
        all-layer run, against fixed reference values.
        """
        graph = gb.create_directed_barbell(4, 4)
        # add the node ids to the graph as an extra label
        nx.set_node_attributes(graph, {node: node for node in range(13)},
                               'label3')
        gae = GraphAutoEncoder(
            graph,
            support_size=[3, 3],
            dims=[2, 3, 3, 2],
            batch_size=3,
            max_total_steps=1,
            verbose=False,
            seed=2,
            act=tf.nn.relu,
        )

        msg = "loss of the initial setup does not match with expectations"
        # (layer, expected first loss, number of decimal places to compare)
        layer_checks = (
            (1, 2158.0686, 4),
            (2, 2613.2725, 4),
            (3, 2693.6736, 4),
            (4, 2842.3582, 3),
        )
        for layer, expected, places in layer_checks:
            res = gae.train_layer(layer)
            self.assertAlmostEqual(res['l'][0], expected, places, msg)

        res = gae.train_layer(4, all_layers=True)
        self.assertAlmostEqual(res['l'][0], 2842.1409, 4, msg)
예제 #12
0
#%% create graph

graph = gb.create_directed_barbell(10, 10)
graph.remove_edge(21, 20)
graph.add_edge(29, 20, weight=1)
# Correct the edge weights: edges that point to a node with an id from 10 up
# to and including 20 get weight 1, all other edges get the product of the
# 'label1' values of their two end nodes.
ndic = graph.nodes(data='label1')
for u, v, d in graph.edges(data=True):
    if 9 < v < 21:
        d['weight'] = 1
    else:
        d['weight'] = ndic[u] * ndic[v]

#%% create and train model
gae = GraphAutoEncoder(graph, learning_rate=0.01, support_size=[5, 5], dims=[3, 5, 7, 6, 2],
                       batch_size=30, max_total_steps=1000, verbose=True, act=tf.nn.tanh)
if TRAIN:
    train_res = {}
    for i in range(len(gae.dims)):
        # only the second and third layer are trained with dropout
        if i in [1, 2]:
            train_res["l"+str(i+1)] = gae.train_layer(i+1, dropout=0.1)
        else:
            train_res["l"+str(i+1)] = gae.train_layer(i+1)

    train_res['all'] = gae.train_layer(len(gae.dims), all_layers=True)
    # the context manager closes the results file, also when pickling fails
    with open(RESULTS_FILE, "wb") as results_file:
        pickle.dump(train_res, results_file)
    gae.save_model(MODEL_FILENAME)
else:
    gae.load_model(MODEL_FILENAME, graph)

embed = gae.calculate_embeddings()
예제 #13
0
from  GAE.graph_case_controller import GraphAutoEncoder
import example_graph_bell as gb
import networkx as nx
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
import pickle
import random
#%%
graph = gb.create_directed_barbell(10, 10)
random.seed(2)
# label every node with its own id and with a seeded random value
for node_id, attrs in graph.nodes(data=True):
    attrs['label1'] = int(node_id)
    attrs['label2'] = random.uniform(0.0, 1.0)
gae = GraphAutoEncoder(
    graph,
    learning_rate=0.01,
    support_size=[5, 5],
    dims=[3, 5, 7, 6, 2],
    batch_size=12,
    max_total_steps=10,
    verbose=True,
    useBN=True,
)
gae.fit()
embed = gae.calculate_embeddings()
# retrieve (and show) the layer-1 structure of node 15
l1_struct, graph2 = gae.get_l1_structure(
    15, show_graph=True, node_label='feat0')

#%%

# print(l1_struct)
# train_res = {}
# for i in range(len(gae.dims)):
#     train_res["l"+str(i+1)] = gae.train_layer(i+1)

# train_res['all'] = gae.train_layer(len(gae.dims), all_layers=True, dropout=None)
# embed = gae.calculate_embeddings()
# filename = '/Users/tonpoppe/workspace/GraphCase/data/model1'
# gae.save_model(filename)
예제 #14
0
# r = G.out_edges(data=True)

# in_edges_dict = {}
# in_weight_dict = {}
# for out_node, in_node, weight in G.in_edges(data=True):
#         in_edges_dict[in_node] = in_edges_dict.get(in_node, list()) + \
#                             [(out_node, list(weight.values()))]
#         in_weight_dict[in_node] = in_weight_dict.get(in_node, list()) + \
#                             [(out_node, weight['weight'])]

# print(in_edges_dict)
# print(in_weight_dict)
gae = GraphAutoEncoder(
    G,
    support_size=[3, 4],
    dims=[2, 6, 6, 2, 1],
    batch_size=5,
    max_total_steps=10,
    verbose=True,
    seed=2,
)

# train every layer separately (layers are numbered from 1) ...
for layer in range(1, len(gae.dims) + 1):
    h = gae.train_layer(layer, act=tf.nn.relu)

# ... and finish with one run over all layers together
h = gae.train_layer(len(gae.dims), all_layers=True, act=tf.nn.relu)
# # print(h1['val_l'])

e = gae.calculate_embeddings()
print(f"e: \n {e}")

# fig, ax = plt.subplots()
# ax.scatter(e[:,1], e[:,2])
예제 #15
0
# Build the graph once; the original script repeated the identical call and
# threw away the first result.
graph = karate.create_karakte_mirror_network({'weight': 'random'}, {
    'label0': 1,
    'label1': 'random'
})
# overwrite 'label0' of every node with the node id
for node in graph.nodes(data=True):
    node[1]['label0'] = int(node[0])

gae = GraphAutoEncoder(graph,
                       learning_rate=0.01,
                       support_size=[3, 3],
                       dims=[3, 5, 7, 6, 2],
                       batch_size=12,
                       max_total_steps=10,
                       verbose=True,
                       useBN=True)


def plot_node(graph, node_id):
    """
    Load the surroundings of a node into a ``net.Network`` object.

    The sub graph consists of all nodes that are a neighbour of one of the
    neighbours of ``node_id``. Nothing is returned by this function.

    Args:
        graph: graph that offers ``neighbors`` and ``subgraph``.
        node_id: id of the node whose surroundings are selected.
    """
    # unique set of the neighbours of the neighbours of the node
    second_hop = {
        second
        for first in graph.neighbors(node_id)
        for second in graph.neighbors(first)
    }
    subgraph = graph.subgraph(list(second_hop))

    # plot subgraph
    nt = net.Network(notebook=True, directed=True)
    nt.from_nx(subgraph)
예제 #16
0
    def test_save_load(self):
        """
        Test that a model which is saved and then loaded into a new object
        gives the same embeddings as the original model.

        The embeddings are compared on the sum of their absolute differences,
        so deviations with opposite sign cannot cancel each other out.
        """
        filename = os.getcwd() + "/data/test_save_load"
        graph = gb.create_directed_barbell(4, 4)
        gae = GraphAutoEncoder(graph,
                               learning_rate=0.01,
                               support_size=[5, 5],
                               dims=[3, 5, 7, 6, 2],
                               batch_size=12,
                               max_total_steps=50,
                               verbose=True)
        gae.fit(graph)
        embed = gae.calculate_embeddings()
        gae.save_model(filename)

        # second, untrained model with the same configuration
        gae2 = GraphAutoEncoder(graph,
                                learning_rate=0.01,
                                support_size=[5, 5],
                                dims=[3, 5, 7, 6, 2],
                                batch_size=12,
                                max_total_steps=50,
                                verbose=True)
        gae2.load_model(filename, graph)
        embed2 = gae2.calculate_embeddings()

        # A signed sum of the differences can be ~0 for different embeddings;
        # use the absolute differences instead.
        abs_diff = np.abs(np.subtract(embed, embed2))
        self.assertAlmostEqual(
            np.sum(abs_diff), 0, 4,
            "loaded model gives different result then original")