Example #1
0
    def test_fit(self):
        """
        Test if fit function results in the same results as when trained separately

        A first model is trained layer by layer, followed by one training run
        over all layers (without dropout). A second, identically configured
        model is trained with a single call to ``fit``. The embeddings of both
        models are then compared element by element.
        """
        graph = gb.create_directed_barbell(4, 4)

        def new_model():
            # Every call passes freshly created argument lists, so the two
            # models under test cannot share any mutable configuration.
            return GraphAutoEncoder(graph,
                                    learning_rate=0.01,
                                    support_size=[5, 5],
                                    dims=[3, 5, 7, 6, 2],
                                    batch_size=12,
                                    max_total_steps=50,
                                    verbose=True)

        # model 1: train the layers one by one, then all layers together
        gae = new_model()
        layer_count = len(gae.dims)
        for layer in range(1, layer_count + 1):
            gae.train_layer(layer)
        gae.train_layer(layer_count, all_layers=True, dropout=None)
        embed = gae.calculate_embeddings()

        # model 2: let fit do the complete training
        gae2 = new_model()
        gae2.fit(graph)
        embed2 = gae2.calculate_embeddings()

        # Compare on the largest absolute deviation. A plain sum of the
        # differences can be ~0 for different models, because positive and
        # negative deviations cancel each other out.
        max_abs_diff = float(np.max(np.abs(np.subtract(embed, embed2))))
        self.assertAlmostEqual(
            max_abs_diff, 0, 4,
            "fit method results in a different model when trained separately")
Example #2
0
    def test_save_load(self):
        """
        Test if saving and loading the model in a new object gives the same results

        A model is trained with ``fit`` and saved to ``data/test_save_load``
        below the current working directory. A second, identically configured
        model loads that file, and the embeddings of both models are compared
        element by element.
        """
        filename = os.path.join(os.getcwd(), "data", "test_save_load")
        graph = gb.create_directed_barbell(4, 4)

        def new_model():
            # Every call passes freshly created argument lists, so the two
            # models under test cannot share any mutable configuration.
            return GraphAutoEncoder(graph,
                                    learning_rate=0.01,
                                    support_size=[5, 5],
                                    dims=[3, 5, 7, 6, 2],
                                    batch_size=12,
                                    max_total_steps=50,
                                    verbose=True)

        # train the original model and store it
        gae = new_model()
        gae.fit(graph)
        embed = gae.calculate_embeddings()
        gae.save_model(filename)

        # untrained model that only gets its state from the saved file
        gae2 = new_model()
        gae2.load_model(filename, graph)
        embed2 = gae2.calculate_embeddings()

        # Compare on the largest absolute deviation. A plain sum of the
        # differences can be ~0 for different models, because positive and
        # negative deviations cancel each other out.
        max_abs_diff = float(np.max(np.abs(np.subtract(embed, embed2))))
        self.assertAlmostEqual(
            max_abs_diff, 0, 4,
            "loaded model gives different result then original")
Example #3
0
    def test_train_layer5(self):
        """
        Test using final combination layer. Test if training works correctly and if the calculation
        of the embeddings works correctly.

        Every edge of a directed barbell graph gets an 'edge_lbl1' attribute
        derived from its two node ids, which is passed to the model as weight
        label. All layers are trained one by one and the loss of the last
        layer is checked. After that all layers are trained together and one
        value of the resulting embedding is checked.
        """
        graph = gb.create_directed_barbell(4, 4)
        for in_node, out_node, lbl in graph.edges(data=True):
            lbl['edge_lbl1'] = in_node / (out_node + 0.011) + 0.22

        # NOTE(review): the expected values below presumably only hold for
        # exactly this configuration and seed - confirm before changing it.
        gae = GraphAutoEncoder(graph,
                               support_size=[3, 3],
                               dims=[2, 3, 3, 2, 2],
                               batch_size=3,
                               max_total_steps=10,
                               verbose=False,
                               seed=2,
                               weight_label='edge_lbl1',
                               act=tf.nn.relu)

        # train layer by layer; only the result of the last layer is checked
        for i in range(len(gae.dims)):
            res = gae.train_layer(i + 1, act=tf.nn.relu)

        # Adjacent literals instead of a backslash continuation inside the
        # string, which would embed the indentation in the failure message.
        self.assertAlmostEqual(
            res['l'][0], 134.9637, 4,
            "loss of the last layer does not match with expectations using a "
            "final combination layer")

        # train all layers together; the returned training result is not needed
        gae.train_layer(len(gae.dims), all_layers=True, act=tf.nn.relu)
        embed = gae.calculate_embeddings()
        self.assertAlmostEqual(
            embed[0][2], 38.221458435058594, 4,
            "embedding of the first batch node differs from expected value")
Example #4
0
    def create_embedding(self, mdl, date_range=range(1, 25)):
        """
        Calculate embeddings with a stored model for every entry of date_range
        and write them, merged with the node features, to one parquet file.

        The model name is split on "_": field 1 is passed (as int) to
        ``get_dims``, field 5 is used as dropout setting and field 7 selects
        the activation function ('sigm' gives sigmoid, anything else tanh).
        The weights are loaded from ``<out_dir>mdl_<mdl>``.

        Args:
            mdl: name of the stored model, also encoding its settings.
            date_range: iterable with the values handed to ``proces_month``,
                one at a time.

        Returns:
            Tuple ``(feat_file, feat_cols)``: the path of the written parquet
            file and the columns of the combined frame, excluding 'is_sar',
            'dag', 'orig_id' and 'id'.

        Raises:
            ValueError: if date_range contains no entries.
        """
        gae = None
        dims = None
        frames = []
        for dag in date_range:
            print(f"processing dag {dag}")
            node, edge = self.proces_month(dag)
            cnt = node.shape[0]
            G = self.create_graph(node, edge)
            if gae is None:
                # The model is created once, on the first graph, and reused
                # for all following graphs.
                mdl_parts = mdl.split("_")
                dims = self.get_dims(int(mdl_parts[1]))
                act = tf.nn.sigmoid if mdl_parts[7] == 'sigm' else tf.nn.tanh
                # NOTE(review): dropout is handed over as the raw string from
                # the model name - confirm GraphAutoEncoder expects a string.
                do = mdl_parts[5]
                gae = GraphAutoEncoder(
                    G,
                    support_size=AmlSimPreprocessor.support_size,
                    dims=dims,
                    batch_size=AmlSimPreprocessor.batch_size,
                    hub0_feature_with_neighb_dim=AmlSimPreprocessor.
                    hub0_feature_with_neighb_dim,
                    useBN=AmlSimPreprocessor.useBN,
                    verbose=False,
                    seed=1,
                    learning_rate=0.01,
                    act=act,
                    dropout=do)
                gae.load_weights(self.out_dir + 'mdl_' + mdl)
            embed = gae.calculate_embeddings(G)

            # combine with nodes: of the first cnt embedding rows the first
            # column is used as index, the remaining columns as values
            if AmlSimPreprocessor.hub0_feature_with_neighb_dim is None:
                embed_width = dims[-1] * 2
            else:
                embed_width = dims[-1]
            pd_embed = pd.DataFrame(
                data=embed[:cnt, 1:],
                index=embed[:cnt, 0],
                columns=[f'embed_{i}' for i in range(embed_width)])
            frames.append(
                pd.merge(node,
                         pd_embed,
                         left_index=True,
                         right_index=True,
                         how='inner'))

        if not frames:
            # Without this check an empty date_range fails further down with
            # an unrelated error on the unset dims / combined frame.
            raise ValueError(
                "date_range is empty, no embeddings were created")

        # One concat over all collected frames instead of growing the frame
        # in every iteration, which copies the accumulated data each time.
        combined_feat = pd.concat(frames)

        feat_file = self.out_dir + "features_" + str(dims[-1])
        combined_feat.to_parquet(feat_file)

        # return column list
        excl_cols = ['is_sar', 'dag', 'orig_id', 'id']
        feat_cols = [c for c in combined_feat.columns if c not in excl_cols]
        return feat_file, feat_cols
Example #5
0
                       batch_size=30, max_total_steps=1000, verbose=True, act=tf.nn.tanh)
# Either train the model and store the training results and the model, or
# restore a previously saved model.
if TRAIN:
    train_res = {}
    # train the layers one by one; the second and third layer use dropout
    for i in range(len(gae.dims)):
        if i in [1, 2]:
            train_res["l"+str(i+1)] = gae.train_layer(i+1, dropout=0.1)
        else:
            train_res["l"+str(i+1)] = gae.train_layer(i+1)

    # final training run over all layers together
    train_res['all'] = gae.train_layer(len(gae.dims), all_layers=True)
    # the with block closes the results file, also when pickling fails
    with open(RESULTS_FILE, "wb") as results_file:
        pickle.dump(train_res, results_file)
    gae.save_model(MODEL_FILENAME)
else:
    gae.load_model(MODEL_FILENAME, graph)

embed = gae.calculate_embeddings()


# %% get table with node details
indeg = graph.in_degree()
outdeg = graph.out_degree()
# one row per node: node id, both labels, both degrees and the embedding
# columns 1 and 2 of that node
tbl = np.array([
    [
        node_id,
        attrs['label1'],
        attrs['label2'],
        indeg[node_id],
        outdeg[node_id],
        embed[node_id, 1],
        embed[node_id, 2],
    ]
    for node_id, attrs in graph.nodes(data=True)
])
# the first column (node id) becomes the index, the other columns the data
pd_tbl = pd.DataFrame(
    data=tbl[:, 1:],
    index=tbl[:, 0],
    columns=['label1', 'label2', 'in_degree', 'out_degree', 'embed1', 'embed2'],
)
print(pd_tbl)

#%%  show graph
# Node positions from the Kamada-Kawai layout function of nx (presumably
# networkx - confirm against the imports), called with scale=10 and without
# an edge weight attribute.
pos = nx.kamada_kawai_layout(graph, scale=10, weight=None)
node_count = graph.number_of_nodes()
# NOTE(review): outdeg is also assigned from graph.out_degree() in the node
# table cell; this looks redundant unless graph changed in between - confirm.
outdeg = graph.out_degree()