Example #1
# Imports (module paths assumed from the dynamicgem package, which this
# example appears to target)
import os
from time import time

import matplotlib.pyplot as plt

from dynamicgem.embedding.dynAERNN import DynAERNN
from dynamicgem.graph_generation import dynamic_SBM_graph as sbm
from dynamicgem.visualization import plot_dynamic_sbm_embedding


def main():
    # Parameters for Stochastic block model graph
    # Total of 1000 nodes
    node_num = 1000
    # Test with two communities
    community_num = 2
    # At each iteration, migrate 10 nodes from one community to another
    node_change_num = 10
    # Length of total time steps the graph will dynamically change
    length = 7
    # Output directories for results and intermediate model files
    outdir = './output'
    intr = './intermediate'
    if not os.path.exists(outdir):
        os.mkdir(outdir)
    if not os.path.exists(intr):
        os.mkdir(intr)
    testDataType = 'sbm_cd'
    # Generate the dynamic graph
    dynamic_sbm_series = list(sbm.get_community_diminish_series_v2(node_num,
                                                                   community_num,
                                                                   length,
                                                                   1,  # community ID to perturb
                                                                   node_change_num))
    graphs = [g[0] for g in dynamic_sbm_series]
    # parameters for the dynamic embedding
    # dimension of the embedding
    dim_emb = 8
    lookback = 2

    # dynAERNN
    embedding = DynAERNN(d=dim_emb,
                         beta=5,
                         n_prev_graphs=lookback,
                         nu1=1e-6,
                         nu2=1e-6,
                         n_aeunits=[500, 300],
                         n_lstmunits=[500, dim_emb],
                         rho=0.3,
                         n_iter=2,
                         xeta=1e-3,
                         n_batch=100,
                         modelfile=['./intermediate/enc_model_dynAERNN.json',
                                    './intermediate/dec_model_dynAERNN.json'],
                         weightfile=['./intermediate/enc_weights_dynAERNN.hdf5',
                                     './intermediate/dec_weights_dynAERNN.hdf5'],
                         savefilesuffix="testing")

    embs = []
    t1 = time()
    for temp_var in range(lookback + 1, length + 1):
        emb, _ = embedding.learn_embeddings(graphs[:temp_var])
        embs.append(emb)
    print(embedding._method_name + ':\n\tTraining time: %f' % (time() - t1))
    plt.figure()
    plt.clf()
    plot_dynamic_sbm_embedding.plot_dynamic_sbm_embedding_v2(embs[-5:-1], dynamic_sbm_series[-5:])
    plt.show()
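
# Aside: a minimal standalone sketch of the snapshot windows the loop above
# generates, assuming the same length/lookback values as in main()
length, lookback = 7, 2
for temp_var in range(lookback + 1, length + 1):
    print('call %d trains on graphs[0:%d]' % (temp_var - lookback, temp_var))
# -> 5 calls, on prefixes of length 3, 4, 5, 6, 7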
Example #2
# (Assumes the imports and setup from Example #1: graphs, dynamic_sbm_series,
# dim_emb, lookback, and length defined as above.)

# dynAERNN
embedding = DynAERNN(d=dim_emb,
                     beta=5,
                     n_prev_graphs=lookback,
                     nu1=1e-6,
                     nu2=1e-6,
                     n_aeunits=[500, 300],
                     n_lstmunits=[500, dim_emb],
                     rho=0.3,
                     n_iter=250,
                     xeta=1e-3,
                     n_batch=100,
                     modelfile=['./intermediate/enc_model_dynAERNN.json',
                                './intermediate/dec_model_dynAERNN.json'],
                     weightfile=['./intermediate/enc_weights_dynAERNN.hdf5',
                                 './intermediate/dec_weights_dynAERNN.hdf5'],
                     savefilesuffix="testing")

embs = []
t1 = time()
for temp_var in range(lookback + 1, length + 1):
    emb, _ = embedding.learn_embeddings(graphs[:temp_var])
    embs.append(emb)
print(embedding._method_name + ':\n\tTraining time: %f' % (time() - t1))
plt.figure()
plt.clf()
plot_dynamic_sbm_embedding.plot_dynamic_sbm_embedding_v2(embs[-5:-1], dynamic_sbm_series[-5:])
plt.show()
Example #3
# Imports (dynamicgem model paths assumed; third_party_utils is a
# project-local helper module)
import os
from time import time

import numpy as np
import networkx as nx

import third_party_utils
from dynamicgem.embedding.ae_static import AE
from dynamicgem.embedding.dynAE import DynAE
from dynamicgem.embedding.dynRNN import DynRNN
from dynamicgem.embedding.dynAERNN import DynAERNN


def main(args):

    # Set seeds
    np.random.seed(args.seed)
    from tensorflow import set_random_seed
    set_random_seed(args.seed)

    # Set the number of timesteps in the sequence
    num_timesteps = args.seq_len - 1  # one timestep per pair of consecutive graphs
    num_training_loops = num_timesteps - 1  # Num training loops to actually do (keep last graph for test/validation)

    data_loc = os.path.join(args.data_loc, args.dataset)

    # Embedding hyperparameters (values assumed for illustration; the original
    # excerpt uses dim_emb and lookback without defining them)
    dim_emb = 128
    lookback = 2

    # Preload the training graphs into memory... not very scalable, but it
    # helps with CPU load. Preload all but the last graph, which is used for
    # val/test.
    graphs = []
    for i in range(num_timesteps):
        adj_train, features = third_party_utils.load_adj_graph(
            f'{data_loc}_t{i}.npz')  # Load the input graph
        graphs.append(
            nx.from_scipy_sparse_matrix(adj_train, create_using=nx.DiGraph()))
        print(f'{args.dataset}_t{i} Loaded')
    # Length should equal num_timesteps (indices start from zero)
    assert len(graphs) == num_timesteps
    print("Training graphs loaded into memory")

    # Extract the val/test graph which is the final one in the sequence
    val_test_graph_previous, _ = third_party_utils.load_adj_graph(
        f'{data_loc}_t{num_timesteps-1}.npz')
    val_test_graph, _ = third_party_utils.load_adj_graph(
        f'{data_loc}_t{num_timesteps}.npz')
    val_test_graph_adj, train_edges, val_edges, val_edges_false, test_edges, test_edges_false = third_party_utils.mask_test_edges(
        val_test_graph)
    # Keep the previous graph's masks under distinct names so the current
    # graph's adjacency and negative-edge sets stay intact
    _, train_edges_pre, val_edges_pre, val_edges_false_pre, test_edges_pre, test_edges_false_pre = third_party_utils.mask_test_edges(
        val_test_graph_previous)

    pos_edges = np.concatenate((val_edges, test_edges, train_edges)).tolist()
    pos_edges = set(map(tuple, pos_edges))
    pos_edges_pre = np.concatenate(
        (val_edges_pre, test_edges_pre, train_edges_pre)).tolist()
    pos_edges_pre = set(map(tuple, pos_edges_pre))
    new_edges = np.array(list(pos_edges - pos_edges_pre))

    num_edges = len(new_edges)
    # Negative samples for the new-edge evaluation come from the held-out
    # false test edges
    new_edges_false = test_edges_false[:num_edges]

    print(
        f"Validation and test edges captured from graph {args.dataset}_t{args.seq_len-1} in the sequence"
    )
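
    # From here on, one of four models is trained on the first
    # num_training_loops snapshots. Each branch then evaluates link prediction
    # on (a) the edges that newly appeared in the final snapshot (new_edges)
    # and (b) the held-out test split of the final snapshot (test_edges).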

    # Choose the model to run
    #AE Static ----------------------------------------------------------------------------
    # Seems to be a plain (non-offset) autoencoder baseline
    if args.model == "AE":

        embedding = AE(d=dim_emb,
                       beta=5,
                       nu1=1e-6,
                       nu2=1e-6,
                       K=3,
                       n_units=[
                           500,
                           300,
                       ],
                       n_iter=100,
                       xeta=1e-6,
                       n_batch=100,
                       modelfile=[
                           './intermediate/enc_modelsbm.json',
                           './intermediate/dec_modelsbm.json'
                       ],
                       weightfile=[
                           './intermediate/enc_weightssbm.hdf5',
                           './intermediate/dec_weightssbm.hdf5'
                       ])
        t1 = time()
        #ae static

        # Loop through each of the graphs in the time series and train model
        print("Starting training AE")
        # for temp_var in range(num_training_loops):
        #     emb, _= embedding.learn_embeddings(graphs[temp_var])

        emb, _ = embedding.learn_embeddings(graphs[:num_training_loops])
        print(embedding._method_name + ':\n\tTraining time: %f' %
              (time() - t1))
        print(
            third_party_utils.eval_gae(test_edges, test_edges_false,
                                       embedding))

        accuracy, roc_score, ap_score, tn, fp, fn, tp = third_party_utils.eval_gae(
            new_edges, new_edges_false, embedding, use_embeddings=False)
        ae_accuracy, ae_roc_score, ae_ap_score, ae_tn, ae_fp, ae_fn, ae_tp = third_party_utils.eval_gae(
            test_edges, test_edges_false, embedding, use_embeddings=False)

    #dynAE ------------------------------------------------------------------------------
    # As proposed in the dyngraph2vec paper. Seems to just be an offset dense autoencoder trained to predict the next graph.
    elif args.model == "DynAE":

        embedding = DynAE(d=dim_emb,
                          beta=5,
                          n_prev_graphs=lookback,
                          nu1=1e-6,
                          nu2=1e-6,
                          n_units=[
                              500,
                              300,
                          ],
                          rho=0.3,
                          n_iter=150,
                          xeta=1e-5,
                          n_batch=100,
                          modelfile=[
                              './intermediate/enc_model_dynAE.json',
                              './intermediate/dec_model_dynAE.json'
                          ],
                          weightfile=[
                              './intermediate/enc_weights_dynAE.hdf5',
                              './intermediate/dec_weights_dynAE.hdf5'
                          ],
                          savefilesuffix="testing")
        t1 = time()
        # for temp_var in range(lookback+1, num_training_loops+1):
        #     print(temp_var)
        #     print(graphs[:temp_var])
        #     emb, _ = embedding.learn_embeddings(graphs[:temp_var])

        emb, _ = embedding.learn_embeddings(graphs[:num_training_loops])

        if new_edges.size != 0:
            print("Evaluating edges that newly appeared in the final graph")
            accuracy, roc_score, ap_score, tn, fp, fn, tp = third_party_utils.eval_gae(
                new_edges, new_edges_false, embedding, use_embeddings=False)
            print(
                third_party_utils.eval_gae(new_edges,
                                           new_edges_false,
                                           embedding,
                                           use_embeddings=False))
        else:
            accuracy, roc_score, ap_score, tn, fp, fn, tp = 0, 0, 0, 0, 0, 0, 0

        ae_accuracy, ae_roc_score, ae_ap_score, ae_tn, ae_fp, ae_fn, ae_tp = third_party_utils.eval_gae(
            test_edges, test_edges_false, embedding, use_embeddings=False)
        print(
            third_party_utils.eval_gae(test_edges,
                                       test_edges_false,
                                       embedding,
                                       use_embeddings=False))

    #dynRNN ------------------------------------------------------------------------------
    # As proposed in the dyngraph2vec paper. Seems to use only LSTM cells, with no compression beforehand.
    elif args.model == "DynRNN":

        embedding = DynRNN(d=dim_emb,
                           beta=5,
                           n_prev_graphs=lookback,
                           nu1=1e-6,
                           nu2=1e-6,
                           n_enc_units=[500, 200],
                           n_dec_units=[200, 500],
                           rho=0.3,
                           n_iter=150,
                           xeta=1e-4,
                           n_batch=100,
                           modelfile=[
                               './intermediate/enc_model_dynRNN.json',
                               './intermediate/dec_model_dynRNN.json'
                           ],
                           weightfile=[
                               './intermediate/enc_weights_dynRNN.hdf5',
                               './intermediate/dec_weights_dynRNN.hdf5'
                           ],
                           savefilesuffix="testing")

        t1 = time()
        # for temp_var in range(lookback+1, num_training_loops+1):
        #     emb, _ = embedding.learn_embeddings(graphs[:temp_var])

        emb, _ = embedding.learn_embeddings(graphs[:num_training_loops])

        if new_edges.size != 0:
            accuracy, roc_score, ap_score, tn, fp, fn, tp = third_party_utils.eval_gae(
                new_edges, new_edges_false, embedding, use_embeddings=False)
            print(
                third_party_utils.eval_gae(new_edges,
                                           new_edges_false,
                                           embedding,
                                           use_embeddings=False))
        else:
            accuracy, roc_score, ap_score, tn, fp, fn, tp = 0, 0, 0, 0, 0, 0, 0

        ae_accuracy, ae_roc_score, ae_ap_score, ae_tn, ae_fp, ae_fn, ae_tp = third_party_utils.eval_gae(
            test_edges, test_edges_false, embedding, use_embeddings=False)
        print(
            third_party_utils.eval_gae(test_edges,
                                       test_edges_false,
                                       embedding,
                                       use_embeddings=False))

    #dynAERNN ------------------------------------------------------------------------------
    # As proposed in the dyngraph2vec paper. Uses an autoencoder before passing to an LSTM cell.
    elif args.model == "DynAERNN":

        embedding = DynAERNN(d=dim_emb,
                             beta=5,
                             n_prev_graphs=lookback,
                             nu1=1e-6,
                             nu2=1e-6,
                             n_aeunits=[500, 300],
                             n_lstmunits=[300, dim_emb],
                             rho=0.3,
                             n_iter=150,
                             xeta=1e-3,
                             n_batch=100,
                             modelfile=[
                                 './intermediate/enc_model_dynAERNN.json',
                                 './intermediate/dec_model_dynAERNN.json'
                             ],
                             weightfile=[
                                 './intermediate/enc_weights_dynAERNN.hdf5',
                                 './intermediate/dec_weights_dynAERNN.hdf5'
                             ],
                             savefilesuffix="testing")

        t1 = time()
        # for temp_var in range(lookback+1, num_training_loops+1):
        #     emb, _ = embedding.learn_embeddings(graphs[:temp_var])

        #lp.expLP(graphs, embedding, 2, 0, 0)

        emb, _ = embedding.learn_embeddings(graphs[:num_training_loops])

        if new_edges.size != 0:
            accuracy, roc_score, ap_score, tn, fp, fn, tp = third_party_utils.eval_gae(
                new_edges, new_edges_false, embedding, use_embeddings=False)
            print(
                third_party_utils.eval_gae(new_edges,
                                           new_edges_false,
                                           embedding,
                                           use_embeddings=False))
        else:
            accuracy, roc_score, ap_score, tn, fp, fn, tp = 0, 0, 0, 0, 0, 0, 0

        ae_accuracy, ae_roc_score, ae_ap_score, ae_tn, ae_fp, ae_fn, ae_tp = third_party_utils.eval_gae(
            test_edges, test_edges_false, embedding, use_embeddings=False)
        print(
            third_party_utils.eval_gae(test_edges,
                                       test_edges_false,
                                       embedding,
                                       use_embeddings=False))

    else:
        raise ValueError(f"Unknown model: {args.model}")

    return accuracy, roc_score, ap_score, tn, fp, fn, tp, ae_accuracy, ae_roc_score, ae_ap_score, ae_tn, ae_fp, ae_fn, ae_tp
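
# The new-edge extraction above is plain set arithmetic over (src, dst) edge
# tuples: keep the edges present in the current graph but not in the previous
# one. A minimal standalone sketch with toy arrays:
import numpy as np

edges_now = np.array([[0, 1], [1, 2], [2, 3]])  # current graph's positive edges
edges_before = np.array([[0, 1], [1, 2]])       # previous graph's positive edges
pos = set(map(tuple, edges_now.tolist()))
pos_pre = set(map(tuple, edges_before.tolist()))
new_edges = np.array(list(pos - pos_pre))
print(new_edges)  # [[2 3]] -- the edge that appeared at this timestep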
Example #4
# Imports (dynamicgem paths assumed; `process` is a project-local loader)
import os
from time import time

import numpy as np
import pandas as pd

from dynamicgem.embedding.dynAE import DynAE
from dynamicgem.embedding.dynRNN import DynRNN
from dynamicgem.embedding.dynAERNN import DynAERNN
from dynamicgem.evaluation import metrics
from dynamicgem.utils import evaluation_util
from dynamicgem.utils.dnn_utils import graphify  # assumed location of graphify


def main():
    # data_list = ['cellphone', 'enron', 'fbmessages', 'HS11', 'HS12', 'primary', 'workplace']
    data_list = ['bitcoin_alpha', 'bitcoin_otc', 'college_msg', 'enron_all', 'enron_all_shuffle']
    funcs = ['AE', 'AERNN']
    for data in data_list:
        graphs = process('data/' + data)
        length = len(graphs)
        dim_emb = 128
        lookback = 3

        for func in funcs:
            MAP_list = []
            for i in range(length - lookback - 1):
                if func == 'AERNN':
                    embedding = DynAERNN(d=dim_emb,
                                         beta=5,
                                         n_prev_graphs=lookback,
                                         nu1=1e-6,
                                         nu2=1e-6,
                                         n_aeunits=[500, 300],
                                         n_lstmunits=[500, dim_emb],
                                         rho=0.3,
                                         n_iter=250,
                                         xeta=1e-3,
                                         n_batch=100,
                                         modelfile=None,
                                         weightfile=None,
                                         savefilesuffix=None)
                elif func == 'RNN':
                    embedding = DynRNN(d=dim_emb,
                                       beta=5,
                                       n_prev_graphs=lookback,
                                       nu1=1e-6,
                                       nu2=1e-6,
                                       n_enc_units=[500, 300],
                                       n_dec_units=[500, 300],
                                       rho=0.3,
                                       n_iter=250,
                                       xeta=1e-3,
                                       n_batch=100,
                                       modelfile=None,
                                       weightfile=None,
                                       savefilesuffix=None)
                else:
                    embedding = DynAE(d=dim_emb,
                                      beta=5,
                                      n_prev_graphs=lookback,
                                      nu1=1e-6,
                                      nu2=1e-6,
                                      n_units=[500, 300, ],
                                      rho=0.3,
                                      n_iter=250,
                                      xeta=1e-4,
                                      n_batch=100,
                                      modelfile=None,
                                      weightfile=None,
                                      savefilesuffix=None)

                embs = []
                t1 = time()
                # for temp_var in range(lookback + 1, length + 1):
                emb, _ = embedding.learn_embeddings(graphs[i: i + lookback + 1])
                embs.append(emb)
                print(embedding._method_name + ':\n\tTraining time: %f' % (time() - t1))
                pred_adj = graphify(embedding.predict_next_adj())
                edge_index_pre = evaluation_util.getEdgeListFromAdjMtx(adj=pred_adj)
                MAP = metrics.computeMAP(edge_index_pre, graphs[i + lookback + 1])
                MAP_list.append(MAP)
                print('MAP for time slices ' + str(i) + '-' + str(i + lookback) + ': ' + str(MAP))
            MAP_list.append(np.mean(MAP_list))
            result = {'MAP': MAP_list}
            label = []
            for i in range(len(MAP_list) - 1):
                row = 'time slices ' + str(i) + '-' + str(i + lookback)
                label.append(row)
            label.append('mean_MAP')
            if not os.path.exists('result/' + data):
                os.makedirs('result/' + data)
            csv_path = 'result/' + data + '/' + str(func) + '.csv'
            df = pd.DataFrame(result, index=label)
            df.to_csv(csv_path)
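
# Example #4 slides a fixed-size window over the snapshot sequence: train on
# lookback+1 consecutive graphs, predict the next adjacency matrix, convert it
# to an edge list, and score MAP against the snapshot just after the window.
# A standalone sketch of the indexing plus the adjacency-to-edges step
# (adj_to_edges is a hypothetical stand-in for graphify + getEdgeListFromAdjMtx):
import numpy as np

def adj_to_edges(adj, threshold=0.0):
    # Keep every directed pair whose predicted weight exceeds the threshold
    src, dst = np.nonzero(adj > threshold)
    return list(zip(src.tolist(), dst.tolist()))

length, lookback = 8, 3  # toy sequence length; lookback matches the example
for i in range(length - lookback - 1):
    train_idx = list(range(i, i + lookback + 1))  # graphs[i : i + lookback + 1]
    target_idx = i + lookback + 1                 # MAP scored against this one
    print('window %d: train on %s, evaluate on %d' % (i, train_idx, target_idx))

pred = np.array([[0.0, 0.9],
                 [0.2, 0.0]])
print(adj_to_edges(pred, threshold=0.5))  # [(0, 1)]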
Example #5
    # (Fragment: assumes dynamicgem imports such as dynamic_SBM_graph,
    # DynAERNN, dynamicTriad, dataprep_util, and the plotting helpers used in
    # the examples above.)
    length = 7
    lookback = args.lookback

    if args.testDataType == 'sbm_rp':
        node_num = 1000
        community_num = 50
        node_change_num = 10
        dynamic_sbm_series = dynamic_SBM_graph.get_random_perturbation_series(node_num, community_num, length,
                                                                              node_change_num)
        dynamic_embedding = DynAERNN(
            d=100,
            beta=100,
            n_prev_graphs=lookback,
            nu1=1e-6,
            nu2=1e-6,
            # DynAERNN expects separate AE and LSTM unit lists rather than
            # n_units; the layer sizes here are assumed
            n_aeunits=[50, 30],
            n_lstmunits=[30, 100],
            rho=0.3,
            n_iter=30,
            xeta=0.005,
            n_batch=50,
            modelfile=['./intermediate/enc_model.json', './intermediate/dec_model.json'],
            weightfile=['./intermediate/enc_weights.hdf5', './intermediate/dec_weights.hdf5'],
        )
        dynamic_embedding.learn_embeddings([g[0] for g in dynamic_sbm_series])
        plot_dynamic_sbm_embedding.plot_dynamic_sbm_embedding(dynamic_embedding.get_embeddings(), dynamic_sbm_series)
        plt.savefig('result/visualization_DynRNN_rp.png')
        plt.show()
    elif args.testDataType == 'sbm_cd':
        node_num = 100
        community_num = 2
        node_change_num = args.nodemigration
        dynamic_sbm_series = dynamic_SBM_graph.get_community_diminish_series_v2(node_num,
                                                                                community_num,
                                                                                length,
                                                                                1,  # community ID to perturb (trailing arguments assumed, matching Example #1)
                                                                                node_change_num)

def plot_dynam_graph(title, graph_list):
    #dynAERNN
    graphs = []
    for graph in graph_list:
        if graph is not None:
            graphs.append(graph)
    dynamic_sbm_series = graphs
    outdir = os.path.join(os.path.dirname(__file__), "out")
    if not os.path.exists(outdir):
        os.mkdir(outdir)
    testDataType = 'sbm_cd'
    length = len(graph_list)
    dim_emb  = 128
    lookback = 2
    embedding = DynAERNN(d   = dim_emb,
                beta           = 5,
                n_prev_graphs  = lookback,
                nu1            = 1e-6,
                nu2            = 1e-6,
                n_aeunits      = [500, 300],
                n_lstmunits    = [500,dim_emb],
                rho            = 0.3,
                n_iter         = 250,
                xeta           = 1e-3,
                n_batch        = 100,
                modelfile      = None, #['./intermediate/enc_model_dynAERNN.json', 
                                #'./intermediate/dec_model_dynAERNN.json'],
                weightfile     = None, #['./intermediate/enc_weights_dynAERNN.hdf5', 
                                #'./intermediate/dec_weights_dynAERNN.hdf5'],
                savefilesuffix = None) #"testing")

    embs = []
    t1 = time()
    for temp_var in range(lookback + 1, length + 1):
        emb, _ = embedding.learn_embeddings(graphs[:temp_var])
        embs.append(emb)
    print(embedding._method_name + ':\n\tTraining time: %f' % (time() - t1))
    plt.figure()
    plt.clf()
    plot_dynamic_sbm_embedding.plot_dynamic_sbm_embedding_v2(embs[-5:-1], dynamic_sbm_series[-5:])
    plt.show()

    # dynamicTriad
    datafile = dataprep_util.prep_input_dynTriad(graphs, length, testDataType)
    embedding = dynamicTriad(niters     = 20,
                    starttime  = 0,
                    datafile   = datafile,
                    batchsize  = 1000,
                    nsteps     = length,
                    embdim     = dim_emb,
                    stepsize   = 1,
                    stepstride = 1,
                    outdir     = outdir,
                    cachefn    = '/tmp/'+ testDataType,
                    lr         = 0.1,
                    beta       = [0.1,0.1],
                    negdup     = 1,
                    datasetmod = 'core.dataset.adjlist',
                    trainmod   = 'dynamicgem.dynamictriad.core.algorithm.dynamic_triad',
                    pretrain_size = length,
                    sampling_args = {},
                    validation = 'link_reconstruction',
                    datatype   = testDataType,
                    scale      = 1,
                    classifier = 'lr',
                    debug      = False,
                    test       = 'link_predict',
                    repeat     = 1,
                    resultdir  = outdir,
                    testDataType = testDataType,
                    clname       = 'lr')
                    #node_num     = node_num )
    t1 = time()
    embedding.learn_embedding()
    print(embedding._method_name + ':\n\tTraining time: %f' % (time() - t1))
    embedding.get_embedding()
    embedding.plotresults(dynamic_sbm_series)