Example #1
0
            experiment_num += 1
            experiment_file_name = '{}-experiment-{}-results.json'.format(
                network, experiment_num)

        network_results_dir = './results/' + experiment_file_name

        # Iterate over fractions of edges to hide
        for frac_hidden in FRAC_EDGES_HIDDEN:
            val_frac = 0.05
            test_frac = frac_hidden - val_frac

            # Read train-test split
            experiment_name = '{}-{}-hidden'.format(network, frac_hidden)
            print("Current experiment: ", experiment_name)
            train_test_split_file = TRAIN_TEST_SPLITS_FOLDER + experiment_name + '.pkl'

            # Run all link prediction methods on current graph, store results
            network_results[experiment_name] = lp.calculate_all_scores(network_adj, features_matrix=None,
                                                                        directed=False, \
                                                                        test_frac=test_frac, val_frac=val_frac, \
                                                                        random_state=RANDOM_SEED, verbose=2,
                                                                        train_test_split_file=train_test_split_file,
                                                                        tf_dtype=tf.float16)

            # Save experiment results at each iteration
            with open(network_results_dir, 'w') as fp:
                json.dump(network_results, fp, indent=4)

        # Save final experiment results
        with open(network_results_dir, 'w') as fp:
            json.dump(network_results, fp, indent=4)
Example #2
0
            feat = graph_tuple[1]
            
            experiment_name = 'fb-{}-{}-hidden'.format(g_name, frac_hidden)
            print "Current experiment: ", experiment_name

            # # TODO: remove this!
            # if experiment_name !='fb-combined-0.25-hidden' and \
            #     experiment_name != 'fb-combined-0.5-hidden' and \
            #     experiment_name != 'fb-combined-0.75-hidden':
            #     continue

            train_test_split_file = TRAIN_TEST_SPLITS_FOLDER + experiment_name + '.pkl'
            
            # Run all link prediction methods on current graph, store results
            fb_results[experiment_name] = lp.calculate_all_scores(adj, feat, \
                                                         test_frac=test_frac, val_frac=val_frac, \
                                                         random_state=RANDOM_SEED, verbose=2,
                                                         train_test_split_file=train_test_split_file)

            # Save experiment results at each iteration
            with open(FB_RESULTS_DIR, 'wb') as f:
                pickle.dump(fb_results, f, protocol=2)
            
    # Save final experiment results
    with open(FB_RESULTS_DIR, 'wb') as f:
        pickle.dump(fb_results, f, protocol=2)



    ### ---------- NETWORKX ---------- ###
    nx_results = {}
Example #3
0
def facebook_networks():
    """Run link-prediction experiments on the Facebook ego networks.

    Loads each preprocessed FB ego graph from ./data/fb-processed/, then for
    NUM_REPEATS repetitions runs every link-prediction method in ``lp`` over
    each hidden-edge fraction in FRAC_EDGES_HIDDEN, pickling the accumulated
    results to ./results/ after every experiment.

    Relies on module-level globals: NUM_REPEATS, FRAC_EDGES_HIDDEN,
    RANDOM_SEED, lp, os, pickle.
    """
    ### ---------- Load in FB Graphs ---------- ###
    FB_EGO_USERS = [0, 107, 1684, 1912, 3437, 348, 3980, 414, 686, 698]
    fb_graphs = {}  # user id -> (adjacency matrix, feature matrix)

    # Read in each FB ego graph; each pickle stores an (adj, features) tuple.
    for user in FB_EGO_USERS:
        network_dir = './data/fb-processed/{0}-adj-feat.pkl'.format(user)
        with open(network_dir, 'rb') as f:
            adj, features = pickle.load(f)
        fb_graphs[user] = (adj, features)

    # NOTE(review): the combined FB graph (combined-adj-sparsefeat.pkl) is
    # deliberately not loaded in this variant of the experiment.

    ### ---------- Run Link Prediction Tests ---------- ###
    for i in range(NUM_REPEATS):

        fb_results = {}

        # Pick the first unused results file name so repeated runs never
        # overwrite earlier output.
        past_results = os.listdir('./results/')
        experiment_num = 0
        experiment_file_name = 'fb-experiment-{}-results.pkl'.format(
            experiment_num)
        while (experiment_file_name in past_results):
            experiment_num += 1
            experiment_file_name = 'fb-experiment-{}-results.pkl'.format(
                experiment_num)

        FB_RESULTS_DIR = './results/' + experiment_file_name

        TRAIN_TEST_SPLITS_FOLDER = './train-test-splits/'

        # Iterate over fractions of edges to hide from the training graph.
        for frac_hidden in FRAC_EDGES_HIDDEN:
            val_frac = 0.05
            test_frac = frac_hidden - val_frac  # remainder of hidden edges

            # Iterate over each ego graph.
            for g_name, graph_tuple in fb_graphs.items():
                adj, feat = graph_tuple

                experiment_name = 'fb-{}-{}-hidden'.format(g_name, frac_hidden)
                print("Current experiment: ", experiment_name)

                train_test_split_file = TRAIN_TEST_SPLITS_FOLDER + experiment_name + '.pkl'

                # Run all link prediction methods on the current graph.
                fb_results[experiment_name] = lp.calculate_all_scores(
                    adj, feat,
                    test_frac=test_frac, val_frac=val_frac,
                    random_state=RANDOM_SEED, verbose=2,
                    train_test_split_file=train_test_split_file)

                # Persist after every experiment so a crash loses at most one
                # experiment's worth of work.
                with open(FB_RESULTS_DIR, 'wb') as f:
                    pickle.dump(fb_results, f, protocol=2)

        # Save final experiment results for this repetition.
        with open(FB_RESULTS_DIR, 'wb') as f:
            pickle.dump(fb_results, f, protocol=2)
Example #4
0
def random_networks():
    """Run link-prediction experiments on random Barabasi-Albert graphs.

    For each node count in NUM, generates a BA graph, removes isolated
    nodes, then for NUM_REPEATS repetitions scores every link-prediction
    method in ``lp`` over each hidden-edge fraction in FRAC_EDGES_HIDDEN,
    saving results as pickle (./results/) and JSON (./results/txt/) after
    every experiment.

    Relies on module-level globals: NUM_REPEATS, FRAC_EDGES_HIDDEN,
    RANDOM_SEED, lp, nx, os, pickle, json.
    """
    ### ---------- Create Random NetworkX Graphs ---------- ###
    nx_graphs = {}  # name -> networkx graph

    # Node counts to test.
    NUM = [12, 100, 1000, 10000]
    for N_LARGE in NUM:
        nx_graphs['ba-large'] = nx.barabasi_albert_graph(
            n=N_LARGE, m=10, seed=RANDOM_SEED)  # Barabasi-Albert

        # Remove isolated nodes from the random graphs.
        # BUG FIX: nx.isolates() returns an iterator (NetworkX >= 2.0), so
        # the previous ``len(list(isolates))`` check exhausted it and the
        # removal loop never removed anything. Materialize it exactly once.
        for g_name, nx_g in nx_graphs.items():
            nx_g.remove_nodes_from(list(nx.isolates(nx_g)))

        ### ---------- Run Link Prediction Tests ---------- ###
        for i in range(NUM_REPEATS):
            nx_results = {}

            # Pick the first unused results file names so repeated runs never
            # overwrite earlier output (pickle and JSON copies are numbered
            # independently).
            past_results = os.listdir('./results/')
            txt_past_results = os.listdir('./results/txt/')
            experiment_num = 0
            experiment_file_name = 'nx-experiment-{}-results.pkl'.format(
                experiment_num)
            while (experiment_file_name in past_results):
                experiment_num += 1
                experiment_file_name = 'nx-experiment-{}-results.pkl'.format(
                    experiment_num)

            NX_RESULTS_DIR = './results/' + experiment_file_name

            # Human-readable JSON mirror of the pickled results.
            txt_experiment_num = 0
            txt_experiment_file_name = 'txt-nx-experiment-{}-results.json'.format(
                txt_experiment_num)
            while (txt_experiment_file_name in txt_past_results):
                txt_experiment_num += 1
                txt_experiment_file_name = 'txt-nx-experiment-{}-results.json'.format(
                    txt_experiment_num)

            TXT_NX_RESULTS_DIR = './results/txt/' + txt_experiment_file_name

            # Iterate over fractions of edges to hide.
            for frac_hidden in FRAC_EDGES_HIDDEN:
                val_frac = 0.05
                test_frac = frac_hidden - val_frac  # remainder of hidden edges

                # Iterate over each graph.
                for g_name, nx_g in nx_graphs.items():
                    adj = nx.adjacency_matrix(nx_g)

                    experiment_name = 'nx-{}-{}-hidden'.format(
                        g_name, frac_hidden)
                    print("Current experiment: ", experiment_name)

                    # Run all link prediction methods on the current graph.
                    nx_results[experiment_name] = lp.calculate_all_scores(
                        adj,
                        test_frac=test_frac, val_frac=val_frac,
                        random_state=RANDOM_SEED, verbose=0)

                    # Persist after every experiment so a crash loses at most
                    # one experiment's worth of work.
                    with open(NX_RESULTS_DIR, 'wb') as f:
                        pickle.dump(nx_results, f, protocol=2)

                    with open(TXT_NX_RESULTS_DIR, 'w') as f:
                        json.dump(nx_results, f, indent=4)

            # Save final experiment results for this repetition.
            with open(NX_RESULTS_DIR, 'wb') as f:
                pickle.dump(nx_results, f, protocol=2)

            with open(TXT_NX_RESULTS_DIR, 'w+') as f:
                json.dump(nx_results, f, indent=4)
Example #5
0
def facebook_networks():
    """Benchmark all link-prediction methods on the Facebook networks.

    Loads the ten preprocessed FB ego graphs plus the combined FB graph,
    then repeats the experiment NUM_REPEATS times across every hidden-edge
    fraction in FRAC_EDGES_HIDDEN, persisting results after each experiment
    as a pickle under ./results/ and as JSON under ./results/txt/.
    """
    ### ---------- Read the Facebook network data ---------- ###
    FB_EGO_USERS = [0, 107, 1684, 1912, 3437, 348, 3980, 414, 686, 698]
    fb_graphs = {}  # maps user id (or 'combined') -> (adj, features)

    # One pickle per ego network, each holding an (adj, features) pair.
    for user in FB_EGO_USERS:
        network_dir = './data/fb-processed/{0}-adj-feat.pkl'.format(user)
        with open(network_dir, 'rb') as f:
            adj, features = pickle.load(f)
        fb_graphs[user] = (adj, features)

    # The combined FB graph has its own pickle.
    combined_dir = './data/fb-processed/combined-adj-sparsefeat.pkl'
    with open(combined_dir, 'rb') as f:
        adj, features = pickle.load(f)
        fb_graphs['combined'] = (adj, features)

    ### ---------- Run the link-prediction experiments ---------- ###
    for rep in range(NUM_REPEATS):

        fb_results = {}

        # Derive fresh, non-clobbering output file names by bumping the
        # experiment number past every name already present on disk.
        existing = os.listdir('./results/')
        existing_txt = os.listdir('./results/txt/')
        file_num = 0
        while 'fb-experiment-{}-results.pkl'.format(file_num) in existing:
            file_num += 1
        pkl_path = './results/' + 'fb-experiment-{}-results.pkl'.format(file_num)

        # JSON twin of the pickled output, numbered independently.
        txt_num = 0
        while 'txt-fb-experiment-{}-results.json'.format(txt_num) in existing_txt:
            txt_num += 1
        json_path = './results/txt/' + 'txt-fb-experiment-{}-results.json'.format(txt_num)

        splits_folder = './train-test-splits/'

        # Sweep the hidden-edge fractions (train / validation / test split).
        for frac_hidden in FRAC_EDGES_HIDDEN:
            val_frac = 0.05
            test_frac = frac_hidden - val_frac

            # Sweep every graph in the collection.
            for g_name, (adj, feat) in fb_graphs.items():
                experiment_name = 'fb-{}-{}-hidden'.format(g_name, frac_hidden)
                print("Current experiment: ", experiment_name)

                train_test_split_file = splits_folder + experiment_name + '.pkl'

                # Score every link-prediction method on this graph.
                fb_results[experiment_name] = lp.calculate_all_scores(
                    adj, feat,
                    test_frac=test_frac, val_frac=val_frac,
                    random_state=RANDOM_SEED, verbose=2,
                    train_test_split_file=train_test_split_file)

                # Persist intermediate results after every experiment:
                # pickle copy first, then the JSON copy.
                with open(pkl_path, 'wb') as f:
                    pickle.dump(fb_results, f, protocol=2)
                with open(json_path, 'w') as f:
                    json.dump(fb_results, f, indent=4)

        # Persist the final results of this repetition.
        with open(pkl_path, 'wb') as f:
            pickle.dump(fb_results, f, protocol=2)
        with open(json_path, 'w') as f:
            json.dump(fb_results, f, indent=4)
Example #6
0
def random_networks():
    """Benchmark all link-prediction methods on random Barabasi-Albert graphs.

    For each node count in NUM, generates a BA graph, removes isolated
    nodes, then repeats the experiment NUM_REPEATS times across every
    hidden-edge fraction in FRAC_EDGES_HIDDEN, saving results as pickle
    (./results/) and JSON (./results/txt/) after every experiment.

    Relies on module-level globals: NUM_REPEATS, FRAC_EDGES_HIDDEN,
    RANDOM_SEED, lp, nx, os, pickle, json.
    """
    ### ---------- Generate the random graphs ---------- ###
    nx_graphs = {}  # name -> networkx graph

    # Node counts to test (earlier runs also used 10/100/1000/10000).
    NUM = [2000]
    for N_LARGE in NUM:
        nx_graphs['ba-large'] = nx.barabasi_albert_graph(
            n=N_LARGE, m=6, seed=RANDOM_SEED)  # Barabasi-Albert

        # Remove isolated nodes.
        # BUG FIX: nx.isolates() returns an iterator (NetworkX >= 2.0), so
        # the previous ``len(list(isolates))`` check exhausted it and the
        # removal loop never removed anything. Materialize it exactly once.
        for g_name, nx_g in nx_graphs.items():
            nx_g.remove_nodes_from(list(nx.isolates(nx_g)))

        ### ---------- Run the link-prediction experiments ---------- ###
        for i in range(NUM_REPEATS):
            nx_results = {}

            # Name the result files after the first unused experiment number
            # so earlier runs are never overwritten.
            past_results = os.listdir('./results/')
            txt_past_results = os.listdir('./results/txt/')
            experiment_num = 0
            experiment_file_name = 'nx-experiment-{}-results.pkl'.format(experiment_num)
            while (experiment_file_name in past_results):
                experiment_num += 1
                experiment_file_name = 'nx-experiment-{}-results.pkl'.format(experiment_num)

            NX_RESULTS_DIR = './results/' + experiment_file_name

            # Human-readable JSON mirror of the pickled results.
            txt_experiment_num = 0
            txt_experiment_file_name = 'txt-nx-experiment-{}-results.json'.format(txt_experiment_num)
            while (txt_experiment_file_name in txt_past_results):
                txt_experiment_num += 1
                txt_experiment_file_name = 'txt-nx-experiment-{}-results.json'.format(txt_experiment_num)

            TXT_NX_RESULTS_DIR = './results/txt/' + txt_experiment_file_name

            # Sweep the hidden-edge fractions (train / validation / test).
            for frac_hidden in FRAC_EDGES_HIDDEN:
                val_frac = 0.05
                test_frac = frac_hidden - val_frac  # remainder of hidden edges

                # Sweep every random graph.
                for g_name, nx_g in nx_graphs.items():
                    adj = nx.adjacency_matrix(nx_g)

                    experiment_name = 'nx-{}-{}-hidden'.format(g_name, frac_hidden)
                    print("Current experiment: ", experiment_name)

                    # Run all link prediction methods on the current graph.
                    nx_results[experiment_name] = lp.calculate_all_scores(
                        adj,
                        test_frac=test_frac, val_frac=val_frac,
                        random_state=RANDOM_SEED, verbose=0)

                    # Persist after every experiment so a crash loses at most
                    # one experiment's worth of work.
                    with open(NX_RESULTS_DIR, 'wb') as f:
                        pickle.dump(nx_results, f, protocol=2)

                    with open(TXT_NX_RESULTS_DIR, 'w') as f:
                        json.dump(nx_results, f, indent=4)

            # Save final experiment results for this repetition.
            with open(NX_RESULTS_DIR, 'wb') as f:
                pickle.dump(nx_results, f, protocol=2)

            with open(TXT_NX_RESULTS_DIR, 'w+') as f:
                json.dump(nx_results, f, indent=4)
    while (experiment_file_name in past_results):
        experiment_num += 1
        experiment_file_name = 'twitter-experiment-{}-results.json'.format(experiment_num)

    twitter_results_dir = './results/' + experiment_file_name

    # Iterate over fractions of edges to hide
    for frac_hidden in FRAC_EDGES_HIDDEN:
        val_frac = 0.1
        test_frac = frac_hidden - val_frac

        # Read train-test split
        experiment_name = 'twitter-combined-{}-hidden'.format(frac_hidden)
        print "Current experiment: ", experiment_name
        train_test_split_file = TRAIN_TEST_SPLITS_FOLDER + experiment_name + '.pkl'

        # Run all link prediction methods on current graph, store results
        twitter_results[experiment_name] = lp.calculate_all_scores(twitter_adj, features_matrix=None, 
                                                     directed=True, \
                                                     test_frac=test_frac, val_frac=val_frac, \
                                                     random_state=RANDOM_SEED, verbose=2,
                                                     train_test_split_file=train_test_split_file,
                                                     tf_dtype=tf.float16)

        # Save experiment results at each iteration
        with open(twitter_results_dir, 'w') as fp:
            json.dump(twitter_results, fp, indent=4)

    # Save final experiment results
    with open(twitter_results_dir, 'w') as fp:
        json.dump(twitter_results, fp, indent=4)