Esempio n. 1
0
    def test_community_analysis_single_channel_cutoff_20(self):
        """Check Infomap output for "#kubuntu-devel" at message cutoff 20.

        Temporarily sets ``config.THRESHOLD_MESSAGE_NUMBER_GRAPH`` to 20,
        builds the message-number graph, saves it as a ``.net`` file inside
        ``self.current_directory`` and runs ``community.infomap_igraph`` on
        that file.  The result is compared with the stored fixture: the
        graphs must be isomorphic and ``compare_communities`` must return 0.
        """
        log_data = reader.linux_input(self.log_data_dir, ["#kubuntu-devel"],
                                      self.start_date, self.end_date)
        expected_result = util.load_from_disk(
            self.current_directory +
            '/data/output/community_analysis_single_channel_cutoff_20')
        nicks, nick_same_list = nickTracker.nick_tracker(log_data)

        default_cutoff = config.THRESHOLD_MESSAGE_NUMBER_GRAPH
        config.THRESHOLD_MESSAGE_NUMBER_GRAPH = 20
        # Build the file name once so saving, loading and cleanup cannot
        # drift apart.
        graph_name = ("message-exchange-" + self.start_date + "-cutoff-" +
                      str(config.THRESHOLD_MESSAGE_NUMBER_GRAPH))
        net_file = self.current_directory + "/" + graph_name + '.net'
        try:
            message_number_graph = network.message_number_graph(
                log_data, nicks, nick_same_list, False)
            saver.save_net_nx_graph(
                message_number_graph, self.current_directory, graph_name)
            actual_output = community.infomap_igraph(
                ig_graph=None, net_file_location=net_file)
        finally:
            # Always undo the global override and delete the temporary file,
            # even when one of the steps above raises; otherwise the modified
            # cutoff would leak into every test that runs afterwards.
            config.THRESHOLD_MESSAGE_NUMBER_GRAPH = default_cutoff
            if os.path.exists(net_file):
                os.remove(net_file)

        self.assertTrue(expected_result[0].isomorphic(actual_output[0]))
        self.assertEqual(
            compare_communities(expected_result[1], actual_output[1]), 0)
Esempio n. 2
0
 def test_infomap_igraph(self):
     """Infomap communities of the stored .net graph must match the fixture.

     Runs ``community.infomap_igraph`` on ``data/message_number_graph.net``
     and requires the distance to the communities loaded from
     ``data/community`` to be exactly 0.0.
     """
     net_path = self.current_directory + '/data/message_number_graph.net'
     # Only the community assignment is checked; the returned graph is unused.
     _, detected = community.infomap_igraph(
         ig_graph=None, net_file_location=net_path)
     reference = util.load_from_disk(
         self.current_directory + '/data/community')

     # Distance between the detected and the reference communities.
     distance = clustering.compare_communities(detected, reference)

     self.assertEqual(distance, 0.0)
Esempio n. 3
0
    def test_community_analysis_multi_channel(self):
        """Check Infomap output on the channel/user presence graphs.

        For each presence type ("CC", "UU", "CU") the full graph and the
        reduced graph from ``network.channel_user_presence_graph_and_csv``
        are saved as ``adj<type>.net`` / ``radj<type>.net`` inside
        ``self.current_directory``, passed to ``community.infomap_igraph``
        and compared with the stored fixture: the graphs must be isomorphic
        and ``compare_communities`` must return 0.
        """
        log_data = reader.linux_input(self.log_data_dir, ["ALL"],
                                      self.start_date, self.end_date)
        expected_result = util.load_from_disk(
            self.current_directory +
            '/data/output/community_analysis_multi_channel')
        (nicks, nick_same_list, channels_for_user, nick_channel_dict,
         nicks_hash, channels_hash) = nickTracker.nick_tracker(log_data, True)
        # Only the per-presence-type dictionary is needed here.
        dict_out, _graph = network.channel_user_presence_graph_and_csv(
            nicks, nick_same_list, channels_for_user, nick_channel_dict,
            nicks_hash, channels_hash)

        for ptype in ("CC", "UU", "CU"):
            # (result key, key into dict_out[ptype], stem of the .net file)
            variants = (
                ('adj', "graph", "adj" + ptype),
                ('radj', "reducedGraph", "radj" + ptype),
            )
            net_files = [self.current_directory + '/' + stem + '.net'
                         for _, _, stem in variants]
            actual_output = {}
            try:
                for _, graph_key, stem in variants:
                    saver.save_net_nx_graph(dict_out[ptype][graph_key],
                                            self.current_directory, stem)
                for (result_key, _, _), net_file in zip(variants, net_files):
                    actual_output[result_key] = community.infomap_igraph(
                        ig_graph=None, net_file_location=net_file)
            finally:
                # Remove the temporary files even if saving or Infomap
                # raised, so a failing run does not leave them behind.
                for net_file in net_files:
                    if os.path.exists(net_file):
                        os.remove(net_file)

            for result_key, _, _ in variants:
                self.assertTrue(
                    expected_result[ptype][result_key][0].isomorphic(
                        actual_output[result_key][0]))
                self.assertEqual(
                    compare_communities(
                        actual_output[result_key][1],
                        expected_result[ptype][result_key][1]), 0)
#%%
# Compare the cluster labels of consecutive dates in the clustering CSV using
# compare_communities(..., method='nmi').
argv = ['a', '/Users/hamishgibbs/Documents/Covid-19/unequal_mobility_uk/output/spi_m/infomap/cluster_full.csv']

im = pd.read_csv(argv[1])
# Format every quadkey with the '012' spec (zero-padded to width 12 for
# integer keys).
im['quadkey'] = [format(key, '012') for key in im['quadkey'].values]
#%%
# Split the table into one DataFrame per date.
by_date = im.groupby('date')
im = [by_date.get_group(day) for day in by_date.groups]
#%%
# Columns needed for the comparison: the tile key and its cluster label.
key_and_label = ['quadkey', 'cluster']

i0 = im[0][key_and_label].rename(columns={'cluster': 'cluster_0'})

i1 = im[1][key_and_label].rename(columns={'cluster': 'cluster_1'})
#%%
# Merge on the shared column(s) and drop rows containing missing values.
ic = pd.merge(i0, i1).dropna(axis=0, how='any')

compare_communities(ic['cluster_0'].tolist(), ic['cluster_1'].tolist(), method='nmi')

#%%
# Same comparison for every pair of consecutive dates.
nmi = []
for i in range(1, len(im)):
    i0 = im[i - 1][key_and_label].rename(columns={'cluster': 'cluster_0'})
    i1 = im[i][key_and_label].rename(columns={'cluster': 'cluster_1'})

    ic = pd.merge(i0, i1).dropna(axis=0, how='any')

    labels_before = ic['cluster_0'].tolist()
    labels_after = ic['cluster_1'].tolist()
    nmi.append(compare_communities(labels_before, labels_after, method='nmi'))

#%%
import matplotlib.pyplot as plt
Esempio n. 5
0
# first parameter group
# For every GML graph (din in 9..17, 100 graphs each) run the five igraph
# detectors 100 times and record the NMI of each partition against the
# ground truth stored in the vertices' 'value' attribute; FPPM partitions
# are scored once per graph.
for din in range(9, 18):
    for graph_idx in range(100):
        gml_path = DIR_FMT.format(64) + GML_FMT.format(din, graph_idx)
        ig_graph = ig.Graph.Read_GML(gml_path)
        ground_truth = [int(vertex['value']) for vertex in ig_graph.vs]

        # Score lists in the same order as the detectors below.
        score_lists = (
            Fastgreedy_performance[0],
            Infomap_performance[0],
            LPA_performance[0],
            Louvein_performance[0],
            Walktrap_performance[0],
        )
        for _repeat in range(100):
            # All five partitions are computed before any of them is scored.
            detected = (
                ig_graph.community_fastgreedy().as_clustering(),
                ig_graph.community_infomap(),
                ig_graph.community_label_propagation(),
                ig_graph.community_multilevel(),
                ig_graph.community_walktrap().as_clustering(),
            )
            for scores, partition in zip(score_lists, detected):
                scores.append(
                    compare_communities(ground_truth, partition, method='nmi'))

        nx_graph = nx.read_gml(gml_path, label='id')
        for partition in FPPM_repeat(nx_graph):
            FPPM_performance[0].append(
                compare_communities(ground_truth, partition, method='nmi'))
Esempio n. 6
0
# For every graph size in SIZES, every Mu in 1..9 and 100 graphs each, run the
# five igraph detectors 100 times and record the NMI of each partition against
# the ground truth stored in the vertices' 'value' attribute; FPPM partitions
# are scored once per graph.  Scores are collected per size index.
for idx, size in enumerate(SIZES):
    # Score lists for this size, in the same order as the detectors below.
    score_lists = (
        Fastgreedy_performance[idx],
        Infomap_performance[idx],
        LPA_performance[idx],
        Louvein_performance[idx],
        Walktrap_performance[idx],
    )
    for mu in range(1, 10):
        for graph_idx in range(100):
            gml_path = DIR_FMT.format(size) + GML_FMT.format(mu, graph_idx)
            ig_graph = ig.Graph.Read_GML(gml_path)
            ground_truth = [int(vertex['value']) for vertex in ig_graph.vs]

            for _repeat in range(100):
                # All five partitions are computed before any is scored.
                detected = (
                    ig_graph.community_fastgreedy().as_clustering(),
                    ig_graph.community_infomap(),
                    ig_graph.community_label_propagation(),
                    ig_graph.community_multilevel(),
                    ig_graph.community_walktrap().as_clustering(),
                )
                for scores, partition in zip(score_lists, detected):
                    scores.append(
                        compare_communities(ground_truth, partition, method='nmi'))

            nx_graph = nx.read_gml(gml_path, label='id')
            for partition in FPPM_repeat(nx_graph):
                FPPM_performance[idx].append(
                    compare_communities(ground_truth, partition, method='nmi'))

# Persist the collected score lists, one binary pickle file per algorithm,
# under LFR_benchmarks/.
with open('LFR_benchmarks/Fastgreedy_performance.pickle', 'wb') as f:
    pickle.dump(Fastgreedy_performance, f)
with open('LFR_benchmarks/Infomap_performance.pickle', 'wb') as f:
    pickle.dump(Infomap_performance, f)
with open('LFR_benchmarks/LPA_performance.pickle', 'wb') as f: