Example #1
def test_karate_club():
    nodes, edges = PyLouvain.from_file("data/karate.txt")
    pyl = PyLouvain(nodes, edges)
    partition, q = pyl.apply_method(gamma=1.0)
    odds = bayes_model_selection(nodes, edges, partition)

    print(partition, q, odds)
Example #2
def test(graphname, gnc=None):
    nodes, edges = PyLouvain.from_file("data/%s.txt" % graphname)
    pyl = PyLouvain(nodes, edges)

    name_pickle = 'fig/save_%s_%d.p' % (graphname, len(nodes))
    if not os.path.isfile(name_pickle):
        print("pickle file", name_pickle, "is missing. Recompute.")

        start = time.time()
        partition, q = pyl.apply_method()
        print("Modularity Time", time.time() - start)

        start = time.time()
        partition2 = multiscale(nodes, edges, 0.5)
        print("Multiscale Time", time.time() - start)

        results = {"LV": partition, "MS": partition2}
        sizes_distri = {
            "Modularity": [len(p) for p in partition],
            "MultiScale": [len(p) for p in partition2]
        }

        pickle.dump(results, open(name_pickle, 'wb'))
        print("Pickle save", name_pickle)
    else:
        print("pickle file", name_pickle, "is found.")

        results = pickle.load(open(name_pickle, "rb"))
        # restore the partitions, which are needed below when gnc is set
        partition, partition2 = results["LV"], results["MS"]
        sizes_distri = {
            "Modularity": [len(p) for p in partition],
            "MultiScale": [len(p) for p in partition2]
        }

    if gnc:
        gnc_fp = open(gnc, "r")
        gnc_map = {}
        sizes_distri["Ground Truth"] = []
        for i, line in enumerate(gnc_fp):
            x = line.split()
            sizes_distri["Ground Truth"].append(len(x))
            for j in x:
                gnc_map[int(j)] = i

        gnc_list = [gnc_map[k] for k in nodes]

        lv_map = {v: i for i, c in enumerate(partition) for v in c}
        lv_list = [lv_map[k] for k in nodes]

        ms_map = {v: i for i, c in enumerate(partition2) for v in c}
        ms_list = [ms_map[k] for k in nodes]

        print("Louvain NMI=", normalized_mutual_info_score(lv_list, gnc_list))
        print("Multi-scale NMI=",
              normalized_mutual_info_score(ms_list, gnc_list))

    hist(sizes_distri, graphname)
Example #3
def test_small_networks(nodes, edges, gamma0):
    pyl = PyLouvain(nodes, edges)
    partition0, q0 = pyl.apply_method(gamma0)
    c0 = cmap(nodes, partition0)
    NMI = []

    gamma_list = np.linspace(0.2, 3.5, num=200)
    for gamma in gamma_list:
        partition, q = PyLouvain(nodes, edges).apply_method(gamma)
        c = cmap(nodes, partition)

        NMI.append(metrics.normalized_mutual_info_score(c0, c))

    plt.plot(gamma_list, NMI, 'b-*', markersize=10)
    plt.show()
Example #4
 def test_karate_club(self):
     pyl = PyLouvain.from_file("data/karate.txt")
     partition, q = pyl.apply_method()
     q_ = q * 10000
     self.assertEqual(4, len(partition))
     self.assertEqual(4298, math.floor(q_))
     self.assertEqual(4299, math.ceil(q_))
Example #5
 def test_karate_club(self):
     pyl = PyLouvain.from_file("data/karate.txt")
     partition, q = pyl.apply_method()
     q_ = q * 10000
     self.assertEqual(4, len(partition))
     self.assertEqual(4298, math.floor(q_))
     self.assertEqual(4299, math.ceil(q_))
Example #6
def findGZCommunity():
    dbm = dbManager2('sina11', host='127.0.0.1', passwd='root')
    pyl = PyLouvain.from_db(dbm,
                            "select uid,fid from afrelation11 limit 0,10000")
    partition, q = pyl.apply_method()
    print(partition)
    f = open("output.txt", "w")
    f.write(str(partition))
    f.close()
Example #7
def test_football():

    # load the ground-truth communities (football conferences) from a txt file
    fconf = open("data/football.gnc.txt", "r")
    gnc = {str(i): int(line.strip()) for i, line in enumerate(fconf)}
    order_ = {i: stri for i, stri in enumerate(sorted(gnc.keys()))}

    x, y, z, r = [], [], [], []
    for gamma in np.linspace(0.5, 8.5, num=35):
        nodes, edges = PyLouvain.from_file("data/football.txt")
        pyl = PyLouvain(nodes, edges)
        partition, q = pyl.apply_method(gamma)
        odds = bayes_model_selection(nodes, edges, partition)

        print(len(partition), odds)
        x.append(gamma)
        y.append(odds)
        z.append(len(partition))

        comm = {n: i for i, ns in enumerate(partition) for n in ns}
        a = [comm[i] for i in nodes]
        b = [gnc[order_[i]] for i in nodes]
        #print("NMI=", metrics.adjusted_mutual_info_score(a, b))

        r.append(metrics.adjusted_mutual_info_score(a, b))
        #r.append(metrics.adjusted_rand_score(a, b))

    plt.plot(x, y, 'r-*', markersize=10)
    ax1 = plt.gca()
    ax1.tick_params(axis='x', labelsize=18)
    ax1.tick_params(axis='y', labelcolor='r', labelsize=15)
    ax2 = ax1.twinx()
    ax2.plot(x, z, 'm-^', markersize=10)
    ax2.tick_params(axis='y', labelcolor='m', labelsize=15)
    plt.tight_layout()

    plt.savefig("fig/football2.png")
Example #8
def do(path, source_data):
    start_time = time.time()
    print('start time: ', start_time)
    print('Starting Louvain community detection...')
    pyl = PyLouvain.from_file(path, source_data)
    partition, q = pyl.apply_method()
    # print(partition)
    out_file = open(path + "community_result.txt", 'w')

    # read the node information file
    nodes_file = open(path + 'nodes_tmp.txt', 'r')
    nodes_lines = nodes_file.readlines()
    nodes_file.close()
    nodes = {}  # stores node-index -> node-name info
    for line in nodes_lines:
        n = line.split()
        if not n:
            break
        nodes[n[1]] = n[0]

    # community info, format: label <tab> member count
    print('Collecting community info and writing the community status file')
    community_status = open(path + 'community_status.txt', 'w')
    i = 1
    label = {}  # maps node name -> community label
    for community in partition:
        community_status.write(str(i) + '\t' + str(len(community)) + '\n')  # label, member count
        # tag every member with its community label
        for per in community:
            label[nodes[str(per)]] = str(i)
        i += 1
    community_status.close()

    # join the user-interaction data
    print('Joining user-interaction data and writing the result file')
    relationship_file = open(path + source_data, 'r')
    relationship_lines = relationship_file.readlines()
    relationship_file.close()
    for rela in relationship_lines:
        r = rela.split()
        if not r:
            break
        out_file.write('-\t'+r[0]+'\t'+label[r[0]]+'\t'+r[1]+'\t'+label[r[1]]+'\t'+r[2]+'\n')
    out_file.close()

    print('end time: ', time.time())
    print('elapsed time: ', (time.time() - start_time) / 60, ' min')
Example #9
def test_football2():

    for gamma in np.linspace(0.4, 0.9, num=10):
        print()
        print("gamma=", gamma)
        nodes, edges = PyLouvain.from_file("data/football.txt")
        partition = multiscale(nodes, edges, gamma)

        # load the ground-truth communities (football conferences) from a txt file
        fconf = open("data/football.gnc.txt", "r")
        gnc = {str(i): int(line.strip()) for i, line in enumerate(fconf)}
        order_ = {i: stri for i, stri in enumerate(sorted(gnc.keys()))}
        comm = {n: i for i, ns in enumerate(partition) for n in ns}
        a = [comm[i] for i in nodes]
        b = [gnc[order_[i]] for i in nodes]

        print("NMI=", metrics.adjusted_mutual_info_score(a, b))


#test_football2()
Example #10
 def test_citations(self):
     pyl = PyLouvain.from_file("data/hep-th-citations")
     partition, q = pyl.apply_method()
Example #11
 def test_lesmis(self):
     pyl = PyLouvain.from_gml_file("data/lesmis.gml")
     partition, q = pyl.apply_method()
Example #12
 def test_arxiv(self):
     pyl = PyLouvain.from_file("data/facebook_combined.txt", 0.5)
     partition, q = pyl.apply_method()
     print(len(partition), q)
Example #13
 def test_arxiv(self):
     pyl = PyLouvain.from_file("data/arxiv.txt")
     partition, q = pyl.apply_method()
Example #14
def test_citations():
    nodes, edges = PyLouvain.from_file("data/hep-th-citations")
    pyl = PyLouvain(nodes, edges)
    partition, q = pyl.apply_method()
    print(partition, q)
Example #15
def main():
    if not has_pyimpfuzzy:
        sys.exit("[!] pyimpfuzzy must be installed for this script.")

    if not has_py2neo:
        sys.exit("[!] py2neo must be installed for this script.")

    if not has_pylouvain and not args.nocluster:
        sys.exit("[!] Please download the pylouvain from https://github.com/patapizza/pylouvain.")

    try:
        # the host defaults to localhost; written explicitly here for clarity
        graph_http = ("http://" + NEO4J_USER + ":" + NEO4J_PASSWORD +
                      "@localhost:" + NEO4J_PORT + "/db/data/")
        GRAPH = Graph(graph_http)
    except Exception:
        sys.exit("[!] Can't connect to the Neo4j database.")

    if len(sys.argv) == 1:
        parser.print_help()
        sys.exit()
    
    i = 0
    hashlist = []
    hashlist_new = []
    nodes = []
    edges = []
    relationships = []

    # This is the impfuzzy threshold
    if args.threshold:
        ss_threshold = args.threshold
    else:
        ss_threshold = 30
    print("[*] Impfuzzy threshold is %i." % ss_threshold)

    # Delete database data
    if args.delete:
        GRAPH.delete_all()
        print("[*] Delete all nodes and relationships from this Neo4j database.")

    # Load database data
    database = GRAPH.data("MATCH (m:Malware) RETURN m.id, m.name, m.impfuzzy, m.md5, m.sha1, m.sha256")

    if database:
        print("[*] Database nodes %d." % len(database))
        for d in database:
            hashlist.append([d["m.id"], d["m.name"], d["m.impfuzzy"], d["m.md5"], d["m.sha1"], d["m.sha256"]])

    nodes_count = len(database)
    # Load relationships
    relation_data = GRAPH.data("MATCH (m1:Malware)-[s:same]-(m2:Malware) RETURN m1.id,m2.id,s.value")
    if relation_data:
        print("[*] Database relationships %d." % len(relation_data))
        for r in relation_data:
            relationships.append([r["m1.id"], r["m2.id"], r["s.value"]])

    for x in range(nodes_count):
        nodes.append(x)

    print("[*] Creating a graph data.")

    # Import data from EXE or DLL
    if args.file:
        if os.path.isfile(args.file):
            i = nodes_count
            impfuzzy, md5, sha1, sha256 = get_digest(args.file)
            query = "MATCH (m:Malware) WHERE m.sha256=\"%s\" RETURN m" % sha256
            if impfuzzy:
                if not GRAPH.data(query):
                    nodes.append(i)
                    hashlist_new.append([i, args.file, impfuzzy, md5, sha1, sha256])
                else:
                    print("[!] This malware is registered already. sha256: %s" % sha256)
            else:
                print("[!] Can't calculate the impfuzzy hash. sha256: %s" % sha256)
        else:
            sys.exit("[!] Can't open file {0}.".format(args.file))

    # Import data from directory
    if args.directory:
        try:
            files = os.listdir(args.directory)
        except OSError:
            sys.exit("[!] Can't open directory {0}.".format(args.directory))

        outf = args.directory + "_hash.csv"
        fl = open(outf, "w")
        i = nodes_count
        for file in files:
            filename = args.directory + "/" + file
            impfuzzy, md5, sha1, sha256 = get_digest(filename)
            fl.write("%s,%s,%s,%s,%s\n" % (file, impfuzzy, md5, sha1, sha256))
            query = "MATCH (m:Malware) WHERE m.sha256=\"%s\" RETURN m" % sha256
            if impfuzzy:
                if not GRAPH.data(query) and sha256 not in [x[5] for x in hashlist_new]:
                    nodes.append(i)
                    hashlist_new.append([i, file, impfuzzy, md5, sha1, sha256])
                    i += 1
                else:
                    print("[!] This malware is registered already. sha256: %s" % sha256)
            else:
                print("[!] Can't calculate the impfuzzy hash. sha256: %s" % sha256)
        print("[*] Created hash list %s." % outf)
        fl.close()

    # Import data from csv file
    if args.listname:
        print("[*] Parse file %s." % args.listname)
        try:
            csvfile = csv.reader(open(args.listname), delimiter=",")
        except IOError:
            sys.exit("[!] Can't open file {0}.".format(args.listname))

        i = nodes_count
        for array in csvfile:
            query = "MATCH (m:Malware) WHERE m.sha256=\"%s\" RETURN m" % array[4]
            if array[1]:
                if not GRAPH.data(query):
                    nodes.append(i)
                    array.insert(0, i)
                    hashlist_new.append(array)
                    i += 1
                else:
                    print("[!] This malware is registered already. sha256: %s" % array[4])
            else:
                print("[!] Impfuzzy hash is blank. sha256: %s" % array[4])

    # Compare impfuzzy
    print("[*] The total number of malware is %i." % i)
    result_list = impfuzzy_comp(hashlist, hashlist_new)

    if len(database) != len(nodes):
        # Clustering
        if not args.nocluster:
            for edge in result_list + relationships:
                if edge[2] > ss_threshold:
                    edges.append([[edge[0], edge[1]], edge[2]])
                else:
                    edges.append([[edge[0], edge[1]], 0])
            pyl = PyLouvain(nodes, edges)
            partition, modularity = pyl.apply_method()
            print("[*] The number of clusters is %i." % (len(partition) - 1))
        else:
            print("[*] No clustering option.")

        # Create node
        tx = GRAPH.begin()
        if args.nocluster:
            for hash in hashlist_new:
                tx.append(statement_c, {"id": hash[0], "name": hash[1], "impfuzzy": hash[2],
                                        "md5": hash[3], "sha1": hash[4], "sha256": hash[5],
                                        "cluster": "NULL"})
        else:
            for hash in hashlist_new + hashlist:
                i = 0
                for a in partition:
                    i += 1
                    if hash[0] in a:
                        tx.append(statement_c, {"id": hash[0], "name": hash[1], "impfuzzy": hash[2],
                                                "md5": hash[3], "sha1": hash[4], "sha256": hash[5],
                                                "cluster": i})

        # Create relationship
        for result in result_list:
            if result[2] > ss_threshold:
                tx.append(statement_r, {"id1": result[0], "id2": result[1], "value": result[2]})

        tx.process()
        tx.commit()
        print("[*] Created a graph data.\n")
    else:
        print("[*] Not find a new malware.\n")

    print("  Access to http://localhost:7474 via Web browser.")
    print("  Use Cypher query. You can see the graph.\n")
    print("  == Cypher Query Examples ==")
    print("  [Visualizing the all clusters]")
    print("  $ MATCH (m:Malware) RETURN m\n")
    print("  [Visualizing the clusters that matches the MD5 hash]")
    print("  $ MATCH (m1:Malware)-[s]-() WHERE m1.md5 = \"[MD5]\"")
    print("    MATCH (m2:Malware) WHERE m2.cluster = m1.cluster")
    print("    RETURN m2\n")
    print("  [Visualizing the clusters that matches the threshold more than 90]")
    print("  $ MATCH (m:Malware)-[s:same]-() WHERE s.value > 90 RETURN m,s")
    print("  ===========================\n")
Example #16
from pylouvain import PyLouvain
import math
from matplotlib import pyplot as plt
import networkx as nx

filepath = 'out.txt'

# compute the community partition
pyl = PyLouvain.from_file(filepath)
node_dict = pyl.node_dict  # keys are names like "253916-2", values are integer ids
reverse_node_dict = dict(zip(node_dict.values(),
                             node_dict.keys()))  # keys are integer ids, values are names like "253916-2"
partition, q = pyl.apply_method()
print(partition)
print("Modularity:", q)

# assign a color to the nodes of each community
community_num = len(partition)
print('community_num:', community_num)
# only seven colors are listed, so at most seven communities are assumed
color_board = ['red', 'green', 'blue', 'pink', 'orange', 'purple', 'brown']
color = {}
for index in range(community_num):
    print("Community " + str(index + 1) + ": " + str(len(partition[index])))
    for node_id in partition[index]:
        color[node_id] = color_board[
            index]  # color maps an integer node id to its community's color
new_color_dict = sorted(color.items(), key=lambda d: d[0],
                        reverse=False)  # sort the color dict by key, returning a list of pairs
node_list = [reverse_node_dict[item[0]]
             for item in new_color_dict]  # "253916-2"-style names in ascending id order
color_list = [item[1] for item in new_color_dict]  # colors matching node_list
Example #17
 def test_test(self):
     pyl = PyLouvain.from_file("data/year1990.txt")
     partition, q = pyl.apply_method()
     return partition
Example #18
 def test_citations(self):
     pyl = PyLouvain.from_file("data/hep-th-citations")
     partition, q = pyl.apply_method()
Example #19
 def test_arxiv(self):
     pyl = PyLouvain.from_file("data/arxiv.txt")
     partition, q = pyl.apply_method()
Example #20
import matplotlib
from matplotlib import pyplot as plt
from sklearn import metrics
from pylouvain import PyLouvain
import numpy as np

nodes, edges = PyLouvain.from_file("data/karate.txt")
gamma0 = 0.78

#nodes, edges = PyLouvain.from_gml_file("data/lesmis.gml")
#nodes, edges = PyLouvain.from_gml_file("data/polbooks.gml")


def cmap(nodes, partition):
    m = {n: i for i, comm in enumerate(partition) for n in comm}
    return [m[i] for i in nodes]


def test_small_networks(nodes, edges, gamma0):
    pyl = PyLouvain(nodes, edges)
    partition0, q0 = pyl.apply_method(gamma0)
    c0 = cmap(nodes, partition0)
    NMI = []

    gamma_list = np.linspace(0.2, 3.5, num=200)
    for gamma in gamma_list:
        partition, q = PyLouvain(nodes, edges).apply_method(gamma)
        c = cmap(nodes, partition)

        NMI.append(metrics.normalized_mutual_info_score(c0, c))
Example #21
def test_polbooks():
    nodes, edges = PyLouvain.from_gml_file("data/polbooks.gml")
    pyl = PyLouvain(nodes, edges)

    partition, q = pyl.apply_method()
    print(partition, q)
Example #22
import matplotlib
matplotlib.use("Agg")
from matplotlib import pyplot as plt
import pickle
import time
import numpy as np
from sklearn import metrics

from pylouvain import PyLouvain
from run import multiscale, bayes_model_selection

x, y, z = [], [], []
for gamma in np.linspace(0.2, 0.9, num=20):
    print("gamma=", gamma)
    pyl = PyLouvain.from_file("data/football.txt")
    partition = multiscale(pyl.nodes, pyl.edges, gamma)

    # load the ground-truth communities (football conferences) from a txt file
    fconf = open("data/football.gnc.txt", "r")
    gnc = {str(i): int(line.strip()) for i, line in enumerate(fconf)}
    order_ = {i: stri for i, stri in enumerate(sorted(gnc.keys()))}
    comm = {n: i for i, ns in enumerate(partition) for n in ns}
    a = [comm[i] for i in pyl.nodes]
    b = [gnc[order_[i]] for i in pyl.nodes]

    x.append(gamma)
    y.append(len(partition))
    z.append(metrics.adjusted_mutual_info_score(a, b))

    print("#comm=", len(partition), "NMI=",
          metrics.adjusted_mutual_info_score(a, b))
Example #23
 def test_lesmis(self):
     pyl = PyLouvain.from_gml_file("data/lesmis.gml")
     partition, q = pyl.apply_method()
Example #24
    def process(self):

        hashlist = []
        hashlist_new = []
        nodes = []
        edges = []
        relationships = []

        # recover all actual data
        database = self.graph.run(
            "MATCH (m:Malware) RETURN m.id, m.name, m.impfuzzy, m.scout_result, m.scout_confidence, m.md5, m.sha1, m.sha256, m.tag"
        ).data()
        if database:
            for d in database:
                hashlist.append([
                    d["m.id"], d["m.name"], d["m.impfuzzy"],
                    d["m.scout_result"], d["m.scout_confidence"], d["m.md5"],
                    d["m.sha1"], d["m.sha256"], d["m.tag"]
                ])

        nodes_count = len(database)
        i = nodes_count

        relation_data = self.graph.run(
            "MATCH (m1:Malware)-[s:same]-(m2:Malware) RETURN m1.id, m2.id, s.value"
        ).data()
        if relation_data:
            for r in relation_data:
                relationships.append([r["m1.id"], r["m2.id"], r["s.value"]])
        for x in range(nodes_count):
            nodes.append(x)

        # if massive check for each file
        if self.folder_path:
            for item in self.files:
                scout_result, impfuzzy, md5, sha1, sha256, scout_confidence = self.get_digest(
                    item[0])
                if scout_result in ("", 'A171', None):
                    continue

                query = "MATCH (m:Malware) WHERE m.sha256=\"%s\" RETURN m" % sha256
                objs = self.graph.run(query).data()
                if not objs and sha256 not in [x[5] for x in hashlist_new]:
                    nodes.append(i)
                    hashlist_new.append([
                        i, item[0].split("/")[-1], impfuzzy, scout_result,
                        scout_confidence, md5, sha1, sha256, item[1]
                    ])
                    i += 1
                else:
                    continue
        else:
            # if single we are in the reporting module
            # if file is tested it need to have valid apiscout vector
            if self.check_file(self.filepath):
                scout_result, impfuzzy, md5, sha1, sha256, scout_confidence = self.get_digest(
                    self.filepath)
                if scout_result in ("", 'A171', None):
                    return {}
            else:
                return {}

            query = "MATCH (m:Malware) WHERE m.sha256=\"%s\" RETURN m" % sha256

            objs = self.graph.run(query).data()
            if not objs:
                nodes.append(nodes_count)
                hashlist_new.append([
                    nodes_count, self.filename, impfuzzy, scout_result,
                    scout_confidence, md5, sha1, sha256, None
                ])
            else:
                return self.search_hash(sha256)

        # Calculate apiscout correlation
        result_list = self.scout_comp(hashlist, hashlist_new)

        partition = []  # stays empty when there are no new nodes to cluster
        if len(database) != len(nodes):
            for edge in result_list + relationships:
                if edge[2] > self.threshold:
                    edges.append([[edge[0], edge[1]], edge[2]])
                else:
                    edges.append([[edge[0], edge[1]], 0])
            pyl = PyLouvain(nodes, edges)
            partition, modularity = pyl.apply_method()

        # Create node
        tx = self.graph.begin()

        for hash in hashlist_new + hashlist:
            i = 0
            for a in partition:
                i += 1
                if hash[0] in a:
                    tx.append(
                        statement_c, {
                            "id": hash[0],
                            "name": hash[1],
                            "impfuzzy": hash[2],
                            "scout_result": hash[3],
                            "scout_confidence": hash[4],
                            "md5": hash[5],
                            "sha1": hash[6],
                            "sha256": hash[7],
                            "tag": hash[8],
                            "cluster": i
                        })

        # Create relationship
        for result in result_list:
            if result[2] > self.threshold:
                tx.append(statement_r, {
                    "id1": result[0],
                    "id2": result[1],
                    "value_scout": result[2]
                })

        tx.process()
        tx.commit()

        # recover info
        if self.filename:
            return self.search_hash(sha256)
Example #25
 def test_polbooks(self):
     pyl = PyLouvain.from_gml_file("data/polbooks.gml")
     partition, q = pyl.apply_method()
Example #26
 def test_polbooks(self):
     pyl = PyLouvain.from_gml_file("data/polbooks.gml")
     partition, q = pyl.apply_method()
Example #27
def multiscale(nodes,
               edges,
               gamma,
               deg=None,
               depth=1,
               verbose=False,
               max_depth=4):
    '''
    Multi-scale community detection.
    Recursively splits each sub-graph by maximizing the generalized
    modularity, and terminates each level of the recursion via Bayes
    model selection. A usage sketch follows the function.

    Args:
        nodes: a list of nodes
        edges: a list of edges ((src, dst), weight)
        gamma: the resolution parameter of the generalized modularity
    Returns:
        a list of lists, each containing the nodes of one community
    '''

    if depth >= max_depth or len(nodes) < 2:
        return [nodes]

    verbose and print("    " * depth, "***", "depth=", depth, "N=", len(nodes))

    nodes.sort()
    d = {n: i for i, n in enumerate(nodes)}
    rd = {i: n for n, i in d.items()}
    nodes = list(range(len(d)))
    edges = [((d[e[0][0]], d[e[0][1]]), e[1]) for e in edges]

    if deg is None:
        deg = {i: 0 for i in nodes}
        for e in edges:
            deg[e[0][0]] += e[1]
            deg[e[0][1]] += e[1]

    pyl = PyLouvain(nodes, edges, deg)

    # execution
    partition, q = pyl.apply_method(gamma)
    verbose and print("    " * depth, "gamma=", gamma, "comm=", len(partition))

    if len(partition) < 2: return [list(map(rd.get, nodes))]
    odds = bayes_model_selection(pyl.nodes, pyl.edges, partition)
    verbose and print("    " * depth, "odds=", odds)
    if odds <= 1. or math.isnan(odds): return [list(map(rd.get, nodes))]

    comm = {n: i for i, ns in enumerate(partition) for n in ns}
    edge_list = [[] for _ in range(len(partition))]
    for e in edges:
        u, v = e[0][0], e[0][1]
        if comm[u] == comm[v]:
            edge_list[comm[u]].append(e)

    R = []
    for nodes_, edges_ in zip(partition, edge_list):
        groups = multiscale(nodes_, edges_, gamma, deg, depth + 1, verbose,
                            max_depth)
        for grp in groups:
            R.append([rd[n] for n in grp])

    return R
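A minimal usage sketch for multiscale (an illustration added here, not one of
the collected examples): it reuses data/karate.txt from Example #1 and calls
multiscale the same way Example #9 does.

# hypothetical driver; assumes data/karate.txt exists, as in Example #1
nodes, edges = PyLouvain.from_file("data/karate.txt")
communities = multiscale(nodes, edges, 0.5, verbose=True)
print(len(communities), "communities found")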
Example #28
    end = time.time()

    commsFG_sizes = sorted([len(commsFG[i]) for i in range(len(commsFG))])
    verbose and print(commsFG_sizes)

    map_comm = {v:i for i, c in enumerate(commsFG) for v in c}
    a = [map_comm[k] for k in G.nodes()]
    print("FastGreedy Algorithm ARI=", adjusted_rand_score(a, gnc_list), "NMI=", normalized_mutual_info_score(a, gnc_list))
    print("which takes", end - start, "seconds")
    '''

    #=========== Benchmark Louvain ===============#
    print("Start Louvain community detection")

    start = time.time()
    pyl = PyLouvain(nodes, edges)
    commsLV, q = pyl.apply_method(1.0)
    end = time.time()

    commsLV_sizes = sorted(len(c) for c in commsLV)
    verbose and print(commsLV_sizes)
    verbose and print(len(commsLV_sizes))

    map_comm = {v: i for i, c in enumerate(commsLV) for v in c}
    LV_list = [map_comm[k] for k in G.nodes()]
    print("Louvain Algorithm ARI=", adjusted_rand_score(LV_list, gnc_list),
          "NMI=", normalized_mutual_info_score(LV_list, gnc_list))
    print("which takes", end - start, "seconds")
    print("Size range = ", min(commsLV_sizes), max(commsLV_sizes))
    print()