def processNetwork(Graph, id_to_groups):
    """Write a plain-text structural report about ``Graph`` to a fixed file.

    The report is written to
    ``../../data/fastinf_graph_noweights_features.txt`` and contains, in
    order: the edge and node counts, the largest weakly connected component,
    every weakly connected component with at least 10 nodes, the same two
    sections for strongly connected components, and finally the clustering
    coefficient, the triad count and the modularity of the full node set.

    Graph -- snap graph to describe.
    id_to_groups -- mapping indexed by node id; the looked-up value is
        written (with ``%s``) for every node of the two largest components.
    """

    def write_largest(out, title, sub):
        # One "largest component" section: counts, node ids, then the
        # id_to_groups entry of every node.
        out.write(title)
        out.write('Edges: %f ' % sub.GetEdges())
        out.write('Nodes: %f \n' % sub.GetNodes())
        out.write('Node List: ')
        out.writelines('%d, ' % n.GetId() for n in sub.Nodes())
        out.write('\n')
        out.writelines('%s, ' % id_to_groups[n.GetId()] for n in sub.Nodes())

    def write_all(out, title, finder, row_fmt):
        # One "all components" section; components under 10 nodes are
        # omitted but still consume an index.
        out.write(title)
        found = snap.TCnComV()
        finder(Graph, found)
        for idx, comp in enumerate(found):
            if comp.Len() >= 10:
                out.write(row_fmt % idx)
                out.writelines('%d, ' % nid for nid in comp)

    with open("../../data/fastinf_graph_noweights_features.txt", "w+") as f:
        f.write("RELATED GROUPS GRAPH:\n")
        f.write('Edges: %d\n' % Graph.GetEdges())
        f.write('Nodes: %d\n\n' % Graph.GetNodes())

        write_largest(f, "MAX WCC:\n", snap.GetMxWcc(Graph))
        write_all(f, "\n\nALL WCCs:", snap.GetWccs, '\nWcc%d: ')

        write_largest(f, "\n\nMAX SCC:\n", snap.GetMxScc(Graph))
        write_all(f, "\n\nALL SCCs:", snap.GetSccs, '\nScc%d: ')

        f.write('\n\nCLUSTERING AND COMMUNITIES:\n')
        f.write('Clustering coefficient: %f\n' % snap.GetClustCf(Graph, -1))
        f.write('Num Triads: %d\n' % snap.GetTriads(Graph, -1))
        # GetModularity is given a vector holding every node id of the graph.
        every_node = snap.TIntV()
        for n in Graph.Nodes():
            every_node.Add(n.GetId())
        f.write('Modularity: %f' % snap.GetModularity(Graph, every_node))
Exemple #2
0
def community_detection(G):
    '''
    Run Girvan-Newman community detection on G and save the result twice.

    G -- snap graph handed to snap.CommunityGirvanNewman (see snap docs).

    Writes "data/CGN_knn_20.csv" with one line per community (its node ids
    joined by ", ") and "data/CGN_dict_knn_20.json" with the mapping
    {community index: [node ids]}.  The size of every community is printed
    while the CSV is written.  Returns None.
    '''
    # snap.CommunityCNM(G, CmtyV) is the alternative for large networks only:
    # per the original author it produced 3000-node communities on a
    # 9000-node graph.
    outfile = "data/CGN_knn_20.csv"
    dictfile = "data/CGN_dict_knn_20.json"

    CmtyV = snap.TCnComV()
    snap.CommunityGirvanNewman(G, CmtyV)

    # community index -> list of member node ids
    comm_dict = {
        community_id: [c for c in Cmty]
        for community_id, Cmty in enumerate(CmtyV)
    }

    with open(outfile, "w+") as f:
        for idx, assignment in tqdm(comm_dict.items()):
            print(len(assignment))
            # Node ids are integers; str.join only accepts strings.
            f.write(", ".join(str(node) for node in assignment))
            f.write("\n")

    with open(dictfile, "w+") as f:
        json.dump(comm_dict, f)
Exemple #3
0
def gen_G(D, Pi_minus, Pi_exo, V_exo, theta2, N):
    """
    Build the pairwise-stable network on N nodes and return it.

    D, Pi_minus, Pi_exo = outputs of gen_D().
    V_exo = 'exogenous' part of joint surplus (output of gen_V_exo).
    theta2 = transitivity parameter (theta[2]).

    Every component of D with more than one node is handed to
    gen_G_subgraph() and the edges of the returned network are copied into
    the result; afterwards every edge of Pi_exo is added as well.
    """
    network = snap.GenRndGnm(snap.PUNGraph, N, 0)  # N nodes, zero edges
    parts = snap.TCnComV()
    snap.GetWccs(D, parts)  # components of D
    members = snap.TIntV()  # scratch vector, refilled for each component
    for part in parts:
        if part.Len() <= 1:
            continue  # single-node components contribute nothing here
        members.Clr()
        for node_id in part:
            members.Add(node_id)
        solved = gen_G_subgraph(members, D, Pi_minus, Pi_exo, V_exo, theta2)
        for link in solved.Edges():
            network.AddEdge(link.GetSrcNId(), link.GetDstNId())

    # add robust links
    for link in Pi_exo.Edges():
        network.AddEdge(link.GetSrcNId(), link.GetDstNId())

    return network
Exemple #4
0
def get_thread_text(comments):
    """Group comments into threads and return all thread text as one string.

    comments -- table with object_id, parent_id, points and comment_text
        columns.  The first three columns are converted to int IN PLACE.

    A graph is built with one node per id found in object_id or parent_id
    and one edge per (object_id, parent_id) row; each component reported by
    snap.GetSccs is treated as one thread.  The text of a thread is the
    space-joined comment_text of its comments, reduced to ASCII and passed
    through htmlParser.unescape.  Threads are joined with a space.
    """
    comments.object_id = comments.object_id.astype(int)
    comments.parent_id = comments.parent_id.astype(int)
    comments.points = comments.points.astype(float).astype(int)

    own_ids = set(comments.object_id)
    parent_ids = set(comments.parent_id)
    thread_graph = snap.TUNGraph.New()
    for node_id in own_ids.union(parent_ids):
        thread_graph.AddNode(node_id)
    for child, parent in comments[['object_id', 'parent_id']].values.tolist():
        thread_graph.AddEdge(child, parent)

    threads = snap.TCnComV()
    snap.GetSccs(thread_graph, threads)

    pieces = []
    for thread in threads:
        in_thread = comments[comments['object_id'].isin(thread)]
        # astype(str) so that non-string cells (e.g. floats) can be processed
        raw_texts = in_thread.comment_text.astype(str)
        cleaned = [
            htmlParser.unescape(
                text.decode('ascii', errors='replace').encode(
                    'ascii', 'ignore'))
            for text in raw_texts
        ]
        pieces.append(" ".join(cleaned))
    return " ".join(pieces)
Exemple #5
0
    def compute(self):
        """Run Girvan-Newman on every ego network and print its communities.

        For each file name in ./test_egonets the part before '.egonet' is
        taken as the ego id.  A graph is built from the ego, the entries of
        self.adj_list[<ego id>] and the space-separated edge list in
        ./edges/<ego id>.egonet.edges; then one line of space-separated node
        ids is printed per detected community.
        """
        for filename in os.listdir('./test_egonets'):
            index = filename.split('.egonet')[0]
            new_file = './edges/' + index + '.egonet.edges'

            G = snap.TUNGraph.New()
            G.AddNode(int(index))
            for node in self.adj_list[index]:
                G.AddNode(int(node))

            # A context manager closes the edge file even if a line fails to
            # parse; the Python 2-only file() builtin left it open.
            with open(new_file) as edge_file:
                for line in edge_file:
                    ends = [int(tok) for tok in line.strip('\n').split(' ')]
                    # Skip an edge that is already present in reverse.
                    if not G.IsEdge(ends[1], ends[0]):
                        G.AddEdge(ends[0], ends[1])

            print('Computing for ' + index)

            CmtyV = snap.TCnComV()
            snap.CommunityGirvanNewman(G, CmtyV)

            for Cmty in CmtyV:
                print(' '.join(str(NI) for NI in Cmty))

            G.Clr()
Exemple #6
0
def detect_community(G, id_to_title):
    """Write a sample of the Girvan-Newman communities of G to a text file.

    G -- snap graph to cluster.
    id_to_title -- mapping from node id to a title; a missing id raises the
        mapping's lookup error.

    At most the first 100 communities are written, each with at most its
    first 10 members, one "<id>\\t \\t<title>" line per member.  Output goes
    to ./community_detection/assignment2_Nhom1_TuToanChien.txt.
    """
    max_communities = 100
    max_members = 10

    print('dectect community ....')
    CmtyV = snap.TCnComV()
    snap.CommunityGirvanNewman(G, CmtyV)

    # The context manager closes the file even when a title lookup fails.
    with open('./community_detection/assignment2_Nhom1_TuToanChien.txt',
              'w') as f:
        for i, Cmty in enumerate(CmtyV):
            if i == max_communities:
                break

            f.write('Community ' + str(i) + ': \n')

            for j, NI in enumerate(Cmty):
                if j == max_members:
                    break
                f.write(str(NI) + '\t \t' + str(id_to_title[NI]) + '\n')
    def run(self, data, seed=None):
        """Cluster ``data`` with Girvan-Newman, store and save the result.

        data -- graph object exposing is_directed() and name; it is converted
            with convert.to_snap().
        seed -- ignored; a log line says so when a value is supplied.

        Raises UnsupportedException for a directed graph.  The result dict
        (timecost, runname, dataname, meta, modularity, clusters) is passed
        to save_result() and kept in self.result.  Returns self.
        """
        if data.is_directed():
            raise UnsupportedException("only undirected graph is supported")
        if seed is not None:
            self.logger.info("seed ignored")

        snap_graph = convert.to_snap(data)
        communities = snap.TCnComV()
        timecost, modularity = utils.timeit(
            lambda: snap.CommunityGirvanNewman(snap_graph, communities))

        # cluster index -> list of member node ids
        clusters = {
            idx: [node_id for node_id in members]
            for idx, members in enumerate(communities)
        }

        self.logger.info(
            "Made %d clusters in %f seconds. modularity of the graph is %f" %
            (len(clusters), timecost, modularity))

        result = {
            'timecost': timecost,
            'runname': self.name,
            'dataname': data.name,
            'meta': self.get_meta(),
            'modularity': modularity,
            'clusters': clusters,
        }

        save_result(result)
        self.result = result
        return self
def pageRank_components(g):
    """Run PageRank on each weakly connected component of g and log a summary.

    g -- snap graph; sub-graphs are extracted with snap.GetSubGraph_PNGraph.

    Writes component_pr.txt: a "Total components:<n>" line, then for every
    component that does not have exactly 2 nodes a line with its node count
    and the score stored under the first key of the PageRank table after
    SortByDat(False) (presumably the highest score - confirm the sort
    direction against the snap docs).
    """
    print('executing pagerank components ---- getting components for page rank')
    Components = snap.TCnComV()
    snap.GetWccs(g, Components)

    # The context manager guarantees the report is flushed and closed even
    # when a snap call raises part-way through.
    with open('component_pr.txt', 'w') as f:
        cgraphs = []
        for com in Components:
            v = snap.TIntV()
            for ni in com:
                v.Add(ni)
            cgraphs.append(snap.GetSubGraph_PNGraph(g, v))

        print('components retrived for pagerank')
        f.write('Total components:' + str(len(cgraphs)) + '\n')
        for graph in cgraphs:
            if graph.GetNodes() == 2:
                continue
            sprank = snap.TIntFltH()
            snap.GetPageRank_PNGraph(graph, sprank)
            sprank.SortByDat(False)
            f.write(
                str(graph.GetNodes()) + ' ' +
                str(sprank[sprank.BegI().GetKey()]) + '\n')
    print('finished writing pagerank components values')
Exemple #9
0
def analizzaGirvanNewman(pfPaj, pfAINN, pfMod):
    """Detect Girvan-Newman communities in a Pajek graph and export them.

    pfPaj  -- path of the graph in Pajek format.
    pfAINN -- path of a tab-separated file with exactly three fields per
              line: author id, node number, author name.
    pfMod  -- output path; receives one "id<TAB>name<TAB>community" line per
              author, each terminated by CRLF.

    Returns the number of communities found.  Raises KeyError when a node
    number listed in pfAINN does not belong to any detected community.
    """
    g = snap.LoadPajek(snap.PUNGraph, pfPaj)

    comunita = snap.TCnComV()
    modularity = snap.CommunityGirvanNewman(g, comunita)
    dMod = {}  # {node number: community index}
    classe = 0
    for com in comunita:
        for nodo in com:
            dMod[nodo] = classe
        classe += 1
    print('Numero di comunita analizzaGirvanNewman: {} modularity: {}'.format(
        classe, modularity))

    # Text mode: the lines are split and formatted as str, which fails on the
    # bytes objects a binary-mode handle yields under Python 3.
    dNum = {}  # {node number: [author id, author name]}
    with open(pfAINN, 'r') as fAINN:
        for line in fAINN:
            autID, autNum, autNome = line.rstrip().split('\t')
            dNum[int(autNum)] = [autID, autNome]

    # newline='' writes the explicit '\r\n' untouched on every platform.
    with open(pfMod, 'w', newline='') as fMod:
        for autNum, (autID, autNome) in dNum.items():
            fMod.write('{}\t{}\t{}\r\n'.format(autID, autNome, dMod[autNum]))

    return classe  # number of communities found
Exemple #10
0
def GirvanNewmanMethodBySnap(graph):
    """Print the Girvan-Newman communities of ``graph`` and its modularity.

    graph -- snap graph passed to snap.CommunityGirvanNewman.

    One "Community:" line listing the member node ids is printed per
    community, followed by the modularity value.
    """
    CmtyV = snap.TCnComV()
    modularity = snap.CommunityGirvanNewman(graph, CmtyV)
    for Cmty in CmtyV:
        # Show the members of this community; the loop previously printed the
        # whole community vector object on every iteration.
        print("Community: ", [NI for NI in Cmty])
    print("The modularity of the network is %f" % modularity)
Exemple #11
0
def quick_properties(graph, name, dic_path):
    """Print a quick overview of the graph called ``name``.

    graph -- snap graph to summarise.
    name -- label used in the heading.
    dic_path -- path of a pickled dict {player: node id}; it is used to turn
        the best-ranked node ids back into players.

    Prints node/edge counts, self/directed/undirected/reciprocated edge
    counts, zero-degree node counts, the maximum in- and out-degree, the
    weakly connected component summary and the (up to) three best nodes by
    PageRank, hub score and authority score.
    """

    def top_three(scores):
        # Keys of a snap hash table, best score first, at most three.
        ranked = sorted(scores, key=lambda key: scores[key], reverse=True)
        return ranked[:3]

    n_edges = graph.GetEdges()
    n_nodes = graph.GetNodes()
    print("##########")
    print("Quick overview of {} Network".format(name))
    print("##########")
    # .format belongs on the string: print() returns None on Python 3.
    print("{} Nodes, {} Edges".format(n_nodes, n_edges))
    print("{} Self-edges ".format(snap.CntSelfEdges(graph)))
    print("{} Directed edges, {} Undirected edges".format(
        snap.CntUniqDirEdges(graph), snap.CntUniqUndirEdges(graph)))
    print("{} Reciprocated edges".format(snap.CntUniqBiDirEdges(graph)))
    print("{} 0-out-degree nodes, {} 0-in-degree nodes".format(
        snap.CntOutDegNodes(graph, 0), snap.CntInDegNodes(graph, 0)))
    node_in = graph.GetNI(snap.GetMxInDegNId(graph))
    node_out = graph.GetNI(snap.GetMxOutDegNId(graph))
    # Report the in-degree and out-degree themselves instead of GetDeg().
    print("Maximum node in-degree: {}, maximum node out-degree: {}".format(
        node_in.GetInDeg(), node_out.GetOutDeg()))
    print("###")
    components = snap.TCnComV()
    snap.GetWccs(graph, components)
    max_wcc = snap.GetMxWcc(graph)
    print("{} Weakly connected components".format(components.Len()))
    print("Largest Wcc: {} Nodes, {} Edges".format(max_wcc.GetNodes(),
                                                   max_wcc.GetEdges()))
    prankH = snap.TIntFltH()
    snap.GetPageRank(graph, prankH)
    NIdHubH = snap.TIntFltH()
    NIdAuthH = snap.TIntFltH()
    snap.GetHits(graph, NIdHubH, NIdAuthH)

    with open(dic_path, 'rb') as dic_id:
        # NOTE(review): pickle.load executes arbitrary code from the file;
        # only point dic_path at trusted data.
        mydict = pickle.load(dic_id)

    # Invert the mapping once; setdefault keeps the first player found for an
    # id, matching a first-match search through the values.
    id_to_player = {}
    for player, node_id in mydict.items():
        id_to_player.setdefault(node_id, player)

    def players(node_ids):
        return ", ".join(str(id_to_player[nid]) for nid in node_ids)

    print("3 most central players by PageRank scores: {}".format(
        players(top_three(prankH))))
    print("Top 3 hubs: {}".format(players(top_three(NIdHubH))))
    print("Top 3 authorities: {}".format(players(top_three(NIdAuthH))))
Exemple #12
0
def is_uniquely_connected(graph):
    """Tell whether ``graph`` has exactly one component with several nodes.

    Strongly connected components are checked first.  Only when they do not
    contain exactly one component with more than one node are the weakly
    connected components checked in the same way.

    Returns True or False.
    """

    def is_unique(components):
        # Exactly one component has more than one node.
        return sum(1 for comp in components if comp.Len() > 1) == 1

    s_components = snap.TCnComV()
    snap.GetSccs(graph, s_components)
    unique = is_unique(s_components)

    # Test the computed flag, not the helper: a function object is always
    # truthy, so the weak-component fallback could never run.
    if not unique:
        w_components = snap.TCnComV()
        snap.GetWccs(graph, w_components)
        unique = is_unique(w_components)

    return unique
def out_modularity_gn(g):
    """Print the Girvan-Newman communities of g, then the modularity.

    Each community is introduced by a "Community: " line and followed by one
    line per member node id.
    """
    community_vector = snap.TCnComV()
    modularity = snap.CommunityGirvanNewman(g, community_vector)
    # Single-argument print(...) produces the same output on Python 2 and 3;
    # the print statement is a syntax error on Python 3.
    for community in community_vector:
        print("Community: ")
        for i in community:
            print(i)
    print("The modularity of the network is %f" % modularity)
Exemple #14
0
def get_component_distribution(ei_graph):
    """Count how many strongly connected components there are of each size.

    ei_graph -- wrapper whose base() result is passed to snap.GetSccs.

    returns: Counter mapping component size -> number of such components

    https://snap.stanford.edu/snappy/doc/reference/GetSccs.html
    """
    sccs = snap.TCnComV()
    snap.GetSccs(ei_graph.base(), sccs)
    sizes = [scc.Len() for scc in sccs]
    return Counter(sizes)
Exemple #15
0
def community_partition(G):
    """Return a snap.TIntIntH mapping node id -> CNM community index.

    Communities come from snap.CommunityCNM(G, ...) and are numbered from 0
    in the order the community vector yields them.
    """
    found = snap.TCnComV()
    snap.CommunityCNM(G, found)
    labels = snap.TIntIntH()
    for label, members in enumerate(found):
        for node_id in members:
            labels[node_id] = label
    return labels
def comDetect(algorithm, clusterCommands, Graph, conn, cur):
    """Detect communities in Graph and store them through createTable().

    algorithm -- "gn" (Girvan-Newman) or "cnm" (Clauset-Newman-Moore).
    clusterCommands, conn, cur -- forwarded unchanged to createTable()
        together with the community vector.
    Graph -- snap graph to cluster.

    Prints the elapsed detection time and the modularity.
    Raises ValueError for any other algorithm name, before any work is done.
    """
    if algorithm == "gn":
        detect = snap.CommunityGirvanNewman
    elif algorithm == 'cnm':
        detect = snap.CommunityCNM
    else:
        # Fail early: otherwise an empty result is stored and the final
        # print dies with a NameError on the unbound modularity.
        raise ValueError("unknown algorithm %r, expected 'gn' or 'cnm'" %
                         (algorithm,))

    CmtyV = snap.TCnComV()
    before_time = time.time()
    modularity = detect(Graph, CmtyV)
    # Two spaces after the colon reproduce the output of the former
    # two-argument print statement.
    print("Total handling time is:  %s" % (time.time() - before_time))
    createTable(clusterCommands, CmtyV, conn, cur)
    print("The modularity of the network is %f" % modularity)
Exemple #17
0
def community_gn(G):
    """Return the Girvan-Newman communities of G as a list of node-id lists."""
    found = snap.TCnComV()
    snap.CommunityGirvanNewman(G, found)
    return [[node_id for node_id in members] for members in found]
Exemple #18
0
def getCnn():
    """Print the CNM communities of the word graph and their modularity.

    The graph is the first element returned by
    generate_word_graph(True, False, False).  Its node count is printed
    first, then every community ("Community: " followed by one node id per
    line), then the modularity.
    """
    G1, id2, synset2, _, _, _ = generate_word_graph(True, False, False)
    print(G1.GetNodes())
    CmtyV = snap.TCnComV()
    modularity = snap.CommunityCNM(G1, CmtyV)
    # Single-argument print(...) behaves identically on Python 2 and 3 and
    # matches the print(...) call already used above.
    for Cmty in CmtyV:
        print("Community: ")
        for NI in Cmty:
            print(NI)
    print("The modularity of the network is %f" % modularity)
Exemple #19
0
def getStronglyConnectedComponents(Graph, node_to_g):
    """Map every protein to the 1-based index of its strong component.

    Graph -- snap graph passed to snap.GetSccs.
    node_to_g -- mapping from node id to the key used in the result.

    Returns a dict {node_to_g[node]: component number}; numbering starts at
    1 and follows the order in which snap reports the components.
    """
    sccs = snap.TCnComV()
    snap.GetSccs(Graph, sccs)
    membership = {}
    for number, scc in enumerate(sccs, 1):  # 1-indexed component membership
        for node_id in scc:
            membership[node_to_g[node_id]] = number
    return membership
Exemple #20
0
def findCommunity():
    """Predict circles for every user of the sample submission and save them.

    For each UserId in <submissionFolderName>sample_submission.csv the ego
    network <egonetFolderName><UserId>.egonet is loaded, plotted and clustered
    with Girvan-Newman.  Users whose graph has more than
    tooManyNodesThreshold nodes are skipped.  Communities with at least
    tooLittleFriendsInCircleThreshold members are kept and their node ids,
    space separated, become the user's 'Predicted' value.  The table is
    written to <submissionFolderName><submissionNumber>.csv.
    """
    submission = pd.read_csv(submissionFolderName + 'sample_submission.csv')

    for userId in list(submission['UserId']):

        # read graph
        filename = str(userId) + '.egonet'
        G = snap.TUNGraph.New()
        read_nodeadjlist(egonetFolderName + filename, G)

        # do not calculate for large graphs (it takes too long)
        if G.GetNodes() > tooManyNodesThreshold:
            print('skipping user ' + str(userId))
            continue
        print('predicting for user ' + str(userId))

        # visualization
        plot = plotting(G, snap.gvlNeato)
        plot.run('gviz_plot_{}'.format(userId),
                 title='UserID = {}'.format(userId))

        # find communities with Girvan-Newman, keeping only the larger ones
        CmtyV = snap.TCnComV()
        modularity = snap.CommunityGirvanNewman(G, CmtyV)
        listOfCircles = [
            list(Cmty) for Cmty in CmtyV
            if len(Cmty) >= tooLittleFriendsInCircleThreshold
        ]
        print('The modularity of the network is %f' % modularity)

        # Users without any kept circle retain the value already present in
        # the sample submission.
        if listOfCircles:
            predictionString = ' '.join(
                str(NI) for Cmty in listOfCircles for NI in Cmty)
            # .loc replaces the .ix indexer that pandas has removed.
            submission.loc[submission['UserId'] == userId,
                           'Predicted'] = predictionString

    submission.to_csv(submissionFolderName + str(submissionNumber) + '.csv',
                      index=False)
Exemple #21
0
    def run(self):
        """Detect CNM communities on self.graph.

        Self-edges are removed from self.graph first.  The communities are
        stored in self.community_list as a list of node-id lists.
        """
        snap.DelSelfEdges(self.graph)
        found = snap.TCnComV()
        snap.CommunityCNM(self.graph, found)

        self.community_list = [[node_id for node_id in members]
                               for members in found]
Exemple #22
0
def computeWeaklyConnectedComponents(graph, outFile):
    """Export the weakly connected components of ``graph`` to ``outFile``.

    graph -- snap graph passed to snap.GetWccs.
    outFile -- path of the text file to (over)write.

    Each node id is written on its own line and every component is followed
    by one blank line.
    """
    logger.info("Computing Weakly Connected Components")
    # The context manager closes (and therefore flushes) the file before the
    # export is reported; the handle was previously never closed.
    with open(outFile, 'w') as fw_cc:
        Components = snap.TCnComV()
        snap.GetWccs(graph, Components)
        for CnCom in Components:
            for item in CnCom:
                fw_cc.write(str(item) + "\n")
            fw_cc.write("\n")
    logger.info("Weakly Connected Components Computed!")
    logger.info("Weakly Connected Components Exported to " + outFile)
Exemple #23
0
def community_cnm(G):
    """Return the CNM communities of G as a list of node-id lists."""
    found = snap.TCnComV()
    snap.CommunityCNM(G, found)
    return [[node_id for node_id in members] for members in found]
Exemple #24
0
def calculate_communities(G):
    """Return {node: [community label]} from Girvan-Newman on G.

    G is converted with networkx_to_snappy() first.  Labels are the position
    of the community in snap's result plus 2, so the first label is 2
    (NOTE(review): the reason for the offset is not visible here - confirm
    with the consumer of these labels).
    """
    snap_graph = networkx_to_snappy(G)
    found = snap.TCnComV()
    snap.CommunityGirvanNewman(snap_graph, found)

    labels_by_node = {}  # {node: [community]}
    for position, members in enumerate(found):
        label = position + 2
        for node_id in members:
            labels_by_node.setdefault(node_id, []).append(label)
    return labels_by_node
def split_communities(C_Net):
    """Split C_Net into CNM communities.

    Prints the number of communities and returns a list holding one
    snap.TIntV of node ids per community.
    """
    CmtyV = snap.TCnComV()
    snap.CommunityCNM(C_Net, CmtyV)
    print(len(CmtyV))  # number of communities
    Cs = []
    for Cmty in CmtyV:
        NIdV = snap.TIntV()
        for NI in Cmty:
            # snap vectors append with Add(); a lowercase add() is not the
            # method used for TIntV anywhere else in this file.
            NIdV.Add(NI)
        Cs.append(NIdV)
    return Cs
def runCNM(nodelist, weightedGraph):
    """Cluster a graph with CNM and return the clusters as lists of node ids.

    nodelist -- only its length is used: nodes 0 .. len(nodelist) - 1 are
        created.
    weightedGraph -- iterable of edges; the first two items of each edge are
        the endpoints (anything after them is not used).

    Prints progress messages and the modularity.
    """
    print("Building snap graph")
    node_count = len(nodelist)
    graph = snap.TUNGraph_New(node_count, len(weightedGraph))
    for node_id in range(node_count):
        graph.AddNode(node_id)
    for link in weightedGraph:
        graph.AddEdge(link[0], link[1])

    print("Clustering weighted graph")
    clusters = snap.TCnComV()
    modularity = snap.CommunityCNM(graph, clusters)
    print(f'Mod: {modularity}')
    return [[node_id for node_id in cluster] for cluster in clusters]
def getComms(graph):
    """Detect CNM communities and index them by node.

    graph -- snap graph passed to snap.CommunityCNM.

    Prints the modularity.  Returns (comms, commDict): the snap community
    vector and a dict {node id: index of the first community containing it}.
    A warning is printed if a node shows up in more than one community.
    """
    comms = snap.TCnComV()
    modularity = snap.CommunityCNM(graph, comms)
    print('Modularity %s' % (modularity,))
    commDict = {}
    for i, comm in enumerate(comms):
        for node_id in comm:
            if node_id in commDict:
                # The message used to be a bare string expression with no
                # effect; actually emit it.
                print('node in more than one comm?')
            else:
                commDict[node_id] = i
    return comms, commDict
def get_community_CNM(file_path, output_path):
    """Write a CNM community summary of the graph in file_path.

    The graph comes from load_graph(file_path) and is passed through
    convert_to_undirected() before clustering.  output_path receives the
    modularity, the number of communities and then one line per community
    holding its size.
    """
    Graph, H = load_graph(file_path)
    Graph = convert_to_undirected(Graph)
    found = snap.TCnComV()
    modularity = snap.CommunityCNM(Graph, found)

    pieces = [
        'Modularity: ',
        str(modularity),
        '\nNum of communities: ',
        str(len(found)),
        '\nCommunities:\n',
    ]
    for members in found:
        pieces.append(str(len(members)) + '\n')

    with open(output_path, 'w') as f:
        f.write(''.join(pieces))
Exemple #29
0
def get_communities(G_Undir, chords_dict):
    """Print the CNM communities of G_Undir using chords_dict for labels.

    G_Undir -- snap graph; its self-edges are deleted IN PLACE first.
    chords_dict -- mapping from node id to the value printed for that node.

    For each community the size is printed, then one line per member,
    then two empty lines.  The modularity is printed last.
    """
    print("************")
    print("Communities")
    snap.DelSelfEdges(G_Undir)
    CmtyV = snap.TCnComV()
    modularity = snap.CommunityCNM(G_Undir, CmtyV)
    # Single-argument print(...) gives the same output on Python 2 and 3 and
    # matches the print(...) calls above.
    for Cmty in CmtyV:
        print("Community: size %s" % Cmty.Len())
        for NI in Cmty:
            print(chords_dict[NI])
        print("")
        print("")
    print("The modularity of the network is %f" % modularity)
    def detectCommunities(self,
                          algo="CNM",
                          snapgraph=None,
                          H=None,
                          nxGraph=None,
                          write=True,
                          printout=True):
        '''
        Detect communities using the Clauset-Newman-Moore modularity-based
        greedy algorithm or the Girvan-Newman betweenness-centrality based
        algorithm.

        algo      -- "CNM" selects Clauset-Newman-Moore; any other value
                     selects Girvan-Newman.
        snapgraph -- SNAP graph to cluster; when None, both snapgraph and H
                     are built with self.buildSnapGraph(networkxGraph=nxGraph).
        H         -- companion object exposing .Graph (the networkx graph that
                     receives the labels) and .exportGraph(); required
                     whenever snapgraph is supplied.
        write     -- when true, self.writeTxt(CmtyV, modularity) is called.
        printout  -- when true, every community is printed.

        The community number (starting at 1) of each node is stored on the
        networkx graph as the node attribute 'CNM_Label', whichever algorithm
        ran, and the graph is exported through H.exportGraph("JSON").

        Returns the labels as a dictionary
        {hashtag_id: {'CNM_Label': community number}}.
        Raises ValueError when snapgraph is given without H.
        '''
        # Creates a SNAP Graph Object if none is provided
        if snapgraph is None:
            snapgraph, H = self.buildSnapGraph(networkxGraph=nxGraph)
        if H is None:
            # Say what is missing instead of failing on H.Graph below.
            raise ValueError("H must be supplied together with snapgraph")
        networkxGraph = H.Graph

        # Detect community and calculate modularity
        start = time.time()
        CmtyV = snap.TCnComV()
        if algo == "CNM":
            modularity = snap.CommunityCNM(snapgraph, CmtyV)
        else:
            modularity = snap.CommunityGirvanNewman(snapgraph, CmtyV)
        if printout:
            for i, Cmty in enumerate(CmtyV, 1):
                print(f"Community {i}: ")
                print(list(Cmty))
                print("\n")
        print("The modularity of the network is %f" % modularity)
        # The reported time also covers the optional printing above.
        print(f"Time : {time.time()-start} seconds")

        # Save to text file at savepath if write is set
        if write:
            self.writeTxt(CmtyV, modularity)

        # Create a dataframe of community assignments
        communities = [(c, i) for i, cmty in enumerate(CmtyV, 1)
                       for c in cmty]
        community_df = pd.DataFrame(communities,
                                    columns=['hashtag_id', 'CNM_Label'])
        labels = community_df.set_index('hashtag_id').to_dict('index')

        # Export the labelled graph as a JSON file.
        nx.set_node_attributes(networkxGraph, labels)
        print("Community labelled graph exported as ", self.name_arg, ".json")
        H.exportGraph("JSON")
        return labels