예제 #1
0
def computeClusteringCoeff(G, NodeAttributes):
    """Attach raw and min-max normalised clustering coefficients to each node.

    Args:
        G: SNAP graph handed to ``snap.GetNodeClustCf``.
        NodeAttributes: mapping of node id -> attribute dict. Every node id
            reported by SNAP must already have an entry; each entry gains
            the keys ``'ClusterCoeff'`` and ``'NormClusterCoeff'``.

    Returns:
        The same ``NodeAttributes`` mapping (it is updated in place).
    """
    NIdCCfH = snap.TIntFltH()
    snap.GetNodeClustCf(G, NIdCCfH)

    ClusterCoeffList = [(nodeId, NIdCCfH[nodeId]) for nodeId in NIdCCfH]
    if not ClusterCoeffList:
        # Nothing to annotate; min()/max() would fail on an empty graph.
        return NodeAttributes

    for nodeId, clusterCoeff in ClusterCoeffList:
        NodeAttributes[nodeId]['ClusterCoeff'] = clusterCoeff

    # Highest coefficient first, so the extremes sit at both ends.
    ClusterCoeffList.sort(key=lambda x: x[1], reverse=True)
    maxClusterCoeff = ClusterCoeffList[0][1]
    minClusterCoeff = ClusterCoeffList[-1][1]

    #
    # Sanity check: the first/last entries must carry the max/min values.
    #
    print("%s %s %s %s" % (ClusterCoeffList[0], maxClusterCoeff,
                           ClusterCoeffList[-1], minClusterCoeff))

    spread = maxClusterCoeff - minClusterCoeff
    for nodeId, clusterCoeff in ClusterCoeffList:
        if spread:
            normClusterCoeff = (clusterCoeff - minClusterCoeff) / spread
        else:
            # All nodes share one value; avoid dividing by zero.
            normClusterCoeff = 0.0
        NodeAttributes[nodeId]['NormClusterCoeff'] = normClusterCoeff

    return NodeAttributes
예제 #2
0
def get_clustering_coeff(filename):
    """Return the mean clustering coefficient for every node degree.

    Args:
        filename: path of an edge-list file, loaded as an undirected graph
            through ``snap.LoadEdgeList(snap.PUNGraph, filename)``.

    Returns:
        A ``collections.defaultdict(int)`` mapping each out-degree found in
        the graph to the arithmetic mean of the clustering coefficients of
        the nodes with that degree. Degrees that do not occur read as 0.
    """
    graph = snap.LoadEdgeList(snap.PUNGraph, filename)

    coeff_sum = collections.defaultdict(float)
    node_count = collections.defaultdict(int)
    for node in graph.Nodes():
        degree = node.GetOutDeg()
        coeff_sum[degree] += snap.GetNodeClustCf(graph, node.GetId())
        node_count[degree] += 1

    # A running "(previous + current) / 2" is not a mean: it halves the first
    # sample and weights later nodes more heavily, so sum and count instead.
    avg_coeff = collections.defaultdict(int)
    for degree, total in coeff_sum.items():
        avg_coeff[degree] = total / node_count[degree]
    return avg_coeff
예제 #3
0
def CalculateClusteringCoefficient(graph):
    """Print the clustering coefficient of every node of ``graph``.

    The coefficients are obtained with ``snap.GetNodeClustCf`` and written
    to standard output, one line per node, after a heading line.
    """
    coefficients = snap.TIntFltH()
    snap.GetNodeClustCf(graph, coefficients)

    print("CLUSTERRING COEFFICIENT")
    for node_id in coefficients:
        line = "Node %d th have coefficient %f" % (node_id,
                                                   coefficients[node_id])
        print(line)
예제 #4
0
def ccDist(G):
  """Return the mean clustering coefficient per out-degree of ``G``.

  The result is a ``defaultdict`` keyed by out-degree; degrees that do not
  occur in the graph read as 0.
  """
  counts = collections.defaultdict(int)
  sums = collections.defaultdict(int)
  for node in G.Nodes():
    degree = node.GetOutDeg()
    counts[degree] += 1
    sums[degree] += snap.GetNodeClustCf(G, node.GetId())

  ccD = collections.defaultdict(lambda: 0)
  for degree in counts:
    # "* 1.0" keeps the division floating point on every Python version.
    ccD[degree] = sums[degree] * 1.0 / counts[degree]
  return ccD
예제 #5
0
def calculate_clustering_coeff(G, nodes):
    """Print summary statistics of the clustering coefficients of ``nodes``.

    Ids for which ``G.IsNode`` is false are ignored. The printed line holds
    the average, the standard deviation, the number of samples and
    ``1.96 * std / sqrt(n)`` (labelled ``err``).
    """
    coeffs = [
        snap.GetNodeClustCf(G, node) for node in nodes if G.IsNode(node)
    ]
    n = len(coeffs)
    avg = np.average(coeffs)
    std = np.std(coeffs)
    err = 1.96 * std / np.sqrt(n)

    report = "avg: {:0.3f}\tstd: {:0.3f}\tsamples: {}\terr: {:0.3f}"
    print(report.format(avg, std, n, err))
def _get_CC(Graph, H, output_path):
    """Write the clustering coefficient of every node to a CSV file.

    Args:
        Graph: SNAP graph passed to ``snap.GetNodeClustCf``.
        H: object whose ``GetKey(node_id)`` yields the value stored in the
            ``username`` column.
        output_path: destination of the CSV (columns ``username``, ``CC``).
    """
    coefficients = snap.TIntFltH()
    snap.GetNodeClustCf(Graph, coefficients)

    records = [{
        'username': H.GetKey(node_id),
        'CC': coefficients[node_id],
    } for node_id in coefficients]

    # Select the columns explicitly so their order in the file is fixed.
    frame = pd.DataFrame(records)[['username', 'CC']]
    frame.to_csv(output_path, index=False, encoding='utf-8')
예제 #7
0
def print_ccf_of_random_node(G, GName):
    """Print the clustering coefficient of one randomly chosen node of ``G``.

    A random position is drawn with ``snap.TInt.GetRnd`` and the node found
    at that position while iterating the coefficient table is reported.
    ``GName`` is printed without its last ten characters.
    """
    coefficients = snap.TIntFltH()
    snap.GetNodeClustCf(G, coefficients)
    target_position = snap.TInt.GetRnd(len(coefficients))

    for position, node_id in enumerate(coefficients):
        if position != target_position:
            continue
        message = "Clustering coefficient of random node {0} in {1}: {2}".format(
            node_id, GName[:-10], coefficients[node_id])
        print(message)
        break
예제 #8
0
def DegClustDist(graph):
    """
    Computes the mean clustering coefficient of the nodes of each degree.

    Progress is reported through ``PrintProgress`` while the nodes are
    visited (how often is governed by the ``StopWatch(0.5)`` helper --
    presumably a time interval; confirm against its definition).

    Parameters
    ----------
    graph : SNAP graph
        Graph whose nodes are visited via ``Nodes()`` and measured with
        ``snap.GetNodeClustCf``.

    Returns
    -------
    coeff_dist : 2D numpy array with shape (2, number of distinct degrees)
        coeff_dist[0, :] : degree values, in ascending order
        coeff_dist[1, :] : mean clustering coefficient of the nodes with
                           that degree
    """
    by_degree = {}
    timer = StopWatch(0.5)
    total_nodes = graph.GetNodes()

    for visited, node in enumerate(graph.Nodes()):
        PrintProgress(timer(), visited/total_nodes)
        degree = node.GetDeg()
        clust = snap.GetNodeClustCf(graph, node.GetId())
        by_degree.setdefault(degree, []).append(clust)

    # Filling the columns in ascending degree order yields sorted output.
    coeff_dist = np.zeros((2, len(by_degree)))
    for column, degree in enumerate(sorted(by_degree)):
        coeff_dist[0, column] = degree
        coeff_dist[1, column] = np.mean(by_degree[degree])

    PrintProgress(True, 1)
    print('\n')

    return coeff_dist
def get_clustering_coefficient(graph, gtype='snap'):
    """Return node ids and their clustering coefficients as numpy arrays.

    Args:
        graph: a SNAP graph (``gtype='snap'``) or a NetworkX graph
            (``gtype='nx'``).
        gtype: selects the backend; any other value yields empty arrays.

    Returns:
        ``(nids, ccs)``: a ``uint32`` array of node ids and a ``float32``
        array of coefficients in matching order. For NetworkX the ids are
        sorted; for SNAP they follow the iteration order of the result table.
    """
    nids, ccs = [], []
    if gtype == 'snap':
        coeff_by_node = snap.TIntFltH()
        snap.GetNodeClustCf(graph, coeff_by_node)
        nids = [nid for nid in coeff_by_node]
        ccs = [coeff_by_node[nid] for nid in nids]
    elif gtype == 'nx':
        coeff_by_node = nx.clustering(graph)
        nids = list(np.sort(list(coeff_by_node.keys())))
        ccs = [coeff_by_node[nid] for nid in nids]
    return np.asarray(nids, dtype='uint32'), np.asarray(ccs, dtype='float32')
예제 #10
0
def calcClusteringCoefficientSingleNode(Node, Graph):
    """
    :param - Node: node taken from a snap.PUNGraph object, e.g. one of the
                   items yielded by Graph.Nodes()
    :param - Graph: snap.PUNGraph object representing an undirected graph

    return type: float
    returns: local clustering coefficient of Node, as computed by
             snap.GetNodeClustCf
    """
    return snap.GetNodeClustCf(Graph, Node.GetId())
예제 #11
0
def q4_2_aux():
    FIn = snap.TFIn('HDN.graph')
    HDN = snap.TUNGraph.Load(FIn)
    edgesN = HDN.GetEdges()
    verticesN = HDN.GetNodes()
    print "nodes in HDN:", verticesN
    print "edged in HDN:", edgesN
    density = float(edgesN) / (verticesN * (verticesN - 1) / 2)
    print "density of the graph is", density
    CSum = 0.0
    #HDN.Dump()
    for i in range(1000):
        NId = HDN.GetRndNId()
        Ctemp = snap.GetNodeClustCf(HDN, NId)
        #print Ctemp
        CSum += Ctemp
    c = float(CSum) / 1000
    print "average Clustering coeficient in HDN:", c
예제 #12
0
def print_connectivity_clustering(G):
    """Print clustering and triad statistics of graph ``G``.

    Printed, in order: the average clustering coefficient, the number of
    triads, the clustering coefficient of one random node, the number of
    triads another random node participates in, and the number of edges
    that participate in at least one triad. Coefficients are rounded to
    four decimal places.
    """
    GraphClustCoeff = snap.GetClustCf(G)
    print("Average clustering coefficient:", round(GraphClustCoeff, 4))

    print("Number of triads:", snap.GetTriads(G))

    NId = G.GetRndNId()
    # round() without a digit count would collapse the coefficient to 0 or 1.
    print(f'Clustering coefficient of random node {NId}:',
          round(snap.GetNodeClustCf(G, NId), 4))

    NId = G.GetRndNId()
    print(f'Number of triads random node {NId} participates:',
          snap.GetNodeTriads(G, NId))

    print('Number of edges that participate in at least one triad:',
          snap.GetTriadEdges(G))
def get_each_nodes_ClusteringCofficient(graph):
    """Return a ``snap.TIntFltH`` filled by ``snap.GetNodeClustCf``.

    The table maps node ids of ``graph`` to their clustering coefficients.
    """
    coeff_by_node = snap.TIntFltH()
    snap.GetNodeClustCf(graph, coeff_by_node)
    return coeff_by_node
예제 #14
0
def get_clustering_coeff(UGraph, attributes):
    """Store per-node clustering coefficients in ``attributes``.

    ``attributes['ClusteringCoeff']`` becomes a 1-D numpy array of length
    ``UGraph.GetNodes()`` indexed directly by node id.
    NOTE(review): this only works when node ids lie in ``0..N-1``; a larger
    id raises ``IndexError`` -- confirm the callers guarantee that.
    """
    node_ids = [NI.GetId() for NI in UGraph.Nodes()]
    coeff = np.zeros((UGraph.GetNodes(), ))
    for node_id in node_ids:
        coeff[node_id] = snap.GetNodeClustCf(UGraph, node_id)
    attributes['ClusteringCoeff'] = coeff
예제 #15
0
import numpy as np
import snap
import matplotlib.pyplot as plt
import random

# Two payoff values and the ratio derived from them. float() keeps the
# ratio fractional even where "/" on two ints truncates (Python 2).
payoff_a = 2
payoff_b = 3
thersold = float(payoff_a) / (payoff_a + payoff_b)

# Per-node bookkeeping lists; they are only created here.
nodestatusList = []
nodedegreeList = []
nodeaffectList = []
nodeinitialList = []

# Tab-separated edge list, source id in column 0 and destination in column 1.
G = snap.LoadEdgeList(snap.PUNGraph, "data/soc-Slashdot0902.txt", 0, 1, '\t')
snap.PlotClustCf(G, "project_cluster_coeff",
                 "Undirected graph - clustering coefficient")

# Clustering coefficient per degree; result[0] is reported as the average.
DegToCCfV = snap.TFltPrV()
result = snap.GetClustCfAll(G, DegToCCfV)
for item in DegToCCfV:
    print("degree: %d, clustering coefficient: %f" %
          (item.GetVal1(), item.GetVal2()))
print("average clustering coefficient", result[0])

# Dump "<node id> <clustering coefficient>" lines. The coefficient is a
# float, so it is written with %f (%d would truncate it to an integer).
NIdCCfH = snap.TIntFltH()
snap.GetNodeClustCf(G, NIdCCfH)
with open("clustering_list.txt", "w") as clusterfile:
    for item in NIdCCfH:
        clusterfile.write("%d %f\r\n" % (item, NIdCCfH[item]))
예제 #16
0
matplotlib.use('Agg')

import matplotlib.pyplot as plt

# Load the Steam-Sweden edge list as an undirected graph
# (source ids in column 0, destination ids in column 1).
Graph = snap.LoadEdgeList(snap.PUNGraph, "Steam-Sweden.txt", 0, 1)

# Count the triads of the graph.
# NOTE(review): -1 is passed as the sample-size argument; presumably this
# means "use every node" rather than random sampling -- confirm in SNAP docs.
NumTriads = snap.GetTriads(Graph, -1)
print "Number of triads: " + str(NumTriads)

# Pick one random node id; it is reused for both node-level statistics below.
rm_node = Graph.GetRndNId()

# Clustering coefficient of the random node.
rm_clus_coeff = snap.GetNodeClustCf(Graph, rm_node)
print "Clustering coefficient of random node ", rm_node, " in Steam-Sweden: ", rm_clus_coeff

# Number of triads the same random node participates in.
num_triads = snap.GetNodeTriads(Graph, rm_node)
print "Number of triads of node ", rm_node, " participates in ", num_triads, " triads"

# Accumulate triad totals over the per-node triples returned by SNAP:
# Val2 is summed as closed triads and Val3 as open triads. The totals are
# the inputs for the average/global clustering coefficient.
TriadV = snap.TIntTrV()
snap.GetTriads(Graph, TriadV, -1)
OpenTriads = 0
ClosedTriads = 0
for triple in TriadV:
    OpenTriads += triple.Val3()
    ClosedTriads += triple.Val2()
예제 #17
0
def graphStructure(elistName, elistPath):
    """
    Load an edge list as an undirected graph and collect its properties.

    Besides filling the result dictionary, the function asks SNAP to plot
    the degree, shortest-path, connected-component and clustering
    coefficient distributions; ``elistName`` is used in the plot file names.

    Args:
        elistName (str) -> Name of the edge list, used to label the plots
        elistPath (pathlib.Path) -> Edge list from which the graph is built

    Return:
        RESULTS (dict) -> Results for the individual parts of the assignment
    """
    subGraph = snap.LoadEdgeList(snap.PUNGraph, elistPath, 0, 1)

    # Part 1 (Size of the network)
    RESULTS = {
        'nodeCount': subGraph.GetNodes(),
        'edgeCount': subGraph.GetEdges(),
    }

    # Part 2 (Degree of nodes in the network)
    degreeById = [(node.GetId(), node.GetDeg()) for node in subGraph.Nodes()]
    degrees = [degree for _, degree in degreeById]
    maxDegree = max(degrees, default=0)
    maxDegreeNodes = [
        nodeId for nodeId, degree in degreeById if degree == maxDegree
    ]

    # The graph is undirected, so out-degree equals degree for the plot.
    snap.PlotOutDegDistr(subGraph, f"deg_dist_{elistName}")

    RESULTS['maxDegree'] = maxDegree
    RESULTS['maxDegreeNodes'] = ','.join(str(nodeId) for nodeId in maxDegreeNodes)
    RESULTS['degree7Count'] = degrees.count(7)

    # Part 3 (Paths in the network)
    # Full diameter for 10, 100 and 1000 BFS start nodes, plus mean/variance.
    fullDiameters = {
        samples: snap.GetBfsFullDiam(subGraph, samples, False)
        for samples in (10, 100, 1000)
    }
    fullDiameters['mean'], fullDiameters['variance'] = meanVariance(
        fullDiameters.values())
    RESULTS['fullDiameters'] = fullDiameters

    # Effective diameter for the same sample sizes.
    effDiameters = {
        samples: snap.GetBfsEffDiam(subGraph, samples, False)
        for samples in (10, 100, 1000)
    }
    effDiameters['mean'], effDiameters['variance'] = meanVariance(
        effDiameters.values())
    RESULTS['effDiameters'] = effDiameters

    snap.PlotShortPathDistr(subGraph, f"shortest_path_{elistName}")

    # Part 4 (Components of the network)
    RESULTS['fractionLargestConnected'] = snap.GetMxSccSz(subGraph)
    edgeBridges = snap.TIntPrV()
    snap.GetEdgeBridges(subGraph, edgeBridges)
    articulationPoints = snap.TIntV()
    snap.GetArtPoints(subGraph, articulationPoints)
    RESULTS['edgeBridges'] = len(edgeBridges)
    RESULTS['articulationPoints'] = len(articulationPoints)

    snap.PlotSccDistr(subGraph, f"connected_comp_{elistName}")

    # Part 5 (Connectivity and clustering in the network)
    RESULTS['avgClusterCoefficient'] = snap.GetClustCf(subGraph, -1)
    RESULTS['triadCount'] = snap.GetTriadsAll(subGraph, -1)[0]

    # Two independent random nodes: one per node-level statistic.
    nodeX = subGraph.GetRndNId(Rnd)
    nodeY = subGraph.GetRndNId(Rnd)
    nodeXCoefficient = snap.GetNodeClustCf(subGraph, nodeX)
    RESULTS['randomClusterCoefficient'] = (nodeX, nodeXCoefficient)
    nodeYTriads = snap.GetNodeTriads(subGraph, nodeY)
    RESULTS['randomNodeTriads'] = (nodeY, nodeYTriads)
    RESULTS['edgesTriads'] = snap.GetTriadEdges(subGraph)

    snap.PlotClustCf(subGraph, f"clustering_coeff_{elistName}")

    return RESULTS
def get_clustering_coefficient(G, n):
    """Return what ``snap.GetNodeClustCf`` reports for node ``n`` of ``G``."""
    coefficient = snap.GetNodeClustCf(G, n)
    return coefficient
예제 #19
0
def net_structure(dataset_dir, output_dir, graph_type, metric, net, alg):
    """Summarise clustering coefficients of detected communities per ego.

    For every threshold directory under ``dataset_dir + net + "/"`` the
    function reads one community file per ego network, loads the matching
    ``<ego_id>.edge_list`` from a hard-coded graphs directory, averages
    ``snap.GetNodeClustCf`` over every node listed in the communities and
    appends one JSON line with the aggregated statistics to
    ``output_dir + threshold + ".json"``. Thresholds whose output file
    already exists are skipped. Progress and errors are printed.

    Args:
        dataset_dir: path prefix (with trailing separator) of the community
            files, organised as ``<net>/<threshold>/<ego_id>.txt``.
        output_dir: path prefix of the JSON output files.
        graph_type: sub-directory of the graphs directory; also echoed in
            the progress output.
        metric: not used by this function.
        net: name of the network being processed.
        alg: not used by this function.

    Returns:
        None.
    """
    os.system('clear')
    print(
        "\n######################################################################\n"
    )
    print("\nScript para cálculo do coef_clust das comunidades detectadas\n")

    graphs_dir = "/home/amaury/graphs_hashmap_infomap_without_weight/" + str(
        net) + "/" + str(graph_type) + "/"

    if not os.path.exists(graphs_dir):
        print("Diretório não encontrado: " + str(graphs_dir))

    else:
        print(
            "\n######################################################################\n"
        )
        print(
            "\nScript para cálculo do Coeficiente de Clustering das comunidades detectadas - Rede "
            + str(net) + "\n")

        if not os.path.isdir(dataset_dir + str(net) + "/"):
            print("Diretório com avaliações da rede " + str(net) +
                  " não encontrado: " + str(dataset_dir + str(net) + "/"))
        else:
            for threshold in os.listdir(dataset_dir + str(net) + "/"):
                if os.path.isfile(str(output_dir) + str(threshold) + ".json"):
                    print("Arquivo de destino já existe. " + str(output_dir) +
                          str(threshold) + ".json")
                else:

                    # Mean coefficient of every ego graph of this threshold.
                    coef_clust = []
                    # ego id -> list of coefficients collected for that ego.
                    coef_clust_data = {}
                    i = 0

                    for file in os.listdir(dataset_dir + str(net) + "/" +
                                           str(threshold) + "/"):
                        i += 1
                        ego_id = file.split(".txt")
                        ego_id = long(ego_id[0])
                        # Communities of this ego network (lists of tokens).
                        communities = []
                        # Coefficients gathered for ego i.
                        m_file = []

                        try:
                            # Directed graph read from a two-column edge list;
                            # a separator argument may be needed for some files.
                            G = snap.LoadEdgeList(
                                snap.PNGraph,
                                str(graphs_dir) + str(ego_id) + ".edge_list",
                                0, 1)
                            n_edges = G.GetEdges()

                            if n_edges == 0:
                                # No edges: record a coefficient of zero.
                                a = 0
                                m_file.append(a)
                            else:
                                try:
                                    with open(
                                            dataset_dir + str(net) + "/" +
                                            str(threshold) + "/" + str(file),
                                            'r') as f:
                                        for line in f:
                                            # One community per line, node
                                            # ids separated by spaces.
                                            comm = []
                                            a = line.split(' ')
                                            for item in a:
                                                if item != "\n":
                                                    comm.append(item)
                                            communities.append(comm)
                                except Exception as e:
                                    print(
                                        "\nERRO - Impossível carregar as comunidades: "
                                        + dataset_dir + str(net) + "/" +
                                        str(threshold) + "/" + str(file) +
                                        "\n")
                                    print(e)

                                # NOTE(review): a token that is not a valid
                                # integer, or an id missing from G, raises here
                                # and is reported by the outer handler as a
                                # graph-loading error.
                                _cf = []
                                for comm in communities:
                                    if comm is not None:
                                        for nodeId in comm:
                                            if nodeId is not None:
                                                _cf.append(
                                                    snap.GetNodeClustCf(
                                                        G, int(nodeId)))
                                result = calc.calcular(_cf)
                                m_file.append(result['media'])
                                print("Clustering Coef para o ego " + str(i) +
                                      " (" + str(file) + "): " +
                                      str(result['media']))
                                print("")

                        except Exception as e:
                            print(
                                "\nERRO - Impossível carregar o grafo para o ego: "
                                + str(ego_id) + "  --  " + str(graphs_dir) +
                                str(ego_id) + ".edge_list\n")
                            print(e)

                        _m_file = calc.calcular(m_file)
                        coef_clust_data[ego_id] = m_file
                        if _m_file is not None:
                            coef_clust.append(_m_file['media'])

                            print(
                                str(graph_type) + " - Rede: " + str(net) +
                                " - Threshold: " + str(threshold) +
                                " - Coef_Clustering para o ego " + str(i) +
                                " (" + str(file) + "): %5.3f" %
                                (_m_file['media']))
                            print(
                                "######################################################################"
                            )

                    M = calc.calcular_full(coef_clust)

                    # Reset for every threshold: without this, a threshold with
                    # no statistics either hits an unbound name or re-writes
                    # the overview left over from the previous threshold.
                    overview = None
                    if M is not None:
                        overview = {
                            'threshold': threshold,
                            'coef_clust': M,
                            'coef_clust_data': coef_clust_data
                        }
                        print(
                            "\n######################################################################\n"
                        )
                        print(
                            "Rede: %s   ---   Threshold: %s   ---   Coef_Clust: Média: %5.3f -- Var:%5.3f -- Des. Padrão: %5.3f"
                            % (net, threshold, M['media'], M['variancia'],
                               M['desvio_padrao']))
                        print(
                            "\n######################################################################\n"
                        )

                    if overview is not None:
                        with open(
                                str(output_dir) + str(threshold) + ".json",
                                'a+') as f:
                            f.write(json.dumps(overview) + "\n")

    print(
        "\n######################################################################\n"
    )
def net_structure(dataset_dir, output_dir, net, IsDir, weight):
    """Compute clustering-coefficient statistics over a set of ego networks.

    Every file in ``dataset_dir`` is loaded as an edge list (directed when
    ``IsDir is True``, undirected otherwise) and the mean of its per-node
    clustering coefficients is collected. The aggregate produced by
    ``calc.calcular_full`` is written to ``<output_dir><net>_clustering_coef``
    ``.json`` and ``.txt`` and printed. Nothing is done when the JSON file
    already exists. ``weight`` is not used.
    """
    print(
        "\n######################################################################\n"
    )
    json_path = str(output_dir) + str(net) + "_clustering_coef.json"
    if os.path.isfile(json_path):
        print("Arquivo já existe: " + json_path)
        return

    print("Dataset clustering coefficient - " + str(dataset_dir))

    cf = []  # mean clustering coefficient of each ego network
    node_counts = []  # number of nodes per ego network (collected only)
    edge_counts = []  # number of edges per ego network (collected only)

    for i, file in enumerate(os.listdir(dataset_dir), 1):
        print(
            str(output_dir) + str(net) + "/" + str(file) +
            " - Calculando propriedades para o ego " + str(i) + ": " +
            str(file))

        # Two-column edge list; some files may need an explicit separator.
        graph_kind = snap.PNGraph if IsDir is True else snap.PUNGraph
        G = snap.LoadEdgeList(graph_kind, dataset_dir + file, 0, 1)

        node_counts.append(G.GetNodes())
        n_edges = G.GetEdges()
        edge_counts.append(n_edges)

        if n_edges == 0:
            # An ego network without edges contributes a coefficient of 0.
            cf.append(0)
            print("Nenhuma aresta encontrada para a rede-ego " + str(i) +
                  " - (" + str(file))
            continue

        # Local (per-node) coefficients, averaged by the project helper.
        NIdCCfH = snap.TIntFltH()
        snap.GetNodeClustCf(G, NIdCCfH)
        _cf = [NIdCCfH[item] for item in NIdCCfH]
        result = calc.calcular(_cf)
        cf.append(result['media'])
        print("Clustering Coef para o ego " + str(i) + " (" +
              str(file) + "): " + str(result['media']))
        print

    CF = calc.calcular_full(cf)

    overview = {'ClusteringCoefficient': CF}
    with open(json_path, 'w') as f:
        f.write(json.dumps(overview))

    with open(str(output_dir) + str(net) + "_clustering_coef.txt",
              'w') as f:
        f.write(
            "\n######################################################################\n"
        )
        f.write(
            "Clustering Coef: Média: %5.3f -- Var:%5.3f -- Des. Padrão: %5.3f \n"
            % (CF['media'], CF['variancia'], CF['desvio_padrao']))
        f.write(
            "\n######################################################################\n"
        )

    print(
        "\n######################################################################\n"
    )
    print(
        "Clustering Coef: Média: %5.3f -- Var:%5.3f -- Des. Padrão: %5.3f \n"
        % (CF['media'], CF['variancia'], CF['desvio_padrao']))
    print(
        "\n######################################################################\n"
    )
예제 #21
0
# Script fragment: relies on `G`, `write` and `base` being defined earlier.

# Hub and authority scores (HITS), each dumped to its own file.
NIdHubH = snap.TIntFltH()
NIdAuthH = snap.TIntFltH()
snap.GetHits(G, NIdHubH, NIdAuthH)
write(NIdHubH, "hub.txt")
write(NIdAuthH, "auth.txt")

# Betweenness centrality; only the per-node table is written out, the
# per-edge table is filled but not used afterwards.
Nodes = snap.TIntFltH()
Edges = snap.TIntPrFltH()
snap.GetBetweennessCentr(G, Nodes, Edges, 1.0)
write(Nodes, "between.txt")

# One [node id, eccentricity, clustering coefficient] row per node.
rows = []
for i, node in enumerate(G.Nodes()):
    # Progress message every 10000 nodes.
    if i % 10000 == 0:
        print "on iteration {}".format(i)
    nid = node.GetId()
    ecc = snap.GetNodeEcc(G, nid)
    clust = snap.GetNodeClustCf(G, nid)
    rows.append([nid, ecc, clust])

# Rows are stored as comma-separated lines.
with open(base + "ecc_clust.txt", 'w') as f:
    for row in rows:
        f.write(",".join(map(str, row)) + "\n")

# Articulation points, written one node id per line.
ArtNIdV = snap.TIntV()
snap.GetArtPoints(G, ArtNIdV)

with open(base + "art.txt", "w") as f:
    for NI in ArtNIdV:
        f.write("{}\n".format(NI))
예제 #22
0
def get_graph_overview(G, Gd=None):
    '''
	G here is an undirected graph
	'''

    # degree distribution
    CntV = snap.TIntPrV()
    snap.GetOutDegCnt(G, CntV)
    deg_x, deg_y = [], []
    max_deg = 0
    for item in CntV:
        max_deg = max(max_deg, item.GetVal1())
        deg_x.append(item.GetVal1())
        deg_y.append(item.GetVal2())
        # print item.GetVal1(), item.GetVal2()
    print 'max_deg = ', max_deg
    deg_cnt = np.zeros(max_deg + 1)
    for item in CntV:
        deg_cnt[item.GetVal1()] = item.GetVal2()
    print deg_cnt
    # plt.loglog(deg_x, deg_y)
    # plt.xlabel('Degree of nodes')
    # plt.ylabel('Number of nodes')
    # plt.savefig('Giu_deg_dist.png')
    # plt.clf()

    # clustering coefficient distribution
    cf = snap.GetClustCf(G)
    print 'average cf =', cf
    NIdCCfH = snap.TIntFltH()
    snap.GetNodeClustCf(G, NIdCCfH)
    ccf_sum = np.zeros(max_deg + 1)
    for item in NIdCCfH:
        ccf_sum[G.GetNI(item).GetDeg()] += NIdCCfH[item]
        # print item, NIdCCfH[item]
    ccf_x, ccf_y = [], []
    for i in range(max_deg + 1):
        if deg_cnt[i] != 0:
            ccf_sum[i] /= deg_cnt[i]
            ccf_x.append(i)
            ccf_y.append(ccf_sum[i])
    print ccf_y
    # plt.loglog(ccf_x, ccf_y)
    # plt.xlabel('Degree of nodes')
    # plt.ylabel('Average clustering coefficient of nodes with the degree')
    # plt.savefig('Giu_ccf_dist.png')
    # plt.clf()
    # snap.PlotClustCf(G, 'investor_network', 'Distribution of clustering coefficients')

    # diameter and shortest path distribution
    diam = snap.GetBfsFullDiam(G, 100)
    print diam
    # snap.PlotShortPathDistr(G, 'investor_network', 'Distribution of shortest path length')
    # rewired_diams = []
    # for i in range(100):
    # 	print 'rewire: ', i
    # 	G_config = rewire_undirected_graph(G)
    # 	rewired_diams.append(snap.GetBfsFullDiam(G_config, 400))
    # print rewired_diams
    # print 'null model diam mean: ', np.mean(rewired_diams)
    # print 'null model diam std: ', np.std(rewired_diams)

    # wcc and scc size distribution
    WccSzCnt = snap.TIntPrV()
    snap.GetWccSzCnt(G, WccSzCnt)
    print 'Distribution of wcc:'
    for item in WccSzCnt:
        print item.GetVal1(), item.GetVal2()

    if Gd != None:
        print 'Distribution of scc:'
        ComponentDist = snap.TIntPrV()
        snap.GetSccSzCnt(Gd, ComponentDist)
        for item in ComponentDist:
            print item.GetVal1(), item.GetVal2()
예제 #23
0
import snap
import sys
import numpy as np
import matplotlib
matplotlib.use('Agg')

import matplotlib.pyplot as plt

# Edge list given on the command line, loaded as an undirected graph.
input_file = sys.argv[1]
Graph = snap.LoadEdgeList(snap.PUNGraph, input_file, 0, 1)

clc = set()
cluster = dict()

# node id -> clustering coefficient
for node in Graph.Nodes():
    clustering = snap.GetNodeClustCf(Graph, node.GetId())
    cluster[node.GetId()] = clustering

# Distinct coefficient values only; duplicates collapse in the set.
for item in cluster:
    clc.add(cluster[item])

# "<node id> : <coefficient>" lines, highest coefficient first and ties
# broken by descending node id. The key indexes the (id, value) pair
# instead of unpacking it in the lambda signature, which Python 3 rejects.
with open(sys.argv[1] + '.clustering.txt', 'w+') as fp:
    for p in sorted(cluster.items(),
                    key=lambda kv: (kv[1], kv[0]),
                    reverse=True):
        fp.write("%s : %s\n" % p)

clc = sorted(clc, key=float, reverse=True)

#plotting clustering coefficient
plt.plot(np.arange(1, len(clc) + 1, 1), clc, 'b.')
plt.xlabel('Rank')
plt.ylabel('Clustering Coefficient')
# Script fragment: relies on `Graph1`, `file_name` and `Rnd` being defined
# earlier.

# Number of articulation points of the graph.
Art_points = snap.TIntV()
snap.GetArtPoints(Graph1, Art_points)
art = Art_points.Len()
print("Number of articulation points: ", art)

# Plot of the connected-component size distribution, named after the input.
str2 = "connected_comp_" + file_name
snap.PlotSccDistr(Graph1, str2,
                  "Distribution of sizes of connected components")

#5.Connectivity and clustering in the network
# NOTE(review): -1 is passed as the sample-size argument of the calls below;
# presumably it means "use all nodes" -- confirm in the SNAP documentation.
avg_cc = snap.GetClustCf(Graph1, -1)
print("Average clustering coefficient: %0.4f" % avg_cc)
triads = snap.GetTriads(Graph1, -1)
print("Number of triads: ", triads)

# Clustering coefficient of one random node (drawn with the `Rnd` generator).
random1 = Graph1.GetRndNId(Rnd)
node_cc = snap.GetNodeClustCf(Graph1, random1)
print("Clustering coefficient of random node %d: %0.4f" % (random1, node_cc))

# Triad participation of a second, independently drawn random node.
random2 = Graph1.GetRndNId(Rnd)
node_triads = snap.GetNodeTriads(Graph1, random2)
print("Number of triads random node %d participates: %d" %
      (random2, node_triads))

triad_edges = snap.GetTriadEdges(Graph1, -1)
print("Number of edges that participate in at least one triad: ", triad_edges)

# Plot of the clustering-coefficient distribution, named after the input.
str3 = "clustering_coeff_" + file_name
snap.PlotClustCf(Graph1, str3, "The distribution of clustering coefficient")
예제 #25
0
def getClusteringCoeff(Graph):
    """Return the clustering coefficient of every node of ``Graph``.

    The coefficients are listed in ascending node-id order. The ids are
    taken from the graph itself rather than assumed to be ``0..N-1``, so
    graphs with gaps in their id range (e.g. loaded from an edge list)
    work too; for ids ``0..N-1`` the list is indexed by node id as before.
    """
    node_ids = sorted(NI.GetId() for NI in Graph.Nodes())
    return [snap.GetNodeClustCf(Graph, node_id) for node_id in node_ids]
예제 #26
0
def main():
    """Compute heuristic features for the labelled nodes and save them as CSV.

    Command-line arguments select a .mat network file, a matching unweighted
    edge list and a dataset name.  For every node of the edge-list graph whose
    id appears in the ``indices`` returned by ``load_graph``, one row is
    collected with: node id, in-degree, PageRank, clustering coefficient, hub
    score, authority score, average neighbour degree and eccentricity.

    The rows are written to ``<network without ".mat">_node_heuristic_features.csv``.
    A snapshot of the file is also written after every 1000 processed nodes,
    and once more when the loop finishes.
    """
    parser = ArgumentParser("node_heu", formatter_class=ArgumentDefaultsHelpFormatter, conflict_handler='resolve')

    # Required arguments
    parser.add_argument("--network", type=str, required=True, help='The path and name of the .mat file containing the adjacency matrix and node labels of the input network')
    parser.add_argument("--edgelist", type=str, required=True, help='The path and name of the edgelist file with no weights containing the edgelist of the input network')
    parser.add_argument("--dataset", type=str, required=True, help='The name of your dataset (used for output)')

    # Optional arguments
    parser.add_argument("--adj_matrix_name", default='network', help='The name of the adjacency matrix inside the .mat file')
    parser.add_argument("--label_matrix_name", default='group', help='The name of the labels matrix inside the .mat file')
    args = parser.parse_args()

    print(args)

    # Only `indices` is used below; the other values are unpacked to match
    # the tuple returned by load_graph.
    mat, A, graph, labels_matrix, labels_count, indices = load_graph(args.network, args.adj_matrix_name, args.label_matrix_name)

    # Output location and column layout are shared by the periodic snapshots
    # and the final write.
    out_path = args.network.replace(".mat", "") + "_node_heuristic_features.csv"
    columns = ('NodeId', 'Degree', 'PageRankScore', 'NodeClustCf', 'HubScore', 'AuthScore', 'AverageNeighborDegree', 'NodeEcc')

    s_time = time.time()

    # Load edgelist as undirected graph in SNAP
    G = snap.LoadEdgeList(snap.PUNGraph, args.edgelist)
    print("Loading graph in SNAP ... {}".format(str(args.edgelist)))

    # Load edgelist for networkx
    G_NETX = nx.read_edgelist(args.edgelist)
    print("Loading graph in NetworkX .... {}".format(str(args.edgelist)))

    # Get Average Neighbor Degree from NetworkX (only time NetworkX is used)
    AvgNeighDe = nx.average_neighbor_degree(G_NETX)

    # Calculate Page Rank
    p_time = time.time()
    PRankH = snap.TIntFltH()
    snap.GetPageRank(G, PRankH)
    print("Finished in Page rank in {}".format(str(time.time()-p_time)))

    # Calculate Hub and Authority Scores
    h_time = time.time()
    NIdHubH = snap.TIntFltH()
    NIdAuthH = snap.TIntFltH()
    snap.GetHits(G, NIdHubH, NIdAuthH)
    print("Finished in Hub and Auth Scores in {}".format(str(time.time()-h_time)))

    count = 0
    node_data = []
    fl_100 = time.time()
    print("Num of nodes: {}".format(len(PRankH)))
    print("Num of nodes with labels: {}".format(len(indices)))
    print("Collecting other features for each node ...")
    for n in G.Nodes():
        nid = n.GetId()
        if nid in indices:
            # The NetworkX result is looked up with str(nid), whereas the
            # SNAP tables are keyed by the integer id.
            node_data.append((nid, n.GetInDeg(), PRankH[nid], snap.GetNodeClustCf(G, nid), NIdHubH[nid], NIdAuthH[nid], AvgNeighDe[str(nid)], snap.GetNodeEcc(G, nid)))
            count = count + 1
            if count % 1000 == 0:
                # Progress report plus a snapshot of the rows gathered so far.
                print("Processed {} nodes".format(str(count)))
                print(time.time() - fl_100)
                fl_100 = time.time()
                nhdf = pd.DataFrame(node_data, columns=columns)
                nhdf.to_csv(out_path, index=False)
                print("File saved at {}".format(out_path))

    # Final write with every collected row.
    nhdf = pd.DataFrame(node_data, columns=columns)
    nhdf.to_csv(out_path, index=False)
    print("File saved at {}".format(out_path))

    print("Finished in {}".format(str(time.time()-s_time)))
예제 #27
0
            list(connected_component.values()),
            s=15)
# Label, title and save the connected-component plot started above this
# excerpt. `graph_filename`, `plot_filedir`, `G` and `Rnd` are defined outside
# the visible code.
plt.xlabel("Size of Connected Components")
plt.ylabel("Number of components")
# The title drops the last six characters of graph_filename
# (presumably a file extension -- TODO confirm).
plt.title("Connected Component Distribution ({})".format(graph_filename[:-6]))
plt.savefig(plot_filedir)

# [5] Connectivity and Clustering in the Network
# NOTE(review): the -1 passed to the SNAP calls below is presumably the
# "sample size" meaning "use every node" -- confirm against the SNAP docs.
cluster_coeff = snap.GetClustCf(G, -1)
print("Average clustering coefficient: {}".format(round(cluster_coeff, 4)))

num_triads = snap.GetTriads(G, -1)
print("Number of triads: {}".format(num_triads))

# Clustering coefficient of one randomly drawn node.
node_id = G.GetRndNId(Rnd)
node_cluster_coeff = snap.GetNodeClustCf(G, node_id)
print("Clustering coefficient of random node {}: {}".format(
    node_id, round(node_cluster_coeff, 4)))

# A second random draw (node_id is overwritten) for the per-node triad count.
node_id = G.GetRndNId(Rnd)
node_num_triads = snap.GetNodeTriads(G, node_id)
print("Number of triads random node {} participates: {}".format(
    node_id, node_num_triads))

# Unlike the calls above, no sample-size argument is passed here.
triad_edge = snap.GetTriadEdges(G)
print("Number of edges that participate in at least one triad: {}".format(
    triad_edge))

# Ask SNAP for the clustering coefficient again, this time passing a vector of
# float pairs for it to fill; the code consuming cf_dist / degree_coeff lies
# beyond this excerpt.
cf_dist = snap.TFltPrV()
coeff = snap.GetClustCf(G, cf_dist, -1)
degree_coeff = {}
        snap.GetTriads(email_enron_subgraph, -1))
# Each branch below handles one named subgraph; `sub_graph_name` and the
# per-dataset graph objects are defined outside this excerpt.
# NOTE: these are Python 2 print statements.
if (sub_graph_name == "p2p-Gnutella04-subgraph"):
    # Number of triads in the subgraph
    print "Number of Triads in p2p-Gnutella04-subgraph :" + str(
        snap.GetTriads(p2p_gnutella04_subgraph, -1))

# Task 1.2.5.3

if (sub_graph_name == "soc-Epinions1-subgraph"):
    # Clustering coefficient of a random node, rounded to 4 decimals
    # NOTE(review): Randomize() is called right after seeding with 42; it
    # presumably re-seeds the generator and so discards the fixed seed --
    # confirm whether reproducible output is intended.
    Rand = snap.TRnd(42)
    Rand.Randomize()
    RandNode1 = soc_epinions1_subgraph.GetRndNId(Rand)
    print "Clustering coefficient of random node " + str(
        RandNode1) + " in soc-Epinions1-subgraph : " + str(
            round(snap.GetNodeClustCf(soc_epinions1_subgraph, RandNode1), 4))
if (sub_graph_name == "cit-HepPh-subgraph"):
    # Clustering coefficient of a random node, rounded to 4 decimals
    # (same seed-then-Randomize pattern as the branch above)
    Rand = snap.TRnd(42)
    Rand.Randomize()
    RandNode2 = cit_heph_subgraph.GetRndNId(Rand)
    print "Clustering coefficient of random node " + str(
        RandNode2) + " in cit-HepPh-subgraph : " + str(
            round(snap.GetNodeClustCf(cit_heph_subgraph, RandNode2), 4))
if (sub_graph_name == "email-Enron-subgraph"):
    # Clustering coeffiecient of a random node
    Rand = snap.TRnd(42)
    Rand.Randomize()
    RandNode3 = email_enron_subgraph.GetRndNId(Rand)
    print "Clustering coefficient of random node " + str(
        RandNode3) + " in email-Enron-subgraph : " + str(
예제 #29
0
# NOTE: `fbsgel` (the graph), `Rnd` and `subgraph_name` are defined outside
# this excerpt.
#b  Number of bridge edges
EdgeBridgeV = snap.TIntPrV()
snap.GetEdgeBridges(fbsgel, EdgeBridgeV)
print("Number of edge bridges:", len(EdgeBridgeV))
#c  Number of articulation points
ArtNIdV = snap.TIntV()
snap.GetArtPoints(fbsgel, ArtNIdV)
print("Number of articulation points:", len(ArtNIdV))
#d Plot of the connected-component size distribution; the same string is
#  passed as both the second and third argument.
snap.PlotSccDistr(fbsgel, "connected_comp_" + str(subgraph_name),
                  "connected_comp_" + str(subgraph_name))

#Q5
# NOTE(review): the -1 argument is presumably the "sample size" meaning
# "use every node" -- confirm against the SNAP docs.
#a  Average clustering coefficient, rounded to 4 decimals
print("Average clustering coefficient:", round(snap.GetClustCf(fbsgel, -1), 4))
#b  Total number of triads
print("Number of triads:", snap.GetTriads(fbsgel, -1))
#c  Clustering coefficient of one random node
RnId = fbsgel.GetRndNId(Rnd)
print("Clustering coefficient of random node " + str(RnId) + ":",
      round(snap.GetNodeClustCf(fbsgel, RnId), 4))
#d  Triad count for the same random node as in (c); RnId is not redrawn
print("Number of triads random node " + str(RnId) + " participates:",
      snap.GetNodeTriads(fbsgel, RnId))
#e  Edges taking part in at least one triad
print("Number of edges that participate in at least one triad:",
      snap.GetTriadEdges(fbsgel, -1))
#f Plot of the clustering-coefficient distribution
snap.PlotClustCf(fbsgel, "clustering_coeff_" + str(subgraph_name),
                 "clustering_coeff_" + str(subgraph_name))
예제 #30
0
def _tidy_plot_dir(plots_dir):
    """Keep only the PNG plots in *plots_dir* and give them descriptive names.

    Every regular file that does not end in ".png" is deleted.  The remaining
    files named ``<kind>.<name>.<ext>`` with a known ``<kind>`` are renamed to
    ``<prefix><name>.<ext>``.  Anything else (already renamed two-part names,
    unknown kinds, subdirectories) is left untouched.
    """
    prefix_by_kind = {
        "ccf": "clustering_coeff_",
        "outDeg": "deg_dist_",
        "diam": "shortest_path_",
        "scc": "connected_comp_",
    }

    for entry in os.listdir(plots_dir):
        entry_path = os.path.join(plots_dir, entry)
        # os.remove raises on directories, so only regular files are deleted.
        if os.path.isfile(entry_path) and not entry.endswith(".png"):
            os.remove(entry_path)

    for entry in os.listdir(plots_dir):
        parts = entry.split(".")
        # Rename only recognised plot files; an unknown kind must not inherit
        # the prefix chosen for a previous file.
        if len(parts) != 3 or parts[0] not in prefix_by_kind:
            continue
        new_name = prefix_by_kind[parts[0]] + parts[1] + "." + parts[2]
        os.rename(os.path.join(plots_dir, entry),
                  os.path.join(plots_dir, new_name))


def main():
    """Print summary statistics for one subgraph and save its SNAP plots.

    ``sys.argv[1]`` names an edge-list file inside ``./subgraphs``.  The
    function prints node/edge counts, degree information, sampled diameter
    estimates, connectivity figures and clustering figures, then writes four
    distribution plots into ``./plots`` and renames them via
    ``_tidy_plot_dir``.  The working directory is always restored before
    returning, even if plotting fails.
    """
    parentDir = os.getcwd()
    # Load by path instead of chdir-ing into the data directory, so a failed
    # load cannot leave the process in the wrong working directory.
    sub_graph = snap.LoadEdgeList(
        snap.PUNGraph, os.path.join(parentDir, "subgraphs", sys.argv[1]), 0, 1)
    subGraphName = sys.argv[1].split(".")[0]

    #### 1 ########
    node_count = sum(1 for _ in sub_graph.Nodes())

    printWithOutNewLine("Number of nodes:", node_count)
    printWithOutNewLine("Number of edges:", snap.CntUniqBiDirEdges(sub_graph))

    #### 2 ########
    printWithOutNewLine("Number of nodes with degree=7:",
                        snap.CntDegNodes(sub_graph, 7))

    # GetMxDegNId returns a single node id; look up that node's degree and
    # then collect every node sharing it.
    rndMaxDegNId = snap.GetMxDegNId(sub_graph)
    nodeDegPairs = snap.TIntPrV()
    snap.GetNodeInDegV(sub_graph, nodeDegPairs)
    maxDegVal = next((pair.GetVal2() for pair in nodeDegPairs
                      if pair.GetVal1() == rndMaxDegNId), 0)
    maxDegNodes = [pair.GetVal1() for pair in nodeDegPairs
                   if pair.GetVal2() == maxDegVal]

    print("Node id(s) with highest degree:", end=" ")
    print(*maxDegNodes, sep=',')

    #### 3 ########
    sampleSizes = (10, 100, 1000)

    sampledFullDiam = [snap.GetBfsFullDiam(sub_graph, size, False)
                       for size in sampleSizes]
    sampledFullDiamStats = [
        round(statistics.mean(sampledFullDiam), 4),
        round(statistics.variance(sampledFullDiam), 4),
    ]
    for size, diam in zip(sampleSizes, sampledFullDiam):
        printWithOutNewLine(
            "Approximate full diameter by sampling %d nodes:" % size, diam)
    print("Approximate full diameter (mean and variance):", end=" ")
    print(*sampledFullDiamStats, sep=',')

    # Effective diameters are rounded before the statistics are taken.
    sampledEffDiam = [round(snap.GetBfsEffDiam(sub_graph, size, False), 4)
                      for size in sampleSizes]
    sampledEffDiamStats = [
        round(statistics.mean(sampledEffDiam), 4),
        round(statistics.variance(sampledEffDiam), 4),
    ]
    for size, diam in zip(sampleSizes, sampledEffDiam):
        printWithOutNewLine(
            "Approximate effective diameter by sampling %d nodes:" % size,
            diam)
    print("Approximate effective diameter (mean and variance):", end=" ")
    print(*sampledEffDiamStats, sep=',')

    #### 4 ########
    printWithOutNewLine("Fraction of nodes in largest connected component:",
                        round(snap.GetMxSccSz(sub_graph), 4))

    bridgeEdges = snap.TIntPrV()
    snap.GetEdgeBridges(sub_graph, bridgeEdges)
    printWithOutNewLine("Number of edge bridges:", len(bridgeEdges))

    articulationPoints = snap.TIntV()
    snap.GetArtPoints(sub_graph, articulationPoints)
    printWithOutNewLine("Number of articulation points:",
                        len(articulationPoints))

    #### 5 ########
    printWithOutNewLine("Average clustering coefficient:",
                        round(snap.GetClustCf(sub_graph, -1), 4))

    printWithOutNewLine("Number of triads:", snap.GetTriads(sub_graph, -1))

    # The same random node is used for both per-node figures below.
    randomNodeId = sub_graph.GetRndNId()
    nodeIdCcfMap = snap.TIntFltH()
    snap.GetNodeClustCf(sub_graph, nodeIdCcfMap)

    print("Clustering coefficient of random node", end=" ")
    print(randomNodeId, end=": ")
    print(round(nodeIdCcfMap[randomNodeId], 4))

    print("Number of triads random node", end=" ")
    print(randomNodeId, end=" participates: ")
    print(snap.GetNodeTriads(sub_graph, randomNodeId))

    printWithOutNewLine(
        "Number of edges that participate in at least one triad:",
        snap.GetTriadEdges(sub_graph, -1))

    #### plots ########
    plotsDir = os.path.join(parentDir, "plots")
    if not os.path.isdir(plotsDir):
        os.makedirs(plotsDir)

    # The plot calls take no output directory, so run them from inside the
    # plots directory and restore the original directory whatever happens.
    os.chdir(plotsDir)
    try:
        snap.PlotOutDegDistr(sub_graph, subGraphName,
                             subGraphName + " Subgraph Degree Distribution")
        snap.PlotShortPathDistr(
            sub_graph, subGraphName,
            subGraphName + " Subgraph Shortest Path Lengths Distribution")
        snap.PlotSccDistr(
            sub_graph, subGraphName,
            subGraphName + " Subgraph Connected Components Size Distribution")
        snap.PlotClustCf(
            sub_graph, subGraphName,
            subGraphName + " Subgraph Clustering Coefficient Distribution")

        _tidy_plot_dir(plotsDir)
    finally:
        os.chdir(parentDir)