#4.Components of the network
# Graph1, file_name and Rnd are not defined in this part of the script; they
# are read here as already-existing module-level names.
fraction = snap.GetMxSccSz(Graph1)
print("Fraction of nodes in largest connected component: %0.4f" % fraction)

# GetEdgeBridges fills the vector passed to it; only its length is reported.
V_edges = snap.TIntPrV()
snap.GetEdgeBridges(Graph1, V_edges)
edge_bridges = V_edges.Len()
print("Number of edge bridges: ", edge_bridges)

# Same fill-then-count pattern for the articulation points.
Art_points = snap.TIntV()
snap.GetArtPoints(Graph1, Art_points)
art = Art_points.Len()
print("Number of articulation points: ", art)

# The plot file name prefix is derived from the input file name.
str2 = "connected_comp_" + file_name
snap.PlotSccDistr(Graph1, str2,
                  "Distribution of sizes of connected components")

#5.Connectivity and clustering in the network
avg_cc = snap.GetClustCf(Graph1, -1)
print("Average clustering coefficient: %0.4f" % avg_cc)
triads = snap.GetTriads(Graph1, -1)
print("Number of triads: ", triads)

# Two independent draws from Rnd: one node for the clustering coefficient,
# another one for the triad count.
random1 = Graph1.GetRndNId(Rnd)
node_cc = snap.GetNodeClustCf(Graph1, random1)
print("Clustering coefficient of random node %d: %0.4f" % (random1, node_cc))

random2 = Graph1.GetRndNId(Rnd)
node_triads = snap.GetNodeTriads(Graph1, random2)
print("Number of triads random node %d participates: %d" %
      (random2, node_triads))
    G=snap.TUNGraph.New(N, M)
    for i in xrange(N):
        G.AddNode(i)
    for u, b in zip(data.user_id, data.business_id):
        G.AddEdge(u_to_id[u], b_to_id[b])

    assert G.GetNodes() == N
    assert G.GetEdges() == M

    return G

# One graph per data split.
Gtrain = generateGraph(train)
Gval = generateGraph(trueValidation)
Gtest = generateGraph(trueTest)

# Component size distribution of every split. Each plot is drawn from the
# graph named in its file prefix and caption (the train and validation plots
# used to be drawn from Gtest by mistake).
snap.PlotSccDistr(Gtest, "destribution_gtest",
                  "G_{test}")

snap.PlotSccDistr(Gtrain, "destribution_gtrain",
                  "G_{train}")

snap.PlotSccDistr(Gval, "destribution_gval",
                  "G_{val}")

# Degree distribution of every split. The trailing (False, True) are the
# PlotCCdf and PowerFit arguments of the SNAP signature.
snap.PlotOutDegDistr(Gtest, "degree_gtest",
                     "G_{test}", False, True)

snap.PlotOutDegDistr(Gtrain, "degree_gtrain",
                     "G_{train}", False, True)

snap.PlotOutDegDistr(Gval, "degree_gval",
                     "G_{val}", False, True)
Example #3
0
def graphStructure(elistName, elistPath):
    """
        Load an undirected graph from an edge list and collect its statistics

        Args:
        elistName (str) -> Edge list name, used as suffix of every plot file
        elistPath (pathlib.Path) -> Edge list from which the graph is loaded

        Return:
        RESULTS (dict) -> Statistics of the graph, one entry per reported value
    """

    subGraph = snap.LoadEdgeList(snap.PUNGraph, elistPath, 0, 1)

    def sampledDiameters(estimator):
        # Estimate with 10, 100 and 1000 sampled nodes, then append the mean
        # and variance of those three estimates to the same dictionary.
        samples = {
            sampleSize: estimator(subGraph, sampleSize, False)
            for sampleSize in (10, 100, 1000)
        }
        sampleMean, sampleVariance = meanVariance(samples.values())
        samples['mean'] = sampleMean
        samples['variance'] = sampleVariance
        return samples

    # Part 1 (Size of the network)
    RESULTS = {
        'nodeCount': subGraph.GetNodes(),
        'edgeCount': subGraph.GetEdges(),
    }

    # Part 2 (Degree of nodes in the network)
    # The ids and degrees are copied out while iterating; the node objects
    # themselves are not kept.
    nodeDegrees = [(node.GetId(), node.GetDeg()) for node in subGraph.Nodes()]
    maxDegree = max((degree for _, degree in nodeDegrees), default=0)
    maxDegreeNodes = [
        nodeId for nodeId, degree in nodeDegrees if degree == maxDegree
    ]

    # Since it is an undirected graph, in/out degree is unimportant
    snap.PlotOutDegDistr(subGraph, f"deg_dist_{elistName}")

    RESULTS['maxDegree'] = maxDegree
    RESULTS['maxDegreeNodes'] = ','.join(str(nodeId) for nodeId in maxDegreeNodes)
    RESULTS['degree7Count'] = sum(1 for _, degree in nodeDegrees if degree == 7)

    # Part 3 (Paths in the network)
    RESULTS['fullDiameters'] = sampledDiameters(snap.GetBfsFullDiam)
    RESULTS['effDiameters'] = sampledDiameters(snap.GetBfsEffDiam)
    snap.PlotShortPathDistr(subGraph, f"shortest_path_{elistName}")

    # Part 4 (Components of the network)
    RESULTS['fractionLargestConnected'] = snap.GetMxSccSz(subGraph)

    bridges = snap.TIntPrV()
    snap.GetEdgeBridges(subGraph, bridges)
    cutNodes = snap.TIntV()
    snap.GetArtPoints(subGraph, cutNodes)
    RESULTS['edgeBridges'] = len(bridges)
    RESULTS['articulationPoints'] = len(cutNodes)

    snap.PlotSccDistr(subGraph, f"connected_comp_{elistName}")

    # Part 5 (Connectivity and clustering in the network)
    RESULTS['avgClusterCoefficient'] = snap.GetClustCf(subGraph, -1)
    allTriads = snap.GetTriadsAll(subGraph, -1)
    RESULTS['triadCount'] = allTriads[0]

    # Both random nodes are drawn before either of them is inspected.
    clusterNode = subGraph.GetRndNId(Rnd)
    triadNode = subGraph.GetRndNId(Rnd)
    clusterCoefficient = snap.GetNodeClustCf(subGraph, clusterNode)
    RESULTS['randomClusterCoefficient'] = (clusterNode, clusterCoefficient)
    triadsOfNode = snap.GetNodeTriads(subGraph, triadNode)
    RESULTS['randomNodeTriads'] = (triadNode, triadsOfNode)
    RESULTS['edgesTriads'] = snap.GetTriadEdges(subGraph)

    snap.PlotClustCf(subGraph, f"clustering_coeff_{elistName}")

    return RESULTS
          round(get_mean(effective_diameter), 4),
          ',',
          round(get_variance(effective_diameter), 4),
          sep="")

    snap.PlotShortPathDistr(graph, "temp", "Undirected graph - shortest path")
    os.system("mv diam.temp.png plots/shortest_path_" + subgraph_name + ".png")
    os.system("rm diam.*")

    print("Fraction of nodes in largest connected component:",
          round(snap.GetMxSccSz(graph), 4))
    print("Number of edge bridges:", get_bridges(graph).Len())
    print("Number of articulation points:",
          get_articulation_points(graph).Len())

    snap.PlotSccDistr(graph, "temp", "Undirected graph - scc distribution")
    os.system("mv scc.temp.png plots/connected_comp_" + subgraph_name + ".png")
    os.system("rm scc.*")

    print("Average clustering coefficient:", round(snap.GetClustCf(graph), 4))
    print("Number of triads:", snap.GetTriads(graph))
    random_node = graph.GetRndNId()
    print("Clustering coefficient of random node", random_node, ":",
          round(get_each_nodes_ClusteringCofficient(graph)[random_node], 4))
    random_node = graph.GetRndNId()
    print("Number of triads random node", random_node, "participates:",
          snap.GetNodeTriads(graph, random_node))
    print("Number of edges that participate in at least one triad:",
          snap.GetTriadEdges(graph))

    snap.PlotClustCf(graph, "temp",
def main():
    """Print the statistics of one subgraph and save its distribution plots.

    The edge list named by ``sys.argv[1]`` is loaded from the ``subgraphs``
    directory under the current working directory. Size, degree, diameter,
    component and clustering figures are printed, and the four SNAP
    distribution plots are written to the ``plots`` directory as
    ``deg_dist_*``, ``shortest_path_*``, ``connected_comp_*`` and
    ``clustering_coeff_*`` PNG files.

    Note: every file in ``plots`` that does not end in ``.png`` is deleted
    while cleaning up the intermediate files left by the plot calls.
    """
    parentDir = os.getcwd()

    # The edge list is loaded by bare file name, so step into its directory
    # and always step back out, even when loading fails.
    os.chdir(parentDir + "/subgraphs")
    try:
        sub_graph = snap.LoadEdgeList(snap.PUNGraph, sys.argv[1], 0, 1)
    finally:
        os.chdir(parentDir)
    subGraphName = sys.argv[1].split(".")[0]

    #### 1 ########
    printWithOutNewLine("Number of nodes:", sub_graph.GetNodes())
    printWithOutNewLine("Number of edges:", snap.CntUniqBiDirEdges(sub_graph))

    #### 2 ########
    printWithOutNewLine("Number of nodes with degree=7:",
                        snap.CntDegNodes(sub_graph, 7))

    # GetMxDegNId yields a single node of maximum degree; look up its degree
    # and then collect every node sharing that degree.
    rndMaxDegNId = snap.GetMxDegNId(sub_graph)
    nodeDegPairs = snap.TIntPrV()
    snap.GetNodeInDegV(sub_graph, nodeDegPairs)
    maxDegVal = 0

    for pair in nodeDegPairs:
        if pair.GetVal1() == rndMaxDegNId:
            maxDegVal = pair.GetVal2()
            break

    maxDegNodes = [
        pair.GetVal1() for pair in nodeDegPairs if pair.GetVal2() == maxDegVal
    ]

    print("Node id(s) with highest degree:", end=" ")
    print(*maxDegNodes, sep=',')

    #### 3 ########
    sampledFullDiam = [
        snap.GetBfsFullDiam(sub_graph, 10, False),
        snap.GetBfsFullDiam(sub_graph, 100, False),
        snap.GetBfsFullDiam(sub_graph, 1000, False),
    ]
    sampledFullDiamStats = [
        round(statistics.mean(sampledFullDiam), 4),
        round(statistics.variance(sampledFullDiam), 4),
    ]

    printWithOutNewLine("Approximate full diameter by sampling 10 nodes:",
                        sampledFullDiam[0])
    printWithOutNewLine("Approximate full diameter by sampling 100 nodes:",
                        sampledFullDiam[1])
    printWithOutNewLine("Approximate full diameter by sampling 1000 nodes:",
                        sampledFullDiam[2])
    print("Approximate full diameter (mean and variance):", end=" ")
    print(*sampledFullDiamStats, sep=',')

    # The effective diameters are rounded before the mean and variance are
    # taken, so the statistics describe the printed values.
    sampledEffDiam = [
        round(snap.GetBfsEffDiam(sub_graph, 10, False), 4),
        round(snap.GetBfsEffDiam(sub_graph, 100, False), 4),
        round(snap.GetBfsEffDiam(sub_graph, 1000, False), 4),
    ]
    sampledEffDiamStats = [
        round(statistics.mean(sampledEffDiam), 4),
        round(statistics.variance(sampledEffDiam), 4),
    ]

    printWithOutNewLine("Approximate effective diameter by sampling 10 nodes:",
                        sampledEffDiam[0])
    printWithOutNewLine(
        "Approximate effective diameter by sampling 100 nodes:",
        sampledEffDiam[1])
    printWithOutNewLine(
        "Approximate effective diameter by sampling 1000 nodes:",
        sampledEffDiam[2])
    print("Approximate effective diameter (mean and variance):", end=" ")
    print(*sampledEffDiamStats, sep=',')

    #### 4 ########
    printWithOutNewLine("Fraction of nodes in largest connected component:",
                        round(snap.GetMxSccSz(sub_graph), 4))

    bridgeEdges = snap.TIntPrV()
    snap.GetEdgeBridges(sub_graph, bridgeEdges)
    printWithOutNewLine("Number of edge bridges:", len(bridgeEdges))

    articulationPoints = snap.TIntV()
    snap.GetArtPoints(sub_graph, articulationPoints)
    printWithOutNewLine("Number of articulation points:",
                        len(articulationPoints))

    #### 5 ########
    printWithOutNewLine("Average clustering coefficient:",
                        round(snap.GetClustCf(sub_graph, -1), 4))

    printWithOutNewLine("Number of triads:", snap.GetTriads(sub_graph, -1))

    # The same random node is used for both per-node figures.
    randomNodeId = sub_graph.GetRndNId()
    nodeIdCcfMap = snap.TIntFltH()
    snap.GetNodeClustCf(sub_graph, nodeIdCcfMap)

    print("Clustering coefficient of random node", end=" ")
    print(randomNodeId, end=": ")
    print(round(nodeIdCcfMap[randomNodeId], 4))

    print("Number of triads random node", end=" ")
    print(randomNodeId, end=" participates: ")
    print(snap.GetNodeTriads(sub_graph, randomNodeId))

    printWithOutNewLine(
        "Number of edges that participate in at least one triad:",
        snap.GetTriadEdges(sub_graph, -1))

    #### plots ########
    plotsDir = os.path.join(parentDir, "plots")
    if not os.path.isdir(plotsDir):
        os.makedirs(plotsDir)

    # The plot calls write into the working directory, so run them from
    # inside plots/ and restore the working directory whatever happens.
    os.chdir(plotsDir)
    try:
        snap.PlotOutDegDistr(sub_graph, subGraphName,
                             subGraphName + " Subgraph Degree Distribution")
        snap.PlotShortPathDistr(
            sub_graph, subGraphName,
            subGraphName + " Subgraph Shortest Path Lengths Distribution")
        snap.PlotSccDistr(
            sub_graph, subGraphName,
            subGraphName + " Subgraph Connected Components Size Distribution")
        snap.PlotClustCf(
            sub_graph, subGraphName,
            subGraphName + " Subgraph Clustering Coefficient Distribution")

        # Keep only the rendered images.
        for fileName in os.listdir(plotsDir):
            if not fileName.endswith(".png"):
                os.remove(os.path.join(plotsDir, fileName))

        # Rename "<kind>.<name>.png" to "<assignment prefix><name>.png".
        # Anything that is not a freshly produced plot of a known kind is
        # left alone instead of being renamed with a stale prefix.
        plotPrefixes = {
            "ccf": "clustering_coeff_",
            "outDeg": "deg_dist_",
            "diam": "shortest_path_",
            "scc": "connected_comp_",
        }
        for fileName in os.listdir(plotsDir):
            nameSplit = fileName.split(".")
            if len(nameSplit) != 3 or nameSplit[0] not in plotPrefixes:
                continue
            newName = plotPrefixes[nameSplit[0]] + nameSplit[1] + "." + nameSplit[2]
            os.rename(os.path.join(plotsDir, fileName),
                      os.path.join(plotsDir, newName))
    finally:
        os.chdir(parentDir)
Example #6
0
from graph import *
import snap

# Every distribution plot drawn for `graph`, in drawing order:
# (SNAP plot function, output file prefix, caption appended to flnme).
_distribution_plots = (
    # degree distributions
    (snap.PlotInDegDistr, "InDegDist", " in-degree distribution"),
    (snap.PlotOutDegDistr, "OutDegDist", " out-degree distribution"),
    # connected components distributions
    (snap.PlotSccDistr, "SccDist", " strongly connected components distribution"),
    (snap.PlotWccDistr, "WccDist", " weakly connected components distribution"),
    # clustering coefficient
    (snap.PlotClustCf, "ClustCoef", " clustering coefficient"),
)

for _draw, _file_prefix, _caption in _distribution_plots:
    _draw(graph, _file_prefix, flnme + _caption)
Example #7
0
def Scc():
    """Plot the component size distribution of `Graph`.

    `Graph` is not a parameter; it is looked up in the enclosing scope.
    Returns whatever snap.PlotSccDistr returns.
    """
    file_prefix = "ScaleFreeScc"
    caption = "Undirected graph - scc distribution"
    return snap.PlotSccDistr(Graph, file_prefix, caption)
Example #8
0
    print "Approx. effective diameter in " + input_file + " with sampling ", i, " nodes: ", round(
        diameter[index], 3)
    index = index + 1

# Mean of the three diameter estimates held in `diameter` (the divisor is
# hard-coded to 3).
mean = float(sum(diameter) / 3.0)
# Sample variance of the same three estimates: squared deviations from the
# mean divided by n - 1 = 2.
variance = float((pow((diameter[0] - mean), 2) + pow(
    (diameter[1] - mean), 2) + pow((diameter[2] - mean), 2)) / 2.0)

print "Approx. effective diameter in " + input_file + " (mean and variance): ", round(
    mean, 3), ", ", round(variance, 3)

# The printed location below prepends "diam." and appends ".png" to the prefix
# passed here.
# NOTE(review): the trailing 1000 is presumably the number of test nodes -
# confirm against the SNAP signature.
snap.PlotShortPathDistr(Graph1, "shortest_path_plot_" + input_file,
                        "Undirected graph - shortest path", 1000)
print "Shortest path distribution of " + input_file + " is in: diam.shortest_path_plot_" + input_file + ".png"

# Collect all components, then keep the size of the biggest one.
largest_component = snap.TCnComV()
snap.GetSccs(Graph1, largest_component)
largest = 0.0

for item in largest_component:
    if largest < item.Len():
        largest = item.Len()

print ""

# final_nodes is defined outside this part of the script; it is used here as
# the total node count of the graph.
print "Fraction of nodes in largest connected component in " + input_file + ": ", float(
    largest) / float(final_nodes)

snap.PlotSccDistr(Graph1, "conn_components_plot_" + input_file,
                  "Undirected graph - Connected components distribution")
print "Component size distribution of " + input_file + " is in: scc.conn_components_plot_" + input_file + ".png"
def _label_top_nodes(top_n, id_pkg_dict):
    """Return (package name, score) pairs for *top_n*, in reversed order."""
    return [(id_pkg_dict[pair[0]], pair[1]) for pair in reversed(top_n)]


def _snap_plot_missing(prefix, output):
    """Return True if the .plt, .tab or .png file of a SNAP plot is absent."""
    return not all(
        os.path.isfile(prefix + "." + output + "." + extension)
        for extension in ("plt", "tab", "png"))


def _save_snap_table(table, path):
    """Write the SNAP hash table *table* to the file *path*."""
    fout = snap.TFOut(path)
    table.Save(fout)


def _load_snap_table(table, path):
    """Fill the SNAP hash table *table* from the file *path*."""
    fin = snap.TFIn(path)
    table.Load(fin)


def compute_graph_statistics(graph_path, overwrite, compute_betweenness=False):
    """Compute statistics and plots for a saved graph, caching every result.

    The graph is loaded from *graph_path*. Scalar results are kept in
    ``<graph name>_statistics.json`` next to the graph. Centrality tables,
    ``.eps`` subgraph plots and SNAP distribution plots are written to the
    same directory. Each step is skipped when its output already exists,
    unless *overwrite* is true.

    Args:
        graph_path: Path of the saved graph; a ``.graph`` part of the file
            name is dropped to form the graph name.
        overwrite: When true, recompute every result even if it is cached.
        compute_betweenness: When true, also compute betweenness centrality.

    Side effects:
        The working directory is changed to the directory of the graph and is
        not restored.
    """
    graph_abs_path = os.path.abspath(graph_path)
    graph_name = os.path.basename(graph_abs_path).replace(".graph", "")
    fin = snap.TFIn(graph_abs_path)
    graph = snap.TNEANet.Load(fin)

    # rebuild the id => pkg dictionary
    id_pkg_dict = {
        node.GetId(): graph.GetStrAttrDatN(node.GetId(), "pkg")
        for node in graph.Nodes()
    }
    directory = os.path.dirname(graph_abs_path)
    json_path = os.path.join(directory, graph_name + "_statistics.json")
    if os.path.isfile(json_path):
        with open(json_path, "r") as f:
            statistics = json.load(f, object_pairs_hook=OrderedDict)
    else:
        statistics = OrderedDict()

    # snap.py doesn't support absolute paths for some operations. Let's cd to the directory
    os.chdir(directory)

    # general statistics
    output = os.path.join(directory, graph_name + "_main_statistics.txt")
    if not os.path.isfile(output) or overwrite:
        print("{0} Computing general statistics".format(datetime.datetime.now()))
        snap.PrintInfo(graph, "Play Store Graph -- main statistics", output, False)

    # info about the nodes with the max in degree
    if "max_in_degree" not in statistics or overwrite:
        print("{0} Computing max indegree".format(datetime.datetime.now()))
        max_in_deg_id = snap.GetMxInDegNId(graph)
        statistics["max_in_degree"] = graph.GetNI(max_in_deg_id).GetInDeg()
        statistics["max_in_degree_id"] = max_in_deg_id
        statistics["max_in_degree_pkg"] = graph.GetStrAttrDatN(max_in_deg_id, "pkg")

    # info about the nodes with the max out degree
    if "max_out_degree" not in statistics or overwrite:
        print("{0} Computing max outdegree".format(datetime.datetime.now()))
        max_out_deg_id = snap.GetMxOutDegNId(graph)
        statistics["max_out_degree"] = graph.GetNI(max_out_deg_id).GetOutDeg()
        statistics["max_out_degree_id"] = max_out_deg_id
        statistics["max_out_degree_pkg"] = graph.GetStrAttrDatN(max_out_deg_id, "pkg")

    # pagerank statistics
    output = graph_name + "_topNpagerank.eps"
    need_plot = not os.path.isfile(output) or overwrite
    need_stats = "top_n_pagerank" not in statistics or overwrite
    if need_plot or need_stats:
        print("{0} Computing top 20 nodes with highest pagerank".format(datetime.datetime.now()))
        data_file = graph_name + "_pageranks"
        prank_hashtable = snap.TIntFltH()
        if not os.path.isfile(data_file) or overwrite:
            # Damping Factor: 0.85, Convergence difference: 1e-4, MaxIter: 100
            snap.GetPageRank(graph, prank_hashtable, 0.85)
            _save_snap_table(prank_hashtable, data_file)
        else:
            _load_snap_table(prank_hashtable, data_file)

        # ascending by score; the stored list is reversed to put the best first
        top_n = get_top_nodes_from_hashtable(prank_hashtable)
        top_n.sort(key=itemgetter(1))
        if need_stats:
            statistics["top_n_pagerank"] = _label_top_nodes(top_n, id_pkg_dict)

        if need_plot:
            # let's build a subgraph induced on the top 20 pagerank nodes
            top_ids = [x[0] for x in top_n]
            subgraph = get_subgraph(graph, top_ids)
            labels_dict = get_labels_subset(id_pkg_dict, subgraph)
            values = snap_hashtable_to_dict(prank_hashtable, [x[0] for x in top_n])
            plot_subgraph_colored(subgraph, labels_dict, values, "PageRank",
                                  "Play Store Graph - top 20 PageRank nodes", output, "autumn_r")

    # betweenness statistics
    output = graph_name + "_topNbetweenness.eps"
    need_plot = not os.path.isfile(output) or overwrite
    # The cached result lives under "top_n_betweenness"; testing any other key
    # would make this section run on every call.
    need_stats = "top_n_betweenness" not in statistics or overwrite
    if compute_betweenness and (need_plot or need_stats):
        print("{0} Computing top 20 nodes with highest betweenness".format(datetime.datetime.now()))
        data_file1 = graph_name + "_node_betweenness"
        data_file2 = graph_name + "_edge_betweenness"
        node_betwenness_hashtable = snap.TIntFltH()
        edge_betwenness_hashtable = snap.TIntPrFltH()
        if not os.path.isfile(data_file1) or not os.path.isfile(data_file2) or overwrite:
            # NOTE(review): 0.85 and True are presumably NodeFrac and IsDir of
            # the SNAP signature - confirm.
            snap.GetBetweennessCentr(graph, node_betwenness_hashtable, edge_betwenness_hashtable, 0.85, True)
            _save_snap_table(node_betwenness_hashtable, data_file1)
            _save_snap_table(edge_betwenness_hashtable, data_file2)
        else:
            _load_snap_table(node_betwenness_hashtable, data_file1)
            _load_snap_table(edge_betwenness_hashtable, data_file2)  # unused, as now

        top_n = get_top_nodes_from_hashtable(node_betwenness_hashtable)
        top_n.sort(key=itemgetter(1))
        if need_stats:
            statistics["top_n_betweenness"] = _label_top_nodes(top_n, id_pkg_dict)

        if need_plot:
            # let's build a subgraph induced on the top 20 betweenness nodes
            subgraph = get_subgraph(graph, [x[0] for x in top_n])
            labels_dict = get_labels_subset(id_pkg_dict, subgraph)
            values = snap_hashtable_to_dict(node_betwenness_hashtable, [x[0] for x in top_n])
            plot_subgraph_colored(subgraph, labels_dict, values, "Betweenness",
                                  "Play Store Graph - top 20 Betweenness nodes", output)

    # HITS statistics
    output_hub = graph_name + "_topNhitshubs.eps"
    output_auth = graph_name + "_topNhitsauth.eps"
    need_plot = not os.path.isfile(output_hub) or not os.path.isfile(output_auth) or overwrite
    need_hub_stats = "top_n_hits_hubs" not in statistics or overwrite
    need_auth_stats = "top_n_hits_authorities" not in statistics or overwrite
    if need_plot or need_hub_stats or need_auth_stats:
        print("{0} Computing top 20 HITS hubs and auths".format(datetime.datetime.now()))
        data_file1 = graph_name + "_hits_hubs"
        data_file2 = graph_name + "_hits_auth"
        hubs_hashtable = snap.TIntFltH()
        auth_hashtable = snap.TIntFltH()
        if not os.path.isfile(data_file1) or not os.path.isfile(data_file2) or overwrite:
            # MaxIter = 20
            snap.GetHits(graph, hubs_hashtable, auth_hashtable, 20)
            _save_snap_table(hubs_hashtable, data_file1)
            _save_snap_table(auth_hashtable, data_file2)
        else:
            _load_snap_table(hubs_hashtable, data_file1)
            _load_snap_table(auth_hashtable, data_file2)

        top_n_hubs = get_top_nodes_from_hashtable(hubs_hashtable)
        top_n_hubs.sort(key=itemgetter(1))
        if need_hub_stats:
            statistics["top_n_hits_hubs"] = _label_top_nodes(top_n_hubs, id_pkg_dict)

        top_n_auth = get_top_nodes_from_hashtable(auth_hashtable)
        top_n_auth.sort(key=itemgetter(1))
        if need_auth_stats:
            statistics["top_n_hits_authorities"] = _label_top_nodes(top_n_auth, id_pkg_dict)

        if need_plot:
            nodes_subset = set()
            for pair in top_n_hubs:
                nodes_subset.add(pair[0])
            for pair in top_n_auth:
                nodes_subset.add(pair[0])

            # let's build a subgraph induced on the top N HITS auths and hubs nodes
            subgraph = get_subgraph(graph, nodes_subset)
            labels_dict = get_labels_subset(id_pkg_dict, subgraph)
            values = snap_hashtable_to_dict(hubs_hashtable, nodes_subset)
            values2 = snap_hashtable_to_dict(auth_hashtable, nodes_subset)
            plot_subgraph_colored(subgraph, labels_dict, values, "HITS - Hub Index",
                                  "Play Store Graph - top 20 HITS hubs + top 20 HITS authorities", output_hub, "bwr")
            plot_subgraph_colored(subgraph, labels_dict, values2, "HITS - Authority Index",
                                  "Play Store Graph - top 20 HITS hubs + top 20 HITS authorities", output_auth,
                                  "bwr_r")

    # SNAP distribution plots, in drawing order. Each entry is: the prefix of
    # the plot's output files, suffix of the output name, label used in the
    # log line, name of the snap plot function, caption, extra arguments.
    # Functions are looked up by name only when a plot really has to be drawn.
    distribution_plots = (
        ("inDeg", "_indegree", "indegree", "PlotInDegDistr",
         "Play Store Graph - in-degree Distribution", ()),
        ("outDeg", "_outdegree", "outdegree", "PlotOutDegDistr",
         "Play Store Graph - out-degree Distribution", ()),
        ("scc", "_scc", "scc", "PlotSccDistr",
         "Play Store Graph - strongly connected components distribution", ()),
        ("wcc", "_wcc", "wcc", "PlotWccDistr",
         "Play Store Graph - weakly connected components distribution", ()),
        ("ccf", "_cf", "cf", "PlotClustCf",
         "Play Store Graph - clustering coefficient distribution", ()),
        ("hop", "_hops", "shortest path", "PlotHops",
         "Play Store Graph - Cumulative Shortest Paths (hops) distribution", (True,)),
        ("coreEdges", "_kcore_edges", "k-core edges", "PlotKCoreEdges",
         "Play Store Graph - K-Core edges distribution", ()),
        ("coreNodes", "_kcore_nodes", "k-core nodes", "PlotKCoreNodes",
         "Play Store Graph - K-Core nodes distribution", ()),
    )
    for prefix, suffix, label, plot_name, caption, extra_args in distribution_plots:
        output = graph_name + suffix
        if _snap_plot_missing(prefix, output) or overwrite:
            print("{0} Computing {1} distribution".format(datetime.datetime.now(), label))
            getattr(snap, plot_name)(graph, output, caption, *extra_args)

    with open(json_path, 'w') as outfile:
        json.dump(statistics, outfile, indent=2)
    ArtNIdV = snap.TIntV()
    snap.GetArtPoints(p2p_gnutella04_subgraph, ArtNIdV)

    art_point = 0
    for NI in ArtNIdV:
        art_point = art_point + 1
    print "Number of articulation points in p2p-Gnutella04-subgraph :" + str(
        art_point)

# Task 1.2.4.4
# Exactly one branch matches sub_graph_name. Each branch plots the component
# size distribution of its own graph variable and prints where the image
# file is (the printed name is "scc." + the prefix passed to the plot call +
# ".png").

if (sub_graph_name == "soc-Epinions1-subgraph"):
    #Plotting the distribution of sizes of connected components

    snap.PlotSccDistr(soc_epinions1_subgraph, "soc-Epinions1-subgraph",
                      "Undirected Scc Distribution")
    print "Component size Distribution of soc-Epinions1-subgraph is in :" + 'scc.soc-Epinions1-subgraph.png'
if (sub_graph_name == "cit-HepPh-subgraph"):
    #Plotting the distribution of sizes of connected components

    snap.PlotSccDistr(cit_heph_subgraph, "cit-HepPh-subgraph",
                      "Undirected Scc Distribution")
    print " Component size Distribution of cit-HepPh-subgraph is in :" + 'scc.cit-HepPh-subgraph.png'
if (sub_graph_name == "email-Enron-subgraph"):
    #Plotting the distribution of sizes of connected components
    snap.PlotSccDistr(email_enron_subgraph, "email-Enron-subgraph",
                      "Undirected Scc Distribution")
    print "Component size Distribution of email-Enron-subgraph is in :" + 'scc.email-Enron-subgraph.png'
if (sub_graph_name == "p2p-Gnutella04-subgraph"):
    #Plotting the distribution of sizes of connected components
Example #11
0
 def connectivity(self):
     """Plot the component size distribution of self.graph and display it."""
     plot_name = "Connectivity"
     snap.PlotSccDistr(self.graph, plot_name, plot_name)
     # Read back the image produced by the plot call and show it.
     rendered = mpimg.imread("scc." + plot_name + ".png")
     plt.figure()
     plt.imshow(rendered)
     plt.show()
Example #12
0
# Weakly connected components: (component size, number of components) pairs.
# Every print below takes a single argument, so the output is the same under
# Python 2 and Python 3.
ComponentDist = snap.TIntPrV()
snap.GetWccSzCnt(G, ComponentDist)
size = []
counts = []
print("WCC counts")
for comp in ComponentDist:
    size.append(comp.GetVal1())
    counts.append(comp.GetVal2())
    print("Size: %d Count: %d" % (comp.GetVal1(), comp.GetVal2()))

plt.clf()
plt.figure()
plt.plot(size, counts, '.')

# Strongly connected components. This section is labelled "SCC counts", so it
# must query the SCC size counts rather than the WCC ones a second time.
ComponentDist2 = snap.TIntPrV()
snap.GetSccSzCnt(G, ComponentDist2)
print("SCC counts")
for comp in ComponentDist2:
    print("Size: %d Count: %d" % (comp.GetVal1(), comp.GetVal2()))

# The figure still holds the WCC scatter plotted above.
plt.title("Youtube Video WCC Size Distribution")
plt.xlabel("WCC Size")
plt.ylabel("Number of WCC of given size")
plt.savefig("wcc-distr3.pdf")

snap.PlotWccDistr(G, "wcc-distr3",
                  "Directed Related Video Graph - WCC distribution")

print("getting SCC size distribution...")
snap.PlotSccDistr(G, "scc-distr3",
                  "Directed Related Video Graph - SCC distribution")
# Articulation points are collected into the existing ArtNIdV vector.
snap.GetArtPoints(G, ArtNIdV)
print("Number of articulation points:", len(ArtNIdV))

print("Average clustering coefficient: %.4f" % snap.GetClustCf(G, -1))
print("Number of triads:", snap.GetTriads(G, -1))

# One random node for the clustering coefficient ...
Ran_n = G.GetRndNId(Rnd)
print("Clustering coefficient of random node %d: %.4f" %
      (Ran_n, snap.GetNodeClustCf(G, Ran_n)))

# ... and a freshly drawn one for the triad count.
Ran_n = G.GetRndNId(Rnd)
print("Number of triads random node %d participates: %d" %
      (Ran_n, snap.GetNodeTriads(G, Ran_n)))

print("Number of edges that participate in at least one triad:",
      snap.GetTriadEdges(G))

# Draw each distribution, then move the resulting image into plots/ under its
# final name. Entries: (plot function, file prefix passed to it, caption,
# leading part of the image name that is moved, final name stem).
for _draw, _prefix, _caption, _kind, _final_stem in (
        (snap.PlotInDegDistr, "D_", "Degree Distribution",
         "inDeg", "deg_dist_"),
        (snap.PlotShortPathDistr, "S_", "Shortest path Distribution",
         "diam", "shortest_path_"),
        (snap.PlotSccDistr, "C_", "Component Size Distribution",
         "scc", "connected_comp_"),
        (snap.PlotClustCf, "C_", "Clustering Coefficient Distribution",
         "ccf", "clustering_coeff_"),
):
    _draw(G, _prefix + sys.argv[1], _caption)
    MoveFile(
        os.path.join(dirname, _kind + "." + _prefix + sys.argv[1] + ".png"),
        os.path.join(dirname, "plots", _final_stem + sys.argv[1] + ".png"))
Example #14
0
    MxConCompSize = sn.GetMxScc(graph).GetNodes()
    print("Fraction of nodes in largest connected component: {:0.4f}".format(
        MxConCompSize / graph.GetNodes()))

    ## Edge Bridges
    edgeBridge = sn.TIntPrV()
    sn.GetEdgeBridges(graph, edgeBridge)
    print("Number of edge bridges: {}".format(len(edgeBridge)))

    ## Articulation Points
    artPoints = sn.TIntV()
    sn.GetArtPoints(graph, artPoints)
    print("Number of articulation points: {}".format(len(artPoints)))

    ## Connected Components Distribution
    sn.PlotSccDistr(graph, name, "Connected Component Distribution")
    plotRemove("scc", "connected_comp", name)

    #Question 5

    ## Clustering Coefficient
    print("Average clustering coefficient: {:0.4f}".format(
        sn.GetClustCf(graph)))

    ## Triads
    print("Number of triads: {}".format(sn.GetTriads(graph)))

    ## Random Clustering Coefficient
    rndNode = graph.GetRndNId()
    print("Clustering coefficient of random node {}: {:0.4f}".format(
        rndNode, sn.GetNodeClustCf(graph, rndNode)))
Example #15
0
# Mean and variance of the sampled effective diameters.
em = mean(effData)
ev = variance(effData)

# The diameters, their mean and their variance are formatted as floats:
# "%d" would silently truncate them to whole numbers. Every print takes a
# single argument, so the output is the same under Python 2 and Python 3.
# NOTE(review): effDiam10/100/1000 are assumed to be the float estimates held
# in effData - confirm against the code that computes them.
print("Approx. effective diameter in %s with sampling 10 nodes: %.4f" % (
    file, effDiam10))
print("Approx. effective diameter in %s with sampling 100 nodes: %.4f" % (
    file, effDiam100))
print("Approx. effective diameter in %s with sampling 1000 nodes: %.4f" % (
    file, effDiam1000))
print("Approx. effective diameter in %s (mean and variance): %.4f, %.4f\n" % (
    file, em, ev))
# c) Plot of the distribution of the shortest path
# NOTE(review): SNAP plot calls appear to add their own "diam." / "scc."
# prefix and ".png" extension to the name they are given, so the file that is
# written may not be the name printed below - confirm.
plotFN1 = file + ".diam.short-path-plot.png"
snap.PlotShortPathDistr(UGraph, plotFN1,
                        "Undirected graph - Shortest path for file " + file)
print("\nShortest path distribution of %s is in: %s\n" % (file, plotFN1))

# 4) Components of the network:
print("Components of the network:\n")
# a) Fraction of nodes in the largest connected component
# The fraction lies in [0, 1]; with "%d" it always printed as 0 or 1.
nodeFrac = snap.GetMxSccSz(UGraph)
print("Fraction of nodes in largest connected component in '%s': %.4f\n" % (
    file, nodeFrac))
# b) Plot of the distribution of sizes of connected components.
plotFN2 = file + ".scc.connected-components-plot.png"
snap.PlotSccDistr(UGraph, plotFN2,
                  "Undirected graph - scc distribution for file " + file)
print("\nComponent size distribution of %s is in: %s\n" % (file, plotFN2))

# end of program
print("\n\t End of program\n\n")