예제 #1
0
    def test_snap(self):
        """Smoke-test the snap.py installation.

        Generates three small undirected graphs, then checks degree counts,
        iteration over a degree histogram, and that rewiring leaves the total
        degree of every node unchanged.
        """
        import snap

        node_count = 20
        top_degree = node_count - 1

        # Undirected sample graphs: complete, star, and random G(n, m).
        complete = snap.GenFull(snap.PUNGraph, node_count)
        star = snap.GenStar(snap.PUNGraph, node_count)
        gnm = snap.GenRndGnm(snap.PUNGraph, node_count, node_count * 3)

        # Degree statistics: all nodes of the complete graph have degree
        # node_count - 1, and node 0 is expected to be the star's top node.
        self.assertEqual(snap.CntInDegNodes(complete, top_degree), node_count)
        self.assertEqual(snap.CntOutDegNodes(complete, top_degree), node_count)
        self.assertEqual(snap.GetMxInDegNId(star), 0)
        self.assertEqual(snap.GetMxOutDegNId(star), 0)

        # Vector iteration: every (degree, count) pair of the complete graph
        # is expected to be (node_count - 1, node_count).
        histogram = snap.TIntPrV()
        snap.GetInDegCnt(complete, histogram)
        for pair in histogram:
            self.assertEqual(top_degree, pair.GetVal1())
            self.assertEqual(node_count, pair.GetVal2())

        # Rewiring: nodes sharing an id must have the same total degree
        # before and after. Degrees are read while iterating, so no node
        # iterator object is kept beyond its loop step.
        rewired = snap.GenRewire(gnm)
        rewired_degree = {}
        for node in rewired.Nodes():
            rewired_degree[node.GetId()] = node.GetOutDeg() + node.GetInDeg()

        for node in gnm.Nodes():
            node_id = node.GetId()
            if node_id in rewired_degree:
                self.assertEqual(node.GetOutDeg() + node.GetInDeg(),
                                 rewired_degree[node_id])
예제 #2
0
def avgDegreeDist(family, direction, numSamples, apiGraph):
    """Return the degree distribution averaged over sampled graphs of a family.

    Graph files are read from ``data/graphs/<family>/``, shuffled, and at
    most ``numSamples`` of them are loaded.

    Args:
        family: Name of the sub-directory of ``data/graphs/`` to read.
        direction: ``'in'`` selects in-degrees; any other value selects
            out-degrees.
        numSamples: Maximum number of graph files to load.
        apiGraph: If truthy, files whose name contains ``.apigraph`` are
            loaded with ``snap.TNEANet.Load``; otherwise files whose name
            contains ``.edges`` are loaded as edge lists.

    Returns:
        A numpy array whose entry ``d`` is the number of nodes of degree
        ``d``, averaged over the graphs that were actually loaded.

    Raises:
        ValueError: If no matching graph could be loaded.
    """
    path = 'data/graphs/' + family + '/'
    marker = '.apigraph' if apiGraph else '.edges'

    # Build a real list: on Python 3 filter() returns an iterator, which
    # random.shuffle() and slicing cannot handle.
    graph_files = [f for f in os.listdir(path) if marker in f]
    random.shuffle(graph_files)
    sampled_files = graph_files[:numSamples]

    if apiGraph:
        Gs = [snap.TNEANet.Load(snap.TFIn(path + f)) for f in sampled_files]
    else:
        Gs = [snap.LoadEdgeList(snap.PNEANet, path + f, 0, 1)
              for f in sampled_files]
    if not Gs:
        raise ValueError(
            'no graph files containing %r found in %s' % (marker, path))

    use_in_degree = direction == 'in'
    if use_in_degree:
        maxdeg = max(G.GetNI(snap.GetMxInDegNId(G)).GetInDeg() for G in Gs)
    else:
        maxdeg = max(G.GetNI(snap.GetMxOutDegNId(G)).GetOutDeg() for G in Gs)

    # Index d accumulates the number of nodes of degree d over all graphs.
    avg_deg_dist = np.zeros(maxdeg + 1)
    for G in Gs:
        DegToCntV = snap.TIntPrV()
        if use_in_degree:
            snap.GetInDegCnt(G, DegToCntV)
        else:
            snap.GetOutDegCnt(G, DegToCntV)

        for item in DegToCntV:
            avg_deg_dist[item.GetVal1()] += item.GetVal2()

    # Divide by the number of graphs loaded, which is smaller than
    # numSamples when the directory holds fewer matching files.
    return avg_deg_dist / len(Gs)
예제 #3
0
def quick_properties(graph, name, dic_path):
    """Print a quick overview of the graph ``name``.

    Reports node/edge counts, degree extremes, weakly connected components
    and the three top-ranked players by PageRank, HITS hub score and HITS
    authority score.

    Args:
        graph: snap graph to summarise.
        name: Label used in the header line.
        dic_path: Path of a pickled dict ``{player: node id}``.

    Raises:
        ValueError: If a top-ranked node id is not a value of the dict.
        IndexError: If a ranking has fewer than three entries.
    """
    n_edges = graph.GetEdges()
    n_nodes = graph.GetNodes()
    print("##########")
    print("Quick overview of {} Network".format(name))
    print("##########")
    # .format() belongs to the string, not to the value returned by print().
    print("{} Nodes, {} Edges".format(n_nodes, n_edges))
    print("{} Self-edges ".format(snap.CntSelfEdges(graph)))
    print("{} Directed edges, {} Undirected edges".format(
        snap.CntUniqDirEdges(graph), snap.CntUniqUndirEdges(graph)))
    print("{} Reciprocated edges".format(snap.CntUniqBiDirEdges(graph)))
    print("{} 0-out-degree nodes, {} 0-in-degree nodes".format(
        snap.CntOutDegNodes(graph, 0), snap.CntInDegNodes(graph, 0)))
    node_in = graph.GetNI(snap.GetMxInDegNId(graph))
    node_out = graph.GetNI(snap.GetMxOutDegNId(graph))
    # Report the degree that matches each label; GetDeg() is the node's
    # total degree in snap, which overstates both values on directed graphs.
    print("Maximum node in-degree: {}, maximum node out-degree: {}".format(
        node_in.GetInDeg(), node_out.GetOutDeg()))
    print("###")
    components = snap.TCnComV()
    snap.GetWccs(graph, components)
    max_wcc = snap.GetMxWcc(graph)
    print("{} Weakly connected components".format(components.Len()))
    print("Largest Wcc: {} Nodes, {} Edges".format(max_wcc.GetNodes(),
                                                   max_wcc.GetEdges()))

    # Node ids ordered by decreasing score for each centrality measure.
    prankH = snap.TIntFltH()
    snap.GetPageRank(graph, prankH)
    sorted_prankH = sorted(prankH, key=lambda key: prankH[key], reverse=True)
    NIdHubH = snap.TIntFltH()
    NIdAuthH = snap.TIntFltH()
    snap.GetHits(graph, NIdHubH, NIdAuthH)
    sorted_NIdHubH = sorted(NIdHubH,
                            key=lambda key: NIdHubH[key],
                            reverse=True)
    sorted_NIdAuthH = sorted(NIdAuthH,
                             key=lambda key: NIdAuthH[key],
                             reverse=True)

    with open(dic_path, 'rb') as dic_id:
        # NOTE(review): pickle.load can execute arbitrary code; dic_path
        # must point to a trusted file.
        mydict = pickle.load(dic_id)

    # Materialise keys and values once instead of once per lookup.
    players = list(mydict.keys())
    node_ids = list(mydict.values())

    def top_three(ranking):
        """Return the players mapped to the first three ids of ``ranking``."""
        return [players[node_ids.index(ranking[i])] for i in range(3)]

    print("3 most central players by PageRank scores: {}, {}, {}".format(
        *top_three(sorted_prankH)))
    print("Top 3 hubs: {}, {}, {}".format(*top_three(sorted_NIdHubH)))
    print("Top 3 authorities: {}, {}, {}".format(
        *top_three(sorted_NIdAuthH)))
예제 #4
0
    def genGraphInfo(self):
        """Print summary statistics of ``self.G`` and generate its snap plots.

        Reads ``self.graphName`` (used as label and plot file-name stem) and
        ``self.G`` (the snap graph). Prints node and edge counts, the ids of
        all nodes whose out-degree equals the maximum, the value returned by
        ``snap.GetMxSccSz``, and the names of the plot files snap is expected
        to write for the out-degree, shortest-path and component-size
        distributions.
        """
        graphName = self.graphName
        graph = self.G

        # print() with a single pre-formatted string behaves the same on
        # Python 2 and Python 3; the print statement is Python 2 only.
        print("Number of nodes in %s: %d" % (graphName, graph.GetNodes()))
        print("Number of edges in %s: %d" % (graphName, graph.GetEdges()))

        # get the node id(s) with highest degree
        nodeIdMaxDegree = snap.GetMxOutDegNId(graph)

        # Out-degree of that node; stays -1 when the id is not in the graph.
        maxDegree = next(
            (node.GetOutDeg() for node in graph.Nodes()
             if node.GetId() == nodeIdMaxDegree),
            -1)

        # Every node sharing the maximum out-degree; each id is followed by
        # a comma, so the string ends with a trailing comma.
        nodeIdsMaxDegreeT = "".join(
            str(node.GetId()) + ","
            for node in graph.Nodes()
            if maxDegree == node.GetOutDeg())

        print("Node id(s) with highest degree in %s: %s" % (
            graphName, nodeIdsMaxDegreeT))

        # plot degree distribution
        snap.PlotOutDegDistr(graph, graphName, "Degree Distribution")
        degreeFileName = "outDeg." + graphName + ".png"
        print("Degree distribution of %s is in: %s" % (graphName,
                                                       degreeFileName))

        # plot shortest path distribution
        snap.PlotShortPathDistr(graph, graphName,
                                "Shortest Path Distribution")
        shortestPathFileName = "diam." + graphName + ".png"
        print("Shortest path distribution of %s is in: %s" % (
            graphName, shortestPathFileName))

        # get the fraction of nodes in largest cc
        print("Fraction of nodes in largest connected component in %s: %f" % (
            graphName, snap.GetMxSccSz(graph)))

        # plot the component size distribution
        snap.PlotSccDistr(graph, graphName, "Component size distribution")
        sccFileName = "scc." + graphName + ".png"
        print("Component size distribution of %s is in: %s" % (graphName,
                                                               sccFileName))
예제 #5
0
def estimate3SubgraphFrequencies(Network):
    """Estimate the relative frequency of 3-node subgraph shapes by sampling.

    ``Network`` is converted to a directed ``snap.PNGraph``; then
    ``num_samples`` (a name defined outside this function) random 3-node
    subgraphs are drawn and each is assigned to one of seven categories:

        0 -> 0 edges
        1 -> 1 edge
        2 -> 2 edges to same node
        3 -> 2 edges from same node
        4 -> 2 edges through one node
        5 -> 3 edge cycle
        6 -> 3 edge, not cycle

    Any sample with more than two edges lands in category 5 or 6.

    Returns:
        A list of seven floats: the category counts divided by their sum.
    """
    directed = snap.ConvertGraph(snap.PNGraph, Network)
    tallies = np.zeros(7)

    for _ in range(num_samples):
        sample = snap.GetRndSubGraph(directed, 3)
        edge_count = sample.GetEdges()

        if edge_count in (0, 1):
            # Zero or one edge: the category index equals the edge count.
            category = edge_count
        elif edge_count == 2:
            # Both maxima are always queried, in this order.
            top_in = sample.GetNI(snap.GetMxInDegNId(sample)).GetInDeg()
            top_out = sample.GetNI(snap.GetMxOutDegNId(sample)).GetOutDeg()
            if top_in == 2:
                category = 2
            elif top_out == 2:
                category = 3
            else:
                category = 4
        else:
            top_in = sample.GetNI(snap.GetMxInDegNId(sample)).GetInDeg()
            # Max in-degree 1 is counted as a cycle.
            category = 5 if top_in == 1 else 6

        tallies[category] += 1

    return list(tallies / sum(tallies))
예제 #6
0
    def _initialize(self, mu, sigma_ratio):
        """
        Seed every node's popularity from its out-degree, then let NodeStat
        compute the edge probabilities.

        For a node with out-degree ``d`` and graph-wide maximum out-degree
        ``D``, the popularity is drawn from a normal distribution with mean
        ``d / D + mu`` and standard deviation ``d / D * sigma_ratio`` and is
        stored as the float node attribute ``self.pop``.

        The original author notes that edge probabilities are then sampled
        using in links; that step lives in ``NodeStat._compute_prob``, which
        is not shown here.
        """
        graph = self._graph

        out_degrees = snap.TIntPrV()
        snap.GetNodeOutDegV(graph, out_degrees)

        top_nid = snap.GetMxOutDegNId(graph)
        top_deg = graph.GetNI(top_nid).GetOutDeg()

        for pair in out_degrees:
            node_id = pair.GetVal1()
            # Out-degree scaled by the largest out-degree in the graph.
            scaled = float(pair.GetVal2()) / top_deg

            # Initialized according to scaled number of followers
            popularity = np.random.normal(scaled + mu, scaled * sigma_ratio)
            graph.AddFltAttrDatN(node_id, popularity, self.pop)

        NodeStat._compute_prob(graph, self.sid)
예제 #7
0
def quick_properties(graph, name, dic_path):
    """Print and return quick properties of the graph ``name``.

    Args:
        graph: snap graph to summarise.
        name: Label used in the header line.
        dic_path: Path of a pickled dict ``{player: node id}``; it is passed
            to ``name_from_index`` to turn ranked node ids into names.

    Returns:
        A dict whose keys are prefixed ``"a. "`` to ``"o. "`` and hold the
        node/edge counts, degree extremes, weakly-connected-component
        figures, and the three top names by PageRank, HITS hub score and
        HITS authority score (each as a 3-tuple).
    """
    results = {}
    n_edges = graph.GetEdges()
    n_nodes = graph.GetNodes()
    n_self_edges = snap.CntSelfEdges(graph)
    n_directed_edges = snap.CntUniqDirEdges(graph)
    n_undirected_edges = snap.CntUniqUndirEdges(graph)
    n_reciprocated_edges = snap.CntUniqBiDirEdges(graph)
    n_zero_out_nodes = snap.CntOutDegNodes(graph, 0)
    n_zero_in_nodes = snap.CntInDegNodes(graph, 0)
    # Use the degree that matches each label; GetDeg() is the node's total
    # degree in snap, which overstates both values on directed graphs.
    max_node_in = graph.GetNI(snap.GetMxInDegNId(graph)).GetInDeg()
    max_node_out = graph.GetNI(snap.GetMxOutDegNId(graph)).GetOutDeg()
    components = snap.TCnComV()
    snap.GetWccs(graph, components)
    max_wcc = snap.GetMxWcc(graph)
    n_components = components.Len()
    wcc_nodes, wcc_edges = max_wcc.GetNodes(), max_wcc.GetEdges()

    results["a. Nodes"] = n_nodes
    results["b. Edges"] = n_edges
    results["c. Self-edges"] = n_self_edges
    results["d. Directed edges"] = n_directed_edges
    results["e. Undirected edges"] = n_undirected_edges
    results["f. Reciprocated edges"] = n_reciprocated_edges
    results["g. 0 out-degree nodes"] = n_zero_out_nodes
    results["h. 0 in-degree nodes"] = n_zero_in_nodes
    results["i. Maximum node out-degree"] = max_node_out
    results["j. Maximum node in-degree"] = max_node_in
    results["k. Weakly connected components"] = n_components
    results["l. Nodes, edges of largest WCC"] = (wcc_nodes, wcc_edges)

    print("##########")
    print("Quick overview of {} Network".format(name))
    print("##########")
    print("{} Nodes, {} Edges".format(n_nodes, n_edges))
    print("{} Self-edges ".format(n_self_edges))
    print("{} Directed edges, {} Undirected edges".format(
        n_directed_edges, n_undirected_edges))
    print("{} Reciprocated edges".format(n_reciprocated_edges))
    print("{} 0-out-degree nodes, {} 0-in-degree nodes".format(
        n_zero_out_nodes, n_zero_in_nodes))
    print("Maximum node in-degree: {}, maximum node out-degree: {}".format(
        max_node_in, max_node_out))
    print("###")
    print("{} Weakly connected components".format(n_components))
    print("Largest Wcc: {} Nodes, {} Edges".format(wcc_nodes, wcc_edges))

    # Node ids ordered by decreasing score for each centrality measure.
    prankH = snap.TIntFltH()
    snap.GetPageRank(graph, prankH)
    sorted_prankH = sorted(prankH, key=lambda key: prankH[key], reverse=True)
    NIdHubH = snap.TIntFltH()
    NIdAuthH = snap.TIntFltH()
    snap.GetHits(graph, NIdHubH, NIdAuthH)
    sorted_NIdHubH = sorted(NIdHubH,
                            key=lambda key: NIdHubH[key],
                            reverse=True)
    sorted_NIdAuthH = sorted(NIdAuthH,
                             key=lambda key: NIdAuthH[key],
                             reverse=True)

    with open(dic_path, 'rb') as dic_id:
        # NOTE(review): pickle.load can execute arbitrary code; dic_path
        # must point to a trusted file.
        mydict = pickle.load(dic_id)

    def top_three(ranking):
        """Return the names for the first three entries of ``ranking``."""
        # Assumes name_from_index is a pure lookup, so each name is
        # resolved once and reused for both the printout and the results.
        return tuple(name_from_index(ranking, i, mydict) for i in range(3))

    top_pagerank = top_three(sorted_prankH)
    top_hubs = top_three(sorted_NIdHubH)
    top_authorities = top_three(sorted_NIdAuthH)

    print("3 most central players by PageRank scores: {}, {}, {}".format(
        *top_pagerank))
    print("Top 3 hubs: {}, {}, {}".format(*top_hubs))
    print("Top 3 authorities: {}, {}, {}".format(*top_authorities))

    results["m. Three top PageRank"] = top_pagerank
    results["n. Three top hubs"] = top_hubs
    results["o. Three top authorities"] = top_authorities
    return results
예제 #8
0
파일: hw00.py 프로젝트: w29593617/cs224w
        nodes_fewer_than_10_incoming_edges = nodes_fewer_than_10_incoming_edges + 1
    if node.GetOutDeg() > max_out_degree:
        max_out_degree = node.GetOutDeg()

# Report the node counts gathered earlier in the script (the loop that
# computes these counters starts above this excerpt).
print("The wiki-vote graph has " + str(nodes_zero_out_degree) +
      " nodes of zero out-degree.")
print("The wiki-vote graph has " + str(nodes_zero_in_degree) +
      " nodes of zero in-degree.")
print("The wiki-vote graph has " + str(nodes_more_than_10_outgoing_edges) +
      " nodes with more than 10 outgoing-edges.")
print("The wiki-vote graph has " + str(nodes_fewer_than_10_incoming_edges) +
      " nodes with fewer than 10 incoming-edges.")

# Section 2: log-log out-degree distribution with a linear fit.
print('*' * 10 + ' Section II ' + '*' * 10)
# NOTE(review): NId is not used anywhere in the visible part of the script.
NId = snap.GetMxOutDegNId(wiki_g)
# NOTE(review): x has max_out_degree + 1 - min_out_degree entries while y has
# max_out_degree entries; polyfit below needs equal lengths, so this
# presumably relies on min_out_degree == 1 -- confirm where it is set.
x = numpy.arange(min_out_degree, max_out_degree + 1, 1)
# Every slot starts at 1, so y[d - 1] ends up as 1 + (number of nodes with
# out-degree d); presumably the offset keeps log10 finite for unused degrees.
y = numpy.ones(max_out_degree)
for node in wiki_g.Nodes():
    if node.GetOutDeg() != 0:
        y[node.GetOutDeg() - 1] = y[node.GetOutDeg() - 1] + 1
# Move both axes to log10 scale before fitting.
x = numpy.log10(x)
y = numpy.log10(y)

# Least-squares linear regression y = a*x + b on the log-log data.
a, b = numpy.polyfit(x, y, deg=1)
y_reg = a * x + b

# Start the plot; the rest of the plotting code is cut off after this excerpt.
plt.figure(figsize=(12.8, 7.2))
plt.title('Distribution of Out-Degree of Nodes In Wiki_Vote Network')
plt.xlabel(r'$\log{OutDegree}$')
def _label_top_nodes(top_n, id_pkg_dict):
    """Return (pkg, value) pairs for the (id, value) pairs of ``top_n``, in reverse order."""
    return [(id_pkg_dict[pair[0]], pair[1]) for pair in reversed(top_n)]


def _plot_files_missing(prefix, output):
    """Return True if any of ``<prefix>.<output>`` .plt/.tab/.png is not a file."""
    return not all(os.path.isfile(prefix + "." + output + ext)
                   for ext in (".plt", ".tab", ".png"))


def compute_graph_statistics(graph_path, overwrite, compute_betweenness=False):
    """Compute statistics and plots for a saved snap ``TNEANet`` graph.

    Scalar and top-N results are cached in ``<graph name>_statistics.json``
    next to the graph file; intermediate hash tables and plots are cached as
    files in the same directory. A step is skipped when its cached outputs
    already exist, unless ``overwrite`` is truthy.

    Args:
        graph_path: Path of the graph file; a ``.graph`` substring is removed
            from its base name to form the stem of every output file.
        overwrite: If truthy, recompute everything regardless of caches.
        compute_betweenness: If truthy, also compute betweenness centrality.

    Side effects:
        Changes the process working directory to the graph's directory (and
        does not restore it), writes data/plot files there, and rewrites the
        statistics JSON file.
    """
    graph_abs_path = os.path.abspath(graph_path)
    graph_name = os.path.basename(graph_abs_path).replace(".graph", "")
    fin = snap.TFIn(graph_abs_path)
    graph = snap.TNEANet.Load(fin)

    # rebuild the id => pkg dictionary
    id_pkg_dict = {}
    for node in graph.Nodes():
        id_pkg_dict[node.GetId()] = graph.GetStrAttrDatN(node.GetId(), "pkg")
    directory = os.path.dirname(graph_abs_path)
    json_path = os.path.join(directory, graph_name + "_statistics.json")
    if os.path.isfile(json_path):
        with open(json_path, "r") as f:
            statistics = json.load(f, object_pairs_hook=OrderedDict)
    else:
        statistics = OrderedDict()

    # snap.py doesn't support absolute paths for some operations. Let's cd to the directory
    os.chdir(directory)

    # general statistics
    output = os.path.join(directory, graph_name + "_main_statistics.txt")
    if not os.path.isfile(output) or overwrite:
        print("{0} Computing general statistics".format(datetime.datetime.now()))
        snap.PrintInfo(graph, "Play Store Graph -- main statistics", output, False)

    # info about the nodes with the max in degree
    if "max_in_degree" not in statistics or overwrite:
        print("{0} Computing max indegree".format(datetime.datetime.now()))
        max_in_deg_id = snap.GetMxInDegNId(graph)
        statistics["max_in_degree"] = graph.GetNI(max_in_deg_id).GetInDeg()
        statistics["max_in_degree_id"] = max_in_deg_id
        statistics["max_in_degree_pkg"] = graph.GetStrAttrDatN(max_in_deg_id, "pkg")

    # info about the nodes with the max out degree
    if "max_out_degree" not in statistics or overwrite:
        print("{0} Computing max outdegree".format(datetime.datetime.now()))
        max_out_deg_id = snap.GetMxOutDegNId(graph)
        statistics["max_out_degree"] = graph.GetNI(max_out_deg_id).GetOutDeg()
        statistics["max_out_degree_id"] = max_out_deg_id
        statistics["max_out_degree_pkg"] = graph.GetStrAttrDatN(max_out_deg_id, "pkg")

    # pagerank statistics
    output = graph_name + "_topNpagerank.eps"
    if not os.path.isfile(output) or "top_n_pagerank" not in statistics or overwrite:
        print("{0} Computing top 20 nodes with highest pagerank".format(datetime.datetime.now()))
        data_file = graph_name + "_pageranks"
        prank_hashtable = snap.TIntFltH()
        if not os.path.isfile(data_file) or overwrite:
            # Damping Factor: 0.85, Convergence difference: 1e-4, MaxIter: 100
            snap.GetPageRank(graph, prank_hashtable, 0.85)
            fout = snap.TFOut(data_file)
            prank_hashtable.Save(fout)
        else:
            fin = snap.TFIn(data_file)
            prank_hashtable.Load(fin)

        # Sorted ascending by score; the stored list is reversed (best first).
        top_n = get_top_nodes_from_hashtable(prank_hashtable)
        top_n.sort(key=itemgetter(1))
        if "top_n_pagerank" not in statistics or overwrite:
            statistics["top_n_pagerank"] = _label_top_nodes(top_n, id_pkg_dict)

        if not os.path.isfile(output) or overwrite:
            # let's build a subgraph induced on the top 20 pagerank nodes
            top_ids = [x[0] for x in top_n]
            subgraph = get_subgraph(graph, top_ids)
            labels_dict = get_labels_subset(id_pkg_dict, subgraph)
            values = snap_hashtable_to_dict(prank_hashtable, top_ids)
            plot_subgraph_colored(subgraph, labels_dict, values, "PageRank",
                                  "Play Store Graph - top 20 PageRank nodes", output, "autumn_r")

    # betweenness statistics
    output = graph_name + "_topNbetweenness.eps"
    # The cache check uses "top_n_betweenness", the key actually written
    # below; checking "betweenness" made this section recompute every time.
    if compute_betweenness and (not os.path.isfile(output) or "top_n_betweenness" not in statistics
                                or overwrite):
        print("{0} Computing top 20 nodes with highest betweenness".format(datetime.datetime.now()))
        data_file1 = graph_name + "_node_betweenness"
        data_file2 = graph_name + "_edge_betweenness"
        node_betwenness_hashtable = snap.TIntFltH()
        edge_betwenness_hashtable = snap.TIntPrFltH()
        if not os.path.isfile(data_file1) or not os.path.isfile(data_file2) or overwrite:
            snap.GetBetweennessCentr(graph, node_betwenness_hashtable, edge_betwenness_hashtable, 0.85, True)
            fout = snap.TFOut(data_file1)
            node_betwenness_hashtable.Save(fout)
            fout = snap.TFOut(data_file2)
            edge_betwenness_hashtable.Save(fout)
        else:
            fin = snap.TFIn(data_file1)
            node_betwenness_hashtable.Load(fin)
            fin = snap.TFIn(data_file2)
            edge_betwenness_hashtable.Load(fin)  # unused, as now

        top_n = get_top_nodes_from_hashtable(node_betwenness_hashtable)
        top_n.sort(key=itemgetter(1))
        if "top_n_betweenness" not in statistics or overwrite:
            statistics["top_n_betweenness"] = _label_top_nodes(top_n, id_pkg_dict)

        if not os.path.isfile(output) or overwrite:
            # let's build a subgraph induced on the top 20 betweenness nodes
            top_ids = [x[0] for x in top_n]
            subgraph = get_subgraph(graph, top_ids)
            labels_dict = get_labels_subset(id_pkg_dict, subgraph)
            values = snap_hashtable_to_dict(node_betwenness_hashtable, top_ids)
            plot_subgraph_colored(subgraph, labels_dict, values, "Betweenness",
                                  "Play Store Graph - top 20 Betweenness nodes", output)

    # HITS statistics
    output_hub = graph_name + "_topNhitshubs.eps"
    output_auth = graph_name + "_topNhitsauth.eps"
    if not os.path.isfile(output_hub) or not os.path.isfile(output_auth) or "top_n_hits_hubs" not in statistics \
            or "top_n_hits_authorities" not in statistics or overwrite:
        print("{0} Computing top 20 HITS hubs and auths".format(datetime.datetime.now()))
        data_file1 = graph_name + "_hits_hubs"
        data_file2 = graph_name + "_hits_auth"
        hubs_hashtable = snap.TIntFltH()
        auth_hashtable = snap.TIntFltH()
        if not os.path.isfile(data_file1) or not os.path.isfile(data_file2) or overwrite:
            # MaxIter = 20
            snap.GetHits(graph, hubs_hashtable, auth_hashtable, 20)
            fout = snap.TFOut(data_file1)
            hubs_hashtable.Save(fout)
            fout = snap.TFOut(data_file2)
            auth_hashtable.Save(fout)
        else:
            fin = snap.TFIn(data_file1)
            hubs_hashtable.Load(fin)
            fin = snap.TFIn(data_file2)
            auth_hashtable.Load(fin)

        top_n_hubs = get_top_nodes_from_hashtable(hubs_hashtable)
        top_n_hubs.sort(key=itemgetter(1))
        if "top_n_hits_hubs" not in statistics or overwrite:
            statistics["top_n_hits_hubs"] = _label_top_nodes(top_n_hubs, id_pkg_dict)

        top_n_auth = get_top_nodes_from_hashtable(auth_hashtable)
        top_n_auth.sort(key=itemgetter(1))
        if "top_n_hits_authorities" not in statistics or overwrite:
            statistics["top_n_hits_authorities"] = _label_top_nodes(top_n_auth, id_pkg_dict)

        if not os.path.isfile(output_hub) or not os.path.isfile(output_auth) or overwrite:
            # Union of the top hub ids and the top authority ids.
            nodes_subset = set()
            for pair in top_n_hubs:
                nodes_subset.add(pair[0])
            for pair in top_n_auth:
                nodes_subset.add(pair[0])

            # let's build a subgraph induced on the top N HITS auths and hubs nodes
            subgraph = get_subgraph(graph, nodes_subset)
            labels_dict = get_labels_subset(id_pkg_dict, subgraph)
            values = snap_hashtable_to_dict(hubs_hashtable, nodes_subset)
            values2 = snap_hashtable_to_dict(auth_hashtable, nodes_subset)
            plot_subgraph_colored(subgraph, labels_dict, values, "HITS - Hub Index",
                                  "Play Store Graph - top 20 HITS hubs + top 20 HITS authorities", output_hub, "bwr")
            plot_subgraph_colored(subgraph, labels_dict, values2, "HITS - Authority Index",
                                  "Play Store Graph - top 20 HITS hubs + top 20 HITS authorities", output_auth,
                                  "bwr_r")

    # The distribution plots below are each checked against three files named
    # <prefix>.<output>.plt / .tab / .png and regenerated if any is missing.

    # indegree histogram
    output = graph_name + "_indegree"
    if _plot_files_missing("inDeg", output) or overwrite:
        print("{0} Computing indegree distribution".format(datetime.datetime.now()))
        snap.PlotInDegDistr(graph, output, "Play Store Graph - in-degree Distribution")

    # outdegree histogram
    output = graph_name + "_outdegree"
    if _plot_files_missing("outDeg", output) or overwrite:
        print("{0} Computing outdegree distribution".format(datetime.datetime.now()))
        snap.PlotOutDegDistr(graph, output, "Play Store Graph - out-degree Distribution")

    # strongly connected components print
    output = graph_name + "_scc"
    if _plot_files_missing("scc", output) or overwrite:
        print("{0} Computing scc distribution".format(datetime.datetime.now()))
        snap.PlotSccDistr(graph, output, "Play Store Graph - strongly connected components distribution")

    # weakly connected components print
    output = graph_name + "_wcc"
    if _plot_files_missing("wcc", output) or overwrite:
        print("{0} Computing wcc distribution".format(datetime.datetime.now()))
        snap.PlotWccDistr(graph, output, "Play Store Graph - weakly connected components distribution")

    # clustering coefficient distribution
    output = graph_name + "_cf"
    if _plot_files_missing("ccf", output) or overwrite:
        print("{0} Computing cf distribution".format(datetime.datetime.now()))
        snap.PlotClustCf(graph, output, "Play Store Graph - clustering coefficient distribution")

    # shortest path distribution
    output = graph_name + "_hops"
    if _plot_files_missing("hop", output) or overwrite:
        print("{0} Computing shortest path distribution".format(datetime.datetime.now()))
        snap.PlotHops(graph, output, "Play Store Graph - Cumulative Shortest Paths (hops) distribution", True)

    # k-core edges distribution
    output = graph_name + "_kcore_edges"
    if _plot_files_missing("coreEdges", output) or overwrite:
        print("{0} Computing k-core edges distribution".format(datetime.datetime.now()))
        snap.PlotKCoreEdges(graph, output, "Play Store Graph - K-Core edges distribution")

    # k-core nodes distribution
    output = graph_name + "_kcore_nodes"
    if _plot_files_missing("coreNodes", output) or overwrite:
        print("{0} Computing k-core nodes distribution".format(datetime.datetime.now()))
        snap.PlotKCoreNodes(graph, output, "Play Store Graph - K-Core nodes distribution")

    # Persist the (possibly updated) statistics cache.
    with open(json_path, 'w') as outfile:
        json.dump(statistics, outfile, indent=2)
예제 #10
0
def maxOutdegree(G):
    """Return the out-degree of the node that snap reports as having the maximum out-degree in ``G``."""
    return G.GetNI(snap.GetMxOutDegNId(G)).GetOutDeg()