Beispiel #1
0
def get_thread_text(comments):
    """Return the text of all comments as one string, grouped by thread.

    Every comment is linked to its parent in an undirected snap graph and
    each component reported by ``snap.GetSccs`` is treated as one thread.
    The texts inside a thread are joined with single spaces, and the
    threads themselves are joined with single spaces as well.

    Side effect: the ``object_id``, ``parent_id`` and ``points`` columns of
    ``comments`` are cast to integers in place.

    Args:
        comments: DataFrame-like object with the columns ``object_id``,
            ``parent_id``, ``points`` and ``comment_text``.

    Returns:
        The concatenated, HTML-unescaped, ASCII-only thread text.
    """
    # Normalise the id / score columns on the caller's frame.
    comments.object_id = comments.object_id.astype(int)
    comments.parent_id = comments.parent_id.astype(int)
    comments.points = comments.points.astype(float).astype(int)

    # One graph node per id seen as a comment or as a parent, one edge per
    # (comment, parent) row.
    reply_graph = snap.TUNGraph.New()
    for node_id in set(comments.object_id).union(set(comments.parent_id)):
        reply_graph.AddNode(node_id)
    for child_id, parent_id in comments[['object_id',
                                         'parent_id']].values.tolist():
        reply_graph.AddEdge(child_id, parent_id)

    threads = snap.TCnComV()
    snap.GetSccs(reply_graph, threads)

    thread_texts = []
    for thread in threads:
        belongs_to_thread = comments['object_id'].isin(thread)
        # astype(str) turns non-string cells (e.g. floats) into text first.
        raw_texts = comments[belongs_to_thread].comment_text.astype(str)
        cleaned = [
            htmlParser.unescape(
                raw.decode('ascii', errors='replace').encode('ascii', 'ignore'))
            for raw in raw_texts
        ]
        thread_texts.append(" ".join(cleaned))
    return " ".join(thread_texts)
Beispiel #2
0
def getStronglyConnectedComponents(Graph, node_to_g):
    """Map every node label to the number of its strongly connected component.

    Args:
        Graph: snap graph handed to ``snap.GetSccs``.
        node_to_g: mapping from snap node id to the label used as key in
            the result.

    Returns:
        dict of (``node_to_g[node]`` -> component number). Components are
        numbered from 1 in the order ``snap.GetSccs`` reports them.
    """
    sccs = snap.TCnComV()
    snap.GetSccs(Graph, sccs)
    # enumerate(..., start=1) gives the 1-indexed component membership.
    return {
        node_to_g[node_id]: component_no
        for component_no, scc in enumerate(sccs, start=1)
        for node_id in scc
    }
Beispiel #3
0
def get_component_distribution(ei_graph):
    """Count how many strongly connected components exist per size.

    The components are computed on ``ei_graph.base()`` with
    ``snap.GetSccs``; see
    https://snap.stanford.edu/snappy/doc/reference/GetSccs.html

    returns: Counter of (size of component -> num of such components)
    """
    sccs = snap.TCnComV()
    snap.GetSccs(ei_graph.base(), sccs)

    size_histogram = Counter()
    for scc in sccs:
        size_histogram[scc.Len()] += 1
    return size_histogram
Beispiel #4
0
def computeStronglyConnectedComponents(graph, outFile):
    """Export the strongly connected components of ``graph`` to a text file.

    Each node id is written on its own line; the ids of one component are
    followed by an empty line that separates it from the next component.

    Args:
        graph: snap graph handed to ``snap.GetSccs``.
        outFile: path of the file to (over)write.
    """
    logger.info("Computing Strongly Connected Components")
    # The context manager guarantees the file is flushed and closed, even
    # when computing or writing the components raises.
    with open(outFile, 'w') as fw_cc:
        Components = snap.TCnComV()
        snap.GetSccs(graph, Components)
        for CnCom in Components:
            for item in CnCom:
                fw_cc.write(str(item) + "\n")
            fw_cc.write("\n")
    logger.info("Strongly Connected Components Computed!")
    logger.info("Strongly Connected Components Exported to " + outFile)
def processNetwork(Graph, id_to_groups):
    """Write a plain-text feature report for ``Graph``.

    The report goes to ``../../data/fastinf_graph_noweights_features.txt``
    and contains, in this order: overall edge/node counts, the largest
    weakly connected component, all WCCs with at least 10 nodes, the
    largest strongly connected component, all SCCs with at least 10 nodes,
    and finally clustering coefficient, triad count and modularity.

    Args:
        Graph: snap graph to describe.
        id_to_groups: mapping from node id to the label printed for it.
    """

    def write_largest_component(out, heading, subgraph):
        # Counts first, then the member ids, then the members' labels.
        out.write(heading)
        out.write('Edges: %f ' % subgraph.GetEdges())
        out.write('Nodes: %f \n' % subgraph.GetNodes())
        out.write('Node List: ')
        for member in subgraph.Nodes():
            out.write('%d, ' % member.GetId())
        out.write('\n')
        for member in subgraph.Nodes():
            out.write('%s, ' % id_to_groups[member.GetId()])

    def write_large_components(out, heading, find_components, row_format):
        # Components with fewer than 10 nodes are left out; the printed
        # index is the position in the full component list.
        out.write(heading)
        found = snap.TCnComV()
        find_components(Graph, found)
        for index, component in enumerate(found):
            if component.Len() >= 10:
                out.write(row_format % index)
                for member_id in component:
                    out.write('%d, ' % member_id)

    with open("../../data/fastinf_graph_noweights_features.txt",
              "w+") as report:
        report.write("RELATED GROUPS GRAPH:\n")
        report.write('Edges: %d\n' % Graph.GetEdges())
        report.write('Nodes: %d\n\n' % Graph.GetNodes())

        write_largest_component(report, "MAX WCC:\n", snap.GetMxWcc(Graph))
        write_large_components(report, "\n\nALL WCCs:", snap.GetWccs,
                               '\nWcc%d: ')

        write_largest_component(report, "\n\nMAX SCC:\n",
                                snap.GetMxScc(Graph))
        write_large_components(report, "\n\nALL SCCs:", snap.GetSccs,
                               '\nScc%d: ')

        report.write('\n\nCLUSTERING AND COMMUNITIES:\n')
        report.write('Clustering coefficient: %f\n' %
                     snap.GetClustCf(Graph, -1))
        report.write('Num Triads: %d\n' % snap.GetTriads(Graph, -1))
        # Modularity is evaluated for the community made of every node.
        all_ids = snap.TIntV()
        for member in Graph.Nodes():
            all_ids.Add(member.GetId())
        report.write('Modularity: %f' % snap.GetModularity(Graph, all_ids))
Beispiel #6
0
    def sccs(self, returnNodes=True):
        """
        Returns the strongly connected components of the raw graph, largest first.

        Each component is converted with SnapUtil.rawComponentToNodeSet: a set of nodes, or just
        the IDs if returnNodes is false (note that getting the nodes themselves adds overhead).
        """
        rawComponents = snap.TCnComV()
        snap.GetSccs(self.rawGraph, rawComponents)

        nodeSets = [
            SnapUtil.rawComponentToNodeSet(component, self, returnNodes)
            for component in rawComponents
        ]
        # Stable sort: components of equal size keep the order snap reported them in.
        return sorted(nodeSets, key=len, reverse=True)
Beispiel #7
0
def label_nodes_SCCs(G):
    """Tag every node of ``G`` with the index of its strongly connected component.

    ``G`` is converted with ``networkx_to_snappy(G, True)`` (presumably a
    directed snap graph, going by the original variable name -- confirm
    against that helper), its components are computed with
    ``snap.GetSccs``, and the position of each node's component in that
    result is stored, as a string, in the node attribute ``"SCC"``.

    Args:
        G: graph exposing ``nodes()`` and a ``nodes[node]`` attribute dict.

    Returns:
        The same ``G`` object, modified in place.

    Raises:
        KeyError: if a node of ``G`` is not among the ids snap reports
            (assumes the conversion keeps node ids unchanged -- TODO confirm).
    """
    snappy_directed = networkx_to_snappy(G, True)
    components = snap.TCnComV()
    # The result is delivered through ``components``; the call's return
    # value was never used, so it is no longer bound to a local.
    snap.GetSccs(snappy_directed, components)

    # {node: scc_id}
    nodes_sccs = {
        n: i
        for i, CnCom in enumerate(components)
        for n in CnCom
    }

    for node in G.nodes():
        G.nodes[node]["SCC"] = str(nodes_sccs[node])

    return G
 def build_chunk(self):
     """Connect, in ``self.usersGraph``, all users who share a comment thread.

     The old and new comment chunks are concatenated, their authors are
     registered, and a comment graph is built from them. Each component
     that ``snap.GetSccs`` reports for that graph is treated as one
     thread; every pair of distinct user ids found in a thread gets an
     edge in ``self.usersGraph`` unless the edge already exists.
     """
     comments = pd.concat([self.oldchunk, self.newchunk])
     self.register_users(comments.author.unique())
     commentsGraph = self.build_comment_graph(comments)
     commentThreads = snap.TCnComV()
     snap.GetSccs(commentsGraph, commentThreads)
     for commentThread in commentThreads:
         commentsInThread = comments[comments['object_id'].isin(
             commentThread)]
         # A set, so each user appears once however often they commented.
         userIdsInThread = {
             self.user_ids[un] for un in commentsInThread.author.values
         }
         # Fix: the loop used to read the misspelled ``usersIdsInThread``,
         # which raised NameError on the first thread.
         for u1, u2 in combinations(userIdsInThread, 2):
             if not self.usersGraph.IsEdge(u1, u2):
                 self.usersGraph.AddEdge(u1, u2)
Beispiel #9
0
def is_uniquely_connected(graph):
    """Tell whether ``graph`` has exactly one component with more than one node.

    The strongly connected components are checked first. Only when they do
    not contain exactly one multi-node component is the same check repeated
    on the weakly connected components.

    Args:
        graph: snap graph handed to ``snap.GetSccs`` / ``snap.GetWccs``.

    Returns:
        True if the strongly connected components (or, failing that, the
        weakly connected components) contain exactly one component with
        more than one node, otherwise False.
    """
    def is_unique(components):
        # Exactly one component may hold more than a single node.
        return sum(1 for comp in components if comp.Len() > 1) == 1

    # First identify if there are strongly connected components in the graph
    s_components = snap.TCnComV()
    snap.GetSccs(graph, s_components)
    unique = is_unique(s_components)

    # If there is a unique strongly connected component we don't need to
    # look at the weak ones. Fix: the condition used to test the helper
    # function ``is_unique`` itself (always truthy), so this fallback could
    # never run; it must test the computed result.
    if not unique:
        w_components = snap.TCnComV()
        snap.GetWccs(graph, w_components)
        unique = is_unique(w_components)

    return unique
Beispiel #10
0
def main():
    citation = False

    if citation:
        folder = '../data/citation_networks/'
    else:
        folder = '../data/networks/'
    AssigneeGraphs = load_networks(folder)
    print "Generating features..."
    for AGraph in tqdm(AssigneeGraphs):
        # Calculate network features
        Graph = AGraph.Graph
        node_count = Graph.GetNodes()
        if node_count <= 0:
            print "0 nodes", AGraph.company_name
            continue
        edge_count = Graph.GetEdges()
        cc = snap.GetClustCf(Graph)
        Components = snap.TCnComV()
        snap.GetSccs(Graph, Components)
        num_sccs = len(Components)
        MxScc = snap.GetMxScc(Graph)
        max_scc_proportion = float(MxScc.GetNodes()) / node_count
        avg_patents_per_inventor =float(AGraph.metadata['number_of_patents']) / node_count
        modularity = get_modularity(Graph)
        net_stats = NetworkStats(node_count=node_count, edge_count=edge_count, clustering_cf=cc,
            num_sccs=num_sccs, max_scc_proportion=max_scc_proportion,
            avg_patents_per_inventor=avg_patents_per_inventor, modularity=modularity)
        AGraph.metadata['node_count'] = node_count
        AGraph.metadata['edge_count'] = edge_count
        AGraph.metadata['clustering_cf'] = cc
        AGraph.metadata['num_sccs'] = num_sccs
        AGraph.metadata['max_scc_proportion'] = max_scc_proportion
        AGraph.metadata['avg_patents_per_inventor'] = avg_patents_per_inventor
        AGraph.metadata['modularity'] = modularity
        with open(folder + AGraph.company_name + '.json', 'w') as fp:
            json.dump(AGraph.metadata, fp, sort_keys=True, indent=4)
    print len(AssigneeGraphs)
Beispiel #11
0
def calc_net_stats(folder):
		stats = []
		print "Loading features..."
		for AGraph in tqdm(AssigneeGraphs):
				# Calculate network features
				Graph = AGraph.Graph
				node_count = Graph.GetNodes()
				if node_count <= 0:
					# print "0 nodes", AGraph.company_name
					continue
				edge_count = Graph.GetEdges()
				cc = snap.GetClustCf(Graph)
				Components = snap.TCnComV()
				snap.GetSccs(Graph, Components)
				num_sccs = len(Components)
				MxScc = snap.GetMxScc(Graph)
				max_scc_proportion = float(MxScc.GetNodes()) / node_count
				avg_patents_per_inventor =float(AGraph.metadata['number_of_patents']) / node_count
				modularity = get_modularity(Graph)
				net_stats = NetworkStats(node_count=node_count, edge_count=edge_count, clustering_cf=cc,
					num_sccs=num_sccs, max_scc_proportion=max_scc_proportion,
					avg_patents_per_inventor=avg_patents_per_inventor, modularity=modularity)
				stats.append(net_stats)
		return stats
Beispiel #12
0
# Script fragment: loads a graph from 'vertices_<arg>.txt' / 'edges_<arg>.txt'
# (where <arg> is sys.argv[1]) into G1 and derives a "connectedness" score.
# G1 is created outside the visible snippet.

# Read one node id per line and count the vertices while adding them.
# NOTE(review): textFile is never closed in the visible code.
numVertices = 0
textFile = open('vertices_' + str(sys.argv[1]) + '.txt')
lines = textFile.readlines()
for line in lines:
    stripped_line = line.rstrip('\n')
    G1.AddNode(int(stripped_line))
    numVertices = numVertices + 1

# read in edges: the first two whitespace-separated integers of each line
# are the edge's end points
with open('edges_' + str(sys.argv[1]) + '.txt') as f:
    for line in f:
        int_list = [int(i) for i in line.split()]
        G1.AddEdge(int_list[0], int_list[1])

# NOTE(review): Components is only consumed by the commented-out loop below.
Components = snap.TCnComV()
snap.GetSccs(G1, Components)
#for CnCom in Components:
#    print "Size of component: %d" % CnCom.Len()

# Sum the second value of every (size, count) pair, i.e. the number of
# components per size according to the commented-out print below.
total = 0
ComponentDist = snap.TIntPrV()
snap.GetSccSzCnt(G1, ComponentDist)
for comp in ComponentDist:
    #print "Size: %d - Number of Components: %d" % (comp.GetVal1(), comp.GetVal2())
    total = total + comp.GetVal2()

# The factor 1.0 forces floating-point division.
connectedness = total / (numVertices * numVertices * 1.0)

# Fixed-point text with 10 decimals.
strValue = str.format("{0:.10f}", connectedness)

# NOTE(review): the snippet ends here; writing strValue and closing f are
# not visible.
f = open('connectedness_' + str(sys.argv[1]) + '.txt', 'w')
import snap
# import os

# Graph = snap.GenRndGnm(snap.PNGraph, 100, 1000)
# print os.system("pwd")
# Script fragment (Python 2): loads a directed graph from an edge list and
# inspects its first strongly / weakly connected components.
# Source and destination ids are read from columns 0 and 1 of the file.
Graph = snap.LoadEdgeList(snap.PNGraph, "../bitcoin_computed/txedgeunique.txt",
                          0, 1)
G_Nodes = Graph.GetNodes()
G_Edges = Graph.GetEdges()
print "Graph: Nodes %d, Edges %d" % (G_Nodes, G_Edges)

SCComponents = snap.TCnComV()
WCComponents = snap.TCnComV()

snap.GetSccs(Graph, SCComponents)
snap.GetWccs(Graph, WCComponents)

# NOTE(review): the names assume that index 0 holds the largest component;
# nothing in the visible code sorts the vectors or checks this -- confirm.
MaxWCCNodes = WCComponents[0]
MaxSCCNodes = SCComponents[0]
# print type(MaxSccNodes)
print MaxSCCNodes.Len()
print MaxWCCNodes.Len()

# Iterate over each edge and check for In, Out

# Key set holding the node ids of the chosen SCC.
SCCHashmap = snap.TIntH()
for node in MaxSCCNodes:
    SCCHashmap.AddKey(node)

InOutHashmap = snap.TIntH()
for node in MaxWCCNodes:
Beispiel #14
0
def get_strongly_connected_components_number(graph: snap.PNGraph):
    """Compute the strongly connected components of ``graph``.

    NOTE(review): no ``return`` statement is visible, so as written the
    function returns ``None``. The name suggests the number of components
    was meant to be returned -- confirm against the full source.
    """
    components = snap.TCnComV()
    snap.GetSccs(graph, components)
Beispiel #15
0
    print "Approx. effective diameter in " + input_file + " with sampling ", i, " nodes: ", round(
        diameter[index], 3)
    index = index + 1

# Script fragment (Python 2). ``diameter``, ``input_file``, ``Graph1`` and
# ``final_nodes`` are defined outside the visible snippet.

# Mean and variance of the diameter estimates. Both formulas are hard-coded
# for exactly three samples (the variance divides by 3 - 1 = 2).
mean = float(sum(diameter) / 3.0)
variance = float((pow((diameter[0] - mean), 2) + pow(
    (diameter[1] - mean), 2) + pow((diameter[2] - mean), 2)) / 2.0)

print "Approx. effective diameter in " + input_file + " (mean and variance): ", round(
    mean, 3), ", ", round(variance, 3)

snap.PlotShortPathDistr(Graph1, "shortest_path_plot_" + input_file,
                        "Undirected graph - shortest path", 1000)
print "Shortest path distribution of " + input_file + " is in: diam.shortest_path_plot_" + input_file + ".png"

# Despite its name, ``largest_component`` receives every component; the
# loop below then picks out the largest size.
largest_component = snap.TCnComV()
snap.GetSccs(Graph1, largest_component)
largest = 0.0

for item in largest_component:
    if largest < item.Len():
        largest = item.Len()

print ""

print "Fraction of nodes in largest connected component in " + input_file + ": ", float(
    largest) / float(final_nodes)

snap.PlotSccDistr(Graph1, "conn_components_plot_" + input_file,
                  "Undirected graph - Connected components distribution")
print "Component size distribution of " + input_file + " is in: scc.conn_components_plot_" + input_file + ".png"
Beispiel #16
0
"""

snap.DelDegKNodes(G0,1,0)
snap.DelDegKNodes(G0,1,0)
snap.DelDegKNodes(G0,1,0)
snap.DelDegKNodes(G0,1,0)

snap.PrintInfo(G0)


DegToCntV = snap.TIntPrV()
snap.GetOutDegCnt(G0, DegToCntV)
for item in DegToCntV:
    print "%d nodes with out-degree %d" % (item.GetVal2(), item.GetVal1())
"""
"""
Components = snap.TCnComV()
snap.GetSccs(G0, Components)
for CnCom in Components:
    print "Size of component: %d" % CnCom.Len()
"""
"""
DegToCntV = snap.TIntPrV()
snap.GetInDegCnt(G0, DegToCntV)
for item in DegToCntV:
    print "%d nodes with in-degree %d" % (item.GetVal2(), item.GetVal1())
"""
"""
for outDeg in range(25,3200):
	for inDeg in range (15,20):
		snap.DelDegKNodes(G0,outDeg,inDeg)
# Compute degree distribution and save it to an external textfile
# NOTE(review): ``s`` and ``u_rndm_graph`` are not defined in the visible
# snippet; ``s`` is presumably an alias of the snap module -- confirm.
degree_vertex_count = snap.TIntPrV()
s.GetOutDegCnt(u_rndm_graph, degree_vertex_count)
file = open("graph_rdm_undirected_degree_distrib.txt", "w")
file.write("#----------------------------------\n")
file.write("#       Degree Distribution        \n")
file.write("#----------------------------------\n")
file.write("\n")
# One line per (degree, number of vertices) pair.
for pairs in degree_vertex_count:
     file.write("vertex degree %d: nmbr vertices with such degree %d \n" % (pairs.GetVal1(), pairs.GetVal2()))
file.close()


# Compute the sizes of the connected component and save it to an external file
Components = snap.TCnComV()
snap.GetSccs(u_rndm_graph, Components)
file_2 = open("graph_rdm_undirected_connected_compo_sizes.txt", "w")
file_2.write("#----------------------------------\n")
file_2.write("#   Size of Connected Components   \n")
file_2.write("#----------------------------------\n")
file_2.write("\n")
file_2.write("Total number of different components = %d\n" % len(Components))
file_2.write("\n")
# NOTE(review): ``i`` is not used by the loop below, which numbers the
# components from 0 through ``idx``.
i = 1
for idx, component in enumerate(Components):
        file_2.write("Size of component #%d : %d\n" % (idx, len(component)))
file_2.close()


# Output the average of the shortest paths, adding more edges to the graph if it's not connected
average_shortest_paths = []
Beispiel #18
0
def strongConnectedComponent(clusterCommands, Graph, conn, cur):
    """Compute the strongly connected components of ``Graph`` and pass them on.

    The components are handed to ``createTable`` together with
    ``clusterCommands``, ``conn`` and ``cur``. Nothing is returned.
    """
    sccs = snap.TCnComV()
    snap.GetSccs(Graph, sccs)
    createTable(clusterCommands, sccs, conn, cur)
Beispiel #19
0
# Script fragment: B simulation rounds. ``B``, ``data``, ``theta``,
# ``DegSeqV``, ``D_only``, ``timing`` and the gen_* / compute_NE helpers are
# defined outside the visible snippet, and the loop body is truncated.

# One row of two values per round; not filled in the visible part.
takeup_bounds = np.zeros((B,2))

U_exo = gen_exo(data, theta)

for b in range(B):
    # Progress output, flushed immediately.
    print(b)
    sys.stdout.flush()

    # One logistic draw per row of U_exo.
    eps = np.random.logistic(size=U_exo.shape[0])
    U_exo_eps = U_exo + eps
    A = snap.GenConfModel(DegSeqV)

    # Timed section: building D, its components and (optionally) NE_sets.
    start_time = time.time()
    D = gen_D(U_exo_eps, A, theta[1])
    components = snap.TCnComV()
    snap.GetSccs(D, components)
    component_lens = [C.Len() for C in components]
    # Size of the largest component of D.
    print('Delta = {}.'.format(max(component_lens)))
    NE_sets = compute_NE(D, components, A, U_exo_eps, theta[1]) if not D_only else []
    end_time = time.time()
    timing[b] = end_time - start_time

    # Product of len(NE_sets[i]) over all components; it collapses to 0 as
    # soon as one component contributes 0. With D_only set, every component
    # contributes 0, so the loop stops at the first one.
    num_equil = 1
    total_takeup = []
    for i,C in enumerate(components):
        num = len(NE_sets[i]) if not D_only else 0
        if num == 0:
            num_equil = 0
            break
        else:
            num_equil *= num
Beispiel #20
0
def numSccs(G):
    """Return how many strongly connected components ``snap.GetSccs`` finds in ``G``."""
    components = snap.TCnComV()
    snap.GetSccs(G, components)
    return len(components)
Beispiel #21
0
node_map_SCC = {
}  # Dictionary mapping each node to the super node that represents in the SCC graph
node_map_cmprss = {
}  # Dictionary mapping each node to the super node that represents it in the compressed graph
sets_of_same_descendants = [
]  # List containing sets of nodes, where every node in that set has the same descendants
all_descendants = collections.OrderedDict(
)  # Dictionary that maps every node to a set of all its descendants
ancestors = collections.OrderedDict(
)  # Dictionary that maps every node to a set of all its ancestors
to_combine = [
]  # List containing sets of nodes, where every node in that set has the same ancestors and descendants
all_nodes = []  # List containing all nodes as Node data type rather than ints

# Strongly connected components of ``graph`` as it is before any edge is
# removed below.
Components = snap.TCnComV()
snap.GetSccs(graph, Components)

for CnCom in Components:

    # If the size of the connected component is greater than 1, create an empty set.
    # Add all nodes from CnCom into the set, and delete all edges between the nodes in CnCom.
    # NOTE(review): the code below does not read the members of CnCom. It
    # takes the largest SCC of the current (already modified) ``graph`` via
    # snap.GetMxScc and works on that one -- confirm this matches the intent
    # described above.
    if CnCom.Len() > 1:
        nodes = set()
        MxScc = snap.GetMxScc(graph)
        for EI in MxScc.Edges():
            # Remember both end points, then drop the edge from ``graph``.
            nodes.add(EI.GetSrcNId())
            nodes.add(EI.GetDstNId())
            graph.DelEdge(EI.GetSrcNId(), EI.GetDstNId())

        # Create a new node that will represent all nodes from CnCom and
        # map the new node to all nodes in CnCom
# Script fragment with debug output. ``G2`` and ``generate_meaning_graph``
# are defined outside the visible snippet; the meaning of the three boolean
# arguments cannot be told from here.

# Node count of the largest SCC of G2 (printed twice, once with a marker).
GW = snap.GetMxScc(G2)
print(GW.GetNodes())
print(GW.GetNodes(), "lolmero")

# Same inspection for a second graph: size, then its largest SCC.
G4, id2, synset2, _, _, _ = generate_meaning_graph(True, False, True)
print(G4.GetNodes())
print(G4.GetEdges())
GW = snap.GetMxScc(G4)
print(GW.GetNodes())
print(GW.GetNodes(), "lolmerohyp")

# NOTE(review): ``id`` shadows the Python builtin of the same name.
G, id, synset, _, _, _ = generate_meaning_graph(False, True, False)
print(G.GetNodes())
print(G.GetEdges())
# Print the sizes of the first 11 strongly connected components of G (the
# loop stops once ``count`` exceeds 10).
Gs = snap.TCnComV()
snap.GetSccs(G, Gs)
count = 0
for G3 in Gs:
    print(G3.Len())
    count += 1
    if count > 10:
        break
print("lolpoly")

paths = [0] * 50
count = 0
for edge in G.Edges():
    path = snap.GetShortPath(G2, synset2[id[edge.GetSrcNId()]],
                             synset2[id[edge.GetDstNId()]])
    paths[path] += 1
    if path == 2:
Beispiel #23
0
# fill CnCom through GetNodeWcc for node 1 and report its size
snap.GetNodeWcc(G, 1, CnCom)
print("CnCom.Len() = %d" % CnCom.Len())

# list every weakly connected component together with each of its members
WCnComV = snap.TCnComV()
snap.GetWccs(G, WCnComV)
for i in range(WCnComV.Len()):
    print("WCnComV[%d].Len() = %d" % (i, WCnComV[i].Len()))
    for j in range(WCnComV[i].Len()):
        print("WCnComV[%d][%d] = %d" % (i, j, WCnComV[i][j]))

# size of the maximum weakly connected component, as reported by GetMxWccSz
MxWccSz = snap.GetMxWccSz(G)
print("MxWccSz = %.5f" % MxWccSz)

# the largest weakly connected component as a graph of its own
GMx = snap.GetMxWcc(G)
print("GMx: GetNodes() = %d, GetEdges() = %d" % (
    GMx.GetNodes(),
    GMx.GetEdges(),
))

# sizes of all strongly connected components
SCnComV = snap.TCnComV()
snap.GetSccs(G, SCnComV)
for i in range(SCnComV.Len()):
    print("SCnComV[%d].Len() = %d" % (i, SCnComV[i].Len()))

# the largest bi-connected component as a graph of its own
GMxBi = snap.GetMxBiCon(G)
print("GMxBi: GetNodes() = %d, GetEdges() = %d" % (
    GMxBi.GetNodes(),
    GMxBi.GetEdges(),
))
Beispiel #24
0
                             Edges, 1.0)
    # Prepare BetweenessList Of List
    for edge in Edges:
        betweennessSubList = [edge.GetVal1(), edge.GetVal2(), Edges[edge]]
        betweennessList.append(betweennessSubList)

    # Descending  Order Sort Betweenness and take  highest betweenness
    betweennessList.sort(key=lambda x: x[-1], reverse=True)
    # Remove the edge with highest betweenness
    ''' NOTE: ONLY FIRST ROW USE FOR DELETE EDGES( HIGHEST BETWEENESS )'''
    GraphkarateclubMaintainForDeleteEdges.DelEdge(betweennessList[0][0],
                                                  betweennessList[0][1])
    '''Compute the modularity of the resultant graph'''
    Components = snap.TCnComV()
    # GetSccs For Components
    snap.GetSccs(GraphkarateclubMaintainForDeleteEdges, Components)
    Modularity = snap.GetModularity(Graphkarateclub, Components)

    # Add Modularity Value Append in Global List ModularityList
    ModularityList.append(Modularity)
    ''' community structure for which the graph has highest modularity'''
    if Modularity > CheckModularity:
        CheckModularity = Modularity
        CommunityList = Components

# Report (Python 2 print statements): the highest modularity value that was
# collected in ModularityList, followed by the members of every community in
# CommunityList. Both names are filled outside this span.
print "The modularity of the network is %f" % max(ModularityList)
'''Output the community structure for which the graph has highest modularity'''
for Cmty in CommunityList:
    print "Community: "
    # One node id per line.
    for NI in Cmty:
        print NI