Example #1
0
def getDegCentr(graph):
    """Return the Freeman degree centralization of ``graph``.

    The numerator adds up, over every node, the gap between the degree
    centrality of a maximum-degree node and that node's own degree
    centrality. The sum is divided by ``n - 2``, where ``n`` is the number
    of nodes, so a two-node graph raises ``ZeroDivisionError``.
    """
    hub_id = snap.GetMxDegNId(graph)
    hub_centrality = snap.GetDegreeCentr(graph, hub_id)
    node_count = graph.GetNodes()

    gap_total = 0.
    for node in graph.Nodes():
        gap_total += hub_centrality - snap.GetDegreeCentr(graph, node.GetId())

    return gap_total / (node_count - 2)
def Degree(d, e):
    """Build an undirected graph from an edge file and weight its edges.

    The file at path ``d`` is split on newlines. Its first line holds
    ``<node count> <edge count>`` separated by single spaces; each of the
    following ``<edge count>`` lines holds ``<u> <v>``. Nodes are numbered
    ``0 .. node count - 1``.

    Returns a dict mapping both ``(u, v)`` and ``(v, u)`` to ``e`` times the
    first endpoint's share of the two endpoints' summed degree centrality:
    ``e * C(first) / (C(u) + C(v))``.
    """
    # Read everything up front so the handle is closed before graph work.
    with open(d) as f:
        s1 = re.split('\n', f.read())

    header = re.split(' ', s1[0])
    node_count = int(header[0])
    edge_count = int(header[1])

    G1 = snap.PUNGraph.New()
    for node_id in range(node_count):
        G1.AddNode(node_id)

    # Parse each edge line once and reuse the endpoints for the weights.
    edges = []
    for i in range(1, edge_count + 1):
        fields = re.split(' ', s1[i])
        u, v = int(fields[0]), int(fields[1])
        G1.AddEdge(u, v)
        edges.append((u, v))

    DegCentr = dict()
    for NI in G1.Nodes():
        DegCentr[NI.GetId()] = snap.GetDegreeCentr(G1, NI.GetId())

    EdgePara = dict()
    for u, v in edges:
        total = DegCentr[u] + DegCentr[v]
        EdgePara[(u, v)] = e * DegCentr[u] / total
        EdgePara[(v, u)] = e * DegCentr[v] / total

    return EdgePara
Example #3
0
def degree(rankCommands, Graph, conn, cur):
    DegreeH = snap.TIntFltH()
    before_time = time.time()
    for NI in Graph.Nodes():
        DegreeH[NI.GetId()] = snap.GetDegreeCentr(Graph, NI.GetId())
    print "Total handling time is: ", (time.time() - before_time)
    slist = sorted(DegreeH, key=lambda key: DegreeH[key], reverse=True)
    createTable(rankCommands, slist, DegreeH, conn, cur)
Example #4
0
def calc_DegreeCentrality(Graph, node_to_g):
    """Map each node's label to its degree centrality.

    ``node_to_g`` translates a SNAP node id into the key used in the
    returned dict; every node id of ``Graph`` must be present in it.
    """
    centrality_by_label = {}
    for node in Graph.Nodes():
        node_id = node.GetId()
        label = node_to_g[node_id]
        centrality_by_label[label] = snap.GetDegreeCentr(Graph, node_id)
    return centrality_by_label
Example #5
0
 def rank_degree(self):
     """ Collect the degree centrality of every node in ``self._graph``.

     The scores are gathered in a dict keyed by node ID and passed through
     ``snap_hash_to_dict``, whose result is returned; any ordering of the
     result is up to that helper. Asserts that one score was collected
     per expected node (``self._num_nodes``). """
     graph = self._graph
     scores = {}
     for node in graph.Nodes():
         node_id = node.GetId()
         scores[node_id] = snap.GetDegreeCentr(graph, node_id)
     assert len(scores) == self._num_nodes, 'Number of nodes must match'
     return snap_hash_to_dict(scores)
def get_node_centrality(snap_graph):
    """Return node ids and their degree centralities as two parallel arrays.

    The first array is ``uint32`` node ids in graph iteration order; the
    second is the matching ``float32`` degree centralities.
    """
    pairs = [(node.GetId(), snap.GetDegreeCentr(snap_graph, node.GetId()))
             for node in snap_graph.Nodes()]
    nids = [node_id for node_id, _ in pairs]
    deg_centr = [centr for _, centr in pairs]

    id_array = np.asarray(nids, dtype='uint32')
    centr_array = np.asarray(deg_centr, dtype='float32')
    return id_array, centr_array
Example #7
0
def degreeCentrality(graph, x):
    """Return the ``x`` nodes of ``graph`` with the highest degree centrality.

    The result is a list of ``[node_id, centrality]`` pairs ordered from the
    highest centrality down; ``x`` is converted with ``int`` before slicing.
    """
    ranked = [[node.GetId(), snap.GetDegreeCentr(graph, node.GetId())]
              for node in graph.Nodes()]
    # Stable sort: nodes with equal centrality keep graph iteration order.
    ranked.sort(key=lambda entry: entry[1], reverse=True)
    return ranked[:int(x)]
Example #8
0
 def GetMaxKDegreeCentrality(self, k):
     lstDeg = []
     nodesId = []
     for NI in self.graph.Nodes():
         DegCentr = snap.GetDegreeCentr(self.graph, NI.GetId())
         nodesId.append(NI.GetId())
         lstDeg.append(DegCentr)
     print lstDeg, nodesId
     return self.GetMaxK(lstDeg, nodesId, k)
Example #9
0
def model_degree(G):
    """Plot the degree centrality of every node of ``G`` via ``picture``.

    Node ids form the x values and their degree centralities the y values.
    """
    node_ids = [node.GetId() for node in G.Nodes()]
    centralities = [snap.GetDegreeCentr(G, node_id) for node_id in node_ids]
    picture(node_ids, centralities, 'degree centrality', 'node', 'centrality')
 def sample_degree_centrality(self, n_node=100):
     '''
     Return the degree centrality of a random sample of nodes.

     Degree centrality of a node is defined as its degree/(N-1), where N is
     the number of nodes in the network. At most ``self.num_nodes`` nodes
     are drawn from ``self.nodes``, without replacement.

     :param n_node: number of nodes to sample
     :return: list of degree centralities, one per sampled node

     '''
     snap = self.snap
     sample_size = min(self.num_nodes, n_node)
     chosen = np.random.choice(self.nodes, sample_size, replace=False)
     return [snap.GetDegreeCentr(self.graph, int(node_id)) for node_id in chosen]
Example #11
0
def get_node_centrality(graph, gtype='snap'):
    """Return node ids and a per-node degree measure as two parallel arrays.

    * ``gtype == 'snap'``: the measure is ``snap.GetDegreeCentr`` for each
      node, in graph iteration order.
    * ``gtype == 'nx'``: the measure is ``graph.degree(..., weight='weight')``
      queried for node ids ``0 .. number_of_nodes() - 1``; this is a weighted
      degree, not a normalised centrality.
    * any other ``gtype``: both arrays are empty.

    The ids are returned as ``uint32`` and the measures as ``float32``.
    """
    if gtype == 'snap':
        pairs = ((node.GetId(), snap.GetDegreeCentr(graph, node.GetId()))
                 for node in graph.Nodes())
    elif gtype == 'nx':
        pairs = graph.degree(range(graph.number_of_nodes()), weight='weight')
        # An earlier variant used nx.degree_centrality(graph) with keys
        # sorted; it was disabled in favour of the weighted degree above.
    else:
        pairs = ()

    nids, deg_centr = [], []
    for node_id, value in pairs:
        nids.append(node_id)
        deg_centr.append(value)

    id_array = np.asarray(nids, dtype='uint32')
    value_array = np.asarray(deg_centr, dtype='float32')
    return id_array, value_array
Example #12
0
    def GetOpinionLeaders(self, method, proportion, communities):
        k = int(self.graph.GetNodes() * proportion)
        if method == "W": # whole network
            self.opinionLeaders = self.GetMaxKDegree(k)
        else: # method = "C": each community
            if not communities:
                return set()
            dictIMN = {}
            dictNodeCommunity = {}
            for i in range(len(communities)):
                dictIMN[i] = [int(len(communities[i]) * proportion), 0]
                for node in communities[i]:
                    dictNodeCommunity[node] = i

            # get whole sorted list
            lstDeg = []
            nodesId = []
            for NI in self.graph.Nodes():
                DegCentr = snap.GetDegreeCentr(self.graph, NI.GetId())
                nodesId.append(NI.GetId())
                lstDeg.append(DegCentr)

            count = len(lstDeg)
            # nodes = range(0, count)
            for i in range(0, count):
                for j in range(i + 1, count):
                    if lstDeg[i] > lstDeg[j]:
                        lstDeg[i], lstDeg[j] = lstDeg[j], lstDeg[i]
                        nodesId[i], nodesId[j] = nodesId[j], nodesId[i]
            print nodesId
            for i in range(count-1,0,-1):
                node = nodesId[i]
                communityindex = dictNodeCommunity[node]
                if dictIMN[communityindex][1] < dictIMN[communityindex][0]:
                    self.opinionLeaders.add(node)
                    dictIMN[communityindex][1] += 1
                if len(self.opinionLeaders) == k:
                    break
        return self.opinionLeaders
def get_degree_centrality(G, n):
    """Return the degree centrality of node ``n`` in graph ``G``.

    Thin wrapper around ``snap.GetDegreeCentr``.
    """
    centrality = snap.GetDegreeCentr(G, n)
    return centrality
Example #14
0
def getAttribute(filename):
    """Load an undirected edge list and tabulate per-node graph measures.

    ``filename`` is read with ``snap.LoadEdgeList`` (source column 0,
    destination column 1) and the graph is dumped to standard output.

    Returns a ``pandas.DataFrame`` with one row per node and the columns
    ``Graph`` (the file name), ``Id``, ``Degree``, ``DegreeCentrality``,
    ``NodeBetweennessCentrality``, ``ClosenessCentrality``,
    ``FarnessCentrality``, ``PageRank``, ``HubsScore``,
    ``AuthoritiesScore``, ``NodeEccentricity`` and
    ``EigenvectorCentrality``.

    Every measure is looked up by the node id stored in ``Id``, so each row
    describes a single node regardless of the iteration order of the
    underlying SNAP containers.
    """
    UGraph = snap.LoadEdgeList(snap.PUNGraph, filename, 0, 1)
    UGraph.Dump()

    columns = ['Graph', 'Id', 'Degree', 'DegreeCentrality',
               'NodeBetweennessCentrality', 'ClosenessCentrality',
               'FarnessCentrality', 'PageRank', 'HubsScore',
               'AuthoritiesScore', 'NodeEccentricity',
               'EigenvectorCentrality']
    node_count = UGraph.GetNodes()
    attributes = pd.DataFrame(np.zeros(shape=(node_count, len(columns))),
                              columns=columns)
    attributes['Graph'] = [filename] * node_count

    # Degree: the (node id, degree) vector fixes the row order for all columns.
    OutDegV = snap.TIntPrV()
    snap.GetNodeOutDegV(UGraph, OutDegV)
    node_ids = [item.GetVal1() for item in OutDegV]
    attributes['Id'] = node_ids
    attributes['Degree'] = [item.GetVal2() for item in OutDegV]

    # Degree, Closeness, Farness Centrality, Node Eccentricity
    attributes['DegreeCentrality'] = [
        snap.GetDegreeCentr(UGraph, nid) for nid in node_ids]
    attributes['ClosenessCentrality'] = [
        snap.GetClosenessCentr(UGraph, nid) for nid in node_ids]
    attributes['FarnessCentrality'] = [
        snap.GetFarnessCentr(UGraph, nid) for nid in node_ids]
    attributes['NodeEccentricity'] = [
        snap.GetNodeEcc(UGraph, nid, False) for nid in node_ids]

    # Betweenness Centrality (node fraction 1.0)
    Nodes = snap.TIntFltH()
    Edges = snap.TIntPrFltH()
    snap.GetBetweennessCentr(UGraph, Nodes, Edges, 1.0)
    attributes['NodeBetweennessCentrality'] = [Nodes[nid] for nid in node_ids]

    # PageRank
    PRankH = snap.TIntFltH()
    snap.GetPageRank(UGraph, PRankH)
    attributes['PageRank'] = [PRankH[nid] for nid in node_ids]

    # Hubs, Authorities score
    NIdHubH = snap.TIntFltH()
    NIdAuthH = snap.TIntFltH()
    snap.GetHits(UGraph, NIdHubH, NIdAuthH)
    attributes['HubsScore'] = [NIdHubH[nid] for nid in node_ids]
    attributes['AuthoritiesScore'] = [NIdAuthH[nid] for nid in node_ids]

    # Eigenvector Centrality
    NIdEigenH = snap.TIntFltH()
    snap.GetEigenVectorCentr(UGraph, NIdEigenH)
    attributes['EigenvectorCentrality'] = [NIdEigenH[nid] for nid in node_ids]

    return attributes
maxnode = 0
for node in NIdEigenH:
    if maxd < NIdEigenH[node]:
        maxd = NIdEigenH[node]
        maxnode = node
    eigen1.add(NIdEigenH[node])

print "Max Eigen node ->", maxnode
print "Max Eigen Value ->", maxd
#Degree Centrality - Graph1
maxd = 0.0
maxnode = 0
for node in Graph1.Nodes():

    #degree centrality
    DegCentr = snap.GetDegreeCentr(Graph1, node.GetId())
    degree1.add(DegCentr)
    if maxd < DegCentr:
        maxd = DegCentr
        maxnode = node.GetId()

fp = open('myrecipe.names.txt')
for i in fp.readlines():
    ar = i.rstrip('\n').split('\t')
    if int(ar[0]) == maxnode:
        print "Max. Degree Centrality Node in 'My Recipes' network is ", maxnode, " : ", ar[
            1], " with centrality: ", maxd
        print "i.e.,", ar[1], "ingredient is used ", round(
            maxd * 100, 2), "% of the time in My Recipes network\n"
fp.close()
#Graph2
def degree_centrality(graph):
    """Return the degree centrality of every node, in graph iteration order."""
    centralities = []
    for node in graph.Nodes():
        centralities.append(sp.GetDegreeCentr(graph, node.GetId()))
    return centralities
Example #17
0
# Build the retweet graph and weight each edge direction by the source
# node's share of the two endpoints' degree centrality.
#
# File layout: first line "<node count> <edge count>", then one "<u> <v>"
# line per edge, all separated by single spaces.
with open("Graphdata/retweet.txt") as f:  # closed as soon as it is read
    s = f.read()
s1 = re.split('\n', s)
G1 = snap.PUNGraph.New()

a = re.split(' ', s1[0])

for i in range(0, int(a[0])):
    G1.AddNode(i)

for i in range(1, int(a[1]) + 1):
    b = re.split(' ', s1[i])
    G1.AddEdge(int(b[0]), int(b[1]))

# node id -> degree centrality
DegCentr = dict()

for NI in G1.Nodes():
    DegCentr[NI.GetId()] = snap.GetDegreeCentr(G1, NI.GetId())

# (source, target) -> C(source) / (C(source) + C(target)), both directions
EdgePara = dict()

for i in range(1, int(a[1]) + 1):
    c = re.split(' ', s1[i])
    EdgePara[(int(c[0]), int(c[1]))] = DegCentr[int(
        c[0])] / (DegCentr[int(c[0])] + DegCentr[int(c[1])])
    EdgePara[(int(c[1]), int(c[0]))] = DegCentr[int(
        c[1])] / (DegCentr[int(c[0])] + DegCentr[int(c[1])])

snap.DrawGViz(G1, snap.gvlNeato, "graph_undirected.png", "graph 2", True)
Example #18
0
import sys
import numpy as np
import matplotlib
matplotlib.use('Agg')

import matplotlib.pyplot as plt

# Usage: <script> <edge list file>
# Writes "<edge list file>.degree.txt" with one "<node id> : <centrality>"
# line per node, highest centrality first, and plots the distinct
# centrality values against their rank.
input_file = sys.argv[1]
Graph = snap.LoadEdgeList(snap.PUNGraph, input_file, 0, 1)

# node id -> degree centrality
degree = dict()

for node in Graph.Nodes():
    #degree centrality
    DegCentr = snap.GetDegreeCentr(Graph, node.GetId())
    degree[node.GetId()] = DegCentr

with open(sys.argv[1] + '.degree.txt', 'w+') as fp:
    # Sort by (centrality, node id), descending. An index-based key replaces
    # the tuple-parameter lambda, which is a syntax error on Python 3.
    for p in sorted(degree.items(), key=lambda kv: (kv[1], kv[0]), reverse=True):
        fp.write("%s : %s\n" % p)

# Distinct centrality values, largest first.
dc = sorted(set(degree.values()), key=float, reverse=True)

#plotting degree centrality
plt.plot(np.arange(1, len(dc) + 1, 1), dc, 'b.')
plt.xlabel('Rank')
plt.ylabel('Degree Centrality')
Example #19
0
                continue
            trial = random.random()
            if trial < 0.01:
                scheduled.add(neighborNodeId)
                influenceSet.add(neighborNodeId)

    return len(influenceSet)


# Rank the nodes of smallWorld by degree centrality and, for growing seed
# counts (5, 10, ..., 30), sum the influence-set size reached from each seed.
nodelist = []
degree_centrality = {}
for node in smallWorld.Nodes():
    nodelist.append(node.GetId())

for node in nodelist:
    DegCentr = snap.GetDegreeCentr(smallWorld, node)
    degree_centrality[node] = DegCentr

# (node id, centrality) pairs, highest centrality first
tdc = sorted(degree_centrality.items(), key=lambda x: x[1], reverse=True)
getset = []
some = 5

# resultset[0] is the baseline for zero seeds
resultset = [0]
while (some <= 30):
    # NOTE(review): getset is not cleared between rounds, so the top nodes
    # are appended again on every pass and counted several times in the sum
    # below; confirm whether that is intended.
    for i in range(0, some):
        getset.append(tdc[i][0])
    some = some + 5
    # Each seed is evaluated on its own, as a single-node set.
    resultset.append(
        sum([
            getLengthInfluenceSet(smallWorld, set([node])) for node in getset
        ]))
    page_writer.writeheader()

    for item in PRankH:
        page_writer.writerow({'node_id': item, 'page_rank': PRankH[item]})

# Eigenvector centrality -> na_power_eigcentr.csv
# (author's note on the solver settings: epsilon = 10^-4, max_iters = 100)
NIdEigenH = snap.TIntFltH()
snap.GetEigenVectorCentr(UGraph, NIdEigenH)
with open('na_power_eigcentr.csv', 'w') as csvfile:
    fieldnames = ['node_id', 'eig_centr']
    eig_writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
    eig_writer.writeheader()
    eig_writer.writerows(
        {'node_id': item, 'eig_centr': NIdEigenH[item]} for item in NIdEigenH)

# Degree centrality -> na_power_degcentr.csv
# Author's note: writing all three centrality files in a single run raised
# an error; it works when one of the export blocks is commented out.
with open('na_power_degcentr.csv', 'w') as csvfile:
    fieldnames = ['node_id', 'deg_centr']
    deg_writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
    deg_writer.writeheader()

    for NI in UGraph.Nodes():
        n_id = NI.GetId()
        deg_writer.writerow(
            {'node_id': n_id, 'deg_centr': snap.GetDegreeCentr(UGraph, n_id)})
Example #21
0
                dataset['keyword']).get_feature_names()
            if re.search('[0-9].....', x) == None
        ]
    print 'Creating node and edge list'
    nx_input = output_network_inputs(id_dict, pack='snap')
    # pp.pprint(nx_input)
    # print_break('Network Graph: NetworkX')
    # G=nx.Graph()
    # G.add_nodes_from(nx_input['nodes'])
    # G.add_edges_from(nx_input['edges'])
    # measures = { 'centrality': nx.degree_centrality(G), 'clustering': nx.clustering(G), 'triads': nx.triangles(G) }
    # pp.pprint(measures)
    print_break('Network Graph: SNAP')
    t0 = time.time()
    G = snap.TUNGraph.New()
    print 'Adding Nodes'
    for i in tqdm(nx_input['nodes']):
        G.AddNode(i)
    print 'Adding Edges'
    for x in tqdm(nx_input['edges']):
        G.AddEdge(x[0], x[1])
    print 'Calculating measures'
    centrality = [snap.GetDegreeCentr(G, n.GetId()) for n in G.Nodes()]
    measures = {
        'centrality': np.mean(centrality),
        'clustering': snap.GetClustCf(G),
        'triads': snap.GetTriads(G)
    }
    pp.pprint(measures)
    print_break('SNAP Graph Measures Time elapsed: %s' % (time.time() - t0))
Example #22
0
import snap
import parser, make_graphs

filenames = ["0301/{}.txt".format(i) for i in range(0, 3)]
data = parser.Data(filenames)
graph = make_graphs.make_graph(data)
ugraph = snap.ConvertGraph(snap.PUNGraph, graph)
mxwcc = snap.GetMxWcc(graph)
umxwcc = snap.GetMxWcc(ugraph)
N = 20

# === GetDegreeCentr ===
s = []
for NI in umxwcc.Nodes():
    DegCentr = snap.GetDegreeCentr(umxwcc, NI.GetId())
    s.append((NI.GetId(), DegCentr))
s.sort(key=lambda x: x[1], reverse=True)  # sort with max centrality at front
print '=== GetDegreeCentr ==='
with open("GetDegreeCentr-0-2.txt", 'w') as f:
    for x in s:
        f.write("{} {}\n".format(*x))

# === GetBetweennessCentr ===
# Betweenness is computed on mxwcc (the component of `graph`), not on the
# undirected umxwcc used for degree centrality above.
Nodes = snap.TIntFltH()   # node id -> betweenness
Edges = snap.TIntPrFltH()  # filled by the call below; not used afterwards here
snap.GetBetweennessCentr(mxwcc, Nodes, Edges, 1.0)
# (node id, betweenness) pairs
s = [(node, Nodes[node]) for node in Nodes]
s.sort(key=lambda x: x[1], reverse=True)  # sort with max centrality at front
print '=== GetBetweennessCentr ==='
with open("GetBetweennessCentr-0-2.txt", 'w') as f:
    for x in s:
Example #23
0
 def DegreeCentrality(self):
     """Return a dict mapping each node id of ``self.graph`` to its degree centrality."""
     graph = self.graph
     centrality_by_id = {}
     for node in graph.Nodes():
         node_id = node.GetId()
         centrality_by_id[node_id] = snap.GetDegreeCentr(graph, node_id)
     return centrality_by_id
import snap

UGraph = snap.GenRndGnm(snap.PUNGraph, 100, 1000)
for NI in UGraph.Nodes():
    DegCentr = snap.GetDegreeCentr(UGraph, NI.GetId())
    print "node: %d centrality: %f" % (NI.GetId(), DegCentr)
Example #25
0
for line in lines:
    tokens = line.split('||')
    if tokens[2] != '':
        nId = nameToNId[tokens[1]]
        uIdToNId[int(tokens[0])] = nId
graph = snap.ConvertGraph(snap.PUNGraph, network)
degCenters = {}
closeCenters = {}
pageRanks = snap.TIntFltH()
eigenCenters = snap.TIntFltH()
# btwnCenters = snap.TIntFltH()
# edgeHash = snap.TIntPrFltH()
print('Running PageRank...')
snap.GetPageRank(graph, pageRanks)
print('Running Eigenvector centrality...')
snap.GetEigenVectorCentr(graph, eigenCenters)
# print('Running Betweeness...')
# snap.GetBetweennessCentr(graph, btwnCenters, edgeHash)
print('Running Degree and Closeness...')
for uId, nId in uIdToNId.iteritems():
    print uId, nId
    degCenters[uId] = snap.GetDegreeCentr(graph, nId)
    closeCenters[uId] = snap.GetClosenessCentr(graph, nId)

outfile = open('csv/centralities.csv', 'w')
for uId, nId in uIdToNId.iteritems():
    outfile.write(str(uId) + ',' + str(pageRanks[nId]) + ',' +\
        str(eigenCenters[nId]) + ',' +\
        str(degCenters[uId]) + ',' + str(closeCenters[uId]) + '\n')
outfile.close()
Example #26
0
##  Calculate in-degree and out-degree centrality of a directed graph.
##
##  snap.GetDegreeCentr is documented for undirected graphs and returns a
##  single value per node, so it cannot separate in-degree from out-degree
##  (the original script called it in both loops and reported getting 0).
##  The centralities are therefore derived from the in/out degree vectors
##  and normalised by (number of nodes - 1).
import snap
txt_file = "/Users/dukechan/Downloads/sms_sna_oct18_directed.txt"
G = snap.LoadEdgeList(snap.PNGraph, txt_file, 4, 5)
InDegV = snap.TIntPrV()
OutDegV = snap.TIntPrV()
snap.GetNodeInDegV(G, InDegV)
snap.GetNodeOutDegV(G, OutDegV)

# Normalisation constant; a graph with fewer than two nodes has centrality 0.
max_neighbours = float(G.GetNodes() - 1)

# indegree
with open('/Users/dukechan/Downloads/result4.txt', 'w') as f:
    for item in InDegV:
        DegCentr = item.GetVal2() / max_neighbours if max_neighbours > 0 else 0.0
        f.write("node: %d centrality: %f\n" % (item.GetVal1(), DegCentr))

# outdegree
with open('/Users/dukechan/Downloads/result5.txt', 'w') as f2:
    for item in OutDegV:
        DegCentr = item.GetVal2() / max_neighbours if max_neighbours > 0 else 0.0
        f2.write("node: %d centrality: %f\n" % (item.GetVal1(), DegCentr))
def basic_analysis():


	FIn = snap.TFIn("../graphs/ph_simple.graph")
	G = snap.TUNGraph.Load(FIn)

	numNodes = G.GetNodes()
	print "num nodes: ", numNodes
	numEdges = G.GetEdges()
	print "num edges: ", numEdges

	# clustering coefficient
	print "\nclustering coefficient"

	print "Clustering G: ", snap.GetClustCf(G)

	ER = snap.GenRndGnm(snap.PUNGraph, numNodes, numEdges)

	print "Clustering ER: ", snap.GetClustCf(ER)

	# degree distribution histogram

	print "\ndegree distribution histogram"

	x_erdosRenyi, y_erdosRenyi = getDataPointsToPlot(ER)
	plt.loglog(x_erdosRenyi, y_erdosRenyi, color = 'g', label = 'Erdos Renyi Network')

	x_smallWorld, y_smallWorld = getDataPointsToPlot(G)
	plt.loglog(x_smallWorld, y_smallWorld, linestyle = 'dashed', color = 'b', label = 'PH Agency Network')

	plt.xlabel('Node Degree (log)')
	plt.ylabel('Proportion of Nodes with a Given Degree (log)')
	plt.title('Degree Distribution of Erdos Renyi and PH Agency Network')
	plt.legend()
	plt.show()

	# degree
	print "\ndegree distribution"

	deg_sum = 0.0

	CntV = snap.TIntPrV()
	snap.GetOutDegCnt(G, CntV)
	for p in CntV:
		deg_sum += p.GetVal1() * p.GetVal2()

	max_node = G.GetNI(snap.GetMxDegNId(G))

	deg_sum /= float(numNodes)

	print "average degree: ", deg_sum # same for G and ER

	print "max degree: ", max_node.GetOutDeg(), ", id: ", max_node.GetId()

	deg_sum = 0.0
	
	max_node = ER.GetNI(snap.GetMxDegNId(ER))

	print "max degree: ", max_node.GetOutDeg(), ", id: ", max_node.GetId()

	# diameter
	print "\ndiameter"

	diam = snap.GetBfsFullDiam(G, 10)

	print "Diameter: ", diam

	print "ER Diameter: ", snap.GetBfsFullDiam(ER, 10)

	# triads
	print "\ntriads"

	print "Triads: ", snap.GetTriads(G)

	print "ER Triads: ", snap.GetTriads(ER)

	# centrality
	print "\ncentrality"

	max_dc = 0.0
	maxId = -1

	all_centr = []

	for NI in G.Nodes():
		DegCentr = snap.GetDegreeCentr(G, NI.GetId())
		all_centr.append(DegCentr)
		if DegCentr > max_dc:
			max_dc = DegCentr
			maxId = NI.GetId() 
	
	print "max"
	print "node: %d centrality: %f" % (maxId, max_dc)
	print "average centrality: ", np.mean(all_centr)

	print "ER"
	max_dc = 0.0
	maxId = -1

	all_centr = []

	for NI in ER.Nodes():
		DegCentr = snap.GetDegreeCentr(ER, NI.GetId())
		all_centr.append(DegCentr)
		if DegCentr > max_dc:
			max_dc = DegCentr
			maxId = NI.GetId() 
			
	print "max"
	print "node: %d centrality: %f" % (maxId, max_dc)
	print "average centrality: ", np.mean(all_centr)
Example #28
0
def getFeatures(G_CoSponsor, G_Campaign, bill_node, legislator_node, comm_node,
                legislator_node_from_campaign, G_Campaign_folded):
    '''
    return two pd: X, Y
    '''
    print "before dropping", len(legislator_node)
    for l in legislator_node:
        if not G_Campaign_folded.IsNode(l):
            legislator_node.remove(l)
        if l not in legislator_node_from_campaign:
            try:
                legislator_node.remove(l)
            except:
                pass

    cluster_0, cluster_1 = loadClusteringAttr()
    print "after dropping", len(legislator_node)

    Y = getY(G_CoSponsor, legislator_node)

    #compute a list of clustering coefficient
    NIdCCfH = snap.TIntFltH()
    snap.GetNodeClustCf(G_Campaign_folded, NIdCCfH)

    #compute a list of node centrality and degree
    node_centrality = {}
    in_deg = {}
    for i in legislator_node:
        if G_Campaign.IsNode(i):
            node_centrality[i] = snap.GetDegreeCentr(G_Campaign_folded, i)
            in_deg[i] = G_Campaign.GetNI(i).GetInDeg()

    print "begin to compute X"

    X = Y[['node_i', 'node_j']]

    #list of features
    X['Degree_Diff'] = 0
    X['Union_of_Neighbors'] = 0.0
    X['CommNeighbors'] = 0.0
    #X['Contribution_Sum'] = 0.0
    #X['Contribution_Diff'] = 0.0
    X['Clustering_Coeff_Diff'] = 0.0
    X['Clustering_Coeff_Sum'] = 0.0
    X['Clustering_Coeff_Avg'] = 0.0
    X['Jaccard'] = 0.0
    X['Shortest_Dist'] = 0.0
    X['Deg_Centrality_Diff'] = 0.0
    X['FromSameCluster'] = 0

    def compute_attri(x):
        NId_i = int(x['node_i'])
        NId_j = int(x['node_j'])
        if G_Campaign_folded.IsNode(NId_i) and G_Campaign_folded.IsNode(NId_j):
            node_i_contribution_sum = 0.0
            node_j_contribution_sum = 0.0
            neighbors_i = []
            neighbors_j = []

            clustering_cf_i = NIdCCfH[NId_i]

            clustering_cf_j = NIdCCfH[NId_j]

            CommNeighbors = snap.GetCmnNbrs(G_Campaign, NId_i, NId_j)
            NeighborsUnion = float(
                len(
                    list(set().union(getNeighbors(NId_i, G_Campaign),
                                     getNeighbors(NId_j, G_Campaign)))))

            FromSameCluster = 0
            if NId_i in cluster_0 and NId_j in cluster_0:
                FromSameCluster = 1
            if NId_i in cluster_1 and NId_j in cluster_1:
                FromSameCluster = 1
            '''
            Nbrs = snap.TIntV()
            snap.GetCmnNbrs(G_Campaign, NId_i,NId_j, Nbrs)
            for NId in Nbrs:
                eid_i = G_Campaign.GetEId(NId,NId_i)
                eid_j = G_Campaign.GetEId(NId,NId_j)
                neighbors_i.append(NId)
                neighbors_j.append(NId)
                node_i_contribution_sum += G_Campaign.GetIntAttrDatE(eid_i, 'TRANSACTION_AMT')              
                node_j_contribution_sum += G_Campaign.GetIntAttrDatE(eid_j, 'TRANSACTION_AMT')
            '''
            result = {
                'Degree_Diff':
                abs(in_deg[NId_i] - in_deg[NId_j]),
                'Union_of_Neighbors':
                NeighborsUnion,
                'CommNeighbors':
                CommNeighbors,
                'Clustering_Coeff_Diff':
                abs(clustering_cf_i - clustering_cf_j),
                'Clustering_Coeff_Sum':
                clustering_cf_i + clustering_cf_j,
                'Clustering_Coeff_Avg':
                clustering_cf_i + clustering_cf_j / 2.0,
                #'Contribution_Diff': abs(node_i_contribution_sum - node_j_contribution_sum),
                #'Contribution_Sum': node_i_contribution_sum + node_j_contribution_sum,
                'Jaccard':
                CommNeighbors * 1.0 / NeighborsUnion,
                'Shortest_Dist':
                snap.GetShortPath(G_Campaign, NId_i, NId_j),
                'Deg_Centrality_Diff':
                abs(node_centrality[NId_i] - node_centrality[NId_j]),
                'FromSameCluster':
                FromSameCluster
            }
        else:
            result = {}
        return pd.Series(result, name="Attri")

    begin = time.time()
    print "My program took", time.time() - start_time, "to begin compute X"

    X = X.apply(compute_attri, axis=1)
    print "before dropping nan from computing attribute", X.shape
    inds = pd.isnull(X).any(1).nonzero()[0]
    print "My program took", time.time() - start_time, "to finish compute X"
    end = time.time()

    print "time to compute x", begin - end

    X = X.drop(inds)
    Y = Y.drop(inds)

    print "after dropping nan from computing attribute", X.shape

    return X, Y
Example #29
0
    evCntr_dict[item] = evCntr_Vector[item]

evCntr_sort = sorted(evCntr_dict.values(), reverse=True)
evCntr_count10 = 0
for value in evCntr_sort:
    for key1, val1 in evCntr_dict.iteritems():
        if val1 == value:
            print val1, key1
            evCntr_count10 += 1
            break
    if evCntr_count10 == 10:
        break

dc_dict = dict()
for nex in ugraph.Nodes():
    dc_dict[nex.GetId()] = snap.GetDegreeCentr(ugraph, nex.GetId())

dc_sort = sorted(dc_dict.values(), reverse=True)
dc_count10 = 0
dc_set = set(dc_sort)
dc_sort1 = sorted(list(dc_set), reverse=True)
for value in dc_sort1:
    for key1, val1 in dc_dict.iteritems():
        if val1 == value and dc_count10 < 10:
            print val1, key1
            dc_count10 += 1
        if dc_count10 == 10:
            break
    if dc_count10 == 10:
        break
pg_rank_nodes = [None] * (len(nodes) + 1)