Esempio n. 1
0
    def _initialize(self, mu, sigma_ratio):
        """Attach a random, normalised probability to every in-link.

        For each node with at least one incoming edge, one value per
        in-link is drawn from a normal distribution centred on
        ``1 / in_degree`` with standard deviation ``sigma_ratio``. The
        values are clipped to ``[0, 1]`` and rescaled so that they sum to 1
        for that node, then stored on the matching edge as the float
        attribute named by ``self.prob``.

        Args:
            mu: Not used by this method.
            sigma_ratio: Standard deviation passed to the normal sampler.
        """
        in_degrees = snap.TIntPrV()
        snap.GetNodeInDegV(self._graph, in_degrees)

        for pair in in_degrees:
            node_id = pair.GetVal1()
            n_links = pair.GetVal2()
            # Nodes without incoming edges have nothing to weight.
            if n_links == 0:
                continue

            target = self._graph.GetNI(node_id)

            # One sample per in-link, centred on the uniform share.
            centre = np.ones(n_links, dtype=np.float32) / n_links
            spread = sigma_ratio / np.ones(n_links)
            weights = np.clip(np.random.normal(centre, spread), 0., 1.)

            # If clipping zeroed every sample, fall back to uniform weights.
            if weights.sum() == 0:
                weights = np.ones(n_links, dtype=np.float32)

            weights /= weights.sum()

            for slot, weight in enumerate(weights):
                edge = self._graph.GetEI(target.GetInNId(slot),
                                         target.GetId())
                self._graph.AddFltAttrDatE(edge, weight, self.prob)
def degree_distribution_graphs():
    """Plot the in-degree distribution of the module-level graph ``G``.

    Three figures are shown in turn: a histogram of the per-degree counts,
    a log-log plot of the number of nodes per in-degree, and a log-log plot
    of the complementary cumulative counts. Only nodes with a positive
    in-degree are considered. Nothing is plotted when no such node exists.
    """
    InDegV = snap.TIntPrV()
    snap.GetNodeInDegV(G, InDegV)

    # Collect the positive in-degrees directly; this avoids pre-sizing an
    # array (which left a spurious leading sample and could overflow).
    degrees = np.array(
        [item.GetVal2() for item in InDegV if item.GetVal2() > 0], dtype=int)
    if degrees.size == 0:
        return

    # One bin per integer degree. The edges run to max + 1 so that the
    # largest degree falls inside the last bin instead of being dropped.
    bars, bins = np.histogram(degrees, bins=np.arange(1, degrees.max() + 2))
    # NOTE(review): this histograms the per-degree counts themselves, as the
    # original did - confirm that is the intended first figure.
    plt.hist(bars, bins)
    plt.grid()
    plt.show()

    plt.loglog(bins[0:-1], bars)
    plt.ylabel('# users per degree')
    plt.xlabel('in-degree')
    plt.grid()
    plt.show()

    # NOTE(review): total - cumsum counts nodes with degree strictly greater
    # than x, while the label says "larger or equal" - confirm which is meant.
    plt.loglog(bins[0:-1], sum(bars) - np.cumsum(bars))
    plt.ylabel('# users with degree larger or equal than x')
    plt.xlabel('in-degree')
    plt.grid()
    plt.show()
def in_out_degree_correlation(G):
    """Return the Pearson correlation between in-degree and out-degree.

    Args:
        G: Graph accepted by ``snap.GetNodeInDegV`` / ``snap.GetNodeOutDegV``.

    Returns:
        The correlation coefficient only; the p-value that ``pearsonr``
        also returns is discarded.
    """
    # Both vectors hold (node id, degree) pairs, including nodes whose
    # degree is 0.
    in_deg_vec = snap.TIntPrV()
    snap.GetNodeInDegV(G, in_deg_vec)
    in_degree = {item.GetVal1(): item.GetVal2() for item in in_deg_vec}

    out_deg_vec = snap.TIntPrV()
    snap.GetNodeOutDegV(G, out_deg_vec)
    out_degree = {item.GetVal1(): item.GetVal2() for item in out_deg_vec}

    # Pair the two degrees node by node so the samples stay aligned.
    shared_nodes = [node for node in in_degree if node in out_degree]
    v_in_d = [in_degree[node] for node in shared_nodes]
    v_out_d = [out_degree[node] for node in shared_nodes]

    # pearsonr returns a (coefficient, p-value) tuple.
    coefficient, _p_value = pearsonr(v_in_d, v_out_d)
    return coefficient
def in_degree_distribution(G):
    """Return aligned lists of in-degree, out-degree and their sum.

    Args:
        G: Graph accepted by ``snap.GetNodeInDegV`` / ``snap.GetNodeOutDegV``.

    Returns:
        A tuple ``(v_in_d, v_out_d, v_degrees)`` of three lists with one
        entry per node: the in-degree, the out-degree, and the sum of both.
    """
    # Both vectors hold (node id, degree) pairs, including nodes whose
    # degree is 0.
    in_deg_vec = snap.TIntPrV()
    snap.GetNodeInDegV(G, in_deg_vec)
    in_degree = {item.GetVal1(): item.GetVal2() for item in in_deg_vec}

    out_deg_vec = snap.TIntPrV()
    snap.GetNodeOutDegV(G, out_deg_vec)
    out_degree = {item.GetVal1(): item.GetVal2() for item in out_deg_vec}

    # Walk the nodes once so the three lists share the same ordering.
    shared_nodes = [node for node in in_degree if node in out_degree]
    v_in_d = [in_degree[node] for node in shared_nodes]
    v_out_d = [out_degree[node] for node in shared_nodes]
    v_degrees = [i + o for i, o in zip(v_in_d, v_out_d)]

    return v_in_d, v_out_d, v_degrees
def _get_degree_in_graph(Graph, H, output_path):
    """Write per-user degree statistics restricted to ``Graph`` to a CSV.

    For every node whose profile file ``./data/Users/<username>.json``
    exists, one row is produced with the columns ``username``,
    ``in_degree``, ``out_degree``, ``balance`` (in-degree / out-degree) and
    ``reciprocity`` (mutual links that are nodes of ``Graph`` / out-degree).
    When the out-degree is 0 the module-level ``eps`` is used as divisor.

    Args:
        Graph: Graph the degrees are read from.
        H: Hash mapping usernames to node ids (``GetKey`` / ``GetDat``).
        output_path: Destination of the CSV file.
    """
    InDegV = snap.TIntPrV()
    snap.GetNodeInDegV(Graph, InDegV)
    InDeg_set = {H.GetKey(item.GetVal1()): item.GetVal2() for item in InDegV}

    OutDegV = snap.TIntPrV()
    snap.GetNodeOutDegV(Graph, OutDegV)
    OutDeg_set = {H.GetKey(item.GetVal1()): item.GetVal2()
                  for item in OutDegV}

    columns = ['username', 'in_degree', 'out_degree', 'balance', 'reciprocity']
    dataset = list()
    tot = len(InDeg_set)
    num = 0
    for username in InDeg_set:
        in_degree = InDeg_set[username]
        out_degree = OutDeg_set[username]

        # Users without a stored profile are skipped entirely.
        profile_path = './data/Users/%s.json' % username
        if not os.path.exists(profile_path):
            continue
        with open(profile_path, 'r') as f:
            profile = json.load(f)
        in_set = set(profile['followers'])
        out_set = set(profile['following'])

        # eps stands in for a zero out-degree to avoid dividing by zero.
        divisor = eps if out_degree == 0 else float(out_degree)

        # Count followed users that follow back and are nodes of Graph.
        bi = 0
        for out_username in out_set:
            if out_username not in in_set:
                continue
            try:
                ID = H.GetDat(out_username)
                # Compare by value: identity checks against an int literal
                # are implementation-dependent.
                if ID != -1 and Graph.IsNode(ID):
                    bi += 1
            except Exception as e:
                # Best effort: report the failed lookup and carry on.
                print(type(e))
                print(e.args)
                print(e)

        dataset.append({
            'username': username,
            'in_degree': in_degree,
            'out_degree': out_degree,
            'balance': float(in_degree) / divisor,
            'reciprocity': float(bi) / divisor,
        })
        num += 1
        print('%d/%d' % (num, tot))

    # Passing the columns up front fixes their order and also yields a
    # header-only file when no profile was found.
    dataset = pd.DataFrame(dataset, columns=columns)
    dataset.to_csv(output_path, index=False, encoding='utf-8')
Esempio n. 6
0
def indegree(rankCommands, Graph, conn, cur):
    InDegV = snap.TIntPrV()
    before_time = time.time()
    snap.GetNodeInDegV(Graph, InDegV)
    print "Total handling time is: ", (time.time() - before_time)
    DegH = snap.TIntIntH()
    slist = sortNodes(InDegV, DegH)
    createTable(rankCommands, slist, DegH, conn, cur)
Esempio n. 7
0
    def getNodesByDegree(self):
        """Return ``(node, in_degree)`` tuples, highest in-degree first.

        Each node id reported for ``self.rawGraph`` is converted with
        ``self.node`` before being paired with its in-degree.
        """
        in_degrees = snap.TIntPrV()
        snap.GetNodeInDegV(self.rawGraph, in_degrees)

        ranked = [(self.node(entry.GetVal1()), entry.GetVal2())
                  for entry in in_degrees]
        # Stable descending sort on the degree component.
        ranked.sort(key=lambda pair: pair[1], reverse=True)
        return ranked
def degree_stats():
    """Print the id and in-degree of the first ten nodes of global ``G``."""
    in_degrees = snap.TIntPrV()
    snap.GetNodeInDegV(G, in_degrees)

    limit = 10
    for shown, entry in enumerate(in_degrees, 1):
        print("node ID %d: in-degree %d" % (entry.GetVal1(), entry.GetVal2()))
        # Remove this check to output all nodes.
        if shown == limit:
            break
def get_robustness(file_path, LSCC_output_path, LWCC_output_path):
    frac_list = [
        0.0001, 0.001, 0.01, 0.05, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9
    ]
    Graph, H = load_graph(file_path)
    InDegV = snap.TIntPrV()
    snap.GetNodeInDegV(Graph, InDegV)
    OutDegV = snap.TIntPrV()
    snap.GetNodeOutDegV(Graph, OutDegV)
    degree = dict()
    for item in InDegV:
        ID = item.GetVal1()
        InDeg = item.GetVal2()
        degree[ID] = InDeg
    for item in OutDegV:
        ID = item.GetVal1()
        OutDeg = item.GetVal2()
        degree[ID] += OutDeg
    sorted_degree = sorted(degree.items(), key=itemgetter(1), reverse=True)
    tot = len(sorted_degree)
    pos = [int(tot * frac) for frac in frac_list]
    print pos
    cur = 0
    LSCC_robust = list()
    LWCC_robust = list()
    for i in range(tot):
        Graph.DelNode(sorted_degree[i][0])
        if i == pos[cur] - 1:
            LSCC_frac = snap.GetMxSccSz(Graph)
            LWCC_frac = snap.GetMxWccSz(Graph)
            singleton_frac = 1.0 - 1.0 * snap.CntNonZNodes(
                Graph) / Graph.GetNodes()
            LSCC_robust.append({
                'removed': frac_list[cur],
                'singleton': singleton_frac,
                'middle': 1.0 - singleton_frac - LSCC_frac,
                'LSCC': LSCC_frac
            })
            LWCC_robust.append({
                'removed': frac_list[cur],
                'singleton': singleton_frac,
                'middle': 1.0 - singleton_frac - LWCC_frac,
                'LWCC': LWCC_frac
            })
            cur += 1
        if cur >= len(pos):
            break
    LSCC_robust = pd.DataFrame(LSCC_robust)
    LSCC_robust = LSCC_robust[['removed', 'singleton', 'middle', 'LSCC']]
    LSCC_robust.to_csv(LSCC_output_path, index=False, encoding='utf-8')
    LWCC_robust = pd.DataFrame(LWCC_robust)
    LWCC_robust = LWCC_robust[['removed', 'singleton', 'middle', 'LWCC']]
    LWCC_robust.to_csv(LWCC_output_path, index=False, encoding='utf-8')
Esempio n. 10
0
def _store_normalized_degrees(NodeAttributes, degree_list, raw_key, norm_key):
    """Store raw and min-max normalised degrees under the given keys."""
    lowest = min(degree_list, key=lambda x: x[1])[1]
    highest = max(degree_list, key=lambda x: x[1])[1]
    span = float(highest - lowest)

    for nodeId, Degree in degree_list:
        if not NodeAttributes.get(nodeId, None):
            NodeAttributes[nodeId] = dict()
        NodeAttributes[nodeId][raw_key] = Degree
        # When every node has the same degree there is no range to scale
        # by; report 0.0 instead of dividing by zero.
        NodeAttributes[nodeId][norm_key] = (
            (Degree - lowest) / span if span else 0.0)


def computeDegreeCentrality(G, NodeAttributes):
    """Add in/out degree and their min-max normalised values per node.

    For every node of ``G`` the keys ``InDegree``, ``NormInDegree``,
    ``OutDegree`` and ``NormOutDegree`` are written into
    ``NodeAttributes[node_id]`` (the entry is created when missing or
    empty). The normalised value is ``(d - min) / (max - min)``, or 0.0
    when all nodes share the same degree.

    Args:
        G: Graph accepted by ``snap.GetNodeInDegV`` / ``snap.GetNodeOutDegV``.
        NodeAttributes: Dict of per-node attribute dicts; updated in place.

    Returns:
        The same ``NodeAttributes`` object.
    """
    InDegV = snap.TIntPrV()
    OutDegV = snap.TIntPrV()

    snap.GetNodeOutDegV(G, OutDegV)
    snap.GetNodeInDegV(G, InDegV)

    # Highest degree first, so new entries are inserted in that order.
    InDegreeList = sorted(
        ((item.GetVal1(), item.GetVal2()) for item in InDegV),
        key=lambda x: x[1], reverse=True)
    OutDegreeList = sorted(
        ((item.GetVal1(), item.GetVal2()) for item in OutDegV),
        key=lambda x: x[1], reverse=True)

    # An empty graph has no minimum or maximum; nothing to add.
    if not InDegreeList:
        return NodeAttributes

    _store_normalized_degrees(
        NodeAttributes, InDegreeList, 'InDegree', 'NormInDegree')
    _store_normalized_degrees(
        NodeAttributes, OutDegreeList, 'OutDegree', 'NormOutDegree')

    return NodeAttributes
Esempio n. 11
0
def getMinAvgMax(graph):
    """Return the maximum, minimum and average in-degree of ``graph``.

    Args:
        graph: Graph accepted by ``snap.GetNodeInDegV``.

    Returns:
        A tuple ``(max_deg, min_deg, avg_deg)``; note the order. The
        average is a float. For a graph without nodes ``(0, 0, 0.0)`` is
        returned.
    """
    InDegV = snap.TIntPrV()
    snap.GetNodeInDegV(graph, InDegV)
    degrees = [item.GetVal2() for item in InDegV]

    if not degrees:
        return (0, 0, 0.0)

    # min()/max() avoid a hard-coded sentinel that would be wrong for very
    # large degrees; float() keeps the average exact on Python 2 as well.
    avg_deg = float(sum(degrees)) / len(degrees)
    return (max(degrees), min(degrees), avg_deg)
def get_in_degree_rank(G):
    """Return ``(node_id, in_degree)`` tuples ranked by in-degree.

    Every node is included, even those with in-degree 0. The list is in
    descending order of in-degree; nodes with equal in-degree are listed
    by ascending node id.
    """
    in_deg_vec = snap.TIntPrV()
    snap.GetNodeInDegV(G, in_deg_vec)

    ranking = [(entry.GetVal1(), entry.GetVal2()) for entry in in_deg_vec]
    # Negating the degree gives "degree descending, id ascending" with a
    # plain ascending sort.
    ranking.sort(key=lambda pair: (-pair[1], pair[0]))
    return ranking
def calculate_stats():
    """Compute and cache the graph statistics enabled in ``config``.

    Depending on the ``config.CALCULATE_*`` flags this writes, under
    ``config.DATASET_DIR``: the 1-hop neighbourhood of every node
    (``ego_graphs.txt``), the in-degree of every node
    (``degree_sequence.txt``) and the stacked results of
    ``get_shortest_path`` over ``node_ids``
    (``shortest_path_matrix.npy``). An existing file is only recomputed
    when ``config.OVERRIDE`` is set.
    """
    # create similarities folder
    os.makedirs(config.DATASET_DIR / 'similarities', exist_ok=True)

    if config.CALCULATE_EGO_GRAPHS:
        print(f'Calculating ego graphs for {config.DATASET_DIR}...')
        ego_path = config.DATASET_DIR / 'ego_graphs.txt'
        if not ego_path.exists() or config.OVERRIDE:
            ego_graph_dict = {}
            for node in snap_graph.Nodes():
                node_id = int(node.GetId())
                # Nodes exactly one hop away from node_id.
                nodes_vec = snap.TIntV()
                snap.GetNodesAtHop(snap_graph, node_id, 1, nodes_vec, False)
                ego_graph_dict[node_id] = list(nodes_vec)

            with open(str(ego_path), 'w') as f:
                json.dump(ego_graph_dict, f)

    if config.CALCULATE_DEGREE_SEQUENCE:
        print(f'Calculating degree sequences for {config.DATASET_DIR}...')
        degree_path = config.DATASET_DIR / 'degree_sequence.txt'
        if not degree_path.exists() or config.OVERRIDE:
            # NOTE(review): only the in-degree is stored, as before; the
            # out-degree vector was computed but never used - confirm
            # whether in + out was intended.
            InDegV = snap.TIntPrV()
            snap.GetNodeInDegV(snap_graph, InDegV)
            degrees = {item.GetVal1(): item.GetVal2() for item in InDegV}
            with open(str(degree_path), 'w') as f:
                json.dump(degrees, f)

    if config.CALCULATE_SHORTEST_PATHS:
        print(f'Calculating shortest paths for {config.DATASET_DIR}...')
        matrix_path = config.DATASET_DIR / 'shortest_path_matrix.npy'
        if not matrix_path.exists() or config.OVERRIDE:

            with multiprocessing.Pool(processes=config.N_PROCESSSES) as pool:
                shortest_paths = pool.map(get_shortest_path, node_ids)

            all_shortest_paths = np.stack(shortest_paths)
            np.save(str(matrix_path), all_shortest_paths)
def runSimulation(graph, nodeDeg, iterations):
    """Simulate an infection spreading from a random high-degree node.

    Each iteration picks "patient zero" uniformly among the nodes whose
    in-degree is at least ``nodeDeg`` and spreads the infection
    breadth-first: every not-yet-infected neighbour is infected with
    probability ``TRANSMISSIBILITY``.

    Args:
        graph: Graph to simulate on.
        nodeDeg: Minimum in-degree a node needs to qualify as patient zero.
        iterations: Number of independent simulation runs.

    Returns:
        The average number of infected nodes per run divided by the number
        of nodes in the graph, or 0 when no node qualifies or
        ``iterations`` is not positive.
    """
    log("Running simulation with params: nodeDeg=%s, iterations=%s ..." %
        (nodeDeg, iterations))

    InDegV = snap.TIntPrV()
    snap.GetNodeInDegV(graph, InDegV)
    # A tuple (not a set) because random.sample needs a sequence on recent
    # Python versions.
    candidates = tuple(
        item.GetVal1() for item in InDegV if item.GetVal2() >= nodeDeg)
    if not candidates or iterations <= 0:
        return 0

    totalInfected = 0
    for iteration in range(iterations):
        log("Running iteration %s ..." % iteration)

        # Find and infect patient zero.
        patientZeroId = random.sample(candidates, 1)[0]
        infected = set([patientZeroId])  # All infected patients.
        # Infected patients we have not "processed" yet.
        infectedQueue = deque([patientZeroId])

        # Run through the "infected" queue in FIFO order until all the
        # patients have had a turn.
        while infectedQueue:
            node = graph.GetNI(infectedQueue.popleft())
            # NOTE(review): the loop bound is the out-degree while
            # GetNbrNId indexes all neighbours; the two agree on an
            # undirected graph - confirm the graph type.
            for nbrIndex in range(node.GetOutDeg()):
                nbrId = node.GetNbrNId(nbrIndex)
                if (nbrId not in infected
                        and random.random() < TRANSMISSIBILITY):
                    # We have infected them.
                    infected.add(nbrId)
                    infectedQueue.append(nbrId)
        totalInfected += len(infected)

    # float() keeps the ratio from being truncated by integer division.
    avgInfected = float(totalInfected) / iterations
    infectedFraction = avgInfected / graph.GetNodes()
    return infectedFraction
def power_law_fit():
    """Fit a power law to the positive in-degrees of global graph ``G``.

    Prints the fitted exponent, ``xmin`` and the Kolmogorov-Smirnov
    distance, and draws the empirical and fitted PDF (blue) and CCDF
    (red). Nothing is done when no node has a positive in-degree.
    """
    InDegV = snap.TIntPrV()
    snap.GetNodeInDegV(G, InDegV)
    # Fit the observed in-degrees themselves. Zero degrees are left out,
    # as in the sibling in-degree plots.
    a = np.array(
        [item.GetVal2() for item in InDegV if item.GetVal2() > 0], dtype=int)
    if a.size == 0:
        return

    fit = pl.Fit(a)
    pl.plot_pdf(a, color='r')
    fit.plot_pdf(color='b', linewidth=2)
    # power-law exponent
    print("Power Law Data\n")
    print("Power Law Exponential:", fit.alpha)
    print("Min value for X:", fit.xmin)
    print("Kolmogorov-Smirnov test:", fit.D)
    # comparison of data and power-law fits of pdf (blue) and ccdf (red)
    figCCDF = fit.plot_pdf(color='b', linewidth=2)
    fit.power_law.plot_pdf(color='b', linestyle='--', ax=figCCDF)
    fit.plot_ccdf(color='r', linewidth=2, ax=figCCDF)
    fit.power_law.plot_ccdf(color='r', linestyle='--', ax=figCCDF)
    ####
    figCCDF.set_ylabel(u"p(X),  p(X≥x)")
    figCCDF.set_xlabel(r"in-degree")
Esempio n. 16
0
    def get_in_out_degree_table(self, graph):
        """Return an int32 array of (node id, in-degree, out-degree) rows.

        The array has one row per node of ``graph`` and the node id is
        used directly as the row index.
        """
        # Columns: node id / in-degree / out-degree.
        table = np.zeros((graph.GetNodes(), 3), dtype=np.int32)

        in_pairs = snap.TIntPrV()
        snap.GetNodeInDegV(graph, in_pairs)

        out_pairs = snap.TIntPrV()
        snap.GetNodeOutDegV(graph, out_pairs)

        for entry in in_pairs:
            row = entry.GetVal1()
            table[row, 0] = row
            table[row, 1] = entry.GetVal2()

        for entry in out_pairs:
            table[entry.GetVal1(), 2] = entry.GetVal2()

        return table
# Script fragment: load a binary network, then compute its in-degree
# vector and PageRank scores, reporting after every step.
# NOTE(review): `t` and `r` are defined outside this fragment; presumably
# they report timing / resource usage for the tagged step - confirm.
r.show("__context__")

# load network
print time.ctime(), "loading network ..."
# FIn is defined outside this fragment; it is passed to TNEANet.Load.
net = snap.TNEANet.Load(FIn)
t.show("loadbin network", net)
r.show("__network__")

print time.ctime(), "done"


# In[2]:

# In-degree of every node, as (node id, in-degree) pairs.
print time.ctime(), "computing indegv ..."
InDegV = snap.TIntPrV()
snap.GetNodeInDegV(net, InDegV)
t.show("indegv", InDegV)
r.show("__InDegV__")
print time.ctime()


# In[3]:

# PageRank scores keyed by node id.
# NOTE(review): 0.85, 1e-4 and 100 are presumably the damping factor,
# convergence tolerance and iteration cap - confirm against the SNAP docs.
print time.ctime(), "computing pagerank ..."
PRankH = snap.TIntFltH()
snap.GetPageRankMP(net, PRankH, 0.85, 1e-4, 100)
t.show("prank", PRankH)
r.show("__PRankH__")
print time.ctime()

Esempio n. 18
0
def indegree(graph):
    """Return a dict mapping each node id of ``graph`` to its in-degree."""
    pairs = snap.TIntPrV()
    snap.GetNodeInDegV(graph, pairs)

    degree_by_node = {}
    for position in range(pairs.Len()):
        entry = pairs[position]
        degree_by_node[entry.GetVal1()] = entry.GetVal2()
    return degree_by_node
def main():
    """Print structural statistics of a subgraph and save its plots.

    The edge list named by ``sys.argv[1]`` is loaded from the ``subgraphs``
    directory as an undirected graph. Node/edge counts, degree facts,
    sampled diameters, connectivity, clustering and triad figures are
    printed, then four SNAP distribution plots are generated inside the
    ``plots`` directory, where non-PNG by-products are deleted and the
    PNG files are renamed.
    """

    # Load the graph from ./subgraphs, then return to the start directory.
    parentDir = os.getcwd()
    os.chdir(parentDir + "/subgraphs")
    sub_graph = snap.LoadEdgeList(snap.PUNGraph, sys.argv[1], 0, 1)
    # File name up to the first dot; used as plot name and title prefix.
    subGraphName = sys.argv[1].split(".")[0]
    os.chdir(parentDir)

    #### 1 ########
    # Count the nodes by iterating over them.
    node_count = 0
    for node in sub_graph.Nodes():
        node_count = node_count + 1

    printWithOutNewLine("Number of nodes:", node_count)
    # NOTE(review): the value printed as the edge count comes from
    # CntUniqBiDirEdges - confirm this is the intended counter.
    printWithOutNewLine("Number of edges:", snap.CntUniqBiDirEdges(sub_graph))

    #### 2 ########
    printWithOutNewLine("Number of nodes with degree=7:",
                        snap.CntDegNodes(sub_graph, 7))

    # Take one maximum-degree node, look up its degree, then collect every
    # node that shares that degree.
    rndMaxDegNId = snap.GetMxDegNId(sub_graph)
    nodeDegPairs = snap.TIntPrV()
    snap.GetNodeInDegV(sub_graph, nodeDegPairs)
    maxDegVal = 0

    for pair in nodeDegPairs:
        if (pair.GetVal1() == rndMaxDegNId):
            maxDegVal = pair.GetVal2()
            break

    maxDegNodes = []
    for pair in nodeDegPairs:
        if (pair.GetVal2() == maxDegVal):
            maxDegNodes.append(pair.GetVal1())

    print("Node id(s) with highest degree:", end=" ")
    print(*maxDegNodes, sep=',')

    #### 3 ########
    # Full diameter estimated with 10, 100 and 1000 BFS start nodes.
    sampledFullDiam = []
    sampledFullDiam.append(snap.GetBfsFullDiam(sub_graph, 10, False))
    sampledFullDiam.append(snap.GetBfsFullDiam(sub_graph, 100, False))
    sampledFullDiam.append(snap.GetBfsFullDiam(sub_graph, 1000, False))

    # Mean and variance over the three estimates, rounded to 4 decimals.
    sampledFullDiamStats = []
    sampledFullDiamStats.append(round(statistics.mean(sampledFullDiam), 4))
    sampledFullDiamStats.append(round(statistics.variance(sampledFullDiam), 4))

    printWithOutNewLine("Approximate full diameter by sampling 10 nodes:",
                        sampledFullDiam[0])
    printWithOutNewLine("Approximate full diameter by sampling 100 nodes:",
                        sampledFullDiam[1])
    printWithOutNewLine("Approximate full diameter by sampling 1000 nodes:",
                        sampledFullDiam[2])
    print("Approximate full diameter (mean and variance):", end=" ")
    print(*sampledFullDiamStats, sep=',')

    # Same procedure for the effective diameter; each estimate is rounded
    # before the statistics are taken.
    sampledEffDiam = []
    sampledEffDiam.append(round(snap.GetBfsEffDiam(sub_graph, 10, False), 4))
    sampledEffDiam.append(round(snap.GetBfsEffDiam(sub_graph, 100, False), 4))
    sampledEffDiam.append(round(snap.GetBfsEffDiam(sub_graph, 1000, False), 4))

    sampledEffDiamStats = []
    sampledEffDiamStats.append(round(statistics.mean(sampledEffDiam), 4))
    sampledEffDiamStats.append(round(statistics.variance(sampledEffDiam), 4))

    printWithOutNewLine("Approximate effective diameter by sampling 10 nodes:",
                        sampledEffDiam[0])
    printWithOutNewLine(
        "Approximate effective diameter by sampling 100 nodes:",
        sampledEffDiam[1])
    printWithOutNewLine(
        "Approximate effective diameter by sampling 1000 nodes:",
        sampledEffDiam[2])
    print("Approximate effective diameter (mean and variance):", end=" ")
    print(*sampledEffDiamStats, sep=',')

    #### 4 ########
    # Connectivity: largest component share, bridges, articulation points.
    printWithOutNewLine("Fraction of nodes in largest connected component:",
                        round(snap.GetMxSccSz(sub_graph), 4))

    bridgeEdges = snap.TIntPrV()
    snap.GetEdgeBridges(sub_graph, bridgeEdges)
    printWithOutNewLine("Number of edge bridges:", len(bridgeEdges))

    articulationPoints = snap.TIntV()
    snap.GetArtPoints(sub_graph, articulationPoints)
    printWithOutNewLine("Number of articulation points:",
                        len(articulationPoints))

    #### 5 ########
    # Clustering and triads, for the whole graph and for one random node.
    printWithOutNewLine("Average clustering coefficient:",
                        round(snap.GetClustCf(sub_graph, -1), 4))

    printWithOutNewLine("Number of triads:", snap.GetTriads(sub_graph, -1))

    randomNodeId = sub_graph.GetRndNId()
    nodeIdCcfMap = snap.TIntFltH()
    snap.GetNodeClustCf(sub_graph, nodeIdCcfMap)

    print("Clustering coefficient of random node", end=" ")
    print(randomNodeId, end=": ")
    print(round(nodeIdCcfMap[randomNodeId], 4))

    print("Number of triads random node", end=" ")
    print(randomNodeId, end=" participates: ")
    print(snap.GetNodeTriads(sub_graph, randomNodeId))

    printWithOutNewLine(
        "Number of edges that participate in at least one triad:",
        snap.GetTriadEdges(sub_graph, -1))

    #### plots ########
    # The plot functions are called with ./plots as working directory.
    if not os.path.isdir('plots'):
        os.makedirs('plots')

    os.chdir(parentDir + "/plots")
    plotsDir = os.getcwd()

    snap.PlotOutDegDistr(sub_graph, subGraphName,
                         subGraphName + " Subgraph Degree Distribution")
    snap.PlotShortPathDistr(
        sub_graph, subGraphName,
        subGraphName + " Subgraph Shortest Path Lengths Distribution")
    snap.PlotSccDistr(
        sub_graph, subGraphName,
        subGraphName + " Subgraph Connected Components Size Distribution")
    snap.PlotClustCf(
        sub_graph, subGraphName,
        subGraphName + " Subgraph Clustering Coefficient Distribution")

    files = os.listdir(plotsDir)

    # Keep only the PNG images; every other file in ./plots is deleted.
    for file in files:
        if not file.endswith(".png"):
            os.remove(os.path.join(plotsDir, file))

    # Rename "<kind>.<name>.<ext>" files to "<prefix><name>.<ext>". Names
    # with exactly two dot-separated parts are left untouched.
    plots = os.listdir(plotsDir)
    filePrefix = "filename"
    for file in plots:
        nameSplit = file.split(".")
        if (len(nameSplit) == 2):
            continue
        # An unrecognised first part keeps the prefix of the previous
        # iteration (initially "filename").
        if (nameSplit[0] == "ccf"):
            filePrefix = "clustering_coeff_"
        elif (nameSplit[0] == "outDeg"):
            filePrefix = "deg_dist_"
        elif (nameSplit[0] == "diam"):
            filePrefix = "shortest_path_"
        elif (nameSplit[0] == "scc"):
            filePrefix = "connected_comp_"

        os.rename(file, filePrefix + nameSplit[1] + "." + nameSplit[2])

    os.chdir(parentDir)
Esempio n. 20
0
def getNextAttachment(Graph):
    """Read the in-degree of every node of ``Graph`` and draw a random number.

    NOTE(review): only these statements are visible. Nothing is returned
    and neither local is used afterwards, so the definition looks
    truncated - confirm against the full source.
    """
    # (node id, in-degree) pairs for the nodes of Graph.
    InDegV = snap.TIntPrV()
    snap.GetNodeInDegV(Graph, InDegV)
    # One draw from random.random().
    rand = random.random()
# Read the tab-separated edge list from the already opened file `fil`.
line_num = 0
for line in fil:
    # The first three lines are skipped (presumably a header - confirm).
    if line_num > 2:
        a, b = map(int, line.split("\t"))
        # Ids are shifted down by one before being recorded.
        node_list.add(a - 1)
        node_list.add(b - 1)
    line_num += 1
fil.close()

#Degree Centrality

y = sys.argv[1] + "-degree-centrality-4.txt"

InDegV = snap.TIntPrV()
snap.GetNodeInDegV(Graph, InDegV)

# Node id -> in-degree, then ranked by descending in-degree. items()
# works on both Python 2 and Python 3.
D = {item.GetVal1(): item.GetVal2() for item in InDegV}
D = sorted(D.items(), key=lambda x: x[1], reverse=True)

# The context manager closes the file even if a write fails.
with open(y, "w+") as f:
    f.write("#NId Centrality\r\n")
    for NId, value in D:
        f.write("%d %d\r\n" % (NId, value))
#print("Degree Centrality done !")

#Closeness Centrality
Esempio n. 22
0
##  calculate indegree and outdegree centrality----Fail   So I choose NetworkX
import snap
txt_file = "/Users/dukechan/Downloads/sms_sna_oct18_directed.txt"
# Columns 4 and 5 of the input file hold the source and destination ids.
G = snap.LoadEdgeList(snap.PNGraph, txt_file, 4, 5)
InDegV = snap.TIntPrV()
OutDegV = snap.TIntPrV()
snap.GetNodeInDegV(G, InDegV)
snap.GetNodeOutDegV(G, OutDegV)

# In-/out-degree centrality is the degree divided by the number of other
# nodes. It is computed from the degree vectors directly, because calling
# snap.GetDegreeCentr for every node wrote the same value to both files
# and ignored the in/out degree already available here.
max_links = G.GetNodes() - 1

# indegree
with open('/Users/dukechan/Downloads/result4.txt', 'w') as f:
    for item in InDegV:
        DegCentr = float(item.GetVal2()) / max_links if max_links > 0 else 0.0
        f.write("node: %d centrality: %f\n" % (item.GetVal1(), DegCentr))

# outdegree
with open('/Users/dukechan/Downloads/result5.txt', 'w') as f2:
    for item in OutDegV:
        DegCentr = float(item.GetVal2()) / max_links if max_links > 0 else 0.0
        f2.write("node: %d centrality: %f\n" % (item.GetVal1(), DegCentr))
Esempio n. 23
0
def get_in_degrees(Graph):
    """Return a dict mapping every node id of ``Graph`` to its in-degree."""
    pairs = snap.TIntPrV()
    snap.GetNodeInDegV(Graph, pairs)

    degree_by_node = {}
    for entry in pairs:
        degree_by_node[entry.GetVal1()] = entry.GetVal2()
    return degree_by_node
def _get_degree(Graph, H, output_path):
    """Write per-user degree statistics to a CSV file.

    For every node whose profile file ``./data/Users/<username>.json``
    exists, the in/out degree is the largest of: the degree in ``Graph``,
    the counts in the profile's ``socialStats`` (when present) and the
    sizes of the profile's follower / following lists. ``balance`` is
    in-degree / out-degree and ``reciprocity`` is the number of mutual
    follows / out-degree; the module-level ``eps`` replaces a zero
    out-degree.

    Args:
        Graph: Graph the base degrees are read from.
        H: Hash whose ``GetKey`` maps a node id to a username.
        output_path: Destination of the CSV file.
    """
    in_vec = snap.TIntPrV()
    snap.GetNodeInDegV(Graph, in_vec)
    graph_in = dict()
    for entry in in_vec:
        graph_in[H.GetKey(entry.GetVal1())] = entry.GetVal2()

    out_vec = snap.TIntPrV()
    snap.GetNodeOutDegV(Graph, out_vec)
    graph_out = dict()
    for entry in out_vec:
        graph_out[H.GetKey(entry.GetVal1())] = entry.GetVal2()

    rows = list()
    total = len(graph_in)
    done = 0
    for username in graph_in:
        record = dict()
        record['username'] = username
        record['in_degree'] = graph_in[username]
        record['out_degree'] = graph_out[username]

        # Users without a stored profile are skipped.
        profile_path = './data/Users/%s.json' % username
        if not os.path.exists(profile_path):
            continue
        with open(profile_path, 'r') as f:
            profile = json.load(f)

        # Take the larger of the graph degree and the reported counters.
        user_info = profile['profile']['user']
        if 'socialStats' in user_info:
            stats = user_info['socialStats']
            record['in_degree'] = max(record['in_degree'],
                                      stats['usersFollowedByCount'])
            record['out_degree'] = max(record['out_degree'],
                                       stats['usersFollowedCount'])

        followers = set(profile['followers'])
        following = set(profile['following'])
        record['in_degree'] = max(record['in_degree'], len(followers))
        record['out_degree'] = max(record['out_degree'], len(following))

        # eps stands in for a zero out-degree to avoid dividing by zero.
        if record['out_degree'] == 0:
            divisor = eps
        else:
            divisor = float(record['out_degree'])
        record['balance'] = float(record['in_degree']) / divisor

        # Mutual follows: followed users who also follow back.
        mutual = len(following.intersection(followers))
        record['reciprocity'] = float(mutual) / divisor

        rows.append(record)
        done += 1
        print('%d/%d' % (done, total))

    frame = pd.DataFrame(rows)
    frame = frame[[
        'username', 'in_degree', 'out_degree', 'balance', 'reciprocity'
    ]]
    frame.to_csv(output_path, index=False, encoding='utf-8')
Esempio n. 25
0
# Edge counts of `graph`, printed one per line.
# number of directed edges
Count = snap.CntUniqDirEdges(graph)
print("Count of directed edges is %d" % Count)
# number of undirected edges
Count = snap.CntUniqUndirEdges(graph)
print("Count of undirected edges is %d" % Count)
# number of self edges
Count = snap.CntSelfEdges(graph)
print("Count of self edges is %d" % Count)
# number of unique bi-directional/reciprocated edges
Count = snap.CntUniqBiDirEdges(graph)
print("Count of unique bidirectional edges is %d" % Count)

# number of nodes with out-degree greater than 10
OutDegV = snap.TIntPrV()
snap.GetNodeOutDegV(graph, OutDegV)
count_od = sum(1 for item in OutDegV if item.GetVal2() > 10)
print("Count of nodes with more than 10 outgoing edges %d" % count_od)

# number of nodes with in-degree less than 10
InDegV = snap.TIntPrV()
snap.GetNodeInDegV(graph, InDegV)
count_in = sum(1 for item in InDegV if item.GetVal2() < 10)
print("Count of nodes with fewer than 10 incoming edges %d" % count_in)
Esempio n. 26
0
def getDirAttribute(filename, node_num, weighted=None, param=1.0):
    """Build a per-node attribute table for a directed edge-list graph.

    The graph is loaded from ``filename`` (columns 0 and 1). The returned
    DataFrame has ``node_num`` rows, one per node id ``0..node_num-1``,
    and holds the degree columns, betweenness centrality and PageRank
    computed here, plus whatever ``getEgoAttr`` (and, when ``weighted`` is
    truthy, ``getWeightedDegree`` / ``getWeightedEgoAttr``) fill in.

    Args:
        filename: Path of the edge list; its base name without extension
            is stored in the ``Graph`` column.
        node_num: Number of rows; node ids are used as row indices.
        weighted: When truthy, the weighted attribute columns are added.
        param: Forwarded to ``snap.GetBetweennessCentr``.

    Returns:
        The populated ``pandas.DataFrame``.
    """
    Graph = snap.LoadEdgeList(snap.PNGraph, filename, 0, 1)

    columns = [
        'Graph', 'Id', 'Degree', 'InDegree', 'OutDegree',
        'NodeBetweennessCentrality', 'PageRank', 'EgonetDegree',
        'EgonetInDegree', 'EgonetOutDegree', 'AvgNeighborDeg',
        'AvgNeighborInDeg', 'AvgNeighborOutDeg', 'EgonetConnectivity'
    ]
    if weighted:
        columns.extend([
            'WeightedDegree', 'WeightedInDegree', 'WeightedOutDegree',
            'EgoWeightedDegree', 'AvgWeightedNeighborDeg',
            'EgonetWeightedConnectivity', 'EgoWeightedInDegree',
            'EgoWeightedOutDegree', 'AvgWeightedNeighborInDeg',
            'AvgWeightedNeighborOutDeg'
        ])

    # Start from an all-zero table and fill the columns one by one.
    attributes = pd.DataFrame(np.zeros((node_num, len(columns))),
                              columns=columns)

    graph_label = filename.split('/')[-1].split('.')[0]
    attributes['Graph'] = [graph_label] * node_num
    attributes['Id'] = range(0, node_num)

    # In-degree, indexed by node id; also accumulated into 'Degree'.
    in_degree = np.zeros((node_num, ))
    in_pairs = snap.TIntPrV()
    snap.GetNodeInDegV(Graph, in_pairs)
    for entry in in_pairs:
        in_degree[entry.GetVal1()] = entry.GetVal2()
    attributes['Degree'] += in_degree
    attributes['InDegree'] = in_degree

    # Out-degree, handled the same way.
    out_degree = np.zeros((node_num, ))
    out_pairs = snap.TIntPrV()
    snap.GetNodeOutDegV(Graph, out_pairs)
    for entry in out_pairs:
        out_degree[entry.GetVal1()] = entry.GetVal2()
    attributes['Degree'] += out_degree
    attributes['OutDegree'] = out_degree

    getEgoAttr(Graph, node_num, attributes)

    if weighted:
        df = getWeightedDegree(filename, node_num, attributes, directed=True)
        getWeightedEgoAttr(Graph, node_num, attributes, df, directed=True)

    # Betweenness centrality per node id (edge scores are not used).
    betweenness = np.zeros((node_num, ))
    node_scores = snap.TIntFltH()
    edge_scores = snap.TIntPrFltH()
    snap.GetBetweennessCentr(Graph, node_scores, edge_scores, param, True)
    for node_id in node_scores:
        betweenness[node_id] = node_scores[node_id]
    attributes['NodeBetweennessCentrality'] = betweenness

    # PageRank per node id.
    page_rank = np.zeros((node_num, ))
    rank_scores = snap.TIntFltH()
    snap.GetPageRank(Graph, rank_scores)
    for node_id in rank_scores:
        page_rank[node_id] = rank_scores[node_id]
    attributes['PageRank'] = page_rank

    return attributes