def get_shortest_path(file_path, output_path):
    """Write the shortest-path length distribution of the largest SCC to CSV.

    The graph returned by ``load_graph`` is reduced to its largest strongly
    connected component.  A directed single-source shortest-path search is
    run from every node of that component and every distance reported in the
    resulting hash table is tallied.

    Args:
        file_path: Passed unchanged to ``load_graph``, which must return a
            2-tuple whose first element is the snap graph.
        output_path: Destination CSV path.  The file has the columns
            ``dist`` and ``freq`` and is sorted by ascending ``dist``.
    """
    Graph, _ = load_graph(file_path)
    MxScc = snap.GetMxScc(Graph)
    tot = MxScc.GetNodes()
    path_distr = dict()
    for cnt, NI in enumerate(MxScc.Nodes(), 1):
        NIdToDistH = snap.TIntH()
        # Fills NIdToDistH with {node id: distance}; True = follow edge direction.
        snap.GetShortPath(MxScc, NI.GetId(), NIdToDistH, True)
        for ID in NIdToDistH:
            dist = NIdToDistH[ID]
            path_distr[dist] = path_distr.get(dist, 0) + 1
        # Progress indicator: sources processed so far / total sources.
        print('%d/%d' % (cnt, tot))
    # Explicit columns keep the frame well-formed even when nothing was tallied.
    dataset = pd.DataFrame(
        [{'dist': dist, 'freq': freq} for dist, freq in path_distr.items()],
        columns=['dist', 'freq'])
    # DataFrame.sort() no longer exists in pandas; sort_values() is its replacement.
    dataset.sort_values('dist', ascending=True, inplace=True)
    dataset.to_csv(output_path, index=False, encoding='utf-8')
예제 #2
0
def removeLink(G):
    """Delete from G every node that is neither a seed node nor adjacent to one.

    A ``plotting`` helper is built for ``G`` and its ``hirachical()`` step is
    run; afterwards ``plot.community`` is read as the collection of seed node
    ids (presumably filled in by ``hirachical()`` -- confirm against the
    ``plotting`` class).  Every node whose shortest-path distance to a seed is
    exactly 1 is kept together with the seeds; all other nodes are removed
    from ``G`` in place.

    Args:
        G: snap graph; modified in place.

    Returns:
        tuple: ``(nodeList, rmList)``.  ``nodeList`` is the list of kept node
        ids: neighbours in discovery order followed by all seeds (a seed that
        neighbours another seed therefore appears twice).  ``rmList`` is the
        ``snap.TIntV`` holding the ids that were deleted.
    """
    plot = plotting(G, snap.gvlNeato, True)
    # 1st layer
    plot.hirachical()
    print('DEBUG1 %s %s' % (len(plot.community), plot.community))

    nodeList = []
    seen = set()  # mirrors nodeList so membership tests are O(1)
    for node in plot.community:
        for NI in G.Nodes():
            nid = NI.GetId()
            # Ids that are already kept need no further search.
            if nid not in seen and snap.GetShortPath(G, nid, node) == 1:
                seen.add(nid)
                nodeList.append(nid)
        print('DEBUG2 %s %s' % (node, nodeList))

    nodeList.extend(plot.community)
    # assumes the seed ids are hashable (plain ints) -- TODO confirm
    keep = seen.union(plot.community)

    rmList = snap.TIntV()
    for NI in G.Nodes():
        if NI.GetId() not in keep:
            rmList.Add(NI.GetId())

    print('DEBUG3 %s' % rmList.Len())
    snap.DelNodes(G, rmList)

    print('\nRemoving Nodes which is not the adjacent ones of CLAIMID')
    print('Graph has %d Nodes and %d Edges\n' % (G.GetNodes(), G.GetEdges()))

    return nodeList, rmList
예제 #3
0
def output_closeness_centrality(graph, filename):
    """Compute closeness centrality for every node and write it to a file.

    For each node the distances reported by a single-source shortest-path
    search are summed, and the centrality is ``(N - 1) / sum`` with ``N``
    the number of nodes in the graph.  A node whose distance sum is zero
    (it reaches no other node) is given a centrality of ``0.0``.

    Args:
        graph: snap graph.
        filename: Output path.  One line ``"<node id> <centrality>"`` per
            node, centrality printed with six decimals, ordered by
            descending centrality.
    """
    num_nodes = graph.GetNodes()
    t0 = time.time()
    closeness = []

    for start in graph.Nodes():
        dist_table = snap.TIntH()
        snap.GetShortPath(graph, start.GetId(), dist_table)
        total = sum(dist_table[nid] for nid in dist_table)
        # float() forces true division on Python 2 as well; the guard avoids
        # ZeroDivisionError for nodes that reach nothing.
        value = float(num_nodes - 1) / total if total else 0.0
        closeness.append((start.GetId(), value))

    # Stable sort: nodes with equal centrality keep their iteration order.
    closeness.sort(key=lambda item: item[1], reverse=True)

    with open(filename, "w") as f:
        for node_id, value in closeness:
            f.write("{} {:.6f}\n".format(node_id, value))

    print("Time taken for calculation of closeness centrality = {:.6f}".format(time.time()-t0))
def get_shortest_path(node_id):
    """Return hop distances from ``node_id`` as a dense vector indexed by node id.

    Relies on the module-level ``snap_graph`` and ``node_ids``.  The vector
    has ``max(node_ids) + 1`` entries so that a node id can be used directly
    as an index; entries for ids missing from the search result stay 0.

    Args:
        node_id: Source node id; converted with ``int()`` before use.

    Returns:
        numpy.ndarray: one distance per possible node id.
    """
    dist_by_node = snap.TIntH()
    snap.GetShortPath(snap_graph, int(node_id), dist_by_node)
    distances = np.zeros(max(node_ids) + 1)
    for target in dist_by_node:
        distances[target] = dist_by_node[target]
    return distances
예제 #5
0
def analyzeMisc(FNGraph):
    """Print miscellaneous statistics of the network.

    Reported, in order: share of nodes in the largest weakly connected
    component, clustering coefficient, BFS-estimated diameter and the average
    shortest-path distance, followed by the elapsed wall-clock time.

    Args:
        FNGraph: snap graph to analyse.
    """
    started = time.time()

    print("Started calculating miscellaneous network statistics:")

    lcc_percent = snap.GetMxWccSz(FNGraph) * 100.0
    print('\tPercentage of nodes in LCC in Football network: %.3f' % lcc_percent)
    print("\tClustering coefficient: %.3f" % snap.GetClustCf(FNGraph, -1))

    # Diameter estimated from 1432 BFS start nodes on the undirected view.
    print("\tNetwork diameter: %.3f\n" % snap.GetBfsFullDiam(FNGraph, 1432, False))

    print("\tCalculating average distance...")

    node_count = FNGraph.GetNodes()
    mean_distance = 0

    for processed, NI in enumerate(FNGraph.Nodes()):
        if processed % 100 == 0:
            print("\t\tCalculated for %d nodes" % processed)
        dist_table = snap.TIntH()
        snap.GetShortPath(FNGraph, NI.GetId(), dist_table)

        dist_sum = 0
        for key in dist_table:
            dist_sum += dist_table[key]

        # Each source contributes its mean distance, weighted by 1/N.
        mean_distance += (1.0 / node_count) * float(dist_sum) / (node_count - 1)

    print("\tNetwork average distance: %.3f" % mean_distance)

    print("\nFinished calculating in %f seconds\n" % (time.time() - started))
예제 #6
0
def make_path_graph(Graphs):
    """Plot, per graph, the distribution of per-node mean shortest-path length.

    For every node the mean distance to all nodes it can reach (itself
    included) is computed; the number of nodes sharing each mean value is
    then drawn on log-log axes.  The overall mean distance of each graph is
    printed next to its label.

    Args:
        Graphs: Iterable of ``(graph, label, color)`` tuples, where ``graph``
            is a snap graph and ``label`` / ``color`` are forwarded to
            ``plt.loglog``.
    """
    for G, label, color in Graphs:
        # GetShortPath expects node ids, not node iterators.
        node_ids = [node.GetId() for node in G.Nodes()]
        results = dict()
        bigtotal = 0.
        bigcount = 0.
        for src in node_ids:
            pathtotal = 0.
            count = 0.
            for dst in node_ids:
                dist = snap.GetShortPath(G, src, dst)
                if dist < 0:
                    # snap reports -1 when no path exists; leave such pairs
                    # out instead of subtracting them from the total.
                    continue
                pathtotal += dist
                count += 1
            if not count:
                continue
            mean = pathtotal / count
            results[mean] = results.get(mean, 0) + 1
            bigtotal += pathtotal
            bigcount += count
        if not bigcount:
            # Graph without nodes: nothing to report or plot.
            continue
        print(label, bigtotal / bigcount)
        x2 = sorted(results)
        y2 = [results[key] for key in x2]
        plt.loglog(x2, y2, color=color, label=label)
    plt.show()
예제 #7
0
def getDistances(graph):
    """Compute pairwise graph distances between the DBLP users found in the graph.

    ``csv/dblpusers.csv`` is read as ``||``-separated records
    ``<id>||<name>||<third field>``.  A record is used when its third field is
    non-empty and its name matches the ``name`` attribute of a graph node.
    The shortest-path length is computed for every unordered pair of such
    users and written, in both directions, to ``csv/distances.csv`` as
    ``<id1>||<id2>||<distance>`` lines.

    Args:
        graph: snap network whose nodes carry a string attribute ``name``.

    Returns:
        dict: ``{user id: {other user id: distance}}``.
    """
    distances = {}
    nameToNId = {}
    for n in graph.Nodes():
        nid = n.GetId()
        # presumably the attribute is a byte string (Python 2) -- hence decode
        nameToNId[graph.GetStrAttrDatN(nid, 'name').decode('utf-8')] = nid

    with codecs.open('csv/dblpusers.csv', 'r', 'utf-8') as infile:
        lines = infile.read().splitlines()

    dblpUsers = []
    for line in lines:
        tokens = line.split('||')
        if len(tokens) < 3:
            # Blank or truncated record: skip it rather than fail on indexing.
            continue
        if tokens[2] != '' and tokens[1] in nameToNId:
            user_id = int(tokens[0])
            distances[user_id] = {}
            dblpUsers.append({'name': tokens[1], 'id': user_id})

    for i in range(len(dblpUsers)):
        n1 = dblpUsers[i]['name']
        i1 = dblpUsers[i]['id']
        for j in range(i + 1, len(dblpUsers)):
            n2 = dblpUsers[j]['name']
            i2 = dblpUsers[j]['id']
            # shortest path behavior is weird if no path exists
            dist = snap.GetShortPath(graph, nameToNId[n1], nameToNId[n2])
            distances[i1][i2] = dist
            distances[i2][i1] = dist

    with open('csv/distances.csv', 'w') as outfile:
        for i1 in distances:
            for i2 in distances[i1]:
                outfile.write(
                    str(i1) + '||' + str(i2) + '||' + str(distances[i1][i2]) +
                    '\n')
    return distances
예제 #8
0
def compute_closeness_centrality(G, GName, Nodes, nodes, edges):
    """Compute a closeness centrality for every node of ``G``.

    The centrality of a node is ``nodes / total`` where ``total`` is the sum
    of the distances reported by a single-source search plus a penalty of
    ``nodes`` for each of the ``nodes - len(table)`` entries missing from the
    search result.  The ascending ranking is handed to ``store_in_file``; the
    descending ranking is returned.

    Args:
        G: snap graph.
        GName: Graph name forwarded to ``store_in_file``.
        Nodes: Unused.
        nodes: Node count used in the formula.
        edges: Unused.

    Returns:
        list: ``[centrality, node id]`` pairs sorted in descending order.
    """
    began = time.time()
    ranking = []

    for NI in G.Nodes():
        dist_table = snap.TIntH()
        snap.GetShortPath(G, NI.GetId(), dist_table)

        reachable_total = 0
        for key in dist_table:
            reachable_total += dist_table[key]

        # incorporating unreachable nodes
        penalty = nodes * (nodes - len(dist_table))
        ranking.append([float(nodes) / (reachable_total + penalty), NI.GetId()])

    elapsed = time.time() - began
    # Joined with single spaces to reproduce the comma-separated print output.
    print(' '.join([
        "Execution for Closeness Centrality completed in ",
        str(elapsed // 60),
        " mins and ",
        str((elapsed // 1) % 60),
        "seconds",
    ]))

    ranking.sort()
    store_in_file(ranking, "closeness_centrality", GName)
    ranking.sort(reverse=True)

    return ranking
def get_graph_distance(G, n1, n2, directed=False):
    """Return the negated shortest-path length from n1 to n2, ignoring a direct edge.

    If the edge ``n1 -> n2`` exists it is removed for the duration of the
    search and added back afterwards, so the result measures the best
    alternative route.  The value returned is ``-1 *`` whatever
    ``snap.GetShortPath`` reports.

    Args:
        G: snap graph; temporarily modified, restored before returning.
        n1: Source node id.
        n2: Destination node id.
        directed: Forwarded to ``snap.GetShortPath`` as its direction flag.

    Returns:
        int: Negated path length.
    """
    deleted = G.IsEdge(n1, n2)
    if deleted:
        G.DelEdge(n1, n2)
    try:
        return -1 * snap.GetShortPath(G, n1, n2, directed)
    finally:
        # Restore the edge even if the search raised, so G is never left altered.
        if deleted:
            G.AddEdge(n1, n2)
예제 #10
0
def avgShortestPath(G):
    """Print the average shortest-path length of ``G``, directed and undirected.

    For every source node a single-source search is run twice (first ignoring
    edge direction, then following it).  All distances in the resulting table
    are summed and the number of table entries is counted; the two averages
    are printed, directed first.

    Args:
        G: snap graph.
    """
    totals = {False: 0.0, True: 0.0}
    counts = {False: 0, True: 0}
    for src in G.Nodes():
        dist_table = snap.TIntH()
        # Undirected pass first, then directed, sharing one table per source.
        for is_dir in (False, True):
            snap.GetShortPath(G, src.GetId(), dist_table, is_dir)
            counts[is_dir] += len(dist_table)
            for key in dist_table:
                totals[is_dir] += 1.0 * dist_table[key]
    print("Avg. Shortest Path (directed): %f" % (1.0 * totals[True] / counts[True]))
    print("Avg. Shortest Path (undirected): %f" % (1.0 * totals[False] / counts[False]))
예제 #11
0
def getAvgEfficiency(Graph):
    """Return the average (global) efficiency of the graph.

    The efficiency of a node pair is the reciprocal of its shortest-path
    length; pairs without a path contribute 0.  The result is the mean over
    all unordered pairs of distinct nodes (distances are computed with
    ``GetShortPath``'s default, undirected, setting).

    Args:
        Graph: snap graph.

    Returns:
        float: Average efficiency; ``0.0`` for graphs with fewer than two nodes.
    """
    nodes = [node.GetId() for node in Graph.Nodes()]
    n = len(nodes)
    if n < 2:
        return 0.0
    efficiency = 0.0
    for i in range(n):
        for j in range(i + 1, n):
            dist = snap.GetShortPath(Graph, nodes[i], nodes[j])
            # snap reports -1 when no path exists; such pairs add nothing.
            if dist > 0:
                efficiency += 1.0 / dist
    # Every unordered pair was visited once: average over n*(n-1)/2 pairs.
    return 2.0 * efficiency / (n * (n - 1))
예제 #12
0
def getNodeEfficiency(Graph):
    """Return the efficiency of every unordered pair of distinct nodes.

    The efficiency of a pair is the reciprocal of its shortest-path length.
    Pairs for which no positive distance is reported (snap returns -1 when
    no path exists) get an efficiency of ``0.0``.

    Args:
        Graph: snap graph.

    Returns:
        list: One float per pair, ordered by the position of the first node
        and then of the second node in ``Graph.Nodes()``.
    """
    nodes = [node.GetId() for node in Graph.Nodes()]
    n = len(nodes)
    efficiency = []
    for i in range(n):
        for j in range(i + 1, n):
            dist = snap.GetShortPath(Graph, nodes[i], nodes[j])
            efficiency.append(1.0 / dist if dist > 0 else 0.0)
    return efficiency
예제 #13
0
def closeness_centrality_node(graph, node, sample=None):
    """Return the harmonic closeness of ``node`` over a set of target ids.

    The targets are taken from ``range(graph.GetNodes())``, i.e. node ids are
    assumed to be ``0 .. N-1`` (TODO confirm for the graphs in use).  Without
    ``sample`` every id is a target.  With ``sample`` only its *length* is
    used: that many ids are drawn at random; the contents of ``sample`` are
    not consulted.

    Args:
        graph: snap graph.
        node: Source node id.
        sample: Optional sized collection; see above.

    Returns:
        float: Sum of ``1 / distance`` over targets with a positive distance,
        divided by the number of targets; ``0.0`` if there are no targets.
    """
    node_count = graph.GetNodes()
    if not sample:
        targets = set(range(node_count))
    else:
        # range() works on Python 2 and 3 alike (xrange exists only on 2).
        targets = random.sample(range(node_count), len(sample))
    N = len(targets)
    if N == 0:
        return 0.0
    distances = (snap.GetShortPath(graph, node, x) for x in targets)
    return sum(1.0 / d for d in distances if d > 0) / N
예제 #14
0
def q2_4_aux(name):
    """Estimate the fraction of random node pairs that are joined by a path.

    Draws 1000 pairs of random node ids from the graph loaded via
    ``load_graph(name)`` and counts the pairs for which
    ``snap.GetShortPath`` does not return -1.

    Args:
        name: Passed unchanged to ``load_graph``.

    Returns:
        float: Connected pairs divided by 1000.
    """
    G = load_graph(name)
    trials = 1000
    hits = 0.0
    for _ in range(trials):
        src = G.GetRndNId()
        dst = G.GetRndNId()
        if snap.GetShortPath(G, src, dst) == -1:
            continue
        hits += 1
    return hits / trials
예제 #15
0
 def calc_path_prob(graph):
     """Estimate the probability that a directed path joins two random nodes.

     Samples 1000 pairs of random node ids and counts those whose directed
     shortest-path length is strictly positive.

     Args:
         graph: snap graph.

     Returns:
         float: Fraction of sampled pairs with a positive path length.
     """
     trials = 1000

     def pair_is_connected():
         src = graph.GetRndNId()
         dst = graph.GetRndNId()
         return snap.GetShortPath(graph, src, dst, True) > 0

     hits = sum(1 for _ in range(trials) if pair_is_connected())
     return float(hits) / float(trials)
예제 #16
0
def ProbabilityOfPath(Graph, nSamples=1000):
    '''
    Given a graph, returns the sampled likelihood, as a percentage
    (0-100), of two random nodes being connected by a directed path.

    Takes nSamples random (u, v) pairs, drawn with the module-level
    random generator ``Rnd``, and counts the pairs whose shortest-path
    result differs from the module-level ``NO_PATH`` marker.

    Returns 0.0 when nSamples is not positive.
    '''
    if nSamples <= 0:
        return 0.0
    paths = 0.0
    # range() keeps this runnable on Python 3, where xrange does not exist.
    for _ in range(nSamples):
        u, v = Graph.GetRndNId(Rnd), Graph.GetRndNId(Rnd)
        if snap.GetShortPath(Graph, u, v, True) != NO_PATH:
            paths += 1
    return 100 * paths / nSamples
예제 #17
0
    def getHarmonicClosenessCentr(self, G, nodeId):
        """Return the harmonic closeness centrality of ``nodeId`` in ``G``.

        Sums ``1 / distance`` over every node present in the single-source
        search result other than ``nodeId`` itself, then divides by
        ``G.GetNodes() - 1``.

        Args:
            G: snap graph.
            nodeId: Id of the node to score.

        Returns:
            float: The centrality value.
        """
        distances = snap.TIntH()
        snap.GetShortPath(G, nodeId, distances)
        reciprocal_sum = 0.0
        for other in distances:
            if other == nodeId:
                continue
            reciprocal_sum += 1 / float(distances[other])
        return reciprocal_sum / (G.GetNodes() - 1)
예제 #18
0
 def avg_path_length(self):
     """Brute-force average shortest-path length over connected node pairs.

     Every pair ``(i, j)`` with ``1 <= i < j <= self._num_nodes`` is queried,
     i.e. node ids are taken to be ``1 .. self._num_nodes``.  Only pairs with
     a strictly positive path length enter the average.

     Returns:
         float: Mean path length, or ``0.0`` when no pair is connected.
     """
     # TODO: Maybe add dynamic programming to speed up the operation
     total_path_length = 0
     num_path = 0
     for i in range(1, self._num_nodes):
         for j in range(i + 1, self._num_nodes + 1):
             length = snap.GetShortPath(self._graph, i, j)
             if length > 0:
                 num_path += 1
                 total_path_length += length
     if num_path == 0:
         # No connected pair at all: avoid dividing by zero.
         return 0.0
     return 1.0 * total_path_length / num_path
예제 #19
0
def TestFracPath(Graph, num_test=1000):
    """Print the fraction of random distinct node pairs joined by a directed path.

    Pairs are drawn until ``num_test`` pairs with two different node ids have
    been tested; draws that return the same id twice are discarded and
    repeated.

    Args:
        Graph: snap graph.
        num_test: Number of distinct-node pairs to test.
    """
    Rnd = snap.TRnd(42)
    Rnd.Randomize()
    reachable = 0
    tested = 0
    while tested < num_test:
        src = Graph.GetRndNId(Rnd)
        dst = Graph.GetRndNId(Rnd)
        if src == dst:
            # Same node drawn twice: does not count as a test.
            continue
        tested += 1
        if snap.GetShortPath(Graph, src, dst, True) > 0:
            reachable += 1
    print(" fraction of reachable pairs", reachable / num_test)
예제 #20
0
def GetTrustList(fSourceNode, graph, visitPath, totalNodeList, nodeMapNumDict):
    """Return a trust value for every node of ``visitPath``.

    Trust is the reciprocal of the shortest-path length from ``fSourceNode``
    to the node.  Nodes for which no positive distance is reported (the
    source itself, or nodes without a path -- snap returns -1 then) receive a
    trust of ``0.0``.

    Args:
        fSourceNode: Source node, as a key of ``nodeMapNumDict``.
        graph: snap graph.
        visitPath: Sequence of nodes (keys of ``nodeMapNumDict``) to score.
        totalNodeList: Unused.
        nodeMapNumDict: Mapping from node to the id used inside ``graph``.

    Returns:
        list: Floats, aligned with ``visitPath``.

    Raises:
        KeyError: If a node is missing from ``nodeMapNumDict``.
    """
    tSourceIndex = nodeMapNumDict[fSourceNode]
    trustList = []
    for node in visitPath:
        dist = snap.GetShortPath(graph, tSourceIndex, nodeMapNumDict[node])
        trustList.append(1.0 / dist if dist > 0 else 0.0)
    return trustList
예제 #21
0
def harmonic_closeness_centrality():
    """Append the harmonic closeness of every node of ``UGraph`` to ``harmonicList``.

    Works on the module-level ``UGraph`` and ``harmonicList``.  For each node
    the reciprocals of all non-zero distances in its single-source search
    result are summed and scaled by ``1 / (N - 1)``, ``N`` being the number
    of nodes.  A graph with a single node yields ``0.0``.

    Returns:
        list: ``harmonicList`` with one ``[node id, centrality]`` entry
        appended per node.
    """
    sizeGraph = UGraph.GetNodes()

    for node in UGraph.Nodes():
        NIdToDistH = snap.TIntH()
        snap.GetShortPath(UGraph, node.GetId(), NIdToDistH)
        hashTableCount = 0.0
        for x in NIdToDistH:
            if NIdToDistH[x] != 0:
                # 1.0 forces true division on Python 2 as well.
                hashTableCount += 1.0 / NIdToDistH[x]
        if sizeGraph > 1:
            calculation = (1.0 / (sizeGraph - 1)) * hashTableCount
        else:
            calculation = 0.0
        harmonicList.append([node.GetId(), calculation])

    return harmonicList
예제 #22
0
def predictLinksNegatedShortestPath(GCombined, nodesAtHop, itemNodeIds, userNodeIds, directory):
    """Score every (user, item) pair by inverse path length and pickle the scores.

    A pair scores ``0.0`` when either id is not a node of ``GCombined`` or
    the edge user -> item already exists; otherwise it scores
    ``1.0 / shortest path length`` (undirected search).  The nested dict
    ``{user: {item: score}}`` is pickled to
    ``directory + 'NegatedShortestPath'``.

    Args:
        GCombined: snap graph containing user and item nodes.
        nodesAtHop: Unused.
        itemNodeIds: Iterable of item node ids.
        userNodeIds: Iterable of user node ids.
        directory: Output path prefix; concatenated as-is with the file name.
    """
    scores = {}
    for user in userNodeIds:
        for item in itemNodeIds:
            row = scores.setdefault(user, {})
            scorable = (GCombined.IsNode(user)
                        and GCombined.IsNode(item)
                        and not GCombined.IsEdge(user, item))
            if scorable:
                row[item] = 1.0 / snap.GetShortPath(GCombined, user, item, False)
            else:
                row[item] = 0.0
    with open(directory + 'NegatedShortestPath', 'wb') as outfile:
        pickle.dump(scores, outfile)
def path_proba(graph, name, n=1000):
    """Estimate the probability that a directed path joins two distinct random nodes.

    For each of ``n`` trials two different random node ids ``a`` and ``b``
    are drawn; the trial succeeds when ``b`` appears in the distance table of
    a directed single-source search from ``a``.  The estimate is printed and
    returned.

    Args:
        graph: snap graph (needs at least two nodes, otherwise the redraw
            loop for ``b`` cannot terminate).
        name: Network name, used in the printed message only.
        n: Number of trials.

    Returns:
        float: Successful trials divided by ``n``; ``0.0`` when ``n`` is 0.
    """
    p = 0
    for _ in range(n):
        a = graph.GetRndNId()
        b = graph.GetRndNId()
        while a == b:
            b = graph.GetRndNId()
        NIdToDistH = snap.TIntH()
        snap.GetShortPath(graph, a, NIdToDistH, True)
        if b in NIdToDistH:
            p += 1
    # float() avoids Python 2 integer division, which truncated the ratio to 0.
    proba = float(p) / n if n else 0.0
    print('Using {} random pairs, the probability that a path exists between two nodes is '
          '{} for the {} network'.format(n, proba, name))
    return proba
예제 #24
0
    def getClosenessCentralities(self):
        """Return, for each node of ``self.network``, its mean absolute path length.

        For every origin node the absolute values of the shortest-path
        results to all nodes of the network are summed and divided by the
        number of nodes.

        Returns:
            list: One float per node, in ``self.network.Nodes()`` order.
        """
        network = self.network
        centralities = []
        for origin in network.Nodes():
            origin_id = origin.GetId()
            path_sum = sum(
                abs(snap.GetShortPath(network, origin_id, other.GetId()))
                for other in network.Nodes())
            centralities.append(float(path_sum) / float(network.GetNodes()))
        return centralities
예제 #25
0
def get_dist_distribution(filename, sample_count):
    """Sample the shortest-path length distribution of an undirected graph.

    Loads an undirected graph from the edge list ``filename``, draws
    ``sample_count`` random pairs of distinct nodes and tallies every
    strictly positive path length.  The raw tallies are passed to
    ``calculate_spid`` (its result is printed) and are then normalised by
    ``sample_count``.

    Args:
        filename: Path of the edge-list file.
        sample_count: Number of node pairs to sample.

    Returns:
        collections.defaultdict: ``{path length: share of all samples}``.
    """
    graph = snap.LoadEdgeList(snap.PUNGraph, filename)
    node_list = [node.GetId() for node in graph.Nodes()]
    distance_dst = collections.defaultdict(int)
    for _ in range(sample_count):
        src, dst = random.sample(node_list, 2)
        dist = snap.GetShortPath(graph, src, dst, False)
        if dist <= 0:
            continue
        distance_dst[dist] += 1
    print("spid is " + str(calculate_spid(distance_dst)) + " for " + str(
        sample_count) + " samples")
    denominator = float(sample_count)
    for length in distance_dst:
        distance_dst[length] /= denominator
    return distance_dst
예제 #26
0
def ssspfun(R0):
    """Compute the shortest-path length for every node pair in ``R0``.

    Uses the module-level ``Graph``.  Every 100 pairs a progress line is
    printed and appended (followed by a blank line) to
    ``percent_done_4testing.txt`` in the working directory.

    Args:
        R0: Sized sequence of pairs; ``r[0]`` is the source node id and
            ``r[1]`` the destination node id.

    Returns:
        The path lengths, in input order, wrapped by the module-level
        ``array`` constructor (presumably numpy's -- confirm).
    """
    paths = []
    start0 = time.time()
    total = len(R0)
    for i, r in enumerate(R0):
        if i % 100 == 0:
            progress = "%2.2f percent done, node %i out of %i" % (
                100 * float(i) / total, i, total)
            print(progress)
            print("    %2.2f seconds have elapsed so far..." % (time.time() -
                                                                start0))
            # Append directly instead of spawning a shell running `echo`.
            with open("percent_done_4testing.txt", "a") as log:
                log.write(progress + "\n\n")
        paths.append(snap.GetShortPath(Graph, r[0], r[1]))
    print("Done!")
    return array(paths)
예제 #27
0
def get_land_D_mtx(land_ids_temp):
    """Build the landmark-to-node distance matrix.

    Uses the module-level ``Graph``, ``V0`` and ``conn_node_ids``.  Row ``i``
    holds the distances from landmark ``i``; the distance to node id ``k`` is
    stored in column ``k - 1``.  Progress is printed and appended (followed
    by a blank line) to ``percent_done_4landmarks.txt``.  Finally only the
    columns ``conn_node_ids - 1`` are kept.

    Args:
        land_ids_temp: Sized sequence of landmark node ids (converted with
            ``int()``).

    Returns:
        The ``int8`` matrix with one row per landmark and one column per
        entry of ``conn_node_ids``.
    """
    print("Getting landmark -> node distance matrix...")
    # NOTE(review): int8 storage cannot represent distances above 127.
    L2n = zeros((len(land_ids_temp), V0)).astype('int8')
    for i, landmark in enumerate(land_ids_temp):
        progress = "   getting sssp for node %i out of %i" % (
            i + 1, len(land_ids_temp))
        print(progress)
        # Append directly instead of spawning a shell running `echo`.
        with open("percent_done_4landmarks.txt", "a") as log:
            log.write(progress + "\n\n")
        sys.stdout.flush()
        NIdToDistH = snap.TIntH()
        snap.GetShortPath(Graph, int(landmark), NIdToDistH)
        for item in NIdToDistH:
            L2n[i, item - 1] = NIdToDistH[item]
    L2n = L2n[:, conn_node_ids - 1]
    return L2n
예제 #28
0
def GetTrustList(fSourceNode, graph, visitPath, totalNodeList, nodeMapNumDict):
    """Keep the nodes of ``visitPath`` that are exactly one hop from the source.

    Args:
        fSourceNode: Source node, as a key of ``nodeMapNumDict``.
        graph: snap graph.
        visitPath: Sequence of nodes (keys of ``nodeMapNumDict``) to filter.
        totalNodeList: Unused.
        nodeMapNumDict: Mapping from node to the id used inside ``graph``.

    Returns:
        tuple: ``(newVisitPath, trustList)`` -- the nodes whose shortest-path
        length from the source equals 1, in their original order, and a list
        of the string ``'1'`` of the same length.

    Raises:
        KeyError: If a node is missing from ``nodeMapNumDict``.
    """
    tSourceIndex = nodeMapNumDict[fSourceNode]
    newVisitPath = []
    for node in visitPath:
        if snap.GetShortPath(graph, tSourceIndex, nodeMapNumDict[node]) == 1:
            newVisitPath.append(node)
    # Every kept node carries the same trust marker.
    trustList = ['1'] * len(newVisitPath)
    return newVisitPath, trustList
    def sample_shortest_path(self, n_node=100, isDir=False):
        '''
        Sample shortest-path lengths between randomly chosen node pairs.

        Two independent samples (without replacement) of ``n_node`` entries
        are drawn from ``self.nodes``; the i-th source is paired with the
        i-th destination. A source and its destination may coincide.

        :param n_node: number of node pairs to sample; capped at
                       ``self.num_nodes``
        :param isDir: whether edge direction is respected by the search
        :return: list with one ``GetShortPath`` result per sampled pair
        '''
        snap = self.snap
        n_node = min(self.num_nodes, n_node)
        nodes = self.nodes
        src = np.random.choice(nodes, n_node, replace=False)
        dest = np.random.choice(nodes, n_node, replace=False)
        # isDir is forwarded so that the parameter actually takes effect.
        return [
            snap.GetShortPath(self.graph, int(s), int(d), isDir)
            for s, d in zip(src, dest)
        ]
예제 #30
0
def q2_4_utils(dataset_name):
    """Print how many random node pairs are / are not joined by a directed path.

    Performs 1000 draws of two random node ids from the graph loaded via
    ``load_graph(dataset_name)``.  A draw that yields the same id twice still
    counts as one of the 1000 draws but is not tallied.  Pairs with a
    strictly positive directed path length are tallied as positive, all
    others as negative.

    Args:
        dataset_name: Passed unchanged to ``load_graph``.
    """
    G = load_graph(dataset_name)
    Rnd = snap.TRnd(42)
    Rnd.Randomize()
    tally = {True: 0, False: 0}
    for _ in range(1000):
        NId_src = G.GetRndNId(Rnd)
        NId_dst = G.GetRndNId(Rnd)
        if NId_src == NId_dst:
            continue
        tally[snap.GetShortPath(G, NId_src, NId_dst, True) > 0] += 1
    print('positive_count %s' % tally[True])
    print('negative_count %s' % tally[False])