Beispiel #1
0
def Q2_1_Experiments():
    """Print SCC share and forward/backward reach for hand-picked probe nodes.

    For the email and the Epinions networks this prints the percentage of
    nodes inside the largest SCC, followed by the two values returned by
    GetForwardBackwardProp for three node IDs per network.
    """
    probe_format = "Forward %s and Backward %s for ID: %s."

    def scc_percentage(graph):
        # Share (in percent) of the graph's nodes inside its largest SCC.
        largest = snap.GetMxScc(graph)
        return 100 * float(largest.GetNodes()) / graph.GetNodes()

    def probe(graph, node_id):
        # Report the forward/backward proportions for one node.
        forward, backward = GetForwardBackwardProp(graph, node_id)
        print(probe_format % (forward, backward, node_id))

    # Load graphs.
    epinions, email = loadNetworks()

    # ### Email Model
    print("Email SCC: %s." % (scc_percentage(email)))

    # The proposal is that 189587 is in SCC, so we have:
    # SCC + OUT = 19.6456446492%
    # OUT = 6.7492666299%
    # SCC + IN =  69.8402045141
    # OTHER: 23.410528856
    probe(email, 189587)

    # The proposal is that 675 is in OUT, so we have:
    # SCC + IN + \epsilon =  69.8454832701%
    probe(email, 675)

    # One more node to get a better picture.
    # The proposal is that 0 is in IN, so we have:
    # SCC + OUT + \epsilon =  19.6460217032%
    probe(email, 0)

    # ### Epinions Model
    print("Epinions SCC: %s." % (scc_percentage(epinions)))

    # Proposal: 1952 is in IN region.
    # SCC + OUT + \epsilon = 62.8329313776%
    probe(epinions, 1952)

    # Proposal: 9809 is in OUT region.
    # SCC + IN + \epsilon = 74.4079389554%
    probe(epinions, 9809)

    # Proposal: 193 is in SCC region.
    # SCC + OUT = 62.8316134899%
    # OUT = 20.365318467599997%
    # SCC + IN = 74.4066210678
    # REST = 5.228060464599995%
    probe(epinions, 193)
def bowtie_components(graph, name):
    """Print and return the sizes of the bow-tie regions of a directed graph.

    Args:
        graph: snap directed graph to analyse.
        name: label used in the printed summary.

    Returns:
        dict mapping "a. SCC", "b. IN", "c. OUT", "d. TENDRILS + TUBES" and
        "e. DISCONNECTED" to node counts.
    """
    N = graph.GetNodes()

    largest_scc = snap.GetMxScc(graph)
    # All members of one SCC reach (and are reached by) the same node set,
    # so a random member is a valid BFS seed.
    seed = largest_scc.GetRndNId()

    disc = N - snap.GetMxWcc(graph).GetNodes()
    scc = largest_scc.GetNodes()
    # Backward BFS covers SCC + IN, forward BFS covers SCC + OUT.
    backward_tree = snap.GetBfsTree(graph, seed, False, True)
    forward_tree = snap.GetBfsTree(graph, seed, True, False)
    in1 = backward_tree.GetNodes() - scc
    out = forward_tree.GetNodes() - scc
    # Whatever remains of the largest WCC is tendrils and tubes.
    tt = N - disc - scc - in1 - out

    results = {
        "a. SCC": scc,
        "b. IN": in1,
        "c. OUT": out,
        "d. TENDRILS + TUBES": tt,
        "e. DISCONNECTED": disc,
    }

    # Single-argument print() gives the same output on Python 2 and 3.
    print('Total nodes in {} network: {}'.format(name, N))
    print('DISCONNECTED: {}'.format(disc))
    print('SCC: {}'.format(scc))
    print('IN: {}'.format(in1))
    print('OUT: {}'.format(out))
    print('TENDRILS + TUBES: {}'.format(tt))

    return results
def get_shortest_path(file_path, output_path):
    """Write the shortest-path length distribution of the largest SCC to CSV.

    Runs a directed shortest-path search from every node of the largest SCC
    of the graph loaded from ``file_path`` and counts how often each distance
    occurs (the start node itself is included in the distance table returned
    by snap). Progress is printed as ``done/total`` after each start node.

    Args:
        file_path: input passed to load_graph (expected to return a
            ``(graph, hash)`` pair; only the graph is used here).
        output_path: destination CSV with columns ``dist`` and ``freq``,
            sorted by ``dist`` ascending.
    """
    Graph, _ = load_graph(file_path)
    largest_scc = snap.GetMxScc(Graph)
    total = largest_scc.GetNodes()

    path_distr = {}
    for done, NI in enumerate(largest_scc.Nodes(), 1):
        dist_by_node = snap.TIntH()
        snap.GetShortPath(largest_scc, NI.GetId(), dist_by_node, True)
        for node_id in dist_by_node:
            dist = dist_by_node[node_id]
            path_distr[dist] = path_distr.get(dist, 0) + 1
        print('%d/%d' % (done, total))

    rows = [{'dist': dist, 'freq': freq} for dist, freq in path_distr.items()]
    # Explicit columns keep the frame well-formed even when there are no rows.
    dataset = pd.DataFrame(rows, columns=['dist', 'freq'])
    # DataFrame.sort() no longer exists in current pandas; sort_values() is
    # the replacement.
    dataset.sort_values('dist', ascending=True, inplace=True)
    dataset.to_csv(output_path, index=False, encoding='utf-8')
Beispiel #4
0
def SizeOfBowtieRegions(Graph, sccNodeID):
    '''
    Compute the node count of each bow-tie region of Graph, as described in
    http://snap.stanford.edu/class/cs224w-readings/broder00bowtie.pdf

    sccNodeID must be a node of the largest SCC (and therefore of the
    largest WCC); both conditions are asserted.

    returns: tuple of sizes (SCC, IN, OUT, TENDRILS, DISCONNECTED)
    '''
    largestWcc = snap.GetMxWcc(Graph)
    assert largestWcc.IsNode(sccNodeID)

    largestScc = snap.GetMxScc(Graph)
    # Sanity check the input.
    assert largestScc.IsNode(sccNodeID)

    wccSize = largestWcc.GetNodes()
    sccSize = largestScc.GetNodes()

    # Forward BFS reaches SCC + OUT, backward BFS reaches SCC + IN.
    forwardReach = snap.GetBfsTree(Graph, sccNodeID, True, False).GetNodes()
    backwardReach = snap.GetBfsTree(Graph, sccNodeID, False, True).GetNodes()

    inSize = backwardReach - sccSize
    outSize = forwardReach - sccSize
    tendrilSize = wccSize - (inSize + outSize + sccSize)
    disconnectedSize = Graph.GetNodes() - wccSize

    sizes = (sccSize, inSize, outSize, tendrilSize, disconnectedSize)
    assert sum(sizes) == Graph.GetNodes()
    return sizes
Beispiel #5
0
def q2_1_aux(name, id):
    """Print BFS reach sizes for a node and a guess at its bow-tie region.

    Args:
        name: dataset name passed to load_graph.
        id: node id used as the BFS start node.
    """
    G = load_graph(name)

    # Each tree is built once and used for both the membership test and the
    # size report.
    out_tree = snap.GetBfsTree(G, id, True, False)
    in_tree = snap.GetBfsTree(G, id, False, True)
    scc_probe = snap.GetMxScc(G).GetRndNId()

    reaches_scc = out_tree.IsNode(scc_probe)
    reached_from_scc = in_tree.IsNode(scc_probe)

    # Single-argument print() gives the same output on Python 2 and 3.
    print("graph: %s" % (name,))
    print("nodeId %s" % (id,))
    print("sizegraph %s" % (G.GetNodes(),))
    print("sizeOutTree %s" % (out_tree.GetNodes(),))
    print("sizeInTree %s" % (in_tree.GetNodes(),))

    if reaches_scc:
        if reached_from_scc:
            print("node in SCC")
        else:
            print("node in IN")
    else:
        # NOTE(review): any node that cannot reach the SCC is labelled OUT,
        # without checking the inward tree.
        print("node in OUT")
Beispiel #6
0
def q1_3_grpah(Graph):
    """Print estimated bow-tie region sizes of a directed graph.

    The largest forward/backward BFS reach over a random sample of at most
    100 nodes of the largest SCC is taken as SCC + OUT and SCC + IN.
    """
    n_nodes = Graph.GetNodes()
    MxWcc = snap.GetMxWcc(Graph)
    MxScc = snap.GetMxScc(Graph)
    n_MxWcc = MxWcc.GetNodes()
    n_MxScc = MxScc.GetNodes()
    print(" TOTAL          : ", n_nodes)
    print(" DISCONNECTED   : ", n_nodes - n_MxWcc)
    print(" SCC            : ", n_MxScc)

    SCC_nodes = [NI.GetId() for NI in MxScc.Nodes()]

    # random.sample raises ValueError when asked for more items than exist,
    # so cap the sample at the SCC size.
    num_test = min(100, len(SCC_nodes))
    random_sampled_scc = random.sample(SCC_nodes, num_test)

    # A BFS started inside the SCC always covers the whole SCC, so the SCC
    # size is a safe lower bound (and the result when nothing is sampled).
    max_out = n_MxScc
    max_in = n_MxScc
    for NodeId in random_sampled_scc:
        BfsTreeOut = snap.GetBfsTree(Graph, NodeId, True, False)
        BfsTreeIn = snap.GetBfsTree(Graph, NodeId, False, True)
        max_out = max(max_out, BfsTreeOut.GetNodes())  # roughly SCC + OUT
        max_in = max(max_in, BfsTreeIn.GetNodes())  # roughly SCC + IN

    n_out = max_out - n_MxScc
    n_in = max_in - n_MxScc
    print(" OUT            : ", n_out)
    print(" IN             : ", n_in)

    num_tendrils = n_MxWcc - n_MxScc - n_out - n_in
    print(" TENDRILS+TUBES : ", num_tendrils)
Beispiel #7
0
def get_connected_component(graph):
    """Return the largest component of ``graph`` with renumbered node ids.

    Directed graphs (snap.PNGraph) yield their largest SCC, undirected graphs
    (snap.PUNGraph) their largest WCC. Any other input raises NotAGraphError.
    """
    if isinstance(graph, snap.PNGraph):
        graph_type, extract = snap.PNGraph, snap.GetMxScc
    elif isinstance(graph, snap.PUNGraph):
        graph_type, extract = snap.PUNGraph, snap.GetMxWcc
    else:
        raise NotAGraphError(graph)
    component = extract(graph)
    # renumber the node numbers from 0 to the size-1
    return snap.ConvertGraph(graph_type, component, True)
def processNetwork(Graph, id_to_groups):
    """Write a text report about ``Graph`` to a fixed path under ../../data.

    The report lists edge/node counts, the largest WCC and SCC (node ids and
    the ``id_to_groups`` entry for each id), every WCC/SCC with at least 10
    members, and the clustering coefficient, triad count and modularity.
    """

    def write_largest(out, header, component):
        # Counts, node ids and mapped group names of one component graph.
        out.write(header)
        out.write('Edges: %f ' % component.GetEdges())
        out.write('Nodes: %f \n' % component.GetNodes())
        out.write('Node List: ')
        for member in component.Nodes():
            out.write('%d, ' % member.GetId())
        out.write('\n')
        for member in component.Nodes():
            out.write('%s, ' % id_to_groups[member.GetId()])

    def write_members(out, line_format, components):
        # One line per component that has 10 or more members.
        for position, component in enumerate(components):
            if component.Len() >= 10:
                out.write(line_format % position)
                for member_id in component:
                    out.write('%d, ' % member_id)

    with open("../../data/fastinf_graph_noweights_features.txt", "w+") as f:
        f.write("RELATED GROUPS GRAPH:\n")
        f.write('Edges: %d\n' % Graph.GetEdges())
        f.write('Nodes: %d\n\n' % Graph.GetNodes())

        write_largest(f, "MAX WCC:\n", snap.GetMxWcc(Graph))

        f.write("\n\nALL WCCs:")
        weak_components = snap.TCnComV()
        snap.GetWccs(Graph, weak_components)
        write_members(f, '\nWcc%d: ', weak_components)

        write_largest(f, "\n\nMAX SCC:\n", snap.GetMxScc(Graph))

        f.write("\n\nALL SCCs:")
        strong_components = snap.TCnComV()
        snap.GetSccs(Graph, strong_components)
        write_members(f, '\nScc%d: ', strong_components)

        f.write('\n\nCLUSTERING AND COMMUNITIES:\n')
        f.write('Clustering coefficient: %f\n' % snap.GetClustCf(Graph, -1))
        f.write('Num Triads: %d\n' % snap.GetTriads(Graph, -1))
        # Modularity is computed with every node of the graph in one group.
        every_node = snap.TIntV()
        for member in Graph.Nodes():
            every_node.Add(member.GetId())
        f.write('Modularity: %f' % snap.GetModularity(Graph, every_node))
Beispiel #9
0
def get_densification(df):
    """Collect per-year size and effective-diameter statistics.

    For every distinct value of ``df['year']`` (ascending) a graph is built
    with get_graph from that year's rows. Its node count, edge count and the
    ANF effective diameter of its largest SCC are recorded.

    Returns:
        (node_counts, edge_counts, anf_diameters, years), the first three
        aligned with ``years``.
    """
    years = sorted(df['year'].unique())
    node_counts, edge_counts, anf_diameters = [], [], []
    for current_year in years:
        yearly_rows = df[df['year'] == current_year]
        G = get_graph(yearly_rows)
        node_counts.append(G.GetNodes())
        edge_counts.append(G.GetEdges())
        largest_scc = snap.GetMxScc(G)
        anf_diameters.append(snap.GetAnfEffDiam(largest_scc))
    return node_counts, edge_counts, anf_diameters, years
def get_labeled_LSCC_for_paths(file_path, output_path_LSCC, output_path_hash):
    """Dump the largest SCC as an edge list plus an id-to-key table.

    Args:
        file_path: input passed to load_graph, which returns ``(graph, H)``;
            ``H.GetKey(id)`` supplies the label written for each node id.
        output_path_LSCC: file receiving one ``src dst`` line per out-edge of
            every node in the largest SCC.
        output_path_hash: file receiving one ``id key`` line per node.

    Both files are overwritten. Each processed node id is printed.
    """
    Graph, H = load_graph(file_path)
    MxScc = snap.GetMxScc(Graph)
    # Mode 'w' truncates existing content, and the with-block guarantees both
    # files are flushed and closed (they used to stay open).
    with open(output_path_LSCC, 'w') as f_graph, \
            open(output_path_hash, 'w') as f_hash:
        print('-----clear')
        print('-----clear')
        for NI in MxScc.Nodes():
            ID = NI.GetId()
            f_hash.write('%d %s\n' % (ID, H.GetKey(ID)))
            for des in NI.GetOutEdges():
                f_graph.write('%d %d\n' % (ID, des))
            print(ID)
def max_scc_evolution(graphs, name, time_units, verbose=False, duration=None):
    """Plot how the node count of the largest SCC changes across ``graphs``.

    The x axis is ``0..len(graphs)-1`` unless ``duration`` (a
    ``(first, last)`` pair, inclusive) is given. The figure is saved as
    ``SCCnodes_time_<name>`` and shown when ``verbose`` is true.
    """
    scc_sizes = [snap.GetMxScc(g).GetNodes() for g in graphs]
    if duration is None:
        time_axis = range(len(scc_sizes))
    else:
        time_axis = range(duration[0], duration[1] + 1)
    plt.plot(time_axis, scc_sizes)
    plt.xlabel("Time in {}".format(time_units))
    plt.ylabel("Number of nodes in largest SCC")
    plt.title("SCC nodes evolution of {} graphs".format(name))
    plt.savefig("SCCnodes_time_{}".format(name))
    if verbose:
        plt.show()
Beispiel #12
0
def q2_3_util(dataset_name):
    """Print the bow-tie region sizes of the named dataset.

    Args:
        dataset_name: name passed to load_graph.

    IN and OUT come from a backward/forward BFS started at a random node of
    the largest SCC; TENDRILS + TUBES is what remains of the largest WCC.
    """
    G = load_graph(dataset_name)
    MxWcc = snap.GetMxWcc(G)
    total_size = G.GetNodes()
    wcc_size = MxWcc.GetNodes()
    disconnected_size = total_size - wcc_size
    # '%s %s' reproduces the space the print statement put between items.
    print('%s %s' % ('Total size: ', total_size))
    print('%s %s' % ('WCC size: ', wcc_size))
    print('%s %s' % ('DISCONNECTED: ', disconnected_size))
    Rnd = snap.TRnd(42)
    # NOTE(review): Randomize() presumably re-seeds the generator, which would
    # override the fixed seed 42 — confirm this is intended.
    Rnd.Randomize()
    MxScc = snap.GetMxScc(G)
    scc_size = MxScc.GetNodes()
    number_of_trials = 1
    scc_plus_out = 0
    scc_plus_in = 0
    out_size = 0
    in_size = 0
    tendrils_plus_tubes = 0
    for _ in range(number_of_trials):
        NId = MxScc.GetRndNId(Rnd)
        # Count the nodes of the BFS tree directly. Counting edge
        # destinations leaves out the start node, which is part of the SCC.
        reached_out = snap.GetBfsTree(G, NId, True, False).GetNodes()
        scc_plus_out = max(scc_plus_out, reached_out)
        out_size = max(out_size, scc_plus_out - scc_size)

        reached_in = snap.GetBfsTree(G, NId, False, True).GetNodes()
        scc_plus_in = max(scc_plus_in, reached_in)
        in_size = max(in_size, scc_plus_in - scc_size)
        # The SCC itself must be removed from the WCC as well; otherwise its
        # nodes are counted as tendrils/tubes.
        tendrils_plus_tubes = max(
            tendrils_plus_tubes, wcc_size - scc_size - in_size - out_size)

    print('%s %s' % ('IN: ', in_size))
    print('%s %s' % ('scc_size', scc_size))
    print('%s %s' % ('scc + out: ', scc_plus_out))
    print('%s %s' % ('OUT: ', out_size))
    print('%s %s' % ('scc + in: ', scc_plus_in))
    print('%s %s' % ('TENDRILS + TUBES', tendrils_plus_tubes))
    print('------------------')
def analyze_network(
    k=1000,
    fanout=1,
    fanout_samples=1,
    graph_in_path='bad_actors.graph'
):
    """
    @params: [k (int), fanout, fanout_samples, graph_in_path (str)]
    @returns: None

    Loads a TNEANet from 'graph_in_path', prints snap's basic graph
    information and the node count of the largest strongly connected
    component, then passes k, fanout, fanout_samples and the graph on to
    visualize_k_random_users.
    """
    graph_stream = snap.TFIn(graph_in_path)
    graph = snap.TNEANet.Load(graph_stream)
    snap.PrintInfo(graph, 'Basic Graph Information', '/dev/stdout', False)
    largest_scc_nodes = snap.GetMxScc(graph).GetNodes()
    print(
        'Nodes in largest strongly-connected subcomponent: %d' %
        largest_scc_nodes
    )
    visualize_k_random_users(k, fanout, fanout_samples, graph)
Beispiel #14
0
def q2_3_aux(name):
    """Print the bow-tie region sizes for the dataset ``name``.

    IN and OUT are derived from the last entries of the module-level
    sequences ``emIn``/``emOut`` (when ``name == "email"``) or
    ``epIn``/``epOut`` (otherwise), minus the size of the largest SCC.
    """
    G = load_graph(name)

    SCC = snap.GetMxScc(G).GetNodes()
    wcc = snap.GetMxWcc(G).GetNodes()

    is_email = name == "email"
    # presumably these sequences are sorted so the last entry is the largest
    # reach — verify against where they are built.
    largest_in_reach = (emIn if is_email else epIn)[-1]
    largest_out_reach = (emOut if is_email else epOut)[-1]

    IN = largest_in_reach - SCC
    OUT = largest_out_reach - SCC
    DISCONNECTED = G.GetNodes() - wcc
    TENDRILS_AND_TUBES = wcc - IN - OUT - SCC

    # Joining with single spaces reproduces the print-statement output while
    # staying valid on Python 3.
    fields = (name, "DISCONNECTED:", DISCONNECTED, "IN:", IN, "OUT:", OUT,
              "SCC:", SCC, "TENDRILS + TUBES:", TENDRILS_AND_TUBES)
    print(" ".join(str(field) for field in fields))
Beispiel #15
0
def main():
    """Compute network features for every assignee graph and save them.

    Graphs are loaded with load_networks from the selected data folder. For
    each non-empty graph the features are stored in ``AGraph.metadata`` and
    written to ``<folder><company_name>.json``. The number of loaded graphs is
    printed at the end.
    """
    citation = False

    if citation:
        folder = '../data/citation_networks/'
    else:
        folder = '../data/networks/'
    AssigneeGraphs = load_networks(folder)
    print("Generating features...")
    for AGraph in tqdm(AssigneeGraphs):
        # Calculate network features
        Graph = AGraph.Graph
        node_count = Graph.GetNodes()
        if node_count <= 0:
            print("0 nodes %s" % (AGraph.company_name,))
            continue
        Components = snap.TCnComV()
        snap.GetSccs(Graph, Components)
        MxScc = snap.GetMxScc(Graph)
        # The NetworkStats object that used to be built here was never read,
        # so the features go straight into the metadata dict.
        features = {
            'node_count': node_count,
            'edge_count': Graph.GetEdges(),
            'clustering_cf': snap.GetClustCf(Graph),
            'num_sccs': len(Components),
            'max_scc_proportion': float(MxScc.GetNodes()) / node_count,
            'avg_patents_per_inventor':
                float(AGraph.metadata['number_of_patents']) / node_count,
            'modularity': get_modularity(Graph),
        }
        AGraph.metadata.update(features)
        with open(folder + AGraph.company_name + '.json', 'w') as fp:
            json.dump(AGraph.metadata, fp, sort_keys=True, indent=4)
    print(len(AssigneeGraphs))
Beispiel #16
0
def calc_net_stats(folder):
    """Return a NetworkStats record for every non-empty assignee graph.

    NOTE(review): ``folder`` is not used and ``AssigneeGraphs`` is read from
    module scope — confirm whether the graphs were meant to be loaded from
    ``folder`` here.
    """
    stats = []
    print("Loading features...")
    for AGraph in tqdm(AssigneeGraphs):
        # Calculate network features
        Graph = AGraph.Graph
        node_count = Graph.GetNodes()
        if node_count <= 0:
            # Graphs without nodes are skipped (ratios below divide by it).
            continue
        edge_count = Graph.GetEdges()
        cc = snap.GetClustCf(Graph)
        Components = snap.TCnComV()
        snap.GetSccs(Graph, Components)
        num_sccs = len(Components)
        MxScc = snap.GetMxScc(Graph)
        max_scc_proportion = float(MxScc.GetNodes()) / node_count
        avg_patents_per_inventor = (
            float(AGraph.metadata['number_of_patents']) / node_count)
        modularity = get_modularity(Graph)
        stats.append(NetworkStats(
            node_count=node_count, edge_count=edge_count, clustering_cf=cc,
            num_sccs=num_sccs, max_scc_proportion=max_scc_proportion,
            avg_patents_per_inventor=avg_patents_per_inventor,
            modularity=modularity))
    return stats
Beispiel #17
0
def analyze_graph(G):
    """Print the bow-tie region sizes of the directed graph ``G``.

    IN and OUT are measured with a backward/forward BFS from a random node of
    the largest SCC; the tendril/tube size is whatever is left of the graph
    after removing SCC, IN, OUT and the disconnected nodes.
    """
    WCC = snap.GetMxWcc(G)
    SCC = snap.GetMxScc(G)

    # Named 'seed' rather than 'id' to avoid shadowing the builtin.
    seed = SCC.GetRndNId()
    out_tree = snap.GetBfsTree(G, seed, True, False)
    in_tree = snap.GetBfsTree(G, seed, False, True)

    G_size = G.GetNodes()
    SCC_size = SCC.GetNodes()
    WCC_size = WCC.GetNodes()
    DISCONNECTED_size = G_size - WCC_size
    in_size = in_tree.GetNodes() - SCC_size
    out_size = out_tree.GetNodes() - SCC_size
    Tendril_size = G_size - SCC_size - DISCONNECTED_size - in_size - out_size

    print('Total Graph Size: %d' % G_size)
    print('SCC Size: %d' % SCC_size)
    print('WCC Size: %d' % WCC_size)
    print('IN Size: %d' % in_size)
    print('OUT Size: %d' % out_size)
    print('DISCONNECTED Size: %d' % DISCONNECTED_size)
    print('Tendril tube size (remaining): %d' % Tendril_size)
    # print('') gives a blank line on Python 2 and 3; a bare print() shows
    # "()" under the Python 2 print statement.
    print('')
Beispiel #18
0
    def per_graph(graph, name):
        """Print bow-tie region sizes of ``graph`` under the label ``name``.

        Forward and backward BFS reach is averaged over 200 random start
        nodes drawn from the largest SCC, so OUT, IN and tendril sizes are
        printed as floats.
        """
        mxWcc = snap.GetMxWcc(graph)
        mxScc = snap.GetMxScc(graph)
        scc_nodes = mxScc.GetNodes()
        # Single-argument print() gives the same output on Python 2 and 3.
        print('')
        print('Size analysis on {}'.format(name))
        print('Disconnected size = {}'.format(
            graph.GetNodes() - mxWcc.GetNodes()))
        print('SCC size = {}'.format(scc_nodes))

        trials = 200
        total_reached_out = 0
        total_reached_in = 0
        for _ in range(trials):
            nodeId = mxScc.GetRndNId()
            total_reached_out += snap.GetBfsTree(
                graph, nodeId, True, False).GetNodes()
            total_reached_in += snap.GetBfsTree(
                graph, nodeId, False, True).GetNodes()

        scc_out = float(total_reached_out) / trials
        scc_in = float(total_reached_in) / trials

        out_sz = scc_out - scc_nodes
        in_sz = scc_in - scc_nodes
        print('OUT size = {}'.format(out_sz))
        print('IN size = {}'.format(in_sz))
        print('Tendrils/Tubes size = {}'.format(
            mxWcc.GetNodes() - scc_nodes - out_sz - in_sz))
Beispiel #19
0
def _q2_1_reached_counts(G, node_id):
    """Return (inward, outward) counts of distinct BFS-tree edge targets.

    The outward tree follows out-links from ``node_id``, the inward tree
    follows in-links. Each count is the number of distinct destination node
    ids over the edges of the corresponding tree.
    """
    outward_tree = snap.GetBfsTree(G, node_id, True, False)
    outward = {EI.GetDstNId() for EI in outward_tree.Edges()}
    inward_tree = snap.GetBfsTree(G, node_id, False, True)
    inward = {EI.GetDstNId() for EI in inward_tree.Edges()}
    return len(inward), len(outward)


def q2_1():
    '''
    You will have to run the inward and outward BFS trees for the
    respective nodes and reason about whether they are in SCC, IN or OUT.
    You may find the SNAP function GetBfsTree() to be useful here.
    '''

    ##########################################################################
    #TODO: Run outward and inward BFS trees from node 2018, compare sizes
    #and comment on where node 2018 lies.
    G = load_graph("email")
    inward, outward = _q2_1_reached_counts(G, 2018)
    # Every print uses one pre-formatted string, so the output is the same
    # plain "label value" line on Python 2 and 3. Mixing print statements
    # with print(a, b) printed tuples on Python 2.
    print('inward_set %d' % inward)
    print('outward_set %d' % outward)
    print('G size %d' % G.GetEdges())
    mxSccSize = snap.GetMxScc(G).GetNodes()
    print('SCC size: %d' % mxSccSize)
    print('Relative size of SCC in Directed Graph: %s' % snap.GetMxSccSz(G))
    ##########################################################################

    ##########################################################################
    #TODO: Run outward and inward BFS trees from node 224, compare sizes
    #and comment on where node 224 lies.
    G = load_graph("epinions")
    inward, outward = _q2_1_reached_counts(G, 224)
    print('inward_set %d' % inward)
    print('outward_set %d' % outward)
    print('G size %d' % G.GetEdges())
    print('Relative size of SCC in Directed Graph: %s' % snap.GetMxSccSz(G))
    ##########################################################################

    print('2.1: Done!\n')
Beispiel #20
0
    l.append((src, dst))

l.sort()
#for item in l:
#    print("G7\t%d\t%d" % (item[0], item[1]))


def _report_graph(type_format, size_format, graph):
    # Print the Python type of a snap graph, then its node and edge counts.
    print(type_format % (type(graph)))
    print(size_format % (graph.GetNodes(), graph.GetEdges()))


# Largest weakly connected components.
WccG6 = snap.GetMxWcc(G6)
_report_graph("type(WccG6) %s", "WccG6 nodes %d, edges %d", WccG6)

WccG7 = snap.GetMxWcc(G7)
_report_graph("type(WccG7) %s", "WccG7 nodes %d, edges %d", WccG7)

# Largest strongly connected components.
SccG6 = snap.GetMxScc(G6)
_report_graph("type(SccG6) %s", "SccG6 nodes %d, edges %d", SccG6)

SccG7 = snap.GetMxScc(G7)
_report_graph("type(SccG7) %s", "SccG7 nodes %d, edges %d", SccG7)

# Subgraph induced by nodes 0..4, with its edges listed.
SubG6 = snap.GetSubGraph(G6, snap.TIntV.GetV(0, 1, 2, 3, 4))
_report_graph("type(SubG6) %s", "SubG6 nodes %d, edges %d", SubG6)
for edge in SubG6.Edges():
    print("edge (%d, %d)" % (edge.GetSrcNId(), edge.GetDstNId()))

# 3-core of G6.
Core3G6 = snap.GetKCore(G6, 3)
print("type(Core3G6) %s" % (type(Core3G6)))
Beispiel #21
0
        nx.draw(nxG, with_labels=True)
        fig.savefig(str(dir) + "\\role" + str(role) + "node" + str(id) + id_to_word[id].name() + name + ".png")
        plt.close(fig)



# Build three word graphs that differ only in the last argument (0, 1, 2)
# and save each one as an edge list next to the script.
G0 = generate_word_graph(True, False, False, 0)
snap.SaveEdgeList(G0, "G0.txt", "")
G1 = generate_word_graph(True, False, False, 1)
snap.SaveEdgeList(G1, "G1.txt", "")
G2 = generate_word_graph(True, False, False, 2)
snap.SaveEdgeList(G2, "G2.txt", "")

# NOTE(review): `meme` is not defined in the visible part of the file —
# confirm it exists, otherwise this raises NameError.
print(meme)
# Three meaning graphs, each built with a different one of the three boolean
# flags set; the variable names suggest polysemy, hypernymy and holonymy —
# TODO confirm against generate_meaning_graph.
PolyG, Polyid, Polysynset, _,_,_ = generate_meaning_graph(False, True, False)
# Node count of the largest SCC of the first meaning graph.
print(snap.GetMxScc(PolyG).GetNodes())
HypG, Hypid, Hypsynset, _,_,_ = generate_meaning_graph(True, False, False)
HoloG, Holoid, Holosynset, _,_,_ = generate_meaning_graph(False, False, True)
# Sanity check: both graphs must map every key of Polyid to the same value.
for k in Polyid:
    if Polyid[k] != Hypid[k]:
        print("oh no")


# Role extraction over node vectors built from the three meaning graphs;
# W has one row per entry (presumably per node — verify in extract_roles).
W = extract_roles(create_node_vectors([HypG, PolyG, HoloG], 3), 12)
print(W.shape)
roles = []
counts = dict()
nodes = dict()
# The role of row i is the column index with the largest value in W[i].
for i in range(W.shape[0]):
    role = np.argmax(W[i])
    roles.append(role)
Beispiel #22
0
import snap
import sys

# Simple script to re-index to 0-indexed graph.

graph = sys.argv[1]
# Command-line arguments are strings, so the flag has to be compared with
# '1'. Comparing with the integer 1 is never true, which made the undirected
# branch unreachable.
if len(sys.argv) > 2 and sys.argv[2] == '1':
    graph_type = snap.PUNGraph
else:
    graph_type = snap.PNGraph
Gin = snap.LoadEdgeList(graph_type, graph)
MxScc = snap.GetMxScc(Gin)
# Keep the loaded graph type for the renumbered copy, so that an undirected
# input is not turned into a directed graph.
Gout = snap.ConvertGraph(graph_type, MxScc, True)
# '%s %s' reproduces the space the print statement put between items.
print('%s %s' % ('Number of nodes: ', Gout.GetNodes()))
print('%s %s' % ('Number of edges: ', Gout.GetEdges()))
# NOTE: the result is written back to the input path, replacing the input.
snap.SaveEdgeList(Gout, graph)
Beispiel #23
0
    backwardTree = snap.GetBfsTree(Graph, nodeID, False, True)
    return (float(forwardTree.GetNodes()) / Graph.GetNodes(),
            float(backwardTree.GetNodes()) / Graph.GetNodes())


# In[104]:

# Load both networks.
epinions, email = loadNetworks()

# ### Email Model

# In[119]:

# Share of email nodes that sit in the largest SCC, in percent.
emailSCC = snap.GetMxScc(email)
email_scc_percent = 100 * float(emailSCC.GetNodes()) / email.GetNodes()
print("Email SCC: %s." % (email_scc_percent))

# In[120]:

# The proposal is that 189587 is in SCC, so we have:
# SCC + OUT = 19.6456446492%
# OUT = 6.7492666299%
# SCC + IN =  69.8402045141%
# IN = 56.9438264948%
# OTHER: 23.410528856%
probe_id = 189587
fp, bp = GetForwardBackwardProp(email, probe_id)
print("Forward %s and Backward %s for ID: %s." % (fp, bp, probe_id))

# In[121]:
Beispiel #24
0
from snap import TUNGraph
import time
from datetime import timedelta
import sys


def calculate_shortest_path_lengths_distribution(graph, hashtag):
    """Plot the shortest-path length distribution and report the run time.

    The plot is produced by snap.PlotShortPathDistr with the file name stem
    ``<hashtag>_shortestPathLengthsDist``. The elapsed wall-clock time,
    truncated to whole seconds, is printed afterwards.
    """
    began = time.time()
    print("Calculating shortest path lengths distribution...")
    plot_name = hashtag + "_shortestPathLengthsDist"
    snap.PlotShortPathDistr(graph, plot_name,
                            "Shortest Path Lengths Distribution")
    elapsed_seconds = int(time.time() - began)
    print("Completed in: %s" % timedelta(seconds=elapsed_seconds))


if __name__ == '__main__':
    # Exactly one command-line argument is expected: the hashtag.
    cli_args = sys.argv[1:]
    if len(cli_args) != 1:
        print("Must specify hashtag")
        sys.exit(1)
    hashtag = cli_args[0]

    # Load the binary subgraph stored for this hashtag.
    subgraph_path = "../../data/mmr_subgraph_" + hashtag + ".bin"
    hashtag_subgraph = TUNGraph.Load(snap.TFIn(subgraph_path))

    # Restrict the analysis to the largest connected component.
    hashtag_subgraph = snap.GetMxScc(hashtag_subgraph)

    # Run the computation.
    calculate_shortest_path_lengths_distribution(hashtag_subgraph, hashtag)
Beispiel #25
0
            p.GetVal2(), p.GetVal1())
        num_cc += p.GetVal2()
    print num_cc, "total strongly connected components"
    print

    snap.GetWccSzCnt(repliesgraph, CntV)
    num_cc = 0
    for p in CntV:
        print "{0} weakly connected component(s) of size {1}".format(
            p.GetVal2(), p.GetVal1())
        num_cc += p.GetVal2()
    print num_cc, "total weakly connected components"
    print

    #properties of largest strongly connected component
    big_scc = snap.GetMxScc(repliesgraph)
    snap.PrintInfo(big_scc, "Largest strongly connected component")

    num_dir_edges = snap.CntUniqDirEdges(big_scc)
    print "{0:.2f}% of directed edges are reciprocal".format(
        snap.CntUniqBiDirEdges(big_scc) * 2 * 100 / num_dir_edges)

    print "The clustering coefficient is {0:.2f}%".format(
        snap.GetClustCf(big_scc) * 100)

    print "The diameter is approximately {0}".format(
        snap.GetBfsFullDiam(big_scc, 1000))

    #store CC for graphviz
    snap.SaveGViz(big_scc, filename + ".dot", "Largest Connected Component")
    print "Saved GraphViz"
Beispiel #26
0
plot_filedir = os.path.join(plotpath, plot_filename)
# Scatter plot of path length (x) against its frequency (y).
path_lengths = list(shortest_path_dist.keys())
path_frequencies = list(shortest_path_dist.values())
plot_title = "Shortest Path Distribution ({})".format(graph_filename[:-6])
plt.figure()
plt.scatter(path_lengths, path_frequencies, s=10)
plt.xlabel("Shortest Path Length")
plt.ylabel("Frequency")
plt.title(plot_title)
plt.savefig(plot_filedir)
"""
FOR FASTER COMPUTATION, UNCOMMENT THE FOLLOWING LINE AND COMMENT OUT LINE 107-125
"""
# snap.PlotShortPathDistr(G, "shortest_path_{}".format(graph_filename[:-6]), "Shortest Path Distribution ({})".format(graph_filename[:-6]))

# [4] Components of the network
SCC = snap.GetMxScc(G)
largest_fraction = round(SCC.GetNodes() / G.GetNodes(), 4)
print("Fraction of nodes in largest connected component: {}".format(
    largest_fraction))

# Edge bridges are collected into a vector of id pairs.
Edge_Bridge = snap.TIntPrV()
snap.GetEdgeBridges(G, Edge_Bridge)
bridge_count = len(Edge_Bridge)
print("Number of edge bridges: {}".format(bridge_count))

# Articulation points are collected into a vector of node ids.
ArticulationPoint = snap.TIntV()
snap.GetArtPoints(G, ArticulationPoint)
articulation_count = len(ArticulationPoint)
print("Number of articulation points: {}".format(articulation_count))

# (size, count) pairs for the strongly connected components.
CComp = snap.TIntPrV()
snap.GetSccSzCnt(G, CComp)
connected_component = {}
for comp in CComp:
def main():
    """Analyse the Epinions graph: PageRank leaders and bow-tie regions.

    Prints the ten highest PageRank values, the in-neighbours of the ten
    top-ranked nodes, the sizes of the SCC / OUT / IN regions and derived
    tendril, tube and disconnected counts, then plots the result of
    ``Random(epinions, 5)``.
    """
    #Loading the graph
    epinions = snap.LoadEdgeList(snap.PNGraph, "soc-Epinions1.txt", 0, 1)
    pr = PageRank(epinions, 0.8, 0.001)

    #largest strongly connected component of the graph
    scc = snap.GetMxScc(epinions)

    #SCC node ids: list for ordered access, set for O(1) membership tests
    sccNodes = [node.GetId() for node in scc.Nodes()]
    scc_ids = set(sccNodes)
    #all node ids, in the iteration order that pr is aligned with
    nodeList = [node.GetId() for node in epinions.Nodes()]
    node_position = {nid: pos for pos, nid in enumerate(nodeList)}

    #Computing top rank nodes. Ranks and ids are sorted TOGETHER so that each
    #id stays attached to its rank; sorting the two lists independently
    #returned the ten largest ids instead of the ten top-ranked nodes.
    ranked = []
    for index, element in enumerate(pr):
        b, c = element
        ranked.append((b, nodeList[index]))
    ranked.sort(key=lambda pair: pair[0], reverse=True)

    topRankNodes = [rank for rank, _ in ranked[0:10]]
    topIds = [nid for _, nid in ranked[0:10]]
    print('%s %s' % ("Top Rank Nodes: ", topRankNodes))
    # Number of incoming edges (indegree of x)
    #Ranks of all the source pages having hyperlinks toward x
    for top_id in topIds:
        currentNode = epinions.GetNI(top_id)
        x = currentNode.GetInDeg()

        for i in range(x):
            innerNode = currentNode.GetInNId(i)
            ele = pr[node_position[innerNode]]
            print(" ".join(str(part) for part in (
                "In Degree: ", innerNode, "w.r.t. node: ", x, "Rank: ", ele)))

    #printing number of nodes in the largest SCC
    print('%s %s' % ("Number of nodes in SCC: ", scc.GetNodes()))

    #Applying a BFS to get the Out Set from an SCC node
    BfsOutSet = snap.GetBfsTree(epinions, sccNodes[0], True, False)
    reached_out = [node.GetId() for node in BfsOutSet.Nodes()]
    #Out Set node ids (reached nodes outside the SCC)
    bfsOutNodes = [nid for nid in reached_out if nid not in scc_ids]
    #removing the SCC to get the Out Set nodes; ids are collected first so the
    #graph is not modified while its node iterator is in use
    for nid in reached_out:
        if nid in scc_ids:
            BfsOutSet.DelNode(nid)
    print('%s %s' % ("Number of OutSet Nodes: ", BfsOutSet.GetNodes()))
    #applying BFS search to find the tendrils in Out Set
    outSetTen = snap.GetBfsTree(BfsOutSet, bfsOutNodes[0], False, True)
    print('%s %s' % ("Tendrils in OutSet: ", outSetTen.GetNodes()))
    outTendrils = [node.GetId() for node in outSetTen.Nodes()]

    #applying BFS to get In Set nodes
    BfsInSet = snap.GetBfsTree(epinions, sccNodes[0], False, True)
    reached_in = [node.GetId() for node in BfsInSet.Nodes()]
    #In Set node ids (reaching nodes outside the SCC)
    bfsInNodes = [nid for nid in reached_in if nid not in scc_ids]
    #removing the SCC to get the In Set nodes
    for nid in reached_in:
        if nid in scc_ids:
            BfsInSet.DelNode(nid)
    print(" ".join(str(part) for part in (
        "Number of InSet Nodes: ", BfsInSet.GetNodes(), "clone:",
        len(bfsInNodes))))
    #applying BFS search to find the tendrils in In Set
    inSetTen = snap.GetBfsTree(BfsInSet, bfsInNodes[0], False, True)
    print('%s %s' % ("Tendrils in InSet: ", inSetTen.GetNodes()))
    inTendrils = [node.GetId() for node in inSetTen.Nodes()]

    #tubes: node ids present in both tendril sets (compared by id; node
    #iterator objects from different graphs cannot be compared meaningfully)
    out_tendril_ids = set(outTendrils)
    tubeNodes = [nid for nid in inTendrils if nid in out_tendril_ids]
    print('%s %s' % ("Tubes in SCC: ", len(tubeNodes)))

    #disconnected region: nodes not accounted for by any set above
    accounted = (scc_ids | set(bfsOutNodes) | set(bfsInNodes)
                 | set(inTendrils) | out_tendril_ids)
    disComp = [nid for nid in nodeList if nid not in accounted]
    print('%s %s' % ("Number of Disconnected Components: ", len(disComp)))

    #Random() was called twice with the first result thrown away; one call
    #is enough
    probabilities, nodes = Random(epinions, 5)
    plt.plot(nodes, probabilities)
    plt.xlabel('No of Nodes')
    #was misspelled 'ylablel', which raises AttributeError
    plt.ylabel('Probability that path exists')
    plt.show()
# Panel at subplot position 222: Epinions reachability via out-links.
plt.subplot(222)
# Step plot with a logarithmic y axis; X and Y are defined before this
# excerpt.
plt.semilogy(X,Y, drawstyle = 'steps')
plt.xlabel('Frac. of Starting Nodes')
plt.ylabel('number of nodes reached (log)')
plt.title('Epinions: Reachability using Out-links')
plt.grid(True)
plt.show()


###################################################################
#2.3
###################################################################

#Calculate the values for email graph
total_nodes = G1.GetNodes()
largest_scc = snap.GetMxScc(G1)
SCC = largest_scc.GetNodes()
random_nid_in_scc = largest_scc.GetRndNId()

##Find the out- and in-components
outcomp = snap.GetBfsTree(G1, random_nid_in_scc, True, False)
incomp = snap.GetBfsTree(G1, random_nid_in_scc, False, True)

sz_outcomp = outcomp.GetNodes()
sz_incomp = incomp.GetNodes()

G_WCC = snap.GetMxWcc(G1)
WCC = G_WCC.GetNodes()
disconnected = total_nodes - WCC
# The subtractions below use the ASCII hyphen-minus. The previous U+2212
# MINUS SIGN is not a Python operator and made the file a SyntaxError.
IN = sz_incomp - SCC
OUT = sz_outcomp - SCC
Beispiel #29
0
import snap
import sys
import numpy as np
import matplotlib
matplotlib.use('Agg')

import matplotlib.pyplot as plt

# Edge-list file given as the first command-line argument, loaded as an
# undirected graph using columns 0 and 1.
input_file = sys.argv[1]
SubGraph = snap.LoadEdgeList(snap.PUNGraph, input_file, 0, 1)

cc = set()
closeness = dict()

# Closeness is computed on the largest connected component only.
Graph = snap.GetMxScc(SubGraph)

for node in Graph.Nodes():
    # print() with one argument prints the same on Python 2 and 3.
    print(node.GetId())
    Clcentr = snap.GetClosenessCentr(Graph, node.GetId())
    closeness[node.GetId()] = Clcentr
    # Plotting the distribution of shortest Length

    snap.PlotShortPathDistr(p2p_gnutella04_subgraph, "p2p-Gnutella04-subgraph",
                            "Undirected graph - shortest path")
    print "Shortest path distribution of p2p-Gnutella04-subgraph is in :" + "diam.p2p-Gnutella04-subgraph.png"

## Task 1.2.4

# Task 1.2.4.1

if (sub_graph_name == "soc-Epinions1-subgraph"):

    # Finding the components of the network
    # Calculating the fraction of largest connected component

    largest_connected = snap.GetMxScc(soc_epinions1_subgraph)

    # GetNodes() returns the node count directly; no counting loop needed.
    node = largest_connected.GetNodes()

    print("Fraction of nodes in largest connected component in soc-Epinions1-subgraph :" + str(
        round(node * 1.0 / len(v1), 3)))

if (sub_graph_name == "cit-HepPh-subgraph"):

    # Finding the components of the network
    # Calculating the fraction of largest connected component

    largest_connected = snap.GetMxScc(cit_heph_subgraph)