Esempio n. 1
0
    def run(self):
        """Detect communities with Girvan-Newman and store them on the instance.

        Self-edges are first removed from ``self.graph`` in place; each
        detected community is then converted to a plain list of node ids and
        the list of those lists is saved as ``self.community_list``.
        """
        snap.DelSelfEdges(self.graph)
        detected = snap.TCnComV()
        snap.CommunityGirvanNewman(self.graph, detected)

        # Materialize each SNAP community vector as a plain Python list.
        self.community_list = [[node for node in community]
                               for community in detected]
Esempio n. 2
0
def get_communities(G_Undir, chords_dict):
    """Detect and print communities with Clauset-Newman-Moore (CNM).

    Self-edges are deleted from G_Undir in place before detection.  Each
    community is printed as its size followed by the labels looked up in
    chords_dict (node id -> chord label), then the overall modularity.

    NOTE(review): Python 2 print statements below; this module targets
    Python 2.
    """
    print("************")
    print("Communities")
    # CNM needs a simple graph; drop self-loops first.
    snap.DelSelfEdges(G_Undir)
    CmtyV = snap.TCnComV()
    modularity = snap.CommunityCNM(G_Undir, CmtyV)
    for Cmty in CmtyV:
        print "Community: size", Cmty.Len()
        for NI in Cmty:
            print chords_dict[NI]
        print ""
        print ""
    print "The modularity of the network is %f" % modularity
Esempio n. 3
0
def generate_word_graph(hyp, poly, holo, type):
    """Build a SNAP graph over all WordNet noun lemma names.

    :param hyp:  if truthy, add hypernym->hyponym edges
    :param poly: if truthy, add edges between lemmas sharing a synset
    :param holo: if truthy, add lemma->holonym-lemma edges
    :param type: 0 -> undirected graph; 1 -> directed, edges lemma->hyponym;
                 anything else -> directed with reversed hyponym edges
    :return: (G1, idToLemma, lemmaToId, hypedges, polyedges, holoedges)
             where the three edge sets hold (src_id, dst_id) tuples.
    """
    if type == 0:
        G1 = snap.TUNGraph.New()
    else:
        G1 = snap.TNGraph.New()
    hypedges = set()
    holoedges = set()
    polyedges = set()
    idToLemma = dict()
    lemmaToId = dict()
    # First pass: one node per lemma name, with a stable integer id.
    count = 0
    for lemma_name in list(wn.all_lemma_names('n')):
        G1.AddNode(count)
        idToLemma[count] = lemma_name
        lemmaToId[lemma_name] = count
        count += 1
    for lemma_name in list(wn.all_lemma_names('n')):
        if hyp:
            for synset in wn.synsets(lemma_name, "n"):
                for synset2 in synset.hyponyms() + synset.instance_hyponyms():
                    for lemma_name2 in synset2.lemma_names():
                        lemma_name2 = lemma_name2.lower()
                        if type in [0, 1]:
                            G1.AddEdge(lemmaToId[lemma_name],
                                       lemmaToId[lemma_name2])
                            hypedges.add((lemmaToId[lemma_name],
                                          lemmaToId[lemma_name2]))
                        else:
                            # Reversed orientation: hyponym -> lemma.
                            G1.AddEdge(lemmaToId[lemma_name2],
                                       lemmaToId[lemma_name])
                            hypedges.add((lemmaToId[lemma_name2],
                                          lemmaToId[lemma_name]))
        if poly:
            for synset in wn.synsets(lemma_name, "n"):
                for lemma_name2 in synset.lemma_names():
                    lemma_name2 = lemma_name2.lower()
                    G1.AddEdge(lemmaToId[lemma_name], lemmaToId[lemma_name2])
                    polyedges.add(
                        (lemmaToId[lemma_name], lemmaToId[lemma_name2]))
        if holo:
            for synset in wn.synsets(lemma_name, "n"):
                for synset2 in synset.member_holonyms() + synset.part_holonyms(
                ) + synset.substance_holonyms():
                    for lemma_name2 in synset2.lemma_names():
                        lemma_name2 = lemma_name2.lower()
                        G1.AddEdge(lemmaToId[lemma_name],
                                   lemmaToId[lemma_name2])
                        # BUG FIX: these are holonym edges; the original added
                        # them to hypedges, so holoedges was always returned
                        # empty.
                        holoedges.add(
                            (lemmaToId[lemma_name], lemmaToId[lemma_name2]))
    snap.DelSelfEdges(G1)
    return G1, idToLemma, lemmaToId, hypedges, polyedges, holoedges
Esempio n. 4
0
def GetNetworkDegree(filePath):
    """Load an undirected edge list, clean it, and dump node degrees.

    Self-edges and zero-degree nodes are removed before computing degrees.
    Degrees are written to ``filePath + ".deg"`` as "<id>\\t<deg>\\r\\n"
    lines, and also returned as a dict mapping node id (str) -> degree (int).
    """
    All_set = snap.LoadEdgeList(snap.PUNGraph, filePath, 0, 1)  # load training network

    snap.DelSelfEdges(All_set)     # remove self-loops
    snap.DelZeroDegNodes(All_set)  # remove isolated (degree-0) nodes
    degs = dict()
    for NI in All_set.Nodes():
        degs[str(NI.GetId())] = NI.GetDeg()

    # Use a context manager so the handle is closed even if a write fails
    # (the original leaked the file object on error).
    with open(filePath + ".deg", "w") as f:
        for (k, v) in degs.items():
            f.write(str(k) + "\t" + str(v) + "\r\n")

    return degs
Esempio n. 5
0
def loadCollabNet(path):
    """
    :param - path: path to edge list file

    return type: snap.PUNGraph
    return: Graph loaded from edge list at `path` and self edges removed

    Do not forget to remove the self edges!
    """
    ############################################################################
    # BUG FIX: the original ignored `path` and hard-coded 'CA-GrQc.txt';
    # load from the caller-supplied path instead.
    Graph = snap.LoadEdgeList(snap.PUNGraph, path, 0, 1)
    snap.DelSelfEdges(Graph)
    ############################################################################
    return Graph
Esempio n. 6
0
def loadCollabNet(path):
    """Load an undirected collaboration graph from an edge-list file.

    :param path: path to the (tab-separated) edge list file
    :return: snap.PUNGraph with all self-edges removed
    """
    collab = snap.LoadEdgeList(snap.PUNGraph, path, 0, 1)
    snap.DelSelfEdges(collab)
    return collab
Esempio n. 7
0
def loadCollabNet(path):
    """Read the collaboration network edge list at *path*.

    Duplicate edges are ignored automatically when loading an undirected
    graph; self-edges are stripped before returning.

    :return: snap.PUNGraph
    """
    g = snap.LoadEdgeList(snap.PUNGraph, path, 0, 1)
    snap.DelSelfEdges(g)  # drop loops
    return g
def main():
    """
    See usage message in module header block

    Parses a single "-d" option, loads the physician referral network from
    the data directory given as the sole positional argument, removes
    self-loops, and writes the edge list and node-id mapping to the
    current directory.

    NOTE(review): Python 2 code (print statement below).  The bare except
    around getopt swallows all errors and falls through to usage().
    `get_subgraph` is parsed but never used in this function.
    """
    get_subgraph = False  # if True discard nodes without attribute data
    try:
        opts, args = getopt.getopt(sys.argv[1:], "d")
    except:
        usage(sys.argv[0])
    for opt, arg in opts:
        if opt == "-d":
            get_subgraph = True
        else:
            usage(sys.argv[0])

    if len(args) != 1:
        usage(sys.argv[0])

    data_dir = args[0]

    outputdir = '.'

    sys.stdout.write('loading data from ' + data_dir + '...')
    start = time.time()
    datazipfile = data_dir + os.path.sep + 'physician-shared-patient-patterns-2014-days30.zip'
    G = load_physician_referral_data(datazipfile)
    print time.time() - start, 's'

    snap.PrintInfo(G)

    # Remove loops (self-edges).
    # G is a PNGraph so multiple edges not allowed in this type anyway.
    snap.DelSelfEdges(G)
    snap.PrintInfo(G)

    # specify ordered nodelist to map sequential ids to original ids consistent
    nodelist = [node.GetId() for node in G.Nodes()]

    # NOTE(review): "referall" is a typo preserved in the output filename;
    # renaming it could break downstream consumers.
    graph_filename = outputdir + os.path.sep + "physician_referall_arclist" + os.path.extsep + "txt"
    nodeid_filename = outputdir + os.path.sep + "nodeid" + os.path.extsep + "txt"
    write_graph_file(graph_filename, G, nodelist)
    write_subgraph_nodeids(nodeid_filename, nodelist)
Esempio n. 9
0
def main(genre):
    """Partition a genre's undirected chord graph via normalized-cut.

    Loads the genre graphs, strips self-edges, builds dense position <->
    node-id maps (needed by the matrix-based cut code), runs
    normalized_cut_minimization, and prints the chord labels of the two
    sides S and T.

    NOTE(review): Python 2 print statements; `dict` (the unpacked
    id -> chord mapping) shadows the builtin.
    """
    G_Multi, G_Directed, G_Undirected, dict = load_genre_graphs(genre)
    snap.DelSelfEdges(G_Undirected)
    print(G_Undirected.GetNodes())
    node_id_to_pos = {}
    pos_to_node_id = {}
    i = 0
    for NI in G_Undirected.Nodes():
        node_id_to_pos[NI.GetId()] = i
        pos_to_node_id[i] = NI.GetId()
        i += 1


    S, T, A, D = normalized_cut_minimization(G_Undirected, node_id_to_pos)

    # Translate matrix positions back to chord labels for display.
    S_chords = [dict[pos_to_node_id[pos]] for pos in S]
    T_chords = [dict[pos_to_node_id[pos]] for pos in T]
    print S_chords
    print ''
    print T_chords
Esempio n. 10
0
def NetworkModel(filePath, TRY_TIMES, MAX_WAIK_LENGTH, MAX_TEST_TIMES,
                 WALK_belta, WINDOW, V_SIZE):
    """Train node embedding models for the network at *filePath*.

    The edge list is loaded as an undirected graph and cleaned of
    self-edges and zero-degree nodes.  Up to TRY_TIMES models are trained;
    run X saves ``<filePath>_m<MAX_TEST_TIMES>_s<V_SIZE>_w<WINDOW>_t<X>.vec``
    in word2vec text format, and runs whose output file already exists are
    skipped.
    """
    net = snap.LoadEdgeList(snap.PUNGraph, filePath, 0, 1)  # training network
    snap.DelSelfEdges(net)       # drop self-loops
    snap.DelZeroDegNodes(net)    # drop isolated nodes

    for X in range(TRY_TIMES):
        out_name = (filePath + "_m" + str(MAX_TEST_TIMES) + "_s" +
                    str(V_SIZE) + "_w" + str(WINDOW) + "_t" + str(X) + ".vec")
        if os.path.exists(out_name):
            continue  # this run was produced by an earlier invocation

        # Train the distributed node representation for this run.
        model = train_net2vec_total(net, MAX_WAIK_LENGTH, WALK_belta,
                                    WINDOW, V_SIZE, MAX_TEST_TIMES)
        model.wv.save_word2vec_format(out_name, binary=False)

    return
def partly_undir_rewire(G, spokes):
	"""Configuration-model style rewiring preserving per-node stub counts.

	``spokes`` appears to be an (n x 3) numpy array with per-node columns
	[out-stubs, in-stubs, undirected-stubs] -- TODO confirm with caller.
	Undirected stubs are paired first (each pair added as a reciprocal pair
	of directed edges), then out-stubs are matched to in-stubs.  Pairings of
	a node with itself are rejected and redrawn.  Self-edges are removed
	from the result before returning.

	NOTE(review): if all remaining stubs of a kind sit on one node, the
	self-pairing rejection makes the corresponding loop spin forever --
	verify inputs rule this out.  Also, if ``spokes`` has an integer dtype,
	the ``/`` divisions below floor under Python 2 -- confirm dtype/float
	division semantics.
	"""
	spokes_copy = copy.deepcopy(spokes)
	rewired = snap.GenRndGnm(snap.PNGraph, G.GetNodes(), 0)

	# Add undirected edges
	total_undirected = np.sum(spokes_copy[:,2])
	while total_undirected > 1:
		undir_edges = spokes_copy[:,2]
		nonzero_stubs = np.where(undir_edges != 0)[0]
		# Sample two endpoints weighted by their remaining undirected stubs.
		probs = undir_edges[nonzero_stubs] / total_undirected
		random_stubs = np.random.choice(nonzero_stubs, size=2, p=probs)
		if random_stubs[0] == random_stubs[1]:
			continue
		# An undirected edge is stored as a reciprocal pair of directed edges.
		rewired.AddEdge(random_stubs[0], random_stubs[1])
		rewired.AddEdge(random_stubs[1], random_stubs[0])
		spokes_copy[random_stubs[0],2] -= 1
		spokes_copy[random_stubs[1],2] -= 1
		total_undirected = np.sum(spokes_copy[:,2])

	# Add in/out edges
	total_directed = np.sum(spokes_copy[:,0:2])
	while total_directed > 1:
		out_edges = spokes_copy[:,0]
		in_edges = spokes_copy[:,1]
		nonzero_out_stubs = np.where(out_edges != 0)[0]
		out_probs = out_edges[nonzero_out_stubs] / np.sum(out_edges)
		nonzero_in_stubs = np.where(in_edges != 0)[0]
		in_probs = in_edges[nonzero_in_stubs] / np.sum(in_edges)
		# Draw a source from the out-stub distribution, a target from in-stubs.
		random_out = np.random.choice(nonzero_out_stubs, p=out_probs)
		random_in = np.random.choice(nonzero_in_stubs, p=in_probs)
		if random_out == random_in:
			continue
		rewired.AddEdge(random_out, random_in)
		spokes_copy[random_out,0] -= 1
		spokes_copy[random_in,1] -= 1
		total_directed = np.sum(spokes_copy[:,0:2])
	snap.DelSelfEdges(rewired)
	return rewired
def graph_cleaning(file_path):
    """Reduce the edge file at *file_path* to its largest component, in place.

    Loads the graph (with its string->id hash H), keeps only the maximum
    weakly-connected component, drops self-edges, then REWRITES file_path:
    for each surviving line only the nodes still present in the component
    are kept, and lines whose first node was dropped are removed entirely.

    NOTE(review): destructive -- the original input file is overwritten.
    Python 2 print statement below.
    """
    Graph, H = load_graph(file_path)
    Graph = snap.GetMxWcc(Graph)
    snap.DelSelfEdges(Graph)
    nodes_set = set()
    for NI in Graph.Nodes():
        nodes_set.add(NI.GetId())
    with open(file_path, 'r') as f:
        raw_list = f.read().split('\n')
        edges_list = [edge_str.split() for edge_str in raw_list]
    # Opening with 'w' truncates the file; the cleaned lines are re-appended.
    with open(file_path, 'w') as f:
        print '-----clear'
    with open(file_path, 'a') as f:
        for edge in edges_list:
            if len(edge) == 0:
                continue
            if H.GetKeyId(edge[0]) not in nodes_set:
                continue
            edge_cleaned = list()
            for node in edge:
                if H.GetKeyId(node) in nodes_set:
                    edge_cleaned.append(node)
            f.write(' '.join(edge_cleaned) + '\n')
Esempio n. 13
0
def generate_meaning_graph(hyp, poly, holo):
    """Build an undirected SNAP graph over all WordNet noun synsets.

    One node per synset; three edge sets are collected separately:
      hypedges  -- hyponym / instance-hyponym pairs (if hyp)
      polyedges -- synsets reachable via a shared lemma name (if poly)
      holoedges -- member/part/substance holonym pairs (if holo)

    :return: (G1, idToSynset, synsetToId, hypedges, polyedges, holoedges)

    NOTE(review): Python 2 print statements (debug output); the global
    `numImp` records the node id assigned to 'benthos.n.01'.
    """
    global numImp
    G1 = snap.TUNGraph.New()
    print wn.synsets('festoon')
    hypedges = set()
    holoedges = set()
    polyedges = set()
    idToSynset = dict()
    synsetToId = dict()
    count = 0
    numEl = 0  # NOTE(review): never used below
    # First pass: one node per synset, with a stable integer id.
    for synset in list(wn.all_synsets('n')):
        if synset == wn.synset('benthos.n.01'):
            print synset
            numImp = count
            print count
        G1.AddNode(count)
        idToSynset[count] = synset
        synsetToId[synset] = count
        count += 1
    for synset in list(wn.all_synsets('n')):
        if hyp:
            for synset2 in synset.hyponyms() + synset.instance_hyponyms():
                G1.AddEdge(synsetToId[synset], synsetToId[synset2])
                hypedges.add((synsetToId[synset], synsetToId[synset2]))
        if poly:
            for lemma_name in synset.lemma_names():
                for synset2 in wn.synsets(lemma_name, "n"):
                    G1.AddEdge(synsetToId[synset], synsetToId[synset2])
                    polyedges.add((synsetToId[synset], synsetToId[synset2]))
        if holo:
            for synset2 in synset.member_holonyms() + synset.part_holonyms(
            ) + synset.substance_holonyms():
                G1.AddEdge(synsetToId[synset], synsetToId[synset2])
                holoedges.add((synsetToId[synset], synsetToId[synset2]))
    snap.DelSelfEdges(G1)
    return G1, idToSynset, synsetToId, hypedges, polyedges, holoedges
def main():
    """
    See usage message in module header block

    Loads the NBER patent citation network plus per-patent attributes,
    removes self-loops, factorizes the COUNTRY and POSTATE categorical
    attributes to integers, optionally (-d) restricts the graph to nodes
    with attribute data, and writes the edge list, attribute files and
    node-id mapping to the current directory.

    NOTE(review): Python 2 code (print statements, iteritems/xrange/has_key).
    """
    get_subgraph = False  # if True discard nodes without attribute data
    try:
        opts, args = getopt.getopt(sys.argv[1:], "d")
    except:
        usage(sys.argv[0])
    for opt, arg in opts:
        if opt == "-d":
            get_subgraph = True
        else:
            usage(sys.argv[0])

    if len(args) != 1:
        usage(sys.argv[0])

    data_dir = args[0]

    outputdir = '.'

    sys.stdout.write('loading data from ' + data_dir + '...')
    start = time.time()
    (G, patdata, colnames) = load_nber_patent_data(data_dir)
    print time.time() - start, 's'

    snap.PrintInfo(G)

    # Remove loops (self-edges).
    # There is actually for some reason one loop (patent id 5489070).
    # G is a PNGraph so multiple edges not allowed in this type anyway.
    snap.DelSelfEdges(G)
    snap.PrintInfo(G)

    # We do not add attributes to nodes as SNAP node attribute as
    # these seem to get lost by various operations including subgraph
    # that we need to use, so instead maintain them just in the
    # dictionary mapping the original node ids to the attributes -
    # fortunately the original node ids are maintained by
    # GetSubGraph() so we can use these to index the patdata
    # dictionary in the subgraphs

    # Cannot do this:
    #patdata[:][colnames['COUNTRY']] = convert_to_int_cat(patdata[:][colnames['COUNTRY']]) # like factor in R
    # as get "TypeError: unhashable type" so have to do this instead:
    id_countries = [(k, p[colnames['COUNTRY']])
                    for (k, p) in patdata.iteritems()]
    id_countries_int = convert_to_int_cat([x[1] for x in id_countries])
    for i in xrange(len(id_countries)):
        patdata[id_countries[i][0]][colnames['COUNTRY']] = id_countries_int[i]
    for attr in ['COUNTRY']:
        sys.stdout.write('There are %d NA for %s\n' %
                         ([p[colnames[attr]]
                           for p in patdata.itervalues()].count('NA'), attr))

    id_states = [(k, p[colnames['POSTATE']]) for (k, p) in patdata.iteritems()]
    id_states_int = convert_to_int_cat([x[1] for x in id_states])
    for i in xrange(len(id_states)):
        patdata[id_states[i][0]][colnames['POSTATE']] = id_states_int[i]
    for attr in ['POSTATE']:
        sys.stdout.write('There are %d NA for %s\n' %
                         ([p[colnames[attr]]
                           for p in patdata.itervalues()].count('NA'), attr))

    # There are 3774768 unique patent identifiers in the citation data but
    # only 2923922 unique patent identifiers in the patent data (patdata).
    # The size of the set intersection of these patent ids is 2755865
    # i.e. there is patent data for 73% of the patents in the citation network.
    # Presumably this is because the patdata (pat63_99.txt) contains all
    # utility patents in the period 1963 to 1999 but the citation data
    # cit75_99.txt contains all US patent citations for utility patents
    # granted in the period 1975 to 1999, so there are patent ids in here
    # from earlier periods that are cited by patents in that period,
    # for which therefore we don't have the patent data (prior to 1963).
    # So we have to set the data for all patents in network that we have it
    # for, and the rest (27%) to NA.

    nodelist = list(
    )  # keep the iteration below in list so we always use same order in future

    if get_subgraph:
        # get subgraph induced by nodes that have patent data in the
        # pat63_99.txt file
        nodeVec = snap.TIntV()  # nodelist in TIntV format for use in SNAP
        for node in G.Nodes():
            patid = node.GetId()
            if patdata.has_key(patid):
                nodelist.append(patid)
                nodeVec.Add(patid)
        G = snap.GetSubGraph(G, nodeVec)
        print 'Subgraph with only nodes with patent attribute data:'
        snap.PrintInfo(G)
    else:
        # keep all the graph and just put NA for all data attributes
        citepatent_count = 0
        patentdata_count = 0
        for node in G.Nodes():
            citepatent_count += 1
            patid = node.GetId()
            nodelist.append(patid)
            #print citepatent_count, patentdata_count, patid  #XXX
            if not patdata.has_key(patid):
                #print 'NA for ', patid #XXX
                patdata[patid] = len(colnames) * ["NA"]
                patdata[patid][
                    colnames['HASDATA']] = 0  # no data on this patent
            else:
                patentdata_count += 1
        sys.stdout.write(
            "There are %d unique cited/citing patents of which %d (%f%%) have patent data\n"
            % (citepatent_count, patentdata_count,
               100 * float(patentdata_count) / citepatent_count))

    graph_filename = outputdir + os.path.sep + "patent_citations" + os.path.extsep + "txt"
    write_graph_file(graph_filename, G, nodelist)
    attributes_binary_filename = outputdir + os.path.sep + "patent_binattr" + os.path.extsep + "txt"
    attributes_categorical_filename = outputdir + os.path.sep + "patent_catattr" + os.path.extsep + "txt"
    attributes_continuous_filename = outputdir + os.path.sep + "patent_contattr" + os.path.extsep + "txt"

    write_attributes_file_binary(attributes_binary_filename, G, nodelist,
                                 patdata, colnames)
    write_attributes_file_categorical(attributes_categorical_filename, G,
                                      nodelist, patdata, colnames)
    write_attributes_file_continuous(attributes_continuous_filename, G,
                                     nodelist, patdata, colnames)

    nodeid_filename = outputdir + os.path.sep + "nodeid" + os.path.extsep + "txt"
    write_subgraph_nodeids(nodeid_filename, nodelist)
def main():
    """
    See usage message in module header block

    Loads the EPO patent citation network plus per-patent attributes,
    removes self-loops, factorizes the categorical (Language, Country) and
    set-valued (Classes, Sections) attributes to integers, optionally (-d)
    restricts the graph to nodes with attribute data, and writes the edge
    list, attribute files, node-id mapping and the original string
    categories to the current directory.

    NOTE(review): Python 2 code (print statements, iteritems/xrange/has_key).
    """
    get_subgraph = False # if True discard nodes without attribute data
    try:
        opts,args = getopt.getopt(sys.argv[1:], "d")
    except:
        usage(sys.argv[0])
    for opt,arg in opts:
        if opt == "-d":
            get_subgraph = True
        else:
            usage(sys.argv[0])

    if len(args) != 1:
        usage(sys.argv[0])

    data_dir = args[0]

    outputdir = '.'

    sys.stdout.write('loading data from ' + data_dir + '...')
    start = time.time()
    (G, patdata, colnames) = load_epo_patent_data(data_dir)
    print time.time() - start, 's'

    snap.PrintInfo(G)

    # Remove loops (self-edges).
    # There is actually for some reason 92 nodes with self-loops
    # e.g. EP0021443
    # G is a PNGraph so multiple edges not allowed in this type anyway.
    snap.DelSelfEdges(G)
    snap.PrintInfo(G)

    # We do not add attributes to nodes as SNAP node attribute as
    # these seem to get lost by various operations including subgraph
    # that we need to use, so instead maintain them just in the
    # dictionary mapping the original node ids to the attributes -
    # fortunately the original node ids are maintained by
    # GetSubGraph() so we can use these to index the patdata
    # dictionary in the subgraphs


    # convert categorical attribute values to integers like factor in R
    for cat_colname in ['Language','Country']:
        catvalues = [(k, p[colnames[cat_colname]]) for (k,p) in patdata.iteritems()]
        catvalues_int = convert_to_int_cat([x[1] for x in catvalues])
        for i in xrange(len(catvalues)):
            patdata[catvalues[i][0]][colnames[cat_colname]] = catvalues_int[i]
        sys.stdout.write('There are %d NA for %s\n' % ([p[colnames[cat_colname]] for p in patdata.itervalues()].count('NA'), cat_colname))


    # convert categorical set attribute values to integers like factor in R
    for set_colname in ['Classes','Sections']:
        setvalues = [(k, p[colnames[set_colname]]) for (k,p) in patdata.iteritems()]
        setvalues_int = convert_to_int_set([x[1].split(',') for x in setvalues])
        for i in xrange(len(setvalues)):
            patdata[setvalues[i][0]][colnames[set_colname]] = setvalues_int[i]
        sys.stdout.write('There are %d NA for %s\n' % ([p[colnames[set_colname]] for p in patdata.itervalues()].count('NA'), set_colname))

    nodelist = list()  # keep the iteration below in list so we always use same order in future

    if get_subgraph:
        # get subgraph induced by nodes that have patent data in the
        # pat63_99.txt file
        nodeVec = snap.TIntV() # nodelist in TIntV format for use in SNAP
        for node in G.Nodes():
            patid = node.GetId()
            if patdata.has_key(patid):
                nodelist.append(patid)
                nodeVec.Add(patid)
        G = snap.GetSubGraph(G, nodeVec)
        print 'Subgraph with only nodes with patent attribute data:'
        snap.PrintInfo(G)
    else:
        # keep all the graph and just put NA for all data attributes
        citepatent_count = 0
        patentdata_count = 0
        for node in G.Nodes():
            citepatent_count += 1
            patid = node.GetId()
            nodelist.append(patid)
            #print citepatent_count, patentdata_count, patid  #XXX
            if not patdata.has_key(patid):
                #print 'NA for ', patid #XXX
                patdata[patid] = len(colnames)*["NA"]
            else:
                patentdata_count += 1
        sys.stdout.write("There are %d unique cited/citing patents of which %d (%f%%) have patent data\n" % (citepatent_count, patentdata_count, 100*float(patentdata_count)/citepatent_count))


    graph_filename = outputdir + os.path.sep + "patent_citations" + os.path.extsep + "txt"
    write_graph_file(graph_filename, G, nodelist)
    attributes_binary_filename = outputdir + os.path.sep + "patent_binattr"  + os.path.extsep + "txt"
    attributes_categorical_filename = outputdir + os.path.sep + "patent_catattr"  + os.path.extsep + "txt"
    attributes_continuous_filename = outputdir + os.path.sep + "patent_contattr" + os.path.extsep + "txt"
    attributes_set_filename = outputdir + os.path.sep + "patent_setattr" + os.path.extsep + "txt"

    write_attributes_file_binary(attributes_binary_filename, G, nodelist, patdata, colnames)
    write_attributes_file_categorical(attributes_categorical_filename, G, nodelist, patdata, colnames)
    write_attributes_file_continuous(attributes_continuous_filename, G, nodelist, patdata, colnames)
    write_attributes_file_set(attributes_set_filename, G, nodelist, patdata, colnames)

    nodeid_filename = outputdir + os.path.sep + "nodeid" + os.path.extsep + "txt"
    write_subgraph_nodeids(nodeid_filename, nodelist)

    # write patent sections as original letters before converting to int
    # This cannot be used by EstimNetDirected but is useful to read in R
    # and factor there so that the original names are preserved
    sections_filename = outputdir + os.path.sep + "patent_string_categories" + os.path.extsep + "txt"
    attrnames = ['CPCsections','LanguageCode','CountryCode']
    with open(sections_filename, 'w') as f:
        f.write(' '.join(attrnames) + '\n')
        for i in nodelist:
            for attrname in attrnames:
                val = patdata[i][colnames[attrname]]
                val = 'NA' if (val == 'NA' or val == 'XX') else val
                f.write(val)
                if attrname == attrnames[-1]:
                    f.write('\n')
                else:
                    f.write(' ' )
Esempio n. 16
0
def main():
    """Cluster the YouTube social network with CNM and analyze categories.

    Loads node attributes from ../data/nodes.csv and the graph either from
    a saved SNAP binary (../data/youtube.graph) or by rebuilding it from
    ../data/edges.csv, runs Clauset-Newman-Moore community detection on an
    undirected copy (self-edges removed), then plots the top category
    proportion per cluster to ../figures/category_top_clusters.png.
    """

    # Load data
    nodes = pd.read_csv("../data/nodes.csv", sep='\t', index_col=0)

    # Data in nice form
    headers = list(nodes.columns)
    nodes = np.asarray(nodes)

    # Load social network accordingly
    if path.exists("../data/youtube.graph"):
        FIn = snap.TFIn("../data/youtube.graph")
        social_network = snap.TNGraph.Load(FIn)
    else:
        edges = pd.read_csv("../data/edges.csv", sep='\t', index_col=0)
        edges = np.asarray(edges).astype(int)
        social_network = data2dag(edges, nodes.shape[0])

    # Check for self edges (report only; removal happens on the copy below)
    for e in social_network.Edges():
        if e.GetSrcNId() == e.GetDstNId():
            print("Self Loop Found:", e.GetSrcNId())

    # CNM Algorithm from snap.py
    print("Computing CNM")
    start = timeit.default_timer()
    CmtyV = snap.TCnComV()
    undirected = snap.ConvertGraph(snap.PUNGraph, social_network)
    snap.DelSelfEdges(undirected)
    the_modularity = snap.CommunityCNM(undirected, CmtyV)
    stop = timeit.default_timer()
    # node_to_cmty[i] = community index of node i; cmty_sizes[c] = |community c|
    node_to_cmty = np.zeros(nodes.shape[0])
    cmty_sizes = np.zeros(len(CmtyV))
    for i in range(len(CmtyV)):
        for node in CmtyV[i]:
            node_to_cmty[node] = i
        cmty_sizes[i] = len(CmtyV[i])
    cmtys = [[node for node in cmty] for cmty in CmtyV]
    '''
  edges = pd.read_csv("../data/edges.csv", sep='\t', index_col=0)
  edges = np.asarray(edges).astype(int)
  G = nx.Graph()
  G.add_nodes_from(range(nodes.shape[0]))
  G.add_edges_from(list(map(tuple, edges)))
  '''

    #assert(is_partition(G, cmtys))

    #print("Calculating Modularity")
    #modul = modularity(G, cmtys)
    print("Results from Clauset-Newman-Moore:")
    #print("Modularity:",modul)
    print("Number of clusters:", len(CmtyV))
    print("Time elapsed:", stop - start)

    # Fun category stuff to do
    upload_col = headers.index('category')
    categories = set()
    for i in range(nodes.shape[0]):
        categories.add(nodes[i][upload_col])
    idx_to_categories = list(categories)
    print("Number of categories:", len(idx_to_categories))
    categories_to_idx = dict()
    for i in range(len(idx_to_categories)):
        categories_to_idx[idx_to_categories[i]] = i

    # Communities and categories: per-community category histogram,
    # normalized by community size into proportions.
    cmty_category_count = np.zeros((len(CmtyV), len(idx_to_categories)))
    for i in range(nodes.shape[0]):
        cmty_category_count[int(node_to_cmty[i]),
                            categories_to_idx[nodes[i][upload_col]]] += 1
    cmty_category_count = cmty_category_count / cmty_sizes[:, np.newaxis]

    # Create graphs per category
    plt.figure()
    plt.plot(sorted(np.max(cmty_category_count, axis=1), reverse=True),
             label="Top proportion")
    plt.plot(0.5 * np.ones(cmty_category_count.shape[0]),
             label="Majority Threshold",
             linestyle='dashed')
    plt.title("Category Proportions in Clusters")
    plt.xlabel("Cluster")
    plt.ylabel("Proportion")
    plt.legend()
    plt.savefig("../figures/category_top_clusters.png")
    '''
  for i in range(cmty_category_count.shape[0]):
    top_category = np.argmax(cmty_category_count[i])
    print("Community "+str(i)+": "+str(idx_to_categories[top_category])+",",cmty_category_count[i][top_category])
  '''
    '''
Esempio n. 17
0
 def deleteSelfEdges(self):
     """Remove all self-loop edges from the wrapped SNAP graph, in place."""
     snap.DelSelfEdges(self.rawGraph)
    useredges.to_csv('temp/mergededges.csv', index=None)

    # Build graph from temp files using SNAP library
    context = snap.TTableContext()
    e_schema = snap.Schema()
    e_schema.Add(snap.TStrTAttrPr("source", snap.atStr))
    e_schema.Add(snap.TStrTAttrPr("target", snap.atStr))
    n_schema = snap.Schema()
    n_schema.Add(snap.TStrTAttrPr("username", snap.atStr))

    edgetable = snap.TTable.LoadSS(e_schema, 'temp/mergededges.csv', context,
                                   ",", snap.TBool(True))
    nodetable = snap.TTable.LoadSS(n_schema, 'temp/mergednodes.csv', context,
                                   ",", snap.TBool(True))

    edgeattrv = snap.TStrV()
    nodeattrv = snap.TStrV()
    nodeattrv.Add("username")

    net = snap.ToNetwork(snap.PNEANet, edgetable, "source", "target",
                         edgeattrv, nodetable, "username", nodeattrv,
                         snap.aaFirst)

    # Need to remove self-edges to compute rich club coefficient
    snap.DelSelfEdges(net)

    # Store the results
    name = str(pid) + '_usergraph'
    snap.SaveEdgeListNet(net, outpath + name + '.csv',
                         'Network of issues, PR and commits')
    generateTables(outpath, name, net)
def main():

    Component = snap.TIntPrV()
    #loading the real world graph
    realWorld = snap.LoadEdgeList(snap.PUNGraph, "CA-HepTh.txt", 0, 1)
    #deleting the self-edges from the graph
    snap.DelSelfEdges(realWorld)
    #calling the function
    wikiVotingNetwork()
    #Taking number of nodes in a graph from real world network
    n = realWorld.GetNodes()
    #Generating an Undirected Graph
    G = snap.TUNGraph.New()
    #Taking number of edges in a graph from user
    e = int(raw_input('Enter the number of Random Edges : '))

    p = float(
        raw_input('Enter the Probability of Edges between Nodes from 0-1  : '))
    #Generating Number of Nodes
    for i in range(n):
        #Adding Nodes into the graph
        G.AddNode(i)
    #calling the function
    erdosRenyi(G, p)
    #Printing the Clustering
    print 'Erdos Renyi Clustering Co-efficient: ', clustCoefficient(G)

    diam = snap.GetBfsFullDiam(G, 9877, False)
    #printing the diameter
    print 'Erdos Renyi Diameter: ', diam
    #plotting the graph
    snap.PlotOutDegDistr(G, "Erdos-Renyi",
                         "Un-Directed graph - Out-Degree Distribution")

    snap.GetSccSzCnt(G, Component)

    for comp in Component:
        #printing number of strongly connected components with size
        print "Size: %d - Number of Connected Component in Erdos-Renyi: %d" % (
            comp.GetVal1(), comp.GetVal2())
    #printing fraction of nodes and edges
    print "Fraction of Nodes and Edges in Erdos Renyi: ", snap.GetMxSccSz(G)
    #Drawing a Erdos Renyi Graph
    snap.DrawGViz(G, snap.gvlDot, "erdosRenyi1.png", "Erdos Renyi")
    #calling the function
    smallWorldRandomNetwork(G, e)
    #printing the clustering coefficient
    print 'Small World Random Network Clustering Co-efficient: ', clustCoefficient(
        G)

    diam = snap.GetBfsFullDiam(G, 9877, False)
    #printing the diameter
    print 'Small World Random Network Diameter: ', diam

    snap.GetSccSzCnt(G, Component)

    for comp in Component:

        #printing number of strongly connected components with size

        print "Size: %d - Number of Connected Component in Small World: %d" % (
            comp.GetVal1(), comp.GetVal2())
    #fraction of nodes and edges in small world
    print "Fraction of Nodes and Edges in Small World: ", snap.GetMxSccSz(G)
    #plotting the graph
    snap.PlotOutDegDistr(G, "Small-World",
                         "Un-Directed graph - Out-Degree Distribution")
    #drawinf the graph
    snap.DrawGViz(G, snap.gvlDot, "smallWorld1.png",
                  "Small World Random Network")
    #calculating the clustering co-efficient
    print 'Real World Random Network Clustering Co-efficient: ', clustCoefficient(
        realWorld)

    diam = snap.GetBfsFullDiam(G, 9877, False)

    print 'Real World Random Network Diameter: ', diam

    snap.GetSccSzCnt(realWorld, Component)

    for comp in Component:
        #printing number of strongly connected components with size

        print "Size: %d - Number of Weekly Connected Component in Real World: %d" % (
            comp.GetVal1(), comp.GetVal2())
    #printing fraction of nodes and edges
    print "Fraction of Nodes and Edges in Small World: ", snap.GetMxSccSz(
        realWorld)
    #plotting the real world network graph
    snap.PlotOutDegDistr(realWorld, "real-World",
                         "Un-Directed graph - Out-Degree Distribution")
    #Drawing Real WOrld Graph
    snap.DrawGViz(realWorld, snap.gvlDot, "realWorld.png",
                  "Real World Random Network")
Esempio n. 20
0
        # If an edge exists to or from a node in CnCom, connect that edge to the new representative node.
        for NI in graph.Nodes():
            if NI.GetId() in nodes:
                for Id_out in NI.GetOutEdges():
                    graph.AddEdge(num_nodes, Id_out)
                for Id_in in NI.GetInEdges():
                    graph.AddEdge(Id_in, num_nodes)

        # Delete all nodes in CnCom
        for NI in nodes:
            node_map_SCC[NI] = num_nodes
            graph.DelNode(NI)

# Delete all self loops and save graph as the SCC graph
snap.DelSelfEdges(graph)
graph.Defrag()  # compact internal storage
snap.SaveEdgeList(graph, file_name + "SCC.txt",
                  "Save as tab-separated list of edges")

# Section of code responsible for computing sets of nodes that have the same descendants
# Create a bfs tree from every node and map each node to a set of all its descendants
for NI in graph.Nodes():
    # BFS from NI following out-edges only (FollowOut=True, FollowIn=False).
    BfsTree = snap.GetBfsTree(graph, NI.GetId(), True, False)
    nodes = set()
    for EI in BfsTree.Edges():
        nodes.add(EI.GetDstNId())
    # all_descendants: node id -> set of ids reachable via out-edges.
    all_descendants[NI.GetId()] = nodes

# Iterate over the list of all descendants to pair the nodes that have the same descendants
for k1, v1 in all_descendants.items():
Esempio n. 21
0
# Expected excess degree of the small-world network:
# sum over degrees k of (k-1) * q(k), with q(k) = k*p(k)/sum(k*p(k)).
sw_eed = 0
for i in range(len(sw_qk)):
    sw_eed += (sw_keys[i] - 1) * sw_qk[i] / sw_sumq
print 'SW Expected Excess Degree:', sw_eed

# Expected degree: sum over degrees k of k * p(k).
sw_ed = 0
for i in range(len(sw_values_p)):
    sw_ed += sw_keys[i] * sw_values_p[i]
print 'SW Expected Degree:', sw_ed

ax.plot(sw_keys, sw_qk, marker='*', linestyle='-.', label='SW')

# Real-World Collaboration Network
colab_net = snap.LoadEdgeList(snap.PUNGraph, "ca-GrQc.txt", 0, 1, '\t')

snap.DelSelfEdges(colab_net)

# Degree histogram of the collaboration network.
ca_deg_dist = {}
ca_keys = []
ca_values = []

for n in colab_net.Nodes():
    # NOTE(review): the membership test uses GetOutDeg() while the counter
    # keys on GetDeg(); on an undirected PUNGraph the two coincide, so the
    # behavior is unchanged -- still worth unifying.
    if n.GetOutDeg() in ca_deg_dist:
        ca_deg_dist[n.GetDeg()] += 1
    else:
        ca_deg_dist[n.GetDeg()] = 1

# Sorted (degree, count) pairs for plotting.
for key in sorted(ca_deg_dist.iterkeys()):
    ca_keys.append(key)
    ca_values.append(ca_deg_dist[key])
Esempio n. 22
0
def main():
    """Build an Erdos-Renyi graph and a small-world graph, and load a real
    collaboration network; for each, report the clustering coefficient,
    effective diameter, largest-SCC statistics and plot the degree
    distribution.
    """
    # Number of nodes
    n = int(raw_input("Please enter the number of nodes"))
    # Probability of an edge between nodes
    p = float(
        raw_input(
            "Please enter the value of probability of an edge between nodes"))
    # Random Input of x pairs of nodes
    x = int(raw_input("Please enter the number of random, x pairs of nodes:"))
    # Empty graph and add nodes
    ERM = Empty_graph(n)

    # Add edges to the graph using personal Erdos Renyi Model
    Erdos_Renyi(ERM, p)
    # Erdos Renyi Clustering Coeffecient
    print("Clustering Coeffecient: ", clustering_coffecient(ERM))
    # Diameter
    diameter_ERM = snap.GetBfsEffDiamAll(ERM, 10, False)
    print(diameter_ERM[2])
    # Largest Strongly Connected Component
    # BUG FIX: the original passed Small_world here, but Small_world is not
    # defined until later in this function (NameError at runtime); the ERM
    # section must report on ERM.
    print("Largest Strongly Connected Component - Maximum size:",
          snap.GetMxSccSz(ERM))
    # Largest Size of Graph
    ERM_size = snap.GetMxScc(ERM).GetEdges()
    print(ERM_size)
    # Plot of Degree Distribution
    snap.PlotOutDegDistr(ERM, "ERMGraph", "ERM Degree Distribution")

    # Add Small World Network
    Small_world = Empty_graph(n)
    first_edges(Small_world)
    second_edges(Small_world)
    random_edges(Small_world, x)
    # Small World Clustering Coeffecient
    print("Clustering Coeffecient: ", clustering_coffecient(Small_world))
    # Diameter
    diameter_Small_world = snap.GetBfsEffDiamAll(Small_world, 10, False)
    print(diameter_Small_world[2])
    # Largest Strongly Connected Component
    print("Largest Strongly Connected Component - Maximum size:",
          snap.GetMxSccSz(Small_world))
    # Largest Size of Graph
    Small_world_size = snap.GetMxScc(Small_world).GetEdges()
    print(Small_world_size)
    # Plot of Degree Distribution
    snap.PlotOutDegDistr(Small_world, "SmallWorldGraph",
                         "Small World Degree Distribution")

    # Add Collaboration Network
    Collaboration_Network = snap.LoadEdgeList(snap.PUNGraph, "CA-HepTh.txt", 0,
                                              1)
    snap.DelSelfEdges(Collaboration_Network)
    snap.PrintInfo(Collaboration_Network, "Graph Statistics", "info.txt",
                   False)
    # Collaboration Network Clustering Coeffecient
    print("Clustering Coeffecient: ",
          clustering_coffecient(Collaboration_Network))
    # Diameter
    diameter_Collaboration_Network = snap.GetBfsEffDiamAll(
        Collaboration_Network, 10, False)
    print(diameter_Collaboration_Network[2])
    # Largest Strongly Connected Component
    print("Largest Strongly Connected Component - Maximum size:",
          snap.GetMxSccSz(Collaboration_Network))
    # Largest Size of Graph
    Collaboration_Network_size = snap.GetMxScc(
        Collaboration_Network).GetEdges()
    print(Collaboration_Network_size)
    # Plot of Degree Distribution
    snap.PlotOutDegDistr(Collaboration_Network, "CollaborationNetworkGraph",
                         "Collaboration Network Degree Distribution")
Esempio n. 23
0
    return


def have_common_friends(G, a, b, node_is_B):
    """Return True iff nodes a and b share at least one common neighbor.

    On a's side, neighbors equal to a or b themselves, and neighbors
    flagged in node_is_B, are excluded before comparing.

    G         -- graph exposing GetNI(id).GetOutEdges()
    a, b      -- node ids
    node_is_B -- indexable of node id -> bool (True = excluded)
    """
    # PERF: the original accumulated a's friends in a list, making every
    # membership test over b's neighbors an O(n) scan; a set gives O(1).
    friends_a = set()
    for Id in G.GetNI(a).GetOutEdges():
        if Id != a and Id != b and not node_is_B[Id]:
            friends_a.add(Id)
    return any(Id in friends_a for Id in G.GetNI(b).GetOutEdges())


# Load the Slashdot graph as undirected and drop self loops before ranking.
LoadedGraph = snap.LoadEdgeList(snap.PUNGraph, "Slashdot0902.txt", 0, 1, '\t')
snap.DelSelfEdges(LoadedGraph)

# Seed the RNG from the wall clock so later random sampling varies per run.
random.seed(datetime.now())
# PageRank scores: node id -> float, filled in-place by GetPageRank.
PRankH = snap.TIntFltH()
snap.GetPageRank(LoadedGraph, PRankH)
# Flatten to (node_id, score) pairs and sort by score, highest first.
PRankH_arr = []
for item in PRankH:
    PRankH_arr.append((item, PRankH[item]))
PRankH_arr.sort(key=itemgetter(1), reverse=True)
# NOTE(review): bare `except:` swallows every error (including
# KeyboardInterrupt) and execution continues with `f` undefined if open
# fails — narrow to IOError/OSError and abort instead. TODO confirm intent.
try:
    f = open("p4_result.txt", "w+")
except:
    print("Some error occurs about open file")
for num_init_adopters in num_init_adopters_arr:
    key_nodes_Id = []
    for i in range(num_init_adopters):
def loadCollabNet(path):
    """Load an undirected graph from the tab-separated edge list at
    *path*, remove any self loops, and return it."""
    network = snap.LoadEdgeList(snap.PUNGraph, path, 0, 1, '\t')
    snap.DelSelfEdges(network)
    return network
Esempio n. 25
0
def main():

  # Load data
  if path.exists("../data/cmty_nodes.csv"):
    node_upload = "../data/cmty_nodes.csv"
  elif path.exists("../data/nodes.csv"):
    node_upload = "../data/nodes.csv"
  else:
    print("NO NODES TO UPLOAD!")
    assert(False)
  pd_nodes = pd.read_csv(node_upload, sep='\t', index_col=0)

  # Data in nice form
  headers = list(pd_nodes.columns)
  nodes = np.asarray(pd_nodes)

  # Load social network accordingly
  if path.exists("../data/youtube.graph"):
    FIn = snap.TFIn("../data/youtube.graph")
    social_network = snap.TNGraph.Load(FIn)
  else:
    edges = pd.read_csv("../data/edges.csv", sep='\t', index_col=0)
    edges = np.asarray(edges).astype(int)
    social_network = data2dag(edges, nodes.shape[0])

  # Check for self edges
  for e in social_network.Edges():
    if e.GetSrcNId() == e.GetDstNId():
      print("Self Loop Found:",e.GetSrcNId())

  # CNM Algorithm from snap.py
  print("Computing CNM")
  start = timeit.default_timer()
  CmtyV = snap.TCnComV()
  undirected = snap.ConvertGraph(snap.PUNGraph, social_network)
  snap.DelSelfEdges(undirected)
  the_modularity = snap.CommunityCNM(undirected, CmtyV)
  stop = timeit.default_timer()
  node_to_cmty = np.zeros(nodes.shape[0]).astype(int)
  cmty_sizes = np.zeros(len(CmtyV))
  for i in range(len(CmtyV)):
    for node in CmtyV[i]:
      node_to_cmty[node] = i
    cmty_sizes[i] = len(CmtyV[i])
  cmtys = [[node for node in cmty] for cmty in CmtyV]
  '''
  m = 0
  for i in range(len(CmtyV)):
    Nodes = snap.TIntV()
    for elem in CmtyV[i]:
      Nodes.Add(int(elem))
    m += snap.GetModularity(social_network, Nodes, social_network.GetEdges())
  '''
  edges = pd.read_csv("../data/edges.csv", sep='\t', index_col=0)
  edges = np.asarray(edges).astype(int)
  G = nx.Graph()
  G.add_nodes_from(range(nodes.shape[0]))
  G.add_edges_from(list(map(tuple, edges)))

  # Add communities to nodes
  col_name = "cnm_cmty"
  pd_nodes[col_name] = node_to_cmty
  pd_nodes.to_csv("../data/cmty_nodes.csv", sep='\t')


  assert(is_partition(G, cmtys))

  print("Calculating Modularity")
  modul = modularity(G, cmtys)
  print("Results from Clauset-Newman-Moore:")
  print("Modularity:",modul)
  print("Number of clusters:",len(CmtyV))
  print("Time elapsed:",stop - start)


  # Fun category stuff to do
  '''
  upload_col = headers.index('category')
  categories = set()
  for i in range(nodes.shape[0]):
    categories.add(nodes[i][upload_col])
  idx_to_categories = list(categories)
  print("Number of categories:",len(idx_to_categories))
  categories_to_idx = dict()
  for i in range(len(idx_to_categories)):
    categories_to_idx[idx_to_categories[i]] = i

  # Communities and categories
  cmty_category_count = np.zeros((len(CmtyV),len(idx_to_categories)))
  for i in range(nodes.shape[0]):
    cmty_category_count[int(node_to_cmty[i]),categories_to_idx[nodes[i][upload_col]]] += 1
  cmty_category_count = cmty_category_count/cmty_sizes[:,np.newaxis]
  '''


  # Create graphs per category
  '''
  plt.figure()
  for i in range(len(idx_to_categories)):
    if (str(idx_to_categories[i]) != "nan") and (idx_to_categories[i] != " UNA "):
      plt.plot(sorted(cmty_category_count[:,i], reverse=True), label=idx_to_categories[i])
  plt.title("Category Proportions in Clusters")
  plt.xlabel("Cluster")
  plt.ylabel("Proportion")
  plt.legend(bbox_to_anchor=(1.04,1), loc="upper left")
  plt.savefig("../figures/category_proportions_clusters.png", bbox_inches="tight")
  '''
  '''
  for i in range(cmty_category_count.shape[0]):
    top_category = np.argmax(cmty_category_count[i])
    print("Community "+str(i)+": "+str(idx_to_categories[top_category])+",",cmty_category_count[i][top_category])
  '''





  '''