Code Example #1
File: ltDecomp3.py Project: joeyh321/ORCA
def loadNwU(dsName, path, cd, wccOnly, revEdges, undir):
    print("   Opening " + dsName + " and loading graph... ")
    t1 = time.clock()
    fh = open(path + dsName, "rb")
    if undir:
        if cd:
            prodNet = nx.read_edgelist(fh, delimiter=",")
        else:
            prodNet = nx.read_edgelist(fh)
            # prodNet = prodNet.to_directed()
    else:
        if cd:
            prodNet = nx.read_edgelist(fh, delimiter=",", create_using=nx.DiGraph())
        else:
            prodNet = nx.read_edgelist(fh, create_using=nx.DiGraph())

    fh.close()
    if wccOnly:
        prodNet = nx.algorithms.weakly_connected.weakly_connected_component_subgraphs(prodNet)[0]

    prodNet.remove_edges_from(prodNet.selfloop_edges())

    if revEdges:
        prodNet.reverse(False)

    numNodes = str(len(prodNet))
    numEdges = str(prodNet.size())
    t2 = time.clock()
    print("    -> graph loaded: " + numNodes + " nodes, " + numEdges + " edges (" + str(t2 - t1) + " sec).")
    return prodNet
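A minimal usage sketch (the file name, path, and flag values below are hypothetical):

# Hypothetical call: load "prod.csv" from data/ as an undirected,
# comma-delimited edge list, keeping all components and edge directions.
net = loadNwU("prod.csv", "data/", cd=True, wccOnly=False, revEdges=False, undir=True)
print(net.number_of_nodes(), net.size())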
Code Example #2
File: RandomGGen.py Project: lab-csx-ufmg/RECAST
def gen_random_graphs(seed, db):
    
    print "generating random graph with seed " + str(seed)
    directory = db.get_rnd_graphs_path()
    if not path.exists(directory):
        makedirs(directory)
    
    filename = db.get_rnd_graph_full_name(str(seed), str(db.get_final_time()))
    if(path.exists(filename)):
        print "random graph with seed " + str(seed) + " already exists! Skipping..."
        return

    
    pathD = db.get_graphs_path()
    filename = pathD + db.get_windowed_graph_name(0)
    G=nx.read_edgelist(filename, nodetype = int, data=(('weight',float),))
    GR = get_random_graph_from(G, seed)
    save_random_graph(GR,1, db)
    
    for i in range(2,db.get_final_time()+1):
        filename = pathD + db.get_windowed_graph_name(str(i))
        if(not path.exists(filename)):
            f = open(filename,'w')
            f.close()
            
        G=nx.read_edgelist(filename, nodetype = int, data=(('weight',float),))
        GRnew = get_random_graph_from(G, seed)
        GR.graph['nmerges'] = i-2
        GR = merge_temporal_graphs(GR, GRnew)
        GR = compute_edge_features(GR)
        save_random_graph(GR,i, db)
    
        print("G_RND[" + str(i)  + "] has " + str(GR.number_of_edges()) + " edges")
Code Example #3
def k_obfuscation_measure(before_file, after_file, n_nodes, k_arr, data=True):
    print "n_nodes =", n_nodes
    
    # before_file
    bG = nx.read_edgelist(before_file, '#', '\t', None, nodetype=int)
    print "read bG - DONE"
    
#    if bG.number_of_nodes() < n_nodes:
#        bG.add_nodes_from(range(n_nodes))       # only for er_100k

    # Case 1 - aG = bG
    if after_file == before_file:      # after_file is before_file
        for e in bG.edges_iter():
            bG[e[0]][e[1]]['p'] = 1.0
        return compute_eps_multi(bG, bG, k_arr) 
        
    # Case 2 - aG is a sample
    # after_file
    if data == True:
        aG = nx.read_edgelist(after_file, '#', '\t', None, nodetype=int, data=True)
    else:
        aG = nx.read_edgelist(after_file, '#', '\t', None, nodetype=int, data=False)
#        if aG.number_of_nodes() < n_nodes:
#            aG.add_nodes_from(range(n_nodes))       # only for the cases of KeyError !
        for e in aG.edges_iter():
            aG[e[0]][e[1]]['p'] = 1.0
    print "read aG - DONE"
    
    return compute_eps_multi(bG, aG, k_arr) 
Code Example #4
def gen_random_graphs(seed):
    
	# create windowed random graphs for each real graph
	# obtain aggregated graph
	# calculate features of random graph

	print "GENERATING RANDOM GRAPHS"

	day = 1
	final_day = which_day(_maxtime)+1

	filename = str(results_folder) + "Graphs_Data/windowed_graph_" + str(day) + ".txt"

	print filename 

	G = nx.read_edgelist(filename, nodetype = int, data = (('top',float),))

	# print G 

	GR = get_random_graph_from(G, seed)

	for i in range(2,final_day):
		day = i
		filename = str(results_folder) + "Graphs_Data/windowed_graph_" + str(day) + ".txt"
		G = nx.read_edgelist(filename, nodetype = int, data = (('top',float),))
		GRnew = get_random_graph_from(G, seed)
		GR.graph['nmerges'] = i - 2
		GR = merge_temporal_graphs(GR, GRnew)
		GR = compute_edge_features(GR)
		save_random_graph(GR,i,seed)
Code Example #5
def incorrectness_uncertain_from_file(before_file, after_file, sample_file, n_samples, bins): 
    
    # compute sig_list_b, bucket_list_b ONCE !
    start = time.clock()
    bG = nx.read_edgelist(before_file, '#', '\t', None, nodetype=int)
#    G = nx.read_edgelist(after_file, '#', '\t', None, nodetype=int, data=True)
    print "read bG: DONE, elapsed :", time.clock() - start
    
    h2_list = equivalence_class_H2_open(bG, None)
    cand_size, bin_size, sig_list_b, bucket_list_b = bucket_H2(h2_list, bins)
#    print "len B:", len(sig_list_b), len(bucket_list_b)
    
    # H1 score, H2 score
    start = time.clock()
    score_H1 = 0.0
    score_H2 = 0.0
    count = 0
    for i in range(n_samples):
        file_name = sample_file + str(i)
        aG = nx.read_edgelist(file_name, '#', '\t', create_using=nx.MultiGraph(), nodetype=int, data=False)     # IMPORTANT: MultiGraph
        # H1
        sum_re_prob, re_prob_dict = incorrectness_H1(bG, aG, bins)
        score_H1 += sum_re_prob
        # H2
        sum_re_prob, re_prob_dict = incorrectness_H2_open(aG, sig_list_b, bucket_list_b, bins)
        score_H2 += sum_re_prob
        print "count =", count
        count += 1
    #
    score_H1 = score_H1/n_samples
    score_H2 = score_H2/n_samples
    print "compute score_H1, score_H2: DONE, elapsed :", time.clock() - start
    
    # 
    return score_H1, score_H2
Code Example #6
def main():
    """
    Pre-processing: 
        load data, compute centrality measures, write files with node data
    """
    print(nx.__version__)
    # Load network data, create storage dict, and extract main component
    depends=nx.read_edgelist("data/depends.csv",delimiter=",",create_using=nx.DiGraph(),nodetype=str,data=(("weight",time_from_today),))
    depends.name="depends"
    suggests=nx.read_edgelist("data/suggests.csv",delimiter=",",create_using=nx.DiGraph(),nodetype=str,data=(("weight",time_from_today),))
    suggests.name="suggests"
    imports=nx.read_edgelist("data/imports.csv",delimiter=",",create_using=nx.DiGraph(),nodetype=str,data=(("weight",time_from_today),))
    imports.name="imports"
    nets_dict={"depends":depends,"suggests":suggests,"imports":imports}
    for k in nets_dict.keys():
        main_component=nx.connected_component_subgraphs(nets_dict[k].to_undirected())[0].nodes()
        nets_dict[k]=nx.subgraph(nets_dict[k],main_component)
    
    # Run multiple measures on graphs and normalize weights
    measure_list=[nx.in_degree_centrality,nx.betweenness_centrality,nx.pagerank]
    for g in nets_dict.values():
        multiple_measures(g,measure_list)
        normalize_weights(g)
        
    # Output networks in GraphML format (to store node attributes)
    for i in nets_dict.items():
        # print(i[1].edges(data=True))
        nx.write_graphml(i[1],"data/"+i[0]+"_data.graphml")
        print("")
    print("All files written with data")
    
    """Visualization:
Code Example #7
def main():
    parser = createParser()
    options = parser.parse_args()

    gtGraphNames = glob.glob("{0}/*.sim.cut".format(options.gtruth))
    gtGraphs = { fn.split("/")[-1][:-8] : nx.read_edgelist(fn) for fn in gtGraphNames }
    print(gtGraphs)
    print(gtGraphNames)

    oGraphNames = [ "{0}/{1}.out.ppi".format(options.other, k) for k in gtGraphs.keys() ]
    oGraphs = { fn.split("/")[-1][:-8] : nx.read_weighted_edgelist(fn) for fn in oGraphNames }
    inputGraphNames = glob.glob("{0}/bZIP*.cut".format(options.other))
    print(inputGraphNames)
    inputGraph = nx.read_edgelist(inputGraphNames[0])
    print(oGraphNames)

    cutoff = 0.99
    paranaGraph = graphWithCutoff(options.parana, 0.0)
    c = findSuggestedCutoff( paranaGraph, inputGraph, cutoff )
    evaluation.printStats( filteredGraph(paranaGraph, inputGraph.nodes(), cutoff=c ), inputGraph )
    print >>sys.stderr, "Parana 2.0    : {0}".format(getCurve(paranaGraph, inputGraph))



    for gtName, gtGraph in gtGraphs.iteritems():
        print(gtName)
        c = findSuggestedCutoff( paranaGraph, gtGraph, cutoff )
        print("Parana cutoff = {0}".format(c))
        print("==================")
        evaluation.printStats( filteredGraph(oGraphs[gtName], gtGraph.nodes()), gtGraph )
        print >>sys.stderr, "Pinney et. al : {0}".format(getCurve(oGraphs[gtName], gtGraph))
        evaluation.printStats( filteredGraph(paranaGraph, gtGraph.nodes(), cutoff=c ), gtGraph )
        print >>sys.stderr, "Parana 2.0    : {0}".format(getCurve(paranaGraph, gtGraph))
        print("\n")
    sys.exit(0)
Code Example #8
File: graph_functions.py Project: jcccf/twitterdc
def graph_properties(filename, directed=False):
  # Read in edge list as a directed or undirected graph
  if directed:
    G=nx.read_edgelist(filename, nodetype=int, create_using=nx.DiGraph())
  else:
    G=nx.read_edgelist(filename, nodetype=int, create_using=nx.Graph())

  props = {}

  # Calculate number of edges
  props['num_edges'] = G.number_of_edges()

  # Calculate number of nodes
  props['num_nodes'] = len(G)

  # Calculate largest connected component
  largest_component = nx.connected_component_subgraphs(G)[0]
  props['size_largestcc'] = len(largest_component)
  props['proportion_in_largestcc'] = float(len(largest_component)) / len(G)

  # Calculate clustering coefficient
  props['average_clustering'] = nx.average_clustering(G)

  # Calculate diameter of largest connected component
  # props['diameter'] = nx.diameter(largest_component)
  
  return props
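A minimal usage sketch (the file name is hypothetical):

# Hypothetical call: summarize an undirected edge list and read off a few properties.
props = graph_properties("rec_edges.txt", directed=False)
print(props['num_nodes'], props['num_edges'], props['proportion_in_largestcc'])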
Code Example #9
def calGraph(infile, mode = 1):
	#init Parameter
	inputpath = 'edge_list/'
	outputpath = 'network_output/'
	n = mode
	Data_G = inputpath+infile+'_'+str(n)+'.edgelist'
	
	#init Graph
	G = nx.read_edgelist(Data_G, create_using=nx.DiGraph())
	GU = nx.read_edgelist(Data_G)
	#basic info
	print nx.info(G),'\n', nx.info(GU)
	average_degree = float(sum(nx.degree(G).values()))/len(G.nodes())
	print 'average degree :', average_degree
	degree_histogram = nx.degree_histogram(G)
	print 'degree histogram max :', degree_histogram[1]
	density = nx.density(G)
	print 'density :', density

	#Approximation
	#Centrality
	degree_centrality = nx.degree_centrality(G)
	print 'degree centrality top 10 !', sorted_dict(degree_centrality)[:2]
	out_degree_centrality = nx.out_degree_centrality(G)
	print 'out degree centrality top 10 !', sorted_dict(out_degree_centrality)[:2]
Code Example #10
File: er_generator.py Project: shuchu/graph
    def load(self,fname):
        fext = (str(fname).split("."))[1]
        self.fname = (str(fname).split("."))[0]

        if self.directed_graph == False:
            self.G = nx.read_edgelist(path=fname)
        else:
            self.G = nx.read_edgelist(path=fname,create_using=nx.DiGraph())
Code Example #11
 def test_edgelist_integers(self):
     G=nx.convert_node_labels_to_integers(self.G)
     (fd,fname)=tempfile.mkstemp()
     nx.write_edgelist(G,fname)  
     H=nx.read_edgelist(fname,nodetype=int)
     H2=nx.read_edgelist(fname,nodetype=int)
     G.remove_node(5) # isolated nodes are not written in edgelist
     assert_equal(sorted(H.nodes()),sorted(G.nodes()))
     assert_equal(sorted(H.edges()),sorted(G.edges()))
     os.close(fd)
     os.unlink(fname)
Code Example #12
 def test_edgelist_multidigraph(self):
     G = self.XDG
     (fd, fname) = tempfile.mkstemp()
     nx.write_edgelist(G, fname)
     H = nx.read_edgelist(fname, nodetype=int, create_using=nx.MultiDiGraph())
     H2 = nx.read_edgelist(fname, nodetype=int, create_using=nx.MultiDiGraph())
     assert_not_equal(H, H2)  # they should be different graphs
     assert_nodes_equal(list(H), list(G))
     assert_edges_equal(list(H.edges()), list(G.edges()))
     os.close(fd)
     os.unlink(fname)
Code Example #13
def calGraph(infile, mode = 1):
	#init Parameter
	inputpath = 'edge_list/'
	n = mode
	Data_G = inputpath+infile+'_'+str(n)+'.edgelist'
	
	#init Graph
	G = nx.read_edgelist(Data_G, create_using=nx.DiGraph())
	GU = nx.read_edgelist(Data_G)
	average_clustering = nx.average_clustering(GU)
	transitivity = nx.transitivity(G)
	return [average_clustering, transitivity]
Code Example #14
 def test_edgelist_graph(self):
     G=self.G
     (fd,fname)=tempfile.mkstemp()
     nx.write_edgelist(G,fname)  
     H=nx.read_edgelist(fname)
     H2=nx.read_edgelist(fname)
     assert_not_equal(H,H2) # they should be different graphs
     G.remove_node('g') # isolated nodes are not written in edgelist
     assert_equal(sorted(H.nodes()),sorted(G.nodes()))
     assert_equal(sorted(H.edges()),sorted(G.edges()))
     os.close(fd)
     os.unlink(fname)
Code Example #15
 def test_edgelist_digraph(self):
     G = self.DG
     (fd, fname) = tempfile.mkstemp()
     nx.write_edgelist(G, fname)
     H = nx.read_edgelist(fname, create_using=nx.DiGraph())
     G.remove_node('g')  # isolated nodes are not written in edgelist
     H2 = nx.read_edgelist(fname, create_using=nx.DiGraph())
     assert_not_equal(H, H2)  # they should be different graphs
     assert_nodes_equal(list(H), list(G))
     assert_edges_equal(list(H.edges()), list(G.edges()))
     os.close(fd)
     os.unlink(fname)
Code Example #16
def comorbid_count_compare(net_dir, icd_gene_clinical, cancer_info, alterations, weighted=False):
    # = 'humannet.9'
    graph = networkx.read_edgelist(net_dir + '/network',nodetype=str)
    ct = neighbor_count_comorbid(graph, alterations['peak_mut'], icd_gene_clinical, cancer_info, comorbid_only = True, weighted=weighted)
    import os
    randdir = net_dir + '/rand/'
    randnets = os.listdir(randdir)
    x = scipy.zeros([len(randnets)])
    for i,f in enumerate(randnets):
        net = networkx.read_edgelist(randdir + f, nodetype = str, data=weighted)
        x[i] = neighbor_count_comorbid(net, alterations['peak_mut'], icd_gene_clinical, cancer_info, comorbid_only = True, weighted = weighted)    
    print 'comorbid_edges= ' + str(ct) + "\tngreater=" +str(sum(x >= ct)) + '\tp=' + str(sum(x >= ct)/float(len(randnets)))
    return ct, x
Code Example #17
    def test_read_edgelist_3(self):
        s = b"""\
# comment line
1 2 {'weight':2.0}
# comment line
2 3 {'weight':3.0}
"""
        bytesIO = io.BytesIO(s)
        G = nx.read_edgelist(bytesIO,nodetype=int,data=False)
        assert_equal_edges(G.edges(),[(1,2),(2,3)])

        bytesIO = io.BytesIO(s)
        G = nx.read_edgelist(bytesIO,nodetype=int,data=True)
        assert_equal_edges(G.edges(data=True),[(1,2,{'weight':2.0}),(2,3,{'weight':3.0})])
Code Example #18
File: main.py Project: aditya-grover/node2vec
def read_graph():
	'''
	Reads the input network in networkx.
	'''
	if args.weighted:
		G = nx.read_edgelist(args.input, nodetype=int, data=(('weight',float),), create_using=nx.DiGraph())
	else:
		G = nx.read_edgelist(args.input, nodetype=int, create_using=nx.DiGraph())
		for edge in G.edges():
			G[edge[0]][edge[1]]['weight'] = 1

	if not args.directed:
		G = G.to_undirected()

	return G
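A minimal sketch of the weighted branch above, outside the argparse wiring (the toy file and its contents are made up):

import networkx as nx

# Write a tiny weighted edge list, then read it back the way read_graph() does.
with open("toy.edgelist", "w") as f:
    f.write("1 2 0.5\n2 3 1.5\n")
G = nx.read_edgelist("toy.edgelist", nodetype=int,
                     data=(('weight', float),), create_using=nx.DiGraph())
print(G[1][2]['weight'])  # 0.5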
Code Example #19
def write_communities(graph, name_to_size):

    # note: str.strip('.ncol') strips a *set of characters*, not the suffix
    shortname = graph.split('/')[-1]
    if shortname.endswith('.ncol'):
        shortname = shortname[:-len('.ncol')]
    nxgraph = networkx.read_edgelist(graph)
    partition = community.best_partition(nxgraph)
    count = 0
    if shortname in name_to_size.keys():
        for com in set(partition.values()):
            count = count + 1.
            list_nodes = [nodes for nodes in partition.keys() if partition[nodes] == com]
            size_com = len(list_nodes)
            if size_com > name_to_size[shortname]:
                community_subgraph = nxgraph.subgraph(list_nodes)
                with open("/net/data/graph-models/louvain-clusters/communities/" + shortname +"_" +str(count), 'a') as fout1:
                    networkx.write_edgelist(community_subgraph, fout1)
Code Example #20
File: node2vec.py Project: Loricanal/entity2rec
    def read_graph(self, nx_g):

        if self.is_weighted:

            self.G = nx.read_edgelist(nx_g, data=(('weight', float),), create_using=nx.DiGraph(), edgetype=str)

        else:

            self.G = nx.read_edgelist(nx_g, create_using=nx.DiGraph(), edgetype=str)

            for edge in self.G.edges():
                self.G[edge[0]][edge[1]]['weight'] = 1

        if not self.is_directed:
            self.G = self.G.to_undirected()
Code Example #21
def main():
    """
    Program Driver. Parses command line arguments to determine where to store
    output pickle files and what networks to attack, reads in networks from the
    given source, runs all necessary attacks, and pickles the output for later
    use.
    """

    aparse = argparse.ArgumentParser(usage="Attack a collection of networks")
    aparse.add_argument('--network_file', '-f', action='store',
                        default='networks.yaml',
                        help="Path to network config (default: ./networks.yaml)",
                        dest='config_path')
    aparse.add_argument('--picklejar', '-p', action='store',
                        default='.',
                        help='output for pickle files (default: current directory)',
                       )
    aparse.add_argument('--update', '-u', action='store_true',
                        help='Only run network processes for networks which have ' +
                             'not already been analyzed.')
    args = aparse.parse_args()

    cfg = open(args.config_path, 'r')


    for net_attrs in yaml.safe_load_all(cfg):
        picklename = net_attrs["name"] + ".pickle"
        if args.update and picklename in os.listdir(args.picklejar):
            continue

        print "Analyzing network %s..." % net_attrs['name']
        fname = net_attrs['filename']
        data = [(key, eval(value)) for key, value in net_attrs['data'].items()]
        if net_attrs["directed"]:
            network = networkx.read_edgelist(fname,
                                             create_using=networkx.DiGraph(),
                                             nodetype=str,
                                             data=data).to_undirected()
        else:
            network = networkx.read_edgelist(fname,
                                             create_using=networkx.Graph(),
                                             nodetype=str,
                                             data=data)
        print "Network file loaded"
        pckl = os.path.normpath(args.picklejar+"/"+ picklename)
        ac.compare_to_random_networks(network, FRACS, pckl)

        print "Done!"
Code Example #22
File: sigcomm2012.py Project: pstjuste/pt_analysis
def main():

    msg = "help: sigcomm graph1 graph2 cap dist edges wasted thld " \
          "hops tries ttl prob"
    if len(sys.argv) < 12: print msg; return -1

    g = nx.read_edgelist(sys.argv[1], create_using=nx.Graph())
    dg = nx.DiGraph()

    random.seed(-1)

    cap = int(sys.argv[3])
    dist = int(sys.argv[4])
    edges = int(sys.argv[5])
    wasted = int(sys.argv[6])
    threshold = int(sys.argv[7])
    hops = int(sys.argv[8])
    tries = int(sys.argv[9])
    ttl = int(sys.argv[10])
    prob = float(sys.argv[11])

    cap_edges(g, cap)
    get_followers_dist(g, dg, dist)
    sum_edges(g, edges)
    wasted_packets(g, dg, wasted)
    add_pseudo_edges(g, dg, threshold)
    find_paths(g, dg, hops, tries, ttl, prob)

    print >> sys.stderr, "g nodes", len(g)
    print >> sys.stderr, "g edges", g.size()

    print >> sys.stderr, "dg nodes", len(dg)
    print >> sys.stderr, "dg edges", dg.size()
Code Example #23
File: vna2mpb.py Project: christianyoung/ORCA
def show(filename, title):
    if not os.path.isfile(filename + '.png'):
        FLAG = 0
        x = []
        y = []
        reader = csv.reader(open(filename + '.vna', 'rb'), delimiter='\t')  # dialect='excel-tab'

        for row in reader:
            if FLAG == 1:
                x.append(row[0])
                y.append(row[1])
            if row[0] == 'v1':
                FLAG = 1

        with open(filename + '.csv', 'wb') as csvfile:
            writer = csv.writer(csvfile, delimiter=',')
            for i in range(len(x)):
                writer.writerow([x[i]] + [y[i]])

        G = nx.read_edgelist(filename + '.csv', delimiter=",", create_using=nx.Graph(), nodetype=str)

        plot6.Save(G, filename)
    else:
        img = mpimg.imread(filename + '.png')
        plt.imshow(img, interpolation='nearest')
        plt.axis('off')
        plt.suptitle(title, y=0.95)
        plt.show()
Code Example #24
File: sigcomm2012.py Project: pstjuste/pt_analysis
def get_followers_dist(g, dg, follow):

    if follow == -1: return -1

    if len(dg) == 0:
        dg = nx.read_edgelist(sys.argv[2], create_using=dg)

    no_of_paths = 0
    for u in dg.nodes():

        if not g.has_node(u):
            print "no_source"
            continue

        for v in dg.successors(u):
            if u == v: continue

            if g.has_node(v):
                try:
                    print nx.shortest_path_length(g, source=u, target=v)
                    no_of_paths += 1
                except nx.exception.NetworkXError as err:
                    print "no_path"
            else:
                print "no_target"

    print >> sys.stderr, "no of paths", no_of_paths
    return no_of_paths
Code Example #25
def get_graph(path):
    fh = open(path, 'rb')
    G = nx.read_edgelist(fh)
    fh.close()
    #remove possible self loops
    G.remove_edges_from(G.selfloop_edges())
    return G
Code Example #26
File: sigcomm2012.py Project: pstjuste/pt_analysis
def add_pseudo_edges(g, dg, threshold):
    """ flawed logic, needs to be fixed """

    if threshold == -1 : return -1

    if len(dg) == 0:
        dg = nx.read_edgelist(sys.argv[2], create_using=dg)

    new_edges = []
    for n in dg.nodes():

        if not g.has_node(n): continue

        fw_count = {}
        n_dists = nx.single_source_shortest_path_length(g,n,4)
        followings = set(dg.successors(n))

        for node, dist in n_dists.iteritems():
            if dist > 2: continue

            for f in dg.successors(node):
                if f not in followings:
                    if f in fw_count:
                        fw_count[f] = fw_count[f] + 1
                    else: fw_count[f] = 1

        for k,v in fw_count.iteritems():
            if v >= threshold and k in n_dists and n_dists[k] <= 4: 
                new_edges.append((n,k))

    for e in new_edges: dg.add_edge(*e)
    print >> sys.stderr, "new edges", len(new_edges)
    return 0
Code Example #27
def load_data(from_cache):
    if from_cache:
        input_filename = constants.CHARTS_FOLDER_NAME + 'by_nodes'
        reader = open(input_filename, 'r')
        by_nodes = eval(reader.read())
        reader.close()

        input_filename = constants.CHARTS_FOLDER_NAME + 'by_times'
        reader = open(input_filename, 'r')
        by_times = eval(reader.read())
        reader.close()
    else:
        by_nodes = {}
        by_times = {}
        
        utils.ensure_folder(constants.CHARTS_FOLDER_NAME)

        filenames = os.listdir(constants.GRAPHS_FOLDER_NAME)
        filenames.sort()

        print len(filenames)
        
        time = 0
        for filename in filenames:
            print 'Processing: ' + filename
        
            input_filename = constants.GRAPHS_FOLDER_NAME + filename
            
            #g = nx.read_gpickle(input_filename)
            g = nx.read_edgelist(input_filename, create_using=nx.DiGraph())
            #for wifi data            
            #g = nx.read_edgelist(input_filename, '#', ',')
            
            for v in g.nodes():
                node_state = calc_node_state(g, v)
                
                if v not in by_nodes:
                    by_nodes[v] = {}
                by_nodes[v][time] = node_state
                
                if time not in by_times:
                    by_times[time] = {}
                by_times[time][v] = node_state
    
            time = time+1
            
        output_filename = constants.CHARTS_FOLDER_NAME + 'by_nodes'
        writer = open(output_filename, 'w')
        writer.write(str(by_nodes))
        writer.close()
    
        output_filename = constants.CHARTS_FOLDER_NAME + 'by_times'
        writer = open(output_filename, 'w')
        writer.write(str(by_times))
        writer.close()
    
        print len(by_times)
        print len(by_nodes)
        
    return by_nodes, by_times
Code Example #28
File: traubnet.py Project: BhallaLab/thalamocortical
    def _read_cell_graph(self, filename, format):
        """Load the cell-to-cell connectivity graph from a
        file. 

        Returns None if any error happens.
        """
        cell_graph = None
        if filename:
            try:
                start = datetime.now()
                if format == "gml":
                    cell_graph = nx.read_gml(filename)
                elif format == "pickle":
                    cell_graph = nx.read_gpickle(filename)
                elif format == "edgelist":
                    cell_graph = nx.read_edgelist(filename)
                elif format == "yaml":
                    cell_graph = nx.read_yaml(filename)
                elif format == "graphml":
                    cell_graph = nx.read_graphml(filename)
                else:
                    print "Unrecognized format:", format
                end = datetime.now()
                delta = end - start
                config.BENCHMARK_LOGGER.info(
                    "Read cell_graph from file %s of format %s in %g s"
                    % (filename, format, delta.seconds + 1e-6 * delta.microseconds)
                )
            except Exception, e:
                print e
Code Example #29
def read_general(datadir,tolerance,minrepeats):

    """ Function to read datasets from files in *datadir*.
   
    Each file represents a graph for a particular timestamp. 
    The name of the files is expected to be <timestamp>.ncol,
    and each line in the file represents one edge in the graph e.g.
    line:' 1 2 5 ' indicates there is an edge between nodes
    '1' and '2' with weight '5'

    Parameters
    ----------
    datadir: string
        path to the directory containing the dataset.
    tolerance: float,optional
        For a label to be considered a dominant label, it must be within this much of the maximum
        value found for the quality function. The smaller it is, the fewer dominant labels there 
        will be. 
    minrepeats: integer
        The number of variations to try before returning the best partition.            

    Returns 
    ------- 
    t: list
        an array of timestamps, each representing a snapshot of the communities.
    g1: networkx.Graph
        the last graph to be read from file.
    initial_label_dictionary: dictionary { node: community}
        A dictionary mapping nodes to community labels if it is the first snapshot, otherwise *None*.
    """

    raw_file_list = os.listdir(datadir)
    timestamps = sorted([int(f.rstrip(".ncol")) for f in raw_file_list if f.endswith(".ncol")])

    initial_label_dict_filename = os.path.join(datadir, 'initial_label_dict.txt')

    beginning = True
    for t in timestamps:
        f = str(t) + ".ncol"
        fpath = os.path.join(datadir,f)

        # if a file is empty, move on to the next timestamp
        if os.path.getsize(fpath) == 0:
            continue

        g1 = nx.read_edgelist(fpath, nodetype=int, data=(('weight',float),))

        if beginning is True:
            # when called for the first time just return initial_label_dict
            if not os.path.exists(initial_label_dict_filename):
                initial_label_dict = maxQ(g1,tolerance=tolerance,minrepeats=minrepeats)
                with open(initial_label_dict_filename, 'w') as lf:
                    lf.write(repr(initial_label_dict))

            with open(initial_label_dict_filename, 'r') as lf:
                initial_label_dict = eval(lf.read())
            yield (t, g1, initial_label_dict)
            beginning = False
        else:
            yield (t, g1, None)
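Since read_general is a generator, a usage sketch looks like this (the directory name and parameter values are hypothetical):

# Iterate over snapshots; only the first yielded tuple carries a label dict.
for t, g1, init_labels in read_general("data/snapshots", tolerance=0.01, minrepeats=5):
    print(t, g1.number_of_nodes(), init_labels is not None)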
Code Example #30
File: traubnet.py Project: BhallaLab/thalamocortical
    def _read_celltype_graph(self, celltypes_file, format="gml"):
        """
        Read celltype-celltype connectivity graph from file.

        celltypes_file -- the path of the file containing
        the graph.
        
        format -- format of the file. allowed values: gml, graphml, edgelist, pickle, yaml.

        """
        start = datetime.now()
        celltype_graph = None
        try:
            if format == "gml":
                celltype_graph = nx.read_gml(celltypes_file)
            elif format == "edgelist":
                celltype_graph = nx.read_edgelist(celltypes_file)
            elif format == "graphml":
                celltype_graph = nx.read_graphml(celltypes_file)
            elif format == "pickle":
                celltype_graph = nx.read_gpickle(celltypes_file)
            elif format == "yaml":
                celltype_graph = nx.read_yaml(celltypes_file)
            else:
                print "Unrecognized format %s" % (format)
        except Exception, e:
            print e
Code Example #31
- https://github.com/networkx/networkx/blob/master/examples/drawing/sampson_data.zip
"""

import zipfile
from io import BytesIO as StringIO

import matplotlib.pyplot as plt
import networkx as nx

with zipfile.ZipFile("sampson_data.zip") as zf:
    e1 = StringIO(zf.read("samplike1.txt"))
    e2 = StringIO(zf.read("samplike2.txt"))
    e3 = StringIO(zf.read("samplike3.txt"))

G1 = nx.read_edgelist(e1, delimiter="\t")
G2 = nx.read_edgelist(e2, delimiter="\t")
G3 = nx.read_edgelist(e3, delimiter="\t")
pos = nx.spring_layout(G3, iterations=100)
plt.clf()

plt.subplot(221)
plt.title("samplike1")
nx.draw(G1, pos, node_size=50, with_labels=False)
plt.subplot(222)
plt.title("samplike2")
nx.draw(G2, pos, node_size=50, with_labels=False)
plt.subplot(223)
plt.title("samplike3")
nx.draw(G3, pos, node_size=50, with_labels=False)
plt.subplot(224)
Code Example #32
    def sample_graph(self,
                     hparams,
                     placeholders,
                     adj,
                     features,
                     weights,
                     weight_bins,
                     s_num,
                     node,
                     hde,
                     num=10,
                     outdir=None):
        '''
        Args:
            num - int, default 10
                number of edges to be sampled
            outdir - string
                output dir
        '''
        list_edges = []

        for i in range(self.n):
            for j in range(i + 1, self.n):
                list_edges.append((i, j, 1))
                list_edges.append((i, j, 2))
                list_edges.append((i, j, 3))
        # list_edges.append((-1, -1, 0))

        list_weight = [1, 2, 3]

        hparams.sample = True

        eps = np.random.randn(self.n, self.z_dim, 1)
        with open(hparams.z_dir + 'test_prior_' + str(s_num) + '.txt',
                  'a') as f:
            for z_i in eps:
                f.write('[' + ','.join([str(el[0]) for el in z_i]) + ']\n')

        feed_dict = construct_feed_dict(hparams.learning_rate,
                                        hparams.dropout_rate, self.k, self.n,
                                        self.d, hparams.decay_rate,
                                        placeholders)
        feed_dict.update({self.adj: adj[0]})
        feed_dict.update({self.features: features[0]})
        feed_dict.update({self.weight_bin: weight_bins[0]})
        feed_dict.update({self.weight: weights[0]})

        feed_dict.update({self.input_data: np.zeros([self.k, self.n, self.d])})
        feed_dict.update({self.eps: eps})

        prob, ll, z_encoded, kl, sample_mu, sample_sigma, loss, w_edge, labels = self.sess.run(
            [
                self.prob, self.ll, self.z_encoded, self.kl, self.enc_mu,
                self.enc_sigma, self.cost, self.w_edge, self.label
            ],
            feed_dict=feed_dict)
        prob = np.reshape(prob, (self.n, self.n))
        w_edge = np.reshape(w_edge, (self.n, self.n, self.bin_dim))

        indicator = np.ones([self.n, 3])
        p, list_edges, w_new = normalise(prob, w_edge, self.n, self.bin_dim,
                                         [], list_edges, indicator)

        if not hparams.mask_weight:
            trial = 0
            while trial < 5000:
                candidate_edges = [
                    list_edges[i] for i in np.random.choice(range(
                        len(list_edges)), [hparams.edges],
                                                            p=p,
                                                            replace=False)
                ]
                with open(hparams.sample_file + 'test.txt', 'w') as f:
                    for (u, v, w) in candidate_edges:
                        if (u >= 0 and v >= 0):
                            f.write(
                                str(u) + ' ' + str(v) + ' {\'weight\':' +
                                str(w) + '}\n')
                f = open(hparams.sample_file + 'test.txt')
                G = nx.read_edgelist(f, nodetype=int)
                if nx.is_connected(G):
                    for (u, v, w) in candidate_edges:
                        if (u >= 0 and v >= 0):
                            with open(
                                    hparams.sample_file + "approach_2_" +
                                    str(trial) + "_" + str(s_num) + '.txt',
                                    'a') as f:
                                f.write(
                                    str(u) + ' ' + str(v) + ' {\'weight\':' +
                                    str(w) + '}\n')
                trial += 1

        else:
            trial = 0
            while trial < 5000:
                candidate_edges = self.get_masked_candidate(
                    list_edges, prob, w_edge, hparams.edges, hde)
                # print("Debug candidate", candidate_edges)
                if len(candidate_edges) > 0:
                    with open(hparams.sample_file + 'test.txt', 'w') as f:
                        for uvw in candidate_edges.split():
                            [u, v, w] = uvw.split("-")
                            u = int(u)
                            v = int(v)
                            w = int(w)
                            if (u >= 0 and v >= 0):
                                f.write(
                                    str(u) + ' ' + str(v) + ' {\'weight\':' +
                                    str(w) + '}\n')
                    f = open(hparams.sample_file + 'test.txt')
                    # try:
                    G = nx.read_edgelist(f, nodetype=int)
                    # except:
                    # continue

                    if nx.is_connected(G):
                        for uvw in candidate_edges.split():
                            [u, v, w] = uvw.split("-")
                            u = int(u)
                            v = int(v)
                            w = int(w)
                            if (u >= 0 and v >= 0):
                                with open(
                                        hparams.sample_file + "approach_2_" +
                                        str(trial) + "_" + str(s_num) + '.txt',
                                        'a') as f:
                                    f.write(
                                        str(u) + ' ' + str(v) +
                                        ' {\'weight\':' + str(w) + '}\n')
                trial += 1
Code Example #33
File: edge_processor.py Project: fros1y/phenetics
import networkx as nx
import sys

g = nx.read_edgelist("/fast-data/patentmark/triplets.tsv", )
Code Example #34
File: main.py Project: manvichawla2/link-pred
def create_graph_from_file(filename):
    print("----------------build graph--------------------")
    f = open(filename, "rb")
    g = nx.read_edgelist(f)
    return g
Code Example #35
    # Output
    OUTPUT_EVENT = sys.argv.pop()
    OUTPUT = sys.argv.pop()

    if len(sys.argv) == 12:
        isolatable_node_type = sys.argv.pop()

    # Load data
    logging.debug("Loading data")
    filename, file_extension = os.path.splitext(edgelist)
    print(filename, file_extension)
    if file_extension == ".gexf":  # when a node has attributes
        G = nx.read_gexf(edgelist, node_type=int)
    elif file_extension == ".edgelist":  # when a node does not have attrbutes
        G = nx.read_edgelist(edgelist, nodetype=int)
    else:
        raise ValueError("The input graph should be saved in .edgelist or .gexf format")

    logs = pd.read_csv(sim_log_data)

    #
    # Preprocess
    #
    logging.debug("Construct the transmission tree from the log")
    logs["id"] = "id"
    tree_list = utils.construct_transmission_tree(logs)

    logging.debug("Set onset time")
    for tid, tree in enumerate(tree_list):
        tree_list[tid] = utils.set_onset_time(tree, time_lag_for_isolation)
Code Example #36
    for c, idx in color_idx.items():

        plt.scatter(node_pos[idx, 0], node_pos[idx, 1],
                    label=c)  # c=node_colors)

    plt.legend()

    plt.show()


if __name__ == "__main__":
    # G = nx.read_edgelist('test.edge_list.txt', create_using=nx.DiGraph(), nodetype=None,
    #                      data=[('weight', int)])
    G = nx.read_edgelist(
        '../data/ETH/Phishing node classification/TransEdgelist.txt',
        create_using=nx.MultiDiGraph(),
        delimiter=',',
        nodetype=None,
        data=[('amount', float), ('weight', int)])

    model = Struc2Vec(
        G,
        10,
        80,
        workers=4,
        verbose=40,
    )
    model.train()
    embeddings = model.get_embeddings()

    evaluate_embeddings(embeddings)
    plot_embeddings(embeddings)
Code Example #37
 def __init__(self, input_filename):
     infile = open(input_filename, 'r')
     self.G = nx.read_edgelist(infile,
                               nodetype=int,
                               data=(('weight', float), ))
     print("successfully loaded graph from file: " + input_filename)
Code Example #38
import networkx as nx
G = nx.read_edgelist("../dataset/Email-Enron.txt")

communities = sorted()
Code Example #39
File: random_walk.py Project: wsgan001/itce2011
        start_id = int(sys.argv[4])
    if len(sys.argv) == 6:
        p = float(sys.argv[3])
        n_samples = int(sys.argv[4])
        start_id = int(sys.argv[5])

    print "file_name =", file_name
    print "n_samples =", n_samples
    print "start_id =", start_id
    print "t =", t
    print "alpha =", alpha
    if len(sys.argv) == 6:
        print "p =", p

    G = nx.read_edgelist(
        "../data/" + file_name + ".gr", '#', '\t', None, nodetype=int
    )  # implicitly remove duplicate edges (i.e. no multiple edges), use type 'int' instead of string
    #    G = nx.read_edgelist(file_name, '#', ' ', None, nodetype=int)

    print "#nodes :", G.number_of_nodes()
    print "#edges :", len(G.edges())
    print "#self-loops :", G.number_of_selfloops()
    print "#components :", len(nx.connected_components(G))
    n_nodes = G.number_of_nodes()
    deg_list = nx.degree(G)  # dict[node] = deg
    min_deg = min(deg_list.itervalues())
    max_deg = max(deg_list.itervalues())
    print "min-deg =", min_deg
    print "max-deg =", max_deg

    # TEST random_walk_transform()
Code Example #40
    for edge_index in a_edge_index:
        arr_tmp.append([
            fund_index,
            len(list_funds) + edge_index, weight_matrix_total[fund_index,
                                                              edge_index]
        ])
arr_tmp = np.array(arr_tmp)
pd_tmp = pd.DataFrame(arr_tmp)
pd_tmp[0] = pd_tmp[0].astype(int)
pd_tmp[1] = pd_tmp[1].astype(int)
output_name = 'fund'
path = data_dir + 'graph/{}.csv'.format(output_name)
pd_tmp.to_csv(path, index=False, header=False, sep=' ')  # header=False: a header row would be parsed as an edge by read_edgelist

nx_G = nx.read_edgelist(path,
                        nodetype=int,
                        data=(('weight', float), ),
                        create_using=nx.DiGraph())
nx_G = nx_G.to_undirected()
G = graph.Graph(nx_G, False, 1, 1)
G.preprocess_transition_probs()

walks = G.simulate_walks(200, 200)
walks = [list(map(str, walk)) for walk in walks]

from gensim.models import Word2Vec

model = Word2Vec(walks,
                 size=32,
                 window=6,
                 min_count=0,
                 sg=1,
Code Example #41
    return r1, r2


Iteration = 10000  ## number of iterations
P = [
    1, 0.95, 0.9, 0.85, 0.8, 0.75, 0.7, 0.65, 0.6, 0.55, 0.5, 0.45, 0.4, 0.35,
    0.3, 0.25, 0.2, 0.15, 0.1, 0.05, 0
]
assortivity = np.zeros((21, 1))
average_path = np.zeros((21, 1))
count = 0

for i in P:
    print(i)
    for j in range(10):
        G = nx.read_edgelist('datasets/network_average_path.txt', nodetype=int)
        r1, r2 = calculate_ass(G, i, Iteration)
        print(r1, r2)
        assortivity[count] = assortivity[count] + r1
        average_path[count] = average_path[count] + r2
    assortivity[count] = assortivity[count] / 10
    average_path[count] = average_path[count] / 10
    count = count + 1
fid = open('results/average_path_ass.txt', 'w')
for i in range(len(P)):
    fid.write(str(assortivity[i]) + ' ' + str(average_path[i]) + '\n')
fid.close()

plt.figure(figsize=(16, 16))
plt.style.use('ggplot')
plt.semilogx(assortivity, average_path, 'o-', label='$r_1$')
Code Example #42
File: node2vec_flight.py Project: kexinxin/Defect
    color_idx = {}

    for i in range(len(X)):

        color_idx.setdefault(Y[i][0], [])

        color_idx[Y[i][0]].append(i)

    for c, idx in color_idx.items():

        plt.scatter(node_pos[idx, 0], node_pos[idx, 1],
                    label=c)  # c=node_colors)

    plt.legend()

    plt.show()


if __name__ == "__main__":
    G = nx.read_edgelist('../data/flight/usa-airports.edgelist',
                         create_using=nx.DiGraph(),
                         nodetype=None,
                         data=[('weight', int)])

    model = Node2Vec(G, 10, 80, workers=1, p=0.25, q=2)
    model.train()
    embeddings = model.get_embeddings()

    evaluate_embeddings(embeddings)
    plot_embeddings(embeddings)
Code Example #43
        # 3rd: cluster cores
        for candidate in candidates:
            self.cluster_core(candidate)
        print '3. after cluster core, disjoint set - parent dict:', dict(
            zip(range(self.n), self.disjoint_set.parent))

        # 4th: cluster non-core
        self.cluster_non_core()
        print '4. after cluster non-core mark cluster id, cluster(represented by root vertex), min ele id:', dict(
            filter(lambda pair: pair[1] != self.n,
                   zip(range(self.n), self.cluster_dict)))

        # finally, output result
        print '\nfinal result in format:', ' '.join([
            'core/non-core', 'vertex id',
            'cluster id(min core vertex id in this cluster)'
        ])
        self.result_lines.append('c/n vertex_id cluster_id')
        print 'c/n vertex_id cluster_id'
        self.output_result()


if __name__ == '__main__':
    graph = nx.read_edgelist('demo_input_graph.txt', nodetype=int)
    offset_lst, dst_v_lst, deg_lst = to_csr_graph(graph)

    print 'csr representation:\noffset_lst=', offset_lst, '\ndst_v_lst=', dst_v_lst, '\ndeg_lst=', deg_lst, '\n'

    pscan_algo = PScan(offset_lst, dst_v_lst, deg_lst, eps=0.6, min_pts=3)
    pscan_algo.run_algorithm()
Code Example #44
'''	Ganesh Prasad - 2018csm1008
	Rakesh meena - 2018csm1017
	Jeevan Kumar - 2018csm1012

Problem: Given a network of friendships, we have to find the nodes (persons) with the most and least friendly neighbourhoods.
Solution: We take a node and find its adjacent nodes (a list, say j) which are also impressed by the node.
	Now for every node in j we get the total number of mutually impressed friends, which, after dividing by len(j),
	gives an average treated as a factor to decide whose neighbourhood is friendly or not.'''

import networkx as nx
import matplotlib.pyplot as plt

G = nx.read_edgelist(r"pagerank.txt", create_using=nx.DiGraph(), nodetype=int)

nx.draw(G, with_labels=True)
plt.show()


# Find total Number of mutually impressed friends of a node
def num_of_mutually_impressed_nodes(G, node):
    list_successors = G.successors(node)
    total_num = 0
    for successor_node in list_successors:
        if (G.has_edge(successor_node, node)):
            total_num = total_num + 1
    return total_num


# Returns a list of mutually impressed friends for a node
def mutually_impressed_nodes(G, node):
    list_successors = G.successors(node)
    # The original snippet is truncated here; per the comment above, return
    # the successors that also point back at the node.
    return [s for s in list_successors if G.has_edge(s, node)]
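The averaging step described at the top of this example is not shown in the snippet; here is one plausible sketch (the helper name average_friendliness is hypothetical):

# Hypothetical helper: average the mutually-impressed-friend counts over a
# node's mutually impressed neighbourhood j, as described in the docstring.
def average_friendliness(G, node):
    j = mutually_impressed_nodes(G, node)
    if not j:
        return 0.0
    return sum(num_of_mutually_impressed_nodes(G, friend) for friend in j) / float(len(j))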
Code Example #45
import networkx as nx
import numpy
G = nx.read_edgelist("facebook_combined.txt")

n = list(G.nodes())

spll = []

for u in n:
    for v in n:
        if u != v:
            l = nx.shortest_path_length(G, u, v)
            print("Shortest path between ", u, " and ", v, " is of lenth ", l)
            spll.append(l)

min_spl = min(spll)
max_spl = max(spll)
avg_spl = numpy.average(spll)

print("Minimum shortest path length : ", min_spl)
print("Maximum shortest path length : ", max_spl)
print("Average shortest path length : ", avg_spl)
Code Example #46
import networkx as nx
from itertools import chain

#this script is meant to remove all the pages with fewer than 5 incoming links
#noise reduction process

G = nx.read_edgelist('../datasets/dbpedia_resources_wiki.edgelist',
                     nodetype=int,
                     create_using=nx.DiGraph())
print 'read graph'

remove_nodes_out = (node for node, degree in G.out_degree().iteritems()
                    if degree == 0)  #nodes with out_degree = 0

remove_nodes_in = (node for node, degree in G.in_degree().iteritems()
                   if degree == 0)  #nodes with in_degree = 0

remove_nodes = chain(remove_nodes_out, remove_nodes_in)

G.remove_nodes_from(remove_nodes)

print 'graph has %d nodes and %d edges' % (len(G.nodes()), len(G.edges()))

print "writing graph"

nx.write_edgelist(
    G,
    '../graph/dbpedia_resources_wiki_reduced_1_out_1_in.edgelist',
    data=False)
Code Example #47
File: tdiDemo.py Project: bronxcasey/TDI
import collections
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import networkx as nx
from networkx import bipartite

###################
#Makes degree histogram
def grapher(G):
	degree_sequence = sorted([d for n, d in G.degree()], reverse=True)  # degree sequence
	# print "Degree sequence", degree_sequence
	degreeCount = collections.Counter(degree_sequence)
	deg, cnt = zip(*degreeCount.items())

	fig, ax = plt.subplots()
	plt.bar(deg, cnt, width=0.80, color='b')

	plt.title("Degree Histogram")
	plt.ylabel("Count")
	plt.xlabel("Degree")
	
# log-log option
	# ax.set_xscale("log")
	# ax.set_yscale("log")

	ax.set_xticks([d + 0.1 for d in deg])
	ax.set_xticklabels(deg)
	plt.show()
    

p = nx.read_edgelist('DCh-Miner_miner-disease-chemical.tsv')
Code Example #48
    argvs = sys.argv
    argc = len(argvs)
    if (argc < 2):
        print(
            'Please give frovedis_server calling command as the first argument \n(e.g. "mpirun -np 2 -x /opt/nec/nosupport/frovedis/ve/bin/frovedis_server")'
        )
        quit()
    FrovedisServer.initialize(argvs[1])

    frov_graph = fnx.read_edgelist(DATASET, nodetype=np.int32, delimiter=' ')
    fres = set(fnx.bfs_edges(frov_graph, src, depth_limit=depth))

    FrovedisServer.shut_down()
except Exception as e:
    print("status=Exception: " + str(e))
    sys.exit(1)

#NetworkX
try:
    nx_graph = nx.read_edgelist(DATASET, nodetype=np.int32, delimiter=' ')
    nres = set(nx.bfs_edges(nx_graph, src, depth_limit=depth))
except Exception as e:
    print("status=Exception: " + str(e))
    sys.exit(1)
print(fres)
print(nres)
if len(fres - nres) == 0:
    print("status=Passed")
else:
    print("status=Failed")
Code Example #49
                  [1, 0, 0, 0, 1, 0, 0, 0, 1, 0],
                  [0, 1, 0, 0, 0, 0, 0, 0, 0, 0],
                  [0, 0, 0, 0, 1, 0, 0, 0, 0, 0],
                  [0, 0, 0, 0, 0, 1, 0, 0, 0, 1],
                  [0, 0, 0, 0, 0, 0, 0, 0, 1, 0]
                 ])
G3 = nx.Graph(G_mat)
print('list(G3.edges())\n',list(G3.edges()))

# simple visualization
# plt.figure()
# nx.draw_networkx(G3)
# plt.show()

# edge list
G4 = nx.read_edgelist('./data/G_edgelist.txt', data=[('weight', int)])
print('list(G4.edges(data=True))\n', list(G4.edges(data=True)))

# # simple visualization
# plt.figure()
# nx.draw_networkx(G4)
# plt.show()

# 2.4 DataFrame
G_df = pd.read_csv('./data/G_edgelist.txt', delim_whitespace=True,
                   header=None, names=['n1', 'n2', 'weight'])
print('G_df\n', G_df)

G5 = nx.from_pandas_dataframe(G_df, 'n1', 'n2', edge_attr='weight')
print('list(G5.edges(data=True))\n', list(G5.edges(data=True)))
# simple visualization
Code Example #50
def label_prop():
    G = nx.read_edgelist("facebook_combined.txt",
                         create_using=nx.Graph(),
                         nodetype=int)
    print nx.info(G)

    for i in G.nodes():
        G.node[i]['label'] = i
        G.node[i]['ID'] = i
        G.node[i]['l_1'] = 0
        G.node[i]['l_2'] = 0
        G.node[i]['l_next'] = 0
    '''
    for n,nbrs in G.adjacency_iter():
        for nbr,edict in nbrs.items():
            if nbr==200:
                print n, nbrs, G.node[nbr]['label']
    '''
    mainStop = False
    i = 0
    while (i < 100):
        if i == 99:
            set_communities = set()
            for n in G.nodes():
                set_communities.add(G.node[n]['label'])
            print "the number of communities after 100 iterations==", len(
                set_communities)

        i += 1
        mainStop = False
        l1_stop = True
        l2_stop = True
        for n in G.nodes():
            if (not (G.node[n]['label'] == G.node[n]['l_1'])):
                l1_stop = False
        for n in G.nodes():
            if (not (G.node[n]['label'] == G.node[n]['l_2'])):
                l2_stop = False

        #print l1_stop, l2_stop
        if (not (l1_stop or l2_stop)):
            #print "in not loop"
            for n, nbrs in G.adjacency_iter():
                dict = {}
                dict.clear()
                for nbr, d in nbrs.items():
                    temp = G.node[nbr]['label']
                    if not dict.has_key(temp):
                        dict[temp] = 1
                    else:
                        dict[temp] += 1
                max_key = 0
                max_key = max(dict, key=dict.get)
                G.node[n]['l_next'] = max_key
                G.node[n]['l_2'] = G.node[n]['l_1']
                G.node[n]['l_1'] = G.node[n]['label']
                G.node[n]['label'] = max_key
            '''
            for n in G.nodes():
                G.node[n]['l_2']=G.node[n]['l_1']
                G.node[n]['l_1']=G.node[n]['label']
                G.node[n]['label']=G.node[n]['l_next']
            '''

        else:
            print "The Community converges"
            mainStop = True

            print i
            return i
Code Example #51
# -*- coding: utf-8 -*-
import random
import networkx as nx
import matplotlib.pyplot as plt
from operator import itemgetter

G2 = nx.read_edgelist('Facebook_Dataset.txt',
                      create_using=nx.Graph(),
                      nodetype=int)


#return the friends of a user
def friends(graph, user):
    return set(graph.neighbors(user))


#returns a list of friends of friends of a user
def friends_of_friends(graph, user):
    x = []
    for each in graph.neighbors(user):
        for item in graph.neighbors(each):
            x.append(item)
    return set(x)


# returns a list of common friends
def common_friends(graph, user1, user2):
    x1 = friends(graph, user1)
    x2 = friends(graph, user2)
    return set(x1 & x2)
Code Example #52
 def __init__(self):
     self.graph = nx.read_edgelist(
         'data/1_edge_list/kaggle_numbers_bidi.edgelist',
         create_using=nx.DiGraph)
     print(len(self.graph.nodes))
     print(len(self.graph.edges))
Code Example #53
def nodeID_mapping(input_file_name, output_file_name=" ", reverse=False):
    if input_file_name.endswith(".edges") or input_file_name.endswith(".txt"):
        f = open(input_file_name, "r")
        g = nx.read_edgelist(f,
                             create_using=nx.DiGraph(),
                             nodetype=str,
                             data=False)
        # print g.edges()[:10]
        f.close()
    elif input_file_name.endswith(".gpickle"):
        g = nx.read_gpickle(input_file_name)

    if output_file_name == " " or output_file_name == None:
        output_file_name = os.path.abspath(input_file_name).split(
            ".")[0] + "_index0.edges"

    print("write graph edges list to: %s" % output_file_name)
    print("Original graph: # nodes: %d, # edges: %d" %
          (g.number_of_nodes(), g.number_of_edges()))

    id_mapping = {}
    i2s_mapping = {}

    index = 0

    for (u, v) in g.edges():

        if u not in id_mapping:
            id_mapping[u] = index
            i2s_mapping[index] = u
            index += 1

        if v not in id_mapping:
            id_mapping[v] = index
            i2s_mapping[index] = v
            index += 1

    new_edges = [(id_mapping[u], id_mapping[v]) for (u, v) in g.edges()]

    new_g = nx.DiGraph()
    new_g.add_edges_from(new_edges)

    if reverse:
        print("edge reversed...")
        new_g.reverse(copy=False)

    print("New graph: # nodes: %d, # edges: %d" %
          (new_g.number_of_nodes(), new_g.number_of_edges()))
    nodes = list(new_g.nodes())
    print("New graph: min(node id): %d, max(node id):%d" %
          (min(nodes), max(nodes)))
    print("is Directed Acyclic Graph: %s " %
          nx.is_directed_acyclic_graph(new_g))

    nx.write_edgelist(new_g, output_file_name, data=False)

    print("# instances in mapping: %d (%d)" %
          (len(id_mapping), len(i2s_mapping)))
    mapping = {"s2i": id_mapping, "i2s": i2s_mapping}

    mapping_file = os.path.abspath(input_file_name).split(
        ".")[0] + "_id_mapping.pkl"
    print("id mapping file is saved: %s" % mapping_file)
    print("mappged graph file is saved at: %s" % output_file_name)
    with open(mapping_file, "wb") as f:
        pickle.dump(mapping, f)
    return output_file_name, mapping_file
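A minimal usage sketch (the input file name is hypothetical):

# Remap string node ids in "follows.edges" to contiguous integer ids;
# returns the new edge-list path and the pickled id-mapping path.
edges_file, mapping_file = nodeID_mapping("follows.edges")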
Code Example #54
import networkx as nx
import sys, os, datetime
sys.path.insert(1,
                os.path.dirname(os.path.dirname(os.path.realpath(__file__))))
from utils import io_utils

# Author: Katya Gurgel
# Description: a script cleaning up unweighted SNAP graphs encoded by pairs of
# nodes on each line, each representing individual edges. The output is printed.

# Usage: python ./snap_cleanup.py INPUT_FILE > OUTPUT_DIR/OUTPUT_FILE

G = nx.read_edgelist(sys.argv[1])

print('# {} {} {}'.format(datetime.datetime.now(),
                          os.popen('git rev-parse HEAD').read().strip(),
                          sys.argv[1]))

io_utils.print_uw_graph(G)
Code Example #55
import networkx as nx
import math
import matplotlib.pyplot as plt


def avg_degree(A):
    degree = A.degree()
    Average_degree = sum(degree.values()) / float(len(A))
    return Average_degree


S = nx.Graph()
G = nx.read_edgelist("edges.txt", delimiter=",")
k = avg_degree(G)
print("The average degree of original graph is \t", k)

#probability of edge creation
p = k / (G.number_of_nodes() - 1)

S.add_edges_from((nx.fast_gnp_random_graph(G.number_of_nodes(),
                                           p,
                                           seed=None,
                                           directed=False)).edges())
c = avg_degree(S)

avg_clusco = nx.average_clustering(S)
print("Average local Clustering\t" + str(avg_clusco))

#average path length
apl = math.log(S.number_of_nodes()) / float(math.log(c))
Code Example #56
import gzip
import numpy as np
import networkx as nx
import webget  # assumed: helper module providing download(); not part of the standard library

webget.download("https://snap.stanford.edu/data/twitter_combined.txt.gz")

with gzip.open('twitter_combined.txt.gz') as f:
    g = nx.read_edgelist(f)

Code Example #57
if __name__ == "__main__":
    # G=nx.read_edgelist('../data/wiki/Wiki_edgelist.txt',
    #                      create_using = nx.DiGraph(), nodetype = None, data = [('weight', int)])
    #
    #
    # model = Node2Vec(G, walk_length=10, num_walks=80,
    #                  p=0.25, q=4, workers=1, use_rejection_sampling=0)
    # model.train(embed_size=64,window_size = 5, iter = 3)
    # embeddings=model.get_embeddings()
    # print(embeddings)
    #
    # evaluate_embeddings(embeddings)
    # plot_embeddings(embeddings)

    G = nx.read_edgelist('../data/text.txt',
                         create_using=nx.DiGraph(),
                         nodetype=None,
                         data=[('weight', int)])

    model = Node2Vec(G,
                     walk_length=10,
                     num_walks=80,
                     p=0.25,
                     q=4,
                     workers=1,
                     use_rejection_sampling=0)
    model.train(embed_size=4, window_size=5, iter=3)
    embeddings = model.get_embeddings()
    print(embeddings)
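
get_embeddings() in DeepWalk/Node2Vec-style libraries typically returns a
dict mapping each node id to its learned vector; assuming that shape here, a
quick cosine-similarity ranking can be run over the result (a hedged sketch,
not part of the original script):

import numpy as np

def most_similar(embeddings, node, topn=5):
    # rank all other nodes by cosine similarity to `node`
    target = np.asarray(embeddings[node])
    scores = {}
    for other, vec in embeddings.items():
        if other == node:
            continue
        vec = np.asarray(vec)
        denom = np.linalg.norm(target) * np.linalg.norm(vec)
        scores[other] = float(np.dot(target, vec) / denom) if denom else 0.0
    return sorted(scores.items(), key=lambda kv: -kv[1])[:topn]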
Code Example #58
    #print('dict_2:', dict_2)
    return dict_1


dict_1 = create_dict(hosts_list)

# pos = open('pickles/dict_2.pkl','wb')
# pickle.dump(dict_2, pos)
# pos.close()
pos = open('pickles/dict_2.pkl', 'rb')
dict_2 = pickle.load(pos)
pos.close()

# load dataset to a directed graph
G = nx.read_edgelist('dataset/edgelist.txt',
                     delimiter='\t',
                     create_using=nx.DiGraph())
nodes_list = nx.nodes(G)
print('**Nodes list**', nodes_list)
# print(len(nodes_list))

# *************************************************************************
# compute in-degree and out-degree counts per host


def in_out_degree(G, dim):
    num_rows = 0
    out_degree_matrix = np.zeros((dim, 1))
    in_degree_matrix = np.zeros((dim, 1))
    for k in hosts_list:
        if k in nodes_list:
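
The excerpt cuts the function off mid-loop. A plausible completion, assuming
num_rows assigns one row per host in hosts_list order and hosts absent from
the graph keep zero degrees (an assumption, not the original code):

import numpy as np

def in_out_degree(G, dim):
    # hypothetical completion of the truncated function above
    num_rows = 0
    out_degree_matrix = np.zeros((dim, 1))
    in_degree_matrix = np.zeros((dim, 1))
    for k in hosts_list:
        if k in nodes_list:
            out_degree_matrix[num_rows] = G.out_degree(k)
            in_degree_matrix[num_rows] = G.in_degree(k)
        num_rows += 1
    return in_degree_matrix, out_degree_matrix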
Code Example #59
File: main.py Project: GemsLab/LinkWaldo
def main(args, jupyter=False):
    ROOT_DIR = os.path.dirname(os.path.abspath(__file__))

    seed = args.seed
    edgelist = os.path.join(
        ROOT_DIR,
        '../data/{}/train/{}_{}_seed_{}.txt'.format(args.sampling_method,
                                                    args.graph,
                                                    args.percent_test, seed))
    if not args.bipartite:
        G = nx.read_edgelist(edgelist)
    else:
        bip_edges = list()
        A = set()
        B = set()
        for line in open(edgelist, 'r'):
            a, b = line.strip().split()
            A.add(a)
            B.add(b)
            bip_edges.append((a, b))
        G = nx.Graph()
        G.add_nodes_from(A, bipartite=0)
        G.add_nodes_from(B, bipartite=1)
        G.add_edges_from(bip_edges)

    test_path = os.path.join(
        ROOT_DIR,
        '../data/{}/test/{}_{}_seed_{}.txt'.format(args.sampling_method,
                                                   args.graph,
                                                   args.percent_test, seed))

    output_dir = os.path.join(ROOT_DIR,
                              '../output/{}/'.format(args.sampling_method))
    emb_path = os.path.join(
        output_dir,
        '{}_{}_{}_seed_{}.emb'.format(args.embedding_method, args.graph,
                                      args.percent_test, seed))
    if not os.path.exists(output_dir):
        os.mkdir(output_dir)
    if not args.output_override:
        output_path = os.path.join(
            output_dir, '{}_{}_{}_{}_{}_{}_{}_k_{}.txt'.format(
                args.method, args.graph, args.embedding_method,
                args.percent_test, args.exact_search_tolerance,
                args.bailout_tol, seed, args.k))
    else:
        output_path = args.output_override

    if args.embedding_method == 'netmf1':
        embeddings = NetMF(args.embedding_method,
                           edgelist,
                           test_path,
                           emb_path,
                           G,
                           normalize=True,
                           window_size=1)
    elif args.embedding_method == 'netmf2':
        embeddings = NetMF(args.embedding_method,
                           edgelist,
                           test_path,
                           emb_path,
                           G,
                           normalize=True,
                           window_size=2)
    elif args.embedding_method == 'bine':
        embeddings = BiNE(args.embedding_method,
                          edgelist,
                          test_path,
                          emb_path,
                          G,
                          normalize=True)
    elif args.embedding_method == 'aa':
        embeddings = AA(args.embedding_method,
                        edgelist,
                        test_path,
                        emb_path,
                        G,
                        normalize=True)
    else:
        # fail fast instead of leaving `embeddings` unbound below
        raise ValueError('unknown embedding method: %s' %
                         args.embedding_method)
    if args.force_emb or not os.path.exists(emb_path):
        if os.path.exists(emb_path.replace('.emb', '_nodeX.npy')):
            os.remove(emb_path.replace('.emb', '_nodeX.npy'))
        embeddings.run(G)

    if args.method in {'lapm'}:
        sel = LaPMSelector(args.method,
                           G,
                           args.k,
                           embeddings,
                           output_path,
                           seed=seed,
                           bipartite=args.bipartite)
        load_embeddings = True
    elif args.method in {'cn'}:
        sel = CNSelector(args.method,
                         G,
                         args.k,
                         embeddings,
                         output_path,
                         seed=seed,
                         bipartite=args.bipartite)
        load_embeddings = False
    elif args.method in {'js'}:
        sel = JSSelector(args.method,
                         G,
                         args.k,
                         embeddings,
                         output_path,
                         seed=seed,
                         bipartite=args.bipartite)
        load_embeddings = False
    elif args.method in {'aa'}:
        sel = AASelector(args.method,
                         G,
                         args.k,
                         embeddings,
                         output_path,
                         seed=seed,
                         bipartite=args.bipartite)
        load_embeddings = False
    elif args.method in {'nmf+bag'}:
        sel = BaggingEnsemble(args.method,
                              G,
                              args.k,
                              embeddings,
                              output_path,
                              seed=seed,
                              bipartite=args.bipartite)
        load_embeddings = False
    elif args.method == 'LinkWaldo':
        num_groupings = 0
        if args.DG:
            num_groupings += 1
        if args.SG:
            num_groupings += 1
        if args.CG:
            num_groupings += 1

        if num_groupings > 1:
            if args.bailout_tol > 0.0:
                sel = MGBailoutSelector(
                    args.method,
                    G,
                    args.k,
                    embeddings,
                    output_path,
                    DG=args.DG,
                    SG=args.SG,
                    CG=args.CG,
                    exact_search_tolerance=args.exact_search_tolerance,
                    seed=seed,
                    bipartite=args.bipartite)
            else:
                sel = MGSelector(
                    args.method,
                    G,
                    args.k,
                    embeddings,
                    output_path,
                    DG=args.DG,
                    SG=args.SG,
                    CG=args.CG,
                    exact_search_tolerance=args.exact_search_tolerance,
                    seed=seed,
                    bipartite=args.bipartite)
        else:
            if args.DG and args.bailout_tol > 0.0:
                sel = DGBailoutSelector(
                    args.method,
                    G,
                    args.k,
                    embeddings,
                    output_path,
                    DG=args.DG,
                    SG=args.SG,
                    CG=args.CG,
                    exact_search_tolerance=args.exact_search_tolerance,
                    seed=seed,
                    bipartite=args.bipartite)
            elif args.DG:
                sel = DGSelector(
                    args.method,
                    G,
                    args.k,
                    embeddings,
                    output_path,
                    DG=args.DG,
                    SG=args.SG,
                    CG=args.CG,
                    exact_search_tolerance=args.exact_search_tolerance,
                    seed=seed,
                    bipartite=args.bipartite)
            elif args.SG:
                sel = SGSelector(
                    args.method,
                    G,
                    args.k,
                    embeddings,
                    output_path,
                    DG=args.DG,
                    SG=args.SG,
                    CG=args.CG,
                    exact_search_tolerance=args.exact_search_tolerance,
                    seed=seed,
                    bipartite=args.bipartite)
            elif args.CG:
                sel = CGSelector(
                    args.method,
                    G,
                    args.k,
                    embeddings,
                    output_path,
                    DG=args.DG,
                    SG=args.SG,
                    CG=args.CG,
                    exact_search_tolerance=args.exact_search_tolerance,
                    seed=seed,
                    bipartite=args.bipartite)
        load_embeddings = True
    else:
        # fail fast instead of leaving `sel` unbound for an unknown method
        raise ValueError('unknown method: %s' % args.method)

    sel.num_groups = args.num_groups
    sel.num_groups_alt = args.num_groups_alt
    sel.bailout_tol = args.bailout_tol
    sel.bag_epsilon = args.bag_epsilon
    sel.skip_output = args.skip_output

    embeddings.load_data(load_embeddings=load_embeddings)

    if jupyter:
        return sel

    _time = sel.select()

    sel.write_res(_time)
    if not args.skip_output:
        sel.write()
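
main() expects an argparse-style namespace; the exact flag set lives in the
project's CLI entry point, so the invocation below is only a sketch. The
attribute names mirror what main() reads above, but every value is a guess,
not a documented default:

from types import SimpleNamespace

# hypothetical argument object for an interactive run
args = SimpleNamespace(
    seed=0, graph='yeast', sampling_method='uniform', percent_test=20,
    bipartite=False, embedding_method='netmf2', method='LinkWaldo',
    DG=True, SG=True, CG=True, k=10000, exact_search_tolerance=25000000,
    bailout_tol=0.5, num_groups=25, num_groups_alt=25, bag_epsilon=0.0,
    force_emb=False, output_override=None, skip_output=False)

sel = main(args, jupyter=True)  # returns the configured selector early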
Code Example #60
File: Node2vec.py Project: realRoc/INFS7450
# the StellarGraph edge-splitting block below is commented out in the source
'''
# Do the same process to compute a training subset from within the test graph
edge_splitter_train = EdgeSplitter(graph_test, graph)
graph_train, examples, labels = edge_splitter_train.train_test_split(
    p=0.1, method="global"
)
(
    examples_train,
    examples_model_selection,
    labels_train,
    labels_model_selection,
) = train_test_split(examples, labels, train_size=0.75, test_size=0.25)

print(graph_train.info())'''

import networkx as nx
import pandas as pd
import stellargraph as sg

G_test_nx = nx.read_edgelist('val_positive.txt')
nodes = G_test_nx.nodes()
# constant dummy features, since the raw edge list carries no node attributes
feature_vector = [1, 1, 1, 1, 1]
node_data = pd.DataFrame(
    [feature_vector for i in range(len(nodes))],
    index=[node for node in nodes])
graph_test = sg.StellarGraph.from_networkx(G_test_nx, node_features=node_data)

G_test_neg = nx.read_edgelist('val_negative.txt')
edges_test_neg = G_test_neg.edges()

G_train_nx = nx.read_edgelist('training.txt')
nodes_train = G_train_nx.nodes()
feature_vector = [1,1,1,1,1]
node_data = pd.DataFrame(
    [feature_vector for i in range(len(nodes_train))],