Example #1
def loadNwU(dsName, path, cd, wccOnly, revEdges, undir):
    print("   Opening " + dsName + " and loading graph... ")
    t1 = time.clock()
    fh = open(path + dsName, "rb")
    if undir:
        if cd:
            prodNet = nx.read_edgelist(fh, delimiter=",")
        else:
            prodNet = nx.read_edgelist(fh)
            # prodNet = prodNet.to_directed()
    else:
        if cd:
            prodNet = nx.read_edgelist(fh, delimiter=",", create_using=nx.DiGraph())
        else:
            prodNet = nx.read_edgelist(fh, create_using=nx.DiGraph())

    fh.close()
    if wccOnly:
        prodNet = nx.algorithms.weakly_connected.weakly_connected_component_subgraphs(prodNet)[0]

    prodNet.remove_edges_from(prodNet.selfloop_edges())

    if revEdges:
        prodNet.reverse(False)

    numNodes = str(len(prodNet))
    numEdges = str(prodNet.size())
    t2 = time.clock()
    print("    -> graph loaded: " + numNodes + " nodes, " + numEdges + " edges (" + str(t2 - t1) + " sec).")
    return prodNet
Example #2
def gen_random_graphs(seed, db):
    
    print "generating random graph with seed " + str(seed)
    directory = db.get_rnd_graphs_path()
    if not path.exists(directory):
        makedirs(directory)
    
    filename = db.get_rnd_graph_full_name(str(seed), str(db.get_final_time()))
    if(path.exists(filename)):
        print "random graph with seed " + str(seed) + " already exists! Skipping..."
        return

    
    pathD = db.get_graphs_path()
    filename = pathD + db.get_windowed_graph_name(0)
    G=nx.read_edgelist(filename, nodetype = int, data=(('weight',float),))
    GR = get_random_graph_from(G, seed)
    save_random_graph(GR,1, db)
    
    for i in range(2,db.get_final_time()+1):
        filename = pathD + db.get_windowed_graph_name(str(i))
        if(not path.exists(filename)):
            f = open(filename,'w')
            f.close()
            
        G=nx.read_edgelist(filename, nodetype = int, data=(('weight',float),))
        GRnew = get_random_graph_from(G, seed)
        GR.graph['nmerges'] = i-2
        GR = merge_temporal_graphs(GR, GRnew)
        GR = compute_edge_features(GR)
        save_random_graph(GR,i, db)
    
        print("G_RND[" + str(i)  + "] has " + str(GR.number_of_edges()) + " edges")
def k_obfuscation_measure(before_file, after_file, n_nodes, k_arr, data=True):
    print "n_nodes =", n_nodes
    
    # before_file
    bG = nx.read_edgelist(before_file, '#', '\t', None, nodetype=int)
    print "read bG - DONE"
    
#    if bG.number_of_nodes() < n_nodes:
#        bG.add_nodes_from(range(n_nodes))       # only for er_100k

    # Case 1 - aG = bG
    if after_file == before_file:      # after_file is before_file
        for e in bG.edges_iter():
            bG[e[0]][e[1]]['p'] = 1.0
        return compute_eps_multi(bG, bG, k_arr) 
        
    # Case 2 - aG is a sample
    # after_file
    if data == True:
        aG = nx.read_edgelist(after_file, '#', '\t', None, nodetype=int, data=True)
    else:
        aG = nx.read_edgelist(after_file, '#', '\t', None, nodetype=int, data=False)
#        if aG.number_of_nodes() < n_nodes:
#            aG.add_nodes_from(range(n_nodes))       # only for the cases of KeyError !
        for e in aG.edges_iter():
            aG[e[0]][e[1]]['p'] = 1.0
    print "read aG - DONE"
    
    return compute_eps_multi(bG, aG, k_arr) 
Example #4
def gen_random_graphs(seed):
    
	# create windowed random graphs for each real graph
	# obtain aggreggated graph
	# calculate features of random graph

	print "GENERATING RANDOM GRAPHS"

	day = 1
	final_day = which_day(_maxtime)+1

	filename = str(results_folder) + "Graphs_Data/windowed_graph_" + str(day) + ".txt"

	print filename 

	G = nx.read_edgelist(filename, nodetype = int, data = (('top',float),))

	# print G 

	GR = get_random_graph_from(G, seed)

	for i in range(2,final_day):
		day = i
		filename = str(results_folder) + "Graphs_Data/windowed_graph_" + str(day) + ".txt"
		G = nx.read_edgelist(filename, nodetype = int, data = (('top',float),))
		GRnew = get_random_graph_from(G, seed)
		GR.graph['nmerges'] = i - 2
		GR = merge_temporal_graphs(GR, GRnew)
		GR = compute_edge_features(GR)
		save_random_graph(GR,i,seed)
def incorrectness_uncertain_from_file(before_file, after_file, sample_file, n_samples, bins): 
    
    # compute sig_list_b, bucket_list_b ONCE !
    start = time.clock()
    bG = nx.read_edgelist(before_file, '#', '\t', None, nodetype=int)
#    G = nx.read_edgelist(after_file, '#', '\t', None, nodetype=int, data=True)
    print "read bG: DONE, elapsed :", time.clock() - start
    
    h2_list = equivalence_class_H2_open(bG, None)
    cand_size, bin_size, sig_list_b, bucket_list_b = bucket_H2(h2_list, bins)
#    print "len B:", len(sig_list_b), len(bucket_list_b)
    
    # H1 score, H2 score
    start = time.clock()
    score_H1 = 0.0
    score_H2 = 0.0
    count = 0
    for i in range(n_samples):
        file_name = sample_file + str(i)
        aG = nx.read_edgelist(file_name, '#', '\t', create_using=nx.MultiGraph(), nodetype=int, data=False)     # IMPORTANT: MultiGraph
        # H1
        sum_re_prob, re_prob_dict = incorrectness_H1(bG, aG, bins)
        score_H1 += sum_re_prob
        # H2
        sum_re_prob, re_prob_dict = incorrectness_H2_open(aG, sig_list_b, bucket_list_b, bins)
        score_H2 += sum_re_prob
        print "count =", count
        count += 1
    #
    score_H1 = score_H1/n_samples
    score_H2 = score_H2/n_samples
    print "compute score_H1, score_H2: DONE, elapsed :", time.clock() - start
    
    # 
    return score_H1, score_H2
def main():
    """
    Pre-processing: 
        load data, compute centrality measures, write files with node data
    """
    print(nx.__version__)
    # Load network data, create storage dict, and extract main component
    depends=nx.read_edgelist("data/depends.csv",delimiter=",",create_using=nx.DiGraph(),nodetype=str,data=(("weight",time_from_today),))
    depends.name="depends"
    suggests=nx.read_edgelist("data/suggests.csv",delimiter=",",create_using=nx.DiGraph(),nodetype=str,data=(("weight",time_from_today),))
    suggests.name="suggests"
    imports=nx.read_edgelist("data/imports.csv",delimiter=",",create_using=nx.DiGraph(),nodetype=str,data=(("weight",time_from_today),))
    imports.name="imports"
    nets_dict={"depends":depends,"suggests":suggests,"imports":imports}
    for k in nets_dict.keys():
        main_component=nx.connected_component_subgraphs(nets_dict[k].to_undirected())[0].nodes()
        nets_dict[k]=nx.subgraph(nets_dict[k],main_component)
    
    # Run multiple measures on graphs and normalize weights
    measure_list=[nx.in_degree_centrality,nx.betweenness_centrality,nx.pagerank]
    for g in nets_dict.values():
        multiple_measures(g,measure_list)
        normalize_weights(g)
        
    # Output networks in GraphML format (to store node attributes)
    for i in nets_dict.items():
        # print(i[1].edges(data=True))
        nx.write_graphml(i[1],"data/"+i[0]+"_data.graphml")
        print("")
    print("All files written with data")
    
    """Visualization:
def main():
    parser = createParser()
    options = parser.parse_args()

    gtGraphNames = glob.glob("{0}/*.sim.cut".format(options.gtruth))
    gtGraphs = { fn.split("/")[-1][:-8] : nx.read_edgelist(fn) for fn in gtGraphNames }
    print(gtGraphs)
    print(gtGraphNames)

    oGraphNames = [ "{0}/{1}.out.ppi".format(options.other, k) for k in gtGraphs.keys() ]
    oGraphs = { fn.split("/")[-1][:-8] : nx.read_weighted_edgelist(fn) for fn in oGraphNames }
    inputGraphNames = glob.glob("{0}/bZIP*.cut".format(options.other))
    print(inputGraphNames)
    inputGraph = nx.read_edgelist(inputGraphNames[0])
    print(oGraphNames)

    cutoff = 0.99
    paranaGraph = graphWithCutoff(options.parana, 0.0)
    c = findSuggestedCutoff( paranaGraph, inputGraph, cutoff )
    evaluation.printStats( filteredGraph(paranaGraph, inputGraph.nodes(), cutoff=c ), inputGraph )
    print >>sys.stderr, "Parana 2.0    : {0}".format(getCurve(paranaGraph, inputGraph))



    for gtName, gtGraph in gtGraphs.iteritems():
        print(gtName)
        c = findSuggestedCutoff( paranaGraph, gtGraph, cutoff )
        print("Parana cutoff = {0}".format(c))
        print("==================")
        evaluation.printStats( filteredGraph(oGraphs[gtName], gtGraph.nodes()), gtGraph )
        print >>sys.stderr, "Pinney et. al : {0}".format(getCurve(oGraphs[gtName], gtGraph))
        evaluation.printStats( filteredGraph(paranaGraph, gtGraph.nodes(), cutoff=c ), gtGraph )
        print >>sys.stderr, "Parana 2.0    : {0}".format(getCurve(paranaGraph, gtGraph))
        print("\n")
    sys.exit(0)
Example #8
def graph_properties(filename, directed=False):
  # Read in rec as undirected graph
  if directed:
    G=nx.read_edgelist(filename, nodetype=int, create_using=nx.DiGraph())
  else:
    G=nx.read_edgelist(filename, nodetype=int, create_using=nx.Graph())

  props = {}

  # Calculate number of edges
  props['num_edges'] = G.number_of_edges()

  # Calculate number of nodes
  props['num_nodes'] = len(G)

  # Calculate largest connected component
  largest_component = nx.connected_component_subgraphs(G)[0]
  props['size_largestcc'] = len(largest_component)
  props['proportion_in_largestcc'] = float(len(largest_component)) / len(G)

  # Calculate clustering coefficient
  props['average_clustering'] = nx.average_clustering(G)

  # Calculate diameter of largest connected component
  # props['diameter'] = nx.diameter(largest_component)
  
  return props
def calGraph(infile, mode = 1):
	#init Parameter
	inputpath = 'edge_list/'
	outputpath = 'network_output/'
	n = mode
	Data_G = inputpath+infile+'_'+str(n)+'.edgelist'
	
	#init Graph
	G = nx.read_edgelist(Data_G, create_using=nx.DiGraph())
	GU = nx.read_edgelist(Data_G)
	#basci info
	print nx.info(G),'\n', nx.info(GU) 
	average_degree = float(sum(nx.degree(G).values()))/len(G.nodes())
	print 'average degree :', average_degree 
	degree_histogram = nx.degree_histogram(G)
	print 'count of degree-1 nodes :', degree_histogram[1]
	density = nx.density(G)
	print 'density :', density

	#Approximation
	#Centrality
	degree_centrality = nx.degree_centrality(G)
	print 'degree centrality top 2 :', sorted_dict(degree_centrality)[:2]
	out_degree_centrality = nx.out_degree_centrality(G)
	print 'out degree centrality top 2 :', sorted_dict(out_degree_centrality)[:2]
Example #10
    def load(self,fname):
        fext = (str(fname).split("."))[1]
        self.fname = (str(fname).split("."))[0]

        if self.directed_graph == False:
            self.G = nx.read_edgelist(path=fname)
        else:
            self.G = nx.read_edgelist(path=fname,create_using=nx.DiGraph())
 def test_edgelist_integers(self):
     G=nx.convert_node_labels_to_integers(self.G)
     (fd,fname)=tempfile.mkstemp()
     nx.write_edgelist(G,fname)  
     H=nx.read_edgelist(fname,nodetype=int)
     H2=nx.read_edgelist(fname,nodetype=int)
     G.remove_node(5) # isolated nodes are not written in edgelist
     assert_equal(sorted(H.nodes()),sorted(G.nodes()))
     assert_equal(sorted(H.edges()),sorted(G.edges()))
     os.close(fd)
     os.unlink(fname)
Example #12
 def test_edgelist_multidigraph(self):
     G = self.XDG
     (fd, fname) = tempfile.mkstemp()
     nx.write_edgelist(G, fname)
     H = nx.read_edgelist(fname, nodetype=int, create_using=nx.MultiDiGraph())
     H2 = nx.read_edgelist(fname, nodetype=int, create_using=nx.MultiDiGraph())
     assert_not_equal(H, H2)  # they should be different graphs
     assert_nodes_equal(list(H), list(G))
     assert_edges_equal(list(H.edges()), list(G.edges()))
     os.close(fd)
     os.unlink(fname)
def calGraph(infile, mode = 1):
	#init Parameter
	inputpath = 'edge_list/'
	n = mode
	Data_G = inputpath+infile+'_'+str(n)+'.edgelist'
	
	#init Graph
	G = nx.read_edgelist(Data_G, create_using=nx.DiGraph())
	GU = nx.read_edgelist(Data_G)
	average_clustering = nx.average_clustering(GU)
	transitivity = nx.transitivity(G)
	return [average_clustering, transitivity]
 def test_edgelist_graph(self):
     G=self.G
     (fd,fname)=tempfile.mkstemp()
     nx.write_edgelist(G,fname)  
     H=nx.read_edgelist(fname)
     H2=nx.read_edgelist(fname)
     assert_not_equal(H,H2) # they should be different graphs
     G.remove_node('g') # isolated nodes are not written in edgelist
     assert_equal(sorted(H.nodes()),sorted(G.nodes()))
     assert_equal(sorted(H.edges()),sorted(G.edges()))
     os.close(fd)
     os.unlink(fname)
Example #15
 def test_edgelist_digraph(self):
     G = self.DG
     (fd, fname) = tempfile.mkstemp()
     nx.write_edgelist(G, fname)
     H = nx.read_edgelist(fname, create_using=nx.DiGraph())
     G.remove_node('g')  # isolated nodes are not written in edgelist
     H2 = nx.read_edgelist(fname, create_using=nx.DiGraph())
     assert_not_equal(H, H2)  # they should be different graphs
     assert_nodes_equal(list(H), list(G))
     assert_edges_equal(list(H.edges()), list(G.edges()))
     os.close(fd)
     os.unlink(fname)
def comorbid_count_compare(net_dir, icd_gene_clinical, cancer_info, alterations, weighted=False):
    # = 'humannet.9'
    graph = networkx.read_edgelist(net_dir + '/network',nodetype=str)
    ct = neighbor_count_comorbid(graph, alterations['peak_mut'], icd_gene_clinical, cancer_info, comorbid_only = True, weighted=weighted)
    import os
    randdir = net_dir + '/rand/'
    randnets = os.listdir(randdir)
    x = scipy.zeros([len(randnets)])
    for i,f in enumerate(randnets):
        net = networkx.read_edgelist(randdir + f, nodetype = str, data=weighted)
        x[i] = neighbor_count_comorbid(net, alterations['peak_mut'], icd_gene_clinical, cancer_info, comorbid_only = True, weighted = weighted)    
    print 'comorbid_edges= ' + str(ct) + "\tngreater=" +str(sum(x >= ct)) + '\tp=' + str(sum(x >= ct)/float(len(randnets)))
    return ct, x
    def test_read_edgelist_3(self):
        s = b"""\
# comment line
1 2 {'weight':2.0}
# comment line
2 3 {'weight':3.0}
"""
        bytesIO = io.BytesIO(s)
        G = nx.read_edgelist(bytesIO,nodetype=int,data=False)
        assert_equal_edges(G.edges(),[(1,2),(2,3)])

        bytesIO = io.BytesIO(s)
        G = nx.read_edgelist(bytesIO,nodetype=int,data=True)
        assert_equal_edges(G.edges(data=True),[(1,2,{'weight':2.0}),(2,3,{'weight':3.0})])
Example #18
def read_graph():
	'''
	Reads the input network in networkx.
	'''
	if args.weighted:
		G = nx.read_edgelist(args.input, nodetype=int, data=(('weight',float),), create_using=nx.DiGraph())
	else:
		G = nx.read_edgelist(args.input, nodetype=int, create_using=nx.DiGraph())
		for edge in G.edges():
			G[edge[0]][edge[1]]['weight'] = 1

	if not args.directed:
		G = G.to_undirected()

	return G
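read_graph above pulls its options from a module-level args object; a minimal sketch of the argparse setup it assumes (flag names are inferred from the attribute accesses, not taken from the original script):

import argparse

parser = argparse.ArgumentParser(description='load an edge list for node2vec-style walks')
parser.add_argument('--input', required=True, help='path to the edge list file')
parser.add_argument('--weighted', action='store_true',
                    help='read a float weight from the third column')
parser.add_argument('--directed', action='store_true',
                    help='keep the graph directed instead of collapsing it')
args = parser.parse_args()

G = read_graph()  # consumes args.input, args.weighted, args.directed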
def write_communities(graph, name_to_size):

    # str.strip('.ncol') removes a character set, not the suffix, so trim the extension explicitly
    shortname = graph.split('/')[-1]
    if shortname.endswith('.ncol'):
        shortname = shortname[:-len('.ncol')]
    nxgraph = networkx.read_edgelist(graph)
    partition = community.best_partition(nxgraph)
    count = 0
    if shortname in name_to_size.keys():
        for com in set(partition.values()):
            count = count + 1.
            list_nodes = [nodes for nodes in partition.keys() if partition[nodes] == com]
            size_com = len(list_nodes)
            if size_com > name_to_size[shortname]:
                community_subgraph = nxgraph.subgraph(list_nodes)
                with open("/net/data/graph-models/louvain-clusters/communities/" + shortname +"_" +str(count), 'a') as fout1:
                    networkx.write_edgelist(community_subgraph, fout1)
Example #20
    def read_graph(self, nx_g):

        if self.is_weighted:

            self.G = nx.read_edgelist(nx_g, data=(('weight', float),), create_using=nx.DiGraph(), edgetype=str)

        else:

            self.G = nx.read_edgelist(nx_g, create_using=nx.DiGraph(), edgetype=str)

            for edge in self.G.edges():
                self.G[edge[0]][edge[1]]['weight'] = 1

        if not self.is_directed:
            self.G = self.G.to_undirected()
def main():
    """
    Program Driver. Parses command line arguments to determine where to store
    output pickle files and what networks to attack, reads in networks from the
    given source, runs all necessary attacks, and pickles the output for later
    use.
    """

    aparse = argparse.ArgumentParser(usage="Attack a collection of networks")
    aparse.add_argument('--network_file', '-f', action='store',
                        default='networks.yaml',
                        help="Path to network config (default: ./networks.yaml)",
                        dest='config_path')
    aparse.add_argument('--picklejar', '-p', action='store',
                        default='.',
                        help='output for pickle files (default: current directory)',
                       )
    aparse.add_argument('--update', '-u', action='store_true',
                        help='Only run network processes for networks which have '
                             'not already been analyzed.')
    args = aparse.parse_args()

    cfg = open(args.config_path, 'r')


    for net_attrs in yaml.safe_load_all(cfg):
        picklename = net_attrs["name"] + ".pickle"
        if args.update and picklename in os.listdir(args.picklejar):
            continue

        print "Analyzing network %s..." % net_attrs['name']
        fname = net_attrs['filename']
        data = [(key, eval(value)) for key, value in net_attrs['data'].items()]
        if net_attrs["directed"]:
            network = networkx.read_edgelist(fname,
                                             create_using=networkx.DiGraph(),
                                             nodetype=str,
                                             data=data).to_undirected()
        else:
            network = networkx.read_edgelist(fname,
                                             create_using=networkx.Graph(),
                                             nodetype=str,
                                             data=data)
        print "Network file loaded"
        pckl = os.path.normpath(args.picklejar+"/"+ picklename)
        ac.compare_to_random_networks(network, FRACS, pckl)

        print "Done!"
Example #22
def main():

    msg = "help: sigcomm graph1 graph2 cap dist edges wasted thld " \
          "hops tries ttl prob"
    if len(sys.argv) < 12: print msg; return -1

    g = nx.read_edgelist(sys.argv[1], create_using=nx.Graph())
    dg = nx.DiGraph()

    random.seed(-1)

    cap = int(sys.argv[3])
    dist = int(sys.argv[4])
    edges = int(sys.argv[5])
    wasted = int(sys.argv[6])
    threshold = int(sys.argv[7])
    hops = int(sys.argv[8])
    tries = int(sys.argv[9])
    ttl = int(sys.argv[10])
    prob = float(sys.argv[11])

    cap_edges(g, cap)
    get_followers_dist(g, dg, dist)
    sum_edges(g, edges)
    wasted_packets(g, dg, wasted)
    add_pseudo_edges(g, dg, threshold)
    find_paths(g, dg, hops, tries, ttl, prob)

    print >> sys.stderr, "g nodes", len(g)
    print >> sys.stderr, "g edges", g.size()

    print >> sys.stderr, "dg nodes", len(dg)
    print >> sys.stderr, "dg edges", dg.size()
Example #23
def show(filename, title):
    if not os.path.isfile(filename + '.png'):
        FLAG = 0
        x = []
        y = []
        reader = csv.reader(open(filename + '.vna', 'rb'), delimiter='\t')  # dialect='excel-tab'

        for row in reader:
            if FLAG == 1:
                x.append(row[0])
                y.append(row[1])
            if row[0] == 'v1':
                FLAG = 1

        with open(filename + '.csv', 'wb') as csvfile:
            writer = csv.writer(csvfile, delimiter=',')
            for i in range(len(x)):
                writer.writerow([x[i]] + [y[i]])

        G = nx.read_edgelist(filename + '.csv', delimiter=",", create_using=nx.Graph(), nodetype=str)

        plot6.Save(G, filename)
    else:
        img = mpimg.imread(filename + '.png')
        plt.imshow(img, interpolation='nearest')
        plt.axis('off')
        plt.suptitle("", y=0.95)
        plt.suptitle(title, y=0.95)
        plt.show()
Example #24
def get_followers_dist(g, dg, follow):

    if follow == -1: return -1

    if len(dg) == 0:
        dg = nx.read_edgelist(sys.argv[2], create_using=dg)

    no_of_paths = 0
    for u in dg.nodes():

        if not g.has_node(u):
            print "no_source"
            continue

        for v in dg.successors(u):
            if u == v: continue

            if g.has_node(v):
                try:
                    print nx.shortest_path_length(g, source=u, target=v)
                    no_of_paths += 1
                except nx.exception.NetworkXError as err:
                    print "no_path"
            else:
                print "no_target"

    print >> sys.stderr, "no of paths", no_of_paths
    return no_of_paths
def get_graph(path):
    fh = open(path, 'rb')
    G = nx.read_edgelist(fh)
    fh.close()
    #remove posible self loops
    G.remove_edges_from(G.selfloop_edges())
    return G
Example #26
def add_pseudo_edges(g, dg, threshold):
    """ flawed logic, needs to be fixed """

    if threshold == -1 : return -1

    if len(dg) == 0:
        dg = nx.read_edgelist(sys.argv[2], create_using=dg)

    new_edges = []
    for n in dg.nodes():

        if not g.has_node(n): continue

        fw_count = {}
        n_dists = nx.single_source_shortest_path_length(g,n,4)
        followings = set(dg.successors(n))

        for node, dist in n_dists.iteritems():
            if dist > 2: continue

            for f in dg.successors(node):
                if f not in followings:
                    if f in fw_count:
                        fw_count[f] = fw_count[f] + 1
                    else: fw_count[f] = 1

        for k,v in fw_count.iteritems():
            if v >= threshold and k in n_dists and n_dists[k] <= 4: 
                new_edges.append((n,k))

    for e in new_edges: dg.add_edge(*e)
    print >> sys.stderr, "new edges", len(new_edges)
    return 0
def load_data(from_cache):
    if from_cache:
        input_filename = constants.CHARTS_FOLDER_NAME + 'by_nodes'
        reader = open(input_filename, 'r')
        by_nodes = eval(reader.read())
        reader.close

        input_filename = constants.CHARTS_FOLDER_NAME + 'by_times'
        reader = open(input_filename, 'r')
        by_times = eval(reader.read())
        reader.close
    else:
        by_nodes = {}
        by_times = {}
        
        utils.ensure_folder(constants.CHARTS_FOLDER_NAME)

        filenames = os.listdir(constants.GRAPHS_FOLDER_NAME)
        filenames.sort()

        print len(filenames)
        
        time = 0
        for filename in filenames:
            print 'Processing: ' + filename
        
            input_filename = constants.GRAPHS_FOLDER_NAME + filename
            
            #g = nx.read_gpickle(input_filename)
            g = nx.read_edgelist(input_filename, create_using=nx.DiGraph())
            #for wifi data            
            #g = nx.read_edgelist(input_filename, '#', ',')
            
            for v in g.nodes():
                node_state = calc_node_state(g, v)
                
                if v not in by_nodes:
                    by_nodes[v] = {}
                by_nodes[v][time] = node_state
                
                if time not in by_times:
                    by_times[time] = {}
                by_times[time][v] = node_state
    
            time = time+1
            
        output_filename = constants.CHARTS_FOLDER_NAME + 'by_nodes'
        writer = open(output_filename, 'w')
        writer.write(str(by_nodes))
        writer.close
    
        output_filename = constants.CHARTS_FOLDER_NAME + 'by_times'
        writer = open(output_filename, 'w')
        writer.write(str(by_times))
        writer.close
    
        print len(by_times)
        print len(by_nodes)
        
    return by_nodes, by_times
Example #28
    def _read_cell_graph(self, filename, format):
        """Load the cell-to-cell connectivity graph from a
        file. 

        Returns None if any error happens.
        """
        cell_graph = None
        if filename:
            try:
                start = datetime.now()
                if format == "gml":
                    cell_graph = nx.read_gml(filename)
                elif format == "pickle":
                    cell_graph = nx.read_gpickle(filename)
                elif format == "edgelist":
                    cell_graph = nx.read_edgelist(filename)
                elif format == "yaml":
                    cell_graph = nx.read_yaml(filename)
                elif format == "graphml":
                    cell_graph = nx.read_graphml(filename)
                else:
                    print "Unrecognized format:", format
                end = datetime.now()
                delta = end - start
                config.BENCHMARK_LOGGER.info(
                    "Read cell_graph from file %s of format %s in %g s"
                    % (filename, format, delta.seconds + 1e-6 * delta.microseconds)
                )
            except Exception, e:
                print e
def read_general(datadir,tolerance,minrepeats):

    """ Function to read datasets from files in *datadir*.
   
    Each file represents a graph for a particular timestamp. 
    The name of the files is expected to be <timestamp>.ncol,
    and each line in the file represents one edge in the graph e.g.
    line:' 1 2 5 ' indicates there is an edge between nodes
    '1' and '2' with weight '5'

    Parameters
    ----------
    datadir: string
        path to the directory containing the dataset.
    tolerance: float,optional
        For a label to be considered a dominant label, it must be within this much of the maximum
        value found for the quality function. The smaller it is, the fewer dominant labels there 
        will be. 
    minrepeats: integer
        The number of variations to try before returning the best partition.            

    Returns 
    ------- 
    t: list
        an array of timestamps, each representing a snapshot of the communities.
    g1: networkx.Graph
        the last graph to be read from file.
    initial_label_dictionary: dictionary { node: community}
        A dictionary mapping nodes to community labels if it is the first snapshot, otherwise *None*.
    """

    raw_file_list = os.listdir(datadir)
    # note: f.rstrip(".ncol") would strip a character set, not the ".ncol" suffix
    timestamps = sorted([int(f[:-len(".ncol")]) for f in raw_file_list if f.endswith(".ncol")])

    initial_label_dict_filename = os.path.join(datadir, 'initial_label_dict.txt')

    beginning = True
    for t in timestamps:
        f = str(t) + ".ncol"
        fpath = os.path.join(datadir,f)

        # if a file is empty, move on to the next timestamp
        if os.path.getsize(fpath) == 0:
            continue

        g1 = nx.read_edgelist(fpath, nodetype=int, data=(('weight',float),))

        if beginning is True:
            # when called for the first time just return initial_label_dict
            if not os.path.exists(initial_label_dict_filename):
                initial_label_dict = maxQ(g1,tolerance=tolerance,minrepeats=minrepeats)
                with open(initial_label_dict_filename, 'w') as lf:
                    lf.write(repr(initial_label_dict))

            with open(initial_label_dict_filename, 'r') as lf:
                initial_label_dict = eval(lf.read())
            yield (t, g1, initial_label_dict)
            beginning = False
        else:
            yield (t, g1, None)
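read_general is a generator yielding one (timestamp, graph, labels) triple per snapshot; a minimal driver, with a hypothetical data directory of <timestamp>.ncol files:

for t, g1, initial_labels in read_general('data/snapshots', tolerance=0.01, minrepeats=5):
    print('t=%s nodes=%d edges=%d' % (t, g1.number_of_nodes(), g1.number_of_edges()))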
Example #30
    def _read_celltype_graph(self, celltypes_file, format="gml"):
        """
        Read celltype-celltype connectivity graph from file.

        celltypes_file -- the path of the file containing
        the graph.
        
        format -- format of the file. allowed values: gml, graphml, edgelist, pickle, yaml.

        """
        start = datetime.now()
        celltype_graph = None
        try:
            if format == "gml":
                celltype_graph = nx.read_gml(celltypes_file)
            elif format == "edgelist":
                celltype_graph = nx.read_edgelist(celltypes_file)
            elif format == "graphml":
                celltype_graph = nx.read_graphml(celltypes_file)
            elif format == "pickle":
                celltype_graph = nx.read_gpickle(celltypes_file)
            elif format == "yaml":
                celltype_graph = nx.read_yaml(celltypes_file)
            else:
                print "Unrecognized format %s" % (format)
        except Exception, e:
            print e
Example #31
- https://github.com/networkx/networkx/blob/master/examples/drawing/sampson_data.zip
"""

import zipfile
from io import BytesIO as StringIO

import matplotlib.pyplot as plt
import networkx as nx

with zipfile.ZipFile("sampson_data.zip") as zf:
    e1 = StringIO(zf.read("samplike1.txt"))
    e2 = StringIO(zf.read("samplike2.txt"))
    e3 = StringIO(zf.read("samplike3.txt"))

G1 = nx.read_edgelist(e1, delimiter="\t")
G2 = nx.read_edgelist(e2, delimiter="\t")
G3 = nx.read_edgelist(e3, delimiter="\t")
pos = nx.spring_layout(G3, iterations=100)
plt.clf()

plt.subplot(221)
plt.title("samplike1")
nx.draw(G1, pos, node_size=50, with_labels=False)
plt.subplot(222)
plt.title("samplike2")
nx.draw(G2, pos, node_size=50, with_labels=False)
plt.subplot(223)
plt.title("samplike3")
nx.draw(G3, pos, node_size=50, with_labels=False)
plt.subplot(224)
Example #32
    def sample_graph(self,
                     hparams,
                     placeholders,
                     adj,
                     features,
                     weights,
                     weight_bins,
                     s_num,
                     node,
                     hde,
                     num=10,
                     outdir=None):
        '''
        Args :
            num - int
                10
                number of edges to be sampled
            outdir - string
            output dir
        '''
        list_edges = []

        for i in range(self.n):
            for j in range(i + 1, self.n):
                list_edges.append((i, j, 1))
                list_edges.append((i, j, 2))
                list_edges.append((i, j, 3))
        # list_edges.append((-1, -1, 0))

        list_weight = [1, 2, 3]

        hparams.sample = True

        eps = np.random.randn(self.n, self.z_dim, 1)
        with open(hparams.z_dir + 'test_prior_' + str(s_num) + '.txt',
                  'a') as f:
            for z_i in eps:
                f.write('[' + ','.join([str(el[0]) for el in z_i]) + ']\n')

        feed_dict = construct_feed_dict(hparams.learning_rate,
                                        hparams.dropout_rate, self.k, self.n,
                                        self.d, hparams.decay_rate,
                                        placeholders)
        feed_dict.update({self.adj: adj[0]})
        feed_dict.update({self.features: features[0]})
        feed_dict.update({self.weight_bin: weight_bins[0]})
        feed_dict.update({self.weight: weights[0]})

        feed_dict.update({self.input_data: np.zeros([self.k, self.n, self.d])})
        feed_dict.update({self.eps: eps})

        prob, ll, z_encoded, kl, sample_mu, sample_sigma, loss, w_edge, labels = self.sess.run(
            [
                self.prob, self.ll, self.z_encoded, self.kl, self.enc_mu,
                self.enc_sigma, self.cost, self.w_edge, self.label
            ],
            feed_dict=feed_dict)
        prob = np.reshape(prob, (self.n, self.n))
        w_edge = np.reshape(w_edge, (self.n, self.n, self.bin_dim))

        indicator = np.ones([self.n, 3])
        p, list_edges, w_new = normalise(prob, w_edge, self.n, self.bin_dim,
                                         [], list_edges, indicator)

        if not hparams.mask_weight:
            trial = 0
            while trial < 5000:
                candidate_edges = [
                    list_edges[i] for i in np.random.choice(range(
                        len(list_edges)), [hparams.edges],
                                                            p=p,
                                                            replace=False)
                ]
                with open(hparams.sample_file + 'test.txt', 'w') as f:
                    for (u, v, w) in candidate_edges:
                        if (u >= 0 and v >= 0):
                            f.write(
                                str(u) + ' ' + str(v) + ' {\'weight\':' +
                                str(w) + '}\n')
                f = open(hparams.sample_file + 'test.txt')
                G = nx.read_edgelist(f, nodetype=int)
                if nx.is_connected(G):
                    for (u, v, w) in candidate_edges:
                        if (u >= 0 and v >= 0):
                            with open(
                                    hparams.sample_file + "approach_2_" +
                                    str(trial) + "_" + str(s_num) + '.txt',
                                    'a') as f:
                                f.write(
                                    str(u) + ' ' + str(v) + ' {\'weight\':' +
                                    str(w) + '}\n')
                trial += 1

        else:
            trial = 0
            while trial < 5000:
                candidate_edges = self.get_masked_candidate(
                    list_edges, prob, w_edge, hparams.edges, hde)
                # print("Debug candidate", candidate_edges)
                if len(candidate_edges) > 0:
                    with open(hparams.sample_file + 'test.txt', 'w') as f:
                        for uvw in candidate_edges.split():
                            [u, v, w] = uvw.split("-")
                            u = int(u)
                            v = int(v)
                            w = int(w)
                            if (u >= 0 and v >= 0):
                                f.write(
                                    str(u) + ' ' + str(v) + ' {\'weight\':' +
                                    str(w) + '}\n')
                    f = open(hparams.sample_file + 'test.txt')
                    # try:
                    G = nx.read_edgelist(f, nodetype=int)
                    # except:
                    # continue

                    if nx.is_connected(G):
                        for uvw in candidate_edges.split():
                            [u, v, w] = uvw.split("-")
                            u = int(u)
                            v = int(v)
                            w = int(w)
                            if (u >= 0 and v >= 0):
                                with open(
                                        hparams.sample_file + "approach_2_" +
                                        str(trial) + "_" + str(s_num) + '.txt',
                                        'a') as f:
                                    f.write(
                                        str(u) + ' ' + str(v) +
                                        ' {\'weight\':' + str(w) + '}\n')
                trial += 1
Example #33
import networkx as nx
import sys

g = nx.read_edgelist("/fast-data/patentmark/triplets.tsv", )
Example #34
def create_graph_from_file(filename):
    print("----------------build graph--------------------")
    f = open(filename, "rb")
    g = nx.read_edgelist(f)
    return g
Example #35
    # Output
    OUTPUT_EVENT = sys.argv.pop()
    OUTPUT = sys.argv.pop()

    if len(sys.argv) == 12:
        isolatable_node_type = sys.argv.pop()

    # Load data
    logging.debug("Loading data")
    filename, file_extension = os.path.splitext(edgelist)
    print(filename, file_extension)
    if file_extension == ".gexf":  # when a node has attributes
        G = nx.read_gexf(edgelist, node_type=int)
    elif file_extension == ".edgelist":  # when a node does not have attrbutes
        G = nx.read_edgelist(edgelist, nodetype=int)
    else:
        raise ValueError("The input graph should be saved in .edgelist or .gexf format")

    logs = pd.read_csv(sim_log_data)

    #
    # Preprocess
    #
    logging.debug("Construct the transmission tree from the log")
    logs["id"] = "id"
    tree_list = utils.construct_transmission_tree(logs)

    logging.debug("Set onset time")
    for tid, tree in enumerate(tree_list):
        tree_list[tid] = utils.set_onset_time(tree, time_lag_for_isolation)
Example #36
    for c, idx in color_idx.items():

        plt.scatter(node_pos[idx, 0], node_pos[idx, 1],
                    label=c)  # c=node_colors)

    plt.legend()

    plt.show()


if __name__ == "__main__":
    # G = nx.read_edgelist('test.edge_list.txt', create_using=nx.DiGraph(), nodetype=None,
    #                      data=[('weight', int)])
    G = nx.read_edgelist(
        '../data/ETH/Phishing node classification/TransEdgelist.txt',
        create_using=nx.MultiDiGraph(),
        delimiter=',',
        nodetype=None,
        data=[('amount', float), ('weight', int)])

    model = Struc2Vec(
        G,
        10,
        80,
        workers=4,
        verbose=40,
    )
    model.train()
    embeddings = model.get_embeddings()

    evaluate_embeddings(embeddings)
    plot_embeddings(embeddings)
 def __init__(self, input_filename):
     infile = open(input_filename, 'r')
     self.G = nx.read_edgelist(infile,
                               nodetype=int,
                               data=(('weight', float), ))
     print("successfully loaded graph from file: " + input_filename)
Example #38
import networkx as nx
G = nx.read_edgelist("../dataset/Email-Enron.txt")

communities = sorted()
Example #39
        start_id = int(sys.argv[4])
    if len(sys.argv) == 6:
        p = float(sys.argv[3])
        n_samples = int(sys.argv[4])
        start_id = int(sys.argv[5])

    print "file_name =", file_name
    print "n_samples =", n_samples
    print "start_id =", start_id
    print "t =", t
    print "alpha =", alpha
    if len(sys.argv) == 6:
        print "p =", p

    G = nx.read_edgelist(
        "../data/" + file_name + ".gr", '#', '\t', None, nodetype=int
    )  # implicitly remove duplicate edges (i.e. no multiple edges), use type 'int' instead of string
    #    G = nx.read_edgelist(file_name, '#', ' ', None, nodetype=int)

    print "#nodes :", G.number_of_nodes()
    print "#edges :", len(G.edges())
    print "#self-loops :", G.number_of_selfloops()
    print "#components :", len(nx.connected_components(G))
    n_nodes = G.number_of_nodes()
    deg_list = nx.degree(G)  # dict[node] = deg
    min_deg = min(deg_list.itervalues())
    max_deg = max(deg_list.itervalues())
    print "min-deg =", min_deg
    print "max-deg =", max_deg

    # TEST random_walk_transform()
Example #40
    for edge_index in a_edge_index:
        arr_tmp.append([
            fund_index,
            len(list_funds) + edge_index, weight_matrix_total[fund_index,
                                                              edge_index]
        ])
arr_tmp = np.array(arr_tmp)
pd_tmp = pd.DataFrame(arr_tmp)
pd_tmp[0] = pd_tmp[0].astype(int)
pd_tmp[1] = pd_tmp[1].astype(int)
output_name = 'fund'
path = data_dir + 'graph/{}.csv'.format(output_name)
pd_tmp.to_csv(path, index=False, header=False, sep=' ')  # header=False so the column-name row is not parsed as an edge

nx_G = nx.read_edgelist(path,
                        nodetype=int,
                        data=(('weight', float), ),
                        create_using=nx.DiGraph())
nx_G = nx_G.to_undirected()
G = graph.Graph(nx_G, False, 1, 1)
G.preprocess_transition_probs()

walks = G.simulate_walks(200, 200)
walks = [list(map(str, walk)) for walk in walks]

from gensim.models import Word2Vec

model = Word2Vec(walks,
                 size=32,
                 window=6,
                 min_count=0,
                 sg=1,
    return r1, r2


Iteration = 10000  # number of iterations
P = [
    1, 0.95, 0.9, 0.85, 0.8, 0.75, 0.7, 0.65, 0.6, 0.55, 0.5, 0.45, 0.4, 0.35,
    0.3, 0.25, 0.2, 0.15, 0.1, 0.05, 0
]
assortivity = np.zeros((21, 1))
average_path = np.zeros((21, 1))
count = 0

for i in P:
    print(i)
    for j in range(10):
        G = nx.read_edgelist('datasets/network_average_path.txt', nodetype=int)
        r1, r2 = calculate_ass(G, i, Iteration)
        print(r1, r2)
        assortivity[count] = assortivity[count] + r1
        average_path[count] = average_path[count] + r2
    assortivity[count] = assortivity[count] / 10
    average_path[count] = average_path[count] / 10
    count = count + 1
fid = open('results/average_path_ass.txt', 'w')
for i in range(len(P)):
    fid.write(str(assortivity[i]) + ' ' + str(average_path[i]) + '\n')
fid.close()

plt.figure(figsize=(16, 16))
plt.style.use('ggplot')
plt.semilogx(assortivity, average_path, 'o-', label='$r_1$')
Example #42
    color_idx = {}

    for i in range(len(X)):

        color_idx.setdefault(Y[i][0], [])

        color_idx[Y[i][0]].append(i)

    for c, idx in color_idx.items():

        plt.scatter(node_pos[idx, 0], node_pos[idx, 1],
                    label=c)  # c=node_colors)

    plt.legend()

    plt.show()


if __name__ == "__main__":
    G = nx.read_edgelist('../data/flight/usa-airports.edgelist',
                         create_using=nx.DiGraph(),
                         nodetype=None,
                         data=[('weight', int)])

    model = Node2Vec(G, 10, 80, workers=1, p=0.25, q=2)
    model.train()
    embeddings = model.get_embeddings()

    evaluate_embeddings(embeddings)
    plot_embeddings(embeddings)
Example #43
        # 3rd: cluster cores
        for candidate in candidates:
            self.cluster_core(candidate)
        print '3. after cluster core, disjoint set - parent dict:', dict(
            zip(range(self.n), self.disjoint_set.parent))

        # 4th: cluster non-core
        self.cluster_non_core()
        print '4. after cluster non-core mark cluster id, cluster(represented by root vertex), min ele id:', dict(
            filter(lambda pair: pair[1] != self.n,
                   zip(range(self.n), self.cluster_dict)))

        # finally, output result
        print '\nfinal result in format:', ' '.join([
            'core/non-core', 'vertex id',
            'cluster id(min core vertex id in this cluster)'
        ])
        self.result_lines.append('c/n vertex_id cluster_id')
        print 'c/n vertex_id cluster_id'
        self.output_result()


if __name__ == '__main__':
    graph = nx.read_edgelist('demo_input_graph.txt', nodetype=int)
    offset_lst, dst_v_lst, deg_lst = to_csr_graph(graph)

    print 'csr representation:\noffset_lst=', offset_lst, '\ndst_v_lst=', dst_v_lst, '\ndeg_lst=', deg_lst, '\n'

    pscan_algo = PScan(offset_lst, dst_v_lst, deg_lst, eps=0.6, min_pts=3)
    pscan_algo.run_algorithm()
Example #44
'''	Ganesh Prasad - 2018csm1008
	Rakesh meena - 2018csm1017
	Jeevan Kumar - 2018csm1012

Problem: Given a network of friendships, we have to find the nodes (persons) with the most and least friendly neighbourhoods.
Solution: We take a node and find its adjacent nodes (a list, say j) that are also impressed by the node.
	Then, for every node in j, we count its mutually impressed friends; dividing the total by len(j)
	gives an average that serves as a factor for deciding how friendly a node's neighbourhood is.'''

import networkx as nx
import matplotlib.pyplot as plt

G = nx.read_edgelist(r"pagerank.txt", create_using=nx.DiGraph(), nodetype=int)

nx.draw(G, with_labels=True)
plt.show()


# Find total Number of mutually impressed friends of a node
def num_of_mutually_impressed_nodes(G, node):
    list_successors = G.successors(node)
    total_num = 0
    for successor_node in list_successors:
        if (G.has_edge(successor_node, node)):
            total_num = total_num + 1
    return total_num


# Returns a list of mutually impressed friends for a node
def mutually_impressed_nodes(G, node):
    list_successors = G.successors(node)
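The example is cut off above; a sketch of the averaging factor described in the header comment, assuming the helper num_of_mutually_impressed_nodes from this snippet (the function name here is invented):

def avg_neighbourhood_friendliness(G, node):
    # j: successors of `node` that are also impressed by it (mutual edges)
    j = [v for v in G.successors(node) if G.has_edge(v, node)]
    if not j:
        return 0.0
    # average the mutually impressed friend counts over j
    return sum(num_of_mutually_impressed_nodes(G, v) for v in j) / float(len(j))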
import networkx as nx
import numpy
G = nx.read_edgelist("facebook_combined.txt")

n = list(G.nodes())

spll = []

for u in n:
    for v in n:
        if u != v:
            l = nx.shortest_path_length(G, u, v)
            print("Shortest path between ", u, " and ", v, " is of lenth ", l)
            spll.append(l)

min_spl = min(spll)
max_spl = max(spll)
avg_spl = numpy.average(spll)

print("Minimum shortest path length : ", min_spl)
print("Maximum shortest path length : ", max_spl)
print("Average shortest path length : ", avg_spl)
import networkx as nx
from itertools import chain

#this script removes all the pages with no incoming or no outgoing links
#noise reduction process

G = nx.read_edgelist('../datasets/dbpedia_resources_wiki.edgelist',
                     nodetype=int,
                     create_using=nx.DiGraph())
print 'read graph'

remove_nodes_out = (node for node, degree in G.out_degree().iteritems()
                    if degree == 0)  #nodes with out_degree = 0

remove_nodes_in = (node for node, degree in G.in_degree().iteritems()
                   if degree == 0)  #nodes with in_degree = 0

remove_nodes = chain(remove_nodes_out, remove_nodes_in)

G.remove_nodes_from(remove_nodes)

print 'graph has %d nodes and %d edges' % (len(G.nodes()), len(G.edges()))

print "writing graph"

nx.write_edgelist(
    G,
    '../graph/dbpedia_resources_wiki_reduced_1_out_1_in.edgelist',
    data=False)
Example #47
import collections
import matplotlib.pyplot as plt
import networkx as nx
import numpy as np
import pandas as pd
from networkx import bipartite

###################
#Makes degree histogram
def grapher(G):
	degree_sequence = sorted([d for n, d in G.degree()], reverse=True)  # degree sequence
	# print "Degree sequence", degree_sequence
	degreeCount = collections.Counter(degree_sequence)
	deg, cnt = zip(*degreeCount.items())

	fig, ax = plt.subplots()
	plt.bar(deg, cnt, width=0.80, color='b')

	plt.title("Degree Histogram")
	plt.ylabel("Count")
	plt.xlabel("Degree")
	
# log-log option
	# ax.set_xscale("log")
	# ax.set_yscale("log")

	ax.set_xticks([d + 0.1 for d in deg])
	ax.set_xticklabels(deg)
	plt.show()
    

p = nx.read_edgelist('DCh-Miner_miner-disease-chemical.tsv')
Example #48
    argvs = sys.argv
    argc = len(argvs)
    if (argc < 2):
        print(
            'Please give frovedis_server calling command as the first argument \n(e.g. "mpirun -np 2 -x /opt/nec/nosupport/frovedis/ve/bin/frovedis_server")'
        )
        quit()
    FrovedisServer.initialize(argvs[1])

    frov_graph = fnx.read_edgelist(DATASET, nodetype=np.int32, delimiter=' ')
    fres = set(fnx.bfs_edges(frov_graph, src, depth_limit=depth))

    FrovedisServer.shut_down()
except Exception as e:
    print("status=Exception: " + str(e))
    sys.exit(1)

#NetworkX
try:
    nx_graph = nx.read_edgelist(DATASET, nodetype=np.int32, delimiter=' ')
    nres = set(nx.bfs_edges(nx_graph, src, depth_limit=depth))
except Exception as e:
    print("status=Exception: " + str(e))
    sys.exit(1)
print(fres)
print(nres)
if len(fres - nres) == 0:
    print("status=Passed")
else:
    print("status=Failed")
Example #49
                  [1, 0, 0, 0, 1, 0, 0, 0, 1, 0],
                  [0, 1, 0, 0, 0, 0, 0, 0, 0, 0],
                  [0, 0, 0, 0, 1, 0, 0, 0, 0, 0],
                  [0, 0, 0, 0, 0, 1, 0, 0, 0, 1],
                  [0, 0, 0, 0, 0, 0, 0, 0, 1, 0]
                 ])
G3 = nx.Graph(G_mat)
print('list(G3.edges())\n',list(G3.edges()))

# simple visualization
# plt.figure()
# nx.draw_networkx(G3)
# plt.show()

# edge list
G4 = nx.read_edgelist('./data/G_edgelist.txt', data=[('weight', int)])
print('list(G4.edges(data=True))\n',list(G4.edges(data=True)))

# # simple visualization
# plt.figure()
# nx.draw_networkx(G4)
# plt.show()

# 2.4 DataFrame
G_df = pd.read_csv('./data/G_edgelist.txt', delim_whitespace=True,
                   header =None, names= ['n1', 'n2', 'weight'])
print('G_df\n',G_df)

G5 = nx.from_pandas_dataframe(G_df,'n1','n2',edge_attr='weight')
print('list(G5.edges(data=True))\n',list(G5.edges(data=True)))
# simple visualization
def label_prop():
    G = nx.read_edgelist("facebook_combined.txt",
                         create_using=nx.Graph(),
                         nodetype=int)
    print nx.info(G)

    for i in G.nodes():
        G.node[i]['label'] = i
        G.node[i]['ID'] = i
        G.node[i]['l_1'] = 0
        G.node[i]['l_2'] = 0
        G.node[i]['l_next'] = 0
    '''
    for n,nbrs in G.adjacency_iter():
        for nbr,edict in nbrs.items():
            if nbr==200:
                print n, nbrs, G.node[nbr]['label']
    '''
    mainStop = False
    i = 0
    while (i < 100):
        if i == 99:
            set_communities = set()
            for n in G.nodes():
                set_communities.add(G.node[n]['label'])
            print "the number of communities after 100 iterations==", len(
                set_communities)

        i += 1
        mainStop = False
        l1_stop = True
        l2_stop = True
        for n in G.nodes():
            if (not (G.node[n]['label'] == G.node[n]['l_1'])):
                l1_stop = False
        for n in G.nodes():
            if (not (G.node[n]['label'] == G.node[n]['l_2'])):
                l2_stop = False

        #print l1_stop, l2_stop
        if (not (l1_stop or l2_stop)):
            #print "in not loop"
            for n, nbrs in G.adjacency_iter():
                dict = {}
                dict.clear()
                for nbr, d in nbrs.items():
                    temp = G.node[nbr]['label']
                    if not dict.has_key(temp):
                        dict[temp] = 1
                    else:
                        dict[temp] += 1
                max_key = 0
                max_key = max(dict, key=dict.get)
                G.node[n]['l_next'] = max_key
                G.node[n]['l_2'] = G.node[n]['l_1']
                G.node[n]['l_1'] = G.node[n]['label']
                G.node[n]['label'] = max_key
            '''
            for n in G.nodes():
                G.node[n]['l_2']=G.node[n]['l_1']
                G.node[n]['l_1']=G.node[n]['label']
                G.node[n]['label']=G.node[n]['l_next']
            '''

        else:
            print "The Community converges"
            mainStop = True

            print i
            return i
# -*- coding: utf-8 -*-
import random
import networkx as nx
import matplotlib.pyplot as plt
from operator import itemgetter

G2 = nx.read_edgelist('Facebook_Dataset.txt',
                      create_using=nx.Graph(),
                      nodetype=int)


#return the friends of a user
def friends(graph, user):
    return set(graph.neighbors(user))


#returns a list of friends of friends of a user
def friends_of_friends(graph, user):
    x = []
    for each in graph.neighbors(user):
        for item in graph.neighbors(each):
            x.append(item)
    return set(x)


# returns a list of common friends
def common_friends(graph, user1, user2):
    x1 = friends(graph, user1)
    x2 = friends(graph, user2)
    return set(x1 & x2)
Example #52
 def __init__(self):
     self.graph = nx.read_edgelist(
         'data/1_edge_list/kaggle_numbers_bidi.edgelist',
         create_using=nx.DiGraph)
     print(len(self.graph.nodes))
     print(len(self.graph.edges))
Example #53
def nodeID_mapping(input_file_name, output_file_name=" ", reverse=False):
    if input_file_name.endswith(".edges") or input_file_name.endswith(".txt"):
        f = open(input_file_name, "r")
        g = nx.read_edgelist(f,
                             create_using=nx.DiGraph(),
                             nodetype=str,
                             data=False)
        # print g.edges()[:10]
        f.close()
    elif input_file_name.endswith(".gpickle"):
        g = nx.read_gpickle(input_file_name)

    if output_file_name == " " or output_file_name == None:
        output_file_name = os.path.abspath(input_file_name).split(
            ".")[0] + "_index0.edges"

    print("write graph edges list to: %s" % output_file_name)
    print("Original graph: # nodes: %d, # edges: %d" %
          (g.number_of_nodes(), g.number_of_edges()))

    id_mapping = {}
    i2s_mapping = {}

    index = 0

    for (u, v) in g.edges():

        if u not in id_mapping:
            id_mapping[u] = index
            i2s_mapping[index] = u
            index += 1

        if v not in id_mapping:
            id_mapping[v] = index
            i2s_mapping[index] = v
            index += 1

    new_edges = [(id_mapping[u], id_mapping[v]) for (u, v) in g.edges()]

    new_g = nx.DiGraph()
    new_g.add_edges_from(new_edges)

    if reverse:
        print("edge reversed...")
        new_g = new_g.reverse(copy=False)

    print("New graph: # nodes: %d, # edges: %d" %
          (new_g.number_of_nodes(), new_g.number_of_edges()))
    nodes = list(new_g.nodes())
    print("New graph: min(node id): %d, max(node id):%d" %
          (min(nodes), max(nodes)))
    print("is Directed Acyclic Graph: %s " %
          nx.is_directed_acyclic_graph(new_g))

    nx.write_edgelist(new_g, output_file_name, data=False)

    print("# instances in mapping: %d (%d)" %
          (len(id_mapping), len(i2s_mapping)))
    mapping = {"s2i": id_mapping, "i2s": i2s_mapping}

    mapping_file = os.path.abspath(input_file_name).split(
        ".")[0] + "_id_mapping.pkl"
    print("id mapping file is saved: %s" % mapping_file)
    print("mappged graph file is saved at: %s" % output_file_name)
    with open(mapping_file, "wb") as f:
        pickle.dump(mapping, f)
    return output_file_name, mapping_file
import networkx as nx
import sys, os, datetime
sys.path.insert(1,
                os.path.dirname(os.path.dirname(os.path.realpath(__file__))))
from utils import io_utils

# Author: Katya Gurgel
# Description: a script cleaning up unweighted SNAP graphs encoded by pairs of
# nodes on each line, each representing individual edges. The output is printed.

# Usage: python ./snap_cleanup.py INPUT_FILE > OUTPUT_DIR/OUTPUT_FILE

G = nx.read_edgelist(sys.argv[1])

print('# {} {} {}'.format(datetime.datetime.now(),
                          os.popen('git rev-parse HEAD').read().strip(),
                          sys.argv[1]))

io_utils.print_uw_graph(G)
Example #55
import networkx as nx
import math
import matplotlib.pyplot as plt


def avg_degree(A):
    degree = A.degree()
    Average_degree = sum(degree.values()) / float(len(A))
    return Average_degree


S = nx.Graph()
G = nx.read_edgelist("edges.txt", delimiter=",")
k = avg_degree(G)
print("The average degree of original graph is \t", k)

#propabilty of edge creation
p = k / (G.number_of_nodes() - 1)

S.add_edges_from((nx.fast_gnp_random_graph(G.number_of_nodes(),
                                           p,
                                           seed=None,
                                           directed=False)).edges())
c = avg_degree(S)

avg_clusco = nx.average_clustering(S)
print("Average local Clustering\t" + str(avg_clusco))

#average path length
apl = math.log(S.number_of_nodes()) / float(math.log(c))
import gzip

import networkx as nx
import numpy as np
import webget

webget.download("https://snap.stanford.edu/data/twitter_combined.txt.gz")

with gzip.open('twitter_combined.txt.gz') as f:
    g = nx.read_edgelist(f)

Example #57
if __name__ == "__main__":
    # G=nx.read_edgelist('../data/wiki/Wiki_edgelist.txt',
    #                      create_using = nx.DiGraph(), nodetype = None, data = [('weight', int)])
    #
    #
    # model = Node2Vec(G, walk_length=10, num_walks=80,
    #                  p=0.25, q=4, workers=1, use_rejection_sampling=0)
    # model.train(embed_size=64,window_size = 5, iter = 3)
    # embeddings=model.get_embeddings()
    # print(embeddings)
    #
    # evaluate_embeddings(embeddings)
    # plot_embeddings(embeddings)

    G = nx.read_edgelist('../data/text.txt',
                         create_using=nx.DiGraph(),
                         nodetype=None,
                         data=[('weight', int)])

    model = Node2Vec(G,
                     walk_length=10,
                     num_walks=80,
                     p=0.25,
                     q=4,
                     workers=1,
                     use_rejection_sampling=0)
    model.train(embed_size=4, window_size=5, iter=3)
    embeddings = model.get_embeddings()
    print(embeddings)
    #print('dict_2:', dict_2)
    return dict_1


dict_1 = create_dict(hosts_list)

# pos = open('pickles/dict_2.pkl','wb')
# pickle.dump(dict_2, pos)
# pos.close()
pos = open('pickles/dict_2.pkl', 'rb')
dict_2 = pickle.load(pos)
pos.close()

# load dataset to a directed graph
G = nx.read_edgelist('dataset/edgelist.txt',
                     delimiter='\t',
                     create_using=nx.DiGraph())
nodes_list = nx.nodes(G)
print('**Nodes list**', nodes_list)
# print(len(nodes_list))

# *************************************************************************
# compute in-degree and out-degree features


def in_out_degree(G, dim):
    num_rows = 0
    out_degree_matrix = np.zeros((dim, 1))
    in_degree_matrix = np.zeros((dim, 1))
    for k in hosts_list:
        if k in nodes_list:
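
# The function above is truncated by the snippet boundary; a hedged sketch of
# how such an in/out-degree feature matrix might look (assumed logic, not
# recovered from the original source):
import numpy as np

def in_out_degree_sketch(G, hosts):
    out_deg = np.zeros((len(hosts), 1))
    in_deg = np.zeros((len(hosts), 1))
    for row, k in enumerate(hosts):
        if k in G:                      # skip hosts absent from the graph
            out_deg[row, 0] = G.out_degree(k)
            in_deg[row, 0] = G.in_degree(k)
    return in_deg, out_deg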
Beispiel #59
0
def main(args, jupyter=False):
    ROOT_DIR = os.path.dirname(os.path.abspath(__file__))

    seed = args.seed
    edgelist = os.path.join(
        ROOT_DIR,
        '../data/{}/train/{}_{}_seed_{}.txt'.format(args.sampling_method,
                                                    args.graph,
                                                    args.percent_test, seed))
    if not args.bipartite:
        G = nx.read_edgelist(edgelist)
    else:
        bip_edges = list()
        A = set()
        B = set()
        for line in open(edgelist, 'r'):
            a, b = line.strip().split()
            A.add(a)
            B.add(b)
            bip_edges.append((a, b))
        G = nx.Graph()
        G.add_nodes_from(A, bipartite=0)
        G.add_nodes_from(B, bipartite=1)
        G.add_edges_from(bip_edges)

    test_path = os.path.join(
        ROOT_DIR,
        '../data/{}/test/{}_{}_seed_{}.txt'.format(args.sampling_method,
                                                   args.graph,
                                                   args.percent_test, seed))

    output_dir = os.path.join(ROOT_DIR,
                              '../output/{}/'.format(args.sampling_method))
    emb_path = os.path.join(
        output_dir,
        '{}_{}_{}_seed_{}.emb'.format(args.embedding_method, args.graph,
                                      args.percent_test, seed))
    if not os.path.exists(output_dir):
        os.mkdir(output_dir)
    if not args.output_override:
        output_path = os.path.join(
            output_dir, '{}_{}_{}_{}_{}_{}_{}_k_{}.txt'.format(
                args.method, args.graph, args.embedding_method,
                args.percent_test, args.exact_search_tolerance,
                args.bailout_tol, seed, args.k))
    else:
        output_path = args.output_override

    # dispatch table for the embedding back end; each entry maps a method name
    # to its class and any extra keyword arguments (this replaces the original
    # repeated if/elif construction)
    emb_methods = {
        'netmf1': (NetMF, {'window_size': 1}),
        'netmf2': (NetMF, {'window_size': 2}),
        'bine': (BiNE, {}),
        'aa': (AA, {}),
    }
    emb_cls, emb_kwargs = emb_methods[args.embedding_method]
    embeddings = emb_cls(args.embedding_method,
                         edgelist,
                         test_path,
                         emb_path,
                         G,
                         normalize=True,
                         **emb_kwargs)
    if args.force_emb or not os.path.exists(emb_path):
        if os.path.exists(emb_path.replace('.emb', '_nodeX.npy')):
            os.remove(emb_path.replace('.emb', '_nodeX.npy'))
        embeddings.run(G)

    # shared positional and keyword arguments factored out of the repeated
    # selector constructions below (behavior unchanged)
    common_args = (args.method, G, args.k, embeddings, output_path)
    common_kwargs = {'seed': seed, 'bipartite': args.bipartite}

    if args.method == 'lapm':
        sel = LaPMSelector(*common_args, **common_kwargs)
        load_embeddings = True
    elif args.method == 'cn':
        sel = CNSelector(*common_args, **common_kwargs)
        load_embeddings = False
    elif args.method == 'js':
        sel = JSSelector(*common_args, **common_kwargs)
        load_embeddings = False
    elif args.method == 'aa':
        sel = AASelector(*common_args, **common_kwargs)
        load_embeddings = False
    elif args.method == 'nmf+bag':
        sel = BaggingEnsemble(*common_args, **common_kwargs)
        load_embeddings = False
    elif args.method == 'LinkWaldo':
        grouping_kwargs = dict(
            DG=args.DG,
            SG=args.SG,
            CG=args.CG,
            exact_search_tolerance=args.exact_search_tolerance,
            **common_kwargs)
        num_groupings = sum(1 for flag in (args.DG, args.SG, args.CG) if flag)

        # multiple groupings use the MG selectors; with a single grouping the
        # bailout variant exists only for DG, matching the original chain
        if num_groupings > 1:
            cls = MGBailoutSelector if args.bailout_tol > 0.0 else MGSelector
        elif args.DG:
            cls = DGBailoutSelector if args.bailout_tol > 0.0 else DGSelector
        elif args.SG:
            cls = SGSelector
        elif args.CG:
            cls = CGSelector
        sel = cls(*common_args, **grouping_kwargs)
        load_embeddings = True

    sel.num_groups = args.num_groups
    sel.num_groups_alt = args.num_groups_alt
    sel.bailout_tol = args.bailout_tol
    sel.bag_epsilon = args.bag_epsilon
    sel.skip_output = args.skip_output

    embeddings.load_data(load_embeddings=load_embeddings)

    if jupyter:
        return sel

    _time = sel.select()

    sel.write_res(_time)
    if not args.skip_output:
        sel.write()
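
# A hedged usage note: main() expects an argparse-style namespace; the flag
# names below are assumptions inferred from the attribute accesses above,
# e.g. (hypothetical):
#   python main.py --graph yeast --sampling_method random \
#       --embedding_method netmf1 --method LinkWaldo --percent_test 20 \
#       --k 10000 --seed 0 --DG --SG --CG --bailout_tol 0.0 \
#       --exact_search_tolerance 25000000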
Beispiel #60
0
# Commented out in the original: the same process again, computing a training
# subset from within the test graph.
# edge_splitter_train = EdgeSplitter(graph_test, graph)
# graph_train, examples, labels = edge_splitter_train.train_test_split(
#     p=0.1, method="global")
# (examples_train, examples_model_selection,
#  labels_train, labels_model_selection) = train_test_split(
#      examples, labels, train_size=0.75, test_size=0.25)
# print(graph_train.info())

G_test_nx = nx.read_edgelist('val_positive.txt')
nodes = G_test_nx.nodes()
feature_vector = [1,1,1,1,1]
node_data = pd.DataFrame(
    [feature_vector for i in range(len(nodes))],
    index=[node for node in nodes])
graph_test = sg.StellarGraph.from_networkx(G_test_nx, node_features=node_data)

G_test_neg = nx.read_edgelist('val_negative.txt')
edges_test_neg = G_test_neg.edges()

G_train_nx = nx.read_edgelist('training.txt')
nodes_train = G_train_nx.nodes()
feature_vector = [1,1,1,1,1]
node_data = pd.DataFrame(
    [feature_vector for i in range(len(nodes_train))],