def createGraph(semantic_relationships, graph_file=None, edges_weight=None):
    '''
    Build the supervised graph G=(V, E) using the networkx library, where V is
    the set of synsets and E the set of semantic correlations between synsets
    (vertexes).

    :param semantic_relationships: mapping ``lemma -> {relationship: nodes}``
        from which the graph is built
    :param graph_file: optional cache path; if the file exists it is loaded
        and returned instead of rebuilding, otherwise the freshly built graph
        is written there
    :param edges_weight: optional iterable passed to
        ``Graph.add_weighted_edges_from`` after the semantic edges are added
    :return: the graph
    '''
    if graph_file is not None and os.path.isfile(graph_file):
        return nx.read_multiline_adjlist(graph_file)

    G = nx.Graph()
    for lemma in tqdm.tqdm(semantic_relationships.keys()):
        G.add_node(lemma)
        for relationship, nodes in semantic_relationships[lemma].items():
            for node in nodes:
                # Only link to nodes that are lemmas themselves. Testing the
                # mapping directly is a hash lookup; the previous list copy of
                # the keys made this check linear for every candidate edge.
                if node in semantic_relationships:
                    G.add_edge(lemma, node, v=relationship, weight=1.0)

    if edges_weight is not None:
        G.add_weighted_edges_from(edges_weight)
    if graph_file is not None:
        nx.write_multiline_adjlist(G, graph_file)
    return G
def GENERATE_MUL_NETWORK(NETWORK_DICT, RANDOM_DICT, N_LAYERS, path):
    """Create one network per entry of NETWORK_DICT, reweight and save them.

    NOTE (translated from the original comment): this function can be shelved
    for now; it is not really needed.

    :param NETWORK_DICT: mapping whose values carry a ``'name'`` and an
        ``'arg'`` dict forwarded to ``inf_network``
    :param RANDOM_DICT: mapping keyed by ``str(layer + 1)`` whose values carry
        a ``'fun'`` and an ``'arg'`` dict forwarded to ``change_weight``
    :param N_LAYERS: number of leading networks that get a layer index and
        new weights
    :param path: prefix prepended to every output file name
    :return: always ``True``
    """
    MULTI_NETWORK = [
        inf_network(False, None, value['name'], **value['arg'])
        for value in NETWORK_DICT.values()
    ]
    for i in range(N_LAYERS):
        MULTI_NETWORK[i].layer = i
    for i in range(N_LAYERS):
        MULTI_NETWORK[i].change_weight(RANDOM_DICT[str(i + 1)]['fun'],
                                       **RANDOM_DICT[str(i + 1)]['arg'])

    # ``with`` guarantees each handle is closed even if a write fails.
    for NETWORK in MULTI_NETWORK:
        with open(path + '{}.nt'.format(NETWORK.layer), 'wb') as out:
            nx.write_multiline_adjlist(NETWORK.network, out, delimiter=',')

    with open(path + 'detail.txt', 'w') as out:
        out.write("#NODES #LAYERS" + '\n')
        out.write("{0:^5d} {1:^5d}".format(MULTI_NETWORK[0].num_nodes,
                                           N_LAYERS) + '\n')
    return True
def save_results(self, image_name, *results):
    """
    Create a directory of the following format: current pipeline + fname.
    Save and put the results of algorithm processing in the directory.

    Args:
        | *image_name* (str): image name
        | *results* (list): a list of arguments to save; ``results[0]`` is
          the image written with ``cv2.imwrite`` and the optional
          ``results[1]`` is a graph exported as a ``|``-delimited multiline
          adjacency list

    """
    # saving the processed image
    try:
        cv2.imwrite(os.path.join(self.out_dir, image_name), results[0])
    except (IOError, cv2.error):
        print('ERROR! Could not write an image file, make sure there is ' +
              'enough free space on disk')
        sys.exit(1)
    if not self.isui:
        print('Success!', image_name, 'saved in', self.out_dir)
    # exporting graph object
    # The graph is optional: guard the index so a call that passes only the
    # image does not fail with IndexError.
    if len(results) > 1 and results[1]:
        image_name = os.path.splitext(image_name)[0] + '.txt'
        nx.write_multiline_adjlist(results[1],
                                   os.path.join(self.out_dir, image_name),
                                   delimiter='|')
        print('Success!', image_name, 'saved in', self.out_dir)
def shortest_path(src, dst, names, cost):
    """Return the minimum-weight node path from ``src`` to ``dst``.

    A directed graph over ``names`` is assembled with edge weights taken from
    ``cost(a, b)``, dumped to ``test.adjlist`` in the working directory, and
    then searched with ``nx.shortest_path`` using the ``weight`` attribute.
    """
    graph = nx.DiGraph()
    graph.add_nodes_from(names)

    position = {name: idx for idx, name in enumerate(names)}

    # Add edges from src to stations before it
    for k in range(position[src]):
        target = names[k]
        graph.add_edge(src, target, weight=cost(src, target))

    # Add edges from stations after dst to dst
    for k in range(position[dst] + 1, len(names)):
        origin = names[k]
        graph.add_edge(origin, dst, weight=cost(origin, dst))

    # Add reverse edges from every station after src back
    for hi in range(position[src] + 1, len(names)):
        for lo in range(position[src] + 1, hi):
            graph.add_edge(names[hi], names[lo],
                           weight=cost(names[hi], names[lo]))

    # Add edges from every station before dst to
    # every station after src
    for lo in range(len(names)):
        for hi in range(lo + 1, len(names)):
            if lo < position[dst] and hi > position[src]:
                graph.add_edge(names[lo], names[hi],
                               weight=cost(names[lo], names[hi]))

    nx.write_multiline_adjlist(graph, "test.adjlist")
    return nx.shortest_path(graph, src, dst, weight='weight')
def test_multiline_adjlist_delimiter(self):
    """A ':' delimiter survives an in-memory write/read round trip."""
    sep = ':'
    original = nx.path_graph(3)
    buffer = io.BytesIO()
    nx.write_multiline_adjlist(original, buffer, delimiter=sep)
    # Rewind so the reader starts at the first byte written.
    buffer.seek(0)
    restored = nx.read_multiline_adjlist(buffer, nodetype=int, delimiter=sep)
    assert_nodes_equal(list(restored), list(original))
    assert_edges_equal(list(restored.edges()), list(original.edges()))
def test_multiline_adjlist_delimiter(self):
    """Write a 3-node path graph with ":" as delimiter and read it back."""
    source_graph = nx.path_graph(3)
    stream = io.BytesIO()
    nx.write_multiline_adjlist(source_graph, stream, delimiter=":")
    stream.seek(0)  # reading starts from the current position
    loaded_graph = nx.read_multiline_adjlist(
        stream, nodetype=int, delimiter=":"
    )
    expected_nodes = list(source_graph)
    expected_edges = list(source_graph.edges())
    assert_nodes_equal(list(loaded_graph), expected_nodes)
    assert_edges_equal(list(loaded_graph.edges()), expected_edges)
def test_multiline_adjlist_delimiter(self):
    """Round-trip a path graph through a BytesIO using ':' as delimiter."""
    written = nx.path_graph(3)
    handle = io.BytesIO()
    nx.write_multiline_adjlist(written, handle, delimiter=':')
    handle.seek(0)  # back to the start before reading
    parsed = nx.read_multiline_adjlist(handle, nodetype=int, delimiter=':')
    # Compare sorted sequences so ordering differences do not matter.
    assert_equal(sorted(parsed.nodes()), sorted(written.nodes()))
    assert_equal(sorted(parsed.edges()), sorted(written.edges()))
def test_multiline_adjlist_integers(self):
    """Round-trip an integer-relabelled copy of ``self.G`` through a file."""
    (fd, fname) = tempfile.mkstemp()
    # networkx reopens the file by name, so the descriptor is not needed.
    os.close(fd)
    try:
        G = nx.convert_node_labels_to_integers(self.G)
        nx.write_multiline_adjlist(G, fname)
        H = nx.read_multiline_adjlist(fname, nodetype=int)
        # Reading the same file a second time must also succeed.
        nx.read_multiline_adjlist(fname, nodetype=int)
        assert_nodes_equal(list(H), list(G))
        assert_edges_equal(list(H.edges()), list(G.edges()))
    finally:
        # Remove the temporary file even when an assertion above fails.
        os.unlink(fname)
def test_latin1(self):
    """Non-ASCII names and attribute keys survive a latin-1 round trip."""
    G = nx.Graph()
    name1 = 'Bj' + chr(246) + 'rk'
    name2 = chr(220) + 'ber'
    G.add_edge(name1, 'Radiohead', **{name2: 3})
    fd, fname = tempfile.mkstemp()
    # networkx reopens the file by name, so the descriptor is not needed.
    os.close(fd)
    try:
        nx.write_multiline_adjlist(G, fname, encoding='latin-1')
        H = nx.read_multiline_adjlist(fname, encoding='latin-1')
        assert_graphs_equal(G, H)
    finally:
        # Remove the temporary file even when the comparison fails.
        os.unlink(fname)
def createDocumentsGraph(train, graph_file=None, sem_rel=None, sem_graph=None):
    """
    Build (or load from cache) a directed graph linking consecutive lemmas.

    Node keys have the form ``lemma + '_' + pos``. For every sentence, each
    pair of consecutive ``utils.instance`` items is connected by an edge from
    the earlier to the later one.

    :param train: mapping ``doc -> sentences``; only the ``utils.instance``
        items of each sentence are used
    :param graph_file: optional cache path; if the file exists it is loaded
        and returned, otherwise the built graph is written there
    :param sem_rel: optional mapping ``instance -> {relation: nodes}``; an
        edge is added from a lemma to each listed node already in the graph
    :param sem_graph: optional graph whose directed version seeds the result
    :return: a ``networkx.DiGraph``
    """
    if graph_file is not None and os.path.isfile(graph_file):
        # Load as a DiGraph: the reader defaults to an undirected Graph, which
        # would make the cached result differ from a freshly built one.
        return nx.read_multiline_adjlist(graph_file,
                                         create_using=nx.DiGraph())

    G = nx.DiGraph()
    if sem_graph is not None:
        G = nx.compose(G, sem_graph.to_directed())

    def _add_semantic_edges(source_key, instance_id):
        # Link source_key to every related node that is already in the graph.
        for nodes in sem_rel[instance_id].values():
            for node in nodes:
                if node in G.nodes:
                    G.add_edge(source_key, node)

    for doc in train.keys():
        for sentence in train[doc]:
            lemmas = [l for l in sentence if isinstance(l, utils.instance)]
            for i in range(1, len(lemmas)):
                lemma = lemmas[i]
                lemma_key = lemma.lemma + '_' + lemma.pos
                prev_lemma = lemmas[i - 1]
                prev_lemma_key = prev_lemma.lemma + '_' + prev_lemma.pos

                # The first lemma of a sentence never appears as "current",
                # so it is registered while handling the second one.
                if i == 1:
                    G.add_node(prev_lemma_key)
                G.add_node(lemma_key)
                G.add_edge(prev_lemma_key, lemma_key)

                if sem_rel is not None:
                    _add_semantic_edges(lemma_key, lemma.instance)
                    if i == 1:
                        _add_semantic_edges(prev_lemma_key,
                                            prev_lemma.instance)

    if graph_file is not None:
        nx.write_multiline_adjlist(G, graph_file)
    return G
def test_unicode(self):
    """Non-ASCII node names and attribute keys survive a file round trip."""
    G = nx.Graph()
    name1 = chr(2344) + chr(123) + chr(6543)
    name2 = chr(5543) + chr(1543) + chr(324)
    G.add_edge(name1, "Radiohead", **{name2: 3})
    fd, fname = tempfile.mkstemp()
    # networkx reopens the file by name, so the descriptor is not needed.
    os.close(fd)
    try:
        nx.write_multiline_adjlist(G, fname)
        H = nx.read_multiline_adjlist(fname)
        assert_graphs_equal(G, H)
    finally:
        # Remove the temporary file even when the comparison fails.
        os.unlink(fname)
def test_multiline_adjlist_digraph(self):
    """Round-trip ``self.DG`` through a file, reading it back as DiGraph."""
    G = self.DG
    (fd, fname) = tempfile.mkstemp()
    # networkx reopens the file by name, so the descriptor is not needed.
    os.close(fd)
    try:
        nx.write_multiline_adjlist(G, fname)
        H = nx.read_multiline_adjlist(fname, create_using=nx.DiGraph())
        H2 = nx.read_multiline_adjlist(fname, create_using=nx.DiGraph())
        assert H is not H2  # they should be different graphs
        assert_nodes_equal(list(H), list(G))
        assert_edges_equal(list(H.edges()), list(G.edges()))
    finally:
        # Remove the temporary file even when an assertion above fails.
        os.unlink(fname)
def test_multiline_adjlist_graph(self):
    """Round-trip ``self.G`` through a temporary file."""
    G = self.G
    (fd, fname) = tempfile.mkstemp()
    # networkx reopens the file by name, so the descriptor is not needed.
    os.close(fd)
    try:
        nx.write_multiline_adjlist(G, fname)
        H = nx.read_multiline_adjlist(fname)
        H2 = nx.read_multiline_adjlist(fname)
        assert_not_equal(H, H2)  # they should be different graphs
        assert_nodes_equal(list(H), list(G))
        assert_edges_equal(list(H.edges()), list(G.edges()))
    finally:
        # Remove the temporary file even when an assertion above fails.
        os.unlink(fname)
def test_latin1(self):
    """Write and read a graph with non-ASCII labels using latin-1."""
    G = nx.Graph()
    name1 = "Bj" + chr(246) + "rk"
    name2 = chr(220) + "ber"
    G.add_edge(name1, "Radiohead", **{name2: 3})
    fd, fname = tempfile.mkstemp()
    # networkx reopens the file by name, so the descriptor is not needed.
    os.close(fd)
    try:
        nx.write_multiline_adjlist(G, fname, encoding="latin-1")
        H = nx.read_multiline_adjlist(fname, encoding="latin-1")
        assert_graphs_equal(G, H)
    finally:
        # Remove the temporary file even when the comparison fails.
        os.unlink(fname)
def test_multiline_adjlist_digraph(self):
    """Write ``self.DG`` to a file and read it back twice as DiGraph."""
    G = self.DG
    (fd, fname) = tempfile.mkstemp()
    # networkx reopens the file by name, so the descriptor is not needed.
    os.close(fd)
    try:
        nx.write_multiline_adjlist(G, fname)
        H = nx.read_multiline_adjlist(fname, create_using=nx.DiGraph())
        H2 = nx.read_multiline_adjlist(fname, create_using=nx.DiGraph())
        assert_not_equal(H, H2)  # they should be different graphs
        assert_nodes_equal(list(H), list(G))
        assert_edges_equal(list(H.edges()), list(G.edges()))
    finally:
        # Remove the temporary file even when an assertion above fails.
        os.unlink(fname)
def test_multiline_adjlist_graph(self):
    """Write ``self.G`` to a file and compare sorted nodes and edges."""
    G = self.G
    (fd, fname) = tempfile.mkstemp()
    # networkx reopens the file by name, so the descriptor is not needed.
    os.close(fd)
    try:
        nx.write_multiline_adjlist(G, fname)
        H = nx.read_multiline_adjlist(fname)
        H2 = nx.read_multiline_adjlist(fname)
        assert_not_equal(H, H2)  # they should be different graphs
        assert_equal(sorted(H.nodes()), sorted(G.nodes()))
        assert_equal(sorted(H.edges()), sorted(G.edges()))
    finally:
        # Remove the temporary file even when an assertion above fails.
        os.unlink(fname)
def barabasi_albert(n, m):
    """Generates a barabasi albert graph given the number of nodes and the m value

    Starts from a complete graph on ``m + 1`` nodes and adds the remaining
    nodes one at a time. Each new node gets ``m`` edges to distinct existing
    nodes, drawn with probability proportional to their current degree.
    Intermediate snapshots and the final graph are written as comma-delimited
    multiline adjacency lists under ``../graphs/barabasi-albert/``.

    Returns the generated graph.
    """
    click.echo('generating barabasi albert model with n = {}, m = {}'.format(
        n, m))
    m0 = m + 1
    G = nx.complete_graph(m0)
    for i in range(m0, n):
        G.add_node(i)

        # Cumulative connection probabilities of the existing nodes 0..i-1,
        # stored as (node id, cumulative probability) pairs.
        gradosNodos = nx.degree(G)
        sumaGradosNodos = sum(gradosNodos[nodo] for nodo in range(i))
        probAcumulada = []
        aux = 0
        for idNodo in range(i):
            aux += float(gradosNodos[idNodo]) / sumaGradosNodos
            probAcumulada.append((idNodo, aux))
        ultimo = len(probAcumulada) - 1

        conexiones = 0
        nodosAdded = set()
        while conexiones < m:
            num = random.random()
            actual = 0
            # Stop at the last entry: float rounding can leave the final
            # cumulative value slightly below ``num``, and stepping past it
            # would index out of range.
            while actual < ultimo and probAcumulada[actual][1] < num:
                actual += 1
            idDestino = probAcumulada[actual][0]
            # Each new node connects to ``m`` distinct targets.
            if idDestino not in nodosAdded:
                nodosAdded.add(idDestino)
                G.add_edge(i, idDestino)
                conexiones += 1

        # NOTE(review): the source layout does not show whether this snapshot
        # sits inside the inner loop; once per added node is assumed here.
        if i < 501 and i % 25 == 0 or (i < 20 and i % 5 == 0):
            nx.write_multiline_adjlist(
                G,
                '../graphs/barabasi-albert/steps/ba_n_{}_m_{}_steps_{:03d}.csv'
                .format(n, m, i),
                delimiter=",")
    nx.write_multiline_adjlist(G,
                               '../graphs/barabasi-albert/ba_{}_{}.csv'.format(
                                   m, n),
                               delimiter=",")
    return G
def test_multiline_adjlist_multidigraph(self):
    """Round-trip ``self.XDG`` through a file as a MultiDiGraph."""
    G = self.XDG
    (fd, fname) = tempfile.mkstemp()
    # networkx reopens the file by name, so the descriptor is not needed.
    os.close(fd)
    try:
        nx.write_multiline_adjlist(G, fname)
        H = nx.read_multiline_adjlist(fname, nodetype=int,
                                      create_using=nx.MultiDiGraph())
        H2 = nx.read_multiline_adjlist(fname, nodetype=int,
                                       create_using=nx.MultiDiGraph())
        assert_not_equal(H, H2)  # they should be different graphs
        assert_nodes_equal(H.nodes(), G.nodes())
        assert_edges_equal(H.edges(), G.edges())
    finally:
        # Remove the temporary file even when an assertion above fails.
        os.unlink(fname)
def test_unicode(self):
    """Non-ASCII node names and attribute keys survive a file round trip."""
    G = nx.Graph()
    try:  # Python 3.x
        name1 = chr(2344) + chr(123) + chr(6543)
        name2 = chr(5543) + chr(1543) + chr(324)
    except ValueError:  # Python 2.6+
        name1 = unichr(2344) + unichr(123) + unichr(6543)
        name2 = unichr(5543) + unichr(1543) + unichr(324)
    G.add_edge(name1, 'Radiohead', **{name2: 3})
    fd, fname = tempfile.mkstemp()
    # networkx reopens the file by name, so the descriptor is not needed.
    os.close(fd)
    try:
        nx.write_multiline_adjlist(G, fname)
        H = nx.read_multiline_adjlist(fname)
        assert_graphs_equal(G, H)
    finally:
        # Remove the temporary file even when the comparison fails.
        os.unlink(fname)
def test_latin1(self):
    """Non-ASCII names and attribute keys survive a latin-1 round trip."""
    G = nx.Graph()
    try:  # Python 3.x
        blurb = chr(1245)  # just to trigger the exception
        name1 = 'Bj' + chr(246) + 'rk'
        name2 = chr(220) + 'ber'
    except ValueError:  # Python 2.6+
        name1 = 'Bj' + unichr(246) + 'rk'
        name2 = unichr(220) + 'ber'
    G.add_edge(name1, 'Radiohead', **{name2: 3})
    fd, fname = tempfile.mkstemp()
    # networkx reopens the file by name, so the descriptor is not needed.
    os.close(fd)
    try:
        nx.write_multiline_adjlist(G, fname, encoding='latin-1')
        H = nx.read_multiline_adjlist(fname, encoding='latin-1')
        assert_graphs_equal(G, H)
    finally:
        # Remove the temporary file even when the comparison fails.
        os.unlink(fname)
def test_latin1(self):
    """Non-ASCII names and attribute keys survive a latin-1 round trip."""
    G = nx.Graph()
    try:  # Python 3.x
        blurb = chr(1245)  # just to trigger the exception
        name1 = "Bj" + chr(246) + "rk"
        name2 = chr(220) + "ber"
    except ValueError:  # Python 2.6+
        name1 = "Bj" + unichr(246) + "rk"
        name2 = unichr(220) + "ber"
    # Edge attributes go in as keyword arguments: networkx 2.x no longer
    # accepts the attribute dict as a third positional argument.
    G.add_edge(name1, "Radiohead", **{name2: 3})
    fd, fname = tempfile.mkstemp()
    # networkx reopens the file by name, so the descriptor is not needed.
    os.close(fd)
    try:
        nx.write_multiline_adjlist(G, fname, encoding="latin-1")
        H = nx.read_multiline_adjlist(fname, encoding="latin-1")
        assert_graphs_equal(G, H)
    finally:
        # Remove the temporary file even when the comparison fails.
        os.unlink(fname)
def load(json_name):
    """Load the stored graph for a PAN12 corpus and drop its tiny components.

    The graph is read from ``adjlists/``, every connected component with at
    most two nodes is removed, and the result is written to
    ``./adjlists/graphU.adjlist``.

    :param json_name: name of the training or test corpus
    :return: the pruned graph
    :raises ValueError: if ``json_name`` is not one of the two known corpora
    """
    # making G (networkx)
    if json_name == "pan12-sexual-predator-identification-training-corpus-2012-05-01":
        G = nx.read_multiline_adjlist(
            "adjlists/train_networkxBeforeRemove.adjlist")
    elif json_name == "pan12-sexual-predator-identification-test-corpus-2012-05-17":
        G = nx.read_multiline_adjlist(
            "adjlists/test_networkxBeforeRemove.adjlist")
    else:
        # Previously this fell through and failed later on an unbound ``G``.
        raise ValueError("Unknown corpus name: %r" % (json_name,))

    # generate picture of networkx
    # nx.draw(G, node_size=1)
    # plt.savefig("../API/client/public/models/load/networkx_before_remove.png")

    # Remove every connected component that has at most two nodes (isolated
    # nodes and isolated pairs). The components are materialised first
    # because the graph is modified while they are processed.
    for component in list(nx.connected_components(G)):
        if len(component) <= 2:
            G.remove_nodes_from(component)

    # Use the ``nx`` alias like the rest of this function; the bare
    # ``networkx`` name is not otherwise used here.
    nx.write_multiline_adjlist(G, "./adjlists/graphU.adjlist")

    # nx.draw(G, node_size=3)
    # plt.savefig("../API/client/public/models/load/networkx_after_remove.png")
    return G
def save_graph(self, is_needed, df, graph, name):
    """Export ``df`` and ``graph`` under ``data/graphs/<name>/``.

    Nothing happens unless ``is_needed`` is truthy. The data frame is written
    as TSV; the graph is written as gpickle, adjlist, multiline adjlist,
    edgelist, Cytoscape JSON, GEXF and GraphML.

    :param is_needed: skip all output when falsy
    :param df: object with a ``to_csv`` method
    :param graph: networkx graph to export
    :param name: used both as directory name and as file stem
    """
    if not is_needed:
        return

    # ``makedirs(exist_ok=True)`` also creates a missing ``data/graphs``
    # parent and avoids the check-then-create race.
    os.makedirs("data/graphs/" + name, exist_ok=True)
    base = "data/graphs/%s/%s" % (name, name)

    df.to_csv(base + ".tsv", sep="\t")
    nx.write_gpickle(graph, base + ".gpickle")
    nx.write_adjlist(graph, base + ".adjlist", delimiter="\t")
    nx.write_multiline_adjlist(
        graph,
        base + ".multiline_adjlist",
        delimiter="\t",
    )
    nx.write_edgelist(graph, base + ".edgelist", delimiter="\t")
    with open(base + ".cyjs", "w") as outfile:
        outfile.write(json.dumps(nx.cytoscape_data(graph), indent=2))
    # GEXF and GraphML are written from the converted graph returned by
    # ``stringify_list_attributes``.
    graph = stringify_list_attributes(graph)
    nx.write_gexf(graph, base + ".gexf")
    nx.write_graphml(graph, base + ".graphml")
def is_connected(self):
    """Check that every hypergraph vertex induces a connected subgraph.

    For each vertex, the subgraph of ``self.tree`` induced by the bags listed
    for it in ``self.bag_occuences()`` must be connected when viewed as
    undirected. The first offending vertex is logged, together with a dump of
    its subgraph, and ``False`` is returned. Otherwise returns ``True``.
    """
    bags_of = self.bag_occuences()
    for vertex in self.hypergraph.nodes_iter():
        logging.debug("vertex %s" % vertex)
        induced = self.tree.subgraph(bags_of[vertex])
        if nx.is_connected(induced.to_undirected()):
            continue

        logging.error(
            'Subgraph induced by vertex "%s" is not connected' % vertex)
        dump = StringIO()
        nx.write_multiline_adjlist(induced, dump)
        logging.error('Involved bags: %s' % bags_of[vertex])
        logging.error(
            'Nodes of the hypergraph (should be the same): %s' %
            induced.nodes())
        logging.error('Begin Adjacency Matrix')
        # we skip comments from networkx
        body_lines = dump.getvalue().split('\n')[3:-1]
        for row in body_lines:
            logging.error('%s' % row)
        logging.error('End Adjacency Matrix')
        return False
    return True
def erdos_renyi(n, p, total):
    """ Generates an erdos renyi graph, also called random graph, given the
    number of nodes 'n' the probability 'p' and the total of graphs to
    generate with those variables

    Each generated graph is written as a comma-delimited multiline adjacency
    list under ``../graphs/erdos-renyi/``.
    """
    click.echo('generating {} erdos renyi model(s) with n = {}, p = {}'.format(
        total, n, p))
    for graph_idx in range(total):
        # Start from n unconnected nodes.
        G = nx.Graph()
        G.add_nodes_from(range(n))
        for first in range(n):
            # Pair ``first`` with every later node so that each unordered
            # pair is considered exactly once.
            for second in range(first + 1, n):
                # Draw from [0.0, 1.0); the pair is linked when the draw is
                # less than or equal to p.
                if random.random() <= p:
                    G.add_edge(first, second)
        target = '../graphs/erdos-renyi/erdos_renyi_{}_n{}_p{}.csv'.format(
            graph_idx, n, p)
        nx.write_multiline_adjlist(G, target, delimiter=",")
def getFeatureGraph(mAllData,
                    dEdgeThreshold=0.30,
                    bResetGraph=True,
                    dMinDivergenceToKeep=np.log2(10e5)):
    """Load or build the feature graph and the list of useful feature names.

    When ``bResetGraph`` is false, the graph is read from
    ``graphAdjacencyList.txt`` and the names from
    ``usefulFeatureNames.pickle``. If that fails, or ``bResetGraph`` is true
    (the default), the graph is rebuilt from ``mAllData`` and both files are
    rewritten.

    :param mAllData: 2-D data; ``np.shape(mAllData)[1]`` is taken as the
        feature count and column 0 is skipped
    :param dEdgeThreshold: forwarded to ``addEdgeAboveThreshold`` via the
        work queue
    :param bResetGraph: force recomputation instead of loading cached files
    :param dMinDivergenceToKeep: a feature is kept only if the absolute value
        of its nan-ignoring column mean exceeds this value
    :return: tuple ``(g, saUsefulFeatureNames)``
    """
    try:
        if bResetGraph:
            # Raising here reuses the "load failed" path below.
            raise Exception("User requested graph recreation.")
        print("Trying to load graph...")
        g = read_multiline_adjlist("graphAdjacencyList.txt")
        with open("usefulFeatureNames.pickle", "rb") as fIn:
            saUsefulFeatureNames = pickle.load(fIn)
        print("Trying to load graph... Done.")
        return g, saUsefulFeatureNames
    except Exception as e:
        print("Trying to load graph... Failed:\n%s\n Recomputing..." %
              (str(e)))

    # DEBUG LINES
    print("Got data of size %s." % (str(np.shape(mAllData))))
    print("Extracting graph...")
    #############
    # Init graph

    # Determine meaningful features (with a divergence of more than MIN_DIVERGENCE from the control mean)
    iFeatureCount = np.shape(mAllData)[1]
    mMeans = np.nanmean(mAllData, 0)  # Ignore nans
    # vUseful[k] refers to data column k + 1 because the range starts at 1.
    vUseful = [
        abs(mMeans[iFieldNum]) - dMinDivergenceToKeep > 0.00
        for iFieldNum in range(1, iFeatureCount)
    ]
    # The names are sliced from 1 as well, so saFeatures[k] lines up with
    # vUseful[k].
    saFeatures = getFeatureNames()[1:iFeatureCount]
    saUsefulIndices = [
        iFieldNum for iFieldNum, _ in enumerate(saFeatures)
        if vUseful[iFieldNum]
    ]
    saUsefulFeatureNames = [
        saFeatures[iFieldNum] for iFieldNum in saUsefulIndices
    ]
    iUsefulFeatureCount = len(saUsefulIndices)
    print("Keeping %d features out of %d." %
          (len(saUsefulIndices), len(saFeatures)))
    ###############################

    g = nx.Graph()
    print("Adding nodes...")
    # Add a node for each feature
    lIndexedNames = enumerate(saFeatures)  # not used below
    for idx in saUsefulIndices:
        # Only act on useful features
        g.add_node(saFeatures[idx], label=idx)
    print("Adding nodes... Done.")

    # Measure correlations
    print("Creating edges for %d possible pairs..." %
          (0.5 * (iUsefulFeatureCount * iUsefulFeatureCount)))
    lCombinations = itertools.combinations(saUsefulIndices, 2)

    # Create queue and threads
    qCombination = Queue(10000)
    threads = []  # not used below
    num_worker_threads = 4
    # NOTE(review): the workers presumably take items off the queue and call
    # task_done(), since qCombination.join() is awaited below; confirm
    # against addEdgeAboveThreshold.
    for i in range(num_worker_threads):
        t = threading.Thread(target=addEdgeAboveThreshold,
                             args=(
                                 i,
                                 qCombination,
                             ))
        t.setDaemon(True)
        t.start()

    iCnt = 0
    dStartTime = clock()
    for iFirstFeatIdx, iSecondFeatIdx in lCombinations:
        # Each work item carries the pair of indices (twice), the shared
        # graph, the data, the feature names and the threshold.
        qCombination.put(
            (iFirstFeatIdx, iSecondFeatIdx, g, mAllData, saFeatures,
             iFirstFeatIdx, iSecondFeatIdx, dEdgeThreshold))

        # DEBUG LINES
        if iCnt != 0 and (iCnt % 1000 == 0):
            sys.stdout.write(".")
            # NOTE(review): the source layout does not show whether this
            # check is nested; it is nested here because evaluating it at
            # iCnt == 0 would divide by zero below.
            if iCnt % 10000 == 0 and (iCnt != 10000):
                dNow = clock()
                dRate = ((dNow - dStartTime) / iCnt)
                dRemaining = (0.5 *
                              (iUsefulFeatureCount * iUsefulFeatureCount) -
                              iCnt) * dRate
                sys.stdout.write(
                    "%d (Estimated remaining (sec): %4.2f - Working at a rate of %4.2f pairs/sec)\n"
                    % (iCnt, dRemaining, 1.0 / dRate))

        iCnt += 1
    #############

    print("Waiting for completion...")
    qCombination.join()
    print("Total time (sec): %4.2f" % (clock() - dStartTime))
    print("Creating edges for %d possible pairs... Done." %
          (0.5 * (iUsefulFeatureCount * iUsefulFeatureCount)))

    print("Extracting graph... Done.")

    print("Removing single nodes... Nodes before removal: %d" %
          (g.number_of_nodes()))
    # Drop nodes that have no neighbours, repeating until none remain.
    toRemove = [
        curNode for curNode in g.nodes().keys() if len(g[curNode]) == 0
    ]
    while len(toRemove) > 0:
        g.remove_nodes_from(toRemove)
        toRemove = [
            curNode for curNode in g.nodes().keys() if len(g[curNode]) == 0
        ]
        print("Nodes after removal step: %d" % (g.number_of_nodes()))
    print("Removing single nodes... Done. Nodes after removal: %d" %
          (g.number_of_nodes()))

    # Cache both results so a later call with bResetGraph=False can load
    # them.
    print("Saving graph...")
    write_multiline_adjlist(g, "graphAdjacencyList.txt")
    with open("usefulFeatureNames.pickle", "wb") as fOut:
        pickle.dump(saUsefulFeatureNames, fOut)
    print("Saving graph... Done.")

    print("Trying to load graph... Done.")
    return g, saUsefulFeatureNames
def main(rdfLocation, outputType):
    """Main function which parses the rdf graph then sends it to be queried.

    It uses the result from the queryAll function to call the required file
    parser. If multiple files are detected it will parse them all.

    Args:
        rdfLocation: path of an RDF file, or the RDF data itself when the
            value is not an existing file.
        outputType: one of "gml", "adj", "mladj", "graphml", "pajek" or
            "neo4j"; any other value writes nothing.

    Returns:
        Final Network output to file in a variety of common network file
        formats. (The function itself has no return statement.)
    """
    files = []  # not used below

    # Parse The RDF Graph Object by first checking if it is a file location or the file itself.
    # ``g`` is not defined in this function; it comes from the enclosing
    # module.
    if os.path.isfile(rdfLocation):
        g.parse(location=rdfLocation)
    else:
        g.parse(data=rdfLocation)

    # Specify Output File Type For graph Object
    outputFileType = outputType

    # Create NetworkX Graph object
    G = nx.MultiDiGraph()

    # Extract the file types from the rdf as this will be used to call a custom query for a certain filetype
    fileInfo = g.query("""SELECT ?name ?filetype
       WHERE {
          ?s network:fileType ?filetype .
          ?s network:fileName ?name .
       }""")

    # Loop through files contained within the dataset and issue relevant queries
    for element in fileInfo:

        # Extract the FileType and Name from the query result
        fileName = str(element[0])
        fileType = str(element[1])

        # Call XML functions
        if fileType == "xml":
            print "Found XML File"
            xmlData = queryAll(fileName,fileType)
            for f in xmlData:
                G = FP.parseXML(f,G)

        elif fileType == "json":
            print "Found JSON file"
            jsonData = queryAll(fileName,fileType)
            for f in jsonData:
                G = FP.parseJSON(f,G)

        elif fileType == "csv":
            print "Found CSV file"
            csvData = queryAll(fileName,fileType)
            for f in csvData:
                print f
                G = FP.parseCSV(f,G)

        elif fileType == "excel":
            print "Found Excel File"
            excelData = queryAll(fileName,fileType)
            for f in excelData:
                G = FP.parseEXCEL(f,G)

        else:
            print "Currently Not a Supported File"

        # NOTE(review): the source layout does not show whether this message
        # is printed per file or once after the loop; per file is assumed.
        print "Finished processing file"

    # Write out the graph object to file -------------------------------------------------------------------------------------
    if outputFileType == "gml":
        nx.write_gml(G,'output.gml')
    elif outputFileType == "adj":
        nx.write_adjlist(G,"output.adj")
    elif outputFileType == "mladj":
        nx.write_multiline_adjlist(G,"output.adjlist")
    elif outputFileType == "graphml":
        nx.write_graphml(G, "output.graphml")
    elif outputFileType == "pajek":
        nx.write_pajek(G, "output.net")
    elif outputFileType == "neo4j":
        # Update to address of the Neo4j server
        results = neonx.write_to_neo("http://localhost:7474/db/data/", G, 'LINKS_TO')
# Example script: read a weighted edge list and write the same graph in every
# output format listed below. The output directories are used as given.
import networkx as nx
from networkx.readwrite import json_graph
import json

if __name__ == '__main__':
    # Nodes are parsed as ints; the single data column is read as a float
    # ``weight`` attribute.
    graph = nx.read_edgelist('input/example_graph.edgelist', nodetype=int, data=(('weight', float),))
    assert isinstance(graph, nx.Graph)

    print 'edges:', graph.edges()

    # raw
    nx.write_adjlist(graph, 'output_raw/example_graph.adjlist')
    nx.write_multiline_adjlist(graph, 'output_raw/example_graph.multiline_adjlist')
    nx.write_edgelist(graph, 'output_raw/example_graph.edgelist')

    # better serialization
    nx.write_gpickle(graph, 'output_serialization/example_graph.pickle')
    nx.write_yaml(graph, 'output_serialization/example_graph.yaml')
    nx.write_graph6(graph, 'output_serialization/example_graph.graph6')

    # xml
    nx.write_gexf(graph, 'output_xml/example_graph.gexf')
    nx.write_graphml(graph, 'output_xml/example_graph.graphml')

    # json
    with open('output_json/node_link.json', 'w') as outfile:
        json.dump(json_graph.node_link_data(graph), outfile, indent=2)

    with open('output_json/adjacency.json', 'w') as outfile:
        json.dump(json_graph.adjacency_data(graph), outfile, indent=2)
dt = 'Deatht' + chr(246) + 'ngue'

G = nx.Graph()
G.add_edge(hd, mh)
G.add_edge(mc, st)
G.add_edge(boc, mc)
G.add_edge(boc, dt)
G.add_edge(st, dt)
G.add_edge(q, st)
G.add_edge(dt, mh)
G.add_edge(st, mh)

# write in UTF-8 encoding
# The handle is closed before the file is read again, so buffered data is
# on disk and the descriptor is not leaked.
with open('edgelist.utf-8', 'wb') as fh:
    fh.write('# -*- coding: utf-8 -*-\n'.encode('utf-8'))  # encoding hint for emacs
    nx.write_multiline_adjlist(G, fh, delimiter='\t', encoding='utf-8')

# read and store in UTF-8
with open('edgelist.utf-8', 'rb') as fh:
    H = nx.read_multiline_adjlist(fh, delimiter='\t', encoding='utf-8')

# Report any node that did not survive the round trip.
for n in G.nodes():
    if n not in H:
        print(False)

print(list(G.nodes()))

pos = nx.spring_layout(G)
nx.draw(G, pos, font_size=16, with_labels=False)
for p in pos:  # raise text positions
    pos[p][1] += 0.07
def write_graph(gr, from_user, to_user):
    """Write ``gr`` as a tab-delimited multiline adjacency list.

    The file is opened through ``get_file`` using the name returned by
    ``edgelist_fname``, which receives this function's local variables
    (``gr``, ``from_user``, ``to_user``) as keyword arguments.
    """
    # ``**locals()`` forwards every local defined so far, so adding a local
    # above this line would change the arguments passed to edgelist_fname.
    with get_file(edgelist_fname(**locals()), 'w') as fd:
        NX.write_multiline_adjlist(gr, fd, delimiter='\t')
def main():
    """Command-line entry point: read a network, cluster it and export it.

    Arguments are parsed with ``set_manta()``. The network is read from the
    file given as ``graph`` (graphml, txt edge list, gml or cyjs) or from the
    bundled demo graphml. It is converted to undirected unless ``direction``
    is set, clustered with ``cluster_graph`` and written in the format
    selected by ``f`` to the path prefix ``fp``. The process always ends
    through ``exit``.
    """
    args = set_manta().parse_args(sys.argv[1:])
    args = vars(args)
    if args['version']:
        info = VersionInfo('manta')
        logger.info('Version ' + info.version_string())
        exit(0)
    if args['graph'] != 'demo':
        filename = args['graph'].split(sep=".")
        extension = filename[len(filename) - 1]
        # see if the file can be detected
        # if not, try appending current working directory and then read.
        if not os.path.isfile(args['graph']):
            candidate = os.getcwd() + '/' + args['graph']
            if os.path.isfile(candidate):
                # Keep the file name: the previous code stored only the
                # working directory here, so the read below could not work.
                args['graph'] = candidate
            else:
                logger.error(
                    'Could not find the specified file. Is your file path correct?'
                )
                exit()
        try:
            if extension == 'graphml':
                network = nx.read_graphml(args['graph'])
            elif extension == 'txt':
                network = nx.read_weighted_edgelist(args['graph'])
            elif extension == 'gml':
                network = nx.read_gml(args['graph'])
            elif extension == 'cyjs':
                network = read_cyjson(args['graph'])
            else:
                logger.warning(
                    'Format not accepted. '
                    'Please specify the filename including extension (e.g. test.graphml).',
                    exc_info=True)
                exit()
        except Exception:
            logger.error('Could not import network file!', exc_info=True)
            exit()
    else:
        # The demo network ships as graphml. Recording its extension keeps
        # the direction check below from hitting an unbound name.
        extension = 'graphml'
        path = os.path.dirname(manta.__file__)
        path = path + '//demo.graphml'
        network = nx.read_graphml(path)
    # first need to convert network to undirected
    # NOTE(review): the source layout does not show which ``if`` the ``else``
    # belongs to; it is attached to the direction flag here, so a network is
    # made undirected only when direction was not requested.
    if args['direction']:
        if extension == 'txt':
            logger.warning(
                'Directed networks from edge lists not supported, use graphml or cyjs! '
            )
            exit()
    else:
        network = nx.to_undirected(network)
    if args['bin']:
        orig_edges = dict()
        # store original edges for export
        for edge in network.edges:
            orig_edges[edge] = network.edges[edge]['weight']
            network.edges[edge]['weight'] = np.sign(
                network.edges[edge]['weight'])
    results = cluster_graph(network,
                            limit=args['limit'],
                            max_clusters=args['max'],
                            min_clusters=args['min'],
                            min_cluster_size=args['ms'],
                            iterations=args['iter'],
                            subset=args['subset'],
                            ratio=args['ratio'],
                            edgescale=args['edgescale'],
                            permutations=args['perm'],
                            verbose=args['verbose'])
    graph = results[0]
    if args['cr']:
        perm_clusters(graph=graph,
                      limit=args['limit'],
                      max_clusters=args['max'],
                      min_clusters=args['min'],
                      min_cluster_size=args['ms'],
                      iterations=args['iter'],
                      ratio=args['ratio'],
                      partialperms=args['perm'],
                      relperms=args['rel'],
                      subset=args['subset'],
                      error=args['error'],
                      verbose=args['verbose'])
    layout = None
    if args['bin']:
        # Restore the weights that were replaced by their sign above.
        for edge in network.edges:
            network.edges[edge]['weight'] = orig_edges[edge]
    if args['layout']:
        layout = generate_layout(graph, args['tax'])
    if args['fp']:
        if args['f'] == 'graphml':
            nx.write_graphml(graph, args['fp'] + '.graphml')
        elif args['f'] == 'edgelist':
            nx.write_weighted_edgelist(graph, args['fp'] + '.txt')
        elif args['f'] == 'gml':
            nx.write_gml(graph, args['fp'] + '.gml')
        elif args['f'] == 'adj':
            nx.write_multiline_adjlist(graph, args['fp'] + '.txt')
        elif args['f'] == 'cyjs':
            write_cyjson(graph=graph,
                         filename=args['fp'] + '.cyjs',
                         layout=layout)
        logger.info('Wrote clustered network to ' + args['fp'] + '.' +
                    args['f'])
    else:
        logger.error('Could not write network to disk, no file path given.')
    exit(0)
boc = "Blue " + chr(214) + "yster Cult"
dt = "Deatht" + chr(246) + "ngue"

G = nx.Graph()
G.add_edge(hd, mh)
G.add_edge(mc, st)
G.add_edge(boc, mc)
G.add_edge(boc, dt)
G.add_edge(st, dt)
G.add_edge(q, st)
G.add_edge(dt, mh)
G.add_edge(st, mh)

# write in UTF-8 encoding
# The handle is closed before the file is read again, so buffered data is
# on disk and the descriptor is not leaked.
with open("edgelist.utf-8", "wb") as fh:
    nx.write_multiline_adjlist(G, fh, delimiter="\t", encoding="utf-8")

# read and store in UTF-8
with open("edgelist.utf-8", "rb") as fh:
    H = nx.read_multiline_adjlist(fh, delimiter="\t", encoding="utf-8")

# Report any node that did not survive the round trip.
for n in G.nodes():
    if n not in H:
        print(False)

print(list(G.nodes()))

pos = nx.spring_layout(G)
nx.draw(G, pos, font_size=16, with_labels=False)
for p in pos:  # raise text positions
    pos[p][1] += 0.07
# For every ordered pair of distinct co-authors of a paper, store the value
# returned by ``cosine`` for their embeddings in ``simadj``.
for author1 in authors:
    papers = author_paper[author1]
    for paper in papers:
        coauthors = paper_author[paper]
        for author2 in coauthors:
            if author1 != author2:
                emb1 = embs[author1]
                emb2 = embs[author2]
                n_author1 = nodes[author1]
                n_author2 = nodes[author2]
                # NOTE(review): whether ``cosine`` is a similarity or a
                # distance depends on where it is imported from; confirm.
                val = cosine(emb1,emb2)
                simadj[n_author1,n_author2] = val

# Build the graph from the matrix, relabel its nodes through ``inv_nodes``
# and save it.
SG = nx.from_scipy_sparse_matrix(simadj)
SG = nx.relabel_nodes(SG, inv_nodes)
nx.write_multiline_adjlist(SG, '../data/sim_collaboration_network.adjlist')

# Paper and paper similarity graph
# read the file to create a dictionary with paperId as key and paper embedding as value
f = open("../data/paper_embeddings_64_dm.txt","r")
papers = {}
s = ""
pattern = re.compile(r'(\s){2,}')
for l in f:
    # A line containing ":" starts a new record: the record accumulated in
    # ``s`` is parsed and stored, then ``s`` restarts with this line.
    if(":" in l and s!=""):
        papers[s.split(":")[0].strip()] = np.array(ast.literal_eval(re.sub(pattern, ',', s.split(":")[1]).replace(" ",",")))
        s = l.replace("\n","")
    else:
        # Any other line is appended to the record being accumulated.
        s = s+" "+l.replace("\n","")
def barabasi_albert(nodos, m):
    """Generate a Barabasi-Albert graph with ``nodos`` nodes.

    Starts from a complete graph on ``m + 1`` nodes and adds the remaining
    nodes one at a time. Each new node gets ``m`` edges to distinct existing
    nodes, chosen by preferential attachment: a node is picked with
    probability proportional to its current degree. Snapshots are written as
    comma-delimited multiline adjacency lists under
    ``../graphs/barabasi-albert/steps/``.

    Returns the generated graph.
    """
    m0 = m + 1
    # Create the graph with an initial distribution of m0 nodes, each with at
    # least one link.
    G = nx.complete_graph(m0)

    # ------------------------------------------------------------------
    # PREPROCESSING
    # ------------------------------------------------------------------
    for i in range(m0, nodos):  # add the remaining N - m0 nodes
        G.add_node(i)  # add the new node we want to connect

        # Sum the degrees of all nodes currently in the network; it is the
        # denominator of the connection probability.
        sumaGradosNodos = 0
        for nodo in range(0, i):
            sumaGradosNodos += G.degree(nodo)

        # Probability of each existing node receiving a link from node i.
        # This is preferential attachment: nodes with more links are more
        # likely to be picked.
        # probConexion -> key: node id, value: probability
        probConexion = {}
        gradosNodos = nx.degree(G)  # degree of every node
        # Fill the dictionary using
        #     Pi = ki / SUM kj
        # where Pi is the probability that one of the new node's links
        # attaches to node i, ki is the degree of that existing node and the
        # denominator is the sum of the degrees in the network so far.
        for j in range(0, i):
            probConexion[j] = (float)(gradosNodos[j]) / sumaGradosNodos

        # Edges are added with the cumulative-probability method. A random
        # number in [0.0, 1.0) is drawn and compared against the running sum
        # of the probabilities. A node with a higher probability covers a
        # wider slice of [0, 1), so the draw falls on it more often.
        #
        # Example:
        #     probabilities            = [0.2, 0.3, 0.5]
        #     cumulative probabilities = [0.2, 0.5, 1.0]
        #     random draw n = 0.4  ->  the second node is selected
        #
        # Build a list of (node id, cumulative probability) tuples.
        probAcumulada = []
        aux = 0
        for idNodo, probabilidad in probConexion.items():
            probAcumulada.append((idNodo, aux + probabilidad))
            aux += probabilidad  # carry the running sum to the next entry
        ultimo = len(probAcumulada) - 1

        # ------------------------------------------------------------------
        # CREATING THE CONNECTIONS
        # ------------------------------------------------------------------
        # Make m connections (m edges, to m different nodes) based on the
        # data computed above.
        conexiones = 0
        nodosAdded = []  # nodes already selected for the new node
        while (conexiones < m):
            n = random.random()
            actual = 0
            # Advance while the cumulative probability is below the draw.
            # Stop at the last entry: float rounding can leave the final
            # cumulative value slightly below ``n``, and stepping past it
            # would index out of range.
            while (actual < ultimo and probAcumulada[actual][1] < n):
                actual += 1
            idDestino = probAcumulada[actual][0]  # selected target node
            # Only connect if this target is not already linked to the new
            # node.
            if idDestino not in nodosAdded:
                nodosAdded.append(idDestino)
                G.add_edge(i, idDestino)
                conexiones += 1

        # NOTE(review): the source layout does not show whether this snapshot
        # sits inside the loop above; once per added node is assumed. The
        # condition and file name use ``conexiones``, which equals ``m`` at
        # this point, so the node index ``i`` may have been intended; confirm.
        if conexiones < 501 and conexiones % 25 == 0 or (conexiones < 20 and conexiones % 5 == 0):
            nx.write_multiline_adjlist(
                G,
                '../graphs/barabasi-albert/steps/ba_steps_{:03d}.csv'.format(conexiones),
                delimiter=",")
    return G
def _traverse_and_execute(
        self, executable: GraphExecutable
) -> None:  # noqa mccabe: disable=MC0001
    """Process a single graph and load its data into the replica if required.

    Relations are visited in topological order. For each relation the
    population count is queried, sampling is prepared and the queries are
    compiled. In analyze mode only the population/sample sizes are
    collected; otherwise the relation (table data or view DDL) is pulled
    from the source and created in the target.

    To save memory after processing, the loaded dataframes are deleted
    and garbage collection is called manually.

    Args:
        executable (GraphExecutable): object that contains all of the
            necessary info for executing a sample and loading it into the
            target.

    Raises:
        SystemError: when extracting a view DDL, extracting records, or
            loading a relation into the target fails. The original
            exception is attached as ``__cause__``.
        Exception: any other error raised while processing is logged and
            re-raised unchanged.
    """
    start_time = time.time()
    if self.barf:
        # Dump the graph structure to a file named after its first node.
        first_relation = list(executable.graph.nodes)[0]
        with open(
                os.path.join(self.barf_output,
                             f'{first_relation.dot_notation}.component'),
                'wb') as cmp_file:
            nx.write_multiline_adjlist(executable.graph, cmp_file)
    try:
        logger.debug(
            f"Executing graph with {len(executable.graph)} relations in it..."
        )
        for i, relation in enumerate(
                nx.algorithms.dag.topological_sort(executable.graph)):
            relation.population_size = executable.source_adapter.scalar_query(
                executable.source_adapter.population_count_statement(
                    relation))
            logger.info(
                f'Executing source query for relation {relation.dot_notation} '
                f'({i+1} of {len(executable.graph)} in graph)...')

            relation.sampling.prepare(relation, executable.source_adapter)
            relation = RuntimeSourceCompiler.compile_queries_for_relation(
                relation, executable.graph, executable.source_adapter,
                executable.analyze)

            if executable.analyze:
                # Analyze mode: only gather sizes, nothing is written to
                # the target.
                if relation.is_view:
                    relation.population_size = "N/A"
                    relation.sample_size = "N/A"
                    logger.info(
                        f'Relation {relation.dot_notation} is a view, skipping.'
                    )
                else:
                    result = executable.source_adapter.check_count_and_query(
                        relation.compiled_query, MAX_ALLOWED_ROWS,
                        relation.unsampled).iloc[0]
                    relation.population_size = result.population_size
                    relation.sample_size = result.sample_size
                    logger.info(
                        f'Analysis of relation {relation.dot_notation} completed in {duration(start_time)}.'
                    )
            else:
                executable.target_adapter.create_database_if_not_exists(
                    relation.quoted(relation.database))
                executable.target_adapter.create_schema_if_not_exists(
                    relation.quoted(relation.database),
                    relation.quoted(relation.schema))
                if relation.is_view:
                    logger.info(
                        f'Retrieving DDL statement for view {relation.dot_notation} in source...'
                    )
                    relation.population_size = "N/A"
                    relation.sample_size = "N/A"
                    try:
                        relation.view_ddl = executable.source_adapter.scalar_query(
                            relation.compiled_query)
                    except Exception as exc:
                        raise SystemError(
                            f'Failed to extract DDL statement: {relation.compiled_query}'
                        ) from exc
                    logger.info(
                        f'Successfully extracted DDL statement for view {relation.quoted_dot_notation}'
                    )
                else:
                    logger.info(
                        f'Retrieving records from source {relation.dot_notation}...'
                    )
                    try:
                        relation.data = executable.source_adapter.check_count_and_query(
                            relation.compiled_query, MAX_ALLOWED_ROWS,
                            relation.unsampled)
                    except Exception as exc:
                        raise SystemError(
                            f'Failed execution of extraction sql statement: {relation.compiled_query} {exc}'
                        ) from exc

                    relation.sample_size = len(relation.data)
                    logger.info(
                        f'{relation.sample_size} records retrieved for relation {relation.dot_notation}.'
                    )

                logger.info(
                    f'Inserting relation {relation.quoted_dot_notation} into target...'
                )
                try:
                    executable.target_adapter.create_and_load_relation(
                        relation)
                except Exception as exc:
                    raise SystemError(
                        f'Failed to load relation {relation.quoted_dot_notation} into target: {exc}'
                    ) from exc

                logger.info(
                    f'Done replication of relation {relation.dot_notation} in {duration(start_time)}.'
                )
                relation.target_loaded = True

            relation.source_extracted = True
            logger.info(
                f'population:{relation.population_size}, sample:{relation.sample_size}'
            )
            if self.barf:
                # Keep a copy of the compiled query next to the graph dump.
                with open(
                        os.path.join(self.barf_output,
                                     f'{relation.dot_notation}.sql'),
                        'w') as barf_file:
                    barf_file.write(relation.compiled_query)

        # Free the loaded data. Each relation is guarded on its own so that
        # one relation without a ``data`` attribute does not stop the
        # cleanup of the relations that follow it.
        for relation in executable.graph.nodes:
            try:
                del relation.data
            except AttributeError:
                pass
        gc.collect()
    except Exception as exc:
        logger.error(f'failed with error of type {type(exc)}: {str(exc)}')
        raise
def write_adjacency(graph, fname):
    """Save ``graph`` to ``fname`` as a networkx multiline adjacency list.

    :param graph: the networkx graph to serialize
    :param fname: destination passed straight through to networkx
    """
    destination = fname
    nx.write_multiline_adjlist(graph, destination)
# Build a small undirected graph; the node labels (hd, mh, mc, st, boc, dt, q)
# are expected to be defined earlier in the file.
G = nx.Graph()
G.add_edge(hd, mh)
G.add_edge(mc, st)
G.add_edge(boc, mc)
G.add_edge(boc, dt)
G.add_edge(st, dt)
G.add_edge(q, st)
G.add_edge(dt, mh)
G.add_edge(st, mh)

# write in UTF-8 encoding
# The file is closed (and therefore flushed) before it is read back below.
with open('edgelist.utf-8', 'wb') as fh:
    fh.write('# -*- coding: utf-8 -*-\n'.encode('utf-8'))  # encoding hint for emacs
    nx.write_multiline_adjlist(G, fh, delimiter='\t', encoding='utf-8')

# read and store in UTF-8
with open('edgelist.utf-8', 'rb') as fh:
    H = nx.read_multiline_adjlist(fh, delimiter='\t', encoding='utf-8')

# Report any node that did not survive the write/read round trip.
for n in G.nodes():
    if n not in H:
        print(False)

print(list(G.nodes()))

pos = nx.spring_layout(G)
nx.draw(G, pos, font_size=16, with_labels=False)
for p in pos:  # raise text positions
    pos[p][1] += 0.07
def save(G, fname):
    """Write graph ``G`` to the file ``fname`` as a multiline adjacency list.

    The file is opened in binary mode and closed when writing finishes, so
    the data is flushed to disk and the handle is not leaked.

    :param G: the networkx graph to save
    :param fname: path of the file to create or overwrite
    """
    with open(fname, 'wb') as fh:
        nx.write_multiline_adjlist(G, fh)