def test_multiline_adjlist_integers(self):
    """Round-trip an integer-relabelled copy of ``self.G`` through a file.

    The graph is written with ``write_multiline_adjlist`` and read back with
    ``nodetype=int``; nodes and edges must match.  The temporary file is
    removed even when an assertion fails.
    """
    (fd, fname) = tempfile.mkstemp()
    try:
        G = nx.convert_node_labels_to_integers(self.G)
        nx.write_multiline_adjlist(G, fname)
        H = nx.read_multiline_adjlist(fname, nodetype=int)
        assert_nodes_equal(list(H), list(G))
        assert_edges_equal(list(H.edges()), list(G.edges()))
    finally:
        # Always release the descriptor and delete the file.
        os.close(fd)
        os.unlink(fname)
def test_multiline_adjlist_graph(self):
    """Round-trip ``self.G`` through a multiline adjacency list file.

    Two independent reads must produce distinct graph objects, and the
    nodes and edges read back must equal those of the original graph.
    The temporary file is removed even when an assertion fails.
    """
    G = self.G
    (fd, fname) = tempfile.mkstemp()
    try:
        nx.write_multiline_adjlist(G, fname)
        H = nx.read_multiline_adjlist(fname)
        H2 = nx.read_multiline_adjlist(fname)
        assert_not_equal(H, H2)  # they should be different graphs
        assert_equal(sorted(H.nodes()), sorted(G.nodes()))
        assert_equal(sorted(H.edges()), sorted(G.edges()))
    finally:
        # Always release the descriptor and delete the file.
        os.close(fd)
        os.unlink(fname)
def test_multiline_adjlist_digraph(self):
    """Round-trip the directed graph ``self.DG`` through a file.

    The file is read back twice with ``create_using=nx.DiGraph()``; the two
    results must be distinct objects whose nodes and edges match the
    original.  The temporary file is removed even when an assertion fails.
    """
    G = self.DG
    (fd, fname) = tempfile.mkstemp()
    try:
        nx.write_multiline_adjlist(G, fname)
        H = nx.read_multiline_adjlist(fname, create_using=nx.DiGraph())
        H2 = nx.read_multiline_adjlist(fname, create_using=nx.DiGraph())
        assert_not_equal(H, H2)  # they should be different graphs
        assert_nodes_equal(list(H), list(G))
        assert_edges_equal(list(H.edges()), list(G.edges()))
    finally:
        # Always release the descriptor and delete the file.
        os.close(fd)
        os.unlink(fname)
def test_multiline_adjlist_delimiter(self):
    """Write and re-read a 3-node path graph using ``':'`` as delimiter."""
    expected = nx.path_graph(3)
    buffer = io.BytesIO()
    nx.write_multiline_adjlist(expected, buffer, delimiter=':')

    # Rewind so the reader starts from the first byte that was written.
    buffer.seek(0)
    loaded = nx.read_multiline_adjlist(buffer, nodetype=int, delimiter=':')

    assert_nodes_equal(list(loaded), list(expected))
    assert_edges_equal(list(loaded.edges()), list(expected.edges()))
def test_unicode(self):
    """Round-trip a graph whose node and attribute names are non-ASCII.

    The temporary file is removed even when the comparison fails.
    """
    G = nx.Graph()
    name1 = chr(2344) + chr(123) + chr(6543)
    name2 = chr(5543) + chr(1543) + chr(324)
    G.add_edge(name1, 'Radiohead', **{name2: 3})
    fd, fname = tempfile.mkstemp()
    try:
        nx.write_multiline_adjlist(G, fname)
        H = nx.read_multiline_adjlist(fname)
        assert_graphs_equal(G, H)
    finally:
        # Always release the descriptor and delete the file.
        os.close(fd)
        os.unlink(fname)
def test_latin1(self):
    """Round-trip a graph with Latin-1 names using ``encoding='latin-1'``.

    The temporary file is removed even when the comparison fails.
    """
    G = nx.Graph()
    name1 = 'Bj' + chr(246) + 'rk'
    name2 = chr(220) + 'ber'
    G.add_edge(name1, 'Radiohead', **{name2: 3})
    fd, fname = tempfile.mkstemp()
    try:
        nx.write_multiline_adjlist(G, fname, encoding='latin-1')
        H = nx.read_multiline_adjlist(fname, encoding='latin-1')
        assert_graphs_equal(G, H)
    finally:
        # Always release the descriptor and delete the file.
        os.close(fd)
        os.unlink(fname)
def createDocumentsGraph(train, graph_file=None, sem_rel=None, sem_graph=None):
    """
    Build the directed lemma graph for a corpus, or load it from a cache file.

    Consecutive lemmas of a sentence are linked ``previous -> current``; nodes
    are keyed ``"<lemma>_<pos>"``.  When ``sem_rel`` is given, each lemma is
    additionally linked to those related nodes that are already in the graph.

    :param train: mapping ``doc -> sentences``; each sentence is an iterable
        from which only ``utils.instance`` items are used
    :param graph_file: optional path.  If the file exists the graph is read
        from it; otherwise the freshly built graph is written to it
    :param sem_rel: optional mapping indexed by ``lemma.instance`` whose
        values map a relation name to an iterable of candidate nodes
    :param sem_graph: optional graph whose directed version seeds the result
    :return: a ``networkx.DiGraph``
    """
    if graph_file is not None and os.path.isfile(graph_file):
        # Read back as a DiGraph: the file was written from a DiGraph, and the
        # default (undirected Graph) would silently drop edge direction and
        # return a different type than a fresh build.
        return nx.read_multiline_adjlist(graph_file,
                                         create_using=nx.DiGraph())

    G = nx.DiGraph()
    if sem_graph is not None:
        G = nx.compose(G, sem_graph.to_directed())

    def _add_semantic_edges(source_key, instance_id):
        # Link source_key to every related node that already exists in G.
        for nodes in sem_rel[instance_id].values():
            for node in nodes:
                if node not in G.nodes:
                    continue
                G.add_edge(source_key, node)

    for doc in train:
        for sentence in train[doc]:
            lemmas = [l for l in sentence if isinstance(l, utils.instance)]
            for i in range(1, len(lemmas)):
                lemma = lemmas[i]
                prev_lemma = lemmas[i - 1]
                lemma_key = lemma.lemma + '_' + lemma.pos
                prev_lemma_key = prev_lemma.lemma + '_' + prev_lemma.pos

                # The first lemma of a sentence is never a "current" lemma,
                # so it is registered when the first pair is processed.
                if i == 1:
                    G.add_node(prev_lemma_key)
                G.add_node(lemma_key)
                G.add_edge(prev_lemma_key, lemma_key)

                if sem_rel is not None:
                    _add_semantic_edges(lemma_key, lemma.instance)
                    if i == 1:
                        _add_semantic_edges(prev_lemma_key,
                                            prev_lemma.instance)

    if graph_file is not None:
        nx.write_multiline_adjlist(G, graph_file)
    return G
def test_read_multiline_adjlist_1(self):
    """Regression test for https://networkx.lanl.gov/trac/ticket/252.

    Comment lines interleaved with the data must be skipped by the reader.
    """
    payload = b"""# comment line
1 2
# comment line
2
3
"""
    parsed = nx.read_multiline_adjlist(io.BytesIO(payload))
    expected_adj = {'1': {'3': {}, '2': {}}, '3': {'1': {}}, '2': {'1': {}}}
    assert_equal(parsed.adj, expected_adj)
def test_read_multiline_adjlist_1(self):
    """Regression test for https://networkx.lanl.gov/trac/ticket/252.

    Reads a multiline adjacency list containing comment lines and compares
    the result with the graph built from the expected adjacency dict.
    """
    raw = b"""# comment line
1 2
# comment line
2
3
"""
    stream = io.BytesIO(raw)
    expected = nx.Graph({"1": {"3": {}, "2": {}}, "3": {"1": {}}, "2": {"1": {}}})
    actual = nx.read_multiline_adjlist(stream)
    assert_graphs_equal(actual, expected)
def test_read_multiline_adjlist_1(self):
    """Regression test for https://networkx.lanl.gov/trac/ticket/252.

    The input mixes comment lines with data lines; the parsed graph must
    equal the graph built from the expected adjacency dict.
    """
    data = b"""# comment line
1 2
# comment line
2
3
"""
    result = nx.read_multiline_adjlist(io.BytesIO(data))
    wanted_adj = {'1': {'3': {}, '2': {}}, '3': {'1': {}}, '2': {'1': {}}}
    assert_graphs_equal(result, nx.Graph(wanted_adj))
def load(json_name):
    """Load a dataset graph, drop its tiny components, save and return it.

    :param json_name: dataset identifier; one of the two PAN12 corpus names
        handled below
    :return: the graph after every connected component with at most two
        nodes has been removed; it is also written to
        ``./adjlists/graphU.adjlist``
    :raises ValueError: if ``json_name`` is not a known dataset
    """
    # making G (networkx)
    if json_name == "pan12-sexual-predator-identification-training-corpus-2012-05-01":
        G = nx.read_multiline_adjlist(
            "adjlists/train_networkxBeforeRemove.adjlist")
    elif json_name == "pan12-sexual-predator-identification-test-corpus-2012-05-17":
        G = nx.read_multiline_adjlist(
            "adjlists/test_networkxBeforeRemove.adjlist")
    else:
        # Fail with a clear message instead of an unbound-variable error.
        raise ValueError("Unknown dataset name: %r" % (json_name,))

    # generate picture of networkx
    # nx.draw(G, node_size=1)
    # plt.savefig("../API/client/public/models/load/networkx_before_remove.png")

    # Remove every connected component that has at most two nodes (isolated
    # nodes and isolated pairs).  The component list is materialised first
    # because the graph is mutated while it is being walked.
    for component in list(nx.connected_components(G)):
        if len(component) <= 2:
            for node in component:
                G.remove_node(node)

    # Use the same ``nx`` alias as the rest of the function.
    nx.write_multiline_adjlist(G, "./adjlists/graphU.adjlist")

    # nx.draw(G, node_size=3)
    # plt.savefig("../API/client/public/models/load/networkx_after_remove.png")
    return G
def test_unicode(self):
    """Round-trip a graph whose node and attribute names are non-ASCII.

    The temporary file is removed even when the comparison fails.
    """
    G = nx.Graph()
    try:  # Python 3.x
        name1 = chr(2344) + chr(123) + chr(6543)
        name2 = chr(5543) + chr(1543) + chr(324)
    except ValueError:  # Python 2.6+
        name1 = unichr(2344) + unichr(123) + unichr(6543)
        name2 = unichr(5543) + unichr(1543) + unichr(324)
    G.add_edge(name1, 'Radiohead', **{name2: 3})
    fd, fname = tempfile.mkstemp()
    try:
        nx.write_multiline_adjlist(G, fname)
        H = nx.read_multiline_adjlist(fname)
        assert_graphs_equal(G, H)
    finally:
        # Always release the descriptor and delete the file.
        os.close(fd)
        os.unlink(fname)
def test_unicode(self):
    """Round-trip a graph whose node and attribute names are non-ASCII.

    The temporary file is removed even when the comparison fails.
    """
    G = nx.Graph()
    try:  # Python 3.x
        name1 = chr(2344) + chr(123) + chr(6543)
        name2 = chr(5543) + chr(1543) + chr(324)
    except ValueError:  # Python 2.6+
        name1 = unichr(2344) + unichr(123) + unichr(6543)
        name2 = unichr(5543) + unichr(1543) + unichr(324)
    # Edge attributes go in as keyword arguments: a positional dict is
    # rejected by ``add_edge`` on networkx 2+, and ``**`` works on old
    # releases too.
    G.add_edge(name1, 'Radiohead', **{name2: 3})
    fd, fname = tempfile.mkstemp()
    try:
        nx.write_multiline_adjlist(G, fname)
        H = nx.read_multiline_adjlist(fname)
        assert_graphs_equal(G, H)
    finally:
        # Always release the descriptor and delete the file.
        os.close(fd)
        os.unlink(fname)
def test_read_multiline_adjlist_1():
    """Regression test for https://networkx.lanl.gov/trac/ticket/252.

    Comment lines interleaved with the data must be skipped by the reader.
    """
    # ``io.BytesIO`` replaces the Python-2-only ``StringIO`` module and works
    # on both Python 2 and 3.
    import io

    s = b"""# comment line
1 2
# comment line
2
3
"""
    bytesIO = io.BytesIO(s)
    G = nx.read_multiline_adjlist(bytesIO)
    adj = {'1': {'3': {}, '2': {}}, '3': {'1': {}}, '2': {'1': {}}}
    assert_equal(G.adj, adj)
def pca():
    """Handle a /pca request: build and persist the plotter for a dataset.

    Reads ``dataset`` and ``useServerData`` from the request JSON.  The
    computation is skipped only when every per-algorithm image and the
    pickled plotter already exist and the caller asked for server data.
    Returns a JSON response carrying the path of the base image.
    """
    global plotter

    # Request parameters: dataset name and whether cached results may be used.
    dataset = request.get_json()["dataset"]
    useServerData = request.get_json()["useServerData"]

    # Prefix for saving information
    prefix = "/data" + "/pca/" + dataset
    print(dataset)
    output_dir = "." + prefix
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    # Cached results are usable only if every expected artefact is present.
    images_present = all([
        os.path.isfile(output_dir + "/" + algo + ".png")
        for algo in all_algorithms
    ])
    plotter_present = os.path.isfile(get_tmpfile(dataset + "plotter.pbz2"))
    skip = bool(images_present and plotter_present and useServerData)

    app.logger.info('got /pca request with skip = %s and dataset = %s' %
                    (skip, dataset))

    if not skip:
        # Graph written earlier for this dataset.
        G = networkx.read_multiline_adjlist("." + "/data" + "/load/" +
                                            dataset + "/graph.adjlist")

        # Embedding vectors saved earlier for this dataset.
        vectors_path = get_tmpfile(dataset + "model.kv")
        model = KeyedVectors.load(vectors_path, mmap='r')

        # PCA from 64D to 3D
        plotter = Plotter.Plotter(G, model)
        plotter.SaveAll(prefix)

        # Persist the plotter as a bz2-compressed pickle.
        pickle_path = get_tmpfile(dataset + "plotter.pbz2")
        with bz2.BZ2File(pickle_path, 'w') as out:
            cPickle.dump(plotter, out)

    return jsonify(res="pca completed and saved in image",
                   path=prefix + "/base.png")
def test_latin1(self):
    """Round-trip a graph with Latin-1 names using ``encoding="latin-1"``.

    The temporary file is removed even when the comparison fails.
    """
    G = nx.Graph()
    try:  # Python 3.x
        blurb = chr(1245)  # just to trigger the exception
        name1 = "Bj" + chr(246) + "rk"
        name2 = chr(220) + "ber"
    except ValueError:  # Python 2.6+
        name1 = "Bj" + unichr(246) + "rk"
        name2 = unichr(220) + "ber"
    # Edge attributes go in as keyword arguments: a positional dict is
    # rejected by ``add_edge`` on networkx 2+, and ``**`` works on old
    # releases too.
    G.add_edge(name1, "Radiohead", **{name2: 3})
    fd, fname = tempfile.mkstemp()
    try:
        nx.write_multiline_adjlist(G, fname, encoding="latin-1")
        H = nx.read_multiline_adjlist(fname, encoding="latin-1")
        assert_graphs_equal(G, H)
    finally:
        # Always release the descriptor and delete the file.
        os.close(fd)
        os.unlink(fname)
def test_latin1(self):
    """Round-trip a graph with Latin-1 names using ``encoding='latin-1'``.

    The temporary file is removed even when the comparison fails.
    """
    G = nx.Graph()
    try:  # Python 3.x
        blurb = chr(1245)  # just to trigger the exception
        name1 = 'Bj' + chr(246) + 'rk'
        name2 = chr(220) + 'ber'
    except ValueError:  # Python 2.6+
        name1 = 'Bj' + unichr(246) + 'rk'
        name2 = unichr(220) + 'ber'
    G.add_edge(name1, 'Radiohead', **{name2: 3})
    fd, fname = tempfile.mkstemp()
    try:
        nx.write_multiline_adjlist(G, fname, encoding='latin-1')
        H = nx.read_multiline_adjlist(fname, encoding='latin-1')
        assert_graphs_equal(G, H)
    finally:
        # Always release the descriptor and delete the file.
        os.close(fd)
        os.unlink(fname)
def embedding():
    """Handle an /embedding request: generate node2vec walks and vectors.

    Reads ``dataset`` and ``useServerData`` from the request JSON.  The walks
    and the embedding are recomputed unless ``walks.txt`` already exists for
    the dataset and the caller asked for server data.  Returns a JSON
    response that includes the full content of the walks file.
    """
    # Getting dataset from request
    dataset = request.get_json()["dataset"]

    # If use server data or do all process
    useServerData = request.get_json()["useServerData"]

    # Prefix for saving information
    prefix = "/data" + "/embedding/" + dataset
    walks_path = "." + prefix + "/walks.txt"

    skip = True
    print(walks_path)
    if not os.path.isfile(walks_path):
        skip = False
    # if not os.path.isfile(get_tmpfile(dataset + "model.kv")):
    #     skip = False
    if not useServerData:
        skip = False

    app.logger.info('got /embedding request with skip = %s and dataset = %s' %
                    (skip, dataset))

    if not skip:
        G = networkx.read_multiline_adjlist("." + "/data" + "/load/" +
                                            dataset + "/graph.adjlist")

        # Precompute probabilities and generate walks
        node2vec = Node2Vec(G,
                            dimensions=64,
                            walk_length=25,
                            num_walks=10,
                            workers=1)
        saveWalks(list(node2vec.walks), prefix)

        # Creates the embeddings using gensim's Word2Vec.
        model = node2vec.fit(window=10, min_count=1, batch_words=4)

        # Save the model's vectors to a temporary file.
        fname = dataset + "model.kv"
        path = get_tmpfile(fname)
        model.wv.save(path)

    # Read the walks through a context manager so the handle is closed.
    with open(walks_path, "r") as walks_file:
        walks_text = walks_file.read()

    return jsonify(res="walks saved successfully",
                   walk_length=25,
                   num_walks=10,
                   walks=walks_text)
def main():
    """Load the saved graph and vectors, then show the clustering plots."""
    # ---- Getting (G, model) ----
    graph = nx.read_multiline_adjlist(
        "adjlists/test_networkxAfterRemove.adjlist")
    vectors_path = get_tmpfile("test_embedded_vectors_model.kv")
    model = KeyedVectors.load(vectors_path, mmap='r')

    # ---- Plotting ----
    plotter = Plotter.Plotter(graph, model)
    # plt = plotter.BaseGraph.getPlot()
    # plotter.showWithBaseGraph()
    # plotter.showWithKMeans()
    # plotter.showWithCC()
    plotter.showWithSpectral()
    plotter.showCombined("kmeans+spectral+connected")
def load():
    """Handle a /load request: prune a dataset graph and report on it.

    Reads ``dataset`` and ``useServerData`` from the request JSON, loads the
    matching start graph, removes every connected component with at most two
    nodes, saves the pruned graph and (unless skipped) a drawing of it.
    Returns the link lists and node/link counts before and after pruning.
    An unknown dataset name yields a JSON error payload instead.
    """
    # Request parameters: dataset name and whether cached results may be used.
    dataset = request.get_json()["dataset"]
    useServerData = request.get_json()["useServerData"]

    # Prefix for saving information
    prefix = "/data" + "/load/" + dataset
    out_dir = "." + prefix
    after_png = out_dir + "/networkx_after_remove.png"

    # The drawing is skipped only when it exists and cached data is allowed.
    skip = bool(os.path.isfile(after_png) and useServerData)
    app.logger.info('got /load request with skip = %s and dataset = %s' %
                    (skip, dataset))

    # Making G (networkx)
    start_files = {
        "pan12-sexual-predator-identification-training-corpus-2012-05-01":
        "./data/start/train_networkxBeforeRemove.adjlist",
        "pan12-sexual-predator-identification-test-corpus-2012-05-17":
        "./data/start/test_networkxBeforeRemove.adjlist",
    }
    if dataset not in start_files:
        return jsonify(err="405", msg="Invalid JSON file name")
    G = networkx.read_multiline_adjlist(start_files[dataset])

    def summary():
        # Human-readable size of the graph in its current state.
        return "%s Nodes, %s links" % (len(G.nodes()), len(G.edges()))

    # State before pruning (node-link format is used to serialize).
    app.logger.debug('loaded dataset with %s nodes before remove' %
                     len(G.nodes()))
    before = json_graph.node_link_data(G)["links"]
    graphData = [summary()]

    # Collect the nodes of every component with at most two nodes first,
    # then delete them, so the graph is not mutated while being traversed.
    doomed = [
        node for component in list(networkx.connected_components(G))
        if len(component) <= 2 for node in component
    ]
    for node in doomed:
        G.remove_node(node)

    # State after pruning.
    app.logger.debug('loaded dataset with %s nodes after remove' %
                     len(G.nodes()))
    after = json_graph.node_link_data(G)["links"]
    graphData.append(summary())

    # Save after remove graph
    if not os.path.exists(out_dir):
        os.makedirs(out_dir)
    networkx.write_multiline_adjlist(G, out_dir + "/graph.adjlist")

    if not skip:
        # Plotting
        networkx.draw(G, node_size=3)
        plt.savefig(after_png)

    return jsonify(before_path=prefix + "/networkx_before_remove.png",
                   after_path=prefix + "/networkx_after_remove.png",
                   before=before,
                   after=after,
                   graphData=graphData)
import networkx
from gensim.models import KeyedVectors
from gensim.test.utils import get_tmpfile
from BERT.clustersBy3DVec import clustersBy3DVec
from BERT.json2conversation import json2conversation
from BERT.vectors2text import Vectors2MatchConversions
from Step3 import Plotter

# ======================== initialization of data ======================== #
# Load the graph G from its saved multiline adjacency list.
G = networkx.read_multiline_adjlist("./adjlists/graphU.adjlist")

# Load the saved embedding vectors from gensim's temporary-file location.
fname = "model.kv"
path = get_tmpfile(fname)
model = KeyedVectors.load(path, mmap='r')

# Convert the JSON file to a list of Conversation objects.
# NOTE(review): absolute, machine-specific path — confirm before running
# on another machine.
conversations = json2conversation.parse_data_to_case_class(
    "C:/Users/EILON/PycharmProjects/data_set/test"
    "/pan12-sexual-predator-identification-test-corpus-2012-05-21"
    "/pan12-sexual-predator-identification-test-corpus-2012-05-17")

# ================== preparing the input data for BERT ================== #
# Build the plotter from the graph and the embedding vectors.
plotter = Plotter.Plotter(G, model)

# For each clustering algorithm, get the dictionary of centers keyed by
# cluster name.
(kmeans_centers_by_name, spectral_centers_by_name,
 connected_center_by_name) = plotter.getAllCentersName()
G.add_edge(mc, st) G.add_edge(boc, mc) G.add_edge(boc, dt) G.add_edge(st, dt) G.add_edge(q, st) G.add_edge(dt, mh) G.add_edge(st, mh) # write in UTF-8 encoding fh = codecs.open('edgelist.utf-8', 'w', encoding='utf-8') fh.write('# -*- coding: %s -*-\n' % fh.encoding) # encoding hint for emacs NX.write_multiline_adjlist(G, fh, delimiter='\t') # read and store in UTF-8 fh = codecs.open('edgelist.utf-8', 'r', encoding='utf-8') H = NX.read_multiline_adjlist(fh, delimiter='\t') for n in G.nodes(): if n not in H: print False print G.nodes() try: pos = NX.spring_layout(G) NX.draw(G, pos, font_size=16, with_labels=False) for p in pos: # raise text positions pos[p][1] += 0.07 NX.draw_networkx_labels(G, pos) P.show() except:
import networkx as nx
import matplotlib.pyplot as plt

# Read the graph from its multiline adjacency list file.
G = nx.read_multiline_adjlist('graph.multiline_adjlist')

# Drawing options are gathered in one place, then handed to networkx.
draw_options = dict(arrows=True, node_size=180, node_shape="o")
nx.draw_networkx(G, **draw_options)
# nx.draw_spectral(G,arrows=True,node_size = 200,node_shape = "8",node_color = "red")
# nx.write_gml(G,"test.gml")

plt.show()
def read_adjacency(fname):
    """Return the graph stored in ``fname`` as a multiline adjacency list.

    Node labels are converted to ``int`` while reading.
    """
    graph = nx.read_multiline_adjlist(fname, nodetype=int)
    return graph
def load(fname):
    """Return the graph read from the multiline adjacency list ``fname``."""
    return nx.read_multiline_adjlist(fname)
G = nx.Graph()
G.add_edge(hd, mh)
G.add_edge(mc, st)
G.add_edge(boc, mc)
G.add_edge(boc, dt)
G.add_edge(st, dt)
G.add_edge(q, st)
G.add_edge(dt, mh)
G.add_edge(st, mh)

# Write in UTF-8 encoding.  The context manager flushes and closes the file
# before it is reopened for reading below.
with open("edgelist.utf-8", "wb") as fh:
    nx.write_multiline_adjlist(G, fh, delimiter="\t", encoding="utf-8")

# Read the graph back, decoding as UTF-8.
with open("edgelist.utf-8", "rb") as fh:
    H = nx.read_multiline_adjlist(fh, delimiter="\t", encoding="utf-8")

# Report any node that did not survive the round trip.
for n in G.nodes():
    if n not in H:
        print(False)

print(list(G.nodes()))

pos = nx.spring_layout(G)
nx.draw(G, pos, font_size=16, with_labels=False)
for p in pos:  # raise text positions
    pos[p][1] += 0.07
nx.draw_networkx_labels(G, pos)
plt.show()
def read_graph(from_user, to_user):
    """Read the tab-delimited multiline adjacency list for a user pair.

    The file name comes from ``edgelist_fname(from_user=..., to_user=...)``.
    Returns the graph, or ``None`` when an ``IOError`` occurs while resolving,
    opening or reading the file.
    """
    try:
        fname = edgelist_fname(from_user=from_user, to_user=to_user)
        with get_file(fname, 'r') as fd:
            graph = NX.read_multiline_adjlist(fd, delimiter='\t')
    except IOError:
        return None
    return graph
def load_graph(year):
    """Return the graph stored in ``GRAPH_PATH + str(year) + '.txt'``."""
    graph_file = GRAPH_PATH + str(year) + '.txt'
    with open(graph_file, 'rb') as handle:
        graph = nx.read_multiline_adjlist(handle)
    return graph
# Number of simulation runs nSimulation = 1 # Number of payments in each simulation run nPayments = 200 # Payments gaussian mean weight to be multiplied by average channel balance payments_mu_weight = 0.1 # Payemnts gaussian standard deviation payments_sigma_weight = payments_mu_weight / 2 # Number of nodes to have before stopping removing nodes nNodes = 280 # Number of routing gossip messages to be sent in-between payments nRoutingGossip = 10 # Open adjencency list file and build the undirected graph f = open("adjList.txt", 'rb') G = nx.read_multiline_adjlist(f) f.close() # Clean graph from smallest components largest_cc = max(nx.connected_components(G), key=len) cleanG = G.subgraph(largest_cc).copy() print("Number of nodes: " + str(G.number_of_nodes())) print("Number of edges: " + str(G.number_of_edges())) # Read alias file and create a pub_key -> alias dic aliasDic = {} f = open("nodeAlias.txt", 'r') lines = f.read().splitlines()
def saveWalks(walks):
    """Write the walks to ``walks_test.txt``, one numbered row per walk.

    Each row has the form ``"row <n>: "`` followed by the words of the walk,
    each followed by a single space.

    :param walks: iterable of walks; each walk is an iterable of strings
    """
    # The context manager closes the file even if a write fails.
    with open("walks_test.txt", "w+") as f:
        for row, sentence in enumerate(walks, start=1):
            f.write("row %s: " % str(row))
            for word in sentence:
                f.write(word)
                f.write(" ")
            f.write("\n")


# Start Point:
G = nx.read_multiline_adjlist("test_networkxAfterRemove.adjlist")

# Part A
# Node2Vec parameters:
#   G:           Input graph
#   dimensions:  Embedding dimensions
#   walk_length: Number of nodes in each walk
#   num_walks:   Number of walks per node
#   workers:     Number of workers for parallel execution

# Precompute probabilities and generate walks
node2vec = Node2Vec(G, dimensions=64, walk_length=25, num_walks=10, workers=1)
saveWalks(list(node2vec.walks))
def bert():
    """Handle a /bert request: extract topics for a selected cluster.

    Reads ``dataset`` and ``cluster`` from the request JSON.  The module-level
    ``conversations``, ``plotter`` and ``clusters`` objects are loaded on
    first use and reused by later requests.  Returns a JSON response whose
    ``topic`` field is the string form of the extracted topic list.
    """
    global conversations, plotter, clusters

    dataset = request.get_json()["dataset"]
    option_cluster_name = request.get_json()["cluster"]
    app.logger.info('got /bert request with dataset = %s' % (dataset))

    # Graph written earlier for this dataset.
    G = networkx.read_multiline_adjlist("." + "/data" + "/load/" + dataset +
                                        "/graph.adjlist")

    if conversations is None:
        # NOTE(review): absolute, machine-specific paths; an unrecognised
        # dataset leaves ``conversations`` as None — confirm intended.
        if dataset == "pan12-sexual-predator-identification-training-corpus-2012-05-01":
            conversations = loadDataset2Conversation.loadConversations(
                "C:/Users/EILON/PycharmProjects/data_set/traning"
                "/pan12-sexual-predator-identification-training-corpus-2012-05-01"
                "/pan12-sexual-predator-identification-training-corpus-2012-05-01"
            )  # 40820 conversations
        elif dataset == "pan12-sexual-predator-identification-test-corpus-2012-05-17":
            conversations = loadDataset2Conversation.loadConversations(
                "C:/Users/EILON/PycharmProjects/data_set/test"
                "/pan12-sexual-predator-identification-test-corpus-2012-05-21"
                "/pan12-sexual-predator-identification-test-corpus-2012-05-17")

    # load plotter
    if plotter is None:
        fname = dataset + "plotter.pbz2"
        path = get_tmpfile(fname)
        # Close the compressed file once the object has been unpickled.
        # NOTE: unpickling executes code from the file; the file must only
        # ever come from this application.
        with bz2.BZ2File(path, 'rb') as data:
            plotter = cPickle.load(data)

    if clusters is None:
        # Get all algorithms dictionary of center by cluster names
        (kmeans_centers_by_name, spectral_centers_by_name,
         connected_center_by_name) = plotter.getAllCentersName()

        # Make all algorithms dictionary of cluster's nodes by cluster names
        clusters = clustersBy3DVec.clustersBy3DVec(
            kmeans_centers_by_name, spectral_centers_by_name,
            connected_center_by_name, plotter.all_vectors_after_pca)

    # get list of vectors that matching to the input cluster name
    selected_vectors = clusters.getAllVectorsByCombinationClustersName(
        option_cluster_name)

    app.logger.info("Start process of extracting topics...")

    # get list of Conversation objects from list of vectors
    vectors2Conversations = Vectors2MatchConversions.Vectors2MatchConversions(
        G, plotter.all_vectors_after_pca, conversations)
    selected_conversations = vectors2Conversations.getConversationsFromGroupOfVecs(
        selected_vectors)

    # get list of 5 most similar topics from list of Conversation objects
    conversations2Topics = convert_Conversations_2_topic.convert_Conversations_2_topic(
    )
    clusters_vector = conversations2Topics.clustersEmbedding(
        selected_conversations)
    topics_list = conversations2Topics.vector2Topic(clusters_vector)

    return jsonify(topic=str(
        topics_list))  # ['notice', 'clothe', 'feet', 'ship', 'quart']
#Create f given on page 120 of The Game of Cops and Robbers on Graphs for node in Pnodes: f[str(node)] = Gnodes - NGP[str(node)] #If we are running from the cmd line use arguments if __name__ == '__main__': import sys if len(sys.argv) != 3: print('The format for the arguments is: graphFileLocation k') exit() k = int(sys.argv[2]) G = nx.read_multiline_adjlist(sys.argv[1]) initGraph() numK = getCopNumber() if numK: print('The cop number is >', k) else: print('The cop number is <=', k) G = 0 k = 0 #Otherwise we are using a module def copk(graph, kVal): global G, k
def getFeatureGraph(mAllData,
                    dEdgeThreshold=0.30,
                    bResetGraph=True,
                    dMinDivergenceToKeep=np.log2(10e5)):
    """Return the feature graph and the list of useful feature names.

    Unless ``bResetGraph`` is true, the function first tries to load the
    graph from "graphAdjacencyList.txt" and the names from
    "usefulFeatureNames.pickle".  On any failure, or when a reset is
    requested, the graph is rebuilt from ``mAllData`` and both files are
    rewritten.

    :param mAllData: 2-D data array; ``np.shape(mAllData)[1]`` is taken as
        the number of features (presumably rows are samples — TODO confirm)
    :param dEdgeThreshold: threshold handed unchanged to the
        ``addEdgeAboveThreshold`` worker
    :param bResetGraph: when true (the default) the cached files are ignored
    :param dMinDivergenceToKeep: a feature is kept only when the absolute
        value of its NaN-ignoring column mean exceeds this value
    :return: tuple ``(g, saUsefulFeatureNames)``
    """
    try:
        if bResetGraph:
            # Raising here reuses the "recompute" path of the except clause.
            raise Exception("User requested graph recreation.")
        print("Trying to load graph...")
        g = read_multiline_adjlist("graphAdjacencyList.txt")
        with open("usefulFeatureNames.pickle", "rb") as fIn:
            saUsefulFeatureNames = pickle.load(fIn)
        print("Trying to load graph... Done.")
        return g, saUsefulFeatureNames
    except Exception as e:
        print("Trying to load graph... Failed:\n%s\n Recomputing..." %
              (str(e)))

    # DEBUG LINES
    print("Got data of size %s." % (str(np.shape(mAllData))))
    print("Extracting graph...")
    #############
    # Init graph
    # Determine meaningful features (with a divergence of more than MIN_DIVERGENCE from the control mean)
    iFeatureCount = np.shape(mAllData)[1]
    mMeans = np.nanmean(mAllData, 0)  # Ignore nans
    # Column 0 is skipped, so vUseful[k] describes data column k + 1.
    vUseful = [
        abs(mMeans[iFieldNum]) - dMinDivergenceToKeep > 0.00
        for iFieldNum in range(1, iFeatureCount)
    ]
    # The names are sliced the same way, so saFeatures[k] pairs with vUseful[k].
    saFeatures = getFeatureNames()[1:iFeatureCount]
    saUsefulIndices = [
        iFieldNum for iFieldNum, _ in enumerate(saFeatures)
        if vUseful[iFieldNum]
    ]
    saUsefulFeatureNames = [
        saFeatures[iFieldNum] for iFieldNum in saUsefulIndices
    ]
    iUsefulFeatureCount = len(saUsefulIndices)
    print("Keeping %d features out of %d." %
          (len(saUsefulIndices), len(saFeatures)))
    ###############################
    g = nx.Graph()
    print("Adding nodes...")
    # Add a node for each feature
    lIndexedNames = enumerate(saFeatures)  # NOTE(review): never used below
    for idx in saUsefulIndices:  # Only act on useful features
        g.add_node(saFeatures[idx], label=idx)
    print("Adding nodes... Done.")

    # Measure correlations
    # NOTE(review): the printed count is n*n/2, while combinations() below
    # yields n*(n-1)/2 pairs.
    print("Creating edges for %d possible pairs..." %
          (0.5 * (iUsefulFeatureCount * iUsefulFeatureCount)))
    lCombinations = itertools.combinations(saUsefulIndices, 2)

    # Create queue and threads
    qCombination = Queue(10000)
    threads = []  # NOTE(review): never filled or read below
    num_worker_threads = 4
    for i in range(num_worker_threads):
        t = threading.Thread(target=addEdgeAboveThreshold,
                             args=(
                                 i,
                                 qCombination,
                             ))
        t.setDaemon(True)
        t.start()

    iCnt = 0
    dStartTime = clock()
    for iFirstFeatIdx, iSecondFeatIdx in lCombinations:
        # NOTE(review): the indices are positions in saFeatures, i.e. offset
        # by one from mAllData columns — verify how the worker uses them.
        qCombination.put(
            (iFirstFeatIdx, iSecondFeatIdx, g, mAllData, saFeatures,
             iFirstFeatIdx, iSecondFeatIdx, dEdgeThreshold))

        # DEBUG LINES
        # Progress dot every 1000 pairs.  The estimate is nested under the
        # ``iCnt != 0`` guard because it divides by iCnt; it is not printed
        # at iCnt == 10000.
        if iCnt != 0 and (iCnt % 1000 == 0):
            sys.stdout.write(".")
            if iCnt % 10000 == 0 and (iCnt != 10000):
                dNow = clock()
                dRate = ((dNow - dStartTime) / iCnt)
                dRemaining = (0.5 *
                              (iUsefulFeatureCount * iUsefulFeatureCount) -
                              iCnt) * dRate
                sys.stdout.write(
                    "%d (Estimated remaining (sec): %4.2f - Working at a rate of %4.2f pairs/sec)\n"
                    % (iCnt, dRemaining, 1.0 / dRate))

        iCnt += 1
    #############

    print("Waiting for completion...")
    # Blocks until every queued item has been marked done (presumably by
    # the workers — confirm in addEdgeAboveThreshold).
    qCombination.join()

    print("Total time (sec): %4.2f" % (clock() - dStartTime))
    print("Creating edges for %d possible pairs... Done." %
          (0.5 * (iUsefulFeatureCount * iUsefulFeatureCount)))

    print("Extracting graph... Done.")

    print("Removing single nodes... Nodes before removal: %d" %
          (g.number_of_nodes()))
    # Repeatedly drop nodes that have no neighbours.
    toRemove = [
        curNode for curNode in g.nodes().keys() if len(g[curNode]) == 0
    ]
    while len(toRemove) > 0:
        g.remove_nodes_from(toRemove)
        toRemove = [
            curNode for curNode in g.nodes().keys() if len(g[curNode]) == 0
        ]
        print("Nodes after removal step: %d" % (g.number_of_nodes()))
    print("Removing single nodes... Done. Nodes after removal: %d" %
          (g.number_of_nodes()))

    print("Saving graph...")
    write_multiline_adjlist(g, "graphAdjacencyList.txt")
    with open("usefulFeatureNames.pickle", "wb") as fOut:
        pickle.dump(saUsefulFeatureNames, fOut)
    print("Saving graph... Done.")

    # NOTE(review): this message repeats the load-path text although the
    # graph was rebuilt here — looks like a copy/paste leftover.
    print("Trying to load graph... Done.")
    return g, saUsefulFeatureNames
# read test data df_test = pd.read_csv('../data/test.csv', dtype={'authorID': np.int64}) n_test = df_test.shape[0] # read collaboration graph G = nx.read_edgelist('../data/collaboration_network.edgelist', delimiter=' ', nodetype=int) # read weighted collaboration graph WG = nx.read_edgelist("../data/weighted_collaboration_network.edgelist", nodetype=int, data=(("weight", float), )) # read author similarity graph SG = nx.read_multiline_adjlist("../data/author_similarity_network.adjlist", nodetype=int) nodes = {k: v for v, k in enumerate(list(G.nodes()))} # compute graph features for each node avg_neighbor_degree_wg = nx.average_neighbor_degree(WG) avg_neighbor_degree_g = nx.average_neighbor_degree(G) core_number_g = nx.core_number(G) page_rank_g = nx.pagerank(G) page_rank_wg = nx.pagerank(WG) avg_neighbor_degree_sg = nx.average_neighbor_degree(SG) page_rank_sg = nx.pagerank(SG) eigenvector_centrality_sg = nx.eigenvector_centrality(SG) # load precomputed features for each node f = open("../data/n_papers.pkl", "rb")
workers=1) # Embed nodes model = node2vec.fit(window=10, min_count=1, batch_words=4) # Save the model into fname = "model.kv" path = get_tmpfile(fname) model.wv.save(path) return model.wv #========================================initialization of data=========================================# # Taking G from memory G = nx.read_multiline_adjlist("./adjlists/train_networkxAfterRemove.adjlist") # Taking Memory from memory fname = "model.kv" path = get_tmpfile(fname) model = KeyedVectors.load(path, mmap='r') # the embeding section # model = embedding(G) #convert the json file to list of Conversation objects data = bz2.BZ2File( "saved_objects/conversations_train_dataset_after_remove.pbz2", 'rb') # 40820 conversations conversations = cPickle.load(data) print("data conversations amount " + str(len(conversations)))
G.add_edge(hd, mh)
G.add_edge(mc, st)
G.add_edge(boc, mc)
G.add_edge(boc, dt)
G.add_edge(st, dt)
G.add_edge(q, st)
G.add_edge(dt, mh)
G.add_edge(st, mh)

# Write in UTF-8 encoding.  The context manager flushes and closes the file
# before it is reopened for reading below.
with open('edgelist.utf-8', 'wb') as fh:
    fh.write('# -*- coding: utf-8 -*-\n'.encode('utf-8'))  # encoding hint for emacs
    nx.write_multiline_adjlist(G, fh, delimiter='\t', encoding='utf-8')

# Read the graph back, decoding as UTF-8.
with open('edgelist.utf-8', 'rb') as fh:
    H = nx.read_multiline_adjlist(fh, delimiter='\t', encoding='utf-8')

# Report any node that did not survive the round trip.
for n in G.nodes():
    if n not in H:
        print(False)

print(list(G.nodes()))

pos = nx.spring_layout(G)
nx.draw(G, pos, font_size=16, with_labels=False)
for p in pos:  # raise text positions
    pos[p][1] += 0.07
nx.draw_networkx_labels(G, pos)
plt.show()
G.add_edge(mc, st)
G.add_edge(boc, mc)
G.add_edge(boc, dt)
G.add_edge(st, dt)
G.add_edge(q, st)
G.add_edge(dt, mh)
G.add_edge(st, mh)

# Write in UTF-8 encoding.  The context manager flushes and closes the file
# before it is reopened for reading below.
with open('edgelist.utf-8', 'wb') as fh:
    fh.write(
        '# -*- coding: utf-8 -*-\n'.encode('utf-8'))  # encoding hint for emacs
    nx.write_multiline_adjlist(G, fh, delimiter='\t', encoding='utf-8')

# Read the graph back, decoding as UTF-8.
with open('edgelist.utf-8', 'rb') as fh:
    H = nx.read_multiline_adjlist(fh, delimiter='\t', encoding='utf-8')

# Report any node that did not survive the round trip.
for n in G.nodes():
    if n not in H:
        print(False)

print(list(G.nodes()))

pos = nx.spring_layout(G)
nx.draw(G, pos, font_size=16, with_labels=False)
for p in pos:  # raise text positions
    pos[p][1] += 0.07
nx.draw_networkx_labels(G, pos)
plt.show()
# Zvi Mints and Eilon Tsadok - Mac Version
def saveWalks(walks):
    """Write the walks to ``walks.txt``, one numbered row per walk.

    Each row has the form ``"row <n>:"`` followed by the words of the walk,
    each followed by a single space.

    :param walks: iterable of walks; each walk is an iterable of strings
    """
    # The context manager closes the file even if a write fails.
    with open("walks.txt", "w+") as f:
        for row, sentence in enumerate(walks, start=1):
            f.write("row %s:" % str(row))
            for word in sentence:
                f.write(word)
                f.write(" ")
            f.write("\n")


# Start Point:
G = nx.read_multiline_adjlist("convesations.adjlist")

# Part A
# Node2Vec parameters:
#   G:           Input graph
#   dimensions:  Embedding dimensions
#   walk_length: Number of nodes in each walk
#   num_walks:   Number of walks per node
#   workers:     Number of workers for parallel execution

# Precompute probabilities and generate walks
node2vec = Node2Vec(G, dimensions=64, walk_length=25, num_walks=10, workers=1)
saveWalks(list(node2vec.walks))