def gml_gpickle_to_snap_graph(filename, src_path, dst_path=""):
    """Convert a networkx graph into snap's binary format.

    Reads ``<src_path><filename>.gpickle`` (falling back to ``.gml`` if the
    pickle cannot be read) and writes ``<dst_path><filename>.graph`` plus a
    ``.labels`` hash mapping node id -> the node's 'predicate' attribute.

    :param filename: graph name without extension
    :param src_path: directory containing the input file
    :param dst_path: output directory; defaults to src_path when empty
    """
    if dst_path == "":
        dst_path = src_path
    try:
        G = nx.read_gpickle(src_path + filename + ".gpickle")
    except Exception:
        # Narrowed from a bare `except:` (which would also swallow
        # KeyboardInterrupt/SystemExit); fall back to the GML file.
        G = nx.read_gml(src_path + filename + ".gml")
    GSnap = snap.TUNGraph()
    labels = snap.TIntStrH()
    for node in G.nodes(data=True):
        node_id = node[0]  # renamed from `id` (shadowed the builtin)
        node_label = node[1]['predicate']
        labels[int(node_id)] = str(node_label)
        GSnap.AddNode(int(node_id))
    for edge in G.edges():
        GSnap.AddEdge(int(edge[0]), int(edge[1]))
    FOut = snap.TFOut(dst_path + filename + ".graph")
    GSnap.Save(FOut)
    FOut.Flush()  # fix: the graph stream was never flushed (only the labels one)
    FOut = snap.TFOut(dst_path + filename + ".labels")
    labels.Save(FOut)
    FOut.Flush()
def create_rnd_trees(size, number, filename, dst_path, labeled=False, seed=1):
    """Generate `number` random trees of `size` nodes and save them in snap's
    binary format as ``<dst_path><filename>_<i>.graph`` (index zero-padded).

    :param size: number of nodes per tree
    :param number: how many trees to generate
    :param filename: output file stem
    :param dst_path: output directory
    :param labeled: if True, also save a ``.labels`` hash of each node's
        'predicate' attribute (nodes must carry that attribute)
    :param seed: base RNG seed; tree i uses seed + i
    """
    random.seed(seed)
    # Hoisted out of the loop; int() because math.ceil returns a float on
    # Python 2 and str.zfill requires an int.
    pad_width = int(math.ceil(math.log10(number + 1)))
    for i in range(number):
        G = nx.random_tree(size, seed + i)
        GSnap = snap.TUNGraph()
        if labeled:
            labels = snap.TIntStrH()
        for node in G.nodes(data=True):
            node_id = node[0]  # renamed from `id` (shadowed the builtin)
            if labeled:
                labels[int(node_id)] = str(node[1]['predicate'])
            GSnap.AddNode(int(node_id))
        for edge in G.edges():
            GSnap.AddEdge(int(edge[0]), int(edge[1]))
        stem = dst_path + filename + "_" + str(i).zfill(pad_width)
        FOut = snap.TFOut(stem + ".graph")
        GSnap.Save(FOut)
        FOut.Flush()
        if labeled:
            FOut = snap.TFOut(stem + ".labels")
            labels.Save(FOut)
            FOut.Flush()
def SaveGraph(graph, name):
    """Persist *graph* in snap binary form under res/others/.

    A ".graph" extension is appended unless the last dot-separated piece
    of *name* is already "graph".
    """
    if name.split(".")[-1] == "graph":
        target = "res/others/" + name
    else:
        target = "res/others/" + name + ".graph"
    stream = snap.TFOut(target)
    graph.Save(stream)
    stream.Flush()
    return
def generate_graph(prune, output):
    """Build a directed graph with an edge from each PR creator to the owner
    of the repository the PR targets, then save it in snap binary format.

    NOTE(review): *prune* is accepted but never used in this variant —
    confirm whether pruning was meant to apply here.

    :param output: path for the saved .graph file
    """
    graph = snap.TNGraph.New()
    print("Adding PRs...")
    for pr in pull_requests():
        src = pr['pr_creator']
        dst = pr['repo_owner']
        # Skip records with a missing endpoint (added to fix outlier rows).
        if src is None or dst is None:
            continue
        # Create any endpoint that doesn't exist yet.
        for endpoint in (src, dst):
            if not graph.IsNode(endpoint):
                graph.AddNode(endpoint)
        graph.AddEdge(src, dst)
    print("Nodes:", graph.GetNodes())
    print("Edges:", graph.GetEdges())
    sink = snap.TFOut(output)
    graph.Save(sink)
    sink.Flush()
def generate_graph(prune, output):
    """Build the user follow graph (edge: user -> follower record's user) and
    save it in snap binary format.

    NOTE(review): this redefines `generate_graph` from the PR variant above —
    only the last definition in the module wins; confirm which is intended.

    :param prune: when falsy, pre-register every known user as a node
    :param output: path for the saved .graph file
    """
    graph = snap.TNGraph.New()
    # Without pruning we seed the graph with every user up front.
    if not prune:
        print("Adding users...")
        for user in users():
            graph.AddNode(user['id'])
    print("Adding follow edges...")
    for follow in followers():
        src = follow['user_id']
        dst = follow['follower_id']
        # Endpoints may be missing (e.g. when prune skipped the seeding pass).
        for endpoint in (src, dst):
            if not graph.IsNode(endpoint):
                graph.AddNode(endpoint)
        graph.AddEdge(src, dst)
    print("Nodes:", graph.GetNodes())
    print("Edges:", graph.GetEdges())
    sink = snap.TFOut(output)
    graph.Save(sink)
    sink.Flush()
def get_top_packages(graph_path, n):
    """Return the package names of the top-n nodes of the graph ranked by
    PageRank, using a cached "<name>_pageranks" file when present.

    NOTE(review): the final sort is ascending by score (`itemgetter(1)` with
    no `reverse=True`) — confirm whether callers expect best-first order.
    Side effect: chdirs into the graph's directory (snap.py path workaround).

    :param graph_path: path to a snap TNEANet .graph file whose nodes carry a
        string attribute "pkg"
    :param n: how many packages to return
    :return: list of package-name strings
    """
    graph_abs_path = os.path.abspath(graph_path)
    graph_name = os.path.basename(graph_abs_path).replace(".graph", "")
    fin = snap.TFIn(graph_abs_path)
    graph = snap.TNEANet.Load(fin)
    # rebuild the id => pkg dictionary
    id_pkg_dict = {}
    for node in graph.Nodes():
        id_pkg_dict[node.GetId()] = graph.GetStrAttrDatN(node.GetId(), "pkg")
    directory = os.path.dirname(os.path.abspath(graph_path))
    # snap.py doesn't suport absolute paths for some operations. Let's cd to the directory
    os.chdir(directory)
    # print("{0} Computing top {0} nodes with highest pagerank".format(n, datetime.datetime.now()))
    data_file = graph_name + "_pageranks"
    prank_hashtable = snap.TIntFltH()
    if not os.path.isfile(data_file):
        # Damping Factor: 0.85, Convergence difference: 1e-4, MaxIter: 100
        snap.GetPageRank(graph, prank_hashtable, 0.85)
        # NOTE(review): this cache stream is never Flush()ed — the cache file
        # may be incomplete on disk; verify.
        fout = snap.TFOut(data_file)
        prank_hashtable.Save(fout)
    else:
        fin = snap.TFIn(data_file)
        prank_hashtable.Load(fin)
    top_n = get_top_nodes_from_hashtable(prank_hashtable, n)
    top_n.sort(key=itemgetter(1))
    top_packages = []
    for pair in top_n:
        top_packages.append(id_pkg_dict[pair[0]])
    return top_packages
def main(version): starttime = datetime.datetime.now() codePath = sys.path[0] s = codePath.split('\\') workPath = s[0] + '\\' + s[1] + '\\' + s[ 2] + '\\data\\flixster\\commondata\\' #f:\project\somproject filePath1 = workPath + 'finalSocial' + version + '.txt' # transfer node string to num 2131313 to 1 # use the index of list to represent the node totalNodeList = [] G1 = snap.TUNGraph.New() for line in open(filePath1): if line == '': break linkPair = line[:-1].split('\t') node1 = int(linkPair[0]) node2 = int(linkPair[1]) if node1 not in totalNodeList: totalNodeList.append(node1) if node2 not in totalNodeList: totalNodeList.append(node2) node1MapNum = totalNodeList.index(node1) node2MapNum = totalNodeList.index(node2) if not G1.IsNode(node1MapNum): G1.AddNode(node1MapNum) if not G1.IsNode(node2MapNum): G1.AddNode(node2MapNum) G1.AddEdge(node1MapNum, node2MapNum) print 'get the max connected component...' MxWcc = snap.GetMxWcc(G1) print 'the max connected component node num is %d ' % MxWcc.GetNodes() print MxWcc.GetEdges() # filePath2=workPath+'finalUserID.txt' # finalNodeList=[] # for line in open(filePath2): # if line=='': # break # nodeStr=line[:-1] # node=int(nodeStr) # nodeMapNum=totalNodeList.index(node) # if MxWcc.IsNode(nodeMapNum): # finalNodeList.append(node) # print 'the final user num is %d' %len(finalNodeList) FOut = snap.TFOut(workPath + 'finalSocial' + version + '.graph') MxWcc.Save(FOut) FOut.Flush() print 'finished' endtime = datetime.datetime.now() print 'passed time is %d s' % (endtime - starttime).seconds
def construct(): FIn = snap.TFIn("../graph/steam.graph") G = snap.TUNGraph.Load(FIn) print G.GetNodes(), G.GetEdges() ls = [] for ni in G.Nodes(): id = ni.GetId() if id >= 600000 and ni.GetDeg() > 100: ls.append(id) elif id < 600000 and ni.GetDeg() > 1000: ls.append(id) for i in ls: G.DelNode(i) print G.GetNodes(), G.GetEdges() ls = [] for ni in G.Nodes(): id = ni.GetId() if ni.GetDeg() == 0: ls.append(id) for i in ls: G.DelNode(i) print G.GetNodes(), G.GetEdges() FOut = snap.TFOut("../graph/steam_user100_game1000.graph") G.Save(FOut) FOut.Flush()
def create_tneanet(save=True):
    """Build a TNEANet linking countries, teams, players and matches.

    Node kinds (attr "kind"): "country", "team", "player", "match".
    Edge kinds: "team from" (team->country), "plays for" (player->team),
    "home team" / "away team" (match->team), "match in" (match->country).

    :param save: when True, also save the graph to `saveFileName`
    :return: the constructed TNEANet
    """
    meta, matches = sp.loadPickle()
    numNodes = len(matches) + len(meta.player) + len(meta.team) + len(meta.country)
    numEdges = 3 * len(matches) + 4 * len(meta.player) + len(meta.team)
    G = TN.New(numNodes, numEdges)
    countryToNId = {}
    teamToNId = {}
    playerToNId = {}
    matchIndToNId = []
    i = 0
    for (countryId, countryName) in meta.country.items():
        ni = G.GetNI(G.AddNode(i))
        G.AddStrAttrDatN(ni, "country", "kind")
        G.AddIntAttrDatN(ni, countryId, "countryId")
        G.AddStrAttrDatN(ni, countryName, "countryName")
        countryToNId[countryId] = i
        i += 1
    for (teamId, d) in meta.team.items():
        ni = G.GetNI(G.AddNode(i))
        G.AddStrAttrDatN(ni, "team", "kind")
        G.AddIntAttrDatN(ni, teamId, "teamId")
        G.AddStrAttrDatN(ni, d['name'], "teamName")
        teamToNId[teamId] = i
        EId = G.AddEdge(i, countryToNId[d['country']])
        G.AddStrAttrDatE(EId, "team from", "kind")
        i += 1
    for (playerId, d) in meta.player.items():
        ni = G.GetNI(G.AddNode(i))
        G.AddStrAttrDatN(ni, "player", "kind")
        G.AddIntAttrDatN(ni, playerId, "playerId")
        G.AddStrAttrDatN(ni, d['name'], "playerName")
        playerToNId[playerId] = i
        for teamId in d['team']:
            EId = G.AddEdge(i, teamToNId[teamId])
            G.AddStrAttrDatE(EId, "plays for", "kind")
        i += 1
    for match in matches:
        matchIndToNId.append(i)
        ni = G.GetNI(G.AddNode(i))
        G.AddStrAttrDatN(ni, "match", "kind")
        G.AddIntAttrDatN(ni, match.away_goal, "away_goal")
        # fix: home_goal was stored under the attribute name "away_goal",
        # silently clobbering the real away_goal value.
        G.AddIntAttrDatN(ni, match.home_goal, "home_goal")
        G.AddIntAttrDatN(ni, match.stageId, "stageId")
        G.AddStrAttrDatN(ni, match.season, "season")
        G.AddIntAttrDatN(ni, match.leagueId, "leagueId")
        G.AddIntAttrDatN(ni, match.id, "matchId")
        EId = G.AddEdge(i, teamToNId[match.home_team])
        G.AddStrAttrDatE(EId, "home team", "kind")
        EId = G.AddEdge(i, teamToNId[match.away_team])
        G.AddStrAttrDatE(EId, "away team", "kind")
        EId = G.AddEdge(i, countryToNId[match.countryId])
        G.AddStrAttrDatE(EId, "match in", "kind")
        i += 1
    if save:
        G.Save(snap.TFOut(saveFileName))
    return G
def data2dag(data, num_nodes):
    """Build a directed snap graph from an edge array and save it to
    ../data/youtube.graph.

    :param data: array-like of shape (E, 2); each row is (src, dst)
    :param num_nodes: node count; ids 0..num_nodes-1 are created up front
    :return: the constructed snap.TNGraph
    """
    dag = snap.TNGraph.New()
    for node_id in range(num_nodes):
        dag.AddNode(node_id)
    for row in range(data.shape[0]):
        dag.AddEdge(int(data[row][0]), int(data[row][1]))
    FOut = snap.TFOut("../data/youtube.graph")
    dag.Save(FOut)
    # fix: the stream was never flushed, so the on-disk file could be
    # truncated or empty.
    FOut.Flush()
    return dag
def saveToFile(G, idToOsmid, nodes, name):
    """Save a snap graph plus two pickled side tables under DATA_PATH.

    Writes ``<name>.graph`` (snap binary), ``<name>.id`` (pickled
    idToOsmid mapping) and ``<name>.coords`` (pickled nodes).

    Fixes: the pickle file handles were never closed (data could be lost
    on interpreter exit); opened in binary mode, which pickle protocol 1
    requires for portability.
    """
    out = snap.TFOut(DATA_PATH + name + ".graph")  # graph saved as _.graph
    G.Save(out)
    out.Flush()
    with open(DATA_PATH + name + ".id", 'wb') as idOut:
        pickle.dump(idToOsmid, idOut, 1)
    with open(DATA_PATH + name + ".coords", 'wb') as nodesOut:
        pickle.dump(nodes, nodesOut, 1)
def tungraphToBinary():
    """Benchmark: load the twitter edge list as a TUNGraph, save it in snap
    binary format, then reload it, timing each stage via reportTime."""
    start = t()
    graph = snap.LoadEdgeList(snap.PUNGraph, NW.twitter, 0, 1)
    after_load = reportTime(start, "TUNGRAPH")
    sink = snap.TFOut(NW.twitter_binary)
    graph.Save(sink)
    sink.Flush()
    after_save = reportTime(after_load, "TUNGRAPH save binary")
    source = snap.TFIn(NW.twitter_binary)
    snap.TUNGraph.Load(source)
    reportTime(after_save, "TUNGRAPH load binary")
def SaveState(ds):
    """Serialize traversal state (start node, distance, visited set) to the
    state file named by the worker.

    NOTE(review): relies on a module-level `sw` for the state file name.

    :param ds: dict with keys "start", "dist" (ints) and "visit"
        (a snap-savable collection)
    """
    state_path = sw.GetStateName()
    out = Snap.TFOut(Snap.TStr(state_path))
    Snap.TInt(ds["start"]).Save(out)
    Snap.TInt(ds["dist"]).Save(out)
    ds["visit"].Save(out)
    out.Flush()
def snapSave(to_save, file_name):
    """
    Save in binary format. The contents of the specified file are
    overwritten if it already exists; the file is created if it does not.

    :param to_save: object to save (any snap object exposing Save)
    :param file_name: name of the file to save the object to
    """
    f_out = snap.TFOut(file_name)
    to_save.Save(f_out)
    f_out.Flush()
def _parse_from_raw_data(self):
    """Parse train.txt (tab-separated adjacency lines: base followed by its
    neighbors) into a directed snap graph, derive the undirected version,
    and save both in snap binary format.

    Fixes: snap graphs expose `Save`, not `save` (the original raised
    AttributeError at the save step); file handles are now closed via
    `with`; output streams are flushed.
    """
    train_path = DATA_PATH + 'train.txt'
    with open(train_path, "r") as f:
        num_lines = sum(1 for line in f)
    self.digraph = snap.TNGraph.New()
    with open(train_path, "r") as trainFile:
        for i in range(num_lines):
            line = trainFile.readline()
            nodes = line.split("\t")
            base = int(nodes[0])
            if not self.digraph.IsNode(base):
                self.digraph.AddNode(base)
            for j in range(1, len(nodes)):
                node = int(nodes[j])
                if not self.digraph.IsNode(node):
                    self.digraph.AddNode(node)
                self.digraph.AddEdge(base, node)
    self.ugraph = snap.ConvertGraphMP(snap.PUNGraph, self.digraph)
    fout = snap.TFOut(DIRECT_GRAPH_PATH)
    self.digraph.Save(fout)  # fix: was .save (AttributeError)
    fout.Flush()
    fout = snap.TFOut(UNDIRECT_GRAPH_PATH)
    self.ugraph.Save(fout)  # fix: was .save (AttributeError)
    fout.Flush()
def txt_to_graph(filename, src_path, dst_path=""):
    """
    Converts a snap txt graph to the much more faster .graph format
    :param filename: name of the graph without .txt ending
    :param src_path: source path of the graph
    :param dst_path: destination path for the output
    """
    if dst_path == "":
        dst_path = src_path
    GSnap = snap.LoadEdgeList(snap.PNGraph, src_path + filename + ".txt")
    FOut = snap.TFOut(dst_path + filename + ".graph")
    GSnap.Save(FOut)
    # fix: without Flush the binary file may be left incomplete on disk.
    FOut.Flush()
def save(self, filename):
    """Save this graph in binary format to the given `filename`.

    In order to store metadata associated with this the EIGraph object,
    we save an extra file, with the name `filename + '.ei_meta'`.
    """
    sink = snap.TFOut(filename)
    self.base().Save(sink)
    sink.Flush()
    meta_path = self._get_meta_filename(filename)
    with open(meta_path, 'wb') as meta_file:
        marshal.dump(self._weights, meta_file)
def SaveState(sw, ds):
    """Serialize traversal state (start node, distance, visited set) to the
    worker's state file, accounting the write time under the "disk" timer.

    :param sw: worker exposing GetStateName() and cum_timer
    :param ds: dict with keys "start", "dist" (ints) and "visit"
        (a snap-savable collection)
    """
    state_path = sw.GetStateName()
    out = Snap.TFOut(Snap.TStr(state_path))
    sw.cum_timer.cum_start("disk")
    Snap.TInt(ds["start"]).Save(out)
    Snap.TInt(ds["dist"]).Save(out)
    ds["visit"].Save(out)
    out.Flush()
    sw.cum_timer.cum_stop("disk")
def to_snapformat(self, filepath=None):
    """Convert this dataset to snap binary format and write it to disk.

    :param filepath: destination path; defaults to self.file_snap. If the
        file already exists the conversion is skipped.
    :return: the path of the (existing or newly written) snap file
    """
    if filepath is None:  # fix: identity check, not `== None`
        filepath = self.file_snap
    if utils.file_exists(filepath):
        return filepath
    # Imported lazily so the module loads even without snap installed.
    import snap
    from gct.dataset import convert
    g = convert.to_snap(self)
    self.logger.info("Writing {} to {}".format(type(g), filepath))
    FOut = snap.TFOut(filepath)
    g.Save(FOut)
    FOut.Flush()
    return filepath
def copy_graph(graph):
    """Deep-copy a snap graph by round-tripping it through a temporary
    binary file (snap has no in-memory clone).

    :param graph: any snap graph type exposing Save/Load
    :return: an independent copy of the same concrete graph type
    """
    scratch = '.copy.bin'
    # Serialize the original.
    sink = snap.TFOut(scratch)
    graph.Save(sink)
    sink.Flush()
    # Deserialize into a fresh instance of the same type.
    source = snap.TFIn(scratch)
    clone = type(graph).New()
    clone = clone.Load(source)
    return clone
def save_graph(self, out_dir):
    """Persist the graph and its cell metadata under *out_dir*.

    Writes ``meta.pickle`` (cells, cell map, positions) and ``bin.graph``
    (snap binary). Creates *out_dir* if needed.
    """
    os.makedirs(out_dir, exist_ok=True)
    metadata = {
        "cells": self.nodes,
        "cell_map": self.node_map,
        "cell_pos": self.cell_pos,
    }
    with open(os.path.join(out_dir, "meta.pickle"), "wb") as handle:
        pickle.dump(metadata, handle)
    # self.graph.SaveEdgeList(graph_path)
    stream = snap.TFOut(os.path.join(out_dir, "bin.graph"))
    self.graph.Save(stream)
    stream.Flush()
def buildSimGraph(questions, wordVecs): tfidf_matrix, ids, idf = similarityModel(questions, wordVecs) graph = snap.TUNGraph.New() for id in ids: graph.AddNode(id) print graph.GetNodes() numq = tfidf_matrix.shape[0] for i in xrange(numq): if i % 1000 == 0: print "done", i similarity = tfidf_matrix[i + 1:].dot(tfidf_matrix[i]) for j in xrange(len(similarity)): if similarity[j] > 0.2: graph.AddEdge(ids[i], ids[j + i + 1]) fout = snap.TFOut("similarity2.graph") graph.Save(fout) fout.Flush()
def setCategorys():
    """Overwrite the NCategory attribute of comment nodes attached to photo
    posts with categories from an external classifier output file, then save
    the graph as both an edge list and a snap binary (Python 2 module).

    NOTE(review): photo nodes are matched to post_comments by iteration
    order (index i) and comments by in-edge order (index c) — assumes both
    orders line up with the classifier file; verify against its producer.
    """
    G = getGraph("../files/G.graph")
    list_post = getListFromFile("../files/list_comment_category_nbsvm_1.txt")
    post_comments = list_post["post"]
    print "post_comments:", len(post_comments)
    i = 0  # index into post_comments, advanced once per photo node
    m = 0  # NOTE(review): never used — dead variable
    for NI in G.Nodes():
        nid = NI.GetId()
        NLabel = G.GetStrAttrDatN(nid, "NLabel")
        #-------------PHOTO
        if NLabel == 'photo':
            c = 0  # index into this photo's comment list
            comments = post_comments[i]["comments"]
            #------------------------------------------------IN EDGES------------------------------------------------
            for nid1 in NI.GetInEdges():
                NLabel1 = G.GetStrAttrDatN(nid1, "NLabel")
                NName1 = G.GetStrAttrDatN(nid1, "NName")
                NCategory = G.GetStrAttrDatN(nid1, "NCategory")
                eid = G.GetEId(nid1, nid)
                ETime = G.GetStrAttrDatE(eid, "ETime")
                #------------COMMENT
                if NLabel1 == "comment":
                    # Reset plain-"text" comments to "other" before applying
                    # the classifier's category.
                    if NCategory == "text":
                        G.AddStrAttrDatN(nid1, "other", 'NCategory')
                    newCategory = comments[c]['category']
                    G.AddStrAttrDatN(nid1, newCategory, 'NCategory')
                    NCategory_1 = G.GetStrAttrDatN(nid1, "NCategory")
                    print c, NCategory_1, "--", newCategory
                    c += 1
            print i, "-->", len(comments), "=", c
            i += 1
    #---------------save Graph as an output file
    snap.SaveEdgeList(G, "../files/new_G.txt", "Save as tab-separated list of edges")
    #---------------save binary
    # NOTE(review): this stream is never Flush()ed — the binary file may be
    # incomplete on disk; verify.
    FOut = snap.TFOut("../files/new_G.graph")
    G.Save(FOut)
def reverse_graph(input, output):
    """Load a directed snap graph from *input*, flip every edge's direction,
    and save the result to *output*.
    """
    print("Loading graph...")
    original = snap.TNGraph.Load(snap.TFIn(input))
    flipped = snap.TNGraph.New()
    for node in original.Nodes():
        flipped.AddNode(node.GetId())
    for edge in original.Edges():
        flipped.AddEdge(edge.GetDstNId(), edge.GetSrcNId())
    # Sanity check: reversal preserves both counts.
    assert original.GetNodes() == flipped.GetNodes()
    assert original.GetEdges() == flipped.GetEdges()
    sink = snap.TFOut(output)
    flipped.Save(sink)
    sink.Flush()
def SaveState(ds):
    """Serialize traversal state (first, range, count, dist, start, visited)
    to the state file named by the worker. Field order matters: LoadState
    must read them back in the same sequence.

    NOTE(review): relies on a module-level `sw` for the state file name.

    :param ds: dict with int values under "first", "range", "count",
        "dist", "start" and a snap-savable collection under "visit"
    """
    state_path = sw.GetStateName()
    out = Snap.TFOut(Snap.TStr(state_path))
    for key in ("first", "range", "count", "dist", "start"):
        Snap.TInt(ds[key]).Save(out)
    ds["visit"].Save(out)
    out.Flush()
def q4_2(): FIn = snap.TFIn('GDNetwork.graph') G = snap.TUNGraph.Load(FIn) import csv #id:degree geneDict = dict() with open('geneDegrees.csv', "r") as file: for line in file: list = line.split()[0].split(',') geneDict[int(list[0])] = int(list[1]) #create HDN #traverse over genes, create full graph for every node #adding nodes for node in G.Nodes(): if (node.GetId() < 20000): continue HDN.AddNode(node.GetId()) #gene disease boundary is 20000 (nodeId) #17047 is max gene id maxId = max(geneDict.keys()) #maxCliques = [] #counter = 0 for i in range(maxId, 0, -1): gene = G.GetNI(i) genDeg = gene.GetDeg() neighbours = [] for k in range(genDeg): neighbours.append(gene.GetNbrNId(k)) #if (counter<10): # maxCliques.append(neighbours) # counter += 1 #add edges among nodes for j in range(len(neighbours) - 1): for z in range(j + 1, len(neighbours)): #add edge HDN.AddEdge(neighbours[j], neighbours[z]) print i FOut = snap.TFOut('HDN.graph') G.Save(FOut) FOut.Flush() print "end of q4_2"
def generate_steam_graph():
    """Build a bipartite user-game graph from the steam items dump and save
    it, along with the two node-id lists, under graph/.

    Users get synthetic ids counting up from 600000; games keep their
    item ids (all below that boundary).
    """
    graph = snap.TUNGraph.New()
    user_ids = []
    next_user_id = 600000
    game_ids = []
    with open("data/australian_users_items.json") as f:
        for line in f:
            record = ast.literal_eval(line)
            # Read but unused beyond validation of the record's shape.
            user_id = record['user_id']
            item_count = int(record['items_count'])
            graph.AddNode(next_user_id)
            for item in record['items']:
                game_id = int(item['item_id'])
                if not graph.IsNode(game_id):
                    graph.AddNode(game_id)
                    game_ids.append(game_id)
                graph.AddEdge(next_user_id, game_id)
            user_ids.append(next_user_id)
            next_user_id += 1
    with open('graph/user_node.txt', 'w') as f:
        for uid in user_ids:
            f.write("%d\n" % uid)
    with open('graph/game_node.txt', 'w') as f:
        for gid in game_ids:
            f.write("%d\n" % gid)
    sink = snap.TFOut("graph/steam.graph")
    graph.Save(sink)
    sink.Flush()
def main():
    """Stream gzip'd subreddit JSON lines into a graph, reporting progress,
    and save the result as output/subreddits.graph."""
    subreddit_file = 'data/subreddits.gz'
    # First pass just counts lines so the progress bar has a total.
    print('Fetching number of lines in ' + subreddit_file + '...')
    total = sum(1 for _ in gzip.open(subreddit_file))
    records = (json.loads(line) for line in gzip.open(subreddit_file))
    graph = setup_graph()
    print('Parsing {}...'.format(subreddit_file))
    progress.init_progbar(total)
    for record in records:
        parse_subreddit(record, graph)
        progress.report_progress()
    progress.report_finished()
    print('Saving...')
    sink = snap.TFOut('output/subreddits.graph')
    graph.Save(sink)
    sink.Flush()
    print('Done')
def xmlScrape():
    """Stream-parse the DBLP XML dump into a coauthorship TNEANet and save
    it as coauthor.graph (Python 2 module).

    Node attrs: 'name' (author), 'exp' (publication count). Each coauthor
    pair gets one edge in each direction, added the first time the pair
    co-occurs.
    """
    authorsDict = {}  # utf-8 author name -> node id
    graph = snap.TNEANet.New()
    # Element tags we keep; everything else is cleared immediately to bound
    # memory while iterparsing the (very large) dump.
    used = [
        'article', 'inproceedings', 'proceedings', 'book', 'incollection',
        'phdthesis', 'mastersthesis', 'www', 'author'
    ]
    for event, elem in et.iterparse('/lfs/local/0/dzeng0/dblp/rep-dblp.xml', events=('start', 'end')):
        if event == 'end':
            if elem.tag not in used:
                elem.clear()
            elif elem.tag != 'author':
                # A publication element: register authors, bump their counts.
                authors = elem.findall('author')
                for author in authors:
                    name = author.text.encode('utf-8')
                    if name not in authorsDict:
                        id = graph.AddNode(-1)  # -1: let snap assign the id
                        authorsDict[name] = id
                        graph.AddStrAttrDatN(id, name, 'name')
                        graph.AddIntAttrDatN(id, 0, 'exp')
                    id = authorsDict[name]
                    graph.AddIntAttrDatN(id, graph.GetIntAttrDatN(id, 'exp') + 1, 'exp')
                # Connect every coauthor pair (both directions, once).
                for a1 in authors:
                    n1 = a1.text.encode('utf-8')
                    i1 = authorsDict[n1]
                    for a2 in authors:
                        n2 = a2.text.encode('utf-8')
                        i2 = authorsDict[n2]
                        if not graph.IsEdge(i1, i2) and i1 != i2:
                            eid = graph.AddEdge(i1, i2)
                            eid = graph.AddEdge(i2, i1)
                print elem.get('key'), len(authors)
                sys.stdout.flush()
                elem.clear()  # free the processed publication subtree
    fout = snap.TFOut('coauthor.graph')
    graph.Save(fout)
    fout.Flush()
def generate_steam_game_graph():
    """Project the bipartite steam user-game graph onto games: two games are
    connected when some user owns both. Saves the projection, reloads it,
    and prints a sampled clustering coefficient.

    :return: the game-game snap.TUNGraph
    """
    FIn = snap.TFIn("graph/steam.graph")
    G = snap.TUNGraph.Load(FIn)
    user_node_array = []  # 88310 entries
    with open('graph/user_node.txt', 'r') as f:
        for line in f:
            user_node_array.append(int(line))
    # O(1) membership tests; `NId in user_node_array` was an O(n) list scan
    # per node, quadratic over the whole graph.
    user_node_set = set(user_node_array)
    game_node_array = []  # 10978 entries
    with open('graph/game_node.txt', 'r') as f:
        for line in f:
            game_node_array.append(int(line))
    G_game = snap.TUNGraph.New()
    # add nodes
    for uid in game_node_array:
        G_game.AddNode(uid)
    # add edges: for each user, connect every pair of its games
    count = 0
    for node in G.Nodes():
        NId = node.GetId()
        if NId in user_node_set:
            ki = node.GetDeg()
            neid = []
            for i in range(ki):
                neid.append(node.GetNbrNId(i))
            for i in range(len(neid)):
                for j in range(i + 1, len(neid)):
                    G_game.AddEdge(neid[i], neid[j])
        count += 1
        if count % 1000 == 0:
            print("percentage: %f" % (count / (float(G.GetNodes()))))
    FOut = snap.TFOut("graph/steam_game.graph")
    G_game.Save(FOut)
    FOut.Flush()
    FIn = snap.TFIn("graph/steam_game.graph")
    G_game = snap.TUNGraph.Load(FIn)
    ClustCf = snap.GetClustCf(G_game, 1000)
    print("clustering coefficient: %f" % ClustCf)
    return G_game