def snap_load(filename="", src_path="", directed=False):
    """Load a SNAP graph and, when present, its node-label table.

    The graph is read from ``src_path + filename + ".graph"``.  When
    ``filename`` is empty no extension is appended, so ``src_path`` is used
    as the complete path.  If that file is missing but a ``.gml`` or
    ``.gpickle`` sibling exists, ``graph_converter`` is asked to produce the
    SNAP file first.

    :param filename: base name of the graph (without extension).
    :param src_path: path prefix that is concatenated in front of ``filename``.
    :param directed: load a ``TNGraph`` when true, otherwise a ``TUNGraph``.
    :return: ``(G, labels)`` when the label table is truthy, otherwise ``G``.
    :raises IOError: if no ``.graph``, ``.gml`` or ``.gpickle`` source exists
        (previously this surfaced as an UnboundLocalError on ``G``).
    """
    graph_extension = "" if filename == "" else ".graph"
    label_extension = ".label"
    base_path = src_path + filename
    graph_path = base_path + graph_extension

    if not os.path.isfile(graph_path):
        convertible = (os.path.isfile(base_path + ".gml")
                       or os.path.isfile(base_path + ".gpickle"))
        if not convertible:
            raise IOError(
                "no .graph, .gml or .gpickle file found for %r" % base_path)
        # The converter is expected to write the SNAP file at graph_path.
        graph_converter.gml_gpickle_to_snap_graph(filename, src_path)

    graph_class = snap.TNGraph if directed else snap.TUNGraph
    G = graph_class.Load(snap.TFIn(graph_path))

    labels = snap.TIntStrH()
    label_path = base_path + label_extension
    if os.path.isfile(label_path):
        labels.Load(snap.TFIn(label_path))

    if labels:
        return G, labels
    return G
def get_top_packages(graph_path, n):
    """Return the "pkg" attribute of the top ``n`` PageRank nodes.

    The network is loaded from ``graph_path`` as a ``TNEANet``.  PageRank
    values are cached next to the graph in ``<graph name>_pageranks`` and
    reused when that file already exists.  The working directory of the
    process is changed to the graph's directory as a side effect.

    :param graph_path: path of the saved ``.graph`` file.
    :param n: number of nodes requested from ``get_top_nodes_from_hashtable``.
    :return: list of package names, ordered by ascending second element of
        the pairs returned by ``get_top_nodes_from_hashtable``.
    """
    abs_graph_path = os.path.abspath(graph_path)
    workdir, graph_file = os.path.split(abs_graph_path)

    network = snap.TNEANet.Load(snap.TFIn(abs_graph_path))

    # Rebuild the node id => package name mapping from the "pkg" attribute.
    id_pkg_dict = {
        node.GetId(): network.GetStrAttrDatN(node.GetId(), "pkg")
        for node in network.Nodes()
    }

    # snap.py doesn't support absolute paths for some operations, so work
    # from inside the graph's directory.
    os.chdir(workdir)

    cache_file = graph_file.replace(".graph", "") + "_pageranks"
    scores = snap.TIntFltH()
    if os.path.isfile(cache_file):
        cached = snap.TFIn(cache_file)
        scores.Load(cached)
    else:
        # Damping factor 0.85; the original author notes a convergence
        # difference of 1e-4 and MaxIter of 100 for the remaining settings.
        snap.GetPageRank(network, scores, 0.85)
        out_stream = snap.TFOut(cache_file)
        scores.Save(out_stream)

    ranked = get_top_nodes_from_hashtable(scores, n)
    ranked.sort(key=itemgetter(1))
    return [id_pkg_dict[pair[0]] for pair in ranked]
def LoadGraph(name):
    """Load an undirected SNAP graph stored under ``res/others/``.

    ``".graph"`` is appended to ``name`` unless the text after its last
    ``"."`` is already ``"graph"``.
    """
    already_suffixed = name.split(".")[-1] == "graph"
    file_name = name if already_suffixed else name + ".graph"
    stream = snap.TFIn("res/others/" + file_name)
    return snap.TUNGraph.Load(stream)
def load_z_deta(self, index=0):
    """Load the saved ``LEdgeV`` and ``NodePerm`` vectors for ``index``.

    Both files are read from ``base_path`` using Windows-style separators.

    :param index: numeric suffix of the ``.bin`` files to read.
    :return: ``(LEdgeV, NodePerm)`` as a ``TIntTrV`` and a ``TIntV``.
    """
    edge_triples = snap.TIntTrV()
    edge_stream = snap.TFIn(f"{base_path}\\LEdgeV-{index}.bin")
    edge_triples.Load(edge_stream)

    node_permutation = snap.TIntV()
    perm_stream = snap.TFIn(f"{base_path}\\NodePerm-{index}.bin")
    node_permutation.Load(perm_stream)

    return edge_triples, node_permutation
def GetNbr(sw):
    """Provide graph neighbors.

    When saved adjacency state exists, every incoming message is read as a
    ``TIntV`` of nodes and passed to ``GetNeighbors``.  Otherwise the
    messages are read as ``TIntIntVV`` edge vectors, turned into adjacency
    lists with ``GetEdges``, saved, and a "targets" command is sent to
    destination 0.
    """
    taskname = sw.GetName()  # value is currently unused
    incoming = sw.GetMsgList()
    sw.log.debug("msglist %s" % incoming)

    with perf.Timer(sw.log, "LoadState-GetNbrCpp"):
        adjacency = LoadState(sw)

    if adjacency:
        # State is available: answer the neighbor requests.
        sw.log.debug('[%s] state available, length %d' % (sw.GetName(), adjacency.Len()))
        for message in incoming:
            # Each message file holds the nodes whose neighbors are wanted.
            node_stream = Snap.TFIn(Snap.TStr(sw.GetMsgName(message)))
            requested = Snap.TIntV(node_stream)
            GetNeighbors(sw, adjacency, requested)
    else:
        # No state yet: the first iteration receives the edges themselves.
        sw.log.debug('[%s] adjlist not found, initializing' % sw.GetName())
        all_edges = Snap.TIntIntVV()
        for message in incoming:
            edge_stream = Snap.TFIn(Snap.TStr(sw.GetMsgName(message)))
            chunk = Snap.TIntIntVV(edge_stream)
            Snap.AddVec64(all_edges, chunk)

        adjacency = GetEdges(sw, all_edges)
        sw.log.debug('[%s] saving adjlist of size %d now' % (sw.GetName(), adjacency.Len()))

        with perf.Timer(sw.log, "SaveState-GetNbrCpp"):
            SaveState(sw, adjacency)

        reply = {"src": sw.GetName(), "cmd": "targets", "body": {}}
        sw.Send(0, reply, "2")
def testRandomDataset7(self):
    """Export both unweighted datasets to SNAP format and load them back.

    The undirected dataset is loaded as a ``TUNGraph`` and the directed one
    as a ``TNGraph``; the test passes when neither load raises.
    """
    undirected = self.graph_unweighted_undirect
    utils.remove_if_file_exit(undirected.file_snap)
    print(undirected.to_snapformat())
    undirected_stream = snap.TFIn(undirected.file_snap)
    snap.TUNGraph.Load(undirected_stream)

    directed = self.graph_unweighted_direct
    self.assertTrue(directed.is_directed())
    utils.remove_if_file_exit(directed.file_snap)
    print(directed.to_snapformat())
    directed_stream = snap.TFIn(directed.file_snap)
    snap.TNGraph.Load(directed_stream)
def degreeDistribution():
    """Plot degree data for the original and the reconstructed Steam graph.

    Both graphs are loaded as ``TUNGraph`` from ``../graph/``, their node and
    edge counts are printed, and the three values returned by ``getDeg`` for
    each graph are handed to ``plot`` with a matching title.
    """
    # Read the original and the reconstructed graph.
    FIn = snap.TFIn("../graph/steam.graph")
    Go = snap.TUNGraph.Load(FIn)
    FIn = snap.TFIn("../graph/steam_user100_game1000.graph")
    Gn = snap.TUNGraph.Load(FIn)

    # A single pre-formatted argument prints "<nodes> <edges>" under both the
    # Python 2 print statement and the Python 3 print function.
    print("%s %s" % (Go.GetNodes(), Go.GetEdges()))
    print("%s %s" % (Gn.GetNodes(), Gn.GetEdges()))

    d1, user1, game1 = getDeg(Go)
    d2, user2, game2 = getDeg(Gn)
    plot(d1, user1, game1, 'Original Graph')
    plot(d2, user2, game2, 'Reconstructed Graph')
def GetNbr(sw):
    """Provide graph neighbors.

    With saved adjacency state, each incoming message is read as a ``TIntV``
    of nodes and passed to ``GetNeighbors``.  Without it, the messages are
    concatenated into one ``TIntV`` of edges, converted with ``GetEdges``,
    saved, and a "targets" command is sent to destination 0.
    """
    requests = sw.GetMsgList()
    sw.log.debug("msglist %s" % requests)

    with perf.Timer(sw.log, "LoadState-GetNbrCpp"):
        adj_lists = LoadState(sw)

    if adj_lists:
        # State is available: process the requests for neighbors.
        for request in requests:
            path = sw.GetMsgName(request)
            nodes = Snap.TIntV(Snap.TFIn(Snap.TStr(path)))
            GetNeighbors(sw, adj_lists, nodes)
    else:
        # State not found: the first iteration's input are the edges.
        edge_values = Snap.TIntV()
        for request in requests:
            path = sw.GetMsgName(request)
            part = Snap.TIntV(Snap.TFIn(Snap.TStr(path)))
            edge_values.AddV(part)

        adj_lists = GetEdges(sw, edge_values)
        sw.log.debug("state: %d" % adj_lists.Len())

        with perf.Timer(sw.log, "SaveState-GetNbrCpp"):
            SaveState(sw, adj_lists)

        outgoing = {"src": sw.GetName(), "cmd": "targets", "body": {}}
        sw.Send(0, outgoing, "2")
def GetNbr(sw):
    """Provide graph neighbors.

    Progress is written to ``sw.flog``.  With saved adjacency state, each
    incoming message is read as a ``TIntV`` of nodes and passed to
    ``GetNeighbors``.  Without it, the messages are concatenated into one
    ``TIntV`` of edges, converted with ``GetEdges``, saved, and a "targets"
    command is sent to destination 0.
    """
    taskname = sw.GetName()  # value is currently unused
    queue = sw.GetMsgList()

    sw.flog.write("msglist " + str(queue) + "\n")
    sw.flog.flush()

    state = LoadState()
    if state:
        # State is available: process the requests for neighbors.
        for entry in queue:
            msg_file = sw.GetMsgName(entry)
            # Read the input nodes.
            wanted = Snap.TIntV(Snap.TFIn(Snap.TStr(msg_file)))
            GetNeighbors(sw, state, wanted)
    else:
        # State not found: initialize it from the incoming edges.
        collected = Snap.TIntV()
        for entry in queue:
            msg_file = sw.GetMsgName(entry)
            piece = Snap.TIntV(Snap.TFIn(Snap.TStr(msg_file)))
            collected.AddV(piece)

        # First iteration: the input are edges, save the resulting state.
        state = GetEdges(collected)
        sw.flog.write("state " + str(state.Len()) + "\n")
        sw.flog.flush()
        SaveState(state)

        announcement = {"src": sw.GetName(), "cmd": "targets", "body": {}}
        sw.Send(0, announcement, "2")
def write_graph_to_csv(name):
    """Export the weighted Steam network as a CSV edge list.

    The network is loaded from ``graph/steam_weight_user_limit_100.graph``.
    Node ids are remapped to consecutive integers in iteration order.  The
    first CSV row holds the node count; every following row is
    ``source, destination, weight`` using the remapped ids and the integer
    edge attribute ``"weight"``.

    :param name: inserted into the output path
        ``csv/weighted_<name>_limit_100.csv``.
    """
    start = time.time()
    FIn = snap.TFIn("graph/steam_weight_user_limit_100.graph")
    G = snap.TNEANet.Load(FIn)
    print("finished loading: ", time.time() - start)

    attr = 'weight'

    # Put node ids into consecutive integers.  setdefault() keeps the first
    # index assigned to an id and tests membership on the dict itself, which
    # avoids the linear scan that `in node_dict.keys()` costs on Python 2.
    node_dict = {}
    for node in G.Nodes():
        node_dict.setdefault(node.GetId(), len(node_dict))
    print("finished generating node id dict: ", time.time() - start)

    # NOTE(review): on Python 3 the csv docs recommend opening the file with
    # newline='' to avoid blank rows on Windows; left unchanged because the
    # target Python version is not visible from here.
    with open('csv/weighted_%s_limit_100.csv' % name, mode='w') as f:
        writer = csv.writer(f, delimiter=',')
        writer.writerow([str(len(node_dict))])
        for edge in G.Edges():
            srcnid = node_dict[edge.GetSrcNId()]
            dstnid = node_dict[edge.GetDstNId()]
            value = G.GetIntAttrDatE(edge, attr)
            writer.writerow([str(srcnid), str(dstnid), str(value)])
    print("done: ", time.time() - start)
def generate_steam_edge_list():
    """Write the largest connected component of the Steam graph as CSV.

    Each row of ``graph/steam_edge_list.csv`` is ``user_id, "g" + other_id``.
    An endpoint counts as the user when its id is listed in
    ``graph/user_node.txt``; if the source id is not listed, the destination
    is written in the user column.

    ``graph/game_node.txt`` is no longer read: its contents were loaded but
    never used.
    """
    FIn = snap.TFIn("graph/steam.graph")
    G = snap.TUNGraph.Load(FIn)
    G = snap.GetMxWcc(G)

    # A set gives constant-time membership tests; the original list (about
    # 88k ids per the author's note) was scanned once per edge.
    with open('graph/user_node.txt', 'r') as f:
        user_nodes = {int(line) for line in f}

    # NOTE(review): on Python 3 the csv docs recommend newline='' here; left
    # unchanged because the target Python version is not visible from here.
    with open('graph/steam_edge_list.csv', 'w') as f:
        writer = csv.writer(f, delimiter=',')
        for edge in G.Edges():
            id1 = edge.GetSrcNId()
            id2 = edge.GetDstNId()
            if id1 in user_nodes:
                row = [str(id1), 'g' + str(id2)]
            else:
                row = [str(id2), 'g' + str(id1)]
            writer.writerow(row)
def sampleFeatures(family, feature, numSamples, apiGraph):
    """Compute one feature for ``numSamples`` randomly chosen graphs.

    Graph files are taken from ``data/graphs/<family>/`` in random order.
    Graphs without nodes or without edges are skipped and do not count
    towards ``numSamples``.

    :param family: sub-directory of ``data/graphs/`` to sample from.
    :param feature: key into ``graph_features.extractors``.
    :param numSamples: number of feature values to collect.
    :param apiGraph: when true use files whose name contains ``.apigraph``
        (binary ``TNEANet``), otherwise files containing ``.edges`` (edge
        lists).
    :return: numpy array of length ``numSamples``.
    :raises IndexError: if the directory holds fewer usable graphs than
        ``numSamples``.
    """
    path = 'data/graphs/' + family + '/'
    marker = '.apigraph' if apiGraph else '.edges'
    # Build a real list: random.shuffle() cannot shuffle the lazy iterator
    # that filter() returns on Python 3.
    graph_files = [f for f in os.listdir(path) if marker in f]
    random.shuffle(graph_files)

    features = np.zeros(numSamples)
    count = 0
    i = 0
    while count < numSamples:
        if i >= len(graph_files):
            raise IndexError(
                "only %d usable graph(s) in %r, %d requested"
                % (count, path, numSamples))
        f = graph_files[i]
        i += 1
        if apiGraph:
            G = snap.TNEANet.Load(snap.TFIn(path + f))
        else:
            G = snap.LoadEdgeList(snap.PNEANet, path + f, 0, 1)
        if G.GetEdges() == 0 or G.GetNodes() == 0:
            continue
        extractor = graph_features.extractors[feature]
        features[count] = extractor(G)
        count += 1
    return features
def avgDegreeDist(family, direction, numSamples, apiGraph):
    """Average the in- or out-degree histogram over sampled graphs.

    Up to ``numSamples`` graph files are drawn at random from
    ``data/graphs/<family>/``.  For every degree the node counts of all
    sampled graphs are summed and then divided by ``numSamples``.

    :param family: sub-directory of ``data/graphs/`` to sample from.
    :param direction: ``'in'`` for in-degree; anything else means out-degree.
    :param numSamples: number of graphs to sample and the divisor used.
    :param apiGraph: when true use files whose name contains ``.apigraph``
        (binary ``TNEANet``), otherwise files containing ``.edges``.
    :return: numpy array indexed by degree.
    :raises ValueError: if no graph was sampled.
    """
    path = 'data/graphs/' + family + '/'
    marker = '.apigraph' if apiGraph else '.edges'
    # Build a real list: random.shuffle() cannot shuffle the lazy iterator
    # that filter() returns on Python 3.
    graph_files = [f for f in os.listdir(path) if marker in f]
    random.shuffle(graph_files)
    sampled = graph_files[:numSamples]

    if apiGraph:
        Gs = [snap.TNEANet.Load(snap.TFIn(path + f)) for f in sampled]
    else:
        Gs = [snap.LoadEdgeList(snap.PNEANet, path + f, 0, 1) for f in sampled]
    if not Gs:
        raise ValueError("no graphs sampled from %r" % path)

    if direction == 'in':
        maxdeg = max(G.GetNI(snap.GetMxInDegNId(G)).GetInDeg() for G in Gs)
    else:
        maxdeg = max(G.GetNI(snap.GetMxOutDegNId(G)).GetOutDeg() for G in Gs)

    avg_deg_dist = np.zeros(maxdeg + 1)
    for G in Gs:
        DegToCntV = snap.TIntPrV()
        if direction == 'in':
            snap.GetInDegCnt(G, DegToCntV)
        else:
            snap.GetOutDegCnt(G, DegToCntV)
        for item in DegToCntV:
            avg_deg_dist[item.GetVal1()] += item.GetVal2()

    # NOTE(review): the divisor is numSamples even when fewer files were
    # available; kept as in the original.
    return avg_deg_dist / numSamples
def LoadGraph(pth='hw1-q2.graph'):
    """Load an undirected SNAP graph from a binary file.

    :param pth: path of the saved graph
    :return: the loaded ``TUNGraph``
    """
    stream = snap.TFIn(pth)
    return snap.TUNGraph.Load(stream)
def construct():
    """Prune high-degree nodes from the Steam graph and save the result.

    Nodes with id >= 600000 are removed when their degree exceeds 100, and
    nodes with id < 600000 when their degree exceeds 1000.  Nodes left with
    degree 0 are then removed as well.  Node and edge counts are printed
    after loading and after each pruning pass, and the result is saved to
    ``../graph/steam_user100_game1000.graph``.
    """
    FIn = snap.TFIn("../graph/steam.graph")
    G = snap.TUNGraph.Load(FIn)
    # A single pre-formatted argument prints "<nodes> <edges>" under both the
    # Python 2 print statement and the Python 3 print function.
    print("%s %s" % (G.GetNodes(), G.GetEdges()))

    # presumably ids >= 600000 are users and smaller ids are games, going by
    # the output file name -- TODO confirm.
    # Ids are collected first; nodes are deleted only after iteration ends.
    over_limit = []
    for ni in G.Nodes():
        nid = ni.GetId()
        if nid >= 600000 and ni.GetDeg() > 100:
            over_limit.append(nid)
        elif nid < 600000 and ni.GetDeg() > 1000:
            over_limit.append(nid)
    for nid in over_limit:
        G.DelNode(nid)
    print("%s %s" % (G.GetNodes(), G.GetEdges()))

    isolated = [ni.GetId() for ni in G.Nodes() if ni.GetDeg() == 0]
    for nid in isolated:
        G.DelNode(nid)
    print("%s %s" % (G.GetNodes(), G.GetEdges()))

    FOut = snap.TFOut("../graph/steam_user100_game1000.graph")
    G.Save(FOut)
    FOut.Flush()
def clustering_coefficient(input):
    """Load a directed graph and print its clustering coefficient.

    :param input: path of a binary ``TNGraph`` file.
    """
    print("Loading graph...")
    stream = snap.TFIn(input)
    directed_graph = snap.TNGraph.Load(stream)

    print("Calculating clustering coefficient...")
    # -1 presumably disables node sampling in GetClustCf -- verify against
    # the SNAP documentation.
    coefficient = snap.GetClustCf(directed_graph, -1)
    print("Clustering Coefficient:", coefficient)
def to_snap(data):
    """
    convert the dataset to a SNAP graph.

    A binary SNAP file at ``data.file_snap`` is preferred when it exists.
    Otherwise the edge list at ``data.file_edges`` is read, after creating it
    with ``data.to_edgelist()`` if necessary.  Only columns 0 and 1 of the
    edge list are passed to SNAP.

    The disabled toggles of the original (``if 1 and ...`` and the
    unreachable ``if False and data.is_weighted()`` guard) were removed;
    weighted datasets are accepted exactly as before.

    :param data: :py:class:`gct.Dataset`
    :rtype: SNAP graph
    """
    import snap

    if utils.file_exists(data.file_snap):
        FIn = snap.TFIn(data.file_snap)
        graph_class = snap.TNGraph if data.is_directed() else snap.TUNGraph
        return graph_class.Load(FIn)

    fname = data.file_edges
    if not utils.file_exists(fname):
        data.to_edgelist()

    graph_type = snap.PNGraph if data.is_directed() else snap.PUNGraph
    return snap.LoadEdgeList(graph_type, fname, 0, 1)
def main ():
    """Print summary information about the attributed graph of one rumor.

    The directed graph of the selected rumor is loaded, converted to a
    ``PNEANet`` so attributes can be attached to its nodes, enriched through
    ``Get_Graph_with_Attributes_New`` and described with ``snap.PrintInfo``
    in ``S18_5_Output.txt`` inside the rumor's output folder.
    """
    import json
    import snap
    import graphviz
    import matplotlib.pyplot as plt
    import numpy as np
    import xlrd

    # ----- The common area -----
    rumor_number = "21"
    rumor_dir = 'D:\\Papers\\Social Network Mining\\Analysis_of_Rumor_Dataset\\Step 18\\Rumor_' + rumor_number
    path_input = rumor_dir + '\\Input\\'
    workbook_input1_D = xlrd.open_workbook(path_input + 'DATASET.xlsx', on_demand = True)
    path_jsonl = path_input + 'Rumor_' + rumor_number + '.jsonl'
    path_graph = path_input + 'Rumor_' + rumor_number + '.graph'
    path_output = rumor_dir + '\\Output\\'

    graph_stream = snap.TFIn(path_graph)
    directed_graph = snap.TNGraph.Load(graph_stream)
    # Converting to a network with attributes lets attributes be assigned to
    # the graph nodes.
    attributed_graph = snap.ConvertGraph(snap.PNEANet, directed_graph)
    attributed_graph = Get_Graph_with_Attributes_New(
        path_jsonl, attributed_graph, workbook_input1_D)

    # ----- The specific area -----
    snap.PrintInfo(attributed_graph, "Python type PNEANet",
                   path_output + "S18_5_Output.txt", False)
def simpleLoadFromFile(name):
    """Load a saved graph together with its pickled node map.

    Reads ``DATA_PATH + name + ".graph"`` as a ``TUNGraph`` and unpickles
    ``DATA_PATH + name + ".nodes"``.

    :param name: base name shared by both files.
    :return: ``(G, nodesMap)``.
    """
    G = snap.TUNGraph.Load(snap.TFIn(DATA_PATH + name + ".graph"))
    # Binary mode is what pickle requires on Python 3, and the with-block
    # closes the handle that the original left open.
    # NOTE: unpickling can execute arbitrary code; only load trusted files.
    with open(DATA_PATH + name + ".nodes", 'rb') as nodes:
        nodesMap = pickle.load(nodes)
    return G, nodesMap
# variation for simple OSM saving.
def load_binary_graph():
    """
    Read the graph stored in binary form in ``hw1-q2.graph``.

    :return: the graph as a SNAP ``TUNGraph``
    """
    stream = snap.TFIn("hw1-q2.graph")
    return snap.TUNGraph.Load(stream)
def getGraphFromFile(gfile):
    """Load an undirected graph from a binary file or an edge list.

    Paths ending in ``.graph`` are read as binary ``TUNGraph`` files; any
    other path is parsed as an edge list using columns 0 and 1.
    """
    if not gfile.endswith('.graph'):
        return snap.LoadEdgeList(snap.PUNGraph, gfile, 0, 1)
    stream = snap.TFIn(gfile)
    return snap.TUNGraph.Load(stream)
def load(filename):
    """Loads an EIGraph from the given `filename` and the possible ratings.

    The SNAP graph is read from `filename`.  Every node id is classified as
    an entity or an item, and the weights are unmarshalled from the file
    named by `EIGraph._get_meta_filename`.  The distinct weight values,
    sorted, become the possible ratings.
    """
    snap_graph = snap.TUNGraph.Load(snap.TFIn(filename))

    graph = EIGraph()
    graph.name = filename
    graph._G = snap_graph

    for node in snap_graph.Nodes():
        nid = node.GetId()
        if EIGraph.nid_is_entity(nid):
            graph.num_entities += 1
            graph.entities.append(nid)
            continue
        assert EIGraph.nid_is_item(nid)
        graph.num_items += 1
        graph.items.append(nid)

    with open(EIGraph._get_meta_filename(filename), 'rb') as meta_file:
        graph._weights = marshal.load(meta_file)

    distinct_ratings = sorted(set(graph._weights.values()))

    # Setup the graph with the range of possible ratings.
    graph.rating_range = (1, 5)
    graph.possible_ratings = distinct_ratings
    return graph
def LoadState():
    """Return the saved adjacency lists, or ``None`` when none exist.

    The state file name comes from the module-level ``sw`` object; the file
    is read as a ``TIntIntVH``.
    """
    state_file = sw.GetStateName()
    if os.path.exists(state_file):
        stream = Snap.TFIn(Snap.TStr(state_file))
        return Snap.TIntIntVH(stream)
    return None
def loadCollaborationGraph():
    """
    Load the collaboration graph from ``collaboration.graph``.

    return type : A TUNGraph
    :returns: Collaboration Graph
    """
    stream = snap.TFIn("collaboration.graph")
    return snap.TUNGraph.Load(stream)
def getGraph(filename):
    """Load a ``TNEANet`` from ``filename`` and print its size.

    :param filename: path of the binary network file.
    :return: the loaded network.
    """
    FIn = snap.TFIn(filename)
    G = snap.TNEANet.Load(FIn)
    # A single pre-formatted argument keeps the Python 2 output (label, one
    # space, value) and also works with the Python 3 print function.
    print("%s %s" % ("Get nodes: ", G.GetNodes()))
    print("%s %s" % ("Get edges: ", G.GetEdges()))
    return G
def GenGraph(sw):
    """Generate the graph edges.

    Every incoming message is read as a ``TIntV`` of stubs.  The combined
    stubs are segmented, randomly assigned to ``gen_tasks`` tasks, converted
    back from segmented form, and the result of each task is sent to the
    destination with the same index.
    """
    msglist = sw.GetMsgList()
    sw.log.debug("msglist: %s" % msglist)

    # Collect the stub vectors of all input messages into one vector.
    Stubs = Snap.TIntV()
    for item in msglist:
        name = sw.GetMsgName(item)
        FIn = Snap.TFIn(Snap.TStr(name))
        Vec = Snap.TIntV(FIn)
        Stubs.AddV(Vec)

    ntasks = int(sw.GetVar("gen_tasks"))
    seg_bits = int(sw.GetVar('seg_bits'))
    tsize = sw.GetRange()

    Tasks = Snap.TIntVVV(ntasks)
    # Segmentize the stubs.
    Stubs = Snap.segment(Stubs, seg_bits)
    # Do the segmented random edge assignment.
    Snap.AssignRandomEdges64(Stubs, Tasks, tsize, seg_bits)
    # Desegment the results.
    Tasks = Snap.desegmentRandomizedEdges(Tasks, seg_bits, tsize)

    # Send messages.  range() replaces the Python 2-only xrange().
    for i in range(Tasks.Len()):
        sw.log.debug("sending task: %d, len: %d" % (i, Tasks.GetVal(i).Len()))
        sw.Send(i, Tasks.GetVal(i), swsnap=True)
def snapLoad(to_load, file_name):
    """
    Load an object from a file.

    :param to_load: object to load into
    :param file_name: name of the file to read
    """
    to_load.Load(snap.TFIn(file_name))
def loadFromFile(name):
    """Load a saved graph with its pickled id and coordinate maps.

    Reads ``DATA_PATH + name + ".graph"`` as a ``TUNGraph`` and unpickles
    the ``.id`` and ``.coords`` files that share the same base name.

    :param name: base name shared by the three files.
    :return: ``(G, idToOsmid, coordsMap)``.
    """
    G = snap.TUNGraph.Load(snap.TFIn(DATA_PATH + name + ".graph"))
    # Binary mode is what pickle requires on Python 3, and the with-blocks
    # close the handles that the original left open.
    # NOTE: unpickling can execute arbitrary code; only load trusted files.
    with open(DATA_PATH + name + ".id", 'rb') as idIn:
        idToOsmid = pickle.load(idIn)
    with open(DATA_PATH + name + ".coords", 'rb') as coords:
        coordsMap = pickle.load(coords)
    return G, idToOsmid, coordsMap
def _rehydrate_snap_graph(self):
    """
    IMPORTANT: run this after restoring from a pickle.

    The snap graph object doesn't work with the pickle API, so self._graph
    is chopped off before pickling and a restored osmAnalyzer comes back
    without it. This reloads the snap graph for self._city_name from the
    data/ folder.
    """
    graph_file = DATA_PATH + self._city_name + ".graph"
    stream = snap.TFIn(graph_file)
    self._graph = snap.TUNGraph.Load(stream)
def GenGraph(sw):
    """Generate the graph edges.

    Every incoming message is read as a ``TIntIntVV`` of stubs and merged
    with ``Snap.AddVec64``.  ``Snap.AssignRandomEdges64`` then distributes
    the stubs over ``gen_tasks`` tasks, and the result of each task is sent
    to the destination with the same index.
    """
    msglist = sw.GetMsgList()
    sw.log.debug("msglist: %s" % msglist)

    # Collect the stub vectors of all input messages.
    Stubs = Snap.TIntIntVV()
    for item in msglist:
        name = sw.GetMsgName(item)
        FIn = Snap.TFIn(Snap.TStr(name))
        Vec64 = Snap.TIntIntVV(FIn)
        Snap.AddVec64(Stubs, Vec64)

    # Nodes in each task.
    tsize = sw.GetRange()
    # Number of bits in our segment (so the segment size is 1 << seg_bits).
    seg_bits = int(sw.GetVar('seg_bits'))
    # Number of tasks.
    ntasks = int(sw.GetVar("gen_tasks"))

    # Vector of length ntasks containing vectors.
    Tasks = Snap.TIntVVV(ntasks)

    sw.log.debug('[%s] about to assign random edges' % sw.GetName())
    # Handles shuffling and random assignment of edges.
    Snap.AssignRandomEdges64(Stubs, Tasks, tsize, seg_bits)
    sw.log.debug('[%s] done assigning random edges' % sw.GetName())

    # Send messages.  range() replaces the Python 2-only xrange().
    for i in range(Tasks.Len()):
        # i is bound as a default argument so the lazily formatted message
        # still reports this iteration's index if it is rendered later.
        sw.log.debug(LazyStr(lambda i=i: '[%s] sending TIntIntVV of memory size %d to %d' %
                             (sw.GetName(), Snap.GetMemSize64(Tasks.GetVal(i)), i)))
        sw.Send(i, Tasks.GetVal(i), swsnap=True)