def load_physician_referral_data(infilename):
    """
    Load the US physician referral data from specified zipfile

    The single data file is extracted from the archive into a freshly
    created temporary directory, loaded into a SNAP table, and converted
    to a graph whose edges run from column NPI_1 to column NPI_2.  The
    temporary directory is always removed (via cleanup_tmpdir), whether
    or not loading succeeds.

    Parameters:
       infilename - path name of zipfile to load from

    Return value:
       SNAP TNGraph object built from the data
       (created by snap.ToGraph with snap.PNGraph)
    """
    # Name of the one member of the archive that holds the edge data.
    datafile = 'physician-shared-patient-patterns-2014-days30.txt'
    tmpdir = tempfile.mkdtemp()
    try:
        # 'with' guarantees the archive handle is closed even if the
        # extraction fails (e.g. the member is missing from the zipfile).
        with zipfile.ZipFile(infilename, 'r') as archive:
            archive.extract(datafile, tmpdir)
        filename = os.path.join(tmpdir, datafile)

        context = snap.TTableContext()
        schema = snap.Schema()
        ## schema.Add(snap.TStrTAttrPr("NPI_1", snap.atInt))
        ## schema.Add(snap.TStrTAttrPr("NPI_2", snap.atInt))
        # the above 2 lines worked with SNAP 4.0.0 on VLSCI
        # but now using SNAP 4.1.0
        # on hpc.ics.usi.ch find that all ids are -1 so graph wrong.
        # Cannot work out why so changed to string not int to try to fix it:
        schema.Add(snap.TStrTAttrPr("NPI_1", snap.atStr))
        schema.Add(snap.TStrTAttrPr("NPI_2", snap.atStr))
        ## schema.Add(snap.TStrTAttrPr("count", snap.atInt))
        ## schema.Add(snap.TStrTAttrPr("unique_bene", snap.atInt))
        ## schema.Add(snap.TStrTAttrPr("same_day_count", snap.atInt))
        # The above 3 lines also worked fine with SNAP 4.0.0 before but
        # now fail on SNAP 4.1.0 (seems to be due to spaces in CSV fields,
        # not inexplicable like first two which have no spaces) but not using
        # them at the moment anyway so easier to just make (unused) strings:
        schema.Add(snap.TStrTAttrPr("count", snap.atStr))
        schema.Add(snap.TStrTAttrPr("unique_bene", snap.atStr))
        schema.Add(snap.TStrTAttrPr("same_day_count", snap.atStr))

        # Comma-separated file; the final TBool(False) is passed through
        # unchanged from the original call.
        table = snap.TTable.LoadSS(schema, filename, context, ",",
                                   snap.TBool(False))
        G = snap.ToGraph(snap.PNGraph, table, "NPI_1", "NPI_2", snap.aaFirst)
    finally:
        # Remove the extracted copy regardless of success or failure.
        cleanup_tmpdir(tmpdir)

    return G
import snap

# Edge list to analyse; loaded below as two tab-separated string columns.
# Raw string: the backslashes are Windows path separators, not escape
# sequences (the resulting value is identical to the non-raw literal).
graphfilename = r"C:\Python27\HW1\wiki-vote.txt"

# Two-column schema (source id, destination id), both read as strings.
schema = snap.Schema()
context = snap.TTableContext()
schema.Add(snap.TStrTAttrPr("srcID", snap.atStr))
schema.Add(snap.TStrTAttrPr("dstID", snap.atStr))
sample_table = snap.TTable.LoadSS(schema, graphfilename, context, "\t",
                                  snap.TBool(False))

# graph will be an object of type snap.PNGraph
graph = snap.ToGraph(snap.PNGraph, sample_table, "srcID", "dstID",
                     snap.aaFirst)

# The single-argument print(...) form below produces the same output under
# Python 2 (parenthesised expression) and Python 3 (function call).

# number of nodes with non-zero degree
Count = snap.CntNonZNodes(graph)
print("Count of nodes with degree greater than 0 is %d" % Count)

# number of nodes with zero out-degree
Count = snap.CntOutDegNodes(graph, 0)
print("Count of nodes with out-degree 0 is %d" % Count)

# number of nodes with zero in-degree
Count = snap.CntInDegNodes(graph, 0)
print("Count of nodes with in-degree 0 is %d" % Count)

# number of unique directed edges
Count = snap.CntUniqDirEdges(graph)
print("Count of directed edges is %d" % Count)

# number of unique undirected edges
Count = snap.CntUniqUndirEdges(graph)
print("Count of undirected edges is %d" % Count)

# number of self edges
Count = snap.CntSelfEdges(graph)
print("Count of self edges is %d" % Count)
# Rename # >>> posts.rename('UserId','Expert') posts.Rename("UserId", "Expert") t.show("rename", posts) # Join # >>> edges = questions.join(posts, ['AcceptedAnswerId'], ['PostId']) edges = questions.Join("t1.AcceptedAnswerId", posts, "PostId") t.show("join", edges) # Create haskell-specific Q&A graph # >>> graph = posts.graph('Asker', 'Expert', directed = True) edges.SetSrcCol("t1_t2.Asker") edges.SetDstCol("t1.Expert") graph = snap.ToGraph(edges, snap.aaFirst) t.show("graph", graph) # Compute Authority score # >>> hits = graph.hits('Authority', 'Hub') # note: the code below creates a table (Node name, Authority score) - the hub score is not used HTHub = snap.TIntFltH() HTAuth = snap.TIntFltH() snap.GetHits(graph, HTHub, HTAuth) authority = snap.TTable.New("authority", HTAuth, "Expert", AUTHORITY_ATTRIBUTE, context, snap.TBool(False)) t.show("authority score", authority) # b) Compute comment scores # Load comments
# Join # >>> t3 = t1.join(t2) #t3 = t1.Join("PostId", t2, "PostId") #t.show("join", t3) # Join # >>> t4 = t3.join(t1, ["AnswerId"], ["PostId"]) t4 = t1.Join("AnswerId", t1, "PostId") t.show("join", t4) # Graph # >>> graph = t4.graph("UserId_1", "UserId_2") t4.SetSrcCol("t1_1.UserId") t4.SetDstCol("t1_2.UserId") graph = snap.ToGraph( t4, snap.aaFirst ) # ToGraphPerGroup should be able to support grouping on string columns! t.show("graph", graph) # Get authority scores HTHub = snap.TIntFltH() HTAuth = snap.TIntFltH() snap.GetHits(graph, HTHub, HTAuth) t.show("hits", graph) t5 = snap.TTable.TableFromHashMap("t5", HTAuth, "UserId", "Authority", context, snap.TBool(False)) t.show("authority score", t5) # Select top entries # >>> t.select('Authority > 0.0')
# Self-join # >>> table.selfjoin(table, ['Key']) table = table.SelfJoin("Key") t.show("join", table) # Select # >>> table.select('Author_1 != Author_2') table.SelectAtomic("1_2_1.1.Author", "1_2_2.1.Author", snap.NEQ) t.show("select", table) # Create network # >>> table.graph('Author_1', 'Author_2', directed=False) table.SetSrcCol("1_2_1.1.Author") table.SetDstCol("1_2_2.1.Author") graph = snap.ToGraph(table, snap.aaFirst) t.show("graph", graph) # Compute PageRank score # >>> pagerank = graph.pageRank('PageRank') HT = snap.TIntFltH() snap.GetPageRank(graph, HT) pagerank = snap.TTable.New("PR", HT, "Author", PAGE_RANK_ATTRIBUTE, context, snap.TBool(True)) t.show("page rank", pagerank) # Order by PageRank score (in descending order) # >>> pagerank.order(['PageRank'], desc = True) V = snap.TStrV() V.Add(PAGE_RANK_ATTRIBUTE) pagerank.Order(V, "", snap.TBool(False), snap.TBool(False)) t.show("order", pagerank)
r.show("__references__") # load context print time.ctime(), "loading context ..." context.Load(FIn) t.show("loadbin context", RefsT) r.show("__context__") print time.ctime(), "done" # In[2]: # Create the network refs_schema = map(lambda x: x.GetVal1(), RefsT.GetSchema()) print time.ctime(), "Creating network ..." net = snap.ToGraph(snap.PNGraph, RefsT, refs_schema[0], refs_schema[1], snap.aaFirst) print time.ctime(), "done." # In[3]: # Compute InDegV from references graph to get number of citations of each paper. print time.ctime(), "Computing indegv ..." InDegV = snap.TIntPrV() snap.GetNodeInDegV(net, InDegV) t.show("indegv", InDegV) r.show("__InDegV__") print time.ctime() # In[4]: # Compute PageRank from references graph.
# Script fragment: pairs questions with their accepted answers, builds an
# undirected graph between the two owner-id columns, computes PageRank and
# saves the score table in binary form.
# NOTE(review): `t`, `r`, `questions`, `table`, `context` and `dstfile` are
# defined outside this fragment; `t.show` / `r.show` are presumably timing /
# resource reporting helpers -- confirm against their definitions.
t.show("select tag = 'python'", questions)
r.show("__selecttagpython__")

# Questions: AcceptedAnswerId != 0.  The result is not assigned, so the
# selection presumably filters the table in place -- confirm.
questions.SelectAtomicIntConst("AcceptedAnswerId", 0, snap.NEQ)
t.show("select questions", questions)
r.show("__selectquestions__")

# Answers: rows of `table` with AcceptedAnswerId == 0.
table.SelectAtomicIntConst("AcceptedAnswerId", 0, snap.EQ)
t.show("select answers", table)
r.show("__selectanswers__")

# Match each question's AcceptedAnswerId with the answer row's Id.
qa = questions.Join("AcceptedAnswerId", table, "Id")
t.show("join", qa)
r.show("__join__")

# Source column is "2.OwnerUserId", destination is "1.OwnerUserId"
# (presumably the answer-side and question-side owners -- confirm the
# join's column-prefix convention).
graph = snap.ToGraph(snap.PUNGraph, qa, "2.OwnerUserId", "1.OwnerUserId", snap.aaFirst)
t.show("graph", graph)
r.show("__graph__")

PRankH = snap.TIntFltH()
# NOTE(review): 0.85, 1e-4, 100 are presumably damping factor, convergence
# tolerance and iteration cap -- confirm against the GetPageRank signature.
snap.GetPageRank(graph, PRankH, 0.85, 1e-4, 100)
# This variant of TTable.New takes a table name ("PR") as its first argument.
prtable = snap.TTable.New("PR", PRankH, "UserId", "PageRank", context, snap.TBool(True))
t.show("pagerank", prtable)
r.show("__pagerank__")

# Write the PageRank table to `dstfile`.
FOut = snap.TFOut(dstfile)
prtable.Save(FOut)
t.show("save bin", prtable)
r.show("__savebin__")
# Script fragment: reports graph sizes while timing a binary save, a binary
# load, a text-to-table load and a table-to-graph conversion.
# NOTE(review): `t`, `printtime`, `G1`, `G3`, `binname` and `txtname` are
# defined outside this fragment; `printtime(t, label)` presumably prints the
# elapsed time and returns a new timestamp -- confirm against its definition.
print("G3 nodes", G3.GetNodes())
print("G3 edges", G3.GetEdges())

t = printtime(t, "saving the graph to binary")
FOut = snap.TFOut(binname)
# Note: it is G1 that is saved here, not the G3 reported just above.
G1.Save(FOut)
# Flush before the same file is opened for reading below.
FOut.Flush()

t = printtime(t, "reading the graph from binary")
FIn = snap.TFIn(binname)
G4 = snap.TUNGraph.Load(FIn)
print("G4 nodes", G4.GetNodes())
print("G4 edges", G4.GetEdges())

t = printtime(t, "reading the graph as table")
context = snap.TTableContext()
schema = snap.Schema()
# Two integer columns, loaded from a tab-separated file.
schema.Add(snap.TStrTAttrPr("SrcID", snap.atInt))
schema.Add(snap.TStrTAttrPr("DstID", snap.atInt))
T1 = snap.TTable.LoadSS(schema, txtname, context, "\t", snap.TBool(False))
print("T1 rows", T1.GetNumRows())

t = printtime(t, "converting table to graph")
G5 = snap.ToGraph(snap.PUNGraph, T1, "SrcID", "DstID", snap.aaFirst)
print("G5 nodes", G5.GetNodes())
print("G5 edges", G5.GetEdges())

# Final report; the returned value is not needed any more.
printtime(t, "done")
# Script fragment: pairs questions with their accepted answers, builds an
# undirected graph between the two owner-id columns, computes PageRank and
# saves the score table in binary form.
# NOTE(review): `t`, `r`, `questions`, `table`, `context` and `dstfile` are
# defined outside this fragment; `t.show` / `r.show` are presumably timing /
# resource reporting helpers -- confirm against their definitions.
t.show("select tag = 'python'", questions)
r.show("__selecttagpython__")

# Questions: AcceptedAnswerId != 0.  The result is not assigned, so the
# selection presumably filters the table in place -- confirm.
questions.SelectAtomicIntConst("AcceptedAnswerId", 0, snap.NEQ)
t.show("select questions", questions)
r.show("__selectquestions__")

# Answers: rows of `table` with AcceptedAnswerId == 0.
table.SelectAtomicIntConst("AcceptedAnswerId", 0, snap.EQ)
t.show("select answers", table)
r.show("__selectanswers__")

# Match each question's AcceptedAnswerId with the answer row's Id.
qa = questions.Join("AcceptedAnswerId", table, "Id")
t.show("join", qa)
r.show("__join__")

# Source column is "OwnerUserId-2", destination is "OwnerUserId-1"
# (presumably the answer-side and question-side owners -- confirm the
# join's column-suffix convention).
graph = snap.ToGraph(snap.PUNGraph, qa, "OwnerUserId-2", "OwnerUserId-1", snap.aaFirst)
t.show("graph", graph)
r.show("__graph__")

PRankH = snap.TIntFltH()
# NOTE(review): 0.85, 1e-4, 100 are presumably damping factor, convergence
# tolerance and iteration cap -- confirm against the GetPageRank signature.
snap.GetPageRank(graph, PRankH, 0.85, 1e-4, 100)
# This variant of TTable.New is called without a leading table name.
prtable = snap.TTable.New(PRankH, "UserId", "PageRank", context, snap.TBool(True))
t.show("pagerank", prtable)
r.show("__pagerank__")

# Write the PageRank table to `dstfile`.
FOut = snap.TFOut(dstfile)
prtable.Save(FOut)
t.show("save bin", prtable)
r.show("__savebin__")
# Join # >>> t3 = t1.join(t2) t3 = t1.Join("PostId", t2, "PostId") t.show("join", t3) # Join # >>> t4 = t3.join(t1, ["AnswerId"], ["PostId"]) t4 = t3.Join("t1.AnswerId", t1, "PostId") t.show("join", t4) # Graph # >>> graph = t4.graph("UserId_1", "UserId_2") #t4.SetSrcCol("t1_t2.t1.UserId") #t4.SetDstCol("t1.UserId") graph = snap.ToGraph( snap.PNGraph, t4, "t1_t2.t1.UserId", "t1.UserId", snap.aaFirst ) # ToGraphPerGroup should be able to support grouping on string columns! t.show("graph", graph) #graph.Dump() # Get authority scores HTHub = snap.TIntFltH() HTAuth = snap.TIntFltH() snap.GetHits(graph, HTHub, HTAuth) t.show("hits", graph) t5 = snap.TTable.TableFromHashMap("t5", HTAuth, "UserId", "Authority", context, snap.TBool(False)) t.show("authority score", t5) # Select top entries