Beispiel #1
0
def load_physician_referral_data(infilename):
    """
    Load the US physician referral data from the specified zipfile.

    The member 'physician-shared-patient-patterns-2014-days30.txt' is
    extracted into a fresh temporary directory, loaded as a
    comma-separated SNAP table, and converted to a graph whose edges go
    from column NPI_1 to column NPI_2. The temporary directory is passed
    to cleanup_tmpdir() before returning, whether or not loading succeeded.

    Parameters:
       infilename - path name of zipfile to load from

    Return value:
       SNAP TNGraph object (requested as snap.PNGraph) built from the data
    """
    datafile = 'physician-shared-patient-patterns-2014-days30.txt'
    tmpdir = tempfile.mkdtemp()
    try:
        # The context manager closes the archive even when extract() raises
        # (e.g. the member is missing), so the file handle is never leaked.
        with zipfile.ZipFile(infilename, 'r') as archive:
            archive.extract(datafile, tmpdir)
        filename = os.path.join(tmpdir, datafile)

        context = snap.TTableContext()
        schema = snap.Schema()
        # The two NPI columns were originally declared as snap.atInt, which
        # worked with SNAP 4.0.0 on VLSCI. With SNAP 4.1.0 on
        # hpc.ics.usi.ch all ids came out as -1 (so the graph was wrong),
        # for reasons not understood; loading them as strings avoids that.
        schema.Add(snap.TStrTAttrPr("NPI_1", snap.atStr))
        schema.Add(snap.TStrTAttrPr("NPI_2", snap.atStr))
        # The remaining three columns were also snap.atInt under SNAP 4.0.0
        # but fail on SNAP 4.1.0 (seems to be due to spaces in the CSV
        # fields). They are not used at the moment anyway, so they are
        # simply declared as (unused) strings.
        schema.Add(snap.TStrTAttrPr("count", snap.atStr))
        schema.Add(snap.TStrTAttrPr("unique_bene", snap.atStr))
        schema.Add(snap.TStrTAttrPr("same_day_count", snap.atStr))
        table = snap.TTable.LoadSS(schema, filename, context, ",",
                                   snap.TBool(False))
        G = snap.ToGraph(snap.PNGraph, table, "NPI_1", "NPI_2", snap.aaFirst)
    finally:
        # Always remove the extracted copy of the data.
        cleanup_tmpdir(tmpdir)

    return G
Beispiel #2
0
import snap

# Load the wiki-vote edge list and print some basic degree/edge statistics.

# Raw string: the Windows path contains backslashes that must not be read
# as escape sequences (the value is unchanged, but it no longer depends on
# '\P', '\H' and '\w' happening to be invalid escapes).
graphfilename = r"C:\Python27\HW1\wiki-vote.txt"
schema = snap.Schema()
context = snap.TTableContext()
# Two tab-separated string columns: source and destination node ids.
schema.Add(snap.TStrTAttrPr("srcID", snap.atStr))
schema.Add(snap.TStrTAttrPr("dstID", snap.atStr))
sample_table = snap.TTable.LoadSS(schema, graphfilename, context, "\t",
                                  snap.TBool(False))

# graph will be an object of type snap.PNGraph
graph = snap.ToGraph(snap.PNGraph, sample_table, "srcID", "dstID",
                     snap.aaFirst)

# The print calls below use a single parenthesised argument so that they
# produce the same output under both Python 2 and Python 3.

# number of nodes with non-zero degree
Count = snap.CntNonZNodes(graph)
print("Count of nodes with degree greater than 0 is %d" % Count)
# number of nodes with zero out-degree
Count = snap.CntOutDegNodes(graph, 0)
print("Count of nodes with out-degree 0 is %d" % Count)
# number of nodes with zero in-degree
Count = snap.CntInDegNodes(graph, 0)
print("Count of nodes with in-degree 0 is %d" % Count)
# number of unique directed edges
Count = snap.CntUniqDirEdges(graph)
print("Count of directed edges is %d" % Count)
# number of unique undirected edges
Count = snap.CntUniqUndirEdges(graph)
print("Count of undirected edges is %d" % Count)
# number of self edges
Count = snap.CntSelfEdges(graph)
print("Count of self edges is %d" % Count)
Beispiel #3
0
# Fragment: build an asker -> expert graph from the questions/posts tables
# and compute an authority score per node.
# `posts`, `questions`, `t`, `context` and AUTHORITY_ATTRIBUTE are defined
# outside this excerpt. t.show(label, obj) is presumably a timing/reporting
# helper - TODO confirm.
# Lines starting with "# >>>" appear to show the same step in a more
# compact, higher-level notation.

# Rename
# >>> posts.rename('UserId','Expert')
posts.Rename("UserId", "Expert")
t.show("rename", posts)

# Join
# >>> edges = questions.join(posts, ['AcceptedAnswerId'], ['PostId'])
# Match each question's accepted answer id against the post id.
edges = questions.Join("t1.AcceptedAnswerId", posts, "PostId")
t.show("join", edges)

# Create haskell-specific Q&A graph
# >>> graph = posts.graph('Asker', 'Expert', directed = True)
# The source/destination columns are set on the table itself, so ToGraph
# is called here without explicit column names.
edges.SetSrcCol("t1_t2.Asker")
edges.SetDstCol("t1.Expert")
graph = snap.ToGraph(edges, snap.aaFirst)
t.show("graph", graph)

# Compute Authority score
# >>> hits = graph.hits('Authority', 'Hub')
# note: the code below creates a table (Node name, Authority score) - the hub score is not used
# GetHits fills both hash tables; only HTAuth is used afterwards.
HTHub = snap.TIntFltH()
HTAuth = snap.TIntFltH()
snap.GetHits(graph, HTHub, HTAuth)
authority = snap.TTable.New("authority", HTAuth, "Expert", AUTHORITY_ATTRIBUTE,
                            context, snap.TBool(False))
t.show("authority score", authority)

# b) Compute comment scores
# Fragment: `t1`, `t`, and `context` are defined outside this excerpt.

# Load comments
# Join
# >>> t3 = t1.join(t2)
# (the t1/t2 join is disabled in this version of the script)
#t3 = t1.Join("PostId", t2, "PostId")
#t.show("join", t3)

# Join
# >>> t4 = t3.join(t1, ["AnswerId"], ["PostId"])
# NOTE(review): the notation above joins t3 with t1, but the code below
# joins t1 with itself (AnswerId against PostId) - confirm this is intended.
t4 = t1.Join("AnswerId", t1, "PostId")
t.show("join", t4)

# Graph
# >>> graph = t4.graph("UserId_1", "UserId_2")
# The edge endpoints are the UserId columns from the two sides of the
# self-join.
t4.SetSrcCol("t1_1.UserId")
t4.SetDstCol("t1_2.UserId")
graph = snap.ToGraph(
    t4, snap.aaFirst
)  # ToGraphPerGroup should be able to support grouping on string columns!
t.show("graph", graph)

# Get authority scores
# GetHits fills both hash tables; only HTAuth is used afterwards.
HTHub = snap.TIntFltH()
HTAuth = snap.TIntFltH()
snap.GetHits(graph, HTHub, HTAuth)
t.show("hits", graph)

# Turn the authority hash table into a two-column table (UserId, Authority).
t5 = snap.TTable.TableFromHashMap("t5", HTAuth, "UserId", "Authority", context,
                                  snap.TBool(False))
t.show("authority score", t5)

# Select top entries
# >>> t.select('Authority > 0.0')
Beispiel #5
0
# Fragment: build an author-author graph via a self-join on "Key" and rank
# the authors by PageRank.
# `table`, `t`, `context` and PAGE_RANK_ATTRIBUTE are defined outside this
# excerpt.

# Self-join
# >>> table.selfjoin(table, ['Key'])
table = table.SelfJoin("Key")
t.show("join", table)

# Select
# >>> table.select('Author_1 != Author_2')
# Compare the two Author columns produced by the self-join with snap.NEQ,
# i.e. select the rows where the authors differ.
table.SelectAtomic("1_2_1.1.Author", "1_2_2.1.Author", snap.NEQ)
t.show("select", table)

# Create network
# >>> table.graph('Author_1', 'Author_2', directed=False)
table.SetSrcCol("1_2_1.1.Author")
table.SetDstCol("1_2_2.1.Author")
graph = snap.ToGraph(table, snap.aaFirst)
t.show("graph", graph)

# Compute PageRank score
# >>> pagerank = graph.pageRank('PageRank')
# GetPageRank fills HT, which is then converted into a table with columns
# "Author" and PAGE_RANK_ATTRIBUTE.
HT = snap.TIntFltH()
snap.GetPageRank(graph, HT)
pagerank = snap.TTable.New("PR", HT, "Author", PAGE_RANK_ATTRIBUTE, context, snap.TBool(True))
t.show("page rank", pagerank)

# Order by PageRank score (in descending order)
# >>> pagerank.order(['PageRank'], desc = True)
# V holds the list of columns to order by (here just the PageRank column).
V = snap.TStrV()
V.Add(PAGE_RANK_ATTRIBUTE)
pagerank.Order(V, "", snap.TBool(False), snap.TBool(False))
t.show("order", pagerank)
# Fragment (Python 2 syntax): `r`, `t`, `context`, `FIn`, `RefsT` and `time`
# are defined/imported outside this excerpt. r.show(tag) and
# t.show(label, obj) are presumably resource/timing reporters - TODO confirm.
r.show("__references__")

# load context
print time.ctime(), "loading context ..."
context.Load(FIn)
t.show("loadbin context", RefsT)
r.show("__context__")

print time.ctime(), "done"

# In[2]:

# Create the network
# Column names of RefsT: the first element of each schema pair.
# The indexing below relies on map() returning a list (Python 2 behaviour).
refs_schema = map(lambda x: x.GetVal1(), RefsT.GetSchema())
print time.ctime(), "Creating network ..."
# The first schema column is the edge source, the second the destination.
net = snap.ToGraph(snap.PNGraph, RefsT, refs_schema[0], refs_schema[1],
                   snap.aaFirst)
print time.ctime(), "done."

# In[3]:

# Compute InDegV from references graph to get number of citations of each paper.
print time.ctime(), "Computing indegv ..."
# InDegV is filled by GetNodeInDegV with integer pairs, one per node.
InDegV = snap.TIntPrV()
snap.GetNodeInDegV(net, InDegV)
t.show("indegv", InDegV)
r.show("__InDegV__")
print time.ctime()

# In[4]:

# Compute PageRank from references graph.
    # Interior of a function whose header is outside this excerpt;
    # `questions`, `table`, `context`, `dstfile`, `t` and `r` come from the
    # part that is not visible here.
    t.show("select tag = 'python'", questions)
    r.show("__selecttagpython__")

    # Filter `questions` on AcceptedAnswerId != 0.
    questions.SelectAtomicIntConst("AcceptedAnswerId", 0, snap.NEQ)
    t.show("select questions", questions)
    r.show("__selectquestions__")

    # Filter `table` on AcceptedAnswerId == 0 (reported as the answers).
    table.SelectAtomicIntConst("AcceptedAnswerId", 0, snap.EQ)
    t.show("select answers", table)
    r.show("__selectanswers__")

    # Pair each question with the row whose Id is its accepted answer.
    qa = questions.Join("AcceptedAnswerId", table, "Id")
    t.show("join", qa)
    r.show("__join__")

    # Build a snap.PUNGraph from the two OwnerUserId columns of the join.
    graph = snap.ToGraph(snap.PUNGraph, qa, "2.OwnerUserId", "1.OwnerUserId",
                         snap.aaFirst)
    t.show("graph", graph)
    r.show("__graph__")

    # PageRank into PRankH; the numeric arguments follow SNAP's
    # (C, Eps, MaxIter) order - damping 0.85, tolerance 1e-4, 100 iterations.
    PRankH = snap.TIntFltH()
    snap.GetPageRank(graph, PRankH, 0.85, 1e-4, 100)
    prtable = snap.TTable.New("PR", PRankH, "UserId", "PageRank", context,
                              snap.TBool(True))
    t.show("pagerank", prtable)
    r.show("__pagerank__")

    # Write the PageRank table to `dstfile` in SNAP's binary format.
    FOut = snap.TFOut(dstfile)
    prtable.Save(FOut)
    t.show("save bin", prtable)
    r.show("__savebin__")
Beispiel #8
0
    # Interior of a function whose header is outside this excerpt; `G1`,
    # `G3`, `t`, `binname`, `txtname` and `printtime` come from the part
    # that is not visible here. printtime(t, msg) returns a value that is
    # fed back in as `t` - presumably a timestamp; TODO confirm.
    print("G3 nodes", G3.GetNodes())
    print("G3 edges", G3.GetEdges())

    t = printtime(t, "saving the graph to binary")
    FOut = snap.TFOut(binname)
    G1.Save(FOut)
    # Flush before the same file is opened for reading below.
    FOut.Flush()

    t = printtime(t, "reading the graph from binary")
    FIn = snap.TFIn(binname)
    G4 = snap.TUNGraph.Load(FIn)
    print("G4 nodes", G4.GetNodes())
    print("G4 edges", G4.GetEdges())

    t = printtime(t, "reading the graph as table")
    context = snap.TTableContext()

    # Two integer columns, read from the tab-separated file `txtname`.
    schema = snap.Schema()
    schema.Add(snap.TStrTAttrPr("SrcID", snap.atInt))
    schema.Add(snap.TStrTAttrPr("DstID", snap.atInt))

    T1 = snap.TTable.LoadSS(schema, txtname, context, "\t", snap.TBool(False))
    print("T1 rows", T1.GetNumRows())

    t = printtime(t, "converting table to graph")
    G5 = snap.ToGraph(snap.PUNGraph, T1, "SrcID", "DstID", snap.aaFirst)
    print("G5 nodes", G5.GetNodes())
    print("G5 edges", G5.GetEdges())

    printtime(t, "done")
Beispiel #9
0
    # Interior of a function whose header is outside this excerpt;
    # `questions`, `table`, `context`, `dstfile`, `t` and `r` come from the
    # part that is not visible here.
    t.show("select tag = 'python'", questions)
    r.show("__selecttagpython__")

    # Filter `questions` on AcceptedAnswerId != 0.
    questions.SelectAtomicIntConst("AcceptedAnswerId", 0, snap.NEQ)
    t.show("select questions", questions)
    r.show("__selectquestions__")

    # Filter `table` on AcceptedAnswerId == 0 (reported as the answers).
    table.SelectAtomicIntConst("AcceptedAnswerId", 0, snap.EQ)
    t.show("select answers", table)
    r.show("__selectanswers__")

    # Pair each question with the row whose Id is its accepted answer.
    qa = questions.Join("AcceptedAnswerId", table, "Id")
    t.show("join", qa)
    r.show("__join__")

    # Build a snap.PUNGraph from the two OwnerUserId columns of the join.
    # NOTE(review): the "-1"/"-2" suffixed column names presumably depend on
    # the join-column naming of the SNAP version in use - confirm.
    graph = snap.ToGraph(snap.PUNGraph, qa, "OwnerUserId-2", "OwnerUserId-1",
                         snap.aaFirst)
    t.show("graph", graph)
    r.show("__graph__")

    # PageRank into PRankH; the numeric arguments follow SNAP's
    # (C, Eps, MaxIter) order - damping 0.85, tolerance 1e-4, 100 iterations.
    PRankH = snap.TIntFltH()
    snap.GetPageRank(graph, PRankH, 0.85, 1e-4, 100)
    # This TTable.New call passes no table-name argument.
    prtable = snap.TTable.New(PRankH, "UserId", "PageRank", context,
                              snap.TBool(True))
    t.show("pagerank", prtable)
    r.show("__pagerank__")

    # Write the PageRank table to `dstfile` in SNAP's binary format.
    FOut = snap.TFOut(dstfile)
    prtable.Save(FOut)
    t.show("save bin", prtable)
    r.show("__savebin__")
# Fragment: `t1`, `t2`, `t` and `context` are defined outside this excerpt.
# Join
# >>> t3 = t1.join(t2)
t3 = t1.Join("PostId", t2, "PostId")
t.show("join", t3)

# Join
# >>> t4 = t3.join(t1, ["AnswerId"], ["PostId"])
# Join the first result back onto t1: its t1.AnswerId against t1's PostId.
t4 = t3.Join("t1.AnswerId", t1, "PostId")
t.show("join", t4)

# Graph
# >>> graph = t4.graph("UserId_1", "UserId_2")
# The source/destination columns are passed to ToGraph directly instead of
# being set on the table (the disabled SetSrcCol/SetDstCol calls below).
#t4.SetSrcCol("t1_t2.t1.UserId")
#t4.SetDstCol("t1.UserId")
graph = snap.ToGraph(
    snap.PNGraph, t4, "t1_t2.t1.UserId", "t1.UserId", snap.aaFirst
)  # ToGraphPerGroup should be able to support grouping on string columns!
t.show("graph", graph)
#graph.Dump()

# Get authority scores
# GetHits fills both hash tables; only HTAuth is used afterwards.
HTHub = snap.TIntFltH()
HTAuth = snap.TIntFltH()
snap.GetHits(graph, HTHub, HTAuth)
t.show("hits", graph)

# Turn the authority hash table into a two-column table (UserId, Authority).
t5 = snap.TTable.TableFromHashMap("t5", HTAuth, "UserId", "Authority", context,
                                  snap.TBool(False))
t.show("authority score", t5)

# Select top entries