def main(args):
    """Self-join the wiki vote table per user group and save the result.

    Loads a tab-separated (UserId, AdminId) table, groups it by UserId and
    calls ``SelfSimJoinPerGroup`` on AdminId with ``snap.Jaccard`` and a
    threshold of 0.5. The joined table is then filtered with a ``snap.NEQ``
    selection on its two UserId columns, dumped, and written with ``SaveSS``.

    Args:
        args: argv-style list. ``args[1]`` is the votes file to load and
            ``args[2]`` is the output file. With fewer than three entries
            the usage text is printed and the process exits with status 1.
    """
    if len(args) < 3:
        print(get_usage())
        sys.exit(1)

    # Read the paths from the list that was just length-checked instead of
    # the global sys.argv, so the check and the reads cannot disagree.
    votes = args[1]
    outFile = args[2]

    t = testutils.Timer(ENABLE_TIMER)
    context = snap.TTableContext()

    # Schema of the input file: two integer columns.
    VoteS = snap.Schema()
    VoteS.Add(snap.TStrTAttrPr("UserId", snap.atInt))
    VoteS.Add(snap.TStrTAttrPr("AdminId", snap.atInt))
    TVotes = snap.TTable.LoadSS("WikiVotes", VoteS, votes, context, '\t', snap.TBool(False))
    t.show("load Votes", TVotes)

    # Group by user; similarity is computed on the AdminId column.
    GroupBy = snap.TStrV()
    GroupBy.Add("UserId")
    JointTable = TVotes.SelfSimJoinPerGroup(GroupBy, "AdminId", DISTANCE_ATTRIBUTE, snap.Jaccard, 0.5)
    t.show("SimJoinPerGroup complete", JointTable)

    # Selection comparing the two sides' UserId with NEQ (drops self-pairs,
    # presumably -- confirm against SelectAtomic semantics).
    JointTable.SelectAtomic("WikiVotes_1.UserId", "WikiVotes_2.UserId", snap.NEQ)
    t.show("Select complete", JointTable)

    testutils.dump(JointTable, 20)
    JointTable.SaveSS(outFile)
def main(animals_path="/dfs/ilfs2/0/ringo/tests/animals.txt"):
    """Apply a compound predicate to the Animals table and dump the result.

    The predicate built here is
    ``(Size == "big" AND Location == "Australia") OR Animal == Location``.
    It is passed to ``Animals.Select`` and the table is then dumped.

    Args:
        animals_path: Tab-separated file to load. Defaults to the path this
            script has always used, so calling ``main()`` behaves as before.
    """
    S = snap.Schema()
    context = snap.TTableContext()
    S.Add(snap.TStrTAttrPr("Animal", snap.atStr))
    S.Add(snap.TStrTAttrPr("Size", snap.atStr))
    S.Add(snap.TStrTAttrPr("Location", snap.atStr))
    S.Add(snap.TStrTAttrPr("Number", snap.atInt))
    Animals = snap.TTable.LoadSS("Animals", S, animals_path, context, '\t', snap.TBool(False))

    # Gets animals with size=big (comparison against a string constant)
    pred_size = snap.TAtomicPredicate(snap.atStr, snap.TBool(True), snap.EQ, "Size", "", 0, 0, "big")
    node_size = snap.TPredicateNode(pred_size)

    # Get animals with location=Australia (comparison against a string constant)
    pred_location = snap.TAtomicPredicate(snap.atStr, snap.TBool(True), snap.EQ, "Location", "", 0, 0, "Australia")
    node_location = snap.TPredicateNode(pred_location)

    # size=big and location=Australia
    node1 = snap.TPredicateNode(snap.AND)
    node1.AddLeftChild(node_size)
    node1.AddRightChild(node_location)

    # Get animals with name==location (fabricated to show a non-constant
    # case: two columns are compared with each other)
    pred_animal_location = snap.TAtomicPredicate(snap.atStr, snap.TBool(False), snap.EQ, "Animal", "Location")
    node2 = snap.TPredicateNode(pred_animal_location)

    # (size=big and location=Australia) or Animal==Location
    node_root = snap.TPredicateNode(snap.OR)
    node_root.AddLeftChild(node1)
    node_root.AddRightChild(node2)

    pred = snap.TPredicate(node_root)
    Animals.Select(pred)
    testutils.dump(Animals)
def main(args):
    """Run the per-group similarity self-join over wiki votes.

    Steps, in order: load the (UserId, AdminId) vote file, call
    ``SelfSimJoinPerGroup`` grouped by UserId on the AdminId column using
    ``snap.Jaccard`` with threshold 0.5, apply a ``snap.NEQ`` selection on
    the joined table's two UserId columns, dump the table, and save it.

    Args:
        args: argv-style list whose element 1 is the input votes file and
            element 2 the output file. If it has fewer than three elements
            the usage string is printed and the process exits with status 1.
    """
    if len(args) < 3:
        print(get_usage())
        sys.exit(1)

    # Take the paths from ``args`` (the list validated above), not from
    # sys.argv, so a caller-supplied argument list is honoured.
    votes, outFile = args[1], args[2]

    t = testutils.Timer(ENABLE_TIMER)
    context = snap.TTableContext()

    # Input schema: both columns are integers.
    VoteS = snap.Schema()
    VoteS.Add(snap.TStrTAttrPr("UserId", snap.atInt))
    VoteS.Add(snap.TStrTAttrPr("AdminId", snap.atInt))

    TVotes = snap.TTable.LoadSS("WikiVotes", VoteS, votes, context, '\t', snap.TBool(False))
    t.show("load Votes", TVotes)

    GroupBy = snap.TStrV()
    GroupBy.Add("UserId")

    JointTable = TVotes.SelfSimJoinPerGroup(GroupBy, "AdminId", DISTANCE_ATTRIBUTE, snap.Jaccard, 0.5)
    t.show("SimJoinPerGroup complete", JointTable)

    # NEQ selection between the left and right UserId columns of the join.
    JointTable.SelectAtomic("WikiVotes_1.UserId", "WikiVotes_2.UserId", snap.NEQ)
    t.show("Select complete", JointTable)

    testutils.dump(JointTable, 20)
    JointTable.SaveSS(outFile)
def main(args):
    """Self-join Yelp businesses on geographic distance and save the pairs.

    Loads a tab-separated table (Name, City, State, Latitude, Longitude),
    calls ``SelfSimJoin`` on the two coordinate columns with
    ``snap.Haversine``, projects the joined table down to the name, city
    and state of both sides plus the distance column, dumps it and writes
    it with ``SaveSS``.

    Args:
        args: argv-style list. ``args[1]`` is the Yelp input file and
            ``args[2]`` is the output file. With fewer than three entries
            the usage text is printed and the process exits with status 1.
    """
    if len(args) < 3:
        print(get_usage())
        sys.exit(1)

    # Use the validated ``args`` list rather than the global sys.argv.
    yelp = args[1]
    outFile = args[2]

    t = testutils.Timer(ENABLE_TIMER)
    context = snap.TTableContext()

    YelpS = snap.Schema()
    YelpS.Add(snap.TStrTAttrPr("Name", snap.atStr))
    YelpS.Add(snap.TStrTAttrPr("City", snap.atStr))
    YelpS.Add(snap.TStrTAttrPr("State", snap.atStr))
    YelpS.Add(snap.TStrTAttrPr("Latitude", snap.atFlt))
    YelpS.Add(snap.TStrTAttrPr("Longitude", snap.atFlt))

    TYelp = snap.TTable.LoadSS("Yelp", YelpS, yelp, context, '\t', snap.TBool(True))
    t.show("load Yelp", TYelp)

    Cols = snap.TStrV()
    Cols.Add("Latitude")
    Cols.Add("Longitude")

    # Join businesses that are close to each other (Haversine, threshold 2).
    # NOTE(review): the comment here used to say "within 5 kilometers",
    # which does not match the threshold value 2 -- confirm the intended
    # radius and its units.
    JointTable = TYelp.SelfSimJoin(Cols, DISTANCE_ATTRIBUTE, snap.Haversine, 2)
    t.show("SimJoin complete", JointTable)

    # Keep only the identifying columns of both sides and the distance.
    ProjectionV = snap.TStrV()
    ProjectionV.Add("Yelp_1.Name")
    ProjectionV.Add("Yelp_1.City")
    ProjectionV.Add("Yelp_1.State")
    ProjectionV.Add("Yelp_2.Name")
    ProjectionV.Add("Yelp_2.City")
    ProjectionV.Add("Yelp_2.State")
    ProjectionV.Add(DISTANCE_ATTRIBUTE)

    JointTable.ProjectInPlace(ProjectionV)
    t.show("Project complete")

    testutils.dump(JointTable, 100)
    JointTable.SaveSS(outFile)
def main(animals_path="/dfs/ilfs2/0/ringo/tests/animals.txt"):
    """Select rows of the Animals table with a three-leaf predicate tree.

    Tree shape::

                    OR
                  /    \
               AND      Animal == Location
              /   \
      Size=="big"  Location=="Australia"

    The tree is wrapped in a ``snap.TPredicate``, handed to
    ``Animals.Select``, and the table is dumped afterwards.

    Args:
        animals_path: Tab-separated input file. The default is the path
            that was previously hard-coded, so ``main()`` is unchanged.
    """
    context = snap.TTableContext()

    # Table schema: three string columns and one integer column.
    S = snap.Schema()
    S.Add(snap.TStrTAttrPr("Animal", snap.atStr))
    S.Add(snap.TStrTAttrPr("Size", snap.atStr))
    S.Add(snap.TStrTAttrPr("Location", snap.atStr))
    S.Add(snap.TStrTAttrPr("Number", snap.atInt))

    Animals = snap.TTable.LoadSS("Animals", S, animals_path, context, '\t', snap.TBool(False))

    # Leaf: Size == "big" (constant comparison).
    pred_size = snap.TAtomicPredicate(snap.atStr, snap.TBool(True), snap.EQ, "Size", "", 0, 0, "big")
    node_size = snap.TPredicateNode(pred_size)

    # Leaf: Location == "Australia" (constant comparison).
    pred_location = snap.TAtomicPredicate(snap.atStr, snap.TBool(True), snap.EQ, "Location", "", 0, 0, "Australia")
    node_location = snap.TPredicateNode(pred_location)

    # Leaf: Animal == Location, fabricated to show a non-constant case
    # where two columns are compared with each other.
    pred_animal_location = snap.TAtomicPredicate(snap.atStr, snap.TBool(False), snap.EQ, "Animal", "Location")
    node2 = snap.TPredicateNode(pred_animal_location)

    # Inner node: size=big and location=Australia.
    node1 = snap.TPredicateNode(snap.AND)
    node1.AddLeftChild(node_size)
    node1.AddRightChild(node_location)

    # Root: (size=big and location=Australia) or Animal==Location.
    node_root = snap.TPredicateNode(snap.OR)
    node_root.AddLeftChild(node1)
    node_root.AddRightChild(node2)

    pred = snap.TPredicate(node_root)
    Animals.Select(pred)
    testutils.dump(Animals)
# Schema columns for the tags file, then load it as table "t2".
S.Add(snap.TStrTAttrPr("PostId", snap.atInt))
S.Add(snap.TStrTAttrPr("Tag", snap.atStr))
tags = snap.TTable.LoadSS(
    "t2", S, tagsFile, context, '\t', snap.TBool(False)
)
t.show("load tags", tags)

# Select: keep the rows whose Tag column equals "python".
# >>> tags.select('Tags = "python"')
tags.SelectAtomicStrConst("Tag", "python", snap.EQ)
t.show("select", tags)

# Join: posts with the selected tags on PostId.
# >>> questions = posts.join(tags)
questions = posts.Join("PostId", tags, "PostId")
t.show("join", questions)
testutils.dump(questions, 1)

# Project: reduce the joined table to three columns taken from "t1".
# >>> questions.project(['PostId', 'UserId', 'AcceptedAnswerId'], in_place = True)
V = snap.TStrV()
V.Add("t1.PostId")
V.Add("t1.UserId")
V.Add("t1.AcceptedAnswerId")
questions.ProjectInPlace(V)
t.show("copy & project", questions)

# Rename: the user column becomes "Asker".
# >>> questions.rename('UserId', 'Asker')
questions.Rename("t1.UserId", "Asker")
t.show("rename", questions)
import sys
sys.path.append("../use-cases")
import snap
import testutils
import pdb

# Script: load a two-column integer table, project each column into its
# own table, rename the second projection's column so both projections
# share a column name, and union them. Each table is dumped once created.

P1 = snap.TStrTAttrPr("col1", snap.atInt)
P2 = snap.TStrTAttrPr("col2", snap.atInt)

S = snap.Schema()
S.Add(P1)
S.Add(P2)

Context = snap.TTableContext()

# Source table "1".
T1 = snap.TTable.LoadSS("1", S, "test2.tsv", Context)
testutils.dump(T1)

# Table "2": only col1 of T1.
V = snap.TStrV()
V.Add("col1")
T2 = T1.Project(V, "2")
testutils.dump(T2)

# Table "3": only col2 of T1.
V = snap.TStrV()
V.Add("col2")
T3 = T1.Project(V, "3")
testutils.dump(T3)

# Rename T3's column to col1, then union T2 and T3 into table "4".
T3.Rename("col2", "col1")
T4 = T2.Union(T3, "4")
testutils.dump(T4)
t.show("graph", graph)

# Get authority scores: GetHits fills the two hash maps passed to it.
HTHub = snap.TIntFltH()
HTAuth = snap.TIntFltH()
snap.GetHits(graph, HTHub, HTAuth)
t.show("hits", graph)

# Turn the authority map into a table with columns UserId / Authority.
t5 = snap.TTable.TableFromHashMap(
    "t5", HTAuth, "UserId", "Authority", context, snap.TBool(False)
)
t.show("authority score", t5)

# Select top entries (currently disabled)
# >>> t.select('Authority > 0.0')
#t5.SelectAtomicFltConst("Authority", 0.0, snap.GT)
#t.show("select", t5)

# Order by final score (in descending order)
# >>> t5.order(['Authority'], desc = True)
V = snap.TStrV()
V.Add("Authority")
t5.Order(V, "", snap.TBool(False), snap.TBool(False))
t.show("order", t5)

# Save, only when a destination directory was given.
# NOTE(review): the timer report is kept inside the save branch --
# confirm that is the intended nesting.
if dstdir is not None:
    t5.SaveSS(os.path.join(dstdir, OUTPUT_TABLE_FILENAME))
    t.show("save", t5)

testutils.dump(t5, 20)
# Count rows per user, then keep one row per UserId.
# >>> t5.Count('CommentScore', 'UserId')
# >>> t5.Unique()
# NOTE(review): the pseudo-code above names different columns than the
# call below; "ExpertCount" is the column the later selection reads.
t5.Count("UserId", "ExpertCount")
V = snap.TStrV()
V.Add("UserId")
t5.Unique(V)
t.show("count", t5)

# Select
# >>> t5.select('ExpertCount >= 5')
t5.SelectAtomicIntConst("ExpertCount", 5, snap.GTE)
t.show("select", t5)

# Project: reduce the table to the UserId column.
# >>> t5 = t5.project(['UserId'])
V = snap.TStrV()
V.Add("UserId")
t5.ProjectInPlace(V)
t.show("project", t5)

# Save, only when a destination file was given.
# NOTE(review): the timer report is kept inside the save branch --
# confirm that is the intended nesting.
if destFile is not None:
    t5.SaveSS(destFile)
    t.show("save", t5)

testutils.dump(t5)
# Schema columns for the year file, then load it as table "2".
S.Add(snap.TStrTAttrPr("Key", snap.atStr))
S.Add(snap.TStrTAttrPr("Year", snap.atInt))
year = snap.TTable.LoadSS(
    "2", S, yearFile, context, '\t', snap.TBool(False)
)
t.show("load year table", year)

# Select: Year >= 2005.
# >>> year.select('Year >= 2005')
year.SelectAtomicIntConst("Year", 2005, snap.GTE)
t.show("select", year)

# Join: authors with the selected years on Key.
# >>> table = authors.join(year, ['Key'], ['Key'])
table = authors.Join("Key", year, "Key")
t.show("join", table)
testutils.dump(table, 2)

# Self-join on Key.
# >>> table.selfjoin(table, ['Key'])
table = table.SelfJoin("Key")
t.show("join", table)

# Select: NEQ comparison between the two Author columns of the self-join.
# >>> table.select('Author_1 != Author_2')
table.SelectAtomic("1_2_1.1.Author", "1_2_2.1.Author", snap.NEQ)
t.show("select", table)

# Create network: the two Author columns become source and destination.
# >>> table.graph('Author_1', 'Author_2', directed=False)
table.SetSrcCol("1_2_1.1.Author")
table.SetDstCol("1_2_2.1.Author")
t.show("graph", graph)
#graph.Dump()

# Get authority scores: GetHits is given one map for hubs and one for
# authorities; only the authority map is used below.
HTHub = snap.TIntFltH()
HTAuth = snap.TIntFltH()
snap.GetHits(graph, HTHub, HTAuth)
t.show("hits", graph)

# Build table "t5" (UserId, Authority) from the authority map.
t5 = snap.TTable.TableFromHashMap(
    "t5", HTAuth, "UserId", "Authority", context, snap.TBool(False)
)
t.show("authority score", t5)

# Select top entries (currently disabled)
# >>> t.select('Authority > 0.0')
#t5.SelectAtomicFltConst("Authority", 0.0, snap.GT)
#t.show("select", t5)

# Order by final score (in descending order)
# >>> t5.order(['Authority'], desc = True)
V = snap.TStrV()
V.Add("Authority")
t5.Order(V, "", snap.TBool(False), snap.TBool(False))
t.show("order", t5)

# Save, only when a destination file was given.
# NOTE(review): the timer report is kept inside the save branch --
# confirm that is the intended nesting.
if destFile is not None:
    t5.SaveSS(destFile)
    t.show("save", t5)

testutils.dump(t5, 20)
import sys
sys.path.append("../use-cases")
import snap
import testutils
import pdb

# Project / Rename / Union walk-through on a two-column integer table.

# --- schema and context -------------------------------------------------
P1 = snap.TStrTAttrPr("col1", snap.atInt)
P2 = snap.TStrTAttrPr("col2", snap.atInt)
S = snap.Schema()
S.Add(P1)
S.Add(P2)
Context = snap.TTableContext()

# --- load ---------------------------------------------------------------
T1 = snap.TTable.LoadSS("1", S, "test2.tsv", Context)
testutils.dump(T1)

# --- project col1 into table "2" ------------------------------------------
V = snap.TStrV()
V.Add("col1")
T2 = T1.Project(V, "2")
testutils.dump(T2)

# --- project col2 into table "3" ------------------------------------------
V = snap.TStrV()
V.Add("col2")
T3 = T1.Project(V, "3")
testutils.dump(T3)

# --- give both projections the same column name, then union ---------------
T3.Rename("col2","col1")
T4 = T2.Union(T3, "4")
testutils.dump(T4)
import sys
sys.path.append("../utils")
import snap
import testutils

# Debug script: load a single-column integer table, select the rows whose
# "Val" equals 0, and report the number of valid rows plus the first rows.
# The print calls use the parenthesized single-argument form so the script
# parses on Python 3 and prints the same text on Python 2.
if __name__ == '__main__':
    srcfile = '/dfs/ilfs2/0/ringo/StackOverflow_joined/debug.tsv'

    context = snap.TTableContext()

    print("Loading table...")
    schema = snap.Schema()
    schema.Add(snap.TStrTAttrPr("Val", snap.atInt))
    table = snap.TTable.LoadSS("1", schema, srcfile, context, "\t", snap.TBool(False))

    print("Selecting rows with val == 0 in place...")
    table.SelectAtomicIntConst("Val", 0, snap.EQ)

    print("Number of rows in result: %d" % table.GetNumValidRows())

    print("10 first rows of table:")
    testutils.dump(table, 10)
# Count
# >>> t5.Count('CommentScore', 'UserId')
# >>> t5.Unique()
# NOTE(review): the pseudo-code above names different columns than the
# call below; "ExpertCount" is the column the later selection reads.
t5.Count("UserId", "ExpertCount")

# Reduce to one row per UserId. The debugger breakpoints that used to
# surround this step were removed so the script can run unattended.
V = snap.TStrV()
V.Add("UserId")
t5.Unique(V)
t.show("count", t5)

# Select
# >>> t5.select('ExpertCount >= 5')
t5.SelectAtomicIntConst("ExpertCount", 5, snap.GTE)
t.show("select", t5)

# Project
# >>> t5 = t5.project(['UserId'])
V = snap.TStrV()
V.Add("UserId")
t5.ProjectInPlace(V)
t.show("project", t5)

# Save, only when a destination file was given.
# NOTE(review): the timer report is kept inside the save branch --
# confirm that is the intended nesting.
if destFile is not None:
    t5.SaveSS(destFile)
    t.show("save", t5)

testutils.dump(t5)