def BadgesGraph():
    """Run the badge-pair query on a fresh Ringo instance and dump the result.

    Loads ``data/badges.xml`` and ``data/posts.xml``, issues the query steps
    below in order, then calls ``makegraph()`` and ``dump()``.

    NOTE(review): the per-step comments describe intent inferred from the
    column names; confirm against the Ringo query API.
    """
    query = ringo.Ringo()
    query.load('data/badges.xml')
    query.load('data/posts.xml')

    # Source side: badge name, labelled Badge1.
    query.start('badges', 'Name')
    query.setSource()
    query.label('Badge1')

    # Badge owner -> posts with PostTypeId == 2.
    query.link('UserId')
    query.join('posts', 'OwnerUserId')
    query.select('PostTypeId == 2')

    # Parent post with PostTypeId == 1.
    query.link('ParentId')
    query.join('posts', 'Id')
    query.select('PostTypeId == 1')

    # Owner of that post -> their badges, labelled Badge2.
    query.link('OwnerUserId')
    query.join('badges', 'UserId')
    query.link('Name')
    query.label('Badge2')

    # Keep only (Badge1, Badge2) groups with Count >= 100.
    query.group('Group', 'Badge1', 'Badge2')
    query.count('Count')
    query.select('Count >= 100')
    query.link('Group')
    query.unique()

    query.link('Badge2')
    query.makegraph()
    query.dump()
def import_bench_ext(ext):
    """Load every ``../../data_full/*.<ext>`` file and print its size and rows.

    For each matching file a new ``ringo.Ringo()`` is created, the file is
    loaded, and a line with the file size in bytes and the row count of the
    first loaded table is printed.

    ext -- file extension without the leading dot.
    """
    for path in glob.glob("../../data_full/*." + ext):
        print('Importing file ' + path + '...')
        loader = ringo.Ringo()
        loader.load(path)
        size_in_bytes = os.path.getsize(path)
        row_count = loader.tables[0].numrows()
        print(str(size_in_bytes) + ' bytes, ' + str(row_count) + ' rows')
def CommentsGraph():
    """Run the comments query on a fresh Ringo instance and dump the result.

    Loads ``data/comments.xml`` and ``data/posts.xml`` in a single ``load``
    call, issues the query steps below, then calls ``makegraph()`` and
    ``dump()``.

    NOTE(review): presumably links each commenter (UserId1) to the owner of
    the commented post (UserId2); confirm against the Ringo query API.
    """
    query = ringo.Ringo()
    query.load('data/comments.xml', 'data/posts.xml')

    # Source side: comments.UserId, labelled UserId1.
    query.start('comments', 'UserId')
    query.setSource()
    query.label('UserId1')

    # Comment -> post -> post owner, labelled UserId2.
    query.link('PostId')
    query.join('posts', 'Id')
    query.link('OwnerUserId')
    query.label('UserId2')

    query.makegraph()
    query.dump()
def convert_files(safe=False):
    """Convert every ``.xml`` file under ``/lfs/local/0/mraison/`` to TSV.

    Each XML file is loaded into a fresh ``ringo.Ringo()`` and its first
    table is written next to the source file.

    safe -- when true, write with ``write_tsv`` to ``<name>.tsvs``;
            otherwise write with ``write_tsv_fast`` to ``<name>.tsv``.
    """
    for path in glob.glob("/lfs/local/0/mraison/*"):
        stem, extension = os.path.splitext(path)
        if extension != ".xml":
            continue

        loader = ringo.Ringo()
        print('Importing file ' + path + '...')
        loader.load(path)

        print("Writing file for " + path + "...")
        table = loader.tables[0]
        if safe:
            table.write_tsv(stem + ".tsvs")
        else:
            table.write_tsv_fast(stem + ".tsv")
def DatesGraph():
    """Run the post-history ordering query and dump the result.

    Loads ``data/posthistory.xml``, issues the query steps below on a fresh
    Ringo instance, then calls ``makegraph()`` and ``dump()``.

    NOTE(review): presumably connects each post to the next post edited by
    the same user (``NextPostId``); confirm against the Ringo query API.
    """
    query = ringo.Ringo()
    query.load('data/posthistory.xml')

    query.start('posthistory', 'PostId')
    query.setSource()

    # If a user changes several elements at once (e.g. body, title and
    # tags) there is one row for each element. The next two lines group
    # those rows into one.
    query.group('FullEdits', 'CreationDate', 'PostId')
    query.unique()

    query.order('Order', 'UserId', 'CreationDate')
    query.group('Group', 'UserId')
    query.link('PostId')
    query.next('Group', 'Order', 'NextPostId')

    query.makegraph()
    query.dump()
def SameEditors():
    """Run the shared-editor query on post history and dump the result.

    Loads ``data/posthistory.xml``, issues the query steps below on a fresh
    Ringo instance, then calls ``makegraph()`` and ``dump()``.

    NOTE(review): presumably links two distinct posts that share an editing
    user; confirm against the Ringo query API.
    """
    query = ringo.Ringo()
    query.load('data/posthistory.xml')

    # Source side: posthistory.PostId, labelled PostId1.
    query.start('posthistory', 'PostId')
    query.setSource()
    query.label('PostId1')

    # Self-join on UserId, then take PostId as PostId2.
    query.link('UserId')
    query.join('posthistory', 'UserId')
    query.link('PostId')
    query.label('PostId2')

    # Drop rows where both labels are equal, then deduplicate the pairs.
    query.select('PostId1 != PostId2')
    query.group('Group', 'PostId1', 'PostId2')
    query.unique()

    query.link('PostId2')
    query.makegraph()
    query.dump()
def CommonVoters():
    """Run the common-voters query on the votes table and dump the result.

    Loads ``data/votes.xml``, issues the query steps below on a fresh Ringo
    instance, then calls ``makegraph()`` and ``dump()``.

    NOTE(review): presumably links two distinct users who voted on at least
    10 of the same posts; confirm against the Ringo query API.
    """
    query = ringo.Ringo()
    query.load('data/votes.xml')

    # Source side: votes.UserId, labelled UserId1.
    query.start('votes', 'UserId')
    query.setSource()
    query.label('UserId1')

    # Self-join on PostId, then take UserId as UserId2.
    query.link('PostId')
    query.join('votes', 'PostId')
    query.link('UserId')
    query.label('UserId2')
    query.select('UserId1 != UserId2')

    # Keep only (UserId1, UserId2) groups with Count >= 10.
    query.group('Group', 'UserId1', 'UserId2')
    query.count('Count')
    query.select('Count >= 10')
    query.link('Group')
    query.unique()

    query.link('UserId2')
    query.makegraph()
    query.dump()
def CommonComments():
    """Run the common-commenters query on a comments sample and build a graph.

    Loads ``/lfs/local/0/mraison/comments_10000.tsv``, renames the first
    loaded table to ``comments``, issues the query steps below, and calls
    ``makegraph()``. The result is not dumped.

    NOTE(review): presumably links two distinct users who commented on the
    same post; confirm against the Ringo query API.
    """
    query = ringo.Ringo()
    query.load('/lfs/local/0/mraison/comments_10000.tsv')
    # The query below refers to the table by name, so set it explicitly.
    query.tables[0].name = 'comments'

    # Source side: comments.UserId, labelled UserId1.
    query.start('comments', 'UserId')
    query.setSource()
    query.label('UserId1')

    # Self-join on PostId, then take UserId as UserId2.
    query.link('PostId')
    query.join('comments', 'PostId')
    query.link('UserId')
    query.label('UserId2')
    query.select('UserId1 != UserId2')

    query.group('Group', 'UserId1', 'UserId2')
    # Count/threshold stage is disabled in this variant:
    # query.count('Count')
    # query.select('Count >= 10 && UserId1 != UserId2')
    # query.select('Count >= 10')
    # query.link('Group')
    query.unique()

    query.link('UserId2')
    query.makegraph()
def QAGraph():
    """Run the question/answer query on a posts sample and build a graph.

    Loads ``/lfs/local/0/mraison/posts_10000.tsv``, renames the first loaded
    table to ``posts``, issues the query steps below, and calls
    ``makegraph()``. The result is not dumped.

    NOTE(review): a second ``QAGraph`` definition is visible later in this
    source; if both live in one module the later one replaces this one.
    NOTE(review): presumably links the owner of a post with PostTypeId == 2
    to the owner of its parent post; confirm against the Ringo query API.
    """
    query = ringo.Ringo()
    query.load('/lfs/local/0/mraison/posts_10000.tsv')
    # The query below refers to the table by name, so set it explicitly.
    query.tables[0].name = 'posts'

    # Source side: posts.OwnerUserId, labelled UserId1.
    query.start('posts', 'OwnerUserId')
    query.setSource()
    query.label('UserId1')
    query.select('PostTypeId == 2')

    # Follow ParentId to the parent post and take its owner as UserId2.
    query.link('ParentId')
    query.join('posts', 'Id')
    # Disabled in this variant:
    # query.select('PostTypeId == 1')
    query.link('OwnerUserId')
    query.label('UserId2')

    query.group('Group', 'UserId1', 'UserId2')
    # Count/threshold stage is disabled in this variant:
    # query.count('Count')
    # query.select('Count >= 2')
    # query.link('Group')
    query.unique()

    query.link('UserId2')
    query.makegraph()
def QAGraph():
    """Run the question/answer query on ``data/posts.tsv`` and dump the result.

    Issues the query steps below on a fresh Ringo instance, then calls
    ``makegraph()`` and ``dump()``.

    NOTE(review): another ``QAGraph`` definition is visible earlier in this
    source; if both live in one module this later one takes effect.
    NOTE(review): presumably links the owner of a post with PostTypeId == 2
    to the owner of its parent post with PostTypeId == 1, keeping pairs seen
    at least twice; confirm against the Ringo query API.
    """
    query = ringo.Ringo()
    query.load('data/posts.tsv')

    # Source side: posts.OwnerUserId, labelled UserId1.
    query.start('posts', 'OwnerUserId')
    query.setSource()
    query.label('UserId1')
    query.select('PostTypeId == 2')

    # Follow ParentId to the parent post and take its owner as UserId2.
    query.link('ParentId')
    query.join('posts', 'Id')
    query.select('PostTypeId == 1')
    query.link('OwnerUserId')
    query.label('UserId2')

    # Keep only (UserId1, UserId2) groups with Count >= 2.
    query.group('Group', 'UserId1', 'UserId2')
    query.count('Count')
    query.select('Count >= 2')
    query.link('Group')
    query.unique()

    query.link('UserId2')
    query.makegraph()
    query.dump()
# NOTE(review): `sys` is used here but its import is not visible in this
# part of the file; presumably it is imported earlier.
sys.path.append("../utils")
sys.path.append("../ringo-engine-python")
import ringo
import snap
import testutils

# Script: load a posts table from a TSV file, run IsNextK on it, and report
# timer/resource readings after each of the two stages.
if __name__ == '__main__':
    if len(sys.argv) < 2:
        # NOTE(review): the usage text is reproduced exactly as it appears
        # in this source; a line break before "srcfile:" may have been lost.
        print("""Usage: """ + sys.argv[0] + """ <srcfile> srcfile: posts.tsv file from StackOverflow dataset""")
        sys.exit(1)

    srcfile = sys.argv[1]

    # Separate name for the engine instance so the `ringo` module stays
    # reachable under its own name.
    engine = ringo.Ringo()
    timer = testutils.Timer()
    resources = testutils.Resource()

    # (column name, column type) pairs passed to LoadTableTSV.
    schema = [
        ("Id", "int"),
        ("OwnerUserId", "int"),
        ("AcceptedAnswerId", "int"),
        ("CreationDate", "string"),
        ("Score", "int"),
    ]

    table = engine.LoadTableTSV(schema, srcfile)
    timer.show("load text")
    resources.show("__loadtext__")

    table = table.IsNextK("CreationDate", 1, "OwnerUserId")
    timer.show("isnextk")
    resources.show("__isnextk__")
(t1_ColName4, t1_ColType4)] = [('PostId', 'int'), ('UserId', 'int'), ('AnswerId', 'int'), ('CreationDate', 'string')] [(t2_ColName1, t2_ColType1), (t2_ColName2, t2_ColType2)] = [('PostId', 'int'), ('Tag', 'string')] def generate(engine, filename0, filename1, filename2): t1 = engine.LoadTableTSV([(t1_ColName1, t1_ColType1), (t1_ColName2, t1_ColType2), (t1_ColName3, t1_ColType3), (t1_ColName4, t1_ColType4)], filename0) t2 = engine.LoadTableTSV([(t2_ColName1, t2_ColType1), (t2_ColName2, t2_ColType2)], filename1) t2 = engine.Select(t2, 'Tag = python', CompConstant=True) t3 = engine.Join(t1, t2, t1_ColName1, t2_ColName1) t4 = engine.Join(t3, t1, '1.AnswerId', t1_ColName1) graph = engine.ToGraph(t4, '1_2.1.UserId', '1.UserId') engine.GetHits(graph) t5 = engine.TableFromHashMap(HTAuth, 'UserId', 'Authority') t5 = engine.Order(t5, ['Authority'], Asc=False) t5 = engine.SaveTableTSV(t5, filename2) return t5 engine = ringo.Ringo() files = ['input/posts.tsv', 'input/tags.tsv', 'experts.tsv/experts.tsv'] for i in xrange(min(len(files), len(sys.argv) - 1)): files[i] = sys.argv[i + 1] t5 = generate(engine, *files)
#!/usr/bin/python
# Benchmark script: times NUM_TRIALS repetitions of building two graphs
# (Q&A and Common Comments) and prints the average time for each.
import sys
import os
ROOT_DIR = os.path.join(os.path.dirname(__file__), "..")
sys.path.append(ROOT_DIR)
import ringo
import time

NUM_TRIALS = 10
TEST_DIR = os.path.join(ROOT_DIR, "test")
QUERY_DIR = os.path.join(TEST_DIR, "queries")
DATA_DIR = os.path.join(TEST_DIR, "data")

# A single engine instance is reused for every trial of both benchmarks.
rg = ringo.Ringo()

print("Building Q&A graph...")
# time.clock(): processor time on Unix, wall-clock time on Windows.
t0 = time.clock()
for _trial in xrange(NUM_TRIALS):
    rg.add_table(os.path.join(DATA_DIR, "posts.tsv"))
    rg.make_graph(os.path.join(QUERY_DIR, "qa.rg"), None, True)
average = (time.clock() - t0) / NUM_TRIALS
print("Average time for Q&A graph: " + str(average) + " seconds")

print("Building Common Comments graph...")
t0 = time.clock()
for _trial in xrange(NUM_TRIALS):
    rg.add_table(os.path.join(DATA_DIR, "comments.tsv"))
    rg.make_graph(os.path.join(QUERY_DIR, "comments.rg"), None, True)
average = (time.clock() - t0) / NUM_TRIALS
print("Average time for Common Comments graph: " + str(average) + " seconds")
# Script: generate a random graph, save and reload it, modify the reloaded
# copy, print its node count, and write a provenance script for it.
import sys
sys.path.append("../ringo-engine-python/")
import ringo

engine = ringo.Ringo()

# GenRndGnm is called with the engine's PUNGraph type and 100, 100.
random_graph = engine.GenRndGnm(engine.PUNGraph, 100, 100)
random_graph.Save('test')

reloaded = engine.Load('test')
reloaded.AddSelfEdges()
print(reloaded.GetNodes())
reloaded.GenerateProvenance('provenance_scripts/p01.py')
# Script: run the same comments query once per engine backend.
import sys
import os
ROOT_DIR = os.path.join(os.path.dirname(__file__), "..")
sys.path.append(ROOT_DIR)
import ringo

TEST_DIR = os.path.join(ROOT_DIR, "test")
engines = ["PANDAS", "PYTHON"]

for engine_name in engines:
    print("Using engine " + engine_name)
    query = ringo.Ringo(engine_name)
    query.load(os.path.join(TEST_DIR, "data", "comments.tsv"))

    # Source side: comments.UserId, labelled UID1.
    query.start("comments", "UserId")
    query.label("UID1")

    # Self-join on PostId, then take UserId as UID2.
    query.link("PostId")
    query.join("comments", "PostId")
    query.link("UserId")
    query.label("UID2")

    # Drop rows where both labels are equal, deduplicate, and build.
    query.select("!=", "UID1", "UID2")
    query.unique("UID1", "UID2")
    query.build_graph()
# Script: print the working-set size after each stage of a small query on
# data/posts.tsv, using the "PYTHON" engine.
import ringo

rg = ringo.Ringo("PYTHON")
rg.load("data/posts.tsv")

rg.start("posts", "OwnerUserId")
# rg.dump()
print(rg.getSize())

rg.select(">=", "Tags", "10000")
# rg.dump()
print(rg.getSize())

rg.link("OwnerUserId")
rg.join("posts", "OwnerUserId")
# rg.dump()
print(rg.getSize())