コード例 #1
0
def BadgesGraph():
    """Run the badge-to-badge query over the badges and posts tables.

    Loads ``data/badges.xml`` and ``data/posts.xml``, starts from the
    ``Name`` column of ``badges`` (labelled ``Badge1``), hops through
    ``UserId`` / ``OwnerUserId`` / ``ParentId`` joins on ``posts`` back to
    ``badges`` (labelled ``Badge2``), keeps the (Badge1, Badge2) groups
    whose ``Count`` is at least 100, then calls ``makegraph()`` and
    ``dump()``.

    NOTE(review): what each Ringo call does internally is not visible
    here; the description above only mirrors the call sequence.
    """
    query = ringo.Ringo()
    query.load('data/badges.xml')
    query.load('data/posts.xml')

    # Source side: badge name.
    query.start('badges', 'Name')
    query.setSource()
    query.label('Badge1')

    # Badge owner -> posts with PostTypeId == 2.
    query.link('UserId')
    query.join('posts', 'OwnerUserId')
    query.select('PostTypeId == 2')

    # Those posts -> their parent posts with PostTypeId == 1.
    query.link('ParentId')
    query.join('posts', 'Id')
    query.select('PostTypeId == 1')

    # Parent post owner -> that owner's badge name.
    query.link('OwnerUserId')
    query.join('badges', 'UserId')
    query.link('Name')
    query.label('Badge2')

    # Keep only badge pairs seen at least 100 times, one row per pair.
    query.group('Group', 'Badge1', 'Badge2')
    query.count('Count')
    query.select('Count >= 100')
    query.link('Group')
    query.unique()

    query.link('Badge2')
    query.makegraph()
    query.dump()
コード例 #2
0
ファイル: test.py プロジェクト: viswajithiii/ringo
def import_bench_ext(ext):
    """Load every ``../../data_full/*.<ext>`` file and report its size.

    For each matching file a fresh ``ringo.Ringo()`` is created, the file
    is loaded, and the file size in bytes plus the row count of the first
    loaded table are printed.

    Args:
        ext: File extension (without the leading dot) used in the glob.
    """
    for path in glob.glob("../../data_full/*." + ext):
        print('Importing file ' + path + '...')
        loader = ringo.Ringo()
        loader.load(path)
        byte_count = str(os.path.getsize(path))
        row_count = str(loader.tables[0].numrows())
        print(byte_count + ' bytes, ' + row_count + ' rows')
コード例 #3
0
ファイル: test.py プロジェクト: viswajithiii/ringo
def CommentsGraph():
    """Run the comments-to-post-owner query and dump the result.

    Loads ``data/comments.xml`` and ``data/posts.xml`` in one ``load``
    call, starts from ``comments.UserId`` (labelled ``UserId1``), joins
    ``PostId`` against ``posts.Id`` and ends on ``OwnerUserId`` (labelled
    ``UserId2``) before calling ``makegraph()`` and ``dump()``.
    """
    query = ringo.Ringo()
    query.load('data/comments.xml', 'data/posts.xml')

    # Source side: the user id on the comment row.
    query.start('comments', 'UserId')
    query.setSource()
    query.label('UserId1')

    # Comment -> post it belongs to -> owner of that post.
    query.link('PostId')
    query.join('posts', 'Id')
    query.link('OwnerUserId')
    query.label('UserId2')

    query.makegraph()
    query.dump()
コード例 #4
0
ファイル: test.py プロジェクト: viswajithiii/ringo
def convert_files(safe=False):
    """Convert every ``.xml`` file under ``/lfs/local/0/mraison/`` to TSV.

    Each XML file is loaded into a fresh ``ringo.Ringo()`` and its first
    table is written next to the source file.

    Args:
        safe: When true, use ``write_tsv`` and the ``.tsvs`` suffix;
            otherwise use ``write_tsv_fast`` and the ``.tsv`` suffix.
    """
    for path in glob.glob("/lfs/local/0/mraison/*"):
        name, ext = os.path.splitext(path)
        if ext != ".xml":
            continue  # only XML inputs are converted

        loader = ringo.Ringo()
        print('Importing file ' + path + '...')
        loader.load(path)

        print("Writing file for " + path + "...")
        if safe:
            loader.tables[0].write_tsv(name + ".tsvs")
        else:
            loader.tables[0].write_tsv_fast(name + ".tsv")
コード例 #5
0
ファイル: test.py プロジェクト: viswajithiii/ringo
def DatesGraph():
    """Run the post-history ordering query and dump the result.

    Loads ``data/posthistory.xml``, starts from ``posthistory.PostId``,
    collapses rows sharing ``CreationDate`` and ``PostId``, orders by
    ``UserId`` / ``CreationDate``, groups by ``UserId`` and uses ``next``
    to produce ``NextPostId`` before ``makegraph()`` and ``dump()``.
    """
    query = ringo.Ringo()
    query.load('data/posthistory.xml')
    query.start('posthistory', 'PostId')
    query.setSource()

    # A user changing several elements at once (e.g. body, title and tags)
    # yields one row per element; group + unique merge them into one row.
    query.group('FullEdits', 'CreationDate', 'PostId')
    query.unique()

    query.order('Order', 'UserId', 'CreationDate')
    query.group('Group', 'UserId')
    query.link('PostId')
    query.next('Group', 'Order', 'NextPostId')

    query.makegraph()
    query.dump()
コード例 #6
0
ファイル: test.py プロジェクト: viswajithiii/ringo
def SameEditors():
    """Run the query pairing posts that share a ``UserId`` in post history.

    Loads ``data/posthistory.xml``, labels the starting ``PostId`` as
    ``PostId1``, self-joins ``posthistory`` on ``UserId``, labels the
    reached ``PostId`` as ``PostId2``, drops rows where both ids are equal,
    de-duplicates the pairs, then calls ``makegraph()`` and ``dump()``.
    """
    query = ringo.Ringo()
    query.load('data/posthistory.xml')

    # Source side: a post id.
    query.start('posthistory', 'PostId')
    query.setSource()
    query.label('PostId1')

    # Post -> user on that history row -> other history rows of that user.
    query.link('UserId')
    query.join('posthistory', 'UserId')
    query.link('PostId')
    query.label('PostId2')

    # Discard self pairs and keep one row per (PostId1, PostId2).
    query.select('PostId1 != PostId2')
    query.group('Group', 'PostId1', 'PostId2')
    query.unique()

    query.link('PostId2')
    query.makegraph()
    query.dump()
コード例 #7
0
ファイル: test.py プロジェクト: viswajithiii/ringo
def CommonVoters():
    """Run the query pairing users whose votes share a ``PostId``.

    Loads ``data/votes.xml``, labels the starting ``UserId`` as
    ``UserId1``, self-joins ``votes`` on ``PostId``, labels the reached
    ``UserId`` as ``UserId2``, drops self pairs, keeps pairs whose
    ``Count`` is at least 10, then calls ``makegraph()`` and ``dump()``.
    """
    query = ringo.Ringo()
    query.load('data/votes.xml')

    # Source side: the voting user.
    query.start('votes', 'UserId')
    query.setSource()
    query.label('UserId1')

    # Vote -> post -> other votes on the same post -> their users.
    query.link('PostId')
    query.join('votes', 'PostId')
    query.link('UserId')
    query.label('UserId2')

    # Drop self pairs, then keep pairs occurring at least 10 times.
    query.select('UserId1 != UserId2')
    query.group('Group', 'UserId1', 'UserId2')
    query.count('Count')
    query.select('Count >= 10')
    query.link('Group')
    query.unique()

    query.link('UserId2')
    query.makegraph()
    query.dump()
コード例 #8
0
ファイル: test.py プロジェクト: viswajithiii/ringo
def CommonComments():
    """Run the query pairing users whose comments share a ``PostId``.

    Loads ``/lfs/local/0/mraison/comments_10000.tsv``, renames the first
    loaded table to ``comments``, self-joins it on ``PostId``, drops self
    pairs, de-duplicates (UserId1, UserId2) and calls ``makegraph()``.
    No ``dump()`` is issued here.
    """
    query = ringo.Ringo()
    query.load('/lfs/local/0/mraison/comments_10000.tsv')
    # The table is renamed explicitly so the steps below can refer to it
    # as 'comments'.
    query.tables[0].name = 'comments'

    # Source side: the commenting user.
    query.start('comments', 'UserId')
    query.setSource()
    query.label('UserId1')

    # Comment -> post -> other comments on the same post -> their users.
    query.link('PostId')
    query.join('comments', 'PostId')
    query.link('UserId')
    query.label('UserId2')

    query.select('UserId1 != UserId2')
    query.group('Group', 'UserId1', 'UserId2')
    # Disabled count filter, kept for reference:
    #   query.count('Count')
    #   query.select('Count >= 10 && UserId1 != UserId2')
    #   query.select('Count >= 10')
    #   query.link('Group')
    query.unique()

    query.link('UserId2')
    query.makegraph()
コード例 #9
0
ファイル: test.py プロジェクト: viswajithiii/ringo
def QAGraph():
    """Run the post-owner to parent-post-owner query on a posts sample.

    Loads ``/lfs/local/0/mraison/posts_10000.tsv``, renames the first
    loaded table to ``posts``, starts from ``OwnerUserId`` (``UserId1``)
    restricted to ``PostTypeId == 2``, follows ``ParentId`` to ``posts.Id``
    and labels that row's ``OwnerUserId`` as ``UserId2``. Pairs are
    de-duplicated and ``makegraph()`` is called; no ``dump()`` is issued.
    """
    query = ringo.Ringo()
    query.load('/lfs/local/0/mraison/posts_10000.tsv')
    # The table is renamed explicitly so the steps below can refer to it
    # as 'posts'.
    query.tables[0].name = 'posts'

    # Source side: owner of a post with PostTypeId == 2.
    query.start('posts', 'OwnerUserId')
    query.setSource()
    query.label('UserId1')
    query.select('PostTypeId == 2')

    # Post -> parent post -> owner of the parent post.
    query.link('ParentId')
    query.join('posts', 'Id')
    # Disabled filter, kept for reference:
    #   query.select('PostTypeId == 1')
    query.link('OwnerUserId')
    query.label('UserId2')

    query.group('Group', 'UserId1', 'UserId2')
    # Disabled count filter, kept for reference:
    #   query.count('Count')
    #   query.select('Count >= 2')
    #   query.link('Group')
    query.unique()

    query.link('UserId2')
    query.makegraph()
コード例 #10
0
def QAGraph():
    """Run the post-owner to parent-post-owner query and dump the result.

    Loads ``data/posts.tsv``, starts from ``OwnerUserId`` (``UserId1``)
    restricted to ``PostTypeId == 2``, follows ``ParentId`` to ``posts.Id``
    restricted to ``PostTypeId == 1`` and labels that row's ``OwnerUserId``
    as ``UserId2``. Only pairs whose ``Count`` is at least 2 are kept
    before ``makegraph()`` and ``dump()`` are called.
    """
    query = ringo.Ringo()
    query.load('data/posts.tsv')

    # Source side: owner of a post with PostTypeId == 2.
    query.start('posts', 'OwnerUserId')
    query.setSource()
    query.label('UserId1')
    query.select('PostTypeId == 2')

    # Post -> parent post (PostTypeId == 1) -> owner of the parent post.
    query.link('ParentId')
    query.join('posts', 'Id')
    query.select('PostTypeId == 1')
    query.link('OwnerUserId')
    query.label('UserId2')

    # Keep pairs occurring at least twice, one row per pair.
    query.group('Group', 'UserId1', 'UserId2')
    query.count('Count')
    query.select('Count >= 2')
    query.link('Group')
    query.unique()

    query.link('UserId2')
    query.makegraph()
    query.dump()
コード例 #11
0
ファイル: 09-isnextk.py プロジェクト: viswajithiii/ringo
# Benchmark script: load a posts TSV file and time/measure IsNextK on it.
# `sys` is used immediately below, so it has to be imported first.
import sys

sys.path.append("../utils")
sys.path.append("../ringo-engine-python")
import ringo
import snap
import testutils

if __name__ == '__main__':

    if len(sys.argv) < 2:
        # The usage text spans two lines; the second line keeps its
        # original leading spaces.
        print("""Usage: """ + sys.argv[0] + """ <srcfile>
        srcfile: posts.tsv file from StackOverflow dataset""")
        sys.exit(1)

    srcfile = sys.argv[1]
    # Use a separate name for the instance so the `ringo` module is not
    # shadowed by its own object.
    engine = ringo.Ringo()

    timer = testutils.Timer()
    resources = testutils.Resource()

    # (column name, column type) pairs handed to LoadTableTSV.
    schema = [
        ("Id", "int"),
        ("OwnerUserId", "int"),
        ("AcceptedAnswerId", "int"),
        ("CreationDate", "string"),
        ("Score", "int"),
    ]
    table = engine.LoadTableTSV(schema, srcfile)
    timer.show("load text")
    resources.show("__loadtext__")

    table = table.IsNextK("CreationDate", 1, "OwnerUserId")

    timer.show("isnextk")
    resources.show("__isnextk__")

コード例 #12
0
 (t1_ColName4, t1_ColType4)] = [('PostId', 'int'), ('UserId', 'int'),
                                ('AnswerId', 'int'),
                                ('CreationDate', 'string')]
[(t2_ColName1, t2_ColType1), (t2_ColName2, t2_ColType2)] = [('PostId', 'int'),
                                                            ('Tag', 'string')]


def generate(engine, filename0, filename1, filename2):
    """Build the authority-score table for posts tagged ``python``.

    The two input tables are loaded, the tag table is filtered with
    ``'Tag = python'``, joined to the post table, joined again on
    ``1.AnswerId``, turned into a graph, scored with ``GetHits`` and the
    authority scores are saved in descending order.

    Args:
        engine: Object exposing ``LoadTableTSV``, ``Select``, ``Join``,
            ``ToGraph``, ``GetHits``, ``TableFromHashMap``, ``Order`` and
            ``SaveTableTSV`` (a ``ringo.Ringo()`` instance at the call site).
        filename0: TSV path loaded with the four-column ``t1_*`` schema.
        filename1: TSV path loaded with the two-column ``t2_*`` schema.
        filename2: Output path handed to ``SaveTableTSV``.

    Returns:
        Whatever ``engine.SaveTableTSV`` returns.
    """
    posts_schema = [(t1_ColName1, t1_ColType1),
                    (t1_ColName2, t1_ColType2),
                    (t1_ColName3, t1_ColType3),
                    (t1_ColName4, t1_ColType4)]
    tags_schema = [(t2_ColName1, t2_ColType1),
                   (t2_ColName2, t2_ColType2)]

    t1 = engine.LoadTableTSV(posts_schema, filename0)
    t2 = engine.LoadTableTSV(tags_schema, filename1)
    t2 = engine.Select(t2, 'Tag = python', CompConstant=True)
    t3 = engine.Join(t1, t2, t1_ColName1, t2_ColName1)
    t4 = engine.Join(t3, t1, '1.AnswerId', t1_ColName1)
    graph = engine.ToGraph(t4, '1_2.1.UserId', '1.UserId')

    # The GetHits result used to be discarded while HTAuth was read without
    # ever being bound, which raised NameError.
    # NOTE(review): assumes GetHits returns a (hubs, authorities) pair —
    # confirm against the Ringo engine.
    _HTHub, HTAuth = engine.GetHits(graph)

    t5 = engine.TableFromHashMap(HTAuth, 'UserId', 'Authority')
    t5 = engine.Order(t5, ['Authority'], Asc=False)
    t5 = engine.SaveTableTSV(t5, filename2)
    return t5


# Driver: run generate() with default paths, letting up to three
# command-line arguments override them positionally.
engine = ringo.Ringo()
files = ['input/posts.tsv', 'input/tags.tsv', 'experts.tsv/experts.tsv']
# Take at most len(files) arguments after the program name and overwrite
# the matching leading entries of `files`.
overrides = sys.argv[1:len(files) + 1]
files[:len(overrides)] = overrides
t5 = generate(engine, *files)
コード例 #13
0
#!/usr/bin/python
# Timing script: repeatedly adds a table and builds a graph from a query
# file, then prints the average time per trial for each benchmark.
import os
import sys

ROOT_DIR = os.path.join(os.path.dirname(__file__), "..")
sys.path.append(ROOT_DIR)
import ringo
import time

NUM_TRIALS = 10
TEST_DIR = os.path.join(ROOT_DIR, "test")
QUERY_DIR = os.path.join(TEST_DIR, "queries")
DATA_DIR = os.path.join(TEST_DIR, "data")

rg = ringo.Ringo()

# (title used in messages, table file under DATA_DIR, query file under
# QUERY_DIR)
for title, table_file, query_file in (
        ("Q&A", "posts.tsv", "qa.rg"),
        ("Common Comments", "comments.tsv", "comments.rg")):
    print("Building " + title + " graph...")
    # time.clock(): processor time on Unix, wall-clock time on Windows.
    # NOTE(review): time.clock was removed in Python 3.8.
    start = time.clock()
    for _ in range(NUM_TRIALS):
        rg.add_table(os.path.join(DATA_DIR, table_file))
        rg.make_graph(os.path.join(QUERY_DIR, query_file), None, True)
    average = (time.clock() - start) / NUM_TRIALS
    print("Average time for " + title + " graph: " + str(average) +
          " seconds")
コード例 #14
0
# Script: generate a graph, save and reload it, modify the reloaded copy
# and ask it to write a provenance script.
import sys
sys.path.append("../ringo-engine-python/")
import ringo

engine = ringo.Ringo()

# GenRndGnm is called with the PUNGraph type and the values 100, 100.
random_graph = engine.GenRndGnm(engine.PUNGraph, 100, 100)
random_graph.Save('test')

reloaded_graph = engine.Load('test')
reloaded_graph.AddSelfEdges()
print(reloaded_graph.GetNodes())
reloaded_graph.GenerateProvenance('provenance_scripts/p01.py')
コード例 #15
0
# Script: run the same comments self-join query once per engine name.
import os
import sys

ROOT_DIR = os.path.join(os.path.dirname(__file__), "..")
sys.path.append(ROOT_DIR)
import ringo

TEST_DIR = os.path.join(ROOT_DIR, "test")

engines = ["PANDAS", "PYTHON"]
for engine_name in engines:
    print("Using engine " + engine_name)
    query = ringo.Ringo(engine_name)
    query.load(os.path.join(TEST_DIR, "data", "comments.tsv"))

    # Source side: the commenting user.
    query.start("comments", "UserId")
    query.label("UID1")

    # Comment -> post -> other comments on the same post -> their users.
    query.link("PostId")
    query.join("comments", "PostId")
    query.link("UserId")
    query.label("UID2")

    # Drop self pairs and duplicate (UID1, UID2) rows.
    query.select("!=", "UID1", "UID2")
    query.unique("UID1", "UID2")
    query.build_graph()
コード例 #16
0
ファイル: script2.py プロジェクト: viswajithiii/ringo
# Script: print the working-set size after start, after a select and
# after a self-join on OwnerUserId.
import ringo as rg

# The instance gets its own name so the `rg` module alias stays intact.
engine = rg.Ringo("PYTHON")
engine.load("data/posts.tsv")
engine.start("posts", "OwnerUserId")
# engine.dump() was disabled here.
print(engine.getSize())

engine.select(">=", "Tags", "10000")
# engine.dump() was disabled here.
print(engine.getSize())

engine.link("OwnerUserId")
engine.join("posts", "OwnerUserId")
# engine.dump() was disabled here.
print(engine.getSize())