예제 #1
0
파일: table.py 프로젝트: Dshadowzh/dpark
def test():
    """Run a handful of table queries and print each result.

    Builds an RDD from 1000 ``(i, i)`` pairs, wraps it with
    ``asTable(['f1', 'f2'])`` and prints the output of a chained query,
    a ``selectOne`` call, a ``groupBy`` call, an ``execute`` call given an
    SQL-like string, and two joins against a second table created from
    the same RDD.  Nothing is asserted; output is for manual inspection.
    """
    from context import DparkContext

    dpark_ctx = DparkContext()
    pair_rdd = dpark_ctx.makeRDD(zip(range(1000), range(1000)))
    left = pair_rdd.asTable(['f1', 'f2'])

    # Chained query, split into named stages (same calls, same order).
    # print(...) with a single argument prints the same as the print
    # statement under Python 2.
    projected = left.select('f1', 'f2')
    filtered = projected.where('f1>10', 'f2<80', 'f1+f2>30 or f1*f2>200')
    summed = filtered.groupBy('f1').select("-f1", f2="sum(f2)")
    print(summed.sort('f1', reverse=True).take(5))

    # Aggregates passed directly to selectOne.
    print(left.selectOne('count(*)', 'max(f1)', 'min(f2+f1)', 'sum(f1*f2+f1)'))

    # Grouping on an expression with keyword-named aggregate columns.
    bucketed = left.groupBy('f1/20', f2s='sum(f2)', fcnt='count(*)')
    print(bucketed.take(5))

    # Query passed to execute() as one SQL-like string.
    print(left.execute('select f1, sum(f2), count(*) as cnt from me where f1>10 and f2<80 and (f1+f2>30 or f1*f2>200) group by f1 order by cnt limit 5'))

    # Second table over the same RDD; its field list shares 'f1' with the first.
    right = pair_rdd.asTable(['f1', 'f3'])
    print(left.innerJoin(right).take(10))
    print(left.join(right).sort('f1').take(10))
예제 #2
0
파일: table.py 프로젝트: woerwin/dpark
def test():
    """Print the results of several queries against a small demo table.

    An RDD of 1000 ``(i, i)`` pairs is turned into a table with
    ``asTable(['f1', 'f2'])``.  The function then prints, in order: a
    chained select/where/groupBy/select/sort query, a ``selectOne`` call,
    a ``groupBy`` call, an ``execute`` call given an SQL-like string, and
    the first rows of ``innerJoin`` and ``join`` against a second table
    built from the same RDD.  There are no assertions.
    """
    from context import DparkContext

    context = DparkContext()
    numbers = context.makeRDD(zip(range(1000), range(1000)))
    base = numbers.asTable(['f1', 'f2'])

    # Each step below evaluates one query and prints it immediately, in the
    # original order.  A single-argument print(...) produces the same output
    # as the Python 2 print statement.
    query = base.select('f1', 'f2')
    query = query.where('f1>10', 'f2<80', 'f1+f2>30 or f1*f2>200')
    query = query.groupBy('f1')
    query = query.select("-f1", f2="sum(f2)")
    query = query.sort('f1', reverse=True)
    print(query.take(5))

    totals = base.selectOne(
        'count(*)', 'max(f1)', 'min(f2+f1)', 'sum(f1*f2+f1)')
    print(totals)

    grouped_rows = base.groupBy(
        'f1/20', f2s='sum(f2)', fcnt='count(*)').take(5)
    print(grouped_rows)

    # The same kind of query expressed as a single string for execute().
    statement = 'select f1, sum(f2), count(*) as cnt from me where f1>10 and f2<80 and (f1+f2>30 or f1*f2>200) group by f1 order by cnt limit 5'
    print(base.execute(statement))

    # Second table over the same RDD, with field names 'f1' and 'f3'.
    other = numbers.asTable(['f1', 'f3'])
    inner_rows = base.innerJoin(other).take(10)
    print(inner_rows)
    joined_rows = base.join(other).sort('f1').take(10)
    print(joined_rows)