from __future__ import unicode_literals from ddf import DDFManager, DDF_HOME dm = DDFManager('spark') dm.sql('set hive.metastore.warehouse.dir=/tmp/hive/warehouse', False) dm.sql('drop table if exists mtcars', False) dm.sql( "CREATE TABLE mtcars (mpg double, cyl int, disp double, hp int, drat double, wt double," " qesc double, vs int, am int, gear int, carb string) ROW FORMAT DELIMITED FIELDS TERMINATED BY ' '", False) dm.sql( "LOAD DATA LOCAL INPATH '" + DDF_HOME + "/resources/test/mtcars' INTO TABLE mtcars", False) ddf = dm.sql2ddf('select * from mtcars', False) print('Columns: ' + ', '.join(ddf.colnames)) print('Number of columns: {}'.format(ddf.cols)) print('Number of rows: {}'.format(ddf.rows)) print(ddf.summary()) print(ddf.head(2)) print(ddf.aggregate(['sum(mpg)', 'min(hp)'], ['vs', 'am'])) print(ddf.five_nums()) print(ddf.sample(3))
def setUp(self): self.dm = DDFManager('spark')
from __future__ import unicode_literals from ddf import DDFManager, DDF_HOME, ml dm = DDFManager("flink") dm.sql('DROP TABLE IF EXISTS mtcars', False) dm.sql("CREATE TABLE mtcars (mpg double, cyl int, disp double, hp int, drat double, wt double, " "qesc double, vs int, am int, gear int, carb string)", False) dm.sql("LOAD {}/resources/test/mtcars delimited by ' ' INTO mtcars".format(DDF_HOME), False) dm.sql("select count(*) from mtcars", False) ddf = dm.sql2ddf("select * from mtcars", False) print('Columns: ' + ', '.join(ddf.colnames)) print('Number of columns: {}'.format(ddf.cols)) print('Number of rows: {}'.format(ddf.rows)) print(ddf.summary()) print(ddf.head(2)) print(ddf.aggregate(['sum(mpg)', 'min(hp)'], ['vs', 'am'])) print(ddf.five_nums()) print(ddf.sample(3)) # Kmeans
def setUpClass(cls): cls.dm_spark = DDFManager('spark') cls.airlines = cls.loadAirlines(cls.dm_spark) cls.mtcars = cls.loadMtCars(cls.dm_spark)