Ejemplo n.º 1
0
 def testSqls(self):
     """Load the airline CSV into the ``airline_na`` table and check its size.

     Recreates the ``airline_na`` table, loads ``airlineWithNA.csv`` into it,
     builds a DDF from ``select *`` and asserts that it has 31 rows and
     29 columns.
     """
     dm = DDFManager("spark")
     # Shut the manager down even when a SQL statement or an assertion fails;
     # previously shutdown() was only reached on the success path.
     try:
         dm.sql("set hive.metastore.warehouse.dir=/tmp")
         dm.sql("drop table if exists airline_na")
         # The SQL text below is kept exactly as written, including its
         # internal whitespace, so the statement sent to the engine is unchanged.
         dm.sql("""create table airline_na (Year int,Month int,DayofMonth int,
          DayOfWeek int,DepTime int,CRSDepTime int,ArrTime int,
          CRSArrTime int,UniqueCarrier string, FlightNum int,
          TailNum string, ActualElapsedTime int, CRSElapsedTime int,
          AirTime int, ArrDelay int, DepDelay int, Origin string,
          Dest string, Distance int, TaxiIn int, TaxiOut int, Cancelled int,
          CancellationCode string, Diverted string, CarrierDelay int,
          WeatherDelay int, NASDelay int, SecurityDelay int, LateAircraftDelay int )
          ROW FORMAT DELIMITED FIELDS TERMINATED BY ','        
     """)
         # The relative path assumes the working directory is the tests
         # directory (os.getcwd() == tests).
         dm.sql("load data local inpath '../../../../resources/test/airlineWithNA.csv' into table airline_na")

         ddf = dm.sql2ddf("select * from airline_na")
         self.assertEqual(ddf.getNumRows(), 31)
         self.assertEqual(ddf.getNumColumns(), 29)
     finally:
         dm.shutdown()
Ejemplo n.º 2
0
    def testSqls(self):
        """Check that SQL-created tables can be loaded and read back as a DDF.

        Drops and recreates ``airline_na``, loads ``airlineWithNA.csv`` into
        it, then asserts that the DDF built from ``select *`` reports 31 rows
        and 29 columns.
        """
        dm = DDFManager("spark")
        # Guarantee shutdown() runs on every exit path; without the
        # try/finally a failing statement or assertion skipped it.
        try:
            dm.sql("set hive.metastore.warehouse.dir=/tmp")
            dm.sql("drop table if exists airline_na")
            # The SQL literal is reproduced unchanged (including its internal
            # whitespace) so the executed statement is identical.
            dm.sql("""create table airline_na (Year int,Month int,DayofMonth int,
             DayOfWeek int,DepTime int,CRSDepTime int,ArrTime int,
             CRSArrTime int,UniqueCarrier string, FlightNum int,
             TailNum string, ActualElapsedTime int, CRSElapsedTime int,
             AirTime int, ArrDelay int, DepDelay int, Origin string,
             Dest string, Distance int, TaxiIn int, TaxiOut int, Cancelled int,
             CancellationCode string, Diverted string, CarrierDelay int,
             WeatherDelay int, NASDelay int, SecurityDelay int, LateAircraftDelay int )
             ROW FORMAT DELIMITED FIELDS TERMINATED BY ','        
        """)
            # The relative path assumes the working directory is the tests
            # directory (os.getcwd() == tests).
            dm.sql(
                "load data local inpath '../../../../resources/test/airlineWithNA.csv' into table airline_na"
            )

            ddf = dm.sql2ddf("select * from airline_na")
            self.assertEqual(ddf.getNumRows(), 31)
            self.assertEqual(ddf.getNumColumns(), 29)
        finally:
            dm.shutdown()
Ejemplo n.º 3
0
import os

from ddf.DDFManager import DDFManager

# Demo script: load the mtcars sample data into a table, wrap it in a DDF and
# call a handful of DDF methods on it.

# The data file is located relative to the DDF_HOME environment variable.
DDF_HOME = os.getenv("DDF_HOME")
if DDF_HOME is None:
    # Fail before starting the manager, with a message that names the cause,
    # instead of a TypeError from the string concatenation further down.
    raise RuntimeError(
        "The DDF_HOME environment variable must be set; "
        "it is used to locate resources/test/mtcars"
    )

dm = DDFManager("spark")
# Shut the manager down even if one of the steps below raises.
try:
    dm.sql("set hive.metastore.warehouse.dir=/tmp/hive/warehouse")
    dm.sql("drop table if exists mtcars")
    # NOTE(review): the column is spelled `qesc` here; the mtcars dataset
    # conventionally calls it `qsec`. Left unchanged -- confirm before renaming.
    dm.sql("CREATE TABLE mtcars (mpg double, cyl int, disp double, hp int, drat double, wt double, qesc double, vs int, am int, gear int, carb string) ROW FORMAT DELIMITED FIELDS TERMINATED BY ' '")
    dm.sql("LOAD DATA LOCAL INPATH '" + DDF_HOME  + "/resources/test/mtcars' INTO TABLE mtcars")

    ddf = dm.sql2ddf("select * from mtcars")

    # The return values of the calls below are discarded when this runs as a
    # script. NOTE(review): presumably written for an interactive session
    # where each result is echoed -- confirm.
    ddf.getColumnNames()

    ddf.getNumColumns()
    ddf.getNumRows()

    ddf.getSummary()

    ddf.firstNRows(10)

    ddf.aggregate("sum(mpg), min(hp)", "vs, am")

    ddf.getFiveNumSummary()

    ddf.sample(10)
finally:
    dm.shutdown()