コード例 #1
0
ファイル: DDFManager.py プロジェクト: DoingDone9/DDF
 def testInitAndShutdown(self):
     """Check that a 'spark' DDFManager can be created and then shut down.

     The two steps are guarded separately so the failure message names the
     step that broke. The caught exception is appended to that message so
     the root cause is visible in the test report instead of being dropped.
     """
     try:
         dm = DDFManager("spark")
     except Exception as e:
         self.fail("Cannot initialize a DDFManager for 'spark' engine: %r" % (e,))

     try:
         dm.shutdown()
     except Exception as e:
         self.fail("Cannot shutdown DDFManager object: %r" % (e,))
コード例 #2
0
ファイル: DDFManager.py プロジェクト: nightwolfzor/DDF
    def testInitAndShutdown(self):
        """Check that a 'spark' DDFManager can be created and then shut down.

        The two steps are guarded separately so the failure message names the
        step that broke. The caught exception is appended to that message so
        the root cause is visible in the test report instead of being dropped.
        """
        try:
            dm = DDFManager("spark")
        except Exception as e:
            self.fail("Cannot initialize a DDFManager for 'spark' engine: %r" % (e,))

        try:
            dm.shutdown()
        except Exception as e:
            self.fail("Cannot shutdown DDFManager object: %r" % (e,))
コード例 #3
0
ファイル: DDFManager.py プロジェクト: DoingDone9/DDF
 def testSqls(self):
     """Create and load the airline_na table, then check the DDF built from it.

     Runs a sequence of SQL statements through a 'spark' DDFManager, builds a
     DDF from ``select * from airline_na`` and asserts it has 31 rows and
     29 columns. The manager is shut down even when a statement or an
     assertion fails.
     """
     create_table_sql = """create table airline_na (Year int,Month int,DayofMonth int,
          DayOfWeek int,DepTime int,CRSDepTime int,ArrTime int,
          CRSArrTime int,UniqueCarrier string, FlightNum int,
          TailNum string, ActualElapsedTime int, CRSElapsedTime int,
          AirTime int, ArrDelay int, DepDelay int, Origin string,
          Dest string, Distance int, TaxiIn int, TaxiOut int, Cancelled int,
          CancellationCode string, Diverted string, CarrierDelay int,
          WeatherDelay int, NASDelay int, SecurityDelay int, LateAircraftDelay int )
          ROW FORMAT DELIMITED FIELDS TERMINATED BY ','        
     """
     dm = DDFManager("spark")
     try:
         dm.sql("set hive.metastore.warehouse.dir=/tmp")
         dm.sql("drop table if exists airline_na")
         dm.sql(create_table_sql)
         # The relative path below assumes the current working directory
         # is the tests folder (os.getcwd() == tests).
         dm.sql("load data local inpath '../../../../resources/test/airlineWithNA.csv' into table airline_na")

         ddf = dm.sql2ddf("select * from airline_na")
         self.assertEqual(ddf.getNumRows(), 31)
         self.assertEqual(ddf.getNumColumns(), 29)
     finally:
         # Always release the manager; otherwise a failing statement or
         # assertion above would skip the shutdown call.
         dm.shutdown()
コード例 #4
0
# Demo script: load the mtcars sample table through a 'spark' DDFManager and
# call a handful of DDF methods on it.
import os

from ddf.DDFManager import DDFManager

# Resolve the data location before starting the engine so that a missing
# DDF_HOME fails immediately with a clear message instead of a TypeError
# from string concatenation further down.
DDF_HOME = os.getenv("DDF_HOME")
if DDF_HOME is None:
    raise RuntimeError("The DDF_HOME environment variable is not set")

dm = DDFManager("spark")
try:
    dm.sql("set hive.metastore.warehouse.dir=/tmp/hive/warehouse")
    dm.sql("drop table if exists mtcars")
    dm.sql("CREATE TABLE mtcars (mpg double, cyl int, disp double, hp int, drat double, wt double, qesc double, vs int, am int, gear int, carb string) ROW FORMAT DELIMITED FIELDS TERMINATED BY ' '")
    dm.sql("LOAD DATA LOCAL INPATH '" + DDF_HOME + "/resources/test/mtcars' INTO TABLE mtcars")

    ddf = dm.sql2ddf("select * from mtcars")

    # The calls below exercise the DDF API. Their return values are
    # discarded here, so run them interactively to inspect the results.
    ddf.getColumnNames()

    ddf.getNumColumns()
    ddf.getNumRows()

    ddf.getSummary()

    ddf.firstNRows(10)

    ddf.aggregate("sum(mpg), min(hp)", "vs, am")

    ddf.getFiveNumSummary()

    ddf.sample(10)
finally:
    # Shut the manager down even if one of the calls above raised.
    dm.shutdown()

コード例 #5
0
ファイル: DDFManager.py プロジェクト: nightwolfzor/DDF
    def testSqls(self):
        """Create and load the airline_na table, then check the DDF built from it.

        Runs a sequence of SQL statements through a 'spark' DDFManager, builds
        a DDF from ``select * from airline_na`` and asserts it has 31 rows and
        29 columns. The manager is shut down even when a statement or an
        assertion fails.
        """
        create_table_sql = """create table airline_na (Year int,Month int,DayofMonth int,
             DayOfWeek int,DepTime int,CRSDepTime int,ArrTime int,
             CRSArrTime int,UniqueCarrier string, FlightNum int,
             TailNum string, ActualElapsedTime int, CRSElapsedTime int,
             AirTime int, ArrDelay int, DepDelay int, Origin string,
             Dest string, Distance int, TaxiIn int, TaxiOut int, Cancelled int,
             CancellationCode string, Diverted string, CarrierDelay int,
             WeatherDelay int, NASDelay int, SecurityDelay int, LateAircraftDelay int )
             ROW FORMAT DELIMITED FIELDS TERMINATED BY ','        
        """
        dm = DDFManager("spark")
        try:
            dm.sql("set hive.metastore.warehouse.dir=/tmp")
            dm.sql("drop table if exists airline_na")
            dm.sql(create_table_sql)
            # The relative path below assumes the current working directory
            # is the tests folder (os.getcwd() == tests).
            dm.sql(
                "load data local inpath '../../../../resources/test/airlineWithNA.csv' into table airline_na"
            )

            ddf = dm.sql2ddf("select * from airline_na")
            self.assertEqual(ddf.getNumRows(), 31)
            self.assertEqual(ddf.getNumColumns(), 29)
        finally:
            # Always release the manager; otherwise a failing statement or
            # assertion above would skip the shutdown call.
            dm.shutdown()