Example #1
0
 def setUpClass(cls):
     """Provision shared fixtures for the suite: a SQLContext and a
     100-row test DataFrame.

     A temporary file path is also reserved in ``cls.tempdir``; the file
     itself is deleted immediately so only the free path remains for
     tests to use as an output location.
     """
     ReusedPySparkTestCase.setUpClass()
     cls.tempdir = tempfile.NamedTemporaryFile(delete=False)
     os.unlink(cls.tempdir.name)
     cls.sqlCtx = SQLContext(cls.sc)
     cls.testData = [Row(key=i, value=str(i)) for i in range(100)]
     cls.df = cls.sc.parallelize(cls.testData).toDF()
Example #2
0
 def setUpClass(cls):
     """Set up a TestHiveContext-backed HiveContext plus shared test data.

     Reserves a temporary file path (the file is removed at once so the
     path is free for use as an output location) and builds a 100-row
     DataFrame in ``cls.df``.
     """
     ReusedPySparkTestCase.setUpClass()
     cls.tempdir = tempfile.NamedTemporaryFile(delete=False)
     os.unlink(cls.tempdir.name)
     # Instantiate the JVM-side TestHiveContext through the py4j gateway
     # and wrap it in the Python HiveContext.
     _scala_HiveContext = \
         cls.sc._jvm.org.apache.spark.sql.hive.test.TestHiveContext(cls.sc._jsc.sc())
     cls.sqlCtx = HiveContext(cls.sc, _scala_HiveContext)
     cls.testData = [Row(key=i, value=str(i)) for i in range(100)]
     cls.df = cls.sc.parallelize(cls.testData).toDF()
Example #3
0
 def setUpClass(cls):
     """Set up a Hive-backed SQLContext, or mark Hive as unavailable.

     Probes the JVM for HiveConf; when the Hive classes are missing,
     ``cls.sqlCtx`` is set to None so individual tests can detect the
     condition and skip.
     """
     ReusedPySparkTestCase.setUpClass()
     cls.tempdir = tempfile.NamedTemporaryFile(delete=False)
     # Unlink immediately: only the reserved path is needed, and doing it
     # before the Hive probe avoids leaking the file on the early return
     # below (the original unlinked only after a successful probe).
     os.unlink(cls.tempdir.name)
     try:
         cls.sc._jvm.org.apache.hadoop.hive.conf.HiveConf()
     except py4j.protocol.Py4JError:
         cls.sqlCtx = None
         return
     _scala_HiveContext = \
         cls.sc._jvm.org.apache.spark.sql.hive.test.TestHiveContext(cls.sc._jsc.sc())
     cls.sqlCtx = HiveContext(cls.sc, _scala_HiveContext)
     cls.testData = [Row(key=i, value=str(i)) for i in range(100)]
     cls.df = cls.sc.parallelize(cls.testData).toDF()
Example #4
0
 def setUpClass(cls):
     """Set up a Hive-backed SQLContext, or mark Hive as unavailable.

     Probes the JVM for HiveConf; when the Hive classes are missing,
     ``cls.sqlCtx`` is set to None so individual tests can detect the
     condition and skip.
     """
     ReusedPySparkTestCase.setUpClass()
     cls.tempdir = tempfile.NamedTemporaryFile(delete=False)
     # Unlink immediately: only the reserved path is needed, and doing it
     # before the Hive probe avoids leaking the file on the early return
     # below (the original unlinked only after a successful probe).
     os.unlink(cls.tempdir.name)
     try:
         cls.sc._jvm.org.apache.hadoop.hive.conf.HiveConf()
     except py4j.protocol.Py4JError:
         cls.sqlCtx = None
         return
     _scala_HiveContext = \
         cls.sc._jvm.org.apache.spark.sql.hive.test.TestHiveContext(cls.sc._jsc.sc())
     cls.sqlCtx = HiveContext(cls.sc, _scala_HiveContext)
     cls.testData = [Row(key=i, value=str(i)) for i in range(100)]
     cls.df = cls.sc.parallelize(cls.testData).toDF()
Example #5
0
 def setUpClass(cls):
     """Provision a testing HiveContext and test data when Hive is available.

     ``cls.hive_available`` records whether the JVM-side HiveConf could be
     instantiated; ``cls.spark``, ``cls.testData`` and ``cls.df`` are only
     created when it could.
     """
     ReusedPySparkTestCase.setUpClass()
     cls.tempdir = tempfile.NamedTemporaryFile(delete=False)
     cls.hive_available = True
     try:
         cls.sc._jvm.org.apache.hadoop.hive.conf.HiveConf()
     # Both exceptions mark Hive as unavailable; merged from two identical
     # branches. (Why TypeError can occur here is not visible from this
     # file — presumably a py4j gateway failure mode; keep catching it.)
     except (py4j.protocol.Py4JError, TypeError):
         cls.hive_available = False
     os.unlink(cls.tempdir.name)
     if cls.hive_available:
         cls.spark = HiveContext._createForTesting(cls.sc)
         cls.testData = [Row(key=i, value=str(i)) for i in range(100)]
         cls.df = cls.sc.parallelize(cls.testData).toDF()
Example #6
0
 def setUpClass(cls):
     """Start the shared SparkContext and create a scratch directory.

     ``cls.tempdir`` is a fresh directory path; callers are responsible
     for removing it (presumably in a matching ``tearDownClass`` outside
     this view).
     """
     ReusedPySparkTestCase.setUpClass()
     cls.tempdir = tempfile.mkdtemp()
Example #7
0
 def setUpClass(cls):
     """Start the shared SparkContext, allocate a scratch directory, and
     wrap the context in a SparkSession for the whole suite."""
     ReusedPySparkTestCase.setUpClass()
     cls.tempdir = tempfile.mkdtemp()
     cls.spark = SparkSession(cls.sc)
Example #8
0
 def setUpClass(cls):
     """Start the shared SparkContext and expose it as a SparkSession.

     Must run the parent setUpClass first: SparkSession wraps the
     ``cls.sc`` context it creates.
     """
     ReusedPySparkTestCase.setUpClass()
     cls.spark = SparkSession(cls.sc)
Example #9
0
 def setUpClass(cls):
     """Start a per-class SparkContext (PySparkTestCase, not the reused
     variant) and expose it as a SparkSession.

     Must run the parent setUpClass first: SparkSession wraps the
     ``cls.sc`` context it creates.
     """
     PySparkTestCase.setUpClass()
     cls.spark = SparkSession(cls.sc)