def getOrCreate(self):
    """Return a :class:`SparkSession` built from this builder's options.

    Every option stored in ``self._options`` is copied into a fresh
    :class:`SparkConf`, which is handed to ``SparkContext.getOrCreate``.
    The session is then read from the ``sparkSession`` attribute of the
    ``SQLContext`` returned by ``SQLContext.getOrCreate`` for that context.
    The whole sequence runs while ``self._lock`` is held.

    NOTE(review): whether an already-running context/session is reused is
    decided by the delegated ``getOrCreate`` calls -- confirm there.
    """
    with self._lock:
        # pyspark is imported at call time rather than at module load.
        from pyspark.conf import SparkConf
        from pyspark.context import SparkContext
        from pyspark.sql.context import SQLContext

        conf = SparkConf()
        for name, setting in self._options.items():
            conf.set(name, setting)

        context = SparkContext.getOrCreate(conf)
        sql_context = SQLContext.getOrCreate(context)
        return sql_context.sparkSession
def getOrCreate(self):
    """Return a :class:`SparkSession` built from this builder's options.

    Every option stored in ``self._options`` is copied into a fresh
    :class:`SparkConf`, which is handed to ``SparkContext.getOrCreate``.
    The session is then read from the ``sparkSession`` attribute of the
    ``SQLContext`` returned by ``SQLContext.getOrCreate`` for that context.
    The whole sequence runs while ``self._lock`` is held.

    NOTE(review): whether an already-running context/session is reused is
    decided by the delegated ``getOrCreate`` calls -- confirm there.
    """
    with self._lock:
        # pyspark is imported at call time rather than at module load.
        from pyspark.conf import SparkConf
        from pyspark.context import SparkContext
        from pyspark.sql.context import SQLContext

        conf = SparkConf()
        for name, setting in self._options.items():
            conf.set(name, setting)

        context = SparkContext.getOrCreate(conf)
        sql_context = SQLContext.getOrCreate(context)
        return sql_context.sparkSession
def test_da_resampler_resample_dataframe_with_correct_number_of_rows(
        spark, df):
    """Check that resampling ``df`` yields ``2 * n - 1`` rows for ``n`` input rows.

    The resampler is built on the SQLContext of the ``spark`` fixture and
    invoked with a 500ms step and a 180ms join tolerance in the
    Europe/Vienna timezone. Both frames are printed to aid debugging.
    """
    sql_context = SQLContext.getOrCreate(spark.sparkContext)
    resampler = da.Resampler(sql_context)

    resample_options = {
        'time_col': 'time',
        'timezone': 'Europe/Vienna',
        'step_size': '500ms',
        'join_tolerance': '180ms',
    }
    resampled = resampler.resample(df, **resample_options)

    # Dump input and output so a failing run shows the actual data.
    df.show()
    resampled.show()

    actual_rows = resampled.count()
    expected_rows = df.count() * 2 - 1
    assert actual_rows == expected_rows
def getOrCreate(self):
    """Return a :class:`SparkSession` built from this builder's options.

    Every option stored in ``self._options`` is copied into a fresh
    :class:`SparkConf`, which is handed to ``SparkContext.getOrCreate``.
    The session is then read from the ``sparkSession`` attribute of the
    ``SQLContext`` returned by ``SQLContext.getOrCreate`` for that context.
    The whole sequence runs while ``self._lock`` is held.

    NOTE(review): this body performs no thread-local or global-default
    session lookup of its own and does not explicitly re-apply the builder
    options to a session that already exists; any such behavior would have
    to come from the delegated ``getOrCreate`` calls -- confirm there.
    """
    with self._lock:
        # pyspark is imported at call time rather than at module load.
        from pyspark.conf import SparkConf
        from pyspark.context import SparkContext
        from pyspark.sql.context import SQLContext

        conf = SparkConf()
        for name, setting in self._options.items():
            conf.set(name, setting)

        context = SparkContext.getOrCreate(conf)
        sql_context = SQLContext.getOrCreate(context)
        return sql_context.sparkSession
def test_pyspark_gateway(self):
    """Exercise a SparkContext created through a ``PysparkGateway`` end to end.

    Covers context creation with a custom conf, DataFrame creation,
    ``count``/``collect``, conversion to pandas, temp-view registration and
    a registered Python function called from SQL.

    The SparkContext and the gateway are released in ``finally`` blocks so
    that a failing assertion or Spark error does not leave them running and
    interfere with later tests.
    """
    pg = PysparkGateway()

    try:
        # NOTE(review): these imports are kept inside the test, after the
        # gateway is constructed, exactly as before -- presumably the
        # gateway must exist before pyspark is imported; confirm.
        import pyspark
        from pyspark import SparkContext, SparkConf
        from pyspark.sql.context import SQLContext
        from pyspark.sql.functions import udf

        conf = SparkConf().set('spark.io.encryption.enabled', 'true')
        sc = SparkContext(gateway=pg.gateway, conf=conf)

        try:
            sqlContext = SQLContext.getOrCreate(sc)

            self.assertEqual(type(sc), SparkContext)

            # Single-row frame: count, collect and pandas conversion.
            df = sqlContext.createDataFrame(
                [(1, 2, 'value 1')], ['id1', 'id2', 'val'])
            self.assertEqual(df.count(), 1)

            rows = df.collect()
            self.assertEqual(rows[0].id1, 1)

            pandas_df = df.toPandas()
            self.assertEqual(type(pandas_df), pandas.core.frame.DataFrame)

            # Three-row frame exposed as a temp view for the SQL/UDF check.
            data = [(1, 2, 'a'), (3, 4, 'b'), (5, 6, 'c')]
            df = sqlContext.createDataFrame(data, ['foo', 'bar', 'baz'])
            df.createOrReplaceTempView('foo_table')

            def squared(v):
                return v * v

            sqlContext.udf.register('squared', squared)

            squared_df = sqlContext.sql(
                'select squared(foo) AS val from foo_table')
            rows = squared_df.collect()

            # The function is registered without a return type, and the
            # expected value for foo == 5 is the string '25'.
            self.assertEqual(rows[2].val, '25')
        finally:
            # Always stop the context, even when an assertion above failed.
            sc.stop()
    finally:
        # Always shut the gateway down, even if context creation failed.
        pg.gateway.shutdown()
def getOrCreate(self):
    """Return a :class:`SparkSession` built from this builder's options.

    Every option stored in ``self._options`` is copied into a fresh
    :class:`SparkConf`, which is handed to ``SparkContext.getOrCreate``.
    The session is then read from the ``sparkSession`` attribute of the
    ``SQLContext`` returned by ``SQLContext.getOrCreate`` for that context.
    The whole sequence runs while ``self._lock`` is held.

    NOTE(review): this body performs no thread-local or global-default
    session lookup of its own and does not explicitly re-apply the builder
    options to a session that already exists; any such behavior would have
    to come from the delegated ``getOrCreate`` calls -- confirm there.
    """
    with self._lock:
        # pyspark is imported at call time rather than at module load.
        from pyspark.conf import SparkConf
        from pyspark.context import SparkContext
        from pyspark.sql.context import SQLContext

        conf = SparkConf()
        for name, setting in self._options.items():
            conf.set(name, setting)

        context = SparkContext.getOrCreate(conf)
        sql_context = SQLContext.getOrCreate(context)
        return sql_context.sparkSession