Exemple #1
0
 def getOrCreate(self):
     """Return the :class:`SparkSession` for this builder, creating it if necessary.

     Every option stored on the builder is copied into a fresh ``SparkConf``. That
     configuration is handed to ``SparkContext.getOrCreate`` and the resulting context
     to ``SQLContext.getOrCreate``, whose ``sparkSession`` attribute is returned.
     The whole sequence runs while holding ``self._lock``.
     """
     with self._lock:
         from pyspark.conf import SparkConf
         from pyspark.context import SparkContext
         from pyspark.sql.context import SQLContext

         conf = SparkConf()
         for option_name, option_value in self._options.items():
             conf.set(option_name, option_value)

         context = SparkContext.getOrCreate(conf)
         sql_context = SQLContext.getOrCreate(context)
         return sql_context.sparkSession
Exemple #2
0
 def getOrCreate(self):
     """Return the :class:`SparkSession` for this builder, creating it if necessary.

     Every option stored on the builder is copied into a fresh ``SparkConf``. That
     configuration is handed to ``SparkContext.getOrCreate`` and the resulting context
     to ``SQLContext.getOrCreate``, whose ``sparkSession`` attribute is returned.
     The whole sequence runs while holding ``self._lock``.
     """
     with self._lock:
         from pyspark.conf import SparkConf
         from pyspark.context import SparkContext
         from pyspark.sql.context import SQLContext

         conf = SparkConf()
         for option_name, option_value in self._options.items():
             conf.set(option_name, option_value)

         context = SparkContext.getOrCreate(conf)
         sql_context = SQLContext.getOrCreate(context)
         return sql_context.sparkSession
def test_da_resampler_resample_dataframe_with_correct_number_of_rows(
        spark, df):
    """Check that resampling an n-row frame yields ``2 * n - 1`` rows.

    The frame is resampled on its ``time`` column with a 500 ms step and a
    180 ms join tolerance in the ``Europe/Vienna`` timezone. Both the input
    and the resampled frame are printed before the row counts are compared.

    NOTE(review): the ``2 * n - 1`` expectation presumably relies on the
    ``df`` fixture having rows spaced two steps apart -- confirm against the
    fixture definition.
    """
    sql_context = SQLContext.getOrCreate(spark.sparkContext)
    resampler = da.Resampler(sql_context)
    resample_options = {
        'time_col': 'time',
        'timezone': 'Europe/Vienna',
        'step_size': '500ms',
        'join_tolerance': '180ms',
    }
    resampled = resampler.resample(df, **resample_options)

    df.show()
    resampled.show()

    # Count the resampled frame first, matching the original evaluation order.
    actual_rows = resampled.count()
    expected_rows = df.count() * 2 - 1
    assert actual_rows == expected_rows
Exemple #4
0
        def getOrCreate(self):
            """Return the :class:`SparkSession` for this builder, creating it if necessary.

            Every option stored on the builder is copied into a fresh ``SparkConf``.
            That configuration is handed to ``SparkContext.getOrCreate`` and the
            resulting context to ``SQLContext.getOrCreate``, whose ``sparkSession``
            attribute is returned. The whole sequence runs while holding
            ``self._lock``.

            NOTE(review): reuse of an existing context or session is delegated
            entirely to the two ``getOrCreate`` calls. Whether the builder's options
            reach an already-running session depends on their behaviour -- confirm.
            """
            with self._lock:
                from pyspark.conf import SparkConf
                from pyspark.context import SparkContext
                from pyspark.sql.context import SQLContext

                conf = SparkConf()
                for option_name, option_value in self._options.items():
                    conf.set(option_name, option_value)

                context = SparkContext.getOrCreate(conf)
                sql_context = SQLContext.getOrCreate(context)
                return sql_context.sparkSession
Exemple #5
0
    def test_pyspark_gateway(self):
        """Run an end-to-end smoke test of Spark through a ``PysparkGateway``.

        Creates a ``SparkContext`` on the gateway, then checks DataFrame
        creation, ``count``/``collect``, conversion to pandas, and a SQL UDF
        registered from a plain Python function. The context and the gateway
        are released in ``finally`` blocks so that a failing assertion does
        not leave them running for the tests that follow.
        """
        pg = PysparkGateway()
        try:
            # pyspark is imported only after the gateway object exists, as in
            # the original ordering.
            from pyspark import SparkContext, SparkConf
            from pyspark.sql.context import SQLContext

            conf = SparkConf().set('spark.io.encryption.enabled', 'true')
            sc = SparkContext(gateway=pg.gateway, conf=conf)
            try:
                sqlContext = SQLContext.getOrCreate(sc)

                self.assertIsInstance(sc, SparkContext)

                df = sqlContext.createDataFrame(
                    [(1, 2, 'value 1')], ['id1', 'id2', 'val'])
                self.assertEqual(df.count(), 1)

                rows = df.collect()
                self.assertEqual(rows[0].id1, 1)

                # Named ``pandas_df`` so the conventional ``pd`` alias is not
                # shadowed by a DataFrame instance.
                pandas_df = df.toPandas()
                self.assertIsInstance(pandas_df, pandas.core.frame.DataFrame)

                data = [(1, 2, 'a'), (3, 4, 'b'), (5, 6, 'c')]
                df = sqlContext.createDataFrame(data, ['foo', 'bar', 'baz'])
                df.createOrReplaceTempView('foo_table')

                def squared(v):
                    return v * v

                sqlContext.udf.register('squared', squared)

                squared_df = sqlContext.sql(
                    'select squared(foo) AS val from foo_table')
                rows = squared_df.collect()

                # The UDF is registered without an explicit return type, so
                # the squared value is compared as the string '25'.
                self.assertEqual(rows[2].val, '25')
            finally:
                sc.stop()
        finally:
            pg.gateway.shutdown()
Exemple #6
0
        def getOrCreate(self):
            """Return the :class:`SparkSession` for this builder, creating it if necessary.

            Every option stored on the builder is copied into a fresh ``SparkConf``.
            That configuration is handed to ``SparkContext.getOrCreate`` and the
            resulting context to ``SQLContext.getOrCreate``, whose ``sparkSession``
            attribute is returned. The whole sequence runs while holding
            ``self._lock``.

            NOTE(review): reuse of an existing context or session is delegated
            entirely to the two ``getOrCreate`` calls. Whether the builder's options
            reach an already-running session depends on their behaviour -- confirm.
            """
            with self._lock:
                from pyspark.conf import SparkConf
                from pyspark.context import SparkContext
                from pyspark.sql.context import SQLContext

                conf = SparkConf()
                for option_name, option_value in self._options.items():
                    conf.set(option_name, option_value)

                context = SparkContext.getOrCreate(conf)
                sql_context = SQLContext.getOrCreate(context)
                return sql_context.sparkSession