Example 1
    def test_stop_restores_the_environment(self, spark_session_mock, os_mock):
        os_mock.environ = {
            'PYSPARK_SUBMIT_ARGS':
            '--conf "my.conf.here=5g" --and-other-properties',
        }

        SparklySession()
        SparklySession.stop()

        self.assertEqual(
            os_mock.environ, {
                'PYSPARK_SUBMIT_ARGS':
                '--conf "my.conf.here=5g" --and-other-properties',
            })
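
These snippets are lifted out of their test class, so the imports and the `mock.patch` decorators that inject `spark_session_mock` and `os_mock` are not shown. A minimal sketch of the assumed harness follows; the patch targets are inferred from the parameter names and are not confirmed by the source:

    # Sketch of the test harness these snippets assume (not shown above).
    # The patch targets below are guesses based on the mock parameter names.
    import sys
    from unittest import TestCase, mock

    from sparkly import SparklySession


    class TestSparklySession(TestCase):
        # mock.patch decorators inject mocks bottom-up: the innermost
        # patch becomes the first parameter after self.
        @mock.patch('sparkly.session.os')
        @mock.patch('sparkly.session.SparkSession')
        def test_stop_restores_the_environment(self, spark_session_mock, os_mock):
            ...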
Example 2
    def test_get_or_create_and_stop(self, spark_session_mock):
        # Testing two functions in one unit test is not ideal,
        # but get_or_create and stop are intertwined with each other

        class _Session(SparklySession):
            pass

        # check stopping a running session
        original_session = _Session()
        _Session.stop()
        spark_session_mock.stop.assert_called_once_with(original_session)

        # check that stopping when there's no session has no impact
        _Session.stop()
        spark_session_mock.stop.assert_called_once_with(original_session)

        # check creating a new session through get_or_create
        retrieved_session = _Session.get_or_create()
        self.assertNotEqual(id(retrieved_session), id(original_session))

        # check retrieving a session through get_or_create
        original_session = _Session()
        retrieved_session = _Session.get_or_create()
        self.assertEqual(id(retrieved_session), id(original_session))

        # check retrieving a session through SparklySession.get_or_create
        original_session = _Session()
        retrieved_session = SparklySession.get_or_create()
        self.assertEqual(id(retrieved_session), id(original_session))
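
The behavior exercised above is what lets a SparklySession subclass act as a process-wide singleton. A hedged usage sketch; `MyProjectSession`, its options, and the parquet path are illustrative names, not part of the library:

    from sparkly import SparklySession


    class MyProjectSession(SparklySession):  # hypothetical subclass
        options = {'spark.sql.shuffle.partitions': '16'}


    def count_rows(path):
        # Reuses the running session if one exists; otherwise boots a
        # new MyProjectSession configured with the options above.
        spark = MyProjectSession.get_or_create()
        return spark.read.parquet(path).count()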
Example 3
    def test_session_without_packages_jars_and_options(self, os_mock):
        os_mock.environ = {}

        SparklySession()

        self.assertEqual(
            os_mock.environ, {
                'PYSPARK_PYTHON': sys.executable,
                'PYSPARK_SUBMIT_ARGS': 'pyspark-shell',
            })
Example 4
    def test_session_without_packages_jars_and_options(self, os_mock):
        os_mock.environ = {}

        SparklySession()

        self.assertEqual(
            os_mock.environ, {
                'PYSPARK_PYTHON': sys.executable,
                'PYSPARK_SUBMIT_ARGS':
                '--conf "spark.sql.catalogImplementation=hive" pyspark-shell',
            })
Example 5
    def test_session_appends_to_pyspark_submit_args(self, os_mock):
        os_mock.environ = {
            'PYSPARK_SUBMIT_ARGS':
            '--conf "my.conf.here=5g" --and-other-properties',
        }

        SparklySession()

        self.assertEqual(
            os_mock.environ, {
                'PYSPARK_PYTHON': sys.executable,
                'PYSPARK_SUBMIT_ARGS':
                ('--conf "my.conf.here=5g" --and-other-properties '
                 '--conf "spark.sql.catalogImplementation=hive" '
                 'pyspark-shell'),
            })

        # test a more complicated session with class-level options
        os_mock.environ = {
            'PYSPARK_SUBMIT_ARGS':
            '--conf "my.conf.here=5g" --and-other-properties',
        }

        class _Session(SparklySession):
            options = {'my.conf.here': '10g'}

        _Session()

        self.assertEqual(
            os_mock.environ,
            {
                'PYSPARK_PYTHON': sys.executable,
                'PYSPARK_SUBMIT_ARGS': (
                    '--conf "my.conf.here=5g" --and-other-properties '
                    # Note that Spark honors the first conf it sees when multiple
                    # are defined
                    '--conf "my.conf.here=10g" '
                    '--conf "spark.sql.catalogImplementation=hive" '
                    'pyspark-shell'),
            })
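
The assertions above pin down how a session's `options` dict is appended to `PYSPARK_SUBMIT_ARGS`: any pre-existing args come first (so, per the comment, they win when a conf key is duplicated), then one `--conf "key=value"` flag per option, then `pyspark-shell`. A sketch that reproduces the expected string; it is not sparkly's actual implementation:

    def submit_args_with_options(existing_args, options):
        # Sketch only: rebuild the PYSPARK_SUBMIT_ARGS layout the
        # assertions above expect. Options are sorted purely to make
        # the flag order deterministic in this illustration.
        confs = ' '.join(
            '--conf "{}={}"'.format(key, value)
            for key, value in sorted(options.items())
        )
        parts = [existing_args, confs, 'pyspark-shell']
        return ' '.join(part for part in parts if part)


    # Reproduces the expectation from the second half of the test above.
    assert submit_args_with_options(
        '--conf "my.conf.here=5g" --and-other-properties',
        {'my.conf.here': '10g', 'spark.sql.catalogImplementation': 'hive'},
    ) == (
        '--conf "my.conf.here=5g" --and-other-properties '
        '--conf "my.conf.here=10g" '
        '--conf "spark.sql.catalogImplementation=hive" '
        'pyspark-shell'
    )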
Example 6
    def test_has_jar(self):
        hc = SparklySession()
        self.assertFalse(hc.has_jar('mysql-connector-java'))

        hc.jars = ['mysql-connector-java-5.1.39-bin.jar']
        self.assertTrue(hc.has_jar('mysql-connector-java'))
Example 7
    def test_has_package(self):
        hc = SparklySession()
        self.assertFalse(hc.has_package('datastax:spark-cassandra-connector'))

        hc.packages = ['datastax:spark-cassandra-connector:1.6.1-s_2.10']
        self.assertTrue(hc.has_package('datastax:spark-cassandra-connector'))
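
Judging by these two tests, `has_jar` and `has_package` match on the artifact name without its version, which makes them useful as guards before touching a data source. A hedged usage sketch; the keyspace and table names are illustrative only:

    from sparkly import SparklySession

    spark = SparklySession.get_or_create()

    # Only attempt a Cassandra read if the connector package was
    # actually configured on this session.
    if spark.has_package('datastax:spark-cassandra-connector'):
        df = (
            spark.read
            .format('org.apache.spark.sql.cassandra')
            .options(keyspace='my_keyspace', table='my_table')  # illustrative
            .load()
        )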