def test_stop_restores_the_environment(self, spark_session_mock, os_mock):
    """Stopping a session must restore PYSPARK_SUBMIT_ARGS to its prior value."""
    preexisting_environ = {
        'PYSPARK_SUBMIT_ARGS': '--conf "my.conf.here=5g" --and-other-properties',
    }
    os_mock.environ = dict(preexisting_environ)

    SparklySession()
    SparklySession.stop()

    # Whatever the session added on startup must be gone after stop().
    self.assertEqual(os_mock.environ, preexisting_environ)
def test_get_or_create_and_stop(self, spark_session_mock):
    """Exercise get_or_create and stop together.

    Not a great practice to test two functions in one unit test,
    but get_or_create and stop are kind of intertwined with each other.
    """
    class _Session(SparklySession):
        pass

    # Stopping a running session delegates to the underlying spark session.
    first_session = _Session()
    _Session.stop()
    spark_session_mock.stop.assert_called_once_with(first_session)

    # Stopping again when there's no session has no impact.
    _Session.stop()
    spark_session_mock.stop.assert_called_once_with(first_session)

    # With no running session, get_or_create builds a brand new one.
    fetched_session = _Session.get_or_create()
    self.assertIsNot(fetched_session, first_session)

    # With a running session, get_or_create hands back that very instance.
    first_session = _Session()
    self.assertIs(_Session.get_or_create(), first_session)

    # The same retrieval works through the SparklySession base class.
    first_session = _Session()
    self.assertIs(SparklySession.get_or_create(), first_session)
def test_session_without_packages_jars_and_options(self, os_mock):
    """A bare session only sets the python binary and the shell marker."""
    os_mock.environ = {}

    SparklySession()

    expected_environ = {
        'PYSPARK_PYTHON': sys.executable,
        'PYSPARK_SUBMIT_ARGS': 'pyspark-shell',
    }
    self.assertEqual(os_mock.environ, expected_environ)
def test_session_without_packages_jars_and_options_hive_enabled(self, os_mock):
    """A bare session still injects the hive catalog conf into submit args.

    NOTE(review): renamed from ``test_session_without_packages_jars_and_options``
    — that name duplicated the previous test's, so Python silently shadowed
    the earlier definition and only one of the two ever ran.
    """
    os_mock.environ = {}

    SparklySession()

    self.assertEqual(
        os_mock.environ,
        {
            'PYSPARK_PYTHON': sys.executable,
            'PYSPARK_SUBMIT_ARGS':
                '--conf "spark.sql.catalogImplementation=hive" pyspark-shell',
        },
    )
def test_session_appends_to_pyspark_submit_args(self, os_mock):
    """Pre-existing PYSPARK_SUBMIT_ARGS are kept in front of session args."""
    os_mock.environ = {
        'PYSPARK_SUBMIT_ARGS': '--conf "my.conf.here=5g" --and-other-properties',
    }

    SparklySession()

    self.assertEqual(
        os_mock.environ,
        {
            'PYSPARK_PYTHON': sys.executable,
            'PYSPARK_SUBMIT_ARGS': (
                '--conf "my.conf.here=5g" --and-other-properties '
                '--conf "spark.sql.catalogImplementation=hive" '
                'pyspark-shell'
            ),
        },
    )

    # A more complicated session: a subclass declaring its own options.
    os_mock.environ = {
        'PYSPARK_SUBMIT_ARGS': '--conf "my.conf.here=5g" --and-other-properties',
    }

    class _Session(SparklySession):
        options = {'my.conf.here': '10g'}

    _Session()

    self.assertEqual(
        os_mock.environ,
        {
            'PYSPARK_PYTHON': sys.executable,
            'PYSPARK_SUBMIT_ARGS': (
                '--conf "my.conf.here=5g" --and-other-properties '
                # Note that spark honors the first conf it sees when multiple
                # are defined
                '--conf "my.conf.here=10g" '
                '--conf "spark.sql.catalogImplementation=hive" '
                'pyspark-shell'
            ),
        },
    )
def test_has_jar(self):
    """has_jar matches on the jar name prefix, ignoring the version suffix."""
    session = SparklySession()
    self.assertFalse(session.has_jar('mysql-connector-java'))

    session.jars = ['mysql-connector-java-5.1.39-bin.jar']
    self.assertTrue(session.has_jar('mysql-connector-java'))
def test_has_package(self):
    """has_package matches on group:artifact, ignoring the version part."""
    session = SparklySession()
    self.assertFalse(session.has_package('datastax:spark-cassandra-connector'))

    session.packages = ['datastax:spark-cassandra-connector:1.6.1-s_2.10']
    self.assertTrue(session.has_package('datastax:spark-cassandra-connector'))