def test_pyspark_livy_sql_options():
    query = "abc"

    sqlquery = SQLQuery(query, samplemethod='take', maxrows=120)
    assert_equals(sqlquery._pyspark_command(),
                  Command(u'for {} in sqlContext.sql(u"""{} """).toJSON().take(120): print({}.encode("{}"))'
                          .format(LONG_RANDOM_VARIABLE_NAME, query,
                                  LONG_RANDOM_VARIABLE_NAME, conf.pyspark_sql_encoding())))

    sqlquery = SQLQuery(query, samplemethod='take', maxrows=-1)
    assert_equals(sqlquery._pyspark_command(),
                  Command(u'for {} in sqlContext.sql(u"""{} """).toJSON().collect(): print({}.encode("{}"))'
                          .format(LONG_RANDOM_VARIABLE_NAME, query,
                                  LONG_RANDOM_VARIABLE_NAME, conf.pyspark_sql_encoding())))

    sqlquery = SQLQuery(query, samplemethod='sample', samplefraction=0.25, maxrows=-1)
    assert_equals(sqlquery._pyspark_command(),
                  Command(u'for {} in sqlContext.sql(u"""{} """).toJSON().sample(False, 0.25).collect(): '
                          u'print({}.encode("{}"))'
                          .format(LONG_RANDOM_VARIABLE_NAME, query,
                                  LONG_RANDOM_VARIABLE_NAME, conf.pyspark_sql_encoding())))

    sqlquery = SQLQuery(query, samplemethod='sample', samplefraction=0.33, maxrows=3234)
    assert_equals(sqlquery._pyspark_command(),
                  Command(u'for {} in sqlContext.sql(u"""{} """).toJSON().sample(False, 0.33).take(3234): '
                          u'print({}.encode("{}"))'
                          .format(LONG_RANDOM_VARIABLE_NAME, query,
                                  LONG_RANDOM_VARIABLE_NAME, conf.pyspark_sql_encoding())))
def test_pyspark_livy_sql_options_spark2():
    query = "abc"

    sqlquery = SQLQuery(query, samplemethod='take', maxrows=120)
    assert_equals(sqlquery._pyspark_command("spark"),
                  Command(u'import sys\nfor {} in spark.sql(u"""{} """).toJSON(use_unicode=(sys.version_info.major > 2)).take(120): print({})'
                          .format(LONG_RANDOM_VARIABLE_NAME, query, LONG_RANDOM_VARIABLE_NAME)))

    sqlquery = SQLQuery(query, samplemethod='take', maxrows=-1)
    assert_equals(sqlquery._pyspark_command("spark"),
                  Command(u'import sys\nfor {} in spark.sql(u"""{} """).toJSON(use_unicode=(sys.version_info.major > 2)).collect(): print({})'
                          .format(LONG_RANDOM_VARIABLE_NAME, query, LONG_RANDOM_VARIABLE_NAME)))

    sqlquery = SQLQuery(query, samplemethod='sample', samplefraction=0.25, maxrows=-1)
    assert_equals(sqlquery._pyspark_command("spark"),
                  Command(u'import sys\nfor {} in spark.sql(u"""{} """).toJSON(use_unicode=(sys.version_info.major > 2)).sample(False, 0.25).collect(): '
                          u'print({})'
                          .format(LONG_RANDOM_VARIABLE_NAME, query, LONG_RANDOM_VARIABLE_NAME)))

    sqlquery = SQLQuery(query, samplemethod='sample', samplefraction=0.33, maxrows=3234)
    assert_equals(sqlquery._pyspark_command("spark"),
                  Command(u'import sys\nfor {} in spark.sql(u"""{} """).toJSON(use_unicode=(sys.version_info.major > 2)).sample(False, 0.33).take(3234): '
                          u'print({})'
                          .format(LONG_RANDOM_VARIABLE_NAME, query, LONG_RANDOM_VARIABLE_NAME)))
def test_scala_livy_sql_options():
    query = "abc"

    sqlquery = SQLQuery(query, samplemethod='take', maxrows=100)
    assert_equals(sqlquery._scala_command(),
                  Command('sqlContext.sql("""{}""").toJSON.take(100).foreach(println)'.format(query)))

    sqlquery = SQLQuery(query, samplemethod='take', maxrows=-1)
    assert_equals(sqlquery._scala_command(),
                  Command('sqlContext.sql("""{}""").toJSON.collect.foreach(println)'.format(query)))

    sqlquery = SQLQuery(query, samplemethod='sample', samplefraction=0.25, maxrows=-1)
    assert_equals(sqlquery._scala_command(),
                  Command('sqlContext.sql("""{}""").toJSON.sample(false, 0.25).collect.foreach(println)'.format(query)))

    sqlquery = SQLQuery(query, samplemethod='sample', samplefraction=0.33, maxrows=3234)
    assert_equals(sqlquery._scala_command(),
                  Command('sqlContext.sql("""{}""").toJSON.sample(false, 0.33).take(3234).foreach(println)'.format(query)))
def test_execute_sql_no_results():
    global executed_once
    executed_once = False
    spark_events = MagicMock()
    sqlquery = SQLQuery("SHOW TABLES", "take", maxrows=-1, spark_events=spark_events)
    sqlquery.to_command = MagicMock()
    sqlquery.to_only_columns_query = MagicMock()
    result1 = ""
    result_data = pd.DataFrame([])
    session = MagicMock()
    sqlquery.to_command.return_value.execute.return_value = (True, result1, MIMETYPE_TEXT_PLAIN)
    session.kind = "spark"

    result = sqlquery.execute(session)

    assert_frame_equal(result, result_data)
    sqlquery.to_command.return_value.execute.assert_called_once_with(session)
    spark_events.emit_sql_execution_start_event.assert_called_once_with(
        session.guid, session.kind, session.id, sqlquery.guid,
        sqlquery.samplemethod, sqlquery.maxrows, sqlquery.samplefraction)
    spark_events.emit_sql_execution_end_event.assert_called_once_with(
        session.guid, session.kind, session.id, sqlquery.guid,
        sqlquery.to_command.return_value.guid, True, "", "")
def test_execute_sql():
    spark_events = MagicMock()
    sqlquery = SQLQuery("HERE IS THE QUERY", "take", 100, 0.2, spark_events=spark_events)
    sqlquery.to_command = MagicMock(return_value=MagicMock())
    # One JSON record per line, as emitted by the generated toJSON commands.
    result = """{"z":100, "nullv":null, "y":50}
{"z":25, "nullv":null, "y":10}"""
    sqlquery.to_command.return_value.execute = MagicMock(return_value=(True, result))
    result_data = pd.DataFrame([{'z': 100, "nullv": None, 'y': 50},
                                {'z': 25, "nullv": None, 'y': 10}],
                               columns=['z', "nullv", 'y'])
    session = MagicMock()
    session.kind = "pyspark"

    result = sqlquery.execute(session)

    assert_frame_equal(result, result_data)
    sqlquery.to_command.return_value.execute.assert_called_once_with(session)
    spark_events.emit_sql_execution_start_event.assert_called_once_with(
        session.guid, session.kind, session.id, sqlquery.guid, 'take', 100, 0.2)
    spark_events.emit_sql_execution_end_event.assert_called_once_with(
        session.guid, session.kind, session.id, sqlquery.guid,
        sqlquery.to_command.return_value.guid, True, '', '')
def switch_user_database(self, session_name):
    from sparkmagic.livyclientlib.sqlquery import SQLQuery

    database = conf.user_database()
    if conf.switch_to_user_database() and database:
        switch_db_code = "use {}".format(database)
        sqlquery = SQLQuery(switch_db_code)
        self.run_sqlquery(sqlquery, session_name)
        self.logger.debug("Switch user database: %s" % database)
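# A hedged companion test sketch for switch_user_database, in the style of the
# surrounding tests. Assumptions not in the source: the method can be invoked
# unbound with a MagicMock standing in for its owner, and the conf values it
# reads (conf.user_database, conf.switch_to_user_database) accept overrides via
# conf.override_all as in test_sqlquery_loads_defaults. The test name and the
# "analytics_db" value are hypothetical.
def test_switch_user_database_issues_use_statement():
    conf.override_all({
        conf.user_database.__name__: "analytics_db",
        conf.switch_to_user_database.__name__: True,
    })
    owner = MagicMock()

    # Call the method unbound so the mock records the run_sqlquery call.
    switch_user_database(owner, "session_name")

    sqlquery_arg, session_arg = owner.run_sqlquery.call_args[0]
    assert_equals(sqlquery_arg.query, "use analytics_db")
    assert_equals(session_arg, "session_name")
    conf.override_all({})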
def test_r_livy_sql_options_spark2():
    query = "abc"

    sqlquery = SQLQuery(query, samplemethod='take', maxrows=100)
    assert_equals(sqlquery._r_command("spark"),
                  Command('for ({} in (jsonlite:::toJSON(take(sql("{}"),100)))) {{cat({})}}'
                          .format(LONG_RANDOM_VARIABLE_NAME, query, LONG_RANDOM_VARIABLE_NAME)))

    sqlquery = SQLQuery(query, samplemethod='take', maxrows=-1)
    assert_equals(sqlquery._r_command("spark"),
                  Command('for ({} in (jsonlite:::toJSON(collect(sql("{}"))))) {{cat({})}}'
                          .format(LONG_RANDOM_VARIABLE_NAME, query, LONG_RANDOM_VARIABLE_NAME)))

    sqlquery = SQLQuery(query, samplemethod='sample', samplefraction=0.25, maxrows=-1)
    assert_equals(sqlquery._r_command("spark"),
                  Command('for ({} in (jsonlite:::toJSON(collect(sample(sql("{}"), FALSE, 0.25))))) {{cat({})}}'
                          .format(LONG_RANDOM_VARIABLE_NAME, query, LONG_RANDOM_VARIABLE_NAME)))

    sqlquery = SQLQuery(query, samplemethod='sample', samplefraction=0.33, maxrows=3234)
    assert_equals(sqlquery._r_command("spark"),
                  Command('for ({} in (jsonlite:::toJSON(take(sample(sql("{}"), FALSE, 0.33),3234)))) {{cat({})}}'
                          .format(LONG_RANDOM_VARIABLE_NAME, query, LONG_RANDOM_VARIABLE_NAME)))
def test_sql_df_execution_without_output_var():
    df = 0
    query = SQLQuery("")
    output_var = None

    magic.spark_controller.run_sqlquery = MagicMock(return_value=df)
    res = magic.execute_sqlquery("", None, None, None, session, output_var, False)

    magic.spark_controller.run_sqlquery.assert_called_once_with(query, session)
    assert res == df
    assert_equals(list(shell.user_ns.keys()), [])
def test_sqlquery_initializes():
    query = "HERE IS MY SQL QUERY SELECT * FROM CREATE DROP TABLE"
    samplemethod = "take"
    maxrows = 120
    samplefraction = 0.6

    sqlquery = SQLQuery(query, samplemethod, maxrows, samplefraction)

    assert_equals(sqlquery.query, query)
    assert_equals(sqlquery.samplemethod, samplemethod)
    assert_equals(sqlquery.maxrows, maxrows)
    assert_equals(sqlquery.samplefraction, samplefraction)
def test_unicode_sql():
    query = u"SELECT 'è'"

    sqlquery = SQLQuery(query, samplemethod='take', maxrows=120)
    assert_equals(sqlquery._pyspark_command("spark"),
                  Command(u'for {} in spark.sql(u"""{} """).toJSON().take(120): print({}.encode("{}"))'
                          .format(LONG_RANDOM_VARIABLE_NAME, query,
                                  LONG_RANDOM_VARIABLE_NAME, conf.pyspark_dataframe_encoding())))
    assert_equals(sqlquery._scala_command("spark"),
                  Command(u'spark.sql("""{}""").toJSON.take(120).foreach(println)'.format(query)))
    assert_equals(sqlquery._r_command("spark"),
                  Command(u'for ({} in (jsonlite:::toJSON(take(sql("{}"),120)))) {{cat({})}}'
                          .format(LONG_RANDOM_VARIABLE_NAME, query, LONG_RANDOM_VARIABLE_NAME)))
def test_sqlquery_loads_defaults():
    defaults = {
        conf.default_samplemethod.__name__: "sample",
        conf.default_maxrows.__name__: 419,
        conf.default_samplefraction.__name__: 0.99,
    }
    conf.override_all(defaults)
    query = "DROP TABLE USERS;"

    sqlquery = SQLQuery(query)

    assert_equals(sqlquery.query, query)
    assert_equals(sqlquery.samplemethod, defaults[conf.default_samplemethod.__name__])
    assert_equals(sqlquery.maxrows, defaults[conf.default_maxrows.__name__])
    assert_equals(sqlquery.samplefraction, defaults[conf.default_samplefraction.__name__])
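# test_sqlquery_loads_defaults leaves its overrides in place, which can leak
# into later tests. A minimal teardown sketch, assuming conf.override_all({})
# restores library defaults (the reset pattern sparkmagic's own tests use):
def _teardown():
    # Clear every override so subsequent tests see default configuration.
    conf.override_all({})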
def test_sql_df_execution_quiet_with_output_var():
    df = 0
    cell = SQLQuery("")
    output_var = "var_name"

    magic.spark_controller = MagicMock()
    magic.spark_controller.run_sqlquery = MagicMock(return_value=df)
    res = magic.execute_sqlquery("", None, None, None, session, output_var, True)

    magic.spark_controller.run_sqlquery.assert_called_once_with(cell, session)
    assert res is None
    assert shell.user_ns[output_var] == df
def test_execute_sql_failure_emits_event():
    spark_events = MagicMock()
    sqlquery = SQLQuery("HERE IS THE QUERY", "take", 100, 0.2, spark_events)
    sqlquery.to_command = MagicMock()
    sqlquery.to_command.return_value.execute = MagicMock(side_effect=ValueError('yo'))
    session = MagicMock()
    session.kind = "pyspark"

    try:
        sqlquery.execute(session)
        assert False
    except ValueError:
        sqlquery.to_command.return_value.execute.assert_called_once_with(session)
        spark_events.emit_sql_execution_end_event.assert_called_once_with(
            session.guid, session.kind, session.id, sqlquery.guid,
            sqlquery.to_command.return_value.guid, False, 'ValueError', 'yo')
def test_unicode_sql():
    query = u"SELECT 'è'"

    sqlquery = SQLQuery(query, samplemethod='take', maxrows=120)
    assert_equals(sqlquery._pyspark_command(),
                  Command(u'for {} in sqlContext.sql(u"""{} """).toJSON().take(120): print({}.encode("{}"))'
                          .format(LONG_RANDOM_VARIABLE_NAME, query,
                                  LONG_RANDOM_VARIABLE_NAME, conf.pyspark_sql_encoding())))
    assert_equals(sqlquery._scala_command(),
                  Command(u'sqlContext.sql("""{}""").toJSON.take(120).foreach(println)'.format(query)))

    # R output is not supported in this version; the command builder should raise.
    try:
        sqlquery._r_command()
        assert False
    except NotImplementedError:
        pass
def test_run_sql_command_parses():
    run_cell_method = MagicMock()
    run_cell_method.return_value = (True, "", MIMETYPE_TEXT_PLAIN)
    spark_controller.run_sqlquery = run_cell_method

    command = "-s"
    name = "sessions_name"
    context = "-c"
    context_name = "sql"
    meth = "-m"
    method_name = "sample"
    line = " ".join([command, name, context, context_name, meth, method_name])
    cell = "cell code"

    result = magic.spark(line, cell)

    run_cell_method.assert_called_once_with(SQLQuery(cell, samplemethod=method_name), name)
    assert result is not None
def test_run_sql_command_knows_how_to_be_quiet():
    run_cell_method = MagicMock()
    run_cell_method.return_value = (True, "")
    spark_controller.run_sqlquery = run_cell_method

    command = "-s"
    name = "sessions_name"
    context = "-c"
    context_name = "sql"
    quiet = "-q"
    meth = "-m"
    method_name = "sample"
    line = " ".join([command, name, context, context_name, quiet, meth, method_name])
    cell = "cell code"

    result = magic.spark(line, cell)

    run_cell_method.assert_called_once_with(SQLQuery(cell, samplemethod=method_name), name)
    assert result is None
def test_run_sql_command_exception():
    run_cell_method = MagicMock()
    run_cell_method.side_effect = LivyUnexpectedStatusException('WOW')
    spark_controller.run_sqlquery = run_cell_method

    command = "-s"
    name = "sessions_name"
    context = "-c"
    context_name = "sql"
    meth = "-m"
    method_name = "sample"
    line = " ".join([command, name, context, context_name, meth, method_name])
    cell = "cell code"

    result = magic.spark(line, cell)

    run_cell_method.assert_called_once_with(SQLQuery(cell, samplemethod=method_name), name)
    ipython_display.send_error.assert_called_once_with(
        EXPECTED_ERROR_MSG.format(run_cell_method.side_effect))
def test_unicode_sql():
    query = u"SELECT 'è'"
    longvar = LONG_RANDOM_VARIABLE_NAME

    sqlquery = SQLQuery(query, samplemethod='take', maxrows=120)
    assert_equals(sqlquery._pyspark_command("spark"),
                  Command(u'import sys\nfor {} in spark.sql(u"""{} """).toJSON(use_unicode=(sys.version_info.major > 2)).take(120): print({})'
                          .format(longvar, query, longvar)))
    assert_equals(sqlquery._scala_command("spark"),
                  Command(u'spark.sql("""{}""").toJSON.take(120).foreach(println)'.format(query)))
    assert_equals(sqlquery._r_command("spark"),
                  Command(u'for ({} in (jsonlite:::toJSON(take(sql("{}"),120)))) {{cat({})}}'
                          .format(longvar, query, longvar)))
def test_df_execution_with_output_var():
    shell = MagicMock()
    shell.user_ns = {}
    magic = SparkMagicBase(None)
    magic.shell = shell

    df = 0
    query = SQLQuery("")
    session = MagicMock()
    output_var = "var_name"

    magic.spark_controller = MagicMock()
    magic.spark_controller.run_sqlquery = MagicMock(return_value=df)
    res = magic.execute_sqlquery("", None, None, None, session, output_var, False)

    magic.spark_controller.run_sqlquery.assert_called_once_with(query, session)
    assert res == df
    assert shell.user_ns[output_var] == df
def test_df_execution_quiet_without_output_var():
    shell = MagicMock()
    shell.user_ns = {}
    magic = SparkMagicBase(None)
    magic.shell = shell

    df = 0
    cell = SQLQuery("")
    session = MagicMock()
    output_var = None

    magic.spark_controller = MagicMock()
    magic.spark_controller.run_sqlquery = MagicMock(return_value=df)
    res = magic.execute_sqlquery("", None, None, None, session, output_var, True)

    magic.spark_controller.run_sqlquery.assert_called_once_with(cell, session)
    assert res is None
    assert_equals(list(shell.user_ns.keys()), [])
def test_to_command_pyspark3():
    variable_name = "var_name"
    sqlquery = SQLQuery("Query")
    sqlquery._pyspark_command = MagicMock(return_value=MagicMock())

    sqlquery.to_command("pyspark3", variable_name)

    sqlquery._pyspark_command.assert_called_with(variable_name, False)
def _sqlquery(cell, samplemethod, maxrows, samplefraction):
    return SQLQuery(cell, samplemethod, maxrows, samplefraction)
# An invalid samplemethod should be rejected at construction time
# (BadUserDataException is the exception sparkmagic raises for bad user input).
@raises(BadUserDataException)
def test_sqlquery_rejects_bad_data():
    query = "HERE IS MY SQL QUERY SELECT * FROM CREATE DROP TABLE"
    samplemethod = "foo"
    _ = SQLQuery(query, samplemethod)