def test_unicode():
    """Generated pyspark/scala commands handle a non-ASCII dataframe name.

    NOTE(review): another `test_unicode` is defined later in this file and
    shadows this one at import time, so this variant never actually runs.
    """
    df_name = u"collect 'è'"
    store_command = SparkStoreCommand(df_name, samplemethod='take', maxrows=120)

    # pyspark: toJSON with use_unicode keyed off the interpreter major version
    expected_pyspark = Command(
        u'import sys\nfor {} in {}.toJSON(use_unicode=(sys.version_info.major > 2)).take(120): print({})'.format(
            LONG_RANDOM_VARIABLE_NAME, df_name, LONG_RANDOM_VARIABLE_NAME))
    assert_equals(store_command._pyspark_command(df_name), expected_pyspark)

    # scala: plain toJSON/take/println pipeline
    expected_scala = Command(
        u'{}.toJSON.take(120).foreach(println)'.format(df_name))
    assert_equals(store_command._scala_command(df_name), expected_scala)
def test_unicode():
    """Generated pyspark/scala commands handle a non-ASCII dataframe name
    (encode-based pyspark variant using the configured dataframe encoding)."""
    df_name = u"collect 'è'"
    store_command = SparkStoreCommand(df_name, samplemethod='take', maxrows=120)

    # pyspark: each JSON row is encoded with the configured encoding
    expected_pyspark = Command(
        u'for {} in {}.toJSON().take(120): print({}.encode("{}"))'.format(
            LONG_RANDOM_VARIABLE_NAME, df_name, LONG_RANDOM_VARIABLE_NAME,
            conf.pyspark_dataframe_encoding()))
    assert_equals(store_command._pyspark_command(df_name), expected_pyspark)

    # scala: plain toJSON/take/println pipeline
    expected_scala = Command(
        u'{}.toJSON.take(120).foreach(println)'.format(df_name))
    assert_equals(store_command._scala_command(df_name), expected_scala)
def test_execute_code():
    """execute() builds a command for the session's kind and runs it there."""
    spark_events = MagicMock()
    df_name = "abc"
    store_command = SparkStoreCommand(df_name, "take", 100, 0.2,
                                      spark_events=spark_events)

    # Stub out command construction so we only observe the call pattern.
    store_command.to_command = MagicMock(return_value=MagicMock())
    fake_output = """{"z":100, "nullv":null, "y":50} {"z":25, "nullv":null, "y":10}"""
    store_command.to_command.return_value.execute = MagicMock(
        return_value=(True, fake_output, MIMETYPE_TEXT_PLAIN))

    session = MagicMock()
    session.kind = "pyspark"
    store_command.execute(session)

    # The command must be built for the session kind and executed on it.
    store_command.to_command.assert_called_once_with(session.kind, df_name)
    store_command.to_command.return_value.execute.assert_called_once_with(session)
def _spark_store_command(output_var, samplemethod, maxrows, samplefraction, coerce):
    """Test helper: construct a SparkStoreCommand from positional parameters."""
    command = SparkStoreCommand(output_var, samplemethod, maxrows,
                                samplefraction, coerce=coerce)
    return command
def test_run_spark_command_exception_while_storing():
    """A Livy failure during the -o store step is surfaced via send_error."""
    run_cell_method = MagicMock()
    exception = LivyUnexpectedStatusException('WOW')
    # First call (the cell itself) succeeds; the store command then raises.
    run_cell_method.side_effect = [(True, ""), exception]
    spark_controller.run_command = run_cell_method

    session_name = "sessions_name"
    output_var = "var_name"
    flags = ["-s", session_name,
             "-c", "spark",
             "-m", "sample",
             "-o", output_var]
    line = " ".join(flags)
    cell = "cell code"

    magic.spark(line, cell)

    run_cell_method.assert_any_call(Command(cell), session_name)
    run_cell_method.assert_any_call(
        SparkStoreCommand(output_var, samplemethod="sample"), session_name)
    ipython_display.write.assert_called_once_with("")
    ipython_display.send_error.assert_called_once_with(
        EXPECTED_ERROR_MSG.format(exception))
def test_run_spark_with_store_correct_calls():
    """%%spark with -o and --coerce issues both the cell and the store command."""
    run_cell_method = MagicMock()
    run_cell_method.return_value = (True, "")
    spark_controller.run_command = run_cell_method

    session_name = "sessions_name"
    output_var = "var_name"
    flags = ["-s", session_name,
             "-c", "spark",
             "-m", "sample",
             "-o", output_var,
             "--coerce", "True"]
    line = " ".join(flags)
    cell = "cell code"

    magic.spark(line, cell)

    # The user's cell runs first, then the store command with coercion enabled.
    run_cell_method.assert_any_call(Command(cell), session_name)
    run_cell_method.assert_any_call(
        SparkStoreCommand(output_var, samplemethod="sample", coerce=True),
        session_name)
def test_sparkstorecommand_initializes():
    """Constructor stores the sampling options it is given."""
    command = SparkStoreCommand("var_name", "take", 120, 0.6)
    assert_equals(command.samplemethod, "take")
    assert_equals(command.maxrows, 120)
    assert_equals(command.samplefraction, 0.6)
def test_sparkstorecommand_loads_defaults():
    """SparkStoreCommand falls back to configured defaults when no sampling
    options are passed explicitly.

    Fix: the original overrode the global configuration and never restored
    it, leaking `sample`/419/0.99 defaults into any test that runs after
    this one; the override is now undone in a finally block.
    """
    defaults = {
        conf.default_samplemethod.__name__: "sample",
        conf.default_maxrows.__name__: 419,
        conf.default_samplefraction.__name__: 0.99,
    }
    conf.override_all(defaults)
    try:
        sparkcommand = SparkStoreCommand("var_name")
        assert_equals(sparkcommand.samplemethod,
                      defaults[conf.default_samplemethod.__name__])
        assert_equals(sparkcommand.maxrows,
                      defaults[conf.default_maxrows.__name__])
        assert_equals(sparkcommand.samplefraction,
                      defaults[conf.default_samplefraction.__name__])
    finally:
        # Reset overrides so later tests see pristine configuration.
        conf.override_all({})
def test_pyspark_livy_sampling_options():
    """Pyspark command generation for each sampling configuration
    (use_unicode variant).

    NOTE(review): a second `test_pyspark_livy_sampling_options` (the
    encode-based variant) is defined later in this file and shadows this
    one at import time, so this version never runs under the test runner.

    Fix: the original asserted the sample(0.33)/take(3234) case and the
    samplemethod=None/maxrows=100 case twice each, verbatim; the redundant
    duplicates are removed and the cases are table-driven.
    """
    variable_name = "var_name"

    def expected(op_chain):
        # op_chain is the sampling suffix applied after .toJSON(...)
        return Command(
            u'import sys\nfor {} in {}.toJSON(use_unicode=(sys.version_info.major > 2)).{}: print({})'.format(
                LONG_RANDOM_VARIABLE_NAME, variable_name, op_chain,
                LONG_RANDOM_VARIABLE_NAME))

    cases = [
        (dict(samplemethod='take', maxrows=120), 'take(120)'),
        # maxrows == -1 means "no limit": collect everything
        (dict(samplemethod='take', maxrows=-1), 'collect()'),
        (dict(samplemethod='sample', samplefraction=0.25, maxrows=-1),
         'sample(False, 0.25).collect()'),
        (dict(samplemethod='sample', samplefraction=0.33, maxrows=3234),
         'sample(False, 0.33).take(3234)'),
        # samplemethod=None falls back to take
        (dict(samplemethod=None, maxrows=100), 'take(100)'),
    ]
    for kwargs, op_chain in cases:
        sparkcommand = SparkStoreCommand(variable_name, **kwargs)
        assert_equals(sparkcommand._pyspark_command(variable_name),
                      expected(op_chain))
def test_to_command_invalid():
    """An unknown session kind is rejected with BadUserDataException."""
    store_command = SparkStoreCommand("var_name")
    assert_raises(BadUserDataException,
                  store_command.to_command, "invalid", "var_name")
def test_to_command_r():
    """The 'sparkr' kind dispatches to the R command builder."""
    df_name = "var_name"
    store_command = SparkStoreCommand(df_name)
    store_command._r_command = MagicMock(return_value=MagicMock())
    store_command.to_command("sparkr", df_name)
    store_command._r_command.assert_called_with(df_name)
def test_r_livy_sampling_options():
    """R command generation for each sampling configuration."""
    variable_name = "abc"

    # (constructor kwargs, expression placed inside jsonlite::toJSON(...))
    cases = [
        (dict(samplemethod='take', maxrows=100),
         'take({},100)'.format(variable_name)),
        (dict(samplemethod='take', maxrows=-1),
         'collect({})'.format(variable_name)),
        (dict(samplemethod='sample', samplefraction=0.25, maxrows=-1),
         'collect(sample({}, FALSE, 0.25))'.format(variable_name)),
        (dict(samplemethod='sample', samplefraction=0.33, maxrows=3234),
         'take(sample({}, FALSE, 0.33),3234)'.format(variable_name)),
        (dict(samplemethod=None, maxrows=100),
         'take({},100)'.format(variable_name)),
    ]
    for kwargs, inner in cases:
        sparkcommand = SparkStoreCommand(variable_name, **kwargs)
        expected = Command(
            'for ({} in (jsonlite::toJSON({}))) {{cat({})}}'.format(
                LONG_RANDOM_VARIABLE_NAME, inner, LONG_RANDOM_VARIABLE_NAME))
        assert_equals(sparkcommand._r_command(variable_name), expected)
def test_scala_livy_sampling_options():
    """Scala command generation for each sampling configuration."""
    variable_name = "abc"

    # (constructor kwargs, expected command template keyed on the variable)
    cases = [
        (dict(samplemethod='take', maxrows=100),
         '{}.toJSON.take(100).foreach(println)'),
        (dict(samplemethod='take', maxrows=-1),
         '{}.toJSON.collect.foreach(println)'),
        (dict(samplemethod='sample', samplefraction=0.25, maxrows=-1),
         '{}.toJSON.sample(false, 0.25).collect.foreach(println)'),
        (dict(samplemethod='sample', samplefraction=0.33, maxrows=3234),
         '{}.toJSON.sample(false, 0.33).take(3234).foreach(println)'),
        (dict(samplemethod=None, maxrows=100),
         '{}.toJSON.take(100).foreach(println)'),
    ]
    for kwargs, template in cases:
        sparkcommand = SparkStoreCommand(variable_name, **kwargs)
        assert_equals(sparkcommand._scala_command(variable_name),
                      Command(template.format(variable_name)))
def test_pyspark_livy_sampling_options():
    """Pyspark command generation for each sampling configuration
    (encode-based variant), including the encode=False code path."""
    variable_name = "var_name"

    def encoded_expected(op_chain):
        # Default path: rows are encoded with the configured encoding.
        return Command(
            u'for {} in {}.toJSON().{}: print({}.encode("{}"))'.format(
                LONG_RANDOM_VARIABLE_NAME, variable_name, op_chain,
                LONG_RANDOM_VARIABLE_NAME, conf.pyspark_dataframe_encoding()))

    def plain_expected(op_chain):
        # encode=False path: rows are printed without encoding.
        return Command(
            u'for {} in {}.toJSON().{}: print({})'.format(
                LONG_RANDOM_VARIABLE_NAME, variable_name, op_chain,
                LONG_RANDOM_VARIABLE_NAME))

    encoded_cases = [
        (dict(samplemethod='take', maxrows=120), 'take(120)'),
        (dict(samplemethod='take', maxrows=-1), 'collect()'),
        (dict(samplemethod='sample', samplefraction=0.25, maxrows=-1),
         'sample(False, 0.25).collect()'),
        (dict(samplemethod='sample', samplefraction=0.33, maxrows=3234),
         'sample(False, 0.33).take(3234)'),
        (dict(samplemethod=None, maxrows=100), 'take(100)'),
    ]
    for kwargs, op_chain in encoded_cases:
        sparkcommand = SparkStoreCommand(variable_name, **kwargs)
        assert_equals(sparkcommand._pyspark_command(variable_name),
                      encoded_expected(op_chain))

    plain_cases = [
        (dict(samplemethod='sample', samplefraction=0.33, maxrows=3234),
         'sample(False, 0.33).take(3234)'),
        (dict(samplemethod=None, maxrows=100), 'take(100)'),
    ]
    for kwargs, op_chain in plain_cases:
        sparkcommand = SparkStoreCommand(variable_name, **kwargs)
        assert_equals(sparkcommand._pyspark_command(variable_name, False),
                      plain_expected(op_chain))
def test_to_command_pyspark3():
    """The 'pyspark3' kind dispatches to the pyspark builder with encode=False."""
    df_name = "var_name"
    store_command = SparkStoreCommand(df_name)
    store_command._pyspark_command = MagicMock(return_value=MagicMock())
    store_command.to_command("pyspark3", df_name)
    store_command._pyspark_command.assert_called_with(df_name, False)