def test_pyspark_livy_sql_options():
    """Check the PySpark command generated for each sampling/row-limit combination.

    Each case pairs the keyword arguments passed to ``SQLQuery`` with the
    command template that ``_pyspark_command`` is expected to produce.
    """
    query = "abc"
    cases = [
        # Row limit only: take(n) straight off the JSON rows.
        (
            dict(samplemethod='take', maxrows=120),
            u'for {} in sqlContext.sql(u"""{}""").toJSON().take(120): print({}.encode("{}"))',
        ),
        # Negative maxrows: everything is collected.
        (
            dict(samplemethod='take', maxrows=-1),
            u'for {} in sqlContext.sql(u"""{}""").toJSON().collect(): print({}.encode("{}"))',
        ),
        # Sampling with a negative maxrows: sample, then collect.
        (
            dict(samplemethod='sample', samplefraction=0.25, maxrows=-1),
            u'for {} in sqlContext.sql(u"""{}""").toJSON().sample(False, 0.25).collect(): print({}.encode("{}"))',
        ),
        # Sampling with a row limit: sample, then take(n).
        (
            dict(samplemethod='sample', samplefraction=0.33, maxrows=3234),
            u'for {} in sqlContext.sql(u"""{}""").toJSON().sample(False, 0.33).take(3234): print({}.encode("{}"))',
        ),
    ]

    for options, template in cases:
        sqlquery = SQLQuery(query, **options)
        actual = sqlquery._pyspark_command()
        expected = Command(template.format(LONG_RANDOM_VARIABLE_NAME,
                                           query,
                                           LONG_RANDOM_VARIABLE_NAME,
                                           conf.pyspark_sql_encoding()))
        assert_equals(actual, expected)
def _pyspark_command(self):
    """Build the PySpark statement that runs this query and prints its rows.

    The statement evaluates ``self.query`` through ``sqlContext.sql``,
    converts the result with ``toJSON()``, appends ``sample(False, fraction)``
    when ``self.samplemethod`` is ``u'sample'``, and then fetches rows with
    ``take(self.maxrows)`` when ``self.maxrows`` is non-negative or with
    ``collect()`` otherwise.  Each fetched row is printed after being encoded
    with the value returned by ``conf.pyspark_sql_encoding()``.

    Returns:
        A ``Command`` wrapping the generated statement text.
    """
    rows_source = u'sqlContext.sql(u"""{}""").toJSON()'.format(self.query)

    if self.samplemethod == u'sample':
        rows_source = u'{}.sample(False, {})'.format(rows_source, self.samplefraction)

    if self.maxrows >= 0:
        fetch_expr = u'{}.take({})'.format(rows_source, self.maxrows)
    else:
        fetch_expr = u'{}.collect()'.format(rows_source)

    # The same generated variable name is used as the loop target and as the
    # object whose ``encode`` result is printed.
    loop_var = constants.LONG_RANDOM_VARIABLE_NAME
    statement = u'for {} in {}: print({}.encode("{}"))'.format(
        loop_var,
        fetch_expr,
        loop_var,
        conf.pyspark_sql_encoding(),
    )
    return Command(statement)
def test_unicode_sql():
    """Check command generation for a query containing a non-ASCII character.

    Covers the PySpark and Scala commands, and verifies that requesting the
    R command raises ``NotImplementedError``.
    """
    query = u"SELECT 'è'"
    sqlquery = SQLQuery(query, samplemethod='take', maxrows=120)

    # PySpark flavour.
    pyspark_template = u'for {} in sqlContext.sql(u"""{}""").toJSON().take(120): print({}.encode("{}"))'
    pyspark_actual = sqlquery._pyspark_command()
    pyspark_expected = Command(pyspark_template.format(LONG_RANDOM_VARIABLE_NAME,
                                                       query,
                                                       LONG_RANDOM_VARIABLE_NAME,
                                                       conf.pyspark_sql_encoding()))
    assert_equals(pyspark_actual, pyspark_expected)

    # Scala flavour.
    scala_actual = sqlquery._scala_command()
    scala_expected = Command(u'sqlContext.sql("""{}""").toJSON.take(120).foreach(println)'.format(query))
    assert_equals(scala_actual, scala_expected)

    # R flavour: the call must raise NotImplementedError; returning normally fails the test.
    try:
        sqlquery._r_command()
    except NotImplementedError:
        pass
    else:
        assert False