Example no. 1
0
def test_pyspark_livy_sql_options():
    """Check the PySpark command built for every samplemethod/maxrows combination."""
    query = "abc"

    # (SQLQuery keyword arguments, expected RDD-chain suffix appended to .toJSON())
    cases = [
        ({'samplemethod': 'take', 'maxrows': 120}, u'.take(120)'),
        ({'samplemethod': 'take', 'maxrows': -1}, u'.collect()'),
        ({'samplemethod': 'sample', 'samplefraction': 0.25, 'maxrows': -1},
         u'.sample(False, 0.25).collect()'),
        ({'samplemethod': 'sample', 'samplefraction': 0.33, 'maxrows': 3234},
         u'.sample(False, 0.33).take(3234)'),
    ]
    for kwargs, chain_suffix in cases:
        sqlquery = SQLQuery(query, **kwargs)
        template = (u'for {} in sqlContext.sql(u"""{} """).toJSON()'
                    + chain_suffix
                    + u': print({}.encode("{}"))')
        expected = template.format(LONG_RANDOM_VARIABLE_NAME, query,
                                   LONG_RANDOM_VARIABLE_NAME,
                                   conf.pyspark_sql_encoding())
        assert_equals(sqlquery._pyspark_command(), Command(expected))
Example no. 2
0
def test_pyspark_livy_sql_options():
    """Verify _pyspark_command output for take/sample and bounded/unbounded maxrows."""
    query = "abc"

    def expected_command(chain):
        # Build the full expected PySpark loop for a given .toJSON() suffix.
        template = (u'for {} in sqlContext.sql(u"""{} """).toJSON()'
                    + chain + u': print({}.encode("{}"))')
        return Command(template.format(
            LONG_RANDOM_VARIABLE_NAME, query,
            LONG_RANDOM_VARIABLE_NAME, conf.pyspark_sql_encoding()))

    assert_equals(
        SQLQuery(query, samplemethod='take', maxrows=120)._pyspark_command(),
        expected_command(u'.take(120)'))
    assert_equals(
        SQLQuery(query, samplemethod='take', maxrows=-1)._pyspark_command(),
        expected_command(u'.collect()'))
    assert_equals(
        SQLQuery(query, samplemethod='sample', samplefraction=0.25,
                 maxrows=-1)._pyspark_command(),
        expected_command(u'.sample(False, 0.25).collect()'))
    assert_equals(
        SQLQuery(query, samplemethod='sample', samplefraction=0.33,
                 maxrows=3234)._pyspark_command(),
        expected_command(u'.sample(False, 0.33).take(3234)'))
Example no. 3
0
 def _pyspark_command(self):
     """Build a Command that runs this SQL query in PySpark and prints each JSON row."""
     base = u'sqlContext.sql(u"""{} """).toJSON()'.format(self.query)
     if self.samplemethod == u'sample':
         base += u'.sample(False, {})'.format(self.samplefraction)
     # A negative maxrows means "no limit": collect every row instead of take().
     if self.maxrows >= 0:
         base += u'.take({})'.format(self.maxrows)
     else:
         base += u'.collect()'
     loop = u'for {} in {}: print({}.encode("{}"))'.format(
         constants.LONG_RANDOM_VARIABLE_NAME, base,
         constants.LONG_RANDOM_VARIABLE_NAME, conf.pyspark_sql_encoding())
     return Command(loop)
Example no. 4
0
 def _pyspark_command(self):
     """Return the PySpark Command that evaluates this SQL query and prints rows."""
     pipeline = u'sqlContext.sql(u"""{} """).toJSON()'.format(self.query)
     if self.samplemethod == u'sample':
         pipeline = pipeline + u'.sample(False, {})'.format(self.samplefraction)
     if self.maxrows < 0:
         # No row cap requested: pull back the full result set.
         pipeline = u'{}.collect()'.format(pipeline)
     else:
         pipeline = u'{}.take({})'.format(pipeline, self.maxrows)
     var = constants.LONG_RANDOM_VARIABLE_NAME
     return Command(u'for {} in {}: print({}.encode("{}"))'.format(
         var, pipeline, var, conf.pyspark_sql_encoding()))
Example no. 5
0
def test_unicode_sql():
    """Non-ASCII queries must round-trip into the generated commands unchanged."""
    query = u"SELECT 'è'"

    sqlquery = SQLQuery(query, samplemethod='take', maxrows=120)
    expected_pyspark = (
        u'for {} in sqlContext.sql(u"""{} """).toJSON().take(120): '
        u'print({}.encode("{}"))').format(
            LONG_RANDOM_VARIABLE_NAME, query,
            LONG_RANDOM_VARIABLE_NAME, conf.pyspark_sql_encoding())
    assert_equals(sqlquery._pyspark_command(), Command(expected_pyspark))
    expected_scala = (
        u'sqlContext.sql("""{}""").toJSON.take(120).foreach(println)'
        .format(query))
    assert_equals(sqlquery._scala_command(), Command(expected_scala))

    # R support is not implemented in this version; the call must raise.
    raised = False
    try:
        sqlquery._r_command()
    except NotImplementedError:
        raised = True
    assert raised
Example no. 6
0
def test_unicode_sql():
    """Ensure unicode text in the query survives command generation."""
    query = u"SELECT 'è'"
    sqlquery = SQLQuery(query, samplemethod='take', maxrows=120)

    pyspark_template = (u'for {} in sqlContext.sql(u"""{} """).toJSON()'
                        u'.take(120): print({}.encode("{}"))')
    assert_equals(
        sqlquery._pyspark_command(),
        Command(pyspark_template.format(
            LONG_RANDOM_VARIABLE_NAME, query,
            LONG_RANDOM_VARIABLE_NAME, conf.pyspark_sql_encoding())))
    assert_equals(
        sqlquery._scala_command(),
        Command(u'sqlContext.sql("""{}""").toJSON.take(120).foreach(println)'
                .format(query)))

    # _r_command must raise NotImplementedError; anything else fails the test.
    try:
        sqlquery._r_command()
        assert False
    except NotImplementedError:
        pass
Example no. 7
0
 def _pyspark_command(self, sql_context_variable_name, encode_result=True):
     """Build the PySpark Command for this SQL query.

     sql_context_variable_name -- name of the SQLContext/SparkSession variable
         to call .sql() on (e.g. "spark" or "sqlContext").
     encode_result -- when True, encode each printed row with the configured
         encoding; Python 3's improved unicode support makes this unnecessary,
         so callers there pass False.
     """
     chain = u'{}.sql(u"""{} """).toJSON()'.format(sql_context_variable_name,
                                                   self.query)
     if self.samplemethod == u'sample':
         chain += u'.sample(False, {})'.format(self.samplefraction)
     # Negative maxrows means unlimited -> collect(); otherwise take(maxrows).
     if self.maxrows >= 0:
         chain += u'.take({})'.format(self.maxrows)
     else:
         chain += u'.collect()'
     var = constants.LONG_RANDOM_VARIABLE_NAME
     if encode_result:
         print_expr = '{}.encode("{}")'.format(var, conf.pyspark_sql_encoding())
     else:
         print_expr = var
     return Command(u'for {} in {}: print({})'.format(var, chain, print_expr))
Example no. 8
0
def test_unicode_sql():
    """Unicode query text must appear verbatim in PySpark, Scala and R commands."""
    query = u"SELECT 'è'"

    sqlquery = SQLQuery(query, samplemethod='take', maxrows=120)
    var = LONG_RANDOM_VARIABLE_NAME

    expected_pyspark = (
        u'for {} in spark.sql(u"""{} """).toJSON().take(120): '
        u'print({}.encode("{}"))').format(var, query, var,
                                          conf.pyspark_sql_encoding())
    assert_equals(sqlquery._pyspark_command("spark"),
                  Command(expected_pyspark))

    expected_scala = (
        u'spark.sql("""{}""").toJSON.take(120).foreach(println)'.format(query))
    assert_equals(sqlquery._scala_command("spark"), Command(expected_scala))

    expected_r = (
        u'for ({} in (jsonlite:::toJSON(take(sql("{}"),120)))) {{cat({})}}'
        .format(var, query, var))
    assert_equals(sqlquery._r_command(), Command(expected_r))
def test_unicode_sql():
    """Check all three language backends render a unicode query correctly."""
    query = u"SELECT 'è'"
    sqlquery = SQLQuery(query, samplemethod="take", maxrows=120)

    # PySpark: a for-loop printing each encoded JSON row.
    assert_equals(
        sqlquery._pyspark_command("spark"),
        Command((u'for {} in spark.sql(u"""{} """).toJSON().take(120): '
                 u'print({}.encode("{}"))').format(
                     LONG_RANDOM_VARIABLE_NAME, query,
                     LONG_RANDOM_VARIABLE_NAME,
                     conf.pyspark_sql_encoding())))

    # Scala: println over the JSON rows.
    assert_equals(
        sqlquery._scala_command("spark"),
        Command(u'spark.sql("""{}""").toJSON.take(120).foreach(println)'
                .format(query)))

    # R: cat each row from jsonlite's toJSON.
    assert_equals(
        sqlquery._r_command(),
        Command(
            u'for ({} in (jsonlite:::toJSON(take(sql("{}"),120)))) {{cat({})}}'
            .format(LONG_RANDOM_VARIABLE_NAME, query,
                    LONG_RANDOM_VARIABLE_NAME)))