Example #1
def test_pyspark_livy_sql_options():
    query = "abc"

    sqlquery = SQLQuery(query, samplemethod='take', maxrows=120)
    assert_equals(sqlquery._pyspark_command(),
                  Command(u'for {} in sqlContext.sql(u"""{} """).toJSON().take(120): print({}.encode("{}"))'\
                          .format(LONG_RANDOM_VARIABLE_NAME, query,
                                  LONG_RANDOM_VARIABLE_NAME, conf.pyspark_sql_encoding())))

    sqlquery = SQLQuery(query, samplemethod='take', maxrows=-1)
    assert_equals(sqlquery._pyspark_command(),
                  Command(u'for {} in sqlContext.sql(u"""{} """).toJSON().collect(): print({}.encode("{}"))'\
                          .format(LONG_RANDOM_VARIABLE_NAME, query,
                                  LONG_RANDOM_VARIABLE_NAME, conf.pyspark_sql_encoding())))

    sqlquery = SQLQuery(query,
                        samplemethod='sample',
                        samplefraction=0.25,
                        maxrows=-1)
    assert_equals(sqlquery._pyspark_command(),
                  Command(u'for {} in sqlContext.sql(u"""{} """).toJSON().sample(False, 0.25).collect(): '
                          u'print({}.encode("{}"))'\
                          .format(LONG_RANDOM_VARIABLE_NAME, query,
                                  LONG_RANDOM_VARIABLE_NAME, conf.pyspark_sql_encoding())))

    sqlquery = SQLQuery(query,
                        samplemethod='sample',
                        samplefraction=0.33,
                        maxrows=3234)
    assert_equals(sqlquery._pyspark_command(),
                  Command(u'for {} in sqlContext.sql(u"""{} """).toJSON().sample(False, 0.33).take(3234): '
                          u'print({}.encode("{}"))'\
                          .format(LONG_RANDOM_VARIABLE_NAME, query,
                                  LONG_RANDOM_VARIABLE_NAME, conf.pyspark_sql_encoding())))
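These snippets are lifted from sparkmagic's test suite and are shown without their module preamble. To run one on its own you would need roughly the imports below; the module paths reflect sparkmagic's layout and are assumptions, not part of the original examples:

# Assumed imports for the SQLQuery tests (paths may differ between sparkmagic versions)
from mock import MagicMock
from nose.tools import assert_equals

import sparkmagic.utils.configuration as conf
from sparkmagic.utils.constants import LONG_RANDOM_VARIABLE_NAME
from sparkmagic.livyclientlib.command import Command
from sparkmagic.livyclientlib.sqlquery import SQLQuery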
Example #2
def test_pyspark_livy_sql_options_spark2():
    query = "abc"
    sqlquery = SQLQuery(query, samplemethod='take', maxrows=120)

    assert_equals(sqlquery._pyspark_command("spark"),
                  Command(u'import sys\nfor {} in spark.sql(u"""{} """).toJSON(use_unicode=(sys.version_info.major > 2)).take(120): print({})'\
                          .format(LONG_RANDOM_VARIABLE_NAME, query,
                                  LONG_RANDOM_VARIABLE_NAME)))

    sqlquery = SQLQuery(query, samplemethod='take', maxrows=-1)
    assert_equals(sqlquery._pyspark_command("spark"),
                  Command(u'import sys\nfor {} in spark.sql(u"""{} """).toJSON(use_unicode=(sys.version_info.major > 2)).collect(): print({})'\
                          .format(LONG_RANDOM_VARIABLE_NAME, query,
                                  LONG_RANDOM_VARIABLE_NAME)))

    sqlquery = SQLQuery(query,
                        samplemethod='sample',
                        samplefraction=0.25,
                        maxrows=-1)
    assert_equals(sqlquery._pyspark_command("spark"),
                  Command(u'import sys\nfor {} in spark.sql(u"""{} """).toJSON(use_unicode=(sys.version_info.major > 2)).sample(False, 0.25).collect(): '
                          u'print({})'\
                          .format(LONG_RANDOM_VARIABLE_NAME, query,
                                  LONG_RANDOM_VARIABLE_NAME)))

    sqlquery = SQLQuery(query,
                        samplemethod='sample',
                        samplefraction=0.33,
                        maxrows=3234)
    assert_equals(sqlquery._pyspark_command("spark"),
                  Command(u'import sys\nfor {} in spark.sql(u"""{} """).toJSON(use_unicode=(sys.version_info.major > 2)).sample(False, 0.33).take(3234): '
                          u'print({})'\
                          .format(LONG_RANDOM_VARIABLE_NAME, query,
                                  LONG_RANDOM_VARIABLE_NAME)))
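Substituting the format arguments by hand makes the expectation easier to read. With maxrows=120, the first assertion above says sparkmagic should send a snippet like the following to the Livy session, where json_row stands in for the generated LONG_RANDOM_VARIABLE_NAME:

# Rendered form of the expected Spark 2 command (illustrative variable name)
import sys
for json_row in spark.sql(u"""abc """).toJSON(use_unicode=(sys.version_info.major > 2)).take(120): print(json_row)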
Example #3
def test_pyspark_livy_sql_options():
    query = "abc"

    sqlquery = SQLQuery(query, samplemethod='take', maxrows=120)
    assert_equals(sqlquery._pyspark_command(),
                  Command(u'for {} in sqlContext.sql(u"""{} """).toJSON().take(120): print({}.encode("{}"))'\
                          .format(LONG_RANDOM_VARIABLE_NAME, query,
                                  LONG_RANDOM_VARIABLE_NAME, conf.pyspark_sql_encoding())))

    sqlquery = SQLQuery(query, samplemethod='take', maxrows=-1)
    assert_equals(sqlquery._pyspark_command(),
                  Command(u'for {} in sqlContext.sql(u"""{} """).toJSON().collect(): print({}.encode("{}"))'\
                          .format(LONG_RANDOM_VARIABLE_NAME, query,
                                  LONG_RANDOM_VARIABLE_NAME, conf.pyspark_sql_encoding())))

    sqlquery = SQLQuery(query, samplemethod='sample', samplefraction=0.25, maxrows=-1)
    assert_equals(sqlquery._pyspark_command(),
                  Command(u'for {} in sqlContext.sql(u"""{} """).toJSON().sample(False, 0.25).collect(): '
                          u'print({}.encode("{}"))'\
                          .format(LONG_RANDOM_VARIABLE_NAME, query,
                                  LONG_RANDOM_VARIABLE_NAME, conf.pyspark_sql_encoding())))

    sqlquery = SQLQuery(query, samplemethod='sample', samplefraction=0.33, maxrows=3234)
    assert_equals(sqlquery._pyspark_command(),
                  Command(u'for {} in sqlContext.sql(u"""{} """).toJSON().sample(False, 0.33).take(3234): '
                          u'print({}.encode("{}"))'\
                          .format(LONG_RANDOM_VARIABLE_NAME, query,
                                  LONG_RANDOM_VARIABLE_NAME, conf.pyspark_sql_encoding())))
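For comparison, the Spark 1 path in this example goes through sqlContext and encodes each JSON row before printing. Rendered with maxrows=120, and assuming conf.pyspark_sql_encoding() resolves to a utf-8 default, the expected command is:

# Rendered form of the expected Spark 1 command (illustrative name, assumed utf-8 encoding)
for json_row in sqlContext.sql(u"""abc """).toJSON().take(120): print(json_row.encode("utf-8"))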
Example #4
def test_unicode_sql():
    query = u"SELECT 'è'"

    sqlquery = SQLQuery(query, samplemethod='take', maxrows=120)
    assert_equals(sqlquery._pyspark_command("spark"),
                  Command(u'for {} in spark.sql(u"""{} """).toJSON().take(120): print({}.encode("{}"))'\
                          .format(LONG_RANDOM_VARIABLE_NAME, query,
                                  LONG_RANDOM_VARIABLE_NAME, conf.pyspark_dataframe_encoding())))
    assert_equals(sqlquery._scala_command("spark"),
                  Command(u'spark.sql("""{}""").toJSON.take(120).foreach(println)'.format(query)))
    assert_equals(sqlquery._r_command("spark"),
                  Command(u'for ({} in (jsonlite:::toJSON(take(sql("{}"),120)))) {{cat({})}}'.format(LONG_RANDOM_VARIABLE_NAME, query, LONG_RANDOM_VARIABLE_NAME)))
Example #5
def test_unicode_sql():
    query = u"SELECT 'è'"

    sqlquery = SQLQuery(query, samplemethod='take', maxrows=120)
    assert_equals(sqlquery._pyspark_command(),
                  Command(u'for {} in sqlContext.sql(u"""{} """).toJSON().take(120): print({}.encode("{}"))'\
                          .format(LONG_RANDOM_VARIABLE_NAME, query,
                                  LONG_RANDOM_VARIABLE_NAME, conf.pyspark_sql_encoding())))
    assert_equals(sqlquery._scala_command(),
                  Command(u'sqlContext.sql("""{}""").toJSON.take(120).foreach(println)'.format(query)))

    try:
        sqlquery._r_command()
        assert False
    except NotImplementedError:
        pass
Example #6
def test_unicode_sql():
    query = u"SELECT 'è'"

    sqlquery = SQLQuery(query, samplemethod='take', maxrows=120)
    assert_equals(sqlquery._pyspark_command(),
                  Command(u'for {} in sqlContext.sql(u"""{} """).toJSON().take(120): print({}.encode("{}"))'\
                          .format(LONG_RANDOM_VARIABLE_NAME, query,
                                  LONG_RANDOM_VARIABLE_NAME, conf.pyspark_sql_encoding())))
    assert_equals(
        sqlquery._scala_command(),
        Command(u'sqlContext.sql("""{}""").toJSON.take(120).foreach(println)'.
                format(query)))

    try:
        sqlquery._r_command()
        assert False
    except NotImplementedError:
        pass
Example #7
def test_unicode_sql():
    query = u"SELECT 'è'"
    longvar = LONG_RANDOM_VARIABLE_NAME

    sqlquery = SQLQuery(query, samplemethod='take', maxrows=120)
    assert_equals(sqlquery._pyspark_command("spark"),
                  Command(u'import sys\nfor {} in spark.sql(u"""{} """).toJSON(use_unicode=(sys.version_info.major > 2)).take(120): print({})'\
                          .format(longvar, query,
                                  longvar)))
    assert_equals(
        sqlquery._scala_command("spark"),
        Command(
            u'spark.sql("""{}""").toJSON.take(120).foreach(println)'.format(
                query)))
    assert_equals(
        sqlquery._r_command("spark"),
        Command(
            u'for ({} in (jsonlite:::toJSON(take(sql("{}"),120)))) {{cat({})}}'
            .format(longvar, query, longvar)))
Example #8
def test_unicode_sql():
    query = u"SELECT 'è'"

    sqlquery = SQLQuery(query, samplemethod="take", maxrows=120)
    assert_equals(
        sqlquery._pyspark_command("spark"),
        Command(
            u'for {} in spark.sql(u"""{} """).toJSON().take(120): print({}.encode("{}"))'.format(
                LONG_RANDOM_VARIABLE_NAME, query, LONG_RANDOM_VARIABLE_NAME, conf.pyspark_sql_encoding()
            )
        ),
    )
    assert_equals(
        sqlquery._scala_command("spark"),
        Command(u'spark.sql("""{}""").toJSON.take(120).foreach(println)'.format(query)),
    )
    assert_equals(
        sqlquery._r_command(),
        Command(
            u'for ({} in (jsonlite:::toJSON(take(sql("{}"),120)))) {{cat({})}}'.format(
                LONG_RANDOM_VARIABLE_NAME, query, LONG_RANDOM_VARIABLE_NAME
            )
        ),
    )
Example #9
def test_to_command_pyspark3():
    variable_name = "var_name"
    sqlquery = SQLQuery("Query")
    sqlquery._pyspark_command = MagicMock(return_value=MagicMock())
    sqlquery.to_command("pyspark3", variable_name)
    sqlquery._pyspark_command.assert_called_with(variable_name, False)
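The mocked assertion pins down the dispatch contract: for the "pyspark3" kind, to_command must forward the session variable name plus False (the flag that controls whether printed rows are .encode()-d, which Python 3 does not need). A minimal sketch of such a dispatcher, offered as an assumption rather than sparkmagic's actual implementation:

# Hypothetical dispatch sketch -- NOT sparkmagic's actual to_command
def to_command(self, kind, sql_context_variable_name):
    if kind == "pyspark":
        # Python 2 sessions encode each printed JSON row
        return self._pyspark_command(sql_context_variable_name, True)
    elif kind == "pyspark3":
        # Python 3 prints unicode directly, hence the False flag
        return self._pyspark_command(sql_context_variable_name, False)
    elif kind == "spark":
        return self._scala_command(sql_context_variable_name)
    elif kind == "sparkr":
        return self._r_command(sql_context_variable_name)
    raise ValueError(u"Kind '{}' is not supported.".format(kind))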