Example #1
def test_session(integration_url, capsys, session_kind, params):

    assert _livy_available(integration_url)

    with LivySession.create(integration_url, kind=session_kind) as session:

        assert session.state == SessionState.IDLE

        session.run(params.print_foo_code)
        assert capsys.readouterr() == (params.print_foo_output, "")

        session.run(params.create_dataframe_code)
        capsys.readouterr()

        session.run(params.dataframe_count_code)
        assert capsys.readouterr() == (params.dataframe_count_output, "")

        with pytest.raises(SparkRuntimeError):
            session.run(params.error_code)

        assert session.download("df").equals(RANGE_DATAFRAME)

        session.upload("uploaded", RANGE_DATAFRAME)
        session.run(params.dataframe_multiply_code)
        assert session.download("multiplied").equals(RANGE_DATAFRAME * 2)

        session.upload("text", TEXT_DATAFRAME)
        session.run(params.dataframe_trim_code)
        assert session.download("trimmed").equals(
            TEXT_DATAFRAME.applymap(lambda s: s.strip()))

    assert _session_stopped(integration_url, session.session_id)
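
The helpers _livy_available and _session_stopped are defined elsewhere in the test module and are not shown on this page. A minimal sketch of what they could look like, assuming they poll Livy's REST API (the /sessions endpoints) with requests:

import requests


def _livy_available(livy_url):
    # The server is considered available once /sessions answers with HTTP 200.
    return requests.get(f"{livy_url}/sessions").status_code == 200


def _session_stopped(livy_url, session_id):
    # After the context manager exits, the session should either have been
    # deleted (404) or report a terminal state.
    response = requests.get(f"{livy_url}/sessions/{session_id}")
    if response.status_code == 404:
        return True
    return response.json()["state"] in {"shutting_down", "dead", "killed", "success"}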
Example #2
def test_sql_session(integration_url):

    assert _livy_available(integration_url)

    with LivySession.create(integration_url, kind=SessionKind.SQL) as session:

        assert session.state == SessionState.IDLE

        session.run(SQL_CREATE_VIEW)
        output = session.run("SELECT COUNT(*) FROM view")
        assert output.json["data"] == [[100]]

        with pytest.raises(SparkRuntimeError):
            session.run("not valid SQL!")

        assert session.download_sql("SELECT * FROM view").equals(
            RANGE_DATAFRAME)

    assert _session_stopped(integration_url, session.session_id)
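
SQL_CREATE_VIEW and RANGE_DATAFRAME are module-level constants not included in the snippet. One plausible definition, assuming a single column named value with one hundred rows (which is what the COUNT(*) assertion above expects):

import pandas

# 100 rows, so that SELECT COUNT(*) FROM view returns [[100]].
RANGE_DATAFRAME = pandas.DataFrame({"value": range(100)})

# Registers the temporary view queried by the test; the column name must
# match the pandas DataFrame above for download_sql(...).equals(...) to hold.
SQL_CREATE_VIEW = """
CREATE TEMPORARY VIEW view AS
SELECT id AS value FROM range(100)
"""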
Example #3
    def exec_query(self, query, params=None, **kwargs):
        """
        Execute a Spark SQL query on the remote cluster through Livy.

        :param query: str - query to be executed.
        :param params: list - positional parameters substituted into the query
            with str.format, if any.
        :return: pandas.DataFrame with the query result.
        """
        # Requires ``from textwrap import dedent`` and ``from livy import LivySession``.
        df_variable_name = kwargs.get('df_variable_name', 'df')

        if params:
            query = query.format(*params)

        with LivySession.create(**self.params) as session:
            # Run the query on the remote cluster, binding the result to a
            # Spark DataFrame named after ``df_variable_name``.
            session.run(dedent("""
                spark.sql('use default')
                {name} = spark.sql('''{query}''')
            """).format(name=df_variable_name, query=query))

            # Retrieve the result as a local pandas DataFrame
            df = session.read(df_variable_name)

        return df
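
A hedged usage sketch of the method above. SparkClient is a hypothetical wrapper class whose self.params carries the keyword arguments forwarded to LivySession.create (URL, session kind, and so on):

# SparkClient is hypothetical; any class exposing exec_query and holding
# the LivySession.create kwargs in self.params would do.
client = SparkClient(params={"url": "http://livy:8998"})

# Run an ad-hoc query and receive the result as a local pandas DataFrame.
databases = client.exec_query("SHOW DATABASES")
print(databases.head())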
Example #4
def test_session(integration_url, capsys, session_kind, params):

    assert _livy_available(integration_url)

    with LivySession.create(integration_url, kind=session_kind) as session:

        assert session.state == SessionState.IDLE

        session.run(params.print_foo_code)
        assert capsys.readouterr() == (params.print_foo_output, "")

        session.run(params.create_dataframe_code)
        capsys.readouterr()

        session.run(params.dataframe_count_code)
        assert capsys.readouterr() == (params.dataframe_count_output, "")

        with pytest.raises(SparkRuntimeError):
            session.run(params.error_code)

        assert session.read("df").equals(RANGE_EXPECTED_DATAFRAME)

    assert _session_stopped(integration_url, session.session_id)
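
The params fixture used by this test (one set of code snippets and expected outputs per session kind) is likewise defined elsewhere. A hypothetical shape for it, illustrating what the PySpark variant might contain:

from dataclasses import dataclass


# Hypothetical container; the real fixture is parametrised over session kinds
# and is not shown on this page.
@dataclass
class Params:
    print_foo_code: str
    print_foo_output: str
    create_dataframe_code: str
    dataframe_count_code: str
    dataframe_count_output: str
    error_code: str


PYSPARK_PARAMS = Params(
    print_foo_code='print("foo")',
    print_foo_output="foo\n",
    create_dataframe_code="df = spark.range(100)",
    dataframe_count_code="print(df.count())",
    dataframe_count_output="100\n",
    error_code="missing_variable",  # undefined name, triggers SparkRuntimeError
)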