# Example 1
def test_session(integration_url, capsys, session_kind, params):
    """End-to-end check of a Livy session: remote code execution, stdout
    capture, error propagation, dataframe upload/download round-trips,
    and session shutdown on context exit."""

    # The integration endpoint must be reachable before anything else.
    assert _livy_available(integration_url)

    with LivySession.create(integration_url, kind=session_kind) as session:

        assert session.state == SessionState.IDLE

        # Output printed remotely should surface through capsys locally.
        session.run(params.print_foo_code)
        captured = capsys.readouterr()
        assert captured == (params.print_foo_output, "")

        session.run(params.create_dataframe_code)
        capsys.readouterr()  # discard any output from dataframe creation

        session.run(params.dataframe_count_code)
        captured = capsys.readouterr()
        assert captured == (params.dataframe_count_output, "")

        # Remote execution errors must be re-raised locally.
        with pytest.raises(SparkRuntimeError):
            session.run(params.error_code)

        assert session.download("df").equals(RANGE_DATAFRAME)

        # Upload -> transform remotely -> download round trip (numeric).
        session.upload("uploaded", RANGE_DATAFRAME)
        session.run(params.dataframe_multiply_code)
        assert session.download("multiplied").equals(RANGE_DATAFRAME * 2)

        # Same round trip with string data.
        session.upload("text", TEXT_DATAFRAME)
        session.run(params.dataframe_trim_code)
        trimmed = session.download("trimmed")
        assert trimmed.equals(TEXT_DATAFRAME.applymap(lambda cell: cell.strip()))

    # Leaving the context manager must stop the remote session.
    assert _session_stopped(integration_url, session.session_id)
# Example 2
def test_sql_session():
    """Run PySpark code in a Livy session and verify the resulting
    dataframe can be read back locally with the expected row count.

    NOTE(review): despite the name, this test opens a PYSPARK session,
    not a SQL one — consider renaming once no external tooling selects
    tests by this name.
    """

    with LivySession(LIVY_URL, kind=SessionKind.PYSPARK, auth=auth) as session:

        # Create the dataframe remotely, then read it back locally.
        session.run(PYSPARK_CREATE_DF)

        df = session.read('df')
        count = df.count()['value']
        assert count == 100
# Example 3
def test_sql_session():
    """Exercise a SQL-kind Livy session: view creation, query output,
    error propagation, read_sql round-trip, and shutdown."""

    # Guard: the Livy server must be up before running anything.
    assert livy_available()

    with LivySession(LIVY_URL, kind=SessionKind.SQL) as session:

        assert session.state == SessionState.IDLE

        session.run(SQL_CREATE_VIEW)
        result = session.run('SELECT COUNT(*) FROM view')
        assert result.json['data'] == [[100]]

        # Malformed SQL must surface as a SparkRuntimeError locally.
        with pytest.raises(SparkRuntimeError):
            session.run('not valid SQL!')

        frame = session.read_sql('SELECT * FROM view')
        assert frame.equals(pandas.DataFrame({'id': range(100)}))

    # The remote session must be gone once the context manager exits.
    assert session_stopped(session.session_id)
# Example 4
def test_sql_session(integration_url):
    """SQL session integration test: create a view, query it, check
    error handling, and download the full view as a dataframe."""

    # Skip-guard: the integration endpoint must be reachable.
    assert _livy_available(integration_url)

    with LivySession.create(integration_url, kind=SessionKind.SQL) as session:

        assert session.state == SessionState.IDLE

        session.run(SQL_CREATE_VIEW)
        count_output = session.run("SELECT COUNT(*) FROM view")
        assert count_output.json["data"] == [[100]]

        # Invalid SQL must be re-raised locally as SparkRuntimeError.
        with pytest.raises(SparkRuntimeError):
            session.run("not valid SQL!")

        downloaded = session.download_sql("SELECT * FROM view")
        assert downloaded.equals(RANGE_DATAFRAME)

    # Exiting the context manager must stop the remote session.
    assert _session_stopped(integration_url, session.session_id)
# Example 5
def test_sql_session(integration_url):
    """Verify SQL statements run through a Livy session and that
    read_sql returns the expected pandas dataframe."""

    assert livy_available(integration_url)

    with LivySession(integration_url, kind=SessionKind.SQL) as session:

        assert session.state == SessionState.IDLE

        session.run(SQL_CREATE_VIEW)
        count_result = session.run("SELECT COUNT(*) FROM view")
        assert count_result.json["data"] == [[100]]

        # Broken SQL must raise rather than fail silently.
        with pytest.raises(SparkRuntimeError):
            session.run("not valid SQL!")

        frame = session.read_sql("SELECT * FROM view")
        assert frame.equals(pandas.DataFrame({"id": range(100)}))

    # Remote session must be stopped after the context manager exits.
    assert session_stopped(integration_url, session.session_id)
# Example 6
    def exec_query(self, query, params=None, **kwargs):
        """
        Executes Query in the database.

        :param query: str - query to be executed.
            NOTE(review): currently unused — the body runs a hard-coded
            ``show databases`` statement; confirm whether ``query`` was
            meant to be interpolated here.
        :param params: list - list of parameters to be used if necessary
            in query. NOTE(review): also currently unused; see above.
        :param kwargs: ``df_variable_name`` (str, default ``'df'``)
            selects which remote variable is read back.
        :return: result of query as returned by ``session.read``
        """
        df_variable_name = kwargs.get('df_variable_name', 'df')

        with LivySession.create(**self.params) as session:
            # Run some code on the remote cluster.
            # Bug fix: the previous ``.format(*params)`` raised a
            # TypeError whenever ``params`` was left at its default of
            # None (``*None`` is not unpackable), and the template has
            # no placeholders anyway — so the call is dropped entirely.
            session.run(dedent("""
                spark.sql('use default')
                df = spark.sql('show databases')
            """))

            # Retrieve the result as a local dataframe.
            df = session.read(df_variable_name)

        return df
# Example 7
def test_session(capsys, session_kind, params):
    """Smoke-test a Livy session: run code remotely, capture stdout,
    propagate errors, and read back a dataframe."""

    with LivySession(LIVY_URL, kind=session_kind, auth=auth) as session:

        assert session.state == SessionState.IDLE

        # stdout produced remotely should be captured locally.
        session.run(params.print_foo_code)
        out_err = capsys.readouterr()
        assert out_err == (params.print_foo_output, '')

        session.run(params.create_dataframe_code)
        capsys.readouterr()  # drop any output from dataframe creation

        session.run(params.dataframe_count_code)
        out_err = capsys.readouterr()
        assert out_err == (params.dataframe_count_output, '')

        # Remote failures must raise SparkRuntimeError locally.
        with pytest.raises(SparkRuntimeError):
            session.run(params.error_code)

        received = session.read('df')
        expected = pandas.DataFrame({'value': range(100)}, dtype='int64')
        assert received.equals(expected)
# Example 8
def test_session(integration_url, capsys, session_kind, params):
    """Integration test of LivySession.create: code execution, stdout
    capture, error propagation, dataframe read, and shutdown."""

    # The integration endpoint must answer before the test proceeds.
    assert _livy_available(integration_url)

    with LivySession.create(integration_url, kind=session_kind) as session:

        assert session.state == SessionState.IDLE

        session.run(params.print_foo_code)
        captured = capsys.readouterr()
        assert captured == (params.print_foo_output, "")

        session.run(params.create_dataframe_code)
        capsys.readouterr()  # discard creation output

        session.run(params.dataframe_count_code)
        captured = capsys.readouterr()
        assert captured == (params.dataframe_count_output, "")

        # Errors raised remotely must surface locally.
        with pytest.raises(SparkRuntimeError):
            session.run(params.error_code)

        frame = session.read("df")
        assert frame.equals(RANGE_EXPECTED_DATAFRAME)

    # Closing the context manager must stop the remote session.
    assert _session_stopped(integration_url, session.session_id)
# Example 9
def test_session(capsys, session_kind, params):
    """Run a full session lifecycle against the configured Livy server:
    output capture, error handling, dataframe read-back, shutdown."""

    # Guard: make sure the server is reachable first.
    assert livy_available()

    with LivySession(LIVY_URL, kind=session_kind) as session:

        assert session.state == SessionState.IDLE

        session.run(params.print_foo_code)
        assert capsys.readouterr() == (params.print_foo_output, '')

        session.run(params.create_dataframe_code)
        capsys.readouterr()  # ignore output from dataframe creation

        session.run(params.dataframe_count_code)
        assert capsys.readouterr() == (params.dataframe_count_output, '')

        # A remote error must raise SparkRuntimeError here.
        with pytest.raises(SparkRuntimeError):
            session.run(params.error_code)

        frame = session.read('df')
        assert frame.equals(pandas.DataFrame({'value': range(100)}))

    # The session must be reported stopped after context exit.
    assert session_stopped(session.session_id)
# Example 10
def test_session(integration_url, capsys, session_kind, params):
    """Session lifecycle test against an arbitrary integration URL:
    execution, stdout capture, error propagation, and read-back."""

    assert livy_available(integration_url)

    with LivySession(integration_url, kind=session_kind) as session:

        assert session.state == SessionState.IDLE

        session.run(params.print_foo_code)
        stdout_stderr = capsys.readouterr()
        assert stdout_stderr == (params.print_foo_output, "")

        session.run(params.create_dataframe_code)
        capsys.readouterr()  # flush captured output before next check

        session.run(params.dataframe_count_code)
        stdout_stderr = capsys.readouterr()
        assert stdout_stderr == (params.dataframe_count_output, "")

        # Failing remote code must raise locally.
        with pytest.raises(SparkRuntimeError):
            session.run(params.error_code)

        result = session.read("df")
        assert result.equals(pandas.DataFrame({"value": range(100)}))

    # Context-manager exit must have stopped the session.
    assert session_stopped(integration_url, session.session_id)
                self.modelId) + "/contents?name=" + str(self.modelFilename)
        with open(self.modelFilename, 'rb') as pklFile:
            r = requests.post(url, data=pklFile, headers=headers)

        fileResult = r.json()
        if 'creationTimeStamp' in fileResult:
            self.log("Uploaded model file to server.")
        else:
            self.log("Error!")
        self.log(fileResult)


# CLI entry point: dispatch on the first argument ('train' or 'score').
if __name__ == "__main__":
    if len(sys.argv) < 2:
        # No subcommand supplied — print usage and exit.
        print("Not enough arguments! Usage: python " + str(sys.argv[0]) +
              " <command>")
        exit()
    else:
        core = Core()
        if sys.argv[1] == 'train':
            # Run the training code remotely via a Livy session...
            with LivySession(LIVY_URL) as session:
                session.run(train_code)
            # ...then copy the fitted model off the Spark host.
            # NOTE(review): assumes password-less scp between these hosts
            # and that the remote path exists — confirm deployment setup.
            os.system(
                'scp [email protected]:/tmp/models/logit [email protected]:/home/sasdemo/Viya_Spark_orchestration/'
            )
            core.createNewVersion()
            core.uploadModelFile()
        elif sys.argv[1] == 'score':
            # Scoring only needs the remote code executed; no copy-back here.
            with LivySession(LIVY_URL) as session:
                session.run(score_code)